camera: macOS port via darwin/ split (no shared-file pragmas)

Reuse the AVFoundation Swift files between iOS and macOS without sprinkling `#if canImport(UIKit)` through them. The split is:

darwin/Camera/ — platform-shared (AVFoundation only)
  - CameraPlugin — channel + instance map
  - CameraInstance — session + outputs + texture
  - CameraSession — AVCaptureSession + runtime-error obs
  - CaptureDevice — front/back discovery
  - PhotoOutput — AVCapturePhotoOutput
  - PreviewSink — CVPixelBuffer → FlutterTexture
  - VideoRecorder — AVAssetWriter
  - DeviceOrientation — wire-string enum

ios/Classes/Camera/ — iOS-only impls + extensions
  - AudioSession — AVAudioSession.upgradeForRecording
  - DeviceOrientationBridge — UIDevice.orientation listener
  - CameraSession+iOS — AVCaptureSessionWasInterrupted obs + InterruptionReason decode + the application-audio-session flags (all iOS-only on AVCaptureSession)
  - CameraSettings — UIApplication.openSettingsURLString
  - FlutterRegistrar+iOS — method-form of textures/messenger

macos/Classes/Camera/ — macOS no-op stubs (same surface)
  - AudioSession — no-op (no AVAudioSession on macOS)
  - DeviceOrientationBridge — no-op (desktops don't rotate)
  - CameraSession+macOS — no-op setupPlatform()
  - CameraSettings — NSWorkspace → System Settings' Privacy_Camera pane
  - FlutterRegistrar+macOS — property-form of textures/messenger

`CameraSession.init` now calls `setupPlatform()`, which each platform provides via an extension — this keeps the iOS-only interruption observer and the `automaticallyConfiguresApplicationAudioSession` / `usesApplicationAudioSession` flags (both iOS-only on AVCaptureSession) out of the shared file. Flash-mode in PhotoOutput uses `if #available(macOS 11/13, *)` rather than `#if`, since those are plain version gates, not platform splits.
The shared files compile into the iOS pod from `ios/Classes/Camera-shared/` and into the macOS pod from `macos/Classes/Camera-shared/`, each a mirror populated by a `prepare_command` in the podspec:

    rm -rf Classes/Camera-shared && cp -R ../darwin/Camera Classes/Camera-shared

Symlinks and `../` source globs both fail — Pathname.glob bails on symlinks, and CocoaPods silently drops paths that escape the pod directory. The mirror destinations are .gitignore'd. macOS UxPlugin now registers CameraPlugin alongside the others.
2026-05-13 18:53:46 +03:00
parent 16f986ab37
commit 14565ebd7a
22 changed files with 282 additions and 106 deletions
--- a/darwin/Camera/VideoRecorder.swift
+++ b/darwin/Camera/VideoRecorder.swift
@@ -0,0 +1,563 @@
+import AVFoundation
+import CoreMedia
+import Foundation
+
+/// Owns one `AVAssetWriter`. Mirrors telegram-ios's
+/// [VideoRecorder.swift](file:///Users/agra/projects/telegram-ios/submodules/Camera/Sources/VideoRecorder.swift)
+/// state machine closely — lazy per-type input creation,
+/// gated `startWriting`, pending-audio-buffer queue:
+///
+/// 1. `start()` only creates the `AVAssetWriter` shell and sets
+///    `recordingStartSampleTime` to wall-clock now. No inputs yet.
+/// 2. First **video** sample → create `videoInput` from its
+///    `CMFormatDescription` (`sourceFormatHint:`) + transform.
+///    Pre-`startWriting` because audio input may still be pending.
+/// 3. First **audio** sample (if `hasAudio`) → create `audioInput`
+///    with sample-rate / channel-layout extracted from the audio
+///    `CMFormatDescription` merged into `baseAudioSettings`
+///    (`recommendedAudioSettingsForAssetWriter`).
+/// 4. Next video sample arrives with both inputs added →
+///    `assetWriter.startWriting()`. Sample is dropped (telegram's
+///    behaviour — initial frame loss is acceptable, the writer needs
+///    one cycle to settle).
+/// 5. Subsequent video sample → `startSession(atSourceTime: pts)`,
+///    `recordingStartSampleTime = pts`. Appends begin.
+/// 6. Audio samples that arrive before `recordingStartSampleTime` is
+///    set are queued in `pendingAudioSampleBuffers`. After each
+///    successful video append, the queue is drained for samples whose
+///    `endTime <= lastVideoSampleTime`.
+/// 7. `stop()` sets `recordingStopSampleTime` to wall-clock now.
+///    Sample callbacks set `hasAllVideoBuffers` / `hasAllAudioBuffers`
+///    when their PTS crosses the stop time. `maybeFinish()` runs when
+///    both flags are set, gates `finishWriting` on
+///    `writer.status == .writing`. If audio never arrived, the audio
+///    flag is set synchronously in `stop()` so the video side can
+///    complete on its own.
+///
+/// Sample-count diagnostics emit via [onDiagnostic] at each major
+/// checkpoint so the operator can verify "audio actually captured"
+/// without instrumenting the call sites. The closure is wired to the
+/// Dart-side `Log.tag('camera').i(...)` via the `ux/camera/events`
+/// channel — visible in `~/banlu/tools/log_server/data/banlu.jsonl`.
+final class VideoRecorder {
+    /// Rotation transform to embed as `AVAssetWriterInput.transform`
+    /// for a given Flutter `DeviceOrientation`.
+    ///
+    /// Source buffers are portrait-shape (see
+    /// [CameraInstance.applyVideoOrientationOnPreview]), so the table
+    /// assumes a portrait source — see [CameraOrientationTests] for
+    /// the four cases.
+    ///
+    /// - Parameter orientation: Orientation to encode into the file.
+    /// - Returns: `.identity` for `.portraitUp`; otherwise a rotation
+    ///   by π, -π/2 or +π/2 radians.
+    public static func transform(
+        for orientation: DeviceOrientationFlutter
+    ) -> CGAffineTransform {
+        let radians: CGFloat
+        switch orientation {
+        case .portraitUp:
+            // Upright needs no rotation at all.
+            return .identity
+        case .portraitDown:
+            radians = .pi
+        case .landscapeLeft:
+            radians = -.pi / 2
+        case .landscapeRight:
+            radians = .pi / 2
+        }
+        return CGAffineTransform(rotationAngle: radians)
+    }
+
+    // MARK: - immutable config
+
+    /// Destination file handed to `AVAssetWriter`; also the path that
+    /// is removed when the recording is cancelled or fails.
+    private let url: URL
+    /// Rotation assigned to the video input's `transform` when that
+    /// input is created.
+    private let videoTransform: CGAffineTransform
+    /// Whether an audio track is expected. When `true`, the writer is
+    /// not started until the audio input exists.
+    private let hasAudio: Bool
+    /// Output settings for the video input; `nil` is treated as an
+    /// empty dictionary when the input is created.
+    private let baseVideoSettings: [String: Any]?
+    /// Baseline audio output settings; sample rate, channel count and
+    /// channel layout are overwritten from the first audio sample.
+    private let baseAudioSettings: [String: Any]
+    /// Queue that `stop`, `cancel` and the sample handlers hop onto
+    /// before touching mutable state.
+    private let recorderQueue: DispatchQueue
+
+    // MARK: - mutable state (always touched on recorderQueue)
+
+    private var writer: AVAssetWriter?
+    private var videoInput: AVAssetWriterInput?
+    private var audioInput: AVAssetWriterInput?
+
+    /// Wall-clock "start" time set by [start], then overwritten to the
+    /// first video sample's PTS once the session is started. Used to
+    /// gate samples whose PTS is older than start.
+    private var recordingStartSampleTime: CMTime = .invalid
+
+    /// Set by [stop]. Samples whose PTS crosses this set the matching
+    /// `hasAllXBuffers` flag and trigger [maybeFinish].
+    private var recordingStopSampleTime: CMTime = .invalid
+
+    /// PTS of the last video sample successfully appended. Used to
+    /// gate audio drains (audio samples whose `endTime` exceeds this
+    /// stay queued until video catches up).
+    private var lastVideoSampleTime: CMTime = .invalid
+
+    private var startedSession = false
+    private var stopped = false
+    private var hasAllVideoBuffers = false
+    private var hasAllAudioBuffers = false
+    private var failed = false
+
+    /// Audio samples arriving before video has caught up. Drained
+    /// after each successful video append.
+    private var pendingAudioSampleBuffers: [CMSampleBuffer] = []
+
+    private var completion: ((Result<URL, NSError>) -> Void)?
+
+    // MARK: - diagnostics (emit via [onDiagnostic] → ux.Log)
+
+    /// Forwards `message` to [onDiagnostic]; does nothing while no
+    /// sink is installed.
+    private func diag(_ message: String) {
+        guard let sink = onDiagnostic else { return }
+        sink(message)
+    }
+
+    /// Video samples that passed the format check in `handleVideo`.
+    private var videoReceived: Int = 0
+    /// Video samples the video input accepted.
+    private var videoAppended: Int = 0
+    /// Audio samples that passed the format check in `handleAudio`.
+    private var audioReceived: Int = 0
+    /// Audio samples the audio input accepted.
+    private var audioAppended: Int = 0
+    /// Running total of audio samples ever placed on the pending
+    /// queue (not the current queue length).
+    private var audioQueued: Int = 0
+
+    /// Set by [CameraInstance] to ship diagnostic messages over the
+    /// `ux/camera/events` channel as `{event: "diagnostic"}`. The
+    /// Dart-side controller turns those into `Log.tag('camera').i(...)`
+    /// — so they land in the log_server pipeline and can be tailed
+    /// from `~/banlu/tools/log_server/data/banlu.jsonl`.
+    var onDiagnostic: ((String) -> Void)?
+
+    // MARK: - init / start
+
+    /// Captures the recording configuration. The `AVAssetWriter` is
+    /// not created here — that happens in `start()`.
+    ///
+    /// - Parameters:
+    ///   - url: Destination file for the recording.
+    ///   - orientation: Orientation converted to the video transform.
+    ///   - hasAudio: Whether an audio track is expected.
+    ///   - baseVideoSettings: Video output settings, or `nil`.
+    ///   - baseAudioSettings: Baseline audio output settings.
+    ///   - recorderQueue: Queue used for all recorder work.
+    init(
+        url: URL,
+        orientation: DeviceOrientationFlutter,
+        hasAudio: Bool,
+        baseVideoSettings: [String: Any]?,
+        baseAudioSettings: [String: Any],
+        recorderQueue: DispatchQueue
+    ) {
+        self.recorderQueue = recorderQueue
+        self.url = url
+        self.hasAudio = hasAudio
+        self.baseAudioSettings = baseAudioSettings
+        self.baseVideoSettings = baseVideoSettings
+        // Resolve the orientation once, up front; the video input
+        // receives this transform when it is created.
+        self.videoTransform = VideoRecorder.transform(for: orientation)
+    }
+
+    /// Creates the `AVAssetWriter` for the destination file. Inputs
+    /// are added lazily on the first sample of each type — see the
+    /// class doc.
+    ///
+    /// - Throws: Whatever `AVAssetWriter(url:fileType:)` throws
+    ///   (typically a path / file-system issue). In that case no
+    ///   state is modified.
+    func start() throws {
+        // Assigned only once the throwing initializer has returned.
+        writer = try AVAssetWriter(url: url, fileType: .mp4)
+
+        // Wall-clock sentinel; [handleVideo] replaces it with the
+        // first video sample's PTS when it calls `writer.startSession`.
+        let wallClockNow = CACurrentMediaTime()
+        recordingStartSampleTime = CMTime(
+            seconds: wallClockNow,
+            preferredTimescale: CMTimeScale(NSEC_PER_SEC)
+        )
+        diag("start: file=\(url.lastPathComponent) hasAudio=\(hasAudio)")
+    }
+
+    /// Hard cancel: discards queued audio, calls `cancelWriting` when
+    /// the writer is in `.writing`, and deletes the partial file.
+    /// Mirrors telegram-ios's
+    /// [`VideoRecorder.cancelRecording`](file:///Users/agra/projects/telegram-ios/submodules/Camera/Sources/VideoRecorder.swift#L329).
+    /// Used by [CameraInstance.dispose] when a recording is in flight
+    /// at teardown — nobody is left to receive the file, so waiting
+    /// for `finishWriting` to flush would be pointless.
+    ///
+    /// - Parameter completion: Invoked on `recorderQueue` once the
+    ///   cancel has been processed, including when the recorder was
+    ///   already stopped or failed.
+    func cancel(completion: (() -> Void)? = nil) {
+        recorderQueue.async {
+            // The caller is notified last on every path.
+            defer { completion?() }
+
+            guard !self.stopped, !self.failed else { return }
+
+            self.stopped = true
+            self.pendingAudioSampleBuffers.removeAll()
+            if self.writer?.status == .writing {
+                self.writer?.cancelWriting()
+            }
+            try? FileManager.default.removeItem(at: self.url)
+            self.diag("cancel: vRecv=\(self.videoReceived) aRecv=\(self.audioReceived)")
+
+            // A stop() may still be waiting for its result; resolve it
+            // so the caller's Future doesn't dangle.
+            let pendingStop = self.completion
+            self.completion = nil
+            pendingStop?(.failure(NSError(
+                domain: "ux.camera", code: -12,
+                userInfo: [NSLocalizedDescriptionKey: "Recording cancelled"]
+            )))
+        }
+    }
+
+    /// Stop. Sets `recordingStopSampleTime` so the next video / audio
+    /// sample crossing it flips the matching `hasAllXBuffers` flag,
+    /// which triggers `maybeFinish` → `finishWriting`. Completion
+    /// fires once when the writer finishes.
+    ///
+    /// Idempotent: a second call while a stop is already in flight is
+    /// silently dropped.
+    ///
+    /// The completion is resolved immediately (on `recorderQueue`)
+    /// instead of waiting for samples when:
+    /// - the recorder is already cancelled, failed or finished —
+    ///   `.success` if the writer completed, otherwise `.failure`;
+    /// - no session was ever started (`.failure`, code -11);
+    /// - the session started but the writer is no longer `.writing`
+    ///   (`.failure` with the writer's error).
+    ///
+    /// - Parameter completion: Receives the file URL on success or an
+    ///   `NSError` describing why no file was produced.
+    func stop(completion: @escaping (Result<URL, NSError>) -> Void) {
+        recorderQueue.async {
+            if self.completion != nil { return }
+
+            // Terminal state already reached. The sample handlers
+            // return early on `stopped` / `failed`, so `maybeFinish`
+            // can never run again — storing the closure would leave
+            // the caller waiting forever.
+            if self.stopped || self.failed {
+                if !self.failed, self.writer?.status == .completed {
+                    completion(.success(self.url))
+                } else {
+                    let cause = (self.writer?.error as NSError?) ?? NSError(
+                        domain: "ux.camera", code: -13,
+                        userInfo: [
+                            NSLocalizedDescriptionKey:
+                                "Recording is no longer active"
+                        ]
+                    )
+                    completion(.failure(cause))
+                }
+                return
+            }
+
+            self.completion = completion
+
+            let stopTime = CMTime(
+                seconds: CACurrentMediaTime(),
+                preferredTimescale: CMTimeScale(NSEC_PER_SEC)
+            )
+            self.recordingStopSampleTime = stopTime
+
+            self.diag("stop: vRecv=\(self.videoReceived) vApp=\(self.videoAppended)"
+                + " aRecv=\(self.audioReceived) aApp=\(self.audioAppended)"
+                + " aQueued=\(self.pendingAudioSampleBuffers.count)")
+
+            // Nothing ever arrived — no sample callback will ever
+            // trigger `maybeFinish`. Cancel the writer instead.
+            if !self.startedSession {
+                self.writer?.cancelWriting()
+                self.failed = true
+                self.deliver(.failure(NSError(
+                    domain: "ux.camera", code: -11,
+                    userInfo: [
+                        NSLocalizedDescriptionKey:
+                            "Recording stopped before any samples were written"
+                    ]
+                )))
+                return
+            }
+
+            // The session started but the writer has since left
+            // `.writing` (for example after a failed append). The
+            // video handler only evaluates the stop gate while the
+            // writer is `.writing`, so waiting would never resolve.
+            if let writer = self.writer, writer.status != .writing {
+                self.fail(writer.error)
+                return
+            }
+
+            // No audio path (mic permission denied, etc.) — the audio
+            // side is "drained" by definition. `maybeFinish` then only
+            // waits for the next video sample whose PTS crosses
+            // `stopTime` (~one frame later, ~33ms at 30fps).
+            if self.audioInput == nil || self.audioReceived == 0 {
+                self.hasAllAudioBuffers = true
+            }
+        }
+    }
+
+    // MARK: - sample append (from videoBufferQueue / audioBufferQueue)
+
+    /// Hands a video sample to the recorder; the work itself runs
+    /// asynchronously on `recorderQueue` in `handleVideo`.
+    func appendVideo(_ sampleBuffer: CMSampleBuffer) {
+        recorderQueue.async {
+            self.handleVideo(sampleBuffer)
+        }
+    }
+
+    /// Hands an audio sample to the recorder; the work itself runs
+    /// asynchronously on `recorderQueue` in `handleAudio`.
+    func appendAudio(_ sampleBuffer: CMSampleBuffer) {
+        recorderQueue.async {
+            self.handleAudio(sampleBuffer)
+        }
+    }
+
+    // MARK: - recorderQueue handlers
+
+    /// Processes one video sample on `recorderQueue`, driving the
+    /// writer state machine: lazy input creation → `startWriting` →
+    /// `startSession` → append, then drains any queued audio that
+    /// video has caught up with.
+    ///
+    /// Samples are ignored once the recorder is stopped or failed, or
+    /// when the buffer carries no video format description. Writer
+    /// failures detected here — including a failed append and a
+    /// writer that left `.writing` while the input was not ready —
+    /// are routed through `fail(_:)` so a pending `stop()` is
+    /// resolved instead of left hanging.
+    ///
+    /// - Parameter sampleBuffer: Video sample forwarded by
+    ///   `appendVideo`.
+    private func handleVideo(_ sampleBuffer: CMSampleBuffer) {
+        guard !stopped, !failed else { return }
+        guard let writer = writer else { return }
+        guard
+            let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer),
+            CMFormatDescriptionGetMediaType(formatDescription) == kCMMediaType_Video
+        else { return }
+
+        videoReceived += 1
+        let presentationTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
+
+        // 1. Lazy create the video input on first video sample, with
+        //    the buffer's format description as `sourceFormatHint`.
+        if videoInput == nil {
+            let videoSettings = baseVideoSettings ?? [:]
+            if writer.canApply(outputSettings: videoSettings, forMediaType: .video) {
+                let input = AVAssetWriterInput(
+                    mediaType: .video,
+                    outputSettings: videoSettings,
+                    sourceFormatHint: formatDescription
+                )
+                input.expectsMediaDataInRealTime = true
+                input.transform = videoTransform
+                if writer.canAdd(input) {
+                    writer.add(input)
+                    videoInput = input
+                    diag("video input added")
+                } else {
+                    fail(NSError(domain: "ux.camera", code: -30,
+                                 userInfo: [NSLocalizedDescriptionKey: "canAdd videoInput failed"]))
+                    return
+                }
+            } else {
+                fail(NSError(domain: "ux.camera", code: -31,
+                             userInfo: [NSLocalizedDescriptionKey: "canApply videoSettings failed"]))
+                return
+            }
+        }
+
+        // 2. Writer state machine
+        if writer.status == .unknown {
+            // Drop samples that arrived BEFORE the wall-clock start
+            // (rare, but happens if the session was already running
+            // before start() was called).
+            if presentationTime < recordingStartSampleTime {
+                return
+            }
+            // Only start the writer when ALL needed inputs are ready.
+            if videoInput != nil && (audioInput != nil || !hasAudio) {
+                if !writer.startWriting() {
+                    fail(writer.error)
+                    return
+                }
+                diag("startWriting")
+            }
+            // Drop this sample regardless — the writer needs a cycle
+            // to settle. Next sample will hit the `.writing` branch.
+            return
+        } else if writer.status == .writing && !startedSession {
+            writer.startSession(atSourceTime: presentationTime)
+            recordingStartSampleTime = presentationTime
+            lastVideoSampleTime = presentationTime
+            startedSession = true
+            diag(String(format: "startSession at %.3fs", presentationTime.seconds))
+        }
+
+        // Drop pre-start samples (post-startSession).
+        if recordingStartSampleTime == .invalid
+            || presentationTime < recordingStartSampleTime {
+            return
+        }
+
+        if writer.status == .writing && startedSession {
+            // 3. Stop-time gating — set hasAllVideoBuffers when we
+            //    see a sample past stop time, trigger finish.
+            if recordingStopSampleTime.isValid
+                && presentationTime > recordingStopSampleTime {
+                hasAllVideoBuffers = true
+                maybeFinish()
+                return
+            }
+
+            guard let input = videoInput else { return }
+            // Wait briefly if the input isn't ready. Matches
+            // telegram-ios's pattern at VideoRecorder.swift:202-206.
+            // Real-time capture; we can't backpressure the camera.
+            // The wait is abandoned as soon as the writer leaves
+            // `.writing`, so a failed writer cannot park this queue
+            // in an endless loop.
+            var inputReady = input.isReadyForMoreMediaData
+            while !inputReady && writer.status == .writing {
+                RunLoop.current.run(until: Date(timeIntervalSinceNow: 0.05))
+                inputReady = input.isReadyForMoreMediaData
+            }
+            guard inputReady else {
+                fail(writer.error)
+                return
+            }
+
+            if input.append(sampleBuffer) {
+                lastVideoSampleTime = presentationTime
+                videoAppended += 1
+            } else if writer.status == .failed {
+                // A rejected append that moved the writer to `.failed`
+                // is terminal: no later sample would reach the stop
+                // gate above, so surface the error now.
+                fail(writer.error)
+                return
+            }
+
+            // 4. Drain any pending audio whose endTime now fits
+            //    under lastVideoSampleTime.
+            if !tryAppendingPendingAudioBuffers() {
+                fail(writer.error)
+            }
+        }
+    }
+
+    /// Processes one audio sample on `recorderQueue`.
+    ///
+    /// Ignored when the recorder is stopped or failed, when `hasAudio`
+    /// is `false`, or when the buffer carries no audio format
+    /// description. The first accepted sample creates the audio
+    /// input; later samples are stop-gated and then appended or
+    /// queued relative to `lastVideoSampleTime`.
+    ///
+    /// - Parameter sampleBuffer: Audio sample forwarded by
+    ///   `appendAudio`.
+    private func handleAudio(_ sampleBuffer: CMSampleBuffer) {
+        guard !stopped, !failed, hasAudio else { return }
+        guard let writer = writer else { return }
+        guard
+            let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer),
+            CMFormatDescriptionGetMediaType(formatDescription) == kCMMediaType_Audio
+        else { return }
+
+        audioReceived += 1
+
+        // 1. Lazy create audio input on first audio sample, with
+        //    sample-rate / channel-layout extracted from the
+        //    sample's CMAudioFormatDescription.
+        if audioInput == nil {
+            var audioSettings = baseAudioSettings
+
+            // Override the baseline with what the stream really carries.
+            if let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription) {
+                audioSettings[AVSampleRateKey] = asbd.pointee.mSampleRate
+                audioSettings[AVNumberOfChannelsKey] = asbd.pointee.mChannelsPerFrame
+            }
+
+            // Copy the channel layout bytes; an empty `Data` is stored
+            // when the format description exposes no layout.
+            var channelLayoutSize: Int = 0
+            let channelLayoutPtr = CMAudioFormatDescriptionGetChannelLayout(
+                formatDescription, sizeOut: &channelLayoutSize
+            )
+            let channelLayoutData: Data
+            if let ptr = channelLayoutPtr, channelLayoutSize > 0 {
+                channelLayoutData = Data(bytes: ptr, count: channelLayoutSize)
+            } else {
+                channelLayoutData = Data()
+            }
+            audioSettings[AVChannelLayoutKey] = channelLayoutData
+
+            // Unlike the video path, a rejected audio input only logs
+            // a diagnostic: `audioInput` stays nil and creation is
+            // attempted again on the next audio sample.
+            if writer.canApply(outputSettings: audioSettings, forMediaType: .audio) {
+                let input = AVAssetWriterInput(
+                    mediaType: .audio,
+                    outputSettings: audioSettings,
+                    sourceFormatHint: formatDescription
+                )
+                input.expectsMediaDataInRealTime = true
+                if writer.canAdd(input) {
+                    writer.add(input)
+                    audioInput = input
+                    diag("audio input added"
+                         + " sr=\(audioSettings[AVSampleRateKey] ?? "?")"
+                         + " ch=\(audioSettings[AVNumberOfChannelsKey] ?? "?")")
+                } else {
+                    diag("canAdd audioInput failed")
+                    return
+                }
+            } else {
+                diag("canApply audioSettings failed")
+                return
+            }
+        }
+
+        // 2. Need the video stream to have given us a session start
+        //    time before any audio can be appended.
+        if recordingStartSampleTime == .invalid { return }
+
+        // Discard audio that predates the recording start time.
+        let presentationTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
+        if presentationTime < recordingStartSampleTime { return }
+
+        // 3. Stop-time gating.
+        if recordingStopSampleTime.isValid
+            && presentationTime > recordingStopSampleTime {
+            hasAllAudioBuffers = true
+            maybeFinish()
+            return
+        }
+
+        // 4. Append (or queue) — drain pending first, then this
+        //    sample. tryAppendingAudioSampleBuffer chooses queue vs
+        //    immediate-append based on its endTime vs lastVideoSampleTime.
+        if !tryAppendingPendingAudioBuffers()
+            || !tryAppendingAudioSampleBuffer(sampleBuffer) {
+            fail(writer.error)
+        }
+    }
+
+    // MARK: - audio buffer queue
+
+    /// Routes one audio sample: queued while its `endTime` runs past
+    /// the latest video sample, appended straight away otherwise.
+    ///
+    /// - Returns: `true` when the sample was queued; otherwise the
+    ///   result of `internalAppendAudioSampleBuffer`.
+    // NOTE(review): before the session starts `lastVideoSampleTime`
+    // is `.invalid`, so the outcome depends on how CMTime orders
+    // invalid values — confirm this matches the intended queueing.
+    private func tryAppendingAudioSampleBuffer(_ sampleBuffer: CMSampleBuffer) -> Bool {
+        guard sampleBuffer.endTime > lastVideoSampleTime else {
+            return internalAppendAudioSampleBuffer(sampleBuffer)
+        }
+        // Audio is ahead of video — park it until video catches up.
+        pendingAudioSampleBuffers.append(sampleBuffer)
+        audioQueued += 1
+        return true
+    }
+
+    /// Appends every queued audio sample whose `endTime` is at or
+    /// before `lastVideoSampleTime`; the rest stay queued in their
+    /// original order.
+    ///
+    /// After the first failed append, no further appends are tried:
+    /// the failing sample is discarded and all later samples remain
+    /// queued.
+    ///
+    /// - Returns: `false` if an append reported failure, else `true`
+    ///   (including when the queue was empty).
+    private func tryAppendingPendingAudioBuffers() -> Bool {
+        if pendingAudioSampleBuffers.isEmpty { return true }
+
+        let queued = pendingAudioSampleBuffers
+        var remaining: [CMSampleBuffer] = []
+        remaining.reserveCapacity(queued.count)
+        var succeeded = true
+
+        for buffer in queued {
+            // Due only while no append has failed and video has
+            // reached the end of this buffer.
+            let isDue = succeeded && buffer.endTime <= lastVideoSampleTime
+            guard isDue else {
+                remaining.append(buffer)
+                continue
+            }
+            succeeded = internalAppendAudioSampleBuffer(buffer)
+        }
+
+        pendingAudioSampleBuffers = remaining
+        return succeeded
+    }
+
+    /// Appends one audio sample to the audio input.
+    ///
+    /// Does nothing (and reports success) before the session has
+    /// started or while no audio input exists. Waits for the input to
+    /// become ready, but only while the writer is still `.writing` —
+    /// once the writer has left that state the wait is abandoned so
+    /// `recorderQueue` cannot be parked in an endless loop.
+    ///
+    /// - Returns: `false` only when the writer reports an error;
+    ///   `true` when the sample was appended, skipped, or rejected
+    ///   without a writer error.
+    private func internalAppendAudioSampleBuffer(_ sampleBuffer: CMSampleBuffer) -> Bool {
+        guard startedSession, let input = audioInput else { return true }
+
+        var inputReady = input.isReadyForMoreMediaData
+        while !inputReady && writer?.status == .writing {
+            RunLoop.current.run(until: Date(timeIntervalSinceNow: 0.05))
+            inputReady = input.isReadyForMoreMediaData
+        }
+        // The writer stopped writing while we waited: skip the append
+        // and let the writer's error (if any) decide the outcome.
+        guard inputReady else {
+            return writer?.error == nil
+        }
+
+        if input.append(sampleBuffer) {
+            audioAppended += 1
+            return true
+        }
+        if writer?.error != nil {
+            return false
+        }
+        // Append returned false but no writer error — treat as
+        // recoverable. Telegram does the same.
+        return true
+    }
+
+    // MARK: - finish
+
+    /// Runs `finish()` exactly once, as soon as video is complete and
+    /// audio is either complete or not expected. No-op if the
+    /// recorder is already stopped or failed.
+    private func maybeFinish() {
+        if stopped || failed { return }
+        let audioDone = !hasAudio || hasAllAudioBuffers
+        guard hasAllVideoBuffers && audioDone else { return }
+        // Latch first so a later sample cannot start a second finish.
+        stopped = true
+        finish()
+    }
+
+    /// Flushes what audio can still be appended, then asks the writer
+    /// to finish and reports the outcome through `deliver` /
+    /// `failOnError` on `recorderQueue`.
+    private func finish() {
+        // Last chance for queued audio that video already covers.
+        _ = tryAppendingPendingAudioBuffers()
+
+        guard let activeWriter = writer else {
+            deliver(.failure(NSError(
+                domain: "ux.camera", code: -40,
+                userInfo: [NSLocalizedDescriptionKey: "writer missing on finish"]
+            )))
+            return
+        }
+
+        // `finishWriting` is only attempted from the `.writing` state.
+        if activeWriter.status != .writing {
+            diag("finish skipped — writer.status=\(activeWriter.status.rawValue)")
+            failOnError(activeWriter.error)
+            return
+        }
+
+        let outputURL = url
+        diag("finishWriting:"
+            + " vRecv=\(videoReceived) vApp=\(videoAppended)"
+            + " aRecv=\(audioReceived) aApp=\(audioAppended)"
+            + " aQueuedDrop=\(pendingAudioSampleBuffers.count)")
+
+        activeWriter.finishWriting { [weak self] in
+            // Hop back onto the recorder queue before touching state.
+            self?.recorderQueue.async {
+                guard let recorder = self else { return }
+                guard activeWriter.status == .completed else {
+                    recorder.failOnError(activeWriter.error)
+                    return
+                }
+                recorder.deliver(.success(outputURL))
+            }
+        }
+    }
+
+    /// Marks the recorder as failed and reports `error` via
+    /// `failOnError`. Only the first call has any effect.
+    private func fail(_ error: Error?) {
+        guard !failed else { return }
+        failed = true
+        failOnError(error)
+    }
+
+    /// Deletes the output file (best effort) and delivers a failure.
+    /// A `nil` error is replaced by a generic "AVAssetWriter failed"
+    /// error (code -41).
+    private func failOnError(_ error: Error?) {
+        try? FileManager.default.removeItem(at: url)
+
+        let reported: NSError
+        if let underlying = error {
+            reported = underlying as NSError
+        } else {
+            reported = NSError(
+                domain: "ux.camera", code: -41,
+                userInfo: [NSLocalizedDescriptionKey: "AVAssetWriter failed"]
+            )
+        }
+        deliver(.failure(reported))
+    }
+
+    /// Hands `outcome` to the pending `stop()` completion, if any.
+    /// The stored closure is cleared before it is invoked, so each
+    /// completion is called at most once.
+    private func deliver(_ outcome: Result<URL, NSError>) {
+        guard let pending = completion else { return }
+        completion = nil
+        pending(outcome)
+    }
+}
+
+// MARK: - CMSampleBuffer ergonomics
+
+private extension CMSampleBuffer {
+    /// Last instant covered by this buffer: presentation timestamp
+    /// plus duration, or just the presentation timestamp when the
+    /// duration is not flagged valid. Used to pace audio against
+    /// video.
+    var endTime: CMTime {
+        let start = CMSampleBufferGetPresentationTimeStamp(self)
+        let length = CMSampleBufferGetDuration(self)
+        guard length.flags.contains(.valid) else { return start }
+        return start + length
+    }
+}