I am recording movies body by body and infrequently a sound performs (from an MP3 asset). I wish to composite these sounds into the video on the appropriate timings. However this does not work. Actually pulling my hair out right here. I’ve tried the whole lot, together with including one after one other after which inserting silence in between (allegedly this pushes subsequent clips again) however nothing works.
Right here, _currentTime is the present time based on the video frames added, that are added at 20Hz.
You possibly can see I’m including silence lengthy sufficient to cowl the time from the tip of the final audio clip to now, plus additional padding to comprise the audio we’re about so as to add. Would not matter if I take away this, it simply does not work. Generally I can get two items of audio to play however by no means a 3rd and normally, solely the primary audio performs, after which nothing after.
I am utterly stumped.
func addFrame(_ pixelBuffer: CVPixelBuffer) {
guard CGSize(width: pixelBuffer.width, peak: pixelBuffer.peak) == _outputSize else { return }
let frameTime = CMTimeMake(worth: Int64(_frameCount), timescale: _frameRate)
if _videoInput?.isReadyForMoreMediaData == true {
_pixelBufferAdaptor?.append(pixelBuffer, withPresentationTime: frameTime)
_frameCount += 1
_currentTime = frameTime
}
}
func addMP3AudioClip(_ audioData: Knowledge) async throws {
let tempURL = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString + ".mp3")
defer {
attempt? FileManager.default.removeItem(at: tempURL)
}
attempt audioData.write(to: tempURL)
let asset = AVAsset(url: tempURL)
let period = attempt await asset.load(.period)
let audioTrack = attempt await asset.loadTracks(withMediaType: .audio).first!
// Insert silence
var currentAudioTime = _currentTime.convertScale(period.timescale, methodology: .default)
if CMTimeCompare(_lastAudioClipEndTime, currentAudioTime) < 0 {
// _lastAudioClipEndTime < currentAudioTime. Secure to insert silence. Generally, that is
// not true because of some form of timing mismatch and the time vary finally ends up NaN in that case.
let precedingSilence = CMTimeRangeFromTimeToTime(begin: _lastAudioClipEndTime, finish: currentAudioTime)
_audioTrack?.insertEmptyTimeRange(precedingSilence)
_audioTrack?.insertEmptyTimeRange(CMTimeRangeFromTimeToTime(begin: currentAudioTime, finish: CMTimeAdd(currentAudioTime, period)))
print("[VideoRecorder] (currentAudioTime.seconds) (period.seconds) (precedingSilence.begin.seconds) (precedingSilence.finish.seconds) (_lastAudioClipEndTime.seconds)")
} else {
currentAudioTime = _lastAudioClipEndTime
_audioTrack?.insertEmptyTimeRange(CMTimeRangeFromTimeToTime(begin: currentAudioTime, finish: CMTimeAdd(currentAudioTime, period)))
print("[VideoRecorder] (currentAudioTime.seconds) (period.seconds) (_lastAudioClipEndTime.seconds)")
}
// Audio clip
let timeRange = CMTimeRangeMake(begin: .zero, period: period)
attempt _audioTrack?.insertTimeRange(timeRange, of: audioTrack, at: currentAudioTime)
_lastAudioClipEndTime = CMTimeAdd(currentAudioTime, period)
}
For reference, the entire class is right here:
//
// VideoRecorder.swift
// RoBart
//
// Created by Bart Trzynadlowski on 10/2/24.
//
import UIKit
import AVFoundation
import Photographs
actor VideoRecorder {
personal var _assetWriter: AVAssetWriter?
personal var _videoInput: AVAssetWriterInput?
personal var _audioInput: AVAssetWriterInput?
personal var _pixelBufferAdaptor: AVAssetWriterInputPixelBufferAdaptor?
personal var _frameCount: Int64 = 0
personal let _frameRate: Int32
personal let _outputSize: CGSize
personal var _currentTime: CMTime = .zero
personal var _lastAudioClipEndTime: CMTime = .zero
personal var _audioMixer: AVMutableComposition?
personal var _audioTrack: AVMutableCompositionTrack?
init(outputSize: CGSize, frameRate: Int32) {
self._outputSize = outputSize
self._frameRate = frameRate
}
func startRecording() throws {
let outputURL = FileManager.default.temporaryDirectory.appendingPathComponent("output.mp4")
attempt? FileManager.default.removeItem(at: outputURL) // simply in case we crashed and a file continues to be there
_assetWriter = attempt AVAssetWriter(outputURL: outputURL, fileType: .mp4)
let videoSettings: [String: Any] = [
AVVideoCodecKey: AVVideoCodecType.h264,
AVVideoWidthKey: _outputSize.width,
AVVideoHeightKey: _outputSize.height,
]
_videoInput = AVAssetWriterInput(mediaType: .video, outputSettings: videoSettings)
_videoInput?.expectsMediaDataInRealTime = true
let rotationTransform = CGAffineTransform(rotationAngle: .pi / 2) // vertical orientation
_videoInput?.rework = rotationTransform
let sourcePixelBufferAttributes: [String: Any] = [
kCVPixelBufferPixelFormatTypeKey as String: Int(kCVPixelFormatType_32ARGB),
kCVPixelBufferWidthKey as String: _outputSize.width,
kCVPixelBufferHeightKey as String: _outputSize.height
]
_pixelBufferAdaptor = AVAssetWriterInputPixelBufferAdaptor(
assetWriterInput: _videoInput!,
sourcePixelBufferAttributes: sourcePixelBufferAttributes
)
let audioSettings: [String: Any] = [
AVFormatIDKey: kAudioFormatMPEG4AAC,
AVSampleRateKey: 44100,
AVNumberOfChannelsKey: 2,
AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue
]
_audioInput = AVAssetWriterInput(mediaType: .audio, outputSettings: audioSettings)
_audioInput?.expectsMediaDataInRealTime = false // audio consists right into a monitor and copied to video after we end recording
_audioMixer = AVMutableComposition()
_audioTrack = _audioMixer!.addMutableTrack(withMediaType: .audio, preferredTrackID: kCMPersistentTrackID_Invalid)
_assetWriter?.add(_videoInput!)
_assetWriter?.add(_audioInput!)
_assetWriter?.startWriting()
_assetWriter?.startSession(atSourceTime: .zero)
_currentTime = .zero
_lastAudioClipEndTime = .zero
}
func addFrame(_ pixelBuffer: CVPixelBuffer) {
guard CGSize(width: pixelBuffer.width, peak: pixelBuffer.peak) == _outputSize else { return }
let frameTime = CMTimeMake(worth: Int64(_frameCount), timescale: _frameRate)
if _videoInput?.isReadyForMoreMediaData == true {
_pixelBufferAdaptor?.append(pixelBuffer, withPresentationTime: frameTime)
_frameCount += 1
_currentTime = frameTime
}
}
func addMP3AudioClip(_ audioData: Knowledge) async throws {
let tempURL = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString + ".mp3")
defer {
attempt? FileManager.default.removeItem(at: tempURL)
}
attempt audioData.write(to: tempURL)
let asset = AVAsset(url: tempURL)
let period = attempt await asset.load(.period)
let audioTrack = attempt await asset.loadTracks(withMediaType: .audio).first!
// Insert silence
var currentAudioTime = _currentTime.convertScale(period.timescale, methodology: .default)
if CMTimeCompare(_lastAudioClipEndTime, currentAudioTime) < 0 {
// _lastAudioClipEndTime < currentAudioTime. Secure to insert silence. Generally, that is
// not true because of some form of timing mismatch and the time vary finally ends up NaN in that case.
let precedingSilence = CMTimeRangeFromTimeToTime(begin: _lastAudioClipEndTime, finish: currentAudioTime)
_audioTrack?.insertEmptyTimeRange(precedingSilence)
_audioTrack?.insertEmptyTimeRange(CMTimeRangeFromTimeToTime(begin: currentAudioTime, finish: CMTimeAdd(currentAudioTime, period)))
print("[VideoRecorder] (currentAudioTime.seconds) (period.seconds) (precedingSilence.begin.seconds) (precedingSilence.finish.seconds) (_lastAudioClipEndTime.seconds)")
} else {
currentAudioTime = _lastAudioClipEndTime
_audioTrack?.insertEmptyTimeRange(CMTimeRangeFromTimeToTime(begin: currentAudioTime, finish: CMTimeAdd(currentAudioTime, period)))
print("[VideoRecorder] (currentAudioTime.seconds) (period.seconds) (_lastAudioClipEndTime.seconds)")
}
// Audio clip
let timeRange = CMTimeRangeMake(begin: .zero, period: period)
attempt _audioTrack?.insertTimeRange(timeRange, of: audioTrack, at: currentAudioTime)
_lastAudioClipEndTime = CMTimeAdd(currentAudioTime, period)
}
func finishRecording() async throws {
guard let videoInput = _videoInput,
let audioInput = _audioInput,
let audioMixer = _audioMixer,
let audioTrack = _audioTrack else { return }
// Video is completed
videoInput.markAsFinished()
print("[VideoRecorder] Whole video size: (_currentTime.seconds)")
// Write audio
let audioOutputSettings: [String: Any] = [
AVFormatIDKey: kAudioFormatLinearPCM, // convert to PCM from MP3
AVSampleRateKey: 44100,
AVNumberOfChannelsKey: 2,
AVLinearPCMBitDepthKey: 16,
AVLinearPCMIsNonInterleaved: false,
AVLinearPCMIsFloatKey: false,
AVLinearPCMIsBigEndianKey: false
]
let audioReader = attempt AVAssetReader(asset: audioMixer)
let audioReaderOutput = AVAssetReaderTrackOutput(monitor: audioTrack, outputSettings: audioOutputSettings)
audioReader.add(audioReaderOutput)
audioReader.startReading()
// Learn the audio from the monitor we composed and add it to the video
whereas audioInput.isReadyForMoreMediaData == true {
if let sampleBuffer = audioReaderOutput.copyNextSampleBuffer() {
audioInput.append(sampleBuffer)
} else {
audioInput.markAsFinished()
break
}
}
// Write the asset file and put it aside to the picture library
return attempt await withCheckedThrowingContinuation { continuation in
_assetWriter?.finishWriting {
if let error = self._assetWriter?.error {
print("[VideoRecorder] Error writing asset file: (error)")
continuation.resume(throwing: error)
} else if let outputURL = self._assetWriter?.outputURL {
self.saveVideoToPhotoLibrary(outputURL: outputURL) { error in
if let error = error {
print("[VideoRecorder] Error saving video: (error)")
continuation.resume(throwing: error)
} else {
continuation.resume()
print("[VideoRecorder] Saved video to picture library")
}
}
} else {
continuation.resume(throwing: NSError(area: "VideoCreatorError", code: 0, userInfo: [NSLocalizedDescriptionKey: "Unknown error occurred"]))
}
}
}
}
personal func saveVideoToPhotoLibrary(outputURL: URL, completion: @escaping (Error?) -> Void) {
PHPhotoLibrary.shared().performChanges({
PHAssetChangeRequest.creationRequestForAssetFromVideo(atFileURL: outputURL)
}) { success, error in
attempt? FileManager.default.removeItem(at: outputURL)
if success {
completion(nil)
} else {
completion(error)
}
}
}
}