有没有办法使用 Swift 和 AudioKit 创建音频文件的频谱图？

Question

我正在尝试使用 Swift 为 macOS 应用程序从音频文件创建一个频谱图，如图所示。我正在使用 AppKit，但也可以实现 SwiftUI。我发现了音频套件，它似乎是用于此类事物的完美库，但我无法在音频套件存储库、音频套件 UI 或食谱中找到我正在寻找的任何示例。音频套件可以做到这一点吗？如果是这样，有人可以帮我吗？非常感谢！

我之前尝试过使用apple的示例项目并更改了AudioSpectrogram + AVCaptureAudioDataOutputSampleBufferDelegate文件中的代码。原代码如下：

extension AudioSpectrogram: AVCaptureAudioDataOutputSampleBufferDelegate {

public func captureOutput(_ output: AVCaptureOutput,
                          didOutput sampleBuffer: CMSampleBuffer,
                          from connection: AVCaptureConnection) {

    var audioBufferList = AudioBufferList()
    var blockBuffer: CMBlockBuffer?

    CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
        sampleBuffer,
        bufferListSizeNeededOut: nil,
        bufferListOut: &audioBufferList,
        bufferListSize: MemoryLayout.stride(ofValue: audioBufferList),
        blockBufferAllocator: nil,
        blockBufferMemoryAllocator: nil,
        flags: kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment,
        blockBufferOut: &blockBuffer)
    
    guard let data = audioBufferList.mBuffers.mData else {
        return
    }

    /// The _Nyquist frequency_ is the highest frequency that a sampled system can properly
    /// reproduce and is half the sampling rate of such a system. Although  this app doesn't use
    /// `nyquistFrequency` you may find this code useful to add an overlay to the user interface.
    if nyquistFrequency == nil {
        let duration = Float(CMSampleBufferGetDuration(sampleBuffer).value)
        let timescale = Float(CMSampleBufferGetDuration(sampleBuffer).timescale)
        let numsamples = Float(CMSampleBufferGetNumSamples(sampleBuffer))
        nyquistFrequency = 0.5 / (duration / timescale / numsamples)
    }

    if self.rawAudioData.count < AudioSpectrogram.sampleCount * 2 {
        let actualSampleCount = CMSampleBufferGetNumSamples(sampleBuffer)
        
        let ptr = data.bindMemory(to: Int16.self, capacity: actualSampleCount)
        let buf = UnsafeBufferPointer(start: ptr, count: actualSampleCount)
        
        rawAudioData.append(contentsOf: Array(buf))
    }

    while self.rawAudioData.count >= AudioSpectrogram.sampleCount {
        let dataToProcess = Array(self.rawAudioData[0 ..< AudioSpectrogram.sampleCount])
        self.rawAudioData.removeFirst(AudioSpectrogram.hopCount)
        self.processData(values: dataToProcess)
    }
 
    createAudioSpectrogram()
}

func configureCaptureSession() {
    // Also note that:
    //
    // When running in iOS, you must add a "Privacy - Microphone Usage
    // Description" entry.
    //
    // When running in macOS, you must add a "Privacy - Microphone Usage
    // Description" entry to `Info.plist`, and check "audio input" and
    // "camera access" under the "Resource Access" category of "Hardened
    // Runtime".
    switch AVCaptureDevice.authorizationStatus(for: .audio) {
        case .authorized:
                break
        case .notDetermined:
            sessionQueue.suspend()
            AVCaptureDevice.requestAccess(for: .audio,
                                          completionHandler: { granted in
                if !granted {
                    fatalError("App requires microphone access.")
                } else {
                    self.configureCaptureSession()
                    self.sessionQueue.resume()
                }
            })
            return
        default:
            // Users can add authorization in "Settings > Privacy > Microphone"
            // on an iOS device, or "System Preferences > Security & Privacy >
            // Microphone" on a macOS device.
            fatalError("App requires microphone access.")
    }
    
    captureSession.beginConfiguration()
    
    #if os(macOS)
    // Note than in macOS, you can change the sample rate, for example to
    // `AVSampleRateKey: 22050`. This reduces the Nyquist frequency and
    // increases the resolution at lower frequencies.
    audioOutput.audioSettings = [
        AVFormatIDKey: kAudioFormatLinearPCM,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMBitDepthKey: 16,
        AVNumberOfChannelsKey: 1]
    #endif
    
    if captureSession.canAddOutput(audioOutput) {
        captureSession.addOutput(audioOutput)
    } else {
        fatalError("Can't add `audioOutput`.")
    }
    
    guard
        let microphone = AVCaptureDevice.default(.builtInMicrophone,
                                                 for: .audio,
                                                 position: .unspecified),
        let microphoneInput = try? AVCaptureDeviceInput(device: microphone) else {
            fatalError("Can't create microphone.")
    }
    
    if captureSession.canAddInput(microphoneInput) {
        captureSession.addInput(microphoneInput)
    }
    
    captureSession.commitConfiguration()
}

/// Starts the audio spectrogram.
func startRunning() {
    sessionQueue.async {
        if AVCaptureDevice.authorizationStatus(for: .audio) == .authorized {
            self.captureSession.startRunning()
        }
    }
}

}

我去掉了configureCaptureSession函数并替换了其余的代码以获得以下代码：

public func captureBuffer() {
    
    var samplesArray:[Int16] = []
    
    let asset = AVAsset(url: audioFileUrl)

    
    let reader = try! AVAssetReader(asset: asset)

    let track = asset.tracks(withMediaType: AVMediaType.audio)[0]

        let settings = [
            AVFormatIDKey : kAudioFormatLinearPCM
        ]

        let readerOutput = AVAssetReaderTrackOutput(track: track, outputSettings: settings)
        reader.add(readerOutput)
        reader.startReading()


    while let buffer = readerOutput.copyNextSampleBuffer() {

       var audioBufferList = AudioBufferList(mNumberBuffers: 1, mBuffers: AudioBuffer(mNumberChannels: 1, mDataByteSize: 0, mData: nil))
       var blockBuffer: CMBlockBuffer?

       CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
        buffer,
        bufferListSizeNeededOut: nil,
        bufferListOut: &audioBufferList,
        bufferListSize:  MemoryLayout<AudioBufferList>.size,
        blockBufferAllocator: nil,
        blockBufferMemoryAllocator: nil,
        flags: kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment,
        blockBufferOut: &blockBuffer
       );
    
        let buffers = UnsafeBufferPointer<AudioBuffer>(start: &audioBufferList.mBuffers, count: Int(audioBufferList.mNumberBuffers))
        
        for buffer in buffers {

            let samplesCount = Int(buffer.mDataByteSize) / MemoryLayout<Int16>.size
            let samplesPointer = audioBufferList.mBuffers.mData!.bindMemory(to: Int16.self, capacity: samplesCount)
            let samples = UnsafeMutableBufferPointer<Int16>(start: samplesPointer, count: samplesCount)
            
            
            for sample in samples {

                //do something with you sample (which is Int16 amplitude value)
                samplesArray.append(sample)

            }
        }
        guard let data = audioBufferList.mBuffers.mData else {
            return
        }

        /// The _Nyquist frequency_ is the highest frequency that a sampled system can properly
        /// reproduce and is half the sampling rate of such a system. Although  this app doesn't use
        /// `nyquistFrequency` you may find this code useful to add an overlay to the user interface.
        if nyquistFrequency == nil {
            let duration = Float(CMSampleBufferGetDuration(buffer).value)
            let timescale = Float(CMSampleBufferGetDuration(buffer).timescale)
            let numsamples = Float(CMSampleBufferGetNumSamples(buffer))
            nyquistFrequency = 0.5 / (duration / timescale / numsamples)
        }

        if self.rawAudioData.count < AudioSpectrogram.sampleCount * 2 {
            let actualSampleCount = CMSampleBufferGetNumSamples(buffer)
            
            let ptr = data.bindMemory(to: Int16.self, capacity: actualSampleCount)
            let buf = UnsafeBufferPointer(start: ptr, count: actualSampleCount)
            
            rawAudioData.append(contentsOf: Array(buf))
        }

        while self.rawAudioData.count >= AudioSpectrogram.sampleCount {
            let dataToProcess = Array(self.rawAudioData[0 ..< AudioSpectrogram.sampleCount])
            self.rawAudioData.removeFirst(AudioSpectrogram.hopCount)
            self.processData(values: dataToProcess)
        }
     
        createAudioSpectrogram()
    }
}

在 AudioSpectrogram: CALayer 文件中，我将原来的第 10-30 行更改为

public class AudioSpectrogram: CALayer {

// MARK: Initialization

override init() {
    super.init()
    
    contentsGravity = .resize
    
    configureCaptureSession()
    audioOutput.setSampleBufferDelegate(self,
                                        queue: captureQueue)
}

required init?(coder: NSCoder) {
    fatalError("init(coder:) has not been implemented")
}

override public init(layer: Any) {
    super.init(layer: layer)
}

以下内容：

public class AudioSpectrogram: CALayer {

@objc var audioFileUrl: URL
// MARK: Initialization

override init() {
    self.audioFileUrl = selectedTrackUrl!
    super.init()
    
    
    contentsGravity = .resize
    captureBuffer()
    
}

required init?(coder: NSCoder) {
    fatalError("init(coder:) has not been implemented")
}

override public init(layer: Any) {
    self.audioFileUrl = selectedTrackUrl!
    super.init(layer: layer)
}

更改后的代码允许我指定从应用程序中的另一个区域调用频谱图时要使用的音频文件。

以下是我想要实现的目标的示例。它是使用 FFMPEG 完成的。