Is there a way to create a spectrogram of an audio file using Swift and AudioKit?


I'm trying to create a spectrogram from an audio file for a macOS app using Swift, as shown in the image. I'm using AppKit, but SwiftUI would also work. I found AudioKit, which seems like the perfect library for this kind of thing, but I couldn't find any examples of what I'm looking for in the AudioKit repository, AudioKitUI, or the Cookbook. Can AudioKit do this? If so, could someone help me? Thanks a lot!

I previously tried using Apple's sample project and modified the code in the AudioSpectrogram+AVCaptureAudioDataOutputSampleBufferDelegate file. The original code is as follows:

extension AudioSpectrogram: AVCaptureAudioDataOutputSampleBufferDelegate {

public func captureOutput(_ output: AVCaptureOutput,
                          didOutput sampleBuffer: CMSampleBuffer,
                          from connection: AVCaptureConnection) {

    var audioBufferList = AudioBufferList()
    var blockBuffer: CMBlockBuffer?

    CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
        sampleBuffer,
        bufferListSizeNeededOut: nil,
        bufferListOut: &audioBufferList,
        bufferListSize: MemoryLayout.stride(ofValue: audioBufferList),
        blockBufferAllocator: nil,
        blockBufferMemoryAllocator: nil,
        flags: kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment,
        blockBufferOut: &blockBuffer)
    
    guard let data = audioBufferList.mBuffers.mData else {
        return
    }

    /// The _Nyquist frequency_ is the highest frequency that a sampled system can properly
    /// reproduce and is half the sampling rate of such a system. Although this app doesn't use
    /// `nyquistFrequency`, you may find this code useful to add an overlay to the user interface.
    if nyquistFrequency == nil {
        let duration = Float(CMSampleBufferGetDuration(sampleBuffer).value)
        let timescale = Float(CMSampleBufferGetDuration(sampleBuffer).timescale)
        let numsamples = Float(CMSampleBufferGetNumSamples(sampleBuffer))
        nyquistFrequency = 0.5 / (duration / timescale / numsamples)
    }

    if self.rawAudioData.count < AudioSpectrogram.sampleCount * 2 {
        let actualSampleCount = CMSampleBufferGetNumSamples(sampleBuffer)
        
        let ptr = data.bindMemory(to: Int16.self, capacity: actualSampleCount)
        let buf = UnsafeBufferPointer(start: ptr, count: actualSampleCount)
        
        rawAudioData.append(contentsOf: Array(buf))
    }

    while self.rawAudioData.count >= AudioSpectrogram.sampleCount {
        let dataToProcess = Array(self.rawAudioData[0 ..< AudioSpectrogram.sampleCount])
        self.rawAudioData.removeFirst(AudioSpectrogram.hopCount)
        self.processData(values: dataToProcess)
    }
 
    createAudioSpectrogram()
}

func configureCaptureSession() {
    // Also note that:
    //
    // When running in iOS, you must add a "Privacy - Microphone Usage
    // Description" entry.
    //
    // When running in macOS, you must add a "Privacy - Microphone Usage
    // Description" entry to `Info.plist`, and check "audio input" and
    // "camera access" under the "Resource Access" category of "Hardened
    // Runtime".
    switch AVCaptureDevice.authorizationStatus(for: .audio) {
        case .authorized:
            break
        case .notDetermined:
            sessionQueue.suspend()
            AVCaptureDevice.requestAccess(for: .audio,
                                          completionHandler: { granted in
                if !granted {
                    fatalError("App requires microphone access.")
                } else {
                    self.configureCaptureSession()
                    self.sessionQueue.resume()
                }
            })
            return
        default:
            // Users can add authorization in "Settings > Privacy > Microphone"
            // on an iOS device, or "System Preferences > Security & Privacy >
            // Microphone" on a macOS device.
            fatalError("App requires microphone access.")
    }
    
    captureSession.beginConfiguration()
    
    #if os(macOS)
    // Note that in macOS, you can change the sample rate, for example to
    // `AVSampleRateKey: 22050`. This reduces the Nyquist frequency and
    // increases the resolution at lower frequencies.
    audioOutput.audioSettings = [
        AVFormatIDKey: kAudioFormatLinearPCM,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMBitDepthKey: 16,
        AVNumberOfChannelsKey: 1]
    #endif
    
    if captureSession.canAddOutput(audioOutput) {
        captureSession.addOutput(audioOutput)
    } else {
        fatalError("Can't add `audioOutput`.")
    }
    
    guard
        let microphone = AVCaptureDevice.default(.builtInMicrophone,
                                                 for: .audio,
                                                 position: .unspecified),
        let microphoneInput = try? AVCaptureDeviceInput(device: microphone) else {
            fatalError("Can't create microphone.")
    }
    
    if captureSession.canAddInput(microphoneInput) {
        captureSession.addInput(microphoneInput)
    }
    
    captureSession.commitConfiguration()
}

/// Starts the audio spectrogram.
func startRunning() {
    sessionQueue.async {
        if AVCaptureDevice.authorizationStatus(for: .audio) == .authorized {
            self.captureSession.startRunning()
        }
    }
}

}

I removed the configureCaptureSession function and replaced the rest of the code to end up with the following:

public func captureBuffer() {

    var samplesArray: [Int16] = []

    let asset = AVAsset(url: audioFileUrl)

    guard let reader = try? AVAssetReader(asset: asset) else {
        return
    }

    let track = asset.tracks(withMediaType: AVMediaType.audio)[0]

    let settings: [String: Any] = [
        AVFormatIDKey: kAudioFormatLinearPCM,
        // Match the 16-bit integer, single-channel format the rest of the
        // pipeline expects (the same keys the original sample sets on
        // `audioOutput.audioSettings`).
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMBitDepthKey: 16,
        AVNumberOfChannelsKey: 1
    ]

    let readerOutput = AVAssetReaderTrackOutput(track: track, outputSettings: settings)
    reader.add(readerOutput)
    reader.startReading()

    while let buffer = readerOutput.copyNextSampleBuffer() {

        var audioBufferList = AudioBufferList(mNumberBuffers: 1, mBuffers: AudioBuffer(mNumberChannels: 1, mDataByteSize: 0, mData: nil))
        var blockBuffer: CMBlockBuffer?

        CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
            buffer,
            bufferListSizeNeededOut: nil,
            bufferListOut: &audioBufferList,
            bufferListSize: MemoryLayout<AudioBufferList>.size,
            blockBufferAllocator: nil,
            blockBufferMemoryAllocator: nil,
            flags: kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment,
            blockBufferOut: &blockBuffer)

        let buffers = UnsafeBufferPointer<AudioBuffer>(start: &audioBufferList.mBuffers, count: Int(audioBufferList.mNumberBuffers))

        for buffer in buffers {
            guard let bufferData = buffer.mData else { continue }
            let samplesCount = Int(buffer.mDataByteSize) / MemoryLayout<Int16>.size
            let samplesPointer = bufferData.bindMemory(to: Int16.self, capacity: samplesCount)
            let samples = UnsafeMutableBufferPointer<Int16>(start: samplesPointer, count: samplesCount)

            for sample in samples {
                // Do something with your sample (an Int16 amplitude value).
                samplesArray.append(sample)
            }
        }

        guard let data = audioBufferList.mBuffers.mData else {
            // Skip an empty buffer rather than returning and dropping
            // the rest of the file.
            continue
        }

        /// The _Nyquist frequency_ is the highest frequency that a sampled system can properly
        /// reproduce and is half the sampling rate of such a system. Although this app doesn't use
        /// `nyquistFrequency`, you may find this code useful to add an overlay to the user interface.
        if nyquistFrequency == nil {
            let duration = Float(CMSampleBufferGetDuration(buffer).value)
            let timescale = Float(CMSampleBufferGetDuration(buffer).timescale)
            let numsamples = Float(CMSampleBufferGetNumSamples(buffer))
            nyquistFrequency = 0.5 / (duration / timescale / numsamples)
        }

        if self.rawAudioData.count < AudioSpectrogram.sampleCount * 2 {
            let actualSampleCount = CMSampleBufferGetNumSamples(buffer)

            let ptr = data.bindMemory(to: Int16.self, capacity: actualSampleCount)
            let buf = UnsafeBufferPointer(start: ptr, count: actualSampleCount)

            rawAudioData.append(contentsOf: Array(buf))
        }

        while self.rawAudioData.count >= AudioSpectrogram.sampleCount {
            let dataToProcess = Array(self.rawAudioData[0 ..< AudioSpectrogram.sampleCount])
            self.rawAudioData.removeFirst(AudioSpectrogram.hopCount)
            self.processData(values: dataToProcess)
        }

        createAudioSpectrogram()
    }
}

In the AudioSpectrogram: CALayer file, I changed the original lines 10-30 from:

public class AudioSpectrogram: CALayer {

// MARK: Initialization

override init() {
    super.init()
    
    contentsGravity = .resize
    
    configureCaptureSession()
    audioOutput.setSampleBufferDelegate(self,
                                        queue: captureQueue)
}

required init?(coder: NSCoder) {
    fatalError("init(coder:) has not been implemented")
}

override public init(layer: Any) {
    super.init(layer: layer)
}

to the following:

public class AudioSpectrogram: CALayer {

@objc var audioFileUrl: URL
// MARK: Initialization

override init() {
    self.audioFileUrl = selectedTrackUrl!
    super.init()

    contentsGravity = .resize
    captureBuffer()
}

required init?(coder: NSCoder) {
    fatalError("init(coder:) has not been implemented")
}

override public init(layer: Any) {
    self.audioFileUrl = selectedTrackUrl!
    super.init(layer: layer)
}

The modified code lets me specify which audio file to use when the spectrogram is invoked from another part of the app.
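For example, a call site elsewhere in the app might look like this; a minimal sketch, assuming `selectedTrackUrl` is the global URL the initializer above force-unwraps (`hostView` and the file name are placeholders):

// `selectedTrackUrl` is the global the initializer force-unwraps,
// so it must be set before the layer is created.
selectedTrackUrl = Bundle.main.url(forResource: "song", withExtension: "m4a")

let spectrogramLayer = AudioSpectrogram()
spectrogramLayer.frame = hostView.bounds  // `hostView` is a placeholder NSView
hostView.wantsLayer = true
hostView.layer?.addSublayer(spectrogramLayer)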

Here is an example of what I'm trying to achieve. It was created with FFmpeg. Example Spectrogram

This is the output I get from my code: Output Image

swift appkit audiokit spectrogram
3 Answers
1 vote

There is actually a spectrogram in the AudioKitUI Swift package: https://github.com/AudioKit/AudioKitUI/blob/main/Sources/AudioKitUI/Visualizations/SpectrogramView.swift

You need to pass it an AudioKit node, but it should be interchangeable with the other visualizations in the Cookbook.
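A minimal sketch of how that might look, assuming the AudioKit v5 `AudioEngine`/`AudioPlayer` API and that `SpectrogramView` takes the node as its `node:` argument, as in the linked source (the file name is a placeholder):

import AudioKit
import AudioKitUI
import SwiftUI

struct FileSpectrogram: View {
    let engine = AudioEngine()
    let player: AudioPlayer

    init() {
        // "song.m4a" is a placeholder; bundle your own file.
        let url = Bundle.main.url(forResource: "song", withExtension: "m4a")!
        player = AudioPlayer(url: url)!
        engine.output = player
    }

    var body: some View {
        SpectrogramView(node: player)
            .onAppear {
                try? engine.start()
                player.play()
            }
    }
}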


0 votes

AudioKit is not the tool you're looking for here. You want AVFoundation. Apple has a sample project that does exactly what you describe.

The core tool is the DCT (discrete cosine transform), which converts a window of samples into a set of component frequencies that can be visualized. AVFoundation is the tool for turning an audio file or a live recording into buffers of audio samples so you can apply the DCT.
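To make that pipeline concrete, here is a rough sketch (not the sample project's code) that reads a file into a float buffer with AVFoundation and runs one window through a DCT from Accelerate; the path and window size are placeholders:

import AVFoundation
import Accelerate

// Placeholder file URL and window size.
let url = URL(fileURLWithPath: "/path/to/audio.m4a")
let windowSize = 1024

// Read the whole file into a deinterleaved float buffer.
let file = try AVAudioFile(forReading: url)
let format = file.processingFormat
let pcmBuffer = AVAudioPCMBuffer(pcmFormat: format,
                                 frameCapacity: AVAudioFrameCount(file.length))!
try file.read(into: pcmBuffer)

// Take the first window of the first channel.
let channel = UnsafeBufferPointer(start: pcmBuffer.floatChannelData![0],
                                  count: Int(pcmBuffer.frameLength))
let window = Array(channel.prefix(windowSize))

// One DCT-II over the window yields one column of the spectrogram;
// slide the window (with overlap) across the file for the full image.
let dct = vDSP.DCT(count: windowSize, transformType: .II)!
let magnitudes = dct.transform(window).map { abs($0) }
print(magnitudes.prefix(8))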


0 votes

A few weeks later: there is now an implementation of a spectrogram in the AudioKitUI Swift package: https://github.com/AudioKit/AudioKitUI/blob/main/Sources/AudioKitUI/Visualizations/SpectrogramFlatView/SpectrogramFlatView.swift

It is called SpectrogramFlatView and shows a scrolling spectrogram.

You need to pass it an AudioKit node that plays your file, but it should be interchangeable with the other visualizations in the Cookbook.
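Usage should mirror the SpectrogramView sketch in the first answer; a sketch, assuming SpectrogramFlatView also takes the node as its `node:` argument:

import AudioKit
import AudioKitUI
import SwiftUI

// A minimal view body; `player` is an AudioPlayer configured and
// started as in the earlier sketch.
struct ScrollingSpectrogram: View {
    let player: AudioPlayer

    var body: some View {
        SpectrogramFlatView(node: player)
    }
}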
