Removing Click and Pop Sounds in Python


I'm currently building a DSP project in Python: a 10-band equalizer with a knob UI made with tkinter. Below is my audio-processing logic, which filters with RFFT and IRFFT and streams with sounddevice and soundfile. Whenever I turn any knob, I get intermittent clicking or popping sounds, unsettling though not very loud. I've increased the buffer size as far as I can to reduce them, but they don't go away. Any ideas how to get rid of these annoying sounds completely?

<Audio Processing Logic>

import sounddevice as sd
import soundfile as sf
import numpy as np
import sys

class AudioPlayer_sounddevice:
    def __init__(self):
        self.current_frame = 0
        self.playback_active = False
        self.playback_finished = False
        self.stream = None
        self.fs = None
        self.volume = 1.0
        self.file = None
        self.bands = [(40, 80), (80, 160), (200, 300), (400, 600), (800, 1200),
                      (1600, 2400), (3000, 5000), (7000, 9000), (11000, 14000), (15000, 17000)]
        self.gain = [0] * len(self.bands)
        self.overlap_buffer = None
        self.frame_size = 8192*4  # I increased the frame size
        self.overlap_size = self.frame_size // 2  # half of the frame size
    
    def adjust_volume(self, volume):
        self.volume = max(0.0, min(1.0, volume))

    def adjust_gain(self, former, sliders):
        self.gain = sliders


    def load(self, filepath):
        self.filepath = filepath
        self.file = sf.SoundFile(self.filepath, mode='r')
        self.fs = self.file.samplerate
        self.current_frame = 0
        self.playback_active = False
        self.playback_finished = False

    def play(self):
        self.playback_finished = False
        if not self.playback_active:
            self.playback_active = True
            if self.playback_finished or self.stream is None:
                self.current_frame = 0
                self.playback_finished = False
                self._start_stream()
            else:
                self._start_stream()
        elif self.playback_finished:
            self.current_frame = 0
            self.playback_finished = False
            self._start_stream()

    def _start_stream(self):
        if self.stream is not None:
            self.stream.stop()
            self.stream.close()
        self.stream = sd.OutputStream(callback=self.audio_callback, samplerate=self.fs,
                                      channels=self.file.channels, blocksize=8192*4)  # I increased the buffer size
        self.stream.start()
        
    # Main audio-processing callback: per-band gain is applied in the frequency domain
    def audio_callback(self, outdata, frames, time, status):
        if status:
            print(status, file=sys.stderr)
        if not self.playback_active:
            outdata.fill(0)
            return

        data = self.file.read(frames, dtype='float32', always_2d=True)
        read_frames = data.shape[0]

        # Overlap handling
        if self.overlap_buffer is not None:
            data[:self.overlap_size] += self.overlap_buffer

        # Forward FFT
        freq_data = np.fft.rfft(data, axis=0)
        freq_bins = np.fft.rfftfreq(frames, d=1/self.fs)

        # Per-band gain filtering
        for i, band in enumerate(self.bands):
            start_idx = np.searchsorted(freq_bins, band[0])
            end_idx = np.searchsorted(freq_bins, band[1])
            gain_linear = 10 ** (self.gain[i] / 20)
            freq_data[start_idx:end_idx] *= np.float32(gain_linear)

        # Inverse FFT
        processed_data = np.fft.irfft(freq_data, n=frames, axis=0)
        # processed_data = self.butter_lowpass_filter(processed_data, cutoff=30000, fs=self.fs)

        # Effective length of the processed data after removing the overlap regions
        processed_length = processed_data.shape[0] - 2 * self.overlap_size

        # Padding needed to fill the output block
        required_length = frames - processed_length

        # Make outdata match the expected block size
        if required_length > 0:
            padding = np.zeros((required_length, data.shape[1]), dtype=processed_data.dtype)
            outdata[:] = np.concatenate((processed_data[self.overlap_size:-self.overlap_size], padding))
        else:
            # No padding needed
            outdata[:] = processed_data[self.overlap_size:self.overlap_size+frames]

        if read_frames < frames:
            outdata[:read_frames] = processed_data[:read_frames] * np.float32(self.volume * 0.7)
            outdata[read_frames:] = 0
            self.playback_finished = True
            self.playback_active = False
            raise sd.CallbackStop
        else:
            outdata[:read_frames] = processed_data * np.float32(self.volume * 0.7)

        self.current_frame += read_frames

    def stop(self):
        if self.stream is not None:
            self.stream.stop()
            self.stream.close()
        self.stream = None
        self.playback_active = False
        self.playback_finished = True
        self.current_frame = 0

    def pause(self):
        if self.playback_active:
            self.playback_active = False
            if self.stream is not None:
                self.stream.stop()

    def unpause(self):
        if not self.playback_active and not self.playback_finished:
            self.play()
Tags: python, numpy, signal-processing, python-sounddevice, soundfile
1 Answer

As mentioned in the comments, the clicks most likely come from the gain changing too abruptly. It is better to change the gain smoothly over a period of time.
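One way to do that (a minimal sketch, not the poster's code; GainSmoother, target_gain, smoothed_gain and smoothing are names made up here) is to keep a smoothed copy of the slider gains and move it a small step toward the targets on every callback block, so a knob turn is spread over several blocks instead of landing on a single block boundary:

import numpy as np

class GainSmoother:
    def __init__(self, n_bands, smoothing=0.2):
        self.target_gain = np.zeros(n_bands)    # dB values coming from the UI sliders
        self.smoothed_gain = np.zeros(n_bands)  # dB values actually applied per block
        self.smoothing = smoothing              # 0 < smoothing <= 1; smaller means slower changes

    def set_targets(self, sliders):
        self.target_gain = np.asarray(sliders, dtype=float)

    def next_block_gains(self):
        # One-pole smoothing: move a fraction of the remaining distance each block.
        self.smoothed_gain += self.smoothing * (self.target_gain - self.smoothed_gain)
        return 10 ** (self.smoothed_gain / 20)  # per-band linear gains for this block

In the question's audio_callback this would replace the direct use of self.gain when computing gain_linear.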

Since you are using an FFT filtering approach and adjust the gain in the Fourier domain, you cannot apply a fade there (at least not easily, without a convolution, which I assume you want to avoid).

Fade in the time domain

You can first check whether the filter parameters have changed since the last block. If they have not, keep using your current approach. If they have, perform three more steps (a sketch follows the list):

  1. Compute the new buffer of filtered samples using the old filter parameters -> s_old
  2. Compute the new buffer of filtered samples using the new filter parameters -> s_new
  3. Take a weighted average of the two signals by applying a fade-in to s_new and a fade-out to s_old
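A minimal sketch of those three steps, assuming the per-band filtering is factored out into a helper (filter_block, process_block, old_gains and new_gains are names introduced here, not from the question):

import numpy as np

def filter_block(data, gains_db, bands, fs):
    # Same idea as the question's callback: per-band gain in the frequency domain.
    freq_data = np.fft.rfft(data, axis=0)
    freq_bins = np.fft.rfftfreq(data.shape[0], d=1 / fs)
    for (low, high), g_db in zip(bands, gains_db):
        start = np.searchsorted(freq_bins, low)
        end = np.searchsorted(freq_bins, high)
        freq_data[start:end] *= 10 ** (g_db / 20)
    return np.fft.irfft(freq_data, n=data.shape[0], axis=0)

def process_block(data, old_gains, new_gains, bands, fs):
    if np.array_equal(old_gains, new_gains):
        # Parameters unchanged: filter once, as before.
        return filter_block(data, new_gains, bands, fs)
    # Steps 1 and 2: filter the same samples with the old and the new parameters.
    s_old = filter_block(data, old_gains, bands, fs)
    s_new = filter_block(data, new_gains, bands, fs)
    # Step 3: weighted average -- fade s_old out while fading s_new in over the block.
    fade = np.linspace(0.0, 1.0, data.shape[0])[:, None]
    return (1.0 - fade) * s_old + fade * s_new

The linear ramp keeps the block boundary continuous; any monotone fade shape (a raised cosine, for example) works just as well.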