我想增加 wav 文件中特定频率的音量,使它们比其余频率更响亮(更容易听到)。
我完全没有音频处理经验,在开发代码时学习基础知识......
到目前为止,我已经(至少我认为是这样)找到了想要增强的特定频率;但我卡在了提高其增益的这一步:处理之后,音频中出现了恼人的嗡嗡声。
这是代码:
import array
from pydub import AudioSegment
from pydub.generators import Sine
import numpy as np
from scipy.fft import fft
import sezone.fmanagement as file_management
import math
def find_frequencies_in_segment(segment: AudioSegment) -> (array, tuple):
    """Run an FFT over the raw samples of ``segment``.

    Returns a ``(freqs, fft_result)`` pair: ``freqs`` holds the frequency of
    every FFT bin, derived from ``segment.frame_rate``, and ``fft_result``
    holds the FFT output for the same bins.
    """
    samples = np.array(segment.get_array_of_samples())
    spectrum = fft(samples)
    # Time between two consecutive samples (inverse of the frame rate).
    sample_spacing = 1 / segment.frame_rate
    bin_frequencies = np.fft.fftfreq(len(spectrum), d=sample_spacing)
    return (bin_frequencies, spectrum)
def create_audio_sample() -> AudioSegment:
    """Build the demo clip: a 60 Hz sine tone followed by a 100 Hz sine tone.

    The joined clip is made 20 quieter with pydub's ``-`` operator so that a
    later volume increase is easier to notice.
    """
    # Tone frequencies in Hz, with the duration passed to to_audio_segment.
    low_hz, low_duration = 60, 500
    medium_hz, medium_duration = 100, 125
    low_tone: AudioSegment = Sine(low_hz).to_audio_segment(duration=low_duration)
    medium_tone: AudioSegment = Sine(medium_hz).to_audio_segment(duration=medium_duration)
    # Concatenate first, then attenuate the whole clip.
    quiet_clip: AudioSegment = (low_tone + medium_tone) - 20
    return quiet_clip
def calc_scale_factor(original_sample: AudioSegment, fft_result: array) -> float:
    """Return the largest sample value divided by the largest FFT value.

    Multiplying a value taken from ``fft_result`` by this ratio rescales it
    relative to the peak sample value of ``original_sample``.
    """
    peak_sample = np.max(np.array(original_sample.get_array_of_samples()))
    peak_fft = np.max(np.array(fft_result))
    scale = peak_sample / peak_fft
    return scale
def find_indexes_with_bandwidth(samples: array.array, amplitude: int, bandwith: int) -> list:
    """Return the positions of all samples that lie close to ``amplitude``.

    Args:
        samples: Sequence of sample values to scan.
        amplitude: Target value to look for.
        bandwith: Tolerance; a sample matches when ``amplitude`` lies within
            ``sample - bandwith`` and ``sample + bandwith`` (both inclusive).

    Returns:
        A list of matching indexes in ascending order; empty when nothing
        matches or ``samples`` is empty.
    """
    return [
        index
        for index, sample in enumerate(samples)
        if sample - bandwith <= amplitude <= sample + bandwith
    ]
def original_sample_index_for_frequency(audio: AudioSegment, frequency: int) -> list:
    """Find the sample indexes of ``audio`` associated with ``frequency``.

    The spectral magnitude of the first FFT bin whose frequency, rounded down
    to a whole number, equals ``frequency`` is rescaled with
    ``calc_scale_factor``; every sample whose value lies within +/-2 of that
    rescaled amplitude is reported.

    NOTE(review): the value of a single sample does not say which frequency
    it belongs to, so this amplitude matching is a heuristic at best. Confirm
    it selects what is intended before relying on it.

    Args:
        audio: Segment to analyse.
        frequency: Target frequency, compared against the floored bin
            frequencies.

    Returns:
        List of matching sample indexes in ascending order.

    Raises:
        IndexError: If no FFT bin falls inside ``[frequency, frequency + 1)``,
            e.g. because the clip is too short for that frequency resolution.
    """
    # Apply FFT to get the frequencies.
    freqs, fft_result = find_frequencies_in_segment(audio)
    # Spectral magnitude of every FFT bin.
    amps = np.abs(fft_result)
    # Bins containing the desired frequency (vectorised instead of a Python loop).
    matching_bins = np.flatnonzero(np.floor(freqs) == frequency)
    if matching_bins.size == 0:
        raise IndexError(
            f"no FFT bin falls within [{frequency}, {frequency + 1}) Hz"
        )
    # Amplitude of the desired frequency, taken from the spectral magnitude.
    freq_amp = amps[matching_bins[0]]
    # Corresponding amplitude on the scale of the original sample array.
    original_amp = freq_amp * calc_scale_factor(audio, amps)
    original_samples = audio.get_array_of_samples()
    return find_indexes_with_bandwidth(original_samples, original_amp, 2)
def apply_gain_pydub(audio: AudioSegment, indexes: array) -> AudioSegment:
    """Rebuild ``audio`` with a gain of 20 applied to the samples in ``indexes``.

    The result is assembled piece by piece: untouched stretches are copied
    as-is and each listed sample is passed through ``apply_gain(20)``. The
    result is also exported to ``../output/new_audio_pydub.wav``.

    NOTE(review): boosting isolated samples changes the shape of the waveform;
    this is a likely cause of the buzzing reported for this script. Raising a
    frequency band is normally done with an equaliser-style filter instead.
    NOTE(review): ``indexes`` is assumed to be ascending and to address mono
    samples -- confirm against the caller.

    Args:
        audio: Source segment (not modified).
        indexes: Sample indexes to amplify.

    Returns:
        The rebuilt segment, the same length as ``audio``.
    """
    total_samples = len(audio.get_array_of_samples())
    # Empty segment with the same format as ``audio`` to append to.
    rebuilt = audio.get_sample_slice(0, 0)
    next_unprocessed = 0
    for i in indexes:
        # Untouched part before sample i. The end index of get_sample_slice is
        # exclusive, so the end must be i (not i - 1) to keep sample i - 1.
        rebuilt += audio.get_sample_slice(next_unprocessed, i)
        # The single sample to amplify.
        rebuilt += audio.get_sample_slice(i, i + 1).apply_gain(20)
        next_unprocessed = i + 1
    # Remaining tail; the exclusive end must be the full length so the last
    # sample is not dropped.
    rebuilt += audio.get_sample_slice(next_unprocessed, total_samples)
    rebuilt.export("../output/new_audio_pydub.wav", format="wav")
    return rebuilt
def apply_gain_manually(audio: AudioSegment, indexes: array) -> AudioSegment:
    """Multiply the samples listed in ``indexes`` by a fixed 20 dB gain.

    Works on a copy of the sample array, clips boosted values to the range
    allowed by the sample width, and rebuilds a segment with the same frame
    rate, sample width and channel count as ``audio``. The result is also
    exported to ``../output/new_audio_manually.wav``.

    NOTE(review): boosting isolated samples changes the shape of the waveform;
    this is a likely cause of the buzzing reported for this script.

    Args:
        audio: Source segment.
        indexes: Positions in the sample array to amplify.

    Returns:
        The reconstructed segment.
    """
    samples = audio.get_array_of_samples()
    # 20 dB expressed as a linear amplitude factor; loop-invariant.
    gain = 10 ** (20 / 20.0)
    # Representable range for signed samples of this width (e.g. -32768..32767).
    full_scale = 2 ** (8 * audio.sample_width - 1)
    lowest, highest = -full_scale, full_scale - 1
    for i in indexes:
        boosted = int(samples[i] * gain)
        # Clip instead of letting the array assignment raise OverflowError.
        samples[i] = max(lowest, min(highest, boosted))
    reconstructed_audio = AudioSegment(
        samples.tobytes(),
        frame_rate=audio.frame_rate,
        sample_width=audio.sample_width,
        channels=audio.channels,
    )
    reconstructed_audio.export("../output/new_audio_manually.wav", format="wav")
    return reconstructed_audio
def main():
    """Create the demo clip, boost its 60 Hz samples two ways, print the peaks."""
    audio = create_audio_sample()
    audio.export("../output/audio_sample.wav", format="wav")

    # Indexes of the samples associated with the frequency to boost (60 Hz).
    target_indexes = original_sample_index_for_frequency(audio, 60)

    # Same operation done in two different ways, to compare the results.
    audio_pydub = apply_gain_pydub(audio, target_indexes)
    audio_manually = apply_gain_manually(audio, target_indexes)

    # Print the peaks to check that no version has been clipped (16-bit limit first).
    print("maximum possible value: ", (2 ** 15) - 1)
    labelled_segments = (
        ("audio max: ", audio),
        ("audio_pydub max: ", audio_pydub),
        ("audio_manually max: ", audio_manually),
    )
    for label, segment in labelled_segments:
        print(label, np.array(segment.get_array_of_samples()).max())
    # Both processed clips have a buzzing sound after the processing.
# Script entry point: run the demo, printing a newline string before and after.
if __name__ == "__main__":
    print("\n")
    main()
    print("\n")
我想了解:
由于我仍在学习音频处理的基础知识,任何背景或上下文信息都会有所帮助。
提前致谢
您可能需要修改 `apply_gain_pydub` 和 `apply_gain_manually` 这两个函数,尤其是对已识别索引应用增益的部分。我建议根据频率分量的幅度来计算增益,而不是使用固定增益。您可以这样做:
from pydub.utils import ratio_to_db, db_to_float
gain_adjustment = 20  # Gain setting used by apply_gain_pydub and apply_gain_manually below; adjust to your requirements.
def apply_gain_pydub(audio: AudioSegment, indexes: array) -> AudioSegment:
    """Rebuild ``audio`` with an amplitude-aware gain on the listed samples.

    Each sample in ``indexes`` is raised by ``gain_adjustment`` dB, but never
    by more than its own headroom to full scale, so loud samples are not
    pushed into clipping. Untouched stretches are copied as-is. The result is
    also exported to ``../output/new_audio_pydub.wav``.

    The earlier formula, ``ratio_to_db(db_to_float(max_possible_amplitude))``,
    evaluated ``10 ** (32768 / 20)`` for 16-bit audio, which overflows a
    float; it has been replaced by the headroom computation below.

    NOTE(review): ``indexes`` is assumed to be ascending and to address mono
    samples -- confirm against the caller.

    Args:
        audio: Source segment (not modified).
        indexes: Sample indexes to amplify.

    Returns:
        The rebuilt segment, the same length as ``audio``.
    """
    total_samples = len(audio.get_array_of_samples())
    # Empty segment with the same format as ``audio`` to append to.
    rebuilt = audio.get_sample_slice(0, 0)
    next_unprocessed = 0
    for i in indexes:
        # The end index of get_sample_slice is exclusive, so the end must be i
        # (not i - 1) to keep the sample just before the boosted one.
        rebuilt += audio.get_sample_slice(next_unprocessed, i)
        audio_to_change = audio.get_sample_slice(i, i + 1)
        # max_dBFS is the peak level relative to full scale (<= 0, and -inf
        # for a zero sample); its negation is the headroom left in dB.
        headroom_db = -audio_to_change.max_dBFS
        gain = min(gain_adjustment, headroom_db)
        rebuilt += audio_to_change.apply_gain(gain)
        next_unprocessed = i + 1
    # Remaining tail; the exclusive end must be the full length so the last
    # sample is not dropped.
    rebuilt += audio.get_sample_slice(next_unprocessed, total_samples)
    rebuilt.export("../output/new_audio_pydub.wav", format="wav")
    return rebuilt
def apply_gain_manually(audio: AudioSegment, indexes: array) -> AudioSegment:
    """Scale the listed samples by ``gain_adjustment`` dB, limited by headroom.

    Works on a copy of the sample array. Each listed sample is multiplied by
    the linear factor for ``gain_adjustment`` dB and then clipped to the range
    allowed by the sample width, so the effective gain of a sample never
    exceeds its headroom. The result is also exported to
    ``../output/new_audio_manually.wav``.

    The earlier formula converted the raw sample value through
    ``db_to_float``/``ratio_to_db`` and divided the exponent by
    ``gain_adjustment`` instead of 20, producing a multiplier far too large
    to store in the sample array.

    Args:
        audio: Source segment.
        indexes: Positions in the sample array to amplify.

    Returns:
        The reconstructed segment, with the same frame rate, sample width and
        channel count as ``audio``.
    """
    samples = audio.get_array_of_samples()
    # dB -> linear amplitude factor (amplitude ratio = 10 ** (dB / 20)).
    gain_multiplier = 10 ** (gain_adjustment / 20.0)
    # Representable range for signed samples of this width (e.g. -32768..32767).
    full_scale = 2 ** (8 * audio.sample_width - 1)
    lowest, highest = -full_scale, full_scale - 1
    for i in indexes:
        boosted = int(samples[i] * gain_multiplier)
        # Clip instead of letting the array assignment raise OverflowError.
        samples[i] = max(lowest, min(highest, boosted))
    reconstructed_audio = AudioSegment(
        samples.tobytes(),
        frame_rate=audio.frame_rate,
        sample_width=audio.sample_width,
        channels=audio.channels,
    )
    reconstructed_audio.export("../output/new_audio_manually.wav", format="wav")
    return reconstructed_audio
附注:我参照自己的一段代码修改了您的这部分代码,请看看是否适合您。