我正在尝试使用我的数据集微调 wev2vec2 模型。因此我加载了音频。现在想将它们下采样到 16khz。但 librosa.reshape 函数给出了我无法解决的错误。错误是“resample() 需要 1 个位置参数,但给出了 3 个”
首先我尝试使用 sr 16khz 的 librosa 加载它。但由于我在这方面的经验较少。因此,我在项目的后期部分遇到了问题。我找到了一个以不同方式完成的代码。所以。我尝试使用他的方法,但现在面临问题。 这部分工作正常
database={}
audios = []
psr = []
for path in df['audio']:
speech_array,sr = torchaudio.load(path)
audios.append(speech_array[0].numpy())
psr.append(sr)
database['audio'] = audios
database['psr'] = psr
每个索引都出现错误
import librosa
import numpy as np
# Assuming 'database' is your DataFrame containing 'audio' and 'psr' columns
# List to store new sampling rates
new_sr = []
# Resample each audio signal and store the new sampling rate
for i in range(len(database['psr'])):
try:
audio_signal = np.asarray(database['audio'][i]) # Convert audio to numpy array
original_sr = database['psr'][i] # Original sampling rate
# Check if the audio signal is mono (single-channel)
if audio_signal.ndim == 1:
# Resample mono audio signal
resampled_audio = librosa.resample(audio_signal, original_sr, 16000)
else:
# Resample each channel separately for multi-channel audio
resampled_channels = []
for channel in audio_signal:
resampled_channel = librosa.resample(channel, original_sr, 16000)
resampled_channels.append(resampled_channel)
resampled_audio = np.array(resampled_channels)
# Store resampled audio back in DataFrame
database['audio'][i] = resampled_audio
# Store new sampling rate (16000 Hz)
new_sr.append(16000)
except Exception as e:
print(f"Error processing audio at index {i}: {e}")
# Add new sampling rates to the DataFrame
database['newsr'] = new_sr
这是
reshape
[src]的定义:
@cache(level=20)
def resample(
y: np.ndarray,
*, # forces you to pass all the following arguments only as named ones
orig_sr: float,
target_sr: float,
res_type: str = "soxr_hq",
fix: bool = True,
scale: bool = False,
axis: int = -1,
**kwargs: Any,
) -> np.ndarray:
文档还提供了这样做的示例:
y, sr = librosa.load(librosa.ex('trumpet'), sr=22050)
y_8k = librosa.resample(y, orig_sr=sr, target_sr=8000)