我尝试使用 PyAV 创建音频帧，但遇到了一个错误，折腾了好几天也没能解决。
这是我的自定义音频类:
class CustomAudioTrack(MediaStreamTrack):
    """Audio track that records short microphone chunks with sounddevice.

    Each call to ``recv`` records one chunk, wraps it in an
    ``av.AudioFrame`` and stamps it with a running sample counter.
    """

    kind = "audio"

    def __init__(self, rate=48000, channels=2):
        """Store the capture settings and reset the sample counter.

        Args:
            rate: Sample rate in Hz, used for recording and for the
                frame's ``sample_rate`` / ``time_base``.
            channels: Number of channels requested from sounddevice.
        """
        super().__init__()
        self._timestamp = 0
        self.rate = rate
        self.channels = channels

    async def recv(self):
        """Record 0.1 s of audio and return it as an ``av.AudioFrame``."""
        # Number of sample frames that make up 0.1 second of sound.
        chunk_len = int(self.rate / 10)

        samples = sd.rec(
            chunk_len,
            samplerate=self.rate,
            channels=self.channels,
            dtype=np.int16,
        )
        # Block until the recording has finished.
        sd.wait()

        # Advance the running sample counter; it doubles as the pts.
        self._timestamp += chunk_len
        stamp = self._timestamp
        tick = fractions.Fraction(1, self.rate)

        # Build the audio frame.
        # NOTE(review): `samples` is handed over unchanged; the traceback in
        # this post shows from_ndarray rejecting its (4800, 2) shape here.
        frame = av.AudioFrame.from_ndarray(
            samples, format='s16', layout='stereo')
        frame.sample_rate = self.rate
        frame.pts = stamp
        frame.time_base = tick
        return frame
我的错误是:
Traceback (most recent call last):
File "C:\Users\poppppp\AppData\Local\Programs\Python\Python311\Lib\site-packages\aiortc\rtcrtpsender.py", line 328, in _run_rtp
enc_frame = await self._next_encoded_frame(codec)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\poppppp\AppData\Local\Programs\Python\Python311\Lib\site-packages\aiortc\rtcrtpsender.py", line 270, in _next_encoded_frame
data = await self.__track.recv()
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\documents\project\pythonProject\webrtc_tutorial\sadda_python\offerer.py", line 44, in recv
audio_frame = av.AudioFrame.from_ndarray(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "av\audio\frame.pyx", line 125, in av.audio.frame.AudioFrame.from_ndarray
File "av\utils.pyx", line 79, in av.utils.check_ndarray_shape
ValueError: Unexpected numpy array shape (4800, 2)
你知道我该如何解决这个问题吗?
我找到了解决办法。问题在于：把 numpy 数组传给 from_ndarray 函数时，数组的形状必须符合它的要求。我先把数据整理成如下的列向量形式：[ [ 1] [ 1] [ 0] .... [ 1] [ 0] ]。现在我改用 pyaudio 读取麦克风数据，但读到的 numpy 数组形状不对，所以我先用 data.reshape(-1, 1) 对它进行重塑，再在传入 from_ndarray 时转置（data.T），得到形状为 (1, N) 的数组。这是最终的自定义类：
class CustomAudioTrack(MediaStreamTrack):
    """Audio track that streams microphone input captured with PyAudio.

    Each call to ``recv`` reads one buffer of 16-bit samples from the
    PyAudio input stream and returns it as a packed ``s16``
    ``av.AudioFrame`` whose ``pts`` counts samples since the first frame.

    Call ``stop()`` when the track is no longer needed so the microphone
    is released deterministically; ``__del__`` is only a fallback.
    """

    kind = "audio"

    # Channel counts that can be labelled with a PyAV layout name.
    _LAYOUTS = {1: "mono", 2: "stereo"}

    def __init__(self, rate=48000, channels=2, frames_per_buffer=960):
        """Open the PyAudio input stream.

        Args:
            rate: Sample rate in Hz used for capture and frame metadata.
            channels: Number of input channels (1 or 2).
            frames_per_buffer: Samples per channel read on each ``recv``
                (960 is 20 ms at 48 kHz).

        Raises:
            ValueError: If ``channels`` has no known layout name.
        """
        if channels not in self._LAYOUTS:
            raise ValueError(
                f"unsupported channel count: {channels!r} (expected 1 or 2)")
        super().__init__()
        self.rate = rate
        self.channels = channels
        self.frames_per_buffer = frames_per_buffer
        self._layout = self._LAYOUTS[channels]
        self._timestamp = 0

        # Pre-set both handles so _close() is safe if opening fails below.
        self.pa = None
        self.stream = None

        # Initialise PyAudio with the requested settings rather than
        # hard-coded ones, so the captured audio matches the frame metadata.
        self.pa = pyaudio.PyAudio()
        try:
            self.stream = self.pa.open(format=pyaudio.paInt16,
                                       channels=channels,
                                       rate=rate,
                                       input=True,
                                       frames_per_buffer=frames_per_buffer)
        except Exception:
            self._close()
            raise

    async def recv(self):
        """Read one buffer from the microphone and return an AudioFrame.

        Raises:
            MediaStreamError: If the track has been stopped.
        """
        if self.readyState != "live" or self.stream is None:
            from aiortc.mediastreams import MediaStreamError
            raise MediaStreamError

        # Read the data from the PyAudio stream.
        # NOTE(review): this read is synchronous inside a coroutine and
        # presumably blocks the event loop until the buffer is full --
        # consider moving it to an executor if other tasks are starved.
        raw = self.stream.read(self.frames_per_buffer)
        data = np.frombuffer(raw, dtype=np.int16)
        data = data.reshape(-1, 1)

        # Stamp the frame with the count of samples delivered *before* it,
        # so the very first frame starts at pts 0.
        pts = self._timestamp
        self._timestamp += self.frames_per_buffer
        time_base = Fraction(1, self.rate)

        # Prepare the data for PyAV: the packed 's16' format is given a
        # single row holding the interleaved samples, hence the transpose.
        audio_frame = av.AudioFrame.from_ndarray(
            data.T, format='s16', layout=self._layout)
        audio_frame.sample_rate = self.rate
        audio_frame.pts = pts
        audio_frame.time_base = time_base
        return audio_frame

    def stop(self):
        """Stop the track and release the microphone."""
        super().stop()
        self._close()

    def _close(self):
        """Release the PyAudio stream and instance; safe to call repeatedly."""
        # getattr guards against a partially constructed instance.
        stream = getattr(self, "stream", None)
        pa = getattr(self, "pa", None)
        self.stream = None
        self.pa = None
        try:
            if stream is not None:
                try:
                    stream.stop_stream()
                finally:
                    stream.close()
        finally:
            if pa is not None:
                pa.terminate()

    def __del__(self):
        # Fallback only: prefer calling stop() explicitly.
        self._close()