将 mp3 转录为文本(python)-->“RIFF id”错误

问题描述 投票:0回答:3

我正在尝试将 mp3 文件转换为文本,但我的代码返回了下面列出的错误。任何帮助表示赞赏!

这是一个示例 mp3 文件。以下是我尝试过的:

# Asker's reproduction script: it hands an .mp3 path straight to sr.AudioFile
# and then tries to transcribe the recorded audio with recognize_google.
# This is the code that produces the error reported in the question.
import speech_recognition as sr

print(sr.__version__)

r = sr.Recognizer()
file_audio = sr.AudioFile(r"C:\Users\Andrew\Podcast.mp3")
# Open the file as an audio source and record from it.
with file_audio as source:
    audio_text = r.record(source)

print(type(audio_text))
print(r.recognize_google(audio_text))
我得到的完整错误信息似乎是:

Error: file does not start with RIFF id
感谢您的帮助!

python python-3.x speech-recognition speech-to-text transcription
3个回答
3
投票
你需要先把mp3转成wav,然后你就可以转录了,下面是你的代码修改后的版本。

import speech_recognition as sr
from pydub import AudioSegment

# Raw strings keep the Windows backslashes literal. In a normal string literal
# "\U" starts a unicode escape, so a non-raw "C:\Users\..." path does not compile.
# Each path is defined once so the exported file and the file read back
# cannot drift apart (including in letter case).
src = r"C:\Users\Andrew\Podcast.mp3"
dst = r"C:\Users\Andrew\Podcast.wav"

# Convert the mp3 file to wav.
sound = AudioSegment.from_mp3(src)
sound.export(dst, format="wav")

# Use the converted wav file as the audio source.
file_audio = sr.AudioFile(dst)
r = sr.Recognizer()
with file_audio as source:
    audio_text = r.record(source)

print(type(audio_text))
print(r.recognize_google(audio_text))
在上面修改后的代码中,首先将 mp3 文件转换为 wav,然后再进行转录。这是因为 sr.AudioFile 只能读取 WAV、AIFF 或 FLAC 文件,无法直接读取 mp3,所以才会出现“RIFF id”错误。


1
投票
您可以做的一件事是将 mp3 转换为 wav。使用 mp3 文件进行测试时,我遇到了与您相同的错误。但转换后,您的代码运行良好。也可以编写您的代码,以便您可以使用 mp3,但我的知识到此为止。

也许其他人比我了解得更多,可以补充回答。但如果你只是想测试,目前可以先用 Audacity 之类的工具来转换。

如果您处理的是大文件,也可能会遇到问题,可以在网上查阅相关资料。不过这并不妨碍您先试一试。

这是相关网站:

https://www.geeksforgeeks.org/python-speech-recognition-on-large-audio-files/


0
投票
import os

import speech_recognition as sr
from pydub import AudioSegment
from pydub.silence import split_on_silence

# Create a speech recognition object shared by every chunk.
recognizer = sr.Recognizer()

# Stop processing a file after this many consecutive chunks fail.
MAX_CONSECUTIVE_FAILURES = 5


def transcribe_large_audio_file(audio_path):
    """Split an mp3 file into chunks at silences and transcribe each chunk.

    Each chunk is exported as ``audio-chunks/chunk<N>.wav`` (relative to the
    current working directory) and then sent to ``recognize_google``.

    Args:
        audio_path: Path of the mp3 file to transcribe.

    Returns:
        The concatenated text of every chunk that was transcribed. Each
        chunk's text is capitalized and followed by ". " and a newline.
        Processing stops early once MAX_CONSECUTIVE_FAILURES chunks in a row
        could not be transcribed; the text collected so far is still returned.
    """
    # Load audio file with pydub.
    audio = AudioSegment.from_mp3(audio_path)

    # Split audio at silent parts with duration of 700ms or more.
    audio_chunks = split_on_silence(
        audio,
        min_silence_len=700,
        silence_thresh=audio.dBFS - 14,
        keep_silence=700,
    )

    # Create a directory to store audio chunks.
    chunks_dir = "audio-chunks"
    os.makedirs(chunks_dir, exist_ok=True)

    pieces = []
    failed_attempts = 0

    for i, chunk in enumerate(audio_chunks, start=1):
        # Save the chunk as wav so that sr.AudioFile can open it.
        chunk_file_name = os.path.join(chunks_dir, f"chunk{i}.wav")
        chunk.export(chunk_file_name, format="wav")

        # record() is used here because the source is a file, not a microphone.
        with sr.AudioFile(chunk_file_name) as src:
            chunk_audio = recognizer.record(src)

        # Catch only the recognizer's own errors so that programming mistakes
        # (e.g. a misspelled method name) are not silently counted as failures.
        try:
            text = recognizer.recognize_google(chunk_audio)
        except (sr.UnknownValueError, sr.RequestError) as error:
            failed_attempts += 1
            print(f"Could not transcribe {chunk_file_name}: {error!r}")
            if failed_attempts == MAX_CONSECUTIVE_FAILURES:
                print(f"Skipping {audio_path} due to too many errors")
                break
        else:
            failed_attempts = 0
            text = f"{text.capitalize()}. \n"
            print(chunk_file_name, ":", text)
            pieces.append(text)

    # Return the transcription for all chunks.
    return "".join(pieces)


# The same directory holds the input mp3 files and receives the transcripts.
output_dir = "C:\\Store\\output"
os.makedirs(output_dir, exist_ok=True)
processed_files = []

# Iterate through all .mp3 files in the directory. result.txt collects every
# transcription; each file additionally gets its own <name>.txt.
# An explicit encoding keeps non-ASCII transcriptions from failing on write.
result_path = os.path.join(output_dir, "result.txt")
with open(result_path, "w", encoding="utf-8") as result_file:
    for file in os.listdir(output_dir):
        # Match the extension case-insensitively so "Podcast.MP3" is included.
        if not file.lower().endswith(".mp3") or file in processed_files:
            continue

        mp3_file_path = os.path.join(output_dir, file)
        print(f"Processing {mp3_file_path}")
        try:
            transcription = transcribe_large_audio_file(mp3_file_path)
        except LookupError as error:
            # NOTE(review): recognizer errors are now handled inside
            # transcribe_large_audio_file; confirm whether LookupError can
            # still reach this point.
            print(f"Skipping {mp3_file_path} due to error: {error}")
            continue

        print(transcription)

        # Save the transcription to a file with the same name as the audio file.
        txt_file_path = os.path.join(
            output_dir, f"{os.path.splitext(file)[0]}.txt"
        )
        with open(txt_file_path, "w", encoding="utf-8") as txt_file:
            txt_file.write(transcription)
        print(f"Transcription saved to {txt_file_path}")

        print(transcription, file=result_file)
        processed_files.append(file)
    
© www.soinside.com 2019 - 2024. All rights reserved.