我正在尝试制作一个简单的 Python 脚本来打印字幕。我有一个 API 并启用了相关的 YouTube API。脚本运行但它没有捕获成绩单,即使我可以看到视频有成绩单。
如有任何建议,我将不胜感激!
import os
import google.auth
import google.auth.transport.requests
import google.oauth2.credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import urllib.request
import re
# Prompt the user for the video ID
video_id = input(": Enter the YouTube video ID: ")
# Set up the YouTube API client
api_service_name = "youtube"
api_version = "v3"
api_key = ""
from google.oauth2 import service_account
credentials = service_account.Credentials.from_service_account_file(
r"")
youtube = build(api_service_name, api_version, credentials=credentials)
try:
# Call the YouTube API to retrieve the video caption tracks
caption_request = youtube.captions().list(
part="snippet",
videoId=video_id
)
caption_response = caption_request.execute()
# Check if an asr caption track was found
asr_track_id = None
for item in caption_response["items"]:
if item["snippet"]["trackKind"] == "asr":
asr_track_id = item["id"]
break
if asr_track_id is None:
print("ASR caption track not available.")
else:
# Call the YouTube API to download the caption track in "srt" format
caption_url = "https://www.youtube.com/api/timedtext?v={}&lang=en&fmt=srv3&id={}".format(video_id, asr_track_id)
response = urllib.request.urlopen(caption_url)
caption_data = response.read().decode("utf-8")
# Parse the "srt" caption data to extract the transcript
pattern = r"\d+\n\d\d:\d\d:\d\d,\d\d\d --> \d\d:\d\d:\d\d,\d\d\d\n(.+?)(?:\n\n|\Z)"
matches = re.findall(pattern, caption_data)
if matches:
transcript = " ".join(matches).replace("\n", " ")
else:
transcript = ""
if not transcript:
print("Transcript not available.")
else:
print(transcript)
# Split the transcript into paragraphs of no more than three sentences each
sentences = transcript.split(". ")
paragraphs = []
current_paragraph = ""
for sentence in sentences:
if len(current_paragraph.split(". ")) < 3:
current_paragraph += sentence + ". "
else:
paragraphs.append(current_paragraph)
current_paragraph = sentence + ". "
if current_paragraph:
paragraphs.append(current_paragraph)
# Print the formatted transcript
for paragraph in paragraphs:
print(paragraph + "\n")
except HttpError as e:
print("An HTTP error occurred: %s" % e)
except Exception as e:
print("An error occurred: %s" % e)
当视频有文字记录时,它运行但不显示任何文字记录。