我正在尝试从我的 youtube 频道获取以下信息(“视频标题”、“视频 URL”、“可见性”、“观看次数”、“喜欢”、“评论”、“发布日期”、“持续时间”、“ Original Channel”、“Views/Day”、“Likes/View”、“Comments/View”)为了避免被 google api 抛出,我在每个请求之间设置了 1 分钟的计时器(我有超过 4000 个视频)。 如果一段时间后仍然出现 api 错误,我可以在第二天重新启动脚本以完成浏览我所有的视频。你能帮我更正我的代码或有其他想法来获取我的数据吗?非常感谢您的帮助
import google.auth
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import openpyxl
import time
from datetime import datetime
import os
# Define your API key
API_KEY = "xxxx"
# Initialize the YouTube API client
youtube = build("youtube", "v3", developerKey=API_KEY)
# Define the ID of the YouTube channel
channel_id = "xxxxxxxx"
# Initialize the Excel workbook
workbook = openpyxl.Workbook()
worksheet = workbook.active
worksheet.append(["Video Title", "Video URL", "Visibility", "Views", "Likes", "Comments", "Publish Date", "Duration", "Original Channel", "Views/Day", "Likes/View", "Comments/View"])
# Define the parameters for the API request
request_params = {
"part": "snippet,statistics,status",
"maxResults": 50,
"fields": "nextPageToken,items(id,snippet(title),statistics(viewCount,likeCount,commentCount),status(privacyStatus),snippet(publishedAt),contentDetails(duration),snippet(channelId))"
}
# Check if the Excel workbook already exists and read the last video ID and row number
if os.path.isfile("youtube_videos.xlsx"):
workbook = openpyxl.load_workbook("youtube_videos.xlsx")
worksheet = workbook.active
last_row = worksheet.max_row
last_video_id = worksheet.cell(row=last_row, column=2).value.split("=")[-1]
last_video_url = worksheet.cell(row=last_row, column=3).value
print(f"Resuming from video ID: {last_video_id}")
request_params["pageToken"] = None
request_params["maxResults"] = 50
request_params["fields"] = "nextPageToken,items(id,snippet(publishedAt,channelId,title),statistics(viewCount,likeCount,commentCount),status(privacyStatus),contentDetails(duration))"
request_params["id"] = worksheet.cell(row=last_row, column=2).value.split("=")[-1]
# Define the initial API request
request = youtube.videos().list(**request_params)
try:
# Loop through the API request until there are no more pages
while request is not None:
# Execute the API request and retrieve the response
response = request.execute()
# Loop through each video in the response and extract its information
for item in response["items"]:
video_title = item["snippet"]["title"]
video_url = item["id"]
visibility = item["status"]["privacyStatus"]
views = item["statistics"]["viewCount"] if "viewCount" in item["statistics"] else 0
likes = item["statistics"]["likeCount"] if "likeCount" in item["statistics"] else 0
comments = item["statistics"]["commentCount"] if "commentCount" in item["statistics"] else 0
publish_date = datetime.strptime(item["snippet"]["publishedAt"], "%Y-%m-%dT%H:%M:%SZ").date()
duration = item["contentDetails"]["duration"]
original_channel_id = item["snippet"]["channelId"]
original_channel = youtube.channels().list(part="snippet", id=original_channel_id).execute()["items"][0]["snippet"]["title"]
# Calculate additional statistics
days_since_publish = (datetime.now().date() - publish_date).days
views_per_day = views / days_since_publish if days_since_publish > 0 else views
likes_per_view = likes / views if views > 0 else likes
comments
comments_per_view = comments / views if views > 0 else comments
# Write the video information to the Excel worksheet
worksheet.append([video_title, video_url, visibility, views, likes, comments, publish_date, duration, original_channel, views_per_day, likes_per_view, comments_per_view])
# Wait for 1 minutes before making the next API request
time.sleep(60)
# Check if there are more pages to retrieve
request = youtube.videos().list_next(request, response)
finally:
# Save the Excel workbook
workbook.save("youtube_videos.xlsx")