从我的 YouTube 频道 Python/API 中提取数据

问题描述 投票:0回答:0

我正在尝试从我的 youtube 频道获取以下信息(“视频标题”、“视频 URL”、“可见性”、“观看次数”、“喜欢”、“评论”、“发布日期”、“持续时间”、“ Original Channel”、“Views/Day”、“Likes/View”、“Comments/View”)为了避免被 google api 抛出,我在每个请求之间设置了 1 分钟的计时器(我有超过 4000 个视频)。 如果一段时间后仍然出现 api 错误,我可以在第二天重新启动脚本以完成浏览我所有的视频。你能帮我更正我的代码或有其他想法来获取我的数据吗?非常感谢您的帮助

import google.auth
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import openpyxl
import time
from datetime import datetime
import os

# Define your API key
API_KEY = "xxxx"

# Initialize the YouTube API client
youtube = build("youtube", "v3", developerKey=API_KEY)

# Define the ID of the YouTube channel
channel_id = "xxxxxxxx"

# Initialize the Excel workbook
workbook = openpyxl.Workbook()
worksheet = workbook.active
worksheet.append(["Video Title", "Video URL", "Visibility", "Views", "Likes", "Comments", "Publish Date", "Duration", "Original Channel", "Views/Day", "Likes/View", "Comments/View"])

# Define the parameters for the API request
request_params = {
   "part": "snippet,statistics,status",
   "maxResults": 50,
   "fields": "nextPageToken,items(id,snippet(title),statistics(viewCount,likeCount,commentCount),status(privacyStatus),snippet(publishedAt),contentDetails(duration),snippet(channelId))"
}

# Check if the Excel workbook already exists and read the last video ID and row number
if os.path.isfile("youtube_videos.xlsx"):
   workbook = openpyxl.load_workbook("youtube_videos.xlsx")
   worksheet = workbook.active
   last_row = worksheet.max_row
   last_video_id = worksheet.cell(row=last_row, column=2).value.split("=")[-1]
   last_video_url = worksheet.cell(row=last_row, column=3).value
   print(f"Resuming from video ID: {last_video_id}")
   request_params["pageToken"] = None
   request_params["maxResults"] = 50
   request_params["fields"] = "nextPageToken,items(id,snippet(publishedAt,channelId,title),statistics(viewCount,likeCount,commentCount),status(privacyStatus),contentDetails(duration))"
   request_params["id"] = worksheet.cell(row=last_row, column=2).value.split("=")[-1]


# Define the initial API request
request = youtube.videos().list(**request_params)

try:
    # Loop through the API request until there are no more pages
    while request is not None:
        # Execute the API request and retrieve the response
        response = request.execute()

        # Loop through each video in the response and extract its information
        for item in response["items"]:
            video_title = item["snippet"]["title"]
            video_url = item["id"]
            visibility = item["status"]["privacyStatus"]
            views = item["statistics"]["viewCount"] if "viewCount" in item["statistics"] else 0
            likes = item["statistics"]["likeCount"] if "likeCount" in item["statistics"] else 0
            comments = item["statistics"]["commentCount"] if "commentCount" in item["statistics"] else 0
            publish_date = datetime.strptime(item["snippet"]["publishedAt"], "%Y-%m-%dT%H:%M:%SZ").date()
            duration = item["contentDetails"]["duration"]
            original_channel_id = item["snippet"]["channelId"]
            original_channel = youtube.channels().list(part="snippet", id=original_channel_id).execute()["items"][0]["snippet"]["title"]
            # Calculate additional statistics
            days_since_publish = (datetime.now().date() - publish_date).days
            views_per_day = views / days_since_publish if days_since_publish > 0 else views
            likes_per_view = likes / views if views > 0 else likes
            comments

            comments_per_view = comments / views if views > 0 else comments

            # Write the video information to the Excel worksheet
            worksheet.append([video_title, video_url, visibility, views, likes, comments, publish_date, duration, original_channel, views_per_day, likes_per_view, comments_per_view])

        # Wait for 1 minutes before making the next API request
        time.sleep(60)

        # Check if there are more pages to retrieve
        request = youtube.videos().list_next(request, response)

finally:
    # Save the Excel workbook
    workbook.save("youtube_videos.xlsx")
python google-cloud-platform youtube-data-api
© www.soinside.com 2019 - 2024. All rights reserved.