Python中使用YouTube API获取视频与频道数据的完整教程-CSDN博客

本文链接：https://blog.csdn.net/gitblog_00292/article/details/148758380

Python中使用YouTube API获取视频与频道数据的完整教程

前言

YouTube作为全球最大的视频分享平台，其API为开发者提供了丰富的功能来获取和处理视频数据。本教程将详细介绍如何使用Python通过YouTube Data API v3来获取视频详情、频道信息以及评论数据。

准备工作

安装必要库

首先需要安装Google API客户端库：

pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib

获取API凭证

在Google开发者控制台创建项目
启用YouTube Data API v3
创建OAuth 2.0客户端ID凭证
下载credentials.json文件到项目目录

API认证流程

认证函数实现

from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pickle
import os

# OAuth 2.0 scope passed to the authorization flow. The scope must be the
# exact googleapis.com URL; the proxy-mirror prefix that had crept into this
# literal made it an unrecognized scope string.
SCOPES = ["https://www.googleapis.com/auth/youtube.force-ssl"]

def youtube_authenticate():
    """Build and return an authenticated YouTube Data API v3 service object.

    Credentials cached in ``token.pickle`` are reused when they are still
    valid. Otherwise they are refreshed (when expired and a refresh token is
    available) or obtained again through the installed-app OAuth flow using
    ``credentials.json``, and the result is written back to ``token.pickle``.

    Returns:
        The object produced by ``build("youtube", "v3", credentials=...)``.
    """
    # Set for the whole process. Presumably this lets oauthlib accept the
    # plain-HTTP localhost redirect used by the local-server flow -- confirm
    # before relying on it outside local development.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
    token_path = "token.pickle"

    # Load previously saved credentials, if any.
    # NOTE(review): the file is unpickled as-is, so it must come from a
    # trusted location; pickle executes arbitrary code on load.
    credentials = None
    if os.path.exists(token_path):
        with open(token_path, "rb") as cached:
            credentials = pickle.load(cached)

    # Fast path: cached credentials are still usable, nothing to persist.
    if credentials and credentials.valid:
        return build("youtube", "v3", credentials=credentials)

    can_refresh = credentials and credentials.expired and credentials.refresh_token
    if can_refresh:
        credentials.refresh(Request())
    else:
        # No usable credentials: run the interactive flow on a free local port.
        credentials = InstalledAppFlow.from_client_secrets_file(
            "credentials.json", SCOPES
        ).run_local_server(port=0)

    # Persist the new or refreshed credentials for the next run.
    with open(token_path, "wb") as cached:
        pickle.dump(credentials, cached)

    return build("youtube", "v3", credentials=credentials)

# Authenticate and create the YouTube service object used by the snippets below.
youtube = youtube_authenticate()

视频数据获取

从URL提取视频ID

import urllib.parse as p

def get_video_id_by_url(url):
    """Extract the video ID from a YouTube video URL.

    Supported URL shapes:
      * watch URLs carrying the ID in the ``v`` query parameter
      * ``youtu.be/<id>`` short links
      * ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` paths

    Args:
        url: The video URL to parse.

    Returns:
        The video ID as a string.

    Raises:
        ValueError: If no video ID can be found in ``url``. ``ValueError`` is a
            subclass of ``Exception``, so callers catching ``Exception`` keep
            working.
    """
    parsed_url = p.urlparse(url)

    # The classic watch URL: the ID is the (first) value of ?v=...
    video_ids = p.parse_qs(parsed_url.query).get("v")
    if video_ids:
        return video_ids[0]

    segments = [segment for segment in parsed_url.path.split("/") if segment]
    host = (parsed_url.hostname or "").lower()

    # Short links put the ID directly in the first path segment.
    if host in ("youtu.be", "www.youtu.be") and segments:
        return segments[0]

    # Path-based forms: /<marker>/<id>
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    raise ValueError(f"无法解析视频URL: {url}")

获取视频详情

def get_video_details(youtube, **kwargs):
    """Run a ``videos().list`` request and return its executed response.

    The ``part`` argument is fixed to ``snippet,contentDetails,statistics``;
    every other keyword argument (for example ``id``) is forwarded unchanged.

    Args:
        youtube: Service object exposing ``videos().list(...).execute()``.
        **kwargs: Extra parameters forwarded to ``list``.

    Returns:
        Whatever ``execute()`` returns for the request.
    """
    request = youtube.videos().list(
        part="snippet,contentDetails,statistics", **kwargs
    )
    return request.execute()

格式化并打印视频信息

import re

def _format_duration(duration):
    """Render an ISO 8601 duration (e.g. ``PT1H2M3S``) as ``H:MM:SS`` text."""
    match = re.fullmatch(
        r"P(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?", duration
    )
    if not match or not any(match.groups()):
        # Unrecognized shape: show the raw value rather than failing.
        return duration

    days, hours, minutes, seconds = (
        int(group) if group else 0 for group in match.groups()
    )
    hours += days * 24  # fold whole days into the hour count

    # The leading unit is left unpadded; lower units are zero-padded so that
    # e.g. one hour and five seconds reads "1:00:05" instead of "1:5".
    if hours:
        return f"{hours}:{minutes:02d}:{seconds:02d}"
    if minutes:
        return f"{minutes}:{seconds:02d}"
    return str(seconds)


def print_video_infos(video_response):
    """Print a summary of the first video in a ``videos().list`` response.

    Printed fields: title, description, channel title, publish time, duration,
    and the comment, like and view counts. Statistics that are absent from the
    response are printed as ``N/A``. The duration is shown as ``H:MM:SS``,
    ``M:SS`` or plain seconds, depending on its length.

    Args:
        video_response: Response dict with an ``items`` list whose first
            element has ``snippet``, ``statistics`` and ``contentDetails``.

    Raises:
        IndexError: If the response contains no items.
    """
    items = video_response.get("items")
    if not items:
        raise IndexError("响应中不包含任何视频条目")
    video = items[0]
    snippet = video["snippet"]
    statistics = video.get("statistics", {})
    content_details = video.get("contentDetails", {})

    # Fields taken from the snippet
    channel_title = snippet["channelTitle"]
    title = snippet["title"]
    description = snippet["description"]
    publish_time = snippet["publishedAt"]

    # Statistics; individual counters are not guaranteed to be present
    comment_count = statistics.get("commentCount", "N/A")
    like_count = statistics.get("likeCount", "N/A")
    view_count = statistics.get("viewCount", "N/A")

    # Convert the duration into a clock-like string
    duration = content_details.get("duration")
    duration_str = _format_duration(duration) if duration else "N/A"

    print(f"""\
    Title: {title}
    Description: {description}
    Channel Title: {channel_title}
    Publish time: {publish_time}
    Duration: {duration_str}
    Number of comments: {comment_count}
    Number of likes: {like_count}
    Number of views: {view_count}
    """)

使用示例

# Usage example: resolve the ID from a watch URL, fetch the video's details
# and print them. The URL must point at youtube.com directly (the previous
# literal carried a proxy-mirror prefix).
video_url = "https://www.youtube.com/watch?v=jNQXAC9IVRw"
video_id = get_video_id_by_url(video_url)
response = get_video_details(youtube, id=video_id)
print_video_infos(response)

搜索功能实现

搜索视频

def search(youtube, **kwargs):
    """Run a ``search().list`` request and return its executed response.

    ``part`` is fixed to ``snippet``; all other keyword arguments (such as
    ``q`` or ``maxResults``) are forwarded unchanged.

    Args:
        youtube: Service object exposing ``search().list(...).execute()``.
        **kwargs: Extra parameters forwarded to ``list``.

    Returns:
        Whatever ``execute()`` returns for the request.
    """
    request = youtube.search().list(part="snippet", **kwargs)
    return request.execute()

# Search for the keyword "python" and fetch 2 results. Restricting the search
# to type="video" matters: channel and playlist hits have no
# item["id"]["videoId"], so an unfiltered search could raise KeyError below.
response = search(youtube, q="python", maxResults=2, type="video")
items = response.get("items", [])
for item in items:
    video_id = item["id"]["videoId"]
    video_response = get_video_details(youtube, id=video_id)
    print_video_infos(video_response)
    print("="*50)

频道数据处理

解析频道URL

def parse_channel_url(url):
    """Parse a channel URL into its kind and identifier.

    Recognized path forms are ``/c/<name>``, ``/channel/<id>`` and
    ``/user/<name>``. The identifier is the path segment that directly follows
    the marker, so trailing slashes or sub-pages such as ``/videos`` do not
    change the result.

    Args:
        url: The channel URL to parse.

    Returns:
        A ``(kind, identifier)`` tuple where ``kind`` is ``"c"``, ``"channel"``
        or ``"user"``; ``None`` when the URL matches none of these forms.
    """
    path_segments = [
        segment for segment in p.urlparse(url).path.split("/") if segment
    ]
    # Checked in the same priority order as before: c, channel, user.
    for kind in ("c", "channel", "user"):
        if kind not in path_segments:
            continue
        position = path_segments.index(kind)
        if position + 1 < len(path_segments):
            return kind, path_segments[position + 1]
    return None

获取频道ID

def get_channel_id_by_url(youtube, url):
    """Resolve a channel URL to its channel ID.

    Resolution depends on the kind reported by ``parse_channel_url``:
      * ``channel``: the identifier in the URL already is the channel ID.
      * ``user``: looked up through ``get_channel_details(forUsername=...)``.
      * ``c``: resolved with a channel-only search for the custom name, taking
        the first hit.

    Args:
        youtube: Authenticated service object passed on to the API helpers.
        url: The channel URL.

    Returns:
        The channel ID as a string.

    Raises:
        Exception: If the URL cannot be parsed as a channel URL, or no channel
            is found for the parsed identifier.
    """
    parsed = parse_channel_url(url)
    if parsed is None:
        # Fail with a clear message instead of a TypeError from unpacking None.
        raise Exception(f"无法解析频道URL: {url}")
    method, channel_ref = parsed

    if method == "channel":
        return channel_ref
    elif method == "user":
        response = get_channel_details(youtube, forUsername=channel_ref)
        items = response.get("items")
        if items:
            return items[0].get("id")
    elif method == "c":
        # Limit the search to channels so the first hit is a channel named
        # like the custom URL, not a video that merely matches the text.
        response = search(youtube, q=channel_ref, maxResults=1, type="channel")
        items = response.get("items")
        if items:
            return items[0]["snippet"]["channelId"]
    raise Exception(f"无法找到ID:{channel_ref}使用方法:{method}")

获取频道详情

def get_channel_details(youtube, **kwargs):
    """Run a ``channels().list`` request and return its executed response.

    ``part`` is fixed to ``statistics,snippet,contentDetails``; all other
    keyword arguments (such as ``id`` or ``forUsername``) are forwarded
    unchanged.

    Args:
        youtube: Service object exposing ``channels().list(...).execute()``.
        **kwargs: Extra parameters forwarded to ``list``.

    Returns:
        Whatever ``execute()`` returns for the request.
    """
    request = youtube.channels().list(
        part="statistics,snippet,contentDetails", **kwargs
    )
    return request.execute()

def get_channel_videos(youtube, **kwargs):
    """Run a ``search().list`` request with exactly the given parameters.

    Unlike ``search``, nothing is pre-filled: the caller supplies every
    parameter, including ``part``.

    Args:
        youtube: Service object exposing ``search().list(...).execute()``.
        **kwargs: Parameters forwarded to ``list``.

    Returns:
        Whatever ``execute()`` returns for the request.
    """
    search_request = youtube.search().list(**kwargs)
    return search_request.execute()

获取频道信息和视频

# Resolve the channel ID from its URL and fetch the channel's details.
# The URL must point at youtube.com directly (the previous literal carried a
# proxy-mirror prefix).
channel_url = "https://www.youtube.com/channel/UC8butISFwT-Wl7EV0hUK0BQ"
channel_id = get_channel_id_by_url(youtube, channel_url)
response = get_channel_details(youtube, id=channel_id)

# Extract channel information
snippet = response["items"][0]["snippet"]
statistics = response["items"][0]["statistics"]
print(f"""
Title: {snippet['title']}
Published At: {snippet['publishedAt']}
Description: {snippet['description']}
Country: {snippet.get('country', 'N/A')}
Number of videos: {statistics['videoCount']}
Number of subscribers: {statistics.get('subscriberCount', 'N/A')}
Total views: {statistics['viewCount']}
""")

# Fetch the channel's videos, at most n_pages result pages
n_pages = 2
n_videos = 0
next_page_token = None
for _ in range(n_pages):
    params = {
        'part': 'snippet',
        'channelId': channel_id,
        'type': 'video',
    }
    if next_page_token:
        params['pageToken'] = next_page_token
    res = get_channel_videos(youtube, **params)
    for video in res.get("items", []):
        n_videos += 1
        video_id = video["id"]["videoId"]
        video_response = get_video_details(youtube, id=video_id)
        print(f"================Video #{n_videos}================")
        print_video_infos(video_response)
        print("="*40)
    # Stop once the API reports no further page; otherwise the next iteration
    # would reuse a stale token (or none) and print the same videos again.
    next_page_token = res.get("nextPageToken")
    if not next_page_token:
        break

评论数据获取

获取评论

def get_comments(youtube, **kwargs):
    """Run a ``commentThreads().list`` request and return its executed response.

    ``part`` is fixed to ``snippet``; all other keyword arguments (such as
    ``videoId``, ``maxResults`` or ``pageToken``) are forwarded unchanged.

    Args:
        youtube: Service object exposing ``commentThreads().list(...).execute()``.
        **kwargs: Extra parameters forwarded to ``list``.

    Returns:
        Whatever ``execute()`` returns for the request.
    """
    request = youtube.commentThreads().list(part="snippet", **kwargs)
    return request.execute()

获取视频或频道评论

# Fetch comments for either a video URL or a channel URL.
# The URL must point at youtube.com directly (the previous literal carried a
# proxy-mirror prefix).
url = "https://www.youtube.com/watch?v=jNQXAC9IVRw"
params = {
    'maxResults': 2,
    'order': 'relevance',
}

if "watch" in url:
    # Video URL: request the comment threads of that video.
    params['videoId'] = get_video_id_by_url(url)
else:
    # Channel URL: get_channel_id_by_url takes the service object as its
    # first argument; it was previously called with the URL only.
    params['allThreadsRelatedToChannelId'] = get_channel_id_by_url(youtube, url)

# Walk through at most n_pages pages of results
n_pages = 2
for _ in range(n_pages):
    response = get_comments(youtube, **params)
    for item in response.get("items", []):
        top_comment = item["snippet"]["topLevelComment"]["snippet"]
        comment = top_comment["textDisplay"]
        updated_at = top_comment["updatedAt"]
        like_count = top_comment["likeCount"]
        print(f"""
        Comment: {comment}
        Likes: {like_count}
        Updated At: {updated_at}
        ==================================
        """)
    if "nextPageToken" in response:
        # Continue with the next page on the following iteration.
        params["pageToken"] = response["nextPageToken"]
    else:
        break