Python中使用YouTube API获取视频与频道数据的完整教程
前言
YouTube作为全球最大的视频分享平台,其API为开发者提供了丰富的功能来获取和处理视频数据。本教程将详细介绍如何使用Python通过YouTube Data API v3来获取视频详情、频道信息以及评论数据。
准备工作
安装必要库
首先需要安装Google API客户端库:
pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib
获取API凭证
- 在Google开发者控制台创建项目
- 启用YouTube Data API v3
- 创建OAuth 2.0客户端ID凭证(应用类型选择“桌面应用”,以配合下文使用的InstalledAppFlow本地授权流程)
- 下载凭证JSON文件,将其重命名为credentials.json并放到项目目录(代码中按此文件名读取)
API认证流程
认证函数实现
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pickle
import os
# OAuth scope requested during the consent flow. It must be the exact scope
# URL published by Google; a mangled URL is rejected by the OAuth server.
SCOPES = ["https://www.googleapis.com/auth/youtube.force-ssl"]


def youtube_authenticate():
    """Build an authorized YouTube Data API v3 service object.

    Credentials cached in ``token.pickle`` are reused when that file exists.
    If they are missing or invalid they are refreshed (when expired and a
    refresh token is available) or obtained again through the local-server
    OAuth flow driven by ``credentials.json``; the result is then written
    back to ``token.pickle``.

    Returns:
        The service object produced by ``build("youtube", "v3", ...)``.
    """
    # NOTE(review): this makes oauthlib accept plain-HTTP OAuth traffic for the
    # whole process. Kept for backward compatibility; confirm it is still
    # needed before using this code outside local experiments.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
    api_service_name = "youtube"
    api_version = "v3"
    client_secrets_file = "credentials.json"
    creds = None
    # Reuse previously saved credentials when available.
    # NOTE(review): pickle.load runs arbitrary code from a tampered file; only
    # load a token.pickle that this program wrote itself.
    if os.path.exists("token.pickle"):
        with open("token.pickle", "rb") as token:
            creds = pickle.load(token)
    # Refresh or re-acquire credentials when they are missing or invalid.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(client_secrets_file, SCOPES)
            creds = flow.run_local_server(port=0)
        # Persist the credentials for the next run.
        with open("token.pickle", "wb") as token:
            pickle.dump(creds, token)
    return build(api_service_name, api_version, credentials=creds)
# Authenticate and create the YouTube service object used by the examples below.
youtube = youtube_authenticate()
视频数据获取
从URL提取视频ID
import urllib.parse as p
def get_video_id_by_url(url):
    """Extract the video ID from a YouTube video URL.

    Supported shapes:
      * ``.../watch?v=<id>`` (any URL carrying a ``v`` query parameter)
      * ``https://youtu.be/<id>``
      * ``.../embed/<id>``, ``.../shorts/<id>``, ``.../live/<id>``, ``.../v/<id>``

    Args:
        url: The video URL as a string.

    Returns:
        The video ID string.

    Raises:
        ValueError: If no video ID can be found in ``url``.
    """
    parsed_url = p.urlparse(url)
    # The ``v`` query parameter takes precedence, as in the classic watch URL.
    query_ids = p.parse_qs(parsed_url.query).get("v")
    if query_ids:
        return query_ids[0]
    segments = [segment for segment in parsed_url.path.split("/") if segment]
    host = parsed_url.hostname or ""
    # Short links carry the ID as the first path segment.
    if host in ("youtu.be", "www.youtu.be") and segments:
        return segments[0]
    # Path-style links carry the ID right after a known prefix segment.
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]
    raise ValueError(f"无法解析视频URL: {url}")
获取视频详情
def get_video_details(youtube, **kwargs):
    """Run a ``videos().list`` request and return its executed response.

    The request always asks for the ``snippet``, ``contentDetails`` and
    ``statistics`` parts; every keyword argument is forwarded unchanged.
    """
    request = youtube.videos().list(
        part="snippet,contentDetails,statistics",
        **kwargs
    )
    return request.execute()
格式化并打印视频信息
import re
# ISO 8601 duration with optional day, hour, minute and second components.
_DURATION_RE = re.compile(r"P(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?")


def _format_duration(duration):
    """Turn an ISO 8601 duration such as ``PT1H2M3S`` into ``H:MM:SS`` / ``M:SS``."""
    match = _DURATION_RE.fullmatch(duration)
    if match is None:
        # Unknown format: show the raw value rather than failing.
        return duration
    days, hours, minutes, seconds = (int(part) if part else 0 for part in match.groups())
    hours += days * 24
    if hours:
        return f"{hours}:{minutes:02d}:{seconds:02d}"
    return f"{minutes}:{seconds:02d}"


def print_video_infos(video_response):
    """Print a summary of the first video in a ``videos().list`` response.

    Args:
        video_response: Response dict whose ``items`` list holds entries with
            ``snippet``, ``statistics`` and ``contentDetails`` sections.

    Raises:
        IndexError: If the response contains no items.
    """
    items = video_response.get("items") or []
    if not items:
        raise IndexError("video_response contains no items")
    video = items[0]
    snippet = video["snippet"]
    statistics = video["statistics"]
    content_details = video["contentDetails"]
    # Basic metadata from the snippet section.
    channel_title = snippet["channelTitle"]
    title = snippet["title"]
    description = snippet["description"]
    publish_time = snippet["publishedAt"]
    # Individual counters can be absent from the statistics section, so fall
    # back to "N/A" instead of raising KeyError.
    comment_count = statistics.get("commentCount", "N/A")
    like_count = statistics.get("likeCount", "N/A")
    view_count = statistics.get("viewCount", "N/A")
    # Zero-padded so that e.g. PT1H5S renders as 1:00:05, not the ambiguous 1:5.
    duration_str = _format_duration(content_details["duration"])
    print(f"""\
Title: {title}
Description: {description}
Channel Title: {channel_title}
Publish time: {publish_time}
Duration: {duration_str}
Number of comments: {comment_count}
Number of likes: {like_count}
Number of views: {view_count}
""")
使用示例
# Example: fetch and print the details of a single video by its watch URL.
video_url = "https://www.youtube.com/watch?v=jNQXAC9IVRw"
video_id = get_video_id_by_url(video_url)
response = get_video_details(youtube, id=video_id)
print_video_infos(response)
搜索功能实现
搜索视频
def search(youtube, **kwargs):
    """Run a ``search().list`` request for the ``snippet`` part.

    Every keyword argument is forwarded to ``list``; the executed response
    is returned.
    """
    request = youtube.search().list(
        part="snippet",
        **kwargs
    )
    return request.execute()
# Search for the keyword "python" and fetch 2 results. The search is limited
# to type="video" because channel and playlist hits have no
# item["id"]["videoId"], which the loop below requires.
response = search(youtube, q="python", maxResults=2, type="video")
items = response.get("items", [])
for item in items:
    video_id = item["id"]["videoId"]
    video_response = get_video_details(youtube, id=video_id)
    print_video_infos(video_response)
    print("="*50)
频道数据处理
解析频道URL
def parse_channel_url(url):
    """Parse a channel URL into its kind and identifier.

    Recognized path shapes are ``/c/<name>``, ``/channel/<id>`` and
    ``/user/<name>``. The identifier is the path segment right after the
    marker, so trailing slashes and sub-pages such as ``/videos`` do not
    change the result.

    Args:
        url: The channel URL as a string.

    Returns:
        A ``(kind, identifier)`` tuple where ``kind`` is ``"c"``,
        ``"channel"`` or ``"user"``.

    Raises:
        ValueError: If the URL path matches none of the recognized shapes.
    """
    path = p.urlparse(url).path
    segments = [segment for segment in path.split("/") if segment]
    # Markers are checked in the same priority order as before: c, channel, user.
    for kind in ("c", "channel", "user"):
        # Only a marker that has a following segment carries an identifier.
        if kind in segments[:-1]:
            return kind, segments[segments.index(kind) + 1]
    raise ValueError(f"Unrecognized channel URL: {url}")
获取频道ID
def get_channel_id_by_url(youtube, url):
    """Resolve a channel URL to its channel ID.

    Args:
        youtube: The YouTube service object used for API lookups.
        url: The channel URL, parsed with ``parse_channel_url``.

    Returns:
        The channel ID string.

    Raises:
        Exception: If no channel ID could be determined for the URL.
    """
    method, identifier = parse_channel_url(url)
    if method == "channel":
        # The URL already contains the channel ID.
        return identifier
    elif method == "user":
        response = get_channel_details(youtube, forUsername=identifier)
        items = response.get("items")
        if items:
            return items[0].get("id")
    elif method == "c":
        # Custom URLs are resolved through search. Restricting the search to
        # channels keeps a video from an unrelated channel from being picked.
        response = search(youtube, q=identifier, type="channel", maxResults=1)
        items = response.get("items")
        if items:
            return items[0]["snippet"]["channelId"]
    raise Exception(f"无法找到ID:{identifier}使用方法:{method}")
获取频道详情
def get_channel_details(youtube, **kwargs):
    """Run a ``channels().list`` request and return its executed response.

    The ``statistics``, ``snippet`` and ``contentDetails`` parts are always
    requested; every keyword argument is forwarded unchanged.
    """
    request = youtube.channels().list(
        part="statistics,snippet,contentDetails",
        **kwargs
    )
    return request.execute()
def get_channel_videos(youtube, **kwargs):
    """Run a ``search().list`` request built only from ``kwargs`` and return its executed response."""
    request = youtube.search().list(**kwargs)
    return request.execute()
获取频道信息和视频
# Example: resolve a channel URL to its ID and print the channel's details.
channel_url = "https://www.youtube.com/channel/UC8butISFwT-Wl7EV0hUK0BQ"
channel_id = get_channel_id_by_url(youtube, channel_url)
response = get_channel_details(youtube, id=channel_id)
# Extract the channel information from the first returned item.
snippet = response["items"][0]["snippet"]
statistics = response["items"][0]["statistics"]
# NOTE(review): subscriberCount is read with a default, like country, on the
# assumption that it may be absent from the response - confirm against the API.
print(f"""
Title: {snippet['title']}
Published At: {snippet['publishedAt']}
Description: {snippet['description']}
Country: {snippet.get('country', 'N/A')}
Number of videos: {statistics['videoCount']}
Number of subscribers: {statistics.get('subscriberCount', 'N/A')}
Total views: {statistics['viewCount']}
""")
# Fetch the channel's videos, at most n_pages pages of search results.
n_pages = 2
n_videos = 0
next_page_token = None
for i in range(n_pages):
    params = {
        'part': 'snippet',
        'channelId': channel_id,
        'type': 'video',
    }
    if next_page_token:
        params['pageToken'] = next_page_token
    res = get_channel_videos(youtube, **params)
    for video in res.get("items", []):
        n_videos += 1
        video_id = video["id"]["videoId"]
        video_response = get_video_details(youtube, id=video_id)
        print(f"================Video #{n_videos}================")
        print_video_infos(video_response)
        print("="*40)
    # Stop when there is no further page; continuing would request the same
    # page again and print its videos a second time.
    next_page_token = res.get("nextPageToken")
    if not next_page_token:
        break
评论数据获取
获取评论
def get_comments(youtube, **kwargs):
    """Run a ``commentThreads().list`` request for the ``snippet`` part.

    Every keyword argument is forwarded to ``list``; the executed response
    is returned.
    """
    request = youtube.commentThreads().list(
        part="snippet",
        **kwargs
    )
    return request.execute()
获取视频或频道评论
# Example: print the top comments of a video (or of a channel, for channel URLs).
url = "https://www.youtube.com/watch?v=jNQXAC9IVRw"
params = {
    'maxResults': 2,
    'order': 'relevance',
}
if "watch" in url:
    # Video URL: request the comment threads of that video.
    params['videoId'] = get_video_id_by_url(url)
else:
    # Channel URL: get_channel_id_by_url needs the service object as well as
    # the URL, since it may have to query the API.
    params['allThreadsRelatedToChannelId'] = get_channel_id_by_url(youtube, url)
n_pages = 2
for i in range(n_pages):
    response = get_comments(youtube, **params)
    for item in response.get("items", []):
        top_level = item["snippet"]["topLevelComment"]["snippet"]
        comment = top_level["textDisplay"]
        updated_at = top_level["updatedAt"]
        like_count = top_level["likeCount"]
        print(f"""
Comment: {comment}
Likes: {like_count}
Updated At: {updated_at}
==================================
""")
    # Continue with the next page if there is one; otherwise stop.
    if "nextPageToken" in response:
        params["pageToken"] = response["nextPageToken"]
    else:
        break
总结
本教程详细介绍了如何使用Python和YouTube Data API v3来:
- 进行API认证和授权
- 从URL中提取视频ID
- 获取视频的详细信息(标题、描述、统计数据等)
- 实现视频搜索功能
- 获取频道信息和频道下的视频列表
- 获取视频或频道的评论数据
通过这些功能,开发者可以构建各种YouTube数据分析应用,如视频分析工具、频道监控系统等。在实际应用中,请注意遵守YouTube API的使用条款和配额限制。
创作声明:本文部分内容由AI辅助生成(AIGC),仅供参考