Files
sci-gui-agent-benchmark/resouce_collection/youtube/vlc_player/vlc_player.py
2023-12-16 22:10:11 +08:00

66 lines
1.8 KiB
Python

import json
import os
from googleapiclient.discovery import build
def search_youtube(api_key, query, max_results=50, language="en"):
    """Search YouTube for videos matching *query* and return their metadata.

    Pages through the ``search.list`` endpoint until ``max_results`` video
    IDs have been collected or no further page is available, fetches the
    metadata of every hit with ``get_video_metadata`` and returns those
    responses ordered by view count, most viewed first.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        query: Search term sent as ``q``.
        max_results: Upper bound on the number of videos returned.
        language: Value sent as ``relevanceLanguage``.

    Returns:
        A list of ``videos.list`` responses (one per video) sorted by
        descending ``statistics.viewCount``. Responses whose ``items`` list
        is empty are left out.
    """
    # search.list only accepts maxResults in the range 0-50; anything larger
    # has to be collected over several pages.
    page_size_limit = 50

    def view_count(metadata):
        # A missing statistics block or viewCount sorts as zero views instead
        # of aborting the whole search.
        statistics = metadata['items'][0].get('statistics', {})
        return int(statistics.get('viewCount', 0))

    youtube = build('youtube', 'v3', developerKey=api_key)
    video_ids = []
    next_page_token = None

    while len(video_ids) < max_results:
        remaining = max_results - len(video_ids)
        search_response = youtube.search().list(
            q=query,
            part="id,snippet",
            maxResults=min(remaining, page_size_limit),
            pageToken=next_page_token,
            type="video",
            relevanceLanguage=language,
        ).execute()

        video_ids.extend(
            item['id']['videoId']
            for item in search_response.get("items", [])
            if item['id']['kind'] == 'youtube#video'
        )

        next_page_token = search_response.get('nextPageToken')
        if not next_page_token:
            break

    # Never hand back more than the caller asked for.
    del video_ids[max_results:]

    # Fetch metadata for each video
    videos = []
    for video_id in video_ids:
        metadata = get_video_metadata(api_key, video_id)
        # An empty "items" list carries no statistics to rank by, so the
        # response is skipped rather than crashing the sort below.
        if metadata.get('items'):
            videos.append(metadata)

    # Sort videos by view count
    return sorted(videos, key=view_count, reverse=True)
def get_video_metadata(api_key, video_id):
    """Return the raw ``videos.list`` response for a single video.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        video_id: ID of the video to look up.

    Returns:
        The executed response, requested with the ``snippet``,
        ``contentDetails`` and ``statistics`` parts.
    """
    client = build('youtube', 'v3', developerKey=api_key)
    return client.videos().list(
        id=video_id,
        part="snippet,contentDetails,statistics",
    ).execute()
# Prefer a key supplied through the YOUTUBE_API_KEY environment variable so the
# secret does not have to live in the source; without it the placeholder below
# is used, exactly as before.
api_key = os.environ.get('YOUTUBE_API_KEY', 'API_KEY')  # Replace with your actual API key

# Search for videos related to "VLC player"
vlc_related_videos = search_youtube(api_key, "VLC player", max_results=10)

# Create the data folder if it does not exist; exist_ok avoids the
# check-then-create race of a separate os.path.exists test.
os.makedirs("data", exist_ok=True)

for video in vlc_related_videos:
    # Store each video's metadata in its own JSON file, named after the
    # response's etag.
    with open(f"data/{video['etag']}.json", "w") as f:
        json.dump(video, f, indent=4)