# Python source - 66 lines, 1.8 KiB
import json
|
|
import os
|
|
from googleapiclient.discovery import build
|
|
|
|
|
|
# Largest page size the YouTube Data API accepts for search.list (maxResults).
_SEARCH_PAGE_LIMIT = 50


def _view_count(video):
    """Return the view count of a ``videos.list`` response, or 0 if unavailable.

    The response may have an empty ``items`` list or lack
    ``statistics.viewCount``; treating those as 0 keeps the sort from
    crashing and pushes such entries to the end of a descending sort.
    """
    try:
        return int(video['items'][0]['statistics']['viewCount'])
    except (KeyError, IndexError, TypeError, ValueError):
        return 0


def search_youtube(api_key, query, max_results=50, language="en"):
    """Search YouTube for videos and return their metadata, most viewed first.

    Args:
        api_key: Developer key passed to the YouTube Data API client.
        query: Search term sent as ``q``.
        max_results: Maximum number of videos to return. Values above the
            API's per-page limit are satisfied by fetching several pages.
        language: Value sent as ``relevanceLanguage``.

    Returns:
        A list with at most ``max_results`` entries. Each entry is the
        response returned by ``get_video_metadata`` for one video, and the
        list is sorted by view count in descending order.
    """
    youtube = build('youtube', 'v3', developerKey=api_key)

    videos = []
    next_page_token = None

    while len(videos) < max_results:
        remaining = max_results - len(videos)

        # Ask only for what is still needed, and never more than one page
        # may hold; larger values are rejected by the API.
        search_response = youtube.search().list(
            q=query,
            part="id,snippet",
            maxResults=min(remaining, _SEARCH_PAGE_LIMIT),
            pageToken=next_page_token,
            type="video",
            relevanceLanguage=language,
        ).execute()

        video_ids = [
            item['id']['videoId']
            for item in search_response.get("items", [])
            if item['id']['kind'] == 'youtube#video'
        ]

        # Fetch metadata for each video, never exceeding the requested total.
        videos.extend(
            get_video_metadata(api_key, video_id)
            for video_id in video_ids[:remaining]
        )

        next_page_token = search_response.get('nextPageToken')
        if not next_page_token:
            break

    # Sort videos by view count, highest first.
    return sorted(videos, key=_view_count, reverse=True)
|
|
|
|
|
|
def get_video_metadata(api_key, video_id):
    """Fetch the snippet, content details and statistics for one video.

    Args:
        api_key: Developer key passed to the YouTube Data API client.
        video_id: Value sent as the ``id`` parameter of ``videos().list``.

    Returns:
        The response of the executed ``videos().list`` request, unmodified.
    """
    client = build('youtube', 'v3', developerKey=api_key)
    return client.videos().list(
        id=video_id,
        part="snippet,contentDetails,statistics",
    ).execute()
|
|
|
|
|
|
api_key = 'API_KEY'  # Replace with your actual API key

# Search for videos related to "VLC player"
vlc_related_videos = search_youtube(api_key, "VLC player", max_results=10)

# Create the output folder if needed; exist_ok avoids the race between
# checking for the directory and creating it.
os.makedirs("data", exist_ok=True)

for video in vlc_related_videos:
    # Store each video's metadata in its own JSON file, named after the
    # response's etag. The encoding is explicit so the output does not depend
    # on the platform default.
    with open(f"data/{video['etag']}.json", "w", encoding="utf-8") as f:
        json.dump(video, f, indent=4)
|