"""Collect YouTube metadata for Ubuntu desktop tutorial videos.

The script searches the YouTube Data API v3 for a query, fetches the
``snippet``, ``contentDetails`` and ``statistics`` parts of every matching
video, and stores each API response as one JSON file in an output directory.

The API key is read from the ``YOUTUBE_API_KEY`` environment variable.
Never commit a real key to version control; a key that has ever been
committed should be treated as compromised and rotated.
"""
import json
import os
import re
import socket

# Process-wide default socket timeout in seconds, kept from the original
# script; presumably meant to stop slow API calls from hanging forever.
socket.setdefaulttimeout(500)

API_KEY_ENV_VAR = "YOUTUBE_API_KEY"
DEFAULT_QUERY = "Ubuntu Desktop Tutorial"
DEFAULT_OUTPUT_DIR = "data"

# Anything outside this conservative set is replaced before an etag is used
# as a file name (etags may contain quotes or path separators).
_UNSAFE_FILENAME_CHARS = re.compile(r"[^A-Za-z0-9._-]+")


def _build_client(api_key):
    """Return a YouTube Data API v3 client authenticated with *api_key*."""
    # Imported lazily so the functions below can be used with an injected
    # client even where google-api-python-client is not installed.
    from googleapiclient.discovery import build

    return build('youtube', 'v3', developerKey=api_key)


def search_youtube(api_key, query, max_results=50, youtube=None):
    """Search YouTube for videos and return the metadata of each hit.

    Args:
        api_key: API key used to build a client when *youtube* is not given.
        query: Free-text search query.
        max_results: Passed to the API as ``maxResults`` for the search.
        youtube: Optional, already-built API client. When omitted, a client
            is built once from *api_key* and reused for every request made
            by this call.

    Returns:
        A list with one ``videos().list`` response (as returned by
        :func:`get_video_metadata`) per search result whose id kind is
        ``"youtube#video"``, in search-result order.
    """
    if youtube is None:
        youtube = _build_client(api_key)

    search_response = youtube.search().list(
        q=query,
        part="id,snippet",
        maxResults=max_results,
        type="video",
    ).execute()

    # Reuse the same client for the per-video lookups instead of building a
    # fresh one for every result.
    return [
        get_video_metadata(api_key, item["id"]["videoId"], youtube=youtube)
        for item in search_response.get("items", [])
        if item["id"]["kind"] == "youtube#video"
    ]


def get_video_metadata(api_key, video_id, youtube=None):
    """Fetch snippet, content details and statistics for one video.

    Args:
        api_key: API key used to build a client when *youtube* is not given.
        video_id: Id of the video to look up.
        youtube: Optional, already-built API client to reuse.

    Returns:
        The response of the ``videos().list`` request, unmodified.
    """
    if youtube is None:
        youtube = _build_client(api_key)

    return youtube.videos().list(
        part="snippet,contentDetails,statistics",
        id=video_id,
    ).execute()


def save_video_metadata(videos, output_dir=DEFAULT_OUTPUT_DIR):
    """Write each response in *videos* to ``<output_dir>/<etag>.json``.

    The directory is created if it does not exist. The response's ``etag``
    is used as the file name after replacing characters that are unsafe in
    file names with ``_``; etags that are already safe are used unchanged.

    Args:
        videos: Iterable of response dicts, each containing an ``"etag"`` key.
        output_dir: Directory that receives the JSON files.

    Returns:
        The list of file paths written, in input order.

    Raises:
        KeyError: If a response has no ``"etag"`` key.
    """
    os.makedirs(output_dir, exist_ok=True)

    written = []
    for video in videos:
        stem = _UNSAFE_FILENAME_CHARS.sub("_", video["etag"])
        path = os.path.join(output_dir, f"{stem}.json")
        with open(path, "w", encoding="utf-8") as f:
            json.dump(video, f, indent=4)
        written.append(path)
    return written


def main():
    """Search for Ubuntu desktop tutorials and store their metadata."""
    api_key = os.environ.get(API_KEY_ENV_VAR)
    if not api_key:
        raise SystemExit(
            f"Set the {API_KEY_ENV_VAR} environment variable to a "
            "YouTube Data API key."
        )

    ubuntu_videos = search_youtube(api_key, DEFAULT_QUERY, max_results=10)
    save_video_metadata(ubuntu_videos)


if __name__ == "__main__":
    main()