add to YouTube scrape
This commit is contained in:
52
resouce_collection/youtube/Ubuntu/get_ubuntu.py
Normal file
52
resouce_collection/youtube/Ubuntu/get_ubuntu.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import json
|
||||
import os
|
||||
from googleapiclient.discovery import build
|
||||
import socket
|
||||
# Apply a 500-second default timeout to every socket created after this point.
# NOTE(review): presumably meant to bound the YouTube API HTTP calls made
# below so a stalled connection cannot hang the script forever — confirm.
socket.setdefaulttimeout(500)
|
||||
|
||||
|
||||
def search_youtube(api_key, query, max_results=50):
    """Search YouTube for videos matching *query* and fetch metadata per hit.

    Args:
        api_key: Developer key handed to the YouTube Data API v3 client.
        query: Search string sent as the ``q`` parameter.
        max_results: Value sent as ``maxResults`` on the search request.

    Returns:
        A list with one entry per video hit, in search-result order. Each
        entry is whatever ``get_video_metadata`` returns for that video ID.
    """
    client = build('youtube', 'v3', developerKey=api_key)

    search_request = client.search().list(
        q=query,
        part="id,snippet",
        maxResults=max_results,
        type="video",
    )
    hits = search_request.execute().get("items", [])

    # Only results whose id is tagged as a video carry a "videoId" field.
    return [
        get_video_metadata(api_key, hit["id"]["videoId"])
        for hit in hits
        if hit["id"]["kind"] == "youtube#video"
    ]
|
||||
|
||||
|
||||
def get_video_metadata(api_key, video_id):
    """Fetch the ``videos.list`` response for a single video ID.

    Args:
        api_key: Developer key handed to the YouTube Data API v3 client.
        video_id: ID of the video to look up.

    Returns:
        The executed ``videos().list`` response, requested with the
        ``snippet``, ``contentDetails`` and ``statistics`` parts.
    """
    service = build('youtube', 'v3', developerKey=api_key)
    return (
        service.videos()
        .list(part="snippet,contentDetails,statistics", id=video_id)
        .execute()
    )
|
||||
|
||||
|
||||
# Read the API key from the YOUTUBE_API_KEY environment variable instead of
# committing it to source control. Any key that was ever hard-coded in this
# file must be treated as leaked and rotated in the Google Cloud console.
api_key = os.environ.get("YOUTUBE_API_KEY")
if not api_key:
    raise SystemExit(
        "Set the YOUTUBE_API_KEY environment variable to a YouTube Data API v3 key."
    )

# Search for videos related to "Ubuntu Desktop Tutorial".
# (The variable name is kept as-is for backward compatibility.)
vlc_related_videos = search_youtube(api_key, "Ubuntu Desktop Tutorial", max_results=10)

# Create the output folder; exist_ok avoids a check-then-create race.
os.makedirs("data", exist_ok=True)

for video in vlc_related_videos:
    # Store each metadata response in its own JSON file, named by its etag.
    with open(f"data/{video['etag']}.json", "w", encoding="utf-8") as f:
        json.dump(video, f, indent=4)
|
||||
Reference in New Issue
Block a user