add to YouTube scrape

This commit is contained in:
211250101
2023-12-17 20:05:15 +08:00
parent 4faf8099ce
commit a381a8a818
3 changed files with 156 additions and 0 deletions

View File

@@ -0,0 +1,52 @@
import json
import os
from googleapiclient.discovery import build
import socket
socket.setdefaulttimeout(500)
def search_youtube(api_key, query, max_results=50):
    """Search YouTube for videos matching a query and fetch each one's metadata.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        query: Search terms sent as the ``q`` parameter of the search request.
        max_results: Value sent as ``maxResults`` on the search request.

    Returns:
        A list holding one ``videos().list`` response per video found by the
        search, in search-result order.
    """
    # Build the client once and reuse it for the search and for every
    # metadata lookup, instead of constructing a new client per video.
    youtube = build('youtube', 'v3', developerKey=api_key)
    search_response = youtube.search().list(
        q=query,
        part="id,snippet",
        maxResults=max_results,
        type="video",
    ).execute()

    videos = []
    for search_result in search_response.get("items", []):
        result_id = search_result.get("id", {})
        # Only video results carry a videoId; skip anything else.
        if result_id.get("kind") != "youtube#video":
            continue
        metadata_response = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=result_id["videoId"],
        ).execute()
        videos.append(metadata_response)
    return videos
def get_video_metadata(api_key, video_id):
    """Return the ``videos().list`` response for a single video ID.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        video_id: ID of the video to look up.

    Returns:
        The executed response for the ``snippet``, ``contentDetails`` and
        ``statistics`` parts of the requested video.
    """
    client = build('youtube', 'v3', developerKey=api_key)
    return client.videos().list(
        part="snippet,contentDetails,statistics",
        id=video_id,
    ).execute()
# Read the API key from the environment so the credential is never committed.
# NOTE(review): the key that used to be hard-coded here remains in the commit
# history and should be revoked.
api_key = os.environ.get("YOUTUBE_API_KEY")
if not api_key:
    raise SystemExit(
        "Set the YOUTUBE_API_KEY environment variable to a YouTube Data API v3 key."
    )

# Search for videos matching the "LibreOffice Impress Tutorial" query.
vlc_related_videos = search_youtube(api_key, "LibreOffice Impress Tutorial", max_results=10)

# Create the output folder; exist_ok makes this a no-op when it already exists.
os.makedirs("data", exist_ok=True)

for video in vlc_related_videos:
    # Store each metadata response in its own JSON file, named by its etag.
    # NOTE(review): assumes etag values are safe as file names - confirm.
    with open(f"data/{video['etag']}.json", "w", encoding="utf-8") as f:
        json.dump(video, f, indent=4)

View File

@@ -0,0 +1,52 @@
import json
import os
from googleapiclient.discovery import build
import socket
socket.setdefaulttimeout(500)
def search_youtube(api_key, query, max_results=50):
    """Search YouTube for videos matching a query and fetch each one's metadata.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        query: Search terms sent as the ``q`` parameter of the search request.
        max_results: Value sent as ``maxResults`` on the search request.

    Returns:
        A list holding one ``videos().list`` response per video found by the
        search, in search-result order.
    """
    # Build the client once and reuse it for the search and for every
    # metadata lookup, instead of constructing a new client per video.
    youtube = build('youtube', 'v3', developerKey=api_key)
    search_response = youtube.search().list(
        q=query,
        part="id,snippet",
        maxResults=max_results,
        type="video",
    ).execute()

    videos = []
    for search_result in search_response.get("items", []):
        result_id = search_result.get("id", {})
        # Only video results carry a videoId; skip anything else.
        if result_id.get("kind") != "youtube#video":
            continue
        metadata_response = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=result_id["videoId"],
        ).execute()
        videos.append(metadata_response)
    return videos
def get_video_metadata(api_key, video_id):
    """Return the ``videos().list`` response for a single video ID.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        video_id: ID of the video to look up.

    Returns:
        The executed response for the ``snippet``, ``contentDetails`` and
        ``statistics`` parts of the requested video.
    """
    client = build('youtube', 'v3', developerKey=api_key)
    return client.videos().list(
        part="snippet,contentDetails,statistics",
        id=video_id,
    ).execute()
# Read the API key from the environment so the credential is never committed.
# NOTE(review): the key that used to be hard-coded here remains in the commit
# history and should be revoked.
api_key = os.environ.get("YOUTUBE_API_KEY")
if not api_key:
    raise SystemExit(
        "Set the YOUTUBE_API_KEY environment variable to a YouTube Data API v3 key."
    )

# Search for videos matching the "LibreOffice Calc Tutorial" query.
vlc_related_videos = search_youtube(api_key, "LibreOffice Calc Tutorial", max_results=10)

# Create the output folder; exist_ok makes this a no-op when it already exists.
os.makedirs("data", exist_ok=True)

for video in vlc_related_videos:
    # Store each metadata response in its own JSON file, named by its etag.
    # NOTE(review): assumes etag values are safe as file names - confirm.
    with open(f"data/{video['etag']}.json", "w", encoding="utf-8") as f:
        json.dump(video, f, indent=4)

View File

@@ -0,0 +1,52 @@
import json
import os
from googleapiclient.discovery import build
import socket
socket.setdefaulttimeout(500)
def search_youtube(api_key, query, max_results=50):
    """Search YouTube for videos matching a query and fetch each one's metadata.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        query: Search terms sent as the ``q`` parameter of the search request.
        max_results: Value sent as ``maxResults`` on the search request.

    Returns:
        A list holding one ``videos().list`` response per video found by the
        search, in search-result order.
    """
    # Build the client once and reuse it for the search and for every
    # metadata lookup, instead of constructing a new client per video.
    youtube = build('youtube', 'v3', developerKey=api_key)
    search_response = youtube.search().list(
        q=query,
        part="id,snippet",
        maxResults=max_results,
        type="video",
    ).execute()

    videos = []
    for search_result in search_response.get("items", []):
        result_id = search_result.get("id", {})
        # Only video results carry a videoId; skip anything else.
        if result_id.get("kind") != "youtube#video":
            continue
        metadata_response = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=result_id["videoId"],
        ).execute()
        videos.append(metadata_response)
    return videos
def get_video_metadata(api_key, video_id):
    """Return the ``videos().list`` response for a single video ID.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        video_id: ID of the video to look up.

    Returns:
        The executed response for the ``snippet``, ``contentDetails`` and
        ``statistics`` parts of the requested video.
    """
    client = build('youtube', 'v3', developerKey=api_key)
    return client.videos().list(
        part="snippet,contentDetails,statistics",
        id=video_id,
    ).execute()
# Read the API key from the environment so the credential is never committed.
# NOTE(review): the key that used to be hard-coded here remains in the commit
# history and should be revoked.
api_key = os.environ.get("YOUTUBE_API_KEY")
if not api_key:
    raise SystemExit(
        "Set the YOUTUBE_API_KEY environment variable to a YouTube Data API v3 key."
    )

# Search for videos matching the "Thunderbird Tutorial" query.
vlc_related_videos = search_youtube(api_key, "Thunderbird Tutorial", max_results=10)

# Create the output folder; exist_ok makes this a no-op when it already exists.
os.makedirs("data", exist_ok=True)

for video in vlc_related_videos:
    # Store each metadata response in its own JSON file, named by its etag.
    # NOTE(review): assumes etag values are safe as file names - confirm.
    with open(f"data/{video['etag']}.json", "w", encoding="utf-8") as f:
        json.dump(video, f, indent=4)