Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
wiseplat
GitHub Repository: wiseplat/python-code
Path: blob/master/parsing-youtube/main.py
5925 views
1
import urllib.request
2
import json
3
4
5
def get_all_video_from_channel(channel_id, api_key=None, *, urlopen=None):
    """Return the watch-page URL of every video the search API lists for a channel.

    Pages through the YouTube Data API v3 ``search`` endpoint (25 results per
    request, ``order=date``), following ``nextPageToken`` until a response no
    longer carries one. Each request URL is printed before it is fetched.

    Args:
        channel_id: Channel id sent as the ``channelId`` query parameter.
        api_key: API key to send. When ``None`` the key embedded in this
            file is used, so existing one-argument calls keep working.
        urlopen: Callable taking a URL and returning a readable response
            usable as a context manager. Defaults to
            ``urllib.request.urlopen``.

    Returns:
        A list of ``https://www.youtube.com/watch?v=<videoId>`` strings in
        the order the API returned them. Search results that are not of kind
        ``youtube#video`` are skipped.

    Raises:
        Whatever ``urlopen`` raises for a failed request (for the default
        opener, ``urllib.error.URLError`` and its subclasses).
    """
    # Function-scope import so this block does not depend on edits elsewhere.
    from urllib.parse import quote

    if api_key is None:
        # NOTE(review): this credential is committed to source control. It
        # should be rotated and passed in by the caller instead.
        api_key = "AIzaSyAAseOhUtnaN1q7YEdyoMoZ4Vf1qCjhHCk"
    if urlopen is None:
        urlopen = urllib.request.urlopen

    base_video_url = 'https://www.youtube.com/watch?v='
    base_search_url = 'https://www.googleapis.com/youtube/v3/search?'

    # Percent-encode caller-supplied values so characters such as '&' or ' '
    # cannot break or extend the query string. Ordinary ids are unchanged.
    first_url = base_search_url + (
        'key={}&channelId={}&part=snippet,id&order=date&maxResults=25'
    ).format(quote(str(api_key), safe=''), quote(str(channel_id), safe=''))

    video_links = []
    url = first_url
    while True:
        print(url)
        # The context manager closes the HTTP response even if parsing fails.
        with urlopen(url) as response:
            resp = json.load(response)

        for item in resp.get('items', []):
            if item['id']['kind'] == "youtube#video":
                video_links.append(base_video_url + item['id']['videoId'])

        # An absent token marks the last page; no exception handling needed.
        next_page_token = resp.get('nextPageToken')
        if not next_page_token:
            break
        url = first_url + '&pageToken={}'.format(
            quote(str(next_page_token), safe=''))
    return video_links
31
32
33
def get_all_video_with_titles_from_channel(channel_id, api_key=None, *,
                                           urlopen=None):
    """Return URL, id, title and publish time for every video of a channel.

    Pages through the YouTube Data API v3 ``search`` endpoint (25 results per
    request, ``order=date``), following ``nextPageToken`` until a response no
    longer carries one.

    Args:
        channel_id: Channel id sent as the ``channelId`` query parameter.
        api_key: API key to send. When ``None`` the key embedded in this
            file is used, so existing one-argument calls keep working.
        urlopen: Callable taking a URL and returning a readable response
            usable as a context manager. Defaults to
            ``urllib.request.urlopen``.

    Returns:
        A list with one ``[watch_url, video_id, title, publish_time]`` list
        per ``youtube#video`` search result, in the order the API returned
        them. The last three values are copied verbatim from the response.

    Raises:
        Whatever ``urlopen`` raises for a failed request (for the default
        opener, ``urllib.error.URLError`` and its subclasses).
    """
    # Function-scope import so this block does not depend on edits elsewhere.
    from urllib.parse import quote

    if api_key is None:
        # NOTE(review): this credential is committed to source control. It
        # should be rotated and passed in by the caller instead.
        api_key = "AIzaSyAAseOhUtnaN1q7YEdyoMoZ4Vf1qCjhHCk"
    if urlopen is None:
        urlopen = urllib.request.urlopen

    base_video_url = 'https://www.youtube.com/watch?v='
    base_search_url = 'https://www.googleapis.com/youtube/v3/search?'

    # Percent-encode caller-supplied values so characters such as '&' or ' '
    # cannot break or extend the query string. Ordinary ids are unchanged.
    first_url = base_search_url + (
        'key={}&channelId={}&part=snippet,id&order=date&maxResults=25'
    ).format(quote(str(api_key), safe=''), quote(str(channel_id), safe=''))

    video_links = []
    url = first_url
    while True:
        # The context manager closes the HTTP response even if parsing fails.
        with urlopen(url) as response:
            resp = json.load(response)

        for item in resp.get('items', []):
            if item['id']['kind'] == "youtube#video":
                video_id = item['id']['videoId']
                snippet = item['snippet']
                video_links.append([
                    base_video_url + video_id,
                    video_id,
                    snippet['title'],
                    snippet['publishTime'],
                ])

        # An absent token marks the last page; no exception handling needed.
        next_page_token = resp.get('nextPageToken')
        if not next_page_token:
            break
        url = first_url + '&pageToken={}'.format(
            quote(str(next_page_token), safe=''))
    return video_links
62
63
64
def get_information_from_youtube_video(video_id, api_key=None, *, urlopen=None):
    """Fetch the ``statistics`` part of a video from the YouTube Data API v3.

    Builds a ``videos`` endpoint URL with ``part=statistics``, prints it,
    fetches it and returns the decoded JSON body unchanged.

    Args:
        video_id: Value sent as the ``id`` query parameter.
        api_key: API key to send. When ``None`` the key embedded in this
            file is used, so existing one-argument calls keep working.
        urlopen: Callable taking a URL and returning a readable response
            usable as a context manager. Defaults to
            ``urllib.request.urlopen``.

    Returns:
        The parsed JSON response as returned by ``json.load``.

    Raises:
        Whatever ``urlopen`` raises for a failed request (for the default
        opener, ``urllib.error.URLError`` and its subclasses).
    """
    # Function-scope import so this block does not depend on edits elsewhere.
    from urllib.parse import quote

    if api_key is None:
        # NOTE(review): this credential is committed to source control. It
        # should be rotated and passed in by the caller instead.
        api_key = "AIzaSyAAseOhUtnaN1q7YEdyoMoZ4Vf1qCjhHCk"
    if urlopen is None:
        urlopen = urllib.request.urlopen

    base_info_url = 'https://www.googleapis.com/youtube/v3/videos?'

    # Percent-encode caller-supplied values. ',' stays literal so a
    # comma-separated id string produces the same URL as before.
    info_url = base_info_url + 'part=statistics&key={}&id={}'.format(
        quote(str(api_key), safe=''), quote(str(video_id), safe=','))

    print(info_url)
    # The context manager closes the HTTP response even if parsing fails.
    with urlopen(info_url) as response:
        return json.load(response)
76
77
78
# ---------------------------------------------------------------------------
# Script section: loads a previously pickled list of channel videos and
# prints youtube_dl metadata for the first entry. The commented-out snippets
# are the earlier steps that were run once to produce the cache file.
# ---------------------------------------------------------------------------

# Step 1 (disabled): dump plain video links of a channel to a text file.
# youtube_channel_id = "UC7f5bVxWsm3jlZIPDzOMcAg"
# all_video_links = get_all_video_from_channel(youtube_channel_id)
# with open(youtube_channel_id+"_1.txt", "a") as out:
#     for link in all_video_links:
#         print(link)
#         print(link, file=out)
#
# print("Total: ",len(all_video_links))

youtube_channel_id = "UC7f5bVxWsm3jlZIPDzOMcAg"
# Step 2 (disabled): fetch links together with ids, titles and publish times.
# all_video_links = get_all_video_with_titles_from_channel(youtube_channel_id)
# print(all_video_links)

import pickle
# Step 3 (disabled): cache the fetched list on disk.
# with open(youtube_channel_id+'_1.data', 'wb') as fp:
#     pickle.dump(all_video_links, fp)

# NOTE(review): pickle.load executes arbitrary code from the file; only load
# cache files this script wrote itself.
with open(youtube_channel_id+'_1.data', 'rb') as fp:
    all_video_links = pickle.load(fp)
print(all_video_links)

# Alternative (disabled): read statistics through the Data API instead.
# info_from_video = get_information_from_youtube_video("ZJER0vEtzd0")
# print(info_from_video)
# print(info_from_video['items'][0]['statistics']['viewCount'])


from youtube_dl import YoutubeDL
# video = all_video_links[0]
# video = "https://www.youtube.com/watch?v=ZJER0vEtzd0"

# Fail with a clear message instead of a bare IndexError on an empty cache.
if not all_video_links:
    raise SystemExit("No videos found in " + youtube_channel_id + "_1.data")

# Each cached entry is a list whose first element is the watch URL.
video = all_video_links[0][0]
print("For: ", video)

youtube_dl_opts = {
    'ignoreerrors': True,
    'quiet': True,
}

with YoutubeDL(youtube_dl_opts) as ydl:
    info_dict = ydl.extract_info(video, download=False)

# With 'ignoreerrors' enabled, extract_info returns None instead of raising
# when extraction fails, so guard before reading fields.
if info_dict is None:
    print("No metadata could be extracted for: ", video)
else:
    video_id = info_dict.get("id", None)
    video_views = info_dict.get("view_count", None)
    video_date = info_dict.get("upload_date", None)
    video_duration = info_dict.get("duration", None)
    video_title = info_dict.get('title', None)
    print(video_id, video_views, video_date, video_duration, video_title)
123
124