Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/downloader/ytdl.py
9096 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Downloader module for URLs requiring youtube-dl support"""
10
11
from .common import DownloaderBase
12
from .. import ytdl, text
13
from xml.etree import ElementTree
14
from http.cookiejar import Cookie
15
import os
16
17
18
class YoutubeDLDownloader(DownloaderBase):
    """Downloader for 'ytdl:' URLs, backed by yt-dlp or youtube-dl"""
    scheme = "ytdl"

    def __init__(self, job):
        """Read downloader options and prepare the base ytdl parameters

        The ytdl module itself is imported lazily on the first call to
        download(), so constructing this object never fails because of a
        missing yt-dlp / youtube-dl installation.
        """
        DownloaderBase.__init__(self, job)

        extractor = job.extractor
        self.retries = self.config("retries", extractor._retries)
        self.ytdl_opts = {
            # a negative retry count means "retry forever"
            "retries": self.retries+1 if self.retries >= 0 else float("inf"),
            "socket_timeout": self.config("timeout", extractor._timeout),
            "nocheckcertificate": not self.config("verify", extractor._verify),
            "proxy": self.proxies.get("http") if self.proxies else None,
            "ignoreerrors": True,
        }

        # created on demand in download() and reused afterwards
        self.ytdl_instance = None
        # optional callable returning the current rate limit (see _prepare)
        self.rate_dyn = None
        self.forward_cookies = self.config("forward-cookies", True)
        self.progress = self.config("progress", 3.0)
        self.outtmpl = self.config("outtmpl")

    def download(self, url, pathfmt):
        """Download the media behind 'url' to the location given by 'pathfmt'

        Returns True on success and False when extraction or download
        failed, or when no ytdl module could be imported.

        The attempt counter is shared between the extraction phase and the
        download phase; attempts used for extraction count against the
        attempts available for downloading.
        """
        kwdict = pathfmt.kwdict
        tries = 0

        kwdict["_mtime_http"] = None
        if ytdl_instance := kwdict.pop("_ytdl_instance", None):
            # 'ytdl' extractor
            # (instance and info_dict are supplied through kwdict)
            self._prepare(ytdl_instance)
            info_dict = kwdict.pop("_ytdl_info_dict")
        else:
            # other extractors
            ytdl_instance = self.ytdl_instance
            if not ytdl_instance:
                try:
                    module = ytdl.import_module(self.config("module"))
                except (ImportError, SyntaxError) as exc:
                    if exc.__context__:
                        self.log.error("Cannot import yt-dlp or youtube-dl")
                    else:
                        self.log.error("Cannot import module '%s'",
                                       getattr(exc, "name", ""))
                    self.log.traceback(exc)
                    # do not try to import again for later files
                    self.download = lambda u, p: False
                    return False

                try:
                    ytdl_version = module.version.__version__
                except Exception:
                    ytdl_version = ""
                self.log.debug("Using %s version %s", module, ytdl_version)

                self.ytdl_instance = ytdl_instance = ytdl.construct_YoutubeDL(
                    module, self, self.ytdl_opts, kwdict.get("_ytdl_params"))
                self.ytdl_pp = module.postprocessor
                if self.outtmpl == "default":
                    self.outtmpl = module.DEFAULT_OUTTMPL
                self._prepare(ytdl_instance)

            if self.forward_cookies:
                self.log.debug("Forwarding cookies to %s",
                               ytdl_instance.__module__)
                set_cookie = ytdl_instance.cookiejar.set_cookie
                for cookie in self.session.cookies:
                    set_cookie(cookie)

            # drop the 5-character scheme prefix ("ytdl:")
            url = url[5:]
            manifest = kwdict.get("_ytdl_manifest")
            while True:
                tries += 1
                # set by LoggerAdapter.error() when ytdl reports an error
                self.error = None
                try:
                    if manifest is None:
                        info_dict = self._extract_url(
                            ytdl_instance, url)
                    else:
                        info_dict = self._extract_manifest(
                            ytdl_instance, url, kwdict)
                except Exception as exc:
                    self.log.traceback(exc)
                    cls = exc.__class__
                    if cls.__module__ == "builtins":
                        # builtin exceptions are treated as non-retryable
                        tries = False
                    msg = f"{cls.__name__}: {exc}"
                else:
                    if self.error is not None:
                        msg = self.error
                    elif not info_dict:
                        msg = "Empty 'info_dict' data"
                    else:
                        break

                if not self._retry_allowed(msg, tries):
                    return False

        if extra := kwdict.get("_ytdl_extra"):
            info_dict.update(extra)

        while True:
            tries += 1
            self.error = None
            try:
                if "entries" in info_dict:
                    success = self._download_playlist(
                        ytdl_instance, pathfmt, info_dict)
                else:
                    success = self._download_video(
                        ytdl_instance, pathfmt, info_dict)
            except Exception as exc:
                self.log.traceback(exc)
                cls = exc.__class__
                if cls.__module__ == "builtins":
                    # builtin exceptions are treated as non-retryable
                    tries = False
                msg = f"{cls.__name__}: {exc}"
            else:
                if self.error is not None:
                    msg = self.error
                elif not success:
                    msg = "Error"
                else:
                    break

            if not self._retry_allowed(msg, tries):
                return False
        return True

    def _retry_allowed(self, msg, tries):
        """Log a failed attempt and return True if another one may follow

        'tries' is the number of attempts made so far, or a false value
        when the failure must not be retried at all.

        A negative 'retries' setting is treated as unlimited, matching the
        "retries" value handed to ytdl in __init__().
        """
        if not tries:
            self.log.error(msg)
            return False

        retries = self.retries
        if retries < 0:
            retries = float("inf")
        self.log.error("%s (%s/%s)", msg, tries, retries+1)
        return tries <= retries

    def _extract_url(self, ytdl, url):
        """Return the info dict ytdl extracts for 'url' without downloading"""
        return ytdl.extract_info(url, download=False)

    def _extract_manifest(self, ytdl, url, kwdict):
        """Build an info dict from an HLS or DASH manifest

        Reads the '_ytdl_manifest*' entries of 'kwdict':
        '_ytdl_manifest' selects the manifest type ("hls" or "dash"),
        '_ytdl_manifest_data' optionally provides already fetched manifest
        content, and '_ytdl_manifest_cookies', '_ytdl_manifest_headers' and
        '_ytdl_manifest_remux' are optional extras.

        Raises ValueError for any other manifest type.
        """
        extr = ytdl.get_info_extractor("Generic")
        video_id = extr._generic_id(url)

        if cookies := kwdict.get("_ytdl_manifest_cookies"):
            if isinstance(cookies, dict):
                cookies = cookies.items()
            set_cookie = ytdl.cookiejar.set_cookie
            for name, value in cookies:
                # minimal cookie: empty domain, path "/", no expiry
                set_cookie(Cookie(
                    version=0, name=name, value=value,
                    port=None, port_specified=False,
                    domain="", domain_specified=False,
                    domain_initial_dot=False,
                    path="/", path_specified=False,
                    secure=False, expires=None, discard=False,
                    comment=None, comment_url=None, rest={},
                ))

        manifest_type = kwdict["_ytdl_manifest"]
        data = kwdict.get("_ytdl_manifest_data")
        remux = kwdict.get("_ytdl_manifest_remux")
        headers = kwdict.get("_ytdl_manifest_headers")

        # The '*_formats_and_subtitles' methods are tried first; when the
        # call raises AttributeError, the plain '*_formats' variant is used
        # and no subtitles are reported.
        if manifest_type == "hls":
            ext = "ytdl" if remux else "mp4"
            protocol = "m3u8_native"

            if data is None:
                try:
                    fmts, subs = extr._extract_m3u8_formats_and_subtitles(
                        url, video_id, ext, protocol, headers=headers)
                except AttributeError:
                    fmts = extr._extract_m3u8_formats(
                        url, video_id, ext, protocol, headers=headers)
                    subs = None
            else:
                try:
                    fmts, subs = extr._parse_m3u8_formats_and_subtitles(
                        data, url, ext, protocol, headers=headers)
                except AttributeError:
                    fmts = extr._parse_m3u8_formats(
                        data, url, ext, protocol, headers=headers)
                    subs = None

        elif manifest_type == "dash":
            if data is None:
                try:
                    fmts, subs = extr._extract_mpd_formats_and_subtitles(
                        url, video_id, headers=headers)
                except AttributeError:
                    fmts = extr._extract_mpd_formats(
                        url, video_id, headers=headers)
                    subs = None
            else:
                if isinstance(data, str):
                    data = ElementTree.fromstring(data)
                try:
                    fmts, subs = extr._parse_mpd_formats_and_subtitles(
                        data, mpd_id="dash")
                except AttributeError:
                    fmts = extr._parse_mpd_formats(
                        data, mpd_id="dash")
                    subs = None

        else:
            raise ValueError(f"Unsupported manifest type '{manifest_type}'")

        if headers:
            for fmt in fmts:
                fmt["http_headers"] = headers

        info_dict = {
            "extractor": "",
            "id"       : video_id,
            "title"    : video_id,
            "formats"  : fmts,
            "subtitles": subs,
        }
        info_dict = ytdl.process_ie_result(info_dict, download=False)

        if remux:
            info_dict["__postprocessors"] = [
                self.ytdl_pp.FFmpegVideoRemuxerPP(self.ytdl_instance, remux)]

        return info_dict

    def _download_video(self, ytdl_instance, pathfmt, info_dict):
        """Download a single video described by 'info_dict'

        Updates 'pathfmt' (extension, paths, temppath) along the way and
        returns True, also when the target file already exists.
        """
        if "url" in info_dict:
            if "filename" in pathfmt.kwdict:
                pathfmt.kwdict["extension"] = \
                    text.ext_from_url(info_dict["url"])
            else:
                text.nameext_from_url(info_dict["url"], pathfmt.kwdict)

        formats = info_dict.get("requested_formats")
        if formats and not compatible_formats(formats):
            info_dict["ext"] = "mkv"
        elif "ext" not in info_dict:
            try:
                info_dict["ext"] = info_dict["formats"][0]["ext"]
            except LookupError:
                info_dict["ext"] = "mp4"

        if self.outtmpl:
            # user-supplied output template: let ytdl compute the filename
            self._set_outtmpl(ytdl_instance, self.outtmpl)
            pathfmt.filename = filename = \
                ytdl_instance.prepare_filename(info_dict)
            pathfmt.extension = info_dict["ext"]
            pathfmt.path = pathfmt.directory + filename
            pathfmt.realpath = pathfmt.temppath = (
                pathfmt.realdirectory + filename)
        elif info_dict["ext"] != "ytdl":
            pathfmt.set_extension(info_dict["ext"])
            pathfmt.build_path()

        if pathfmt.exists():
            pathfmt.temppath = ""
            return True

        if self.rate_dyn is not None:
            # static ratelimits are set in ytdl.construct_YoutubeDL
            ytdl_instance.params["ratelimit"] = self.rate_dyn()

        self.out.start(pathfmt.path)
        if self.part:
            # build the filename with 'pathfmt.prefix' as its extension,
            # then restore the real extension in kwdict
            pathfmt.kwdict["extension"] = pathfmt.prefix
            filename = pathfmt.build_filename(pathfmt.kwdict)
            pathfmt.kwdict["extension"] = info_dict["ext"]
            if self.partdir:
                path = os.path.join(self.partdir, filename)
            else:
                path = pathfmt.realdirectory + filename
            # escape literal '%' since the path is used as output template
            path = path.replace("%", "%%") + "%(ext)s"
        else:
            path = pathfmt.realpath.replace("%", "%%")

        self._set_outtmpl(ytdl_instance, path)
        ytdl_instance.process_info(info_dict)
        pathfmt.temppath = info_dict.get("filepath") or info_dict["_filename"]
        return True

    def _download_playlist(self, ytdl_instance, pathfmt, info_dict):
        """Download every entry of a playlist 'info_dict'

        Errors of individual entries are logged and do not abort the loop.
        Returns True if at least one entry was processed without raising.
        """
        pathfmt.kwdict["extension"] = pathfmt.prefix
        filename = pathfmt.build_filename(pathfmt.kwdict)
        pathfmt.kwdict["extension"] = pathfmt.extension
        path = pathfmt.realdirectory + filename
        # escape literal '%' since the path is used as output template
        path = path.replace("%", "%%") + "%(playlist_index)s.%(ext)s"
        self._set_outtmpl(ytdl_instance, path)

        status = False
        for entry in info_dict["entries"]:
            if not entry:
                continue
            if self.rate_dyn is not None:
                ytdl_instance.params["ratelimit"] = self.rate_dyn()
            try:
                ytdl_instance.process_info(entry)
                status = True
            except Exception as exc:
                self.log.traceback(exc)
                self.log.error("%s: %s", exc.__class__.__name__, exc)
        return status

    def _prepare(self, ytdl_instance):
        """Perform one-time setup of a ytdl instance

        Only acts on instances whose params contain '__gdl_initialize';
        that marker is removed so the setup runs once per instance.
        """
        if "__gdl_initialize" not in ytdl_instance.params:
            return

        del ytdl_instance.params["__gdl_initialize"]
        if self.progress is not None:
            ytdl_instance.add_progress_hook(self._progress_hook)
        if rlf := ytdl_instance.params.pop("__gdl_ratelimit_func", False):
            self.rate_dyn = rlf
        # route ytdl's log output through this downloader
        ytdl_instance.params["logger"] = LoggerAdapter(self, ytdl_instance)

    def _progress_hook(self, info):
        """Forward ytdl progress data to the output object

        Nothing is reported until the download has been running for at
        least 'self.progress' seconds.
        """
        if info["status"] == "downloading" and \
                info["elapsed"] >= self.progress:
            total = info.get("total_bytes") or info.get("total_bytes_estimate")
            speed = info.get("speed")
            self.out.progress(
                None if total is None else int(total),
                info["downloaded_bytes"],
                int(speed) if speed else 0,
            )

    def _set_outtmpl(self, ytdl_instance, outtmpl):
        """Set the output template of 'ytdl_instance' to 'outtmpl'

        The storage location differs between ytdl implementations, so the
        instance is probed for the attributes it provides.
        """
        if hasattr(ytdl_instance, "_parse_outtmpl"):
            ytdl_instance.params["outtmpl"] = {"default": outtmpl}
            return

        try:
            ytdl_instance.outtmpl_dict["default"] = outtmpl
        except AttributeError:
            ytdl_instance.params["outtmpl"] = outtmpl
349
350
351
class LoggerAdapter():
    """Logger object handed to ytdl in place of its configured logger

    Debug and warning messages are passed on to the logger that was
    originally stored in the ytdl instance's params (if any); error
    messages are stored on the owning object's 'error' attribute instead
    of being logged here.
    """
    __slots__ = ("obj", "log")

    def __init__(self, obj, ytdl_instance):
        """Remember the owner 'obj' and the instance's current logger"""
        self.obj = obj
        self.log = ytdl_instance.params.get("logger")

    def debug(self, msg):
        """Forward a debug message without its leading '[...] ' tag"""
        if self.log is not None:
            # startswith() instead of msg[0]: an empty message must not
            # raise IndexError
            if msg.startswith("["):
                msg = msg[msg.find("]")+2:]
            self.log.debug(msg)

    def warning(self, msg):
        """Forward a warning message without its 'WARNING: ' prefix"""
        if self.log is not None:
            if "WARNING:" in msg:
                # drop everything up to and including the first space
                msg = msg[msg.find(" ")+1:]
            self.log.warning(msg)

    def error(self, msg):
        """Store an error message, minus 'ERROR: ' prefix, on the owner"""
        if "ERROR:" in msg:
            # drop everything up to and including the first space
            msg = msg[msg.find(" ")+1:]
        self.obj.error = msg
374
375
376
def compatible_formats(formats):
    """Returns True if 'formats' are compatible for merge

    'formats' is a sequence of format dicts. They are compatible when
    every "ext" value is "webm", or when every "ext" value belongs to the
    MP4 family listed below. A missing "ext" makes the result False.

    All entries are inspected, so lists with one entry or with more than
    two entries are handled as well as the usual video+audio pair.
    """
    exts = {fmt.get("ext") for fmt in formats}

    if exts <= {"webm"}:
        return True

    mp4_family = {
        "mp3", "mp4", "m4a", "m4p", "m4b", "m4r", "m4v", "ismv", "isma"}
    return exts <= mp4_family
386
387
388
# Module-level handle on this file's downloader class
# NOTE(review): presumably looked up by the package's downloader
# loader under this name -- confirm against the importing code
__downloader__ = YoutubeDLDownloader
389
390