Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/ytdl.py
8920 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2021-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Helpers for interacting with youtube-dl"""
10
11
import shlex
12
import itertools
13
from . import text, util, exception
14
15
16
def import_module(module_name):
    """Import and return the youtube-dl compatible module to use

    When 'module_name' is None, 'yt_dlp' is tried first and
    'youtube_dl' is used as fallback if importing 'yt_dlp' raises
    ImportError or SyntaxError. An import failure of the fallback
    is propagated to the caller.

    Any other value is handed to 'util.import_file()' unchanged.
    """
    if module_name is not None:
        return util.import_file(module_name)

    try:
        return __import__("yt_dlp")
    except (ImportError, SyntaxError):
        # 'yt_dlp' is missing or cannot be compiled by this interpreter
        return __import__("youtube_dl")
23
24
25
def construct_YoutubeDL(module, obj, user_opts, system_opts=None):
    """Create a 'module.YoutubeDL' instance from gallery-dl settings

    module:      youtube-dl compatible module providing 'YoutubeDL'
    obj:         object with a 'config(key, default=None)' callable
                 and a 'log' attribute
    user_opts:   base options dict; used (and modified in place) when
                 neither 'config-file' nor 'cmdline-args' yield arguments
    system_opts: optional dict applied last, overriding everything else

    Options are assembled in this order:
      1. parse_command_line() result if 'config-file' / 'cmdline-args'
         produce any arguments, otherwise 'user_opts'
      2. values derived from 'obj.config' for keys that are still None
      3. the 'raw-options' config dict
      4. 'logger' (unless the 'logging' config value is false)
      5. 'system_opts'

    Raises exception.AbortExtraction when command-line parsing
    exits via SystemExit.
    """
    opts = argv = None
    config = obj.config

    if not config("deprecations"):
        # replace the deprecation hooks on the YoutubeDL class itself
        module.YoutubeDL.deprecated_feature = util.false
        module.YoutubeDL.deprecation_warning = util.false

    if cfg := config("config-file"):
        with open(util.expand_path(cfg), encoding="utf-8") as fp:
            contents = fp.read()
        # config files are split like a shell command line;
        # '#' starts a comment
        argv = shlex.split(contents, comments=True)

    if cmd := config("cmdline-args"):
        if isinstance(cmd, str):
            cmd = shlex.split(cmd)
        # 'cmdline-args' are appended after config-file arguments
        argv = (argv + cmd) if argv else cmd

    try:
        opts = parse_command_line(module, argv) if argv else user_opts
    except SystemExit:
        raise exception.AbortExtraction("Invalid command-line option")

    # fill in values from gallery-dl's own settings,
    # but only where no explicit value is present
    if opts.get("format") is None:
        opts["format"] = config("format")
    if opts.get("nopart") is None:
        opts["nopart"] = not config("part", True)
    if opts.get("updatetime") is None:
        opts["updatetime"] = config("mtime", True)
    if opts.get("min_filesize") is None:
        opts["min_filesize"] = text.parse_bytes(config("filesize-min"), None)
    if opts.get("max_filesize") is None:
        opts["max_filesize"] = text.parse_bytes(config("filesize-max"), None)
    if opts.get("overwrites") is None and not config("skip", True):
        opts["overwrites"] = True
    if opts.get("ratelimit") is None:
        if rate := config("rate"):
            func = util.build_selection_func(rate, 0, text.parse_bytes)
            if hasattr(func, "args"):
                # keep the selection function itself so that a value
                # can be picked later (presumably per download)
                opts["__gdl_ratelimit_func"] = func
            else:
                opts["ratelimit"] = func() or None
        else:
            opts["ratelimit"] = None

    if raw_opts := config("raw-options"):
        opts.update(raw_opts)
    if config("logging", True):
        opts["logger"] = obj.log
    if system_opts:
        opts.update(system_opts)

    opts["__gdl_initialize"] = True
    return module.YoutubeDL(opts)
79
80
81
def parse_command_line(module, argv):
    """Convert youtube-dl style command-line arguments to a params dict

    'argv' is parsed with 'module.parseOpts()' and the resulting option
    values are normalized and returned as a dict intended to be passed
    to 'module.YoutubeDL'.

    Options that only exist in some module versions are read with
    getattr() and a default, so a missing attribute does not raise.

    Side effects: header options (user agent, referer, extra headers)
    are written into the module's shared 'std_headers' mapping.
    SystemExit raised by 'parseOpts()' is not caught here.
    """
    parser, opts, args = module.parseOpts(argv)

    # treated as yt-dlp when the module exposes a 'cookies' attribute
    ytdlp = hasattr(module, "cookies")
    try:
        std_headers = module.utils.networking.std_headers
    except AttributeError:
        std_headers = module.std_headers

    try:
        parse_bytes = module.parse_bytes
    except AttributeError:
        parse_bytes = module.FileDownloader.parse_bytes

    # HTTP headers
    if opts.user_agent is not None:
        std_headers["User-Agent"] = opts.user_agent
    if opts.referer is not None:
        std_headers["Referer"] = opts.referer
    if opts.headers:
        if isinstance(opts.headers, dict):
            std_headers.update(opts.headers)
        else:
            # sequence of "Key:Value" strings
            for h in opts.headers:
                key, _, value = h.partition(":")
                std_headers[key] = value

    # size, rate, and retry values arrive as strings
    if opts.ratelimit is not None:
        opts.ratelimit = parse_bytes(opts.ratelimit)
    if getattr(opts, "throttledratelimit", None) is not None:
        opts.throttledratelimit = parse_bytes(opts.throttledratelimit)
    if opts.min_filesize is not None:
        opts.min_filesize = parse_bytes(opts.min_filesize)
    if opts.max_filesize is not None:
        opts.max_filesize = parse_bytes(opts.max_filesize)
    if opts.max_sleep_interval is None:
        opts.max_sleep_interval = opts.sleep_interval
    if getattr(opts, "overwrites", None):
        opts.continue_dl = False
    if opts.retries is not None:
        opts.retries = parse_retries(opts.retries)
    if getattr(opts, "file_access_retries", None) is not None:
        opts.file_access_retries = parse_retries(opts.file_access_retries)
    if opts.fragment_retries is not None:
        opts.fragment_retries = parse_retries(opts.fragment_retries)
    if getattr(opts, "extractor_retries", None) is not None:
        opts.extractor_retries = parse_retries(opts.extractor_retries)
    if opts.buffersize is not None:
        opts.buffersize = parse_bytes(opts.buffersize)
    if opts.http_chunk_size is not None:
        opts.http_chunk_size = parse_bytes(opts.http_chunk_size)
    if opts.extractaudio:
        opts.audioformat = opts.audioformat.lower()
    if opts.audioquality:
        opts.audioquality = opts.audioquality.strip("kK")
    if opts.recodevideo is not None:
        opts.recodevideo = opts.recodevideo.replace(" ", "")
    if getattr(opts, "remuxvideo", None) is not None:
        opts.remuxvideo = opts.remuxvideo.replace(" ", "")
    if getattr(opts, "wait_for_video", None) is not None:
        # "MIN-MAX" -> (min, max)
        min_wait, _, max_wait = opts.wait_for_video.partition("-")
        opts.wait_for_video = (module.parse_duration(min_wait),
                               module.parse_duration(max_wait))

    if opts.date is not None:
        date = module.DateRange.day(opts.date)
    else:
        date = module.DateRange(opts.dateafter, opts.datebefore)

    decodeOption = getattr(module, "decodeOption", util.identity)
    compat_opts = getattr(opts, "compat_opts", ())

    def _unused_compat_opt(name):
        """Replace 'name' in compat_opts with '*name'; True if it was set"""
        if name not in compat_opts:
            return False
        compat_opts.discard(name)
        compat_opts.update([f"*{name}"])
        return True

    def set_default_compat(
            compat_name, opt_name, default=True, remove_compat=True):
        """Give option 'opt_name' a default depending on a compat option

        Returns True if the compat option determined the value, False if
        the compat option is present but the option already had a value,
        and None if the compat option is not present.
        """
        attr = getattr(opts, opt_name, None)
        if compat_name in compat_opts:
            if attr is None:
                setattr(opts, opt_name, not default)
                return True
            else:
                if remove_compat:
                    _unused_compat_opt(compat_name)
                return False
        elif attr is None:
            setattr(opts, opt_name, default)
        return None

    set_default_compat("abort-on-error", "ignoreerrors", "only_download")
    set_default_compat("no-playlist-metafiles", "allow_playlist_files")
    set_default_compat("no-clean-infojson", "clean_infojson")
    if "format-sort" in compat_opts:
        opts.format_sort.extend(module.InfoExtractor.FormatSort.ytdl_default)
    # 'multistreams' controls two options; only mark it as unused
    # when it had no effect on either of them
    _video_multistreams_set = set_default_compat(
        "multistreams", "allow_multiple_video_streams",
        False, remove_compat=False)
    _audio_multistreams_set = set_default_compat(
        "multistreams", "allow_multiple_audio_streams",
        False, remove_compat=False)
    if _video_multistreams_set is False and _audio_multistreams_set is False:
        _unused_compat_opt("multistreams")

    if isinstance(opts.outtmpl, dict):
        outtmpl = opts.outtmpl
        outtmpl_default = outtmpl.get("default")
    else:
        # any non-dict output template is replaced by an empty string
        opts.outtmpl = outtmpl = outtmpl_default = ""

    if "filename" in compat_opts:
        if outtmpl_default is None:
            outtmpl_default = outtmpl["default"] = "%(title)s-%(id)s.%(ext)s"
        else:
            _unused_compat_opt("filename")

    if opts.extractaudio and not opts.keepvideo and opts.format is None:
        opts.format = "bestaudio/best"

    if ytdlp:
        def metadataparser_actions(f):
            """Yield MetadataParser actions for one parse-metadata entry"""
            if isinstance(f, str):
                yield module.MetadataFromFieldPP.to_action(f)
            else:
                # non-string entry: f[0] is a comma-separated field list,
                # f[1:] are the remaining arguments of a REPLACE action
                REPLACE = module.MetadataParserPP.Actions.REPLACE
                args = f[1:]
                for x in f[0].split(","):
                    action = [REPLACE, x]
                    action += args
                    yield action

        parse_metadata = getattr(opts, "parse_metadata", None)
        if isinstance(parse_metadata, dict):
            if opts.metafromtitle is not None:
                if "pre_process" not in parse_metadata:
                    parse_metadata["pre_process"] = []
                parse_metadata["pre_process"].append(
                    f"title:{opts.metafromtitle}")
            opts.parse_metadata = {
                k: list(itertools.chain.from_iterable(map(
                    metadataparser_actions, v)))
                for k, v in parse_metadata.items()
            }
        else:
            if parse_metadata is None:
                parse_metadata = []
            if opts.metafromtitle is not None:
                parse_metadata.append(f"title:{opts.metafromtitle}")
            opts.parse_metadata = list(itertools.chain.from_iterable(map(
                metadataparser_actions, parse_metadata)))

        # 'metafromtitle' has been converted to a parse_metadata entry
        opts.metafromtitle = None
    else:
        opts.parse_metadata = ()

    download_archive_fn = module.expand_path(opts.download_archive) \
        if opts.download_archive is not None else opts.download_archive

    if getattr(opts, "getcomments", None):
        opts.writeinfojson = True

    if getattr(opts, "no_sponsorblock", None):
        opts.sponsorblock_mark = set()
        opts.sponsorblock_remove = set()
    else:
        opts.sponsorblock_mark = \
            getattr(opts, "sponsorblock_mark", None) or set()
        opts.sponsorblock_remove = \
            getattr(opts, "sponsorblock_remove", None) or set()
    opts.remove_chapters = getattr(opts, "remove_chapters", None) or ()

    # must run before the result dict is built:
    # legacy_postprocessors() adjusts several 'opts' attributes
    try:
        postprocessors = list(module.get_postprocessors(opts))
    except AttributeError:
        postprocessors = legacy_postprocessors(
            opts, module, ytdlp, compat_opts)

    match_filter = (
        None if opts.match_filter is None
        else module.match_filter_func(opts.match_filter))

    if cookiesfrombrowser := getattr(opts, "cookiesfrombrowser", None):
        # BROWSER[+KEYRING][:PROFILE][::CONTAINER]
        pattern = text.re(r"""(?x)
            (?P<name>[^+:]+)
            (?:\s*\+\s*(?P<keyring>[^:]+))?
            (?:\s*:\s*(?!:)(?P<profile>.+?))?
            (?:\s*::\s*(?P<container>.+))?""")
        if match := pattern.fullmatch(cookiesfrombrowser):
            browser, keyring, profile, container = match.groups()
            if keyring is not None:
                keyring = keyring.upper()
            cookiesfrombrowser = (browser.lower(), profile, keyring, container)
        else:
            # values that do not match the pattern are dropped
            cookiesfrombrowser = None

    return {
        "usenetrc": opts.usenetrc,
        "netrc_location": getattr(opts, "netrc_location", None),
        "username": opts.username,
        "password": opts.password,
        "twofactor": opts.twofactor,
        "videopassword": opts.videopassword,
        "ap_mso": opts.ap_mso,
        "ap_username": opts.ap_username,
        "ap_password": opts.ap_password,
        "quiet": opts.quiet,
        "no_warnings": opts.no_warnings,
        "forceurl": opts.geturl,
        "forcetitle": opts.gettitle,
        "forceid": opts.getid,
        "forcethumbnail": opts.getthumbnail,
        "forcedescription": opts.getdescription,
        "forceduration": opts.getduration,
        "forcefilename": opts.getfilename,
        "forceformat": opts.getformat,
        "forceprint": getattr(opts, "forceprint", None) or (),
        "force_write_download_archive": getattr(
            opts, "force_write_download_archive", None),
        "simulate": opts.simulate,
        "skip_download": opts.skip_download,
        "format": opts.format,
        "allow_unplayable_formats": getattr(
            opts, "allow_unplayable_formats", None),
        "ignore_no_formats_error": getattr(
            opts, "ignore_no_formats_error", None),
        "format_sort": getattr(
            opts, "format_sort", None),
        "format_sort_force": getattr(
            opts, "format_sort_force", None),
        "allow_multiple_video_streams": opts.allow_multiple_video_streams,
        "allow_multiple_audio_streams": opts.allow_multiple_audio_streams,
        "check_formats": getattr(
            opts, "check_formats", None),
        "outtmpl": opts.outtmpl,
        "outtmpl_na_placeholder": opts.outtmpl_na_placeholder,
        "paths": getattr(opts, "paths", None),
        "autonumber_size": opts.autonumber_size,
        "autonumber_start": opts.autonumber_start,
        "restrictfilenames": opts.restrictfilenames,
        "windowsfilenames": getattr(opts, "windowsfilenames", None),
        "ignoreerrors": opts.ignoreerrors,
        "force_generic_extractor": opts.force_generic_extractor,
        "ratelimit": opts.ratelimit,
        "throttledratelimit": getattr(opts, "throttledratelimit", None),
        "overwrites": getattr(opts, "overwrites", None),
        "retries": opts.retries,
        "file_access_retries": getattr(opts, "file_access_retries", None),
        "fragment_retries": opts.fragment_retries,
        "extractor_retries": getattr(opts, "extractor_retries", None),
        "skip_unavailable_fragments": opts.skip_unavailable_fragments,
        "keep_fragments": opts.keep_fragments,
        "concurrent_fragment_downloads": getattr(
            opts, "concurrent_fragment_downloads", None),
        "buffersize": opts.buffersize,
        "noresizebuffer": opts.noresizebuffer,
        "http_chunk_size": opts.http_chunk_size,
        "continuedl": opts.continue_dl,
        "noprogress": True if opts.noprogress is None else opts.noprogress,
        "playliststart": opts.playliststart,
        "playlistend": opts.playlistend,
        "playlistreverse": opts.playlist_reverse,
        "playlistrandom": opts.playlist_random,
        "noplaylist": opts.noplaylist,
        "logtostderr": outtmpl_default == "-",
        "consoletitle": opts.consoletitle,
        "nopart": opts.nopart,
        "updatetime": opts.updatetime,
        "writedescription": opts.writedescription,
        "writeannotations": getattr(opts, "writeannotations", None),
        "writeinfojson": opts.writeinfojson,
        "allow_playlist_files": opts.allow_playlist_files,
        "clean_infojson": opts.clean_infojson,
        "getcomments": getattr(opts, "getcomments", None),
        "writethumbnail": opts.writethumbnail is True,
        "write_all_thumbnails": getattr(opts, "write_all_thumbnails", None) or
        opts.writethumbnail == "all",
        "writelink": getattr(opts, "writelink", None),
        "writeurllink": getattr(opts, "writeurllink", None),
        "writewebloclink": getattr(opts, "writewebloclink", None),
        "writedesktoplink": getattr(opts, "writedesktoplink", None),
        "writesubtitles": opts.writesubtitles,
        "writeautomaticsub": opts.writeautomaticsub,
        "allsubtitles": opts.allsubtitles,
        "subtitlesformat": opts.subtitlesformat,
        "subtitleslangs": opts.subtitleslangs,
        "matchtitle": decodeOption(opts.matchtitle),
        "rejecttitle": decodeOption(opts.rejecttitle),
        "max_downloads": opts.max_downloads,
        "prefer_free_formats": opts.prefer_free_formats,
        "trim_file_name": getattr(opts, "trim_file_name", None),
        "verbose": opts.verbose,
        "dump_intermediate_pages": opts.dump_intermediate_pages,
        "write_pages": opts.write_pages,
        "test": opts.test,
        "keepvideo": opts.keepvideo,
        "min_filesize": opts.min_filesize,
        "max_filesize": opts.max_filesize,
        "min_views": opts.min_views,
        "max_views": opts.max_views,
        "daterange": date,
        "cachedir": opts.cachedir,
        "youtube_print_sig_code": getattr(
            opts, "youtube_print_sig_code", None),
        "age_limit": opts.age_limit,
        "download_archive": download_archive_fn,
        "break_on_existing": getattr(opts, "break_on_existing", None),
        "break_on_reject": getattr(opts, "break_on_reject", None),
        "break_per_url": getattr(opts, "break_per_url", None),
        "skip_playlist_after_errors": getattr(
            opts, "skip_playlist_after_errors", None),
        "cookiefile": opts.cookiefile,
        "cookiesfrombrowser": cookiesfrombrowser,
        "nocheckcertificate": opts.no_check_certificate,
        "prefer_insecure": opts.prefer_insecure,
        "proxy": opts.proxy,
        "socket_timeout": opts.socket_timeout,
        "bidi_workaround": opts.bidi_workaround,
        "debug_printtraffic": opts.debug_printtraffic,
        "prefer_ffmpeg": getattr(opts, "prefer_ffmpeg", None),
        "include_ads": getattr(opts, "include_ads", None),
        "default_search": opts.default_search,
        "dynamic_mpd": getattr(opts, "dynamic_mpd", None),
        "extractor_args": getattr(opts, "extractor_args", None),
        "youtube_include_dash_manifest": getattr(
            opts, "youtube_include_dash_manifest", None),
        "youtube_include_hls_manifest": getattr(
            opts, "youtube_include_hls_manifest", None),
        "encoding": opts.encoding,
        "extract_flat": opts.extract_flat,
        "live_from_start": getattr(opts, "live_from_start", None),
        "wait_for_video": getattr(opts, "wait_for_video", None),
        "mark_watched": opts.mark_watched,
        "merge_output_format": opts.merge_output_format,
        "postprocessors": postprocessors,
        "fixup": opts.fixup,
        "source_address": opts.source_address,
        "sleep_interval_requests": getattr(
            opts, "sleep_interval_requests", None),
        "sleep_interval": opts.sleep_interval,
        "max_sleep_interval": opts.max_sleep_interval,
        "sleep_interval_subtitles": getattr(
            opts, "sleep_interval_subtitles", None),
        "external_downloader": opts.external_downloader,
        "playlist_items": opts.playlist_items,
        "xattr_set_filesize": getattr(opts, "xattr_set_filesize", None),
        "match_filter": match_filter,
        "no_color": getattr(opts, "no_color", None),
        "ffmpeg_location": opts.ffmpeg_location,
        "hls_prefer_native": opts.hls_prefer_native,
        "hls_use_mpegts": opts.hls_use_mpegts,
        "hls_split_discontinuity": getattr(
            opts, "hls_split_discontinuity", None),
        "external_downloader_args": opts.external_downloader_args,
        "postprocessor_args": opts.postprocessor_args,
        "cn_verification_proxy": getattr(opts, "cn_verification_proxy", None),
        "geo_verification_proxy": opts.geo_verification_proxy,
        "geo_bypass": getattr(
            opts, "geo_bypass", "default"),
        "geo_bypass_country": getattr(
            opts, "geo_bypass_country", None),
        "geo_bypass_ip_block": getattr(
            opts, "geo_bypass_ip_block", None),
        "compat_opts": compat_opts,
    }
449
450
451
def parse_retries(retries, name=""):
    """Convert a retry count to a number

    Returns float("inf") for the strings "inf" and "infinite";
    every other value is passed to int(), which raises ValueError
    or TypeError for values it cannot convert.

    'name' is accepted but not used.
    """
    if retries in ("inf", "infinite"):
        return float("inf")
    return int(retries)
455
456
457
def legacy_postprocessors(opts, module, ytdlp, compat_opts):
    """Build the list of postprocessor dicts from parsed options

    opts:        parsed options object (attribute access)
    module:      youtube-dl compatible module; only its
                 'parse_duration' function is used here
    ytdlp:       whether to add the extra keys that are only
                 emitted for yt-dlp
    compat_opts: collection of enabled compatibility options

    The order of the returned list follows the order of the checks
    below. Besides building the list, this function also updates
    'opts.writesubtitles', 'opts.writethumbnail', and 'opts.outtmpl'
    when embedding subtitles or thumbnails requires it.
    """
    postprocessors = []

    sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
    if opts.metafromtitle:
        postprocessors.append({
            "key": "MetadataFromTitle",
            "titleformat": opts.metafromtitle,
        })
    if getattr(opts, "add_postprocessors", None):
        postprocessors += list(opts.add_postprocessors)
    if sponsorblock_query:
        postprocessors.append({
            "key": "SponsorBlock",
            "categories": sponsorblock_query,
            "api": opts.sponsorblock_api,
            "when": "pre_process",
        })
    if opts.parse_metadata:
        postprocessors.append({
            "key": "MetadataParser",
            "actions": opts.parse_metadata,
            "when": "pre_process",
        })
    if opts.convertsubtitles:
        pp = {"key": "FFmpegSubtitlesConvertor",
              "format": opts.convertsubtitles}
        if ytdlp:
            pp["when"] = "before_dl"
        postprocessors.append(pp)
    if getattr(opts, "convertthumbnails", None):
        postprocessors.append({
            "key": "FFmpegThumbnailsConvertor",
            "format": opts.convertthumbnails,
            "when": "before_dl",
        })
    if getattr(opts, "exec_before_dl_cmd", None):
        postprocessors.append({
            "key": "Exec",
            "exec_cmd": opts.exec_before_dl_cmd,
            "when": "before_dl",
        })
    if opts.extractaudio:
        postprocessors.append({
            "key": "FFmpegExtractAudio",
            "preferredcodec": opts.audioformat,
            "preferredquality": opts.audioquality,
            "nopostoverwrites": opts.nopostoverwrites,
        })
    if getattr(opts, "remuxvideo", None):
        postprocessors.append({
            "key": "FFmpegVideoRemuxer",
            "preferedformat": opts.remuxvideo,
        })
    if opts.recodevideo:
        postprocessors.append({
            "key": "FFmpegVideoConvertor",
            "preferedformat": opts.recodevideo,
        })
    if opts.embedsubtitles:
        pp = {"key": "FFmpegEmbedSubtitle"}
        if ytdlp:
            pp["already_have_subtitle"] = (
                opts.writesubtitles and "no-keep-subs" not in compat_opts)
        postprocessors.append(pp)
        # embedding needs subtitle files to be written first
        if not opts.writeautomaticsub and "no-keep-subs" not in compat_opts:
            opts.writesubtitles = True
    if opts.allsubtitles and not opts.writeautomaticsub:
        opts.writesubtitles = True

    remove_chapters_patterns, remove_ranges = [], []
    for regex in opts.remove_chapters:
        if regex.startswith("*"):
            # "*START-END" time range
            dur = list(map(module.parse_duration, regex[1:].split("-")))
            if len(dur) == 2 and all(t is not None for t in dur):
                remove_ranges.append(tuple(dur))
                continue
        # anything that is not a valid time range is used as a pattern
        remove_chapters_patterns.append(text.re(regex))
    if opts.remove_chapters or sponsorblock_query:
        postprocessors.append({
            "key": "ModifyChapters",
            "remove_chapters_patterns": remove_chapters_patterns,
            "remove_sponsor_segments": opts.sponsorblock_remove,
            "remove_ranges": remove_ranges,
            "sponsorblock_chapter_title": opts.sponsorblock_chapter_title,
            "force_keyframes": opts.force_keyframes_at_cuts,
        })

    addchapters = getattr(opts, "addchapters", None)
    embed_infojson = getattr(opts, "embed_infojson", None)
    if opts.addmetadata or addchapters or embed_infojson:
        pp = {"key": "FFmpegMetadata"}
        if ytdlp:
            if embed_infojson is None:
                embed_infojson = "if_exists"
            pp["add_metadata"] = opts.addmetadata
            pp["add_chapters"] = addchapters
            pp["add_infojson"] = embed_infojson

        postprocessors.append(pp)
    # 'sponskrub' may be None; only an explicit False disables it
    if getattr(opts, "sponskrub", False) is not False:
        postprocessors.append({
            "key": "SponSkrub",
            "path": opts.sponskrub_path,
            "args": opts.sponskrub_args,
            "cut": opts.sponskrub_cut,
            "force": opts.sponskrub_force,
            "ignoreerror": opts.sponskrub is None,
            "_from_cli": True,
        })
    if opts.embedthumbnail:
        already_have_thumbnail = (opts.writethumbnail or
                                  getattr(opts, "write_all_thumbnails", False))
        postprocessors.append({
            "key": "EmbedThumbnail",
            "already_have_thumbnail": already_have_thumbnail,
        })
        if not already_have_thumbnail:
            # embedding needs a thumbnail file to be written first
            opts.writethumbnail = True
            if isinstance(opts.outtmpl, dict):
                opts.outtmpl["pl_thumbnail"] = ""
    if getattr(opts, "split_chapters", None):
        postprocessors.append({
            "key": "FFmpegSplitChapters",
            "force_keyframes": opts.force_keyframes_at_cuts,
        })
    if opts.xattrs:
        postprocessors.append({"key": "XAttrMetadata"})
    if opts.exec_cmd:
        postprocessors.append({
            "key": "Exec",
            "exec_cmd": opts.exec_cmd,
            "when": "after_move",
        })

    return postprocessors
591
592