Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/__init__.py
5457 views
# -*- coding: utf-8 -*-

# Copyright 2014-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
9
import sys
10
import logging
11
from . import version, config, option, output, extractor, job, util, exception
12
13
# Package metadata
__author__ = "Mike Fährmann"
__copyright__ = "Copyright 2014-2025 Mike Fährmann"
__license__ = "GPLv2"
__maintainer__ = "Mike Fährmann"
# NOTE(review): "[email protected]" looks like a placeholder left behind by
# e-mail obfuscation on the page this file was copied from, not a real
# address -- confirm against the upstream repository before release.
__email__ = "[email protected]"
# Re-exported from the 'version' submodule
__version__ = version.__version__
19
20
21
def main():
    """Command-line entry point.

    Parses command-line arguments, applies them to the global
    configuration, and then performs exactly one of the following:
    clear the cache, run a config sub-command, run a self-update,
    list modules, list extractors, or run a job for every input URL.

    Returns an integer exit status:
      * the return value of the selected config/update sub-command
      * 0 after listing modules/extractors or clearing the cache
      * 1 if the cache database is unavailable or a broken pipe occurred
      * the bitwise OR of all job statuses when processing URLs
        (64 is OR-ed in for every URL without a matching extractor)
      * the 'code' attribute of an input file error (128 if it has none)

    Raises SystemExit on KeyboardInterrupt and re-raises any OSError
    whose errno is not EPIPE.
    """
    try:
        parser = option.build_parser()
        args = parser.parse_args()
        log = output.initialize_logging(args.loglevel)

        # configuration
        if args.config_load:
            config.load()
        if args.configs_json:
            config.load(args.configs_json, strict=True)
        if args.configs_yaml:
            import yaml
            config.load(args.configs_yaml, strict=True, loads=yaml.safe_load)
        if args.configs_toml:
            try:
                import tomllib as toml
            except ImportError:
                # fall back to the third-party 'toml' package
                import toml
            config.load(args.configs_toml, strict=True, loads=toml.loads)
        if not args.colors:
            output.ANSI = False
            config.set((), "colors", False)
            if util.WINDOWS:
                config.set(("output",), "ansi", False)
        if args.filename:
            filename = args.filename
            if filename == "/O":
                filename = "{filename}.{extension}"
            elif filename.startswith("\\f"):
                # turn a literal backslash + 'f' prefix
                # into an actual form feed character
                filename = f"\f{filename[2:]}"
            config.set((), "filename", filename)
        if args.directory is not None:
            config.set((), "base-directory", args.directory)
            config.set((), "directory", ())
        if args.postprocessors:
            config.set((), "postprocessors", args.postprocessors)
        if args.abort:
            config.set((), "skip", f"abort:{args.abort}")
        if args.terminate:
            # set after 'abort', so this value wins if both are given
            config.set((), "skip", f"terminate:{args.terminate}")
        if args.cookies_from_browser:
            # split BROWSER[/DOMAIN][+KEYRING][:PROFILE][::CONTAINER]
            browser, _, profile = args.cookies_from_browser.partition(":")
            browser, _, keyring = browser.partition("+")
            browser, _, domain = browser.partition("/")
            if profile and profile[0] == ":":
                # 'BROWSER::CONTAINER' - container without a profile
                container = profile[1:]
                profile = None
            else:
                profile, _, container = profile.partition("::")
            config.set((), "cookies", (
                browser, profile, keyring, container, domain))
        if args.options_pp:
            config.set((), "postprocessor-options", args.options_pp)
        for opts in args.options:
            config.set(*opts)

        output.configure_standard_streams()

        # signals
        if signals := config.get((), "signals-ignore"):
            import signal
            if isinstance(signals, str):
                signals = signals.split(",")
            for signal_name in signals:
                signal_num = getattr(signal, signal_name, None)
                if signal_num is None:
                    log.warning("signal '%s' is not defined", signal_name)
                else:
                    signal.signal(signal_num, signal.SIG_IGN)

        if signals := config.get((), "signals-actions"):
            from . import actions
            actions.parse_signals(signals)

        # enable ANSI escape sequences on Windows
        if util.WINDOWS and config.get(("output",), "ansi", output.COLORS):
            from ctypes import windll, wintypes, byref
            kernel32 = windll.kernel32
            mode = wintypes.DWORD()

            for handle_id in (-11, -12):  # stdout and stderr
                handle = kernel32.GetStdHandle(handle_id)
                kernel32.GetConsoleMode(handle, byref(mode))
                # only touch the console mode if flag 0x4 is not yet set
                if not mode.value & 0x4:
                    mode.value |= 0x4
                    kernel32.SetConsoleMode(handle, mode)

            output.ANSI = True

        # filter environment
        filterenv = config.get((), "filters-environment", True)
        if filterenv is True:
            # keep the default 'util.compile_expression'
            pass
        elif not filterenv:
            util.compile_expression = util.compile_expression_raw
        elif isinstance(filterenv, str):
            if filterenv == "raw":
                util.compile_expression = util.compile_expression_raw
            elif filterenv.startswith("default"):
                util.compile_expression = util.compile_expression_defaultdict

        # format string separator
        if separator := config.get((), "format-separator"):
            from . import formatter
            formatter._SEPARATOR = separator

        # eval globals
        if path := config.get((), "globals"):
            util.GLOBALS.update(util.import_file(path).__dict__)

        # loglevels
        output.configure_logging(args.loglevel)
        if args.loglevel >= logging.WARNING:
            config.set(("output",), "mode", "null")
            config.set(("downloader",), "progress", None)
        elif args.loglevel <= logging.DEBUG:
            import platform
            import requests

            if util.EXECUTABLE:
                extra = f" - Executable ({version.__variant__})"
            elif git_head := util.git_head():
                extra = " - Git HEAD: " + git_head
            else:
                extra = ""

            log.debug("Version %s%s", __version__, extra)
            log.debug("Python %s - %s",
                      platform.python_version(), platform.platform())
            try:
                log.debug("requests %s - urllib3 %s",
                          requests.__version__,
                          requests.packages.urllib3.__version__)
            except AttributeError:
                # version attributes are optional debug information
                pass

            log.debug("Configuration Files %s", config._files)

        if args.clear_cache:
            from . import cache
            log = logging.getLogger("cache")
            cnt = cache.clear(args.clear_cache)

            if cnt is None:
                log.error("Database file not available")
                return 1

            log.info("Deleted %d entr%s from '%s'",
                     cnt, "y" if cnt == 1 else "ies", cache._path())
            return 0

        if args.config:
            if args.config == "init":
                return config.initialize()
            elif args.config == "status":
                return config.status()
            else:
                return config.open_extern()

        if args.print_traffic:
            import requests
            requests.packages.urllib3.connection.HTTPConnection.debuglevel = 1

        if args.update:
            from . import update
            extr = update.UpdateExtractor.from_url("update:" + args.update)
            ujob = update.UpdateJob(extr)
            return ujob.run()

        # category renaming
        config.remap_categories()

        # extractor modules
        modules = config.get(("extractor",), "modules")
        if modules is not None:
            if isinstance(modules, str):
                modules = modules.split(",")
            extractor.modules = modules

        # external modules
        if args.extractor_sources:
            sources = args.extractor_sources
            # a falsy entry stands for the internal extractor modules
            sources.append(None)
        else:
            sources = config.get(("extractor",), "module-sources")

        if sources:
            import os
            modules = []

            for source in sources:
                if source:
                    path = util.expand_path(source)
                    try:
                        files = os.listdir(path)
                        modules.append(extractor._modules_path(path, files))
                    except Exception as exc:
                        log.warning("Unable to load modules from %s (%s: %s)",
                                    path, exc.__class__.__name__, exc)
                else:
                    modules.append(extractor._modules_internal())

            if len(modules) > 1:
                import itertools
                extractor._module_iter = itertools.chain(*modules)
            elif not modules:
                extractor._module_iter = ()
            else:
                extractor._module_iter = iter(modules[0])

        if args.list_modules:
            # empty last element produces a trailing newline
            extractor.modules.append("")
            sys.stdout.write("\n".join(extractor.modules))

        elif args.list_extractors is not None:
            write = sys.stdout.write
            fmt = ("{}{}\nCategory: {} - Subcategory: {}"
                   "\nExample : {}\n\n").format

            extractors = extractor.extractors()
            if args.list_extractors:
                fltr = util.build_extractor_filter(
                    args.list_extractors, negate=False)
                extractors = filter(fltr, extractors)

            for extr in extractors:
                write(fmt(
                    extr.__name__,
                    "\n" + extr.__doc__ if extr.__doc__ else "",
                    extr.category, extr.subcategory,
                    extr.example,
                ))

        else:
            if input_files := config.get((), "input-files"):
                for input_file in input_files:
                    if isinstance(input_file, str):
                        # plain path without an action
                        input_file = (input_file, None)
                    args.input_files.append(input_file)

            if not args.urls and not args.input_files:
                if args.cookies_from_browser or config.interpolate(
                        ("extractor",), "cookies"):
                    # cookies were requested, so run with a
                    # placeholder "noop" URL instead of failing
                    args.urls.append("noop")
                else:
                    parser.error(
                        "The following arguments are required: URL\nUse "
                        "'gallery-dl --help' to get a list of all options.")

            if args.list_urls:
                jobtype = job.UrlJob
                jobtype.maxdepth = args.list_urls
                if config.get(("output",), "fallback", True):
                    jobtype.handle_url = jobtype.handle_url_fallback
            elif args.dump_json:
                jobtype = job.DataJob
                jobtype.resolve = args.dump_json - 1
            else:
                jobtype = args.jobtype or job.DownloadJob

            input_manager = InputManager()
            input_manager.log = input_log = logging.getLogger("inputfile")

            # unsupported file logging handler
            if handler := output.setup_logging_handler(
                    "unsupportedfile", fmt="{message}"):
                ulog = job.Job.ulog = logging.getLogger("unsupported")
                ulog.addHandler(handler)
                ulog.propagate = False

            # error file logging handler
            if handler := output.setup_logging_handler(
                    "errorfile", fmt="{message}", mode="a"):
                elog = input_manager.err = logging.getLogger("errorfile")
                elog.addHandler(handler)
                elog.propagate = False

            # collect input URLs
            input_manager.add_list(args.urls)

            if args.input_files:
                for input_file, action in args.input_files:
                    try:
                        path = util.expand_path(input_file)
                        input_manager.add_file(path, action)
                    except Exception as exc:
                        input_log.error(exc)
                        return getattr(exc, "code", 128)

            pformat = config.get(("output",), "progress", True)
            if pformat and len(input_manager.urls) > 1 and \
                    args.loglevel < logging.ERROR:
                input_manager.progress(pformat)

            if catmap := config.interpolate(("extractor",), "category-map"):
                if catmap == "compat":
                    catmap = {
                        "coomer"       : "coomerparty",
                        "kemono"       : "kemonoparty",
                        "schalenetwork": "koharu",
                        "naver-blog"   : "naver",
                        "naver-chzzk"  : "chzzk",
                        "naver-webtoon": "naverwebtoon",
                        "pixiv-novel"  : "pixiv",
                        "pixiv-novel:novel"   : ("pixiv", "novel"),
                        "pixiv-novel:user"    : ("pixiv", "novel-user"),
                        "pixiv-novel:series"  : ("pixiv", "novel-series"),
                        "pixiv-novel:bookmark": ("pixiv", "novel-bookmark"),
                    }
                from .extractor import common
                common.CATEGORY_MAP = catmap

            # process input URLs
            retval = 0
            for url in input_manager:
                try:
                    log.debug("Starting %s for '%s'", jobtype.__name__, url)

                    if isinstance(url, ExtendedUrl):
                        # global options are set permanently,
                        # local options only while this job runs
                        for opts in url.gconfig:
                            config.set(*opts)
                        with config.apply(url.lconfig):
                            status = jobtype(url.value).run()
                    else:
                        status = jobtype(url).run()

                    if status:
                        retval |= status
                        input_manager.error()
                    else:
                        input_manager.success()

                except exception.RestartExtraction:
                    log.debug("Restarting '%s'", url)
                    # skip 'input_manager.next()' to process
                    # the same URL again on the next iteration
                    continue
                except exception.ControlException:
                    pass
                except exception.NoExtractorError:
                    log.error("Unsupported URL '%s'", url)
                    retval |= 64
                    input_manager.error()

                input_manager.next()
            return retval
        return 0

    except KeyboardInterrupt:
        raise SystemExit("\nKeyboardInterrupt")
    except BrokenPipeError:
        pass
    except OSError as exc:
        import errno
        if exc.errno != errno.EPIPE:
            raise
    # only reached after a broken pipe
    return 1
377
378
379
class InputManager():
    """Collect input URLs and iterate over them.

    URLs can be added directly or read from input files. Entries read
    from a file with an action ("c" or "d") remember the file lines they
    came from, so these lines can be commented out or deleted once the
    URL has been processed.

    Iteration does not advance on its own: '__next__' returns the URL at
    the current index, and 'next()' must be called to move forward.
    """

    def __init__(self):
        self.urls = []
        # empty tuple until the first file gets added,
        # then a dict mapping each path to its list of lines
        self.files = ()
        # loggers; expected to be assigned by the caller
        self.log = self.err = None

        self._url = ""
        self._item = None
        self._index = 0
        self._pformat = None

    def add_url(self, url):
        """Append a single URL"""
        self.urls.append(url)

    def add_list(self, urls):
        """Append all URLs from 'urls'"""
        self.urls += urls

    def add_file(self, path, action=None):
        """Process an input file.

        Lines starting with '#' and empty lines will be ignored.
        Lines starting with '-' will be interpreted as a key-value pair
        separated by an '=', where
            'key' is a dot-separated option name and
            'value' is a JSON-parsable string.
        These configuration options will be applied
        while processing the next URL only.
        Lines starting with '-G' are the same as above, except these options
        will be applied for *all* following URLs, i.e. they are Global.
        Everything else will be used as a potential URL.

        A 'path' of "-" without an 'action' reads from stdin.
        An 'action' of "c" comments out and an 'action' of "d" deletes
        the lines of a URL in its input file after it has been processed;
        any other value disables file rewriting.

        Raises exception.InputFileError if the input cannot be read
        or if it contains an invalid configuration line.

        Example input file:

        # settings global options
        -G base-directory = "/tmp/"
        -G skip = false

        # setting local options for the next URL
        -filename="spaces_are_optional.jpg"
        -skip    = true

        https://example.org/

        # next URL uses default filename and 'skip' is false.
        https://example.com/index.htm # comment1
        https://example.com/404.htm   # comment2
        """
        if path == "-" and not action:
            try:
                lines = sys.stdin.readlines()
            except Exception as exc:
                raise exception.InputFileError(
                    "stdin is not readable") from exc
            path = None
        else:
            try:
                with open(path, encoding="utf-8") as fp:
                    lines = fp.readlines()
            except Exception as exc:
                raise exception.InputFileError(str(exc)) from exc

        if self.files:
            self.files[path] = lines
        else:
            self.files = {path: lines}

        if action == "c":
            action = self._action_comment
        elif action == "d":
            action = self._action_delete
        else:
            action = None

        gconf = []
        lconf = []
        indicies = []
        strip_comment = None
        append = self.urls.append

        for n, line in enumerate(lines):
            line = line.strip()

            if not line or line[0] == "#":
                # empty line or comment
                continue

            elif line[0] == "-":
                # config spec
                if len(line) >= 2 and line[1] == "G":
                    conf = gconf
                    line = line[2:]
                else:
                    conf = lconf
                    line = line[1:]
                    if action:
                        # local options get rewritten
                        # together with their URL
                        indicies.append(n)

                key, sep, value = line.partition("=")
                if not sep:
                    raise exception.InputFileError(
                        f"Invalid KEY=VALUE pair '{line}' "
                        f"on line {n+1} in {path}")

                try:
                    value = util.json_loads(value.strip())
                except ValueError as exc:
                    self.log.debug("%s: %s", exc.__class__.__name__, exc)
                    raise exception.InputFileError(
                        f"Unable to parse '{value}' on line {n+1} in {path}"
                    ) from exc

                key = key.strip().split(".")
                conf.append((key[:-1], key[-1], value))

            else:
                # url
                if " #" in line or "\t#" in line:
                    # compile the pattern only when first needed
                    if strip_comment is None:
                        strip_comment = util.re(r"\s+#.*").sub
                    line = strip_comment("", line)
                if gconf or lconf:
                    url = ExtendedUrl(line, gconf, lconf)
                    gconf = []
                    lconf = []
                else:
                    url = line

                if action:
                    indicies.append(n)
                    append((url, path, action, indicies))
                    indicies = []
                else:
                    append(url)

    def progress(self, pformat=True):
        """Enable progress output before each URL.

        'pformat' is either True to use the default format or a format
        string with 'current', 'total', and 'url' replacement fields.
        """
        if pformat is True:
            pformat = "[{current}/{total}] {url}\n"
        else:
            pformat += "\n"
        self._pformat = pformat.format_map

    def next(self):
        """Advance to the next URL"""
        self._index += 1

    def success(self):
        """Mark the current URL as successfully processed"""
        if self._item:
            self._rewrite()

    def error(self):
        """Mark the current URL as failed.

        Does nothing unless an 'err' logger is set. Otherwise the
        original file lines of the current entry (or the URL itself if
        it has none) are written to 'err', and file-backed entries
        additionally get rewritten in their input file.
        """
        if self.err:
            if self._item:
                _, path, _, indicies = self._item
                lines = self.files[path]
                out = "".join(lines[i] for i in indicies)
                if out and out[-1] == "\n":
                    out = out[:-1]
                self._rewrite()
            else:
                out = str(self._url)
            self.err.info(out)

    def _rewrite(self):
        """Apply the current entry's action and write its file back"""
        _, path, action, indicies = self._item
        lines = self.files[path]
        action(lines, indicies)
        try:
            with open(path, "w", encoding="utf-8") as fp:
                fp.writelines(lines)
        except Exception as exc:
            self.log.warning(
                "Unable to update '%s' (%s: %s)",
                path, exc.__class__.__name__, exc)

    def _action_comment(self, lines, indicies):
        """Prefix the given lines with '# '"""
        for i in indicies:
            lines[i] = "# " + lines[i]

    def _action_delete(self, lines, indicies):
        """Replace the given lines with empty strings"""
        for i in indicies:
            lines[i] = ""

    def __iter__(self):
        self._index = 0
        return self

    def __next__(self):
        try:
            url = self.urls[self._index]
        except IndexError:
            raise StopIteration from None

        if isinstance(url, tuple):
            # file-backed entry: (url, path, action, indicies)
            self._item = url
            url = url[0]
        else:
            self._item = None
        self._url = url

        if self._pformat:
            output.stderr_write(self._pformat({
                "total"  : len(self.urls),
                "current": self._index + 1,
                "url"    : url,
            }))
        return url
583
584
585
class ExtendedUrl():
    """URL with attached config key-value pairs"""
    # value:   the URL itself
    # gconfig: options to set globally before processing the URL
    # lconfig: options to apply only while processing the URL
    __slots__ = ("value", "gconfig", "lconfig")

    def __init__(self, url, gconf, lconf):
        self.value = url
        self.gconfig = gconf
        self.lconfig = lconf

    def __str__(self):
        return self.value
596
597