Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/scripts/init.py
5457 views
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
4
# Copyright 2025 Mike Fährmann
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License version 2 as
8
# published by the Free Software Foundation.
9
10
"""Initialize extractor modules"""
11
12
import re
13
import logging
14
import argparse
15
import datetime as dt
16
import util # noqa
17
18
from gallery_dl import text
19
20
# Logger used by every step of this script.
LOG = logging.getLogger("init")
# NOTE(review): not referenced anywhere in the visible code.
NONE = {}
# Encoding declaration emitted as the first line of each generated file.
ENCODING = "# -*- coding: utf-8 -*-\n"
# License notice emitted near the top of each generated file.
LICENSE = (
    "# This program is free software; you can redistribute it and/or modify\n"
    "# it under the terms of the GNU General Public License version 2 as\n"
    "# published by the Free Software Foundation.\n"
)
30
31
32
def init_extractor(args):
    """Create or update all files belonging to the extractor 'args.category'.

    Depending on the parsed options this writes a new extractor module,
    registers it in the extractor package's '__init__.py', writes a new
    test-results module, and adds an entry to 'scripts/supportedsites.py'.
    When 'args.git' is set, every processed path is passed to 'git add'.
    """
    category = args.category

    # each task: (target path, handler, whether an existing file is edited)
    tasks = []
    if args.init_module:
        tasks += [
            (util.path("gallery_dl", "extractor", f"{category}.py"),
             generate_module, False),
            (util.path("gallery_dl", "extractor", "__init__.py"),
             insert_into_modules_list, True),
        ]
    if args.init_test:
        tasks += [
            (util.path("test", "results", f"{category}.py"),
             generate_test, False),
        ]
    if args.site_name:
        tasks += [
            (util.path("scripts", "supportedsites.py"),
             insert_into_supportedsites, True),
        ]

    for path, handler, edit_existing in tasks:
        LOG.info(util.trim(path))

        if edit_existing:
            # the handler modifies the line list in place and reports
            # whether it inserted anything
            # (presumably util.lines() writes the result back - TODO confirm)
            with util.lines(path) as content:
                if not handler(args, content):
                    LOG.warning("'%s' already present", category)
        else:
            # the handler returns the complete text of a new file;
            # 'args.open_mode' is "x" by default, so an existing file
            # is reported instead of being overwritten
            try:
                with util.open(path, args.open_mode) as fp:
                    fp.write(handler(args))
            except FileExistsError:
                LOG.warning("File already present")
            except Exception as exc:
                LOG.error("%s: %s", exc.__class__.__name__, exc, exc_info=exc)

        if args.git:
            util.git("add", path)
66
67
68
###############################################################################
69
# Extractor ###################################################################
70
71
def generate_module(args):
    """Return the complete source text of a new extractor module.

    The text consists of the encoding line, an optional copyright notice,
    the license header, a module docstring naming 'args.root', and the
    extractor skeleton selected by 'args.type'.
    """
    # "manga" and "user" have dedicated templates;
    # every other value (including None) gets the basic one
    templates = {
        "manga": generate_extractors_manga,
        "user": generate_extractors_user,
    }
    generate_extractors = templates.get(args.type, generate_extractors_basic)

    # a falsy copyright value is inserted into the header unchanged
    notice = args.copyright
    if notice:
        notice = f"\n# Copyright {dt.date.today().year} {notice}\n#"

    return (
        f"{ENCODING}{notice}\n"
        f"{LICENSE}\n"
        f'"""Extractors for {args.root}/"""\n'
        "\n"
        f"{generate_extractors(args)}"
    )
90
91
92
def generate_extractors_basic(args):
    """Return skeleton source code for a basic extractor module.

    The skeleton contains the imports, the BASE_PATTERN definition, a base
    extractor class for 'args.category', and one subclass with an empty
    items() method for each name in 'args.subcategories'.

    'args.subcategories' may be None (no '-s' option on the command line),
    in which case only the base class is generated.
    """
    cat = args.category
    ccat = cat.capitalize()
    root = args.root

    parts = [f'''\
from .common import Extractor, Message
from .. import text

{build_base_pattern(args)}

class {ccat}Extractor(Extractor):
    """Base class for {cat} extractors"""
    category = "{cat}"
    root = "{root}"
''']

    # argparse's "append" action leaves the attribute as None when the
    # option never occurs, so fall back to an empty sequence
    for subcat in args.subcategories or ():
        subcat = subcat.lower()
        parts.append(f'''

class {ccat}{subcat.capitalize()}Extractor({ccat}Extractor):
    subcategory = "{subcat}"
    pattern = rf"{{BASE_PATTERN}}/PATH"
    example = "{root}/..."

    def items(self):
        pass
''')

    return "".join(parts)
122
123
124
def generate_extractors_manga(args):
    """Return skeleton source code for a manga extractor module.

    The skeleton contains the imports, the BASE_PATTERN definition, a shared
    base class, a chapter extractor, and a manga extractor. The method bodies
    are placeholders that refer to names the author still has to supply.
    """
    name = args.category
    prefix = name.capitalize()
    root = args.root

    header = f'''\
from .common import ChapterExtractor, MangaExtractor
from .. import text

{build_base_pattern(args)}

class {prefix}Base():
    """Base class for {name} extractors"""
    category = "{name}"
    root = "{root}"
'''

    chapter_class = f'''\
class {prefix}ChapterExtractor({prefix}Base, ChapterExtractor):
    """Extractor for {name} manga chapters"""
    pattern = rf"{{BASE_PATTERN}}/PATH"
    example = "{root}/..."

    def __init__(self, match):
        url = f"{{self.root}}/PATH"
        ChapterExtractor.__init__(self, match, url)

    def metadata(self, page):
        chapter, sep, minor = chapter.partition(".")

        return {{
            "manga" : text.unescape(manga),
            "manga_id": text.parse_int(manga_id),
            "title" : "",
            "volume" : text.parse_int(volume),
            "chapter" : text.parse_int(chapter),
            "chapter_minor": sep + minor,
            "chapter_id" : text.parse_int(chapter_id),
            "lang" : "en",
            "language": "English",
        }}

    def images(self, page):
        return [
            (url, None)
            for url in text.extract_iter(page, "", "")
        ]
'''

    manga_class = f'''\
class {prefix}MangaExtractor({prefix}Base, MangaExtractor):
    """Extractor for {name} manga"""
    chapterclass = {prefix}ChapterExtractor
    pattern = rf"{{BASE_PATTERN}}/PATH"
    example = "{root}/..."

    def __init__(self, match):
        url = f"{{self.root}}/PATH"
        MangaExtractor.__init__(self, match, url)

    def chapters(self, page):
        results = []

        while True:
            results.append((url, None))

        return results
'''

    # two blank lines separate the top-level sections of the generated module
    return "\n\n".join((header, chapter_class, manga_class))
189
190
191
def generate_extractors_user(args):
    """Return skeleton source code for a user-profile extractor module.

    The skeleton contains the imports, the BASE_PATTERN and USER_PATTERN
    definitions, a base extractor class for 'args.category', and a
    Dispatch-based user extractor. The generated items() body refers to an
    '...InfoExtractor' class that this template does not create; it is a
    placeholder for the author to fill in.
    """
    cat = args.category
    ccat = cat.capitalize()

    # The class statement of the user extractor must end with ':',
    # otherwise the generated module cannot even be imported.
    # Classes are preceded by two blank lines like in the other templates.
    return f'''\
from .common import Extractor, Message, Dispatch
from .. import text

{build_base_pattern(args)}
USER_PATTERN = rf"{{BASE_PATTERN}}/([^/?#]+)"


class {ccat}Extractor(Extractor):
    """Base class for {cat} extractors"""
    category = "{cat}"
    root = "{args.root}"


class {ccat}UserExtractor(Dispatch, {ccat}Extractor):
    """Extractor for {cat} user profiles"""
    pattern = rf"{{USER_PATTERN}}/?(?:$|\\?|#)"
    example = "{args.root}/USER/"

    def items(self):
        base = f"{{self.root}}/"
        return self._dispatch_extractors((
            ({ccat}InfoExtractor, f"{{base}}info"),
        ), ("posts",))
'''
219
220
221
def build_base_pattern(args):
    """Return the source line defining BASE_PATTERN for 'args.domain'.

    A domain with fewer than two dots, or one whose leading part is "www",
    gets an optional 'www.' prefix. Any other leading part is treated as a
    required subdomain and matched literally.
    """
    host = args.domain
    prefix = "(?:www\\.)?"

    # three parts <=> the domain contains at least two dots
    parts = host.rsplit(".", 2)
    if len(parts) == 3:
        sub, name, tld = parts
        host = f"{name}.{tld}"
        if sub != "www":
            prefix = re.escape(sub + ".")

    return f'BASE_PATTERN = r"(?:https?://)?{prefix}{re.escape(host)}"\n'
236
237
238
###############################################################################
239
# Test Results ################################################################
240
241
def generate_test(args):
    """Return the source text of an empty test-results module.

    The module consists of the encoding line, the license header, an import
    of the extractor module for 'args.category', and an empty '__tests__'
    tuple.
    """
    name = args.category

    if name[0].isdecimal():
        # a name starting with a digit is not a valid identifier, so the
        # module is imported by string and bound to '_<name>' instead
        import_lines = (
            f'gallery_dl = __import__("gallery_dl.extractor.{name}")\n'
            f'_{name} = getattr(gallery_dl.extractor, "{name}")\n'
        )
    else:
        import_lines = f"from gallery_dl.extractor import {name}\n"

    return (
        f"{ENCODING}\n"
        f"{LICENSE}\n"
        f"{import_lines}\n"
        "\n"
        "__tests__ = (\n"
        ")\n"
    )
262
263
264
###############################################################################
265
# Modules List ################################################################
266
267
def insert_into_modules_list(args, lines):
    """Insert an entry for 'args.category' into the 'modules' list.

    'lines' is the list of source lines to modify in place. Scanning starts
    after the line beginning with "modules = "; the new entry is inserted in
    front of the first entry that sorts after the category or is "booru".

    Returns False without changing anything if the category is already
    listed, True after inserting the entry.
    """
    name = args.category

    entry = f' "{name}",\n'
    if entry in lines:
        return False

    in_list = False
    for idx, line in enumerate(lines):
        if not in_list:
            # skip everything up to and including the list's opening line
            if line.startswith("modules = "):
                in_list = True
            continue

        # presumably the text between the first pair of double quotes
        current = text.extr(line, '"', '"')
        if current == name:
            return False
        if current == "booru" or current > name:
            break

    lines.insert(idx, entry)
    return True
287
288
289
###############################################################################
290
# Supported Sites #############################################################
291
292
def insert_into_supportedsites(args, lines):
    """Insert a 'category: site name' entry into CATEGORY_MAP.

    'lines' is the list of source lines to modify in place. Scanning starts
    after the line beginning with "CATEGORY_MAP = "; the new entry is
    inserted in front of the first entry that sorts after the category.

    Returns False without changing anything if the category already has an
    entry, True after inserting one.
    """
    name = args.category

    in_map = False
    for idx, current_line in enumerate(lines):
        if not in_map:
            # skip everything up to and including the map's opening line
            if current_line.startswith("CATEGORY_MAP = "):
                in_map = True
            continue

        # presumably the text between the first pair of double quotes
        key = text.extr(current_line, '"', '"')
        if key == name:
            return False
        if key > name:
            break

    # pad the quoted key to 17 characters (15 for the name plus 2 quotes);
    # longer names get no padding
    quoted = f'"{name}"'.ljust(17)
    lines.insert(idx, f''' {quoted}: "{args.site_name}",\n''')
    return True
310
311
312
###############################################################################
313
# Command-Line Options ########################################################
314
315
def parse_args(args=None):
    """Parse command-line options and derive 'root' and 'domain'.

    args: list of argument strings to parse, or None to use sys.argv[1:].

    Returns the argparse namespace. When a root URL is given, 'args.root' is
    normalized to include a scheme and no trailing slash, and 'args.domain'
    is set to the host part without a leading "www.". Without a root URL,
    'args.domain' is the empty string.

    Exits through parser.error() if no root URL is given although an
    extractor module should be generated.
    """
    # 'args' must go to parse_args() below; ArgumentParser's first
    # parameter is the program name, not the argument list
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-s", "--subcategory",
        dest="subcategories", metavar="SUBCaT", action="append")
    parser.add_argument(
        "-n", "--name",
        dest="site_name", metavar="TITLE")
    parser.add_argument(
        "-c", "--copyright",
        dest="copyright", metavar="NAME", default="")
    parser.add_argument(
        "-C",
        dest="copyright", action="store_const", const="Mike Fährmann")
    parser.add_argument(
        "-F", "--force",
        dest="open_mode", action="store_const", const="w", default="x")
    parser.add_argument(
        "-g", "--git",
        dest="git", action="store_true")
    parser.add_argument(
        "-M", "--no-module",
        dest="init_module", action="store_false")
    parser.add_argument(
        "-T", "--no-test",
        dest="init_test", action="store_false")
    parser.add_argument(
        "-t", "--type",
        dest="type", metavar="TYPE")
    parser.add_argument(
        "--manga",
        dest="type", action="store_const", const="manga")
    parser.add_argument(
        "--base",
        dest="type", action="store_const", const="base")
    parser.add_argument(
        "--user",
        dest="type", action="store_const", const="user")

    parser.add_argument("category")
    parser.add_argument("root", nargs="?")

    args = parser.parse_args(args)

    if root := args.root:
        if "://" in root:
            # full URL: drop trailing slashes, domain is what follows "://"
            root = root.rstrip("/")
            domain = root[root.find("://")+3:]
        else:
            # bare domain: clean up stray ':' and '/' and assume HTTPS
            root = root.strip(":/")
            domain = root
            root = f"https://{root}"

        if domain.startswith("www."):
            domain = domain[4:]

        args.root = root
        args.domain = domain
    elif args.init_module:
        parser.error("'root' URL required")
    else:
        args.domain = ""

    return args
380
381
382
def main():
    """Parse the command line and initialize the requested files."""
    init_extractor(parse_args())
385
386
387
if __name__ == "__main__":
    # show everything from DEBUG upwards, prefixed with the level name
    logging.basicConfig(format="[%(levelname)s] %(message)s",
                        level=logging.DEBUG)
    main()
393
394