CoCalc -- init.py

GitHub Repository: mikf/gallery-dl
Path: blob/master/scripts/init.py
⁵⁴⁵⁷ views
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3

4
# Copyright 2025 Mike Fährmann
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License version 2 as
8
# published by the Free Software Foundation.
9

10
"""Initialize extractor modules"""
11

12
import re
13
import logging
14
import argparse
15
import datetime as dt
16
import util  # noqa
17

18
from gallery_dl import text
19

20
LOG = logging.getLogger("init")
21
NONE = {}
22
ENCODING = """\
23
# -*- coding: utf-8 -*-
24
"""
25
LICENSE = """\
26
# This program is free software; you can redistribute it and/or modify
27
# it under the terms of the GNU General Public License version 2 as
28
# published by the Free Software Foundation.
29
"""
30

31

32
def init_extractor(args):
    """Create or update every file needed for a new extractor category.

    Depending on the parsed options this
      * writes a new extractor module and registers it in the extractor
        package's module list (unless disabled via 'init_module'),
      * writes an empty test results file (unless disabled via 'init_test'),
      * adds the site name to 'scripts/supportedsites.py' (when a site
        name was given).

    Each touched path is logged, and passed to 'git add' when 'args.git'
    is set.

    Args:
        args: Namespace as returned by parse_args().
    """
    category = args.category

    # Each job is (path, callback, edit_in_place):
    #   edit_in_place=True  -> callback(args, lines) modifies an existing file
    #   edit_in_place=False -> callback(args) returns the content of a new file
    jobs = []
    if args.init_module:
        jobs.append((util.path("gallery_dl", "extractor", f"{category}.py"),
                     generate_module, False))
        jobs.append((util.path("gallery_dl", "extractor", "__init__.py"),
                     insert_into_modules_list, True))
    if args.init_test:
        jobs.append((util.path("test", "results", f"{category}.py"),
                     generate_test, False))
    if args.site_name:
        jobs.append((util.path("scripts", "supportedsites.py"),
                     insert_into_supportedsites, True))

    for path, func, edit_in_place in jobs:
        LOG.info(util.trim(path))

        if edit_in_place:
            with util.lines(path) as lines:
                # The insert_* callbacks return False when nothing was added
                if not func(args, lines):
                    LOG.warning("'%s' already present", category)
        else:
            try:
                # Render the content BEFORE opening the target: if the
                # generator raises, no empty file is left behind that would
                # make the next run (exclusive "x" mode) report
                # "File already present".
                content = func(args)
                with util.open(path, args.open_mode) as fp:
                    fp.write(content)
            except FileExistsError:
                LOG.warning("File already present")
            except Exception as exc:
                LOG.error("%s: %s", exc.__class__.__name__, exc, exc_info=exc)

        if args.git:
            util.git("add", path)
66

67

68
###############################################################################
69
# Extractor ###################################################################
70

71
def generate_module(args):
    """Return the complete source text of a new extractor module.

    The text consists of the encoding line, an optional copyright line
    for the current year, the license notice, a module docstring naming
    'args.root', and the extractor classes for the requested 'args.type'
    ("manga", "user", or anything else for the basic layout).
    """
    # Select the class generator by type; unknown or missing types fall
    # back to the basic layout.
    generators = {
        "manga": generate_extractors_manga,
        "user": generate_extractors_user,
    }
    generate_extractors = generators.get(
        args.type, generate_extractors_basic)

    notice = args.copyright
    if notice:
        notice = f"\n# Copyright {dt.date.today().year} {notice}\n#"

    return f'''\
{ENCODING}{notice}
{LICENSE}
"""Extractors for {args.root}/"""

{generate_extractors(args)}\
'''
90

91

92
def generate_extractors_basic(args):
    """Return source text for a base extractor class and its subclasses.

    The result contains the imports, the BASE_PATTERN definition, a
    '<Category>Extractor' base class, and one stub subclass per entry in
    'args.subcategories'.

    Args:
        args: Namespace providing 'category', 'root', 'domain', and
            'subcategories' (a list of names, or None when no
            subcategory option was given).
    """
    cat = args.category
    ccat = cat.capitalize()

    result = f'''\
from .common import Extractor, Message
from .. import text

{build_base_pattern(args)}

class {ccat}Extractor(Extractor):
    """Base class for {cat} extractors"""
    category = "{cat}"
    root = "{args.root}"
'''

    # 'subcategories' comes from an argparse "append" action and is None
    # when the option was never used; treat that as "no subclasses"
    # instead of failing with a TypeError.
    for subcat in args.subcategories or ():
        subcat = subcat.lower()
        result += f'''

class {ccat}{subcat.capitalize()}Extractor({ccat}Extractor):
    subcategory = "{subcat}"
    pattern = rf"{{BASE_PATTERN}}/PATH"
    example = "{args.root}/..."

    def items(self):
        pass
'''

    return result
122

123

124
def generate_extractors_manga(args):
    """Return source text for a manga site's chapter and manga extractors.

    The generated code is a skeleton meant to be completed by hand: it
    contains a shared '<Category>Base' class plus 'ChapterExtractor' and
    'MangaExtractor' subclasses whose method bodies reference placeholder
    names (PATH, manga, chapter_id, url, ...) that are not defined yet.

    Args:
        args: Namespace providing 'category', 'root', and 'domain'.
    """
    cat = args.category
    ccat = cat.capitalize()

    # Doubled braces ({{ }}) are literal braces in the generated code.
    return f'''\
from .common import ChapterExtractor, MangaExtractor
from .. import text

{build_base_pattern(args)}

class {ccat}Base():
    """Base class for {cat} extractors"""
    category = "{cat}"
    root = "{args.root}"


class {ccat}ChapterExtractor({ccat}Base, ChapterExtractor):
    """Extractor for {cat} manga chapters"""
    pattern = rf"{{BASE_PATTERN}}/PATH"
    example = "{args.root}/..."

    def __init__(self, match):
        url = f"{{self.root}}/PATH"
        ChapterExtractor.__init__(self, match, url)

    def metadata(self, page):
        chapter, sep, minor = chapter.partition(".")

        return {{
            "manga"   : text.unescape(manga),
            "manga_id": text.parse_int(manga_id),
            "title"   : "",
            "volume"  : text.parse_int(volume),
            "chapter" : text.parse_int(chapter),
            "chapter_minor": sep + minor,
            "chapter_id"   : text.parse_int(chapter_id),
            "lang"    : "en",
            "language": "English",
        }}

    def images(self, page):
        return [
            (url, None)
            for url in text.extract_iter(page, "", "")
        ]


class {ccat}MangaExtractor({ccat}Base, MangaExtractor):
    """Extractor for {cat} manga"""
    chapterclass = {ccat}ChapterExtractor
    pattern = rf"{{BASE_PATTERN}}/PATH"
    example = "{args.root}/..."

    def __init__(self, match):
        url = f"{{self.root}}/PATH"
        MangaExtractor.__init__(self, match, url)

    def chapters(self, page):
        results = []

        while True:
            results.append((url, None))

        return results
'''
189

190

191
def generate_extractors_user(args):
    """Return source text for a user-profile based set of extractors.

    The generated code contains the imports, BASE_PATTERN and
    USER_PATTERN definitions, a '<Category>Extractor' base class, and a
    '<Category>UserExtractor' dispatching to further extractors. It is a
    skeleton: '<Category>InfoExtractor' is referenced but must still be
    written by hand.

    Args:
        args: Namespace providing 'category', 'root', and 'domain'.
    """
    cat = args.category
    ccat = cat.capitalize()

    # Fixes compared to the previous template:
    #   - the UserExtractor class statement now ends with ':' (the
    #     generated module used to be a syntax error)
    #   - two blank lines precede the first class, as before every other
    #     generated class
    # Doubled braces ({{ }}) are literal braces in the generated code.
    return f'''\
from .common import Extractor, Message, Dispatch
from .. import text

{build_base_pattern(args)}
USER_PATTERN = rf"{{BASE_PATTERN}}/([^/?#]+)"


class {ccat}Extractor(Extractor):
    """Base class for {cat} extractors"""
    category = "{cat}"
    root = "{args.root}"


class {ccat}UserExtractor(Dispatch, {ccat}Extractor):
    """Extractor for {cat} user profiles"""
    pattern = rf"{{USER_PATTERN}}/?(?:$|\\?|#)"
    example = "{args.root}/USER/"

    def items(self):
        base = f"{{self.root}}/"
        return self._dispatch_extractors((
            ({ccat}InfoExtractor, f"{{base}}info"),
        ), ("posts",))
'''
219

220

221
def build_base_pattern(args):
    """Return the 'BASE_PATTERN = r"..."' source line for 'args.domain'.

    A domain with at most one dot, or one whose leading part is "www",
    gets an optional "www." prefix in the pattern. Any other leading
    subdomain becomes a mandatory, regex-escaped prefix. The returned
    text ends with a newline.
    """
    host = args.domain
    prefix = "(?:www\\.)?"

    if host.count(".") >= 2:
        # Keep the last two labels as the domain; everything before
        # them is the subdomain.
        sub, name, tld = host.rsplit(".", 2)
        host = f"{name}.{tld}"
        if sub != "www":
            prefix = re.escape(f"{sub}.")

    pattern = f"(?:https?://)?{prefix}{re.escape(host)}"
    return f'BASE_PATTERN = r"{pattern}"\n'
236

237

238
###############################################################################
239
# Test Results ################################################################
240

241
def generate_test(args):
    """Return the source text of an empty test results file.

    The text consists of the encoding line, the license notice, an
    import of the category's extractor module, and an empty '__tests__'
    tuple.
    """
    category = args.category

    if not category[0].isdecimal():
        import_stmt = f"from gallery_dl.extractor import {category}\n"
    else:
        # A name starting with a digit cannot appear in an import
        # statement, so the module is loaded through __import__() and
        # bound to an underscore-prefixed name.
        import_stmt = (
            f'gallery_dl = __import__("gallery_dl.extractor.{category}")\n'
            f'_{category} = getattr(gallery_dl.extractor, "{category}")\n'
        )

    return (
        f"{ENCODING}\n"
        f"{LICENSE}\n"
        f"{import_stmt}\n"
        "\n"
        "__tests__ = (\n"
        ")\n"
    )
262

263

264
###############################################################################
265
# Modules List ################################################################
266

267
def insert_into_modules_list(args, lines):
    """Insert the category into the 'modules' list held in 'lines'.

    Lines after the one starting with "modules = " are scanned. The new
    entry is inserted before the first entry that sorts after the
    category, or before the "booru" entry, whichever comes first.

    Returns:
        True when an entry was inserted, False when the category is
        already listed.
    """
    category = args.category

    entry = f'    "{category}",\n'
    if entry in lines:
        return False

    in_list = False
    for idx, line in enumerate(lines):
        if not in_list:
            # Still looking for the start of the list
            in_list = line.startswith("modules = ")
            continue

        name = text.extr(line, '"', '"')
        if name == category:
            return False
        if name == "booru" or name > category:
            break

    lines.insert(idx, entry)
    return True
287

288

289
###############################################################################
290
# Supported Sites #############################################################
291

292
def insert_into_supportedsites(args, lines):
    """Insert a 'category: site name' entry into CATEGORY_MAP in 'lines'.

    Lines after the one starting with "CATEGORY_MAP = " are scanned. The
    new entry is inserted before the first key that sorts after the
    category, or before the closing brace when the category sorts after
    every existing key.

    Args:
        args: Namespace providing 'category' and 'site_name'.
        lines: Mutable list of the file's lines; modified in place.

    Returns:
        True when an entry was inserted, False when the category already
        has an entry.
    """
    category = args.category

    idx = 0  # keeps 'idx' defined when 'lines' is empty
    compare = False
    for idx, line in enumerate(lines):
        if compare:
            # Stop at the end of the dict literal (assumes its closing
            # brace sits at column 0). Otherwise a category sorting after
            # all keys would be compared against whatever follows
            # CATEGORY_MAP and end up outside of it.
            if line.startswith("}"):
                break
            cat = text.extr(line, '"', '"')
            if cat == category:
                return False
            if cat > category:
                break
        elif line.startswith("CATEGORY_MAP = "):
            compare = True

    # Pad short category names so the colons of neighboring entries align
    ws = " " * max(15 - len(category), 0)
    line = f'''    "{category}"{ws}: "{args.site_name}",\n'''
    lines.insert(idx, line)
    return True
310

311

312
###############################################################################
313
# Command-Line Options ########################################################
314

315
def parse_args(args=None):
    """Parse command-line options and derive 'root' and 'domain'.

    Args:
        args: Optional list of argument strings. When None, argparse
            falls back to sys.argv[1:].

    Returns:
        argparse.Namespace with the parsed options. 'root' is normalized
        to a URL without trailing slash ("https://" is prepended when no
        scheme was given), and 'domain' is set to the part after the
        scheme with a leading "www." removed, or to "" when no root was
        given.

    Exits via parser.error() when no root URL was given although an
    extractor module is to be generated.
    """
    # The argument list must go to parse_args() below, not to the
    # ArgumentParser constructor, whose first positional parameter is
    # 'prog'.
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-s", "--subcategory",
        dest="subcategories", metavar="SUBCAT", action="append")
    parser.add_argument(
        "-n", "--name",
        dest="site_name", metavar="TITLE")
    parser.add_argument(
        "-c", "--copyright",
        dest="copyright", metavar="NAME", default="")
    parser.add_argument(
        "-C",
        dest="copyright", action="store_const", const="Mike Fährmann")
    parser.add_argument(
        "-F", "--force",
        dest="open_mode", action="store_const", const="w", default="x")
    parser.add_argument(
        "-g", "--git",
        dest="git", action="store_true")
    parser.add_argument(
        "-M", "--no-module",
        dest="init_module", action="store_false")
    parser.add_argument(
        "-T", "--no-test",
        dest="init_test", action="store_false")
    parser.add_argument(
        "-t", "--type",
        dest="type", metavar="TYPE")
    parser.add_argument(
        "--manga",
        dest="type", action="store_const", const="manga")
    parser.add_argument(
        "--base",
        dest="type", action="store_const", const="base")
    parser.add_argument(
        "--user",
        dest="type", action="store_const", const="user")

    parser.add_argument("category")
    parser.add_argument("root", nargs="?")

    args = parser.parse_args(args)

    if root := args.root:
        if "://" in root:
            root = root.rstrip("/")
            domain = root[root.find("://")+3:]
        else:
            # Bare host name: drop stray ':' and '/' and assume HTTPS
            root = root.strip(":/")
            domain = root
            root = f"https://{root}"

        if domain.startswith("www."):
            domain = domain[4:]

        args.root = root
        args.domain = domain
    elif args.init_module:
        parser.error("'root' URL required")
    else:
        args.domain = ""

    return args
380

381

382
def main():
    """Parse the command line and initialize the requested files."""
    init_extractor(parse_args())
385

386

387
# Script entry point: log everything from DEBUG level upwards in the form
# "[LEVEL] message", then run the command-line interface.
if __name__ == "__main__":
    logging.basicConfig(
        level=logging.DEBUG,
        format="[%(levelname)s] %(message)s",
    )
    main()
393

394
Product

Resources

Company