CoCalc -- __init__.py

GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/__init__.py
⁵³⁹⁹ views
1
# -*- coding: utf-8 -*-
2

3
# Copyright 2015-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8

9
import sys
10
from ..text import re_compile
11

12
# Names of the extractor modules in this package; they are imported lazily,
# one at a time, by _modules_internal().
# Order is significant: classes are registered in this order and find()
# returns the first class whose pattern matches.
# NOTE(review): the entries after "zerochan" are not alphabetical; presumably
# they are broader/fallback extractors kept last on purpose - confirm before
# re-sorting this list.
modules = [
    "2ch",
    "2chan",
    "2chen",
    "35photo",
    "3dbooru",
    "4chan",
    "4archive",
    "4chanarchives",
    "500px",
    "8chan",
    "8muses",
    "adultempire",
    "agnph",
    "ao3",
    "arcalive",
    "architizer",
    "artstation",
    "aryion",
    "batoto",
    "bbc",
    "behance",
    "bilibili",
    "blogger",
    "bluesky",
    "boosty",
    "booth",
    "bunkr",
    "catbox",
    "chevereto",
    "cien",
    "civitai",
    "comick",
    "comicvine",
    "cyberdrop",
    "danbooru",
    "dankefuerslesen",
    "desktopography",
    "deviantart",
    "discord",
    "dynastyscans",
    "e621",
    "erome",
    "everia",
    "exhentai",
    "facebook",
    "fanbox",
    "fantia",
    "fapello",
    "fapachi",
    "flickr",
    "furaffinity",
    "furry34",
    "fuskator",
    "gelbooru",
    "gelbooru_v01",
    "gelbooru_v02",
    "girlsreleased",
    "girlswithmuscle",
    "gofile",
    "hatenablog",
    "hentai2read",
    "hentaicosplays",
    "hentaifoundry",
    "hentaihand",
    "hentaihere",
    "hentainexus",
    "hiperdex",
    "hitomi",
    "hotleak",
    "idolcomplex",
    "imagebam",
    "imagechest",
    "imagefap",
    "imgbb",
    "imgbox",
    "imgth",
    "imgur",
    "imhentai",
    "inkbunny",
    "instagram",
    "issuu",
    "itaku",
    "itchio",
    "iwara",
    "jschan",
    "kabeuchi",
    "keenspot",
    "kemono",
    "khinsider",
    "komikcast",
    "leakgallery",
    "lensdump",
    "lexica",
    "lightroom",
    "livedoor",
    "lofter",
    "luscious",
    "lynxchan",
    "madokami",
    "mangadex",
    "mangafox",
    "mangahere",
    "manganelo",
    "mangapark",
    "mangaread",
    "mangoxo",
    "misskey",
    "motherless",
    "myhentaigallery",
    "myportfolio",
    "naverblog",
    "naverchzzk",
    "naverwebtoon",
    "nekohouse",
    "newgrounds",
    "nhentai",
    "nijie",
    "nitter",
    "nozomi",
    "nsfwalbum",
    "nudostar",
    "paheal",
    "patreon",
    "pexels",
    "philomena",
    "photovogue",
    "picarto",
    "pictoa",
    "piczel",
    "pillowfort",
    "pinterest",
    "pixeldrain",
    "pixiv",
    "pixnet",
    "plurk",
    "poipiku",
    "poringa",
    "pornhub",
    "pornpics",
    "postmill",
    "rawkuma",
    "reactor",
    "readcomiconline",
    "realbooru",
    "redbust",
    "reddit",
    "redgifs",
    "rule34us",
    "rule34vault",
    "rule34xyz",
    "saint",
    "sankaku",
    "sankakucomplex",
    "schalenetwork",
    "scrolller",
    "seiga",
    "senmanga",
    "sexcom",
    "shimmie2",
    "simplyhentai",
    "sizebooru",
    "skeb",
    "slickpic",
    "slideshare",
    "smugmug",
    "soundgasm",
    "speakerdeck",
    "steamgriddb",
    "subscribestar",
    "szurubooru",
    "tapas",
    "tcbscans",
    "telegraph",
    "tenor",
    "tiktok",
    "tmohentai",
    "toyhouse",
    "tsumino",
    "tumblr",
    "tumblrgallery",
    "tungsten",
    "twibooru",
    "twitter",
    "urlgalleries",
    "unsplash",
    "uploadir",
    "urlshortener",
    "vanillarock",
    "vichan",
    "vipergirls",
    "vk",
    "vsco",
    "wallhaven",
    "wallpapercave",
    "warosu",
    "weasyl",
    "webmshare",
    "webtoons",
    "weebcentral",
    "weibo",
    "wikiart",
    "wikifeet",
    "wikimedia",
    "xasiat",
    "xfolio",
    "xhamster",
    "xvideos",
    "yiffverse",
    "zerochan",
    "booru",
    "moebooru",
    "foolfuuka",
    "foolslide",
    "mastodon",
    "shopify",
    "lolisafe",
    "imagehosts",
    "directlink",
    "recursive",
    "oauth",
    "noop",
    "ytdl",
    "generic",
]
237

238

239
def find(url):
    """Return an extractor instance for 'url', or None if nothing matches

    Classes are tried in the order produced by _list_classes(); the first
    one whose 'pattern' matches 'url' is instantiated with the match object.
    """
    for extractor in _list_classes():
        match = extractor.pattern.match(url)
        if match:
            return extractor(match)
    return None
245

246

247
def add(cls):
    """Register 'cls' as an available extractor and return it

    A 'pattern' given as a string is replaced by its compiled form first.
    """
    pattern = cls.pattern
    if isinstance(pattern, str):
        cls.pattern = re_compile(pattern)
    _cache.append(cls)
    return cls
253

254

255
def add_module(module):
    """Add all extractors in 'module' to the list of available extractors

    Every class found by _get_classes() whose 'pattern' is still a string
    gets it compiled with re_compile(); all found classes are then appended
    to the internal cache.

    Returns the list of classes found in 'module' (possibly empty).
    """
    classes = _get_classes(module)
    for cls in classes:
        # Check each class on its own, like add() does. Inspecting only the
        # first class would leave string patterns uncompiled in a module
        # that mixes compiled and string patterns, and find() relies on
        # 'pattern.match' being available.
        if isinstance(cls.pattern, str):
            cls.pattern = re_compile(cls.pattern)
    _cache.extend(classes)
    return classes
263

264

265
def extractors():
    """Return a list of all extractor classes sorted by class name"""
    classes = list(_list_classes())
    classes.sort(key=lambda cls: cls.__name__)
    return classes
271

272

273
# --------------------------------------------------------------------
274
# internals
275

276

277
def _list_classes():
    """Yield available extractor classes"""
    # Classes that are already registered come first.
    for cls in _cache:
        yield cls

    # '_module_iter' is a single shared iterator, so a run that gets
    # abandoned early (e.g. find() returning on a match) is continued by
    # the next call instead of importing the same modules again.
    for module in _module_iter:
        for cls in add_module(module):
            yield cls

    # Only reached once every module has been imported: from now on the
    # module-level name resolves to a plain accessor for the full cache.
    globals()["_list_classes"] = lambda: _cache
285

286

287
def _modules_internal():
    """Lazily import and yield the modules named in 'modules'

    Each name is imported relative to this package (level=1), one at a
    time and in list order.
    """
    namespace = globals()
    for name in modules:
        yield __import__(
            name, globals=namespace, locals=None, fromlist=(), level=1)
291

292

293
def _modules_path(path, files):
294
    sys.path.insert(0, path)
295
    try:
296
        return [
297
            __import__(name[:-3])
298
            for name in files
299
            if name.endswith(".py")
300
        ]
301
    finally:
302
        del sys.path[0]
303

304

305
def _get_classes(module):
306
    """Return a list of all extractor classes in a module"""
307
    return [
308
        cls for cls in module.__dict__.values() if (
309
            hasattr(cls, "pattern") and cls.__module__ == module.__name__
310
        )
311
    ]
312

313

314
_cache = []
315
_module_iter = _modules_internal()
316

317
Product

Resources

Company