Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/__init__.py
8901 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2015-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
import sys
10
from ..text import re_compile
11
12
# Names of the extractor modules shipped in this package.
# _modules_internal() imports them lazily in exactly this order, and
# find() tries the resulting classes in that same order, so an earlier
# entry takes precedence when several patterns match the same URL.
modules = [
    "2ch",
    "2chan",
    "2chen",
    "35photo",
    "3dbooru",
    "4chan",
    "4archive",
    "4chanarchives",
    "500px",
    "8chan",
    "8muses",
    "adultempire",
    "agnph",
    "ahottie",
    "ao3",
    "arcalive",
    "architizer",
    "arena",
    "artstation",
    "aryion",
    "audiochan",
    "bbc",
    "behance",
    "bellazon",
    "bilibili",
    "blogger",
    "bluesky",
    "boosty",
    "booth",
    "bunkr",
    "catbox",
    "cfake",
    "chevereto",
    "cien",
    "civitai",
    "comedywildlifephoto",
    "comick",
    "comicvine",
    "cyberdrop",
    "cyberfile",
    "danbooru",
    "dandadan",
    "dankefuerslesen",
    "desktopography",
    "deviantart",
    "discord",
    "dynastyscans",
    "e621",
    "eporner",
    "erome",
    "everia",
    "exhentai",
    "facebook",
    "fanbox",
    "fansly",
    "fantia",
    "fapello",
    "fapachi",
    "fikfap",
    "fitnakedgirls",
    "flickr",
    "furaffinity",
    "furry34",
    "fuskator",
    "gelbooru",
    "gelbooru_v01",
    "gelbooru_v02",
    "girlsreleased",
    "girlswithmuscle",
    "gofile",
    "hatenablog",
    "hdoujin",
    "hentai2read",
    "hentaicosplays",
    "hentaifoundry",
    "hentaihand",
    "hentaihere",
    "hentainexus",
    "hiperdex",
    "hitomi",
    "hotleak",
    "idolcomplex",
    "imagebam",
    "imagechest",
    "imagefap",
    "imgbb",
    "imgbox",
    "imgpile",
    "imgth",
    "imgur",
    "imhentai",
    "inkbunny",
    "instagram",
    "issuu",
    "itaku",
    "itchio",
    "iwara",
    "jschan",
    "kabeuchi",
    "kaliscan",
    "keenspot",
    "kemono",
    "khinsider",
    "komikcast",
    "koofr",
    "leakgallery",
    "lensdump",
    "lexica",
    "lightroom",
    "listal",
    "livedoor",
    "lofter",
    "luscious",
    "lynxchan",
    "madokami",
    "mangadex",
    "mangafire",
    "mangafox",
    "mangafreak",
    "mangahere",
    "manganelo",
    "mangapark",
    "mangaread",
    "mangareader",
    "mangataro",
    "mangatown",
    "mangoxo",
    "misskey",
    "motherless",
    "myhentaigallery",
    "myportfolio",
    "naverblog",
    "naverchzzk",
    "naverwebtoon",
    "nekohouse",
    "newgrounds",
    "nhentai",
    "nijie",
    "nitter",
    "nozomi",
    "nsfwalbum",
    "nudostar",
    "okporn",
    "paheal",
    "patreon",
    "pexels",
    "philomena",
    "photovogue",
    "picarto",
    "picazor",
    "pictoa",
    "piczel",
    "pillowfort",
    "pinterest",
    "pixeldrain",
    "pixiv",
    "pixnet",
    "plurk",
    "poipiku",
    "poringa",
    "pornhub",
    "pornpics",
    "pornstarstube",
    "postmill",
    "rawkuma",
    "reactor",
    "readcomiconline",
    "realbooru",
    "reddit",
    "redgifs",
    "rule34us",
    "rule34vault",
    "rule34xyz",
    "s3ndpics",
    "sankaku",
    "sankakucomplex",
    "schalenetwork",
    "scrolller",
    "seiga",
    "senmanga",
    "sexcom",
    "shimmie2",
    "simplyhentai",
    "sizebooru",
    "skeb",
    "slickpic",
    "slideshare",
    "smugmug",
    "soundgasm",
    "speakerdeck",
    "steamgriddb",
    "subscribestar",
    "sxypix",
    "szurubooru",
    "tapas",
    "tcbscans",
    "telegraph",
    "tenor",
    "thefap",
    "thehentaiworld",
    "tiktok",
    "tmohentai",
    "toyhouse",
    "tumblr",
    "tumblrgallery",
    "tungsten",
    "turbo",
    "twibooru",
    "twitter",
    "urlgalleries",
    "unsplash",
    "uploadir",
    "urlshortener",
    "vanillarock",
    "vichan",
    "vipergirls",
    "vk",
    "vsco",
    "wallhaven",
    "wallpapercave",
    "warosu",
    "weasyl",
    "webmshare",
    "webtoons",
    "weebcentral",
    "weebdex",
    "weibo",
    "whyp",
    "wikiart",
    "wikifeet",
    "wikimedia",
    "xasiat",
    "xenforo",
    "xfolio",
    "xhamster",
    "xvideos",
    "yiffverse",
    "yourlesbians",
    "zerochan",
    # NOTE(review): the entries from here on break the (rough) alphabetical
    # order used above - presumably shared base and catch-all extractors
    # that have to be tried last; confirm before reordering.
    "booru",
    "moebooru",
    "foolfuuka",
    "foolslide",
    "mastodon",
    "shopify",
    "lolisafe",
    "imagehosts",
    "directlink",
    "recursive",
    "oauth",
    "noop",
    "ytdl",
    "generic",
]
267
268
269
def find(url):
    """Return an extractor instance for 'url', or None if nothing matches

    The first class whose 'pattern' matches wins; it is instantiated
    with the resulting match object.
    """
    for extr in _list_classes():
        match = extr.pattern.match(url)
        if match:
            return extr(match)
    return None
275
276
277
def add(cls):
    """Register the extractor class 'cls' and return it

    A 'pattern' given as plain string gets compiled first.
    Returning 'cls' allows this function to be used as class decorator.
    """
    pattern = cls.pattern
    if isinstance(pattern, str):
        cls.pattern = re_compile(pattern)

    _cache.append(cls)
    return cls
283
284
285
def add_module(module):
    """Add all extractors in 'module' to the list of available extractors

    Every extractor class defined in 'module' gets its 'pattern' compiled
    (if it is still a plain string) and is appended to '_cache'.

    Returns the list of classes found in 'module' (possibly empty).
    """
    classes = _get_classes(module)

    for cls in classes:
        # Check each class on its own, the same way add() does.
        # Looking only at the first class breaks for modules that mix
        # pre-compiled and string patterns: strings would either stay
        # uncompiled or compiled patterns would be compiled a second time.
        if isinstance(cls.pattern, str):
            cls.pattern = re_compile(cls.pattern)

    _cache.extend(classes)
    return classes
293
294
295
def extractors():
    """Return a list of all available extractor classes, sorted by name"""
    classes = list(_list_classes())
    classes.sort(key=lambda cls: cls.__name__)
    return classes
301
302
303
# --------------------------------------------------------------------
304
# internals
305
306
307
def _list_classes():
    """Yield available extractor classes

    Classes already stored in '_cache' are yielded first. After that,
    modules are pulled from the shared '_module_iter' iterator one at a
    time, so a consumer that stops early (like find() on a match) leaves
    the remaining modules untouched until a later call resumes there.
    """
    # everything registered so far
    yield from _cache

    # load further modules only when the consumer asks for more classes;
    # add_module() also appends the new classes to '_cache'
    for module in _module_iter:
        yield from add_module(module)

    # '_module_iter' is used up at this point:
    # rebind the module-level name so that later calls skip the generator
    # and get '_cache' itself
    globals()["_list_classes"] = lambda : _cache
315
316
317
def _modules_internal():
    """Lazily import and yield each module named in 'modules'

    Modules are imported relative to this package (level=1),
    one per iteration step and in list order.
    """
    namespace = globals()
    for name in modules:
        module = __import__(
            name, globals=namespace, locals=None, fromlist=None, level=1)
        yield module
321
322
323
def _modules_path(path, files):
    """Import the Python source files 'files' located in directory 'path'

    'path' is put at the front of sys.path for the duration of the
    imports, so that the files can be imported as top-level modules.
    Entries of 'files' not ending in ".py" are ignored.

    Returns a list with the imported module objects.
    Exceptions raised while importing a module are propagated;
    sys.path gets cleaned up in that case as well.
    """
    sys.path.insert(0, path)
    try:
        return [
            __import__(name[:-3])   # strip ".py"
            for name in files
            if name.endswith(".py")
        ]
    finally:
        # An imported module may have modified sys.path itself, in which
        # case index 0 is no longer our entry. Remove 'path' by value
        # instead of blindly deleting whatever happens to be first.
        try:
            sys.path.remove(path)
        except ValueError:
            # entry was already removed by someone else; nothing left to do
            pass
333
334
335
def _get_classes(module):
    """Collect the extractor classes defined by 'module' itself

    An object qualifies when it has a 'pattern' attribute and its
    '__module__' equals the module's name, which leaves out classes
    that were merely imported into 'module'.
    """
    classes = []
    for obj in module.__dict__.values():
        if not hasattr(obj, "pattern"):
            continue
        if obj.__module__ == module.__name__:
            classes.append(obj)
    return classes
342
343
344
_cache = []
345
_module_iter = _modules_internal()
346
347