Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/__init__.py
5399 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2015-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
import sys
10
from ..text import re_compile
11
12
# Names of the extractor modules shipped with this package.
# They are imported lazily and in exactly this order, and find() returns
# the first extractor class whose pattern matches, so the position of an
# entry decides its priority.
modules = [
    "2ch",
    "2chan",
    "2chen",
    "35photo",
    "3dbooru",
    "4chan",
    "4archive",
    "4chanarchives",
    "500px",
    "8chan",
    "8muses",
    "adultempire",
    "agnph",
    "ao3",
    "arcalive",
    "architizer",
    "artstation",
    "aryion",
    "batoto",
    "bbc",
    "behance",
    "bilibili",
    "blogger",
    "bluesky",
    "boosty",
    "booth",
    "bunkr",
    "catbox",
    "chevereto",
    "cien",
    "civitai",
    "comick",
    "comicvine",
    "cyberdrop",
    "danbooru",
    "dankefuerslesen",
    "desktopography",
    "deviantart",
    "discord",
    "dynastyscans",
    "e621",
    "erome",
    "everia",
    "exhentai",
    "facebook",
    "fanbox",
    "fantia",
    "fapello",
    "fapachi",
    "flickr",
    "furaffinity",
    "furry34",
    "fuskator",
    "gelbooru",
    "gelbooru_v01",
    "gelbooru_v02",
    "girlsreleased",
    "girlswithmuscle",
    "gofile",
    "hatenablog",
    "hentai2read",
    "hentaicosplays",
    "hentaifoundry",
    "hentaihand",
    "hentaihere",
    "hentainexus",
    "hiperdex",
    "hitomi",
    "hotleak",
    "idolcomplex",
    "imagebam",
    "imagechest",
    "imagefap",
    "imgbb",
    "imgbox",
    "imgth",
    "imgur",
    "imhentai",
    "inkbunny",
    "instagram",
    "issuu",
    "itaku",
    "itchio",
    "iwara",
    "jschan",
    "kabeuchi",
    "keenspot",
    "kemono",
    "khinsider",
    "komikcast",
    "leakgallery",
    "lensdump",
    "lexica",
    "lightroom",
    "livedoor",
    "lofter",
    "luscious",
    "lynxchan",
    "madokami",
    "mangadex",
    "mangafox",
    "mangahere",
    "manganelo",
    "mangapark",
    "mangaread",
    "mangoxo",
    "misskey",
    "motherless",
    "myhentaigallery",
    "myportfolio",
    "naverblog",
    "naverchzzk",
    "naverwebtoon",
    "nekohouse",
    "newgrounds",
    "nhentai",
    "nijie",
    "nitter",
    "nozomi",
    "nsfwalbum",
    "nudostar",
    "paheal",
    "patreon",
    "pexels",
    "philomena",
    "photovogue",
    "picarto",
    "pictoa",
    "piczel",
    "pillowfort",
    "pinterest",
    "pixeldrain",
    "pixiv",
    "pixnet",
    "plurk",
    "poipiku",
    "poringa",
    "pornhub",
    "pornpics",
    "postmill",
    "rawkuma",
    "reactor",
    "readcomiconline",
    "realbooru",
    "redbust",
    "reddit",
    "redgifs",
    "rule34us",
    "rule34vault",
    "rule34xyz",
    "saint",
    "sankaku",
    "sankakucomplex",
    "schalenetwork",
    "scrolller",
    "seiga",
    "senmanga",
    "sexcom",
    "shimmie2",
    "simplyhentai",
    "sizebooru",
    "skeb",
    "slickpic",
    "slideshare",
    "smugmug",
    "soundgasm",
    "speakerdeck",
    "steamgriddb",
    "subscribestar",
    "szurubooru",
    "tapas",
    "tcbscans",
    "telegraph",
    "tenor",
    "tiktok",
    "tmohentai",
    "toyhouse",
    "tsumino",
    "tumblr",
    "tumblrgallery",
    "tungsten",
    "twibooru",
    "twitter",
    "urlgalleries",
    "unsplash",
    "uploadir",
    "urlshortener",
    "vanillarock",
    "vichan",
    "vipergirls",
    "vk",
    "vsco",
    "wallhaven",
    "wallpapercave",
    "warosu",
    "weasyl",
    "webmshare",
    "webtoons",
    "weebcentral",
    "weibo",
    "wikiart",
    "wikifeet",
    "wikimedia",
    "xasiat",
    "xfolio",
    "xhamster",
    "xvideos",
    "yiffverse",
    "zerochan",
    "booru",
    "moebooru",
    "foolfuuka",
    "foolslide",
    "mastodon",
    "shopify",
    "lolisafe",
    "imagehosts",
    "directlink",
    "recursive",
    "oauth",
    "noop",
    "ytdl",
    "generic",
]
237
238
239
def find(url):
    """Return an extractor instance for 'url', or None if nothing matches

    The available extractor classes are tried in order; the first one
    whose compiled 'pattern' matches the URL is instantiated with the
    resulting match object.
    """
    for extr in _list_classes():
        match = extr.pattern.match(url)
        if match:
            return extr(match)
    return None
245
246
247
def add(cls):
    """Register 'cls' as an available extractor and return it

    A 'pattern' that is still a plain string gets compiled first, so that
    every cached class exposes a compiled pattern.
    """
    pattern = cls.pattern
    if isinstance(pattern, str):
        cls.pattern = re_compile(pattern)
    _cache.append(cls)
    return cls
253
254
255
def add_module(module):
    """Add all extractors in 'module' to the list of available extractors

    Collects the extractor classes defined in 'module', compiles every
    'pattern' that is still a plain string, appends the classes to the
    extractor cache and returns them as a list. The list is empty (and the
    cache untouched) when the module defines no extractor classes.
    """
    if classes := _get_classes(module):
        for cls in classes:
            # Decide per class, the same way add() does. Looking only at
            # the first class would mis-handle a module in which some
            # patterns are already compiled and others are still strings.
            if isinstance(cls.pattern, str):
                cls.pattern = re_compile(cls.pattern)
        _cache.extend(classes)
    return classes
263
264
265
def extractors():
    """Return a list of all available extractor classes, sorted by name"""
    classes = list(_list_classes())
    classes.sort(key=lambda extr: extr.__name__)
    return classes
271
272
273
# --------------------------------------------------------------------
274
# internals
275
276
277
def _list_classes():
    """Yield available extractor classes

    Classes that are already cached come first. After that, the remaining
    modules are pulled from the shared module iterator one at a time,
    registered through add_module(), and their classes are yielded.
    """
    global _list_classes

    # everything registered up to now
    yield from _cache

    # '_module_iter' is shared module state, so a caller that stops early
    # leaves the remaining modules for the next call to pick up
    for mod in _module_iter:
        yield from add_module(mod)

    # Only reached once the module iterator is exhausted: from now on the
    # cache is complete, so rebind this name to a function that simply
    # returns the cache list.
    _list_classes = lambda: _cache  # noqa: E731
285
286
287
def _modules_internal():
    """Yield the modules listed in 'modules', importing each on demand

    Every name is imported relative to this package (import level 1).
    Because this is a generator, a module is only imported when the
    consumer asks for the next item.
    """
    namespace = globals()
    for name in modules:
        yield __import__(name, namespace, None, (), 1)
291
292
293
def _modules_path(path, files):
294
sys.path.insert(0, path)
295
try:
296
return [
297
__import__(name[:-3])
298
for name in files
299
if name.endswith(".py")
300
]
301
finally:
302
del sys.path[0]
303
304
305
def _get_classes(module):
306
"""Return a list of all extractor classes in a module"""
307
return [
308
cls for cls in module.__dict__.values() if (
309
hasattr(cls, "pattern") and cls.__module__ == module.__name__
310
)
311
]
312
313
314
# extractor classes registered so far, in the order they were added
_cache = []
# generator that imports the modules named in 'modules' one at a time;
# it is consumed by _list_classes()
_module_iter = _modules_internal()
316
317