Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/bunkr.py
8906 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2022-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for https://bunkr.si/"""
10
11
from .common import Extractor
12
from .lolisafe import LolisafeAlbumExtractor
13
from .. import text, util, config, exception
14
from ..cache import memcache
15
import random
16
17
# URL prefix shared by all Bunkr extractors.
#   group 1: host of an explicitly prefixed 'bunkr:' URL (any domain)
#   group 2: host of a regular 'bunkr.TLD' / 'bunkrr.TLD' URL
# With the 'tlds' option of the 'bunkr' extractor config enabled,
# any TLD is accepted; otherwise only the TLDs listed below match.
BASE_PATTERN = (
    r"(?:bunkr:(?:https?://)?([^/?#]+)|"
    r"(?:https?://)?(?:app\.)?(bunkr+\."
    + (
        r"\w+"
        if config.get(("extractor", "bunkr"), "tlds") else
        r"(?:s[kiu]|c[ir]|fi|p[hks]|ru|la|is|to|a[cx]"
        r"|black|cat|media|red|site|ws|org)"
    )
    + r"))"
)
29
30
# Bare hostnames used as fallback candidates.
# BunkrAlbumExtractor.request() removes a host from this list once it
# answers with HTTP 403 and then picks a random remaining entry.
DOMAINS = [
    "bunkr." + tld
    for tld in (
        "ac", "ci", "cr", "fi", "ph", "pk", "ps", "si", "sk", "ws",
        "black", "red", "media", "site",
    )
]

# URLs matched on one of these hosts keep the extractor's default 'root'
# instead of switching to the matched host.
LEGACY_DOMAINS = {
    "bunkr.ax", "bunkr.cat",
    "bunkr.ru", "bunkrr.ru",
    "bunkr.su", "bunkrr.su",
    "bunkr.la", "bunkr.is", "bunkr.to",
}

# Roots ("https://HOST") that responded with HTTP 403; filled at runtime.
CF_DOMAINS = set()
58
59
60
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
    """Extractor for bunkr.si albums"""
    category = "bunkr"
    root = "https://bunkr.si"
    root_dl = "https://get.bunkrr.su"
    root_api = "https://apidl.bunkr.ru"
    archive_fmt = "{album_id}_{id|id_url|slug}"
    pattern = BASE_PATTERN + r"/a/([^/?#]+)"
    example = "https://bunkr.si/a/ID"

    def __init__(self, match):
        """Use the matched host as 'root' unless it is a legacy domain"""
        LolisafeAlbumExtractor.__init__(self, match)
        # groups[0]: host of a 'bunkr:' prefixed URL
        # groups[1]: host of a regular bunkr URL
        domain = self.groups[0] or self.groups[1]
        if domain not in LEGACY_DOMAINS:
            self.root = "https://" + domain

    def _init(self):
        """Resolve the API endpoint and reset the file offset"""
        LolisafeAlbumExtractor._init(self)

        endpoint = self.config("endpoint")
        if not endpoint:
            # default endpoint
            endpoint = self.root_api + "/api/_001_v2"
        elif endpoint[0] == "/":
            # path-only value: resolve it against the API root
            endpoint = self.root_api + endpoint

        self.endpoint = endpoint
        self.offset = 0

    def skip(self, num):
        """Remember 'num' as number of leading files to skip

        Returns 'num' unchanged; the offset gets applied lazily
        in _extract_files().
        """
        self.offset = num
        return num

    def request(self, url, **kwargs):
        """Send a request and handle redirects and HTTP 403 responses

        Redirects are followed manually. An HTTP 403 response is treated
        as a CF challenge: the URL's root gets added to CF_DOMAINS, its
        host gets removed from DOMAINS, and the request is retried on a
        randomly selected remaining domain. The same fallback applies to
        redirects that point to a root already in CF_DOMAINS.

        Raises AbortExtraction when no fallback domain is left.
        HTTP errors other than 403 are re-raised.
        """
        kwargs["encoding"] = "utf-8"
        kwargs["allow_redirects"] = False

        while True:
            try:
                response = Extractor.request(self, url, **kwargs)
                if response.status_code < 300:
                    return response

                # redirect
                url = response.headers["Location"]
                if url[0] == "/":
                    url = self.root + url
                    continue
                root, path = self._split(url)
                if root not in CF_DOMAINS:
                    continue
                self.log.debug("Redirect to known CF challenge domain '%s'",
                               root)

            except exception.HttpError as exc:
                if exc.status != 403:
                    raise

                # CF challenge
                root, path = self._split(url)
                CF_DOMAINS.add(root)
                self.log.debug("Added '%s' to CF challenge domains", root)

                try:
                    DOMAINS.remove(root.rpartition("/")[2])
                except ValueError:
                    # host is not one of the known fallback domains
                    pass

            # select alternative domain
            # DOMAINS is shared module state and may already be empty here,
            # no matter whether the current host was part of it; check
            # before random.choice() to avoid an IndexError.
            if not DOMAINS:
                raise exception.AbortExtraction(
                    "All Bunkr domains require solving a CF challenge")
            self.root = root = "https://" + random.choice(DOMAINS)
            self.log.debug("Trying '%s' as fallback", root)
            url = root + path

    def fetch_album(self, album_id):
        """Return a file iterator and metadata dict for album 'album_id'"""
        # album metadata
        page = self.request(f"{self.root}/a/{album_id}?advanced=1").text
        title = text.unescape(text.unescape(text.extr(
            page, 'property="og:title" content="', '"')))

        # files
        # one entry per file object of the page's 'window.albumFiles' array
        items = text.extr(
            page, "window.albumFiles = [", "</script>").split("\n},\n")

        return self._extract_files(items), {
            "album_id"   : album_id,
            "album_name" : title,
            "album_size" : text.extr(
                page, '<span class="font-semibold">(', ')'),
            "count"      : len(items),
        }

    def _extract_files(self, items):
        """Yield a file dict for each entry in 'items'

        Entries before 'self.offset' are skipped. Errors while processing
        an entry are logged and the entry is dropped, except for control
        exceptions, which propagate, and HTTP 400 responses from the API
        root, which abort extraction with "Album deleted".
        """
        if self.offset:
            items = util.advance(items, self.offset)

        for item in items:
            try:
                data_id = text.extr(item, " id: ", ",").strip()
                file = self._extract_file(data_id)

                file["name"] = util.json_loads(text.extr(
                    item, 'original:', ',\n').replace("\\'", "'"))
                file["slug"] = util.json_loads(text.extr(
                    item, 'slug: ', ',\n').replace("\\'", "'"))
                file["uuid"] = text.extr(
                    item, 'name: "', ".")
                file["size"] = text.parse_int(text.extr(
                    item, "size: ", " ,\n"))
                file["date"] = self.parse_datetime(text.extr(
                    item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y")

                yield file
            except exception.ControlException:
                raise
            except Exception as exc:
                self.log.error("%s: %s", exc.__class__.__name__, exc)
                self.log.debug("%s", item, exc_info=exc)
                if isinstance(exc, exception.HttpError) and \
                        exc.status == 400 and \
                        exc.response.url.startswith(self.root_api):
                    raise exception.AbortExtraction("Album deleted")

    def _extract_file(self, data_id):
        """Query the API endpoint for 'data_id' and return a file dict"""
        referer = f"{self.root_dl}/file/{data_id}"
        headers = {"Referer": referer, "Origin": self.root_dl}
        data = self.request_json(self.endpoint, method="POST", headers=headers,
                                 json={"id": data_id})

        if data.get("encrypted"):
            # XOR key is derived from the response timestamp in whole hours
            key = "SECRET_KEY_" + str(data["timestamp"] // 3600)
            file_url = util.decrypt_xor(data["url"], key.encode())
        else:
            file_url = data["url"]

        return {
            "file"          : file_url,
            "id_url"        : data_id,
            "_http_headers" : {"Referer": referer},
            "_http_validate": self._validate,
        }

    def _validate(self, response):
        """Return False if 'response' got redirected to a maintenance video"""
        if response.history and response.url.endswith(
                ("/maint.mp4", "/maintenance-vid.mp4")):
            self.log.warning("File server in maintenance mode")
            return False
        return True

    def _split(self, url):
        """Split 'url' into its root ('scheme://host') and remaining path

        The path is an empty string for URLs without a path component.
        """
        # start at index 8 to search past the '//' of 'http(s)://'
        pos = url.find("/", 8)
        if pos < 0:
            return url, ""
        return url[:pos], url[pos:]
214
215
216
class BunkrMediaExtractor(BunkrAlbumExtractor):
    """Extractor for bunkr.si media links"""
    subcategory = "media"
    directory_fmt = ("{category}",)
    pattern = BASE_PATTERN + r"(/[fvid]/[^/?#]+)"
    example = "https://bunkr.si/f/FILENAME"

    def fetch_album(self, album_id):
        """Return a single-file iterable and metadata for a media page

        'album_id' is appended to 'root' as-is, i.e. it is expected to be
        the media page's path (e.g. '/f/FILENAME') as captured by 'pattern'.
        On failure, the error is logged and an empty result is returned.
        """
        try:
            page = self.request(self.root + album_id).text
            data_id = text.extr(page, 'data-file-id="', '"')
            file = self._extract_file(data_id)
            file["name"] = text.unquote(text.unescape(text.extr(
                page, "<h1", "<").rpartition(">")[2]))
            file["slug"] = album_id.rpartition("/")[2]
            file["uuid"] = text.extr(page, "/thumbs/", ".")
        except exception.ControlException:
            # do not swallow abort/stop requests (same as _extract_files())
            raise
        except Exception as exc:
            self.log.error("%s: %s", exc.__class__.__name__, exc)
            return (), {}

        # metadata of the album this file is linked to, if any
        album_id, album_name, album_size = self._album_info(text.extr(
            page, ' href="../a/', '"'))
        return (file,), {
            "album_id"  : album_id,
            "album_name": album_name,
            "album_size": album_size,
            "count"     : 1,
        }

    @memcache(keyarg=1)
    def _album_info(self, album_id):
        """Return (album_id, album_name, album_size) for 'album_id'

        Best effort: returns (album_id, "", -1) when 'album_id' is empty
        or the album page could not be fetched.
        """
        if album_id:
            try:
                page = self.request(f"{self.root}/a/{album_id}").text
                return (
                    album_id,
                    text.unescape(text.unescape(text.extr(
                        page, 'property="og:title" content="', '"'))),
                    text.extr(page, '<span class="font-semibold">(', ')'),
                )
            except Exception as exc:
                # album info is optional; keep going without it
                self.log.debug("%s: %s", exc.__class__.__name__, exc)
        return album_id, "", -1
259
260