Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/bunkr.py
5399 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2022-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for https://bunkr.si/"""
10
11
from .common import Extractor
12
from .lolisafe import LolisafeAlbumExtractor
13
from .. import text, util, config, exception
14
import random
15
16
# Regex prefix shared by all Bunkr URL patterns.
#
# Group 1 captures the host of an explicit "bunkr:<url>" input;
# group 2 captures a recognised "bunkr(r...).<tld>" host, optionally
# preceded by "app.".
#
# When the "tlds" option of extractor.bunkr is enabled, any word-like TLD is
# accepted; otherwise only the explicitly listed TLDs match.
BASE_PATTERN = (
    r"(?:bunkr:(?:https?://)?([^/?#]+)|"
    r"(?:https?://)?(?:app\.)?(bunkr+\."
) + (
    r"\w+"
    if config.get(("extractor", "bunkr"), "tlds") else
    r"(?:s[kiu]|c[ir]|fi|p[hks]|ru|la|is|to|a[cx]"
    r"|black|cat|media|red|site|ws|org)"
) + r"))"
28
29
# Hosts that may be picked at random as a fallback root.
# Must stay a mutable list: BunkrAlbumExtractor.request() removes entries
# from it once a host answers with a CF challenge.
DOMAINS = [
    "bunkr." + tld
    for tld in (
        "ac", "ci", "cr", "fi", "ph", "pk", "ps", "si", "sk", "ws",
        "black", "red", "media", "site",
    )
]

# Hosts that are still matched by the URL pattern but are never adopted
# as an extractor's root (see BunkrAlbumExtractor.__init__).
LEGACY_DOMAINS = {
    f"{name}.{tld}"
    for name, tlds in (
        ("bunkr", ("ax", "cat", "ru", "su", "la", "is", "to")),
        ("bunkrr", ("ru", "su")),
    )
    for tld in tlds
}

# Roots ("https://host") that answered a request with HTTP 403;
# filled in at runtime by BunkrAlbumExtractor.request().
CF_DOMAINS = set()
57
58
59
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
    """Extractor for bunkr.si albums"""
    category = "bunkr"
    root = "https://bunkr.si"
    root_dl = "https://get.bunkrr.su"
    root_api = "https://apidl.bunkr.ru"
    archive_fmt = "{album_id}_{id|id_url|slug}"
    pattern = BASE_PATTERN + r"/a/([^/?#]+)"
    example = "https://bunkr.si/a/ID"

    def __init__(self, match):
        """Initialize the extractor and pick the root from the matched URL.

        The host is taken from the first two pattern groups (explicit
        "bunkr:" host or recognised bunkr host). Hosts listed in
        LEGACY_DOMAINS are not adopted as root; whatever root is already
        set stays in effect for them.
        """
        LolisafeAlbumExtractor.__init__(self, match)
        domain = self.groups[0] or self.groups[1]
        if domain not in LEGACY_DOMAINS:
            self.root = "https://" + domain

    def _init(self):
        """Resolve the file-info API endpoint and reset the skip offset.

        The "endpoint" option may be unset (default endpoint on root_api),
        a path starting with "/" (joined onto root_api), or anything else,
        which is used verbatim.
        """
        LolisafeAlbumExtractor._init(self)

        endpoint = self.config("endpoint")
        if not endpoint:
            endpoint = self.root_api + "/api/_001_v2"
        elif endpoint[0] == "/":
            endpoint = self.root_api + endpoint

        self.endpoint = endpoint
        self.offset = 0

    def skip(self, num):
        """Remember to skip the first 'num' files and return 'num'.

        The stored offset is applied in _extract_files().
        """
        self.offset = num
        return num

    def request(self, url, **kwargs):
        """Request 'url', following redirects manually and avoiding
        hosts that answer with a CF challenge.

        Automatic redirects are disabled so that every redirect target
        can be inspected. An HTTP 403 is treated as a CF challenge: the
        host is recorded in CF_DOMAINS, dropped from DOMAINS, and the
        same path is retried on a randomly chosen remaining domain,
        which also becomes the new self.root.

        Returns the first response with a status code below 300.

        Raises exception.AbortExtraction when no fallback domain is left,
        and re-raises any exception.HttpError other than 403.
        """
        kwargs["encoding"] = "utf-8"
        kwargs["allow_redirects"] = False

        while True:
            try:
                response = Extractor.request(self, url, **kwargs)
                if response.status_code < 300:
                    return response

                # redirect
                url = response.headers["Location"]
                if url[0] == "/":
                    # relative redirect: stay on the current root
                    url = self.root + url
                    continue
                root, path = self._split(url)
                if root not in CF_DOMAINS:
                    continue
                self.log.debug("Redirect to known CF challenge domain '%s'",
                               root)

            except exception.HttpError as exc:
                if exc.status != 403:
                    raise

                # CF challenge
                root, path = self._split(url)
                CF_DOMAINS.add(root)
                self.log.debug("Added '%s' to CF challenge domains", root)

                try:
                    DOMAINS.remove(root.rpartition("/")[2])
                except ValueError:
                    # host was not (or is no longer) a fallback candidate
                    pass

            # Checked on every path that reaches the fallback selection,
            # not only right after a successful removal: DOMAINS may
            # already be empty when the challenged host was not in the
            # list or when a redirect pointed at a known CF domain, and
            # random.choice() would then fail with a bare IndexError.
            if not DOMAINS:
                raise exception.AbortExtraction(
                    "All Bunkr domains require solving a CF challenge")

            # select alternative domain
            self.root = root = "https://" + random.choice(DOMAINS)
            self.log.debug("Trying '%s' as fallback", root)
            url = root + path

    def fetch_album(self, album_id):
        """Fetch an album page and return (files, metadata).

        'files' is the generator produced by _extract_files();
        'metadata' holds the album ID, title, size text, and the number
        of entries found in the page's "window.albumFiles" array.
        """
        # album metadata
        page = self.request(f"{self.root}/a/{album_id}?advanced=1").text
        # the title is unescaped twice
        title = text.unescape(text.unescape(text.extr(
            page, 'property="og:title" content="', '"')))

        # files
        items = text.extr(
            page, "window.albumFiles = [", "</script>").split("\n},\n")

        return self._extract_files(items), {
            "album_id"   : album_id,
            "album_name" : title,
            "album_size" : text.extr(
                page, '<span class="font-semibold">(', ')'),
            "count"      : len(items),
        }

    def _extract_files(self, items):
        """Yield one file dict per entry of 'items'.

        'items' are the raw text chunks of the album's file array. When a
        skip offset is set, util.advance() is applied to 'items' first.
        Errors for a single entry are logged and that entry is dropped;
        control exceptions are passed through unchanged.
        """
        if self.offset:
            items = util.advance(items, self.offset)

        for item in items:
            try:
                data_id = text.extr(item, " id: ", ",").strip()
                file = self._extract_file(data_id)

                file["name"] = util.json_loads(text.extr(
                    item, 'original:', ',\n'))
                file["slug"] = util.json_loads(text.extr(
                    item, 'slug: ', ',\n'))
                file["uuid"] = text.extr(
                    item, 'name: "', ".")
                file["size"] = text.parse_int(text.extr(
                    item, "size: ", " ,\n"))
                file["date"] = text.parse_datetime(text.extr(
                    item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y")

                yield file
            except exception.ControlException:
                raise
            except Exception as exc:
                self.log.error("%s: %s", exc.__class__.__name__, exc)
                self.log.debug("", exc_info=exc)

    def _extract_file(self, data_id):
        """Query the API endpoint for 'data_id' and build a file dict.

        Sends a JSON POST request with the file ID. If the reply is
        flagged as "encrypted", its URL is decoded with util.decrypt_xor()
        using a key derived from the reply's timestamp (in whole hours).

        Returns a dict with the file URL, the ID used for the lookup,
        the Referer header for the download, and the response validator.
        """
        referer = f"{self.root_dl}/file/{data_id}"
        headers = {"Referer": referer, "Origin": self.root_dl}
        data = self.request_json(self.endpoint, method="POST", headers=headers,
                                 json={"id": data_id})

        if data.get("encrypted"):
            key = f"SECRET_KEY_{data['timestamp'] // 3600}"
            file_url = util.decrypt_xor(data["url"], key.encode())
        else:
            file_url = data["url"]

        return {
            "file"          : file_url,
            "id_url"        : data_id,
            "_http_headers" : {"Referer": referer},
            "_http_validate": self._validate,
        }

    def _validate(self, response):
        """Return False if 'response' was redirected to the maintenance
        placeholder video, True otherwise."""
        if response.history and response.url.endswith("/maintenance-vid.mp4"):
            self.log.warning("File server in maintenance mode")
            return False
        return True

    def _split(self, url):
        """Split 'url' into (root, path) at the first "/" found at or
        after index 8, i.e. past an "https://" prefix.

        Raises ValueError if there is no such "/".
        """
        pos = url.index("/", 8)
        return url[:pos], url[pos:]
208
209
210
class BunkrMediaExtractor(BunkrAlbumExtractor):
    """Extractor for bunkr.si media links"""
    subcategory = "media"
    directory_fmt = ("{category}",)
    pattern = BASE_PATTERN + r"(/[fvid]/[^/?#]+)"
    example = "https://bunkr.si/f/FILENAME"

    def fetch_album(self, album_id):
        """Fetch a single media page and return it as a one-file album.

        'album_id' is appended directly to the root, so it is expected to
        be the URL path including its leading slash, e.g. "/f/FILENAME"
        (presumably the last pattern group; confirm against the parent
        class).

        Returns ((file,), metadata) on success and ((), {}) when the page
        could not be processed. Control exceptions, such as the abort
        raised by request() when no usable domain is left, are passed
        through instead of being reported as an ordinary error.
        """
        try:
            page = self.request(f"{self.root}{album_id}").text
            data_id = text.extr(page, 'data-file-id="', '"')
            file = self._extract_file(data_id)
            # text content of the first <h1> element
            file["name"] = text.unescape(text.extr(
                page, "<h1", "<").rpartition(">")[2])
            file["slug"] = album_id.rpartition("/")[2]
            file["uuid"] = text.extr(page, "/thumbs/", ".")
        except exception.ControlException:
            # same handling as BunkrAlbumExtractor._extract_files()
            raise
        except Exception as exc:
            self.log.error("%s: %s", exc.__class__.__name__, exc)
            self.log.debug("", exc_info=exc)
            return (), {}

        return (file,), {
            "album_id"   : "",
            "album_name" : "",
            "album_size" : -1,
            "description": "",
            "count"      : 1,
        }
237
238