Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/boosty.py
8906 views
1
# -*- coding: utf-8 -*-
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License version 2 as
5
# published by the Free Software Foundation.
6
7
"""Extractors for https://www.boosty.to/"""
8
9
from .common import Extractor, Message
10
from .. import text, util, exception
11
import itertools
12
13
# Regex prefix shared by every extractor 'pattern' in this module:
# an optional http(s) scheme followed by the bare "boosty.to" host
BASE_PATTERN = r"(?:https?://)?boosty\.to"
14
15
16
class BoostyExtractor(Extractor):
    """Common base for all boosty.to extractors

    Subclasses supply posts(); items() turns every accessible post into
    one directory message followed by one URL message per file.
    """
    category = "boosty"
    root = "https://www.boosty.to"
    directory_fmt = (
        "{category}",
        "{user[blogUrl]} ({user[id]})",
        "{post[date]:%Y-%m-%d} {post[int_id]}",
    )
    filename_fmt = "{num:>02} {file[id]}.{extension}"
    archive_fmt = "{file[id]}"
    cookies_domain = ".boosty.to"
    cookies_names = ("auth",)

    def _init(self):
        """Create the API client and read this extractor's options"""
        self.api = BoostyAPI(self)

        # None  -> user data may still be requested by posts()
        # False -> 'metadata' option is off, no user data is requested
        self._user = None if self.config("metadata") else False
        self.only_allowed = self.config("allowed", True)
        self.only_bought = self.config("bought")

        videos = self.config("videos")
        if isinstance(videos, str):
            # comma-separated list of format names;
            # an empty string is kept as-is and disables videos
            if videos:
                videos = videos.split(",")
        elif videos is None or (
                videos and not isinstance(videos, (list, tuple))):
            # unset or a truthy non-sequence value: try every known
            # format, best quality first
            videos = (
                "ultra_hd",  # 2160p
                "quad_hd",   # 1440p
                "full_hd",   # 1080p
                "high",      # 720p
                "medium",    # 480p
                "low",       # 360p
                "lowest",    # 240p
                "tiny",      # 144p
            )
        self.videos = videos

    def items(self):
        """Yield directory and URL messages for all posts from posts()"""
        # file downloads reuse the API headers minus 'Accept'
        headers = dict(self.api.headers)
        headers.pop("Accept")

        for post in self.posts():
            if not post.get("hasAccess"):
                self.log.warning("Not allowed to access post %s", post["id"])
                continue

            files = self._extract_files(post)

            # move 'user' out of the post object;
            # explicitly fetched user data takes precedence
            user = post.pop("user", None)
            if self._user:
                user = self._user

            data = {
                "post" : post,
                "user" : user,
                "count": len(files),
                "_http_headers": headers,
            }

            yield Message.Directory, "", data
            for num, file in enumerate(files, 1):
                data["num"] = num
                data["file"] = file
                url = file["url"]
                yield Message.Url, url, text.nameext_from_url(url, data)

    def posts(self):
        """Yield JSON content of all relevant posts"""

    def _extract_files(self, post):
        """Return the list of downloadable file blocks of 'post'

        Walks post["data"] and, as a side effect, fills post["content"]
        and post["links"], sets post["date"] when "createdAt" is present,
        and finally removes post["data"].
        A block that raises while being processed is logged and skipped.
        """
        files = []
        content = []
        links = []
        post["content"] = content
        post["links"] = links

        if "createdAt" in post:
            post["date"] = self.parse_timestamp(post["createdAt"])

        for block in post["data"]:
            try:
                kind = block["type"]

                if kind == "text":
                    if block["modificator"] != "BLOCK_END":
                        parsed = util.json_loads(block["content"])
                        content.append(parsed[0])

                elif kind in ("image", "audio_file", "file"):
                    files.append(self._update_url(post, block))

                elif kind == "ok_video":
                    if not self.videos:
                        self.log.debug("%s: Skipping video %s",
                                       post["id"], block["id"])
                        continue

                    # format name -> URL, ignoring entries without URL
                    available = {
                        entry["type"]: entry["url"]
                        for entry in block["playerUrls"]
                        if entry["url"]
                    }
                    # URLs ordered by the configured format preference
                    urls = iter([
                        available[name]
                        for name in self.videos
                        if name in available
                    ])
                    first = next(urls, None)
                    if first is None:
                        self.log.warning(
                            "%s: Found no suitable video format for %s",
                            post["id"], block["id"])
                    else:
                        block["url"] = first
                        # remaining URLs serve as fallbacks
                        block["_fallback"] = urls
                        files.append(block)

                elif kind == "link":
                    target = block["url"]
                    links.append(target)
                    content.append(target)

                elif kind == "smile":
                    content.append(":" + block["name"] + ":")

                else:
                    self.log.debug("%s: Unsupported data type '%s'",
                                   post["id"], kind)
            except Exception as exc:
                self.log.debug("%s: %s", exc.__class__.__name__, exc)

        del post["data"]
        return files

    def _update_url(self, post, block):
        """Append the post's query parameters to block["url"]

        Adds post["signedQuery"] (without its first character) and an
        'is_migrated' flag when post["isMigrated"] is not None.
        Modifies 'block' in place and returns it.
        """
        url = block["url"]
        extra = []

        signed_query = post.get("signedQuery")
        if signed_query:
            # drop the first character
            # (presumably a leading "?" - verify against API responses)
            extra.append(signed_query[1:])

        migrated = post.get("isMigrated")
        if migrated is not None:
            extra.append("is_migrated=" + str(migrated).lower())

        if extra:
            sep = "&" if "?" in url else "?"
            url = url + sep + "&".join(extra)

        block["url"] = url
        return block
161
162
163
class BoostyUserExtractor(BoostyExtractor):
    """Extractor for boosty.to user profiles"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/([^/?#]+)(?:\?([^#]+))?$"
    example = "https://boosty.to/USER"

    def posts(self):
        """Return the posts of the blog named in the URL"""
        username, query = self.groups
        web_params = text.parse_query(query)

        # request user data only if it has not been disabled or set yet
        if self._user is None:
            self._user = self.api.user(username)

        return self.api.blog_posts(username, web_params)
175
176
177
class BoostyMediaExtractor(BoostyExtractor):
    """Extractor for boosty.to user media"""
    subcategory = "media"
    directory_fmt = (
        "{category}",
        "{user[blogUrl]} ({user[id]})",
        "media",
    )
    filename_fmt = "{post[id]}_{num}.{extension}"
    pattern = BASE_PATTERN + r"/([^/?#]+)/media/([^/?#]+)(?:\?([^#]+))?"
    example = "https://boosty.to/USER/media/all"

    def posts(self):
        """Return the posts of a blog's media album"""
        username, media_type, query = self.groups
        web_params = text.parse_query(query)

        # user data is always requested here,
        # regardless of the current value of '_user'
        self._user = self.api.user(username)

        return self.api.blog_media_album(username, media_type, web_params)
190
191
192
class BoostyFeedExtractor(BoostyExtractor):
    """Extractor for your boosty.to subscription feed"""
    subcategory = "feed"
    pattern = BASE_PATTERN + r"/(?:\?([^#]+))?(?:$|#)"
    example = "https://boosty.to/"

    def posts(self):
        """Return the posts of the subscription feed"""
        query = self.groups[0]
        return self.api.feed_posts(text.parse_query(query))
201
202
203
class BoostyPostExtractor(BoostyExtractor):
    """Extractor for boosty.to posts"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/([^/?#]+)/posts/([0-9a-f-]+)"
    example = "https://boosty.to/USER/posts/01234567-89ab-cdef-0123-456789abcd"

    def posts(self):
        """Return a 1-tuple containing the post named in the URL"""
        username, post_id = self.groups

        # request user data only if it has not been disabled or set yet
        if self._user is None:
            self._user = self.api.user(username)

        post = self.api.post(username, post_id)
        return (post,)
214
215
216
class BoostyFollowingExtractor(BoostyExtractor):
    """Extractor for your boosty.to subscribed users"""
    subcategory = "following"
    pattern = BASE_PATTERN + r"/app/settings/subscriptions"
    example = "https://boosty.to/app/settings/subscriptions"

    def items(self):
        """Yield one queue message per subscribed blog"""
        subscriptions = self.api.user_subscriptions()
        for user in subscriptions:
            url = f"{self.root}/{user['blog']['blogUrl']}"
            # have the queued URL handled by the user extractor
            user["_extractor"] = BoostyUserExtractor
            yield Message.Queue, url, user
227
228
229
class BoostyDirectMessagesExtractor(BoostyExtractor):
    """Extractor for boosty.to direct messages"""
    subcategory = "direct-messages"
    directory_fmt = (
        "{category}",
        "{user[blogUrl]} ({user[id]})",
        "Direct Messages",
    )
    pattern = BASE_PATTERN + r"/app/messages/?\?dialogId=(\d+)"
    example = "https://boosty.to/app/messages?dialogId=12345"

    def items(self):
        """Yield direct messages from a given dialog ID."""
        dialog_id = self.groups[0]
        dialog = self.api.dialog(dialog_id)
        signed_query = dialog.get("signedQuery")

        # nothing to do if the dialog carries no initial messages
        try:
            initial = dialog["messages"]["data"]
            # ID of the first initial message; used as the offset
            # when requesting the remaining messages
            offset = initial[0]["id"]
        except Exception:
            return

        # user data is optional; continue without it on any failure
        try:
            user = self.api.user(dialog["chatmate"]["url"])
        except Exception:
            user = None

        initial.reverse()
        remaining = self.api.dialog_messages(dialog_id, offset=offset)

        for message in itertools.chain(initial, remaining):
            # _update_url() reads the signed query from the 'post' object
            message["signedQuery"] = signed_query
            files = self._extract_files(message)
            data = {
                "post": message,
                "user": user,
                "count": len(files),
            }

            yield Message.Directory, "", data
            for num, file in enumerate(files, 1):
                data["num"] = num
                data["file"] = file
                url = file["url"]
                yield Message.Url, url, text.nameext_from_url(url, data)
272
273
274
class BoostyAPI():
    """Interface for the Boosty API"""
    root = "https://api.boosty.to"

    def __init__(self, extractor, access_token=None):
        """Set up request headers and, if available, authorization

        Without an explicit 'access_token', the token is taken from the
        extractor's 'auth' cookie; a warning is logged when that cookie's
        'expiresAt' value lies in the past.
        """
        self.extractor = extractor
        self.headers = {
            "Accept": "application/json, text/plain, */*",
            "Origin": extractor.root,
        }

        if not access_token:
            if auth := self.extractor.cookies.get("auth", domain=".boosty.to"):
                auth = text.unquote(auth)
                access_token = text.extr(auth, '"accessToken":"', '"')
                if expires := text.extr(auth, '"expiresAt":', ','):
                    import time
                    # 'expiresAt' is compared against the current time
                    # in milliseconds
                    if text.parse_int(expires) < time.time() * 1000:
                        extractor.log.warning("'auth' cookie tokens expired")
        if access_token:
            self.headers["Authorization"] = "Bearer " + access_token

    def blog_posts(self, username, params):
        """Yield the posts of the blog 'username'"""
        endpoint = f"/v1/blog/{username}/post/"
        params = self._merge_params(params, {
            "limit"         : "5",
            "offset"        : None,
            "comments_limit": "2",
            "reply_limit"   : "1",
        })
        return self._pagination(endpoint, params)

    def blog_media_album(self, username, type="all", params=()):
        """Yield the posts of a blog's media album of the given type"""
        endpoint = f"/v1/blog/{username}/media_album/"
        params = self._merge_params(params, {
            # trailing "s" characters are removed from the type name
            "type"    : type.rstrip("s"),
            "limit"   : "15",
            "limit_by": "media",
            "offset"  : None,
        })
        return self._pagination(endpoint, params, self._transform_media_posts)

    def _transform_media_posts(self, data):
        """Convert a media album page into a list of post objects

        Each entry's 'media' list is stored as the 'data' of its post.
        """
        posts = []

        for obj in data["mediaPosts"]:
            post = obj["post"]
            post["data"] = obj["media"]
            posts.append(post)

        return posts

    def post(self, username, post_id):
        """Return the API response for a single post"""
        endpoint = f"/v1/blog/{username}/post/{post_id}"
        return self._call(endpoint)

    def feed_posts(self, params=None):
        """Yield the posts of the feed

        'only_allowed' and 'only_bought' are added according to the
        extractor's settings unless already present in 'params'.
        """
        endpoint = "/v1/feed/post/"
        params = self._merge_params(params, {
            "limit"         : "5",
            "offset"        : None,
            "comments_limit": "2",
        })
        if "only_allowed" not in params and self.extractor.only_allowed:
            params["only_allowed"] = "true"
        if "only_bought" not in params and self.extractor.only_bought:
            params["only_bought"] = "true"
        return self._pagination(endpoint, params, key="posts")

    def user(self, username):
        """Return blog data for 'username' with the owner's ID as 'id'"""
        endpoint = "/v1/blog/" + username
        user = self._call(endpoint)
        user["id"] = user["owner"]["id"]
        return user

    def user_subscriptions(self, params=None):
        """Yield the entries of the subscriptions endpoint"""
        endpoint = "/v1/user/subscriptions"
        params = self._merge_params(params, {
            "limit"      : "30",
            "with_follow": "true",
            "offset"     : None,
        })
        return self._pagination_users(endpoint, params)

    def _merge_params(self, params_web, params_api):
        """Merge website query parameters into API parameters

        Known website parameter names are translated to their API
        names; all others are copied unchanged. Values from
        'params_web' override those in 'params_api', which is updated
        in place and returned.
        """
        if params_web:
            web_to_api = {
                "isOnlyAllowedPosts": "is_only_allowed",
                "postsTagsIds"      : "tags_ids",
                "postsFrom"         : "from_ts",
                "postsTo"           : "to_ts",
            }
            for name, value in params_web.items():
                name = web_to_api.get(name, name)
                params_api[name] = value
        return params_api

    def _call(self, endpoint, params=None):
        """Send an API request and return its decoded JSON response

        Raises AuthenticationError on 3xx responses and AbortExtraction
        on other error responses. A 429 response causes a wait before
        the request is retried.
        """
        url = self.root + endpoint

        while True:
            response = self.extractor.request(
                url, params=params, headers=self.headers,
                fatal=None, allow_redirects=False)

            if response.status_code < 300:
                return response.json()

            elif response.status_code < 400:
                # redirects are not followed (allow_redirects=False)
                # and are reported as an invalid access token
                raise exception.AuthenticationError("Invalid API access token")

            elif response.status_code == 429:
                self.extractor.wait(seconds=600)

            else:
                self.extractor.log.debug(response.text)
                raise exception.AbortExtraction("API request failed")

    def _pagination(self, endpoint, params, transform=None, key=None):
        """Yield results from all pages of an 'extra'-paginated endpoint

        Page content is data["data"], optionally passed through
        'transform' or narrowed to data["data"][key]. Iteration stops
        when 'extra' reports 'isLast' or provides no further offset.
        """
        if "is_only_allowed" not in params and self.extractor.only_allowed:
            params["only_allowed"] = "true"
            params["is_only_allowed"] = "true"

        while True:
            data = self._call(endpoint, params)

            if transform:
                yield from transform(data["data"])
            elif key:
                yield from data["data"][key]
            else:
                yield from data["data"]

            extra = data["extra"]
            if extra.get("isLast"):
                return
            offset = extra.get("offset")
            if not offset:
                return
            params["offset"] = offset

    def _pagination_users(self, endpoint, params):
        """Yield results from an offset/limit/total-paginated endpoint"""
        while True:
            data = self._call(endpoint, params)

            yield from data["data"]

            offset = data["offset"] + data["limit"]
            # stop as soon as the next offset reaches 'total';
            # requesting offset == total could only return an empty page
            if offset >= data["total"]:
                return
            params["offset"] = offset

    def dialog(self, dialog_id):
        """Return the API response for a dialog"""
        endpoint = "/v1/dialog/" + dialog_id
        return self._call(endpoint)

    def dialog_messages(self, dialog_id, limit=300, offset=None):
        """Yield the messages of a dialog, starting at 'offset'"""
        endpoint = f"/v1/dialog/{dialog_id}/message/"
        params = {
            "limit": limit,
            "reverse": "true",
            "offset": offset,
        }
        return self._pagination_dialog(endpoint, params)

    def _pagination_dialog(self, endpoint, params):
        """Yield results from all pages of a dialog message endpoint

        Stops on 'isLast', on a missing or empty offset, or when the
        'extra' data of a response cannot be processed.
        """
        while True:
            data = self._call(endpoint, params)

            yield from data["data"]

            try:
                extra = data["extra"]
                if extra.get("isLast"):
                    break
                params["offset"] = offset = extra["offset"]
                if not offset:
                    break
            except Exception:
                break
454
455