Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/boosty.py
5399 views
1
# -*- coding: utf-8 -*-
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License version 2 as
5
# published by the Free Software Foundation.
6
7
"""Extractors for https://www.boosty.to/"""
8
9
from .common import Extractor, Message
10
from .. import text, util, exception
11
import itertools
12
13
# Common URL prefix shared by the 'pattern' regex of every extractor below;
# the "http://" / "https://" scheme part is optional.
BASE_PATTERN = r"(?:https?://)?boosty\.to"
14
15
16
class BoostyExtractor(Extractor):
    """Base class for boosty extractors"""
    category = "boosty"
    root = "https://www.boosty.to"
    directory_fmt = (
        "{category}",
        "{user[blogUrl]} ({user[id]})",
        "{post[date]:%Y-%m-%d} {post[int_id]}",
    )
    filename_fmt = "{num:>02} {file[id]}.{extension}"
    archive_fmt = "{file[id]}"
    cookies_domain = ".boosty.to"
    cookies_names = ("auth",)

    def _init(self):
        """Create the API helper and read this extractor's config options"""
        self.api = BoostyAPI(self)

        # None -> user metadata still has to be fetched by posts();
        # False -> user metadata was not requested
        self._user = None if self.config("metadata") else False
        self.only_allowed = self.config("allowed", True)
        self.only_bought = self.config("bought")

        qualities = self.config("videos")
        if qualities is None or qualities:
            if isinstance(qualities, str):
                qualities = qualities.split(",")
            elif not isinstance(qualities, (list, tuple)):
                # ultra_hd: 2160p
                # quad_hd: 1440p
                # full_hd: 1080p
                # high: 720p
                # medium: 480p
                # low: 360p
                # lowest: 240p
                # tiny: 144p
                qualities = (
                    "ultra_hd", "quad_hd", "full_hd",
                    "high", "medium", "low", "lowest", "tiny",
                )
        # a falsy non-None setting is stored unchanged and disables videos
        self.videos = qualities

    def items(self):
        """Yield a Directory message per post and a Url message per file"""
        for post in self.posts():
            if not post.get("hasAccess"):
                self.log.warning("Not allowed to access post %s", post["id"])
                continue

            files = self._extract_files(post)

            # 'user' is always moved out of the post object; explicitly
            # fetched user metadata takes precedence over the embedded one
            owner = post.pop("user", None)
            if self._user:
                owner = self._user

            data = {
                "post" : post,
                "user" : owner,
                "count": len(files),
            }

            yield Message.Directory, data
            for num, file in enumerate(files, 1):
                data["num"] = num
                data["file"] = file
                url = file["url"]
                yield Message.Url, url, text.nameext_from_url(url, data)

    def posts(self):
        """Yield JSON content of all relevant posts"""

    def _extract_files(self, post):
        """Return the file blocks found in post['data']

        Text, link and smile blocks are gathered in post['content'],
        link URLs additionally in post['links'].
        post['data'] is removed afterwards.
        """
        files = []
        content = post["content"] = []
        links = post["links"] = []

        if "createdAt" in post:
            post["date"] = text.parse_timestamp(post["createdAt"])

        for block in post["data"]:
            # any error while handling a block is logged at debug level
            # and does not prevent the remaining blocks from being processed
            try:
                kind = block["type"]

                if kind == "text":
                    if block["modificator"] == "BLOCK_END":
                        continue
                    parsed = util.json_loads(block["content"])
                    content.append(parsed[0])

                elif kind in ("image", "audio_file", "file"):
                    files.append(self._update_url(post, block))

                elif kind == "ok_video":
                    if not self.videos:
                        self.log.debug("%s: Skipping video %s",
                                       post["id"], block["id"])
                        continue

                    # map format name -> URL for formats that have a URL
                    available = {}
                    for player in block["playerUrls"]:
                        if player["url"]:
                            available[player["type"]] = player["url"]

                    # keep the order given by the configured format list
                    urls = [
                        available[name]
                        for name in self.videos
                        if name in available
                    ]
                    if not urls:
                        self.log.warning(
                            "%s: Found no suitable video format for %s",
                            post["id"], block["id"])
                        continue

                    remaining = iter(urls)
                    block["url"] = next(remaining)
                    # all other matching formats serve as fallback URLs
                    block["_fallback"] = remaining
                    files.append(block)

                elif kind == "link":
                    link = block["url"]
                    links.append(link)
                    content.append(link)

                elif kind == "smile":
                    content.append(":" + block["name"] + ":")

                else:
                    self.log.debug("%s: Unsupported data type '%s'",
                                   post["id"], kind)
            except Exception as exc:
                self.log.debug("%s: %s", exc.__class__.__name__, exc)

        del post["data"]
        return files

    def _update_url(self, post, block):
        """Append the post's signed query / migration flag to block['url']

        Modifies 'block' in place and returns it.
        """
        url = block["url"]
        extra = []

        signed_query = post.get("signedQuery")
        if signed_query:
            # the first character is dropped
            # (presumably a leading "?" - TODO confirm)
            extra.append(signed_query[1:])

        migrated = post.get("isMigrated")
        if migrated is not None:
            extra.append("is_migrated=" + str(migrated).lower())

        if extra:
            first_sep = "&" if "?" in url else "?"
            url += first_sep + "&".join(extra)

        block["url"] = url
        return block
157
158
159
class BoostyUserExtractor(BoostyExtractor):
    """Extractor for boosty.to user profiles"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/([^/?#]+)(?:\?([^#]+))?$"
    example = "https://boosty.to/USER"

    def posts(self):
        """Return the posts of the blog named in the URL"""
        blog, query_string = self.groups
        filters = text.parse_query(query_string)

        # user metadata is only fetched while self._user is still None
        if self._user is None:
            self._user = self.api.user(blog)

        return self.api.blog_posts(blog, filters)
171
172
173
class BoostyMediaExtractor(BoostyExtractor):
    """Extractor for boosty.to user media"""
    subcategory = "media"
    directory_fmt = ("{category}", "{user[blogUrl]} ({user[id]})", "media")
    filename_fmt = "{post[id]}_{num}.{extension}"
    pattern = BASE_PATTERN + r"/([^/?#]+)/media/([^/?#]+)(?:\?([^#]+))?"
    example = "https://boosty.to/USER/media/all"

    def posts(self):
        """Return the media album entries of the blog named in the URL"""
        blog, media_type, query_string = self.groups
        filters = text.parse_query(query_string)

        # user metadata is fetched unconditionally here
        self._user = self.api.user(blog)

        return self.api.blog_media_album(blog, media_type, filters)
186
187
188
class BoostyFeedExtractor(BoostyExtractor):
    """Extractor for your boosty.to subscription feed"""
    subcategory = "feed"
    pattern = BASE_PATTERN + r"/(?:\?([^#]+))?(?:$|#)"
    example = "https://boosty.to/"

    def posts(self):
        """Return the feed posts, filtered by the URL's query parameters"""
        query_string = self.groups[0]
        return self.api.feed_posts(text.parse_query(query_string))
197
198
199
class BoostyPostExtractor(BoostyExtractor):
    """Extractor for boosty.to posts"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/([^/?#]+)/posts/([0-9a-f-]+)"
    example = "https://boosty.to/USER/posts/01234567-89ab-cdef-0123-456789abcd"

    def posts(self):
        """Return a 1-tuple holding the single post named in the URL"""
        blog, post_id = self.groups

        # user metadata is only fetched while self._user is still None
        if self._user is None:
            self._user = self.api.user(blog)

        post = self.api.post(blog, post_id)
        return (post,)
210
211
212
class BoostyFollowingExtractor(BoostyExtractor):
    """Extractor for your boosty.to subscribed users"""
    subcategory = "following"
    pattern = BASE_PATTERN + r"/app/settings/subscriptions"
    example = "https://boosty.to/app/settings/subscriptions"

    def items(self):
        """Yield one Queue message per subscription, for its blog URL"""
        for subscription in self.api.user_subscriptions():
            blog_url = subscription["blog"]["blogUrl"]
            # queued URLs are to be handled by the user extractor
            subscription["_extractor"] = BoostyUserExtractor
            yield Message.Queue, f"{self.root}/{blog_url}", subscription
223
224
225
class BoostyDirectMessagesExtractor(BoostyExtractor):
    """Extractor for boosty.to direct messages"""
    subcategory = "direct-messages"
    directory_fmt = (
        "{category}",
        "{user[blogUrl]} ({user[id]})",
        "Direct Messages",
    )
    pattern = BASE_PATTERN + r"/app/messages/?\?dialogId=(\d+)"
    example = "https://boosty.to/app/messages?dialogId=12345"

    def items(self):
        """Yield direct messages from a given dialog ID."""
        dialog_id = self.groups[0]
        dialog = self.api.dialog(dialog_id)
        signed_query = dialog.get("signedQuery")

        # nothing to do when the dialog response carries no messages
        try:
            initial = dialog["messages"]["data"]
            offset = initial[0]["id"]
        except Exception:
            return

        # chatmate metadata is optional; fall back to None on any failure
        try:
            chatmate = self.api.user(dialog["chatmate"]["url"])
        except Exception:
            chatmate = None

        # the pagination offset was taken from the first message
        # *before* reversing the initial batch
        initial.reverse()
        older = self.api.dialog_messages(dialog_id, offset=offset)

        for message in itertools.chain(initial, older):
            # _extract_files() reads 'signedQuery' from the message object
            message["signedQuery"] = signed_query
            files = self._extract_files(message)

            data = {
                "post": message,
                "user": chatmate,
                "count": len(files),
            }

            yield Message.Directory, data
            for num, file in enumerate(files, 1):
                data["num"] = num
                data["file"] = file
                url = file["url"]
                yield Message.Url, url, text.nameext_from_url(url, data)
268
269
270
class BoostyAPI():
    """Interface for the Boosty API"""
    root = "https://api.boosty.to"

    def __init__(self, extractor, access_token=None):
        """Set up request headers for API calls made through 'extractor'

        Without an explicit 'access_token', the token is extracted from
        the URL-encoded 'auth' cookie of the '.boosty.to' domain, if any.
        """
        self.extractor = extractor
        self.headers = {
            "Accept": "application/json, text/plain, */*",
            "Origin": extractor.root,
        }

        if not access_token:
            auth = self.extractor.cookies.get("auth", domain=".boosty.to")
            if auth:
                access_token = text.extr(
                    auth, "%22accessToken%22%3A%22", "%22")
        if access_token:
            self.headers["Authorization"] = "Bearer " + access_token

    def blog_posts(self, username, params):
        """Yield the posts of the blog 'username'"""
        endpoint = f"/v1/blog/{username}/post/"
        params = self._merge_params(params, {
            "limit"         : "5",
            "offset"        : None,
            "comments_limit": "2",
            "reply_limit"   : "1",
        })
        return self._pagination(endpoint, params)

    def blog_media_album(self, username, type="all", params=()):
        """Yield the media album posts of the blog 'username'

        A trailing "s" is stripped from 'type' before it is sent.
        """
        endpoint = f"/v1/blog/{username}/media_album/"
        params = self._merge_params(params, {
            "type"    : type.rstrip("s"),
            "limit"   : "15",
            "limit_by": "media",
            "offset"  : None,
        })
        return self._pagination(endpoint, params, self._transform_media_posts)

    def _transform_media_posts(self, data):
        """Convert a media album result into a list of post objects

        Each entry's 'media' list is stored as its post's 'data' value.
        """
        posts = []

        for obj in data["mediaPosts"]:
            post = obj["post"]
            post["data"] = obj["media"]
            posts.append(post)

        return posts

    def post(self, username, post_id):
        """Return the JSON data of a single post"""
        endpoint = f"/v1/blog/{username}/post/{post_id}"
        return self._call(endpoint)

    def feed_posts(self, params=None):
        """Yield the posts of the feed endpoint"""
        endpoint = "/v1/feed/post/"
        params = self._merge_params(params, {
            "limit"         : "5",
            "offset"        : None,
            "comments_limit": "2",
        })
        # explicit query parameters take precedence over config options
        if "only_allowed" not in params and self.extractor.only_allowed:
            params["only_allowed"] = "true"
        if "only_bought" not in params and self.extractor.only_bought:
            params["only_bought"] = "true"
        return self._pagination(endpoint, params, key="posts")

    def user(self, username):
        """Return blog metadata, with the owner's ID copied to 'id'"""
        endpoint = "/v1/blog/" + username
        user = self._call(endpoint)
        user["id"] = user["owner"]["id"]
        return user

    def user_subscriptions(self, params=None):
        """Yield the entries of the user subscriptions endpoint"""
        endpoint = "/v1/user/subscriptions"
        params = self._merge_params(params, {
            "limit"      : "30",
            "with_follow": "true",
            "offset"     : None,
        })
        return self._pagination_users(endpoint, params)

    def _merge_params(self, params_web, params_api):
        """Merge website query parameters into API parameters

        Known website parameter names are translated to their API names;
        all others are copied as-is. 'params_api' is updated in place and
        returned. A falsy 'params_web' leaves 'params_api' untouched.
        """
        if params_web:
            web_to_api = {
                "isOnlyAllowedPosts": "is_only_allowed",
                "postsTagsIds"      : "tags_ids",
                "postsFrom"         : "from_ts",
                "postsTo"           : "to_ts",
            }
            for name, value in params_web.items():
                name = web_to_api.get(name, name)
                params_api[name] = value
        return params_api

    def _call(self, endpoint, params=None):
        """Send a GET-style API request and return its decoded JSON

        Raises AuthenticationError on a 3xx status and AbortExtraction on
        any other non-2xx status except 429, which is retried.
        """
        url = self.root + endpoint

        while True:
            # redirects are not followed, so a 3xx status is seen below
            response = self.extractor.request(
                url, params=params, headers=self.headers,
                fatal=None, allow_redirects=False)

            if response.status_code < 300:
                return response.json()

            elif response.status_code < 400:
                raise exception.AuthenticationError("Invalid API access token")

            elif response.status_code == 429:
                # rate limited: wait, then repeat the same request
                self.extractor.wait(seconds=600)

            else:
                self.extractor.log.debug(response.text)
                raise exception.AbortExtraction("API request failed")

    def _pagination(self, endpoint, params, transform=None, key=None):
        """Yield results of an endpoint paginated via 'extra.offset'

        Each page's 'data' value is passed through 'transform' if given,
        indexed by 'key' if given, or yielded from directly otherwise.
        Stops when 'extra.isLast' is set or no further offset is returned.
        """
        if "is_only_allowed" not in params and self.extractor.only_allowed:
            params["only_allowed"] = "true"
            params["is_only_allowed"] = "true"

        while True:
            data = self._call(endpoint, params)

            if transform:
                yield from transform(data["data"])
            elif key:
                yield from data["data"][key]
            else:
                yield from data["data"]

            extra = data["extra"]
            if extra.get("isLast"):
                return
            offset = extra.get("offset")
            if not offset:
                return
            params["offset"] = offset

    def _pagination_users(self, endpoint, params):
        """Yield results of an endpoint paginated via numeric offsets

        The next offset is the reported 'offset' plus 'limit'; iteration
        stops as soon as it reaches the reported 'total'.
        """
        while True:
            data = self._call(endpoint, params)

            yield from data["data"]

            offset = data["offset"] + data["limit"]
            # '>=' rather than '>': when the next offset equals 'total',
            # every entry has been returned already and another request
            # would only fetch a page past the end
            if offset >= data["total"]:
                return
            params["offset"] = offset

    def dialog(self, dialog_id):
        """Return the JSON data of the dialog 'dialog_id'"""
        endpoint = f"/v1/dialog/{dialog_id}"
        return self._call(endpoint)

    def dialog_messages(self, dialog_id, limit=300, offset=None):
        """Yield messages of the dialog 'dialog_id' starting at 'offset'"""
        endpoint = f"/v1/dialog/{dialog_id}/message/"
        params = {
            "limit": limit,
            "reverse": "true",
            "offset": offset,
        }
        return self._pagination_dialog(endpoint, params)

    def _pagination_dialog(self, endpoint, params):
        """Yield dialog messages page by page

        Stops on 'extra.isLast', on a falsy next offset, or when the
        response's 'extra' data is missing or malformed.
        """
        while True:
            data = self._call(endpoint, params)

            yield from data["data"]

            try:
                extra = data["extra"]
                if extra.get("isLast"):
                    break
                params["offset"] = offset = extra["offset"]
                if not offset:
                    break
            except Exception:
                break
446
447