# Scraped from a web code viewer (CoCalc mirror); page chrome removed.
# GitHub Repository: mikf/gallery-dl
# Path: blob/master/gallery_dl/extractor/deviantart.py
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2015-2026 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for https://www.deviantart.com/"""
10
11
from .common import Extractor, Message, Dispatch
12
from .. import text, util, dt, exception
13
from ..cache import cache, memcache
14
import collections
15
import mimetypes
16
import binascii
17
import time
18
19
# Placeholder icon URL; a "usericon" equal to this value is treated as
# "no custom avatar" by the code in this module.
DEFAULT_AVATAR = "https://a.deviantart.net/avatars/default.gif"

# URL prefix for user-scoped pages. The username is captured by
#   group 1 for "deviantart.com/USER" (paths starting with "watch/"
#           are rejected by the lookahead), or by
#   group 2 for "USER.deviantart.com" (a "www." prefix is rejected).
# An optional "fx" prefix on the domain is accepted in both forms.
BASE_PATTERN = (
    r"(?:https?://)?"
    r"(?:"
    r"(?:www\.)?(?:fx)?deviantart\.com/(?!watch/)([\w-]+)"
    r"|"
    r"(?!www\.)([\w-]+)\.(?:fx)?deviantart\.com"
    r")"
)
25
26
27
class DeviantartExtractor(Extractor):
    """Common base class of all deviantart extractors"""
    # identification and site root
    category = "deviantart"
    root = "https://www.deviantart.com"

    # default output naming
    directory_fmt = ("{category}", "{username}")
    filename_fmt = "{category}_{index}_{title}.{extension}"

    # cookies checked by login()
    cookies_names = ("auth", "auth_secure", "userinfo")
    cookies_domain = ".deviantart.com"

    # time.time() value recorded by the latest _limited_request() call;
    # read and written through the class, so it is shared by instances
    _last_request = 0
36
37
def __init__(self, match):
    """Initialize the extractor from its URL pattern match

    The username is taken from match group 1 or, failing that, from
    group 2 and stored lowercased in 'self.user' ("" if both are
    empty). 'self.offset' starts at 0.
    """
    Extractor.__init__(self, match)
    username = match[1] or match[2] or ""
    self.offset = 0
    self.user = username.lower()
41
42
def _init(self):
    """Read config options and select the callbacks derived from them

    Besides storing the plain option values, this creates the OAuth
    API object and picks the handlers used for original-file lookups,
    quality rewriting and journal output.
    """
    cfg = self.config

    self.jwt = cfg("jwt", False)
    self.flat = cfg("flat", True)
    self.extra = cfg("extra", False)
    self.quality = cfg("quality", "100")
    self.original = cfg("original", True)
    self.previews = cfg("previews", False)
    self.intermediary = cfg("intermediary", True)
    self.comments_avatars = cfg("comments-avatars", False)
    # requesting comment avatars implies requesting comments
    self.comments = self.comments_avatars or cfg("comments", False)

    self.api = DeviantartOAuthAPI(self)
    self.eclipse_api = None
    self.group = False
    self._premium_cache = {}

    # 'unwatch' collects usernames for _unwatch_premium();
    # None means the feature is disabled
    if cfg("auto-unwatch"):
        self.unwatch = []
        self.finalize = self._unwatch_premium
    else:
        self.unwatch = None

    # turn the quality option into a replacement string plus a
    # matching substitution function
    if self.quality:
        if self.quality == "png":
            self.quality = "-fullview.png?"
            quality_pattern = r"-fullview\.[a-z0-9]+\?"
        else:
            self.quality = ",q_" + str(self.quality)
            quality_pattern = r",q_\d+"
        self.quality_sub = text.re(quality_pattern).sub

    if self.intermediary:
        self.intermediary_subn = text.re(
            r"(/f/[^/]+/[^/]+)/v\d+/.*").subn

    # a string value starting with "image" restricts original-file
    # downloads to images
    original = self.original
    if isinstance(original, str) and \
            original.lower().startswith("image"):
        self.original = True
        self._update_content = self._update_content_image
    else:
        self._update_content = self._update_content_default

    # "all" enables previews for every file type, images included
    self.previews_images = (self.previews == "all")
    if self.previews_images:
        self.previews = True

    journals = cfg("journals", "html")
    if journals == "html":
        self.commit_journal = self._commit_journal_html
    elif journals == "text":
        self.commit_journal = self._commit_journal_text
    else:
        self.commit_journal = None
94
95
def request(self, url, **kwargs):
    """Send a request, waiting out CloudFront blocks

    'fatal' defaults to False here. A 403 response whose body
    contains "Request blocked." triggers a 300 second wait and a
    retry; any other response is returned as is.
    """
    kwargs.setdefault("fatal", False)
    while True:
        response = Extractor.request(self, url, **kwargs)
        blocked = (response.status_code == 403 and
                   b"Request blocked." in response.content)
        if not blocked:
            return response
        self.wait(seconds=300, reason="CloudFront block")
104
105
def skip(self, num):
    """Advance the start offset by 'num' and report 'num' as skipped"""
    self.offset = self.offset + num
    return num
108
109
def login(self):
    """Ensure login cookies are available

    Returns True if the cookies named in 'cookies_names' are already
    present or could be obtained with the configured credentials,
    and None when no username is configured.
    """
    if not self.cookies_check(self.cookies_names):
        username, password = self._get_auth_info()
        if not username:
            return None
        self.cookies_update(_login_impl(self, username, password))
    return True
117
118
def items(self):
    """Yield extractor messages for all deviations of this extractor

    The target user is resolved first: if no user details are found,
    the name is treated as a group and handled according to the
    "group" option. Afterwards every entry of self.deviations() is
    processed. Tuples are forwarded as Queue messages; deviation
    objects produce a Directory message followed by Url messages for
    content/download, video, flash, journal and preview, plus Queue
    messages for commenter avatars and linked sta.sh items when the
    corresponding options are enabled.

    Raises exception.AbortExtraction if the target is a group and the
    "group" option is "skip".
    """
    if self.user:
        if group := self.config("group", True):
            if user := _user_details(self, self.user):
                self.user = user["username"]
                self.group = False
            elif group == "skip":
                self.log.info("Skipping group '%s'", self.user)
                raise exception.AbortExtraction()
            else:
                self.subcategory = "group-" + self.subcategory
                self.group = True

    for deviation in self.deviations():
        # (url, data) tuples are handed to another extractor
        if isinstance(deviation, tuple):
            url, data = deviation
            yield Message.Queue, url, data
            continue

        if deviation["is_deleted"]:
            # prevent crashing in case the deviation really is
            # deleted
            self.log.debug(
                "Skipping %s (deleted)", deviation["deviationid"])
            continue

        tier_access = deviation.get("tier_access")
        if tier_access == "locked":
            self.log.debug(
                "Skipping %s (access locked)", deviation["deviationid"])
            continue

        if "premium_folder_data" in deviation:
            data = self._fetch_premium(deviation)
            if not data:
                continue
            deviation.update(data)

        self.prepare(deviation)
        yield Message.Directory, "", deviation

        if "content" in deviation:
            content = self._extract_content(deviation)
            yield self.commit(deviation, content)

        elif deviation["is_downloadable"]:
            content = self.api.deviation_download(deviation["deviationid"])
            deviation["is_original"] = True
            yield self.commit(deviation, content)

        if "videos" in deviation and deviation["videos"]:
            # "quality" values end in a one-character suffix;
            # pick the entry with the largest numeric part
            video = max(deviation["videos"],
                        key=lambda x: text.parse_int(x["quality"][:-1]))
            deviation["is_original"] = False
            yield self.commit(deviation, video)

        if "flash" in deviation:
            deviation["is_original"] = True
            yield self.commit(deviation, deviation["flash"])

        if self.commit_journal:
            if journal := self._extract_journal(deviation):
                if self.extra:
                    # kept for the sta.sh link scan below
                    deviation["_journal"] = journal["html"]
                deviation["is_original"] = True
                yield self.commit_journal(deviation, journal)

        if self.comments_avatars:
            for comment in deviation["comments"]:
                user = comment["user"]
                name = user["username"].lower()
                if user["usericon"] == DEFAULT_AVATAR:
                    self.log.debug(
                        "Skipping avatar of '%s' (default)", name)
                    continue
                _user_details.update(name, user)

                url = f"{self.root}/{name}/avatar/"
                comment["_extractor"] = DeviantartAvatarExtractor
                yield Message.Queue, url, comment

        if self.previews and "preview" in deviation:
            preview = deviation["preview"]
            deviation["is_preview"] = True
            if self.previews_images:
                yield self.commit(deviation, preview)
            else:
                # "extension" only exists if one of the commits above
                # ran; without it the file type is unknown and the
                # preview is skipped instead of raising KeyError
                extension = deviation.get("extension")
                mtype = mimetypes.guess_type(
                    "a." + extension, False)[0] if extension else None
                if mtype and not mtype.startswith("image/"):
                    yield self.commit(deviation, preview)
            del deviation["is_preview"]

        if not self.extra:
            continue

        # ref: https://www.deviantart.com
        #      /developers/http/v1/20210526/object/editor_text
        # the value of "features" is a JSON string with forward
        # slashes escaped
        text_content = \
            deviation["text_content"]["body"]["features"].replace(
                "\\/", "/") if "text_content" in deviation else None
        for txt in (text_content, deviation.get("description"),
                    deviation.get("_journal")):
            if txt is None:
                continue
            for match in DeviantartStashExtractor.pattern.finditer(txt):
                url = text.ensure_http_scheme(match[0])
                deviation["_extractor"] = DeviantartStashExtractor
                yield Message.Queue, url, deviation
229
230
def deviations(self):
    """Return an iterable of all relevant Deviation-objects

    Placeholder in the base class: it returns None.
    """
    return None
232
233
def prepare(self, deviation):
    """Adjust the contents of a Deviation-object

    Adds "index", "index_base36", "username", "_username", "date"
    and "filename", converts "published_time" to an integer and, if
    comments are enabled, attaches "comments".
    """
    if "index" not in deviation:
        try:
            if deviation["url"].startswith((
                "https://www.deviantart.com/stash/", "https://sta.sh",
            )):
                # stash items: the ID is the base36 value between the
                # first character and the first "-" of the file name
                filename = deviation["content"]["src"].split("/")[5]
                deviation["index_base36"] = filename.partition("-")[0][1:]
                deviation["index"] = id_from_base36(
                    deviation["index_base36"])
            else:
                # regular URLs end in "-<index>"
                deviation["index"] = text.parse_int(
                    deviation["url"].rpartition("-")[2])
        except (KeyError, IndexError):
            # missing fields, or a content URL with fewer path
            # segments than expected: use a neutral ID
            deviation["index"] = 0
            deviation["index_base36"] = "0"
    if "index_base36" not in deviation:
        deviation["index_base36"] = base36_from_id(deviation["index"])

    if self.user:
        username = self.user
    else:
        username = deviation["author"]["username"]
    deviation["username"] = username
    deviation["_username"] = username.lower()

    published = text.parse_int(deviation["published_time"])
    deviation["published_time"] = published
    deviation["date"] = self.parse_timestamp(published)

    if self.comments:
        # only query the API when the stats report any comments
        deviation["comments"] = (
            self._extract_comments(deviation["deviationid"], "deviation")
            if deviation["stats"]["comments"] else ()
        )

    # filename metadata
    sub = text.re(r"\W").sub
    deviation["filename"] = "".join((
        sub("_", deviation["title"].lower()), "_by_",
        sub("_", deviation["author"]["username"].lower()), "-d",
        deviation["index_base36"],
    ))
278
279
def commit(self, deviation, target):
    """Build the Url message for one downloadable 'target'

    A copy of 'target' is stored as deviation["target"]; the file
    extension is derived from the target's "filename" or, if that is
    missing or empty, from its "src" URL. "is_original" is only set
    when absent: True unless the URL contains "/v1/".
    """
    src = target["src"]
    name_source = target.get("filename") or src

    info = target.copy()
    info["filename"] = deviation["filename"]
    deviation["target"] = info

    extension = text.ext_from_url(name_source)
    deviation["extension"] = extension
    info["extension"] = extension

    deviation.setdefault("is_original", "/v1/" not in src)
    return Message.Url, src, deviation
289
290
def _commit_journal_html(self, deviation, journal):
    """Render a journal as a complete HTML document

    Returns a (Message.Url, html, deviation) tuple and sets the
    deviation's "extension" to "htm".
    """
    title = text.escape(deviation["title"])
    url = deviation["url"]
    thumbs = deviation.get("thumbs") or deviation.get("files")
    body = journal["html"]
    tmpl = self.utils("journal")

    shadow = ""
    if thumbs:
        shadow = tmpl.SHADOW.format_map(thumbs[0])

    if not body:
        self.log.warning("%s: Empty journal content", deviation["index"])

    # stylesheet source: explicit "css" entry, a leading <style>
    # element inside the markup, or none at all
    if "css" in journal:
        css = journal["css"]
        cls = "withskin"
    elif body.startswith("<style"):
        style, _, body = body.partition("</style>")
        css = style.partition(">")[2]
        cls = "withskin"
    else:
        css = ""
        cls = "journal-green"

    # a custom journal header is only looked for in the first 250
    # characters of the markup
    custom_marker = '<div class="boxtop journaltop">'
    if body.find(custom_marker, 0, 250) >= 0:
        needle = custom_marker
        header = tmpl.HEADER_CUSTOM.format(
            title=title, url=url, date=deviation["date"],
        )
    else:
        needle = '<div usr class="gr">'
        author = deviation["author"]["username"]
        urlname = deviation.get("username") or author.lower()
        header = tmpl.HEADER.format(
            title=title,
            url=url,
            userurl=f"{self.root}/{urlname}/",
            username=author,
            date=deviation["date"],
        )

    # replace the first marker with the header,
    # or wrap header and markup together if there is no marker
    if needle in body:
        body = body.replace(needle, header, 1)
    else:
        body = tmpl.HTML_EXTRA.format(header, body)

    document = tmpl.HTML.format(
        title=title, html=body, shadow=shadow, css=css, cls=cls)

    deviation["extension"] = "htm"
    return Message.Url, document, deviation
337
338
def _commit_journal_text(self, deviation, journal):
    """Render a journal as plain text

    Returns a (Message.Url, text, deviation) tuple and sets the
    deviation's "extension" to "txt".
    """
    html = journal["html"]
    if html:
        if html.startswith("<style"):
            # drop the leading <style> element
            html = html.partition("</style>")[2]
    else:
        self.log.warning("%s: Empty journal content", deviation["index"])

    # use the markup before the last "<script";
    # the whole markup if there is none
    before, _, after = html.rpartition("<script")
    lines = [
        text.unescape(text.remove_html(part))
        for part in (before or after).split("<br />")
    ]

    rendered = self.utils("journal").TEXT.format(
        title=deviation["title"],
        username=deviation["author"]["username"],
        date=deviation["date"],
        content="\n".join(lines),
    )

    deviation["extension"] = "txt"
    return Message.Url, rendered, deviation
358
359
def _extract_journal(self, deviation):
    """Return {"html": ...} for a journal-like deviation, or None

    Deviations with an "excerpt" are resolved from their web page
    (taken from "_page" or fetched with _limited_request());
    otherwise a "body" entry, if present, is removed from the
    deviation and returned.
    """
    if "excerpt" not in deviation:
        if "body" in deviation:
            return {"html": deviation.pop("body")}
        return None

    # NOTE: api.deviation_content() is not used here; an earlier note
    # in this code marks its 'html' as empty

    if "_page" in deviation:
        page = deviation.pop("_page")
    else:
        page = self._limited_request(deviation["url"]).text

    # first choice: journal markup embedded in the webpage
    markup = text.extr(
        page,
        "<h2>Literature Text</h2></span><div>",
        "</div></section></div></div>")
    if markup:
        return {"html": markup}

    self.log.debug("%s: Failed to extract journal HTML from webpage. "
                   "Falling back to __INITIAL_STATE__ markup.",
                   deviation["index"])

    # fallback: the string given to JSON.parse() for __INITIAL_STATE__
    raw = text.extr(
        page, 'window.__INITIAL_STATE__ = JSON.parse("', '");')
    raw = raw.replace("\\\\", "\\")
    raw = raw.replace("\\'", "'")
    raw = raw.replace('\\"', '"')
    state = util.json_loads(raw)

    entities = state["@@entities"]["deviation"]
    content = entities.popitem()[1]["textContent"]

    html = self._textcontent_to_html(deviation, content)
    if html:
        return {"html": html}
    return {"html": content["excerpt"].replace("\n", "<br />")}
396
397
def _textcontent_to_html(self, deviation, content):
398
html = content["html"]
399
markup = html.get("markup")
400
401
if not markup or markup[0] != "{":
402
return markup
403
404
if html["type"] == "tiptap":
405
try:
406
return self.utils("tiptap").to_html(markup)
407
except Exception as exc:
408
self.log.traceback(exc)
409
self.log.error("%s: '%s: %s'", deviation["index"],
410
exc.__class__.__name__, exc)
411
412
self.log.warning("%s: Unsupported '%s' markup.",
413
deviation["index"], html["type"])
414
415
def _extract_content(self, deviation):
416
content = deviation["content"]
417
418
if self.original and deviation["is_downloadable"]:
419
self._update_content(deviation, content)
420
return content
421
422
if self.jwt:
423
self._update_token(deviation, content)
424
return content
425
426
if content["src"].startswith("https://images-wixmp-"):
427
if self.intermediary and deviation["index"] <= 790677560:
428
# https://github.com/r888888888/danbooru/issues/4069
429
intermediary, count = self.intermediary_subn(
430
r"/intermediary\1", content["src"], 1)
431
if count:
432
deviation["is_original"] = False
433
deviation["_fallback"] = (content["src"],)
434
content["src"] = intermediary
435
if self.quality:
436
content["src"] = self.quality_sub(
437
self.quality, content["src"], 1)
438
439
return content
440
441
def _find_folder(self, folders, name, uuid):
442
if uuid.isdecimal():
443
match = text.re(
444
"(?i)" + name.replace("-", "[^a-z0-9]+") + "$").match
445
for folder in folders:
446
if match(folder["name"]):
447
return folder
448
elif folder.get("has_subfolders"):
449
for subfolder in folder["subfolders"]:
450
if match(subfolder["name"]):
451
return subfolder
452
else:
453
for folder in folders:
454
if folder["folderid"] == uuid:
455
return folder
456
elif folder.get("has_subfolders"):
457
for subfolder in folder["subfolders"]:
458
if subfolder["folderid"] == uuid:
459
return subfolder
460
raise exception.NotFoundError("folder")
461
462
def _folder_urls(self, folders, category, extractor):
463
base = f"{self.root}/{self.user}/{category}/"
464
for folder in folders:
465
folder["_extractor"] = extractor
466
url = f"{base}{folder['folderid']}/{folder['name']}"
467
yield url, folder
468
469
def _update_content_default(self, deviation, content):
470
if "premium_folder_data" in deviation or deviation.get("is_mature"):
471
public = False
472
else:
473
public = None
474
475
data = self.api.deviation_download(deviation["deviationid"], public)
476
content.update(data)
477
deviation["is_original"] = True
478
479
def _update_content_image(self, deviation, content):
480
data = self.api.deviation_download(deviation["deviationid"])
481
url = data["src"].partition("?")[0]
482
mtype = mimetypes.guess_type(url, False)[0]
483
if mtype and mtype.startswith("image/"):
484
content.update(data)
485
deviation["is_original"] = True
486
487
def _update_token(self, deviation, content):
488
"""Replace JWT to be able to remove width/height limits
489
490
All credit goes to @Ironchest337
491
for discovering and implementing this method
492
"""
493
url, sep, _ = content["src"].partition("/v1/")
494
if not sep:
495
return
496
497
# 'images-wixmp' returns 401 errors, but just 'wixmp' still works
498
url = url.replace("//images-wixmp", "//wixmp", 1)
499
500
# header = b'{"typ":"JWT","alg":"none"}'
501
payload = (
502
b'{"sub":"urn:app:","iss":"urn:app:","obj":[[{"path":"/f/' +
503
url.partition("/f/")[2].encode() +
504
b'"}]],"aud":["urn:service:file.download"]}'
505
)
506
507
deviation["_fallback"] = (content["src"],)
508
deviation["is_original"] = True
509
pl = binascii.b2a_base64(payload).rstrip(b'=\n').decode()
510
content["src"] = (
511
# base64 of 'header' is precomputed as 'eyJ0eX...'
512
f"{url}?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.{pl}.")
513
514
def _extract_comments(self, target_id, target_type="deviation"):
515
results = None
516
comment_ids = [None]
517
518
while comment_ids:
519
comments = self.api.comments(
520
target_id, target_type, comment_ids.pop())
521
522
if results:
523
results.extend(comments)
524
else:
525
results = comments
526
527
# parent comments, i.e. nodes with at least one child
528
parents = {c["parentid"] for c in comments}
529
# comments with more than one reply
530
replies = {c["commentid"] for c in comments if c["replies"]}
531
# add comment UUIDs with replies that are not parent to any node
532
comment_ids.extend(replies - parents)
533
534
return results
535
536
def _limited_request(self, url, **kwargs):
    """Send a request, at most one every 2 seconds

    The time of the previous call is stored on DeviantartExtractor
    itself; the remaining part of the 2 second interval is slept
    before the request is sent.
    """
    elapsed = time.time() - DeviantartExtractor._last_request
    if elapsed < 2.0:
        self.sleep(2.0 - elapsed, "request")
    try:
        return self.request(url, **kwargs)
    finally:
        pass
544
545
def _fetch_premium(self, deviation):
546
try:
547
return self._premium_cache[deviation["deviationid"]]
548
except KeyError:
549
pass
550
551
if not self.api.refresh_token_key:
552
self.log.warning(
553
"Unable to access premium content (no refresh-token)")
554
self._fetch_premium = lambda _: None
555
return None
556
557
dev = self.api.deviation(deviation["deviationid"], False)
558
folder = deviation["premium_folder_data"]
559
username = dev["author"]["username"]
560
561
# premium_folder_data is no longer present when user has access (#5063)
562
has_access = ("premium_folder_data" not in dev) or folder["has_access"]
563
564
if not has_access and folder["type"] == "watchers" and \
565
self.config("auto-watch"):
566
if self.unwatch is not None:
567
self.unwatch.append(username)
568
if self.api.user_friends_watch(username):
569
has_access = True
570
self.log.info(
571
"Watching %s for premium folder access", username)
572
else:
573
self.log.warning(
574
"Error when trying to watch %s. "
575
"Try again with a new refresh-token", username)
576
577
if has_access:
578
self.log.info("Fetching premium folder data")
579
else:
580
self.log.warning("Unable to access premium content (type: %s)",
581
folder["type"])
582
583
cache = self._premium_cache
584
for dev in self.api.gallery(
585
username, folder["gallery_id"], public=False):
586
cache[dev["deviationid"]] = dev if has_access else None
587
588
return cache.get(deviation["deviationid"])
589
590
def _unwatch_premium(self):
591
for username in self.unwatch:
592
self.log.info("Unwatching %s", username)
593
self.api.user_friends_unwatch(username)
594
595
def _eclipse_to_oauth(self, eclipse_api, deviations):
596
for obj in deviations:
597
deviation = obj["deviation"] if "deviation" in obj else obj
598
deviation_uuid = eclipse_api.deviation_extended_fetch(
599
deviation["deviationId"],
600
deviation["author"]["username"],
601
"journal" if deviation["isJournal"] else "art",
602
)["deviation"]["extended"]["deviationUuid"]
603
yield self.api.deviation(deviation_uuid)
604
605
def _unescape_json(self, json):
606
return json.replace('\\"', '"') \
607
.replace("\\'", "'") \
608
.replace("\\\\", "\\")
609
610
611
class DeviantartUserExtractor(Dispatch, DeviantartExtractor):
    """Extractor for an artist's user profile"""
    pattern = BASE_PATTERN + r"/?$"
    example = "https://www.deviantart.com/USER"

    def items(self):
        """Dispatch to the extractors for the sections of a profile

        ("gallery",) is passed as second argument to
        _dispatch_extractors() -- presumably the default selection;
        confirm against Dispatch.
        """
        profile = f"{self.root}/{self.user}/"
        sections = (
            (DeviantartAvatarExtractor, "avatar"),
            (DeviantartBackgroundExtractor, "banner"),
            (DeviantartGalleryExtractor, "gallery"),
            (DeviantartScrapsExtractor, "gallery/scraps"),
            (DeviantartJournalExtractor, "posts"),
            (DeviantartStatusExtractor, "posts/statuses"),
            (DeviantartFavoriteExtractor, "favourites"),
        )
        targets = tuple(
            (extractor, profile + path)
            for extractor, path in sections
        )
        return self._dispatch_extractors(targets, ("gallery",))
627
628
629
###############################################################################
630
# OAuth #######################################################################
631
632
class DeviantartGalleryExtractor(DeviantartExtractor):
    """Extractor for all deviations from an artist's gallery"""
    subcategory = "gallery"
    pattern = (BASE_PATTERN + r"/gallery"
               r"(?:/all|/recommended-for-you)?"
               r"/?(\?(?!q=|catpath=scraps).*)?$")
    example = "https://www.deviantart.com/USER/gallery/"
    archive_fmt = "g_{_username}_{index}.{extension}"

    def deviations(self):
        """Return the flat gallery listing or one URL per folder

        Folder URLs are used for groups and when 'flat' is disabled.
        """
        if self.group or not self.flat:
            folders = self.api.gallery_folders(self.user)
            return self._folder_urls(
                folders, "gallery", DeviantartFolderExtractor)
        return self.api.gallery_all(self.user, self.offset)
646
647
648
class DeviantartAvatarExtractor(DeviantartExtractor):
    """Extractor for an artist's avatar"""
    subcategory = "avatar"
    pattern = BASE_PATTERN + r"/avatar"
    example = "https://www.deviantart.com/USER/avatar/"
    archive_fmt = "a_{_username}_{index}"

    def deviations(self):
        """Return pseudo-deviations for the user's avatar

        Yields nothing for unknown users and default avatars. Without
        a "formats" option the "avatars-big" variant of the user icon
        is used; otherwise one entry per requested format is built.
        """
        name = self.user.lower()
        user = _user_details(self, name)
        if not user:
            return ()

        icon = user["usericon"]
        if icon == DEFAULT_AVATAR:
            self.log.debug("Skipping avatar of '%s' (default)", name)
            return ()

        # the query string of the icon URL serves as index
        index = icon.rpartition("?")[2] if "?" in icon else "0"

        formats = self.config("formats")
        if not formats:
            big = icon.replace("/avatars/", "/avatars-big/", 1)
            return (self._make_deviation(big, user, index, ""),)

        if isinstance(formats, str):
            formats = formats.replace(" ", "").split(",")

        results = []
        for entry in formats:
            # "<variant>.<ext>" or just "<ext>"
            variant, _, ext = entry.rpartition(".")
            suffix = "-" + variant if variant else variant
            url = (f"https://a.deviantart.net/avatars{suffix}"
                   f"/{name[0]}/{name[1]}/{name}.{ext}?{index}")
            results.append(self._make_deviation(url, user, index, suffix))
        return results

    def _make_deviation(self, url, user, index, fmt):
        """Build a minimal deviation object describing an avatar file"""
        deviation = {
            "author"         : user,
            "da_category"    : "avatar",
            "index"          : text.parse_int(index),
            "is_deleted"     : False,
            "is_downloadable": False,
            "published_time" : 0,
            "title"          : "avatar" + fmt,
            "stats"          : {"comments": 0},
            "content"        : {"src": url},
        }
        return deviation
700
701
702
class DeviantartBackgroundExtractor(DeviantartExtractor):
    """Extractor for an artist's banner"""
    subcategory = "background"
    archive_fmt = "b_{index}"
    pattern = BASE_PATTERN + r"/ba(?:nner|ckground)"
    example = "https://www.deviantart.com/USER/banner/"

    def deviations(self):
        """Return the user's cover deviation as 1-tuple, or () if none

        Any failure while fetching or reading the profile is treated
        as "no banner"; the reason is written to the debug log
        instead of being dropped silently.
        """
        try:
            profile = self.api.user_profile(self.user.lower())
            return (profile["cover_deviation"]["cover_deviation"],)
        except Exception as exc:
            self.log.debug("No banner for '%s' (%s: %s)", self.user,
                           exc.__class__.__name__, exc)
            return ()
715
716
717
class DeviantartFolderExtractor(DeviantartExtractor):
    """Extractor for deviations inside an artist's gallery folder"""
    subcategory = "folder"
    pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/([^/?#]+)"
    example = "https://www.deviantart.com/USER/gallery/12345/TITLE"
    directory_fmt = ("{category}", "{username}", "{folder[title]}")
    archive_fmt = "F_{folder[uuid]}_{index}.{extension}"

    def __init__(self, match):
        """Store folder ID (group 3) and folder name (group 4)"""
        DeviantartExtractor.__init__(self, match)
        self.folder_id = match[3]
        self.folder_name = match[4]
        self.folder = None

    def deviations(self):
        """Yield subfolder URLs (if enabled), then the folder's items

        Also fills 'self.folder' and, for subfolders, switches the
        archive and directory formats to their parent-based variants.
        """
        folders = self.api.gallery_folders(self.user)
        folder = self._find_folder(folders, self.folder_name, self.folder_id)

        # Leaving this here for backwards compatibility
        info = {
            "title": folder["name"],
            "uuid" : folder["folderid"],
            "index": self.folder_id,
            "owner": self.user,
            "parent_uuid": folder["parent"],
        }
        self.folder = info

        # "subfolder"/"parent_folder" are set on subfolder objects
        # by the has_subfolders branch below
        if folder.get("subfolder"):
            info["parent_folder"] = folder["parent_folder"]
            self.archive_fmt = "F_{folder[parent_uuid]}_{index}.{extension}"

            directories = ["{category}", "{username}",
                           "{folder[parent_folder]}"]
            if not self.flat:
                directories.append("{folder[title]}")
            self.directory_fmt = tuple(directories)

        if folder.get("has_subfolders") and self.config("subfolders", True):
            subfolders = folder["subfolders"]
            for subfolder in subfolders:
                subfolder["parent_folder"] = folder["name"]
                subfolder["subfolder"] = True
            yield from self._folder_urls(
                subfolders, "gallery", DeviantartFolderExtractor)

        yield from self.api.gallery(self.user, folder["folderid"], self.offset)

    def prepare(self, deviation):
        """Add the folder metadata to a prepared deviation"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["folder"] = self.folder
768
769
770
class DeviantartStashExtractor(DeviantartExtractor):
    """Extractor for sta.sh-ed deviations"""
    subcategory = "stash"
    archive_fmt = "{index}.{extension}"
    pattern = (r"(?:https?://)?(?:(?:www\.)?deviantart\.com/stash|sta\.s(h))"
               r"/([a-z0-9]+)")
    example = "https://www.deviantart.com/stash/abcde"

    skip = Extractor.skip

    def __init__(self, match):
        """Initialize without a user; stash URLs carry no username"""
        DeviantartExtractor.__init__(self, match)
        self.user = ""

    def deviations(self, stash_id=None, stash_data=None):
        """Yield the deviation(s) behind a stash ID

        Without arguments the ID comes from the matched URL. IDs
        starting with "0" are handled as single items; for anything
        else the page's stash links are followed recursively, passing
        the page's stash metadata along as 'stash_data'.
        """
        if stash_id is None:
            # group 1 is only set for "sta.sh" URLs
            legacy_url, stash_id = self.groups
        else:
            legacy_url = False

        if legacy_url and stash_id[0] == "2":
            # take the final ID from the URL of the response
            response = self._limited_request("https://sta.sh/" + stash_id)
            stash_id = response.url.rpartition("/")[2]
            page = response.text
        else:
            page = self._limited_request(
                "https://www.deviantart.com/stash/" + stash_id).text

        if stash_id[0] == "0":
            uuid = text.extr(page, '//deviation/', '"')
            if uuid:
                deviation = self.api.deviation(uuid)
                # keep the page for _extract_journal()
                deviation["_page"] = page
                deviation["index"] = text.parse_int(text.extr(
                    page, '\\"deviationId\\":', ','))

                deviation["stash_id"] = stash_id
                if stash_data:
                    parent = stash_data["folder"]
                    deviation["stash_name"] = parent["name"]
                    deviation["stash_folder"] = parent["folderId"]
                    deviation["stash_parent"] = parent["parentId"] or 0
                    deviation["stash_description"] = \
                        parent["richDescription"]["excerpt"]
                else:
                    deviation["stash_name"] = ""
                    deviation["stash_description"] = ""
                    deviation["stash_folder"] = 0
                    deviation["stash_parent"] = 0

                yield deviation
                return

        stash_data = text.extr(page, ',\\"stash\\":', ',\\"@@')
        if stash_data:
            if stash_data.endswith(":{}"):
                # cut after the last "}" before the trailing "{}"
                end = stash_data.rfind("}", None, -2) + 1
                stash_data = stash_data[:end]
            stash_data = util.json_loads(self._unescape_json(stash_data))

        links = text.extract_iter(
            page, 'href="https://www.deviantart.com/stash/', '"')
        for sid in links:
            if sid == stash_id or sid.endswith("#comments"):
                continue
            yield from self.deviations(sid, stash_data)
833
834
835
class DeviantartFavoriteExtractor(DeviantartExtractor):
    """Extractor for an artist's favorites"""
    subcategory = "favorite"
    pattern = BASE_PATTERN + r"/favourites(?:/all|/?\?catpath=)?/?$"
    example = "https://www.deviantart.com/USER/favourites/"
    directory_fmt = ("{category}", "{username}", "Favourites")
    archive_fmt = "f_{_username}_{index}.{extension}"

    def deviations(self):
        """Return all favorites, or one URL per collection if not flat"""
        if not self.flat:
            folders = self.api.collections_folders(self.user)
            return self._folder_urls(
                folders, "favourites", DeviantartCollectionExtractor)
        return self.api.collections_all(self.user, self.offset)
849
850
851
class DeviantartCollectionExtractor(DeviantartExtractor):
    """Extractor for a single favorite collection"""
    subcategory = "collection"
    pattern = BASE_PATTERN + r"/favourites/([^/?#]+)/([^/?#]+)"
    example = "https://www.deviantart.com/USER/favourites/12345/TITLE"
    directory_fmt = ("{category}", "{username}", "Favourites",
                     "{collection[title]}")
    archive_fmt = "C_{collection[uuid]}_{index}.{extension}"

    def __init__(self, match):
        """Store collection ID (group 3) and name (group 4)"""
        DeviantartExtractor.__init__(self, match)
        self.collection_id = match[3]
        self.collection_name = match[4]
        self.collection = None

    def deviations(self):
        """Locate the collection and return its deviations"""
        found = self._find_folder(
            self.api.collections_folders(self.user),
            self.collection_name, self.collection_id)
        uuid = found["folderid"]
        self.collection = {
            "title": found["name"],
            "uuid" : uuid,
            "index": self.collection_id,
            "owner": self.user,
        }
        return self.api.collections(self.user, uuid, self.offset)

    def prepare(self, deviation):
        """Add the collection metadata to a prepared deviation"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["collection"] = self.collection
881
882
883
class DeviantartJournalExtractor(DeviantartExtractor):
    """Extractor for an artist's journals"""
    subcategory = "journal"
    pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$"
    example = "https://www.deviantart.com/USER/posts/journals/"
    directory_fmt = ("{category}", "{username}", "Journal")
    archive_fmt = "j_{_username}_{index}.{extension}"

    def deviations(self):
        """Return the user's journals, starting at the current offset"""
        journals = self.api.browse_user_journals(self.user, self.offset)
        return journals
893
894
895
class DeviantartStatusExtractor(DeviantartExtractor):
    """Extractor for an artist's status updates"""
    subcategory = "status"
    pattern = BASE_PATTERN + r"/posts/statuses"
    example = "https://www.deviantart.com/USER/posts/statuses/"
    directory_fmt = ("{category}", "{username}", "Status")
    filename_fmt = "{category}_{index}_{title}_{date}.{extension}"
    archive_fmt = "S_{_username}_{index}.{extension}"

    def deviations(self):
        """Yield statuses together with the items shared in them"""
        statuses = self.api.user_statuses(self.user, self.offset)
        for status in statuses:
            yield from self.process_status(status)

    def process_status(self, status):
        """Yield a status' shared items first, then the status itself

        Shared statuses are processed recursively. A status flagged
        as deleted is logged and not yielded.
        """
        shared = status.get("items") or ()  # do not trust is_share
        for item in shared:
            # shared deviations/statuses
            if "deviation" in item:
                yield item["deviation"].copy()
            if "status" in item:
                yield from self.process_status(item["status"].copy())

        # assume is_deleted == true means necessary fields are missing
        if not status["is_deleted"]:
            yield status
        else:
            self.log.warning(
                "Skipping status %s (deleted)", status.get("statusid"))

    def prepare(self, deviation):
        """Adjust a status object so it can be handled like a deviation

        Objects with a "deviationid" are regular deviations and go
        through the base class implementation instead.
        """
        if "deviationid" in deviation:
            return DeviantartExtractor.prepare(self, deviation)

        try:
            # last non-empty path segment of the status URL
            segments = deviation["url"].split("/")
            deviation["index"] = text.parse_int(segments[-1] or segments[-2])
        except KeyError:
            deviation["index"] = 0

        if self.user:
            username = self.user
        else:
            username = deviation["author"]["username"]
        deviation["username"] = username
        deviation["_username"] = username.lower()

        date = self.parse_datetime_iso(deviation["ts"])
        deviation["date"] = date
        deviation["published_time"] = int(dt.to_ts(date))

        # fixed values for fields a status object does not carry
        deviation.update((
            ("da_category", "Status"),
            ("category_path", "status"),
            ("is_downloadable", False),
            ("title", "Status Update"),
        ))

        comments_count = deviation.pop("comments_count", 0)
        deviation["stats"] = {"comments": comments_count}
        if self.comments:
            if comments_count:
                comments = self._extract_comments(
                    deviation["statusid"], "status")
            else:
                comments = ()
            deviation["comments"] = comments
954
955
956
class DeviantartTagExtractor(DeviantartExtractor):
    """Extractor for deviations from tag searches"""
    subcategory = "tag"
    pattern = r"(?:https?://)?www\.deviantart\.com/tag/([^/?#]+)"
    example = "https://www.deviantart.com/tag/TAG"
    directory_fmt = ("{category}", "Tags", "{search_tags}")
    archive_fmt = "T_{search_tags}_{index}.{extension}"

    def __init__(self, match):
        """Store the unquoted tag; group 1 is the tag, not a user"""
        DeviantartExtractor.__init__(self, match)
        self.user = ""
        self.tag = text.unquote(match[1])

    def deviations(self):
        """Return the deviations found for the tag"""
        results = self.api.browse_tags(self.tag, self.offset)
        return results

    def prepare(self, deviation):
        """Add the searched tag to a prepared deviation"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["search_tags"] = self.tag
975
976
977
class DeviantartWatchExtractor(DeviantartExtractor):
    """Extractor for Deviations from watched users"""
    subcategory = "watch"
    # the two empty groups stand in for the user groups of BASE_PATTERN
    pattern = (
        r"(?:https?://)?(?:www\.)?deviantart\.com"
        r"/(?:watch/deviations|notifications/watch)()()"
    )
    example = "https://www.deviantart.com/watch/deviations"

    def deviations(self):
        """Return the deviations posted by watched users"""
        api = self.api
        return api.browse_deviantsyouwatch()
986
987
988
class DeviantartWatchPostsExtractor(DeviantartExtractor):
    """Extractor for Posts from watched users"""
    subcategory = "watch-posts"
    # the two empty groups stand in for the user groups of BASE_PATTERN
    pattern = r"(?:https?://)?(?:www\.)?deviantart\.com/watch/posts()()"
    example = "https://www.deviantart.com/watch/posts"

    def deviations(self):
        """Return the posts published by watched users"""
        api = self.api
        return api.browse_posts_deviantsyouwatch()
996
997
998
###############################################################################
999
# Eclipse #####################################################################
1000
1001
class DeviantartDeviationExtractor(DeviantartExtractor):
    """Extractor for single deviations"""
    subcategory = "deviation"
    archive_fmt = "g_{_username}_{index}.{extension}"
    pattern = (BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)"
               r"|(?:https?://)?(?:www\.)?(?:fx)?deviantart\.com/"
               r"(?:view/|deviation/|view(?:-full)?\.php/*\?(?:[^#]+&)?id=)"
               r"(\d+)"  # bare deviation ID without slug
               r"|(?:https?://)?fav\.me/d([0-9a-z]+)")  # base36
    example = "https://www.deviantart.com/UsER/art/TITLE-12345"

    skip = Extractor.skip

    def __init__(self, match):
        """Extract deviation type and numeric ID from the matched URL"""
        DeviantartExtractor.__init__(self, match)
        self.type = match[3]
        # group 4: ID from a /USER/art|journal/ URL, group 5: bare ID,
        # group 6: base36 ID from a fav.me short link
        self.deviation_id = (
            match[4] or match[5] or id_from_base36(match[6]))

    def deviations(self):
        """Yield the deviation once per file it contains

        The deviation page is fetched to find the deviation's UUID, which
        is then used to query the OAuth API. When the page lists
        additional media, the same deviation object is yielded again for
        each of those files with updated 'src', 'num' and 'index_file'.
        """
        if self.user:
            kind = self.type or "art"
            url = f"{self.root}/{self.user}/{kind}/{self.deviation_id}"
        else:
            url = f"{self.root}/view/{self.deviation_id}/"

        page = self._limited_request(url, notfound=True).text
        uuid = text.extr(page, '"deviationUuid\\":\\"', '\\')
        if not uuid:
            raise exception.NotFoundError("deviation")

        deviation = self.api.deviation(uuid)
        deviation["_page"] = page
        deviation["index_file"] = 0
        deviation["num"] = deviation["count"] = 1

        media_json = text.extr(page, ',\\"additionalMedia\\":', '}],\\"')
        if not media_json:
            yield deviation
            return

        # several files: make file and archive names unique per file
        self.filename_fmt = ("{category}_{index}_{index_file}_{title}_"
                             "{num:>02}.{extension}")
        self.archive_fmt = ("g_{_username}_{index}{index_file:?_//}."
                            "{extension}")

        # re-append the terminator consumed by text.extr()
        media_json = self._unescape_json(media_json) + "}]"
        posts = util.json_loads(media_json)
        deviation["count"] = 1 + len(posts)
        yield deviation

        for post in posts:
            deviation["content"]["src"] = eclipse_media(
                post["media"], "fullview")[0]
            deviation["num"] += 1
            deviation["index_file"] = post["fileId"]
            # Download only works on purchased materials - no way to check
            deviation["is_downloadable"] = False
            yield deviation
1060
1061
1062
class DeviantartScrapsExtractor(DeviantartExtractor):
    """Extractor for an artist's scraps"""
    subcategory = "scraps"
    directory_fmt = ("{category}", "{username}", "Scraps")
    archive_fmt = "s_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
    example = "https://www.deviantart.com/USER/gallery/scraps"

    def deviations(self):
        """Return the user's scraps, listed through the Eclipse API"""
        self.login()

        api = DeviantartEclipseAPI(self)
        scraps = api.gallery_scraps(self.user, self.offset)
        return self._eclipse_to_oauth(api, scraps)
1076
1077
1078
class DeviantartSearchExtractor(DeviantartExtractor):
    """Extractor for deviantart search results"""
    subcategory = "search"
    directory_fmt = ("{category}", "Search", "{search_tags}")
    archive_fmt = "Q_{search_tags}_{index}.{extension}"
    pattern = (r"(?:https?://)?www\.deviantart\.com"
               r"/search(?:/deviations)?/?\?([^#]+)")
    example = "https://www.deviantart.com/search?q=QUERY"
    skip = Extractor.skip

    def __init__(self, match):
        """Parse the query string captured by the URL pattern"""
        DeviantartExtractor.__init__(self, match)
        # the base class stored the first match group (here: the query
        # string) in 'self.user'; parse it and reset the username
        query = text.parse_query(self.user)
        self.query = query
        self.search = query.get("q", "")
        self.user = ""

    def deviations(self):
        """Return search results

        The Eclipse API is used when logged in, otherwise results are
        scraped from the HTML search pages.
        """
        logged_in = self.login()
        api = DeviantartEclipseAPI(self)

        if logged_in:
            search = api.search_deviations
        else:
            search = self._search_html
        return self._eclipse_to_oauth(api, search(self.query))

    def prepare(self, deviation):
        """Run the base preparation and add the search term"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["search_tags"] = self.search

    def _search_html(self, params):
        """Yield minimal deviation objects found on HTML search pages

        'params' is updated in place with the cursor of each next page.
        """
        url = self.root + "/search"
        find = text.re(r'''href="https://www.deviantart.com/([^/?#]+)'''
                       r'''/(art|journal)/(?:[^"]+-)?(\d+)''').findall

        while True:
            response = self.request(url, params=params)
            if response.history and "/users/login" in response.url:
                raise exception.AbortExtraction("HTTP redirect to login page")

            page = response.text
            # keep every third link and drop the trailing ones
            # NOTE(review): presumably each result is linked three times
            # and the last links are not search results - confirm
            for user, kind, deviation_id in find(page)[:-3:3]:
                yield {
                    "deviationId": deviation_id,
                    "author": {"username": user},
                    "isJournal": kind == "journal",
                }

            cursor = text.extr(page, r'\"cursor\":\"', '\\')
            if not cursor:
                return
            params["cursor"] = cursor
1128
1129
1130
class DeviantartGallerySearchExtractor(DeviantartExtractor):
    """Extractor for deviantart gallery searches"""
    subcategory = "gallery-search"
    archive_fmt = "g_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/gallery/?\?(q=[^#]+)"
    example = "https://www.deviantart.com/USER/gallery?q=QUERY"

    def __init__(self, match):
        """Keep the raw query string; it is parsed in deviations()"""
        DeviantartExtractor.__init__(self, match)
        self.query = match[3]

    def deviations(self):
        """Return the results of searching the user's gallery"""
        self.login()

        api = DeviantartEclipseAPI(self)
        query = text.parse_query(self.query)
        self.search = term = query["q"]
        order = query.get("sort", "most-recent")

        results = api.galleries_search(
            self.user, term, self.offset, order)
        return self._eclipse_to_oauth(api, results)

    def prepare(self, deviation):
        """Run the base preparation and add the search term"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["search_tags"] = self.search
1159
1160
1161
class DeviantartFollowingExtractor(DeviantartExtractor):
    """Extractor for user's watched users"""
    subcategory = "following"
    pattern = BASE_PATTERN + "/(?:about#)?watching"
    example = "https://www.deviantart.com/USER/about#watching"

    def items(self):
        """Queue the profile of every user returned by user_friends()"""
        friends = DeviantartOAuthAPI(self).user_friends(self.user)

        for friend in friends:
            name = friend["user"]["username"]
            friend["_extractor"] = DeviantartUserExtractor
            yield Message.Queue, f"{self.root}/{name}", friend
1174
1175
1176
###############################################################################
1177
# API Interfaces ##############################################################
1178
1179
class DeviantartOAuthAPI():
    """Interface for the DeviantArt OAuth API

    https://www.deviantart.com/developers/http/v1/20160316

    Requests are sent either with a public (client credentials) or a
    private (refresh token) access token; see _call() and _pagination().
    """
    CLIENT_ID = "5388"
    CLIENT_SECRET = "76b08c69cfb27f26d6161f9ab6d061a1"

    def __init__(self, extractor):
        """Read all API related options from 'extractor'"""
        self.extractor = extractor
        self.log = extractor.log
        self.headers = {"dA-minor-version": "20210526"}
        self._warn_429 = True

        # 'delay' grows on 429 responses and shrinks again after
        # successful ones, but never below 'delay_min' (see _call())
        self.delay = extractor.config("wait-min", 0)
        self.delay_min = max(2, self.delay)

        # the API expects "true"/"false" strings
        self.mature = extractor.config("mature", "true")
        if not isinstance(self.mature, str):
            self.mature = "true" if self.mature else "false"

        self.strategy = extractor.config("pagination")
        self.folders = extractor.config("folders", False)
        self.public = extractor.config("public", True)

        if client_id := extractor.config("client-id"):
            self.client_id = str(client_id)
            self.client_secret = extractor.config("client-secret")
        else:
            self.client_id = self.CLIENT_ID
            self.client_secret = self.CLIENT_SECRET

        # without an explicit refresh token, look for a cached one stored
        # under the key "#<client-id>"
        token = extractor.config("refresh-token")
        if token is None or token == "cache":
            token = "#" + self.client_id
            if not _refresh_token_cache(token):
                token = None
        self.refresh_token_key = token

        metadata = extractor.config("metadata", False)
        if not metadata:
            metadata = bool(extractor.extra)
        if metadata:
            self.metadata = True

            # normalize 'metadata' to a sequence of extension names
            if isinstance(metadata, str):
                if metadata == "all":
                    metadata = ("submission", "camera", "stats",
                                "collection", "gallery")
                else:
                    metadata = metadata.replace(" ", "").split(",")
            elif not isinstance(metadata, (list, tuple)):
                metadata = ()

            self._metadata_params = {"mature_content": self.mature}
            self._metadata_public = None
            if metadata:
                # extended metadata
                self.limit = 10
                for param in metadata:
                    self._metadata_params["ext_" + param] = "1"
                if "ext_collection" in self._metadata_params or \
                        "ext_gallery" in self._metadata_params:
                    if token:
                        self._metadata_public = False
                    else:
                        self.log.error("'collection' and 'gallery' metadata "
                                       "require a refresh token")
            else:
                # base metadata
                self.limit = 50
        else:
            self.metadata = False
            self.limit = None

        self.log.debug(
            "Using %s API credentials (client-id %s)",
            "default" if self.client_id == self.CLIENT_ID else "custom",
            self.client_id,
        )

    def browse_deviantsyouwatch(self, offset=0):
        """Yield deviations from users you watch"""
        endpoint = "/browse/deviantsyouwatch"
        params = {"limit": 50, "offset": offset,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params, public=False)

    def browse_posts_deviantsyouwatch(self, offset=0):
        """Yield posts from users you watch"""
        endpoint = "/browse/posts/deviantsyouwatch"
        params = {"limit": 50, "offset": offset,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params, public=False, unpack=True)

    def browse_tags(self, tag, offset=0):
        """Yield deviations tagged with 'tag'"""
        endpoint = "/browse/tags"
        params = {
            "tag": tag,
            "offset": offset,
            "limit": 50,
            "mature_content": self.mature,
        }
        return self._pagination(endpoint, params)

    def browse_user_journals(self, username, offset=0):
        """Yield the journals among a user's profile posts

        'offset' is applied by skipping that many journals of the
        filtered stream, not by passing it to the API.
        """
        journals = filter(
            lambda post: "/journal/" in post["url"],
            self.user_profile_posts(username))
        if offset:
            journals = util.advance(journals, offset)
        return journals

    def collections(self, username, folder_id, offset=0):
        """Yield all Deviation-objects contained in a collection folder"""
        endpoint = "/collections/" + folder_id
        params = {"username": username, "offset": offset, "limit": 24,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params)

    def collections_all(self, username, offset=0):
        """Yield all deviations in a user's collection"""
        endpoint = "/collections/all"
        params = {"username": username, "offset": offset, "limit": 24,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params)

    @memcache(keyarg=1)
    def collections_folders(self, username, offset=0):
        """Return a list of all collection folders of a specific user"""
        endpoint = "/collections/folders"
        params = {"username": username, "offset": offset, "limit": 50,
                  "mature_content": self.mature}
        return self._pagination_list(endpoint, params)

    def comments(self, target_id, target_type="deviation",
                 comment_id=None, offset=0):
        """Fetch comments posted on a target"""
        endpoint = f"/comments/{target_type}/{target_id}"
        params = {
            "commentid": comment_id,
            "maxdepth": "5",
            "offset": offset,
            "limit": 50,
            "mature_content": self.mature,
        }
        return self._pagination_list(endpoint, params=params, key="thread")

    def deviation(self, deviation_id, public=None):
        """Query and return info about a single Deviation

        A mature deviation is requested a second time with the private
        token when 'public' was not given and a refresh token exists.
        """
        endpoint = "/deviation/" + deviation_id

        deviation = self._call(endpoint, public=public)
        if deviation.get("is_mature") and public is None and \
                self.refresh_token_key:
            deviation = self._call(endpoint, public=False)

        if self.metadata:
            self._metadata((deviation,))
        if self.folders:
            self._folders((deviation,))
        return deviation

    def deviation_content(self, deviation_id, public=None):
        """Get extended content of a single Deviation"""
        endpoint = "/deviation/content"
        params = {"deviationid": deviation_id}
        content = self._call(endpoint, params=params, public=public)
        # NOTE(review): this literal is whitespace sensitive; confirm the
        # number of leading spaces against the canonical source file
        if public and content["html"].startswith(
                ' <span class=\"username-with-symbol'):
            if self.refresh_token_key:
                content = self._call(endpoint, params=params, public=False)
            else:
                self.log.warning("Private Journal")
        return content

    def deviation_download(self, deviation_id, public=None):
        """Get the original file download (if allowed)

        When the first attempt raises and a refresh token is available,
        the request is repeated with the private token.
        """
        endpoint = "/deviation/download/" + deviation_id
        params = {"mature_content": self.mature}

        try:
            return self._call(
                endpoint, params=params, public=public, log=False)
        except Exception:
            if not self.refresh_token_key:
                raise
            return self._call(endpoint, params=params, public=False)

    def deviation_metadata(self, deviations):
        """Fetch deviation metadata for a set of deviations"""
        endpoint = "/deviation/metadata?" + "&".join(
            f"deviationids[{num}]={deviation['deviationid']}"
            for num, deviation in enumerate(deviations)
        )
        return self._call(
            endpoint,
            params=self._metadata_params,
            public=self._metadata_public,
        )["metadata"]

    def gallery(self, username, folder_id, offset=0, extend=True, public=None):
        """Yield all Deviation-objects contained in a gallery folder"""
        endpoint = "/gallery/" + folder_id
        params = {"username": username, "offset": offset, "limit": 24,
                  "mature_content": self.mature, "mode": "newest"}
        return self._pagination(endpoint, params, extend, public)

    def gallery_all(self, username, offset=0):
        """Yield all Deviation-objects of a specific user"""
        endpoint = "/gallery/all"
        params = {"username": username, "offset": offset, "limit": 24,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params)

    @memcache(keyarg=1)
    def gallery_folders(self, username, offset=0):
        """Return a list of all gallery folders of a specific user"""
        endpoint = "/gallery/folders"
        params = {"username": username, "offset": offset, "limit": 50,
                  "mature_content": self.mature}
        return self._pagination_list(endpoint, params)

    def user_friends(self, username, offset=0):
        """Get the users list of friends"""
        endpoint = "/user/friends/" + username
        params = {"limit": 50, "offset": offset, "mature_content": self.mature}
        return self._pagination(endpoint, params)

    def user_friends_watch(self, username):
        """Watch a user

        Returns the "success" value of the API response, or None when the
        request failed.
        """
        endpoint = "/user/friends/watch/" + username
        data = {
            "watch[friend]": "0",
            "watch[deviations]": "0",
            "watch[journals]": "0",
            "watch[forum_threads]": "0",
            "watch[critiques]": "0",
            "watch[scraps]": "0",
            "watch[activity]": "0",
            "watch[collections]": "0",
            "mature_content": self.mature,
        }
        response = self._call(
            endpoint, method="POST", data=data, public=False, fatal=False)
        # _call() returns None for failed non-fatal requests
        return response.get("success") if response else None

    def user_friends_unwatch(self, username):
        """Unwatch a user

        Returns the "success" value of the API response, or None when the
        request failed.
        """
        endpoint = "/user/friends/unwatch/" + username
        response = self._call(
            endpoint, method="POST", public=False, fatal=False)
        # _call() returns None for failed non-fatal requests
        return response.get("success") if response else None

    @memcache(keyarg=1)
    def user_profile(self, username):
        """Get user profile information (None if the request failed)"""
        endpoint = "/user/profile/" + username
        return self._call(endpoint, fatal=False)

    def user_profile_posts(self, username):
        """Yield all posts from a user's profile"""
        endpoint = "/user/profile/posts"
        params = {"username": username, "limit": 50,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params)

    def user_statuses(self, username, offset=0):
        """Yield status updates of a specific user

        'offset' is applied by skipping that many statuses of the
        filtered stream, not by passing it to the API.
        """
        statuses = filter(
            lambda post: "/status-update/" in post["url"],
            self.user_profile_posts(username))
        if offset:
            statuses = util.advance(statuses, offset)
        return statuses

    def authenticate(self, refresh_token_key):
        """Authenticate the application by requesting an access token"""
        self.headers["Authorization"] = \
            self._authenticate_impl(refresh_token_key)

    @cache(maxage=3600, keyarg=1)
    def _authenticate_impl(self, refresh_token_key):
        """Request an access token and return the 'Authorization' value

        A private token is requested when 'refresh_token_key' is set, a
        public one otherwise. Raises AuthenticationError for any response
        status other than 200.
        """
        url = "https://www.deviantart.com/oauth2/token"
        if refresh_token_key:
            self.log.info("Refreshing private access token")
            data = {"grant_type": "refresh_token",
                    "refresh_token": _refresh_token_cache(refresh_token_key)}
        else:
            self.log.info("Requesting public access token")
            data = {"grant_type": "client_credentials"}

        auth = util.HTTPBasicAuth(self.client_id, self.client_secret)
        response = self.extractor.request(
            url, method="POST", data=data, auth=auth, fatal=False)
        data = response.json()

        if response.status_code != 200:
            self.log.debug("Server response: %s", data)
            raise exception.AuthenticationError(
                f"\"{data.get('error_description')}\" ({data.get('error')})")
        if refresh_token_key:
            # store the refresh token from the response for the next use
            _refresh_token_cache.update(
                refresh_token_key, data["refresh_token"])
        return "Bearer " + data["access_token"]

    def _call(self, endpoint, fatal=True, log=True, public=None, **kwargs):
        """Call an API endpoint

        Returns the decoded JSON response for 2xx/3xx status codes.
        For other status codes:
        - 429: the delay is increased and the request is repeated
        - 'fatal' is false: None is returned
        - otherwise the error is logged (if 'log' is true) and the decoded
          error response is returned

        Raises NotFoundError or AuthorizationError for the two error
        descriptions recognized below.
        """
        url = "https://www.deviantart.com/api/v1/oauth2" + endpoint
        # NOTE(review): presumably makes the request layer hand back error
        # responses instead of raising - confirm in Extractor.request()
        kwargs["fatal"] = None

        if public is None:
            public = self.public

        while True:
            if self.delay:
                self.extractor.sleep(self.delay, "api")

            self.authenticate(None if public else self.refresh_token_key)
            kwargs["headers"] = self.headers
            response = self.extractor.request(url, **kwargs)

            try:
                data = response.json()
            except ValueError:
                self.log.error("Unable to parse API response")
                data = {}

            status = response.status_code
            if 200 <= status < 400:
                if self.delay > self.delay_min:
                    self.delay -= 1
                return data
            if not fatal and status != 429:
                return None

            error = data.get("error_description")
            if error == "User not found.":
                raise exception.NotFoundError("user or group")
            if error == "Deviation not downloadable.":
                raise exception.AuthorizationError()

            self.log.debug(response.text)
            msg = f"API responded with {status} {response.reason}"
            if status == 429:
                if self.delay < 30:
                    self.delay += 1
                self.log.warning("%s. Using %ds delay.", msg, self.delay)

                # show the hint about custom credentials only once
                if self._warn_429 and self.delay >= 3:
                    self._warn_429 = False
                    if self.client_id == self.CLIENT_ID:
                        self.log.info(
                            "Register your own OAuth application and use its "
                            "credentials to prevent this error: "
                            "https://gdl-org.github.io/docs/configuration.html"
                            "#extractor-deviantart-client-id-client-secret")
            else:
                if log:
                    self.log.error(msg)
                return data

    def _should_switch_tokens(self, results, params):
        """Tell whether a page of public results looks incomplete

        True when fewer results than requested were returned or, unless
        the extractor's 'jwt' option is enabled, when any result is
        marked as mature.
        """
        if len(results) < params["limit"]:
            return True

        if not self.extractor.jwt:
            for item in results:
                if item.get("is_mature"):
                    return True

        return False

    def _pagination(self, endpoint, params,
                    extend=True, public=None, unpack=False, key="results"):
        """Yield the results of all pages of 'endpoint'

        'params' is updated in place while paging. With 'unpack', results
        are replaced by their "journal" values. With 'extend', a page is
        requested again with the private token if it looks incomplete,
        and results get metadata and folder information added.
        """
        warn = True
        if public is None:
            public = self.public

        # round the page size down to a multiple of the metadata limit
        if self.limit and params["limit"] > self.limit:
            params["limit"] = (params["limit"] // self.limit) * self.limit

        while True:
            data = self._call(endpoint, params=params, public=public)
            try:
                results = data[key]
            except KeyError:
                self.log.error("Unexpected API response: %s", data)
                return

            if unpack:
                results = [item["journal"] for item in results
                           if "journal" in item]
            if extend:
                if public and self._should_switch_tokens(results, params):
                    if self.refresh_token_key:
                        self.log.debug("Switching to private access token")
                        public = False
                        continue
                    elif data["has_more"] and warn:
                        warn = False
                        self.log.warning(
                            "Private or mature deviations detected! "
                            "Run 'gallery-dl oauth:deviantart' and follow the "
                            "instructions to be able to access them.")

                # "statusid" cannot be used instead
                if results and "deviationid" in results[0]:
                    if self.metadata:
                        self._metadata(results)
                    if self.folders:
                        self._folders(results)
                else:  # attempt to fix "deleted" deviations
                    for dev in self._shared_content(results):
                        if not dev["is_deleted"]:
                            continue
                        patch = self._call(
                            "/deviation/" + dev["deviationid"], fatal=False)
                        if patch:
                            dev.update(patch)

            yield from results

            # with the "manual" strategy, 'has_more' is ignored as long as
            # extended pages still contain results
            if not data["has_more"] and (
                    self.strategy != "manual" or not results or not extend):
                return

            if "next_cursor" in data:
                if not data["next_cursor"]:
                    return
                params["offset"] = None
                params["cursor"] = data["next_cursor"]
            elif data["next_offset"] is not None:
                params["offset"] = data["next_offset"]
                params["cursor"] = None
            else:
                if params.get("offset") is None:
                    return
                params["offset"] = int(params["offset"]) + len(results)

    def _pagination_list(self, endpoint, params, key="results"):
        """Return all results of 'endpoint' as a list (no extending)"""
        return list(self._pagination(endpoint, params, False, key=key))

    def _shared_content(self, results):
        """Return an iterable of shared deviations in 'results'"""
        for result in results:
            for item in result.get("items") or ():
                if "deviation" in item:
                    yield item["deviation"]

    def _metadata(self, deviations):
        """Add extended metadata to each deviation object"""
        if len(deviations) <= self.limit:
            self._metadata_batch(deviations)
        else:
            # request metadata in batches of at most 'self.limit' entries
            n = self.limit
            for index in range(0, len(deviations), n):
                self._metadata_batch(deviations[index:index+n])

    def _metadata_batch(self, deviations):
        """Fetch extended metadata for a single batch of deviations"""
        for deviation, metadata in zip(
                deviations, self.deviation_metadata(deviations)):
            deviation.update(metadata)
            # reduce tag objects to their names
            deviation["tags"] = [t["tag_name"] for t in deviation["tags"]]

    def _folders(self, deviations):
        """Add a list of all containing folders to each deviation object"""
        for deviation in deviations:
            deviation["folders"] = self._folders_map(
                deviation["author"]["username"])[deviation["deviationid"]]

    @memcache(keyarg=1)
    def _folders_map(self, username):
        """Generate a deviation_id -> folders mapping for 'username'"""
        self.log.info("Collecting folder information for '%s'", username)
        folders = self.gallery_folders(username)

        dmap = collections.defaultdict(list)
        if not folders:
            # no folder information available (e.g. failed API request)
            return dmap

        # create 'folderid'-to-'folder' mapping
        fmap = {
            folder["folderid"]: folder
            for folder in folders
        }

        # add parent names to folders, but ignore "Featured" as parent
        featured = folders[0]["folderid"]
        done = False

        # repeat until every folder name includes all of its parents
        while not done:
            done = True
            for folder in folders:
                parent = folder["parent"]
                if not parent:
                    pass
                elif parent == featured:
                    folder["parent"] = None
                else:
                    parent = fmap[parent]
                    if parent["parent"]:
                        # parent is not resolved yet; try again next pass
                        done = False
                    else:
                        folder["name"] = parent["name"] + "/" + folder["name"]
                        folder["parent"] = None

        # map deviationids to folder names
        for folder in folders:
            # extend=False: plain listing without metadata/folder lookups
            for deviation in self.gallery(
                    username, folder["folderid"], 0, False):
                dmap[deviation["deviationid"]].append(folder["name"])
        return dmap
1691
1692
1693
class DeviantartEclipseAPI():
    """Interface to the DeviantArt Eclipse API"""

    def __init__(self, extractor):
        """Bind to 'extractor' and reuse its rate-limited request method"""
        self.extractor = extractor
        self.log = extractor.log
        self.request = extractor._limited_request
        # fetched on first use by _call()
        self.csrf_token = None

    def deviation_extended_fetch(self, deviation_id, user, kind=None):
        """Return the 'init' data of a single deviation"""
        return self._call("/_puppy/dadeviation/init", {
            "deviationid": deviation_id,
            "username": user,
            "type": kind,
            "include_session": "false",
            "expand": "deviation.related",
            "da_minor_version": "20230710",
        })

    def gallery_scraps(self, user, offset=0):
        """Yield the contents of a user's scraps folder"""
        return self._pagination("/_puppy/dashared/gallection/contents", {
            "username": user,
            "type": "gallery",
            "offset": offset,
            "limit": 24,
            "scraps_folder": "true",
        })

    def galleries_search(self, user, query, offset=0, order="most-recent"):
        """Yield the results of searching a user's gallery for 'query'"""
        return self._pagination("/_puppy/dashared/gallection/search", {
            "username": user,
            "type": "gallery",
            "order": order,
            "q": query,
            "offset": offset,
            "limit": 24,
        })

    def search_deviations(self, params):
        """Yield the results of a site-wide deviation search"""
        return self._pagination(
            "/_puppy/dabrowse/search/deviations", params, key="deviations")

    def user_info(self, user, expand=False):
        """Return the 'about' data of a user ('expand' is not used)"""
        return self._call(
            "/_puppy/dauserprofile/init/about", {"username": user})

    def user_watching(self, user, offset=0):
        """Yield the entries of a user's 'watching' module"""
        gruserid, moduleid = self._ids_watching(user)

        return self._pagination("/_puppy/gruser/module/watching", {
            "gruserid": gruserid,
            "gruser_typeid": "4",
            "username": user,
            "moduleid": moduleid,
            "offset": offset,
            "limit": 24,
        })

    def _call(self, endpoint, params):
        """Request 'endpoint' and return its decoded JSON response

        The CSRF token is added to 'params' in place. A response that
        cannot be decoded is returned as {"error": <response text>}.
        """
        token = self.csrf_token
        if not token:
            token = self._fetch_csrf_token()
        params["csrf_token"] = token

        response = self.request(
            "https://www.deviantart.com" + endpoint,
            params=params, fatal=None)

        try:
            return response.json()
        except Exception:
            return {"error": response.text}

    def _pagination(self, endpoint, params, key="results"):
        """Yield the entries under 'key' from all pages of 'endpoint'

        'params' is updated in place with the cursor or offset of each
        next page.
        """
        page_size = params.get("limit", 24)
        warned = False

        while True:
            data = self._call(endpoint, params)

            results = data.get(key)
            if results is None:
                return

            has_more = data.get("hasMore")
            # a short page that is not the last one hints at hidden items
            if len(results) < page_size and not warned and has_more:
                warned = True
                self.log.warning(
                    "Private deviations detected! "
                    "Provide login credentials or session cookies "
                    "to be able to access them.")
            yield from results

            if not has_more:
                return

            # prefer server-provided positions over counting results
            if "nextCursor" in data:
                params.update(offset=None, cursor=data["nextCursor"])
            elif "nextOffset" in data:
                params.update(offset=data["nextOffset"], cursor=None)
            else:
                offset = params.get("offset")
                if offset is None:
                    return
                params["offset"] = int(offset) + len(results)

    def _ids_watching(self, user):
        """Return the (user ID, 'watching' module ID) pair of 'user'

        Both values are taken from the user's /about page, which is also
        used to refresh the CSRF token. Raises NotFoundError when the
        page has no 'watching' module.
        """
        page = self.request(f"{self.extractor.root}/{user}/about").text

        gruser_id = text.extr(page, ' data-userid="', '"')

        marker = page.find('\\"name\\":\\"watching\\"')
        if marker < 0:
            raise exception.NotFoundError("'watching' module ID")
        # search for the ID relative to the marker's position
        raw_id = text.rextr(page, '\\"id\\":', ',', marker)
        module_id = raw_id.strip('" ')

        self._fetch_csrf_token(page)
        return gruser_id, module_id

    def _fetch_csrf_token(self, page=None):
        """Extract, store, and return the CSRF token from 'page'

        The site's root page is requested when no 'page' is given.
        """
        if page is None:
            page = self.request(self.extractor.root + "/").text
        token = text.extr(page, "window.__CSRF_TOKEN__ = '", "'")
        self.csrf_token = token
        return token
1823
1824
1825
@memcache(keyarg=1)
def _user_details(extr, name):
    """Return the "user" object of a profile, or None on any error"""
    try:
        profile = extr.api.user_profile(name)
        details = profile["user"]
    except Exception:
        # best effort: failed requests and unexpected responses yield None
        details = None
    return details
1831
1832
1833
@cache(maxage=36500*86400, keyarg=0)
def _refresh_token_cache(token):
    """Default value for cache lookups of refresh tokens

    Keys starting with "#" are placeholders without a token of their own
    and evaluate to None; anything else is returned unchanged.
    """
    is_placeholder = bool(token) and token[0] == "#"
    return None if is_placeholder else token
1838
1839
1840
@cache(maxage=28*86400, keyarg=1)
def _login_impl(extr, username, password):
    """Log in with username and password; return the session cookies

    Raises AuthenticationError when the sign-in request was not
    redirected.
    """
    extr.log.info("Logging in as %s", username)

    login_page = extr.request("https://www.deviantart.com/users/login").text

    # collect the hidden form fields of the login page
    hidden = text.extract_iter(
        login_page, '<input type="hidden" name="', '"/>')
    form = {}
    for field in hidden:
        key, _, val = field.partition('" value="')
        form[key] = val

    challenge = form.get("challenge")
    if challenge and challenge != "0":
        extr.log.warning("Login requires solving a CAPTCHA")
        extr.log.debug(challenge)

    form.update({
        "username": username,
        "password": password,
        "remember": "on",
    })

    extr.sleep(2.0, "login")
    response = extr.request(
        "https://www.deviantart.com/_sisu/do/signin",
        method="POST", data=form)

    # the missing redirect is treated as a failed login
    if not response.history:
        raise exception.AuthenticationError()

    cookies = {}
    for cookie in extr.cookies:
        cookies[cookie.name] = cookie.value
    return cookies
1872
1873
1874
# Digits used by id_from_base36() and base36_from_id(): 0-9, then a-z
_ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"
1875
1876
1877
def id_from_base36(base36):
    """Decode a base36 string using the digits in _ALPHABET"""
    decoded = util.bdecode(base36, _ALPHABET)
    return decoded
1879
1880
1881
def base36_from_id(deviation_id):
    """Encode a deviation ID as base36 using the digits in _ALPHABET"""
    number = int(deviation_id)
    return util.bencode(number, _ALPHABET)
1883
1884
1885
def eclipse_media(media, format="preview"):
    """Build the URL of an Eclipse media object

    Returns a (url, formats) tuple, where 'formats' maps each entry of
    media["types"] by its "t" value.

    The URL is media["baseUri"], followed by "?token=" and the last token
    if media["token"] is non-empty. With at most one token, the "c" path
    of the requested 'format' (if it has one) is inserted before the
    token, with "<prettyName>" replaced by media["prettyName"].
    """
    parts = [media["baseUri"]]
    formats = {entry["t"]: entry for entry in media["types"]}

    tokens = media.get("token") or ()
    if tokens:
        if len(tokens) <= 1:
            selected = formats[format]
            if "c" in selected:
                path = selected["c"].replace(
                    "<prettyName>", media["prettyName"])
                parts.append(path)
        parts.extend(("?token=", tokens[-1]))

    return "".join(parts), formats
1903
1904