Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/deviantart.py
8834 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2015-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for https://www.deviantart.com/"""
10
11
from .common import Extractor, Message, Dispatch
12
from .. import text, util, dt, exception
13
from ..cache import cache, memcache
14
import collections
15
import mimetypes
16
import binascii
17
import time
18
19
# Common URL prefix for user-specific extractors.
# The user name is captured either as the first path segment
# ("deviantart.com/USER", group 1; "watch/" is excluded) or as a
# subdomain ("USER.deviantart.com", group 2; "www." is excluded).
BASE_PATTERN = (
    r"(?:https?://)?(?:"
    r"(?:www\.)?(?:fx)?deviantart\.com/(?!watch/)([\w-]+)|"
    r"(?!www\.)([\w-]+)\.(?:fx)?deviantart\.com)"
)
# 'usericon' value that marks a user without a custom avatar
DEFAULT_AVATAR = "https://a.deviantart.net/avatars/default.gif"
25
26
27
class DeviantartExtractor(Extractor):
    """Base class for deviantart extractors"""
    category = "deviantart"
    root = "https://www.deviantart.com"
    directory_fmt = ("{category}", "{username}")
    filename_fmt = "{category}_{index}_{title}.{extension}"
    cookies_domain = ".deviantart.com"
    cookies_names = ("auth", "auth_secure", "userinfo")
    # time of the last '_limited_request()'; stored on the class so that
    # the rate limit is shared between all extractor instances
    _last_request = 0

    def __init__(self, match):
        """Store the lower-cased user name found in the URL (if any)"""
        Extractor.__init__(self, match)
        self.user = (match[1] or match[2] or "").lower()
        self.offset = 0

    def _init(self):
        """Read config options and select the matching helper methods"""
        self.jwt = self.config("jwt", False)
        self.flat = self.config("flat", True)
        self.extra = self.config("extra", False)
        self.quality = self.config("quality", "100")
        self.original = self.config("original", True)
        self.previews = self.config("previews", False)
        self.intermediary = self.config("intermediary", True)
        self.comments_avatars = self.config("comments-avatars", False)
        # downloading comment avatars requires comment data
        self.comments = self.comments_avatars or self.config("comments", False)

        self.api = DeviantartOAuthAPI(self)
        self.eclipse_api = None
        self.group = False
        self._premium_cache = {}

        if self.config("auto-unwatch"):
            # users watched by '_fetch_premium()'; unwatched on finalize
            self.unwatch = []
            self.finalize = self._unwatch_premium
        else:
            self.unwatch = None

        if self.quality:
            # 'quality' becomes the replacement string and 'quality_sub'
            # the substitution function used by '_extract_content()'
            if self.quality == "png":
                self.quality = "-fullview.png?"
                self.quality_sub = text.re(r"-fullview\.[a-z0-9]+\?").sub
            else:
                self.quality = ",q_" + str(self.quality)
                self.quality_sub = text.re(r",q_\d+").sub

        if self.intermediary:
            self.intermediary_subn = text.re(r"(/f/[^/]+/[^/]+)/v\d+/.*").subn

        # original = "image…" -> only use original files that are images
        if isinstance(self.original, str) and \
                self.original.lower().startswith("image"):
            self.original = True
            self._update_content = self._update_content_image
        else:
            self._update_content = self._update_content_default

        if self.previews == "all":
            self.previews_images = self.previews = True
        else:
            self.previews_images = False

        journals = self.config("journals", "html")
        if journals == "html":
            self.commit_journal = self._commit_journal_html
        elif journals == "text":
            self.commit_journal = self._commit_journal_text
        else:
            self.commit_journal = None

    def request(self, url, **kwargs):
        """Send a request; wait and retry while it is blocked

        'fatal' defaults to False. A 403 response containing
        "Request blocked." triggers a 300 second wait before retrying.
        """
        if "fatal" not in kwargs:
            kwargs["fatal"] = False
        while True:
            response = Extractor.request(self, url, **kwargs)
            if response.status_code != 403 or \
                    b"Request blocked." not in response.content:
                return response
            self.wait(seconds=300, reason="CloudFront block")

    def skip(self, num):
        """Skip 'num' results by increasing the start offset"""
        self.offset += num
        return num

    def login(self):
        """Return True if login cookies are present or could be obtained

        Returns None when no cookies and no username are available.
        """
        if self.cookies_check(self.cookies_names):
            return True

        username, password = self._get_auth_info()
        if username:
            self.cookies_update(_login_impl(self, username, password))
            return True

    def items(self):
        """Yield messages for all files of all relevant deviations"""
        if self.user:
            if group := self.config("group", True):
                if user := _user_details(self, self.user):
                    self.user = user["username"]
                    self.group = False
                elif group == "skip":
                    self.log.info("Skipping group '%s'", self.user)
                    raise exception.AbortExtraction()
                else:
                    # no user details found: handle the name as a group
                    self.subcategory = "group-" + self.subcategory
                    self.group = True

        for deviation in self.deviations():
            # (url, data) tuples are forwarded to another extractor
            if isinstance(deviation, tuple):
                url, data = deviation
                yield Message.Queue, url, data
                continue

            if deviation["is_deleted"]:
                # prevent crashing in case the deviation really is
                # deleted
                self.log.debug(
                    "Skipping %s (deleted)", deviation["deviationid"])
                continue

            tier_access = deviation.get("tier_access")
            if tier_access == "locked":
                self.log.debug(
                    "Skipping %s (access locked)", deviation["deviationid"])
                continue

            if "premium_folder_data" in deviation:
                data = self._fetch_premium(deviation)
                if not data:
                    continue
                deviation.update(data)

            self.prepare(deviation)
            yield Message.Directory, "", deviation

            if "content" in deviation:
                content = self._extract_content(deviation)
                yield self.commit(deviation, content)

            elif deviation["is_downloadable"]:
                content = self.api.deviation_download(deviation["deviationid"])
                deviation["is_original"] = True
                yield self.commit(deviation, content)

            if "videos" in deviation and deviation["videos"]:
                # pick the highest quality; the last character of
                # 'quality' is stripped before parsing it as a number
                video = max(deviation["videos"],
                            key=lambda x: text.parse_int(x["quality"][:-1]))
                deviation["is_original"] = False
                yield self.commit(deviation, video)

            if "flash" in deviation:
                deviation["is_original"] = True
                yield self.commit(deviation, deviation["flash"])

            if self.commit_journal:
                if journal := self._extract_journal(deviation):
                    if self.extra:
                        # kept for the sta.sh link search below
                        deviation["_journal"] = journal["html"]
                    deviation["is_original"] = True
                    yield self.commit_journal(deviation, journal)

            if self.comments_avatars:
                for comment in deviation["comments"]:
                    user = comment["user"]
                    name = user["username"].lower()
                    if user["usericon"] == DEFAULT_AVATAR:
                        self.log.debug(
                            "Skipping avatar of '%s' (default)", name)
                        continue
                    _user_details.update(name, user)

                    url = f"{self.root}/{name}/avatar/"
                    comment["_extractor"] = DeviantartAvatarExtractor
                    yield Message.Queue, url, comment

            if self.previews and "preview" in deviation:
                preview = deviation["preview"]
                deviation["is_preview"] = True
                if self.previews_images:
                    yield self.commit(deviation, preview)
                else:
                    # only fetch previews for non-image main files
                    mtype = mimetypes.guess_type(
                        "a." + deviation["extension"], False)[0]
                    if mtype and not mtype.startswith("image/"):
                        yield self.commit(deviation, preview)
                del deviation["is_preview"]

            if not self.extra:
                continue

            # ref: https://www.deviantart.com
            #      /developers/http/v1/20210526/object/editor_text
            # the value of "features" is a JSON string with forward
            # slashes escaped
            text_content = \
                deviation["text_content"]["body"]["features"].replace(
                    "\\/", "/") if "text_content" in deviation else None
            for txt in (text_content, deviation.get("description"),
                        deviation.get("_journal")):
                if txt is None:
                    continue
                for match in DeviantartStashExtractor.pattern.finditer(txt):
                    url = text.ensure_http_scheme(match[0])
                    deviation["_extractor"] = DeviantartStashExtractor
                    yield Message.Queue, url, deviation

    def deviations(self):
        """Return an iterable containing all relevant Deviation-objects"""

    def prepare(self, deviation):
        """Adjust the contents of a Deviation-object"""
        if "index" not in deviation:
            try:
                if deviation["url"].startswith((
                    "https://www.deviantart.com/stash/", "https://sta.sh",
                )):
                    # stash items: take the base36 ID from the file name
                    # of the content URL (without its first character)
                    filename = deviation["content"]["src"].split("/")[5]
                    deviation["index_base36"] = filename.partition("-")[0][1:]
                    deviation["index"] = id_from_base36(
                        deviation["index_base36"])
                else:
                    # regular URLs end in "-<index>"
                    deviation["index"] = text.parse_int(
                        deviation["url"].rpartition("-")[2])
            except KeyError:
                deviation["index"] = 0
                deviation["index_base36"] = "0"
        if "index_base36" not in deviation:
            deviation["index_base36"] = base36_from_id(deviation["index"])

        if self.user:
            deviation["username"] = self.user
            deviation["_username"] = self.user.lower()
        else:
            deviation["username"] = deviation["author"]["username"]
            deviation["_username"] = deviation["username"].lower()

        deviation["published_time"] = text.parse_int(
            deviation["published_time"])
        deviation["date"] = self.parse_timestamp(
            deviation["published_time"])

        if self.comments:
            deviation["comments"] = (
                self._extract_comments(deviation["deviationid"], "deviation")
                if deviation["stats"]["comments"] else ()
            )

        # filename metadata
        sub = text.re(r"\W").sub
        deviation["filename"] = "".join((
            sub("_", deviation["title"].lower()), "_by_",
            sub("_", deviation["author"]["username"].lower()), "-d",
            deviation["index_base36"],
        ))

    def commit(self, deviation, target):
        """Return a Url message for 'target' and update 'deviation'

        A copy of 'target' is stored as deviation["target"]; the file
        extension is taken from the target's 'filename' or its URL.
        """
        url = target["src"]
        name = target.get("filename") or url
        target = target.copy()
        target["filename"] = deviation["filename"]
        deviation["target"] = target
        deviation["extension"] = target["extension"] = text.ext_from_url(name)
        if "is_original" not in deviation:
            deviation["is_original"] = ("/v1/" not in url)
        return Message.Url, url, deviation

    def _commit_journal_html(self, deviation, journal):
        """Build an HTML document from 'journal' and return a Url message"""
        title = text.escape(deviation["title"])
        url = deviation["url"]
        thumbs = deviation.get("thumbs") or deviation.get("files")
        html = journal["html"]
        tmpl = self.utils("journal")
        shadow = tmpl.SHADOW.format_map(thumbs[0]) if thumbs else ""

        if not html:
            self.log.warning("%s: Empty journal content", deviation["index"])
            # normalize None to "" so the string operations below work
            html = ""

        if "css" in journal:
            css, cls = journal["css"], "withskin"
        elif html.startswith("<style"):
            # split a leading <style> element off the journal markup
            css, _, html = html.partition("</style>")
            css = css.partition(">")[2]
            cls = "withskin"
        else:
            css, cls = "", "journal-green"

        # only the first 250 characters are searched for a custom header
        if html.find('<div class="boxtop journaltop">', 0, 250) != -1:
            needle = '<div class="boxtop journaltop">'
            header = tmpl.HEADER_CUSTOM.format(
                title=title, url=url, date=deviation["date"],
            )
        else:
            needle = '<div usr class="gr">'
            username = deviation["author"]["username"]
            urlname = deviation.get("username") or username.lower()
            header = tmpl.HEADER.format(
                title=title,
                url=url,
                userurl=f"{self.root}/{urlname}/",
                username=username,
                date=deviation["date"],
            )

        # put the header in place of 'needle', or in front of the content
        if needle in html:
            html = html.replace(needle, header, 1)
        else:
            html = tmpl.HTML_EXTRA.format(header, html)

        html = tmpl.HTML.format(
            title=title, html=html, shadow=shadow, css=css, cls=cls)

        deviation["extension"] = "htm"
        return Message.Url, html, deviation

    def _commit_journal_text(self, deviation, journal):
        """Build a plain-text document from 'journal'; return a Url message"""
        html = journal["html"]
        if not html:
            self.log.warning("%s: Empty journal content", deviation["index"])
            # normalize None to "" so the string operations below work
            html = ""
        elif html.startswith("<style"):
            html = html.partition("</style>")[2]
        # drop everything from the last "<script" onwards
        head, _, tail = html.rpartition("<script")
        content = "\n".join(
            text.unescape(text.remove_html(txt))
            for txt in (head or tail).split("<br />")
        )
        txt = self.utils("journal").TEXT.format(
            title=deviation["title"],
            username=deviation["author"]["username"],
            date=deviation["date"],
            content=content,
        )

        deviation["extension"] = "txt"
        return Message.Url, txt, deviation

    def _extract_journal(self, deviation):
        """Return {"html": ...} for a journal/literature deviation or None"""
        if "excerpt" in deviation:
            # # empty 'html'
            #  return self.api.deviation_content(deviation["deviationid"])

            if "_page" in deviation:
                # reuse the already downloaded webpage
                page = deviation["_page"]
                del deviation["_page"]
            else:
                page = self._limited_request(deviation["url"]).text

            # extract journal html from webpage
            html = text.extr(
                page,
                "<h2>Literature Text</h2></span><div>",
                "</div></section></div></div>")
            if html:
                return {"html": html}

            self.log.debug("%s: Failed to extract journal HTML from webpage. "
                           "Falling back to __INITIAL_STATE__ markup.",
                           deviation["index"])

            # parse __INITIAL_STATE__ as fallback
            state = util.json_loads(text.extr(
                page, 'window.__INITIAL_STATE__ = JSON.parse("', '");')
                .replace("\\\\", "\\").replace("\\'", "'").replace('\\"', '"'))
            deviations = state["@@entities"]["deviation"]
            content = deviations.popitem()[1]["textContent"]

            if html := self._textcontent_to_html(deviation, content):
                return {"html": html}
            return {"html": content["excerpt"].replace("\n", "<br />")}

        if "body" in deviation:
            return {"html": deviation.pop("body")}
        return None

    def _textcontent_to_html(self, deviation, content):
        """Return HTML for a 'textContent' object or None if unsupported"""
        html = content["html"]
        markup = html.get("markup")

        # markup not starting with "{" is returned unchanged
        if not markup or markup[0] != "{":
            return markup

        if html["type"] == "tiptap":
            try:
                return self.utils("tiptap").to_html(markup)
            except Exception as exc:
                self.log.traceback(exc)
                self.log.error("%s: '%s: %s'", deviation["index"],
                               exc.__class__.__name__, exc)

        self.log.warning("%s: Unsupported '%s' markup.",
                         deviation["index"], html["type"])
        return None

    def _extract_content(self, deviation):
        """Return the 'content' object of 'deviation' with its final URL"""
        content = deviation["content"]

        if self.original and deviation["is_downloadable"]:
            self._update_content(deviation, content)
            return content

        if self.jwt:
            self._update_token(deviation, content)
            return content

        if content["src"].startswith("https://images-wixmp-"):
            if self.intermediary and deviation["index"] <= 790677560:
                # https://github.com/r888888888/danbooru/issues/4069
                intermediary, count = self.intermediary_subn(
                    r"/intermediary\1", content["src"], 1)
                if count:
                    deviation["is_original"] = False
                    deviation["_fallback"] = (content["src"],)
                    content["src"] = intermediary
            if self.quality:
                content["src"] = self.quality_sub(
                    self.quality, content["src"], 1)

        return content

    def _find_folder(self, folders, name, uuid):
        """Return the folder or subfolder identified by 'name' / 'uuid'

        When 'uuid' consists of decimal digits only, folders are matched
        by name; otherwise their 'folderid' is compared with 'uuid'.

        Raises NotFoundError if no folder matches.
        """
        if uuid.isdecimal():
            import re
            # hyphens in 'name' stand for any run of non-alphanumeric
            # characters; all other characters are escaped so that they
            # match literally instead of being read as regex syntax
            match = text.re(
                "(?i)" + "[^a-z0-9]+".join(
                    map(re.escape, name.split("-"))) + "$").match
            for folder in folders:
                if match(folder["name"]):
                    return folder
                elif folder.get("has_subfolders"):
                    for subfolder in folder["subfolders"]:
                        if match(subfolder["name"]):
                            return subfolder
        else:
            for folder in folders:
                if folder["folderid"] == uuid:
                    return folder
                elif folder.get("has_subfolders"):
                    for subfolder in folder["subfolders"]:
                        if subfolder["folderid"] == uuid:
                            return subfolder
        raise exception.NotFoundError("folder")

    def _folder_urls(self, folders, category, extractor):
        """Yield (url, folder) tuples to be handled by 'extractor'"""
        base = f"{self.root}/{self.user}/{category}/"
        for folder in folders:
            folder["_extractor"] = extractor
            url = f"{base}{folder['folderid']}/{folder['name']}"
            yield url, folder

    def _update_content_default(self, deviation, content):
        """Replace 'content' with the deviation's download data"""
        if "premium_folder_data" in deviation or deviation.get("is_mature"):
            public = False
        else:
            public = None

        data = self.api.deviation_download(deviation["deviationid"], public)
        content.update(data)
        deviation["is_original"] = True

    def _update_content_image(self, deviation, content):
        """Like _update_content_default, but only for image downloads"""
        data = self.api.deviation_download(deviation["deviationid"])
        # guess the type from the URL without its query string
        url = data["src"].partition("?")[0]
        mtype = mimetypes.guess_type(url, False)[0]
        if mtype and mtype.startswith("image/"):
            content.update(data)
            deviation["is_original"] = True

    def _update_token(self, deviation, content):
        """Replace JWT to be able to remove width/height limits

        All credit goes to @Ironchest337
        for discovering and implementing this method
        """
        url, sep, _ = content["src"].partition("/v1/")
        if not sep:
            return

        # 'images-wixmp' returns 401 errors, but just 'wixmp' still works
        url = url.replace("//images-wixmp", "//wixmp", 1)

        #  header = b'{"typ":"JWT","alg":"none"}'
        payload = (
            b'{"sub":"urn:app:","iss":"urn:app:","obj":[[{"path":"/f/' +
            url.partition("/f/")[2].encode() +
            b'"}]],"aud":["urn:service:file.download"]}'
        )

        deviation["_fallback"] = (content["src"],)
        deviation["is_original"] = True
        pl = binascii.b2a_base64(payload).rstrip(b'=\n').decode()
        content["src"] = (
            # base64 of 'header' is precomputed as 'eyJ0eX...'
            f"{url}?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.{pl}.")

    def _extract_comments(self, target_id, target_type="deviation"):
        """Return all comments of a target, including nested replies"""
        results = None
        # comment IDs whose replies still need fetching;
        # None requests the top-level comments
        comment_ids = [None]

        while comment_ids:
            comments = self.api.comments(
                target_id, target_type, comment_ids.pop())

            if results:
                results.extend(comments)
            else:
                results = comments

            # parent comments, i.e. nodes with at least one child
            parents = {c["parentid"] for c in comments}
            # comments with at least one reply
            replies = {c["commentid"] for c in comments if c["replies"]}
            # add comment UUIDs with replies that are not parent to any node
            comment_ids.extend(replies - parents)

        return results

    def _limited_request(self, url, **kwargs):
        """Limits HTTP requests to one every 2 seconds"""
        diff = time.time() - DeviantartExtractor._last_request
        if diff < 2.0:
            self.sleep(2.0 - diff, "request")
        response = self.request(url, **kwargs)
        DeviantartExtractor._last_request = time.time()
        return response

    def _fetch_premium(self, deviation):
        """Return accessible data for a premium-folder deviation or None

        Results for a whole premium folder are stored in
        'self._premium_cache' (None for inaccessible deviations).
        """
        try:
            return self._premium_cache[deviation["deviationid"]]
        except KeyError:
            pass

        if not self.api.refresh_token_key:
            self.log.warning(
                "Unable to access premium content (no refresh-token)")
            # disable this method for the rest of the run
            self._fetch_premium = lambda _: None
            return None

        dev = self.api.deviation(deviation["deviationid"], False)
        folder = deviation["premium_folder_data"]
        username = dev["author"]["username"]

        # premium_folder_data is no longer present when user has access (#5063)
        has_access = ("premium_folder_data" not in dev) or folder["has_access"]

        if not has_access and folder["type"] == "watchers" and \
                self.config("auto-watch"):
            # remember each user only once to avoid repeated unwatch calls
            if self.unwatch is not None and username not in self.unwatch:
                self.unwatch.append(username)
            if self.api.user_friends_watch(username):
                has_access = True
                self.log.info(
                    "Watching %s for premium folder access", username)
            else:
                self.log.warning(
                    "Error when trying to watch %s. "
                    "Try again with a new refresh-token", username)

        if has_access:
            self.log.info("Fetching premium folder data")
        else:
            self.log.warning("Unable to access premium content (type: %s)",
                             folder["type"])

        # cache a result for every deviation of this premium folder
        cache = self._premium_cache
        for dev in self.api.gallery(
                username, folder["gallery_id"], public=False):
            cache[dev["deviationid"]] = dev if has_access else None

        return cache.get(deviation["deviationid"])

    def _unwatch_premium(self):
        """Unwatch all users collected in 'self.unwatch'"""
        for username in self.unwatch:
            self.log.info("Unwatching %s", username)
            self.api.user_friends_unwatch(username)

    def _eclipse_to_oauth(self, eclipse_api, deviations):
        """Yield OAuth API deviation objects for Eclipse API results"""
        for obj in deviations:
            deviation = obj["deviation"] if "deviation" in obj else obj
            deviation_uuid = eclipse_api.deviation_extended_fetch(
                deviation["deviationId"],
                deviation["author"]["username"],
                "journal" if deviation["isJournal"] else "art",
            )["deviation"]["extended"]["deviationUuid"]
            yield self.api.deviation(deviation_uuid)

    def _unescape_json(self, json):
        """Remove backslash escapes of quotes and backslashes in 'json'"""
        return json.replace('\\"', '"') \
                   .replace("\\'", "'") \
                   .replace("\\\\", "\\")
609
610
611
class DeviantartUserExtractor(Dispatch, DeviantartExtractor):
    """Extractor for an artist's user profile"""
    pattern = BASE_PATTERN + r"/?$"
    example = "https://www.deviantart.com/USER"

    def items(self):
        """Dispatch to the extractors for the sections of a user profile"""
        base = f"{self.root}/{self.user}/"
        return self._dispatch_extractors((
            (DeviantartAvatarExtractor    , base + "avatar"),
            (DeviantartBackgroundExtractor, base + "banner"),
            (DeviantartGalleryExtractor   , base + "gallery"),
            (DeviantartScrapsExtractor    , base + "gallery/scraps"),
            (DeviantartJournalExtractor   , base + "posts"),
            (DeviantartStatusExtractor    , base + "posts/statuses"),
            (DeviantartFavoriteExtractor  , base + "favourites"),
        ), ("gallery",))
627
628
629
###############################################################################
630
# OAuth #######################################################################
631
632
class DeviantartGalleryExtractor(DeviantartExtractor):
    """Extractor for all deviations from an artist's gallery"""
    subcategory = "gallery"
    archive_fmt = "g_{_username}_{index}.{extension}"
    pattern = (BASE_PATTERN + r"/gallery"
               r"(?:/all|/recommended-for-you)?/?(\?(?!q=).*)?$")
    example = "https://www.deviantart.com/USER/gallery/"

    def deviations(self):
        """Return all gallery deviations, or one URL per gallery folder

        Folder URLs are returned when 'flat' is disabled or when the
        target is handled as a group.
        """
        if self.flat and not self.group:
            return self.api.gallery_all(self.user, self.offset)
        folders = self.api.gallery_folders(self.user)
        return self._folder_urls(folders, "gallery", DeviantartFolderExtractor)
645
646
647
class DeviantartAvatarExtractor(DeviantartExtractor):
    """Extractor for an artist's avatar"""
    subcategory = "avatar"
    archive_fmt = "a_{_username}_{index}"
    pattern = BASE_PATTERN + r"/avatar"
    example = "https://www.deviantart.com/USER/avatar/"

    def deviations(self):
        """Return deviation-like objects for the user's avatar image(s)

        Nothing is returned for unknown users and default avatars.
        """
        name = self.user.lower()
        user = _user_details(self, name)
        if not user:
            return ()

        icon = user["usericon"]
        if icon == DEFAULT_AVATAR:
            self.log.debug("Skipping avatar of '%s' (default)", name)
            return ()

        # the icon URL's query string serves as index ("0" if absent)
        _, sep, index = icon.rpartition("?")
        if not sep:
            index = "0"

        formats = self.config("formats")
        if not formats:
            url = icon.replace("/avatars/", "/avatars-big/", 1)
            return (self._make_deviation(url, user, index, ""),)

        if isinstance(formats, str):
            formats = formats.replace(" ", "").split(",")

        results = []
        for fmt in formats:
            # "<format>.<ext>" or just ".<ext>" / "<ext>"
            fmt, _, ext = fmt.rpartition(".")
            if fmt:
                fmt = "-" + fmt
            url = (f"https://a.deviantart.net/avatars{fmt}"
                   f"/{name[0]}/{name[1]}/{name}.{ext}?{index}")
            results.append(self._make_deviation(url, user, index, fmt))
        return results

    def _make_deviation(self, url, user, index, fmt):
        """Return a minimal deviation object for an avatar URL"""
        return {
            "author"         : user,
            "da_category"    : "avatar",
            "index"          : text.parse_int(index),
            "is_deleted"     : False,
            "is_downloadable": False,
            "published_time" : 0,
            "title"          : "avatar" + fmt,
            "stats"          : {"comments": 0},
            "content"        : {"src": url},
        }
699
700
701
class DeviantartBackgroundExtractor(DeviantartExtractor):
    """Extractor for an artist's banner"""
    subcategory = "background"
    archive_fmt = "b_{index}"
    pattern = BASE_PATTERN + r"/ba(?:nner|ckground)"
    example = "https://www.deviantart.com/USER/banner/"

    def deviations(self):
        """Return the profile's cover deviation, or nothing on any error"""
        try:
            return (self.api.user_profile(self.user.lower())
                    ["cover_deviation"]["cover_deviation"],)
        except Exception as exc:
            # best-effort: a missing banner is not an error,
            # but leave a trace of what went wrong
            self.log.debug(
                "Unable to get banner of '%s' (%s: %s)",
                self.user, exc.__class__.__name__, exc)
            return ()
714
715
716
class DeviantartFolderExtractor(DeviantartExtractor):
    """Extractor for deviations inside an artist's gallery folder"""
    subcategory = "folder"
    directory_fmt = ("{category}", "{username}", "{folder[title]}")
    archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/([^/?#]+)"
    example = "https://www.deviantart.com/USER/gallery/12345/TITLE"

    def __init__(self, match):
        """Store folder ID and folder name from the URL"""
        DeviantartExtractor.__init__(self, match)
        self.folder = None
        self.folder_id = match[3]
        self.folder_name = match[4]

    def deviations(self):
        """Yield subfolder URLs (if enabled) and the folder's deviations"""
        folders = self.api.gallery_folders(self.user)
        folder = self._find_folder(folders, self.folder_name, self.folder_id)

        # Leaving this here for backwards compatibility
        self.folder = {
            "title": folder["name"],
            "uuid" : folder["folderid"],
            "index": self.folder_id,
            "owner": self.user,
            "parent_uuid": folder["parent"],
        }

        if folder.get("subfolder"):
            # subfolders use their parent's data for archive IDs
            # and target directories
            self.folder["parent_folder"] = folder["parent_folder"]
            self.archive_fmt = "F_{folder[parent_uuid]}_{index}.{extension}"

            if self.flat:
                self.directory_fmt = ("{category}", "{username}",
                                      "{folder[parent_folder]}")
            else:
                self.directory_fmt = ("{category}", "{username}",
                                      "{folder[parent_folder]}",
                                      "{folder[title]}")

        if folder.get("has_subfolders") and self.config("subfolders", True):
            # mark subfolders so that the extractor handling
            # them can detect their parent
            for subfolder in folder["subfolders"]:
                subfolder["parent_folder"] = folder["name"]
                subfolder["subfolder"] = True
            yield from self._folder_urls(
                folder["subfolders"], "gallery", DeviantartFolderExtractor)

        yield from self.api.gallery(self.user, folder["folderid"], self.offset)

    def prepare(self, deviation):
        """Add folder metadata to 'deviation'"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["folder"] = self.folder
767
768
769
class DeviantartStashExtractor(DeviantartExtractor):
    """Extractor for sta.sh-ed deviations"""
    subcategory = "stash"
    archive_fmt = "{index}.{extension}"
    pattern = (r"(?:https?://)?(?:(?:www\.)?deviantart\.com/stash|sta\.s(h))"
               r"/([a-z0-9]+)")
    example = "https://www.deviantart.com/stash/abcde"

    skip = Extractor.skip

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        # stash URLs carry no user name
        self.user = ""

    def deviations(self, stash_id=None, stash_data=None):
        """Yield the deviation(s) behind a stash ID

        Without arguments, the stash ID is taken from the URL. For stash
        folders, this method calls itself for every linked stash item
        and passes the folder's data as 'stash_data'.
        """
        if stash_id is None:
            # group 1 is only set for 'sta.sh' URLs
            legacy_url, stash_id = self.groups
        else:
            legacy_url = False

        if legacy_url and stash_id[0] == "2":
            # take the stash ID from the final response URL
            url = "https://sta.sh/" + stash_id
            response = self._limited_request(url)
            stash_id = response.url.rpartition("/")[2]
            page = response.text
        else:
            url = "https://www.deviantart.com/stash/" + stash_id
            page = self._limited_request(url).text

        if stash_id[0] == "0":
            if uuid := text.extr(page, '//deviation/', '"'):
                deviation = self.api.deviation(uuid)
                # keep the page for '_extract_journal()'
                deviation["_page"] = page
                deviation["index"] = text.parse_int(text.extr(
                    page, '\\"deviationId\\":', ','))

                deviation["stash_id"] = stash_id
                if stash_data:
                    folder = stash_data["folder"]
                    deviation["stash_name"] = folder["name"]
                    deviation["stash_folder"] = folder["folderId"]
                    deviation["stash_parent"] = folder["parentId"] or 0
                    deviation["stash_description"] = \
                        folder["richDescription"]["excerpt"]
                else:
                    deviation["stash_name"] = ""
                    deviation["stash_description"] = ""
                    deviation["stash_folder"] = 0
                    deviation["stash_parent"] = 0

                yield deviation
                return

        if stash_data := text.extr(page, ',\\"stash\\":', ',\\"@@'):
            if stash_data.endswith(":{}"):
                stash_data = stash_data[:stash_data.rfind("}", None, -2)+1]
            stash_data = util.json_loads(self._unescape_json(stash_data))

        for sid in text.extract_iter(
                page, 'href="https://www.deviantart.com/stash/', '"'):
            # ignore links to this page itself and to comment sections
            if sid == stash_id or sid.endswith("#comments"):
                continue
            yield from self.deviations(sid, stash_data)
832
833
834
class DeviantartFavoriteExtractor(DeviantartExtractor):
    """Extractor for an artist's favorites"""
    subcategory = "favorite"
    directory_fmt = ("{category}", "{username}", "Favourites")
    archive_fmt = "f_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/favourites(?:/all|/?\?catpath=)?/?$"
    example = "https://www.deviantart.com/USER/favourites/"

    def deviations(self):
        """Return all favorites, or one URL per collection if not 'flat'"""
        if self.flat:
            return self.api.collections_all(self.user, self.offset)
        folders = self.api.collections_folders(self.user)
        return self._folder_urls(
            folders, "favourites", DeviantartCollectionExtractor)
848
849
850
class DeviantartCollectionExtractor(DeviantartExtractor):
    """Extractor for a single favorite collection"""
    subcategory = "collection"
    directory_fmt = ("{category}", "{username}", "Favourites",
                     "{collection[title]}")
    archive_fmt = "C_{collection[uuid]}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/favourites/([^/?#]+)/([^/?#]+)"
    example = "https://www.deviantart.com/USER/favourites/12345/TITLE"

    def __init__(self, match):
        """Store collection ID and collection name from the URL"""
        DeviantartExtractor.__init__(self, match)
        self.collection = None
        self.collection_id = match[3]
        self.collection_name = match[4]

    def deviations(self):
        """Return the deviations of the collection given by the URL"""
        folders = self.api.collections_folders(self.user)
        folder = self._find_folder(
            folders, self.collection_name, self.collection_id)
        self.collection = {
            "title": folder["name"],
            "uuid" : folder["folderid"],
            "index": self.collection_id,
            "owner": self.user,
        }
        return self.api.collections(self.user, folder["folderid"], self.offset)

    def prepare(self, deviation):
        """Add collection metadata to 'deviation'"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["collection"] = self.collection
880
881
882
class DeviantartJournalExtractor(DeviantartExtractor):
    """Extractor for an artist's journals"""
    subcategory = "journal"
    directory_fmt = ("{category}", "{username}", "Journal")
    archive_fmt = "j_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$"
    example = "https://www.deviantart.com/USER/posts/journals/"

    def deviations(self):
        """Return the user's journals, starting at the current offset"""
        return self.api.browse_user_journals(self.user, self.offset)
892
893
894
class DeviantartStatusExtractor(DeviantartExtractor):
    """Extractor for an artist's status updates"""
    subcategory = "status"
    directory_fmt = ("{category}", "{username}", "Status")
    filename_fmt = "{category}_{index}_{title}_{date}.{extension}"
    archive_fmt = "S_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/posts/statuses"
    example = "https://www.deviantart.com/USER/posts/statuses/"

    def deviations(self):
        """Yield all statuses together with the items they share"""
        for status in self.api.user_statuses(self.user, self.offset):
            yield from self.process_status(status)

    def process_status(self, status):
        """Yield the items shared by 'status', then 'status' itself

        Shared statuses are processed recursively. A deleted status is
        not yielded, but its shared items are.
        """
        for item in status.get("items") or ():  # do not trust is_share
            # shared deviations/statuses
            if "deviation" in item:
                yield item["deviation"].copy()
            if "status" in item:
                yield from self.process_status(item["status"].copy())
        # assume is_deleted == true means necessary fields are missing
        if status["is_deleted"]:
            self.log.warning(
                "Skipping status %s (deleted)", status.get("statusid"))
            return
        yield status

    def prepare(self, deviation):
        """Adjust a status object so it can be handled like a deviation"""
        # regular deviations shared by a status
        if "deviationid" in deviation:
            return DeviantartExtractor.prepare(self, deviation)

        try:
            # last non-empty path segment of the status URL
            path = deviation["url"].split("/")
            deviation["index"] = text.parse_int(path[-1] or path[-2])
        except KeyError:
            deviation["index"] = 0

        if self.user:
            deviation["username"] = self.user
            deviation["_username"] = self.user.lower()
        else:
            deviation["username"] = deviation["author"]["username"]
            deviation["_username"] = deviation["username"].lower()

        deviation["date"] = d = self.parse_datetime_iso(deviation["ts"])
        deviation["published_time"] = int(dt.to_ts(d))

        # fill in fields that a status object does not set here
        deviation["da_category"] = "Status"
        deviation["category_path"] = "status"
        deviation["is_downloadable"] = False
        deviation["title"] = "Status Update"

        comments_count = deviation.pop("comments_count", 0)
        deviation["stats"] = {"comments": comments_count}
        if self.comments:
            deviation["comments"] = (
                self._extract_comments(deviation["statusid"], "status")
                if comments_count else ()
            )
953
954
955
class DeviantartTagExtractor(DeviantartExtractor):
    """Extractor for deviations from tag searches"""
    subcategory = "tag"
    directory_fmt = ("{category}", "Tags", "{search_tags}")
    archive_fmt = "T_{search_tags}_{index}.{extension}"
    pattern = r"(?:https?://)?www\.deviantart\.com/tag/([^/?#]+)"
    example = "https://www.deviantart.com/tag/TAG"

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        self.tag = text.unquote(match[1])
        # group 1 is the tag here, not a user name
        self.user = ""

    def deviations(self):
        """Return the deviations found for the tag"""
        return self.api.browse_tags(self.tag, self.offset)

    def prepare(self, deviation):
        """Add the searched tag to 'deviation'"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["search_tags"] = self.tag
974
975
976
class DeviantartWatchExtractor(DeviantartExtractor):
    """Extractor for Deviations from watched users"""
    subcategory = "watch"
    # the two empty groups keep match[1] and match[2] valid for the
    # base class constructor, which reads both
    pattern = (r"(?:https?://)?(?:www\.)?deviantart\.com"
               r"/(?:watch/deviations|notifications/watch)()()")
    example = "https://www.deviantart.com/watch/deviations"

    def deviations(self):
        """Return deviations posted by the users the account watches.

        'self.offset' is forwarded like the other API-backed extractors
        do, so a start offset stored on the extractor is no longer
        silently dropped. It is 0 unless something changed it.
        NOTE(review): assumes the base class skip() accumulates into
        'self.offset' - confirm.
        """
        return self.api.browse_deviantsyouwatch(self.offset)
985
986
987
class DeviantartWatchPostsExtractor(DeviantartExtractor):
    """Extractor for Posts from watched users"""
    subcategory = "watch-posts"
    # the two empty groups keep match[1] and match[2] valid for the
    # base class constructor, which reads both
    pattern = r"(?:https?://)?(?:www\.)?deviantart\.com/watch/posts()()"
    example = "https://www.deviantart.com/watch/posts"

    def deviations(self):
        """Return posts made by the users the account watches.

        'self.offset' is forwarded so a start offset stored on the
        extractor is honored instead of being silently dropped. It is 0
        unless something changed it.
        NOTE(review): the offset applies to the raw API entries, before
        non-journal entries are filtered out - confirm this is the
        desired granularity.
        """
        return self.api.browse_posts_deviantsyouwatch(self.offset)
995
996
997
###############################################################################
998
# Eclipse #####################################################################
999
1000
class DeviantartDeviationExtractor(DeviantartExtractor):
    """Extractor for single deviations"""
    subcategory = "deviation"
    archive_fmt = "g_{_username}_{index}.{extension}"
    pattern = (BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)"
               r"|(?:https?://)?(?:www\.)?(?:fx)?deviantart\.com/"
               r"(?:view/|deviation/|view(?:-full)?\.php/*\?(?:[^#]+&)?id=)"
               r"(\d+)"  # bare deviation ID without slug
               r"|(?:https?://)?fav\.me/d([0-9a-z]+)")  # base36
    example = "https://www.deviantart.com/UsER/art/TITLE-12345"

    # use the generic skip() implementation of 'Extractor'
    skip = Extractor.skip

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        # "art" or "journal"; None for URLs without a user part
        self.type = match[3]
        # numeric ID, taken from whichever URL form matched;
        # fav.me short links carry it base36-encoded
        self.deviation_id = \
            match[4] or match[5] or id_from_base36(match[6])

    def deviations(self):
        """Yield the deviation and once more for each additional media file.

        The deviation's web page is fetched to find its UUID, which is
        then used to query the OAuth API. The same dict object is yielded
        every time; for additional media only its 'content' URL, 'num',
        'index_file' and 'is_downloadable' fields are changed in between.

        Raises:
            exception.NotFoundError: when the page contains no UUID.
        """
        if self.user:
            url = (f"{self.root}/{self.user}"
                   f"/{self.type or 'art'}/{self.deviation_id}")
        else:
            url = f"{self.root}/view/{self.deviation_id}/"

        page = self._limited_request(url, notfound=True).text
        uuid = text.extr(page, '"deviationUuid\\":\\"', '\\')
        if not uuid:
            raise exception.NotFoundError("deviation")

        deviation = self.api.deviation(uuid)
        deviation["_page"] = page
        deviation["index_file"] = 0
        deviation["num"] = deviation["count"] = 1

        additional_media = text.extr(page, ',\\"additionalMedia\\":', '}],\\"')
        if not additional_media:
            yield deviation
            return

        # multiple files: make file and archive names unique per file
        self.filename_fmt = ("{category}_{index}_{index_file}_{title}_"
                             "{num:>02}.{extension}")
        self.archive_fmt = ("g_{_username}_{index}{index_file:?_//}."
                            "{extension}")

        # the extracted text stops before the closing '}]'; add it back
        additional_media = util.json_loads(self._unescape_json(
            additional_media) + "}]")
        deviation["count"] = 1 + len(additional_media)
        yield deviation

        for post in additional_media:
            uri = eclipse_media(post["media"], "fullview")[0]
            deviation["content"]["src"] = uri
            deviation["num"] += 1
            deviation["index_file"] = post["fileId"]
            # Download only works on purchased materials - no way to check
            deviation["is_downloadable"] = False
            yield deviation
1059
1060
1061
class DeviantartScrapsExtractor(DeviantartExtractor):
    """Extractor for the scraps folder of an artist"""
    subcategory = "scraps"
    directory_fmt = ("{category}", "{username}", "Scraps")
    archive_fmt = "s_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
    example = "https://www.deviantart.com/USER/gallery/scraps"

    def deviations(self):
        """Log in, list the scraps through the Eclipse API and
        convert the results with _eclipse_to_oauth()"""
        self.login()

        eclipse = DeviantartEclipseAPI(self)
        scraps = eclipse.gallery_scraps(self.user, self.offset)
        return self._eclipse_to_oauth(eclipse, scraps)
1075
1076
1077
class DeviantartSearchExtractor(DeviantartExtractor):
    """Extractor for deviantart search results"""
    subcategory = "search"
    directory_fmt = ("{category}", "Search", "{search_tags}")
    archive_fmt = "Q_{search_tags}_{index}.{extension}"
    pattern = (r"(?:https?://)?www\.deviantart\.com"
               r"/search(?:/deviations)?/?\?([^#]+)")
    example = "https://www.deviantart.com/search?q=QUERY"
    # use the generic skip() implementation of 'Extractor'
    skip = Extractor.skip

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        # the base class stored the (lowercased) query string as 'user'
        self.query = text.parse_query(self.user)
        self.search = self.query.get("q", "")
        self.user = ""

    def deviations(self):
        """Return search results converted with _eclipse_to_oauth().

        The Eclipse search API is used when login() reports success;
        otherwise results are scraped from the HTML search pages.
        """
        logged_in = self.login()

        eclipse_api = DeviantartEclipseAPI(self)
        search = (eclipse_api.search_deviations
                  if logged_in else self._search_html)
        return self._eclipse_to_oauth(eclipse_api, search(self.query))

    def prepare(self, deviation):
        """Run the common preparation, then record the search term"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["search_tags"] = self.search

    def _search_html(self, params):
        """Yield minimal Eclipse-style result objects scraped from HTML.

        'params' is sent as the query string and updated in place with
        the cursor of each following page.

        Raises:
            exception.AbortExtraction: on a redirect to the login page.
        """
        url = self.root + "/search"
        find = text.re(r'''href="https://www\.deviantart\.com/([^/?#]+)'''
                       r'''/(art|journal)/(?:[^"]+-)?(\d+)''').findall
        while True:
            response = self.request(url, params=params)

            if response.history and "/users/login" in response.url:
                raise exception.AbortExtraction("HTTP redirect to login page")
            page = response.text

            # take every third match and leave out the trailing ones
            # NOTE(review): presumably each result is linked three times
            # and the last links are not search results - confirm
            for user, kind, did in find(page)[:-3:3]:
                yield {
                    "deviationId": did,
                    "author": {"username": user},
                    "isJournal": kind == "journal",
                }

            cursor = text.extr(page, r'\"cursor\":\"', '\\',)
            if not cursor:
                return
            params["cursor"] = cursor
1127
1128
1129
class DeviantartGallerySearchExtractor(DeviantartExtractor):
    """Extractor for searches inside a user's gallery"""
    subcategory = "gallery-search"
    archive_fmt = "g_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/gallery/?\?(q=[^#]+)"
    example = "https://www.deviantart.com/USER/gallery?q=QUERY"

    def __init__(self, match):
        super().__init__(match)
        # raw query string; parsed in deviations()
        self.query = match[3]

    def deviations(self):
        """Log in, run the gallery search through the Eclipse API and
        convert the results with _eclipse_to_oauth()"""
        self.login()

        eclipse = DeviantartEclipseAPI(self)
        parsed = text.parse_query(self.query)
        # also read by prepare()
        self.search = parsed["q"]
        order = parsed.get("sort", "most-recent")

        found = eclipse.galleries_search(
            self.user, self.search, self.offset, order)
        return self._eclipse_to_oauth(eclipse, found)

    def prepare(self, deviation):
        """Run the common preparation, then record the search term"""
        super().prepare(deviation)
        deviation["search_tags"] = self.search
1158
1159
1160
class DeviantartFollowingExtractor(DeviantartExtractor):
    """Extractor for the users watched by a user"""
    subcategory = "following"
    pattern = BASE_PATTERN + "/(?:about#)?watching"
    example = "https://www.deviantart.com/USER/about#watching"

    def items(self):
        """Yield one Queue message per watched user, to be handled by
        DeviantartUserExtractor"""
        oauth = DeviantartOAuthAPI(self)

        for friend in oauth.user_friends(self.user):
            name = friend['user']['username']
            profile_url = f"{self.root}/{name}"
            friend["_extractor"] = DeviantartUserExtractor
            yield Message.Queue, profile_url, friend
1173
1174
1175
###############################################################################
1176
# API Interfaces ##############################################################
1177
1178
class DeviantartOAuthAPI():
    """Interface for the DeviantArt OAuth API

    https://www.deviantart.com/developers/http/v1/20160316
    """
    # default application credentials, used when no "client-id" is configured
    CLIENT_ID = "5388"
    CLIENT_SECRET = "76b08c69cfb27f26d6161f9ab6d061a1"

    def __init__(self, extractor):
        """Read all API related options from the extractor's config"""
        self.extractor = extractor
        self.log = extractor.log
        self.headers = {"dA-minor-version": "20210526"}
        self._warn_429 = True

        # seconds to sleep before each API call; raised by _call() on
        # HTTP 429 and lowered again, but never below 'delay_min'
        self.delay = extractor.config("wait-min", 0)
        self.delay_min = max(2, self.delay)

        # the API receives this value as the string "true" or "false"
        self.mature = extractor.config("mature", "true")
        if not isinstance(self.mature, str):
            self.mature = "true" if self.mature else "false"

        self.strategy = extractor.config("pagination")
        self.folders = extractor.config("folders", False)
        self.public = extractor.config("public", True)

        if client_id := extractor.config("client-id"):
            self.client_id = str(client_id)
            self.client_secret = extractor.config("client-secret")
        else:
            self.client_id = self.CLIENT_ID
            self.client_secret = self.CLIENT_SECRET

        # without an explicit refresh token, look for a cached one
        # stored under the key "#<client-id>"
        token = extractor.config("refresh-token")
        if token is None or token == "cache":
            token = "#" + self.client_id
            if not _refresh_token_cache(token):
                token = None
        self.refresh_token_key = token

        # "metadata" may be a bool, a string ("all" or a comma-separated
        # list) or a list/tuple of extended metadata names
        metadata = extractor.config("metadata", False)
        if not metadata:
            metadata = bool(extractor.extra)
        if metadata:
            self.metadata = True

            if isinstance(metadata, str):
                if metadata == "all":
                    metadata = ("submission", "camera", "stats",
                                "collection", "gallery")
                else:
                    metadata = metadata.replace(" ", "").split(",")
            elif not isinstance(metadata, (list, tuple)):
                metadata = ()

            self._metadata_params = {"mature_content": self.mature}
            self._metadata_public = None
            if metadata:
                # extended metadata
                self.limit = 10
                for param in metadata:
                    self._metadata_params["ext_" + param] = "1"
                if "ext_collection" in self._metadata_params or \
                        "ext_gallery" in self._metadata_params:
                    if token:
                        self._metadata_public = False
                    else:
                        self.log.error("'collection' and 'gallery' metadata "
                                       "require a refresh token")
            else:
                # base metadata
                self.limit = 50
        else:
            self.metadata = False
            self.limit = None

        self.log.debug(
            "Using %s API credentials (client-id %s)",
            "default" if self.client_id == self.CLIENT_ID else "custom",
            self.client_id,
        )

    def browse_deviantsyouwatch(self, offset=0):
        """Yield deviations from users you watch"""
        endpoint = "/browse/deviantsyouwatch"
        params = {"limit": 50, "offset": offset,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params, public=False)

    def browse_posts_deviantsyouwatch(self, offset=0):
        """Yield posts from users you watch"""
        endpoint = "/browse/posts/deviantsyouwatch"
        params = {"limit": 50, "offset": offset,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params, public=False, unpack=True)

    def browse_tags(self, tag, offset=0):
        """ Browse a tag """
        endpoint = "/browse/tags"
        params = {
            "tag"           : tag,
            "offset"        : offset,
            "limit"         : 50,
            "mature_content": self.mature,
        }
        return self._pagination(endpoint, params)

    def browse_user_journals(self, username, offset=0):
        """Yield the journal entries among a user's profile posts"""
        journals = filter(
            lambda post: "/journal/" in post["url"],
            self.user_profile_posts(username))
        if offset:
            # 'offset' counts journals, i.e. it is applied after filtering
            journals = util.advance(journals, offset)
        return journals

    def collections(self, username, folder_id, offset=0):
        """Yield all Deviation-objects contained in a collection folder"""
        endpoint = "/collections/" + folder_id
        params = {"username": username, "offset": offset, "limit": 24,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params)

    def collections_all(self, username, offset=0):
        """Yield all deviations in a user's collection"""
        endpoint = "/collections/all"
        params = {"username": username, "offset": offset, "limit": 24,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params)

    @memcache(keyarg=1)
    def collections_folders(self, username, offset=0):
        """Return a list of all collection folders of a specific user"""
        endpoint = "/collections/folders"
        params = {"username": username, "offset": offset, "limit": 50,
                  "mature_content": self.mature}
        return self._pagination_list(endpoint, params)

    def comments(self, target_id, target_type="deviation",
                 comment_id=None, offset=0):
        """Fetch comments posted on a target"""
        endpoint = f"/comments/{target_type}/{target_id}"
        params = {
            "commentid"     : comment_id,
            "maxdepth"      : "5",
            "offset"        : offset,
            "limit"         : 50,
            "mature_content": self.mature,
        }
        return self._pagination_list(endpoint, params=params, key="thread")

    def deviation(self, deviation_id, public=None):
        """Query and return info about a single Deviation"""
        endpoint = "/deviation/" + deviation_id

        deviation = self._call(endpoint, public=public)
        # repeat the query with the private token for mature deviations,
        # unless the caller asked for a specific token type
        if deviation.get("is_mature") and public is None and \
                self.refresh_token_key:
            deviation = self._call(endpoint, public=False)

        if self.metadata:
            self._metadata((deviation,))
        if self.folders:
            self._folders((deviation,))
        return deviation

    def deviation_content(self, deviation_id, public=None):
        """Get extended content of a single Deviation"""
        endpoint = "/deviation/content"
        params = {"deviationid": deviation_id}
        content = self._call(endpoint, params=params, public=public)
        # HTML starting like this is treated as a private journal
        if public and content["html"].startswith(
                ' <span class=\"username-with-symbol'):
            if self.refresh_token_key:
                content = self._call(endpoint, params=params, public=False)
            else:
                self.log.warning("Private Journal")
        return content

    def deviation_download(self, deviation_id, public=None):
        """Get the original file download (if allowed)"""
        endpoint = "/deviation/download/" + deviation_id
        params = {"mature_content": self.mature}

        try:
            return self._call(
                endpoint, params=params, public=public, log=False)
        except Exception:
            # deliberate catch-all: retry once with the private token
            # when one is available, otherwise pass the error on
            if not self.refresh_token_key:
                raise
            return self._call(endpoint, params=params, public=False)

    def deviation_metadata(self, deviations):
        """ Fetch deviation metadata for a set of deviations"""
        # IDs are sent as 'deviationids[0]=..&deviationids[1]=..'
        endpoint = "/deviation/metadata?" + "&".join(
            f"deviationids[{num}]={deviation['deviationid']}"
            for num, deviation in enumerate(deviations)
        )
        return self._call(
            endpoint,
            params=self._metadata_params,
            public=self._metadata_public,
        )["metadata"]

    def gallery(self, username, folder_id, offset=0, extend=True, public=None):
        """Yield all Deviation-objects contained in a gallery folder"""
        endpoint = "/gallery/" + folder_id
        params = {"username": username, "offset": offset, "limit": 24,
                  "mature_content": self.mature, "mode": "newest"}
        return self._pagination(endpoint, params, extend, public)

    def gallery_all(self, username, offset=0):
        """Yield all Deviation-objects of a specific user"""
        endpoint = "/gallery/all"
        params = {"username": username, "offset": offset, "limit": 24,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params)

    @memcache(keyarg=1)
    def gallery_folders(self, username, offset=0):
        """Return a list of all gallery folders of a specific user"""
        endpoint = "/gallery/folders"
        params = {"username": username, "offset": offset, "limit": 50,
                  "mature_content": self.mature}
        return self._pagination_list(endpoint, params)

    def user_friends(self, username, offset=0):
        """Get the users list of friends"""
        endpoint = "/user/friends/" + username
        params = {"limit": 50, "offset": offset, "mature_content": self.mature}
        return self._pagination(endpoint, params)

    def user_friends_watch(self, username):
        """Watch a user

        All 'watch[...]' flags are sent as "0". Returns the "success"
        value of the API response, or None when the request failed.
        """
        endpoint = "/user/friends/watch/" + username
        data = {
            "watch[friend]"       : "0",
            "watch[deviations]"   : "0",
            "watch[journals]"     : "0",
            "watch[forum_threads]": "0",
            "watch[critiques]"    : "0",
            "watch[scraps]"       : "0",
            "watch[activity]"     : "0",
            "watch[collections]"  : "0",
            "mature_content"      : self.mature,
        }
        # with fatal=False, _call() returns None for failed requests
        response = self._call(
            endpoint, method="POST", data=data, public=False, fatal=False,
        )
        return (response or {}).get("success")

    def user_friends_unwatch(self, username):
        """Unwatch a user

        Returns the "success" value of the API response,
        or None when the request failed.
        """
        endpoint = "/user/friends/unwatch/" + username
        # with fatal=False, _call() returns None for failed requests
        response = self._call(
            endpoint, method="POST", public=False, fatal=False,
        )
        return (response or {}).get("success")

    @memcache(keyarg=1)
    def user_profile(self, username):
        """Get user profile information"""
        endpoint = "/user/profile/" + username
        return self._call(endpoint, fatal=False)

    def user_profile_posts(self, username):
        """Yield all posts listed on a user's profile"""
        endpoint = "/user/profile/posts"
        params = {"username": username, "limit": 50,
                  "mature_content": self.mature}
        return self._pagination(endpoint, params)

    def user_statuses(self, username, offset=0):
        """Yield status updates of a specific user"""
        statuses = filter(
            lambda post: "/status-update/" in post["url"],
            self.user_profile_posts(username))
        if offset:
            # 'offset' counts statuses, i.e. it is applied after filtering
            statuses = util.advance(statuses, offset)
        return statuses

    def authenticate(self, refresh_token_key):
        """Authenticate the application by requesting an access token"""
        self.headers["Authorization"] = \
            self._authenticate_impl(refresh_token_key)

    @cache(maxage=3600, keyarg=1)
    def _authenticate_impl(self, refresh_token_key):
        """Actual authenticate implementation

        Requests a private access token when 'refresh_token_key' is set
        and a public one otherwise. Returns the complete value for the
        'Authorization' header.

        Raises:
            exception.AuthenticationError: on any non-200 response.
        """
        url = "https://www.deviantart.com/oauth2/token"
        if refresh_token_key:
            self.log.info("Refreshing private access token")
            data = {"grant_type": "refresh_token",
                    "refresh_token": _refresh_token_cache(refresh_token_key)}
        else:
            self.log.info("Requesting public access token")
            data = {"grant_type": "client_credentials"}

        auth = util.HTTPBasicAuth(self.client_id, self.client_secret)
        response = self.extractor.request(
            url, method="POST", data=data, auth=auth, fatal=False)
        data = response.json()

        if response.status_code != 200:
            self.log.debug("Server response: %s", data)
            raise exception.AuthenticationError(
                f"\"{data.get('error_description')}\" ({data.get('error')})")
        if refresh_token_key:
            # the response contains a new refresh token; remember it
            _refresh_token_cache.update(
                refresh_token_key, data["refresh_token"])
        return "Bearer " + data["access_token"]

    def _call(self, endpoint, fatal=True, log=True, public=None, **kwargs):
        """Call an API endpoint

        Returns the decoded JSON response. HTTP 429 responses are retried
        indefinitely with a growing delay. For any other error status the
        result is None when 'fatal' is false, and the decoded error
        response otherwise (logged as an error when 'log' is true).

        Raises:
            exception.NotFoundError: the API reports "User not found."
            exception.AuthorizationError: the API reports
                "Deviation not downloadable."
        """
        url = "https://www.deviantart.com/api/v1/oauth2" + endpoint
        # error statuses are evaluated below, not by request()
        kwargs["fatal"] = None

        if public is None:
            public = self.public

        while True:
            if self.delay:
                self.extractor.sleep(self.delay, "api")

            self.authenticate(None if public else self.refresh_token_key)
            kwargs["headers"] = self.headers
            response = self.extractor.request(url, **kwargs)

            try:
                data = response.json()
            except ValueError:
                self.log.error("Unable to parse API response")
                data = {}

            status = response.status_code
            if 200 <= status < 400:
                # success: slowly reduce a previously raised delay
                if self.delay > self.delay_min:
                    self.delay -= 1
                return data
            if not fatal and status != 429:
                return None

            error = data.get("error_description")
            if error == "User not found.":
                raise exception.NotFoundError("user or group")
            if error == "Deviation not downloadable.":
                raise exception.AuthorizationError()

            self.log.debug(response.text)
            msg = f"API responded with {status} {response.reason}"
            if status == 429:
                # rate limited: wait longer (up to 30s) and try again
                if self.delay < 30:
                    self.delay += 1
                self.log.warning("%s. Using %ds delay.", msg, self.delay)

                if self._warn_429 and self.delay >= 3:
                    self._warn_429 = False
                    if self.client_id == self.CLIENT_ID:
                        self.log.info(
                            "Register your own OAuth application and use its "
                            "credentials to prevent this error: "
                            "https://gdl-org.github.io/docs/configuration.html"
                            "#extractor-deviantart-client-id-client-secret")
            else:
                if log:
                    self.log.error(msg)
                return data

    def _should_switch_tokens(self, results, params):
        """Tell whether a page of public results looks incomplete

        True when fewer results than requested were returned or,
        unless the extractor's 'jwt' option is set, when any result
        is marked as mature.
        """
        if len(results) < params["limit"]:
            return True

        if not self.extractor.jwt:
            for item in results:
                if item.get("is_mature"):
                    return True

        return False

    def _pagination(self, endpoint, params,
                    extend=True, public=None, unpack=False, key="results"):
        """Yield the results of all pages of an API endpoint

        'params' is updated in place while advancing through the pages.
        With 'unpack', only the "journal" objects of the results are
        yielded. With 'extend', results get post-processed (token
        switching, metadata, folders, patching of deleted shared
        deviations).
        """
        warn = True
        if public is None:
            public = self.public

        # round the page size down to a multiple of the metadata batch size
        if self.limit and params["limit"] > self.limit:
            params["limit"] = (params["limit"] // self.limit) * self.limit

        while True:
            data = self._call(endpoint, params=params, public=public)
            try:
                results = data[key]
            except KeyError:
                self.log.error("Unexpected API response: %s", data)
                return

            if unpack:
                results = [item["journal"] for item in results
                           if "journal" in item]
            if extend:
                if public and self._should_switch_tokens(results, params):
                    if self.refresh_token_key:
                        # repeat the same page with the private token
                        self.log.debug("Switching to private access token")
                        public = False
                        continue
                    elif data["has_more"] and warn:
                        warn = False
                        self.log.warning(
                            "Private or mature deviations detected! "
                            "Run 'gallery-dl oauth:deviantart' and follow the "
                            "instructions to be able to access them.")

                # "statusid" cannot be used instead
                if results and "deviationid" in results[0]:
                    if self.metadata:
                        self._metadata(results)
                    if self.folders:
                        self._folders(results)
                else:  # attempt to fix "deleted" deviations
                    for dev in self._shared_content(results):
                        if not dev["is_deleted"]:
                            continue
                        patch = self._call(
                            "/deviation/" + dev["deviationid"], fatal=False)
                        if patch:
                            dev.update(patch)

            yield from results

            # with the "manual" strategy, 'has_more' is not trusted
            # as long as extended pages still return results
            if not data["has_more"] and (
                    self.strategy != "manual" or not results or not extend):
                return

            if "next_cursor" in data:
                if not data["next_cursor"]:
                    return
                params["offset"] = None
                params["cursor"] = data["next_cursor"]
            elif data["next_offset"] is not None:
                params["offset"] = data["next_offset"]
                params["cursor"] = None
            else:
                # no position given by the API: advance manually
                if params.get("offset") is None:
                    return
                params["offset"] = int(params["offset"]) + len(results)

    def _pagination_list(self, endpoint, params, key="results"):
        """Return all results of _pagination() (extend=False) as a list"""
        return list(self._pagination(endpoint, params, False, key=key))

    def _shared_content(self, results):
        """Return an iterable of shared deviations in 'results'"""
        for result in results:
            for item in result.get("items") or ():
                if "deviation" in item:
                    yield item["deviation"]

    def _metadata(self, deviations):
        """Add extended metadata to each deviation object"""
        if len(deviations) <= self.limit:
            self._metadata_batch(deviations)
        else:
            # request metadata in batches of at most 'self.limit' items
            n = self.limit
            for index in range(0, len(deviations), n):
                self._metadata_batch(deviations[index:index+n])

    def _metadata_batch(self, deviations):
        """Fetch extended metadata for a single batch of deviations"""
        for deviation, metadata in zip(
                deviations, self.deviation_metadata(deviations)):
            deviation.update(metadata)
            # reduce tag objects to their names
            deviation["tags"] = [t["tag_name"] for t in deviation["tags"]]

    def _folders(self, deviations):
        """Add a list of all containing folders to each deviation object"""
        for deviation in deviations:
            deviation["folders"] = self._folders_map(
                deviation["author"]["username"])[deviation["deviationid"]]

    @memcache(keyarg=1)
    def _folders_map(self, username):
        """Generate a deviation_id -> folders mapping for 'username'"""
        self.log.info("Collecting folder information for '%s'", username)
        folders = self.gallery_folders(username)

        # create 'folderid'-to-'folder' mapping
        fmap = {
            folder["folderid"]: folder
            for folder in folders
        }

        # add parent names to folders, but ignore "Featured" as parent
        featured = folders[0]["folderid"]
        done = False

        # repeat until every folder name contains its full parent path;
        # a folder is only resolved after its own parent has been
        while not done:
            done = True
            for folder in folders:
                parent = folder["parent"]
                if not parent:
                    pass
                elif parent == featured:
                    folder["parent"] = None
                else:
                    parent = fmap[parent]
                    if parent["parent"]:
                        done = False
                    else:
                        folder["name"] = parent["name"] + "/" + folder["name"]
                        folder["parent"] = None

        # map deviationids to folder names
        dmap = collections.defaultdict(list)
        for folder in folders:
            # extend=False: plain listing without metadata/folder lookups
            for deviation in self.gallery(
                    username, folder["folderid"], 0, False):
                dmap[deviation["deviationid"]].append(folder["name"])
        return dmap
1690
1691
1692
class DeviantartEclipseAPI():
    """Interface to the DeviantArt Eclipse API"""

    def __init__(self, extractor):
        self.extractor = extractor
        self.log = extractor.log
        # every request goes through the extractor's '_limited_request'
        self.request = extractor._limited_request
        # fetched on first use by _call()
        self.csrf_token = None

    def deviation_extended_fetch(self, deviation_id, user, kind=None):
        """Return the response of the deviation 'init' endpoint"""
        query = {
            "deviationid"     : deviation_id,
            "username"        : user,
            "type"            : kind,
            "include_session" : "false",
            "expand"          : "deviation.related",
            "da_minor_version": "20230710",
        }
        return self._call("/_puppy/dadeviation/init", query)

    def gallery_scraps(self, user, offset=0):
        """Yield the contents of a user's scraps folder"""
        query = {
            "username"     : user,
            "type"         : "gallery",
            "offset"       : offset,
            "limit"        : 24,
            "scraps_folder": "true",
        }
        return self._pagination(
            "/_puppy/dashared/gallection/contents", query)

    def galleries_search(self, user, query, offset=0, order="most-recent"):
        """Yield the results of a search inside a user's gallery"""
        search_params = {
            "username": user,
            "type"    : "gallery",
            "order"   : order,
            "q"       : query,
            "offset"  : offset,
            "limit"   : 24,
        }
        return self._pagination(
            "/_puppy/dashared/gallection/search", search_params)

    def search_deviations(self, params):
        """Yield site-wide search results for the given query parameters"""
        return self._pagination(
            "/_puppy/dabrowse/search/deviations", params, key="deviations")

    def user_info(self, user, expand=False):
        """Return the response of the profile 'about' endpoint

        'expand' is accepted but not used.
        """
        return self._call(
            "/_puppy/dauserprofile/init/about", {"username": user})

    def user_watching(self, user, offset=0):
        """Yield the entries of a user's 'watching' module"""
        gruserid, moduleid = self._ids_watching(user)

        query = {
            "gruserid"     : gruserid,
            "gruser_typeid": "4",
            "username"     : user,
            "moduleid"     : moduleid,
            "offset"       : offset,
            "limit"        : 24,
        }
        return self._pagination("/_puppy/gruser/module/watching", query)

    def _call(self, endpoint, params):
        """Request 'endpoint' and return its decoded JSON response

        The CSRF token is added to 'params' in place. A response that
        cannot be decoded is returned as {"error": <response text>}.
        """
        token = self.csrf_token
        if not token:
            token = self._fetch_csrf_token()
        params["csrf_token"] = token

        response = self.request(
            "https://www.deviantart.com" + endpoint,
            params=params, fatal=None)

        try:
            payload = response.json()
        except Exception:
            payload = {"error": response.text}
        return payload

    def _pagination(self, endpoint, params, key="results"):
        """Yield the 'key' entries of all pages of 'endpoint'

        'params' is updated in place with the position of the next page.
        """
        page_size = params.get("limit", 24)
        warned = False

        while True:
            data = self._call(endpoint, params)

            results = data.get(key)
            if results is None:
                return

            # a short page with more to come is reported once
            if len(results) < page_size and not warned and \
                    data.get("hasMore"):
                warned = True
                self.log.warning(
                    "Private deviations detected! "
                    "Provide login credentials or session cookies "
                    "to be able to access them.")
            yield from results

            if not data.get("hasMore"):
                return

            # prefer a cursor, then an explicit offset from the response;
            # otherwise advance the current offset by the page's length
            if "nextCursor" in data:
                params.update({
                    "offset": None,
                    "cursor": data["nextCursor"],
                })
            elif "nextOffset" in data:
                params.update({
                    "offset": data["nextOffset"],
                    "cursor": None,
                })
            elif params.get("offset") is None:
                return
            else:
                params["offset"] = int(params["offset"]) + len(results)

    def _ids_watching(self, user):
        """Return the (user ID, 'watching' module ID) pair of 'user'

        Both values are read from the user's 'about' page, which is also
        used to refresh the CSRF token.

        Raises:
            exception.NotFoundError: the page has no 'watching' module.
        """
        about_url = f"{self.extractor.root}/{user}/about"
        page = self.request(about_url).text

        gruser_id = text.extr(page, ' data-userid="', '"')

        marker = page.find('\\"name\\":\\"watching\\"')
        if marker < 0:
            raise exception.NotFoundError("'watching' module ID")
        # the module's ID is the closest '"id":' before its name
        raw_id = text.rextr(page, '\\"id\\":', ',', marker)
        module_id = raw_id.strip('" ')

        self._fetch_csrf_token(page)
        return gruser_id, module_id

    def _fetch_csrf_token(self, page=None):
        """Extract the CSRF token from 'page', store and return it

        The site's front page is requested when no page is given.
        """
        if page is None:
            page = self.request(self.extractor.root + "/").text
        token = text.extr(page, "window.__CSRF_TOKEN__ = '", "'")
        self.csrf_token = token
        return token
1822
1823
1824
@memcache(keyarg=1)
def _user_details(extr, name):
    """Return the "user" object of the profile of 'name'

    Any error while querying or reading the profile yields None.
    """
    try:
        profile = extr.api.user_profile(name)
        details = profile["user"]
    except Exception:
        # best effort: a missing profile is not an error here
        details = None
    return details
1830
1831
1832
@cache(maxage=36500*86400, keyarg=0)
def _refresh_token_cache(token):
    """Return the initial cache value for a refresh token key

    Keys starting with "#" have no value of their own and give None;
    any other token is its own value.
    """
    is_placeholder = bool(token) and token[0] == "#"
    return None if is_placeholder else token
1837
1838
1839
@cache(maxage=28*86400, keyarg=1)
def _login_impl(extr, username, password):
    """Sign in with username and password; return the session cookies

    The hidden fields of the login page are submitted together with the
    credentials. A sign-in response without redirect history is treated
    as a failed login.

    Raises:
        exception.AuthenticationError: when the login did not succeed.
    """
    extr.log.info("Logging in as %s", username)

    login_page = extr.request("https://www.deviantart.com/users/login").text

    # collect name/value pairs of all hidden <input> elements
    hidden_inputs = text.extract_iter(
        login_page, '<input type="hidden" name="', '"/>')
    form = {}
    for field in hidden_inputs:
        name, _, value = field.partition('" value="')
        form[name] = value

    # only warn about a CAPTCHA; the login is attempted regardless
    challenge = form.get("challenge")
    if challenge and challenge != "0":
        extr.log.warning("Login requires solving a CAPTCHA")
        extr.log.debug(challenge)

    form.update({
        "username": username,
        "password": password,
        "remember": "on",
    })

    extr.sleep(2.0, "login")
    response = extr.request(
        "https://www.deviantart.com/_sisu/do/signin",
        method="POST", data=form)

    if not response.history:
        raise exception.AuthenticationError()

    cookies = {}
    for cookie in extr.cookies:
        cookies[cookie.name] = cookie.value
    return cookies
1871
1872
1873
# digit characters, in order of value, for the base36 conversions below
_ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"
1874
1875
1876
def id_from_base36(base36):
    """Decode a base36 string with util.bdecode() and _ALPHABET"""
    decoded = util.bdecode(base36, _ALPHABET)
    return decoded
1878
1879
1880
def base36_from_id(deviation_id):
    """Encode a deviation ID (int or numeric string) as base36"""
    number = int(deviation_id)
    return util.bencode(number, _ALPHABET)
1882
1883
1884
def eclipse_media(media, format="preview"):
    """Build the URL for one format of an Eclipse 'media' object

    Returns a (url, formats) tuple, where 'formats' maps the "t" value
    of every entry in media["types"] to that entry.

    Without tokens the URL is just the base URI. With exactly one token
    the "c" path of the requested format (if it has one) is added with
    "<prettyName>" filled in. The last token is always appended as the
    'token' query parameter.
    """
    parts = [media["baseUri"]]

    formats = {}
    for entry in media["types"]:
        formats[entry["t"]] = entry

    tokens = media.get("token") or ()
    if tokens:
        if len(tokens) <= 1:
            selected = formats[format]
            if "c" in selected:
                path = selected["c"].replace(
                    "<prettyName>", media["prettyName"])
                parts.append(path)
        parts.extend(("?token=", tokens[-1]))

    return "".join(parts), formats
1902
1903