Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/deviantart.py
5399 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2015-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for https://www.deviantart.com/"""
10
11
from .common import Extractor, Message, Dispatch
12
from .. import text, util, exception
13
from ..cache import cache, memcache
14
import collections
15
import mimetypes
16
import binascii
17
import time
18
19
# URL prefix shared by the user-based extractors. The user name is captured
# either from the path ("deviantart.com/USER", group 1) or from the
# subdomain ("USER.deviantart.com", group 2); the host may carry an
# optional "fx" prefix and "/watch/" paths are excluded.
BASE_PATTERN = (
    r"(?:https?://)?(?:"
    r"(?:www\.)?(?:fx)?deviantart\.com/(?!watch/)([\w-]+)|"
    r"(?!www\.)([\w-]+)\.(?:fx)?deviantart\.com)"
)

# 'usericon' value that is treated as "no custom avatar" and skipped
DEFAULT_AVATAR = "https://a.deviantart.net/avatars/default.gif"
25
26
27
class DeviantartExtractor(Extractor):
28
"""Base class for deviantart extractors"""
29
category = "deviantart"
30
root = "https://www.deviantart.com"
31
directory_fmt = ("{category}", "{username}")
32
filename_fmt = "{category}_{index}_{title}.{extension}"
33
cookies_domain = ".deviantart.com"
34
cookies_names = ("auth", "auth_secure", "userinfo")
35
_last_request = 0
36
37
def __init__(self, match):
    """Remember the lowercased user name captured by the URL pattern"""
    Extractor.__init__(self, match)
    # the name is in group 1 (path style) or group 2 (subdomain style)
    captured = match[1] or match[2] or ""
    self.user = captured.lower()
    self.offset = 0
41
42
def _init(self):
    """Read all config options and prepare the helpers derived from them"""
    config = self.config

    self.jwt = config("jwt", False)
    self.flat = config("flat", True)
    self.extra = config("extra", False)
    self.quality = config("quality", "100")
    self.original = config("original", True)
    self.previews = config("previews", False)
    self.intermediary = config("intermediary", True)
    self.comments_avatars = config("comments-avatars", False)
    # downloading comment avatars implies fetching comments
    self.comments = self.comments_avatars or config("comments", False)

    self.api = DeviantartOAuthAPI(self)
    self.eclipse_api = None
    self.group = False
    self._premium_cache = {}

    # list of users to unwatch again when finalizing, or None if disabled
    if config("auto-unwatch"):
        self.unwatch = []
        self.finalize = self._unwatch_premium
    else:
        self.unwatch = None

    # turn 'quality' into a replacement string plus a matching
    # substitution function
    quality = self.quality
    if quality:
        if quality == "png":
            pattern = r"-fullview\.[a-z0-9]+\?"
            self.quality = "-fullview.png?"
        else:
            pattern = r",q_\d+"
            self.quality = f",q_{quality}"
        self.quality_sub = util.re(pattern).sub

    if self.intermediary:
        self.intermediary_subn = util.re(
            r"(/f/[^/]+/[^/]+)/v\d+/.*").subn

    # a string value starting with "image" selects the image-only updater
    original = self.original
    images_only = (isinstance(original, str) and
                   original.lower().startswith("image"))
    if images_only:
        self.original = True
        self._update_content = self._update_content_image
    else:
        self._update_content = self._update_content_default

    all_previews = (self.previews == "all")
    if all_previews:
        self.previews = True
    self.previews_images = all_previews

    journals = config("journals", "html")
    if journals == "html":
        self.commit_journal = self._commit_journal_html
    elif journals == "text":
        self.commit_journal = self._commit_journal_text
    else:
        self.commit_journal = None
94
95
def request(self, url, **kwargs):
    """Send a request and wait out '403 Request blocked.' responses

    'fatal' defaults to False unless the caller specifies it.
    """
    kwargs.setdefault("fatal", False)
    while True:
        response = Extractor.request(self, url, **kwargs)
        blocked = (response.status_code == 403 and
                   b"Request blocked." in response.content)
        if not blocked:
            return response
        self.wait(seconds=300, reason="CloudFront block")
104
105
def skip(self, num):
    """Increase the start offset by 'num' and return 'num'"""
    self.offset = self.offset + num
    return num
108
109
def login(self):
    """Ensure login cookies are present

    Returns True when the cookies already exist or were obtained with
    the configured credentials, and None when no username is available.
    """
    if not self.cookies_check(self.cookies_names):
        username, password = self._get_auth_info()
        if not username:
            return None
        self.cookies_update(_login_impl(self, username, password))
    return True
117
118
def items(self):
    """Yield Directory, Url and Queue messages for all deviations"""
    if self.user:
        group = self.config("group", True)
        if group:
            user = _user_details(self, self.user)
            if user:
                self.user = user["username"]
                self.group = False
            elif group == "skip":
                self.log.info("Skipping group '%s'", self.user)
                raise exception.AbortExtraction()
            else:
                # no user details available: handle the name as a group
                self.subcategory = "group-" + self.subcategory
                self.group = True

    for deviation in self.deviations():
        # (url, metadata) tuples get handed to another extractor
        if isinstance(deviation, tuple):
            queue_url, queue_data = deviation
            yield Message.Queue, queue_url, queue_data
            continue

        if deviation["is_deleted"]:
            # prevent crashing in case the deviation really is
            # deleted
            self.log.debug(
                "Skipping %s (deleted)", deviation["deviationid"])
            continue

        if deviation.get("tier_access") == "locked":
            self.log.debug(
                "Skipping %s (access locked)", deviation["deviationid"])
            continue

        if "premium_folder_data" in deviation:
            premium = self._fetch_premium(deviation)
            if not premium:
                continue
            deviation.update(premium)

        self.prepare(deviation)
        yield Message.Directory, deviation

        # main file
        if "content" in deviation:
            yield self.commit(deviation, self._extract_content(deviation))
        elif deviation["is_downloadable"]:
            download = self.api.deviation_download(
                deviation["deviationid"])
            deviation["is_original"] = True
            yield self.commit(deviation, download)

        # video with the highest 'quality' value
        if deviation.get("videos"):
            best = max(
                deviation["videos"],
                key=lambda vid: text.parse_int(vid["quality"][:-1]))
            deviation["is_original"] = False
            yield self.commit(deviation, best)

        if "flash" in deviation:
            deviation["is_original"] = True
            yield self.commit(deviation, deviation["flash"])

        if self.commit_journal:
            journal = self._extract_journal(deviation)
            if journal:
                if self.extra:
                    deviation["_journal"] = journal["html"]
                deviation["is_original"] = True
                yield self.commit_journal(deviation, journal)

        if self.comments_avatars:
            for comment in deviation["comments"]:
                author = comment["user"]
                name = author["username"].lower()
                if author["usericon"] == DEFAULT_AVATAR:
                    self.log.debug(
                        "Skipping avatar of '%s' (default)", name)
                    continue
                # store the known user object in the _user_details cache
                _user_details.update(name, author)

                comment["_extractor"] = DeviantartAvatarExtractor
                yield Message.Queue, f"{self.root}/{name}/avatar/", comment

        if self.previews and "preview" in deviation:
            preview = deviation["preview"]
            deviation["is_preview"] = True
            if self.previews_images:
                yield self.commit(deviation, preview)
            else:
                # only download previews of non-image deviations
                mtype = mimetypes.guess_type(
                    "a." + deviation["extension"], False)[0]
                if mtype and not mtype.startswith("image/"):
                    yield self.commit(deviation, preview)
            del deviation["is_preview"]

        if not self.extra:
            continue

        # ref: https://www.deviantart.com
        #      /developers/http/v1/20210526/object/editor_text
        # the value of "features" is a JSON string with forward
        # slashes escaped
        if "text_content" in deviation:
            features = deviation["text_content"]["body"]["features"]
            text_content = features.replace("\\/", "/")
        else:
            text_content = None

        sources = (text_content, deviation.get("description"),
                   deviation.get("_journal"))
        for source in sources:
            if source is None:
                continue
            for found in DeviantartStashExtractor.pattern.finditer(source):
                stash_url = text.ensure_http_scheme(found[0])
                deviation["_extractor"] = DeviantartStashExtractor
                yield Message.Queue, stash_url, deviation
229
230
def deviations(self):
    """Return an iterable containing all relevant Deviation-objects

    This base implementation returns nothing; 'items()' iterates over
    the result of this method.
    """
    return None
232
233
def prepare(self, deviation):
    """Adjust the contents of a Deviation-object

    Ensures 'index' and 'index_base36' exist, sets 'username',
    '_username', 'date' and 'filename', converts 'published_time' to
    an integer, and attaches 'comments' when comment extraction is
    enabled.
    """
    if "index" not in deviation:
        try:
            url = deviation["url"]
            if url.startswith((
                "https://www.deviantart.com/stash/", "https://sta.sh",
            )):
                # take the base36 ID from the 6th '/'-separated part of
                # the content URL: drop its first character and
                # everything from the first '-' on
                filename = deviation["content"]["src"].split("/")[5]
                deviation["index_base36"] = filename.partition("-")[0][1:]
                deviation["index"] = id_from_base36(
                    deviation["index_base36"])
            else:
                deviation["index"] = text.parse_int(
                    url.rpartition("-")[2])
        except (KeyError, IndexError):
            # missing fields or a content URL with fewer path segments
            # than expected: fall back to a neutral index instead of
            # aborting the whole extraction
            deviation["index"] = 0
            deviation["index_base36"] = "0"
    if "index_base36" not in deviation:
        deviation["index_base36"] = base36_from_id(deviation["index"])

    if self.user:
        deviation["username"] = self.user
        deviation["_username"] = self.user.lower()
    else:
        deviation["username"] = deviation["author"]["username"]
        deviation["_username"] = deviation["username"].lower()

    deviation["published_time"] = text.parse_int(
        deviation["published_time"])
    deviation["date"] = text.parse_timestamp(
        deviation["published_time"])

    if self.comments:
        # skip the API call when the stats report zero comments
        deviation["comments"] = (
            self._extract_comments(deviation["deviationid"], "deviation")
            if deviation["stats"]["comments"] else ()
        )

    # filename metadata
    sub = util.re(r"\W").sub
    deviation["filename"] = "".join((
        sub("_", deviation["title"].lower()), "_by_",
        sub("_", deviation["author"]["username"].lower()), "-d",
        deviation["index_base36"],
    ))
278
279
def commit(self, deviation, target):
    """Return a Url message for downloading 'target'

    A copy of 'target' is stored as deviation['target'], and the file
    extension is taken from the target's 'filename' or, if there is
    none, from its 'src' URL.
    """
    src = target["src"]
    extension = text.ext_from_url(target.get("filename") or src)

    info = target.copy()
    info["filename"] = deviation["filename"]
    info["extension"] = extension

    deviation["target"] = info
    deviation["extension"] = extension
    # URLs containing "/v1/" are not regarded as original files
    deviation.setdefault("is_original", "/v1/" not in src)
    return Message.Url, src, deviation
289
290
def _commit_journal_html(self, deviation, journal):
    """Render a journal as an HTML document and return a Url message

    'journal' must contain an 'html' entry and may contain 'css'.
    The rendered document takes the place of the URL in the returned
    (Message.Url, html, deviation) tuple and the extension is set
    to "htm".
    """
    title = text.escape(deviation["title"])
    url = deviation["url"]
    thumbs = deviation.get("thumbs") or deviation.get("files")
    html = journal["html"]
    shadow = SHADOW_TEMPLATE.format_map(thumbs[0]) if thumbs else ""

    if not html:
        self.log.warning("%s: Empty journal content", deviation["index"])
        # normalize None and other falsy values to an empty string so
        # that the str operations below cannot raise after the warning
        html = ""

    if "css" in journal:
        css, cls = journal["css"], "withskin"
    elif html.startswith("<style"):
        # split a leading <style> element into CSS and remaining HTML
        css, _, html = html.partition("</style>")
        css = css.partition(">")[2]
        cls = "withskin"
    else:
        css, cls = "", "journal-green"

    # only the first 250 characters are searched for a custom header box
    if html.find('<div class="boxtop journaltop">', 0, 250) != -1:
        needle = '<div class="boxtop journaltop">'
        header = HEADER_CUSTOM_TEMPLATE.format(
            title=title, url=url, date=deviation["date"],
        )
    else:
        needle = '<div usr class="gr">'
        username = deviation["author"]["username"]
        urlname = deviation.get("username") or username.lower()
        header = HEADER_TEMPLATE.format(
            title=title,
            url=url,
            userurl=f"{self.root}/{urlname}/",
            username=username,
            date=deviation["date"],
        )

    # replace the header placeholder if there is one; otherwise wrap
    # header and content with the "extra" template
    if needle in html:
        html = html.replace(needle, header, 1)
    else:
        html = JOURNAL_TEMPLATE_HTML_EXTRA.format(header, html)

    html = JOURNAL_TEMPLATE_HTML.format(
        title=title, html=html, shadow=shadow, css=css, cls=cls)

    deviation["extension"] = "htm"
    return Message.Url, html, deviation
336
337
def _commit_journal_text(self, deviation, journal):
    """Render a journal as plain text and return a Url message

    The text takes the place of the URL in the returned
    (Message.Url, txt, deviation) tuple and the extension is set
    to "txt".
    """
    html = journal["html"]
    if not html:
        self.log.warning("%s: Empty journal content", deviation["index"])
        # normalize None and other falsy values to an empty string so
        # that 'rpartition' below cannot raise after the warning
        html = ""
    elif html.startswith("<style"):
        # drop a leading <style> element
        html = html.partition("</style>")[2]

    # ignore everything from the last "<script" on, if there is one
    head, _, tail = html.rpartition("<script")
    content = "\n".join(
        text.unescape(text.remove_html(txt))
        for txt in (head or tail).split("<br />")
    )
    txt = JOURNAL_TEMPLATE_TEXT.format(
        title=deviation["title"],
        username=deviation["author"]["username"],
        date=deviation["date"],
        content=content,
    )

    deviation["extension"] = "txt"
    return Message.Url, txt, deviation
357
358
def _extract_journal(self, deviation):
    """Return a {"html": ...} dict for journal-like deviations, or None"""
    if "excerpt" not in deviation:
        if "body" in deviation:
            return {"html": deviation.pop("body")}
        return None

    # use an already downloaded page if the deviation carries one
    try:
        page = deviation.pop("_page")
    except KeyError:
        page = self._limited_request(deviation["url"]).text

    # extract journal html from webpage
    html = text.extr(
        page,
        "<h2>Literature Text</h2></span><div>",
        "</div></section></div></div>")
    if html:
        return {"html": html}

    self.log.debug("%s: Failed to extract journal HTML from webpage. "
                   "Falling back to __INITIAL_STATE__ markup.",
                   deviation["index"])

    # parse __INITIAL_STATE__ as fallback
    raw_state = text.extr(
        page, 'window.__INITIAL_STATE__ = JSON.parse("', '");')
    state = util.json_loads(
        raw_state
        .replace("\\\\", "\\")
        .replace("\\'", "'")
        .replace('\\"', '"'))
    entities = state["@@entities"]["deviation"]
    content = entities.popitem()[1]["textContent"]

    html = self._textcontent_to_html(deviation, content)
    if html:
        return {"html": html}
    return {"html": content["excerpt"].replace("\n", "<br />")}
395
396
def _textcontent_to_html(self, deviation, content):
397
html = content["html"]
398
markup = html.get("markup")
399
400
if not markup or markup[0] != "{":
401
return markup
402
403
if html["type"] == "tiptap":
404
try:
405
return self._tiptap_to_html(markup)
406
except Exception as exc:
407
self.log.debug("", exc_info=exc)
408
self.log.error("%s: '%s: %s'", deviation["index"],
409
exc.__class__.__name__, exc)
410
411
self.log.warning("%s: Unsupported '%s' markup.",
412
deviation["index"], html["type"])
413
414
def _tiptap_to_html(self, markup):
    """Convert a tiptap JSON document string to HTML"""
    document = util.json_loads(markup)["document"]

    fragments = ['<div data-editor-viewer="1" '
                 'class="_83r8m _2CKTq _3NjDa mDnFl">']
    for node in document["content"]:
        self._tiptap_process_content(fragments, node)
    fragments.append("</div>")

    return "".join(fragments)
425
426
def _tiptap_process_content(self, html, content):
427
type = content["type"]
428
429
if type == "paragraph":
430
if children := content.get("content"):
431
html.append('<p style="')
432
433
if attrs := content.get("attrs"):
434
if align := attrs.get("textAlign"):
435
html.append("text-align:")
436
html.append(align)
437
html.append(";")
438
self._tiptap_process_indentation(html, attrs)
439
html.append('">')
440
else:
441
html.append('margin-inline-start:0px">')
442
443
for block in children:
444
self._tiptap_process_content(html, block)
445
html.append("</p>")
446
else:
447
html.append('<p class="empty-p"><br/></p>')
448
449
elif type == "text":
450
self._tiptap_process_text(html, content)
451
452
elif type == "heading":
453
attrs = content["attrs"]
454
level = str(attrs.get("level") or "3")
455
456
html.append("<h")
457
html.append(level)
458
html.append(' style="text-align:')
459
html.append(attrs.get("textAlign") or "left")
460
html.append('">')
461
html.append('<span style="')
462
self._tiptap_process_indentation(html, attrs)
463
html.append('">')
464
self._tiptap_process_children(html, content)
465
html.append("</span></h")
466
html.append(level)
467
html.append(">")
468
469
elif type in ("listItem", "bulletList", "orderedList", "blockquote"):
470
c = type[1]
471
tag = (
472
"li" if c == "i" else
473
"ul" if c == "u" else
474
"ol" if c == "r" else
475
"blockquote"
476
)
477
html.append("<" + tag + ">")
478
self._tiptap_process_children(html, content)
479
html.append("</" + tag + ">")
480
481
elif type == "anchor":
482
attrs = content["attrs"]
483
html.append('<a id="')
484
html.append(attrs.get("id") or "")
485
html.append('" data-testid="anchor"></a>')
486
487
elif type == "hardBreak":
488
html.append("<br/><br/>")
489
490
elif type == "horizontalRule":
491
html.append("<hr/>")
492
493
elif type == "da-deviation":
494
self._tiptap_process_deviation(html, content)
495
496
elif type == "da-mention":
497
user = content["attrs"]["user"]["username"]
498
html.append('<a href="https://www.deviantart.com/')
499
html.append(user.lower())
500
html.append('" data-da-type="da-mention" data-user="">@<!-- -->')
501
html.append(user)
502
html.append('</a>')
503
504
elif type == "da-gif":
505
attrs = content["attrs"]
506
width = str(attrs.get("width") or "")
507
height = str(attrs.get("height") or "")
508
url = text.escape(attrs.get("url") or "")
509
510
html.append('<div data-da-type="da-gif" data-width="')
511
html.append(width)
512
html.append('" data-height="')
513
html.append(height)
514
html.append('" data-alignment="')
515
html.append(attrs.get("alignment") or "")
516
html.append('" data-url="')
517
html.append(url)
518
html.append('" class="t61qu"><video role="img" autoPlay="" '
519
'muted="" loop="" style="pointer-events:none" '
520
'controlsList="nofullscreen" playsInline="" '
521
'aria-label="gif" data-da-type="da-gif" width="')
522
html.append(width)
523
html.append('" height="')
524
html.append(height)
525
html.append('" src="')
526
html.append(url)
527
html.append('" class="_1Fkk6"></video></div>')
528
529
elif type == "da-video":
530
src = text.escape(content["attrs"].get("src") or "")
531
html.append('<div data-testid="video" data-da-type="da-video" '
532
'data-src="')
533
html.append(src)
534
html.append('" class="_1Uxvs"><div data-canfs="yes" data-testid="v'
535
'ideo-inner" class="main-video" style="width:780px;hei'
536
'ght:438px"><div style="width:780px;height:438px">'
537
'<video src="')
538
html.append(src)
539
html.append('" style="width:100%;height:100%;" preload="auto" cont'
540
'rols=""></video></div></div></div>')
541
542
else:
543
self.log.warning("Unsupported content type '%s'", type)
544
545
def _tiptap_process_text(self, html, content):
    """Append the HTML for a tiptap text node, including its marks

    Opening tags are written in mark order and closed in reverse
    order after the escaped text. All attribute values of a link mark
    ('href', 'target', 'rel') are HTML-escaped; a 'target' of None is
    omitted, since a None fragment would make the final str.join fail.
    """
    # mark type -> (opening tag, closing tag)
    simple_marks = {
        "bold"     : ("<strong>", "</strong>"),
        "italic"   : ("<em>", "</em>"),
        "underline": ("<u>", "</u>"),
        "strike"   : ("<s>", "</s>"),
    }

    closing = []
    for mark in content.get("marks") or ():
        mark_type = mark["type"]

        if mark_type == "link":
            attrs = mark.get("attrs") or {}
            html.append('<a href="')
            html.append(text.escape(attrs.get("href") or ""))
            target = attrs.get("target")
            if target is not None:
                html.append('" target="')
                html.append(text.escape(str(target)))
            html.append('" rel="')
            html.append(text.escape(
                attrs.get("rel") or "noopener noreferrer nofollow ugc"))
            html.append('">')
            closing.append("</a>")

        elif mark_type in simple_marks:
            opening, closer = simple_marks[mark_type]
            html.append(opening)
            closing.append(closer)

        elif mark_type == "textStyle" and len(mark) <= 1:
            # a 'textStyle' mark without any other keys changes nothing
            pass

        else:
            self.log.warning("Unsupported text marker '%s'", mark_type)

    html.append(text.escape(content["text"]))
    # close tags in the opposite order they were opened in
    html.extend(reversed(closing))
583
584
def _tiptap_process_children(self, html, content):
585
if children := content.get("content"):
586
for block in children:
587
self._tiptap_process_content(html, block)
588
589
def _tiptap_process_indentation(self, html, attrs):
590
itype = ("text-indent" if attrs.get("indentType") == "line" else
591
"margin-inline-start")
592
isize = str((attrs.get("indentation") or 0) * 24)
593
html.append(itype + ":" + isize + "px")
594
595
def _tiptap_process_deviation(self, html, content):
596
dev = content["attrs"]["deviation"]
597
media = dev.get("media") or ()
598
599
html.append('<div class="jjNX2">')
600
html.append('<figure class="Qf-HY" data-da-type="da-deviation" '
601
'data-deviation="" '
602
'data-width="" data-link="" data-alignment="center">')
603
604
if "baseUri" in media:
605
url, formats = self._eclipse_media(media)
606
full = formats["fullview"]
607
608
html.append('<a href="')
609
html.append(text.escape(dev["url"]))
610
html.append('" class="_3ouD5" style="margin:0 auto;display:flex;'
611
'align-items:center;justify-content:center;'
612
'overflow:hidden;width:780px;height:')
613
html.append(str(780 * full["h"] / full["w"]))
614
html.append('px">')
615
616
html.append('<img src="')
617
html.append(text.escape(url))
618
html.append('" alt="')
619
html.append(text.escape(dev["title"]))
620
html.append('" style="width:100%;max-width:100%;display:block"/>')
621
html.append("</a>")
622
623
elif "textContent" in dev:
624
html.append('<div class="_32Hs4" style="width:350px">')
625
626
html.append('<a href="')
627
html.append(text.escape(dev["url"]))
628
html.append('" class="_3ouD5">')
629
630
html.append('''\
631
<section class="Q91qI aG7Yi" style="width:350px;height:313px">\
632
<div class="_16ECM _1xMkk" aria-hidden="true">\
633
<svg height="100%" viewBox="0 0 15 12" preserveAspectRatio="xMidYMin slice" \
634
fill-rule="evenodd">\
635
<linearGradient x1="87.8481761%" y1="16.3690766%" \
636
x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
637
<stop stop-color="#00FF62" offset="0%"></stop>\
638
<stop stop-color="#3197EF" stop-opacity="0" offset="100%"></stop>\
639
</linearGradient>\
640
<text class="_2uqbc" fill="url(#app-root-3)" text-anchor="end" x="15" y="11">J\
641
</text></svg></div><div class="_1xz9u">Literature</div><h3 class="_2WvKD">\
642
''')
643
html.append(text.escape(dev["title"]))
644
html.append('</h3><div class="_2CPLm">')
645
html.append(text.escape(dev["textContent"]["excerpt"]))
646
html.append('</div></section></a></div>')
647
648
html.append('</figure></div>')
649
650
def _extract_content(self, deviation):
651
content = deviation["content"]
652
653
if self.original and deviation["is_downloadable"]:
654
self._update_content(deviation, content)
655
return content
656
657
if self.jwt:
658
self._update_token(deviation, content)
659
return content
660
661
if content["src"].startswith("https://images-wixmp-"):
662
if self.intermediary and deviation["index"] <= 790677560:
663
# https://github.com/r888888888/danbooru/issues/4069
664
intermediary, count = self.intermediary_subn(
665
r"/intermediary\1", content["src"], 1)
666
if count:
667
deviation["is_original"] = False
668
deviation["_fallback"] = (content["src"],)
669
content["src"] = intermediary
670
if self.quality:
671
content["src"] = self.quality_sub(
672
self.quality, content["src"], 1)
673
674
return content
675
676
def _find_folder(self, folders, name, uuid):
677
if uuid.isdecimal():
678
match = util.re(
679
"(?i)" + name.replace("-", "[^a-z0-9]+") + "$").match
680
for folder in folders:
681
if match(folder["name"]):
682
return folder
683
elif folder.get("has_subfolders"):
684
for subfolder in folder["subfolders"]:
685
if match(subfolder["name"]):
686
return subfolder
687
else:
688
for folder in folders:
689
if folder["folderid"] == uuid:
690
return folder
691
elif folder.get("has_subfolders"):
692
for subfolder in folder["subfolders"]:
693
if subfolder["folderid"] == uuid:
694
return subfolder
695
raise exception.NotFoundError("folder")
696
697
def _folder_urls(self, folders, category, extractor):
698
base = f"{self.root}/{self.user}/{category}/"
699
for folder in folders:
700
folder["_extractor"] = extractor
701
url = f"{base}{folder['folderid']}/{folder['name']}"
702
yield url, folder
703
704
def _update_content_default(self, deviation, content):
705
if "premium_folder_data" in deviation or deviation.get("is_mature"):
706
public = False
707
else:
708
public = None
709
710
data = self.api.deviation_download(deviation["deviationid"], public)
711
content.update(data)
712
deviation["is_original"] = True
713
714
def _update_content_image(self, deviation, content):
715
data = self.api.deviation_download(deviation["deviationid"])
716
url = data["src"].partition("?")[0]
717
mtype = mimetypes.guess_type(url, False)[0]
718
if mtype and mtype.startswith("image/"):
719
content.update(data)
720
deviation["is_original"] = True
721
722
def _update_token(self, deviation, content):
723
"""Replace JWT to be able to remove width/height limits
724
725
All credit goes to @Ironchest337
726
for discovering and implementing this method
727
"""
728
url, sep, _ = content["src"].partition("/v1/")
729
if not sep:
730
return
731
732
# 'images-wixmp' returns 401 errors, but just 'wixmp' still works
733
url = url.replace("//images-wixmp", "//wixmp", 1)
734
735
# header = b'{"typ":"JWT","alg":"none"}'
736
payload = (
737
b'{"sub":"urn:app:","iss":"urn:app:","obj":[[{"path":"/f/' +
738
url.partition("/f/")[2].encode() +
739
b'"}]],"aud":["urn:service:file.download"]}'
740
)
741
742
deviation["_fallback"] = (content["src"],)
743
deviation["is_original"] = True
744
pl = binascii.b2a_base64(payload).rstrip(b'=\n').decode()
745
content["src"] = (
746
# base64 of 'header' is precomputed as 'eyJ0eX...'
747
f"{url}?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.{pl}.")
748
749
def _extract_comments(self, target_id, target_type="deviation"):
750
results = None
751
comment_ids = [None]
752
753
while comment_ids:
754
comments = self.api.comments(
755
target_id, target_type, comment_ids.pop())
756
757
if results:
758
results.extend(comments)
759
else:
760
results = comments
761
762
# parent comments, i.e. nodes with at least one child
763
parents = {c["parentid"] for c in comments}
764
# comments with more than one reply
765
replies = {c["commentid"] for c in comments if c["replies"]}
766
# add comment UUIDs with replies that are not parent to any node
767
comment_ids.extend(replies - parents)
768
769
return results
770
771
def _limited_request(self, url, **kwargs):
    """Limits HTTP requests to one every 2 seconds"""
    # the timestamp is stored on the class and therefore shared
    # by all extractor instances
    elapsed = time.time() - DeviantartExtractor._last_request
    if elapsed < 2.0:
        self.sleep(2.0 - elapsed, "request")
    try:
        return self.request(url, **kwargs)
    finally:
        # only record the time after a request that did not raise
        pass
779
780
def _fetch_premium(self, deviation):
781
try:
782
return self._premium_cache[deviation["deviationid"]]
783
except KeyError:
784
pass
785
786
if not self.api.refresh_token_key:
787
self.log.warning(
788
"Unable to access premium content (no refresh-token)")
789
self._fetch_premium = lambda _: None
790
return None
791
792
dev = self.api.deviation(deviation["deviationid"], False)
793
folder = deviation["premium_folder_data"]
794
username = dev["author"]["username"]
795
796
# premium_folder_data is no longer present when user has access (#5063)
797
has_access = ("premium_folder_data" not in dev) or folder["has_access"]
798
799
if not has_access and folder["type"] == "watchers" and \
800
self.config("auto-watch"):
801
if self.unwatch is not None:
802
self.unwatch.append(username)
803
if self.api.user_friends_watch(username):
804
has_access = True
805
self.log.info(
806
"Watching %s for premium folder access", username)
807
else:
808
self.log.warning(
809
"Error when trying to watch %s. "
810
"Try again with a new refresh-token", username)
811
812
if has_access:
813
self.log.info("Fetching premium folder data")
814
else:
815
self.log.warning("Unable to access premium content (type: %s)",
816
folder["type"])
817
818
cache = self._premium_cache
819
for dev in self.api.gallery(
820
username, folder["gallery_id"], public=False):
821
cache[dev["deviationid"]] = dev if has_access else None
822
823
return cache.get(deviation["deviationid"])
824
825
def _unwatch_premium(self):
826
for username in self.unwatch:
827
self.log.info("Unwatching %s", username)
828
self.api.user_friends_unwatch(username)
829
830
def _eclipse_media(self, media, format="preview"):
831
url = [media["baseUri"]]
832
833
formats = {
834
fmt["t"]: fmt
835
for fmt in media["types"]
836
}
837
838
if tokens := media.get("token") or ():
839
if len(tokens) <= 1:
840
fmt = formats[format]
841
if "c" in fmt:
842
url.append(fmt["c"].replace(
843
"<prettyName>", media["prettyName"]))
844
url.append("?token=")
845
url.append(tokens[-1])
846
847
return "".join(url), formats
848
849
def _eclipse_to_oauth(self, eclipse_api, deviations):
850
for obj in deviations:
851
deviation = obj["deviation"] if "deviation" in obj else obj
852
deviation_uuid = eclipse_api.deviation_extended_fetch(
853
deviation["deviationId"],
854
deviation["author"]["username"],
855
"journal" if deviation["isJournal"] else "art",
856
)["deviation"]["extended"]["deviationUuid"]
857
yield self.api.deviation(deviation_uuid)
858
859
def _unescape_json(self, json):
860
return json.replace('\\"', '"') \
861
.replace("\\'", "'") \
862
.replace("\\\\", "\\")
863
864
865
class DeviantartUserExtractor(Dispatch, DeviantartExtractor):
    """Extractor for an artist's user profile"""
    pattern = BASE_PATTERN + r"/?$"
    example = "https://www.deviantart.com/USER"

    def items(self):
        """Dispatch to the extractors for the sections of a profile"""
        base = f"{self.root}/{self.user}/"
        sections = (
            (DeviantartAvatarExtractor    , "avatar"),
            (DeviantartBackgroundExtractor, "banner"),
            (DeviantartGalleryExtractor   , "gallery"),
            (DeviantartScrapsExtractor    , "gallery/scraps"),
            (DeviantartJournalExtractor   , "posts"),
            (DeviantartStatusExtractor    , "posts/statuses"),
            (DeviantartFavoriteExtractor  , "favourites"),
        )
        targets = tuple(
            (extractor, base + path)
            for extractor, path in sections
        )
        return self._dispatch_extractors(targets, ("gallery",))
881
882
883
###############################################################################
884
# OAuth #######################################################################
885
886
class DeviantartGalleryExtractor(DeviantartExtractor):
    """Extractor for all deviations from an artist's gallery"""
    subcategory = "gallery"
    archive_fmt = "g_{_username}_{index}.{extension}"
    pattern = (BASE_PATTERN + r"/gallery"
               r"(?:/all|/recommended-for-you|/?\?catpath=)?/?$")
    example = "https://www.deviantart.com/USER/gallery/"

    def deviations(self):
        """Return all gallery deviations, or folder URLs to queue

        Folder URLs are used for groups and when 'flat' is disabled.
        """
        if not self.flat or self.group:
            folders = self.api.gallery_folders(self.user)
            return self._folder_urls(
                folders, "gallery", DeviantartFolderExtractor)
        return self.api.gallery_all(self.user, self.offset)
899
900
901
class DeviantartAvatarExtractor(DeviantartExtractor):
    """Extractor for an artist's avatar"""
    subcategory = "avatar"
    archive_fmt = "a_{_username}_{index}"
    pattern = BASE_PATTERN + r"/avatar"
    example = "https://www.deviantart.com/USER/avatar/"

    def deviations(self):
        """Return deviation-like objects for the user's avatar image(s)"""
        name = self.user.lower()
        user = _user_details(self, name)
        if not user:
            return ()

        icon = user["usericon"]
        if icon == DEFAULT_AVATAR:
            self.log.debug("Skipping avatar of '%s' (default)", name)
            return ()

        # the query string of the icon URL is used as index
        index = icon.rpartition("?")[2] if "?" in icon else "0"

        formats = self.config("formats")
        if not formats:
            big = icon.replace("/avatars/", "/avatars-big/", 1)
            return (self._make_deviation(big, user, index, ""),)

        if isinstance(formats, str):
            formats = formats.replace(" ", "").split(",")

        results = []
        for entry in formats:
            # "NAME.EXT" selects the "avatars-NAME" directory;
            # a plain "EXT" selects "avatars"
            fmt, _, ext = entry.rpartition(".")
            if fmt:
                fmt = "-" + fmt
            url = (f"https://a.deviantart.net/avatars{fmt}"
                   f"/{name[0]}/{name[1]}/{name}.{ext}?{index}")
            results.append(self._make_deviation(url, user, index, fmt))
        return results

    def _make_deviation(self, url, user, index, fmt):
        """Build a minimal deviation object for an avatar URL"""
        deviation = {}
        deviation["author"] = user
        deviation["da_category"] = "avatar"
        deviation["index"] = text.parse_int(index)
        deviation["is_deleted"] = False
        deviation["is_downloadable"] = False
        deviation["published_time"] = 0
        deviation["title"] = "avatar" + fmt
        deviation["stats"] = {"comments": 0}
        deviation["content"] = {"src": url}
        return deviation
953
954
955
class DeviantartBackgroundExtractor(DeviantartExtractor):
    """Extractor for an artist's banner"""
    subcategory = "background"
    archive_fmt = "b_{index}"
    pattern = BASE_PATTERN + r"/ba(?:nner|ckground)"
    example = "https://www.deviantart.com/USER/banner/"

    def deviations(self):
        """Return the profile's cover deviation, or nothing on any error"""
        try:
            profile = self.api.user_profile(self.user.lower())
            cover = profile["cover_deviation"]["cover_deviation"]
        except Exception:
            return ()
        return (cover,)
968
969
970
class DeviantartFolderExtractor(DeviantartExtractor):
    """Extractor for deviations inside an artist's gallery folder"""
    subcategory = "folder"
    directory_fmt = ("{category}", "{username}", "{folder[title]}")
    archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/([^/?#]+)"
    example = "https://www.deviantart.com/USER/gallery/12345/TITLE"

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        self.folder = None
        # groups 3 and 4: folder ID and folder name from the URL
        self.folder_id, self.folder_name = match[3], match[4]

    def deviations(self):
        """Yield subfolder URLs (if enabled) and the folder's deviations"""
        folders = self.api.gallery_folders(self.user)
        folder = self._find_folder(folders, self.folder_name, self.folder_id)

        # Leaving this here for backwards compatibility
        self.folder = {
            "title": folder["name"],
            "uuid" : folder["folderid"],
            "index": self.folder_id,
            "owner": self.user,
            "parent_uuid": folder["parent"],
        }

        # folders marked as subfolder get archived under, and stored
        # inside, their parent folder
        if folder.get("subfolder"):
            self.folder["parent_folder"] = folder["parent_folder"]
            self.archive_fmt = "F_{folder[parent_uuid]}_{index}.{extension}"

            parent_dirs = ("{category}", "{username}",
                           "{folder[parent_folder]}")
            if self.flat:
                self.directory_fmt = parent_dirs
            else:
                self.directory_fmt = parent_dirs + ("{folder[title]}",)

        if folder.get("has_subfolders") and self.config("subfolders", True):
            subfolders = folder["subfolders"]
            for subfolder in subfolders:
                subfolder.update(
                    parent_folder=folder["name"], subfolder=True)
            yield from self._folder_urls(
                subfolders, "gallery", DeviantartFolderExtractor)

        yield from self.api.gallery(self.user, folder["folderid"], self.offset)

    def prepare(self, deviation):
        """Add the current folder's metadata to a prepared deviation"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["folder"] = self.folder
1021
1022
1023
class DeviantartStashExtractor(DeviantartExtractor):
    """Extractor for sta.sh-ed deviations"""
    subcategory = "stash"
    archive_fmt = "{index}.{extension}"
    pattern = (r"(?:https?://)?(?:(?:www\.)?deviantart\.com/stash|sta\.s(h))"
               r"/([a-z0-9]+)")
    example = "https://www.deviantart.com/stash/abcde"

    skip = Extractor.skip

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        # stash URLs carry no user name
        self.user = ""

    def deviations(self, stash_id=None, stash_data=None):
        """Yield the deviation(s) reachable from a stash ID

        Without 'stash_id', the ID is taken from the matched URL.
        A page whose ID starts with "0" and which contains a deviation
        UUID yields that single deviation; any other page is scanned for
        links to further stash entries, which are processed recursively
        with the page's parsed "stash" data as 'stash_data'.
        """
        legacy_url = False
        if stash_id is None:
            legacy_url, stash_id = self.groups

        if legacy_url and stash_id[0] == "2":
            # request the sta.sh URL and adopt the ID of the final URL
            response = self._limited_request("https://sta.sh/" + stash_id)
            stash_id = response.url.rpartition("/")[2]
            page = response.text
        else:
            response = self._limited_request(
                "https://www.deviantart.com/stash/" + stash_id)
            page = response.text

        if stash_id[0] == "0":
            uuid = text.extr(page, '//deviation/', '"')
            if uuid:
                deviation = self.api.deviation(uuid)
                deviation["_page"] = page
                deviation["index"] = text.parse_int(text.extr(
                    page, '\\"deviationId\\":', ','))

                deviation["stash_id"] = stash_id
                if stash_data:
                    folder = stash_data["folder"]
                    deviation.update({
                        "stash_name"       : folder["name"],
                        "stash_folder"     : folder["folderId"],
                        "stash_parent"     : folder["parentId"] or 0,
                        "stash_description":
                            folder["richDescription"]["excerpt"],
                    })
                else:
                    deviation.update({
                        "stash_name"       : "",
                        "stash_description": "",
                        "stash_folder"     : 0,
                        "stash_parent"     : 0,
                    })

                yield deviation
                return

        stash_data = text.extr(page, ',\\"stash\\":', ',\\"@@')
        if stash_data:
            stash_data = util.json_loads(self._unescape_json(stash_data))

        links = text.extract_iter(
            page, 'href="https://www.deviantart.com/stash/', '"')
        for sid in links:
            # skip links back to this page and comment anchors
            if sid != stash_id and not sid.endswith("#comments"):
                yield from self.deviations(sid, stash_data)
1084
1085
1086
class DeviantartFavoriteExtractor(DeviantartExtractor):
    """Extractor for an artist's favorites"""
    subcategory = "favorite"
    directory_fmt = ("{category}", "{username}", "Favourites")
    archive_fmt = "f_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/favourites(?:/all|/?\?catpath=)?/?$"
    example = "https://www.deviantart.com/USER/favourites/"

    def deviations(self):
        """Return all favorites at once, or one URL per collection folder

        With the 'flat' option enabled, every favorited deviation is
        returned from a single listing; otherwise each collection folder
        is handed to DeviantartCollectionExtractor.
        """
        if not self.flat:
            folders = self.api.collections_folders(self.user)
            return self._folder_urls(
                folders, "favourites", DeviantartCollectionExtractor)
        return self.api.collections_all(self.user, self.offset)
1100
1101
1102
class DeviantartCollectionExtractor(DeviantartExtractor):
    """Extractor for a single favorite collection"""
    subcategory = "collection"
    directory_fmt = ("{category}", "{username}", "Favourites",
                     "{collection[title]}")
    archive_fmt = "C_{collection[uuid]}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/favourites/([^/?#]+)/([^/?#]+)"
    example = "https://www.deviantart.com/USER/favourites/12345/TITLE"

    def __init__(self, match):
        """Remember the collection ID and name from the matched URL"""
        DeviantartExtractor.__init__(self, match)
        self.collection_id = match[3]
        self.collection_name = match[4]
        self.collection = None

    def deviations(self):
        """Look up the collection folder and return its deviations"""
        folder = self._find_folder(
            self.api.collections_folders(self.user),
            self.collection_name, self.collection_id)
        self.collection = collection = {
            "title": folder["name"],
            "uuid" : folder["folderid"],
            "index": self.collection_id,
            "owner": self.user,
        }
        return self.api.collections(
            self.user, collection["uuid"], self.offset)

    def prepare(self, deviation):
        """Run the base preparation and attach the collection metadata"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["collection"] = self.collection
1132
1133
1134
class DeviantartJournalExtractor(DeviantartExtractor):
    """Extractor for an artist's journals"""
    subcategory = "journal"
    directory_fmt = ("{category}", "{username}", "Journal")
    archive_fmt = "j_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$"
    example = "https://www.deviantart.com/USER/posts/journals/"

    def deviations(self):
        """Return the user's journal posts, starting at self.offset"""
        journals = self.api.browse_user_journals(self.user, self.offset)
        return journals
1144
1145
1146
class DeviantartStatusExtractor(DeviantartExtractor):
    """Extractor for an artist's status updates"""
    subcategory = "status"
    directory_fmt = ("{category}", "{username}", "Status")
    filename_fmt = "{category}_{index}_{title}_{date}.{extension}"
    archive_fmt = "S_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/posts/statuses"
    example = "https://www.deviantart.com/USER/posts/statuses/"

    def deviations(self):
        """Yield every status update of the user plus its shared items"""
        statuses = self.api.user_statuses(self.user, self.offset)
        for status in statuses:
            yield from self.process_status(status)

    def process_status(self, status):
        """Yield items shared by 'status', followed by 'status' itself

        Shared deviations are yielded as copies and shared statuses are
        processed recursively. A status flagged as deleted is logged and
        not yielded.
        """
        shared = status.get("items") or ()  # do not trust is_share
        for item in shared:
            # shared deviations/statuses
            if "deviation" in item:
                yield item["deviation"].copy()
            if "status" in item:
                yield from self.process_status(item["status"].copy())

        # assume is_deleted == true means necessary fields are missing
        if not status["is_deleted"]:
            yield status
            return
        self.log.warning(
            "Skipping status %s (deleted)", status.get("statusid"))

    def prepare(self, deviation):
        """Fill in the metadata fields expected for a status update

        Objects with a "deviationid" are regular deviations and get
        delegated to the base class implementation.
        """
        if "deviationid" in deviation:
            return DeviantartExtractor.prepare(self, deviation)

        try:
            # the numeric ID is the last non-empty URL path segment
            segments = deviation["url"].split("/")
            deviation["index"] = text.parse_int(segments[-1] or segments[-2])
        except KeyError:
            deviation["index"] = 0

        username = self.user or deviation["author"]["username"]
        deviation["username"] = username
        deviation["_username"] = username.lower()

        date = text.parse_datetime(deviation["ts"])
        deviation["date"] = date
        deviation["published_time"] = int(util.datetime_to_timestamp(date))

        deviation["da_category"] = "Status"
        deviation["category_path"] = "status"
        deviation["is_downloadable"] = False
        deviation["title"] = "Status Update"

        comments_count = deviation.pop("comments_count", 0)
        deviation["stats"] = {"comments": comments_count}
        if self.comments:
            if comments_count:
                deviation["comments"] = self._extract_comments(
                    deviation["statusid"], "status")
            else:
                deviation["comments"] = ()
1205
1206
1207
class DeviantartTagExtractor(DeviantartExtractor):
    """Extractor for deviations from tag searches"""
    subcategory = "tag"
    directory_fmt = ("{category}", "Tags", "{search_tags}")
    archive_fmt = "T_{search_tags}_{index}.{extension}"
    pattern = r"(?:https?://)?www\.deviantart\.com/tag/([^/?#]+)"
    example = "https://www.deviantart.com/tag/TAG"

    def __init__(self, match):
        """Store the unquoted tag from the URL; tag URLs have no user"""
        DeviantartExtractor.__init__(self, match)
        tag = text.unquote(match[1])
        self.tag = tag
        self.user = ""

    def deviations(self):
        """Return deviations carrying the tag, starting at self.offset"""
        results = self.api.browse_tags(self.tag, self.offset)
        return results

    def prepare(self, deviation):
        """Run the base preparation and record the searched tag"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["search_tags"] = self.tag
1226
1227
1228
class DeviantartWatchExtractor(DeviantartExtractor):
    """Extractor for Deviations from watched users"""
    subcategory = "watch"
    # the two empty groups stand in for the user groups of BASE_PATTERN,
    # which the base constructor reads as match[1] and match[2]
    pattern = (r"(?:https?://)?(?:www\.)?deviantart\.com"
               r"/(?:watch/deviations|notifications/watch)()()")
    example = "https://www.deviantart.com/watch/deviations"

    def deviations(self):
        """Return deviations from watched users, starting at self.offset

        The offset is forwarded like in the other listing extractors, so
        a non-zero 'self.offset' is honored instead of being ignored.
        """
        return self.api.browse_deviantsyouwatch(self.offset)
1237
1238
1239
class DeviantartWatchPostsExtractor(DeviantartExtractor):
    """Extractor for Posts from watched users"""
    subcategory = "watch-posts"
    # the two empty groups stand in for the user groups of BASE_PATTERN
    pattern = r"(?:https?://)?(?:www\.)?deviantart\.com/watch/posts()()"
    example = "https://www.deviantart.com/watch/posts"

    def deviations(self):
        """Return posts from watched users"""
        # NOTE(review): 'self.offset' is not forwarded to the API here
        posts = self.api.browse_posts_deviantsyouwatch()
        return posts
1247
1248
1249
###############################################################################
1250
# Eclipse #####################################################################
1251
1252
class DeviantartDeviationExtractor(DeviantartExtractor):
    """Extractor for single deviations"""
    subcategory = "deviation"
    archive_fmt = "g_{_username}_{index}.{extension}"
    pattern = (BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)"
               r"|(?:https?://)?(?:www\.)?(?:fx)?deviantart\.com/"
               r"(?:view/|deviation/|view(?:-full)?\.php/*\?(?:[^#]+&)?id=)"
               r"(\d+)"  # bare deviation ID without slug
               r"|(?:https?://)?fav\.me/d([0-9a-z]+)")  # base36
    example = "https://www.deviantart.com/UsER/art/TITLE-12345"

    skip = Extractor.skip

    def __init__(self, match):
        """Store the deviation type and numeric ID from the matched URL"""
        DeviantartExtractor.__init__(self, match)
        self.type = match[3]
        # exactly one of the three ID groups is set, depending on which
        # pattern alternative matched; fav.me IDs are base36 encoded
        self.deviation_id = \
            match[4] or match[5] or id_from_base36(match[6])

    def deviations(self):
        """Yield the deviation and one entry per additional media file

        Raises exception.NotFoundError when the deviation page does not
        contain a deviation UUID.

        Note that the same dict object is yielded repeatedly and updated
        in place for every additional media file.
        """
        if self.user:
            url = (f"{self.root}/{self.user}"
                   f"/{self.type or 'art'}/{self.deviation_id}")
        else:
            url = f"{self.root}/view/{self.deviation_id}/"

        page = self._limited_request(url, notfound="deviation").text
        uuid = text.extr(page, '"deviationUuid\\":\\"', '\\')
        if not uuid:
            raise exception.NotFoundError("deviation")

        deviation = self.api.deviation(uuid)
        deviation["_page"] = page
        deviation["index_file"] = 0
        deviation["num"] = deviation["count"] = 1

        additional_media = text.extr(page, ',\\"additionalMedia\\":', '}],\\"')
        if not additional_media:
            yield deviation
            return

        # several files share one deviation index: make file names and
        # archive IDs distinguishable
        self.filename_fmt = ("{category}_{index}_{index_file}_{title}_"
                             "{num:>02}.{extension}")
        self.archive_fmt = ("g_{_username}_{index}{index_file:?_//}."
                            "{extension}")

        # re-append the "}]" consumed as end marker by text.extr() above
        additional_media = util.json_loads(self._unescape_json(
            additional_media) + "}]")
        deviation["count"] = 1 + len(additional_media)
        yield deviation

        for post in additional_media:
            uri = self._eclipse_media(post["media"], "fullview")[0]
            deviation["content"]["src"] = uri
            deviation["num"] += 1
            deviation["index_file"] = post["fileId"]
            # Download only works on purchased materials - no way to check
            deviation["is_downloadable"] = False
            yield deviation
1311
1312
1313
class DeviantartScrapsExtractor(DeviantartExtractor):
    """Extractor for an artist's scraps"""
    subcategory = "scraps"
    directory_fmt = ("{category}", "{username}", "Scraps")
    archive_fmt = "s_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
    example = "https://www.deviantart.com/USER/gallery/scraps"

    def deviations(self):
        """Return the user's scraps, listed through the Eclipse API"""
        self.login()

        eclipse_api = DeviantartEclipseAPI(self)
        scraps = eclipse_api.gallery_scraps(self.user, self.offset)
        return self._eclipse_to_oauth(eclipse_api, scraps)
1327
1328
1329
class DeviantartSearchExtractor(DeviantartExtractor):
    """Extractor for deviantart search results"""
    subcategory = "search"
    directory_fmt = ("{category}", "Search", "{search_tags}")
    archive_fmt = "Q_{search_tags}_{index}.{extension}"
    pattern = (r"(?:https?://)?www\.deviantart\.com"
               r"/search(?:/deviations)?/?\?([^#]+)")
    example = "https://www.deviantart.com/search?q=QUERY"
    skip = Extractor.skip

    def __init__(self, match):
        """Parse the search parameters from the URL's query string"""
        DeviantartExtractor.__init__(self, match)
        # the base constructor stored the first pattern group - here the
        # query string, lowercased - in 'self.user'
        query = text.parse_query(self.user)
        self.query = query
        self.search = query.get("q", "")
        self.user = ""

    def deviations(self):
        """Return search results

        Uses the Eclipse API when login() reports success and falls back
        to parsing the HTML search pages otherwise.
        """
        logged_in = self.login()

        eclipse_api = DeviantartEclipseAPI(self)
        if logged_in:
            search = eclipse_api.search_deviations
        else:
            search = self._search_html
        return self._eclipse_to_oauth(eclipse_api, search(self.query))

    def prepare(self, deviation):
        """Run the base preparation and record the search term"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["search_tags"] = self.search

    def _search_html(self, params):
        """Yield minimal deviation objects parsed from HTML search pages

        'params' is updated in place with the cursor of the next page.
        Raises exception.AbortExtraction when a request gets redirected
        to the login page.
        """
        url = self.root + "/search"
        find_links = text.re(
            r'''href="https://www.deviantart.com/([^/?#]+)'''
            r'''/(art|journal)/(?:[^"]+-)?(\d+)''').findall

        while True:
            response = self.request(url, params=params)

            if response.history and "/users/login" in response.url:
                raise exception.AbortExtraction("HTTP redirect to login page")
            page = response.text

            # take every third match and leave out the last three
            for username, kind, deviation_id in find_links(page)[:-3:3]:
                yield {
                    "deviationId": deviation_id,
                    "author": {"username": username},
                    "isJournal": kind == "journal",
                }

            cursor = text.extr(page, r'\"cursor\":\"', '\\')
            if not cursor:
                return
            params["cursor"] = cursor
1379
1380
1381
class DeviantartGallerySearchExtractor(DeviantartExtractor):
    """Extractor for deviantart gallery searches"""
    subcategory = "gallery-search"
    archive_fmt = "g_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/gallery/?\?(q=[^#]+)"
    example = "https://www.deviantart.com/USER/gallery?q=QUERY"

    def __init__(self, match):
        """Keep the raw query string of the matched URL"""
        DeviantartExtractor.__init__(self, match)
        self.query = match[3]

    def deviations(self):
        """Return the results of searching the user's gallery

        The sort order is taken from the URL's 'sort' parameter and
        defaults to "most-recent".
        """
        self.login()

        eclipse_api = DeviantartEclipseAPI(self)
        query = text.parse_query(self.query)
        self.search = query["q"]

        results = eclipse_api.galleries_search(
            self.user,
            self.search,
            self.offset,
            query.get("sort", "most-recent"),
        )
        return self._eclipse_to_oauth(eclipse_api, results)

    def prepare(self, deviation):
        """Run the base preparation and record the search term"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["search_tags"] = self.search
1410
1411
1412
class DeviantartFollowingExtractor(DeviantartExtractor):
    """Extractor for user's watched users"""
    subcategory = "following"
    pattern = BASE_PATTERN + "/(?:about#)?watching"
    example = "https://www.deviantart.com/USER/about#watching"

    def items(self):
        """Yield one Queue message per watched user's profile URL"""
        api = DeviantartOAuthAPI(self)

        for friend in api.user_friends(self.user):
            username = friend["user"]["username"]
            friend["_extractor"] = DeviantartUserExtractor
            yield Message.Queue, f"{self.root}/{username}", friend
1425
1426
1427
###############################################################################
1428
# API Interfaces ##############################################################
1429
1430
class DeviantartOAuthAPI():
1431
"""Interface for the DeviantArt OAuth API
1432
1433
https://www.deviantart.com/developers/http/v1/20160316
1434
"""
1435
CLIENT_ID = "5388"
1436
CLIENT_SECRET = "76b08c69cfb27f26d6161f9ab6d061a1"
1437
1438
def __init__(self, extractor):
1439
self.extractor = extractor
1440
self.log = extractor.log
1441
self.headers = {"dA-minor-version": "20210526"}
1442
self._warn_429 = True
1443
1444
self.delay = extractor.config("wait-min", 0)
1445
self.delay_min = max(2, self.delay)
1446
1447
self.mature = extractor.config("mature", "true")
1448
if not isinstance(self.mature, str):
1449
self.mature = "true" if self.mature else "false"
1450
1451
self.strategy = extractor.config("pagination")
1452
self.folders = extractor.config("folders", False)
1453
self.public = extractor.config("public", True)
1454
1455
if client_id := extractor.config("client-id"):
1456
self.client_id = str(client_id)
1457
self.client_secret = extractor.config("client-secret")
1458
else:
1459
self.client_id = self.CLIENT_ID
1460
self.client_secret = self.CLIENT_SECRET
1461
1462
token = extractor.config("refresh-token")
1463
if token is None or token == "cache":
1464
token = "#" + self.client_id
1465
if not _refresh_token_cache(token):
1466
token = None
1467
self.refresh_token_key = token
1468
1469
metadata = extractor.config("metadata", False)
1470
if not metadata:
1471
metadata = True if extractor.extra else False
1472
if metadata:
1473
self.metadata = True
1474
1475
if isinstance(metadata, str):
1476
if metadata == "all":
1477
metadata = ("submission", "camera", "stats",
1478
"collection", "gallery")
1479
else:
1480
metadata = metadata.replace(" ", "").split(",")
1481
elif not isinstance(metadata, (list, tuple)):
1482
metadata = ()
1483
1484
self._metadata_params = {"mature_content": self.mature}
1485
self._metadata_public = None
1486
if metadata:
1487
# extended metadata
1488
self.limit = 10
1489
for param in metadata:
1490
self._metadata_params["ext_" + param] = "1"
1491
if "ext_collection" in self._metadata_params or \
1492
"ext_gallery" in self._metadata_params:
1493
if token:
1494
self._metadata_public = False
1495
else:
1496
self.log.error("'collection' and 'gallery' metadata "
1497
"require a refresh token")
1498
else:
1499
# base metadata
1500
self.limit = 50
1501
else:
1502
self.metadata = False
1503
self.limit = None
1504
1505
self.log.debug(
1506
"Using %s API credentials (client-id %s)",
1507
"default" if self.client_id == self.CLIENT_ID else "custom",
1508
self.client_id,
1509
)
1510
1511
def browse_deviantsyouwatch(self, offset=0):
1512
"""Yield deviations from users you watch"""
1513
endpoint = "/browse/deviantsyouwatch"
1514
params = {"limit": 50, "offset": offset,
1515
"mature_content": self.mature}
1516
return self._pagination(endpoint, params, public=False)
1517
1518
def browse_posts_deviantsyouwatch(self, offset=0):
1519
"""Yield posts from users you watch"""
1520
endpoint = "/browse/posts/deviantsyouwatch"
1521
params = {"limit": 50, "offset": offset,
1522
"mature_content": self.mature}
1523
return self._pagination(endpoint, params, public=False, unpack=True)
1524
1525
def browse_tags(self, tag, offset=0):
1526
""" Browse a tag """
1527
endpoint = "/browse/tags"
1528
params = {
1529
"tag" : tag,
1530
"offset" : offset,
1531
"limit" : 50,
1532
"mature_content": self.mature,
1533
}
1534
return self._pagination(endpoint, params)
1535
1536
def browse_user_journals(self, username, offset=0):
1537
journals = filter(
1538
lambda post: "/journal/" in post["url"],
1539
self.user_profile_posts(username))
1540
if offset:
1541
journals = util.advance(journals, offset)
1542
return journals
1543
1544
def collections(self, username, folder_id, offset=0):
1545
"""Yield all Deviation-objects contained in a collection folder"""
1546
endpoint = "/collections/" + folder_id
1547
params = {"username": username, "offset": offset, "limit": 24,
1548
"mature_content": self.mature}
1549
return self._pagination(endpoint, params)
1550
1551
def collections_all(self, username, offset=0):
1552
"""Yield all deviations in a user's collection"""
1553
endpoint = "/collections/all"
1554
params = {"username": username, "offset": offset, "limit": 24,
1555
"mature_content": self.mature}
1556
return self._pagination(endpoint, params)
1557
1558
@memcache(keyarg=1)
1559
def collections_folders(self, username, offset=0):
1560
"""Yield all collection folders of a specific user"""
1561
endpoint = "/collections/folders"
1562
params = {"username": username, "offset": offset, "limit": 50,
1563
"mature_content": self.mature}
1564
return self._pagination_list(endpoint, params)
1565
1566
def comments(self, target_id, target_type="deviation",
1567
comment_id=None, offset=0):
1568
"""Fetch comments posted on a target"""
1569
endpoint = f"/comments/{target_type}/{target_id}"
1570
params = {
1571
"commentid" : comment_id,
1572
"maxdepth" : "5",
1573
"offset" : offset,
1574
"limit" : 50,
1575
"mature_content": self.mature,
1576
}
1577
return self._pagination_list(endpoint, params=params, key="thread")
1578
1579
def deviation(self, deviation_id, public=None):
1580
"""Query and return info about a single Deviation"""
1581
endpoint = "/deviation/" + deviation_id
1582
1583
deviation = self._call(endpoint, public=public)
1584
if deviation.get("is_mature") and public is None and \
1585
self.refresh_token_key:
1586
deviation = self._call(endpoint, public=False)
1587
1588
if self.metadata:
1589
self._metadata((deviation,))
1590
if self.folders:
1591
self._folders((deviation,))
1592
return deviation
1593
1594
def deviation_content(self, deviation_id, public=None):
1595
"""Get extended content of a single Deviation"""
1596
endpoint = "/deviation/content"
1597
params = {"deviationid": deviation_id}
1598
content = self._call(endpoint, params=params, public=public)
1599
if public and content["html"].startswith(
1600
' <span class=\"username-with-symbol'):
1601
if self.refresh_token_key:
1602
content = self._call(endpoint, params=params, public=False)
1603
else:
1604
self.log.warning("Private Journal")
1605
return content
1606
1607
def deviation_download(self, deviation_id, public=None):
1608
"""Get the original file download (if allowed)"""
1609
endpoint = "/deviation/download/" + deviation_id
1610
params = {"mature_content": self.mature}
1611
1612
try:
1613
return self._call(
1614
endpoint, params=params, public=public, log=False)
1615
except Exception:
1616
if not self.refresh_token_key:
1617
raise
1618
return self._call(endpoint, params=params, public=False)
1619
1620
def deviation_metadata(self, deviations):
1621
""" Fetch deviation metadata for a set of deviations"""
1622
endpoint = "/deviation/metadata?" + "&".join(
1623
f"deviationids[{num}]={deviation['deviationid']}"
1624
for num, deviation in enumerate(deviations)
1625
)
1626
return self._call(
1627
endpoint,
1628
params=self._metadata_params,
1629
public=self._metadata_public,
1630
)["metadata"]
1631
1632
def gallery(self, username, folder_id, offset=0, extend=True, public=None):
1633
"""Yield all Deviation-objects contained in a gallery folder"""
1634
endpoint = "/gallery/" + folder_id
1635
params = {"username": username, "offset": offset, "limit": 24,
1636
"mature_content": self.mature, "mode": "newest"}
1637
return self._pagination(endpoint, params, extend, public)
1638
1639
def gallery_all(self, username, offset=0):
1640
"""Yield all Deviation-objects of a specific user"""
1641
endpoint = "/gallery/all"
1642
params = {"username": username, "offset": offset, "limit": 24,
1643
"mature_content": self.mature}
1644
return self._pagination(endpoint, params)
1645
1646
@memcache(keyarg=1)
1647
def gallery_folders(self, username, offset=0):
1648
"""Yield all gallery folders of a specific user"""
1649
endpoint = "/gallery/folders"
1650
params = {"username": username, "offset": offset, "limit": 50,
1651
"mature_content": self.mature}
1652
return self._pagination_list(endpoint, params)
1653
1654
def user_friends(self, username, offset=0):
1655
"""Get the users list of friends"""
1656
endpoint = "/user/friends/" + username
1657
params = {"limit": 50, "offset": offset, "mature_content": self.mature}
1658
return self._pagination(endpoint, params)
1659
1660
def user_friends_watch(self, username):
1661
"""Watch a user"""
1662
endpoint = "/user/friends/watch/" + username
1663
data = {
1664
"watch[friend]" : "0",
1665
"watch[deviations]" : "0",
1666
"watch[journals]" : "0",
1667
"watch[forum_threads]": "0",
1668
"watch[critiques]" : "0",
1669
"watch[scraps]" : "0",
1670
"watch[activity]" : "0",
1671
"watch[collections]" : "0",
1672
"mature_content" : self.mature,
1673
}
1674
return self._call(
1675
endpoint, method="POST", data=data, public=False, fatal=False,
1676
).get("success")
1677
1678
def user_friends_unwatch(self, username):
1679
"""Unwatch a user"""
1680
endpoint = "/user/friends/unwatch/" + username
1681
return self._call(
1682
endpoint, method="POST", public=False, fatal=False,
1683
).get("success")
1684
1685
@memcache(keyarg=1)
1686
def user_profile(self, username):
1687
"""Get user profile information"""
1688
endpoint = "/user/profile/" + username
1689
return self._call(endpoint, fatal=False)
1690
1691
def user_profile_posts(self, username):
1692
endpoint = "/user/profile/posts"
1693
params = {"username": username, "limit": 50,
1694
"mature_content": self.mature}
1695
return self._pagination(endpoint, params)
1696
1697
def user_statuses(self, username, offset=0):
1698
"""Yield status updates of a specific user"""
1699
statuses = filter(
1700
lambda post: "/status-update/" in post["url"],
1701
self.user_profile_posts(username))
1702
if offset:
1703
statuses = util.advance(statuses, offset)
1704
return statuses
1705
1706
def authenticate(self, refresh_token_key):
1707
"""Authenticate the application by requesting an access token"""
1708
self.headers["Authorization"] = \
1709
self._authenticate_impl(refresh_token_key)
1710
1711
@cache(maxage=3600, keyarg=1)
1712
def _authenticate_impl(self, refresh_token_key):
1713
"""Actual authenticate implementation"""
1714
url = "https://www.deviantart.com/oauth2/token"
1715
if refresh_token_key:
1716
self.log.info("Refreshing private access token")
1717
data = {"grant_type": "refresh_token",
1718
"refresh_token": _refresh_token_cache(refresh_token_key)}
1719
else:
1720
self.log.info("Requesting public access token")
1721
data = {"grant_type": "client_credentials"}
1722
1723
auth = util.HTTPBasicAuth(self.client_id, self.client_secret)
1724
response = self.extractor.request(
1725
url, method="POST", data=data, auth=auth, fatal=False)
1726
data = response.json()
1727
1728
if response.status_code != 200:
1729
self.log.debug("Server response: %s", data)
1730
raise exception.AuthenticationError(
1731
f"\"{data.get('error_description')}\" ({data.get('error')})")
1732
if refresh_token_key:
1733
_refresh_token_cache.update(
1734
refresh_token_key, data["refresh_token"])
1735
return "Bearer " + data["access_token"]
1736
1737
def _call(self, endpoint, fatal=True, log=True, public=None, **kwargs):
1738
"""Call an API endpoint"""
1739
url = "https://www.deviantart.com/api/v1/oauth2" + endpoint
1740
kwargs["fatal"] = None
1741
1742
if public is None:
1743
public = self.public
1744
1745
while True:
1746
if self.delay:
1747
self.extractor.sleep(self.delay, "api")
1748
1749
self.authenticate(None if public else self.refresh_token_key)
1750
kwargs["headers"] = self.headers
1751
response = self.extractor.request(url, **kwargs)
1752
1753
try:
1754
data = response.json()
1755
except ValueError:
1756
self.log.error("Unable to parse API response")
1757
data = {}
1758
1759
status = response.status_code
1760
if 200 <= status < 400:
1761
if self.delay > self.delay_min:
1762
self.delay -= 1
1763
return data
1764
if not fatal and status != 429:
1765
return None
1766
1767
error = data.get("error_description")
1768
if error == "User not found.":
1769
raise exception.NotFoundError("user or group")
1770
if error == "Deviation not downloadable.":
1771
raise exception.AuthorizationError()
1772
1773
self.log.debug(response.text)
1774
msg = f"API responded with {status} {response.reason}"
1775
if status == 429:
1776
if self.delay < 30:
1777
self.delay += 1
1778
self.log.warning("%s. Using %ds delay.", msg, self.delay)
1779
1780
if self._warn_429 and self.delay >= 3:
1781
self._warn_429 = False
1782
if self.client_id == self.CLIENT_ID:
1783
self.log.info(
1784
"Register your own OAuth application and use its "
1785
"credentials to prevent this error: "
1786
"https://gdl-org.github.io/docs/configuration.html"
1787
"#extractor-deviantart-client-id-client-secret")
1788
else:
1789
if log:
1790
self.log.error(msg)
1791
return data
1792
1793
def _should_switch_tokens(self, results, params):
1794
if len(results) < params["limit"]:
1795
return True
1796
1797
if not self.extractor.jwt:
1798
for item in results:
1799
if item.get("is_mature"):
1800
return True
1801
1802
return False
1803
1804
def _pagination(self, endpoint, params,
1805
extend=True, public=None, unpack=False, key="results"):
1806
warn = True
1807
if public is None:
1808
public = self.public
1809
1810
if self.limit and params["limit"] > self.limit:
1811
params["limit"] = (params["limit"] // self.limit) * self.limit
1812
1813
while True:
1814
data = self._call(endpoint, params=params, public=public)
1815
try:
1816
results = data[key]
1817
except KeyError:
1818
self.log.error("Unexpected API response: %s", data)
1819
return
1820
1821
if unpack:
1822
results = [item["journal"] for item in results
1823
if "journal" in item]
1824
if extend:
1825
if public and self._should_switch_tokens(results, params):
1826
if self.refresh_token_key:
1827
self.log.debug("Switching to private access token")
1828
public = False
1829
continue
1830
elif data["has_more"] and warn:
1831
warn = False
1832
self.log.warning(
1833
"Private or mature deviations detected! "
1834
"Run 'gallery-dl oauth:deviantart' and follow the "
1835
"instructions to be able to access them.")
1836
1837
# "statusid" cannot be used instead
1838
if results and "deviationid" in results[0]:
1839
if self.metadata:
1840
self._metadata(results)
1841
if self.folders:
1842
self._folders(results)
1843
else: # attempt to fix "deleted" deviations
1844
for dev in self._shared_content(results):
1845
if not dev["is_deleted"]:
1846
continue
1847
patch = self._call(
1848
"/deviation/" + dev["deviationid"], fatal=False)
1849
if patch:
1850
dev.update(patch)
1851
1852
yield from results
1853
1854
if not data["has_more"] and (
1855
self.strategy != "manual" or not results or not extend):
1856
return
1857
1858
if "next_cursor" in data:
1859
if not data["next_cursor"]:
1860
return
1861
params["offset"] = None
1862
params["cursor"] = data["next_cursor"]
1863
elif data["next_offset"] is not None:
1864
params["offset"] = data["next_offset"]
1865
params["cursor"] = None
1866
else:
1867
if params.get("offset") is None:
1868
return
1869
params["offset"] = int(params["offset"]) + len(results)
1870
1871
def _pagination_list(self, endpoint, params, key="results"):
1872
return list(self._pagination(endpoint, params, False, key=key))
1873
1874
def _shared_content(self, results):
1875
"""Return an iterable of shared deviations in 'results'"""
1876
for result in results:
1877
for item in result.get("items") or ():
1878
if "deviation" in item:
1879
yield item["deviation"]
1880
1881
def _metadata(self, deviations):
1882
"""Add extended metadata to each deviation object"""
1883
if len(deviations) <= self.limit:
1884
self._metadata_batch(deviations)
1885
else:
1886
n = self.limit
1887
for index in range(0, len(deviations), n):
1888
self._metadata_batch(deviations[index:index+n])
1889
1890
def _metadata_batch(self, deviations):
1891
"""Fetch extended metadata for a single batch of deviations"""
1892
for deviation, metadata in zip(
1893
deviations, self.deviation_metadata(deviations)):
1894
deviation.update(metadata)
1895
deviation["tags"] = [t["tag_name"] for t in deviation["tags"]]
1896
1897
def _folders(self, deviations):
1898
"""Add a list of all containing folders to each deviation object"""
1899
for deviation in deviations:
1900
deviation["folders"] = self._folders_map(
1901
deviation["author"]["username"])[deviation["deviationid"]]
1902
1903
@memcache(keyarg=1)
def _folders_map(self, username):
    """Generate a deviation_id -> folders mapping for 'username'

    Returns a 'defaultdict(list)' mapping each deviation ID to the names
    of all gallery folders containing it. Names of nested folders are
    prefixed with their parent's name ("parent/child"), except when the
    parent is the first folder returned by 'gallery_folders'.
    """
    self.log.info("Collecting folder information for '%s'", username)
    folders = self.gallery_folders(username)

    dmap = collections.defaultdict(list)
    if not folders:
        # no folders at all: 'folders[0]' below would raise IndexError
        return dmap

    # create 'folderid'-to-'folder' mapping
    fmap = {
        folder["folderid"]: folder
        for folder in folders
    }

    # add parent names to folders, but ignore "Featured" as parent
    featured = folders[0]["folderid"]
    done = False

    # repeat until every folder's parent chain has been folded into its
    # name; a folder is only renamed once its parent is fully resolved
    while not done:
        done = True
        for folder in folders:
            parent = folder["parent"]
            if not parent:
                pass
            elif parent == featured:
                folder["parent"] = None
            else:
                parent = fmap[parent]
                if parent["parent"]:
                    # parent itself is still unresolved; needs another pass
                    done = False
                else:
                    folder["name"] = parent["name"] + "/" + folder["name"]
                    folder["parent"] = None

    # map deviationids to folder names
    for folder in folders:
        for deviation in self.gallery(
                username, folder["folderid"], 0, False):
            dmap[deviation["deviationid"]].append(folder["name"])
    return dmap
1942
1943
1944
class DeviantartEclipseAPI():
    """Interface to the DeviantArt Eclipse API"""

    def __init__(self, extractor):
        self.extractor = extractor
        self.log = extractor.log
        self.request = self.extractor._limited_request
        # fetched lazily by _call() or as a side effect of _ids_watching()
        self.csrf_token = None

    def deviation_extended_fetch(self, deviation_id, user, kind=None):
        """Return the '/_puppy/dadeviation/init' response for a deviation"""
        endpoint = "/_puppy/dadeviation/init"
        params = {
            "deviationid"     : deviation_id,
            "username"        : user,
            "type"            : kind,
            "include_session" : "false",
            "expand"          : "deviation.related",
            "da_minor_version": "20230710",
        }
        return self._call(endpoint, params)

    def gallery_scraps(self, user, offset=0):
        """Yield the gallery contents of 'user' with 'scraps_folder' set"""
        endpoint = "/_puppy/dashared/gallection/contents"
        params = {
            "username"     : user,
            "type"         : "gallery",
            "offset"       : offset,
            "limit"        : 24,
            "scraps_folder": "true",
        }
        return self._pagination(endpoint, params)

    def galleries_search(self, user, query, offset=0, order="most-recent"):
        """Yield gallery search results for 'query' in the gallery of 'user'"""
        endpoint = "/_puppy/dashared/gallection/search"
        params = {
            "username": user,
            "type"    : "gallery",
            "order"   : order,
            "q"       : query,
            "offset"  : offset,
            "limit"   : 24,
        }
        return self._pagination(endpoint, params)

    def search_deviations(self, params):
        """Yield the 'deviations' entries of a deviation search"""
        endpoint = "/_puppy/dabrowse/search/deviations"
        return self._pagination(endpoint, params, key="deviations")

    def user_info(self, user, expand=False):
        """Return the '/_puppy/dauserprofile/init/about' response for 'user'

        'expand' is accepted but not used by this implementation.
        """
        endpoint = "/_puppy/dauserprofile/init/about"
        params = {"username": user}
        return self._call(endpoint, params)

    def user_watching(self, user, offset=0):
        """Yield the results of the 'watching' module of 'user'"""
        gruserid, moduleid = self._ids_watching(user)

        endpoint = "/_puppy/gruser/module/watching"
        params = {
            "gruserid"     : gruserid,
            "gruser_typeid": "4",
            "username"     : user,
            "moduleid"     : moduleid,
            "offset"       : offset,
            "limit"        : 24,
        }
        return self._pagination(endpoint, params)

    def _call(self, endpoint, params):
        """Request 'endpoint' and return its decoded JSON response

        Adds a 'csrf_token' entry to 'params' (modifying the passed dict).
        When the response body cannot be decoded as JSON, a dict with the
        raw response text under 'error' is returned instead.
        """
        url = "https://www.deviantart.com" + endpoint
        params["csrf_token"] = self.csrf_token or self._fetch_csrf_token()

        response = self.request(url, params=params, fatal=None)

        try:
            return response.json()
        except Exception:
            return {"error": response.text}

    def _pagination(self, endpoint, params, key="results"):
        """Yield all entries under 'key' from successive 'endpoint' pages

        'params' is updated in place with the next 'offset' / 'cursor'
        before each follow-up request.
        """
        limit = params.get("limit", 24)
        warn = True

        while True:
            data = self._call(endpoint, params)

            results = data.get(key)
            if results is None:
                return
            if len(results) < limit and warn and data.get("hasMore"):
                # a short page that is not the last one; warn only once
                warn = False
                self.log.warning(
                    "Private deviations detected! "
                    "Provide login credentials or session cookies "
                    "to be able to access them.")
            yield from results

            if not data.get("hasMore"):
                return

            if "nextCursor" in data:
                params["offset"] = None
                params["cursor"] = data["nextCursor"]
            elif "nextOffset" in data:
                params["offset"] = data["nextOffset"]
                params["cursor"] = None
            elif params.get("offset") is None:
                return
            else:
                if not results:
                    # the offset cannot advance on an empty page; stop
                    # instead of requesting the same page forever
                    return
                params["offset"] = int(params["offset"]) + len(results)

    def _ids_watching(self, user):
        """Return the user ID and 'watching' module ID from the about page

        Also stores the page's CSRF token in 'self.csrf_token'.
        Raises 'exception.NotFoundError' when the page contains no
        'watching' module entry.
        """
        url = f"{self.extractor.root}/{user}/about"
        page = self.request(url).text

        gruser_id = text.extr(page, ' data-userid="', '"')

        pos = page.find('\\"name\\":\\"watching\\"')
        if pos < 0:
            raise exception.NotFoundError("'watching' module ID")
        module_id = text.rextr(page, '\\"id\\":', ',', pos).strip('" ')

        self._fetch_csrf_token(page)
        return gruser_id, module_id

    def _fetch_csrf_token(self, page=None):
        """Extract the CSRF token from 'page', store it, and return it

        Without 'page', the site's root page is requested first.
        """
        if page is None:
            page = self.request(self.extractor.root + "/").text
        self.csrf_token = token = text.extr(
            page, "window.__CSRF_TOKEN__ = '", "'")
        return token
2074
2075
2076
@memcache(keyarg=1)
def _user_details(extr, name):
    """Return the 'user' object of the profile of 'name', or None on failure"""
    try:
        profile = extr.api.user_profile(name)
        details = profile["user"]
    except Exception:
        # best effort: any failure is reported as "no details"
        details = None
    return details
2082
2083
2084
@cache(maxage=36500*86400, keyarg=0)
def _refresh_token_cache(token):
    """Return 'token', or None when 'token' starts with '#'"""
    starts_with_hash = bool(token) and token[0] == "#"
    return None if starts_with_hash else token
2089
2090
2091
@cache(maxage=28*86400, keyarg=1)
def _login_impl(extr, username, password):
    """Sign in as 'username' and return the extractor's cookies as a dict

    Raises 'exception.AuthenticationError' when the sign-in response
    has an empty redirect history.
    """
    extr.log.info("Logging in as %s", username)

    login_page = extr.request("https://www.deviantart.com/users/login").text

    # start from the hidden <input> fields of the login page
    form = {}
    hidden_fields = text.extract_iter(
        login_page, '<input type="hidden" name="', '"/>')
    for field in hidden_fields:
        key, _, val = field.partition('" value="')
        form[key] = val

    challenge = form.get("challenge")
    if challenge and challenge != "0":
        extr.log.warning("Login requires solving a CAPTCHA")
        extr.log.debug(challenge)

    form.update(username=username, password=password, remember="on")

    extr.sleep(2.0, "login")
    response = extr.request(
        "https://www.deviantart.com/_sisu/do/signin",
        method="POST", data=form)

    if response.history:
        return {cookie.name: cookie.value for cookie in extr.cookies}
    raise exception.AuthenticationError()
2123
2124
2125
def id_from_base36(base36):
    """Decode 'base36' with 'util.bdecode' and the base-36 alphabet"""
    alphabet = _ALPHABET
    return util.bdecode(base36, alphabet)
2127
2128
2129
def base36_from_id(deviation_id):
    """Encode 'deviation_id' with 'util.bencode' and the base-36 alphabet"""
    number = int(deviation_id)
    return util.bencode(number, _ALPHABET)
2131
2132
2133
# Digits (0-9, then a-z) used by id_from_base36() and base36_from_id()
_ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"
2134
2135
2136
###############################################################################
2137
# Journal Formats #############################################################
2138
2139
# HTML fragment: an <img> inside a "shadow" span.
# Placeholders: {src}, {width}, {height}
# NOTE(review): presumably filled in with str.format() - confirm at call site
SHADOW_TEMPLATE = """
2140
<span class="shadow">
2141
<img src="{src}" class="smshadow" width="{width}" height="{height}">
2142
</span>
2143
<br><br>
2144
"""
2145
2146
# HTML fragment: journal header with title link, author link, and date.
# Placeholders: {url}, {title}, {userurl}, {username}, {date}
# The opening <div usr class="gr"> is not closed inside this fragment.
HEADER_TEMPLATE = """<div usr class="gr">
2147
<div class="metadata">
2148
<h2><a href="{url}">{title}</a></h2>
2149
<ul>
2150
<li class="author">
2151
by <span class="name"><span class="username-with-symbol u">
2152
<a class="u regular username" href="{userurl}">{username}</a>\
2153
<span class="user-symbol regular"></span></span></span>,
2154
<span>{date}</span>
2155
</li>
2156
</ul>
2157
</div>
2158
"""
2159
2160
# HTML fragment: alternative journal header ("boxtop journaltop") with a
# journal icon, title link, and date.
# Placeholders: {url}, {title}, {date}
# The opening <div> is not closed inside this fragment.
HEADER_CUSTOM_TEMPLATE = """<div class='boxtop journaltop'>
2161
<h2>
2162
<img src="https://st.deviantart.net/minish/gruzecontrol/icons/journal.gif\
2163
?2" style="vertical-align:middle" alt=""/>
2164
<a href="{url}">{title}</a>
2165
</h2>
2166
Journal Entry: <span>{date}</span>
2167
"""
2168
2169
# Complete HTML document for a journal entry.
# Placeholders: {title}, {css}, {shadow}, {cls}, {html}
# The leading "text:" is part of the string value itself.
# NOTE(review): "text:" looks like a scheme-style prefix consumed by the
# code that handles the formatted result - confirm at call site
JOURNAL_TEMPLATE_HTML = """text:<!DOCTYPE html>
2170
<html>
2171
<head>
2172
<meta charset="utf-8">
2173
<title>{title}</title>
2174
<link rel="stylesheet" href="https://st.deviantart.net\
2175
/css/deviantart-network_lc.css?3843780832"/>
2176
<link rel="stylesheet" href="https://st.deviantart.net\
2177
/css/group_secrets_lc.css?3250492874"/>
2178
<link rel="stylesheet" href="https://st.deviantart.net\
2179
/css/v6core_lc.css?4246581581"/>
2180
<link rel="stylesheet" href="https://st.deviantart.net\
2181
/css/sidebar_lc.css?1490570941"/>
2182
<link rel="stylesheet" href="https://st.deviantart.net\
2183
/css/writer_lc.css?3090682151"/>
2184
<link rel="stylesheet" href="https://st.deviantart.net\
2185
/css/v6loggedin_lc.css?3001430805"/>
2186
<style>{css}</style>
2187
<link rel="stylesheet" href="https://st.deviantart.net\
2188
/roses/cssmin/core.css?1488405371919"/>
2189
<link rel="stylesheet" href="https://st.deviantart.net\
2190
/roses/cssmin/peeky.css?1487067424177"/>
2191
<link rel="stylesheet" href="https://st.deviantart.net\
2192
/roses/cssmin/desktop.css?1491362542749"/>
2193
<link rel="stylesheet" href="https://static.parastorage.com/services\
2194
/da-deviation/2bfd1ff7a9d6bf10d27b98dd8504c0399c3f9974a015785114b7dc6b\
2195
/app.min.css"/>
2196
</head>
2197
<body id="deviantART-v7" class="bubble no-apps loggedout w960 deviantart">
2198
<div id="output">
2199
<div class="dev-page-container bubbleview">
2200
<div class="dev-page-view view-mode-normal">
2201
<div class="dev-view-main-content">
2202
<div class="dev-view-deviation">
2203
{shadow}
2204
<div class="journal-wrapper tt-a">
2205
<div class="journal-wrapper2">
2206
<div class="journal {cls} journalcontrol">
2207
{html}
2208
</div>
2209
</div>
2210
</div>
2211
</div>
2212
</div>
2213
</div>
2214
</div>
2215
</div>
2216
</body>
2217
</html>
2218
"""
2219
2220
# HTML fragment: "gr-box" wrapper with two positional '{}' placeholders,
# the first inside the "gr-top" div, the second inside the "text" div.
# NOTE(review): presumably header and body HTML respectively - confirm
JOURNAL_TEMPLATE_HTML_EXTRA = """\
2221
<div id="devskin0"><div class="negate-box-margin" style="">\
2222
<div usr class="gr-box gr-genericbox"
2223
><i usr class="gr1"><i></i></i
2224
><i usr class="gr2"><i></i></i
2225
><i usr class="gr3"><i></i></i
2226
><div usr class="gr-top">
2227
<i usr class="tri"></i>
2228
{}
2229
</div>
2230
</div><div usr class="gr-body"><div usr class="gr">
2231
<div class="grf-indent">
2232
<div class="text">
2233
{} </div>
2234
</div>
2235
</div></div>
2236
<i usr class="gr3 gb"></i>
2237
<i usr class="gr2 gb"></i>
2238
<i usr class="gr1 gb gb1"></i> </div>
2239
</div></div>"""
2240
2241
# Plain-text journal layout: title line, byline, then the content.
# Placeholders: {title}, {username}, {date}, {content}
# The leading "text:" is part of the string value itself.
JOURNAL_TEMPLATE_TEXT = """text:{title}
2242
by {username}, {date}
2243
2244
{content}
2245
"""
2246
2247