CoCalc -- deviantart.py

GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/deviantart.py
⁵³⁹⁹ views
1
# -*- coding: utf-8 -*-
2

3
# Copyright 2015-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8

9
"""Extractors for https://www.deviantart.com/"""
10

11
from .common import Extractor, Message, Dispatch
12
from .. import text, util, exception
13
from ..cache import cache, memcache
14
import collections
15
import mimetypes
16
import binascii
17
import time
18

19
# Regex prefix shared by the DeviantArt URL patterns. The user name is
# captured by one of two alternatives:
#   group 1: path form       [www.][fx]deviantart.com/<name>
#            (a path starting with "watch/" is excluded)
#   group 2: subdomain form  <name>.[fx]deviantart.com
#            (the "www." subdomain is excluded)
BASE_PATTERN = (
    r"(?:https?://)?"
    r"(?:"
    r"(?:www\.)?(?:fx)?deviantart\.com/(?!watch/)([\w-]+)"
    r"|"
    r"(?!www\.)([\w-]+)\.(?:fx)?deviantart\.com"
    r")"
)
# avatar URL that is compared against a user's "usericon" to detect
# accounts without a custom avatar
DEFAULT_AVATAR = "https://a.deviantart.net/avatars/default.gif"
25

26

27
class DeviantartExtractor(Extractor):
28
    """Base class for deviantart extractors"""
29
    category = "deviantart"
30
    root = "https://www.deviantart.com"
31
    directory_fmt = ("{category}", "{username}")
32
    filename_fmt = "{category}_{index}_{title}.{extension}"
33
    cookies_domain = ".deviantart.com"
34
    cookies_names = ("auth", "auth_secure", "userinfo")
35
    _last_request = 0
36

37
    def __init__(self, match):
        """Initialize the extractor from a URL match object.

        The user name is taken from match group 1 or, failing that,
        group 2, and stored in lower case; an empty string is used when
        neither group matched. The offset starts at 0.
        """
        Extractor.__init__(self, match)
        name = match[1] or match[2] or ""
        self.user = name.lower()
        self.offset = 0
41

42
    def _init(self):
        """Read the extractor options and set up everything derived from them

        Besides storing the plain option values, this creates the OAuth
        API object and binds the option-dependent callables:
        quality_sub, intermediary_subn, _update_content and
        commit_journal.
        """
        option = self.config

        self.jwt = option("jwt", False)
        self.flat = option("flat", True)
        self.extra = option("extra", False)
        self.quality = option("quality", "100")
        self.original = option("original", True)
        self.previews = option("previews", False)
        self.intermediary = option("intermediary", True)
        self.comments_avatars = option("comments-avatars", False)
        # enabling comment avatars also enables comments themselves
        self.comments = self.comments_avatars or option("comments", False)

        self.api = DeviantartOAuthAPI(self)
        self.eclipse_api = None
        self.group = False
        self._premium_cache = {}

        if option("auto-unwatch"):
            self.unwatch, self.finalize = [], self._unwatch_premium
        else:
            self.unwatch = None

        # 'quality' is turned into the replacement text that
        # quality_sub() inserts into image URLs; a false value leaves
        # quality_sub undefined
        if self.quality == "png":
            self.quality = "-fullview.png?"
            self.quality_sub = util.re(r"-fullview\.[a-z0-9]+\?").sub
        elif self.quality:
            self.quality = f",q_{self.quality}"
            self.quality_sub = util.re(r",q_\d+").sub

        if self.intermediary:
            self.intermediary_subn = util.re(r"(/f/[^/]+/[^/]+)/v\d+/.*").subn

        # a string value starting with "image" selects the
        # image-only variant of the content updater
        original = self.original
        if isinstance(original, str) and \
                original.lower().startswith("image"):
            self.original = True
            self._update_content = self._update_content_image
        else:
            self._update_content = self._update_content_default

        all_previews = (self.previews == "all")
        if all_previews:
            self.previews = True
            self.previews_images = True
        else:
            self.previews_images = False

        journals = option("journals", "html")
        self.commit_journal = (
            self._commit_journal_html if journals == "html" else
            self._commit_journal_text if journals == "text" else
            None
        )
94

95
    def request(self, url, **kwargs):
        """Send a request and retry for as long as it gets blocked

        'fatal' defaults to False unless the caller specifies it.
        A 403 response whose body contains "Request blocked." is
        treated as a CloudFront block: wait 300 seconds and try again.
        Any other response is returned as is.
        """
        kwargs.setdefault("fatal", False)

        while True:
            response = Extractor.request(self, url, **kwargs)
            blocked = (response.status_code == 403 and
                       b"Request blocked." in response.content)
            if not blocked:
                return response
            self.wait(seconds=300, reason="CloudFront block")
104

105
    def skip(self, num):
        """Add 'num' to the current offset and return 'num'"""
        self.offset = self.offset + num
        return num
108

109
    def login(self):
        """Make sure login cookies are available

        Returns True when the cookies named in 'cookies_names' are
        already present or when they could be obtained by logging in
        with the configured username and password.
        Returns None when no username is configured.
        """
        if self.cookies_check(self.cookies_names):
            return True

        username, password = self._get_auth_info()
        if not username:
            return None

        self.cookies_update(_login_impl(self, username, password))
        return True
117

118
    def items(self):
        """Yield the messages for all objects returned by deviations()

        Tuples from deviations() are forwarded as Message.Queue entries.
        Every other object is treated as a Deviation-object: it is
        prepared, announced with Message.Directory, and followed by one
        message per downloadable part (content, video, flash, journal,
        comment avatars, preview and linked sta.sh URLs), depending on
        the available data and the extractor options.
        """
        if self.user and (group := self.config("group", True)):
            if details := _user_details(self, self.user):
                self.user = details["username"]
                self.group = False
            elif group == "skip":
                self.log.info("Skipping group '%s'", self.user)
                raise exception.AbortExtraction()
            else:
                # no user details available: handle the name as a group
                self.subcategory = "group-" + self.subcategory
                self.group = True

        for dev in self.deviations():
            if isinstance(dev, tuple):
                queue_url, queue_data = dev
                yield Message.Queue, queue_url, queue_data
                continue

            if dev["is_deleted"]:
                # prevent crashing in case the deviation really is
                # deleted
                self.log.debug(
                    "Skipping %s (deleted)", dev["deviationid"])
                continue

            if dev.get("tier_access") == "locked":
                self.log.debug(
                    "Skipping %s (access locked)", dev["deviationid"])
                continue

            if "premium_folder_data" in dev:
                premium = self._fetch_premium(dev)
                if not premium:
                    continue
                dev.update(premium)

            self.prepare(dev)
            yield Message.Directory, dev

            # main file
            if "content" in dev:
                yield self.commit(dev, self._extract_content(dev))
            elif dev["is_downloadable"]:
                download = self.api.deviation_download(dev["deviationid"])
                dev["is_original"] = True
                yield self.commit(dev, download)

            # video with the highest numeric 'quality' value
            if videos := dev.get("videos"):
                best = max(
                    videos,
                    key=lambda vid: text.parse_int(vid["quality"][:-1]))
                dev["is_original"] = False
                yield self.commit(dev, best)

            if "flash" in dev:
                dev["is_original"] = True
                yield self.commit(dev, dev["flash"])

            if self.commit_journal and \
                    (journal := self._extract_journal(dev)):
                if self.extra:
                    # kept for the sta.sh link search further below
                    dev["_journal"] = journal["html"]
                dev["is_original"] = True
                yield self.commit_journal(dev, journal)

            if self.comments_avatars:
                for comment in dev["comments"]:
                    author = comment["user"]
                    name = author["username"].lower()
                    if author["usericon"] == DEFAULT_AVATAR:
                        self.log.debug(
                            "Skipping avatar of '%s' (default)", name)
                        continue
                    _user_details.update(name, author)

                    comment["_extractor"] = DeviantartAvatarExtractor
                    yield (Message.Queue,
                           f"{self.root}/{name}/avatar/", comment)

            if self.previews and "preview" in dev:
                preview = dev["preview"]
                dev["is_preview"] = True
                if self.previews_images:
                    yield self.commit(dev, preview)
                else:
                    # only add a preview when the current file
                    # extension does not belong to an image type
                    mtype = mimetypes.guess_type(
                        "a." + dev["extension"], False)[0]
                    if mtype and not mtype.startswith("image/"):
                        yield self.commit(dev, preview)
                del dev["is_preview"]

            if not self.extra:
                continue

            # ref: https://www.deviantart.com
            #      /developers/http/v1/20210526/object/editor_text
            # the value of "features" is a JSON string with forward
            # slashes escaped
            if "text_content" in dev:
                features = dev["text_content"]["body"]["features"].replace(
                    "\\/", "/")
            else:
                features = None

            sources = (features, dev.get("description"), dev.get("_journal"))
            for txt in sources:
                if txt is None:
                    continue
                for match in DeviantartStashExtractor.pattern.finditer(txt):
                    stash_url = text.ensure_http_scheme(match[0])
                    dev["_extractor"] = DeviantartStashExtractor
                    yield Message.Queue, stash_url, dev
229

230
    def deviations(self):
        """Return an iterable containing all relevant Deviation-objects

        This base implementation is a placeholder and returns None.
        items() iterates over the returned value and accepts
        Deviation-objects as well as (url, metadata) tuples.
        """
        return None
232

233
    def prepare(self, deviation):
        """Adjust the contents of a Deviation-object

        Adds 'index' and 'index_base36' (when missing), 'username',
        '_username', 'date' and 'filename', converts 'published_time'
        to an integer and, if comments are enabled, adds 'comments'.
        The object is modified in place; nothing is returned.
        """
        if "index" not in deviation:
            try:
                if deviation["url"].startswith((
                    "https://www.deviantart.com/stash/", "https://sta.sh",
                )):
                    # stash item: the base36 ID is read from the sixth
                    # "/"-separated part of the content URL, between
                    # its first character and the first "-"
                    filename = deviation["content"]["src"].split("/")[5]
                    deviation["index_base36"] = filename.partition("-")[0][1:]
                    deviation["index"] = id_from_base36(
                        deviation["index_base36"])
                else:
                    # regular item: the ID follows the last "-" of the URL
                    deviation["index"] = text.parse_int(
                        deviation["url"].rpartition("-")[2])
            except (KeyError, IndexError):
                # missing 'url'/'content'/'src' or a content URL with
                # fewer path components than expected
                deviation["index"] = 0
                deviation["index_base36"] = "0"
        if "index_base36" not in deviation:
            deviation["index_base36"] = base36_from_id(deviation["index"])

        if self.user:
            deviation["username"] = self.user
            deviation["_username"] = self.user.lower()
        else:
            deviation["username"] = deviation["author"]["username"]
            deviation["_username"] = deviation["username"].lower()

        deviation["published_time"] = text.parse_int(
            deviation["published_time"])
        deviation["date"] = text.parse_timestamp(
            deviation["published_time"])

        if self.comments:
            # skip the API call when the stats report no comments
            deviation["comments"] = (
                self._extract_comments(deviation["deviationid"], "deviation")
                if deviation["stats"]["comments"] else ()
            )

        # filename metadata
        sub = util.re(r"\W").sub
        deviation["filename"] = "".join((
            sub("_", deviation["title"].lower()), "_by_",
            sub("_", deviation["author"]["username"].lower()), "-d",
            deviation["index_base36"],
        ))
278

279
    def commit(self, deviation, target):
        """Build the Message.Url tuple for one file of a deviation

        A copy of 'target' (with 'filename' and 'extension' set) is
        stored as deviation["target"]. The extension is derived from
        the target's own 'filename' if it has one and from its 'src'
        URL otherwise. 'is_original' is only set when not yet present;
        it is then False for URLs containing "/v1/".
        """
        url = target["src"]
        name = target.get("filename") or url

        file_info = target.copy()
        file_info["filename"] = deviation["filename"]
        deviation["target"] = file_info

        extension = text.ext_from_url(name)
        deviation["extension"] = extension
        file_info["extension"] = extension

        deviation.setdefault("is_original", "/v1/" not in url)
        return Message.Url, url, deviation
289

290
    def _commit_journal_html(self, deviation, journal):
        """Build the Message.Url tuple for a journal rendered as HTML

        The journal markup is combined with a header, optional CSS and
        an optional thumbnail "shadow" into a complete document, which
        takes the place of the URL in the returned tuple.
        The deviation's extension is set to "htm".
        """
        title = text.escape(deviation["title"])
        url = deviation["url"]
        thumbs = deviation.get("thumbs") or deviation.get("files")
        html = journal["html"]
        if thumbs:
            shadow = SHADOW_TEMPLATE.format_map(thumbs[0])
        else:
            shadow = ""

        if not html:
            self.log.warning("%s: Empty journal content", deviation["index"])

        # CSS comes either from the journal data or from a leading
        # <style> element, which is then cut out of the markup
        cls = "withskin"
        if "css" in journal:
            css = journal["css"]
        elif html.startswith("<style"):
            style_block, _, html = html.partition("</style>")
            css = style_block.partition(">")[2]
        else:
            css, cls = "", "journal-green"

        custom_top = '<div class="boxtop journaltop">'
        if custom_top in html[:250]:
            needle = custom_top
            header = HEADER_CUSTOM_TEMPLATE.format(
                title=title, url=url, date=deviation["date"],
            )
        else:
            needle = '<div usr class="gr">'
            username = deviation["author"]["username"]
            urlname = deviation.get("username") or username.lower()
            header = HEADER_TEMPLATE.format(
                title=title,
                url=url,
                userurl=f"{self.root}/{urlname}/",
                username=username,
                date=deviation["date"],
            )

        # put the header in place of the first 'needle'
        # or, without one, in front of the markup
        html = (
            html.replace(needle, header, 1) if needle in html else
            JOURNAL_TEMPLATE_HTML_EXTRA.format(header, html)
        )

        document = JOURNAL_TEMPLATE_HTML.format(
            title=title, html=html, shadow=shadow, css=css, cls=cls)

        deviation["extension"] = "htm"
        return Message.Url, document, deviation
336

337
    def _commit_journal_text(self, deviation, journal):
        """Build the Message.Url tuple for a journal rendered as plain text

        A leading <style> element and everything from the last
        "<script" onwards are dropped; the rest is split at "<br />",
        stripped of HTML and unescaped line by line.
        The deviation's extension is set to "txt".
        """
        html = journal["html"]
        if not html:
            self.log.warning("%s: Empty journal content", deviation["index"])
        elif html.startswith("<style"):
            html = html.partition("</style>")[2]

        # without any "<script", rpartition() puts everything in 'tail'
        head, _, tail = html.rpartition("<script")
        body = head or tail
        lines = [
            text.unescape(text.remove_html(piece))
            for piece in body.split("<br />")
        ]

        document = JOURNAL_TEMPLATE_TEXT.format(
            title=deviation["title"],
            username=deviation["author"]["username"],
            date=deviation["date"],
            content="\n".join(lines),
        )

        deviation["extension"] = "txt"
        return Message.Url, document, deviation
357

358
    def _extract_journal(self, deviation):
        """Return a {"html": ...} dict with a deviation's journal content

        Deviations with an 'excerpt' get their content from the webpage
        (a cached '_page' entry or a fresh request): first from the
        "Literature Text" section, then from the __INITIAL_STATE__
        data, and finally from the plain excerpt.
        Otherwise a 'body' entry is used if present.
        Returns None if the deviation has neither.
        """
        if "excerpt" not in deviation:
            if "body" in deviation:
                return {"html": deviation.pop("body")}
            return None

        # NOTE: a call to self.api.deviation_content() was disabled
        # here, apparently because it returned empty 'html'

        if "_page" in deviation:
            page = deviation.pop("_page")
        else:
            page = self._limited_request(deviation["url"]).text

        # extract journal html from webpage
        html = text.extr(
            page,
            "<h2>Literature Text</h2></span><div>",
            "</div></section></div></div>")
        if html:
            return {"html": html}

        self.log.debug("%s: Failed to extract journal HTML from webpage. "
                       "Falling back to __INITIAL_STATE__ markup.",
                       deviation["index"])

        # parse __INITIAL_STATE__ as fallback
        raw_state = text.extr(
            page, 'window.__INITIAL_STATE__ = JSON.parse("', '");')
        raw_state = raw_state.replace("\\\\", "\\")
        raw_state = raw_state.replace("\\'", "'")
        raw_state = raw_state.replace('\\"', '"')
        state = util.json_loads(raw_state)

        entities = state["@@entities"]["deviation"]
        content = entities.popitem()[1]["textContent"]

        html = self._textcontent_to_html(deviation, content)
        if html:
            return {"html": html}
        return {"html": content["excerpt"].replace("\n", "<br />")}
395

396
    def _textcontent_to_html(self, deviation, content):
397
        html = content["html"]
398
        markup = html.get("markup")
399

400
        if not markup or markup[0] != "{":
401
            return markup
402

403
        if html["type"] == "tiptap":
404
            try:
405
                return self._tiptap_to_html(markup)
406
            except Exception as exc:
407
                self.log.debug("", exc_info=exc)
408
                self.log.error("%s: '%s: %s'", deviation["index"],
409
                               exc.__class__.__name__, exc)
410

411
        self.log.warning("%s: Unsupported '%s' markup.",
412
                         deviation["index"], html["type"])
413

414
    def _tiptap_to_html(self, markup):
        """Convert a tiptap JSON document to an HTML string

        'markup' is a JSON string. Every entry of its
        document -> content list is rendered with
        _tiptap_process_content() and the result is wrapped in a <div>.
        """
        parts = ['<div data-editor-viewer="1" '
                 'class="_83r8m _2CKTq _3NjDa mDnFl">']

        document = util.json_loads(markup)["document"]
        for node in document["content"]:
            self._tiptap_process_content(parts, node)

        parts.append("</div>")
        return "".join(parts)
425

426
    def _tiptap_process_content(self, html, content):
427
        type = content["type"]
428

429
        if type == "paragraph":
430
            if children := content.get("content"):
431
                html.append('<p style="')
432

433
                if attrs := content.get("attrs"):
434
                    if align := attrs.get("textAlign"):
435
                        html.append("text-align:")
436
                        html.append(align)
437
                        html.append(";")
438
                    self._tiptap_process_indentation(html, attrs)
439
                    html.append('">')
440
                else:
441
                    html.append('margin-inline-start:0px">')
442

443
                for block in children:
444
                    self._tiptap_process_content(html, block)
445
                html.append("</p>")
446
            else:
447
                html.append('<p class="empty-p"><br/></p>')
448

449
        elif type == "text":
450
            self._tiptap_process_text(html, content)
451

452
        elif type == "heading":
453
            attrs = content["attrs"]
454
            level = str(attrs.get("level") or "3")
455

456
            html.append("<h")
457
            html.append(level)
458
            html.append(' style="text-align:')
459
            html.append(attrs.get("textAlign") or "left")
460
            html.append('">')
461
            html.append('<span style="')
462
            self._tiptap_process_indentation(html, attrs)
463
            html.append('">')
464
            self._tiptap_process_children(html, content)
465
            html.append("</span></h")
466
            html.append(level)
467
            html.append(">")
468

469
        elif type in ("listItem", "bulletList", "orderedList", "blockquote"):
470
            c = type[1]
471
            tag = (
472
                "li" if c == "i" else
473
                "ul" if c == "u" else
474
                "ol" if c == "r" else
475
                "blockquote"
476
            )
477
            html.append("<" + tag + ">")
478
            self._tiptap_process_children(html, content)
479
            html.append("</" + tag + ">")
480

481
        elif type == "anchor":
482
            attrs = content["attrs"]
483
            html.append('<a id="')
484
            html.append(attrs.get("id") or "")
485
            html.append('" data-testid="anchor"></a>')
486

487
        elif type == "hardBreak":
488
            html.append("<br/><br/>")
489

490
        elif type == "horizontalRule":
491
            html.append("<hr/>")
492

493
        elif type == "da-deviation":
494
            self._tiptap_process_deviation(html, content)
495

496
        elif type == "da-mention":
497
            user = content["attrs"]["user"]["username"]
498
            html.append('<a href="https://www.deviantart.com/')
499
            html.append(user.lower())
500
            html.append('" data-da-type="da-mention" data-user="">@<!-- -->')
501
            html.append(user)
502
            html.append('</a>')
503

504
        elif type == "da-gif":
505
            attrs = content["attrs"]
506
            width = str(attrs.get("width") or "")
507
            height = str(attrs.get("height") or "")
508
            url = text.escape(attrs.get("url") or "")
509

510
            html.append('<div data-da-type="da-gif" data-width="')
511
            html.append(width)
512
            html.append('" data-height="')
513
            html.append(height)
514
            html.append('" data-alignment="')
515
            html.append(attrs.get("alignment") or "")
516
            html.append('" data-url="')
517
            html.append(url)
518
            html.append('" class="t61qu"><video role="img" autoPlay="" '
519
                        'muted="" loop="" style="pointer-events:none" '
520
                        'controlsList="nofullscreen" playsInline="" '
521
                        'aria-label="gif" data-da-type="da-gif" width="')
522
            html.append(width)
523
            html.append('" height="')
524
            html.append(height)
525
            html.append('" src="')
526
            html.append(url)
527
            html.append('" class="_1Fkk6"></video></div>')
528

529
        elif type == "da-video":
530
            src = text.escape(content["attrs"].get("src") or "")
531
            html.append('<div data-testid="video" data-da-type="da-video" '
532
                        'data-src="')
533
            html.append(src)
534
            html.append('" class="_1Uxvs"><div data-canfs="yes" data-testid="v'
535
                        'ideo-inner" class="main-video" style="width:780px;hei'
536
                        'ght:438px"><div style="width:780px;height:438px">'
537
                        '<video src="')
538
            html.append(src)
539
            html.append('" style="width:100%;height:100%;" preload="auto" cont'
540
                        'rols=""></video></div></div></div>')
541

542
        else:
543
            self.log.warning("Unsupported content type '%s'", type)
544

545
    def _tiptap_process_text(self, html, content):
        """Append the HTML for a tiptap text node to the 'html' list

        Every supported mark of the node ("link", "bold", "italic",
        "underline", "strike") opens an element around the escaped
        text; the elements are closed in reverse order. A "textStyle"
        mark without further data is ignored and any other mark only
        causes a warning.
        """
        close = []

        for mark in content.get("marks") or ():
            kind = mark["type"]
            if kind == "link":
                attrs = mark.get("attrs") or {}
                html.append('<a href="')
                html.append(text.escape(attrs.get("href") or ""))
                if "target" in attrs:
                    html.append('" target="')
                    # 'target' may be null in the source JSON; appending
                    # None would make the final "".join() fail
                    html.append(text.escape(attrs["target"] or ""))
                html.append('" rel="')
                # escaped like 'href' so that the value
                # cannot break out of its attribute
                html.append(text.escape(
                    attrs.get("rel") or "noopener noreferrer nofollow ugc"))
                html.append('">')
                close.append("</a>")
            elif kind == "bold":
                html.append("<strong>")
                close.append("</strong>")
            elif kind == "italic":
                html.append("<em>")
                close.append("</em>")
            elif kind == "underline":
                html.append("<u>")
                close.append("</u>")
            elif kind == "strike":
                html.append("<s>")
                close.append("</s>")
            elif kind == "textStyle" and len(mark) <= 1:
                pass
            else:
                self.log.warning("Unsupported text marker '%s'", kind)

        html.append(text.escape(content["text"]))
        # close the innermost element first
        close.reverse()
        html.extend(close)
583

584
    def _tiptap_process_children(self, html, content):
585
        if children := content.get("content"):
586
            for block in children:
587
                self._tiptap_process_content(html, block)
588

589
    def _tiptap_process_indentation(self, html, attrs):
590
        itype = ("text-indent" if attrs.get("indentType") == "line" else
591
                 "margin-inline-start")
592
        isize = str((attrs.get("indentation") or 0) * 24)
593
        html.append(itype + ":" + isize + "px")
594

595
    def _tiptap_process_deviation(self, html, content):
596
        dev = content["attrs"]["deviation"]
597
        media = dev.get("media") or ()
598

599
        html.append('<div class="jjNX2">')
600
        html.append('<figure class="Qf-HY" data-da-type="da-deviation" '
601
                    'data-deviation="" '
602
                    'data-width="" data-link="" data-alignment="center">')
603

604
        if "baseUri" in media:
605
            url, formats = self._eclipse_media(media)
606
            full = formats["fullview"]
607

608
            html.append('<a href="')
609
            html.append(text.escape(dev["url"]))
610
            html.append('" class="_3ouD5" style="margin:0 auto;display:flex;'
611
                        'align-items:center;justify-content:center;'
612
                        'overflow:hidden;width:780px;height:')
613
            html.append(str(780 * full["h"] / full["w"]))
614
            html.append('px">')
615

616
            html.append('<img src="')
617
            html.append(text.escape(url))
618
            html.append('" alt="')
619
            html.append(text.escape(dev["title"]))
620
            html.append('" style="width:100%;max-width:100%;display:block"/>')
621
            html.append("</a>")
622

623
        elif "textContent" in dev:
624
            html.append('<div class="_32Hs4" style="width:350px">')
625

626
            html.append('<a href="')
627
            html.append(text.escape(dev["url"]))
628
            html.append('" class="_3ouD5">')
629

630
            html.append('''\
631
<section class="Q91qI aG7Yi" style="width:350px;height:313px">\
632
<div class="_16ECM _1xMkk" aria-hidden="true">\
633
<svg height="100%" viewBox="0 0 15 12" preserveAspectRatio="xMidYMin slice" \
634
fill-rule="evenodd">\
635
<linearGradient x1="87.8481761%" y1="16.3690766%" \
636
x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
637
<stop stop-color="#00FF62" offset="0%"></stop>\
638
<stop stop-color="#3197EF" stop-opacity="0" offset="100%"></stop>\
639
</linearGradient>\
640
<text class="_2uqbc" fill="url(#app-root-3)" text-anchor="end" x="15" y="11">J\
641
</text></svg></div><div class="_1xz9u">Literature</div><h3 class="_2WvKD">\
642
''')
643
            html.append(text.escape(dev["title"]))
644
            html.append('</h3><div class="_2CPLm">')
645
            html.append(text.escape(dev["textContent"]["excerpt"]))
646
            html.append('</div></section></a></div>')
647

648
        html.append('</figure></div>')
649

650
    def _extract_content(self, deviation):
651
        content = deviation["content"]
652

653
        if self.original and deviation["is_downloadable"]:
654
            self._update_content(deviation, content)
655
            return content
656

657
        if self.jwt:
658
            self._update_token(deviation, content)
659
            return content
660

661
        if content["src"].startswith("https://images-wixmp-"):
662
            if self.intermediary and deviation["index"] <= 790677560:
663
                # https://github.com/r888888888/danbooru/issues/4069
664
                intermediary, count = self.intermediary_subn(
665
                    r"/intermediary\1", content["src"], 1)
666
                if count:
667
                    deviation["is_original"] = False
668
                    deviation["_fallback"] = (content["src"],)
669
                    content["src"] = intermediary
670
            if self.quality:
671
                content["src"] = self.quality_sub(
672
                    self.quality, content["src"], 1)
673

674
        return content
675

676
    def _find_folder(self, folders, name, uuid):
677
        if uuid.isdecimal():
678
            match = util.re(
679
                "(?i)" + name.replace("-", "[^a-z0-9]+") + "$").match
680
            for folder in folders:
681
                if match(folder["name"]):
682
                    return folder
683
                elif folder.get("has_subfolders"):
684
                    for subfolder in folder["subfolders"]:
685
                        if match(subfolder["name"]):
686
                            return subfolder
687
        else:
688
            for folder in folders:
689
                if folder["folderid"] == uuid:
690
                    return folder
691
                elif folder.get("has_subfolders"):
692
                    for subfolder in folder["subfolders"]:
693
                        if subfolder["folderid"] == uuid:
694
                            return subfolder
695
        raise exception.NotFoundError("folder")
696

697
    def _folder_urls(self, folders, category, extractor):
698
        base = f"{self.root}/{self.user}/{category}/"
699
        for folder in folders:
700
            folder["_extractor"] = extractor
701
            url = f"{base}{folder['folderid']}/{folder['name']}"
702
            yield url, folder
703

704
    def _update_content_default(self, deviation, content):
705
        if "premium_folder_data" in deviation or deviation.get("is_mature"):
706
            public = False
707
        else:
708
            public = None
709

710
        data = self.api.deviation_download(deviation["deviationid"], public)
711
        content.update(data)
712
        deviation["is_original"] = True
713

714
    def _update_content_image(self, deviation, content):
715
        data = self.api.deviation_download(deviation["deviationid"])
716
        url = data["src"].partition("?")[0]
717
        mtype = mimetypes.guess_type(url, False)[0]
718
        if mtype and mtype.startswith("image/"):
719
            content.update(data)
720
            deviation["is_original"] = True
721

722
    def _update_token(self, deviation, content):
723
        """Replace JWT to be able to remove width/height limits
724

725
        All credit goes to @Ironchest337
726
        for discovering and implementing this method
727
        """
728
        url, sep, _ = content["src"].partition("/v1/")
729
        if not sep:
730
            return
731

732
        # 'images-wixmp' returns 401 errors, but just 'wixmp' still works
733
        url = url.replace("//images-wixmp", "//wixmp", 1)
734

735
        #  header = b'{"typ":"JWT","alg":"none"}'
736
        payload = (
737
            b'{"sub":"urn:app:","iss":"urn:app:","obj":[[{"path":"/f/' +
738
            url.partition("/f/")[2].encode() +
739
            b'"}]],"aud":["urn:service:file.download"]}'
740
        )
741

742
        deviation["_fallback"] = (content["src"],)
743
        deviation["is_original"] = True
744
        pl = binascii.b2a_base64(payload).rstrip(b'=\n').decode()
745
        content["src"] = (
746
            # base64 of 'header' is precomputed as 'eyJ0eX...'
747
            f"{url}?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.{pl}.")
748

749
    def _extract_comments(self, target_id, target_type="deviation"):
750
        results = None
751
        comment_ids = [None]
752

753
        while comment_ids:
754
            comments = self.api.comments(
755
                target_id, target_type, comment_ids.pop())
756

757
            if results:
758
                results.extend(comments)
759
            else:
760
                results = comments
761

762
            # parent comments, i.e. nodes with at least one child
763
            parents = {c["parentid"] for c in comments}
764
            # comments with more than one reply
765
            replies = {c["commentid"] for c in comments if c["replies"]}
766
            # add comment UUIDs with replies that are not parent to any node
767
            comment_ids.extend(replies - parents)
768

769
        return results
770

771
    def _limited_request(self, url, **kwargs):
772
        """Limits HTTP requests to one every 2 seconds"""
773
        diff = time.time() - DeviantartExtractor._last_request
774
        if diff < 2.0:
775
            self.sleep(2.0 - diff, "request")
776
        response = self.request(url, **kwargs)
777
        DeviantartExtractor._last_request = time.time()
778
        return response
779

780
    def _fetch_premium(self, deviation):
781
        try:
782
            return self._premium_cache[deviation["deviationid"]]
783
        except KeyError:
784
            pass
785

786
        if not self.api.refresh_token_key:
787
            self.log.warning(
788
                "Unable to access premium content (no refresh-token)")
789
            self._fetch_premium = lambda _: None
790
            return None
791

792
        dev = self.api.deviation(deviation["deviationid"], False)
793
        folder = deviation["premium_folder_data"]
794
        username = dev["author"]["username"]
795

796
        # premium_folder_data is no longer present when user has access (#5063)
797
        has_access = ("premium_folder_data" not in dev) or folder["has_access"]
798

799
        if not has_access and folder["type"] == "watchers" and \
800
                self.config("auto-watch"):
801
            if self.unwatch is not None:
802
                self.unwatch.append(username)
803
            if self.api.user_friends_watch(username):
804
                has_access = True
805
                self.log.info(
806
                    "Watching %s for premium folder access", username)
807
            else:
808
                self.log.warning(
809
                    "Error when trying to watch %s. "
810
                    "Try again with a new refresh-token", username)
811

812
        if has_access:
813
            self.log.info("Fetching premium folder data")
814
        else:
815
            self.log.warning("Unable to access premium content (type: %s)",
816
                             folder["type"])
817

818
        cache = self._premium_cache
819
        for dev in self.api.gallery(
820
                username, folder["gallery_id"], public=False):
821
            cache[dev["deviationid"]] = dev if has_access else None
822

823
        return cache.get(deviation["deviationid"])
824

825
    def _unwatch_premium(self):
826
        for username in self.unwatch:
827
            self.log.info("Unwatching %s", username)
828
            self.api.user_friends_unwatch(username)
829

830
    def _eclipse_media(self, media, format="preview"):
831
        url = [media["baseUri"]]
832

833
        formats = {
834
            fmt["t"]: fmt
835
            for fmt in media["types"]
836
        }
837

838
        if tokens := media.get("token") or ():
839
            if len(tokens) <= 1:
840
                fmt = formats[format]
841
                if "c" in fmt:
842
                    url.append(fmt["c"].replace(
843
                        "<prettyName>", media["prettyName"]))
844
            url.append("?token=")
845
            url.append(tokens[-1])
846

847
        return "".join(url), formats
848

849
    def _eclipse_to_oauth(self, eclipse_api, deviations):
850
        for obj in deviations:
851
            deviation = obj["deviation"] if "deviation" in obj else obj
852
            deviation_uuid = eclipse_api.deviation_extended_fetch(
853
                deviation["deviationId"],
854
                deviation["author"]["username"],
855
                "journal" if deviation["isJournal"] else "art",
856
            )["deviation"]["extended"]["deviationUuid"]
857
            yield self.api.deviation(deviation_uuid)
858

859
    def _unescape_json(self, json):
860
        return json.replace('\\"', '"') \
861
                   .replace("\\'", "'") \
862
                   .replace("\\\\", "\\")
863

864

865
class DeviantartUserExtractor(Dispatch, DeviantartExtractor):
    """Extractor for an artist's user profile"""
    pattern = BASE_PATTERN + r"/?$"
    example = "https://www.deviantart.com/USER"

    def items(self):
        """Dispatch to the extractors for the sections of a user profile"""
        prefix = f"{self.root}/{self.user}"
        targets = (
            (DeviantartAvatarExtractor, f"{prefix}/avatar"),
            (DeviantartBackgroundExtractor, f"{prefix}/banner"),
            (DeviantartGalleryExtractor, f"{prefix}/gallery"),
            (DeviantartScrapsExtractor, f"{prefix}/gallery/scraps"),
            (DeviantartJournalExtractor, f"{prefix}/posts"),
            (DeviantartStatusExtractor, f"{prefix}/posts/statuses"),
            (DeviantartFavoriteExtractor, f"{prefix}/favourites"),
        )
        return self._dispatch_extractors(targets, ("gallery",))
881

882

883
###############################################################################
884
# OAuth #######################################################################
885

886
class DeviantartGalleryExtractor(DeviantartExtractor):
    """Extractor for all deviations from an artist's gallery"""
    subcategory = "gallery"
    archive_fmt = "g_{_username}_{index}.{extension}"
    pattern = (BASE_PATTERN + r"/gallery"
               r"(?:/all|/recommended-for-you|/?\?catpath=)?/?$")
    example = "https://www.deviantart.com/USER/gallery/"

    def deviations(self):
        """Return all gallery deviations or the gallery's folder URLs"""
        if not self.flat or self.group:
            # delegate each gallery folder to its own extractor
            return self._folder_urls(
                self.api.gallery_folders(self.user),
                "gallery", DeviantartFolderExtractor)
        return self.api.gallery_all(self.user, self.offset)
899

900

901
class DeviantartAvatarExtractor(DeviantartExtractor):
    """Extractor for an artist's avatar"""
    subcategory = "avatar"
    archive_fmt = "a_{_username}_{index}"
    pattern = BASE_PATTERN + r"/avatar"
    example = "https://www.deviantart.com/USER/avatar/"

    def deviations(self):
        """Return pseudo-deviations for the user's avatar image(s)

        Without a 'formats' option, a single 'avatars-big' URL is used;
        otherwise one URL gets built for every requested format.
        """
        name = self.user.lower()
        user = _user_details(self, name)
        if not user:
            return ()

        icon = user["usericon"]
        if icon == DEFAULT_AVATAR:
            self.log.debug("Skipping avatar of '%s' (default)", name)
            return ()

        # the avatar URL's query string serves as index value
        index = icon.rpartition("?")[2] if "?" in icon else "0"

        formats = self.config("formats")
        if not formats:
            big = icon.replace("/avatars/", "/avatars-big/", 1)
            return (self._make_deviation(big, user, index, ""),)

        if isinstance(formats, str):
            formats = formats.replace(" ", "").split(",")

        results = []
        for spec in formats:
            # 'spec' is expected to look like '<format>.<extension>'
            suffix, _, ext = spec.rpartition(".")
            if suffix:
                suffix = f"-{suffix}"
            url = (f"https://a.deviantart.net/avatars{suffix}"
                   f"/{name[0]}/{name[1]}/{name}.{ext}?{index}")
            results.append(self._make_deviation(url, user, index, suffix))
        return results

    def _make_deviation(self, url, user, index, fmt):
        """Build a minimal deviation object for an avatar URL"""
        deviation = {
            "author": user,
            "da_category": "avatar",
            "index": text.parse_int(index),
            "is_deleted": False,
            "is_downloadable": False,
            "published_time": 0,
            "title": "avatar" + fmt,
            "stats": {"comments": 0},
            "content": {"src": url},
        }
        return deviation
953

954

955
class DeviantartBackgroundExtractor(DeviantartExtractor):
    """Extractor for an artist's banner"""
    subcategory = "background"
    archive_fmt = "b_{index}"
    pattern = BASE_PATTERN + r"/ba(?:nner|ckground)"
    example = "https://www.deviantart.com/USER/banner/"

    def deviations(self):
        """Return the profile's cover deviation, if there is one"""
        try:
            profile = self.api.user_profile(self.user.lower())
            cover = profile["cover_deviation"]["cover_deviation"]
        except Exception:
            # any failure is treated as 'no banner available'
            return ()
        return (cover,)
968

969

970
class DeviantartFolderExtractor(DeviantartExtractor):
    """Extractor for deviations inside an artist's gallery folder"""
    subcategory = "folder"
    directory_fmt = ("{category}", "{username}", "{folder[title]}")
    archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/([^/?#]+)"
    example = "https://www.deviantart.com/USER/gallery/12345/TITLE"

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        self.folder = None
        self.folder_id = match[3]
        self.folder_name = match[4]

    def deviations(self):
        """Yield subfolder URLs and the deviations of this folder"""
        info = self._find_folder(
            self.api.gallery_folders(self.user),
            self.folder_name, self.folder_id)

        # Leaving this here for backwards compatibility
        self.folder = {
            "title": info["name"],
            "uuid": info["folderid"],
            "index": self.folder_id,
            "owner": self.user,
            "parent_uuid": info["parent"],
        }

        if info.get("subfolder"):
            # subfolders use their parent folder for archive IDs and paths
            self.folder["parent_folder"] = info["parent_folder"]
            self.archive_fmt = "F_{folder[parent_uuid]}_{index}.{extension}"

            path = ["{category}", "{username}", "{folder[parent_folder]}"]
            if not self.flat:
                path.append("{folder[title]}")
            self.directory_fmt = tuple(path)

        if info.get("has_subfolders") and self.config("subfolders", True):
            children = info["subfolders"]
            for child in children:
                child["parent_folder"] = info["name"]
                child["subfolder"] = True
            yield from self._folder_urls(
                children, "gallery", DeviantartFolderExtractor)

        yield from self.api.gallery(self.user, info["folderid"], self.offset)

    def prepare(self, deviation):
        """Add this folder's metadata to 'deviation'"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["folder"] = self.folder
1021

1022

1023
class DeviantartStashExtractor(DeviantartExtractor):
    """Extractor for sta.sh-ed deviations"""
    subcategory = "stash"
    archive_fmt = "{index}.{extension}"
    pattern = (r"(?:https?://)?(?:(?:www\.)?deviantart\.com/stash|sta\.s(h))"
               r"/([a-z0-9]+)")
    example = "https://www.deviantart.com/stash/abcde"

    skip = Extractor.skip

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        self.user = ""

    def deviations(self, stash_id=None, stash_data=None):
        """Yield the deviations of a stash item or stash folder

        Called without arguments, the stash ID is taken from the URL.
        For pages linking to other stash items, this method calls itself
        with each linked ID and the folder data found on the page.
        """
        if stash_id is None:
            legacy_url, stash_id = self.groups
        else:
            legacy_url = False

        if legacy_url and stash_id[0] == "2":
            # use the ID from the final URL of the 'sta.sh' request
            response = self._limited_request(f"https://sta.sh/{stash_id}")
            stash_id = response.url.rpartition("/")[2]
            page = response.text
        else:
            page = self._limited_request(
                f"https://www.deviantart.com/stash/{stash_id}").text

        if stash_id[0] == "0":
            uuid = text.extr(page, '//deviation/', '"')
            if uuid:
                deviation = self.api.deviation(uuid)
                deviation["_page"] = page
                deviation["index"] = text.parse_int(text.extr(
                    page, '\\"deviationId\\":', ','))

                deviation["stash_id"] = stash_id
                if not stash_data:
                    deviation["stash_name"] = ""
                    deviation["stash_description"] = ""
                    deviation["stash_folder"] = 0
                    deviation["stash_parent"] = 0
                else:
                    folder = stash_data["folder"]
                    deviation["stash_name"] = folder["name"]
                    deviation["stash_folder"] = folder["folderId"]
                    deviation["stash_parent"] = folder["parentId"] or 0
                    deviation["stash_description"] = \
                        folder["richDescription"]["excerpt"]

                yield deviation
                return

        # folder data embedded in this page replaces the given 'stash_data'
        stash_data = text.extr(page, ',\\"stash\\":', ',\\"@@')
        if stash_data:
            stash_data = util.json_loads(self._unescape_json(stash_data))

        links = text.extract_iter(
            page, 'href="https://www.deviantart.com/stash/', '"')
        for linked_id in links:
            if linked_id != stash_id and not linked_id.endswith("#comments"):
                yield from self.deviations(linked_id, stash_data)
1084

1085

1086
class DeviantartFavoriteExtractor(DeviantartExtractor):
    """Extractor for an artist's favorites"""
    subcategory = "favorite"
    directory_fmt = ("{category}", "{username}", "Favourites")
    archive_fmt = "f_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/favourites(?:/all|/?\?catpath=)?/?$"
    example = "https://www.deviantart.com/USER/favourites/"

    def deviations(self):
        """Return all favorites or the URLs of the user's collections"""
        if not self.flat:
            # delegate each collection folder to its own extractor
            return self._folder_urls(
                self.api.collections_folders(self.user),
                "favourites", DeviantartCollectionExtractor)
        return self.api.collections_all(self.user, self.offset)
1100

1101

1102
class DeviantartCollectionExtractor(DeviantartExtractor):
    """Extractor for a single favorite collection"""
    subcategory = "collection"
    directory_fmt = ("{category}", "{username}", "Favourites",
                     "{collection[title]}")
    archive_fmt = "C_{collection[uuid]}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/favourites/([^/?#]+)/([^/?#]+)"
    example = "https://www.deviantart.com/USER/favourites/12345/TITLE"

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        self.collection = None
        self.collection_id = match[3]
        self.collection_name = match[4]

    def deviations(self):
        """Return the deviations of the collection given by the URL"""
        info = self._find_folder(
            self.api.collections_folders(self.user),
            self.collection_name, self.collection_id)
        uuid = info["folderid"]

        self.collection = {
            "title": info["name"],
            "uuid": uuid,
            "index": self.collection_id,
            "owner": self.user,
        }
        return self.api.collections(self.user, uuid, self.offset)

    def prepare(self, deviation):
        """Add this collection's metadata to 'deviation'"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["collection"] = self.collection
1132

1133

1134
class DeviantartJournalExtractor(DeviantartExtractor):
    """Extractor for an artist's journals"""
    subcategory = "journal"
    directory_fmt = ("{category}", "{username}", "Journal")
    archive_fmt = "j_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$"
    example = "https://www.deviantart.com/USER/posts/journals/"

    def deviations(self):
        """Return the user's journals, starting at 'self.offset'"""
        journals = self.api.browse_user_journals(self.user, self.offset)
        return journals
1144

1145

1146
class DeviantartStatusExtractor(DeviantartExtractor):
    """Extractor for an artist's status updates"""
    subcategory = "status"
    directory_fmt = ("{category}", "{username}", "Status")
    filename_fmt = "{category}_{index}_{title}_{date}.{extension}"
    archive_fmt = "S_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/posts/statuses"
    example = "https://www.deviantart.com/USER/posts/statuses/"

    def deviations(self):
        """Yield all statuses together with their shared items"""
        for status in self.api.user_statuses(self.user, self.offset):
            yield from self.process_status(status)

    def process_status(self, status):
        """Yield the shared items of 'status' and then 'status' itself

        Shared statuses are processed recursively.
        Deleted statuses are skipped with a warning.
        """
        for item in status.get("items") or ():  # do not trust is_share
            # shared deviations/statuses
            if "deviation" in item:
                yield item["deviation"].copy()
            if "status" in item:
                yield from self.process_status(item["status"].copy())

        # assume is_deleted == true means necessary fields are missing
        if not status["is_deleted"]:
            yield status
            return
        self.log.warning(
            "Skipping status %s (deleted)", status.get("statusid"))

    def prepare(self, deviation):
        """Add metadata fields to a status or shared deviation object

        Objects with a 'deviationid' are regular deviations and get
        handled by the base class; everything else is a status update.
        """
        if "deviationid" in deviation:
            return DeviantartExtractor.prepare(self, deviation)

        # use the last non-empty URL path segment as index
        try:
            segments = deviation["url"].split("/")
            deviation["index"] = text.parse_int(segments[-1] or segments[-2])
        except KeyError:
            deviation["index"] = 0

        username = self.user or deviation["author"]["username"]
        deviation["username"] = username
        deviation["_username"] = username.lower()

        date = text.parse_datetime(deviation["ts"])
        deviation["date"] = date
        deviation["published_time"] = int(util.datetime_to_timestamp(date))

        deviation["da_category"] = "Status"
        deviation["category_path"] = "status"
        deviation["is_downloadable"] = False
        deviation["title"] = "Status Update"

        comments_count = deviation.pop("comments_count", 0)
        deviation["stats"] = {"comments": comments_count}
        if self.comments:
            if comments_count:
                comments = self._extract_comments(
                    deviation["statusid"], "status")
            else:
                comments = ()
            deviation["comments"] = comments
1205

1206

1207
class DeviantartTagExtractor(DeviantartExtractor):
    """Extractor for deviations from tag searches"""
    subcategory = "tag"
    directory_fmt = ("{category}", "Tags", "{search_tags}")
    archive_fmt = "T_{search_tags}_{index}.{extension}"
    pattern = r"(?:https?://)?www\.deviantart\.com/tag/([^/?#]+)"
    example = "https://www.deviantart.com/tag/TAG"

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        # the first match group is the tag and not a username
        self.user = ""
        self.tag = text.unquote(match[1])

    def deviations(self):
        """Return the deviations found for the tag from the URL"""
        results = self.api.browse_tags(self.tag, self.offset)
        return results

    def prepare(self, deviation):
        """Add the searched tag as 'search_tags' to 'deviation'"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["search_tags"] = self.tag
1226

1227

1228
class DeviantartWatchExtractor(DeviantartExtractor):
    """Extractor for Deviations from watched users"""
    subcategory = "watch"
    pattern = (r"(?:https?://)?(?:www\.)?deviantart\.com"
               r"/(?:watch/deviations|notifications/watch)()()")
    example = "https://www.deviantart.com/watch/deviations"

    def deviations(self):
        """Return the deviations of users watched by the account"""
        watched = self.api.browse_deviantsyouwatch()
        return watched
1237

1238

1239
class DeviantartWatchPostsExtractor(DeviantartExtractor):
    """Extractor for Posts from watched users"""
    subcategory = "watch-posts"
    pattern = r"(?:https?://)?(?:www\.)?deviantart\.com/watch/posts()()"
    example = "https://www.deviantart.com/watch/posts"

    def deviations(self):
        """Return the posts of users watched by the account"""
        posts = self.api.browse_posts_deviantsyouwatch()
        return posts
1247

1248

1249
###############################################################################
1250
# Eclipse #####################################################################
1251

1252
class DeviantartDeviationExtractor(DeviantartExtractor):
    """Extractor for single deviations"""
    subcategory = "deviation"
    archive_fmt = "g_{_username}_{index}.{extension}"
    pattern = (BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)"
               r"|(?:https?://)?(?:www\.)?(?:fx)?deviantart\.com/"
               r"(?:view/|deviation/|view(?:-full)?\.php/*\?(?:[^#]+&)?id=)"
               r"(\d+)"  # bare deviation ID without slug
               r"|(?:https?://)?fav\.me/d([0-9a-z]+)")  # base36
    example = "https://www.deviantart.com/UsER/art/TITLE-12345"

    skip = Extractor.skip

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        self.type = match[3]
        # numeric ID from the URL or converted from a base36 'fav.me' ID
        self.deviation_id = \
            match[4] or match[5] or id_from_base36(match[6])

    def deviations(self):
        """Yield the deviation once for each of its files

        The same deviation object is yielded every time; its 'content',
        'num', and 'index_file' fields are updated for additional files.
        """
        if self.user:
            kind = self.type or 'art'
            url = f"{self.root}/{self.user}/{kind}/{self.deviation_id}"
        else:
            url = f"{self.root}/view/{self.deviation_id}/"

        page = self._limited_request(url, notfound="deviation").text
        uuid = text.extr(page, '"deviationUuid\\":\\"', '\\')
        if not uuid:
            raise exception.NotFoundError("deviation")

        deviation = self.api.deviation(uuid)
        deviation["_page"] = page
        deviation["index_file"] = 0
        deviation["num"] = deviation["count"] = 1

        extra = text.extr(page, ',\\"additionalMedia\\":', '}],\\"')
        if extra:
            # use formats that distinguish the files of one deviation
            self.filename_fmt = ("{category}_{index}_{index_file}_{title}_"
                                 "{num:>02}.{extension}")
            self.archive_fmt = ("g_{_username}_{index}{index_file:?_//}."
                                "{extension}")

            # restore the closing '}]' consumed as end marker above
            extra = util.json_loads(self._unescape_json(extra) + "}]")
            deviation["count"] = 1 + len(extra)

        yield deviation

        for post in extra or ():
            deviation["content"]["src"] = self._eclipse_media(
                post["media"], "fullview")[0]
            deviation["num"] += 1
            deviation["index_file"] = post["fileId"]
            # Download only works on purchased materials - no way to check
            deviation["is_downloadable"] = False
            yield deviation
1311

1312

1313
class DeviantartScrapsExtractor(DeviantartExtractor):
    """Extractor for an artist's scraps"""
    subcategory = "scraps"
    directory_fmt = ("{category}", "{username}", "Scraps")
    archive_fmt = "s_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
    example = "https://www.deviantart.com/USER/gallery/scraps"

    def deviations(self):
        """Return the user's scraps as OAuth API deviation objects"""
        self.login()

        eclipse_api = DeviantartEclipseAPI(self)
        scraps = eclipse_api.gallery_scraps(self.user, self.offset)
        return self._eclipse_to_oauth(eclipse_api, scraps)
1327

1328

1329
class DeviantartSearchExtractor(DeviantartExtractor):
    """Extractor for deviantart search results"""
    subcategory = "search"
    directory_fmt = ("{category}", "Search", "{search_tags}")
    archive_fmt = "Q_{search_tags}_{index}.{extension}"
    pattern = (r"(?:https?://)?www\.deviantart\.com"
               r"/search(?:/deviations)?/?\?([^#]+)")
    example = "https://www.deviantart.com/search?q=QUERY"
    skip = Extractor.skip

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        # the base class stored the (lowercased) query string in 'user'
        query = text.parse_query(self.user)
        self.query = query
        self.search = query.get("q", "")
        self.user = ""

    def deviations(self):
        """Return search results as OAuth API deviation objects

        Uses the Eclipse API when logged in
        and results from HTML search pages otherwise.
        """
        logged_in = self.login()

        eclipse_api = DeviantartEclipseAPI(self)
        if logged_in:
            results = eclipse_api.search_deviations(self.query)
        else:
            results = self._search_html(self.query)
        return self._eclipse_to_oauth(eclipse_api, results)

    def prepare(self, deviation):
        """Add the search term as 'search_tags' to 'deviation'"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["search_tags"] = self.search

    def _search_html(self, params):
        """Yield minimal deviation objects found in HTML search pages

        Adds the cursor of each page to 'params' to request the next one
        and stops as soon as a page contains no cursor.
        """
        url = f"{self.root}/search"
        find_links = text.re(
            r'''href="https://www.deviantart.com/([^/?#]+)'''
            r'''/(art|journal)/(?:[^"]+-)?(\d+)''').findall

        while True:
            response = self.request(url, params=params)

            if response.history and "/users/login" in response.url:
                raise exception.AbortExtraction("HTTP redirect to login page")
            page = response.text

            # only every third match, excluding the last three, is used
            for username, kind, deviation_id in find_links(page)[:-3:3]:
                yield {
                    "deviationId": deviation_id,
                    "author": {"username": username},
                    "isJournal": kind == "journal",
                }

            cursor = text.extr(page, r'\"cursor\":\"', '\\')
            if not cursor:
                return
            params["cursor"] = cursor
1379

1380

1381
class DeviantartGallerySearchExtractor(DeviantartExtractor):
    """Extractor for deviantart gallery searches"""
    subcategory = "gallery-search"
    archive_fmt = "g_{_username}_{index}.{extension}"
    pattern = BASE_PATTERN + r"/gallery/?\?(q=[^#]+)"
    example = "https://www.deviantart.com/USER/gallery?q=QUERY"

    def __init__(self, match):
        DeviantartExtractor.__init__(self, match)
        self.query = match[3]

    def deviations(self):
        """Return the results of a search inside the user's gallery

        Results are sorted by the query's 'sort' parameter,
        which defaults to 'most-recent'.
        """
        self.login()

        eclipse_api = DeviantartEclipseAPI(self)
        params = text.parse_query(self.query)
        self.search = params["q"]

        results = eclipse_api.galleries_search(
            self.user,
            self.search,
            self.offset,
            params.get("sort", "most-recent"),
        )
        return self._eclipse_to_oauth(eclipse_api, results)

    def prepare(self, deviation):
        """Add the search term as 'search_tags' to 'deviation'"""
        DeviantartExtractor.prepare(self, deviation)
        deviation["search_tags"] = self.search
1410

1411

1412
class DeviantartFollowingExtractor(DeviantartExtractor):
    """Extractor for user's watched users"""
    subcategory = "following"
    pattern = BASE_PATTERN + "/(?:about#)?watching"
    example = "https://www.deviantart.com/USER/about#watching"

    def items(self):
        """Yield a queue message for the profile of each watched user"""
        api = DeviantartOAuthAPI(self)

        for friend in api.user_friends(self.user):
            username = friend['user']['username']
            friend["_extractor"] = DeviantartUserExtractor
            yield Message.Queue, f"{self.root}/{username}", friend
1425

1426

1427
###############################################################################
1428
# API Interfaces ##############################################################
1429

1430
class DeviantartOAuthAPI():
1431
    """Interface for the DeviantArt OAuth API
1432

1433
    https://www.deviantart.com/developers/http/v1/20160316
1434
    """
1435
    CLIENT_ID = "5388"
1436
    CLIENT_SECRET = "76b08c69cfb27f26d6161f9ab6d061a1"
1437

1438
    def __init__(self, extractor):
        """Initialize API state from the options of 'extractor'"""
        config = extractor.config

        self.extractor = extractor
        self.log = extractor.log
        self.headers = {"dA-minor-version": "20210526"}
        self._warn_429 = True

        # 'delay' is the wait before each API call; _call() increases it
        # on HTTP 429 and decreases it again, but not below 'delay_min'
        self.delay = config("wait-min", 0)
        self.delay_min = max(2, self.delay)

        # sent as 'mature_content' request parameter;
        # non-string option values are converted to "true" / "false"
        mature = config("mature", "true")
        if isinstance(mature, str):
            self.mature = mature
        else:
            self.mature = "true" if mature else "false"

        self.strategy = config("pagination")
        self.folders = config("folders", False)
        self.public = config("public", True)

        custom_id = config("client-id")
        if custom_id:
            self.client_id = str(custom_id)
            self.client_secret = config("client-secret")
        else:
            self.client_id = self.CLIENT_ID
            self.client_secret = self.CLIENT_SECRET

        token = config("refresh-token")
        if token is None or token == "cache":
            # no explicit token: use the cache key "#<client-id>",
            # but only if a refresh token is stored under it
            cache_key = "#" + self.client_id
            token = cache_key if _refresh_token_cache(cache_key) else None
        self.refresh_token_key = token

        # the 'extra' option of the extractor enables base metadata
        metadata = config("metadata", False) or bool(extractor.extra)
        self.metadata = bool(metadata)

        if not metadata:
            self.limit = None
        else:
            # names of the extended metadata fields to request
            if isinstance(metadata, str):
                if metadata == "all":
                    fields = ("submission", "camera", "stats",
                              "collection", "gallery")
                else:
                    fields = metadata.replace(" ", "").split(",")
            elif isinstance(metadata, (list, tuple)):
                fields = metadata
            else:
                fields = ()

            params = {"mature_content": self.mature}
            params.update(("ext_" + name, "1") for name in fields)
            self._metadata_params = params
            self._metadata_public = None

            # batch size for _metadata(): 10 with extended fields, else 50
            self.limit = 10 if fields else 50

            if "ext_collection" in params or "ext_gallery" in params:
                if token:
                    self._metadata_public = False
                else:
                    self.log.error("'collection' and 'gallery' metadata "
                                   "require a refresh token")

        if self.client_id == self.CLIENT_ID:
            credentials = "default"
        else:
            credentials = "custom"
        self.log.debug(
            "Using %s API credentials (client-id %s)",
            credentials, self.client_id)
1510

1511
    def browse_deviantsyouwatch(self, offset=0):
        """Yield deviations from watched users (private token only)"""
        params = {
            "limit"         : 50,
            "offset"        : offset,
            "mature_content": self.mature,
        }
        return self._pagination(
            "/browse/deviantsyouwatch", params, public=False)
1517

1518
    def browse_posts_deviantsyouwatch(self, offset=0):
        """Yield the journals of posts from watched users"""
        params = {
            "limit"         : 50,
            "offset"        : offset,
            "mature_content": self.mature,
        }
        # 'unpack' reduces every result to its "journal" value
        return self._pagination(
            "/browse/posts/deviantsyouwatch", params,
            public=False, unpack=True)
1524

1525
    def browse_tags(self, tag, offset=0):
        """Yield deviations from the '/browse/tags' endpoint for 'tag'"""
        params = {"tag": tag, "offset": offset, "limit": 50,
                  "mature_content": self.mature}
        return self._pagination("/browse/tags", params)
1535

1536
    def browse_user_journals(self, username, offset=0):
        """Yield profile posts of 'username' whose URL contains '/journal/'

        The first 'offset' matching posts are skipped.
        """
        journals = (
            post
            for post in self.user_profile_posts(username)
            if "/journal/" in post["url"]
        )
        return util.advance(journals, offset) if offset else journals
1543

1544
    def collections(self, username, folder_id, offset=0):
        """Yield all deviations of the collection folder 'folder_id'"""
        params = {
            "username"      : username,
            "offset"        : offset,
            "limit"         : 24,
            "mature_content": self.mature,
        }
        return self._pagination("/collections/" + folder_id, params)
1550

1551
    def collections_all(self, username, offset=0):
        """Yield the results of '/collections/all' for 'username'"""
        params = {
            "username"      : username,
            "offset"        : offset,
            "limit"         : 24,
            "mature_content": self.mature,
        }
        return self._pagination("/collections/all", params)
1557

1558
    @memcache(keyarg=1)
    def collections_folders(self, username, offset=0):
        """Return a list of all collection folders of 'username'"""
        params = {
            "username"      : username,
            "offset"        : offset,
            "limit"         : 50,
            "mature_content": self.mature,
        }
        return self._pagination_list("/collections/folders", params)
1565

1566
    def comments(self, target_id, target_type="deviation",
                 comment_id=None, offset=0):
        """Return a list of the comments posted on a target

        Results are read from the "thread" key of each response page.
        """
        params = {"commentid": comment_id, "maxdepth": "5",
                  "offset": offset, "limit": 50,
                  "mature_content": self.mature}
        return self._pagination_list(
            f"/comments/{target_type}/{target_id}", params, key="thread")
1578

1579
    def deviation(self, deviation_id, public=None):
        """Return the Deviation object for 'deviation_id'

        Metadata and folder information get added when enabled.
        """
        endpoint = "/deviation/" + deviation_id
        info = self._call(endpoint, public=public)

        if info.get("is_mature"):
            # mature deviation fetched without an explicit token choice:
            # request it again with the private token if there is one
            if public is None and self.refresh_token_key:
                info = self._call(endpoint, public=False)

        batch = (info,)
        if self.metadata:
            self._metadata(batch)
        if self.folders:
            self._folders(batch)
        return info
1593

1594
    def deviation_content(self, deviation_id, public=None):
        """Return the extended content of a single Deviation

        When 'public' is true and the returned HTML starts with a
        username <span>, the content is requested again with the private
        token, or a "Private Journal" warning is logged if none exists.
        """
        endpoint = "/deviation/content"
        params = {"deviationid": deviation_id}
        content = self._call(endpoint, params=params, public=public)

        if not public:
            return content

        marker = '        <span class="username-with-symbol'
        if not content["html"].startswith(marker):
            return content

        if self.refresh_token_key:
            return self._call(endpoint, params=params, public=False)
        self.log.warning("Private Journal")
        return content
1606

1607
    def deviation_download(self, deviation_id, public=None):
        """Return the original file download info (if allowed)

        A failed first attempt is repeated with the private token when a
        refresh token is available; otherwise its exception propagates.
        """
        endpoint = "/deviation/download/" + deviation_id
        params = {"mature_content": self.mature}

        try:
            # errors of this first attempt are not logged by _call()
            return self._call(
                endpoint, params=params, public=public, log=False)
        except Exception:
            if self.refresh_token_key:
                return self._call(endpoint, params=params, public=False)
            raise
1619

1620
    def deviation_metadata(self, deviations):
        """Return the "metadata" list for a batch of deviations"""
        # the IDs are put into the URL itself as 'deviationids[N]=<id>'
        ids = "&".join(
            f"deviationids[{index}]={dev['deviationid']}"
            for index, dev in enumerate(deviations)
        )
        response = self._call(
            "/deviation/metadata?" + ids,
            params=self._metadata_params,
            public=self._metadata_public,
        )
        return response["metadata"]
1631

1632
    def gallery(self, username, folder_id, offset=0, extend=True, public=None):
        """Yield all deviations of the gallery folder 'folder_id'"""
        params = {
            "username"      : username,
            "offset"        : offset,
            "limit"         : 24,
            "mature_content": self.mature,
            "mode"          : "newest",
        }
        return self._pagination(
            "/gallery/" + folder_id, params, extend=extend, public=public)
1638

1639
    def gallery_all(self, username, offset=0):
        """Yield the results of '/gallery/all' for 'username'"""
        params = {
            "username"      : username,
            "offset"        : offset,
            "limit"         : 24,
            "mature_content": self.mature,
        }
        return self._pagination("/gallery/all", params)
1645

1646
    @memcache(keyarg=1)
    def gallery_folders(self, username, offset=0):
        """Return a list of all gallery folders of 'username'"""
        params = {
            "username"      : username,
            "offset"        : offset,
            "limit"         : 50,
            "mature_content": self.mature,
        }
        return self._pagination_list("/gallery/folders", params)
1653

1654
    def user_friends(self, username, offset=0):
        """Yield the entries of a user's friends list"""
        params = {
            "limit"         : 50,
            "offset"        : offset,
            "mature_content": self.mature,
        }
        return self._pagination("/user/friends/" + username, params)
1659

1660
    def user_friends_watch(self, username):
        """Watch a user

        Every individual watch option is sent as "0".

        Returns the "success" value of the API response, or None when
        the request failed.
        """
        endpoint = "/user/friends/watch/" + username
        data = {
            "watch[friend]"       : "0",
            "watch[deviations]"   : "0",
            "watch[journals]"     : "0",
            "watch[forum_threads]": "0",
            "watch[critiques]"    : "0",
            "watch[scraps]"       : "0",
            "watch[activity]"     : "0",
            "watch[collections]"  : "0",
            "mature_content"      : self.mature,
        }
        response = self._call(
            endpoint, method="POST", data=data, public=False, fatal=False)
        # _call(fatal=False) returns None for error responses;
        # calling .get() on that would raise AttributeError
        return response.get("success") if response else None
1677

1678
    def user_friends_unwatch(self, username):
        """Unwatch a user

        Returns the "success" value of the API response, or None when
        the request failed.
        """
        endpoint = "/user/friends/unwatch/" + username
        response = self._call(
            endpoint, method="POST", public=False, fatal=False)
        # _call(fatal=False) returns None for error responses;
        # calling .get() on that would raise AttributeError
        return response.get("success") if response else None
1684

1685
    @memcache(keyarg=1)
    def user_profile(self, username):
        """Return profile information of 'username'

        The result is None for error responses, since the request is
        made with fatal=False.
        """
        profile = self._call("/user/profile/" + username, fatal=False)
        return profile
1690

1691
    def user_profile_posts(self, username):
        """Yield the results of '/user/profile/posts' for 'username'"""
        params = {
            "username"      : username,
            "limit"         : 50,
            "mature_content": self.mature,
        }
        return self._pagination("/user/profile/posts", params)
1696

1697
    def user_statuses(self, username, offset=0):
        """Yield profile posts whose URL contains '/status-update/'

        The first 'offset' matching posts are skipped.
        """
        statuses = (
            post
            for post in self.user_profile_posts(username)
            if "/status-update/" in post["url"]
        )
        return util.advance(statuses, offset) if offset else statuses
1705

1706
    def authenticate(self, refresh_token_key):
        """Store an access token in the 'Authorization' request header"""
        authorization = self._authenticate_impl(refresh_token_key)
        self.headers["Authorization"] = authorization
1710

1711
    @cache(maxage=3600, keyarg=1)
    def _authenticate_impl(self, refresh_token_key):
        """Request an access token and return it as 'Bearer <token>'

        A private token is requested when 'refresh_token_key' is set,
        a public one otherwise. Raises AuthenticationError when the
        server does not answer with status 200.
        """
        if refresh_token_key:
            self.log.info("Refreshing private access token")
            payload = {
                "grant_type"   : "refresh_token",
                "refresh_token": _refresh_token_cache(refresh_token_key),
            }
        else:
            self.log.info("Requesting public access token")
            payload = {"grant_type": "client_credentials"}

        response = self.extractor.request(
            "https://www.deviantart.com/oauth2/token",
            method="POST",
            data=payload,
            auth=util.HTTPBasicAuth(self.client_id, self.client_secret),
            fatal=False,
        )
        result = response.json()

        if response.status_code != 200:
            self.log.debug("Server response: %s", result)
            raise exception.AuthenticationError('"{}" ({})'.format(
                result.get("error_description"), result.get("error")))

        if refresh_token_key:
            # store the refresh token from this response
            # under the same key for the next refresh
            _refresh_token_cache.update(
                refresh_token_key, result["refresh_token"])
        return "Bearer " + result["access_token"]
1736

1737
    def _call(self, endpoint, fatal=True, log=True, public=None, **kwargs):
        """Call an API endpoint and return its decoded JSON response

        Requests answered with HTTP 429 are repeated with an increasing
        delay. For other error statuses the result is None if 'fatal'
        is false, otherwise the error data gets returned, and logged
        when 'log' is true.

        Raises NotFoundError for "User not found." and
        AuthorizationError for "Deviation not downloadable." errors.
        """
        url = "https://www.deviantart.com/api/v1/oauth2" + endpoint
        # status codes are evaluated below; the 'fatal' parameter of
        # this method is never forwarded to extractor.request()
        kwargs["fatal"] = None

        use_public = self.public if public is None else public

        while True:
            if self.delay:
                self.extractor.sleep(self.delay, "api")

            self.authenticate(
                None if use_public else self.refresh_token_key)
            kwargs["headers"] = self.headers
            response = self.extractor.request(url, **kwargs)

            try:
                data = response.json()
            except ValueError:
                self.log.error("Unable to parse API response")
                data = {}

            status = response.status_code
            if 200 <= status < 400:
                # success: slowly reduce the delay again
                if self.delay > self.delay_min:
                    self.delay -= 1
                return data
            if status != 429 and not fatal:
                return None

            error = data.get("error_description")
            if error == "User not found.":
                raise exception.NotFoundError("user or group")
            if error == "Deviation not downloadable.":
                raise exception.AuthorizationError()

            self.log.debug(response.text)
            msg = f"API responded with {status} {response.reason}"

            if status != 429:
                if log:
                    self.log.error(msg)
                return data

            # rate limited: wait one second longer (up to 30) and retry
            if self.delay < 30:
                self.delay += 1
            self.log.warning("%s. Using %ds delay.", msg, self.delay)

            if self._warn_429 and self.delay >= 3:
                # shown at most once per API object
                self._warn_429 = False
                if self.client_id == self.CLIENT_ID:
                    self.log.info(
                        "Register your own OAuth application and use its "
                        "credentials to prevent this error: "
                        "https://gdl-org.github.io/docs/configuration.html"
                        "#extractor-deviantart-client-id-client-secret")
1792

1793
    def _should_switch_tokens(self, results, params):
        """Tell whether 'results' call for using the private token

        True when fewer results than params["limit"] were returned or,
        unless the extractor's 'jwt' option is set, when any result is
        flagged as mature.
        """
        if len(results) < params["limit"]:
            return True
        if self.extractor.jwt:
            return False
        return any(item.get("is_mature") for item in results)
1803

1804
    def _pagination(self, endpoint, params,
                    extend=True, public=None, unpack=False, key="results"):
        """Yield all results of a paginated API endpoint

        'params' is modified in place while moving through the pages.
        With 'extend', the public token is swapped for the private one
        when a page looks incomplete, and results get metadata and
        folder data added or, if marked as deleted, are fetched again.
        'unpack' reduces every result to its "journal" value.
        """
        if public is None:
            public = self.public
        warned = False

        # round the page size down to a multiple of the batch size
        batch = self.limit
        if batch and params["limit"] > batch:
            params["limit"] -= params["limit"] % batch

        while True:
            data = self._call(endpoint, params=params, public=public)
            try:
                results = data[key]
            except KeyError:
                self.log.error("Unexpected API response: %s", data)
                return

            if unpack:
                results = [entry["journal"] for entry in results
                           if "journal" in entry]

            if extend:
                if public and self._should_switch_tokens(results, params):
                    if self.refresh_token_key:
                        # fetch the same page again with the private token
                        self.log.debug("Switching to private access token")
                        public = False
                        continue
                    if data["has_more"] and not warned:
                        warned = True
                        self.log.warning(
                            "Private or mature deviations detected! "
                            "Run 'gallery-dl oauth:deviantart' and follow the "
                            "instructions to be able to access them.")

                # "statusid" cannot be used instead
                if results and "deviationid" in results[0]:
                    if self.metadata:
                        self._metadata(results)
                    if self.folders:
                        self._folders(results)
                else:
                    # attempt to fix "deleted" deviations
                    for shared in self._shared_content(results):
                        if shared["is_deleted"]:
                            patch = self._call(
                                "/deviation/" + shared["deviationid"],
                                fatal=False)
                            if patch:
                                shared.update(patch)

            yield from results

            if not data["has_more"]:
                # only the "manual" strategy goes on after 'has_more'
                # turned false, while extended pages still have results
                if not (self.strategy == "manual" and results and extend):
                    return

            if "next_cursor" in data:
                cursor = data["next_cursor"]
                if not cursor:
                    return
                params["offset"] = None
                params["cursor"] = cursor
            elif data["next_offset"] is not None:
                params["offset"] = data["next_offset"]
                params["cursor"] = None
            elif params.get("offset") is None:
                return
            else:
                # no position in the response: count the offset manually
                params["offset"] = int(params["offset"]) + len(results)
1870

1871
    def _pagination_list(self, endpoint, params, key="results"):
        """Return all results of a paginated endpoint as a list

        Results are collected with 'extend' disabled.
        """
        pages = self._pagination(endpoint, params, extend=False, key=key)
        return list(pages)
1873

1874
    def _shared_content(self, results):
        """Yield the "deviation" of every item listed in 'results'"""
        for result in results:
            # a missing or empty "items" value yields nothing
            items = result.get("items") or ()
            yield from (
                item["deviation"] for item in items if "deviation" in item)
1880

1881
    def _metadata(self, deviations):
        """Add extended metadata to each deviation object

        Metadata is requested in batches of at most 'self.limit'.
        """
        size = self.limit
        total = len(deviations)

        if total <= size:
            self._metadata_batch(deviations)
            return

        for start in range(0, total, size):
            self._metadata_batch(deviations[start:start+size])
1889

1890
    def _metadata_batch(self, deviations):
        """Fetch metadata for one batch and merge it into 'deviations'"""
        fetched = self.deviation_metadata(deviations)

        for deviation, metadata in zip(deviations, fetched):
            deviation.update(metadata)
            # reduce tag objects to a plain list of tag names
            deviation["tags"] = [
                tag["tag_name"] for tag in deviation["tags"]]
1896

1897
    def _folders(self, deviations):
        """Store the names of all containing folders as "folders" entry"""
        for deviation in deviations:
            author = deviation["author"]["username"]
            # the mapping is built once per username (memcache)
            folder_map = self._folders_map(author)
            deviation["folders"] = folder_map[deviation["deviationid"]]
1902

1903
    @memcache(keyarg=1)
    def _folders_map(self, username):
        """Generate a deviation_id -> folders mapping for 'username'

        Returns a defaultdict(list) from deviation IDs to the names of
        the gallery folders containing them. Names of nested folders
        are prefixed with their parents' names, joined by "/".
        The mapping is empty when no gallery folders were returned.
        """
        self.log.info("Collecting folder information for '%s'", username)
        folders = self.gallery_folders(username)

        dmap = collections.defaultdict(list)
        if not folders:
            # without this guard, 'folders[0]' below raises IndexError,
            # e.g. after the folder listing stopped on an API error
            return dmap

        # create 'folderid'-to-'folder' mapping
        fmap = {
            folder["folderid"]: folder
            for folder in folders
        }

        # add parent names to folders, but ignore "Featured" as parent
        featured = folders[0]["folderid"]
        done = False

        # repeat until no folder is left waiting for its parent:
        # a parent must get its own full name before its children
        while not done:
            done = True
            for folder in folders:
                parent = folder["parent"]
                if not parent:
                    pass
                elif parent == featured:
                    folder["parent"] = None
                else:
                    parent = fmap[parent]
                    if parent["parent"]:
                        # parent is unresolved itself; needs another pass
                        done = False
                    else:
                        folder["name"] = parent["name"] + "/" + folder["name"]
                        folder["parent"] = None

        # map deviationids to folder names
        for folder in folders:
            for deviation in self.gallery(
                    username, folder["folderid"], 0, False):
                dmap[deviation["deviationid"]].append(folder["name"])
        return dmap
1942

1943

1944
class DeviantartEclipseAPI():
1945
    """Interface to the DeviantArt Eclipse API"""
1946

1947
    def __init__(self, extractor):
1948
        self.extractor = extractor
1949
        self.log = extractor.log
1950
        self.request = self.extractor._limited_request
1951
        self.csrf_token = None
1952

1953
    def deviation_extended_fetch(self, deviation_id, user, kind=None):
1954
        endpoint = "/_puppy/dadeviation/init"
1955
        params = {
1956
            "deviationid"     : deviation_id,
1957
            "username"        : user,
1958
            "type"            : kind,
1959
            "include_session" : "false",
1960
            "expand"          : "deviation.related",
1961
            "da_minor_version": "20230710",
1962
        }
1963
        return self._call(endpoint, params)
1964

1965
    def gallery_scraps(self, user, offset=0):
1966
        endpoint = "/_puppy/dashared/gallection/contents"
1967
        params = {
1968
            "username"     : user,
1969
            "type"         : "gallery",
1970
            "offset"       : offset,
1971
            "limit"        : 24,
1972
            "scraps_folder": "true",
1973
        }
1974
        return self._pagination(endpoint, params)
1975

1976
    def galleries_search(self, user, query, offset=0, order="most-recent"):
1977
        endpoint = "/_puppy/dashared/gallection/search"
1978
        params = {
1979
            "username": user,
1980
            "type"    : "gallery",
1981
            "order"   : order,
1982
            "q"       : query,
1983
            "offset"  : offset,
1984
            "limit"   : 24,
1985
        }
1986
        return self._pagination(endpoint, params)
1987

1988
    def search_deviations(self, params):
1989
        endpoint = "/_puppy/dabrowse/search/deviations"
1990
        return self._pagination(endpoint, params, key="deviations")
1991

1992
    def user_info(self, user, expand=False):
1993
        endpoint = "/_puppy/dauserprofile/init/about"
1994
        params = {"username": user}
1995
        return self._call(endpoint, params)
1996

1997
    def user_watching(self, user, offset=0):
1998
        gruserid, moduleid = self._ids_watching(user)
1999

2000
        endpoint = "/_puppy/gruser/module/watching"
2001
        params = {
2002
            "gruserid"     : gruserid,
2003
            "gruser_typeid": "4",
2004
            "username"     : user,
2005
            "moduleid"     : moduleid,
2006
            "offset"       : offset,
2007
            "limit"        : 24,
2008
        }
2009
        return self._pagination(endpoint, params)
2010

2011
    def _call(self, endpoint, params):
2012
        url = "https://www.deviantart.com" + endpoint
2013
        params["csrf_token"] = self.csrf_token or self._fetch_csrf_token()
2014

2015
        response = self.request(url, params=params, fatal=None)
2016

2017
        try:
2018
            return response.json()
2019
        except Exception:
2020
            return {"error": response.text}
2021

2022
    def _pagination(self, endpoint, params, key="results"):
2023
        limit = params.get("limit", 24)
2024
        warn = True
2025

2026
        while True:
2027
            data = self._call(endpoint, params)
2028

2029
            results = data.get(key)
2030
            if results is None:
2031
                return
2032
            if len(results) < limit and warn and data.get("hasMore"):
2033
                warn = False
2034
                self.log.warning(
2035
                    "Private deviations detected! "
2036
                    "Provide login credentials or session cookies "
2037
                    "to be able to access them.")
2038
            yield from results
2039

2040
            if not data.get("hasMore"):
2041
                return
2042

2043
            if "nextCursor" in data:
2044
                params["offset"] = None
2045
                params["cursor"] = data["nextCursor"]
2046
            elif "nextOffset" in data:
2047
                params["offset"] = data["nextOffset"]
2048
                params["cursor"] = None
2049
            elif params.get("offset") is None:
2050
                return
2051
            else:
2052
                params["offset"] = int(params["offset"]) + len(results)
2053

2054
    def _ids_watching(self, user):
        """Return the (gruser ID, 'watching' module ID) pair of 'user'

        Both values are extracted from the user's 'about' page, which is
        also used to update the CSRF token.
        Raises NotFoundError if the page has no 'watching' module entry.
        """
        page = self.request(f"{self.extractor.root}/{user}/about").text

        gruser_id = text.extr(page, ' data-userid="', '"')

        # the module data is embedded with backslash-escaped quotes
        pos = page.find(r'\"name\":\"watching\"')
        if pos == -1:
            raise exception.NotFoundError("'watching' module ID")
        raw_id = text.rextr(page, r'\"id\":', ',', pos)
        module_id = raw_id.strip('" ')

        self._fetch_csrf_token(page)
        return gruser_id, module_id
2067

2068
    def _fetch_csrf_token(self, page=None):
        """Extract the CSRF token from 'page', store and return it

        The site's root page is requested when 'page' is None.
        """
        if page is None:
            page = self.request(self.extractor.root + "/").text

        token = text.extr(page, "window.__CSRF_TOKEN__ = '", "'")
        self.csrf_token = token
        return token
2074

2075

2076
@memcache(keyarg=1)
def _user_details(extr, name):
    """Return the "user" entry of a user's profile, or None on any error"""
    try:
        profile = extr.api.user_profile(name)
        details = profile["user"]
    except Exception:
        details = None
    return details
2082

2083

2084
@cache(maxage=36500*86400, keyarg=0)
def _refresh_token_cache(token):
    """Return 'token', or None if it is a '#'-prefixed cache key"""
    is_cache_key = bool(token) and token[0] == "#"
    return None if is_cache_key else token
2089

2090

2091
@cache(maxage=28*86400, keyarg=1)
def _login_impl(extr, username, password):
    """Log in with the given credentials and return the cookies of 'extr'

    Raises AuthenticationError if the sign-in response has no
    redirect history.
    """
    extr.log.info("Logging in as %s", username)

    login_url = "https://www.deviantart.com/users/login"
    login_page = extr.request(login_url).text

    # collect name and value of all hidden form fields
    form = dict(
        field.partition('" value="')[::2]
        for field in text.extract_iter(
            login_page, '<input type="hidden" name="', '"/>')
    )

    challenge = form.get("challenge")
    if challenge and challenge != "0":
        extr.log.warning("Login requires solving a CAPTCHA")
        extr.log.debug(challenge)

    form.update(username=username, password=password, remember="on")

    extr.sleep(2.0, "login")
    signin_url = "https://www.deviantart.com/_sisu/do/signin"
    response = extr.request(signin_url, method="POST", data=form)

    if not response.history:
        raise exception.AuthenticationError()

    cookies = {}
    for cookie in extr.cookies:
        cookies[cookie.name] = cookie.value
    return cookies
2123

2124

2125
def id_from_base36(base36):
    """Decode 'base36' with the lowercase base36 alphabet"""
    decoded = util.bdecode(base36, _ALPHABET)
    return decoded
2127

2128

2129
def base36_from_id(deviation_id):
    """Encode 'deviation_id' with util.bencode() using _ALPHABET

    'deviation_id' is converted with int() first, so numeric strings are
    accepted as well as integers.
    """
    number = int(deviation_id)
    return util.bencode(number, _ALPHABET)
2131

2132

2133
_ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"
2134

2135

2136
###############################################################################
2137
# Journal Formats #############################################################
2138

2139
# HTML snippet for an image wrapped in a "shadow" span, followed by two
# line breaks. str.format() fields: {src}, {width}, {height}.
SHADOW_TEMPLATE = """
<span class="shadow">
    <img src="{src}" class="smshadow" width="{width}" height="{height}">
</span>
<br><br>
"""
2145

2146
# HTML header block with a linked title, the author link and a date.
# str.format() fields: {url}, {title}, {userurl}, {username}, {date}.
# The backslash at the end of the "<a ...>" line joins it with the next
# line, so no newline is emitted between "</a>" and the following "<span>".
HEADER_TEMPLATE = """<div usr class="gr">
<div class="metadata">
    <h2><a href="{url}">{title}</a></h2>
    <ul>
        <li class="author">
            by <span class="name"><span class="username-with-symbol u">
            <a class="u regular username" href="{userurl}">{username}</a>\
<span class="user-symbol regular"></span></span></span>,
            <span>{date}</span>
        </li>
    </ul>
</div>
"""
2159

2160
# Alternative HTML header: journal icon, linked title and a
# "Journal Entry" date line. str.format() fields: {url}, {title}, {date}.
# The backslash inside the <img> tag only wraps the long icon URL; the
# resulting string contains ".../journal.gif?2" without a line break.
HEADER_CUSTOM_TEMPLATE = """<div class='boxtop journaltop'>
<h2>
    <img src="https://st.deviantart.net/minish/gruzecontrol/icons/journal.gif\
?2" style="vertical-align:middle" alt=""/>
    <a href="{url}">{title}</a>
</h2>
Journal Entry: <span>{date}</span>
"""
2168

2169
# Complete HTML document for a journal entry.
# str.format() fields: {title}, {css}, {shadow}, {cls}, {html}.
# The string starts with "text:" directly in front of the doctype --
# presumably a marker for whatever consumes this template (not visible in
# this part of the file).
# Backslashes at line ends only wrap long stylesheet URLs; they do not put
# newlines into the resulting string.
JOURNAL_TEMPLATE_HTML = """text:<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>{title}</title>
    <link rel="stylesheet" href="https://st.deviantart.net\
/css/deviantart-network_lc.css?3843780832"/>
    <link rel="stylesheet" href="https://st.deviantart.net\
/css/group_secrets_lc.css?3250492874"/>
    <link rel="stylesheet" href="https://st.deviantart.net\
/css/v6core_lc.css?4246581581"/>
    <link rel="stylesheet" href="https://st.deviantart.net\
/css/sidebar_lc.css?1490570941"/>
    <link rel="stylesheet" href="https://st.deviantart.net\
/css/writer_lc.css?3090682151"/>
    <link rel="stylesheet" href="https://st.deviantart.net\
/css/v6loggedin_lc.css?3001430805"/>
    <style>{css}</style>
    <link rel="stylesheet" href="https://st.deviantart.net\
/roses/cssmin/core.css?1488405371919"/>
    <link rel="stylesheet" href="https://st.deviantart.net\
/roses/cssmin/peeky.css?1487067424177"/>
    <link rel="stylesheet" href="https://st.deviantart.net\
/roses/cssmin/desktop.css?1491362542749"/>
    <link rel="stylesheet" href="https://static.parastorage.com/services\
/da-deviation/2bfd1ff7a9d6bf10d27b98dd8504c0399c3f9974a015785114b7dc6b\
/app.min.css"/>
</head>
<body id="deviantART-v7" class="bubble no-apps loggedout w960 deviantart">
    <div id="output">
    <div class="dev-page-container bubbleview">
    <div class="dev-page-view view-mode-normal">
    <div class="dev-view-main-content">
    <div class="dev-view-deviation">
    {shadow}
    <div class="journal-wrapper tt-a">
    <div class="journal-wrapper2">
    <div class="journal {cls} journalcontrol">
    {html}
    </div>
    </div>
    </div>
    </div>
    </div>
    </div>
    </div>
    </div>
</body>
</html>
"""
2219

2220
# HTML box with two positional str.format() fields: the first "{}" sits
# inside the "gr-top" element, the second inside the "text" element.
# The string has no leading and no trailing newline (opening backslash,
# closing quotes directly after the last tag).
JOURNAL_TEMPLATE_HTML_EXTRA = """\
<div id="devskin0"><div class="negate-box-margin" style="">\
<div usr class="gr-box gr-genericbox"
        ><i usr class="gr1"><i></i></i
        ><i usr class="gr2"><i></i></i
        ><i usr class="gr3"><i></i></i
        ><div usr class="gr-top">
            <i usr class="tri"></i>
            {}
            </div>
    </div><div usr class="gr-body"><div usr class="gr">
            <div class="grf-indent">
            <div class="text">
                {}            </div>
        </div>
                </div></div>
        <i usr class="gr3 gb"></i>
        <i usr class="gr2 gb"></i>
        <i usr class="gr1 gb gb1"></i>    </div>
    </div></div>"""
2240

2241
# Plain-text layout: title line, "by <username>, <date>" line, an empty
# line, then the content. str.format() fields: {title}, {username},
# {date}, {content}. Like the HTML template, it starts with "text:".
JOURNAL_TEMPLATE_TEXT = """text:{title}
by {username}, {date}

{content}
"""
2246

2247
Product

Resources

Company