CoCalc -- arcalive.py

GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/arcalive.py
5399 views
1
# -*- coding: utf-8 -*-
2

3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License version 2 as
5
# published by the Free Software Foundation.
6

7
"""Extractors for https://arca.live/"""
8

9
from .common import Extractor, Message
10
from .. import text, util, exception
11

12
# URL prefix shared by the extractor patterns in this module:
# an optional "http(s)://" scheme and optional "www." before the arca.live host
BASE_PATTERN = r"(?:https?://)?(?:www\.)?arca\.live"
13

14

15
class ArcaliveExtractor(Extractor):
    """Common base of all arca.live extractors"""
    category = "arcalive"
    root = "https://arca.live"
    useragent = "net.umanle.arca.android.playstore/0.9.75"
    request_interval = (0.5, 1.5)

    def _init(self):
        # API client bound to this extractor instance
        self.api = ArcaliveAPI(self)

    def items(self):
        """Queue each article for ArcalivePostExtractor"""
        post_extractor = ArcalivePostExtractor
        for entry in self.articles():
            entry["_extractor"] = post_extractor
            # board precedence: the extractor's own board, then the
            # article's "boardSlug", then the literal "breaking"
            slug = self.board or entry.get("boardSlug") or "breaking"
            yield Message.Queue, f"{self.root}/b/{slug}/{entry['id']}", entry
31

32

33
class ArcalivePostExtractor(ArcaliveExtractor):
    """Extractor for an arca.live post"""
    subcategory = "post"
    directory_fmt = ("{category}", "{boardSlug}")
    filename_fmt = "{id}_{num}{title:? //[b:230]}.{extension}"
    archive_fmt = "{id}_{num}"
    pattern = BASE_PATTERN + r"/b/(?:\w+)/(\d+)"
    example = "https://arca.live/b/breaking/123456789"

    def items(self):
        """Yield a Directory message for the post and a Url message per file

        Config options read here:
          emoticons -- keep media marked class="arca-emoticon"
                       (default: False)
          gifs      -- prefer a ".gif" variant for <video> media
                       (default: True); the value "check" verifies the
                       ".gif" URL with a HEAD request before using it
        """
        self.emoticons = self.config("emoticons", False)
        self.gifs = gifs = self.config("gifs", True)
        if gifs:
            # only consulted by _extract_files() while 'gifs' is enabled
            self.gifs_fallback = (gifs != "check")

        post = self.api.post(self.groups[0])
        files = self._extract_files(post)

        post["count"] = len(files)
        # the first 19 characters are the "YYYY-MM-DDTHH:MM:SS" part
        post["date"] = text.parse_datetime(
            post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
        post["post_url"] = post_url = \
            f"{self.root}/b/{post['boardSlug']}/{post['id']}"
        post["_http_headers"] = {"Referer": post_url + "?p=1"}

        yield Message.Directory, post
        # the same 'post' dict is reused and updated for every file
        for post["num"], file in enumerate(files, 1):
            post.update(file)
            url = file["url"]
            yield Message.Url, url, text.nameext_from_url(url, post)

    def _extract_files(self, post):
        """Collect file entries from the <img>/<video> tags of post["content"]

        Returns a list of dicts with the keys "url", "width", "height"
        and "_fallback" (a tuple of alternative URLs, possibly empty).
        """
        files = []

        # group 1 is "o" for <video> tags and empty for <img> tags;
        # group 2 is the tag's attribute text
        # (presumably util.re() returns a compiled pattern -- verify)
        for video, media in util.re(r"<(?:img|vide(o)) ([^>]+)").findall(
                post["content"]):
            if not self.emoticons and 'class="arca-emoticon"' in media:
                continue

            src = (text.extr(media, 'data-originalurl="', '"') or
                   text.extr(media, 'src="', '"'))
            if not src:
                continue

            src, _, query = text.unescape(src).partition("?")
            if not src:
                # the attribute held only a query string; there is no path
                # to build a URL from (indexing src[0] would raise here)
                continue

            if src.startswith("//"):
                # protocol-relative URL
                url = "https:" + src.replace(
                    "//ac-p.namu", "//ac-o.namu", 1)
            elif src.startswith("/"):
                # path relative to the site root
                url = self.root + src
            else:
                url = src

            fallback = ()
            query = f"?type=orig&{query}"
            if orig := text.extr(media, 'data-orig="', '"'):
                # 'data-orig' names a file extension; when it differs from
                # the URL's, try that extension first and keep the
                # unmodified URL as fallback
                path, _, ext = url.rpartition(".")
                if ext != orig:
                    fallback = (url + query,)
                    url = path + "." + orig
            elif video and self.gifs:
                url_gif = url.rpartition(".")[0] + ".gif"
                if self.gifs_fallback:
                    # use the ".gif" URL unchecked, with the video URL
                    # as fallback
                    fallback = (url + query,)
                    url = url_gif
                else:
                    # gifs == "check": switch to the ".gif" URL only if a
                    # HEAD request for it does not report an error status
                    response = self.request(
                        url_gif + query, method="HEAD", fatal=False)
                    if response.status_code < 400:
                        fallback = (url + query,)
                        url = url_gif

            files.append({
                "url"   : url + query,
                "width" : text.parse_int(text.extr(media, 'width="', '"')),
                "height": text.parse_int(text.extr(media, 'height="', '"')),
                "_fallback": fallback,
            })

        return files
114

115

116
class ArcaliveBoardExtractor(ArcaliveExtractor):
    """Extractor for the posts of an arca.live board"""
    subcategory = "board"
    pattern = BASE_PATTERN + r"/b/([^/?#]+)/?(?:\?([^#]+))?$"
    example = "https://arca.live/b/breaking"

    def articles(self):
        """Return the board's articles as listed by the API"""
        # pattern groups: board slug and (optional) URL query string
        slug, qs = self.groups
        self.board = slug
        query_params = text.parse_query(qs)
        return self.api.board(slug, query_params)
126

127

128
class ArcaliveUserExtractor(ArcaliveExtractor):
    """Extractor for an arca.live user's posts"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/u/@([^/?#]+)/?(?:\?([^#]+))?$"
    example = "https://arca.live/u/@USER"

    def articles(self):
        """Return the articles found for the user named in the URL"""
        # no fixed board: items() takes the board from each article instead
        self.board = None
        # pattern groups: user name and (optional) URL query string
        name, qs = self.groups
        query_params = text.parse_query(qs)
        return self.api.user_posts(text.unquote(name), query_params)
139

140

141
class ArcaliveAPI():
    """Small client for the API below '<extractor.root>/api/app'"""

    def __init__(self, extractor):
        self.extractor = extractor
        self.log = extractor.log
        self.root = extractor.root + "/api/app"

        # requests made through the extractor's session carry a device
        # token obtained from util.generate_token(64)
        extractor.session.headers["X-Device-Token"] = util.generate_token(64)

    def board(self, board_slug, params):
        """Iterate over the articles listed for channel 'board_slug'"""
        endpoint = "/list/channel/" + board_slug
        return self._pagination(endpoint, params, "articles")

    def post(self, post_id):
        """Return the data of the article with ID 'post_id'

        The endpoint always uses the "breaking" channel slug.
        """
        endpoint = "/view/article/breaking/" + str(post_id)
        return self._call(endpoint)

    def user_posts(self, username, params):
        """Iterate over articles matching nickname 'username'

        Queries the "breaking" channel listing with target=nickname and
        keyword=<username>; both keys are written into 'params'.
        """
        endpoint = "/list/channel/breaking"
        params["target"] = "nickname"
        params["keyword"] = username
        return self._pagination(endpoint, params, "articles")

    def _call(self, endpoint, params=None):
        """Request 'endpoint' and return its decoded JSON body

        Raises exception.AbortExtraction when the response status is not
        200, appending the response's "message" value if it has one.
        """
        url = self.root + endpoint
        response = self.extractor.request(url, params=params)

        data = response.json()
        if response.status_code == 200:
            return data

        self.log.debug("Server response: %s", data)
        msg = f": {msg}" if (msg := data.get("message")) else ""
        raise exception.AbortExtraction(f"API request failed{msg}")

    def _pagination(self, endpoint, params, key):
        """Yield the items under 'key' from every page of 'endpoint'

        'params' is updated in place with each response's "next" values.
        Iteration ends on the first page without items or without a
        "next" value.
        """
        while True:
            data = self._call(endpoint, params)

            posts = data.get(key)
            if not posts:
                break
            yield from posts

            cursor = data.get("next")
            if not cursor:
                # no paging parameters for a following page: stop here
                # rather than fail on the missing key or repeat the
                # same request
                break
            params.update(cursor)
186

187
Product

Resources

Company