CoCalc -- audiochan.py

GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/audiochan.py
8901 views
1
# -*- coding: utf-8 -*-
2

3
# Copyright 2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8

9
"""Extractors for https://audiochan.com/"""
10

11
from .common import Extractor, Message
12
from .. import text
13

14
# URL prefix shared by all audiochan patterns:
# optional scheme and optional "www." followed by the audiochan.com host
BASE_PATTERN = r"(?:https?://)?(?:www\.)?audiochan\.com"
15

16

17
class AudiochanExtractor(Extractor):
    """Base class for audiochan extractors

    Subclasses provide posts(), an iterable of post objects,
    which items() converts into Directory and Url messages.
    """
    category = "audiochan"
    root = "https://audiochan.com"
    root_api = "https://api.audiochan.com"
    directory_fmt = ("{category}", "{user[display_name]}")
    filename_fmt = "{title} ({slug}).{extension}"
    archive_fmt = "{audioFile[id]}"

    def _init(self):
        """Set up per-instance state and request headers"""
        # when true, items() copies 'user' from a post's first credit
        self.user = False
        # headers sent with every API request (see request_api)
        self.headers_api = {
            "content-type"   : "application/json",
            "Origin"         : self.root,
            "Sec-Fetch-Dest" : "empty",
            "Sec-Fetch-Mode" : "cors",
            "Sec-Fetch-Site" : "same-site",
        }
        # headers attached to every post as '_http_headers'
        self.headers_dl = {
            "Accept": "audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,"
                      "application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5",
            "Sec-Fetch-Dest" : "audio",
            "Sec-Fetch-Mode" : "no-cors",
            "Sec-Fetch-Site" : "same-site",
            "Accept-Encoding": "identity",
        }

    def items(self):
        """Yield a Directory and a Url message for each post"""
        for post in self.posts():
            file = post["audioFile"]

            post["_http_headers"] = self.headers_dl
            post["date"] = self.parse_datetime_iso(file["created_at"])
            post["date_updated"] = self.parse_datetime_iso(file["updated_at"])
            post["description"] = self._extract_description(
                post["description"])

            # a tag entry is either the tag object itself
            # or a wrapper holding it under its 'tag' key
            post["tags"] = [
                f"{tag['category']}:{tag['name']}"
                for tag in (
                    entry["tag"] if "tag" in entry else entry
                    for entry in post["tags"]
                )
            ]

            if self.user:
                post["user"] = post["credits"][0]["user"]

            if not (url := file["url"]):
                # no direct file URL: fall back to the stream URL
                # NOTE(review): '_http_segmented' presumably makes the
                # downloader fetch in 600000-byte segments -- confirm
                post["_http_segmented"] = 600000
                url = file["stream_url"]

            yield Message.Directory, "", post
            # derive name/extension metadata from the original filename
            text.nameext_from_name(file["filename"], post)
            yield Message.Url, url, post

    def request_api(self, endpoint, params=None):
        """Request 'endpoint' from the API and return its JSON response"""
        url = self.root_api + endpoint
        return self.request_json(url, params=params, headers=self.headers_api)

    def _pagination(self, endpoint, params, key=None):
        """Yield all results of a paginated API endpoint

        'params' is modified in place ('page' and 'limit' are set and
        'page' is incremented for every request). If 'key' is given,
        the paginated object is taken from that key of each response.
        """
        params["page"] = 1
        params["limit"] = "12"

        while True:
            data = self.request_api(endpoint, params)
            if key is not None:
                data = data[key]

            yield from data["data"]

            if not data["has_more"]:
                break
            params["page"] += 1

    def _extract_description(self, description, texts=None):
        """Collect the 'text' values of a nested description object

        Recurses into 'content' lists and appends every 'text' value
        to 'texts' (a new list when None is given), which is returned.
        Nodes that are not dicts, such as a missing (None) description,
        as well as empty 'content' values contribute nothing
        instead of raising a TypeError.
        """
        if texts is None:
            texts = []

        # 'in' on None (or other non-mapping nodes) would raise
        if not isinstance(description, dict):
            return texts

        if "text" in description:
            texts.append(description["text"])
        elif "content" in description:
            for desc in description["content"] or ():
                self._extract_description(desc, texts)

        return texts
102

103

104
class AudiochanAudioExtractor(AudiochanExtractor):
    """Extractor for a single audio identified by its slug"""
    subcategory = "audio"
    pattern = rf"{BASE_PATTERN}/a/([^/?#]+)"
    example = "https://audiochan.com/a/SLUG"

    def posts(self):
        """Return a 1-tuple with the audio object for the URL's slug"""
        # have items() fill in 'user' from the audio's credits
        self.user = True
        slug = self.groups[0]
        endpoint = "/audios/slug/" + slug
        return (self.request_api(endpoint),)
113

114

115
class AudiochanUserExtractor(AudiochanExtractor):
    """Extractor for the audios of a user"""
    subcategory = "user"
    pattern = rf"{BASE_PATTERN}/u/([^/?#]+)"
    example = "https://audiochan.com/u/USER"

    def posts(self):
        """Store the user's data in kwdict and paginate over their audios"""
        user_endpoint = "/users/" + self.groups[0]
        profile = self.request_api(user_endpoint)
        self.kwdict["user"] = profile["data"]

        audios_endpoint = user_endpoint + "/audios"
        query = dict(sfw_only="false", sort="new")
        return self._pagination(audios_endpoint, query)
129

130

131
class AudiochanCollectionExtractor(AudiochanExtractor):
    """Extractor for the items of a collection"""
    subcategory = "collection"
    pattern = rf"{BASE_PATTERN}/c/([^/?#]+)"
    example = "https://audiochan.com/c/SLUG"

    def posts(self):
        """Store collection metadata in kwdict and paginate over its items"""
        slug = self.groups[0]

        collection = self.request_api("/collections/" + slug)
        self.kwdict["collection"] = collection
        # keep only the collection's own metadata,
        # its entries are fetched through the items endpoint below
        for key in ("audios", "items"):
            collection.pop(key, None)

        return self._pagination(f"/collections/slug/{slug}/items", {})
145

146

147
class AudiochanSearchExtractor(AudiochanExtractor):
    """Extractor for search results"""
    subcategory = "search"
    pattern = rf"{BASE_PATTERN}/search/?\?([^#]+)"
    example = "https://audiochan.com/search?q=QUERY"

    def posts(self):
        """Paginate over the 'audios' results of the URL's search query"""
        # have items() fill in 'user' from each result's credits
        self.user = True

        query = text.parse_query(self.groups[0])
        self.kwdict["search_tags"] = query.get("q")
        # always overrides any 'sfw_only' value given in the URL
        query["sfw_only"] = "false"

        return self._pagination("/search", query, key="audios")
159

160
Product

Resources

Company