Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/audiochan.py
8901 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for https://audiochan.com/"""
10
11
from .common import Extractor, Message
12
from .. import text
13
14
# Shared URL prefix for all audiochan extractor patterns: optional scheme,
# optional "www." and the audiochan.com host. Each extractor class below
# appends its own path expression to this prefix.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?audiochan\.com"
15
16
17
class AudiochanExtractor(Extractor):
    """Base class for audiochan extractors"""
    category = "audiochan"
    root = "https://audiochan.com"
    root_api = "https://api.audiochan.com"
    directory_fmt = ("{category}", "{user[display_name]}")
    filename_fmt = "{title} ({slug}).{extension}"
    archive_fmt = "{audioFile[id]}"

    def _init(self):
        """Initialize per-instance state and the HTTP header sets."""
        # When true, items() copies the first credited user into
        # post["user"]. Subclasses switch this on inside posts().
        self.user = False

        # Headers sent with every API request (see request_api()).
        self.headers_api = {
            "content-type"  : "application/json",
            "Origin"        : self.root,
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-site",
        }

        # Headers attached to each post as "_http_headers" for the
        # actual audio download.
        self.headers_dl = {
            "Accept": "audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,"
                      "application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5",
            "Sec-Fetch-Dest" : "audio",
            "Sec-Fetch-Mode" : "no-cors",
            "Sec-Fetch-Site" : "same-site",
            "Accept-Encoding": "identity",
        }

    def items(self):
        """Yield a Directory and a Url message for every post.

        Each post object produced by posts() is updated in place with
        download headers, 'date' / 'date_updated' values, a flattened
        'description' list and 'tags' as "category:name" strings.
        """
        for post in self.posts():
            file = post["audioFile"]

            post["_http_headers"] = self.headers_dl
            post["date"] = self.parse_datetime_iso(file["created_at"])
            post["date_updated"] = self.parse_datetime_iso(file["updated_at"])
            post["description"] = self._extract_description(
                post["description"])

            tags = []
            for tag in post["tags"]:
                # Some entries wrap the actual tag object in a "tag" key.
                if "tag" in tag:
                    tag = tag["tag"]
                tags.append(f"{tag['category']}:{tag['name']}")
            post["tags"] = tags

            if self.user:
                post["user"] = post["credits"][0]["user"]

            if not (url := file["url"]):
                # No direct file URL: fall back to the stream URL.
                # NOTE(review): '_http_segmented' presumably makes the
                # downloader fetch the stream in chunks of this size;
                # confirm against the HTTP downloader.
                post["_http_segmented"] = 600000
                url = file["stream_url"]

            yield Message.Directory, "", post
            text.nameext_from_name(file["filename"], post)
            yield Message.Url, url, post

    def request_api(self, endpoint, params=None):
        """Request 'endpoint' from the API host and return its JSON data."""
        url = self.root_api + endpoint
        return self.request_json(url, params=params, headers=self.headers_api)

    def _pagination(self, endpoint, params, key=None):
        """Yield every result object of a paginated API endpoint.

        'params' is modified in place: 'page' starts at 1 and is
        incremented per request, 'limit' is set to "12".
        If 'key' is given, the page object is taken from response[key]
        instead of the response itself. Iteration stops as soon as a
        page reports a falsy 'has_more'.
        """
        params["page"] = 1
        params["limit"] = "12"

        while True:
            data = self.request_api(endpoint, params)
            if key is not None:
                data = data[key]

            yield from data["data"]

            if not data["has_more"]:
                break
            params["page"] += 1

    def _extract_description(self, description, texts=None):
        """Collect all 'text' values of a nested description object.

        A node with a 'text' key contributes that value; otherwise its
        'content' children (if any) are visited recursively in order.
        Found strings are appended to 'texts' (a new list when omitted),
        which is also the return value.

        A missing description (None), a non-mapping node or an empty /
        None 'content' yields no text instead of raising TypeError.
        """
        if texts is None:
            texts = []

        # Membership tests on None (or other non-mappings) would raise
        # TypeError and abort the whole extraction; treat as "no text".
        if not isinstance(description, dict):
            return texts

        if "text" in description:
            texts.append(description["text"])
        else:
            # 'or ()' also covers an explicit '"content": None'
            for desc in description.get("content") or ():
                self._extract_description(desc, texts)

        return texts
102
103
104
class AudiochanAudioExtractor(AudiochanExtractor):
    # Extractor for a single audio post, addressed by its slug.
    subcategory = "audio"
    pattern = BASE_PATTERN + r"/a/([^/?#]+)"
    example = "https://audiochan.com/a/SLUG"

    def posts(self):
        """Return a 1-tuple holding the audio object for the URL's slug."""
        # Have items() take the user from the post's first credit entry.
        self.user = True
        slug = self.groups[0]
        return (self.request_api(f"/audios/slug/{slug}"),)
113
114
115
class AudiochanUserExtractor(AudiochanExtractor):
    # Extractor for all audio posts listed under a user profile.
    subcategory = "user"
    pattern = BASE_PATTERN + r"/u/([^/?#]+)"
    example = "https://audiochan.com/u/USER"

    def posts(self):
        """Store the user's profile data and iterate over their audios."""
        user_endpoint = f"/users/{self.groups[0]}"

        # The profile is fetched once and kept in 'kwdict' under "user";
        # self.user is left untouched here, so items() does not replace it
        # with per-post credit data.
        profile = self.request_api(user_endpoint)
        self.kwdict["user"] = profile["data"]

        query = {"sfw_only": "false", "sort": "new"}
        return self._pagination(f"{user_endpoint}/audios", query)
129
130
131
class AudiochanCollectionExtractor(AudiochanExtractor):
    # Extractor for the items of a collection, addressed by its slug.
    subcategory = "collection"
    pattern = BASE_PATTERN + r"/c/([^/?#]+)"
    example = "https://audiochan.com/c/SLUG"

    def posts(self):
        """Store collection metadata and iterate over the collection items."""
        slug = self.groups[0]

        # NOTE(review): metadata comes from /collections/<slug> while the
        # items come from /collections/slug/<slug>/items - confirm that both
        # routes are intended.
        collection = self.request_api(f"/collections/{slug}")
        self.kwdict["collection"] = collection

        # Drop the embedded listings from the stored metadata; the items
        # are fetched through the paginated endpoint below.
        for embedded in ("audios", "items"):
            collection.pop(embedded, None)

        return self._pagination(f"/collections/slug/{slug}/items", {})
145
146
147
class AudiochanSearchExtractor(AudiochanExtractor):
    # Extractor for search results; the URL's query string is forwarded
    # to the API's search endpoint.
    subcategory = "search"
    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
    example = "https://audiochan.com/search?q=QUERY"

    def posts(self):
        """Iterate over the 'audios' results of a search query."""
        # Have items() take the user from each post's first credit entry.
        self.user = True

        query = text.parse_query(self.groups[0])
        query["sfw_only"] = "false"

        # Keep the search term ('q', None when absent) as metadata.
        self.kwdict["search_tags"] = query.get("q")

        return self._pagination("/search", query, "audios")
159
160