Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/comick.py
8854 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for https://comick.io/"""
10
11
from .common import GalleryExtractor, ChapterExtractor, MangaExtractor, Message
12
from .. import text, exception
13
from ..cache import memcache
14
15
# Common URL prefix for every extractor pattern in this module:
# an optional "http://" or "https://" scheme, an optional "www." and
# the literal host "comick.io".
BASE_PATTERN = r"(?:https?://)?(?:www\.)?comick\.io"
16
17
18
class ComickBase:
    """Attributes shared by all comick.io extractors"""
    root = "https://comick.io"
    category = "comick"
22
23
24
class ComickCoversExtractor(ComickBase, GalleryExtractor):
    """Extractor for comick.io manga covers"""
    subcategory = "covers"
    directory_fmt = ("{category}", "{manga}", "Covers")
    filename_fmt = "{volume:>02}_{lang}.{extension}"
    archive_fmt = "c_{id}"
    pattern = BASE_PATTERN + r"/comic/([\w-]+)/cover"
    example = "https://comick.io/comic/MANGA/cover"

    def metadata(self, page):
        """Return the manga metadata and remember its slug for images()"""
        info = _manga_info(self, self.groups[0])
        self.slug = info["manga_slug"]
        return info

    def images(self, page):
        """Return a list of (URL, metadata) pairs, one per cover

        The cover list from the page's Next.js data is reversed in place
        before the result is built.
        """
        response = self.request(f"{self.root}/comic/{self.slug}/cover")
        nextdata = self._extract_nextdata(response.text)

        covers = nextdata["props"]["pageProps"]["comic"]["md_covers"]
        covers.reverse()

        results = []
        for cover in covers:
            url = "https://meo.comick.pictures/" + cover["b2key"]
            results.append((url, {
                "id"    : cover["id"],
                "width" : cover["w"],
                "height": cover["h"],
                "size"  : cover["s"],
                "lang"  : cover["locale"],
                "volume": text.parse_int(cover["vol"]),
                "cover" : cover,
            }))
        return results
58
59
60
class ComickChapterExtractor(ComickBase, ChapterExtractor):
    """Extractor for comick.io manga chapters"""
    archive_fmt = "{chapter_hid}_{page}"
    pattern = (BASE_PATTERN + r"/comic/([\w-]+)"
               r"/(\w+(?:-(?:chapter|volume)-[^/?#]+)?)")
    example = "https://comick.io/comic/MANGA/ID-chapter-123-en"

    def metadata(self, page):
        """Fetch the chapter properties and return the chapter metadata

        The chapter data is requested through _chapter_info(). While doing
        so, this method
        - refreshes the cached manga info (and with it the Next.js build ID)
          once when the request fails with a 404 and an HTML body,
        - raises NotFoundError when a 404 body contains '"notFound":true',
        - re-raises any other HttpError unchanged,
        - follows '__N_REDIRECT' entries by retrying with the new path.

        The chapter's image list is stored in 'self._images' for images().
        """
        slug, chstr = self.groups
        manga = _manga_info(self, slug)

        # Explicit flag instead of probing locals(): set after the build ID
        # has been refreshed once, so a second HTML 404 gets re-raised
        # instead of being retried indefinitely.
        buildid_refreshed = False

        while True:
            try:
                props = _chapter_info(self, manga, chstr)
            except exception.HttpError as exc:
                if exc.response.status_code != 404:
                    raise
                if exc.response.headers.get(
                        "Content-Type", "").startswith("text/html"):
                    if buildid_refreshed:
                        raise
                    self.log.debug("Updating Next.js build ID")
                    buildid_refreshed = True
                    # drop all cached manga info to obtain a new build ID
                    _manga_info.cache.clear()
                    manga = _manga_info(self, slug)
                    continue
                if b'"notFound":true' in exc.response.content:
                    raise exception.NotFoundError("chapter")
                raise

            if "__N_REDIRECT" in props:
                path = props["__N_REDIRECT"]
                self.log.debug("Following redirect to %s", path)
                # the last two path segments are manga slug and chapter string
                _, slug, chstr = path.rsplit("/", 2)
                continue

            ch = props["chapter"]
            break

        self._images = ch["md_images"]

        # split a chapter number like "12.5" into "12" and ".5"
        if chapter := ch["chap"]:
            chapter, sep, minor = chapter.partition(".")
        else:
            chapter = 0
            sep = minor = ""

        return {
            **manga,
            "title"         : props["chapTitle"],
            "volume"        : text.parse_int(ch["vol"]),
            "chapter"       : text.parse_int(chapter),
            "chapter_minor" : sep + minor,
            "chapter_id"    : ch["id"],
            "chapter_hid"   : ch["hid"],
            "chapter_string": chstr,
            "group"         : ch["group_name"],
            "date"          : self.parse_datetime_iso(ch["created_at"][:19]),
            "date_updated"  : self.parse_datetime_iso(ch["updated_at"][:19]),
            "lang"          : ch["lang"],
        }

    def images(self, page):
        """Return a list of (URL, metadata) pairs, one per chapter page

        Logs an error and returns an empty tuple when no page provides a
        'b2key'. This includes an empty page list, which previously raised
        an IndexError.
        """
        if not any(img.get("b2key") for img in self._images):
            self.log.error(
                "%s: Broken Chapter (missing 'b2key' for all pages)",
                self.groups[1])
            return ()

        return [
            ("https://meo.comick.pictures/" + img["b2key"], {
                "width"    : img["w"],
                "height"   : img["h"],
                "size"     : img["s"],
                "optimized": img["optimized"],
            })
            for img in self._images
        ]
139
140
141
class ComickMangaExtractor(ComickBase, MangaExtractor):
    """Extractor for comick.io manga"""
    pattern = BASE_PATTERN + r"/comic/([\w-]+)/?(?:\?([^#]+))?"
    example = "https://comick.io/comic/MANGA"

    def items(self):
        """Yield one Queue message per chapter of the manga

        Each chapter dict is merged with the manga metadata and gets
        'volume', 'chapter' and 'chapter_minor' fields. The URL form
        depends on whether the chapter has a chapter number, only a
        volume number, or neither.
        """
        manga = _manga_info(self, self.groups[0])
        slug = manga["manga_slug"]
        # store the manga info in the cache under its 'manga_slug' value
        _manga_info.update(slug, manga)
        base = f"{self.root}/comic/{slug}"

        for ch in self.chapters(manga):
            ch.update(manga)
            ch["_extractor"] = ComickChapterExtractor

            number = ch["chap"]
            if number:
                url = f"{base}/{ch['hid']}-chapter-{number}-{ch['lang']}"
                major, sep, minor = number.partition(".")
                ch["volume"] = text.parse_int(ch["vol"])
                ch["chapter"] = text.parse_int(major)
                ch["chapter_minor"] = sep + minor
            else:
                volume = ch["vol"]
                if volume:
                    url = f"{base}/{ch['hid']}-volume-{volume}-{ch['lang']}"
                    ch["volume"] = text.parse_int(volume)
                else:
                    url = f"{base}/{ch['hid']}"
                    ch["volume"] = 0
                ch["chapter"] = 0
                ch["chapter_minor"] = ""

            yield Message.Queue, url, ch

    def chapters(self, manga):
        """Yield chapter dicts from the paginated chapters API

        Language, start page, ordering and group filter are taken from
        the URL's query string; 'lang' and 'chapter-reverse' fall back to
        config values.
        """
        endpoint = f"https://api.comick.io/comic/{manga['manga_hid']}/chapters"
        headers = {
            "Origin": "https://comick.io",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-site",
        }

        query = text.parse_query_list(self.groups[1], ("lang",))

        # query string value takes precedence over the config value
        lang = query.get("lang") or self.config("lang")
        if not lang:
            lang = None
        elif not isinstance(lang, str):
            lang = ",".join(lang)

        page = text.parse_int(query.get("page"), 1)
        params = {"lang": lang, "page": page}

        date_order = query.get("date-order")
        if date_order:
            params["date-order"] = date_order
        else:
            chap_order = query.get("chap-order")
            if chap_order:
                params["chap-order"] = chap_order
            elif self.config("chapter-reverse", False):
                params["chap-order"] = "0"
            else:
                params["chap-order"] = "1"

        # a group value of "0" disables group filtering
        group = query.get("group")
        group = None if group == "0" else group

        first_page = True
        while True:
            data = self.request_json(endpoint, params=params, headers=headers)
            limit = data["limit"]

            if first_page:
                first_page = False
                total = data["total"] - limit * page
                if total > limit:
                    self.log.info("Collecting %s chapters", total)

            batch = data["chapters"]
            if group is not None:
                batch = (
                    ch for ch in batch
                    if (names := ch["group_name"]) and group in names
                )
            yield from batch

            # stop once the pages fetched so far cover the reported total
            if data["total"] <= limit * page:
                return
            page += 1
            params["page"] = page
230
231
232
@memcache(keyarg=1)
def _manga_info(self, slug):
    """Fetch a manga's page and return its metadata as a dict

    The data is read from the page's Next.js data. The result also
    carries the page's 'buildId' as '_build_id'.
    """
    response = self.request(f"{self.root}/comic/{slug}")
    nextdata = self._extract_nextdata(response.text)
    props = nextdata["props"]["pageProps"]
    comic = props["comic"]

    # sort the genre entries into genres, themes and a single format name
    genres = []
    themes = []
    fmt = ""
    for entry in comic["md_comic_md_genres"]:
        info = entry["md_genres"]
        kind = info["group"]
        name = info["name"]
        if kind == "Genre":
            genres.append(name)
        elif kind == "Theme":
            themes.append(name)
        else:
            # any other group overwrites the format; the last one wins
            fmt = name

    mu = comic["mu_comics"]
    if mu:
        tags = [cat["mu_categories"]["title"]
                for cat in mu["mu_comic_categories"]]
        publisher = [pub["mu_publishers"]["title"]
                     for pub in mu["mu_comic_publishers"]]
    else:
        tags = publisher = ()

    return {
        "manga": comic["title"],
        "manga_id": comic["id"],
        "manga_hid": comic["hid"],
        "manga_slug": comic["slug"],
        "manga_titles": [entry["title"] for entry in comic["md_titles"]],
        "artist": [person["name"] for person in props["artists"]],
        "author": [person["name"] for person in props["authors"]],
        "genre" : genres,
        "theme" : themes,
        "format": fmt,
        "tags"  : tags,
        "publisher": publisher,
        "published": text.parse_int(comic["year"]),
        "description": comic["desc"],
        "demographic": props["demographic"],
        "origin": comic["iso639_1"],
        "mature": props["matureContent"],
        "rating": comic["content_rating"],
        "rank"  : comic["follow_rank"],
        "score" : text.parse_float(comic["bayesian_rating"]),
        "status": "Complete" if comic["status"] == 2 else "Ongoing",
        "links" : comic["links"],
        "_build_id": nextdata["buildId"],
    }
286
287
288
def _chapter_info(self, manga, chstr):
    """Request a chapter's Next.js data file and return its 'pageProps'

    The URL is built from the extractor's root, the manga's '_build_id'
    and 'manga_slug' values, and the chapter string.
    """
    slug = manga["manga_slug"]
    endpoint = (f"{self.root}/_next/data/{manga['_build_id']}"
                f"/comic/{slug}/{chstr}.json")
    response = self.request_json(
        endpoint, params={"slug": slug, "chapter": chstr})
    return response["pageProps"]
294
295