Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/comick.py
5399 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for https://comick.io/"""
10
11
from .common import GalleryExtractor, ChapterExtractor, MangaExtractor, Message
12
from .. import text
13
from ..cache import memcache
14
15
# Regex prefix shared by the extractor 'pattern' attributes in this module:
# an optional "http://" or "https://" scheme, an optional "www." prefix,
# and the literal host name "comick.io"
BASE_PATTERN = r"(?:https?://)?(?:www\.)?comick\.io"
16
17
18
class ComickBase:
    """Mixin holding the attributes shared by all comick.io extractors"""
    # site root used to build page URLs
    root = "https://comick.io"
    # category name under which these extractors are grouped
    category = "comick"
22
23
24
class ComickCoversExtractor(ComickBase, GalleryExtractor):
    """Extractor for the cover images of a comick.io manga"""
    subcategory = "covers"
    pattern = BASE_PATTERN + r"/comic/([\w-]+)/cover"
    example = "https://comick.io/comic/MANGA/cover"
    directory_fmt = ("{category}", "{manga}", "Covers")
    filename_fmt = "{volume:>02}_{lang}.{extension}"
    archive_fmt = "c_{id}"

    def metadata(self, page):
        """Return the manga metadata and remember the manga's slug"""
        info = _manga_info(self, self.groups[0])
        # images() builds its request URL from this slug
        self.slug = info['manga_slug']
        return info

    def images(self, page):
        """Return a list of (url, metadata) pairs, one per cover"""
        cover_url = f"{self.root}/comic/{self.slug}/cover"
        cover_page = self.request(cover_url).text
        nextdata = self._extract_nextdata(cover_page)

        covers = nextdata["props"]["pageProps"]["comic"]["md_covers"]
        # flip the order in which the page lists its covers
        covers.reverse()

        results = []
        for cover in covers:
            image_url = f"https://meo.comick.pictures/{cover['b2key']}"
            results.append((image_url, {
                "id"    : cover["id"],
                "width" : cover["w"],
                "height": cover["h"],
                "size"  : cover["s"],
                "lang"  : cover["locale"],
                "volume": text.parse_int(cover["vol"]),
                # keep the unmodified cover object as well
                "cover" : cover,
            }))
        return results
58
59
60
class ComickChapterExtractor(ComickBase, ChapterExtractor):
    """Extractor for comick.io manga chapters"""
    archive_fmt = "{chapter_hid}_{page}"
    pattern = (BASE_PATTERN + r"/comic/([\w-]+)"
               r"/(\w+(?:-(?:chapter|volume)-[^/?#]+)?)")
    example = "https://comick.io/comic/MANGA/ID-chapter-123-en"

    def metadata(self, page):
        """Return manga metadata merged with the fields of this chapter

        The chapter's 'md_images' list is stored in 'self._images'
        so that images() can use it afterwards.
        """
        slug, chstr = self.groups
        manga = _manga_info(self, slug)
        props = _chapter_info(self, manga, chstr)

        ch = props["chapter"]
        self._images = ch["md_images"]

        # split a chapter value such as "12.5" into "12" and ".5";
        # a falsy 'chap' value is reported as chapter 0
        if chapter := ch["chap"]:
            chapter, sep, minor = chapter.partition(".")
        else:
            chapter = 0
            sep = minor = ""

        return {
            **manga,
            "title"         : props["chapTitle"],
            "volume"        : text.parse_int(ch["vol"]),
            "chapter"       : text.parse_int(chapter),
            "chapter_minor" : sep + minor,
            "chapter_id"    : ch["id"],
            "chapter_hid"   : ch["hid"],
            "chapter_string": chstr,
            "group"         : ch["group_name"],
            # only the first 19 characters ("YYYY-MM-DDTHH:MM:SS")
            # of the timestamps are parsed
            "date"          : text.parse_datetime(
                ch["created_at"][:19], "%Y-%m-%dT%H:%M:%S"),
            "date_updated"  : text.parse_datetime(
                ch["updated_at"][:19], "%Y-%m-%dT%H:%M:%S"),
            "lang"          : ch["lang"],
        }

    def images(self, page):
        """Return a list of (url, metadata) pairs, one per chapter page

        Logs an error and returns an empty tuple when no image entry
        has a usable 'b2key'. This includes an empty or missing image
        list, which previously raised an exception on the '[0]' lookup.
        """
        images = self._images or ()

        # any() stops at the first entry with a 'b2key', so intact
        # chapters are recognized after inspecting a single image
        if not any(img.get("b2key") for img in images):
            self.log.error(
                "%s: Broken Chapter (missing 'b2key' for all pages)",
                self.groups[1])
            return ()

        return [
            (f"https://meo.comick.pictures/{img['b2key']}", {
                "width"    : img["w"],
                "height"   : img["h"],
                "size"     : img["s"],
                "optimized": img["optimized"],
            })
            for img in images
        ]
115
116
117
class ComickMangaExtractor(ComickBase, MangaExtractor):
    """Extractor for all chapters of a comick.io manga"""
    example = "https://comick.io/comic/MANGA"
    pattern = BASE_PATTERN + r"/comic/([\w-]+)/?(?:\?([^#]+))?"

    def items(self):
        """Yield one Message.Queue entry per chapter of the manga"""
        manga = _manga_info(self, self.groups[0])
        slug = manga["manga_slug"]
        # store the result under the slug reported by the site as well
        _manga_info.update(slug, manga)

        for ch in self.chapters(manga):
            ch.update(manga)
            ch["_extractor"] = ComickChapterExtractor

            chap = ch["chap"]
            if chap:
                # regular chapter: ".../HID-chapter-NUMBER-LANG"
                url = (f"{self.root}/comic/{slug}"
                       f"/{ch['hid']}-chapter-{chap}-{ch['lang']}")
                major, dot, minor = chap.partition(".")
                ch["volume"] = text.parse_int(ch["vol"])
                ch["chapter"] = text.parse_int(major)
                ch["chapter_minor"] = dot + minor
            elif vol := ch["vol"]:
                # volume-only entry: ".../HID-volume-NUMBER-LANG"
                url = (f"{self.root}/comic/{slug}"
                       f"/{ch['hid']}-volume-{vol}-{ch['lang']}")
                ch["volume"] = text.parse_int(vol)
                ch["chapter"] = 0
                ch["chapter_minor"] = ""
            else:
                # neither chapter nor volume number: address by 'hid'
                url = f"{self.root}/comic/{slug}/{ch['hid']}"
                ch["volume"] = ch["chapter"] = 0
                ch["chapter_minor"] = ""

            yield Message.Queue, url, ch

    def chapters(self, manga):
        """Yield chapter objects from the paginated chapters API

        Language, start page, sort order, and group filter are taken
        from the URL's query string; 'lang' and 'chapter-reverse'
        fall back to the extractor's config options.
        """
        _, qs = self.groups

        url = f"https://api.comick.io/comic/{manga['manga_hid']}/chapters"
        headers = {
            "Origin": "https://comick.io",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-site",
        }

        query = text.parse_query_list(qs, ("lang",))

        # the query string takes precedence over the 'lang' option
        lang = query.get("lang") or self.config("lang")
        if not lang:
            lang = None
        elif not isinstance(lang, str):
            # several languages are sent as one comma-separated value
            lang = ",".join(lang)

        page = text.parse_int(query.get("page"), 1)
        params = {"lang": lang, "page": page}

        if order := query.get("date-order"):
            params["date-order"] = order
        elif order := query.get("chap-order"):
            params["chap-order"] = order
        elif self.config("chapter-reverse", False):
            params["chap-order"] = "0"
        else:
            params["chap-order"] = "1"

        # a group value of "0" means "do not filter by group"
        wanted = query.get("group")
        if wanted == "0":
            wanted = None

        first = True
        while True:
            data = self.request_json(url, params=params, headers=headers)
            limit = data["limit"]

            if first:
                first = False
                # number of chapters beyond the current page
                pending = data["total"] - limit * page
                if pending > limit:
                    self.log.info("Collecting %s chapters", pending)

            if wanted is None:
                yield from data["chapters"]
            else:
                for ch in data["chapters"]:
                    names = ch["group_name"]
                    if names and wanted in names:
                        yield ch

            # stop once the pages requested so far cover 'total'
            if data["total"] <= limit * page:
                return
            page += 1
            params["page"] = page
206
207
208
@memcache(keyarg=1)
def _manga_info(self, slug):
    """Fetch a manga's page and return its metadata as a dict

    'self' is the calling extractor; its 'root', 'request()', and
    '_extract_nextdata()' are used to load and parse the page.
    NOTE(review): 'memcache(keyarg=1)' presumably caches results
    per 'slug' - confirm against the cache module.
    """
    page = self.request(f"{self.root}/comic/{slug}").text
    data = self._extract_nextdata(page)
    props = data["props"]["pageProps"]
    comic = props["comic"]

    # sort the genre entries into genres, themes, and a format name
    genres = []
    themes = []
    fmt = ""
    for entry in comic["md_comic_md_genres"]:
        info = entry["md_genres"]
        kind = info["group"]
        if kind == "Genre":
            genres.append(info["name"])
        elif kind == "Theme":
            themes.append(info["name"])
        else:
            # any other group overwrites the format name
            fmt = info["name"]

    mu = comic["mu_comics"]
    if mu:
        tags = [cat["mu_categories"]["title"]
                for cat in mu["mu_comic_categories"]]
        publisher = [pub["mu_publishers"]["title"]
                     for pub in mu["mu_comic_publishers"]]
    else:
        tags = publisher = ()

    return {
        "manga"       : comic["title"],
        "manga_id"    : comic["id"],
        "manga_hid"   : comic["hid"],
        "manga_slug"  : comic["slug"],
        "manga_titles": [alt["title"] for alt in comic["md_titles"]],
        "artist"      : [person["name"] for person in props["artists"]],
        "author"      : [person["name"] for person in props["authors"]],
        "genre"       : genres,
        "theme"       : themes,
        "format"      : fmt,
        "tags"        : tags,
        "publisher"   : publisher,
        "published"   : text.parse_int(comic["year"]),
        "description" : comic["desc"],
        "demographic" : props["demographic"],
        "origin"      : comic["iso639_1"],
        "mature"      : props["matureContent"],
        "rating"      : comic["content_rating"],
        "rank"        : comic["follow_rank"],
        "score"       : text.parse_float(comic["bayesian_rating"]),
        # a status value of 2 is reported as "Complete"
        "status"      : "Complete" if comic["status"] == 2 else "Ongoing",
        "links"       : comic["links"],
        # needed to build '/_next/data/<buildId>/...' URLs
        "_build_id"   : data["buildId"],
    }
262
263
264
def _chapter_info(self, manga, chstr):
265
slug = manga['manga_slug']
266
url = (f"{self.root}/_next/data/{manga['_build_id']}"
267
f"/comic/{slug}/{chstr}.json")
268
params = {"slug": slug, "chapter": chstr}
269
return self.request_json(url, params=params)["pageProps"]
270
271