Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/dynastyscans.py
5399 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2015-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for https://dynasty-scans.com/"""
10
11
from .common import ChapterExtractor, MangaExtractor, Extractor, Message
12
from .. import text, util
13
14
# URL prefix shared by the extractor patterns below: an optional
# "http://" or "https://" scheme, an optional "www." subdomain,
# and the literal "dynasty-scans.com" host name.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com"
15
16
17
class DynastyscansBase():
    """Common attributes and helpers shared by dynastyscans extractors"""
    category = "dynastyscans"
    root = "https://dynasty-scans.com"

    def _parse_image_page(self, image_id):
        """Request the page of a single image and return its metadata

        'image_id' is inserted into the '/images/<id>' URL path.
        The returned dict has the keys 'url', 'image_id', 'tags',
        'date', and 'source'.
        """
        response = self.request(f"{self.root}/images/{image_id}")
        extr = text.extract_from(response.text)

        # pull out the page fragments holding the individual values
        created = extr("class='create_at'>", "</span>")
        tag_html = extr("class='tags'>", "</span>")
        buttons = extr("class='btn-group'>", "</div>")
        path = extr(' src="', '"')

        # take the link from the button group only when it has
        # a "Source" entry; otherwise report an empty source
        if "Source<" in buttons:
            source = text.extr(buttons, 'href="', '"')
        else:
            source = ""

        return {
            "url": self.root + path,
            "image_id": text.parse_int(image_id),
            "tags": text.split_html(tag_html),
            "date": text.remove_html(created),
            "source": text.unescape(source),
        }
40
41
42
class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
    """Extractor for manga-chapters from dynasty-scans.com"""
    pattern = BASE_PATTERN + r"(/chapters/[^/?#]+)"
    example = "https://dynasty-scans.com/chapters/NAME"

    def metadata(self, page):
        """Build the metadata dict for a chapter from its HTML 'page'

        The chapter heading is split into manga name, chapter number,
        minor chapter suffix, and title; author, group, date, and tags
        are taken from the markup following the heading.
        """
        extr = text.extract_from(page)

        heading = extr("<h3 id='chapter-title'><b>", "</b>")
        heading_pattern = util.re(
            r"(?:<a[^>]*>)?([^<]+)(?:</a>)?"  # manga name
            r"(?: ch(\d+)([^:<]*))?"          # chapter info
            r"(?:: (.+))?"                    # title
        )
        match = heading_pattern.match(heading)

        # these lookups are issued in the same order as before
        author = extr(" by ", "</a>")
        group = extr('"icon-print"></i> ', '</span>')
        released = extr('"icon-calendar"></i> ', '<')
        tag_html = extr("class='tags'>", "<div id='chapter-actions'")

        manga = text.unescape(match[1])
        chapter = text.parse_int(match[2])
        chapter_minor = match[3] or ""
        title = text.unescape(match[4] or "")

        # use the group's text content if there is any;
        # otherwise fall back to the value of an 'alt' attribute
        group_name = text.remove_html(group)
        if not group_name:
            group_name = text.extr(group, ' alt="', '"')

        return {
            "manga": manga,
            "chapter": chapter,
            "chapter_minor": chapter_minor,
            "title": title,
            "author": text.remove_html(author),
            "group": group_name,
            "date": text.parse_datetime(released, "%b %d, %Y"),
            "tags": text.split_html(tag_html),
            "lang": "en",
            "language": "English",
        }

    def images(self, page):
        """Return a list of (image URL, None) tuples for a chapter

        The image list is read from the JSON value assigned to the
        'pages' variable inside the chapter page's script.
        """
        raw = text.extr(page, "var pages = ", ";\n")
        entries = util.json_loads(raw)
        return [(self.root + entry["image"], None) for entry in entries]
79
80
81
class DynastyscansMangaExtractor(DynastyscansBase, MangaExtractor):
    """Extractor for manga series from dynasty-scans.com"""
    chapterclass = DynastyscansChapterExtractor
    reverse = False
    pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
    example = "https://dynasty-scans.com/series/NAME"

    def chapters(self, page):
        """Return a list of (chapter URL, {}) tuples found in 'page'

        Every link directly following a '<dd>' tag is treated as
        a chapter path relative to the site root.
        """
        results = []
        for path in text.extract_iter(page, '<dd>\n<a href="', '"'):
            results.append((self.root + path, {}))
        return results
92
93
94
class DynastyscansSearchExtractor(DynastyscansBase, Extractor):
    """Extractor for image search results on dynasty-scans.com"""
    subcategory = "search"
    directory_fmt = ("{category}", "Images")
    filename_fmt = "{image_id}.{extension}"
    archive_fmt = "i_{image_id}"
    pattern = BASE_PATTERN + r"/images/?(?:\?([^#]+))?$"
    example = "https://dynasty-scans.com/images?QUERY"

    def __init__(self, match):
        """Store the first capture group of 'match' as 'self.query'"""
        Extractor.__init__(self, match)
        captured = match[1]
        # a missing or empty group is normalized to an empty string
        self.query = captured if captured else ""

    def items(self):
        """Yield one Directory message and a Url message per image"""
        yield Message.Directory, {}
        for image_id in self.images():
            metadata = self._parse_image_page(image_id)
            file_url = metadata["url"]
            yield (Message.Url, file_url,
                   text.nameext_from_url(file_url, metadata))

    def images(self):
        """Yield the image IDs from all pages of the search results"""
        query = self.query.replace("[]", "%5B%5D")
        url = f"{self.root}/images?{query}"
        params = {"page": 1}

        while True:
            page = self.request(url, params=params).text
            yield from text.extract_iter(page, '"/images/', '"')

            # continue only while the page links to a following one
            if 'rel="next"' in page:
                params["page"] += 1
            else:
                return
124
125
126
class DynastyscansImageExtractor(DynastyscansSearchExtractor):
    """Extractor for individual images on dynasty-scans.com"""
    subcategory = "image"
    pattern = BASE_PATTERN + r"/images/(\d+)"
    example = "https://dynasty-scans.com/images/12345"

    def images(self):
        """Return a 1-tuple with the image ID captured from the URL"""
        # 'self.query' holds the pattern's first capture group,
        # which for this extractor is the numeric image ID
        image_id = self.query
        return (image_id,)
134
135
136
class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
    """Extractor for dynasty-scans anthologies"""
    subcategory = "anthology"
    pattern = BASE_PATTERN + r"/anthologies/([^/?#]+)"
    example = "https://dynasty-scans.com/anthologies/TITLE"

    def items(self):
        """Yield a Queue message for every entry of an anthology feed

        The anthology's '.atom' feed is requested and each of its
        'entry' elements is queued for DynastyscansChapterExtractor.
        With the 'metadata' option enabled, the anthology's HTML page
        is requested as well to add 'alert', 'status', and
        'description' values.
        """
        anthology_url = f"{self.root}/anthologies/{self.groups[0]}"
        feed = self.request_xml(anthology_url + ".atom", xmlns=False)

        # NOTE(review): the fixed slice offsets here and below
        # presumably strip constant label prefixes from the feed's
        # text nodes - confirm against an actual feed
        data = {
            "_extractor": DynastyscansChapterExtractor,
            "anthology": feed[3].text[28:],
        }

        if self.config("metadata", False):
            page = self.request(anthology_url).text
            alert = text.extr(page, "<div class='alert", "</div>")

            if alert:
                data["alert"] = text.split_html(alert)[1:]
            else:
                data["alert"] = ()
            data["status"] = text.extr(
                page, "<small>&mdash; ", "</small>")
            data["description"] = text.extr(
                page, "<div class='description'>", "</div>")

        timestamp_fmt = "%Y-%m-%dT%H:%M:%S%z"
        entries = (child for child in feed if child.tag == "entry")

        # the same 'data' dict is updated and yielded for each entry
        for entry in entries:
            content = entry[6][0]
            data["author"] = content[0].text[8:]
            data["scanlator"] = content[1].text[11:]
            data["tags"] = content[2].text[6:].lower().split(", ")
            data["title"] = entry[5].text
            data["date"] = text.parse_datetime(
                entry[1].text, timestamp_fmt)
            data["date_updated"] = text.parse_datetime(
                entry[2].text, timestamp_fmt)
            yield Message.Queue, entry[4].text, data
174
175