Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/allporncomic.py
14119 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2026 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for https://allporncomic.com/"""
10
11
from .common import Extractor, ChapterExtractor, MangaExtractor, Message
12
from .. import text
13
14
# Common URL prefix for all extractor patterns in this module:
# optional "http://" or "https://" scheme, optional "www." subdomain
BASE_PATTERN = r"(?:https?://)?(?:www\.)?allporncomic\.com"
15
16
17
class AllporncomicBase:
    """Mixin with the settings and manga-page parsing shared by all
    allporncomic extractors"""
    category = "allporncomic"
    root = "https://allporncomic.com"

    def _manga_info(self, slug, page=None):
        """Return a dict of manga-level metadata for 'slug'

        When 'page' is None, the manga's overview page is requested from
        '{root}/porncomic/{slug}/'; otherwise the given HTML is parsed.
        """
        if page is None:
            page = self.request(f"{self.root}/porncomic/{slug}/").text

        # NOTE(review): 'extr' looks like a stateful, forward-moving
        # extractor (the same marker is queried several times in a row),
        # so the calls below keep their original order -- confirm before
        # reordering any of them
        extr = text.extract_from(page)

        def names(marker):
            # keep every second entry of the split '<div>' content
            return text.split_html(extr(marker, "</div>"))[::2]

        def summary():
            return extr('class="summary-content">', "<").strip()

        locale = extr('property="og:locale" content="', '"')
        og_title = text.unescape(extr('property="og:title" content="', '"'))
        description = text.unescape(
            extr('property="og:description" content="', '"'))
        cover = extr('property="og:image" content="', '"')
        published = self.parse_datetime_iso(extr('"datePublished":"', '"'))
        modified = self.parse_datetime_iso(extr('"dateModified":"', '"'))
        post_id = text.parse_int(extr(" postid-", " "))
        rating = text.parse_float(extr('total_votes">', "<"))
        votes = text.parse_int(extr('id="countrate">', "<"))
        characters = names('class="author-content">')
        parody = names('class="author-content">')
        group = names('class="author-content">')
        artist = names('class="artist-content">')
        tags = names('class="genres-content">')
        kind = summary()
        status = summary()
        comments = text.parse_int(extr('<span>', " "))
        bookmarks = text.parse_int(extr('class="action_detail"><span>', " "))

        # split off an optional trailing " (...)" and/or " [...]" suffix
        # from the og:title; only the leading part is used as manga name
        title_match = text.re(
            r"(.+?)( \([^)]+\))?( \[[^\]]+\])?\s*$").match(og_title)
        manga = title_match[1] if title_match is not None else ""

        return {
            "description": description,
            "manga": manga,
            "manga_slug": slug,
            "manga_cover": cover,
            "manga_date": published,
            "manga_date_updated": modified,
            "manga_id": post_id,
            "rating": rating,
            "votes": votes,
            "characters": characters,
            "parody": parody,
            "group": group,
            "artist": artist,
            "tags": tags,
            "type": kind,
            "status": status,
            "comments": comments,
            "bookmarks": bookmarks,
            # "xx_YY" -> "xx"
            "lang": locale.partition("_")[0],
        }
62
63
64
class AllporncomicChapterExtractor(AllporncomicBase, ChapterExtractor):
    """Extractor for allporncomic manga chapters"""
    directory_fmt = ("{category}", "{path[:-1]:I}", "{title}")
    filename_fmt = "{page:>03}.{extension}"
    archive_fmt = "{manga_id}_{chapter_id}_{page}"
    # capture groups: (1) full path, (2) manga slug,
    # (3) optional chapter number "N" or "N-M", (4) remainder of the slug
    pattern = (BASE_PATTERN +
               r"(/porncomic/([^/?#]+)/(\d+(?:-\d+)?)?([^/?#]+))")
    example = "https://allporncomic.com/porncomic/MANGA/123-TITLE/"

    def __init__(self, match):
        ChapterExtractor.__init__(
            self, match, f"{self.root}{match[1]}/")

    def metadata(self, page):
        """Build the chapter metadata dict from the chapter 'page'

        Also stores in 'self.needle' the marker that images() searches for.
        """
        _, manga_slug, number, title_slug = self.groups
        # "N-M" -> chapter "N", minor "M"; a missing number gives "" for both
        major, _, minor = (number or "").partition("-")

        # pages containing a <source> element are treated as video chapters
        if '<source src="' in page:
            media, self.needle = "video", '<source src="'
        else:
            media, self.needle = "image", ' data-src="'

        breadcrumb = text.extr(page, '<ol class="breadcrumb', '</ol>')
        path = text.split_html(breadcrumb)

        # the last breadcrumb entry is the chapter title:
        # first drop a leading "N." / "N.M." number or "[...]" tag ...
        strip_prefix = text.re(
            r"^(?:\s*\d+(?:\.\d+)?\s*\.|\[[^\]]+\])\s")
        # ... then any trailing " -", " Chapter N [Extras]", " (...)",
        # and " [...]" parts
        strip_suffix = text.re(
            r"(?:\s+-)?"
            r"(?:\s+[Cc]hapter \d+(?:\s+[Ee]xtras)?)?"
            r"(?:\s+\([^)]+\))?"
            r"(?:\s+(?:-\s+)?\[[^\]]+\])?\s*$")
        title = strip_suffix.sub("", strip_prefix.sub("", path[-1]))

        # manga-level values first; chapter-level keys are added after them
        data = {**self.cache(self._manga_info, manga_slug)}
        data["path"] = path[3:]
        data["media"] = media
        data["title"] = title
        data["title_slug"] = title_slug.lstrip("-")
        data["chapter"] = text.parse_int(major)
        data["chapter_id"] = text.parse_int(text.extr(
            page, 'manga_chapter_id" value="', '"'))
        data["chapter_minor"] = "." + minor if minor else ""
        return data

    def images(self, page):
        """Return a list of (URL, None) tuples for all media on 'page'"""
        urls = text.extract_iter(page, self.needle, '"')
        return [(url.strip(), None) for url in urls]
118
119
120
class AllporncomicMangaExtractor(AllporncomicBase, MangaExtractor):
    """Extractor for allporncomic manga"""
    chapterclass = AllporncomicChapterExtractor
    pattern = BASE_PATTERN + r"/porncomic/([^/?#]+)"
    example = "https://allporncomic.com/porncomic/MANGA/"

    def __init__(self, match):
        url = f"{self.root}/porncomic/{match[1]}/"
        MangaExtractor.__init__(self, match, url)

    def chapters(self, page):
        """Return a list of (chapter URL, metadata) tuples found on 'page'

        Every metadata dict holds the manga-level values from
        _manga_info() plus the "date" of the respective chapter.
        """
        slug = text.extr(page, "/porncomic/", "/")
        info = self._manga_info(slug, page)

        results = []
        for ch in text.extract_iter(
                page, '<li class="wp-manga-chapter', '</li>'):
            url = text.extr(ch, ' href="', '"')
            # take the date from this chapter's own <li> block; searching
            # the whole page would give every chapter the same date
            date = self.parse_datetime(text.extr(ch, "<i>", "<"), "%B %d, %Y")
            results.append((url, {**info, "date": date}))
        return results
145
146
147
class AllporncomicTagExtractor(AllporncomicBase, Extractor):
    """Extractor for allporncomic tag search results"""
    subcategory = "tag"
    # capture groups: (1) listing path with optional "/page/N",
    # (2) optional query string
    pattern = (BASE_PATTERN + r"(/(?:porncomic-)?"
               r"(?:genre|series|group|artist|characters)"
               r"/[^/?#]+(?:/page/\d+)?)(/?\?[^#]+)?")
    example = "https://allporncomic.com/porncomic-genre/GENRE/"

    def items(self):
        """Yield a Queue message for every manga on each listing page"""
        groups = self.groups
        path = groups[0]
        # without a query string, the URL simply ends in "/"
        suffix = groups[1] or '/'
        url = f"{self.root}{path}{suffix}"

        # queued URLs are handed to the manga extractor
        data = {"_extractor": AllporncomicMangaExtractor}

        while url:
            page = self.request(url).text

            for item in text.extract_iter(page, 'id="manga-item-', "</div>"):
                yield Message.Queue, text.extr(item, ' href="', '"'), data

            # continue with the page's <link rel="next"> target;
            # the loop stops once this lookup yields a falsy value
            url = text.extr(page, '<link rel="next" href="', '"')
166
167