Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/chevereto.py
8935 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2023-2026 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for Chevereto galleries"""
10
11
from .common import BaseExtractor, Message
12
from .. import text, util
13
14
15
class CheveretoExtractor(BaseExtractor):
    """Base class for chevereto extractors"""
    basecategory = "chevereto"
    directory_fmt = ("{category}", "{user}", "{album}")
    archive_fmt = "{id}"
    parent = True

    def _init(self):
        # every subclass pattern ends with a capture group for the URL path
        self.path = self.groups[-1]

    def _pagination(self, url, callback=None):
        """Yield absolute item URLs from a listing and all its next pages

        If the first page contains a '<form', a password submission is
        attempted and its result (if any) replaces that page.
        'callback', when given, is called once with the first page.
        """
        page = self.request(url).text

        form = text.extr(page, "<form ", "</form")
        if form:
            unlocked = self._password_submit(url, form)
            if unlocked:
                page = unlocked

        if callback is not None:
            callback(page)

        while True:
            items = text.extract_iter(
                page, '<div class="list-item-image ', 'image-container')
            for item in items:
                href = text.extr(item, '<a href="', '"')
                yield text.urljoin(self.root, href)

            next_url = text.extr(page, 'data-pagination="next" href="', '"')
            if not next_url:
                return
            next_url = text.unescape(next_url).replace("+", " ")
            if next_url[0] == "/":
                # relative link: prepend the site root
                next_url = self.root + next_url
            page = self.request(next_url).text

    def _password_submit(self, url, form):
        """Submit passwords to 'url' until the password form disappears

        Candidates are the last password that worked, the values of the
        'password' option, and finally interactive input.
        Returns the text of the last response, or None if nothing was
        submitted.
        """
        candidates = getattr(self, "_password_sources", None)
        if candidates is None:
            # build the candidate list once and cache it on the instance
            candidates = self._password_sources = []

            previous = getattr(self, "_password_last", None)
            if previous:
                candidates.append(previous)

            configured = self.config("password")
            if configured:
                if isinstance(configured, str):
                    configured = configured.split(",")
                candidates.extend(configured)

            # reversed so that pop() below returns them in original order
            candidates.reverse()

        # work on a copy; the cached list is reused by later calls
        queue = list(candidates)

        response = None
        attempted = set()
        while True:
            if queue:
                password = queue.pop()
            else:
                password = self.input("Password: ")

            if not password:
                break
            if password in attempted:
                continue

            self.log.debug("Submitting password '%s'", password)
            token = text.unescape(text.extr(
                form, 'name="auth_token" value="', '"'))
            data = {
                "auth_token": token,
                "content-password": password,
            }
            response = self.request(url, method="POST", data=data).text

            # a response without a form means the password was accepted
            form = text.extr(response, "<form ", "</form")
            if not form:
                CheveretoExtractor._password_last = password
                break
            attempted.add(password)

        return response
82
83
84
# Known Chevereto instances, keyed by category name.
#   "root"   : base URL used to build absolute request URLs
#   "pattern": regex for the host names belonging to that instance
# The value returned by update() is used as the prefix of every
# extractor 'pattern' below.
BASE_PATTERN = CheveretoExtractor.update({
    "jpgfish": {
        "root": "https://jpg7.cr",
        "pattern": r"(?:www\.)?jpe?g\d?\.(?:cr|su|pet|fish(?:ing)?|church)",
    },
    "imagepond": {
        "root": "https://imagepond.net",
        "pattern": r"(?:www\.)?imagepond\.net",
    },
    "imglike": {
        "root": "https://imglike.com",
        "pattern": r"(?:www\.)?imglike\.com",
    },
})
98
99
100
class CheveretoImageExtractor(CheveretoExtractor):
    """Extractor for chevereto images"""
    subcategory = "image"
    pattern = BASE_PATTERN + r"(/im(?:g|age)/[^/?#]+)"
    example = "https://jpg7.cr/img/TITLE.ID"

    def items(self):
        """Yield the directory and URL messages for a single image page"""
        page = self.request(self.root + self.path).text
        # 'extr' is called in the same sequence as the markers it looks for
        extr = text.extract_from(page)

        file_url = extr('<meta property="og:image" content="', '"')
        if not file_url:
            file_url = extr('url: "', '"')

        if not file_url or file_url.endswith("/loading.svg"):
            # fall back to the 'href' preceding the ' download=' attribute
            download_pos = page.find(" download=")
            file_url = text.rextr(page, 'href="', '"', download_pos)
            if not file_url.startswith("https://"):
                # not a plain URL: treat it as a hex-encoded XOR payload
                file_url = util.decrypt_xor(
                    file_url, b"seltilovessimpcity@simpcityhatesscrapers",
                    fromhex=True)

        album_html = extr("Added to <a", "</a>")
        album_url, _, album_name = album_html.rpartition(">")

        image_id = self.path.rpartition("/")[2].rpartition(".")[2]
        album = text.remove_html(album_name)
        date = self.parse_datetime_iso(extr('<span title="', '"'))
        user = extr('username: "', '"')

        file = {
            "id"   : image_id,
            "url"  : file_url,
            "album": album,
            "date" : date,
            "user" : user,
        }

        # last path segment of the album link is "<slug>.<id>"
        slug, _, album_id = text.rextr(album_url, "/", '"').rpartition(".")
        file["album_slug"] = slug
        file["album_id"] = album_id

        text.nameext_from_url(file["url"], file)
        yield Message.Directory, "", file
        yield Message.Url, file["url"], file
136
137
138
class CheveretoVideoExtractor(CheveretoExtractor):
    """Extractor for chevereto videos"""
    subcategory = "video"
    pattern = BASE_PATTERN + r"(/video/[^/?#]+)"
    example = "https://imagepond.net/video/TITLE.ID"

    def items(self):
        """Yield the directory and URL messages for a single video page"""
        url = self.root + self.path
        page = self.request(url).text
        # 'extr' is called in the same sequence as the markers it looks for
        extr = text.extract_from(page)

        file = {
            # strip the directory part first, so that a path without a "."
            # yields its last segment instead of the whole path
            # (same derivation as in CheveretoImageExtractor)
            "id"       : self.path.rpartition("/")[2].rpartition(".")[2],
            "title"    : text.unescape(extr(
                'property="og:title" content="', '"')),
            "thumbnail": extr(
                'property="og:image" content="', '"'),
            "url"      : extr(
                'property="og:video" content="', '"'),
            "width"    : text.parse_int(extr(
                'property="video:width" content="', '"')),
            "height"   : text.parse_int(extr(
                'property="video:height" content="', '"')),
            "duration" : extr(
                'class="far fa-clock"></i>', "—"),
            # raw link markup; replaced by the plain album name below
            "album"    : extr(
                "Added to <a", "</a>"),
            "date"     : self.parse_datetime_iso(extr('<span title="', '"')),
            "user"     : extr('username: "', '"'),
        }

        album_url, _, album_name = file["album"].rpartition(">")
        file["album"] = text.remove_html(album_name)
        # last path segment of the album link is "<slug>.<id>"
        file["album_slug"], _, file["album_id"] = text.rextr(
            album_url, "/", '"').rpartition(".")

        # convert "M:SS" / "H:MM:SS" (or plain seconds) to an integer number
        # of seconds; keep the raw text when it is empty or not numeric
        try:
            seconds = 0
            for part in file["duration"].split(":"):
                seconds = seconds * 60 + int(part)
        except ValueError:
            pass
        else:
            file["duration"] = seconds

        text.nameext_from_url(file["url"], file)
        yield Message.Directory, "", file
        yield Message.Url, file["url"], file
183
184
185
class CheveretoAlbumExtractor(CheveretoExtractor):
    """Extractor for chevereto albums"""
    subcategory = "album"
    pattern = BASE_PATTERN + r"(/a(?:lbum)?/[^/?#]+(?:/sub)?)"
    example = "https://jpg7.cr/album/TITLE.ID"

    def items(self):
        """Queue every image and video of an album or of its sub-albums"""
        url = self.root + self.path
        data_image = {"_extractor": CheveretoImageExtractor}
        data_video = {"_extractor": CheveretoVideoExtractor}

        # a ".../sub" URL lists sub-albums; otherwise 'url' is the album
        albums = (self._pagination(url)
                  if self.path.endswith("/sub") else (url,))

        kwdict = self.kwdict
        for album in albums:
            entries = self._pagination(album, self._extract_metadata_album)
            for num, item_url in enumerate(entries, 1):
                kwdict["num"] = num
                if "/video/" in item_url:
                    data = data_video
                else:
                    data = data_image
                yield Message.Queue, item_url, data

    def _extract_metadata_album(self, page):
        """Store album slug, ID, title, and file count in 'self.kwdict'"""
        url, pos = text.extract(
            page, 'property="og:url" content="', '"')
        title, pos = text.extract(
            page, 'property="og:title" content="', '"', pos)

        kwdict = self.kwdict
        # last path segment of the album URL is "<slug>.<id>"
        slug, _, album_id = url.rpartition("/")[2].rpartition(".")
        kwdict["album_slug"] = slug
        kwdict["album_id"] = album_id
        kwdict["album"] = text.unescape(title)

        count, _ = text.extract(
            page, 'data-text="image-count">', "<", pos)
        kwdict["count"] = text.parse_int(count)
220
221
222
class CheveretoCategoryExtractor(CheveretoExtractor):
    """Extractor for chevereto galleries"""
    subcategory = "category"
    pattern = BASE_PATTERN + r"(/category/[^/?#]+)"
    example = "https://imglike.com/category/TITLE"

    def items(self):
        """Queue every listed item of a category for the image extractor"""
        data = {"_extractor": CheveretoImageExtractor}
        category_url = self.root + self.path
        for image_url in self._pagination(category_url):
            yield Message.Queue, image_url, data
232
233
234
class CheveretoUserExtractor(CheveretoExtractor):
    """Extractor for chevereto users"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"(/[^/?#]+(?:/albums)?)"
    example = "https://jpg7.cr/USER"

    def items(self):
        """Queue every listed item for the extractor matching its URL"""
        data_image = {"_extractor": CheveretoImageExtractor}
        data_video = {"_extractor": CheveretoVideoExtractor}
        data_album = {"_extractor": CheveretoAlbumExtractor}

        for url in self._pagination(self.root + self.path):
            # pick the target extractor from the URL's path component
            if "/album/" in url:
                data = data_album
            elif "/video/" in url:
                data = data_video
            else:
                data = data_image
            yield Message.Queue, url, data
249
250