Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/ahottie.py
8935 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for https://ahottie.top/"""
10
11
from .common import Extractor, GalleryExtractor, Message
12
from .. import text
13
14
# Regex prefix shared by the extractor patterns in this module;
# both the URL scheme and a leading "www." are optional.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?ahottie\.top"
15
16
17
class AhottieExtractor(Extractor):
    """Base class for ahottie extractors"""
    category = "ahottie"
    root = "https://ahottie.top"

    def items(self):
        """Yield a Message.Queue entry for every album from albums()"""
        # albums() is not defined here; subclasses provide it
        for entry in self.albums():
            yield Message.Queue, entry["url"], entry

    def _pagination(self, url, params):
        """Iterate over the albums of a paginated listing

        Requests 'url' with 'params' and yields one dict per
        '<div class="relative">' block found in the response.
        'params' is modified in place: its "page" value is normalized
        to an integer (default 1) and incremented for each further page.
        Iteration ends once a response no longer contains 'rel="next"'.
        """
        extr = text.extr
        params["page"] = text.parse_int(params.get("page"), 1)

        while True:
            html = self.request(url, params=params).text

            for snippet in text.extract_iter(
                    html, '<div class="relative">', '</div>'):
                link = extr(snippet, ' href="', '"')
                title = text.unquote(extr(snippet, ' alt="', '"'))
                date = self.parse_datetime_iso(
                    extr(snippet, ' datetime="', '"'))
                yield {
                    "url"  : link,
                    "title": title,
                    "date" : date,
                    "_extractor": AhottieGalleryExtractor,
                }

            # only advance the page counter when another page is advertised
            if 'rel="next"' in html:
                params["page"] += 1
            else:
                return
46
47
48
class AhottieGalleryExtractor(GalleryExtractor, AhottieExtractor):
    # Extractor for a single album page (https://ahottie.top/albums/ID)
    directory_fmt = ("{category}", "{date:%Y-%m-%d} {title} ({gallery_id})")
    filename_fmt = "{num:>03}.{extension}"
    archive_fmt = "{gallery_id}_{num}_{filename}"
    pattern = BASE_PATTERN + r"(/albums/(\w+))"
    example = "https://ahottie.top/albums/1234567890"

    def metadata(self, page):
        """Return gallery_id, title, date, and tags for an album page"""
        gallery_id = self.groups[1]

        # NOTE(review): text.extract_from presumably scans forward through
        # 'page', so the three lookups stay in this order -- confirm
        extr = text.extract_from(page)
        title = extr("<title>", "<").rpartition(" | ")[0]
        date = self.parse_datetime_iso(extr('datetime="', '"'))
        # the first split_html() element is dropped
        # NOTE(review): presumably a label before the tag names -- confirm
        tags = text.split_html(extr('<i ', '</div>'))

        return {
            "gallery_id": gallery_id,
            "title": text.unescape(title),
            "date" : date,
            "tags" : tags[1:],
        }

    def images(self, page):
        """Return (url, metadata) pairs for the images of all album pages

        Starts with the given 'page' and keeps requesting the link that
        precedes 'rel="next"' until a page without such a marker is reached.
        """
        # one dict object shared by every result tuple
        # NOTE(review): "Referer": None presumably suppresses that header
        # for image downloads -- confirm
        shared = {
            "_http_headers" : {"Referer": None},
            "_http_validate": self._validate,
        }

        results = []
        while True:
            # begin just past the first "<time " (0 if there is none)
            start = page.find("<time ") + 1
            results.extend(
                (src, shared)
                for src in text.extract_iter(page, '" src="', '"', start)
            )

            marker = page.find('rel="next"', start)
            if marker < 0:
                return results

            next_url = text.rextr(page, 'href="', '"', marker)
            page = self.request(next_url).text

    def _validate(self, response):
        """Return False for a 2421-byte 'image/jpeg' response, else True"""
        # NOTE(review): this size/type combination presumably identifies
        # a placeholder image served by the site -- confirm
        headers = response.headers
        if headers.get("content-length") != "2421":
            return True
        return headers.get("content-type") != "image/jpeg"
88
89
90
class AhottieTagExtractor(AhottieExtractor):
    # Extractor for tag listings (https://ahottie.top/tags/TAG)
    subcategory = "tag"
    pattern = BASE_PATTERN + r"/tags/([^/?#]+)"
    example = "https://ahottie.top/tags/TAG"

    def albums(self):
        """Return the paginated albums listed under the tag from the URL"""
        slug = self.groups[0]
        # store the unquoted tag name as 'search_tags';
        # the listing URL uses the tag exactly as it was matched
        self.kwdict["search_tags"] = text.unquote(slug)
        listing_url = f"{self.root}/tags/{slug}"
        return self._pagination(listing_url, {})
99
100
101
class AhottieSearchExtractor(AhottieExtractor):
    # Extractor for search results (https://ahottie.top/search?kw=QUERY)
    subcategory = "search"
    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
    example = "https://ahottie.top/search?kw=QUERY"

    def albums(self):
        """Return the paginated albums matching the URL's query string"""
        query = text.parse_query(self.groups[0])
        # 'kw' may be missing from the query, leaving 'search_tags' as None
        self.kwdict["search_tags"] = query.get("kw")
        search_url = f"{self.root}/search"
        return self._pagination(search_url, query)
110
111