CoCalc -- ahottie.py

GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/ahottie.py
8935 views
1
# -*- coding: utf-8 -*-
2

3
# Copyright 2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8

9
"""Extractors for https://ahottie.top/"""
10

11
from .common import Extractor, GalleryExtractor, Message
12
from .. import text
13

14
# Common URL prefix for every extractor pattern in this module:
# an optional http(s) scheme, an optional "www." and the ahottie.top host.
# The extractor classes append their own path expression to it.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?ahottie\.top"
15

16

17
class AhottieExtractor(Extractor):
    """Base class for ahottie extractors"""
    category = "ahottie"
    root = "https://ahottie.top"

    def items(self):
        """Emit one Message.Queue entry per album dict from self.albums()."""
        for entry in self.albums():
            yield Message.Queue, entry["url"], entry

    def _pagination(self, url, params):
        """Yield an album dict for each listing block on successive pages.

        'params' is sent as the query of every request and is modified in
        place: its "page" value is parsed with text.parse_int (default 1)
        and incremented after each page that contains a rel="next" marker.
        """
        params["page"] = text.parse_int(params.get("page"), 1)
        find = text.extr

        while True:
            html = self.request(url, params=params).text

            blocks = text.extract_iter(
                html, '<div class="relative">', '</div>')
            for block in blocks:
                href = find(block, ' href="', '"')
                title = text.unquote(find(block, ' alt="', '"'))
                date = self.parse_datetime_iso(
                    find(block, ' datetime="', '"'))
                yield {
                    "url"  : href,
                    "title": title,
                    "date" : date,
                    # hand the queued URL to the gallery extractor
                    "_extractor": AhottieGalleryExtractor,
                }

            # no link to a following page -> listing is exhausted
            if 'rel="next"' not in html:
                return
            params["page"] += 1
46

47

48
class AhottieGalleryExtractor(GalleryExtractor, AhottieExtractor):
    # Extractor for a single album; 'pattern' captures the album path
    # (group 0) and the album ID (group 1).
    directory_fmt = ("{category}", "{date:%Y-%m-%d} {title} ({gallery_id})")
    filename_fmt = "{num:>03}.{extension}"
    archive_fmt = "{gallery_id}_{num}_{filename}"
    pattern = BASE_PATTERN + r"(/albums/(\w+))"
    example = "https://ahottie.top/albums/1234567890"

    def metadata(self, page):
        """Build the album metadata dict from the first album page."""
        # NOTE(review): text.extract_from presumably returns a stateful
        # extractor that advances through 'page'; keep the call order below.
        nxt = text.extract_from(page)

        gallery_id = self.groups[1]
        # drop everything after the last " | " of the <title> text
        title = text.unescape(nxt("<title>", "<").rpartition(" | ")[0])
        date = self.parse_datetime_iso(nxt('datetime="', '"'))
        # the first split_html element is discarded
        tags = text.split_html(nxt('<i ', '</div>'))[1:]

        return {
            "gallery_id": gallery_id,
            "title": title,
            "date" : date,
            "tags" : tags,
        }

    def images(self, page):
        """Return a list of (url, data) tuples for all pages of the album.

        Image sources are collected from each page, then the rel="next"
        link is followed until a page without one is reached.
        """
        # one dict instance shared by every result tuple
        # NOTE(review): presumably suppresses the Referer header and installs
        # the response check for file downloads - confirm against downloader
        shared = {
            "_http_headers" : {"Referer": None},
            "_http_validate": self._validate,
        }

        found = []
        while True:
            # str.find() gives -1 when "<time " is absent, so the scan
            # then starts at offset 0
            start = page.find("<time ") + 1
            found.extend(
                (src, shared)
                for src in text.extract_iter(page, '" src="', '"', start))

            marker = page.find('rel="next"', start)
            if marker < 0:
                return found
            # the href value is searched backwards from the rel="next" marker
            next_url = text.rextr(page, 'href="', '"', marker)
            page = self.request(next_url).text

    def _validate(self, response):
        """Return False only for a 2421-byte image/jpeg response."""
        # NOTE(review): presumably a fixed-size placeholder image - confirm
        header = response.headers.get
        return (
            header("content-length") != "2421" or
            header("content-type") != "image/jpeg"
        )
88

89

90
class AhottieTagExtractor(AhottieExtractor):
    # Extractor for the album listing of a tag
    subcategory = "tag"
    pattern = BASE_PATTERN + r"/tags/([^/?#]+)"
    example = "https://ahottie.top/tags/TAG"

    def albums(self):
        """Return the paginated album listing for the tag in the URL."""
        slug = self.groups[0]
        listing_url = f"{self.root}/tags/{slug}"
        # the URL keeps the tag as given; metadata gets the unquoted form
        self.kwdict["search_tags"] = text.unquote(slug)
        return self._pagination(listing_url, {})
99

100

101
class AhottieSearchExtractor(AhottieExtractor):
    # Extractor for search result listings
    subcategory = "search"
    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
    example = "https://ahottie.top/search?kw=QUERY"

    def albums(self):
        """Return the paginated album listing for the URL's search query."""
        query = text.parse_query(self.groups[0])
        keyword = query.get("kw")
        self.kwdict["search_tags"] = keyword
        endpoint = f"{self.root}/search"
        # the full parsed query (not just "kw") is forwarded to the site
        return self._pagination(endpoint, query)
110

111
Product

Resources

Company