CoCalc -- agnph.py

GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/agnph.py
8901 views
1
# -*- coding: utf-8 -*-
2

3
# Copyright 2024-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8

9
"""Extractors for https://agn.ph/"""
10

11
from . import booru
12
from .. import text
13
import collections
14

15
# Regex prefix shared by the extractor URL patterns in this module:
# an optional "http://" or "https://" scheme followed by the "agn.ph" host
BASE_PATTERN = r"(?:https?://)?agn\.ph"
16

17

18
class AgnphExtractor(booru.BooruExtractor):
    """Base class for agn.ph extractors"""
    category = "agnph"
    root = "https://agn.ph"
    page_start = 1
    per_page = 45

    # Single-character prefix of a tag's '<x>typetag' CSS class
    # mapped to the suffix used for the 'tags_<suffix>' post field
    TAG_TYPES = {
        "a": "artist",
        "b": "copyright",
        "c": "character",
        "d": "species",
        "m": "general",
    }

    def _init(self):
        """Set the 'confirmed_age' cookie for the agn.ph domain"""
        self.cookies.set("confirmed_age", "true", domain="agn.ph")

    def _prepare(self, post):
        """Update 'date', 'status', and 'has_children' of 'post' in-place"""
        created = post["created_at"]
        post["date"] = self.parse_timestamp(created)

        status = post["status"]
        post["status"] = status.strip()

        # True if the raw value contains "true"
        children = post["has_children"]
        post["has_children"] = "true" in children

    def _xml_to_dict(self, xml):
        """Return a dict mapping each child's tag name to its text"""
        result = {}
        for child in xml:
            result[child.tag] = child.text
        return result

    def _pagination(self, url, params):
        """Yield one dict per child element of every XML result page

        'params' is modified in-place: 'api' is set to "xml" and
        'page' holds the number of the page currently being requested.
        """
        params["api"] = "xml"

        # an existing 'page' value is shifted by 'page_start'
        pnum = self.page_start
        if "page" in params:
            pnum += text.parse_int(params["page"]) - 1
        params["page"] = pnum

        while True:
            root = self.request_xml(url, params=params)

            for element in root:
                yield self._xml_to_dict(element)

            # stop once this page's 'offset' plus its number of entries
            # reaches the 'count' attribute of the response root
            info = root.attrib
            fetched = int(info["offset"]) + len(root)
            if fetched >= int(info["count"]):
                return

            params["page"] += 1

    def _html(self, post):
        """Request the 'show' page for 'post' and return its text"""
        response = self.request(
            f"{self.root}/gallery/post/show/{post['id']}/")
        return response.text

    def _tags(self, post, page):
        """Add 'tags_<type>' fields to 'post' from the tag list in 'page'"""
        taglist = text.extr(
            page, '<ul class="taglist">', '<h3>Statistics</h3>')
        if not taglist:
            # nothing extracted; leave 'post' untouched
            return

        # collect tag names per type character
        grouped = collections.defaultdict(list)
        find_tags = text.re(r'class="(.)typetag">([^<]+)').findall
        for kind, name in find_tags(taglist):
            grouped[kind].append(text.unquote(name).replace(" ", "_"))

        # one space-separated string per tag type
        for kind, names in grouped.items():
            post["tags_" + self.TAG_TYPES[kind]] = " ".join(names)
78

79

80
class AgnphTagExtractor(AgnphExtractor):
    """Extractor for agn.ph gallery listings and tag searches"""
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    archive_fmt = "t_{search_tags}_{id}"
    pattern = BASE_PATTERN + r"/gallery/post/(?:\?([^#]+))?$"
    example = "https://agn.ph/gallery/post/?search=TAG"

    def __init__(self, match):
        AgnphExtractor.__init__(self, match)
        # presumably the query-string group captured by 'pattern'
        query = self.groups[0]
        self.params = text.parse_query(query)

    def metadata(self):
        """Return 'search_tags' from the 'search' parameter ("" if unset)"""
        search = self.params.get("search")
        return {"search_tags": search if search else ""}

    def posts(self):
        """Return a generator over the posts of the gallery listing"""
        url = f"{self.root}/gallery/post/"
        # pass a copy, since _pagination() modifies its 'params' argument
        query = self.params.copy()
        return self._pagination(url, query)
97

98

99
class AgnphPostExtractor(AgnphExtractor):
    """Extractor for individual agn.ph posts"""
    subcategory = "post"
    archive_fmt = "{id}"
    pattern = BASE_PATTERN + r"/gallery/post/show/(\d+)"
    example = "https://agn.ph/gallery/post/show/12345/"

    def posts(self):
        """Return a 1-tuple holding the post's XML data as a dict"""
        post_id = self.groups[0]
        root = self.request_xml(
            f"{self.root}/gallery/post/show/{post_id}/?api=xml")
        return (self._xml_to_dict(root),)
109

110
Product

Resources

Company