CoCalc -- erome.py

GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/erome.py
⁵³⁹⁹ views
1
# -*- coding: utf-8 -*-
2

3
# Copyright 2021-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8

9
"""Extractors for https://www.erome.com/"""
10

11
from .common import Extractor, Message
12
from .. import text, util, exception
13
from ..cache import cache
14
import itertools
15

16
BASE_PATTERN = r"(?:https?://)?(?:www\.)?erome\.com"
17

18

19
class EromeExtractor(Extractor):
20
    category = "erome"
21
    directory_fmt = ("{category}", "{user}")
22
    filename_fmt = "{album_id} {title} {num:>02}.{extension}"
23
    archive_fmt = "{album_id}_{num}"
24
    root = "https://www.erome.com"
25
    _cookies = True
26

27
    def items(self):
28
        base = f"{self.root}/a/"
29
        data = {"_extractor": EromeAlbumExtractor}
30
        for album_id in self.albums():
31
            yield Message.Queue, f"{base}{album_id}", data
32

33
    def albums(self):
34
        return ()
35

36
    def request(self, url, **kwargs):
37
        if self._cookies:
38
            self._cookies = False
39
            self.cookies.update(_cookie_cache())
40

41
        for _ in range(5):
42
            response = Extractor.request(self, url, **kwargs)
43
            if response.cookies:
44
                _cookie_cache.update("", response.cookies)
45
            if response.content.find(
46
                    b"<title>Please wait a few moments</title>", 0, 600) < 0:
47
                return response
48
            self.sleep(5.0, "check")
49

50
    def _pagination(self, url, params):
51
        find_albums = EromeAlbumExtractor.pattern.findall
52

53
        for params["page"] in itertools.count(
54
                text.parse_int(params.get("page"), 1)):
55
            page = self.request(url, params=params).text
56

57
            album_ids = find_albums(page)[::2]
58
            yield from album_ids
59

60
            if len(album_ids) < 36:
61
                return
62

63

64
class EromeAlbumExtractor(EromeExtractor):
65
    """Extractor for albums on erome.com"""
66
    subcategory = "album"
67
    pattern = BASE_PATTERN + r"/a/(\w+)"
68
    example = "https://www.erome.com/a/ID"
69

70
    def items(self):
71
        album_id = self.groups[0]
72
        url = f"{self.root}/a/{album_id}"
73

74
        try:
75
            page = self.request(url).text
76
        except exception.HttpError as exc:
77
            raise exception.AbortExtraction(
78
                f"{album_id}: Unable to fetch album page ({exc})")
79

80
        title, pos = text.extract(
81
            page, 'property="og:title" content="', '"')
82
        pos = page.index('<div class="user-profile', pos)
83
        user, pos = text.extract(
84
            page, 'href="https://www.erome.com/', '"', pos)
85
        tags, pos = text.extract(
86
            page, '<p class="mt-10"', '</p>', pos)
87

88
        urls = []
89
        date = None
90
        groups = page.split('<div class="media-group"')
91
        for group in util.advance(groups, 1):
92
            url = (text.extr(group, '<source src="', '"') or
93
                   text.extr(group, 'data-src="', '"'))
94
            if url:
95
                urls.append(url)
96
            if not date:
97
                ts = text.extr(group, '?v=', '"')
98
                if len(ts) > 1:
99
                    date = text.parse_timestamp(ts)
100

101
        data = {
102
            "album_id": album_id,
103
            "title"   : text.unescape(title),
104
            "user"    : text.unquote(user),
105
            "count"   : len(urls),
106
            "date"    : date,
107
            "tags"    : ([t.replace("+", " ")
108
                          for t in text.extract_iter(tags, "?q=", '"')]
109
                         if tags else ()),
110
            "_http_headers": {"Referer": url},
111
        }
112

113
        yield Message.Directory, data
114
        for data["num"], url in enumerate(urls, 1):
115
            yield Message.Url, url, text.nameext_from_url(url, data)
116

117

118
class EromeUserExtractor(EromeExtractor):
119
    subcategory = "user"
120
    pattern = BASE_PATTERN + r"/(?!a/|search\?)([^/?#]+)(?:/?\?([^#]+))?"
121
    example = "https://www.erome.com/USER"
122

123
    def albums(self):
124
        user, qs = self.groups
125
        url = f"{self.root}/{user}"
126

127
        params = text.parse_query(qs)
128
        if "t" not in params and not self.config("reposts", False):
129
            params["t"] = "posts"
130

131
        return self._pagination(url, params)
132

133

134
class EromeSearchExtractor(EromeExtractor):
135
    subcategory = "search"
136
    pattern = BASE_PATTERN + r"/search/?\?(q=[^#]+)"
137
    example = "https://www.erome.com/search?q=QUERY"
138

139
    def albums(self):
140
        url = f"{self.root}/search"
141
        params = text.parse_query(self.groups[0])
142
        return self._pagination(url, params)
143

144

145
@cache()
146
def _cookie_cache():
147
    return ()
148

149
Product

Resources

Company