Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/everia.py
5399 views
1
# -*- coding: utf-8 -*-
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License version 2 as
5
# published by the Free Software Foundation.
6
7
"""Extractors for https://everia.club"""
8
9
from .common import Extractor, Message
10
from .. import text, util
11
12
# Regex prefix shared by all 'pattern' attributes in this module:
# the everia.club host, optionally preceded by an http:// or https:// scheme.
BASE_PATTERN = r"(?:https?://)?everia\.club"
13
14
15
class EveriaExtractor(Extractor):
    """Base class for everia extractors"""
    category = "everia"
    root = "https://everia.club"

    def items(self):
        """Queue every URL returned by posts() for EveriaPostExtractor"""
        queue_data = {"_extractor": EveriaPostExtractor}
        for post_url in self.posts():
            yield Message.Queue, post_url, queue_data

    def posts(self):
        """Return the post URLs listed under the first matched group"""
        path = self.groups[0]
        return self._pagination(path)

    def _pagination(self, path, params=None, pnum=1):
        """Yield post URLs from consecutive listing pages

        Requests start at page 'pnum' and stop at the first response
        whose status code is 300 or higher. Requests are made with
        allow_redirects=False, so a redirect also ends the iteration.
        """
        find_posts = util.re(r'thumbnail">\s*<a href="([^"]+)').findall
        listing = f"{self.root}{path}/"

        while True:
            # page 1 is the bare listing URL; later pages add /page/N/
            page_url = listing if pnum == 1 else f"{listing}page/{pnum}/"
            response = self.request(
                page_url, params=params, allow_redirects=False)

            if response.status_code >= 300:
                return

            for post_url in find_posts(response.text):
                yield post_url
            pnum += 1
42
43
44
class EveriaPostExtractor(EveriaExtractor):
    """Extractor for the images of a single everia.club post"""
    subcategory = "post"
    directory_fmt = ("{category}", "{title}")
    archive_fmt = "{post_url}_{num}"
    pattern = BASE_PATTERN + r"(/\d{4}/\d{2}/\d{2}/[^/?#]+)"
    example = "https://everia.club/0000/00/00/TITLE"

    def items(self):
        """Yield the post's metadata followed by one entry per image"""
        post_url = self.root + self.groups[0] + "/"
        page = self.request(post_url).text

        # restrict the image search to the markup between
        # 'itemprop="text">' and the following '<h3'
        body = text.extr(page, 'itemprop="text">', "<h3")
        image_urls = util.re(r'img.*?lazy-src="([^"]+)').findall(body)

        title = text.extr(page, 'itemprop="headline">', "</h")
        category = text.extr(page, "post-in-category-", " ")
        post = {
            "title": text.unescape(title),
            "tags": list(text.extract_iter(page, 'rel="tag">', "</a>")),
            "post_url": text.unquote(post_url),
            "post_category": category.capitalize(),
            "count": len(image_urls),
        }

        yield Message.Directory, post
        for num, image_url in enumerate(image_urls, 1):
            # the same metadata dict is updated and reused for every image
            post["num"] = num
            image_url = text.unquote(image_url)
            yield Message.Url, image_url, text.nameext_from_url(
                image_url, post)
71
72
73
class EveriaTagExtractor(EveriaExtractor):
    """Extractor for everia.club /tag/ listings"""
    subcategory = "tag"
    # the captured group is the '/tag/NAME' path
    pattern = rf"{BASE_PATTERN}(/tag/[^/?#]+)"
    example = "https://everia.club/tag/TAG"
77
78
79
class EveriaCategoryExtractor(EveriaExtractor):
    """Extractor for everia.club /category/ listings"""
    subcategory = "category"
    # the captured group is the '/category/NAME' path
    pattern = rf"{BASE_PATTERN}(/category/[^/?#]+)"
    example = "https://everia.club/category/CATEGORY"
83
84
85
class EveriaDateExtractor(EveriaExtractor):
    """Extractor for everia.club date listings"""
    subcategory = "date"
    pattern = BASE_PATTERN + (
        # captured path: /YYYY with optional /MM and /DD parts
        r"(/\d{4}(?:/\d{2})?(?:/\d{2})?)"
        # optional page suffix and trailing slash, not captured
        r"(?:/page/\d+)?/?$"
    )
    example = "https://everia.club/0000/00/00"
90
91
92
class EveriaSearchExtractor(EveriaExtractor):
    """Extractor for everia.club search results"""
    subcategory = "search"
    pattern = BASE_PATTERN + r"/(?:page/\d+/)?\?s=([^&#]+)"
    example = "https://everia.club/?s=SEARCH"

    def posts(self):
        """Return the post URLs matching the search term of the input URL"""
        # The term captured from the input URL is still URL-encoded
        # ('+' for spaces, '%XX' escapes). It is forwarded as a request
        # parameter, which is presumably percent-encoded again when the
        # request URL is built, so decode it once here; otherwise
        # '%20' would be sent as '%2520' and '+' as '%2B'.
        query = text.unquote(self.groups[0].replace("+", " "))
        return self._pagination("", {"s": query})
100
101