# Source: GitHub mikf/gallery-dl, blob/master/gallery_dl/extractor/booru.py
# -*- coding: utf-8 -*-

# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for *booru sites"""

from .common import BaseExtractor, Message
12
from .. import text
13
import operator
14
15
16
class BooruExtractor(BaseExtractor):
    """Base class for *booru extractors

    Subclasses are expected to override the hook methods below
    (login(), metadata(), posts(), _prepare(), _html(), _tags(),
    _notes()); the defaults defined here do nothing or return an
    empty tuple.
    """
    basecategory = "booru"
    filename_fmt = "{category}_{id}_{md5}.{extension}"
    # First page index; advanced by skip()
    page_start = 0
    # Page size used by skip() to convert an item count into pages
    per_page = 100

    def items(self):
        """Yield a Directory and a Url message for every usable post

        Reads the 'tags', 'notes' and 'url' config options.
        Posts for which no download URL can be determined are
        logged and skipped instead of aborting the whole run.
        """
        self.login()
        data = self.metadata()
        tags = self.config("tags", False)
        notes = self.config("notes", False)
        # The same HTML is handed to _tags() and _notes(),
        # so it gets requested at most once per post.
        fetch_html = tags or notes

        # 'url' selects which post field(s) hold the download URL:
        # a list/tuple enables _file_url_list(), any other truthy
        # value replaces the default "file_url" key.
        if url_key := self.config("url"):
            if isinstance(url_key, (list, tuple)):
                self._file_url = self._file_url_list
                self._file_url_keys = url_key
            else:
                self._file_url = operator.itemgetter(url_key)

        for post in self.posts():
            try:
                url = self._file_url(post)
                # Site-relative URL -> prepend the site root
                if url[0] == "/":
                    url = self.root + url
            except Exception as exc:
                # Deliberately broad: a missing key, an empty or None
                # URL, and an exhausted key list (StopIteration from
                # _file_url_list) all mean "no usable URL".
                self.log.debug("%s: %s", exc.__class__.__name__, exc)
                self.log.warning("Unable to fetch download URL for post %s "
                                 "(md5: %s)", post.get("id"), post.get("md5"))
                continue

            if fetch_html:
                html = self._html(post)
                if tags:
                    self._tags(post, html)
                if notes:
                    self._notes(post, html)

            # Only consult the URL when the post does not already
            # carry an 'extension' value.
            if "extension" not in post:
                text.nameext_from_url(url, post)
            post.update(data)
            self._prepare(post)

            yield Message.Directory, post
            yield Message.Url, url, post

    def skip(self, num):
        """Skip as many whole pages as fit into 'num' items

        Advances 'page_start' by 'num // per_page' and returns the
        number of items covered by those pages (a multiple of
        'per_page', never more than 'num').
        """
        pages = num // self.per_page
        self.page_start += pages
        return pages * self.per_page

    def login(self):
        """Login and set necessary cookies"""

    def metadata(self):
        """Return a dict with general metadata"""
        # The empty tuple is a valid no-op argument for the
        # post.update() call in items().
        return ()

    def posts(self):
        """Return an iterable with post objects"""
        return ()

    # Default URL getter: reads post["file_url"].
    # operator.itemgetter objects are not bound like functions, so
    # 'self._file_url(post)' passes only 'post'.
    # items() may replace this per instance based on the 'url' option.
    _file_url = operator.itemgetter("file_url")

    def _file_url_list(self, post):
        """Return the first non-empty URL from the configured keys

        The iterator over the remaining candidates is stored in
        post["_fallback"]. Raises StopIteration when none of the
        keys in '_file_url_keys' has a truthy value in 'post'.
        """
        urls = (post[key] for key in self._file_url_keys if post.get(key))
        post["_fallback"] = it = iter(urls)
        return next(it)

    def _prepare(self, post):
        """Prepare a 'post's metadata"""

    def _html(self, post):
        """Return HTML content of a post"""

    def _tags(self, post, page):
        """Extract extended tag metadata"""

    def _notes(self, post, page):
        """Extract notes metadata"""
97
98