Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/cyberfile.py
8898 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for https://cyberfile.me/"""
10
11
from .common import Extractor, Message
12
from .. import text, exception
13
14
# URL prefix shared by all extractor patterns in this module:
# optional "http://" or "https://", optional "www.", then "cyberfile.me"
BASE_PATTERN = r"(?:https?://)?(?:www\.)?cyberfile\.me"
15
16
17
class CyberfileExtractor(Extractor):
    """Base class for cyberfile extractors"""
    category = "cyberfile"
    root = "https://cyberfile.me"

    def request_api(self, endpoint, data):
        """POST 'data' to 'endpoint' below the site root and return the JSON

        If the response's "javascript" field contains "albumPasswordModel",
        the configured password is submitted to the folder password
        endpoint and the original request is then sent a second time.
        Raises AuthorizationError when the password submission does not
        report success.
        """
        api_url = self.root + endpoint
        ajax_headers = {
            "X-Requested-With": "XMLHttpRequest",
            "Origin": self.root,
        }

        def post(target, payload):
            return self.request_json(
                target, method="POST", headers=ajax_headers, data=payload)

        result = post(api_url, data)
        if "albumPasswordModel" not in result.get("javascript", ""):
            return result

        # password prompt: the folder ID is read from the first
        # hidden <input> of the returned HTML
        unlock_payload = {
            "folderPassword": self._get_auth_info(password=True)[1],
            "folderId": text.extr(
                result["html"], '<input type="hidden" value="', '"'),
            "submitme": "1",
        }
        unlock = post(
            self.root + "/ajax/folder_password_process", unlock_payload)
        if not unlock.get("success"):
            raise exception.AuthorizationError(f"'{unlock.get('msg')}'")

        # repeat the original request now that the password was accepted
        return post(api_url, data)
47
48
49
class CyberfileFolderExtractor(CyberfileExtractor):
    """Extractor for cyberfile folders"""
    subcategory = "folder"
    pattern = BASE_PATTERN + r"/folder/([0-9a-f]+)"
    example = "https://cyberfile.me/folder/0123456789abcdef/NAME"

    def items(self):
        """Yield Queue messages for every file and subfolder of a folder

        Subfolders are only queued when the 'recursive' option is enabled
        (default: True). Results are requested in pages of 'perpage'
        entries until a page returns fewer entries than that.
        """
        folder_hash = self.groups[0]
        url = f"{self.root}/folder/{folder_hash}"
        # numeric node ID embedded in the folder page's inline script
        folder_num = text.extr(self.request(url).text, "ages('folder', '", "'")

        extract_folders = text.re(r'sharing-url="([^"]+)').findall
        extract_files = text.re(r'dtfullurl="([^"]+)').findall
        recursive = self.config("recursive", True)
        perpage = 600

        data = {
            "pageType" : "folder",
            "nodeId"   : folder_num,
            "pageStart": 1,
            "perPage"  : perpage,
            "filterOrderBy": "",
        }
        resp = self.request_api("/account/ajax/load_files", data)
        html = resp["html"]

        # metadata shared by all queued children;
        # '_extractor' is switched between folder and file entries
        folder = {
            "folder_hash": folder_hash,
            "folder_num" : text.parse_int(folder_num),
            "folder"     : resp["page_title"],
        }

        while True:
            folders = extract_folders(html)
            if recursive and folders:
                folder["_extractor"] = CyberfileFolderExtractor
                for url in folders:
                    yield Message.Queue, url, folder

            if files := extract_files(html):
                folder["_extractor"] = CyberfileFileExtractor
                for url in files:
                    yield Message.Queue, url, folder

            # a short page means there is nothing left to fetch
            if len(folders) + len(files) < perpage:
                return

            data["pageStart"] += 1
            # parse the newly fetched page on the next iteration; without
            # refreshing 'html' the first page would be processed forever
            html = self.request_api(
                "/account/ajax/load_files", data)["html"]
96
97
98
class CyberfileSharedExtractor(CyberfileExtractor):
    """Extractor for cyberfile shared links"""
    subcategory = "shared"
    pattern = BASE_PATTERN + r"/shared/([a-zA-Z0-9]+)"
    example = "https://cyberfile.me/shared/AbCdEfGhIjK"

    def items(self):
        """Yield Queue messages for the folders and files of a share"""
        # get 'filehosting' cookie
        self.request(f"{self.root}/shared/{self.groups[0]}", method="HEAD")

        params = {
            "pageType" : "nonaccountshared",
            "nodeId"   : "",
            "pageStart": "1",
            "perPage"  : "500",
            "filterOrderBy": "",
        }
        html = self.request_api("/account/ajax/load_files", params)["html"]

        # only look at the markup following the navbar marker
        marker = "<!-- /.navbar-collapse -->"
        start = html.find(marker) + len(marker)

        # folders are queued first, then files
        targets = (
            (CyberfileFolderExtractor, 'sharing-url="'),
            (CyberfileFileExtractor, 'dtfullurl="'),
        )
        for extractor, prefix in targets:
            meta = {"_extractor": extractor}
            for link in text.extract_iter(html, prefix, '"', start):
                yield Message.Queue, link, meta
127
128
129
class CyberfileFileExtractor(CyberfileExtractor):
    """Extractor for cyberfile files"""
    subcategory = "file"
    directory_fmt = ("{category}", "{uploader}", "{folder}")
    pattern = BASE_PATTERN + r"/([a-zA-Z0-9]+)"
    example = "https://cyberfile.me/AbCdE"

    def items(self):
        """Yield a Directory and a Url message for a single file"""
        file_id = self.groups[0]
        page = self.request(f"{self.root}/{file_id}").text
        # numeric file ID passed to the page's file information call
        file_num = text.extr(page, "owFileInformation(", ")")

        details = self.request_api(
            "/account/ajax/file_details", {"u": file_num})

        # NOTE(review): 'extr' presumably walks forward through the HTML,
        # so the extraction calls below keep their original order
        extr = text.extract_from(details["html"])

        parts = text.split_html(extr('class="text-section">', "</span>"))
        if len(parts) > 1:
            folder = parts[0]
        else:
            folder = ""
        # last entry holds the uploader after a two-character prefix
        uploader = parts[-1][2:].strip()

        size_text = text.remove_html(extr("Filesize:", "</tr>"))
        tags = text.split_html(extr("Keywords:", "</tr>"))
        uploaded = text.remove_html(extr("Uploaded:", "</tr>"))
        permissions = text.remove_html(extr("Permissions:", "</tr>"))
        download_url = extr("openUrl('", "'")

        file = {
            "file_id" : file_id,
            "file_num": text.parse_int(file_num),
            "name"    : details["page_title"],
            "folder"  : folder,
            "uploader": uploader,
            "size"    : text.parse_bytes(size_text[:-1]),
            "tags"    : tags,
            "date"    : self.parse_datetime(uploaded, "%d/%m/%Y %H:%M:%S"),
            "permissions": permissions.split(" &amp; "),
            "file_url": download_url,
        }

        # prefer the page title for filename/extension, else the URL
        text.nameext_from_url(file["name"] or download_url, file)
        yield Message.Directory, "", file
        yield Message.Url, download_url, file
166
167