GitHub Repository: taux1c/onlyfans-scraper
Path: blob/main/onlyfans_scraper/utils/download.py

r"""
onlyfans-scraper
(ASCII-art banner in the original source)
"""

import asyncio
import math
import os
import pathlib
import platform
import sys

import httpx
from tqdm.asyncio import tqdm

try:
    # Optional dependency: only importable on Windows, where it is used to
    # set the file creation time.
    from win32_setctime import setctime  # pylint: disable=import-error
except ModuleNotFoundError:
    pass

from .auth import add_cookies
from .config import read_config
from .dates import convert_date_to_timestamp
from .separate import separate_by_id
from ..db import operations


async def process_urls(headers, username, model_id, urls):
    if urls:
        operations.create_database(model_id)
        media_ids = operations.get_media_ids(model_id)
        separated_urls = separate_by_id(urls, media_ids)

        config = read_config()['config']

        # Resolve the download directory: the configured save_location if it
        # is usable, otherwise the current working directory.
        save_location = config.get('save_location')
        if save_location:
            try:
                save_dir = pathlib.Path(save_location)
            except (TypeError, ValueError):
                print(f"Unable to use save location. Using current working directory. ({pathlib.Path.cwd()})")
                save_dir = pathlib.Path.cwd()
        else:
            save_dir = pathlib.Path.cwd()

        try:
            path = save_dir / username
            path.mkdir(exist_ok=True, parents=True)
        except OSError:
            print("Error creating the save directory; check the path and make sure you have the correct permissions.")
            sys.exit()

        file_size_limit = config.get('file_size_limit')

        # Cap concurrent connections so the host is not hammered with requests.
        limits = httpx.Limits(max_connections=8, max_keepalive_connections=5)
        async with httpx.AsyncClient(headers=headers, limits=limits, timeout=None) as c:
            add_cookies(c)

            aws = [asyncio.create_task(
                download(c, path, model_id, file_size_limit, *url)) for url in separated_urls]

            photo_count = 0
            video_count = 0
            skipped = 0
            total_bytes_downloaded = 0
            data = 0

            desc = 'Progress: ({p_count} photos, {v_count} videos, {skipped} skipped || {data})'

            with tqdm(desc=desc.format(p_count=photo_count, v_count=video_count, skipped=skipped, data=data),
                      total=len(aws), colour='cyan', leave=True) as main_bar:
                for coro in asyncio.as_completed(aws):
                    try:
                        media_type, num_bytes_downloaded = await coro
                    except Exception as e:
                        media_type = None
                        num_bytes_downloaded = 0
                        print(e)

                    total_bytes_downloaded += num_bytes_downloaded
                    data = convert_num_bytes(total_bytes_downloaded)

                    if media_type == 'photo':
                        photo_count += 1
                    elif media_type == 'video':
                        video_count += 1
                    elif media_type == 'skipped':
                        skipped += 1

                    main_bar.set_description(
                        desc.format(
                            p_count=photo_count, v_count=video_count, skipped=skipped, data=data), refresh=False)
                    main_bar.update()


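# Hedged usage sketch (not part of the original module): process_urls() is a
# coroutine, so a caller would drive it with asyncio.run(). The headers dict,
# model_id, and the (url, date, id, media_type) tuples below are illustrative
# placeholders; in this project they come from the auth, posts, and separate
# helpers rather than being built by hand.
#
#     example_urls = [
#         ('https://cdn.example.com/files/photo_1.jpg?Expires=0', None, 1, 'photo'),
#         ('https://cdn.example.com/files/video_1.mp4?Expires=0', None, 2, 'video'),
#     ]
#     asyncio.run(process_urls({'User-Agent': '...'}, 'some_user', 1234, example_urls))

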
def convert_num_bytes(num_bytes: int) -> str:
    if num_bytes == 0:
        return '0 B'
    num_digits = int(math.log10(num_bytes)) + 1

    if num_digits >= 10:
        return f'{round(num_bytes / 10**9, 2)} GB'
    return f'{round(num_bytes / 10**6, 2)} MB'


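# Worked examples (illustrative comment, not part of the original module):
# the GB branch kicks in once the value has 10 or more decimal digits,
# i.e. at 10**9 bytes and above; everything smaller is reported in MB.
#
#     convert_num_bytes(0)              -> '0 B'
#     convert_num_bytes(123_456)        -> '0.12 MB'
#     convert_num_bytes(734_003_200)    -> '734.0 MB'
#     convert_num_bytes(3_221_225_472)  -> '3.22 GB'

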
async def download(client, path, model_id, file_size_limit,
                   url, date=None, id_=None, media_type=None):
    # The filename is the last path component of the URL, minus the query string.
    filename = url.split('?', 1)[0].rsplit('/', 1)[-1]
    path_to_file = path / filename

    async with client.stream('GET', url) as r:
        if not r.is_error:
            total = int(r.headers['Content-Length'])
            # Skip files larger than the configured size limit.
            if file_size_limit and total > int(file_size_limit):
                return 'skipped', 1

            # Stream the body to disk in 1 KiB chunks, updating a per-file
            # progress bar as bytes arrive.
            with tqdm(desc=filename, total=total, unit_scale=True, unit_divisor=1024, unit='B', leave=False) as bar:
                num_bytes_downloaded = r.num_bytes_downloaded
                with open(path_to_file, 'wb') as f:
                    async for chunk in r.aiter_bytes(chunk_size=1024):
                        f.write(chunk)
                        bar.update(r.num_bytes_downloaded - num_bytes_downloaded)
                        num_bytes_downloaded = r.num_bytes_downloaded

        else:
            r.raise_for_status()

    # Only touch timestamps and record the download once the file exists on disk.
    if path_to_file.is_file():
        if date:
            set_time(path_to_file, convert_date_to_timestamp(date))

        if id_:
            data = (id_, filename)
            operations.write_from_data(data, model_id)

    return media_type, num_bytes_downloaded


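# Hedged example (illustrative only): download() can be driven on its own with
# a bare httpx.AsyncClient. The URL, model_id, and media_type values here are
# placeholders, and the database write is skipped by leaving id_ as None.
#
#     async def _grab_one():
#         async with httpx.AsyncClient(timeout=None) as client:
#             return await download(client, pathlib.Path.cwd(), model_id=0,
#                                   file_size_limit=None,
#                                   url='https://example.com/files/sample.jpg',
#                                   media_type='photo')
#
#     asyncio.run(_grab_one())

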
def set_time(path, timestamp):
    # win32_setctime is only available on Windows, where it sets the file
    # creation time; on every platform the access and modification times are
    # set with os.utime.
    if platform.system() == 'Windows':
        setctime(path, timestamp)
    os.utime(path, (timestamp, timestamp))


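# Example (illustrative only): stamp a downloaded file with a post's publish date.
#
#     set_time(pathlib.Path('photo_1.jpg'), 1640995200)  # 2022-01-01 00:00:00 UTC
#
# On Windows this also rewrites the creation time; elsewhere only the access
# and modification times change.

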
def get_error_message(content):
    error_content = content.get('error', 'No error message available')
    try:
        return error_content.get('message', 'No error message available')
    except AttributeError:
        return error_content


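# Illustrative examples (not part of the original module): the API error field
# can be either a nested object or a bare string, and both shapes are handled.
#
#     get_error_message({'error': {'message': 'Invalid auth'}})  -> 'Invalid auth'
#     get_error_message({'error': 'Rate limited'})               -> 'Rate limited'
#     get_error_message({})                                      -> 'No error message available'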