# Path: blob/main/onlyfans_scraper/utils/download.py
# 961 views
r"""
               _          __
  ___   _ __  | | _   _  / _|  __ _  _ __   ___         ___   ___  _ __   __ _  _ __    ___  _ __
 / _ \ | '_ \ | || | | || |_  / _` || '_ \ / __| _____ / __| / __|| '__| / _` || '_ \  / _ \| '__|
| (_) || | | || || |_| ||  _|| (_| || | | |\__ \|_____|\__ \| (__ | |   | (_| || |_) ||  __/| |
 \___/ |_| |_||_| \__, ||_|   \__,_||_| |_||___/       |___/ \___||_|    \__,_|| .__/  \___||_|
                  |___/                                                        |_|
"""

import asyncio
import math
import os
import pathlib
import platform
import sys

import httpx
from tqdm.asyncio import tqdm
try:
    from win32_setctime import setctime  # pylint: disable=import-error
except ModuleNotFoundError:
    # Optional dependency: without it set_time() only updates the
    # access/modification times and leaves the creation time alone.
    setctime = None

from .auth import add_cookies
from .config import read_config
from .dates import convert_date_to_timestamp
from .separate import separate_by_id
from ..db import operations


def _resolve_save_dir(save_location, username):
    """Return ``<base>/<username>``, creating it if necessary.

    ``<base>`` is ``save_location`` when it is set and can be turned into a
    path, otherwise the current working directory. If the directory cannot
    be created, an error message is printed and the program exits.
    """
    base_dir = pathlib.Path.cwd()
    if save_location:
        try:
            base_dir = pathlib.Path(save_location)
        except TypeError:
            # base_dir already holds the cwd, so the fallback announced in
            # the message really happens.
            print(f"Unable to find save location. Using current working directory. ({pathlib.Path.cwd()})")

    try:
        path = base_dir / username
        path.mkdir(exist_ok=True, parents=True)
    except (OSError, TypeError, ValueError):
        print("Error saving to save directory, check the directory and make sure correct permissions have been issued.")
        sys.exit()
    return path


async def process_urls(headers, username, model_id, urls):
    """Download the given media URLs concurrently and show overall progress.

    Does nothing when ``urls`` is empty. Otherwise the model's database is
    created, ``urls`` is passed through ``separate_by_id`` together with the
    media ids already stored for ``model_id``, and every resulting item is
    unpacked into the positional arguments of :func:`download` after
    ``file_size_limit``.

    Files are written to ``<save_location>/<username>``; ``save_location``
    and ``file_size_limit`` are read from the ``'config'`` section returned
    by ``read_config()``.

    Errors raised by individual downloads are printed and do not stop the
    remaining downloads.
    """
    if not urls:
        return

    operations.create_database(model_id)
    media_ids = operations.get_media_ids(model_id)
    separated_urls = separate_by_id(urls, media_ids)

    config = read_config()['config']
    path = _resolve_save_dir(config.get('save_location'), username)
    file_size_limit = config.get('file_size_limit')

    # Added pool limit:
    limits = httpx.Limits(max_connections=8, max_keepalive_connections=5)
    async with httpx.AsyncClient(headers=headers, limits=limits, timeout=None) as c:
        add_cookies(c)

        aws = [
            asyncio.create_task(
                download(c, path, model_id, file_size_limit, *url))
            for url in separated_urls
        ]

        # Keys are the media_type values download() is expected to return.
        counts = {'photo': 0, 'video': 0, 'skipped': 0}
        total_bytes_downloaded = 0

        desc = 'Progress: ({p_count} photos, {v_count} videos, {skipped} skipped || {data})'

        def describe(data):
            return desc.format(
                p_count=counts['photo'], v_count=counts['video'],
                skipped=counts['skipped'], data=data)

        with tqdm(desc=describe(0), total=len(aws), colour='cyan', leave=True) as main_bar:
            for coro in asyncio.as_completed(aws):
                try:
                    media_type, num_bytes_downloaded = await coro
                except Exception as e:
                    # One failed download must not abort the others.
                    media_type = None
                    num_bytes_downloaded = 0
                    print(e)

                total_bytes_downloaded += num_bytes_downloaded

                # The description is only refreshed for known media types;
                # failures (media_type None) just advance the bar.
                if media_type in counts:
                    counts[media_type] += 1
                    main_bar.set_description(
                        describe(convert_num_bytes(total_bytes_downloaded)),
                        refresh=False)

                main_bar.update()


def convert_num_bytes(num_bytes: int) -> str:
    """Format a byte count as a short human-readable string.

    Returns ``'0 B'`` for zero, gigabytes (base 10) once the number has ten
    or more decimal digits, and megabytes otherwise, rounded to two places.
    """
    if num_bytes == 0:
        return '0 B'
    num_digits = int(math.log10(num_bytes)) + 1

    if num_digits >= 10:
        return f'{round(num_bytes / 10**9, 2)} GB'
    return f'{round(num_bytes / 10 ** 6, 2)} MB'


async def download(client, path, model_id, file_size_limit,
                   url, date=None, id_=None, media_type=None):
    """Stream ``url`` into ``path`` and return ``(media_type, bytes_downloaded)``.

    The file name is the last path segment of ``url`` with any query string
    removed.

    * If the response is an error, ``r.raise_for_status()`` raises.
    * If ``file_size_limit`` is set and the reported ``Content-Length``
      exceeds it, nothing is written and ``('skipped', 0)`` is returned.
      A response without ``Content-Length`` cannot be checked and is
      downloaded.
    * After a successful download the file's timestamps are set from
      ``date`` (when given) and ``(id_, filename)`` is recorded through
      ``operations.write_from_data`` (when ``id_`` is given).
    """
    filename = url.split('?', 1)[0].rsplit('/', 1)[-1]
    path_to_file = path / filename

    async with client.stream('GET', url) as r:
        if r.is_error:
            r.raise_for_status()

        # Content-Length is optional (e.g. chunked responses); tqdm accepts
        # total=None for an unknown size.
        content_length = r.headers.get('Content-Length')
        total = int(content_length) if content_length is not None else None

        if file_size_limit and total is not None and total > int(file_size_limit):
            # Nothing was downloaded, so report zero bytes to the caller,
            # which sums this value into its running total.
            return 'skipped', 0

        with tqdm(desc=filename, total=total, unit_scale=True, unit_divisor=1024, unit='B', leave=False) as bar:
            num_bytes_downloaded = r.num_bytes_downloaded
            with open(path_to_file, 'wb') as f:
                async for chunk in r.aiter_bytes(chunk_size=1024):
                    f.write(chunk)
                    # Advance by the bytes received since the last chunk.
                    bar.update(
                        r.num_bytes_downloaded - num_bytes_downloaded)
                    num_bytes_downloaded = r.num_bytes_downloaded

    if path_to_file.is_file():
        if date:
            set_time(path_to_file, convert_date_to_timestamp(date))

        if id_:
            data = (id_, filename)
            operations.write_from_data(data, model_id)

    return media_type, num_bytes_downloaded


def set_time(path, timestamp):
    """Set the access and modification times of ``path`` to ``timestamp``.

    On Windows the creation time is set as well, provided the optional
    ``win32_setctime`` package could be imported.
    """
    if platform.system() == 'Windows' and setctime is not None:
        setctime(path, timestamp)
    os.utime(path, (timestamp, timestamp))


def get_error_message(content):
    """Return the error message contained in ``content``.

    ``content['error']`` may be a mapping with a ``'message'`` key or any
    other object without ``.get`` (which is then returned as is). A
    placeholder text is returned when no message is present.
    """
    error_content = content.get('error', 'No error message available')
    try:
        return error_content.get('message', 'No error message available')
    except AttributeError:
        return error_content