GitHub Repository: taux1c/onlyfans-scraper
Path: blob/main/onlyfans_scraper/api/posts.py
r"""
               _          __
  ___   _ __  | | _   _  / _|  __ _  _ __   ___         ___   ___  _ __   __ _  _ __    ___  _ __
 / _ \ | '_ \ | || | | || |_  / _` || '_ \ / __| _____ / __| / __|| '__| / _` || '_ \  / _ \| '__|
| (_) || | | || || |_| ||  _|| (_| || | | |\__ \|_____|\__ \| (__ | |   | (_| || |_) ||  __/| |
 \___/ |_| |_||_| \__, ||_|   \__,_||_| |_||___/       |___/ \___||_|    \__,_|| .__/  \___||_|
                  |___/                                                        |_|
"""
import httpx

from ..constants import (
    timelineEP, timelineNextEP,
    timelinePinnedEP,
    archivedEP, archivedNextEP
)
from ..utils import auth

def scrape_pinned_posts(headers, model_id) -> list:
    """Fetch the posts pinned to the top of a model's profile."""
    with httpx.Client(http2=True, headers=headers) as c:
        url = timelinePinnedEP.format(model_id)

        # Attach session cookies and the signed headers the API expects.
        auth.add_cookies(c)
        c.headers.update(auth.create_sign(url, headers))

        r = c.get(url, timeout=None)
        if not r.is_error:
            return r.json()['list']
        r.raise_for_status()

def scrape_timeline_posts(headers, model_id, timestamp=0) -> list:
    """Fetch all timeline posts, paging with the oldest post's timestamp."""
    # The first page uses the plain timeline endpoint; subsequent pages use
    # the "next" endpoint keyed by the timestamp of the last post seen.
    ep = timelineNextEP if timestamp else timelineEP
    url = ep.format(model_id, timestamp)

    with httpx.Client(http2=True, headers=headers) as c:
        auth.add_cookies(c)
        c.headers.update(auth.create_sign(url, headers))

        r = c.get(url, timeout=None)
        if not r.is_error:
            posts = r.json()['list']
            if not posts:
                return posts
            # Recurse to pull the next page until an empty list comes back.
            posts += scrape_timeline_posts(
                headers, model_id, posts[-1]['postedAtPrecise'])
            return posts
        r.raise_for_status()

def scrape_archived_posts(headers, model_id, timestamp=0) -> list:
    """Fetch all archived posts, paging the same way as the timeline."""
    ep = archivedNextEP if timestamp else archivedEP
    url = ep.format(model_id, timestamp)

    with httpx.Client(http2=True, headers=headers) as c:
        auth.add_cookies(c)
        c.headers.update(auth.create_sign(url, headers))

        r = c.get(url, timeout=None)
        if not r.is_error:
            posts = r.json()['list']
            if not posts:
                return posts
            # Recurse to pull the next page until an empty list comes back.
            posts += scrape_archived_posts(
                headers, model_id, posts[-1]['postedAtPrecise'])
            return posts
        r.raise_for_status()

def parse_posts(posts: list):
    """Flatten posts into (source URL, createdAt, id, type) tuples for viewable media."""
    media = [post['media'] for post in posts if post.get('media')]
    urls = [
        (i['info']['source']['source'], i['createdAt'], i['id'], i['type'])
        for m in media for i in m if i['canView']]
    return urls
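
# A minimal usage sketch, not part of the original module: it assumes
# `example_headers` is the authenticated header dict produced elsewhere in
# this package's auth flow, and `example_model_id` is a profile id you have
# already resolved. Both names below are hypothetical placeholders.
if __name__ == '__main__':
    example_headers = {}   # hypothetical: filled in by the package's auth setup
    example_model_id = 0   # hypothetical: a real numeric model id goes here

    # Gather pinned, timeline, and archived posts, then flatten to media tuples.
    posts = scrape_pinned_posts(example_headers, example_model_id)
    posts += scrape_timeline_posts(example_headers, example_model_id)
    posts += scrape_archived_posts(example_headers, example_model_id)

    for source_url, created_at, media_id, media_type in parse_posts(posts):
        print(media_type, created_at, source_url)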