r"""
_ __
___ _ __ | | _ _ / _| __ _ _ __ ___ ___ ___ _ __ __ _ _ __ ___ _ __
/ _ \ | '_ \ | || | | || |_ / _` || '_ \ / __| _____ / __| / __|| '__| / _` || '_ \ / _ \| '__|
| (_) || | | || || |_| || _|| (_| || | | |\__ \|_____|\__ \| (__ | | | (_| || |_) || __/| |
\___/ |_| |_||_| \__, ||_| \__,_||_| |_||___/ |___/ \___||_| \__,_|| .__/ \___||_|
|___/ |_|
"""
import httpx

from ..constants import (
timelineEP, timelineNextEP,
timelinePinnedEP,
archivedEP, archivedNextEP
)
from ..utils import auth


def scrape_pinned_posts(headers, model_id) -> list:
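    """Fetch the posts pinned to the top of a model's profile.

    Returns the raw post dicts from the API response's 'list' field.
    """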
    with httpx.Client(http2=True, headers=headers) as c:
        url = timelinePinnedEP.format(model_id)
        # Attach session cookies and the signed headers the API expects.
        auth.add_cookies(c)
        c.headers.update(auth.create_sign(url, headers))

        r = c.get(url, timeout=None)
        if not r.is_error:
            return r.json()['list']
        r.raise_for_status()


def scrape_timeline_posts(headers, model_id, timestamp=0) -> list:
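    """Scrape a model's timeline, following pagination recursively.

    The first call (timestamp=0) uses timelineEP; each later page is fetched
    from timelineNextEP with the previous page's last 'postedAtPrecise'
    value as the cursor.
    """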
    # First page uses the base endpoint; later pages pass the timestamp of
    # the last post seen as a cursor to the "next page" endpoint.
    ep = timelineNextEP if timestamp else timelineEP
    url = ep.format(model_id, timestamp)

    with httpx.Client(http2=True, headers=headers) as c:
        auth.add_cookies(c)
        c.headers.update(auth.create_sign(url, headers))

        r = c.get(url, timeout=None)
        if r.is_error:
            r.raise_for_status()
        posts = r.json()['list']

    # Recurse outside the client context so each page's connection is closed
    # before the next one is opened.
    if posts:
        posts += scrape_timeline_posts(
            headers, model_id, posts[-1]['postedAtPrecise'])
    return posts


def scrape_archived_posts(headers, model_id, timestamp=0) -> list:
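    """Scrape a model's archived posts, following pagination recursively.

    Mirrors scrape_timeline_posts, but against the archivedEP and
    archivedNextEP endpoints.
    """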
    # Same cursor scheme as the timeline: the base endpoint for the first
    # page, then the "next page" endpoint keyed on the last post seen.
    ep = archivedNextEP if timestamp else archivedEP
    url = ep.format(model_id, timestamp)

    with httpx.Client(http2=True, headers=headers) as c:
        auth.add_cookies(c)
        c.headers.update(auth.create_sign(url, headers))

        r = c.get(url, timeout=None)
        if r.is_error:
            r.raise_for_status()
        posts = r.json()['list']

    # Recurse outside the client context so each page's connection is closed
    # before the next one is opened.
    if posts:
        posts += scrape_archived_posts(
            headers, model_id, posts[-1]['postedAtPrecise'])
    return posts


def parse_posts(posts: list) -> list:
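    """Flatten posts into (source url, createdAt, id, type) tuples,
    keeping only media items the account is allowed to view."""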
    # Keep only posts that actually carry media.
    media = [post['media'] for post in posts if post.get('media')]
    # One tuple per viewable item: (source url, created time, id, media type).
    urls = [(i['info']['source']['source'], i['createdAt'], i['id'], i['type'])
            for m in media for i in m if i['canView']]
    return urls
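

# A minimal usage sketch, not part of the scraper itself: the empty headers
# dict and zero model id below are placeholder assumptions -- real values
# come from an authenticated session (see ..utils.auth). Because this module
# uses relative imports, run it with `python -m` from the package root.
if __name__ == "__main__":
    example_headers = {}   # hypothetical: a signed/authenticated header set
    example_model_id = 0   # hypothetical model id
    timeline = scrape_timeline_posts(example_headers, example_model_id)
    pinned = scrape_pinned_posts(example_headers, example_model_id)
    archived = scrape_archived_posts(example_headers, example_model_id)
    # parse_posts() reduces the raw posts to (source_url, createdAt, id, type).
    print(parse_posts(timeline + pinned + archived))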