CoCalc -- arena.py

GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/arena.py
8874 views
1
# -*- coding: utf-8 -*-
2

3
# Copyright 2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8

9
"""Extractor for https://are.na/"""
10

11
from .common import GalleryExtractor
12

13

14
class ArenaChannelExtractor(GalleryExtractor):
    """Extractor for are.na channels"""
    category = "arena"
    subcategory = "channel"
    root = "https://are.na"
    directory_fmt = ("{category}", "{user[full_name]} ({user[id]})",
                     "{channel[title]} ({channel[id]})")
    filename_fmt = "{num:>03}{block[id]:? //}.{extension}"
    archive_fmt = "{channel[id]}/{block[id]}"
    pattern = r"(?:https?://)?(?:www\.)?are\.na/[^/?#]+/([^/?#]+)"
    example = "https://are.na/evan-collins-1522646491/cassette-futurism"

    def metadata(self, page):
        """Return channel-level metadata fetched from the are.na API

        Requests the channel object for self.groups[0] (presumably the
        channel slug captured by 'pattern'), adds parsed 'date' and
        'date_updated' values to it, and returns a dict with the keys
        'count', 'user', 'owner', and 'channel'.

        'page' is not used by this method.
        """
        url = "https://api.are.na/v2/channels/" + self.groups[0]
        channel = self.request_json(url)

        channel["date"] = self.parse_datetime_iso(
            channel["created_at"])
        channel["date_updated"] = self.parse_datetime_iso(
            channel["updated_at"])
        # Blocks are requested page by page in images(),
        # so drop any embedded contents from the channel metadata
        channel.pop("contents", None)

        # 'user' and 'owner' are moved out of 'channel'
        # so that they become top-level metadata fields
        return {
            "count"  : channel.get("length"),
            "user"   : channel.pop("user", None),
            "owner"  : channel.pop("owner", None),
            "channel": channel,
        }

    def images(self, page):
        """Yield (url, metadata) pairs for all downloadable channel blocks

        Walks the paginated 'contents' endpoint of the channel and yields
        one pair per block that provides an attachment URL or an image URL.
        Blocks without any such URL are skipped. Each metadata dict has the
        keys 'block' and 'source'.

        'page' is not used by this method.
        """
        api = f"https://api.are.na/v2/channels/{self.groups[0]}/contents"
        limit = 100
        params = {"page": 1, "per": limit}

        while True:
            data = self.request_json(api, params=params)

            contents = data.get("contents")
            if not contents:
                return

            for block in contents:
                url = None

                # Attachments (e.g., PDFs, files)
                if attachment := block.get("attachment"):
                    url = attachment.get("url")

                # Images
                elif image := block.get("image"):
                    # Prefer the original image, then fall back to the
                    # display and large versions. A version that is present
                    # but has no usable URL must not end the search.
                    for size in ("original", "display", "large"):
                        url = (image.get(size) or {}).get("url")
                        if url:
                            break

                # Some Links/Channels may not have downloadable media
                if not url:
                    continue

                block["date"] = self.parse_datetime_iso(
                    block["created_at"])
                block["date_updated"] = self.parse_datetime_iso(
                    block["updated_at"])

                # 'source' is moved out of 'block' into its own field
                yield url, {
                    "block" : block,
                    "source": block.pop("source", None),
                }

            # A page shorter than the requested size is the last one
            if len(contents) < limit:
                return
            params["page"] += 1
90

91
Product

Resources

Company