Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/500px.py
8949 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2019-2026 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for https://500px.com/"""
10
11
from .common import Extractor, Message
12
from .. import util
13
14
# URL prefix shared by the extractor patterns in this module:
# an optional http(s) scheme, an optional "web." subdomain, then "500px.com"
BASE_PATTERN = r"(?:https?://)?(?:web\.)?500px\.com"
15
16
17
class _500pxExtractor(Extractor):
    """Shared functionality for all 500px extractors

    Subclasses override photos() (and optionally metadata());
    items() combines both into Directory and Url messages.
    """
    category = "500px"
    directory_fmt = ("{category}", "{user[username]}")
    filename_fmt = "{id}_{name}.{extension}"
    archive_fmt = "{id}"
    root = "https://500px.com"
    cookies_domain = ".500px.com"

    def items(self):
        """Yield a Directory and a Url message for every photo"""
        extra = self.metadata()

        for photo in self.photos():
            # the download URL is taken from the last 'images' entry
            image_url = photo["images"][-1]["url"]
            photo["extension"] = photo["image_format"]
            if extra:
                # general metadata overrides per-photo keys of the same name
                photo.update(extra)
            yield Message.Directory, "", photo
            yield Message.Url, image_url, photo

    def metadata(self):
        """Return general metadata to merge into every photo (or None)"""

    def photos(self):
        """Return an iterable of photo metadata mappings

        items() reads the 'images' and 'image_format' keys of each entry.
        """

    def _extend(self, edges):
        """Extend photos with additional metadata and higher resolution URLs

        'edges' is an iterable of mappings with a ["node"]["legacyId"]
        entry. Returns a list of the API's photo objects in the same order;
        IDs missing from the API response are logged and left out.
        """
        photo_ids = [str(edge["node"]["legacyId"]) for edge in edges]

        endpoint = "https://api.500px.com/v1/photos"
        query = {
            "expanded_user_info": "true",
            "include_tags": "true",
            "include_geo": "true",
            "include_equipment_info": "true",
            "vendor_photos": "true",
            "include_licensing": "true",
            "include_releases": "true",
            "liked_by": "1",
            "following_sample": "100",
            "image_size": "4096",
            "ids": ",".join(photo_ids),
        }
        available = self._request_api(endpoint, query)["photos"]

        extended = []
        for photo_id in photo_ids:
            if photo_id in available:
                extended.append(available[photo_id])
            else:
                self.log.warning("Unable to fetch photo %s", photo_id)
        return extended

    def _request_api(self, url, params):
        """Send a request with CSRF headers to 'url' and return its JSON"""
        csrf_token = self.cookies.get("x-csrf-token", domain=".500px.com")
        headers = {
            "Origin": self.root,
            "x-csrf-token": csrf_token,
        }
        return self.request_json(url, headers=headers, params=params)

    def _request_graphql(self, opname, variables):
        """POST the GraphQL operation 'opname' and return its 'data' object"""
        csrf_token = self.cookies.get("x-csrf-token", domain=".500px.com")
        payload = {
            "operationName": opname,
            # variables are sent as a JSON-encoded string
            "variables": util.json_dumps(variables),
            # NOTE(review): self.utils() is defined outside this file;
            # presumably it returns the query text for 'opname' - confirm
            "query": self.utils("graphql", opname),
        }
        response = self.request_json(
            "https://api.500px.com/graphql", method="POST",
            headers={"x-csrf-token": csrf_token}, json=payload)
        return response["data"]
90
91
92
class _500pxUserExtractor(_500pxExtractor):
    """Extractor for the photos of a 500px.com user's photostream"""
    subcategory = "user"
    # NOTE(review): the '(?!photo/|liked)' lookahead also rejects any path
    # that merely starts with "liked" - confirm this is intended
    pattern = BASE_PATTERN + r"/(?!photo/|liked)(?:p/)?([^/?#]+)/?(?:$|[?#])"
    example = "https://500px.com/USER"

    def __init__(self, match):
        _500pxExtractor.__init__(self, match)
        # first capture group: the username from the URL
        self.user = match[1]

    def photos(self):
        """Yield all extended photos of the user, 20 per API page"""
        variables = {"username": self.user, "pageSize": 20}
        response = self._request_graphql("OtherPhotosQuery", variables)
        page = response["user"]["photos"]

        while True:
            yield from self._extend(page["edges"])

            page_info = page["pageInfo"]
            if not page_info["hasNextPage"]:
                break

            # follow-up pages use a different operation and response key
            variables["cursor"] = page_info["endCursor"]
            response = self._request_graphql(
                "OtherPhotosPaginationContainerQuery", variables)
            page = response["userByUsername"]["photos"]
118
119
120
class _500pxGalleryExtractor(_500pxExtractor):
    """Extractor for the photos of a gallery on 500px.com"""
    subcategory = "gallery"
    directory_fmt = ("{category}", "{user[username]}", "{gallery[name]}")
    pattern = (BASE_PATTERN + r"/(?!photo/)(?:p/)?"
               r"([^/?#]+)/galleries/([^/?#]+)")
    example = "https://500px.com/USER/galleries/GALLERY"

    def __init__(self, match):
        _500pxExtractor.__init__(self, match)
        user_name, gallery_name = match.groups()
        self.user_name = user_name
        self.gallery_name = gallery_name
        # both are filled in by metadata()
        self.user_id = None
        self._photos = None

    def metadata(self):
        """Fetch profile and gallery info; return 'gallery' and 'user' data

        Also stores the owner's ID and the gallery's first page of photos
        on the instance for later use by photos().
        """
        profile = self._request_graphql(
            "ProfileRendererQuery", {"username": self.user_name})["profile"]
        owner_id = str(profile["legacyId"])
        self.user_id = owner_id

        variables = {
            "galleryOwnerLegacyId": owner_id,
            "ownerLegacyId": owner_id,
            "slug": self.gallery_name,
            "token": None,
            "pageSize": 20,
        }
        gallery = self._request_graphql(
            "GalleriesDetailQueryRendererQuery", variables)["gallery"]

        # keep the first page of photos out of the returned metadata
        self._photos = gallery.pop("photos")
        return {"gallery": gallery, "user": profile}

    def photos(self):
        """Yield all extended photos of the gallery

        Starts from the page stored by metadata(), which therefore
        has to run first (items() calls it before iterating photos()).
        """
        page = self._photos
        variables = {
            "ownerLegacyId": self.user_id,
            "slug": self.gallery_name,
            "token": None,
            "pageSize": 20,
        }

        while True:
            yield from self._extend(page["edges"])

            page_info = page["pageInfo"]
            if not page_info["hasNextPage"]:
                break

            variables["cursor"] = page_info["endCursor"]
            response = self._request_graphql(
                "GalleriesDetailPaginationContainerQuery", variables)
            page = response["galleryByOwnerIdAndSlugOrToken"]["photos"]
176
177
178
class _500pxFavoriteExtractor(_500pxExtractor):
    """Extractor for liked ('favorite') photos on 500px.com"""
    subcategory = "favorite"
    pattern = BASE_PATTERN + r"/liked/?$"
    example = "https://500px.com/liked"

    def photos(self):
        """Yield all extended liked photos, 20 per API page"""
        variables = {"pageSize": 20}
        # first page and follow-up pages differ only in the operation name
        operation = "LikedPhotosQueryRendererQuery"

        while True:
            page = self._request_graphql(operation, variables)["likedPhotos"]
            yield from self._extend(page["edges"])

            page_info = page["pageInfo"]
            if not page_info["hasNextPage"]:
                break

            variables["cursor"] = page_info["endCursor"]
            operation = "LikedPhotosPaginationContainerQuery"
200
201
202
class _500pxImageExtractor(_500pxExtractor):
    """Extractor for a single photo from 500px.com"""
    subcategory = "image"
    pattern = BASE_PATTERN + r"/photo/(\d+)"
    example = "https://500px.com/photo/12345/TITLE"

    def __init__(self, match):
        _500pxExtractor.__init__(self, match)
        # first capture group: the numeric photo ID from the URL
        self.photo_id = match[1]

    def photos(self):
        """Return the extended photo for the ID given in the URL"""
        # wrap the ID in the edge/node structure that _extend() expects
        return self._extend([{"node": {"legacyId": self.photo_id}}])
215
216