Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/500px.py
5399 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2019-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for https://500px.com/"""
10
11
from .common import Extractor, Message
12
from .. import util
13
14
# Common URL prefix for all extractor patterns in this module:
# an optional "http://" or "https://" scheme, an optional "web."
# subdomain, and the "500px.com" host.
BASE_PATTERN = r"(?:https?://)?(?:web\.)?500px\.com"
15
16
17
class _500pxExtractor(Extractor):
    """Base class for 500px extractors"""
    # Subclasses provide photos() and, optionally, metadata();
    # items() merges both and emits one Directory and one Url message
    # per photo.
    category = "500px"
    directory_fmt = ("{category}", "{user[username]}")
    filename_fmt = "{id}_{name}.{extension}"
    archive_fmt = "{id}"
    root = "https://500px.com"
    cookies_domain = ".500px.com"

    def items(self):
        """Yield a Directory and a Url message for every photo"""
        data = self.metadata()

        for photo in self.photos():
            # the last entry of 'images' supplies the download URL
            url = photo["images"][-1]["url"]
            photo["extension"] = photo["image_format"]
            if data:
                # add the extractor-wide metadata to each photo
                photo.update(data)
            yield Message.Directory, photo
            yield Message.Url, url, photo

    def metadata(self):
        """Returns general metadata"""

    def photos(self):
        """Returns an iterable containing all relevant photo objects"""

    def _extend(self, edges):
        """Extend photos with additional metadata and higher resolution URLs

        'edges' is an iterable of objects providing
        edge["node"]["legacyId"]. The result is a list with the
        corresponding entries of the API response's 'photos' object,
        in the order of 'edges'. IDs that are missing from the
        response are reported with a warning and skipped.
        """
        ids = [str(edge["node"]["legacyId"]) for edge in edges]
        if not ids:
            # nothing to look up; do not send a request with empty 'ids'
            return []

        url = "https://api.500px.com/v1/photos"
        params = {
            "expanded_user_info"    : "true",
            "include_tags"          : "true",
            "include_geo"           : "true",
            "include_equipment_info": "true",
            "vendor_photos"         : "true",
            "include_licensing"     : "true",
            "include_releases"      : "true",
            "liked_by"              : "1",
            "following_sample"      : "100",
            "image_size"            : "4096",
            "ids"                   : ",".join(ids),
        }

        photos = self._request_api(url, params)["photos"]

        result = []
        for pid in ids:
            if pid in photos:
                result.append(photos[pid])
            else:
                self.log.warning("Unable to fetch photo %s", pid)
        return result

    def _request_api(self, url, params):
        """Send a GET request with 'params' to 'url' and return its JSON"""
        headers = {
            "Origin": self.root,
            # forward the CSRF token stored in the session cookies
            "x-csrf-token": self.cookies.get(
                "x-csrf-token", domain=self.cookies_domain),
        }
        return self.request_json(url, headers=headers, params=params)

    def _request_graphql(self, opname, variables):
        """Run the query stored under 'opname' and return its 'data' value

        'opname' must be a key of the module-level QUERIES mapping;
        'variables' is serialized with util.json_dumps() before sending.
        """
        url = "https://api.500px.com/graphql"
        headers = {
            "x-csrf-token": self.cookies.get(
                "x-csrf-token", domain=self.cookies_domain),
        }
        data = {
            "operationName": opname,
            "variables"    : util.json_dumps(variables),
            "query"        : QUERIES[opname],
        }
        return self.request_json(
            url, method="POST", headers=headers, json=data)["data"]
90
91
92
class _500pxUserExtractor(_500pxExtractor):
    """Extractor for photos from a user's photostream on 500px.com"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/(?!photo/|liked)(?:p/)?([^/?#]+)/?(?:$|[?#])"
    example = "https://500px.com/USER"

    def __init__(self, match):
        super().__init__(match)
        # first capture group of 'pattern': the username
        self.user = match.group(1)

    def photos(self):
        """Yield the photos of 'self.user', one result page at a time"""
        variables = {"username": self.user, "pageSize": 20}
        response = self._request_graphql("OtherPhotosQuery", variables)
        page = response["user"]["photos"]

        while True:
            yield from self._extend(page["edges"])

            info = page["pageInfo"]
            if not info["hasNextPage"]:
                break

            # continue after the last item of the current page
            variables["cursor"] = info["endCursor"]
            response = self._request_graphql(
                "OtherPhotosPaginationContainerQuery", variables)
            page = response["userByUsername"]["photos"]
118
119
120
class _500pxGalleryExtractor(_500pxExtractor):
    """Extractor for photo galleries on 500px.com"""
    subcategory = "gallery"
    directory_fmt = ("{category}", "{user[username]}", "{gallery[name]}")
    pattern = (BASE_PATTERN + r"/(?!photo/)(?:p/)?"
               r"([^/?#]+)/galleries/([^/?#]+)")
    example = "https://500px.com/USER/galleries/GALLERY"

    def __init__(self, match):
        super().__init__(match)
        self.user_name, self.gallery_name = match.groups()
        # both values are filled in by metadata()
        self.user_id = None
        self._photos = None

    def metadata(self):
        """Fetch user and gallery info; store the first page of photos"""
        profile = self._request_graphql(
            "ProfileRendererQuery", {"username": self.user_name})["profile"]
        owner_id = self.user_id = str(profile["legacyId"])

        response = self._request_graphql("GalleriesDetailQueryRendererQuery", {
            "galleryOwnerLegacyId": owner_id,
            "ownerLegacyId"       : owner_id,
            "slug"                : self.gallery_name,
            "token"               : None,
            "pageSize"            : 20,
        })
        gallery = response["gallery"]

        # keep the photo listing for photos() and leave it out of the
        # returned gallery metadata
        self._photos = gallery.pop("photos")
        return {"gallery": gallery, "user": profile}

    def photos(self):
        """Yield the gallery's photos, starting with the stored first page"""
        page = self._photos
        variables = {
            "ownerLegacyId": self.user_id,
            "slug"         : self.gallery_name,
            "token"        : None,
            "pageSize"     : 20,
        }

        while True:
            yield from self._extend(page["edges"])

            info = page["pageInfo"]
            if not info["hasNextPage"]:
                break

            variables["cursor"] = info["endCursor"]
            response = self._request_graphql(
                "GalleriesDetailPaginationContainerQuery", variables)
            page = response["galleryByOwnerIdAndSlugOrToken"]["photos"]
176
177
178
class _500pxFavoriteExtractor(_500pxExtractor):
    """Extractor for favorite 500px photos"""
    subcategory = "favorite"
    pattern = BASE_PATTERN + r"/liked/?$"
    example = "https://500px.com/liked"

    def photos(self):
        """Yield all photos returned by the 'likedPhotos' queries"""
        variables = {"pageSize": 20}
        opname = "LikedPhotosQueryRendererQuery"

        while True:
            page = self._request_graphql(opname, variables)["likedPhotos"]
            yield from self._extend(page["edges"])

            info = page["pageInfo"]
            if not info["hasNextPage"]:
                break

            # every request after the first uses the pagination query
            variables["cursor"] = info["endCursor"]
            opname = "LikedPhotosPaginationContainerQuery"
200
201
202
class _500pxImageExtractor(_500pxExtractor):
    """Extractor for individual images from 500px.com"""
    subcategory = "image"
    pattern = BASE_PATTERN + r"/photo/(\d+)"
    example = "https://500px.com/photo/12345/TITLE"

    def __init__(self, match):
        super().__init__(match)
        # numeric photo ID captured from the URL
        self.photo_id = match.group(1)

    def photos(self):
        """Return the result of _extend() for the single requested photo"""
        # wrap the ID in the edge structure that _extend() reads
        node = {"legacyId": self.photo_id}
        return self._extend([{"node": node}])
215
216
217
# GraphQL query documents, keyed by operation name.
# _request_graphql() looks up QUERIES[opname] and sends the text as the
# "query" field of its POST request, so every key must match the
# operation name passed by the extractors above.
QUERIES = {
218
219
"OtherPhotosQuery": """\
220
query OtherPhotosQuery($username: String!, $pageSize: Int) {
221
user: userByUsername(username: $username) {
222
...OtherPhotosPaginationContainer_user_RlXb8
223
id
224
}
225
}
226
227
fragment OtherPhotosPaginationContainer_user_RlXb8 on User {
228
photos(first: $pageSize, privacy: PROFILE, sort: ID_DESC) {
229
edges {
230
node {
231
id
232
legacyId
233
canonicalPath
234
width
235
height
236
name
237
isLikedByMe
238
notSafeForWork
239
photographer: uploader {
240
id
241
legacyId
242
username
243
displayName
244
canonicalPath
245
followedByUsers {
246
isFollowedByMe
247
}
248
}
249
images(sizes: [33, 35]) {
250
size
251
url
252
jpegUrl
253
webpUrl
254
id
255
}
256
__typename
257
}
258
cursor
259
}
260
totalCount
261
pageInfo {
262
endCursor
263
hasNextPage
264
}
265
}
266
}
267
""",
268
269
"OtherPhotosPaginationContainerQuery": """\
270
query OtherPhotosPaginationContainerQuery($username: String!, $pageSize: Int, $cursor: String) {
271
userByUsername(username: $username) {
272
...OtherPhotosPaginationContainer_user_3e6UuE
273
id
274
}
275
}
276
277
fragment OtherPhotosPaginationContainer_user_3e6UuE on User {
278
photos(first: $pageSize, after: $cursor, privacy: PROFILE, sort: ID_DESC) {
279
edges {
280
node {
281
id
282
legacyId
283
canonicalPath
284
width
285
height
286
name
287
isLikedByMe
288
notSafeForWork
289
photographer: uploader {
290
id
291
legacyId
292
username
293
displayName
294
canonicalPath
295
followedByUsers {
296
isFollowedByMe
297
}
298
}
299
images(sizes: [33, 35]) {
300
size
301
url
302
jpegUrl
303
webpUrl
304
id
305
}
306
__typename
307
}
308
cursor
309
}
310
totalCount
311
pageInfo {
312
endCursor
313
hasNextPage
314
}
315
}
316
}
317
""",
318
319
"ProfileRendererQuery": """\
320
query ProfileRendererQuery($username: String!) {
321
profile: userByUsername(username: $username) {
322
id
323
legacyId
324
userType: type
325
username
326
firstName
327
displayName
328
registeredAt
329
canonicalPath
330
avatar {
331
...ProfileAvatar_avatar
332
id
333
}
334
userProfile {
335
firstname
336
lastname
337
state
338
country
339
city
340
about
341
id
342
}
343
socialMedia {
344
website
345
twitter
346
instagram
347
facebook
348
id
349
}
350
coverPhotoUrl
351
followedByUsers {
352
totalCount
353
isFollowedByMe
354
}
355
followingUsers {
356
totalCount
357
}
358
membership {
359
expiryDate
360
membershipTier: tier
361
photoUploadQuota
362
refreshPhotoUploadQuotaAt
363
paymentStatus
364
id
365
}
366
profileTabs {
367
tabs {
368
name
369
visible
370
}
371
}
372
...EditCover_cover
373
photoStats {
374
likeCount
375
viewCount
376
}
377
photos(privacy: PROFILE) {
378
totalCount
379
}
380
licensingPhotos(status: ACCEPTED) {
381
totalCount
382
}
383
portfolio {
384
id
385
status
386
userDisabled
387
}
388
}
389
}
390
391
fragment EditCover_cover on User {
392
coverPhotoUrl
393
}
394
395
fragment ProfileAvatar_avatar on UserAvatar {
396
images(sizes: [MEDIUM, LARGE]) {
397
size
398
url
399
id
400
}
401
}
402
""",
403
404
"GalleriesDetailQueryRendererQuery": """\
405
query GalleriesDetailQueryRendererQuery($galleryOwnerLegacyId: ID!, $ownerLegacyId: String, $slug: String, $token: String, $pageSize: Int, $gallerySize: Int) {
406
galleries(galleryOwnerLegacyId: $galleryOwnerLegacyId, first: $gallerySize) {
407
edges {
408
node {
409
legacyId
410
description
411
name
412
privacy
413
canonicalPath
414
notSafeForWork
415
buttonName
416
externalUrl
417
cover {
418
images(sizes: [35, 33]) {
419
size
420
webpUrl
421
jpegUrl
422
id
423
}
424
id
425
}
426
photos {
427
totalCount
428
}
429
id
430
}
431
}
432
}
433
gallery: galleryByOwnerIdAndSlugOrToken(ownerLegacyId: $ownerLegacyId, slug: $slug, token: $token) {
434
...GalleriesDetailPaginationContainer_gallery_RlXb8
435
id
436
}
437
}
438
439
fragment GalleriesDetailPaginationContainer_gallery_RlXb8 on Gallery {
440
id
441
legacyId
442
name
443
privacy
444
notSafeForWork
445
ownPhotosOnly
446
canonicalPath
447
publicSlug
448
lastPublishedAt
449
photosAddedSinceLastPublished
450
reportStatus
451
creator {
452
legacyId
453
id
454
}
455
cover {
456
images(sizes: [33, 32, 36, 2048]) {
457
url
458
size
459
webpUrl
460
id
461
}
462
id
463
}
464
description
465
externalUrl
466
buttonName
467
photos(first: $pageSize) {
468
totalCount
469
edges {
470
cursor
471
node {
472
id
473
legacyId
474
canonicalPath
475
name
476
description
477
category
478
uploadedAt
479
location
480
width
481
height
482
isLikedByMe
483
photographer: uploader {
484
id
485
legacyId
486
username
487
displayName
488
canonicalPath
489
avatar {
490
images(sizes: SMALL) {
491
url
492
id
493
}
494
id
495
}
496
followedByUsers {
497
totalCount
498
isFollowedByMe
499
}
500
}
501
images(sizes: [33, 32]) {
502
size
503
url
504
webpUrl
505
id
506
}
507
__typename
508
}
509
}
510
pageInfo {
511
endCursor
512
hasNextPage
513
}
514
}
515
}
516
""",
517
518
"GalleriesDetailPaginationContainerQuery": """\
519
query GalleriesDetailPaginationContainerQuery($ownerLegacyId: String, $slug: String, $token: String, $pageSize: Int, $cursor: String) {
520
galleryByOwnerIdAndSlugOrToken(ownerLegacyId: $ownerLegacyId, slug: $slug, token: $token) {
521
...GalleriesDetailPaginationContainer_gallery_3e6UuE
522
id
523
}
524
}
525
526
fragment GalleriesDetailPaginationContainer_gallery_3e6UuE on Gallery {
527
id
528
legacyId
529
name
530
privacy
531
notSafeForWork
532
ownPhotosOnly
533
canonicalPath
534
publicSlug
535
lastPublishedAt
536
photosAddedSinceLastPublished
537
reportStatus
538
creator {
539
legacyId
540
id
541
}
542
cover {
543
images(sizes: [33, 32, 36, 2048]) {
544
url
545
size
546
webpUrl
547
id
548
}
549
id
550
}
551
description
552
externalUrl
553
buttonName
554
photos(first: $pageSize, after: $cursor) {
555
totalCount
556
edges {
557
cursor
558
node {
559
id
560
legacyId
561
canonicalPath
562
name
563
description
564
category
565
uploadedAt
566
location
567
width
568
height
569
isLikedByMe
570
photographer: uploader {
571
id
572
legacyId
573
username
574
displayName
575
canonicalPath
576
avatar {
577
images(sizes: SMALL) {
578
url
579
id
580
}
581
id
582
}
583
followedByUsers {
584
totalCount
585
isFollowedByMe
586
}
587
}
588
images(sizes: [33, 32]) {
589
size
590
url
591
webpUrl
592
id
593
}
594
__typename
595
}
596
}
597
pageInfo {
598
endCursor
599
hasNextPage
600
}
601
}
602
}
603
""",
604
605
"LikedPhotosQueryRendererQuery": """\
606
query LikedPhotosQueryRendererQuery($pageSize: Int) {
607
...LikedPhotosPaginationContainer_query_RlXb8
608
}
609
610
fragment LikedPhotosPaginationContainer_query_RlXb8 on Query {
611
likedPhotos(first: $pageSize) {
612
edges {
613
node {
614
id
615
legacyId
616
canonicalPath
617
name
618
description
619
category
620
uploadedAt
621
location
622
width
623
height
624
isLikedByMe
625
notSafeForWork
626
tags
627
photographer: uploader {
628
id
629
legacyId
630
username
631
displayName
632
canonicalPath
633
avatar {
634
images {
635
url
636
id
637
}
638
id
639
}
640
followedByUsers {
641
totalCount
642
isFollowedByMe
643
}
644
}
645
images(sizes: [33, 35]) {
646
size
647
url
648
jpegUrl
649
webpUrl
650
id
651
}
652
__typename
653
}
654
cursor
655
}
656
pageInfo {
657
endCursor
658
hasNextPage
659
}
660
}
661
}
662
""",
663
664
"LikedPhotosPaginationContainerQuery": """\
665
query LikedPhotosPaginationContainerQuery($cursor: String, $pageSize: Int) {
666
...LikedPhotosPaginationContainer_query_3e6UuE
667
}
668
669
fragment LikedPhotosPaginationContainer_query_3e6UuE on Query {
670
likedPhotos(first: $pageSize, after: $cursor) {
671
edges {
672
node {
673
id
674
legacyId
675
canonicalPath
676
name
677
description
678
category
679
uploadedAt
680
location
681
width
682
height
683
isLikedByMe
684
notSafeForWork
685
tags
686
photographer: uploader {
687
id
688
legacyId
689
username
690
displayName
691
canonicalPath
692
avatar {
693
images {
694
url
695
id
696
}
697
id
698
}
699
followedByUsers {
700
totalCount
701
isFollowedByMe
702
}
703
}
704
images(sizes: [33, 35]) {
705
size
706
url
707
jpegUrl
708
webpUrl
709
id
710
}
711
__typename
712
}
713
cursor
714
}
715
pageInfo {
716
endCursor
717
hasNextPage
718
}
719
}
720
}
721
""",
722
723
}
724
725