Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
mikf
GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/behance.py
5399 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8
9
"""Extractors for https://www.behance.net/"""
10
11
from .common import Extractor, Message
12
from .. import text, util, exception
13
14
15
class BehanceExtractor(Extractor):
    """Shared base for all behance.net extractors"""
    category = "behance"
    root = "https://www.behance.net"
    request_interval = (2.0, 4.0)
    browser = "firefox"
    tls12 = False

    def _init(self):
        """Ensure a 'bcp' cookie exists and remember its value"""
        bcp = self.cookies.get("bcp", domain="www.behance.net")
        if bcp:
            self._bcp = bcp
        else:
            # no cookie supplied: install a fixed fallback value
            self._bcp = bcp = "4c34489d-914c-46cd-b44c-dfd0e661136d"
            self.cookies.set("bcp", bcp, domain="www.behance.net")

    def items(self):
        """Queue every gallery for processing by BehanceGalleryExtractor"""
        for project in self.galleries():
            project["_extractor"] = BehanceGalleryExtractor
            yield Message.Queue, project["url"], self._update(project)

    def galleries(self):
        """Return an iterable of gallery dicts; each must have a 'url' key

        Implemented by subclasses; items() also passes every dict
        through _update().
        """

    def _request_graphql(self, endpoint, variables):
        """POST the named GraphQL query and return the response's 'data'

        'endpoint' is a key of GRAPHQL_QUERIES; 'variables' is sent
        unchanged as the query's variables object.
        """
        api_url = self.root + "/v3/graphql"
        request_headers = {
            "Origin": self.root,
            "X-BCP" : self._bcp,
            "X-Requested-With": "XMLHttpRequest",
        }
        payload = {
            "query"    : GRAPHQL_QUERIES[endpoint],
            "variables": variables,
        }

        response = self.request_json(
            api_url, method="POST", headers=request_headers, json=payload)
        return response["data"]

    def _update(self, data):
        """Normalize a project dict in place and return it

        Flattens 'fields', 'owners' and 'tags' to lists of strings,
        adds 'date' and fills the legacy 'gallery_id', 'title' and
        'user' keys.
        """
        # objects -> plain names (either 'name' or 'label' is used)
        fields = data.get("fields")
        if fields and isinstance(fields[0], dict):
            data["fields"] = [
                entry.get("name") or entry.get("label")
                for entry in fields
            ]

        # owner objects use 'display_name' or 'displayName'
        owners = data["owners"] = [
            person.get("display_name") or person.get("displayName")
            for person in data["owners"]
        ]

        tags = data.get("tags")
        if not tags:
            tags = ()
        elif isinstance(tags[0], dict):
            tags = [entry["title"] for entry in tags]
        data["tags"] = tags

        timestamp = (data.get("publishedOn") or
                     data.get("conceived_on") or 0)
        data["date"] = text.parse_timestamp(timestamp)

        creator = data.get("creator")
        if creator:
            # everything after the last '/' of the profile URL
            creator["name"] = creator["url"].rpartition("/")[2]

        # backwards compatibility
        data["gallery_id"] = data["id"]
        data["title"] = data["name"]
        data["user"] = ", ".join(owners)

        return data
82
83
84
class BehanceGalleryExtractor(BehanceExtractor):
    """Extractor for image galleries from www.behance.net"""
    subcategory = "gallery"
    directory_fmt = ("{category}", "{owners:J, }", "{id} {name}")
    filename_fmt = "{category}_{id}_{num:>02}.{extension}"
    archive_fmt = "{id}_{num}"
    pattern = r"(?:https?://)?(?:www\.)?behance\.net/gallery/(\d+)"
    example = "https://www.behance.net/gallery/12345/TITLE"

    def __init__(self, match):
        BehanceExtractor.__init__(self, match)
        self.gallery_id = match[1]

    def _init(self):
        """Run the base setup and select the module types to download"""
        BehanceExtractor._init(self)

        # 'modules' may be a comma-separated string or an iterable of names
        if modules := self.config("modules"):
            if isinstance(modules, str):
                modules = modules.split(",")
            self.modules = set(modules)
        else:
            self.modules = {"image", "video", "mediacollection", "embed"}

    def items(self):
        """Yield one Directory message, then one Url message per file"""
        data = self.get_gallery_data()
        imgs = self.get_images(data)
        data["count"] = len(imgs)

        yield Message.Directory, data
        # 'num' is stored directly in the shared metadata dict
        for data["num"], (url, module) in enumerate(imgs, 1):
            data["module"] = module
            data["extension"] = (module.get("extension") or
                                 text.ext_from_url(url))
            yield Message.Url, url, data

    def get_gallery_data(self):
        """Collect gallery info dict

        Requests the gallery page, parses the JSON that follows
        'id="beconfig-store_state">' and returns its
        ["project"]["project"] entry after normalization by _update().
        """
        # NOTE(review): the trailing "a" looks like a placeholder for the
        # title part of the URL - confirm
        url = f"{self.root}/gallery/{self.gallery_id}/a"
        cookies = {
            "gk_suid": "14118261",
            "gki": "feature_3_in_1_checkout_test:false,hire_browse_get_quote_c"
                   "ta_ab_test:false,feature_hire_dashboard_services_ab_test:f"
                   "alse,feature_show_details_jobs_row_ab_test:false,feature_a"
                   "i_freelance_project_create_flow:false,",
            "ilo0": "true",
            "originalReferrer": "",
        }
        page = self.request(url, cookies=cookies).text

        data = util.json_loads(text.extr(
            page, 'id="beconfig-store_state">', '</script>'))
        return self._update(data["project"]["project"])

    def get_images(self, data):
        """Extract image results from an API response

        Returns a list of (url, module) tuples for every module whose
        type is enabled in self.modules, or an empty tuple when the
        gallery has no modules.  Modules without any usable URL are
        skipped with a warning instead of aborting the gallery.

        Raises exception.AuthorizationError when the gallery has no
        modules and its 'matureAccess' value is set to anything other
        than "allowed".
        """
        if not data["modules"]:
            access = data.get("matureAccess")
            if access == "logged-out":
                raise exception.AuthorizationError(
                    "Mature content galleries require logged-in cookies")
            if access == "restricted-safe":
                raise exception.AuthorizationError(
                    "Mature content blocked in account settings")
            if access and access != "allowed":
                raise exception.AuthorizationError()
            return ()

        results = []
        for module in data["modules"]:
            # drop the last 6 characters of the type name
            # (presumably the "Module" suffix, as in "ImageModule")
            mtype = module["__typename"][:-6].lower()

            if mtype not in self.modules:
                self.log.debug("Skipping '%s' module", mtype)
                continue

            if mtype == "image":
                # key each rendition by the second-to-last URL segment
                sizes = {
                    size["url"].rsplit("/", 2)[1]: size
                    for size in module["imageSizes"]["allAvailable"]
                }
                size = (sizes.get("source") or
                        sizes.get("max_3840") or
                        sizes.get("fs") or
                        sizes.get("hd") or
                        sizes.get("disp"))
                if not size:
                    # none of the preferred labels exist:
                    # use whichever rendition is listed first
                    size = next(iter(sizes.values()), None)
                if not size:
                    self.log.warning("No download URLs for image %s",
                                     module.get("id") or "???")
                    continue
                results.append((size["url"], module))

            elif mtype == "video":
                url = self._extract_video(module)
                if url is not None:
                    results.append((url, module))

            elif mtype == "mediacollection":
                for component in module["components"]:
                    # use the first non-empty size entry of each component
                    for size in component["imageSizes"].values():
                        if size:
                            # swap the 5th '/'-separated URL part
                            # (presumably the size label) for "source"
                            parts = size["url"].split("/")
                            parts[4] = "source"
                            results.append(("/".join(parts), module))
                            break

            elif mtype == "embed":
                if embed := (module.get("originalEmbed") or
                             module.get("fluidEmbed")):
                    embed = text.unescape(text.extr(embed, 'src="', '"'))
                    module["extension"] = "mp4"
                    results.append(("ytdl:" + embed, module))

            elif mtype == "text":
                module["extension"] = "txt"
                results.append(("text:" + module["text"], module))

        return results

    def _extract_video(self, module):
        """Return the download URL for a video module, or None

        Tries the <source> element of the module's embed page first and
        falls back to the module's 'videoData' renditions.  May add
        '_ytdl_manifest' and 'extension' to 'module'.
        """
        try:
            url = text.extr(module["embed"], 'src="', '"')
            page = self.request(text.unescape(url)).text

            url = text.extr(page, '<source src="', '"')
            if url:
                if text.ext_from_url(url) == "m3u8":
                    url = "ytdl:" + url
                    module["_ytdl_manifest"] = "hls"
                    module["extension"] = "mp4"
                return url
            # an empty result must not be emitted as a download URL
            self.log.debug("No <source> URL in embed page of video %s",
                           module.get("id") or "???")
        except Exception as exc:
            # best effort only; the renditions below are the fallback
            self.log.debug("%s: %s", exc.__class__.__name__, exc)

        try:
            renditions = module["videoData"]["renditions"]
        except Exception:
            renditions = None
        if not renditions:
            # missing, null, or empty: nothing to index into below
            self.log.warning("No download URLs for video %s",
                             module.get("id") or "???")
            return None

        try:
            # last rendition that is not an HLS manifest
            return [
                r["url"] for r in renditions
                if text.ext_from_url(r["url"]) != "m3u8"
            ][-1]
        except Exception as exc:
            self.log.debug("%s: %s", exc.__class__.__name__, exc)
            return "ytdl:" + renditions[-1]["url"]
225
226
227
class BehanceUserExtractor(BehanceExtractor):
    """Extractor for all galleries of a www.behance.net user profile"""
    subcategory = "user"
    categorytransfer = True
    pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?#]+)/?$"
    example = "https://www.behance.net/USER"

    def __init__(self, match):
        BehanceExtractor.__init__(self, match)
        self.user = match[1]

    def galleries(self):
        """Yield every project node of the user, page by page"""
        query_vars = {
            "username": self.user,
            "after"   : "MAo=",  # base64 of "0\n"
        }

        while True:
            response = self._request_graphql("GetProfileProjects", query_vars)
            projects = response["user"]["profileProjects"]
            yield from projects["nodes"]

            page_info = projects["pageInfo"]
            if not page_info["hasNextPage"]:
                break
            # continue after the last node of the page just yielded
            query_vars["after"] = page_info["endCursor"]
253
254
255
class BehanceCollectionExtractor(BehanceExtractor):
    """Extractor for the galleries of a www.behance.net collection"""
    subcategory = "collection"
    categorytransfer = True
    pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"
    example = "https://www.behance.net/collection/12345/TITLE"

    def __init__(self, match):
        BehanceExtractor.__init__(self, match)
        self.collection_id = match[1]

    def galleries(self):
        """Yield the 'entity' of every moodboard item, page by page"""
        query_vars = {
            "afterItem": "MAo=",  # base64 of "0\n"
            "firstItem": 40,
            "id"       : int(self.collection_id),
            # request the item list only
            "shouldGetItems"          : True,
            "shouldGetMoodboardFields": False,
            "shouldGetRecommendations": False,
        }

        while True:
            response = self._request_graphql(
                "GetMoodboardItemsAndRecommendations", query_vars)
            board_items = response["moodboard"]["items"]

            # NOTE(review): per the query, only Project entities carry
            # 'url' directly; ImageModule / MediaCollectionComponent
            # entities nest it under 'project' - confirm these never occur
            for item in board_items["nodes"]:
                yield item["entity"]

            page_info = board_items["pageInfo"]
            if not page_info["hasNextPage"]:
                break
            query_vars["afterItem"] = page_info["endCursor"]
287
288
289
# Fragment used by both documents below; appended to each of them so that
# every entry of GRAPHQL_QUERIES remains a complete, standalone document.
_OWNER_FIELDS_FRAGMENT = """\
fragment OwnerFields on User {
  displayName
  hasPremiumAccess
  id
  isFollowing
  isProfileOwner
  location
  locationUrl
  url
  username
  availabilityInfo {
    availabilityTimeline
    isAvailableFullTime
    isAvailableFreelance
  }
}
"""

# GraphQL documents sent by BehanceExtractor._request_graphql(),
# keyed by the 'endpoint' name passed to that method.
GRAPHQL_QUERIES = {
    # projects of a user profile, 12 per page, paginated with '$after'
    "GetProfileProjects": """\
query GetProfileProjects($username: String, $after: String) {
  user(username: $username) {
    profileProjects(first: 12, after: $after) {
      pageInfo {
        endCursor
        hasNextPage
      }
      nodes {
        __typename
        adminFlags {
          mature_lock
          privacy_lock
          dmca_lock
          flagged_lock
          privacy_violation_lock
          trademark_lock
          spam_lock
          eu_ip_lock
        }
        colors {
          r
          g
          b
        }
        covers {
          size_202 {
            url
          }
          size_404 {
            url
          }
          size_808 {
            url
          }
        }
        features {
          url
          name
          featuredOn
          ribbon {
            image
            image2x
            image3x
          }
        }
        fields {
          id
          label
          slug
          url
        }
        hasMatureContent
        id
        isFeatured
        isHiddenFromWorkTab
        isMatureReviewSubmitted
        isOwner
        isFounder
        isPinnedToSubscriptionOverview
        isPrivate
        linkedAssets {
          ...sourceLinkFields
        }
        linkedAssetsCount
        sourceFiles {
          ...sourceFileFields
        }
        matureAccess
        modifiedOn
        name
        owners {
          ...OwnerFields
          images {
            size_50 {
              url
            }
          }
        }
        premium
        publishedOn
        stats {
          appreciations {
            all
          }
          views {
            all
          }
          comments {
            all
          }
        }
        slug
        tools {
          id
          title
          category
          categoryLabel
          categoryId
          approved
          url
          backgroundColor
        }
        url
      }
    }
  }
}

fragment sourceFileFields on SourceFile {
  __typename
  sourceFileId
  projectId
  userId
  title
  assetId
  renditionUrl
  mimeType
  size
  category
  licenseType
  unitAmount
  currency
  tier
  hidden
  extension
  hasUserPurchased
}

fragment sourceLinkFields on LinkedAsset {
  __typename
  name
  premium
  url
  category
  licenseType
}

""" + _OWNER_FIELDS_FRAGMENT,

    # items of a moodboard (collection); the three 'shouldGet...'
    # variables switch the corresponding parts of the query on or off
    "GetMoodboardItemsAndRecommendations": """\
query GetMoodboardItemsAndRecommendations(
  $id: Int!
  $firstItem: Int!
  $afterItem: String
  $shouldGetRecommendations: Boolean!
  $shouldGetItems: Boolean!
  $shouldGetMoodboardFields: Boolean!
) {
  viewer @include(if: $shouldGetMoodboardFields) {
    isOptedOutOfRecommendations
    isAdmin
  }
  moodboard(id: $id) {
    ...moodboardFields @include(if: $shouldGetMoodboardFields)

    items(first: $firstItem, after: $afterItem) @include(if: $shouldGetItems) {
      pageInfo {
        endCursor
        hasNextPage
      }
      nodes {
        ...nodesFields
      }
    }

    recommendedItems(first: 80) @include(if: $shouldGetRecommendations) {
      nodes {
        ...nodesFields
        fetchSource
      }
    }
  }
}

fragment moodboardFields on Moodboard {
  id
  label
  privacy
  followerCount
  isFollowing
  projectCount
  url
  isOwner
  owners {
    ...OwnerFields
    images {
      size_50 {
        url
      }
      size_100 {
        url
      }
      size_115 {
        url
      }
      size_230 {
        url
      }
      size_138 {
        url
      }
      size_276 {
        url
      }
    }
  }
}

fragment projectFields on Project {
  __typename
  id
  isOwner
  publishedOn
  matureAccess
  hasMatureContent
  modifiedOn
  name
  url
  isPrivate
  slug
  license {
    license
    description
    id
    label
    url
    text
    images
  }
  fields {
    label
  }
  colors {
    r
    g
    b
  }
  owners {
    ...OwnerFields
    images {
      size_50 {
        url
      }
      size_100 {
        url
      }
      size_115 {
        url
      }
      size_230 {
        url
      }
      size_138 {
        url
      }
      size_276 {
        url
      }
    }
  }
  covers {
    size_original {
      url
    }
    size_max_808 {
      url
    }
    size_808 {
      url
    }
    size_404 {
      url
    }
    size_202 {
      url
    }
    size_230 {
      url
    }
    size_115 {
      url
    }
  }
  stats {
    views {
      all
    }
    appreciations {
      all
    }
    comments {
      all
    }
  }
}

fragment exifDataValueFields on exifDataValue {
  id
  label
  value
  searchValue
}

fragment nodesFields on MoodboardItem {
  id
  entityType
  width
  height
  flexWidth
  flexHeight
  images {
    size
    url
  }

  entity {
    ... on Project {
      ...projectFields
    }

    ... on ImageModule {
      project {
        ...projectFields
      }

      colors {
        r
        g
        b
      }

      exifData {
        lens {
          ...exifDataValueFields
        }
        software {
          ...exifDataValueFields
        }
        makeAndModel {
          ...exifDataValueFields
        }
        focalLength {
          ...exifDataValueFields
        }
        iso {
          ...exifDataValueFields
        }
        location {
          ...exifDataValueFields
        }
        flash {
          ...exifDataValueFields
        }
        exposureMode {
          ...exifDataValueFields
        }
        shutterSpeed {
          ...exifDataValueFields
        }
        aperture {
          ...exifDataValueFields
        }
      }
    }

    ... on MediaCollectionComponent {
      project {
        ...projectFields
      }
    }
  }
}

""" + _OWNER_FIELDS_FRAGMENT,

}
699
700