CoCalc -- behance.py

GitHub Repository: mikf/gallery-dl
Path: blob/master/gallery_dl/extractor/behance.py
⁵³⁹⁹ views
1
# -*- coding: utf-8 -*-
2

3
# Copyright 2018-2025 Mike Fährmann
4
#
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License version 2 as
7
# published by the Free Software Foundation.
8

9
"""Extractors for https://www.behance.net/"""
10

11
from .common import Extractor, Message
12
from .. import text, util, exception
13

14

15
class BehanceExtractor(Extractor):
    """Base class for behance extractors"""
    category = "behance"
    root = "https://www.behance.net"
    request_interval = (2.0, 4.0)
    browser = "firefox"
    tls12 = False

    def _init(self):
        """Make sure a 'bcp' cookie is set and remember its value"""
        self._bcp = self.cookies.get("bcp", domain="www.behance.net")
        if not self._bcp:
            # no 'bcp' cookie available: fall back to a fixed value
            self._bcp = "4c34489d-914c-46cd-b44c-dfd0e661136d"
            self.cookies.set("bcp", self._bcp, domain="www.behance.net")

    def items(self):
        """Queue each gallery from galleries() for BehanceGalleryExtractor"""
        for gallery in self.galleries():
            gallery["_extractor"] = BehanceGalleryExtractor
            yield Message.Queue, gallery["url"], self._update(gallery)

    def galleries(self):
        """Return an iterable of gallery metadata dicts

        Each dict needs at least the keys used by items() and _update()
        ('url', 'owners', 'id', 'name'). Subclasses override this method.
        """
        # An empty result keeps items() from iterating over None
        # when a subclass does not provide its own implementation.
        return ()

    def _request_graphql(self, endpoint, variables):
        """Send a GraphQL query and return the 'data' part of the response

        'endpoint' is a key of GRAPHQL_QUERIES selecting the query text;
        'variables' is the dict of query variables sent along with it.
        """
        url = self.root + "/v3/graphql"
        headers = {
            "Origin": self.root,
            "X-BCP" : self._bcp,
            "X-Requested-With": "XMLHttpRequest",
        }
        data = {
            "query"    : GRAPHQL_QUERIES[endpoint],
            "variables": variables,
        }

        return self.request_json(
            url, method="POST", headers=headers, json=data)["data"]

    def _update(self, data):
        """Normalize a gallery metadata dict in place and return it

        Accepts both snake_case ('display_name', 'conceived_on') and
        camelCase ('displayName', 'publishedOn') key variants.
        Raises KeyError when 'owners', 'id', or 'name' is missing.
        """
        # compress data to simple lists
        if (fields := data.get("fields")) and isinstance(fields[0], dict):
            data["fields"] = [
                field.get("name") or field.get("label")
                for field in fields
            ]

        data["owners"] = [
            owner.get("display_name") or owner.get("displayName")
            for owner in data["owners"]
        ]

        tags = data.get("tags") or ()
        if tags and isinstance(tags[0], dict):
            tags = [tag["title"] for tag in tags]
        data["tags"] = tags

        # falls back to 0 when neither timestamp key has a value
        data["date"] = text.parse_timestamp(
            data.get("publishedOn") or data.get("conceived_on") or 0)

        if creator := data.get("creator"):
            # last path component of the creator's URL
            creator["name"] = creator["url"].rpartition("/")[2]

        # backwards compatibility
        data["gallery_id"] = data["id"]
        data["title"] = data["name"]
        data["user"] = ", ".join(data["owners"])

        return data
82

83

84
class BehanceGalleryExtractor(BehanceExtractor):
    """Extractor for image galleries from www.behance.net"""
    subcategory = "gallery"
    directory_fmt = ("{category}", "{owners:J, }", "{id} {name}")
    filename_fmt = "{category}_{id}_{num:>02}.{extension}"
    archive_fmt = "{id}_{num}"
    pattern = r"(?:https?://)?(?:www\.)?behance\.net/gallery/(\d+)"
    example = "https://www.behance.net/gallery/12345/TITLE"

    def __init__(self, match):
        BehanceExtractor.__init__(self, match)
        self.gallery_id = match[1]

    def _init(self):
        """Read the 'modules' option selecting which module types to keep"""
        BehanceExtractor._init(self)

        if modules := self.config("modules"):
            if isinstance(modules, str):
                # a plain string is treated as a comma-separated list
                modules = modules.split(",")
            self.modules = set(modules)
        else:
            self.modules = {"image", "video", "mediacollection", "embed"}

    def items(self):
        """Yield a Directory message followed by one Url message per file"""
        data = self.get_gallery_data()
        imgs = self.get_images(data)
        data["count"] = len(imgs)

        yield Message.Directory, data
        # the running index is written straight into data["num"]
        for data["num"], (url, module) in enumerate(imgs, 1):
            data["module"] = module
            data["extension"] = (module.get("extension") or
                                 text.ext_from_url(url))
            yield Message.Url, url, data

    def get_gallery_data(self):
        """Collect gallery info dict"""
        url = f"{self.root}/gallery/{self.gallery_id}/a"
        cookies = {
            "gk_suid": "14118261",
            "gki": "feature_3_in_1_checkout_test:false,hire_browse_get_quote_c"
                   "ta_ab_test:false,feature_hire_dashboard_services_ab_test:f"
                   "alse,feature_show_details_jobs_row_ab_test:false,feature_a"
                   "i_freelance_project_create_flow:false,",
            "ilo0": "true",
            "originalReferrer": "",
        }
        page = self.request(url, cookies=cookies).text

        # project metadata is embedded as JSON in the page's state script
        data = util.json_loads(text.extr(
            page, 'id="beconfig-store_state">', '</script>'))
        return self._update(data["project"]["project"])

    def get_images(self, data):
        """Extract image results from an API response

        Returns a sequence of (url, module) tuples for all modules of
        'data' whose type is enabled in self.modules.

        Raises AuthorizationError when the gallery has no modules and
        its 'matureAccess' value is anything other than "allowed".
        """
        if not data["modules"]:
            access = data.get("matureAccess")
            if access == "logged-out":
                raise exception.AuthorizationError(
                    "Mature content galleries require logged-in cookies")
            if access == "restricted-safe":
                raise exception.AuthorizationError(
                    "Mature content blocked in account settings")
            if access and access != "allowed":
                raise exception.AuthorizationError()
            return ()

        results = []
        for module in data["modules"]:
            # drop the last 6 characters of the type name
            # (e.g. "ImageModule" -> "image")
            mtype = module["__typename"][:-6].lower()

            if mtype not in self.modules:
                self.log.debug("Skipping '%s' module", mtype)
                continue

            if mtype == "image":
                # index sizes by the second-to-last URL path component
                sizes = {
                    size["url"].rsplit("/", 2)[1]: size
                    for size in module["imageSizes"]["allAvailable"]
                }
                size = (sizes.get("source") or
                        sizes.get("max_3840") or
                        sizes.get("fs") or
                        sizes.get("hd") or
                        sizes.get("disp"))
                if not size:
                    # none of the known size names is available
                    self.log.warning("No download URLs for image %s",
                                     module.get("id") or "???")
                    continue
                results.append((size["url"], module))

            elif mtype == "video":
                # first choice: <source> URL from the embedded player page
                try:
                    url = text.extr(module["embed"], 'src="', '"')
                    page = self.request(text.unescape(url)).text

                    url = text.extr(page, '<source src="', '"')
                    if not url:
                        # do not queue an empty URL;
                        # fall through to the rendition list below
                        raise ValueError("no <source> URL in embed page")
                    if text.ext_from_url(url) == "m3u8":
                        url = "ytdl:" + url
                        module["_ytdl_manifest"] = "hls"
                        module["extension"] = "mp4"
                    results.append((url, module))
                    continue
                except Exception as exc:
                    self.log.debug("%s: %s", exc.__class__.__name__, exc)

                # second choice: renditions listed in the module itself
                try:
                    renditions = module["videoData"]["renditions"]
                except Exception:
                    renditions = None
                if not renditions:
                    self.log.warning("No download URLs for video %s",
                                     module.get("id") or "???")
                    continue

                try:
                    # last rendition that is not an m3u8 manifest
                    url = [
                        r["url"] for r in renditions
                        if text.ext_from_url(r["url"]) != "m3u8"
                    ][-1]
                except Exception as exc:
                    self.log.debug("%s: %s", exc.__class__.__name__, exc)
                    url = "ytdl:" + renditions[-1]["url"]

                results.append((url, module))

            elif mtype == "mediacollection":
                for component in module["components"]:
                    # use the first non-empty size entry of each component
                    for size in component["imageSizes"].values():
                        if size:
                            parts = size["url"].split("/")
                            # swap the 5th '/'-separated URL part for
                            # "source" (presumably the size name)
                            parts[4] = "source"
                            results.append(("/".join(parts), module))
                            break

            elif mtype == "embed":
                if embed := (module.get("originalEmbed") or
                             module.get("fluidEmbed")):
                    embed = text.unescape(text.extr(embed, 'src="', '"'))
                    module["extension"] = "mp4"
                    results.append(("ytdl:" + embed, module))

            elif mtype == "text":
                module["extension"] = "txt"
                results.append(("text:" + module["text"], module))

        return results
225

226

227
class BehanceUserExtractor(BehanceExtractor):
    """Extractor for a user's galleries from www.behance.net"""
    subcategory = "user"
    categorytransfer = True
    pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?#]+)/?$"
    example = "https://www.behance.net/USER"

    def __init__(self, match):
        BehanceExtractor.__init__(self, match)
        self.user = match[1]

    def galleries(self):
        """Yield the project nodes from every page of a user's profile"""
        endpoint = "GetProfileProjects"
        variables = {
            "username": self.user,
            "after"   : "MAo=",  # base64 of "0\n"
        }

        while True:
            data = self._request_graphql(endpoint, variables)
            items = data["user"]["profileProjects"]
            yield from items["nodes"]

            if not items["pageInfo"]["hasNextPage"]:
                return
            # request the next page starting at the returned cursor
            variables["after"] = items["pageInfo"]["endCursor"]
253

254

255
class BehanceCollectionExtractor(BehanceExtractor):
    """Extractor for a collection's galleries from www.behance.net"""
    subcategory = "collection"
    categorytransfer = True
    pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"
    example = "https://www.behance.net/collection/12345/TITLE"

    def __init__(self, match):
        BehanceExtractor.__init__(self, match)
        self.collection_id = match[1]

    def galleries(self):
        """Yield the project of every item from all pages of a moodboard"""
        endpoint = "GetMoodboardItemsAndRecommendations"
        variables = {
            "afterItem": "MAo=",  # base64 of "0\n"
            "firstItem": 40,
            "id"       : int(self.collection_id),
            "shouldGetItems"          : True,
            "shouldGetMoodboardFields": False,
            "shouldGetRecommendations": False,
        }

        while True:
            data = self._request_graphql(endpoint, variables)
            items = data["moodboard"]["items"]

            for node in items["nodes"]:
                entity = node["entity"]
                if not entity:
                    continue
                # ImageModule and MediaCollectionComponent entities only
                # carry their project data nested under 'project'
                # (see the 'nodesFields' fragment); Project entities
                # have no such key and are yielded as they are
                yield entity.get("project") or entity

            if not items["pageInfo"]["hasNextPage"]:
                return
            # request the next page starting at the returned cursor
            variables["afterItem"] = items["pageInfo"]["endCursor"]
287

288

289
# GraphQL query documents, keyed by the operation name that is passed
# as 'endpoint' to BehanceExtractor._request_graphql().
# Each document contains the query itself plus every fragment it uses.
GRAPHQL_QUERIES = {
    "GetProfileProjects": """\
query GetProfileProjects($username: String, $after: String) {
  user(username: $username) {
    profileProjects(first: 12, after: $after) {
      pageInfo {
        endCursor
        hasNextPage
      }
      nodes {
        __typename
        adminFlags {
          mature_lock
          privacy_lock
          dmca_lock
          flagged_lock
          privacy_violation_lock
          trademark_lock
          spam_lock
          eu_ip_lock
        }
        colors {
          r
          g
          b
        }
        covers {
          size_202 {
            url
          }
          size_404 {
            url
          }
          size_808 {
            url
          }
        }
        features {
          url
          name
          featuredOn
          ribbon {
            image
            image2x
            image3x
          }
        }
        fields {
          id
          label
          slug
          url
        }
        hasMatureContent
        id
        isFeatured
        isHiddenFromWorkTab
        isMatureReviewSubmitted
        isOwner
        isFounder
        isPinnedToSubscriptionOverview
        isPrivate
        linkedAssets {
          ...sourceLinkFields
        }
        linkedAssetsCount
        sourceFiles {
          ...sourceFileFields
        }
        matureAccess
        modifiedOn
        name
        owners {
          ...OwnerFields
          images {
            size_50 {
              url
            }
          }
        }
        premium
        publishedOn
        stats {
          appreciations {
            all
          }
          views {
            all
          }
          comments {
            all
          }
        }
        slug
        tools {
          id
          title
          category
          categoryLabel
          categoryId
          approved
          url
          backgroundColor
        }
        url
      }
    }
  }
}

fragment sourceFileFields on SourceFile {
  __typename
  sourceFileId
  projectId
  userId
  title
  assetId
  renditionUrl
  mimeType
  size
  category
  licenseType
  unitAmount
  currency
  tier
  hidden
  extension
  hasUserPurchased
}

fragment sourceLinkFields on LinkedAsset {
  __typename
  name
  premium
  url
  category
  licenseType
}

fragment OwnerFields on User {
  displayName
  hasPremiumAccess
  id
  isFollowing
  isProfileOwner
  location
  locationUrl
  url
  username
  availabilityInfo {
    availabilityTimeline
    isAvailableFullTime
    isAvailableFreelance
  }
}
""",

    "GetMoodboardItemsAndRecommendations": """\
query GetMoodboardItemsAndRecommendations(
  $id: Int!
  $firstItem: Int!
  $afterItem: String
  $shouldGetRecommendations: Boolean!
  $shouldGetItems: Boolean!
  $shouldGetMoodboardFields: Boolean!
) {
  viewer @include(if: $shouldGetMoodboardFields) {
    isOptedOutOfRecommendations
    isAdmin
  }
  moodboard(id: $id) {
    ...moodboardFields @include(if: $shouldGetMoodboardFields)

    items(first: $firstItem, after: $afterItem) @include(if: $shouldGetItems) {
      pageInfo {
        endCursor
        hasNextPage
      }
      nodes {
        ...nodesFields
      }
    }

    recommendedItems(first: 80) @include(if: $shouldGetRecommendations) {
      nodes {
        ...nodesFields
        fetchSource
      }
    }
  }
}

fragment moodboardFields on Moodboard {
  id
  label
  privacy
  followerCount
  isFollowing
  projectCount
  url
  isOwner
  owners {
    ...OwnerFields
    images {
      size_50 {
        url
      }
      size_100 {
        url
      }
      size_115 {
        url
      }
      size_230 {
        url
      }
      size_138 {
        url
      }
      size_276 {
        url
      }
    }
  }
}

fragment projectFields on Project {
  __typename
  id
  isOwner
  publishedOn
  matureAccess
  hasMatureContent
  modifiedOn
  name
  url
  isPrivate
  slug
  license {
    license
    description
    id
    label
    url
    text
    images
  }
  fields {
    label
  }
  colors {
    r
    g
    b
  }
  owners {
    ...OwnerFields
    images {
      size_50 {
        url
      }
      size_100 {
        url
      }
      size_115 {
        url
      }
      size_230 {
        url
      }
      size_138 {
        url
      }
      size_276 {
        url
      }
    }
  }
  covers {
    size_original {
      url
    }
    size_max_808 {
      url
    }
    size_808 {
      url
    }
    size_404 {
      url
    }
    size_202 {
      url
    }
    size_230 {
      url
    }
    size_115 {
      url
    }
  }
  stats {
    views {
      all
    }
    appreciations {
      all
    }
    comments {
      all
    }
  }
}

fragment exifDataValueFields on exifDataValue {
  id
  label
  value
  searchValue
}

fragment nodesFields on MoodboardItem {
  id
  entityType
  width
  height
  flexWidth
  flexHeight
  images {
    size
    url
  }

  entity {
    ... on Project {
      ...projectFields
    }

    ... on ImageModule {
      project {
        ...projectFields
      }

      colors {
        r
        g
        b
      }

      exifData {
        lens {
          ...exifDataValueFields
        }
        software {
          ...exifDataValueFields
        }
        makeAndModel {
          ...exifDataValueFields
        }
        focalLength {
          ...exifDataValueFields
        }
        iso {
          ...exifDataValueFields
        }
        location {
          ...exifDataValueFields
        }
        flash {
          ...exifDataValueFields
        }
        exposureMode {
          ...exifDataValueFields
        }
        shutterSpeed {
          ...exifDataValueFields
        }
        aperture {
          ...exifDataValueFields
        }
      }
    }

    ... on MediaCollectionComponent {
      project {
        ...projectFields
      }
    }
  }
}

fragment OwnerFields on User {
  displayName
  hasPremiumAccess
  id
  isFollowing
  isProfileOwner
  location
  locationUrl
  url
  username
  availabilityInfo {
    availabilityTimeline
    isAvailableFullTime
    isAvailableFreelance
  }
}
""",

}
699

700
Product

Resources

Company