CoCalc -- jekyll-jsonld.rb

GitHub Repository: galaxyproject/training-material
Path: blob/main/_plugins/jekyll-jsonld.rb
¹⁶⁷⁷ views
1
# frozen_string_literal: true
2

3
require 'cgi'
require 'json'
require './_plugins/gtn'
require './_plugins/gtn/git'
require './_plugins/util'
7

8
module Jekyll
9
  module Filters
10

11
    # Generate JSON-LD metadata for the GTN.
12
    module JsonldFilter
13
      # schema.org Organization record describing the Galaxy Training Network itself.
      # The other generators in this module embed it as publisher / provider / memberOf.
      GTN = {
        '@type': 'Organization',
        'http://purl.org/dc/terms/conformsTo': {
          # Bioschemas profile this record claims conformance to.
          '@id': 'https://bioschemas.org/profiles/Organization/0.2-DRAFT-2019_07_19',
          '@type': 'Organization'
        },
        id: 'https://training.galaxyproject.org',
        # NOTE(review): this value looks like an e-mail-obfuscation placeholder rather
        # than a real address -- confirm against the repository before relying on it.
        email: '[email protected]',
        name: 'Galaxy Training Network',
        legalName: 'Galaxy Training Network',
        alternateName: 'GTN',
        url: 'https://training.galaxyproject.org',
        logo: 'https://training.galaxyproject.org/training-material/assets/images/GTNLogo1000.png',
        keywords: ['galaxy', 'bioinformatics', 'training', 'fair', 'accessible'],
        status: 'active',
        # Evaluated once, when this file is loaded.
        foundingDate: Gtn::Git.discover['founding_date'].to_s
      }.freeze
31

32
      # schema.org accessibility properties that are merged (via Hash#update) into the
      # JSON-LD of news posts and materials.
      A11Y = {
        accessMode: ['textual', 'visual'],
        accessModeSufficient: ['textual', 'visual'],
        # accessibilityAPI is intentionally not declared.
        accessibilityControl: ['fullKeyboardControl', 'fullMouseControl'],
        accessibilityFeature: ['alternativeText', 'tableOfContents'],
        # accessibilityHazard is intentionally not declared.
        accessibilitySummary: 'The text aims to be as accessible as possible. ' \
                              'Image descriptions will vary per tutorial, from images being completely ' \
                              'inaccessible, to images with good descriptions for non-visual users.'
      }.freeze
43

44
      # Maps a topic's +type+ to the schema.org educationalRole reported for its audience.
      # Frozen like the other constants in this module so it cannot be mutated at runtime.
      EDU_ROLES = {
        'use' => 'Students',
        'admin-dev' => 'Galaxy Administrators',
        'basics' => 'Students',
        'data-science' => 'Data-Science Students',
        'instructors' => 'Instructors'
      }.freeze
51

52
      ##
53
      # Generate the Dublin Core metadata for a material.
54
      # Parameters:
55
      # +material+:: The material to generate the metadata for.
56
      # +site+:: The site object.
57
      # Returns:
58
      # A string containing the metadata.
59
      #
60
      # Example:
61
      #  {{ material | generate_dublin_core: site }}
62
      #  => <meta name="DC.identifier" content="..." />
63
      def generate_dublin_core(material, site)
        # Builds one <meta> tag per Dublin Core attribute and returns them joined by
        # newlines. Returns nil when the material carries a 'data' hash whose 'type'
        # is anything other than 'tutorial_hands_on'.
        return if material.key?('data') && material['data'].fetch('type', 'none') != 'tutorial_hands_on'

        attributes = [
          ['DC.identifier', site['github_repository']],
          ['DC.type', 'text'],
          ['DC.title', material['title']],
          ['DC.publisher', 'Galaxy Training Network'],
          ['DC.date', Gtn::ModificationTimes.obtain_time(material['path'])]
        ]

        creators = Gtn::Contributors.get_authors(material).map do |user|
          ['DC.creator', Gtn::Contributors.fetch_name(site, user)]
        end
        attributes.concat(creators)

        # Values are free text (titles, names); escape them so quotes, ampersands or
        # angle brackets cannot break out of the content="..." attribute.
        attributes
          .map { |name, content| %(<meta name="#{name}" content="#{CGI.escapeHTML(content.to_s)}">) }
          .join("\n")
      end
80

81
      ##
82
      # Generate the JSON-LD metadata for a person
83
      # Parameters:
84
      # +id+:: The id of the person.
85
      # +contributor+:: The contributor object from CONTRIBUTORS.yaml.
86
      # +site+:: The site object.
87
      # Returns:
88
      # +Hash+:: The JSON-LD metadata.
89
      #
90
      # Example:
91
      #  generate_person_jsonld("hexylena", site['data']['contributors']['hexylena'], site)
92
      #  => {
93
      #    "@context": "https://schema.org",
94
      #    "@type": "Person",
95
      #    "http://purl.org/dc/terms/conformsTo": {
96
      #      # Bioschemas profile
97
      #      "@id": "https://bioschemas.org/profiles/Person/0.3-DRAFT",
98
      #      "@type": "CreativeWork"
99
      #    },
100
      #    "url": "https://training.galaxyproject.org/hall-of-fame/hexylena/",
101
      #    "mainEntityOfPage": "https://training.galaxyproject.org/hall-of-fame/hexylena/",
102
      #    "name": "hexylena",
103
      #    "image": "https://avatars.githubusercontent.com/hexylena",
104
      #    "description": "A contributor to the GTN project.",
105
      #    "memberOf": [...],
106
      #    "identifier": "https://orcid.org/0000-0002-6601-2165",
107
      #    "orcid": "https://orcid.org/0000-0002-6601-2165"
108
      #  }
109
      #
110
      def generate_person_jsonld(id, contributor, site)
        # Builds the schema.org Person hash for +id+. +contributor+ may be nil (an id
        # without a contributor record); the lookup of the same id below is therefore
        # treated as possibly nil too instead of being dereferenced unconditionally.
        default_bio = 'A contributor to the GTN project.'

        record = Gtn::Contributors.fetch_contributor(site, id)
        affiliation_ids = (record && record['affiliations']) || []
        member_of = affiliation_ids.map do |org_id|
          generate_org_jsonld(org_id, Gtn::Contributors.fetch_contributor(site, org_id), site)
        end

        # The hall-of-fame page serves as both the person's URL and main entity page.
        profile_url = "#{site['url']}#{site['baseurl']}/hall-of-fame/#{id}/"

        person = {
          '@context': 'https://schema.org',
          '@type': 'Person',
          'http://purl.org/dc/terms/conformsTo': {
            '@id': 'https://bioschemas.org/profiles/Person/0.3-DRAFT',
            '@type': 'CreativeWork'
          },
          url: profile_url,
          mainEntityOfPage: profile_url,
          name: Gtn::Contributors.fetch_name(site, id),
          image: "https://avatars.githubusercontent.com/#{id}",
          # Falls back to a generic sentence when there is no record or no 'bio' key.
          description: contributor.nil? ? default_bio : contributor.fetch('bio', default_bio),
          # The GTN itself is always listed first, followed by declared affiliations.
          memberOf: [GTN] + member_of
        }

        # Only emitted when an ORCID is recorded; the key is a String, as before.
        orcid = contributor && contributor['orcid']
        person['identifier'] = "https://orcid.org/#{orcid}" if orcid

        person
      end
144

145
      ##
146
      # Generate the JSON-LD metadata for an organisation
147
      # Parameters:
148
      # +id+:: The id of the org.
149
      # +contributor+:: The contributor object from ORGANISATIONS.yaml.
150
      # +site+:: The site object.
151
      # Returns:
152
      # +Hash+:: The JSON-LD metadata.
153
      def generate_org_jsonld(id, contributor, site)
        # Bioschemas profile the organisation record claims conformance to.
        profile = {
          '@id': 'https://bioschemas.org/profiles/Organization/0.3-DRAFT',
          '@type': 'CreativeWork'
        }

        org = {
          '@context': 'https://schema.org',
          '@type': 'Organization',
          'http://purl.org/dc/terms/conformsTo': profile,
          id: "#{site['url']}#{site['baseurl']}/hall-of-fame/#{id}/",
          name: Gtn::Contributors.fetch_name(site, id),
          description: 'An organization supporting the Galaxy Training Network'
        }

        # The homepage is optional: it is only added when the key exists and is truthy.
        homepage = contributor.key?('url') ? contributor['url'] : nil
        org['url'] = homepage if homepage

        org
      end
170

171
      ##
172
      # Generate the JSON-LD metadata for a funding organisation
173
      # Parameters:
174
      # +id+:: The id of the funding organisation.
175
      # +contributor+:: The contributor object from ORGANISATIONS.yaml.
176
      # +site+:: The site object.
177
      # Returns:
178
      # +Hash+:: The JSON-LD metadata.
179
      def generate_funder_jsonld(id, contributor, site)
        # Defaults used when the record lacks the corresponding key.
        fallback_statement = 'An organization supporting the Galaxy Training Network'
        fallback_url = "https://training.galaxyproject.org/training-material/hall-of-fame/#{id}/"
        fallback_logo = "https://github.com/#{id}.png"

        funder = {
          '@context': 'https://schema.org',
          '@type': 'Organization',
          'http://purl.org/dc/terms/conformsTo': {
            '@id': 'https://bioschemas.org/profiles/Organization/0.3-DRAFT',
            '@type': 'CreativeWork'
          }
        }
        funder[:name] = Gtn::Contributors.fetch_name(site, id)
        funder[:description] = contributor.fetch('funding_statement', fallback_statement)
        funder[:url] = contributor.fetch('url', fallback_url)
        funder[:logo] = contributor.fetch('avatar', fallback_logo)
        funder
      end
193

194
      ##
195
      # Generate the JSON-LD metadata for a grant
196
      # Parameters:
197
      # +id+:: The id of the grant.
198
      # +contributor+:: The contributor object from GRANTS.yaml.
199
      # +site+:: The site object.
200
      # Returns:
201
      # +Hash+:: The JSON-LD metadata.
202
      def generate_grant_jsonld(id, contributor, site)
        grant = {
          '@context': 'https://schema.org',
          '@type': 'Grant',
          identifier: contributor['funding_id'],
          # Prefer the dedicated funding URL, falling back to the record's own URL.
          url: Gtn::Contributors.fetch_funding_url(contributor) || contributor['url'],
          funder: generate_funder_jsonld(id, contributor, site)
        }

        # Optional dates are copied across only when the record declares the key.
        { 'start_date' => 'startDate', 'end_date' => 'endDate' }.each do |source_key, target_key|
          grant[target_key] = contributor[source_key] if contributor.key?(source_key)
        end

        grant
      end
216

217
      ##
218
      # Generate the JSON-LD metadata for a person, funder, or organisation as JSON.
219
      # Parameters:
220
      # +id+:: The id of the person, funder, or organisation.
221
      # +site+:: The site object.
222
      # +json+:: Should the output be rendered as JSON (only really used in contributor page.)
223
      # Returns:
224
      # +String+:: The JSON-LD metadata.
225
      def to_pfo_jsonld(id, site, json: true)
        record = Gtn::Contributors.fetch_contributor(site, id)

        # Pick the generator matching the kind of entity: person first, then grant,
        # and anything else is treated as an organisation.
        builder = if Gtn::Contributors.person?(site, id)
                    :generate_person_jsonld
                  elsif Gtn::Contributors.grant?(site, id)
                    :generate_grant_jsonld
                  else
                    :generate_org_jsonld
                  end
        metadata = send(builder, id, record, site)

        # Callers embedding the result in a larger document ask for the raw Hash.
        json ? JSON.pretty_generate(metadata) : metadata
      end
241

242
      ##
243
      # Generate the JSON-LD metadata for a news article (blog)
244
      # Parameters:
245
      # +page+:: The page object.
246
      # +site+:: The +Jekyll::Site+ site object.
247
      # Returns:
248
      # +String+:: The JSON-LD metadata, rendered as pretty-printed JSON.
249
      def generate_news_jsonld(page, site)
        # Returns the BlogPosting JSON-LD for a news post as a pretty-printed JSON String.
        authors = Gtn::Contributors.get_authors(page.to_h).map do |author_id|
          to_pfo_jsonld(author_id, site, json: false)
        end

        # Every URL below is absolute and includes the baseurl, so the page URL, the
        # mainEntityOfPage id and the image all resolve on the deployed site.
        site_root = "#{site['url']}#{site['baseurl']}"
        page_url = "#{site_root}#{page['url']}"

        # The leading slice of the excerpt doubles as headline and description.
        teaser = page.excerpt[0..100].gsub(/\n/, ' ') # TODO: remove html tags.

        data = {
          '@context': 'https://schema.org',
          '@type': 'BlogPosting',
          url: page_url,
          name: page['title'],
          headline: teaser,
          keywords: page['tags'] || [],
          description: teaser,
          articleBody: page.content, # TODO: remove html tags
          datePublished: page.date,
          dateModified: Gtn::ModificationTimes.obtain_time(page.path),
          author: authors,
          publisher: GTN,
          mainEntityOfPage: {
            '@type': 'WebPage',
            '@id': page_url
          },
          image: {
            '@type': 'ImageObject',
            width: 60,
            height: 60,
            url: "#{site_root}/assets/images/GTN-60px.png"
          }
        }
        data.update(A11Y)

        JSON.pretty_generate(data)
      end
282

283
      ##
284
      # Generate the JSON-LD metadata for an event
285
      # Parameters:
286
      # +page+:: The page object.
287
      # +site+:: The +Jekyll::Site+ site object.
288
      # Returns:
289
      # +String+:: The JSON-LD metadata, rendered as pretty-printed JSON.
290
      #
291
      # Examples:
292
      #   {{ page | generate_event_jsonld: site }}
293
      def generate_event_jsonld(page, site)
        # Returns the schema.org Course JSON-LD for an event page as a pretty-printed
        # JSON String.
        front_matter = page.to_h
        organisers = Gtn::Contributors.get_organisers(front_matter).map do |x|
          to_pfo_jsonld(x, site, json: false)
        end
        instructors = Gtn::Contributors.get_instructors(front_matter).map do |x|
          to_pfo_jsonld(x, site, json: false)
        end
        funders = Gtn::Contributors.get_funders(site, front_matter).map do |x|
          to_pfo_jsonld(x, site, json: false)
        end
        funding = Gtn::Contributors.get_grants(site, front_matter).map do |x|
          to_pfo_jsonld(x, site, json: false)
        end

        # Collect every non-custom tutorial referenced by the program; lookups that
        # return nil are dropped.
        materials = (page['program'] || []).flat_map do |section|
          next [] unless section.key?('tutorials')

          section['tutorials'].reject { |tutorial| tutorial.key?('custom') }.map do |tutorial|
            Gtn::TopicFilter.fetch_tutorial_material(site, tutorial['topic'], tutorial['name'])
          end
        end.compact

        # Extract EDAM terms from all materials
        edam_terms = materials.map do |material|
          material.fetch('edam_ontology', []).map do |term|
            {
              '@type': 'DefinedTerm',
              '@id': "http://edamontology.org/#{term}",
              inDefinedTermSet: 'http://edamontology.org',
              termCode: term
            }
          end
        end.flatten.uniq

        learning_objectives = materials.map do |material|
          material.fetch('objectives', [])
        end.flatten.compact

        # TODO: add topic edam terms too? Not sure.
        # NOTE: the per-material JSON-LD ("hasPart") used to be generated here but was
        # never emitted (see the commented-out hasPart below), so that work is skipped.
        # To restore it, map materials through
        # generate_material_jsonld(material, site['data'][material['topic_name']], site).

        # Stays nil (serialised as null) when the page has no program.
        syllab = if page['program']
                   page['program'].reject { |s| s['section'].nil? }.map do |section|
                     {
                       '@type': 'Syllabus',
                       name: section['section'],
                       description: section.fetch('description', nil)
                     }
                   end
                 end

        data = {
          '@context': 'https://schema.org',
          '@type': 'Course',
          url: "#{site['url']}#{site['baseurl']}#{page['url']}",
          name: page['title'],
          keywords: page['tags'] || [],
          description: page['description'],

          about: edam_terms, # TeSS, "scientific topics".
          audience: page['audience'], # TeSS: target audience
          # If 'online' is present in the mode, the course is online.
          # Will fail on "this is NOT an online course"
          # Acceptable.
          courseMode: page['mode'],
          startDate: page['date_start'],
          endDate: page['date_end'],
          organizer: organisers, # TeSS only, US spelling, non-standard

          location: page['location'], # TODO, TeSS location
          teaches: learning_objectives, # TeSS, "learning objectives"
          # timeRequired: 'P1D', # TeSS, "duration", TODO: calculate from start/end date, not implemented in scraper currently.

          availableLanguage: ['en'], # TODO: support other languages
          inLanguage: ['en'], # TODO: support other languages
          # Not provided: courseCode, coursePrerequisites, educationalCredentialAwarded,
          # financialAidEligible, numberOfCredits, occupationalCredentialAwarded,
          # totalHistoricalEnrollment, assesses, competencyRequired, educationalAlignment,
          # educationalLevel, educationalUse, learningResourceType.

          funder: funders, # Org or person
          funding: funding, # Grant
          publisher: GTN,
          provider: GTN,
          syllabusSections: syllab
          # Session materials
          # TODO: not currently parsed by TeSS, google just complains about it, so we're leaving it out.
          # hasPart: parts,
        }

        # +page+ may expose its path as a method or only as a hash entry; try the
        # method first and fall back to the hash lookup on any error.
        begin
          data['dateModified'] = Gtn::ModificationTimes.obtain_time(page.path)
          data['datePublished'] = Gtn::PublicationTimes.obtain_time(page.path)
        rescue StandardError
          data['dateModified'] = Gtn::ModificationTimes.obtain_time(page['path'])
          data['datePublished'] = Gtn::PublicationTimes.obtain_time(page['path'])
        end

        cover = page['cover']
        if cover
          # Site-relative covers are made absolute; http(s) covers are used verbatim.
          data['image'] = cover.match?(/^http/) ? [cover] : ["#{site['url']}#{site['baseurl']}#{cover}"]
        end

        # We CANNOT guarantee A11Y
        # data.update(A11Y)
        cost = page['cost']
        offer = nil
        if cost && cost.downcase == 'free'
          data['isAccessibleForFree'] = true
          offer = {
            '@type': 'Offer',
            price: 0,
            priceCurrency: 'EUR',
            category: 'Free',
            isAccessibleForFree: true
          }
        elsif cost
          # Any other cost is read as "<amount> <currency>".
          amount, currency = cost.split
          data['isAccessibleForFree'] = false
          offer = {
            '@type': 'Offer',
            price: amount,
            priceCurrency: currency,
            isAccessibleForFree: false,
            category: 'Paid'
            # TODO: this can be more advanced but we need to collect start/end times, and timezone.
          }
        end

        # TODO: this is wrong in a whole host of scenarios like incl weekends.
        # A missing *or nil* end date means the event ends on its start date.
        last_day = page['date_end'] || page['date_start']
        course_days = [(last_day - page['date_start']).to_i, 1].max

        data['hasCourseInstance'] = [
          {
            '@type': 'CourseInstance',
            courseMode: page['mode'],
            # courseWorkload: "A daily course running from #{page['date_start']} to #{page['date_end']}",
            offers: offer,
            instructor: instructors,
            isAccessibleForFree: data['isAccessibleForFree'],
            courseSchedule: {
              '@type': 'Schedule',
              startDate: page['date_start'],
              endDate: last_day,
              repeatCount: course_days,
              repeatFrequency: 'daily' # Contrary to schema.org spec, this is what Google wants.
            },
            courseWorkload: "P#{course_days}D"
          }
        ]

        # Without a declared cost there is no offer; emit an empty list, not [null].
        data['offers'] = [offer].compact

        location = page['location']
        if location.respond_to?(:keys) && location.keys.length > 1
          # Overwrite the raw location set above (same Symbol key) so the JSON contains
          # a single "location" member rather than a duplicate.
          data[:location] = {
            '@type': 'Place',
            name: location['name'],
            address: {
              '@type': 'PostalAddress',
              streetAddress: location.fetch('address', nil),
              addressLocality: location.fetch('city', nil),
              addressRegion: location.fetch('region', nil),
              postalCode: location.fetch('postcode', nil),
              addressCountry: location.fetch('country', nil)
            }
          }
        end

        JSON.pretty_generate(data)
      end
495

496
      ##
497
      # Generate the JSON-LD metadata for a learning pathway
498
      # Parameters:
499
      # +page+:: The page object.
500
      # +site+:: The +Jekyll::Site+ site object.
501
      # Returns:
502
      # +String+:: The JSON-LD metadata, rendered as pretty-printed JSON.
503
      #
504
      # Examples:
505
      #   {{ page | generate_learning_pathway_jsonld: site }}
506
      def generate_learning_pathway_jsonld(page, site)
        # Returns the schema.org Course JSON-LD for a learning pathway page as a
        # pretty-printed JSON String. A page without a 'pathway' entry is treated as
        # having no sections instead of raising.
        sections = page['pathway'] || []

        # Collect every non-custom tutorial referenced by the pathway; lookups that
        # return nil are dropped.
        materials = sections.flat_map do |section|
          next [] unless section.key?('tutorials')

          section['tutorials'].reject { |tutorial| tutorial.key?('custom') }.map do |tutorial|
            Gtn::TopicFilter.fetch_tutorial_material(site, tutorial['topic'], tutorial['name'])
          end
        end.compact

        # Extract EDAM terms from all materials
        edam_terms = materials.map do |material|
          material.fetch('edam_ontology', []).map do |term|
            {
              '@type': 'DefinedTerm',
              '@id': "http://edamontology.org/#{term}",
              inDefinedTermSet: 'http://edamontology.org',
              termCode: term
            }
          end
        end.flatten.uniq

        learning_objectives = materials.map do |material|
          material.fetch('objectives', [])
        end.flatten.compact

        # Funders and grants are aggregated over all materials and de-duplicated.
        funders = materials.map do |material|
          Gtn::Contributors.get_funders(site, material).map do |x|
            to_pfo_jsonld(x, site, json: false)
          end
        end.flatten.uniq.compact

        funding = materials.map do |material|
          Gtn::Contributors.get_grants(site, material).map do |x|
            to_pfo_jsonld(x, site, json: false)
          end
        end.flatten.uniq.compact

        # TODO: add topic edam terms too? Not sure.
        # NOTE: the per-material JSON-LD ("hasPart") used to be generated here but was
        # never emitted (see the commented-out hasPart below), so that work is skipped.
        # To restore it, map materials through
        # generate_material_jsonld(material, site['data'][material['topic_name']], site).

        syllab = sections.reject { |s| s['section'].nil? }.map do |section|
          {
            '@type': 'Syllabus',
            name: section['section'],
            description: section.fetch('description', nil)
          }
        end

        data = {
          '@context': 'https://schema.org',
          '@type': 'Course',
          url: "#{site['url']}#{site['baseurl']}#{page['url']}",
          name: "Learning Pathway #{page['title']}",
          keywords: page['tags'] || [],
          description: page['description'],
          about: edam_terms, # TeSS, "scientific topics".
          audience: page['audience'], # TeSS: target audience
          teaches: learning_objectives, # TeSS, "learning objectives"
          availableLanguage: ['en'], # TODO: support other languages
          inLanguage: ['en'], # TODO: support other languages
          # Not provided: courseCode, coursePrerequisites, educationalCredentialAwarded,
          # financialAidEligible, numberOfCredits, occupationalCredentialAwarded,
          # totalHistoricalEnrollment, assesses, competencyRequired, educationalAlignment,
          # educationalLevel, educationalUse, learningResourceType.

          funder: funders, # Org or person
          funding: funding, # Grant
          publisher: GTN,
          provider: GTN,
          syllabusSections: syllab
          # Session materials
          # TODO: not currently parsed by TeSS, google just complains about it, so we're leaving it out.
          # hasPart: parts,
        }

        # +page+ may expose its path as a method or only as a hash entry; try the
        # method first and fall back to the hash lookup on any error.
        begin
          data['dateModified'] = Gtn::ModificationTimes.obtain_time(page.path)
          data['datePublished'] = Gtn::PublicationTimes.obtain_time(page.path)
        rescue StandardError
          data['dateModified'] = Gtn::ModificationTimes.obtain_time(page['path'])
          data['datePublished'] = Gtn::PublicationTimes.obtain_time(page['path'])
        end

        cover = page['cover']
        if cover
          # Site-relative covers are made absolute; http(s) covers are used verbatim.
          data['image'] = cover.match?(/^http/) ? [cover] : ["#{site['url']}#{site['baseurl']}#{cover}"]
        end

        # We CANNOT guarantee A11Y
        # data.update(A11Y)
        # Learning pathways are always reported as free.
        data['isAccessibleForFree'] = true
        offer = {
          '@type': 'Offer',
          price: 0,
          priceCurrency: 'EUR',
          category: 'Free',
          isAccessibleForFree: true
        }
        data['offers'] = [offer]

        # TODO: this is basically just wrong.
        data['hasCourseInstance'] = [
          {
            '@type': 'CourseInstance',
            courseMode: 'online',
            offers: offer,
            isAccessibleForFree: data['isAccessibleForFree']
          }
        ]

        JSON.pretty_generate(data)
      end
649

650
      ##
651
      # Convert a material to JSON-LD, intended to be used in Jekyll Liquid templates.
652
      # Parameters:
653
      # +material+:: The material object.
654
      # +topic+:: The topic object.
655
      # +site+:: The +Jekyll::Site+ site object.
656
      #
657
      # Returns:
658
      # +String+:: The JSON-LD metadata.
659
      #
660
      # Examples:
661
      #   {{ material | to_jsonld: topic, site }}
662
      def to_jsonld(material, topic, site)
        metadata = generate_material_jsonld(material, topic, site)

        # generate_material_jsonld returns the literal String '{}' when no topic is
        # given. That is already JSON; running it through the generator again would
        # encode it as a quoted JSON string ("{}") instead of an empty object.
        return metadata if metadata.is_a?(String)

        JSON.pretty_generate(metadata)
      end
665

666
      ##
667
      # Convert a material to JSON-LD. Intended to be used by the filters which you should call in templates.
668
      #
669
      # Parameters:
670
      # +material+:: The material object.
671
      # +topic+:: The topic object.
672
      # +site+:: The +Jekyll::Site+ site object.
673
      #
674
      # Returns:
675
      # +Hash+:: The JSON-LD metadata.
676
      def generate_material_jsonld(material, topic, site)
677
        langCodeMap = {
678
          "en" => 'English',
679
          "es" => 'Español',
680
          "fr" => 'Français',
681
        }
682

683
        eduLevel = {
684
          'Introductory' => 'Beginner',
685
          'Intermediate' => 'Intermediate',
686
          'Advanced' => 'Advanced'
687
        }
688
        return '{}' if !topic
689

690
        topic_desc = {
691
          '@type': 'CreativeWork',
692
          name: (topic['title']).to_s,
693
          description: (topic['summary']).to_s,
694
          url: "#{site['url']}#{site['baseurl']}/topics/#{topic['name']}/"
695
        }
696

697
        # aggregate everything
698
        data = {
699
          # Properties from Course
700
          '@context': 'http://schema.org',
701
          '@type': 'LearningResource',
702

703
          # Required for BioSchemas
704
          'http://purl.org/dc/terms/conformsTo': {
705
            '@id': 'https://bioschemas.org/profiles/TrainingMaterial/1.0-RELEASE',
706
            '@type': 'CreativeWork'
707
          },
708

709
          # Properties from CreativeWork
710
          # "about" described below
711
          #
712
          # "accountablePerson":,
713
          # "aggregateRating":,
714
          # "alternativeHeadline":,
715
          # "associatedMedia":,
716
          audience: {
717
            '@type': 'EducationalAudience',
718
            educationalRole: EDU_ROLES[topic['type']]
719
          },
720
          # "audio":,
721
          # "award":,
722
          # "author" described below
723
          # "character":,
724
          citation: [
725
            {
726
              '@type': 'CreativeWork',
727
              name: 'Galaxy Training: A Powerful Framework for Teaching!',
728
              url: 'https://doi.org/10.1371/journal.pcbi.1010752'
729
            },
730
            {
731
              '@type': 'CreativeWork',
732
              name: 'Community-Driven Data Analysis Training for Biology',
733
              url: 'https://doi.org/10.1016/j.cels.2018.05.012'
734
            }
735
          ],
736
          # "comment":,
737
          # "commentCount":,
738
          # "contentLocation":,
739
          # "contentRating":,
740
          # "contentReferenceTime":,
741
          # "contributor" described below
742
          # copyrightHolder: GTN,
743
          # copyrightNotice: m
744
          # "copyrightYear":,
745
          # "correction":,
746
          # "creator":,
747
          # "dateCreated":,
748
          # "datePublished":,
749
          discussionUrl: site['gitter_url'],
750
          # "editor":,
751
          # "educationalAlignment":,
752
          # "educationalUse":,
753
          # "encoding":,
754
          # "encodingFormat":,
755
          # "exampleOfWork":,
756
          # "expires":,
757
          # "funder": funding,
758
          # "genre":,
759
          # "hasPart" described below
760
          headline: (material['title']).to_s,
761
          # "interactionStatistic":,
762
          interactivityType: 'mixed',
763
          isAccessibleForFree: true,
764
          # "isBasedOn":,
765
          isFamilyFriendly: true,
766
          # "isPartOf" described below
767
          # "keywords": described below
768
          # "learningResourceType" described below
769
          license: 'https://spdx.org/licenses/CC-BY-4.0.html',
770
          # "locationCreated":,
771
          # "mainEntity":,
772
          # "material":,
773
          # "mentions" described below
774
          # "offers":,
775
          # "position":,
776
          producer: GTN,
777
          provider: GTN,
778
          # "publication":,
779
          # "publisher":,
780
          # "publisherImprint":,
781
          # "publishingPrinciples":,
782
          # "recordedAt":,
783
          # "releasedEvent":,
784
          # "review":,
785
          # "schemaVersion":,
786
          # "sdDatePublished":,
787
          # "sdLicense":,
788
          # "sdPublisher":,
789
          sourceOrganization: GTN,
790
          # "spatialCoverage":,
791
          # "sponsor":,
792
          # "temporalCoverage":,
793
          # "text":,
794
          # "thumbnailUrl":,
795
          # "timeRequired" described below
796
          # "translationOfWork":,
797
          # "translator": Google Translate???,
798
          # "typicalAgeRange":,
799
          # "version":,
800
          # "video":,
801
          # "workExample":,
802
          # "workTranslation":,
803

804
          # Properties from Thing
805
          # "additionalType":,
806
          # "alternateName":,
807
          # "description" described below
808
          # "disambiguatingDescription":,
809
          # "image":,
810
          # "mainEntityOfPage":,
811
          # "name" described below
812
          # "potentialAction":,
813
          # "sameAs":,
814
          # "subjectOf":,
815
          # "url" described below
816
          workTranslation: [],
817
          creativeWorkStatus: material['draft'] ? 'Draft' : 'Active',
818
        }
819

820
        if material.key?('pub_date')
821
          data['dateModified'] = material['mod_date']
822
          data['datePublished'] = material['pub_date']
823
        else
824
          begin
825
            data['dateModified'] = Gtn::ModificationTimes.obtain_time(material.path)
826
            data['datePublished'] = Gtn::PublicationTimes.obtain_time(material.path)
827
          rescue StandardError
828
            data['dateModified'] = Gtn::ModificationTimes.obtain_time(material['path'])
829
            data['datePublished'] = Gtn::PublicationTimes.obtain_time(material['path'])
830
          end
831
        end
832

833
        if material.key?('copyright')
834
          # copyrightHolder: GTN,
835
          data['copyrightNotice'] = material['copyright']
836
        else
837
          # I'm not sure this is accurate.
838
          data['copyrightHolder'] = GTN
839
        end
840

841
        funders = Gtn::Contributors.get_funders(site, material).map do |x|
842
          to_pfo_jsonld(x, site, json: false)
843
        end
844
        grants = Gtn::Contributors.get_grants(site, material).map do |x|
845
          to_pfo_jsonld(x, site, json: false)
846
        end
847

848
        data['funder'] = funders
849
        data['funding'] = grants
850

851
        data['identifier'] = "https://gxy.io/GTN:#{material['short_id']}" if material.key?('short_id')
852

853
        data.update(A11Y)
854

855
        actual_material = Gtn::TopicFilter.fetch_tutorial_material(site, material['topic_name'], material['tutorial_name'])
856

857
        # info depending if tutorial, hands-on or slide level
858
        # parts = []
859
        # data['hasPart'] = parts
860

861
        mentions = []
862
        description = []
863

864
        data['isPartOf'] = topic_desc
865

866
        data['abstract'] = material
867
          .fetch('content', '')
868
          .strip
869
          .split("\n")
870
          .first
871

872
        if ! data['abstract'].nil?
873
          data['abstract'] = data['abstract']
874
            .gsub(/\{\{\s*site.baseurl\s*\}\}/, url_prefix(site))
875
            .gsub(/\[{{\s*site.url\s*}}/, '[' + url_prefix(site))
876
            .gsub(/{% link (topics[^%]*).md %}/, url_prefix(site) + '\1.html')
877
            .gsub(/{% link (topics[^%]*).html %}/, url_prefix(site) + '\1.html')
878
            .gsub(/\s*\(?{%\s*cite [^}]+\s*%}\)?/, '')
879
            .gsub('{{ site.github_repository }}', safe_site_config(site, 'github_repository', 'https://example.com'))
880
            .gsub(/{% snippet ([^%]*) %}/, '')
881
            .gsub(/{% include ([^%]*) %}/, '')
882
        end
883

884
        description.push("## Abstract\n\n#{data['abstract']}\n\n")
885

886
        if (material['name'] == 'tutorial.md') || (material['name'] == 'slides.html')
887

888
          if material['name'] == 'tutorial.md'
889
            data['learningResourceType'] = 'e-learning'
890
            description.push("## About This Material\n\nThis is a Hands-on Tutorial from the GTN which is usable either for individual self-study, or as a teaching material in a classroom.\n\n")
891
          else
892
            data['learningResourceType'] = 'slides'
893
          end
894

895
          data['name'] = material['title']
896
          data['url'] = "#{site['url']}#{site['baseurl']}#{material['url']}"
897

898
          # Requires https://github.com/galaxyproject/training-material/pull/4271
899
          data['version'] = Gtn::ModificationTimes.obtain_modification_count(material['path'])
900

901
          # Time required
902
          if material.key?('time_estimation') && !material['time_estimation'].nil?
903
            data['timeRequired'] = "PT#{material['time_estimation'].upcase}"
904
          end
905

906
          # Description with questions, objectives and keypoints
907
          if material.key?('questions') && !material['questions'].nil? && material['questions'].length.positive?
908
            questions = material['questions'].join("\n - ")
909
            description.push("## Questions this #{material['type']} will address\n\n - #{questions}\n\n")
910
          end
911
          if material.key?('objectives') && !material['objectives'].nil? && material['objectives'].length.positive?
912
            objectives = material['objectives'].map{|x| "- #{x}"}.join("\n")
913
            description.push("## Learning Objectives\n\n#{objectives}\n\n")
914
            data['teaches'] = objectives
915
          end
916
          if material.key?('keypoints') && !material['keypoints'].nil? && material['keypoints'].length.positive?
917
            keypoints = material['keypoints'].join("\n - ")
918
            description.push("## Key Points\n\n - #{keypoints}\n\n")
919
          end
920

921
          # Keywords
922
          data['keywords'] = [topic['title']] + (material['tags'] || [])
923
          # Zenodo links
924
        end
925

926
        # Mentions are 'external resources' in TeSS.
927
        # This could be expanded with
928
        # - supported servers
929
        # - tools and resources used (e.g. Galaxy) or tools linked to the TS.
930
        # - slides (if tutorial) and tutorial (if slides)
931
        # - other materials in the same topic?
932
        if actual_material.key?('workflows')
933
          mentions.push({
934
                          '@type': 'Thing',
935
                          url: "#{site['url']}#{site['baseurl']}#{material['dir']}workflows/",
936
                          name: "Associated Workflows"
937
                        })
938
        end
939

940
        # Notebooks
941
        if actual_material.key?('notebook')
942
          if actual_material['notebook']['language'] != 'r'
943
            # Python, Bash, SQL (all via jupyter)
944
            url = "#{site['url']}#{site['baseurl']}#{material['dir']}#{material['topic_name']}-#{material['tutorial_name']}.ipynb"
945
            mentions.push({
946
                            '@type': 'Thing',
947
                            url: url,
948
                            name: "Jupyter Notebook (with Solutions)"
949
                          })
950
            mentions.push({
951
                            '@type': 'Thing',
952
                            url: url.gsub(/\.ipynb$/, '-course.ipynb'),
953
                            name: "Jupyter Notebook (without Solutions)"
954
                          })
955
          elsif actual_material['notebook']['language'] == 'r' # Actual R
956
            url = "#{site['url']}#{site['baseurl']}#{material['dir']}#{material['topic_name']}-#{material['tutorial_name']}.Rmd"
957
            mentions.push({
958
                            '@type': 'Thing',
959
                            url: url,
960
                            name: "Quarto/RMarkdown Notebook"
961
                          })
962
            mentions.push({
963
                            '@type': 'Thing',
964
                            url: "https://bio.tools/tool/rstudio",
965
                            name: "RStudio"
966
                          })
967
          end
968
        end
969

970
        # Tools
971
        uses_tools = false
972
        (actual_material['tools'] || []).each do |tool|
973
          if site.data['tool-meta'].nil?
974
            next
975
          end
976

977
          toolmeta = site.data['tool-meta'][tool]
978
          if toolmeta.nil?
979
            next
980
          end
981

982
          if toolmeta['bio.tools'].length.positive?
983
            mentions.push({
984
                            '@type': 'Thing',
985
                            url: "https://bio.tools/tool/#{toolmeta['bio.tools']}",
986
                            name: toolmeta.fetch('bio.tools_name', toolmeta['name'])
987
                          })
988
          end
989
          uses_tools = true
990
        end
991
        if uses_tools
992
          mentions.push({
993
                          '@type': 'Thing',
994
                          url: "https://bio.tools/tool/galaxy",
995
                          name: "Galaxy"
996
                        })
997
        end
998

999
        # Zenodo link out
1000
        if actual_material.key?('zenodo_link') && ! actual_material['zenodo_link'].nil?
1001
          if actual_material['zenodo_link'].length.positive?
1002
            mentions.push({
1003
                            '@type': 'Thing',
1004
                            url: (actual_material['zenodo_link']).to_s,
1005
                            name: "Associated Training Datasets"
1006
                          })
1007
          end
1008
        end
1009

1010
        if description.empty?
1011
          description.push(material.fetch('content', '').strip.split("\n").first)
1012
        end
1013
        data['description'] = description.join("\n")
1014

1015
        data['inLanguage'] = if material.key?('lang')
1016
                               {
1017
                                 '@type': 'Language',
1018
                                 name: langCodeMap[material['lang']],
1019
                                 alternateName: material['lang']
1020
                               }
1021
                             else
1022
                               {
1023
                                 '@type': 'Language',
1024
                                 name: 'English',
1025
                                 alternateName: 'en'
1026
                               }
1027
                             end
1028

1029
        # Course requirements (material + topic)
1030
        reqs = []
1031
        reqs.push(*topic['requirements']) if topic.key?('requirements')
1032
        reqs.push(*material['requirements']) if material.key?('requirements')
1033
        if !reqs.empty?
1034
          coursePrerequisites = []
1035
          reqs.each do |req|
1036
            if req['type'] == 'internal'
1037
              if req.key?('tutorials')
1038
                (req['tutorials']).each do |tuto|
1039
                  (site['pages']).each do |page|
1040
                    if ((page['name'] == 'tutorial.md') || (page['name'] == 'slides.html')) &&
1041
                       ((page['topic_name'] == req['topic_name']) && (page['tutorial_name'] == tuto))
1042
                      # slides
1043
                      if page['name'] == 'slides.html'
1044
                        coursePrerequisites.push(
1045
                          {
1046
                            '@context': 'http://schema.org',
1047
                            '@type': 'LearningResource',
1048
                            url: "#{site['url']}#{site['baseurl']}/topics/#{req['topic_name']}/" \
1049
                                 "tutorials/#{tuto}/slides.html",
1050
                            name: (page['title']).to_s,
1051
                            description: "Slides for '#{page['title']}' tutorial",
1052
                            learningResourceType: 'slides',
1053
                            interactivityType: 'expositive',
1054
                            provider: GTN
1055
                          }
1056
                        )
1057
                        if page['hands_on_url']
1058
                          coursePrerequisites.push(
1059
                            {
1060
                              '@context': 'http://schema.org',
1061
                              '@type': 'LearningResource',
1062
                              url: (page['hands_on_url']).to_s,
1063
                              learningResourceType: 'e-learning',
1064
                              interactivityType: 'expositive',
1065
                            }
1066
                          )
1067
                        end
1068
                      end
1069
                      # hands-on
1070
                      if page['name'] == 'tutorial.md'
1071
                        coursePrerequisites.push(
1072
                          {
1073
                            '@context': 'http://schema.org',
1074
                            '@type': 'LearningResource',
1075
                            url: "#{site['url']}#{site['baseurl']}/topics/#{req['topic_name']}/tutorials" \
1076
                                 "/#{tuto}/tutorial.html",
1077
                            name: (page['title']).to_s,
1078
                            description: "Hands-on for '#{page['title']}' tutorial",
1079
                            learningResourceType: 'e-learning',
1080
                            interactivityType: 'expositive',
1081
                            provider: GTN
1082
                          }
1083
                        )
1084
                      end
1085
                    end
1086
                  end
1087
                end
1088
              else
1089
                coursePrerequisites.push(
1090
                  {
1091
                    '@context': 'http://schema.org',
1092
                    '@type': 'LearningResource',
1093
                    url: "#{site['url']}#{site['baseurl']}/topics/#{req['topic_name']}/",
1094
                    name: (site['data'][req['topic_name']]['title']).to_s,
1095
                    description: (site['data'][req['topic_name']]['title']).to_s,
1096
                    provider: GTN
1097
                  }
1098
                )
1099
              end
1100
            elsif req['type'] == 'external'
1101
              coursePrerequisites.push({
1102
                                         '@type': 'CreativeWork',
1103
                                         url: (req['link']).to_s,
1104
                                         name: (req['title']).to_s
1105
                                       })
1106
            else
1107
              coursePrerequisites.push((req['title']).to_s)
1108
            end
1109
          end
1110
          data['competencyRequired'] = coursePrerequisites.uniq
1111
        end
1112

1113
        # Add contributors/authors
1114
        if material.key?('contributors') || material.key?('contributions')
1115
          authors = Gtn::Contributors.get_authors(material).map do |x|
1116
            generate_person_jsonld(x, Gtn::Contributors.fetch_contributor(site, x), site)
1117
          end
1118

1119
          data['author'] = authors
1120
        end
1121

1122
        # Add non-author contributors
1123
        if material.key?('contributions')
1124
          data['contributor'] = Gtn::Contributors.get_non_authors(material).map do |x|
1125
            generate_person_jsonld(x, site['data']['contributors'][x], site)
1126
          end
1127
        end
1128

1129
        about = []
1130
        about.push(topic_desc)
1131
        edam_terms = topic.fetch('edam_ontology', []) | material.fetch('edam_ontology', [])
1132

1133
        about += edam_terms.map do |term|
1134
          {
1135
            '@type': 'DefinedTerm',
1136
            '@id': "http://edamontology.org/#{term}",
1137
            inDefinedTermSet: 'http://edamontology.org',
1138
            termCode: term,
1139
            # "name": ,
1140
            url: 'https://bioportal.bioontology.org/ontologies/EDAM/?p=classes&conceptid=' \
1141
                 "http%3A%2F%2Fedamontology.org%2F#{term}"
1142
          }
1143
        end
1144

1145
        data['about'] = about
1146

1147
        data['educationalLevel'] = material.key?('level') ? eduLevel[material['level']] : 'Beginner'
1148
        data['mentions'] = mentions
1149

1150
        data
1151
      end
1152
    end
1153
  end
1154
end
1155

1156
# Register the JsonldFilter module with Liquid so that the methods it defines
# can be called as filters from templates.
Liquid::Template.register_filter(Jekyll::Filters::JsonldFilter)
1157

1158
Product

Resources

Company