CoCalc -- jekyll-topic-filter.rb

GitHub Repository: galaxyproject/training-material
Path: blob/main/_plugins/jekyll-topic-filter.rb
¹⁶⁷⁷ views
1
# frozen_string_literal: true
2

3
require 'json'
4
require 'yaml'
5
require './_plugins/gtn'
6
require './_plugins/util'
7
require 'securerandom'
8

9
class Array
10
  def cumulative_sum
11
    sum = 0
12
    self.map{|x| sum += x}
13
  end
14
end
15

16
module Gtn
17
  # The main GTN module to parse tutorial.md and slides.html and topics into useful lists of things that can be shown on topic pages, i.e. "materials" (a possible combination of tutorial + slides)
18
  #
19
  # This is by far the most complicated module and the least
20
  # disaggregated/modular part of the GTN infrastructure.
21
  # TopicFilter.resolve_material is probably the single most important function
22
  # in the entire suite.
23
  module TopicFilter
24

25

26
    ##
27
    # This function returns a list of all the topics that are available.
28
    # Params:
29
    # +site+:: The +Jekyll::Site+ object
30
    # Returns:
31
    # +Array+:: The list of topics
32
    def self.list_topics(site)
33
      list_topics_h(site).keys
34
    end
35

36
    def self.list_topics_h(site)
37
      site.data.select { |_k, v| v.is_a?(Hash) && v.key?('editorial_board') }
38
    end
39

40
    ##
41
    # This function returns a list of all the topics that are available.
42
    # Params:
43
    # +site+:: The +Jekyll::Site+ object
44
    # Returns:
45
    # +Array+:: The topic objects themselves
46
    def self.enumerate_topics(site)
47
      list_topics_h(site).values
48
    end
49

50
    ##
51
    # Setup the local cache via +Jekyll::Cache+
52
    def self.cache
53
      @@cache ||= Jekyll::Cache.new('GtnTopicFilter')
54
    end
55

56
    ##
57
    # Fill the cache with all the topics if it hasn't been done already. Safe to be called multiple times.
58
    # Params:
59
    # +site+:: The +Jekyll::Site+ object
60
    # Returns:
61
    # +nil+
62
    def self.fill_cache(site)
63
      return if site.data.key?('cache_topic_filter')
64

65
      Jekyll.logger.debug '[GTN/TopicFilter] Begin Cache Prefill'
66
      site.data['cache_topic_filter'] = {}
67

68
      # For each topic
69
      list_topics(site).each do |topic|
70
        site.data['cache_topic_filter'][topic] = filter_by_topic(site, topic)
71
      end
72
      Jekyll.logger.debug '[GTN/TopicFilter] End Cache Prefill'
73
    end
74

75
    ##
76
    # This function returns a list of all the materials that are available for a specific topic.
77
    # Params:
78
    # +site+:: The +Jekyll::Site+ object
79
    # +topic_name+:: The name of the topic
80
    # Returns:
81
    # +Array+:: The list of materials
82
    def self.topic_filter(site, topic_name)
83
      fill_cache(site)
84
      site.data['cache_topic_filter'][topic_name]
85
    end
86

87
    ##
88
    # This function returns a list of all the materials that are available for a
89
    # specific topic, but this time in a structured manner
90
    # Params:
91
    # +site+:: The +Jekyll::Site+ object
92
    # +topic_name+:: The name of the topic
93
    # Returns:
94
    # +Hash+:: The subtopics and their materials
95
    #
96
    # Example:
97
    #  {
98
    #   "intro" => {
99
    #     "subtopic" => {"title" => "Introduction", "description" => "Introduction to the topic", "id" => "intro"},
100
    #     "materials" => [
101
    #       ...
102
    #     ]
103
    #   },
104
    #   "__OTHER__" => {
105
    #     "subtopic" => {"title" => "Other", "description" => "Other materials", "id" => "__OTHER__"},
106
    #     "materials" => [.. ]
107
    #   }
108
    #  ]
109
    # This method is built with the idea to replace the "topic_filter" command,
110
    # and instead of returning semi-structured data, we will immediately return
111
    # fully structured data for a specific "topic_name" query, like, "admin"
112
    #
113
    # Instead of returning a flat list of tutorials, instead we'll structure
114
    # them properly in subtopics (if they exist) or return the flat list
115
    # otherwise.
116
    #
117
    # This will let us generate new "views" into the tutorial lists, having
118
    # them arranged in new and exciting ways.
119
    def self.list_materials_structured(site, topic_name)
120

121
      fill_cache(site)
122

123
      # Here we want to either return data structured around subtopics
124

125
      if site.data[topic_name]['tag_based'].nil? && site.data[topic_name].key?('subtopics')
126
        # We'll construct a new hash of subtopic => tutorials
127
        out = {}
128
        seen_ids = []
129
        site.data[topic_name]['subtopics'].each do |subtopic, _v|
130
          specific_resources = filter_by_topic_subtopic(site, topic_name, subtopic['id'])
131
          out[subtopic['id']] = {
132
            'subtopic' => subtopic,
133
            'materials' => specific_resources
134
          }
135
          seen_ids += specific_resources.map { |x| x['id'] }
136
        end
137

138
        # And we'll have this __OTHER__ subtopic for any tutorials that weren't
139
        # in a subtopic.
140
        all_topics_for_tutorial = filter_by_topic(site, topic_name)
141
        out['__OTHER__'] = {
142
          'subtopic' => { 'title' => 'Other', 'description' => 'Assorted Tutorials', 'id' => 'other' },
143
          'materials' => all_topics_for_tutorial.reject { |x| seen_ids.include?(x['id']) }
144
        }
145
      elsif site.data[topic_name]['tag_based'] && site.data[topic_name].key?('subtopics')
146
        out = {}
147
        seen_ids = []
148
        tn = topic_name.gsub('by_tag_', '')
149
        materials = filter_by_tag(site, tn)
150

151
        # For each subtopics
152
        site.data[topic_name]['subtopics'].each do |subtopic|
153
          # Find matching tag-based tutorials in our filtered-by-tag materials
154
          specific_resources = materials.select { |x| (x['tags'] || []).include?(subtopic['id']) }
155
          out[subtopic['id']] = {
156
            'subtopic' => subtopic,
157
            'materials' => specific_resources
158
          }
159
          seen_ids += specific_resources.map { |x| x['id'] }
160
        end
161

162
        filter_by_tag(site, tn)
163
        out['__OTHER__'] = {
164
          'subtopic' => { 'title' => 'Other', 'description' => 'Assorted Tutorials', 'id' => 'other' },
165
          'materials' => materials.reject { |x| seen_ids.include?(x['id']) }
166
        }
167
      elsif site.data[topic_name]['tag_based'] # Tag based Topic
168
        # We'll construct a new hash of subtopic(parent topic) => tutorials
169
        out = {}
170
        seen_ids = []
171
        tn = topic_name.gsub('by_tag_', '')
172
        materials = filter_by_tag(site, tn)
173

174
        # Which topics are represented in those materials?
175
        seen_topics = materials.map { |x| x['topic_name'] }.sort
176

177
        # Treat them like subtopics, but fake subtopics.
178
        seen_topics.each do |parent_topic, _v|
179
          specific_resources = materials.select { |x| x['topic_name'] == parent_topic }
180
          out[parent_topic] = {
181
            'subtopic' => { 'id' => parent_topic, 'title' => site.data[parent_topic]['title'], 'description' => nil },
182
            'materials' => specific_resources
183
          }
184
          seen_ids += specific_resources.map { |x| x['id'] }
185
        end
186

187
        # And we'll have this __OTHER__ subtopic for any tutorials that weren't
188
        # in a subtopic.
189
        all_topics_for_tutorial = filter_by_tag(site, tn)
190
        out['__OTHER__'] = {
191
          'subtopic' => { 'title' => 'Other', 'description' => 'Assorted Tutorials', 'id' => 'other' },
192
          'materials' => all_topics_for_tutorial.reject { |x| seen_ids.include?(x['id']) }
193
        }
194
      else
195
        # Or just the list (Jury is still out on this one, should it really be a
196
        # flat list? Or in this identical structure.)
197
        out = {
198
          '__FLAT__' => {
199
            'subtopic' => nil,
200
            'materials' => filter_by_topic(site, topic_name)
201
          }
202
        }
203
      end
204

205
      # Cleanup empty sections
206
      out.delete('__OTHER__') if out.key?('__OTHER__') && out['__OTHER__']['materials'].empty?
207

208
      out.each do |_k, v|
209
        v['materials'].sort_by! { |m| [m.fetch('priority', 1), m['title']] }
210
      end
211

212
      out
213
    end
214

215
    ##
216
    # Fetch a specific tutorial material by topic and tutorial name
217
    # Params:
218
    # +site+:: The +Jekyll::Site+ object
219
    # +topic_name+:: The name of the topic
220
    # +tutorial_name+:: The name of the tutorial
221
    # Returns:
222
    # +Hash+:: The tutorial material
223
    def self.fetch_tutorial_material(site, topic_name, tutorial_name)
224
      if topic_name.nil?
225
        return nil
226
      end
227
      fill_cache(site)
228
      if site.data['cache_topic_filter'][topic_name].nil?
229
        Jekyll.logger.warn "Cannot fetch tutorial material for #{topic_name}"
230
        nil
231
      else
232
        site.data['cache_topic_filter'][topic_name].select { |p| p['tutorial_name'] == tutorial_name }[0]
233
      end
234
    end
235

236
    ##
237
    # Extract the list of tools used in a workflow
238
    # Params:
239
    # +data+:: The Galaxy Workflow JSON data, parsed
240
    # Returns:
241
    # +Array+:: The list of tool IDs
242
    def self.extract_workflow_tool_list(data)
243
      out = data['steps'].select { |_k, v| v['type'] == 'tool' }.map { |_k, v| v['tool_id'] }.compact
244
      out += data['steps'].select do |_k, v|
245
               v['type'] == 'subworkflow'
246
             end.map { |_k, v| extract_workflow_tool_list(v['subworkflow']) }
247
      out
248
    end
249

250
    ##
251
    # Annotation of a path with topic and tutorial information
252
    # Params:
253
    # +path+:: The path to annotate
254
    # +layout+:: The page layout if known
255
    # Returns:
256
    # +Hash+:: The annotation
257
    #
258
    # Example:
259
    #
260
    #   h = Gtn::TopicFilter.annotate_path("topics/assembly/tutorials/velvet-assembly/tutorial.md", nil)
261
    #   h # => {
262
    #     #  "topic"=>"assembly",
263
    #     #  "topic_name"=>"assembly",
264
    #     #  "material"=>"assembly/velvet-assembly",
265
    #     #  "tutorial_name"=>"velvet-assembly",
266
    #     #  "dir"=>"topics/assembly/tutorials/velvet-assembly",
267
    #     #  "type"=>"tutorial"
268
    #     # }
269

270
    def self.annotate_path(path, layout)
271
      parts = path.split('/')
272
      parts.shift if parts[0] == '.'
273

274
      return nil if parts[0] != 'topics'
275

276
      return nil if parts[2] != 'tutorials'
277

278
      return nil if parts.length < 4
279

280
      material = {
281
        'topic' => parts[1], # Duplicate
282
        'topic_name' => parts[1],
283
        'material' => "#{parts[1]}/#{parts[3]}",
284
        'tutorial_name' => parts[3],
285
        'dir' => parts[0..3].join('/'),
286
      }
287

288
      return nil if path =~ %r{/faqs/}
289

290
      return nil if parts[-1] =~ /data[_-]library.yaml/ || parts[-1] =~ /data[_-]manager.yaml/
291

292
      # Check if it's a symlink
293
      material['symlink'] = true if File.symlink?(material['dir'])
294

295
      if parts[4] =~ /tutorial.*\.md/ || layout == 'tutorial_hands_on'
296
        material['type'] = 'tutorial'
297
      elsif parts[4] =~ /slides.*\.html/ || %w[tutorial_slides base_slides introduction_slides].include?(layout)
298
        material['type'] = 'slides'
299
      elsif parts[4] =~ /ipynb$/
300
        material['type'] = 'ipynb'
301
      elsif parts[4] =~ /Rmd$/
302
        material['type'] = 'rmd'
303
      elsif parts[4] == 'workflows'
304
        material['type'] = 'workflow'
305
      elsif parts[4] == 'recordings'
306
        material['type'] = 'recordings'
307
      elsif parts[4] == 'tours'
308
        material['type'] = 'tour'
309
      elsif parts[-1] == 'index.md'
310
        return nil
311
      else
312
        return nil
313
        # material['type'] = 'unknown'
314
      end
315

316
      material
317
    end
318

319
    ##
320
    # Get the list of posts from the site
321
    # Params:
322
    # +site+:: The +Jekyll::Site+ object
323
    # Returns:
324
    # +Array+:: The list of posts
325
    #
326
    # This is a transition period function that can later be removed. It is added
327
    # because with the jekyll version we're using, site.posts is an iterable in
328
    # prod+dev (_config-dev.yml) modes, however! If we access site.posts.docs in
329
    # prod it's fine, while in dev mode, site.posts claims to be an Array (rather
330
    # than I guess a 'posts' object with a docs method). So we check if it has
331
    # docs and use that, otherwise just site.posts should be iterable.
332
    def self.get_posts(site)
333
      # Handle the transition period
334
      if site.posts.respond_to?(:docs)
335
        site.posts.docs
336
      else
337
        site.posts
338
      end
339
    end
340

341
    ##
342
    # Collate the materials into a large hash
343
    # Params:
344
    # +site+:: The +Jekyll::Site+ object
345
    # +pages+:: The list of pages to collate
346
    # Returns:
347
    # +Hash+:: The collated materials
348
    #
349
    # Example:
350
    #   h = collate_materials(site, pages)
351
    #   h # => {
352
    #     # "assembly/velvet-assembly" => {
353
    #     #  "topic" => "assembly",
354
    #     #  "topic_name" => "assembly",
355
    #     #  "material" => "assembly/velvet-assembly",
356
    #     #  "tutorial_name" => "velvet-assembly",
357
    #     #  "dir" => "topics/assembly/tutorials/velvet-assembly",
358
    #     #  "resources" => [
359
    #     #    {
360
    #     #    "type" => "slides",
361
    #     #    "url" => "/topics/assembly/tutorials/velvet-assembly/slides.html",
362
    #     #    "title" => "Slides",
363
    #     #    "priority" => 1
364
    #     #    },
365
    #     #    {
366
    #     #    "type" => "tutorial",
367
    #     #    "url" => "/topics/assembly/tutorials/velvet-assembly/tutorial.html",
368
    #     #    "title" => "Tutorial",
369
    #     #    "priority" => 2
370
    #     #    }
371
    #     #   ]
372
    #     #  }
373
    def self.collate_materials(site, pages)
374
      # In order to speed up queries later, we'll store a set of "interesting"
375
      # pages (i.e. things that are under `topic_name`)
376
      shortlinks = site.data['shortlinks']
377
      shortlinks_reversed = shortlinks['id'].invert
378

379
      interesting = {}
380
      pages.each do |page|
381
        # Skip anything outside of topics.
382
        next if !page.url.include?('/topics/')
383

384
        # Extract the material metadata based on the path
385
        page.data['url'] = page.url
386
        material_meta = annotate_path(page.path, page.data['layout'])
387

388
        # If unannotated then we want to skip this material.
389
        next if material_meta.nil?
390

391
        mk = material_meta['material']
392

393
        if !interesting.key? mk
394
          interesting[mk] = material_meta.dup
395
          interesting[mk].delete('type') # Remove the type since it's specific, not generic
396
          interesting[mk]['resources'] = []
397
        end
398

399
        page.data['topic_name'] = material_meta['topic_name']
400
        page.data['tutorial_name'] = material_meta['tutorial_name']
401
        page.data['dir'] = material_meta['dir']
402
        page.data['short_id'] = shortlinks_reversed[page.data['url']]
403
        page.data['symlink'] = material_meta['symlink']
404

405
        interesting[mk]['resources'].push([material_meta['type'], page])
406
      end
407

408
      interesting
409
    end
410

411
    ##
412
    # Make a label safe for use in mermaid (without ()[]"')
413
    def self.mermaid_safe_label(label)
414
      (label || '')
415
        .gsub('(', '').gsub(')', '')
416
        .gsub('[', '').gsub(']', '')
417
        .gsub('"', '”') # We accept that this is not perfectly correct.
418
        .gsub("'", '’')
419
    end
420

421
    ##
422
    # Build a Mermaid.js compatible graph of a given Galaxy Workflow
423
    #
424
    # TODO: extract into own module along with DOT>
425
    #
426
    # Params:
427
    # +wf+:: The Galaxy Workflow JSON representation
428
    # Returns:
429
    # +String+:: A Mermaid.js compatible graph of the workflow.
430
    def self.mermaid(wf)
431
      # We're converting it to Mermaid.js
432
      # flowchart TD
433
      #     A[Start] --> B{Is it?}
434
      #     B -- Yes --> C[OK]
435
      #     C --> D[Rethink]
436
      #     D --> B
437
      #     B -- No ----> E[End]
438

439
      statements = []
440
      wf['steps'].each_key do |id|
441
        step = wf['steps'][id]
442
        chosen_label = mermaid_safe_label(step['label'] || step['name'])
443

444
        case step['type']
445
        when 'data_collection_input'
446
          statements.append "#{id}[\"ℹ️ Input Collection\\n#{chosen_label}\"];"
447
        when 'data_input'
448
          statements.append "#{id}[\"ℹ️ Input Dataset\\n#{chosen_label}\"];"
449
        when 'parameter_input'
450
          statements.append "#{id}[\"ℹ️ Input Parameter\\n#{chosen_label}\"];"
451
        when 'subworkflow'
452
          statements.append "#{id}[\"🛠️ Subworkflow\\n#{chosen_label}\"];"
453
        else
454
          statements.append "#{id}[\"#{chosen_label}\"];"
455
        end
456

457
        case step['type']
458
        when 'data_collection_input', 'data_input'
459
          statements.append "style #{id} stroke:#2c3143,stroke-width:4px;"
460
        when 'parameter_input'
461
          statements.append "style #{id} fill:#ded,stroke:#393,stroke-width:4px;"
462
        when 'subworkflow'
463
          statements.append "style #{id} fill:#edd,stroke:#900,stroke-width:4px;"
464
        end
465

466
        step = wf['steps'][id]
467
        step['input_connections'].each do |_, v|
468
          # if v is a list
469
          if v.is_a?(Array)
470
            v.each do |v2|
471
              statements.append "#{v2['id']} -->|#{mermaid_safe_label(v2['output_name'])}| #{id};"
472
            end
473
          else
474
            statements.append "#{v['id']} -->|#{mermaid_safe_label(v['output_name'])}| #{id};"
475
          end
476
        end
477

478
        (step['workflow_outputs'] || [])
479
          .reject { |wo| wo['label'].nil? }
480
          .map do |wo|
481
            wo['uuid'] = SecureRandom.uuid.to_s if wo['uuid'].nil?
482
            wo
483
          end
484
          .each do |wo|
485
          statements.append "#{wo['uuid']}[\"Output\\n#{wo['label']}\"];"
486
          statements.append "#{id} --> #{wo['uuid']};"
487
          statements.append "style #{wo['uuid']} stroke:#2c3143,stroke-width:4px;"
488
        end
489
      end
490

491
      "flowchart TD\n" + statements.map { |q| "  #{q}" }.join("\n")
492
    end
493

494
    ##
495
    # Build a DOT graph for a given tutorial file.
496
    #
497
    # TODO: extract into own module along with mermaid.
498
    #
499
    # Params:
500
    # +wf+:: The Galaxy Workflow JSON representation
501
    # Returns:
502
    # +String+:: A DOT graph of the workflow.
503
    def self.graph_dot(wf)
504
      # digraph test {
505
      #   0[shape=box,style=filled,color=lightblue,label="ℹ️ Input Dataset\nBionano_dataset"]
506
      #   1[shape=box,style=filled,color=lightblue,label="ℹ️ Input Dataset\nHi-C_dataset_R"]
507
      #   3 -> 6 [label="output"]
508
      #   7[shape=box,label="Busco"]
509
      #   4 -> 7 [label="out_fa"]
510
      #   8[shape=box,label="Busco"]
511
      #   5 -> 8 [label="out_fa"]
512

513
      statements = [
514
        'node [fontname="Atkinson Hyperlegible", shape=box, color=white,style=filled,color=peachpuff,margin="0.2,0.2"];',
515
        'edge [fontname="Atkinson Hyperlegible"];',
516
      ]
517
      wf['steps'].each_key do |id|
518
        step = wf['steps'][id]
519
        chosen_label = mermaid_safe_label(step['label'] || step['name'])
520

521
        case step['type']
522
        when 'data_collection_input'
523
          statements.append "#{id}[color=lightblue,label=\"ℹ️ Input Collection\\n#{chosen_label}\"]"
524
        when 'data_input'
525
          statements.append "#{id}[color=lightblue,label=\"ℹ️ Input Dataset\\n#{chosen_label}\"]"
526
        when 'parameter_input'
527
          statements.append "#{id}[color=lightgreen,label=\"ℹ️ Input Parameter\\n#{chosen_label}\"]"
528
        when 'subworkflow'
529
          statements.append "#{id}[color=lightcoral,label=\"🛠️ Subworkflow\\n#{chosen_label}\"]"
530
        else
531
          statements.append "#{id}[label=\"#{chosen_label}\"]"
532
        end
533

534
        step = wf['steps'][id]
535
        step['input_connections'].each do |_, v|
536
          # if v is a list
537
          if v.is_a?(Array)
538
            v.each do |v2|
539
              statements.append "#{v2['id']} -> #{id} [label=\"#{mermaid_safe_label(v2['output_name'])}\"]"
540
            end
541
          else
542
            statements.append "#{v['id']} -> #{id} [label=\"#{mermaid_safe_label(v['output_name'])}\"]"
543
          end
544
        end
545

546
        (step['workflow_outputs'] || [])
547
          .reject { |wo| wo['label'].nil? }
548
          .map do |wo|
549
            wo['uuid'] = SecureRandom.uuid.to_s if wo['uuid'].nil?
550
            wo
551
          end
552
          .each do |wo|
553
            statements.append "k#{wo['uuid'].gsub('-', '')}[color=lightseagreen,label=\"Output\\n#{wo['label']}\"]"
554
            statements.append "#{id} -> k#{wo['uuid'].gsub('-', '')}"
555
          end
556
      end
557

558
      "digraph main {\n" + statements.map { |q| "  #{q}" }.join("\n") + "\n}"
559
    end
560

561
    ##
562
    # (PRODUCTION ONLY) Extract a log of commits (hash, timestamp, message) for commits to a specific path
563
    #
564
    # Params:
565
    # +wf_path+:: Path to a file
566
    # Returns:
567
    # +Array+:: An array of {'hash' => ..., 'unix' => 1230, 'message' => 'I did something', 'short_hash' => ... }
568
    def self.git_log(wf_path)
569
      if Jekyll.env != 'production'
570
        return []
571
      end
572

573
      cache.getset(wf_path) do
574
        require 'shellwords'
575

576
        commits = %x[git log --format="%H %at %s" #{Shellwords.escape(wf_path)}]
577
          .split("\n")
578
          .map { |x| x.split(' ', 3) }
579
          .map { |x| { 'hash' => x[0], 'unix' => x[1], 'message' => x[2], 'short_hash' => x[0][0..8] } }
580

581
        commits.map.with_index do |c, i|
582
          c['num'] = commits.length - i
583
          c
584
        end
585
      end
586
    end
587

588
    ##
589
    # Resolve a material from a given collated material. What does that entail? A LOT.
590
    #
591
    # Given a collated material, e.g.
592
    #
593
    #    material = Gtn::TopicFilter.collate_materials(site, site.pages)['proteomics/database-handling']
594
    #    material # =>
595
    #        # {"topic"=>"proteomics",
596
    #        #  "topic_name"=>"proteomics",
597
    #        #  "material"=>"proteomics/database-handling",
598
    #        #  "tutorial_name"=>"database-handling",
599
    #        #  "dir"=>"topics/proteomics/tutorials/database-handling",
600
    #        #  "resources"=>
601
    #        #   [["workflow", #<Jekyll::Page @relative_path="topics/proteomics/tutorials/database-handling/workflows/index.md">],
602
    #        #    ["tour", #<Jekyll::Page @relative_path="topics/proteomics/tutorials/database-handling/tours/proteomics-database-handling-mycroplasma.yaml">],
603
    #        #    ["tour", #<Jekyll::Page @relative_path="topics/proteomics/tutorials/database-handling/tours/proteomics-database-handling.yaml">],
604
    #        #    ["tutorial", #<Jekyll::Page @relative_path="topics/proteomics/tutorials/database-handling/tutorial.md">],
605
    #        #    ["recordings", #<Jekyll::PageWithoutAFile @relative_path="topics/proteomics/tutorials/database-handling/recordings/index.html">],
606
    #        #    ["workflow", #<Jekyll::PageWithoutAFile @relative_path="topics/proteomics/tutorials/database-handling/workflows/wf_database-handling.html">],
607
    #        #    ["workflow", #<Jekyll::PageWithoutAFile @relative_path="topics/proteomics/tutorials/database-handling/workflows/wf_database-handling_mycoplasma.html">]]}
608
    #
609
    # We can then choose to 'resolve' that material, i.e. collect all of the
610
    # relevant information that is needed for it to really be useful. This
611
    # includes things like tools, workflows, etc. Everything is packed into a
612
    # highly annotated 'material' Hash.
613
    #
614
    # You might look below and say "Wow that is ridiculously unnecessarily
615
    # complicated", or, maybe not. But either way, this is what is required to display a full 'learning material'
616
    # on the GTN, and all of the metadata that goes into it.
617
    #
618
    # Some of the highlights are:
619
    # - learning resource metadata (taken from tutorial if it exists, otherwise, from the slides)
620
    # - short ID
621
    # - topic information (topic name/ topic_id)
622
    # - any javascript requirements
623
    # - All associated workflows, and metadata about those workflows (tests, features used, associated test results, mermaid and dot graphs, associated tools, inputs and outputs.)
624
    # - +ref+, +ref_tutorials+, +ref_slides+ that point to the actual Jekyll pages, in case you need those.
625
    # - api URL
626
    # - tools (discovered from the tutorial text + workflows)
627
    # - a list of supported servers for easy display (exact and inexact matches)
628
    # - a matrix of which servers support which versions of those tools, for a full compatibility table (used on maintainer page.)
629
    # - requisite metdata for an admin to install these tools
630
    #
631
    #    resource = Gtn::TopicFilter.collate_materials(site, site.pages)['proteomics/database-handling']
632
    #    material = Gtn::TopicFilter.resolve_material(site, resource)
633
    #    material # =>
634
    #    {"layout"=>"tutorial_hands_on",
635
    #     "title"=>"Protein FASTA Database Handling",
636
    #     "edam_ontology"=>["topic_0121"],
637
    #     "zenodo_link"=>"",
638
    #     "level"=>"Introductory",
639
    #     "questions"=>["How to download protein FASTA databases of a certain organism?", "How to download a contaminant database?", "How to create a decoy database?", "How to combine databases?"],
640
    #     "objectives"=>["Creation of a protein FASTA database ready for use with database search algorithms."],
641
    #     "time_estimation"=>"30m",
642
    #     "key_points"=>
643
    #      ["There are several types of Uniprot databases.",
644
    #       "Search databases should always include possible contaminants.",
645
    #       "For analyzing cell culture or organic samples, search databases should include mycoplasma databases.",
646
    #       "Some peptide search engines depend on decoys to calculate the FDR."],
647
    #     "contributors"=>["stortebecker", "bgruening"],
648
    #     "subtopic"=>"id-quant",
649
    #     "tags"=>["DDA"],
650
    #     "js_requirements"=>{"mathjax"=>nil, "mermaid"=>false},
651
    #     "short_id"=>"T00214",
652
    #     "symlink"=>nil,
653
    #     "url"=>"/topics/proteomics/tutorials/database-handling/tutorial.html",
654
    #     "topic_name"=>"proteomics",
655
    #     "tutorial_name"=>"database-handling",
656
    #     "dir"=>"topics/proteomics/tutorials/database-handling",
657
    #     "redirect_from"=>["/short/proteomics/database-handling", "/short/T00214"],
658
    #     "id"=>"proteomics/database-handling",
659
    #     "ref"=>#<Jekyll::Page @relative_path="topics/proteomics/tutorials/database-handling/tutorial.md">,
660
    #     "ref_tutorials"=>[#<Jekyll::Page @relative_path="topics/proteomics/tutorials/database-handling/tutorial.md">],                                                                                                    "ref_slides"=>[],                                                                                                                                                                                                 "hands_on"=>true,                                                                                                                                                                                                 "slides"=>false,                                                                                                                                                                                                  "mod_date"=>2023-11-09 09:55:09 +0100,
661
    #     "pub_date"=>2017-02-14 13:20:30 +0100,
662
    #     "version"=>29,
663
    #     "workflows"=>
664
    #     "workflows"=>
665
    #      [{"workflow"=>"wf_database-handling.ga",
666
    #        "tests"=>false,
667
    #        "url"=>"https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/database-handling/workflows/wf_database-handling.ga",
668
    #        "url_html"=>"https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/database-handling/workflows/wf_database-handling.html",
669
    #        "path"=>"topics/proteomics/tutorials/database-handling/workflows/wf_database-handling.ga",
670
    #        "wfid"=>"proteomics-database-handling",
671
    #        "wfname"=>"wf-database-handling",
672
    #        "trs_endpoint"=>"https://training.galaxyproject.org/training-material/api/ga4gh/trs/v2/tools/proteomics-database-handling/versions/wf-database-handling",
673
    #        "license"=>nil,
674
    #        "parent_id"=>"proteomics/database-handling",
675
    #        "topic_id"=>"proteomics",
676
    #        "tutorial_id"=>"database-handling",
677
    #        "creators"=>[],
678
    #        "name"=>"Proteomics: database handling",
679
    #        "title"=>"Proteomics: database handling",
680
    #        "version"=>5,
681
    #        "description"=>"Protein FASTA Database Handling",
682
    #        "tags"=>["proteomics"],
683
    #        "features"=>{"report"=>nil, "subworkflows"=>false, "comments"=>false, "parameters"=>false},
684
    #        "workflowhub_id"=>"1204",
685
    #        "history"=>[],
686
    #        "test_results"=>nil,
687
    #        "modified"=>2024-03-18 12:38:44.394831189 +0100,
688
    #        "mermaid"=>
689
    #         "flowchart TD\n  0[\"Protein Database Downloader\"];\n  1[\"Protein Database Downloader\"];\n  2[\"FASTA-to-Tabular\"];\n  0 -->|output_database| 2;\n  3[\"Add column\"];\n  2 -->|output| 3;\n  4[\"Tabular
690
    #    -to-FASTA\"];\n  3 -->|out_file1| 4;\n  5[\"FASTA Merge Files and Filter Unique Sequences\"];\n  4 -->|output| 5;\n  1 -->|output_database| 5;\n  6[\"DecoyDatabase\"];\n  5 -->|output| 6;",
691
    #        "graph_dot"=>
692
    #         "digraph main {\n  node [fontname=\"Atkinson Hyperlegible\", shape=box, color=white,style=filled,color=peachpuff,margin=\"0.2,0.2\"];\n  edge [fontname=\"Atkinson Hyperlegible\"];\n  0[label=\"Protein Data
693
    #    base Downloader\"]\n  1[label=\"Protein Database Downloader\"]\n  2[label=\"FASTA-to-Tabular\"]\n  0 -> 2 [label=\"output_database\"]\n  3[label=\"Add column\"]\n  2 -> 3 [label=\"output\"]\n  4[label=\"Tabular
694
    #    -to-FASTA\"]\n  3 -> 4 [label=\"out_file1\"]\n  5[label=\"FASTA Merge Files and Filter Unique Sequences\"]\n  4 -> 5 [label=\"output\"]\n  1 -> 5 [label=\"output_database\"]\n  6[label=\"DecoyDatabase\"]\n  5 -
695
    #    > 6 [label=\"output\"]\n}",
696
    #        "workflow_tools"=>
697
    #         ["addValue",
698
    #          "toolshed.g2.bx.psu.edu/repos/devteam/fasta_to_tabular/fasta2tab/1.1.1",
699
    #          "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1",
700
    #          "toolshed.g2.bx.psu.edu/repos/galaxyp/dbbuilder/dbbuilder/0.3.1",
701
    #          "toolshed.g2.bx.psu.edu/repos/galaxyp/fasta_merge_files_and_filter_unique_sequences/fasta_merge_files_and_filter_unique_sequences/1.2.0",
702
    #          "toolshed.g2.bx.psu.edu/repos/galaxyp/openms_decoydatabase/DecoyDatabase/2.6+galaxy0"],
703
    #        "inputs"=>[],
704
    #        "outputs"=>
705
    #         [{"annotation"=>"",
706
    #           "content_id"=>"toolshed.g2.bx.psu.edu/repos/galaxyp/dbbuilder/dbbuilder/0.3.1",
707
    #           "errors"=>nil,
708
    #           "id"=>0,
709
    #           "input_connections"=>{},
710
    #           "inputs"=>[],
711
    #           "label"=>nil,
712
    #           "name"=>"Protein Database Downloader",
713
    #           "outputs"=>[{"name"=>"output_database", "type"=>"fasta"}],
714
    #           "position"=>{"bottom"=>380.6000061035156, "height"=>102.60000610351562, "left"=>-110, "right"=>90, "top"=>278, "width"=>200, "x"=>-110, "y"=>278},
715
    #           "post_job_actions"=>{},
716
    #           "tool_id"=>"toolshed.g2.bx.psu.edu/repos/galaxyp/dbbuilder/dbbuilder/0.3.1",
717
    #           "tool_shed_repository"=>{"changeset_revision"=>"c1b437242fee", "name"=>"dbbuilder", "owner"=>"galaxyp", "tool_shed"=>"toolshed.g2.bx.psu.edu"},
718
    #           "tool_state"=>
719
    #            "{\"__input_ext\": \"data\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"source\": {\"from\": \"cRAP\", \"__current_case__\": 1}, \"__page__\": null, \"__rerun_remap_job_id__\":
720
    #    null}",
721
    #           "tool_version"=>"0.3.1",
722
    #           "type"=>"tool",
723
    #           "uuid"=>"6613b72c-2bab-423c-88fc-05edfe9ea8ec",
724
    #           "workflow_outputs"=>[{"label"=>nil, "output_name"=>"output_database", "uuid"=>"2d289b03-c396-46a2-a725-987b6c75ada9"}]},
725
    #          ...
726
    #     "api"=>"https://training.galaxyproject.org/training-material/api/topics/proteomics/tutorials/database-handling/tutorial.json",
727
    #     "tools"=>
728
    #      ["addValue",
729
    #       "toolshed.g2.bx.psu.edu/repos/devteam/fasta_to_tabular/fasta2tab/1.1.1",
730
    #       "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1",
731
    #       "toolshed.g2.bx.psu.edu/repos/galaxyp/dbbuilder/dbbuilder/0.3.1",
732
    #       "toolshed.g2.bx.psu.edu/repos/galaxyp/fasta_merge_files_and_filter_unique_sequences/fasta_merge_files_and_filter_unique_sequences/1.2.0",
733
    #       "toolshed.g2.bx.psu.edu/repos/galaxyp/openms_decoydatabase/DecoyDatabase/2.6+galaxy0"],
734
    #     "supported_servers"=>
735
    #      {"exact"=>[{"url"=>"https://usegalaxy.eu", "name"=>"UseGalaxy.eu", "usegalaxy"=>true}, {"url"=>"https://usegalaxy.org.au", "name"=>"UseGalaxy.org.au", "usegalaxy"=>true}],
736
    #       "inexact"=>[{"url"=>"https://usegalaxy.no/", "name"=>"UseGalaxy.no", "usegalaxy"=>false}]},
737
    #     "supported_servers_matrix"=>
738
    #      {"servers"=>
739
    #        [{"url"=>"http://aspendb.uga.edu:8085/", "name"=>"AGEseq @ AspenDB"},
740
    #         {"url"=>"http://motherbox.chemeng.ntua.gr/anastasia_dev/", "name"=>"ANASTASIA"},
741
    #          ...
742
    #       "tools"=>
743
    #        [{"id"=>"addValue",
744
    #          "servers"=>
745
    #           [{"state"=>"local", "server"=>"http://aspendb.uga.edu:8085/"},
746
    #            {"state"=>"missing", "server"=>"http://motherbox.chemeng.ntua.gr/anastasia_dev/"},
747
    #            {"state"=>"local", "server"=>"http://apostl.moffitt.org/"},
748
    #            {"state"=>"local", "server"=>"http://smile.hku.hk/SARGs"},
749
    #            {"state"=>"local", "server"=>"http://bf2i-galaxy.insa-lyon.fr:8080/"},
750
    #            {"state"=>"local", "server"=>"http://143.169.238.104/galaxy/"},
751
    #            {"state"=>"missing", "server"=>"https://iris.angers.inra.fr/galaxypub-cfbp"},
752
    #            {"state"=>"local", "server"=>"https://cpt.tamu.edu/galaxy-public/"},
753
    #            {"state"=>"missing", "server"=>"https://vm-chemflow-francegrille.eu/"},
754
    #            {"state"=>"local", "server"=>"https://hyperbrowser.uio.no/coloc-stats"},
755
    #            {"state"=>"local", "server"=>"http://corgat.cloud.ba.infn.it/galaxy"},
756
    #            {"state"=>"local", "server"=>"http://cropgalaxy.excellenceinbreeding.org/"},
757
    #            {"state"=>"local", "server"=>"http://dintor.eurac.edu/"},
758
    #            {"state"=>"missing", "server"=>"http://www.freebioinfo.org/"},
759
    #            {"state"=>"local", "server"=>"http://igg.cloud.ba.infn.it/galaxy"},
760
    #     "topic_name_human"=>"Proteomics",
761
    #     "admin_install"=>
762
    #      {"install_tool_dependencies"=>true,
763
    #       "install_repository_dependencies"=>true,
764
    #       "install_resolver_dependencies"=>true,
765
    #       "tools"=>
766
    #        [{"name"=>"fasta_to_tabular", "owner"=>"devteam", "revisions"=>"e7ed3c310b74", "tool_panel_section_label"=>"FASTA/FASTQ", "tool_shed_url"=>"https://toolshed.g2.bx.psu.edu/"},
767
    #         {"name"=>"tabular_to_fasta", "owner"=>"devteam", "revisions"=>"0a7799698fe5", "tool_panel_section_label"=>"FASTA/FASTQ", "tool_shed_url"=>"https://toolshed.g2.bx.psu.edu/"},
768
    #         {"name"=>"dbbuilder", "owner"=>"galaxyp", "revisions"=>"c1b437242fee", "tool_panel_section_label"=>"Get Data", "tool_shed_url"=>"https://toolshed.g2.bx.psu.edu/"},
769
    #         {"name"=>"fasta_merge_files_and_filter_unique_sequences", "owner"=>"galaxyp", "revisions"=>"f546e7278f04", "tool_panel_section_label"=>"FASTA/FASTQ", "tool_shed_url"=>"https://toolshed.g2.bx.psu.edu/"},
770
    #         {"name"=>"openms_decoydatabase", "owner"=>"galaxyp", "revisions"=>"370141bc0da3", "tool_panel_section_label"=>"Proteomics", "tool_shed_url"=>"https://toolshed.g2.bx.psu.edu/"}]},
771
    #     "admin_install_yaml"=>
772
    #      "---\ninstall_tool_dependencies: true\ninstall_repository_dependencies: true\ninstall_resolver_dependencies: true\ntools:\n- name: fasta_to_tabular\n  owner: devteam\n  revisions: e7ed3c310b74\n  tool_panel_s
773
    #    ection_label: FASTA/FASTQ\n  tool_shed_url: https://toolshed.g2.bx.psu.edu/\n- name: tabular_to_fasta\n  owner: devteam\n  revisions: 0a7799698fe5\n  tool_panel_section_label: FASTA/FASTQ\n  tool_shed_url: http
774
    #    s://toolshed.g2.bx.psu.edu/\n- name: dbbuilder\n  owner: galaxyp\n  revisions: c1b437242fee\n  tool_panel_section_label: Get Data\n  tool_shed_url: https://toolshed.g2.bx.psu.edu/\n- name: fasta_merge_files_and
775
    #    _filter_unique_sequences\n  owner: galaxyp\n  revisions: f546e7278f04\n  tool_panel_section_label: FASTA/FASTQ\n  tool_shed_url: https://toolshed.g2.bx.psu.edu/\n- name: openms_decoydatabase\n  owner: galaxyp\n
776
    #      revisions: 370141bc0da3\n  tool_panel_section_label: Proteomics\n  tool_shed_url: https://toolshed.g2.bx.psu.edu/\n",
777
    #     "tours"=>false,
778
    #     "video"=>false,
779
    #     "slides_recordings"=>false,
780
    #     "translations"=>{"tutorial"=>[], "slides"=>[], "video"=>false},
781
    #     "license"=>"CC-BY-4.0",
782
    #     "type"=>"tutorial"}
783

784

785

786

787

788
    def self.resolve_material(site, material)
789
      # We've already
790
      # looked in every /topic/*/tutorials/* folder, and turn these disparate
791
      # resources into a page_obj as well. Most variables are copied directly,
792
      # either from a tutorial, or a slides (if no tutorial is available.) This
793
      # means we do not (cannot) support external_slides AND external_handson.
794
      # This is probably a sub-optimal situation we'll end up fixing someday.
795
      #
796
      tutorials = material['resources'].select { |a| a[0] == 'tutorial' }
797
      slides    = material['resources'].select { |a| a[0] == 'slides' }
798
      tours     = material['resources'].select { |a| a[0] == 'tours' }
799

800
      # Our final "page" object (a "material")
801
      page = nil
802

803
      slide_has_video = false
804
      slide_has_recordings = false
805
      slide_translations = []
806
      page_ref = nil
807

808
      if slides.length.positive?
809
        page = slides.min { |a, b| a[1].path <=> b[1].path }[1]
810
        slide_has_video = page.data.fetch('video', false)
811
        slide_has_recordings = page.data.fetch('recordings', false)
812
        slide_translations = page.data.fetch('translations', [])
813
        page_ref = page
814
      end
815

816
      # No matter if there were slides, we override with tutorials if present.
817
      tutorial_translations = []
818
      if tutorials.length.positive?
819
        page = tutorials.min { |a, b| a[1].path <=> b[1].path }[1]
820
        tutorial_translations = page.data.fetch('translations', [])
821
        page_ref = page
822
      end
823

824
      if page.nil?
825
        Jekyll.logger.error '[GTN/TopicFilter] Could not process material'
826
        return {}
827
      end
828

829
      # Otherwise clone the metadata from it which works well enough.
830
      page_obj = page.data.dup
831
      page_obj['id'] = "#{page['topic_name']}/#{page['tutorial_name']}"
832
      page_obj['ref'] = page_ref
833
      page_obj['ref_tutorials'] = tutorials.map { |a| a[1] }
834
      page_obj['ref_slides'] = slides.map { |a| a[1] }
835

836
      id = page_obj['id']
837

838
      # Sometimes `hands_on` is set to something like `external`, in which
839
      # case it is important to not override it. So we only do that if the
840
      # key isn't already set. Then we choose to set it to a test for the
841
      # tutorial being present. We probably don't need to test both, but it
842
      # is hard to follow which keys are which and safer to test for both in
843
      # case someone edits the code later. If either of these exist, we can
844
      # automatically set `hands_on: true`
845
      page_obj['hands_on'] = tutorials.length.positive? if !page_obj.key?('hands_on')
846

847
      # Same for slides, if there's a resource by that name, we can
848
      # automatically set `slides: true`
849
      page_obj['slides'] = slides.length.positive? if !page_obj.key?('slides')
850

851
      all_resources = slides + tutorials
852
      page_obj['mod_date'] = all_resources
853
                             .map { |p| Gtn::ModificationTimes.obtain_time(p[1].path) }
854
                             .max
855

856
      page_obj['pub_date'] = all_resources
857
                             .map { |p| Gtn::PublicationTimes.obtain_time(p[1].path) }
858
                             .min
859

860
      page_obj['version'] = all_resources
861
                            .map { |p| Gtn::ModificationTimes.obtain_modification_count(p[1].path) }
862
                            .max
863

864
      folder = material['dir']
865

866
      ymls = Dir.glob("#{folder}/quiz/*.yml") + Dir.glob("#{folder}/quiz/*.yaml")
867
      if ymls.length.positive?
868
        quizzes = ymls.map { |a| a.split('/')[-1] }
869
        page_obj['quiz'] = quizzes.map do |q|
870
          quiz_data = YAML.load_file("#{folder}/quiz/#{q}")
871
          quiz_data['id'] = q
872
          quiz_data['path'] = "#{folder}/quiz/#{q}"
873
          quiz_data
874
        end
875
      end
876

877
      # In dev configuration, this breaks for me. Not sure why config isn't available.
878
      domain = if !site.config.nil? && site.config.key?('url')
879
                 "#{site.config['url']}#{site.config['baseurl']}"
880
               else
881
                 'http://localhost:4000/training-material/'
882
               end
883
      # Similar as above.
884
      workflows = Dir.glob("#{folder}/workflows/*.ga") # TODO: support gxformat2
885
      if workflows.length.positive?
886
        workflow_names = workflows.map { |a| a.split('/')[-1] }
887
        page_obj['workflows'] = workflow_names.map do |wf|
888
          wfid = "#{page['topic_name']}-#{page['tutorial_name']}"
889
          wfname = wf.gsub(/.ga/, '').downcase.gsub(/[^a-z0-9]/, '-')
890
          trs = "api/ga4gh/trs/v2/tools/#{wfid}/versions/#{wfname}"
891
          wf_path = "#{folder}/workflows/#{wf}"
892
          wf_json = JSON.parse(File.read(wf_path))
893
          license = wf_json['license']
894
          creators = wf_json['creator'] || []
895
          wftitle = wf_json['name']
896

897
          # /galaxy-intro-101-workflow.eu.json
898
          workflow_test_results = Dir.glob(wf_path.gsub(/.ga$/, '.*.json'))
899
          workflow_test_outputs = {}
900
          workflow_test_results.each do |test_result|
901
            server = workflow_test_results[0].match(/\.(..)\.json$/)[1]
902
            workflow_test_outputs[server] = JSON.parse(File.read(test_result))
903
          end
904
          workflow_test_outputs = nil if workflow_test_outputs.empty?
905

906
          wfhkey = [page['topic_name'], page['tutorial_name'], wfname].join('/')
907

908
          {
909
            'workflow' => wf,
910
            'tests' => Dir.glob("#{folder}/workflows/" + wf.gsub(/.ga/, '-test*')).length.positive?,
911
            'url' => "#{domain}/#{folder}/workflows/#{wf}",
912
            'url_html' => "#{domain}/#{folder}/workflows/#{wf.gsub(/.ga$/, '.html')}",
913
            'path' => wf_path,
914
            'wfid' => wfid,
915
            'wfname' => wfname,
916
            'trs_endpoint' => "#{domain}/#{trs}",
917
            'license' => license,
918
            'parent_id' => page_obj['id'],
919
            'topic_id' => page['topic_name'],
920
            'tutorial_id' => page['tutorial_name'],
921
            'creators' => creators,
922
            'name' => wf_json['name'],
923
            'title' => wftitle,
924
            'version' => Gtn::ModificationTimes.obtain_modification_count(wf_path),
925
            'description' => wf_json['annotation'],
926
            'tags' => wf_json['tags'],
927
            'features' => {
928
              'report' => wf_json['report'],
929
              'subworkflows' => wf_json['steps'].map{|_, x| x['type']}.any?{|x| x == "subworkflow"},
930
              'comments' => (wf_json['comments'] || []).length.positive?,
931
              'parameters' =>  wf_json['steps'].map{|_, x| x['type']}.any?{|x| x == "parameter_input"},
932
            },
933
            'workflowhub_id' => (site.data['workflowhub'] || {}).fetch(wfhkey, nil),
934
            'history' => git_log(wf_path),
935
            'test_results' => workflow_test_outputs,
936
            'modified' => File.mtime(wf_path),
937
            'mermaid' => mermaid(wf_json),
938
            'graph_dot' => graph_dot(wf_json),
939
            'workflow_tools' => extract_workflow_tool_list(wf_json).flatten.uniq.sort,
940
            'inputs' => wf_json['steps'].select { |_k, v| ['data_input', 'data_collection_input', 'parameter_input'].include? v['type'] }.map{|_, v| v},
941
            'outputs' => wf_json['steps'].select { |_k, v| v['workflow_outputs'] && v['workflow_outputs'].length.positive? }.map{|_, v| v},
942
          }
943
        end
944
      end
945

946
      # Really only used for tool list install for ephemeris, not general.
947
      page_obj['api'] = "#{domain}/api/topics/#{page['topic_name']}/tutorials/#{page['tutorial_name']}/tutorial.json"
948

949
      # Tool List
950
      #
951
      # This is exposed in the GTN API to help admins/devs easily get the tool
952
      # list for installation.
953
      page_obj['tools'] = []
954
      page_obj['tools'] += page.content.scan(/{% tool \[[^\]]*\]\(([^)]*)\)\s*%}/) if page_obj['hands_on']
955

956
      page_obj['workflows']&.each do |wf|
957
        wf_path = "#{folder}/workflows/#{wf['workflow']}"
958

959
        page_obj['tools'] += wf['workflow_tools']
960
      end
961
      page_obj['tools'] = page_obj['tools'].flatten.sort.uniq
962

963
      topic = site.data[page_obj['topic_name']]
964
      page_obj['supported_servers'] = if topic['type'] == 'use' || topic['type'] == 'basics'
965
                                        Gtn::Supported.calculate(site.data['public-server-tools'], page_obj['tools'])
966
                                      else
967
                                        []
968
                                      end
969

970
      page_obj['supported_servers_matrix'] = if topic['type'] == 'use' || topic['type'] == 'basics'
971
        Gtn::Supported.calculate_matrix(site.data['public-server-tools'], page_obj['tools'])
972
      else
973
        []
974
      end
975

976

977
      topic_name_human = site.data[page_obj['topic_name']]['title']
978
      page_obj['topic_name_human'] = topic_name_human # TODO: rename 'topic_name' and 'topic_name' to 'topic_id'
979
      admin_install = Gtn::Toolshed.format_admin_install(site.data['toolshed-revisions'], page_obj['tools'],
980
                                                         topic_name_human, site.data['toolcats'])
981
      page_obj['admin_install'] = admin_install
982
      page_obj['admin_install_yaml'] = admin_install.to_yaml
983

984
      page_obj['tours'] = tours.length.positive?
985
      page_obj['video'] = slide_has_video
986
      page_obj['slides_recordings'] = slide_has_recordings
987
      page_obj['translations'] = {}
988
      page_obj['translations']['tutorial'] = tutorial_translations
989
      page_obj['translations']['slides'] = slide_translations
990
      page_obj['translations']['video'] = slide_has_video # Just demand it?
991
      page_obj['license'] = 'CC-BY-4.0' if page_obj['license'].nil?
992
      # I feel less certain about this override, but it works well enough in
993
      # practice, and I did not find any examples of `type: <anything other
994
      # than tutorial>` in topics/*/tutorials/*/tutorial.md but that doesn't
995
      # make it future proof.
996
      page_obj['type'] = 'tutorial'
997

998
      if page_obj.key?('draft') && page_obj['draft']
999
        page_obj['tags'] = [] if !page_obj.key? 'tags'
1000
        page_obj['tags'].push('work-in-progress')
1001
      end
1002

1003
      page_obj
1004
    end
1005

1006
    def self.process_pages(site, pages)
1007
      # eww.
1008
      return site.data['cache_processed_pages'] if site.data.key?('cache_processed_pages')
1009

1010
      materials = collate_materials(site, pages).map { |_k, v| resolve_material(site, v) }
1011
      Jekyll.logger.info '[GTN/TopicFilter] Filling Materials Cache'
1012
      site.data['cache_processed_pages'] = materials
1013

1014
      # Prepare short URLs
1015
      shortlinks = site.data['shortlinks']
1016
      mappings = Hash.new { |h, k| h[k] = [] }
1017

1018
      shortlinks.each_key do |kp|
1019
        shortlinks[kp].each do |k, v|
1020
          mappings[v].push("/short/#{k}")
1021
        end
1022
      end
1023
      # Update the materials with their short IDs + redirects
1024
      pages.select { |p| mappings.keys.include? p.url }.each do |p|
1025
        # Set the short id on the material
1026
        if p['ref']
1027
          # Initialise redirects if it wasn't set
1028
          p['ref'].data['redirect_from'] = [] if !p['ref'].data.key?('redirect_from')
1029
          p['ref'].data['redirect_from'].push(*mappings[p.url])
1030
          p['ref'].data['redirect_from'].uniq!
1031
        else
1032
          p.data['redirect_from'] = [] if !p.data.key?('redirect_from')
1033

1034
          p.data['redirect_from'].push(*mappings[p.url])
1035
          p.data['redirect_from'].uniq!
1036
        end
1037
      end
1038
      # Same for news
1039
      get_posts(site).select { |p| mappings.keys.include? p.url }.each do |p|
1040
        # Set the short id on the material
1041
        p.data['redirect_from'] = [] if !p.data.key?('redirect_from')
1042
        p.data['redirect_from'].push(*mappings[p.url])
1043
        p.data['redirect_from'].uniq!
1044
      end
1045

1046
      materials
1047
    end
1048

1049
    ##
1050
    # This is a helper function to get all the materials in a site.
1051
    def self.list_all_materials(site)
1052
      process_pages(site, site.pages)
1053
    end
1054

1055
    ##
1056
    # This is a helper function to get materials with automated videos.
1057
    def self.list_videos(site)
1058
      materials = process_pages(site, site.pages)
1059
      materials.select { |x| x['video'] == true }
1060
    end
1061

1062
    ##
1063
    # List every tag used across all materials.
1064
    # This is used to generate the tag cloud.
1065
    #
1066
    # Parameters:
1067
    # +site+:: The +Jekyll::Site+ object, used to get the list of pages.
1068
    # Returns:
1069
    # +Array+:: An array of strings, each string is a tag. (sorted and unique)
1070
    #
1071
    def self.list_all_tags(site)
1072
      materials = process_pages(site, site.pages)
1073
      (materials.map { |x| x['tags'] || [] }.flatten + list_topics(site)).sort.uniq
1074
    end
1075

1076
    def self.filter_by_topic(site, topic_name)
1077
      # Here we make a (cached) call to load materials into memory and sort them
1078
      # properly.
1079
      materials = process_pages(site, site.pages)
1080

1081
      # Select out the materials by topic:
1082
      resource_pages = materials.select { |x| x['topic_name'] == topic_name }
1083

1084
      # If there is nothing with that topic name, try generating it by tags.
1085
      resource_pages = materials.select { |x| (x['tags'] || []).include?(topic_name) } if resource_pages.empty?
1086

1087
      # The complete resources we'll return is the introduction slides first
1088
      # (EDIT: not anymore, we rely on prioritisation!)
1089
      # and then the rest of the pages.
1090
      resource_pages = resource_pages.sort_by { |k| k.fetch('priority', 1) }
1091

1092
      Jekyll.logger.error "Error? Could not find any relevant pages for #{topic_name}" if resource_pages.empty?
1093

1094
      resource_pages
1095
    end
1096

1097
    def self.filter_by_tag(site, topic_name)
1098
      # Here we make a (cached) call to load materials into memory and sort them
1099
      # properly.
1100
      materials = process_pages(site, site.pages)
1101

1102
      # Select those with that topic ID or that tag
1103
      resource_pages = materials.select { |x| x['topic_name'] == topic_name }
1104
      resource_pages += materials.select { |x| (x['tags'] || []).include?(topic_name) }
1105

1106
      # The complete resources we'll return is the introduction slides first
1107
      # (EDIT: not anymore, we rely on prioritisation!)
1108
      # and then the rest of the pages.
1109
      resource_pages = resource_pages.sort_by { |k| k.fetch('priority', 1) }
1110

1111
      Jekyll.logger.error "Error? Could not find any relevant tagged pages for #{topic_name}" if resource_pages.empty?
1112

1113
      resource_pages
1114
    end
1115

1116
    ##
1117
    # Filter a list of materials by topic and subtopic.
1118
    def self.filter_by_topic_subtopic(site, topic_name, subtopic_id)
1119
      resource_pages = filter_by_topic(site, topic_name)
1120

1121
      # Select out materials with the correct subtopic
1122
      resource_pages = resource_pages.select { |x| x['subtopic'] == subtopic_id }
1123

1124
      if resource_pages.empty?
1125
        Jekyll.logger.error "Error? Could not find any relevant pages for #{topic_name} / #{subtopic_id}"
1126
      end
1127

1128
      resource_pages
1129
    end
1130

1131
    ##
1132
    # Get a list of contributors for a list of materials
1133
    # Parameters:
1134
    # +materials+:: An array of materials
1135
    # Returns:
1136
    # +Array+:: An array of individual contributors as strings.
1137
    def self.identify_contributors(materials, site)
1138
      materials
1139
        .map { |_k, v| v['materials'] }.flatten
1140
        # Not 100% sure why this flatten is needed? Probably due to the map over hash
1141
        .map { |mat| Gtn::Contributors.get_contributors(mat) }
1142
        .flatten
1143
        .select { |c| Gtn::Contributors.person?(site, c) }
1144
        .uniq
1145
        .shuffle
1146
    end
1147

1148
    ##
1149
    # Get a list of funders for a list of materials
1150
    # Parameters:
1151
    # +materials+:: An array of materials
1152
    # Returns:
1153
    # +Array+:: An array of funder (organisations that provided support) IDs as strings.
1154
    def self.identify_funders_and_grants(materials, site)
1155
      materials
1156
        .map { |_k, v| v['materials'] }.flatten
1157
        # Not 100% sure why this flatten is needed? Probably due to the map over hash
1158
        .map { |mat| Gtn::Contributors.get_all_funding(site, mat) }
1159
        .flatten
1160
        .uniq
1161
        .shuffle
1162
    end
1163

1164
    ##
1165
    # Get the version of a tool.
1166
    # Parameters:
1167
    # +tool+:: A tool string
1168
    # Returns:
1169
    # +String+:: The version of the tool.
1170
    #
1171
    # Examples:
1172
    # get_version("toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regex1/1.0.0") => "1.0.0"
1173
    def self.get_version(tool)
1174
      if tool.count('/') > 4
1175
        tool.split('/')[-1]
1176
      else
1177
        tool
1178
      end
1179
    end
1180

1181
    ##
1182
    # Get a short version of a tool.
1183
    # Parameters:
1184
    # +tool+:: A tool string
1185
    # Returns:
1186
    # +String+:: The short version of the tool.
1187
    #
1188
    # Examples:
1189
    # short_tool("toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regex1/1.0.0") => "galaxyp/regex1"
1190
    def self.short_tool(tool)
1191
      if tool.count('/') > 4
1192
        "#{tool.split('/')[2]}/#{tool.split('/')[3]}/#{tool.split('/')[4]}"
1193
      else
1194
        tool
1195
      end
1196
    end
1197

1198
    ##
1199
    # List materials by tool
1200
    #
1201
    # Parameters:
1202
    # +site+:: The +Jekyll::Site+ object, used to get the list of pages.
1203
    # Returns:
1204
    # +Hash+:: A hash as below:
1205
    #
1206
    #   {
1207
    #     tool_id => {
1208
    #       "tool_id" => [tool_id, version],
1209
    #       "tutorials" => [tutorial_id, tutorial_title, topic_title, tutorial_url]
1210
    #     }, ...
1211
    #   }
1212
    #
1213
    # *Nota Bene!!!*: Galaxy depends on the structure of this response, please
1214
    # do not change it, add a new API instead if you need to modify it
1215
    # significantly.
1216
    #
1217
    def self.list_materials_by_tool(site)
1218
      tool_map = {}
1219

1220
      list_all_materials(site).each do |m|
1221
        m.fetch('tools', []).each do |tool|
1222
          sid = short_tool(tool)
1223
          tool_map[sid] = { 'tool_id' => [], 'tutorials' => [] } if !tool_map.key?(sid)
1224

1225
          tool_map[sid]['tool_id'].push([tool, get_version(tool)])
1226
          tool_map[sid]['tutorials'].push([
1227
                                            m['id'], m['title'], site.data[m['topic_name']]['title'], m['url']
1228
                                          ])
1229
        end
1230
      end
1231

1232
      # Uniqueify/sort
1233
      t = tool_map.to_h do |k, v|
1234
        v['tool_id'].uniq!
1235
        v['tool_id'].sort_by! { |k2| k2[1] }
1236
        v['tool_id'].reverse!
1237

1238
        v['tutorials'].uniq!
1239
        v['tutorials'].sort!
1240
        [k, v]
1241
      end
1242

1243
      # Order by most popular tool
1244
      t.sort_by { |_k, v| v['tutorials'].length }.reverse.to_h
1245
    end
1246

1247

1248
    ##
1249
    # Not materials but resources (including e.g. recordings, slides separate from tutorials, etc.)
1250
    #
1251
    # The structure is a large array of arrays, with [date, category, page-like object, tags]
1252
    #
1253
    #   [#<DateTime: 2019-02-22T20:53:50+01:00 ((2458537j,71630s,0n),+3600s,2299161j)>,
1254
    #    "tutorials",
1255
    #    #<Jekyll::Page @relative_path="topics/single-cell/tutorials/scrna-preprocessing/tutorial.md">,
1256
    #    ["single-cell"]],
1257
    #   [#<DateTime: 2019-02-20T19:33:11+01:00 ((2458535j,66791s,0n),+3600s,2299161j)>,
1258
    #    "tutorials",
1259
    #    #<Jekyll::Page @relative_path="topics/single-cell/tutorials/scrna-umis/tutorial.md">,
1260
    #    ["single-cell"]],
1261
    #   [#<DateTime: 2019-02-16T21:04:07+01:00 ((2458531j,72247s,0n),+3600s,2299161j)>,
1262
    #    "slides",
1263
    #    #<Jekyll::Page @relative_path="topics/single-cell/tutorials/scrna-plates-batches-barcodes/slides.html">,
1264
    #    ["single-cell"]]] 
1265
    def self.all_date_sorted_resources(site)
1266
      cache.getset('all_date_sorted_resources') do
1267
        self._all_date_sorted_resources(site)
1268
      end
1269
    end
1270

1271
    def self._all_date_sorted_resources(site)
1272
      events = site.pages.select { |x| x['layout'] == 'event' || x['layout'] == 'event-external' }
1273
      materials = list_all_materials(site).reject { |k, _v| k['draft'] }
1274
      news = site.posts.select { |x| x['layout'] == 'news' }
1275
      faqs = site.pages.select { |x| x['layout'] == 'faq' }
1276
      pathways = site.pages.select { |x| x['layout'] == 'learning-pathway' }
1277
      workflows = Dir.glob('topics/**/*.ga')
1278

1279
      bucket = events.map do |e|
1280
        [Gtn::PublicationTimes.obtain_time(e.path).to_datetime, 'events', e, ['event'] + e.data.fetch('tags', [])]
1281
      end
1282

1283
      materials.each do |m|
1284
        tags = [m['topic_name']] + (m['tags'] || [])
1285
        m.fetch('ref_tutorials', []).map do |t|
1286
          bucket << [Gtn::PublicationTimes.obtain_time(t.path).to_datetime, 'tutorials', t, tags]
1287

1288
          (t['recordings'] || []).map do |r|
1289
            url = '/' + t.path.gsub(/tutorial(_[A_Z_]*)?.(html|md)$/, 'recordings/')
1290
            url += "#tutorial-recording-#{Date.parse(r['date']).strftime('%-d-%B-%Y').downcase}"
1291
            attr = {'title' => "Recording of " + t['title'], 
1292
                    'contributors' => r['speakers'] + (r['captions'] || []),
1293
                    'content' => "A #{r['length']} long recording is now available."}
1294

1295
            obj = objectify(attr, url, t.path)
1296
            bucket << [DateTime.parse(r['date'].to_s), 'recordings', obj, tags]
1297
          end
1298
        end
1299

1300
        m.fetch('ref_slides', []).reject { |s| s.url =~ /-plain.html/ }.map do |s|
1301
          bucket << [Gtn::PublicationTimes.obtain_time(s.path).to_datetime, 'slides', s, tags]
1302

1303
          (s['recordings'] || []).map do |r|
1304
            url = '/' + s.path.gsub(/slides(_[A_Z_]*)?.(html|md)$/, 'recordings/')
1305
            url += "#tutorial-recording-#{Date.parse(r['date']).strftime('%-d-%B-%Y').downcase}"
1306
            attr = {'title' => "Recording of " + s['title'], 
1307
                    'contributors' => r['speakers'] + (r['captions'] || []),
1308
                    'content' => "A #{r['length']} long recording is now available."}
1309
            obj = objectify(attr, url, s.path)
1310
            bucket << [DateTime.parse(r['date'].to_s), 'recordings', obj, tags]
1311
          end
1312
        end
1313
      end
1314

1315
      bucket += news.map do |n|
1316
        [n.date.to_datetime, 'news', n, ['news'] + n.data.fetch('tags', [])]
1317
      end
1318

1319
      bucket += faqs.map do |n|
1320
        tag = Gtn::PublicationTimes.clean_path(n.path).split('/')[1]
1321
        [Gtn::PublicationTimes.obtain_time(n.path).to_datetime, 'faqs', n, ['faqs', tag]]
1322
      end
1323

1324
      bucket += pathways.map do |n|
1325
        tags = ['learning-pathway'] + (n['tags'] || [])
1326
        [Gtn::PublicationTimes.obtain_time(n.path).to_datetime, 'learning-pathways', n, tags]
1327
      end
1328

1329
      bucket += workflows.map do |n|
1330
        tag = Gtn::PublicationTimes.clean_path(n).split('/')[1]
1331
        wf_data = JSON.parse(File.read(n))
1332

1333
        attrs = {
1334
          'title' => wf_data['name'],
1335
          'description' => wf_data['annotation'],
1336
          'tags' => wf_data['tags'],
1337
          'contributors' => wf_data.fetch('creator', []).map do |c|
1338
            matched = site.data['contributors'].select{|k, v| 
1339
              v.fetch('orcid', "does-not-exist") == c.fetch('identifier', "").gsub('https://orcid.org/', '')
1340
            }.first
1341
            if matched
1342
              matched[0]
1343
            else
1344
              c['name']
1345
            end
1346
          end
1347
        }
1348
        # These aren't truly stable. I'm not sure what to do about that.
1349
        obj = objectify(attrs, '/' + n.gsub(/\.ga$/, '.html'), n)
1350
        # obj = objectify(attrs, '/' + n.path[0..n.path.rindex('/')], n)
1351
        [Gtn::PublicationTimes.obtain_time(n).to_datetime, 'workflows', obj, ['workflows', tag] + obj['tags']]
1352
      end
1353

1354
      # Remove symlinks from bucket.
1355
      bucket = bucket.reject { |date, type, page, tags|
1356
        File.symlink?(page.path) || File.symlink?(File.dirname(page.path)) || File.symlink?(File.dirname(File.dirname(page.path)))
1357
      }
1358

1359
      bucket += site.data['contributors'].map do |k, v|
1360
        a = {'title' => "@#{k}",
1361
             'content' => "GTN Contributions from #{k}"}
1362
        obj = objectify(a, "/hall-of-fame/#{k}/", k)
1363

1364
        [DateTime.parse("#{v['joined']}-01T12:00:00", 'content' => "GTN Contributions from #{k}"), 'contributors', obj, ['contributor']]
1365
      end
1366

1367
      bucket += site.data['grants'].map do |k, v|
1368
        a = {'title' => "@#{k}",
1369
             'content' => "GTN Contributions from #{k}"}
1370
        obj = objectify(a, "/hall-of-fame/#{k}/", k)
1371

1372
        # TODO: backdate grants, organisations
1373
        if v['joined']
1374
          [DateTime.parse("#{v['joined']}-01T12:00:00"), 'grants', obj, ['grant']]
1375
        end
1376
      end.compact
1377

1378
      bucket += site.data['organisations'].map do |k, v|
1379
        a = {'title' => "@#{k}",
1380
             'content' => "GTN Contributions from #{k}"}
1381
        obj = objectify(a, "/hall-of-fame/#{k}/", k)
1382

1383
        if v['joined']
1384
          [DateTime.parse("#{v['joined']}-01T12:00:00"), 'organisations', obj, ['organisation']]
1385
        end
1386
      end.compact
1387

1388
      bucket
1389
        .reject{|x| x[0] > DateTime.now } # Remove future-dated materials
1390
        .reject{|x| x[2]['draft'] == true } # Remove drafts
1391
        .sort_by {|x| x[0] } # Date-sorted, not strictly necessary since will be grouped.
1392
        .reverse
1393
    end
1394
  end
1395
end
1396

1397
module Jekyll
1398
  # The "implementation" of the topic filter as liquid accessible filters
1399
  module Filters
1400
    module TopicFilter
1401
      ##
1402
      # List the most recent contributors to the GTN.
1403
      # Parameters:
1404
      # +contributors+:: A hash of contributors
1405
      # +count+:: The number of contributors to return
1406
      # Returns:
1407
      # +Hash+:: A hash of contributors
1408
      #
1409
      # Example:
1410
      # most_recent_contributors(contributors, 5)
1411
      # => {
1412
      #  "hexylena" => {
1413
      #  "name" => "Hexylena",
1414
      #  "avatar" => "https://avatars.githubusercontent.com/u/458683?v=3",
1415
      #  ...
1416
      #  }
1417
      # }
1418
      def most_recent_contributors(contributors, count)
1419
        # Remove non-hof
1420
        hof = contributors.reject { |_k, v| v.fetch('halloffame', 'yes') == 'no' }
1421
        # Get keys + sort by joined date
1422
        hof_k = hof.keys.sort do |x, y|
1423
          hof[y].fetch('joined', '2016-01') <=> hof[x].fetch('joined', '2016-01')
1424
        end
1425

1426
        # Transform back into hash
1427
        hof_k.slice(0, count).to_h { |k| [k, hof[k]] }
1428
      end
1429

1430
      ##
1431
      # Find the most recently modified tutorials
1432
      # Parameters:
1433
      # +site+:: The +Jekyll::Site+ object, used to get the list of pages.
1434
      # +exclude_recently_published+:: Do not include ones that were recently
1435
      #                                published in the slice, to make it look a bit nicer.
1436
      # Returns:
1437
      # +Array+:: An array of the 10 most recently modified pages
1438
      # Example:
1439
      #  {% assign latest_tutorials = site | recently_modified_tutorials %}
1440
      def recently_modified_tutorials(site, exclude_recently_published: true)
1441
        tutorials = site.pages.select { |page| page.data['layout'] == 'tutorial_hands_on' }
1442

1443
        latest = tutorials.sort do |x, y|
1444
          Gtn::ModificationTimes.obtain_time(y.path) <=> Gtn::ModificationTimes.obtain_time(x.path)
1445
        end
1446

1447
        latest_published = recently_published_tutorials(site)
1448
        latest = latest.reject { |x| latest_published.include?(x) } if exclude_recently_published
1449

1450
        latest.slice(0, 10)
1451
      end
1452

1453
      ##
1454
      # Find the most recently published tutorials
1455
      # Parameters:
1456
      # +site+:: The +Jekyll::Site+ object, used to get the list of pages.
1457
      # Returns:
1458
      # +Array+:: An array of the 10 most recently published modified pages
1459
      # Example:
1460
      #  {% assign latest_tutorials = site | recently_modified_tutorials %}
1461
      def recently_published_tutorials(site)
1462
        tutorials = site.pages.select { |page| page.data['layout'] == 'tutorial_hands_on' }
1463

1464
        latest = tutorials.sort do |x, y|
1465
          Gtn::PublicationTimes.obtain_time(y.path) <=> Gtn::PublicationTimes.obtain_time(x.path)
1466
        end
1467

1468
        latest.slice(0, 10)
1469
      end
1470

1471
      def topic_count(resources)
1472
        # Count lines in the table except introduction slides
1473
        resources.length
1474
      end
1475

1476
      ##
1477
      # Fetch a tutorial material's metadata
1478
      # Parameters:
1479
      # +site+:: The +Jekyll::Site+ object, used to get the list of pages.
1480
      # +topic_name+:: The name of the topic
1481
      # +page_name+:: The name of the page
1482
      # Returns:
1483
      # +Hash+:: The metadata for the tutorial material
1484
      #
1485
      # Example:
1486
      #  {% assign material = site | fetch_tutorial_material:page.topic_name,page.tutorial_name%}
1487
      def fetch_tutorial_material(site, topic_name, page_name)
1488
        Gtn::TopicFilter.fetch_tutorial_material(site, topic_name, page_name)
1489
      end
1490

1491
      def fetch_tutorial_material_by_id(site, id)
1492
        Gtn::TopicFilter.fetch_tutorial_material(site, id.split('/')[0], id.split('/')[1])
1493
      end
1494

1495
      def list_topics_ids(site)
1496
        ['introduction'] + Gtn::TopicFilter.list_topics(site).filter { |k| k != 'introduction' }
1497
      end
1498

1499
      def list_topics_h(site)
1500
        Gtn::TopicFilter.list_topics(site)
1501
      end
1502

1503
      def list_topics_by_category(site, category)
1504
        q = Gtn::TopicFilter.list_topics(site).map do |k|
1505
          [k, site.data[k]]
1506
        end
1507

1508
        # Alllow filtering by a category, or return "all" otherwise.
1509
        if category == 'non-tag'
1510
          q = q.select { |_k, v| v['tag_based'].nil? }
1511
        elsif category == 'science'
1512
          q = q.select { |_k, v| %w[use basics].include? v['type'] }
1513
        elsif category == 'technical'
1514
          q = q.select { |_k, v| %w[admin-dev data-science instructors].include? v['type'] }
1515
        elsif category == 'science-technical'
1516
          q = q.select { |_k, v| %w[use basics admin-dev data-science instructors].include? v['type'] }
1517
        elsif category != 'all'
1518
          q = q.select { |_k, v| v['type'] == category }
1519
        end
1520

1521
        # Sort alphabetically by titles
1522
        q.sort { |a, b| a[1]['title'] <=> b[1]['title'] }
1523
      end
1524

1525
      def to_keys(arr)
1526
        arr.map { |k| k[0] }
1527
      end
1528

1529
      def to_vals(arr)
1530
        arr.map { |k| k[1] }
1531
      end
1532

1533
      ##
1534
      # Galaxy depends on the structure of this response, please do not change
1535
      # it, add a new API instead if you need to modify it significantly.
1536
      def list_materials_by_tool(site)
1537
        Gtn::TopicFilter.list_materials_by_tool(site)
1538
      end
1539

1540
      def list_materials_structured(site, topic_name)
1541
        Gtn::TopicFilter.list_materials_structured(site, topic_name)
1542
      end
1543

1544
      def list_materials_flat(site, topic_name)
1545
        Gtn::TopicFilter
1546
          .list_materials_structured(site, topic_name)
1547
          .map { |k, v| v['materials'] }
1548
          .flatten
1549
          .uniq { |x| x['id'] }
1550
      end
1551

1552
      def list_topic_materials_yearly(site, topic_name)
1553
        flat_mats = list_materials_flat(site, topic_name)
1554
        years = flat_mats.map{|x| x['pub_date'].year} + flat_mats.map{|x| x['mod_date'].year}
1555
        # doesn't use identify_contributors because that excludes grants/orgs.
1556
        topic_contribs = flat_mats.map{|x| x['contributions']  || {"all" => x['contributors']}}.map{|x| x.values.flatten}.flatten.uniq.sort
1557
        pfo = ['contributors', 'grants', 'organisations']
1558

1559
        Gtn::TopicFilter.all_date_sorted_resources(site)
1560
          .select{|x| (x[3].include? topic_name) || (pfo.include?(x[1]) && topic_contribs.include?(x[2].title[1..]))}
1561
          .group_by{|x| x[0].year}
1562
          .map{|k, v| [k, v.group_by{|z| z[1]}]}
1563
          .to_h
1564
      end
1565

1566
      def count_topic_materials_yearly(site, topic_name)
1567
        flat_mats = list_materials_flat(site, topic_name)
1568
        years = flat_mats.map{|x| x['pub_date'].year} + flat_mats.map{|x| x['mod_date'].year}
1569
        # doesn't use identify_contributors because that excludes grants/orgs.
1570
        topic_contribs = flat_mats.map{|x| x['contributions']  || {"all" => x['contributors']}}.map{|x| x.values.flatten}.flatten.uniq.sort
1571
        pfo = ['contributors', 'grants', 'organisations']
1572

1573
        r = Gtn::TopicFilter.all_date_sorted_resources(site)
1574
          .select{|x| (x[3].include? topic_name) || (pfo.include?(x[1]) && topic_contribs.include?(x[2].title[1..]))}
1575
          .map{|x| [x[0].year, x[1]]} # Only need year + type
1576
          .group_by{|x| x[1]} # Group by type.
1577
          .map{|k, v| [k, v.map{|vv| vv[0]}.tally]}
1578
          .to_h
1579

1580
        years = (2015..Date.today.year).to_a
1581
        # Fill in zeros for missing years
1582
        r.map{|k, v| [k, years.map{|y| v[y] || 0}
1583
          .cumulative_sum
1584
          .map.with_index{|value, i| {"y" => value, "x" => "#{years[i]}-01-01"}}]
1585
        }.to_h
1586
      end
1587

1588
      def list_all_tags(site)
1589
        Gtn::TopicFilter.list_all_tags(site)
1590
      end
1591

1592
      def topic_filter(site, topic_name)
1593
        Gtn::TopicFilter.topic_filter(site, topic_name)
1594
      end
1595

1596
      def topic_filter_tutorial_count(site, topic_name)
1597
        Gtn::TopicFilter.topic_filter(site, topic_name).length
1598
      end
1599

1600
      def identify_contributors(materials, site)
1601
        Gtn::TopicFilter.identify_contributors(materials, site)
1602
      end
1603

1604
      def identify_funders(materials, site)
1605
        Gtn::TopicFilter.identify_funders_and_grants(materials, site)
1606
      end
1607

1608
      ##
1609
      # Just used for stats page.
1610
      def list_videos(site)
1611
        Gtn::TopicFilter.list_all_materials(site)
1612
          .select { |k, _v| k['recordings'] || k['slides_recordings'] }
1613
          .map { |k, _v| (k['recordings'] || []) + (k['slides_recordings'] || []) }
1614
          .flatten
1615
      end
1616

1617
      def findDuration(duration)
1618
        if ! duration.nil?
1619
          eval(duration.gsub(/H/, ' * 3600 + ').gsub(/M/, ' * 60 + ').gsub(/S/, ' + ') + " 0")
1620
        else
1621
          0
1622
        end
1623
      end
1624

1625
      ##
1626
      # Just used for stats page.
1627
      def list_videos_total_time(site)
1628
        vids = list_videos(site)
1629
        vids.map { |v| findDuration(v['length']) }.sum / 3600.0
1630
      end
1631

1632
      def list_draft_materials(site)
1633
        Gtn::TopicFilter.list_all_materials(site).select { |k, _v| k['draft'] }
1634
      end
1635

1636
      def to_material(site, page)
1637
        topic = page['path'].split('/')[1]
1638
        material = page['path'].split('/')[3]
1639
        ret = Gtn::TopicFilter.fetch_tutorial_material(site, topic, material)
1640
        Jekyll.logger.warn "Could not find material #{topic} #{material}" if ret.nil?
1641
        ret
1642
      end
1643

1644
      def get_workflow(site, page, workflow)
1645
        mat = to_material(site, page)
1646
        mat['workflows'].select { |w| w['workflow'] == workflow }[0]
1647
      end
1648

1649
      def tool_version_support(site, tool)
1650
        Gtn::Supported.calculate(site.data['public-server-tools'], [tool])
1651
      end
1652

1653
      def edamify(term, site)
1654
        site.data['EDAM'].select{|row| row['Class ID'] == "http://edamontology.org/#{term}"}.first.to_h
1655
      end
1656

1657
      def titlecase(term)
1658
        term.split(' ').map(&:capitalize).join(' ')
1659
      end
1660
    end
1661
  end
1662
end
1663

1664
Liquid::Template.register_filter(Jekyll::Filters::TopicFilter)
1665

1666
Product

Resources

Company