# Path: blob/main/_plugins/jekyll-topic-filter.rb
# (1677 views)
# frozen_string_literal: true

require 'json'
require 'yaml'
require './_plugins/gtn'
require './_plugins/util'
require 'securerandom'

# Core extension: running cumulative sum over a numeric Array.
#   [1, 2, 3].cumulative_sum # => [1, 3, 6]
class Array
  def cumulative_sum
    sum = 0
    map { |x| sum += x }
  end
end

module Gtn
  # The main GTN module to parse tutorial.md and slides.html and topics into
  # useful lists of things that can be shown on topic pages, i.e. "materials"
  # (a possible combination of tutorial + slides)
  #
  # This is by far the most complicated module and the least
  # disaggregated/modular part of the GTN infrastructure.
  # TopicFilter.resolve_material is probably the single most important function
  # in the entire suite.
  module TopicFilter
    ##
    # This function returns a list of all the topics that are available.
    # Params:
    # +site+:: The +Jekyll::Site+ object
    # Returns:
    # +Array+:: The list of topic names
    def self.list_topics(site)
      list_topics_h(site).keys
    end

    ##
    # A "topic" is any +site.data+ entry that is a Hash carrying an
    # 'editorial_board' key.
    # Returns:
    # +Hash+:: topic name => topic data
    def self.list_topics_h(site)
      site.data.select { |_k, v| v.is_a?(Hash) && v.key?('editorial_board') }
    end

    ##
    # This function returns a list of all the topics that are available.
    # Params:
    # +site+:: The +Jekyll::Site+ object
    # Returns:
    # +Array+:: The topic objects themselves
    def self.enumerate_topics(site)
      list_topics_h(site).values
    end

    ##
    # Setup the local cache via +Jekyll::Cache+
    #
    # NOTE: memoized in a module-level instance variable rather than a class
    # variable (@@cache); behavior is identical for this singleton module and
    # it avoids the shared-across-inheritance pitfalls of class variables.
    def self.cache
      @cache ||= Jekyll::Cache.new('GtnTopicFilter')
    end

    ##
    # Fill the cache with all the topics if it hasn't been done already.
    # Safe to be called multiple times.
    # Params:
    # +site+:: The +Jekyll::Site+ object
    # Returns:
    # +nil+
    def self.fill_cache(site)
      # The presence of this key doubles as the "already filled" flag.
      return if site.data.key?('cache_topic_filter')

      Jekyll.logger.debug '[GTN/TopicFilter] Begin Cache Prefill'
      site.data['cache_topic_filter'] = {}

      # For each topic
      list_topics(site).each do |topic|
        site.data['cache_topic_filter'][topic] = filter_by_topic(site, topic)
      end
      Jekyll.logger.debug '[GTN/TopicFilter] End Cache Prefill'
    end

    ##
    # This function returns a list of all the materials that are available for a specific topic.
    # Params:
    # +site+:: The +Jekyll::Site+ object
    # +topic_name+:: The name of the topic
    # Returns:
    # +Array+:: The list of materials
    def self.topic_filter(site, topic_name)
      fill_cache(site)
      site.data['cache_topic_filter'][topic_name]
    end
  end
end
module Gtn
  module TopicFilter
    ##
    # This function returns a list of all the materials that are available for a
    # specific topic, but this time in a structured manner
    # Params:
    # +site+:: The +Jekyll::Site+ object
    # +topic_name+:: The name of the topic
    # Returns:
    # +Hash+:: The subtopics and their materials
    #
    # Example:
    #   {
    #     "intro" => {
    #       "subtopic" => {"title" => "Introduction", "description" => "Introduction to the topic", "id" => "intro"},
    #       "materials" => [...]
    #     },
    #     "__OTHER__" => {
    #       "subtopic" => {"title" => "Other", "description" => "Other materials", "id" => "__OTHER__"},
    #       "materials" => [...]
    #     }
    #   }
    #
    # This method is built with the idea to replace the "topic_filter" command,
    # and instead of returning semi-structured data, we will immediately return
    # fully structured data for a specific "topic_name" query, like, "admin"
    #
    # Instead of returning a flat list of tutorials, instead we'll structure
    # them properly in subtopics (if they exist) or return the flat list
    # otherwise.
    #
    # This will let us generate new "views" into the tutorial lists, having
    # them arranged in new and exciting ways.
    def self.list_materials_structured(site, topic_name)
      fill_cache(site)

      # Here we want to either return data structured around subtopics

      if site.data[topic_name]['tag_based'].nil? && site.data[topic_name].key?('subtopics')
        # We'll construct a new hash of subtopic => tutorials
        out = {}
        seen_ids = []
        site.data[topic_name]['subtopics'].each do |subtopic, _v|
          specific_resources = filter_by_topic_subtopic(site, topic_name, subtopic['id'])
          out[subtopic['id']] = {
            'subtopic' => subtopic,
            'materials' => specific_resources
          }
          seen_ids += specific_resources.map { |x| x['id'] }
        end

        # And we'll have this __OTHER__ subtopic for any tutorials that weren't
        # in a subtopic.
        all_topics_for_tutorial = filter_by_topic(site, topic_name)
        out['__OTHER__'] = {
          'subtopic' => { 'title' => 'Other', 'description' => 'Assorted Tutorials', 'id' => 'other' },
          'materials' => all_topics_for_tutorial.reject { |x| seen_ids.include?(x['id']) }
        }
      elsif site.data[topic_name]['tag_based'] && site.data[topic_name].key?('subtopics')
        out = {}
        seen_ids = []
        tn = topic_name.gsub('by_tag_', '')
        materials = filter_by_tag(site, tn)

        # For each subtopic
        site.data[topic_name]['subtopics'].each do |subtopic|
          # Find matching tag-based tutorials in our filtered-by-tag materials
          specific_resources = materials.select { |x| (x['tags'] || []).include?(subtopic['id']) }
          out[subtopic['id']] = {
            'subtopic' => subtopic,
            'materials' => specific_resources
          }
          seen_ids += specific_resources.map { |x| x['id'] }
        end

        # NOTE: a stray, discarded `filter_by_tag(site, tn)` call used to sit
        # here; it recomputed `materials` and threw the result away, so it was
        # removed.
        out['__OTHER__'] = {
          'subtopic' => { 'title' => 'Other', 'description' => 'Assorted Tutorials', 'id' => 'other' },
          'materials' => materials.reject { |x| seen_ids.include?(x['id']) }
        }
      elsif site.data[topic_name]['tag_based'] # Tag based Topic
        # We'll construct a new hash of subtopic(parent topic) => tutorials
        out = {}
        seen_ids = []
        tn = topic_name.gsub('by_tag_', '')
        materials = filter_by_tag(site, tn)

        # Which topics are represented in those materials? (`uniq` so each
        # parent topic is processed once; output is unchanged since repeats
        # produced identical entries.)
        seen_topics = materials.map { |x| x['topic_name'] }.sort.uniq

        # Treat them like subtopics, but fake subtopics.
        seen_topics.each do |parent_topic|
          specific_resources = materials.select { |x| x['topic_name'] == parent_topic }
          out[parent_topic] = {
            'subtopic' => { 'id' => parent_topic, 'title' => site.data[parent_topic]['title'], 'description' => nil },
            'materials' => specific_resources
          }
          seen_ids += specific_resources.map { |x| x['id'] }
        end

        # And we'll have this __OTHER__ subtopic for any tutorials that weren't
        # in a subtopic.
        all_topics_for_tutorial = filter_by_tag(site, tn)
        out['__OTHER__'] = {
          'subtopic' => { 'title' => 'Other', 'description' => 'Assorted Tutorials', 'id' => 'other' },
          'materials' => all_topics_for_tutorial.reject { |x| seen_ids.include?(x['id']) }
        }
      else
        # Or just the list (Jury is still out on this one, should it really be a
        # flat list? Or in this identical structure.)
        out = {
          '__FLAT__' => {
            'subtopic' => nil,
            'materials' => filter_by_topic(site, topic_name)
          }
        }
      end

      # Cleanup empty sections
      out.delete('__OTHER__') if out.key?('__OTHER__') && out['__OTHER__']['materials'].empty?

      out.each_value do |v|
        v['materials'].sort_by! { |m| [m.fetch('priority', 1), m['title']] }
      end

      out
    end

    ##
    # Fetch a specific tutorial material by topic and tutorial name
    # Params:
    # +site+:: The +Jekyll::Site+ object
    # +topic_name+:: The name of the topic
    # +tutorial_name+:: The name of the tutorial
    # Returns:
    # +Hash+:: The tutorial material, or +nil+ when the topic is unknown
    def self.fetch_tutorial_material(site, topic_name, tutorial_name)
      return nil if topic_name.nil?

      fill_cache(site)
      if site.data['cache_topic_filter'][topic_name].nil?
        Jekyll.logger.warn "Cannot fetch tutorial material for #{topic_name}"
        nil
      else
        site.data['cache_topic_filter'][topic_name].find { |p| p['tutorial_name'] == tutorial_name }
      end
    end

    ##
    # Extract the list of tools used in a workflow
    # Params:
    # +data+:: The Galaxy Workflow JSON data, parsed
    # Returns:
    # +Array+:: The tool IDs; note that each subworkflow contributes its tool
    #           list as a NESTED array (callers appear to flatten downstream —
    #           TODO confirm before changing this shape).
    def self.extract_workflow_tool_list(data)
      out = data['steps'].select { |_k, v| v['type'] == 'tool' }.map { |_k, v| v['tool_id'] }.compact
      out += data['steps'].select { |_k, v| v['type'] == 'subworkflow' }
                          .map { |_k, v| extract_workflow_tool_list(v['subworkflow']) }
      out
    end

    ##
    # Annotation of a path with topic and tutorial information
    # Params:
    # +path+:: The path to annotate
    # +layout+:: The page layout if known
    # Returns:
    # +Hash+:: The annotation, or +nil+ for paths that are not materials
    #
    # Example:
    #
    #  h = Gtn::TopicFilter.annotate_path("topics/assembly/tutorials/velvet-assembly/tutorial.md", nil)
    #  h # => {
    #    #      "topic"=>"assembly",
    #    #      "topic_name"=>"assembly",
    #    #      "material"=>"assembly/velvet-assembly",
    #    #      "tutorial_name"=>"velvet-assembly",
    #    #      "dir"=>"topics/assembly/tutorials/velvet-assembly",
    #    #      "type"=>"tutorial"
    #    #     }
    def self.annotate_path(path, layout)
      parts = path.split('/')
      parts.shift if parts[0] == '.'

      return nil if parts[0] != 'topics'
      return nil if parts[2] != 'tutorials'
      return nil if parts.length < 4

      material = {
        'topic' => parts[1], # Duplicate of topic_name, kept for compatibility
        'topic_name' => parts[1],
        'material' => "#{parts[1]}/#{parts[3]}",
        'tutorial_name' => parts[3],
        'dir' => parts[0..3].join('/'),
      }

      return nil if path =~ %r{/faqs/}
      return nil if parts[-1] =~ /data[_-]library.yaml/ || parts[-1] =~ /data[_-]manager.yaml/

      # Check if it's a symlink
      material['symlink'] = true if File.symlink?(material['dir'])

      if parts[4] =~ /tutorial.*\.md/ || layout == 'tutorial_hands_on'
        material['type'] = 'tutorial'
      elsif parts[4] =~ /slides.*\.html/ || %w[tutorial_slides base_slides introduction_slides].include?(layout)
        material['type'] = 'slides'
      elsif parts[4] =~ /ipynb$/
        material['type'] = 'ipynb'
      elsif parts[4] =~ /Rmd$/
        material['type'] = 'rmd'
      elsif parts[4] == 'workflows'
        material['type'] = 'workflow'
      elsif parts[4] == 'recordings'
        material['type'] = 'recordings'
      elsif parts[4] == 'tours'
        material['type'] = 'tour'
      else
        # Covers index.md and any unknown resource type; both returned nil in
        # the original (the two branches were merged as they were identical).
        return nil
      end

      material
    end
  end
end
module Gtn
  module TopicFilter
    ##
    # Get the list of posts from the site
    # Params:
    # +site+:: The +Jekyll::Site+ object
    # Returns:
    # +Array+:: The list of posts
    #
    # Transition-period shim: depending on the Jekyll configuration,
    # +site.posts+ either exposes a +docs+ method or already behaves as an
    # Array, so prefer +docs+ when it exists.
    def self.get_posts(site)
      posts = site.posts
      posts.respond_to?(:docs) ? posts.docs : posts
    end

    ##
    # Collate the materials into a large hash
    # Params:
    # +site+:: The +Jekyll::Site+ object
    # +pages+:: The list of pages to collate
    # Returns:
    # +Hash+:: "topic/tutorial" key => material hash with a 'resources' list of
    #          [type, page] pairs.
    def self.collate_materials(site, pages)
      # In order to speed up queries later, we'll store a set of "interesting"
      # pages (i.e. things that are under `topic_name`)
      shortlinks_reversed = site.data['shortlinks']['id'].invert

      collated = {}
      pages.each do |page|
        # Skip anything outside of topics.
        next unless page.url.include?('/topics/')

        # Extract the material metadata based on the path
        page.data['url'] = page.url
        meta = annotate_path(page.path, page.data['layout'])

        # If unannotated then we want to skip this material.
        next if meta.nil?

        key = meta['material']

        unless collated.key?(key)
          collated[key] = meta.dup
          collated[key].delete('type') # Remove the type since it's specific, not generic
          collated[key]['resources'] = []
        end

        page.data['topic_name'] = meta['topic_name']
        page.data['tutorial_name'] = meta['tutorial_name']
        page.data['dir'] = meta['dir']
        page.data['short_id'] = shortlinks_reversed[page.data['url']]
        page.data['symlink'] = meta['symlink']

        collated[key]['resources'].push([meta['type'], page])
      end

      collated
    end

    ##
    # Make a label safe for use in mermaid (without ()[]"')
    def self.mermaid_safe_label(label)
      # Strip brackets entirely; swap quotes for typographic look-alikes.
      # We accept that this is not perfectly correct.
      (label || '').delete('()[]').tr('"', '”').tr("'", '’')
    end

    ##
    # Build a Mermaid.js compatible graph of a given Galaxy Workflow
    #
    # TODO: extract into own module along with DOT.
    #
    # Params:
    # +wf+:: The Galaxy Workflow JSON representation
    # Returns:
    # +String+:: A Mermaid.js compatible graph of the workflow.
    def self.mermaid(wf)
      # We're converting it to Mermaid.js
      # flowchart TD
      #     A[Start] --> B{Is it?}
      #     B -- Yes --> C[OK]
      #     C --> D[Rethink]
      #     D --> B
      #     B -- No ----> E[End]
      statements = []
      wf['steps'].each_key do |id|
        step = wf['steps'][id]
        label = mermaid_safe_label(step['label'] || step['name'])

        # One node statement per step, followed by a style statement for the
        # "special" step types.
        case step['type']
        when 'data_collection_input'
          statements << "#{id}[\"ℹ️ Input Collection\\n#{label}\"];"
          statements << "style #{id} stroke:#2c3143,stroke-width:4px;"
        when 'data_input'
          statements << "#{id}[\"ℹ️ Input Dataset\\n#{label}\"];"
          statements << "style #{id} stroke:#2c3143,stroke-width:4px;"
        when 'parameter_input'
          statements << "#{id}[\"ℹ️ Input Parameter\\n#{label}\"];"
          statements << "style #{id} fill:#ded,stroke:#393,stroke-width:4px;"
        when 'subworkflow'
          statements << "#{id}[\"🛠️ Subworkflow\\n#{label}\"];"
          statements << "style #{id} fill:#edd,stroke:#900,stroke-width:4px;"
        else
          statements << "#{id}[\"#{label}\"];"
        end

        # Incoming edges; a connection value may be a single link or a list.
        step['input_connections'].each_value do |conn|
          links = conn.is_a?(Array) ? conn : [conn]
          links.each do |link|
            statements << "#{link['id']} -->|#{mermaid_safe_label(link['output_name'])}| #{id};"
          end
        end

        # Labelled workflow outputs become extra nodes hanging off this step.
        (step['workflow_outputs'] || []).each do |wo|
          next if wo['label'].nil?

          wo['uuid'] = SecureRandom.uuid.to_s if wo['uuid'].nil?
          statements << "#{wo['uuid']}[\"Output\\n#{wo['label']}\"];"
          statements << "#{id} --> #{wo['uuid']};"
          statements << "style #{wo['uuid']} stroke:#2c3143,stroke-width:4px;"
        end
      end

      "flowchart TD\n" + statements.map { |q| " #{q}" }.join("\n")
    end

    ##
    # Build a DOT graph for a given tutorial file.
    #
    # TODO: extract into own module along with mermaid.
    #
    # Params:
    # +wf+:: The Galaxy Workflow JSON representation
    # Returns:
    # +String+:: A DOT graph of the workflow.
    def self.graph_dot(wf)
      # digraph test {
      #   0[shape=box,style=filled,color=lightblue,label="ℹ️ Input Dataset\nBionano_dataset"]
      #   1[shape=box,style=filled,color=lightblue,label="ℹ️ Input Dataset\nHi-C_dataset_R"]
      #   3 -> 6 [label="output"]
      #   7[shape=box,label="Busco"]
      #   4 -> 7 [label="out_fa"]
      #   8[shape=box,label="Busco"]
      #   5 -> 8 [label="out_fa"]
      statements = [
        'node [fontname="Atkinson Hyperlegible", shape=box, color=white,style=filled,color=peachpuff,margin="0.2,0.2"];',
        'edge [fontname="Atkinson Hyperlegible"];',
      ]
      wf['steps'].each_key do |id|
        step = wf['steps'][id]
        label = mermaid_safe_label(step['label'] || step['name'])

        case step['type']
        when 'data_collection_input'
          statements << "#{id}[color=lightblue,label=\"ℹ️ Input Collection\\n#{label}\"]"
        when 'data_input'
          statements << "#{id}[color=lightblue,label=\"ℹ️ Input Dataset\\n#{label}\"]"
        when 'parameter_input'
          statements << "#{id}[color=lightgreen,label=\"ℹ️ Input Parameter\\n#{label}\"]"
        when 'subworkflow'
          statements << "#{id}[color=lightcoral,label=\"🛠️ Subworkflow\\n#{label}\"]"
        else
          statements << "#{id}[label=\"#{label}\"]"
        end

        step['input_connections'].each_value do |conn|
          links = conn.is_a?(Array) ? conn : [conn]
          links.each do |link|
            statements << "#{link['id']} -> #{id} [label=\"#{mermaid_safe_label(link['output_name'])}\"]"
          end
        end

        (step['workflow_outputs'] || []).each do |wo|
          next if wo['label'].nil?

          wo['uuid'] = SecureRandom.uuid.to_s if wo['uuid'].nil?
          # DOT node ids cannot contain dashes, hence the k-prefixed, stripped uuid.
          statements << "k#{wo['uuid'].gsub('-', '')}[color=lightseagreen,label=\"Output\\n#{wo['label']}\"]"
          statements << "#{id} -> k#{wo['uuid'].gsub('-', '')}"
        end
      end

      "digraph main {\n" + statements.map { |q| " #{q}" }.join("\n") + "\n}"
    end
  end
end
module Gtn
  module TopicFilter
    ##
    # (PRODUCTION ONLY) Extract a log of commits (hash, timestamp, message) for commits to a specific path
    #
    # Params:
    # +wf_path+:: Path to a file
    # Returns:
    # +Array+:: An array of {'hash' => ..., 'unix' => 1230, 'message' => 'I did something', 'short_hash' => ...}
    def self.git_log(wf_path)
      # Shelling out to git for every path is too slow outside production.
      return [] unless Jekyll.env == 'production'

      cache.getset(wf_path) do
        require 'shellwords' # deliberate lazy-load; only needed on this path

        log_lines = %x[git log --format="%H %at %s" #{Shellwords.escape(wf_path)}].split("\n")
        commits = log_lines.map do |line|
          sha, unix, message = line.split(' ', 3)
          { 'hash' => sha, 'unix' => unix, 'message' => message, 'short_hash' => sha[0..8] }
        end

        # Number the commits: newest first in the list, so the newest gets the
        # highest number and the oldest gets 1.
        commits.each_with_index.map do |commit, idx|
          commit['num'] = commits.length - idx
          commit
        end
      end
    end

    ##
    # Resolve a material from a given collated material. What does that entail? A LOT.
    #
    # Given a collated material, e.g.
    #   material = Gtn::TopicFilter.collate_materials(site, site.pages)['proteomics/database-handling']
    # we can then choose to 'resolve' that material, i.e.
  end
end
collect all of the609# relevant information that is needed for it to really be useful. This610# includes things like tools, workflows, etc. Everything is packed into a611# highly annotated 'material' Hash.612#613# You might look below and say "Wow that is ridiculously unnecessarily614# complicated", or, maybe not. But either way, this is what is required to display a full 'learning material'615# on the GTN, and all of the metadata that goes into it.616#617# Some of the highlights are:618# - learning resource metadata (taken from tutorial if it exists, otherwise, from the slides)619# - short ID620# - topic information (topic name/ topic_id)621# - any javascript requirements622# - All associated workflows, and metadata about those workflows (tests, features used, associated test results, mermaid and dot graphs, associated tools, inputs and outputs.)623# - +ref+, +ref_tutorials+, +ref_slides+ that point to the actual Jekyll pages, in case you need those.624# - api URL625# - tools (discovered from the tutorial text + workflows)626# - a list of supported servers for easy display (exact and inexact matches)627# - a matrix of which servers support which versions of those tools, for a full compatibility table (used on maintainer page.)628# - requisite metdata for an admin to install these tools629#630# resource = Gtn::TopicFilter.collate_materials(site, site.pages)['proteomics/database-handling']631# material = Gtn::TopicFilter.resolve_material(site, resource)632# material # =>633# {"layout"=>"tutorial_hands_on",634# "title"=>"Protein FASTA Database Handling",635# "edam_ontology"=>["topic_0121"],636# "zenodo_link"=>"",637# "level"=>"Introductory",638# "questions"=>["How to download protein FASTA databases of a certain organism?", "How to download a contaminant database?", "How to create a decoy database?", "How to combine databases?"],639# "objectives"=>["Creation of a protein FASTA database ready for use with database search algorithms."],640# 
"time_estimation"=>"30m",641# "key_points"=>642# ["There are several types of Uniprot databases.",643# "Search databases should always include possible contaminants.",644# "For analyzing cell culture or organic samples, search databases should include mycoplasma databases.",645# "Some peptide search engines depend on decoys to calculate the FDR."],646# "contributors"=>["stortebecker", "bgruening"],647# "subtopic"=>"id-quant",648# "tags"=>["DDA"],649# "js_requirements"=>{"mathjax"=>nil, "mermaid"=>false},650# "short_id"=>"T00214",651# "symlink"=>nil,652# "url"=>"/topics/proteomics/tutorials/database-handling/tutorial.html",653# "topic_name"=>"proteomics",654# "tutorial_name"=>"database-handling",655# "dir"=>"topics/proteomics/tutorials/database-handling",656# "redirect_from"=>["/short/proteomics/database-handling", "/short/T00214"],657# "id"=>"proteomics/database-handling",658# "ref"=>#<Jekyll::Page @relative_path="topics/proteomics/tutorials/database-handling/tutorial.md">,659# "ref_tutorials"=>[#<Jekyll::Page @relative_path="topics/proteomics/tutorials/database-handling/tutorial.md">], "ref_slides"=>[], "hands_on"=>true, "slides"=>false, "mod_date"=>2023-11-09 09:55:09 +0100,660# "pub_date"=>2017-02-14 13:20:30 +0100,661# "version"=>29,662# "workflows"=>663# "workflows"=>664# [{"workflow"=>"wf_database-handling.ga",665# "tests"=>false,666# "url"=>"https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/database-handling/workflows/wf_database-handling.ga",667# "url_html"=>"https://training.galaxyproject.org/training-material/topics/proteomics/tutorials/database-handling/workflows/wf_database-handling.html",668# "path"=>"topics/proteomics/tutorials/database-handling/workflows/wf_database-handling.ga",669# "wfid"=>"proteomics-database-handling",670# "wfname"=>"wf-database-handling",671# "trs_endpoint"=>"https://training.galaxyproject.org/training-material/api/ga4gh/trs/v2/tools/proteomics-database-handling/versions/wf-database-handling",672# 
"license"=>nil,673# "parent_id"=>"proteomics/database-handling",674# "topic_id"=>"proteomics",675# "tutorial_id"=>"database-handling",676# "creators"=>[],677# "name"=>"Proteomics: database handling",678# "title"=>"Proteomics: database handling",679# "version"=>5,680# "description"=>"Protein FASTA Database Handling",681# "tags"=>["proteomics"],682# "features"=>{"report"=>nil, "subworkflows"=>false, "comments"=>false, "parameters"=>false},683# "workflowhub_id"=>"1204",684# "history"=>[],685# "test_results"=>nil,686# "modified"=>2024-03-18 12:38:44.394831189 +0100,687# "mermaid"=>688# "flowchart TD\n 0[\"Protein Database Downloader\"];\n 1[\"Protein Database Downloader\"];\n 2[\"FASTA-to-Tabular\"];\n 0 -->|output_database| 2;\n 3[\"Add column\"];\n 2 -->|output| 3;\n 4[\"Tabular689# -to-FASTA\"];\n 3 -->|out_file1| 4;\n 5[\"FASTA Merge Files and Filter Unique Sequences\"];\n 4 -->|output| 5;\n 1 -->|output_database| 5;\n 6[\"DecoyDatabase\"];\n 5 -->|output| 6;",690# "graph_dot"=>691# "digraph main {\n node [fontname=\"Atkinson Hyperlegible\", shape=box, color=white,style=filled,color=peachpuff,margin=\"0.2,0.2\"];\n edge [fontname=\"Atkinson Hyperlegible\"];\n 0[label=\"Protein Data692# base Downloader\"]\n 1[label=\"Protein Database Downloader\"]\n 2[label=\"FASTA-to-Tabular\"]\n 0 -> 2 [label=\"output_database\"]\n 3[label=\"Add column\"]\n 2 -> 3 [label=\"output\"]\n 4[label=\"Tabular693# -to-FASTA\"]\n 3 -> 4 [label=\"out_file1\"]\n 5[label=\"FASTA Merge Files and Filter Unique Sequences\"]\n 4 -> 5 [label=\"output\"]\n 1 -> 5 [label=\"output_database\"]\n 6[label=\"DecoyDatabase\"]\n 5 -694# > 6 [label=\"output\"]\n}",695# "workflow_tools"=>696# ["addValue",697# "toolshed.g2.bx.psu.edu/repos/devteam/fasta_to_tabular/fasta2tab/1.1.1",698# "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1",699# "toolshed.g2.bx.psu.edu/repos/galaxyp/dbbuilder/dbbuilder/0.3.1",700# 
"toolshed.g2.bx.psu.edu/repos/galaxyp/fasta_merge_files_and_filter_unique_sequences/fasta_merge_files_and_filter_unique_sequences/1.2.0",701# "toolshed.g2.bx.psu.edu/repos/galaxyp/openms_decoydatabase/DecoyDatabase/2.6+galaxy0"],702# "inputs"=>[],703# "outputs"=>704# [{"annotation"=>"",705# "content_id"=>"toolshed.g2.bx.psu.edu/repos/galaxyp/dbbuilder/dbbuilder/0.3.1",706# "errors"=>nil,707# "id"=>0,708# "input_connections"=>{},709# "inputs"=>[],710# "label"=>nil,711# "name"=>"Protein Database Downloader",712# "outputs"=>[{"name"=>"output_database", "type"=>"fasta"}],713# "position"=>{"bottom"=>380.6000061035156, "height"=>102.60000610351562, "left"=>-110, "right"=>90, "top"=>278, "width"=>200, "x"=>-110, "y"=>278},714# "post_job_actions"=>{},715# "tool_id"=>"toolshed.g2.bx.psu.edu/repos/galaxyp/dbbuilder/dbbuilder/0.3.1",716# "tool_shed_repository"=>{"changeset_revision"=>"c1b437242fee", "name"=>"dbbuilder", "owner"=>"galaxyp", "tool_shed"=>"toolshed.g2.bx.psu.edu"},717# "tool_state"=>718# "{\"__input_ext\": \"data\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"source\": {\"from\": \"cRAP\", \"__current_case__\": 1}, \"__page__\": null, \"__rerun_remap_job_id__\":719# null}",720# "tool_version"=>"0.3.1",721# "type"=>"tool",722# "uuid"=>"6613b72c-2bab-423c-88fc-05edfe9ea8ec",723# "workflow_outputs"=>[{"label"=>nil, "output_name"=>"output_database", "uuid"=>"2d289b03-c396-46a2-a725-987b6c75ada9"}]},724# ...725# "api"=>"https://training.galaxyproject.org/training-material/api/topics/proteomics/tutorials/database-handling/tutorial.json",726# "tools"=>727# ["addValue",728# "toolshed.g2.bx.psu.edu/repos/devteam/fasta_to_tabular/fasta2tab/1.1.1",729# "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1",730# "toolshed.g2.bx.psu.edu/repos/galaxyp/dbbuilder/dbbuilder/0.3.1",731# "toolshed.g2.bx.psu.edu/repos/galaxyp/fasta_merge_files_and_filter_unique_sequences/fasta_merge_files_and_filter_unique_sequences/1.2.0",732# 
"toolshed.g2.bx.psu.edu/repos/galaxyp/openms_decoydatabase/DecoyDatabase/2.6+galaxy0"],733# "supported_servers"=>734# {"exact"=>[{"url"=>"https://usegalaxy.eu", "name"=>"UseGalaxy.eu", "usegalaxy"=>true}, {"url"=>"https://usegalaxy.org.au", "name"=>"UseGalaxy.org.au", "usegalaxy"=>true}],735# "inexact"=>[{"url"=>"https://usegalaxy.no/", "name"=>"UseGalaxy.no", "usegalaxy"=>false}]},736# "supported_servers_matrix"=>737# {"servers"=>738# [{"url"=>"http://aspendb.uga.edu:8085/", "name"=>"AGEseq @ AspenDB"},739# {"url"=>"http://motherbox.chemeng.ntua.gr/anastasia_dev/", "name"=>"ANASTASIA"},740# ...741# "tools"=>742# [{"id"=>"addValue",743# "servers"=>744# [{"state"=>"local", "server"=>"http://aspendb.uga.edu:8085/"},745# {"state"=>"missing", "server"=>"http://motherbox.chemeng.ntua.gr/anastasia_dev/"},746# {"state"=>"local", "server"=>"http://apostl.moffitt.org/"},747# {"state"=>"local", "server"=>"http://smile.hku.hk/SARGs"},748# {"state"=>"local", "server"=>"http://bf2i-galaxy.insa-lyon.fr:8080/"},749# {"state"=>"local", "server"=>"http://143.169.238.104/galaxy/"},750# {"state"=>"missing", "server"=>"https://iris.angers.inra.fr/galaxypub-cfbp"},751# {"state"=>"local", "server"=>"https://cpt.tamu.edu/galaxy-public/"},752# {"state"=>"missing", "server"=>"https://vm-chemflow-francegrille.eu/"},753# {"state"=>"local", "server"=>"https://hyperbrowser.uio.no/coloc-stats"},754# {"state"=>"local", "server"=>"http://corgat.cloud.ba.infn.it/galaxy"},755# {"state"=>"local", "server"=>"http://cropgalaxy.excellenceinbreeding.org/"},756# {"state"=>"local", "server"=>"http://dintor.eurac.edu/"},757# {"state"=>"missing", "server"=>"http://www.freebioinfo.org/"},758# {"state"=>"local", "server"=>"http://igg.cloud.ba.infn.it/galaxy"},759# "topic_name_human"=>"Proteomics",760# "admin_install"=>761# {"install_tool_dependencies"=>true,762# "install_repository_dependencies"=>true,763# "install_resolver_dependencies"=>true,764# "tools"=>765# [{"name"=>"fasta_to_tabular", 
"owner"=>"devteam", "revisions"=>"e7ed3c310b74", "tool_panel_section_label"=>"FASTA/FASTQ", "tool_shed_url"=>"https://toolshed.g2.bx.psu.edu/"},766# {"name"=>"tabular_to_fasta", "owner"=>"devteam", "revisions"=>"0a7799698fe5", "tool_panel_section_label"=>"FASTA/FASTQ", "tool_shed_url"=>"https://toolshed.g2.bx.psu.edu/"},767# {"name"=>"dbbuilder", "owner"=>"galaxyp", "revisions"=>"c1b437242fee", "tool_panel_section_label"=>"Get Data", "tool_shed_url"=>"https://toolshed.g2.bx.psu.edu/"},768# {"name"=>"fasta_merge_files_and_filter_unique_sequences", "owner"=>"galaxyp", "revisions"=>"f546e7278f04", "tool_panel_section_label"=>"FASTA/FASTQ", "tool_shed_url"=>"https://toolshed.g2.bx.psu.edu/"},769# {"name"=>"openms_decoydatabase", "owner"=>"galaxyp", "revisions"=>"370141bc0da3", "tool_panel_section_label"=>"Proteomics", "tool_shed_url"=>"https://toolshed.g2.bx.psu.edu/"}]},770# "admin_install_yaml"=>771# "---\ninstall_tool_dependencies: true\ninstall_repository_dependencies: true\ninstall_resolver_dependencies: true\ntools:\n- name: fasta_to_tabular\n owner: devteam\n revisions: e7ed3c310b74\n tool_panel_s772# ection_label: FASTA/FASTQ\n tool_shed_url: https://toolshed.g2.bx.psu.edu/\n- name: tabular_to_fasta\n owner: devteam\n revisions: 0a7799698fe5\n tool_panel_section_label: FASTA/FASTQ\n tool_shed_url: http773# s://toolshed.g2.bx.psu.edu/\n- name: dbbuilder\n owner: galaxyp\n revisions: c1b437242fee\n tool_panel_section_label: Get Data\n tool_shed_url: https://toolshed.g2.bx.psu.edu/\n- name: fasta_merge_files_and774# _filter_unique_sequences\n owner: galaxyp\n revisions: f546e7278f04\n tool_panel_section_label: FASTA/FASTQ\n tool_shed_url: https://toolshed.g2.bx.psu.edu/\n- name: openms_decoydatabase\n owner: galaxyp\n775# revisions: 370141bc0da3\n tool_panel_section_label: Proteomics\n tool_shed_url: https://toolshed.g2.bx.psu.edu/\n",776# "tours"=>false,777# "video"=>false,778# "slides_recordings"=>false,779# "translations"=>{"tutorial"=>[], "slides"=>[], 
"video"=>false},780# "license"=>"CC-BY-4.0",781# "type"=>"tutorial"}782783784785786787def self.resolve_material(site, material)788# We've already789# looked in every /topic/*/tutorials/* folder, and turn these disparate790# resources into a page_obj as well. Most variables are copied directly,791# either from a tutorial, or a slides (if no tutorial is available.) This792# means we do not (cannot) support external_slides AND external_handson.793# This is probably a sub-optimal situation we'll end up fixing someday.794#795tutorials = material['resources'].select { |a| a[0] == 'tutorial' }796slides = material['resources'].select { |a| a[0] == 'slides' }797tours = material['resources'].select { |a| a[0] == 'tours' }798799# Our final "page" object (a "material")800page = nil801802slide_has_video = false803slide_has_recordings = false804slide_translations = []805page_ref = nil806807if slides.length.positive?808page = slides.min { |a, b| a[1].path <=> b[1].path }[1]809slide_has_video = page.data.fetch('video', false)810slide_has_recordings = page.data.fetch('recordings', false)811slide_translations = page.data.fetch('translations', [])812page_ref = page813end814815# No matter if there were slides, we override with tutorials if present.816tutorial_translations = []817if tutorials.length.positive?818page = tutorials.min { |a, b| a[1].path <=> b[1].path }[1]819tutorial_translations = page.data.fetch('translations', [])820page_ref = page821end822823if page.nil?824Jekyll.logger.error '[GTN/TopicFilter] Could not process material'825return {}826end827828# Otherwise clone the metadata from it which works well enough.829page_obj = page.data.dup830page_obj['id'] = "#{page['topic_name']}/#{page['tutorial_name']}"831page_obj['ref'] = page_ref832page_obj['ref_tutorials'] = tutorials.map { |a| a[1] }833page_obj['ref_slides'] = slides.map { |a| a[1] }834835id = page_obj['id']836837# Sometimes `hands_on` is set to something like `external`, in which838# case it is important to not 
override it. So we only do that if the839# key isn't already set. Then we choose to set it to a test for the840# tutorial being present. We probably don't need to test both, but it841# is hard to follow which keys are which and safer to test for both in842# case someone edits the code later. If either of these exist, we can843# automatically set `hands_on: true`844page_obj['hands_on'] = tutorials.length.positive? if !page_obj.key?('hands_on')845846# Same for slides, if there's a resource by that name, we can847# automatically set `slides: true`848page_obj['slides'] = slides.length.positive? if !page_obj.key?('slides')849850all_resources = slides + tutorials851page_obj['mod_date'] = all_resources852.map { |p| Gtn::ModificationTimes.obtain_time(p[1].path) }853.max854855page_obj['pub_date'] = all_resources856.map { |p| Gtn::PublicationTimes.obtain_time(p[1].path) }857.min858859page_obj['version'] = all_resources860.map { |p| Gtn::ModificationTimes.obtain_modification_count(p[1].path) }861.max862863folder = material['dir']864865ymls = Dir.glob("#{folder}/quiz/*.yml") + Dir.glob("#{folder}/quiz/*.yaml")866if ymls.length.positive?867quizzes = ymls.map { |a| a.split('/')[-1] }868page_obj['quiz'] = quizzes.map do |q|869quiz_data = YAML.load_file("#{folder}/quiz/#{q}")870quiz_data['id'] = q871quiz_data['path'] = "#{folder}/quiz/#{q}"872quiz_data873end874end875876# In dev configuration, this breaks for me. Not sure why config isn't available.877domain = if !site.config.nil? 
&& site.config.key?('url')878"#{site.config['url']}#{site.config['baseurl']}"879else880'http://localhost:4000/training-material/'881end882# Similar as above.883workflows = Dir.glob("#{folder}/workflows/*.ga") # TODO: support gxformat2884if workflows.length.positive?885workflow_names = workflows.map { |a| a.split('/')[-1] }886page_obj['workflows'] = workflow_names.map do |wf|887wfid = "#{page['topic_name']}-#{page['tutorial_name']}"888wfname = wf.gsub(/.ga/, '').downcase.gsub(/[^a-z0-9]/, '-')889trs = "api/ga4gh/trs/v2/tools/#{wfid}/versions/#{wfname}"890wf_path = "#{folder}/workflows/#{wf}"891wf_json = JSON.parse(File.read(wf_path))892license = wf_json['license']893creators = wf_json['creator'] || []894wftitle = wf_json['name']895896# /galaxy-intro-101-workflow.eu.json897workflow_test_results = Dir.glob(wf_path.gsub(/.ga$/, '.*.json'))898workflow_test_outputs = {}899workflow_test_results.each do |test_result|900server = workflow_test_results[0].match(/\.(..)\.json$/)[1]901workflow_test_outputs[server] = JSON.parse(File.read(test_result))902end903workflow_test_outputs = nil if workflow_test_outputs.empty?904905wfhkey = [page['topic_name'], page['tutorial_name'], wfname].join('/')906907{908'workflow' => wf,909'tests' => Dir.glob("#{folder}/workflows/" + wf.gsub(/.ga/, '-test*')).length.positive?,910'url' => "#{domain}/#{folder}/workflows/#{wf}",911'url_html' => "#{domain}/#{folder}/workflows/#{wf.gsub(/.ga$/, '.html')}",912'path' => wf_path,913'wfid' => wfid,914'wfname' => wfname,915'trs_endpoint' => "#{domain}/#{trs}",916'license' => license,917'parent_id' => page_obj['id'],918'topic_id' => page['topic_name'],919'tutorial_id' => page['tutorial_name'],920'creators' => creators,921'name' => wf_json['name'],922'title' => wftitle,923'version' => Gtn::ModificationTimes.obtain_modification_count(wf_path),924'description' => wf_json['annotation'],925'tags' => wf_json['tags'],926'features' => {927'report' => wf_json['report'],928'subworkflows' => wf_json['steps'].map{|_, x| 
x['type']}.any?{|x| x == "subworkflow"},929'comments' => (wf_json['comments'] || []).length.positive?,930'parameters' => wf_json['steps'].map{|_, x| x['type']}.any?{|x| x == "parameter_input"},931},932'workflowhub_id' => (site.data['workflowhub'] || {}).fetch(wfhkey, nil),933'history' => git_log(wf_path),934'test_results' => workflow_test_outputs,935'modified' => File.mtime(wf_path),936'mermaid' => mermaid(wf_json),937'graph_dot' => graph_dot(wf_json),938'workflow_tools' => extract_workflow_tool_list(wf_json).flatten.uniq.sort,939'inputs' => wf_json['steps'].select { |_k, v| ['data_input', 'data_collection_input', 'parameter_input'].include? v['type'] }.map{|_, v| v},940'outputs' => wf_json['steps'].select { |_k, v| v['workflow_outputs'] && v['workflow_outputs'].length.positive? }.map{|_, v| v},941}942end943end944945# Really only used for tool list install for ephemeris, not general.946page_obj['api'] = "#{domain}/api/topics/#{page['topic_name']}/tutorials/#{page['tutorial_name']}/tutorial.json"947948# Tool List949#950# This is exposed in the GTN API to help admins/devs easily get the tool951# list for installation.952page_obj['tools'] = []953page_obj['tools'] += page.content.scan(/{% tool \[[^\]]*\]\(([^)]*)\)\s*%}/) if page_obj['hands_on']954955page_obj['workflows']&.each do |wf|956wf_path = "#{folder}/workflows/#{wf['workflow']}"957958page_obj['tools'] += wf['workflow_tools']959end960page_obj['tools'] = page_obj['tools'].flatten.sort.uniq961962topic = site.data[page_obj['topic_name']]963page_obj['supported_servers'] = if topic['type'] == 'use' || topic['type'] == 'basics'964Gtn::Supported.calculate(site.data['public-server-tools'], page_obj['tools'])965else966[]967end968969page_obj['supported_servers_matrix'] = if topic['type'] == 'use' || topic['type'] == 'basics'970Gtn::Supported.calculate_matrix(site.data['public-server-tools'], page_obj['tools'])971else972[]973end974975976topic_name_human = 
site.data[page_obj['topic_name']]['title']977page_obj['topic_name_human'] = topic_name_human # TODO: rename 'topic_name' and 'topic_name' to 'topic_id'978admin_install = Gtn::Toolshed.format_admin_install(site.data['toolshed-revisions'], page_obj['tools'],979topic_name_human, site.data['toolcats'])980page_obj['admin_install'] = admin_install981page_obj['admin_install_yaml'] = admin_install.to_yaml982983page_obj['tours'] = tours.length.positive?984page_obj['video'] = slide_has_video985page_obj['slides_recordings'] = slide_has_recordings986page_obj['translations'] = {}987page_obj['translations']['tutorial'] = tutorial_translations988page_obj['translations']['slides'] = slide_translations989page_obj['translations']['video'] = slide_has_video # Just demand it?990page_obj['license'] = 'CC-BY-4.0' if page_obj['license'].nil?991# I feel less certain about this override, but it works well enough in992# practice, and I did not find any examples of `type: <anything other993# than tutorial>` in topics/*/tutorials/*/tutorial.md but that doesn't994# make it future proof.995page_obj['type'] = 'tutorial'996997if page_obj.key?('draft') && page_obj['draft']998page_obj['tags'] = [] if !page_obj.key? 'tags'999page_obj['tags'].push('work-in-progress')1000end10011002page_obj1003end10041005def self.process_pages(site, pages)1006# eww.1007return site.data['cache_processed_pages'] if site.data.key?('cache_processed_pages')10081009materials = collate_materials(site, pages).map { |_k, v| resolve_material(site, v) }1010Jekyll.logger.info '[GTN/TopicFilter] Filling Materials Cache'1011site.data['cache_processed_pages'] = materials10121013# Prepare short URLs1014shortlinks = site.data['shortlinks']1015mappings = Hash.new { |h, k| h[k] = [] }10161017shortlinks.each_key do |kp|1018shortlinks[kp].each do |k, v|1019mappings[v].push("/short/#{k}")1020end1021end1022# Update the materials with their short IDs + redirects1023pages.select { |p| mappings.keys.include? 
p.url }.each do |p|1024# Set the short id on the material1025if p['ref']1026# Initialise redirects if it wasn't set1027p['ref'].data['redirect_from'] = [] if !p['ref'].data.key?('redirect_from')1028p['ref'].data['redirect_from'].push(*mappings[p.url])1029p['ref'].data['redirect_from'].uniq!1030else1031p.data['redirect_from'] = [] if !p.data.key?('redirect_from')10321033p.data['redirect_from'].push(*mappings[p.url])1034p.data['redirect_from'].uniq!1035end1036end1037# Same for news1038get_posts(site).select { |p| mappings.keys.include? p.url }.each do |p|1039# Set the short id on the material1040p.data['redirect_from'] = [] if !p.data.key?('redirect_from')1041p.data['redirect_from'].push(*mappings[p.url])1042p.data['redirect_from'].uniq!1043end10441045materials1046end10471048##1049# This is a helper function to get all the materials in a site.1050def self.list_all_materials(site)1051process_pages(site, site.pages)1052end10531054##1055# This is a helper function to get materials with automated videos.1056def self.list_videos(site)1057materials = process_pages(site, site.pages)1058materials.select { |x| x['video'] == true }1059end10601061##1062# List every tag used across all materials.1063# This is used to generate the tag cloud.1064#1065# Parameters:1066# +site+:: The +Jekyll::Site+ object, used to get the list of pages.1067# Returns:1068# +Array+:: An array of strings, each string is a tag. 
(sorted and unique)1069#1070def self.list_all_tags(site)1071materials = process_pages(site, site.pages)1072(materials.map { |x| x['tags'] || [] }.flatten + list_topics(site)).sort.uniq1073end10741075def self.filter_by_topic(site, topic_name)1076# Here we make a (cached) call to load materials into memory and sort them1077# properly.1078materials = process_pages(site, site.pages)10791080# Select out the materials by topic:1081resource_pages = materials.select { |x| x['topic_name'] == topic_name }10821083# If there is nothing with that topic name, try generating it by tags.1084resource_pages = materials.select { |x| (x['tags'] || []).include?(topic_name) } if resource_pages.empty?10851086# The complete resources we'll return is the introduction slides first1087# (EDIT: not anymore, we rely on prioritisation!)1088# and then the rest of the pages.1089resource_pages = resource_pages.sort_by { |k| k.fetch('priority', 1) }10901091Jekyll.logger.error "Error? Could not find any relevant pages for #{topic_name}" if resource_pages.empty?10921093resource_pages1094end10951096def self.filter_by_tag(site, topic_name)1097# Here we make a (cached) call to load materials into memory and sort them1098# properly.1099materials = process_pages(site, site.pages)11001101# Select those with that topic ID or that tag1102resource_pages = materials.select { |x| x['topic_name'] == topic_name }1103resource_pages += materials.select { |x| (x['tags'] || []).include?(topic_name) }11041105# The complete resources we'll return is the introduction slides first1106# (EDIT: not anymore, we rely on prioritisation!)1107# and then the rest of the pages.1108resource_pages = resource_pages.sort_by { |k| k.fetch('priority', 1) }11091110Jekyll.logger.error "Error? 
Could not find any relevant tagged pages for #{topic_name}" if resource_pages.empty?11111112resource_pages1113end11141115##1116# Filter a list of materials by topic and subtopic.1117def self.filter_by_topic_subtopic(site, topic_name, subtopic_id)1118resource_pages = filter_by_topic(site, topic_name)11191120# Select out materials with the correct subtopic1121resource_pages = resource_pages.select { |x| x['subtopic'] == subtopic_id }11221123if resource_pages.empty?1124Jekyll.logger.error "Error? Could not find any relevant pages for #{topic_name} / #{subtopic_id}"1125end11261127resource_pages1128end11291130##1131# Get a list of contributors for a list of materials1132# Parameters:1133# +materials+:: An array of materials1134# Returns:1135# +Array+:: An array of individual contributors as strings.1136def self.identify_contributors(materials, site)1137materials1138.map { |_k, v| v['materials'] }.flatten1139# Not 100% sure why this flatten is needed? Probably due to the map over hash1140.map { |mat| Gtn::Contributors.get_contributors(mat) }1141.flatten1142.select { |c| Gtn::Contributors.person?(site, c) }1143.uniq1144.shuffle1145end11461147##1148# Get a list of funders for a list of materials1149# Parameters:1150# +materials+:: An array of materials1151# Returns:1152# +Array+:: An array of funder (organisations that provided support) IDs as strings.1153def self.identify_funders_and_grants(materials, site)1154materials1155.map { |_k, v| v['materials'] }.flatten1156# Not 100% sure why this flatten is needed? 
Probably due to the map over hash1157.map { |mat| Gtn::Contributors.get_all_funding(site, mat) }1158.flatten1159.uniq1160.shuffle1161end11621163##1164# Get the version of a tool.1165# Parameters:1166# +tool+:: A tool string1167# Returns:1168# +String+:: The version of the tool.1169#1170# Examples:1171# get_version("toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regex1/1.0.0") => "1.0.0"1172def self.get_version(tool)1173if tool.count('/') > 41174tool.split('/')[-1]1175else1176tool1177end1178end11791180##1181# Get a short version of a tool.1182# Parameters:1183# +tool+:: A tool string1184# Returns:1185# +String+:: The short version of the tool.1186#1187# Examples:1188# short_tool("toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regex1/1.0.0") => "galaxyp/regex1"1189def self.short_tool(tool)1190if tool.count('/') > 41191"#{tool.split('/')[2]}/#{tool.split('/')[3]}/#{tool.split('/')[4]}"1192else1193tool1194end1195end11961197##1198# List materials by tool1199#1200# Parameters:1201# +site+:: The +Jekyll::Site+ object, used to get the list of pages.1202# Returns:1203# +Hash+:: A hash as below:1204#1205# {1206# tool_id => {1207# "tool_id" => [tool_id, version],1208# "tutorials" => [tutorial_id, tutorial_title, topic_title, tutorial_url]1209# }, ...1210# }1211#1212# *Nota Bene!!!*: Galaxy depends on the structure of this response, please1213# do not change it, add a new API instead if you need to modify it1214# significantly.1215#1216def self.list_materials_by_tool(site)1217tool_map = {}12181219list_all_materials(site).each do |m|1220m.fetch('tools', []).each do |tool|1221sid = short_tool(tool)1222tool_map[sid] = { 'tool_id' => [], 'tutorials' => [] } if !tool_map.key?(sid)12231224tool_map[sid]['tool_id'].push([tool, get_version(tool)])1225tool_map[sid]['tutorials'].push([1226m['id'], m['title'], site.data[m['topic_name']]['title'], m['url']1227])1228end1229end12301231# Uniqueify/sort1232t = tool_map.to_h do |k, 
v|1233v['tool_id'].uniq!1234v['tool_id'].sort_by! { |k2| k2[1] }1235v['tool_id'].reverse!12361237v['tutorials'].uniq!1238v['tutorials'].sort!1239[k, v]1240end12411242# Order by most popular tool1243t.sort_by { |_k, v| v['tutorials'].length }.reverse.to_h1244end124512461247##1248# Not materials but resources (including e.g. recordings, slides separate from tutorials, etc.)1249#1250# The structure is a large array of arrays, with [date, category, page-like object, tags]1251#1252# [#<DateTime: 2019-02-22T20:53:50+01:00 ((2458537j,71630s,0n),+3600s,2299161j)>,1253# "tutorials",1254# #<Jekyll::Page @relative_path="topics/single-cell/tutorials/scrna-preprocessing/tutorial.md">,1255# ["single-cell"]],1256# [#<DateTime: 2019-02-20T19:33:11+01:00 ((2458535j,66791s,0n),+3600s,2299161j)>,1257# "tutorials",1258# #<Jekyll::Page @relative_path="topics/single-cell/tutorials/scrna-umis/tutorial.md">,1259# ["single-cell"]],1260# [#<DateTime: 2019-02-16T21:04:07+01:00 ((2458531j,72247s,0n),+3600s,2299161j)>,1261# "slides",1262# #<Jekyll::Page @relative_path="topics/single-cell/tutorials/scrna-plates-batches-barcodes/slides.html">,1263# ["single-cell"]]]1264def self.all_date_sorted_resources(site)1265cache.getset('all_date_sorted_resources') do1266self._all_date_sorted_resources(site)1267end1268end12691270def self._all_date_sorted_resources(site)1271events = site.pages.select { |x| x['layout'] == 'event' || x['layout'] == 'event-external' }1272materials = list_all_materials(site).reject { |k, _v| k['draft'] }1273news = site.posts.select { |x| x['layout'] == 'news' }1274faqs = site.pages.select { |x| x['layout'] == 'faq' }1275pathways = site.pages.select { |x| x['layout'] == 'learning-pathway' }1276workflows = Dir.glob('topics/**/*.ga')12771278bucket = events.map do |e|1279[Gtn::PublicationTimes.obtain_time(e.path).to_datetime, 'events', e, ['event'] + e.data.fetch('tags', [])]1280end12811282materials.each do |m|1283tags = [m['topic_name']] + (m['tags'] || 
[])1284m.fetch('ref_tutorials', []).map do |t|1285bucket << [Gtn::PublicationTimes.obtain_time(t.path).to_datetime, 'tutorials', t, tags]12861287(t['recordings'] || []).map do |r|1288url = '/' + t.path.gsub(/tutorial(_[A_Z_]*)?.(html|md)$/, 'recordings/')1289url += "#tutorial-recording-#{Date.parse(r['date']).strftime('%-d-%B-%Y').downcase}"1290attr = {'title' => "Recording of " + t['title'],1291'contributors' => r['speakers'] + (r['captions'] || []),1292'content' => "A #{r['length']} long recording is now available."}12931294obj = objectify(attr, url, t.path)1295bucket << [DateTime.parse(r['date'].to_s), 'recordings', obj, tags]1296end1297end12981299m.fetch('ref_slides', []).reject { |s| s.url =~ /-plain.html/ }.map do |s|1300bucket << [Gtn::PublicationTimes.obtain_time(s.path).to_datetime, 'slides', s, tags]13011302(s['recordings'] || []).map do |r|1303url = '/' + s.path.gsub(/slides(_[A_Z_]*)?.(html|md)$/, 'recordings/')1304url += "#tutorial-recording-#{Date.parse(r['date']).strftime('%-d-%B-%Y').downcase}"1305attr = {'title' => "Recording of " + s['title'],1306'contributors' => r['speakers'] + (r['captions'] || []),1307'content' => "A #{r['length']} long recording is now available."}1308obj = objectify(attr, url, s.path)1309bucket << [DateTime.parse(r['date'].to_s), 'recordings', obj, tags]1310end1311end1312end13131314bucket += news.map do |n|1315[n.date.to_datetime, 'news', n, ['news'] + n.data.fetch('tags', [])]1316end13171318bucket += faqs.map do |n|1319tag = Gtn::PublicationTimes.clean_path(n.path).split('/')[1]1320[Gtn::PublicationTimes.obtain_time(n.path).to_datetime, 'faqs', n, ['faqs', tag]]1321end13221323bucket += pathways.map do |n|1324tags = ['learning-pathway'] + (n['tags'] || [])1325[Gtn::PublicationTimes.obtain_time(n.path).to_datetime, 'learning-pathways', n, tags]1326end13271328bucket += workflows.map do |n|1329tag = Gtn::PublicationTimes.clean_path(n).split('/')[1]1330wf_data = JSON.parse(File.read(n))13311332attrs = {1333'title' => 
wf_data['name'],1334'description' => wf_data['annotation'],1335'tags' => wf_data['tags'],1336'contributors' => wf_data.fetch('creator', []).map do |c|1337matched = site.data['contributors'].select{|k, v|1338v.fetch('orcid', "does-not-exist") == c.fetch('identifier', "").gsub('https://orcid.org/', '')1339}.first1340if matched1341matched[0]1342else1343c['name']1344end1345end1346}1347# These aren't truly stable. I'm not sure what to do about that.1348obj = objectify(attrs, '/' + n.gsub(/\.ga$/, '.html'), n)1349# obj = objectify(attrs, '/' + n.path[0..n.path.rindex('/')], n)1350[Gtn::PublicationTimes.obtain_time(n).to_datetime, 'workflows', obj, ['workflows', tag] + obj['tags']]1351end13521353# Remove symlinks from bucket.1354bucket = bucket.reject { |date, type, page, tags|1355File.symlink?(page.path) || File.symlink?(File.dirname(page.path)) || File.symlink?(File.dirname(File.dirname(page.path)))1356}13571358bucket += site.data['contributors'].map do |k, v|1359a = {'title' => "@#{k}",1360'content' => "GTN Contributions from #{k}"}1361obj = objectify(a, "/hall-of-fame/#{k}/", k)13621363[DateTime.parse("#{v['joined']}-01T12:00:00", 'content' => "GTN Contributions from #{k}"), 'contributors', obj, ['contributor']]1364end13651366bucket += site.data['grants'].map do |k, v|1367a = {'title' => "@#{k}",1368'content' => "GTN Contributions from #{k}"}1369obj = objectify(a, "/hall-of-fame/#{k}/", k)13701371# TODO: backdate grants, organisations1372if v['joined']1373[DateTime.parse("#{v['joined']}-01T12:00:00"), 'grants', obj, ['grant']]1374end1375end.compact13761377bucket += site.data['organisations'].map do |k, v|1378a = {'title' => "@#{k}",1379'content' => "GTN Contributions from #{k}"}1380obj = objectify(a, "/hall-of-fame/#{k}/", k)13811382if v['joined']1383[DateTime.parse("#{v['joined']}-01T12:00:00"), 'organisations', obj, ['organisation']]1384end1385end.compact13861387bucket1388.reject{|x| x[0] > DateTime.now } # Remove future-dated materials1389.reject{|x| x[2]['draft'] 
== true } # Remove drafts1390.sort_by {|x| x[0] } # Date-sorted, not strictly necessary since will be grouped.1391.reverse1392end1393end1394end13951396module Jekyll1397# The "implementation" of the topic filter as liquid accessible filters1398module Filters1399module TopicFilter1400##1401# List the most recent contributors to the GTN.1402# Parameters:1403# +contributors+:: A hash of contributors1404# +count+:: The number of contributors to return1405# Returns:1406# +Hash+:: A hash of contributors1407#1408# Example:1409# most_recent_contributors(contributors, 5)1410# => {1411# "hexylena" => {1412# "name" => "Hexylena",1413# "avatar" => "https://avatars.githubusercontent.com/u/458683?v=3",1414# ...1415# }1416# }1417def most_recent_contributors(contributors, count)1418# Remove non-hof1419hof = contributors.reject { |_k, v| v.fetch('halloffame', 'yes') == 'no' }1420# Get keys + sort by joined date1421hof_k = hof.keys.sort do |x, y|1422hof[y].fetch('joined', '2016-01') <=> hof[x].fetch('joined', '2016-01')1423end14241425# Transform back into hash1426hof_k.slice(0, count).to_h { |k| [k, hof[k]] }1427end14281429##1430# Find the most recently modified tutorials1431# Parameters:1432# +site+:: The +Jekyll::Site+ object, used to get the list of pages.1433# +exclude_recently_published+:: Do not include ones that were recently1434# published in the slice, to make it look a bit nicer.1435# Returns:1436# +Array+:: An array of the 10 most recently modified pages1437# Example:1438# {% assign latest_tutorials = site | recently_modified_tutorials %}1439def recently_modified_tutorials(site, exclude_recently_published: true)1440tutorials = site.pages.select { |page| page.data['layout'] == 'tutorial_hands_on' }14411442latest = tutorials.sort do |x, y|1443Gtn::ModificationTimes.obtain_time(y.path) <=> Gtn::ModificationTimes.obtain_time(x.path)1444end14451446latest_published = recently_published_tutorials(site)1447latest = latest.reject { |x| latest_published.include?(x) } if 
exclude_recently_published14481449latest.slice(0, 10)1450end14511452##1453# Find the most recently published tutorials1454# Parameters:1455# +site+:: The +Jekyll::Site+ object, used to get the list of pages.1456# Returns:1457# +Array+:: An array of the 10 most recently published modified pages1458# Example:1459# {% assign latest_tutorials = site | recently_modified_tutorials %}1460def recently_published_tutorials(site)1461tutorials = site.pages.select { |page| page.data['layout'] == 'tutorial_hands_on' }14621463latest = tutorials.sort do |x, y|1464Gtn::PublicationTimes.obtain_time(y.path) <=> Gtn::PublicationTimes.obtain_time(x.path)1465end14661467latest.slice(0, 10)1468end14691470def topic_count(resources)1471# Count lines in the table except introduction slides1472resources.length1473end14741475##1476# Fetch a tutorial material's metadata1477# Parameters:1478# +site+:: The +Jekyll::Site+ object, used to get the list of pages.1479# +topic_name+:: The name of the topic1480# +page_name+:: The name of the page1481# Returns:1482# +Hash+:: The metadata for the tutorial material1483#1484# Example:1485# {% assign material = site | fetch_tutorial_material:page.topic_name,page.tutorial_name%}1486def fetch_tutorial_material(site, topic_name, page_name)1487Gtn::TopicFilter.fetch_tutorial_material(site, topic_name, page_name)1488end14891490def fetch_tutorial_material_by_id(site, id)1491Gtn::TopicFilter.fetch_tutorial_material(site, id.split('/')[0], id.split('/')[1])1492end14931494def list_topics_ids(site)1495['introduction'] + Gtn::TopicFilter.list_topics(site).filter { |k| k != 'introduction' }1496end14971498def list_topics_h(site)1499Gtn::TopicFilter.list_topics(site)1500end15011502def list_topics_by_category(site, category)1503q = Gtn::TopicFilter.list_topics(site).map do |k|1504[k, site.data[k]]1505end15061507# Alllow filtering by a category, or return "all" otherwise.1508if category == 'non-tag'1509q = q.select { |_k, v| v['tag_based'].nil? 
}1510elsif category == 'science'1511q = q.select { |_k, v| %w[use basics].include? v['type'] }1512elsif category == 'technical'1513q = q.select { |_k, v| %w[admin-dev data-science instructors].include? v['type'] }1514elsif category == 'science-technical'1515q = q.select { |_k, v| %w[use basics admin-dev data-science instructors].include? v['type'] }1516elsif category != 'all'1517q = q.select { |_k, v| v['type'] == category }1518end15191520# Sort alphabetically by titles1521q.sort { |a, b| a[1]['title'] <=> b[1]['title'] }1522end15231524def to_keys(arr)1525arr.map { |k| k[0] }1526end15271528def to_vals(arr)1529arr.map { |k| k[1] }1530end15311532##1533# Galaxy depends on the structure of this response, please do not change1534# it, add a new API instead if you need to modify it significantly.1535def list_materials_by_tool(site)1536Gtn::TopicFilter.list_materials_by_tool(site)1537end15381539def list_materials_structured(site, topic_name)1540Gtn::TopicFilter.list_materials_structured(site, topic_name)1541end15421543def list_materials_flat(site, topic_name)1544Gtn::TopicFilter1545.list_materials_structured(site, topic_name)1546.map { |k, v| v['materials'] }1547.flatten1548.uniq { |x| x['id'] }1549end15501551def list_topic_materials_yearly(site, topic_name)1552flat_mats = list_materials_flat(site, topic_name)1553years = flat_mats.map{|x| x['pub_date'].year} + flat_mats.map{|x| x['mod_date'].year}1554# doesn't use identify_contributors because that excludes grants/orgs.1555topic_contribs = flat_mats.map{|x| x['contributions'] || {"all" => x['contributors']}}.map{|x| x.values.flatten}.flatten.uniq.sort1556pfo = ['contributors', 'grants', 'organisations']15571558Gtn::TopicFilter.all_date_sorted_resources(site)1559.select{|x| (x[3].include? 
topic_name) || (pfo.include?(x[1]) && topic_contribs.include?(x[2].title[1..]))}1560.group_by{|x| x[0].year}1561.map{|k, v| [k, v.group_by{|z| z[1]}]}1562.to_h1563end15641565def count_topic_materials_yearly(site, topic_name)1566flat_mats = list_materials_flat(site, topic_name)1567years = flat_mats.map{|x| x['pub_date'].year} + flat_mats.map{|x| x['mod_date'].year}1568# doesn't use identify_contributors because that excludes grants/orgs.1569topic_contribs = flat_mats.map{|x| x['contributions'] || {"all" => x['contributors']}}.map{|x| x.values.flatten}.flatten.uniq.sort1570pfo = ['contributors', 'grants', 'organisations']15711572r = Gtn::TopicFilter.all_date_sorted_resources(site)1573.select{|x| (x[3].include? topic_name) || (pfo.include?(x[1]) && topic_contribs.include?(x[2].title[1..]))}1574.map{|x| [x[0].year, x[1]]} # Only need year + type1575.group_by{|x| x[1]} # Group by type.1576.map{|k, v| [k, v.map{|vv| vv[0]}.tally]}1577.to_h15781579years = (2015..Date.today.year).to_a1580# Fill in zeros for missing years1581r.map{|k, v| [k, years.map{|y| v[y] || 0}1582.cumulative_sum1583.map.with_index{|value, i| {"y" => value, "x" => "#{years[i]}-01-01"}}]1584}.to_h1585end15861587def list_all_tags(site)1588Gtn::TopicFilter.list_all_tags(site)1589end15901591def topic_filter(site, topic_name)1592Gtn::TopicFilter.topic_filter(site, topic_name)1593end15941595def topic_filter_tutorial_count(site, topic_name)1596Gtn::TopicFilter.topic_filter(site, topic_name).length1597end15981599def identify_contributors(materials, site)1600Gtn::TopicFilter.identify_contributors(materials, site)1601end16021603def identify_funders(materials, site)1604Gtn::TopicFilter.identify_funders_and_grants(materials, site)1605end16061607##1608# Just used for stats page.1609def list_videos(site)1610Gtn::TopicFilter.list_all_materials(site)1611.select { |k, _v| k['recordings'] || k['slides_recordings'] }1612.map { |k, _v| (k['recordings'] || []) + (k['slides_recordings'] || []) 
}1613.flatten1614end16151616def findDuration(duration)1617if ! duration.nil?1618eval(duration.gsub(/H/, ' * 3600 + ').gsub(/M/, ' * 60 + ').gsub(/S/, ' + ') + " 0")1619else162001621end1622end16231624##1625# Just used for stats page.1626def list_videos_total_time(site)1627vids = list_videos(site)1628vids.map { |v| findDuration(v['length']) }.sum / 3600.01629end16301631def list_draft_materials(site)1632Gtn::TopicFilter.list_all_materials(site).select { |k, _v| k['draft'] }1633end16341635def to_material(site, page)1636topic = page['path'].split('/')[1]1637material = page['path'].split('/')[3]1638ret = Gtn::TopicFilter.fetch_tutorial_material(site, topic, material)1639Jekyll.logger.warn "Could not find material #{topic} #{material}" if ret.nil?1640ret1641end16421643def get_workflow(site, page, workflow)1644mat = to_material(site, page)1645mat['workflows'].select { |w| w['workflow'] == workflow }[0]1646end16471648def tool_version_support(site, tool)1649Gtn::Supported.calculate(site.data['public-server-tools'], [tool])1650end16511652def edamify(term, site)1653site.data['EDAM'].select{|row| row['Class ID'] == "http://edamontology.org/#{term}"}.first.to_h1654end16551656def titlecase(term)1657term.split(' ').map(&:capitalize).join(' ')1658end1659end1660end1661end16621663Liquid::Template.register_filter(Jekyll::Filters::TopicFilter)166416651666