CoCalc -- ari-extract-script.rb

GitHub Repository: galaxyproject/training-material
Path: blob/main/bin/ari-extract-script.rb
¹⁶⁷⁷ views
1
#!/usr/bin/env ruby
2
# frozen_string_literal: true
3

4
require 'yaml'
5
require 'shellwords'
6
require 'json'
7
require 'find'
8
require 'bibtex'
9
require 'citeproc/ruby'
10
require 'csl/styles'
11

12
# The slide deck to process is given as the first command-line argument.
fn = ARGV.first
# Load the deck file as YAML; the result is used below as the deck metadata.
metadata = YAML.load_file(fn)

# The topic-level metadata.yaml is looked up under the first two components of
# the given path (presumably `topics/<topic>` when run from the repository root).
topic_dir = fn.split('/').first(2).join('/')
topic_fn = "#{topic_dir}/metadata.yaml"
topic_metadata = YAML.load_file(topic_fn)
17

18
# Absolute path of ari-map.yml, located in the same directory as this script.
ARI_MAP = File.expand_path('ari-map.yml', __dir__)
# The entries of ari-map.yml with every key lower-cased.
WORD_MAP = YAML.load_file(ARI_MAP).transform_keys(&:downcase)
23

24
# Voices that are picked from at random when a deck does not name a voice,
# grouped by language code. Each row is [id, lang, neural] and is expanded into
# a hash with the string keys 'id', 'lang' and 'neural'.
APPROVED_VOICES = {
  'en' => [
    ['Amy', 'en-GB', true],
    ['Aria', 'en-NZ', true],
    ['Brian', 'en-GB', true],
    ['Emma', 'en-GB', true],
    ['Joanna', 'en-US', true],
    ['Joey', 'en-US', true],
    ['Kendra', 'en-US', true],
    ['Matthew', 'en-US', true],
    ['Nicole', 'en-AU', false],
    ['Olivia', 'en-AU', true],
    ['Raveena', 'en-IN', false],
    ['Salli', 'en-US', true],
    ['Ayanda', 'en-ZA', true],
    ['Geraint', 'en-GB-WLS', false]
  ],
  'es' => [
    ['Miguel', 'es-US', false],
    ['Mia', 'es-MX', false],
    ['Enrique', 'es-ES', false],
    ['Conchita', 'es-ES', false],
    ['Lupe', 'es-US', true]
  ]
}.transform_values do |rows|
  rows.map { |id, lang, neural| { 'id' => id, 'lang' => lang, 'neural' => neural } }
end.freeze
49

50
# Collect every bibliography entry found under ./topics and ./faqs into a single
# bibliography. (Copied from the plugins; TODO: make into a module.)
global_bib = BibTeX::Bibliography.new
bib_paths = [Find.find('./topics'), Find.find('./faqs')].lazy.flat_map(&:lazy)
bib_paths.each do |path|
  if FileTest.directory?(path)
    # Hidden directories are skipped entirely; others are descended into.
    Find.prune if File.basename(path).start_with?('.')
  elsif path =~ /bib$/
    BibTeX.open(path).each { |entry| global_bib << entry.convert_latex }
  end
end

# Citation processor over the collected entries, rendering plain text.
cp = CiteProc::Processor.new(format: 'text', locale: 'en')
cp.import(global_bib.to_citeproc)
68

69
# Reads `key` from a metadata hash, treating both a missing key and an explicit
# nil value as an empty list.
list_or_empty = lambda do |meta, key|
  value = meta.fetch(key, [])
  value.nil? ? [] : value
end

# Work out which of the optional slides this deck has.
m_qs = list_or_empty.call(metadata, 'questions')
has_questions = !m_qs.empty?

m_os = list_or_empty.call(metadata, 'objectives')
has_objectives = !m_os.empty?

m_kp = list_or_empty.call(metadata, 'key_points')
has_keypoints = !m_kp.empty?

# Requirements may be declared on the deck itself or on its topic.
m_rq = list_or_empty.call(metadata, 'requirements')
t_rq = list_or_empty.call(topic_metadata, 'requirements')
has_requirements = !(m_rq.empty? && t_rq.empty?)

# Language defaults to English; the voice is optional.
m_lang = metadata.fetch('lang', 'en')
m_voice = metadata.fetch('voice', nil)
90

91
# Parse the material for the slide notes.
# File.readlines opens, reads and closes the file in a single call, so no file
# handle is left open for the rest of the run.
lines = File.readlines(fn).map(&:chomp)

# The structure will be
# ---
# meta
# ---
#
# contents

# Find the `---` that closes the metadata, ignoring the opening `---` on the
# first line. Fail with a clear message instead of an opaque `nil + 2` error
# when the closing marker is missing.
closing_marker = lines.drop(1).index('---')
raise "#{fn}: no closing '---' found for the metadata block" if closing_marker.nil?

# +1 because we skipped the 0th entry, +1 again to not include the `---`
end_meta = closing_marker + 2

# Strip off the metadata
contents = lines[end_meta..]
107

108
# The final script is a list of blocks (each a list of lines), starting with
# the title.
blocks = [[metadata['title']]]
if has_requirements
  # Introductory sentence for the requirements, in the deck's language.
  requirements_intro =
    if m_lang == 'es'
      'Antes de profundizar en el contenido de estas diapositivas, te recomendamos que le des un vistazo a'
    else
      'Before diving into this slide deck, we recommend you to have a look at the following.'
    end
  blocks << [requirements_intro]
end
blocks << metadata['questions'] if has_questions
blocks << metadata['objectives'] if has_objectives
119

120
# Collect the speaker notes: everything from a `???` line up to the next slide
# separator (`---` or `--`) forms one block.
current_block = []
in_notes = false
contents.each do |line|
  case line
  when '???'
    in_notes = true
  when '---', '--'
    # A separator ends the notes that were being collected, if any.
    if in_notes
      blocks << current_block
      current_block = []
    end
    in_notes = false
  end

  # The `???` marker itself is collected too; it is removed during cleanup.
  current_block << line if in_notes
end
# Whatever was collected after the last separator becomes the final notes block.
blocks << current_block
139
# Finish with the key points (when present) and a closing line in the deck's
# language.
blocks << metadata['key_points'] if has_keypoints

farewell = m_lang == 'es' ? '¡Gracias por ver este vídeo!' : 'Thank you for watching!'
blocks << [farewell]
146

147
# Clean up every block so that it contains only speakable sentences:
# - surrounding whitespace and a leading "- " list marker are removed,
# - the leading `???` notes marker and blank lines are dropped,
# - backticks become double quotes,
# - lines without closing punctuation get a trailing full stop,
# - `{% cite key %}` tags are replaced by the rendered citation text.
blocks = blocks.map do |block|
  script_lines = block.map { |raw| raw.strip.delete_prefix('- ') }
  # Notes blocks still start with the `???` marker that opened them.
  script_lines = script_lines.drop(1) if script_lines.first == '???'

  script_lines.reject(&:empty?).map do |line|
    spoken = line.tr('`', '"')
    # If the line doesn't end with punctuation, fix it. This happens before
    # citations are rendered, so it looks at the raw `{% cite %}` tag.
    spoken += '.' unless spoken.end_with?('.', '?', '!')

    spoken.gsub(/{%\s*cite ([^}]*)\s*%}/) do
      # The capture group holds the citation key, possibly padded with spaces.
      key = Regexp.last_match(1).strip
      # The text format wraps the citation in ( ), which we strip off.
      cp.render(:citation, id: key)[1..-2]
    end
  end
end
176

177
# Emit the script as JSON: the cleaned blocks plus the voice to use. When the
# deck names no voice, one is picked at random from the approved voices for its
# language (Spanish for 'es', English for everything else).
voice_pool = APPROVED_VOICES[m_lang == 'es' ? 'es' : 'en']
res = {
  'blocks' => blocks,
  'voice' => m_voice.nil? ? voice_pool.sample : m_voice
}

print JSON.pretty_generate(res)
192

193
Product

Resources

Company