Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
galaxyproject
GitHub Repository: galaxyproject/training-material
Path: blob/main/bin/ari-extract-script.rb
1677 views
1
#!/usr/bin/env ruby
2
# frozen_string_literal: true
3
4
require 'yaml'
5
require 'shellwords'
6
require 'json'
7
require 'find'
8
require 'bibtex'
9
require 'citeproc/ruby'
10
require 'csl/styles'
11
12
# The file to process is given as the first command line argument; it is
# parsed as YAML to obtain its metadata.
fn = ARGV[0]
metadata = YAML.load_file(fn)

# The topic-level metadata.yaml is looked up in the directory formed by the
# first two '/'-separated components of the given path.
topic_dir = fn.split('/').first(2).join('/')
topic_fn = "#{topic_dir}/metadata.yaml"
topic_metadata = YAML.load_file(topic_fn)
17
18
# Absolute path of the ari-map.yml file located in this script's directory.
ARI_MAP = File.expand_path('ari-map.yml', __dir__)
# Contents of ari-map.yml, re-keyed by the lower-cased form of every key.
# When two keys differ only by case, the later entry wins.
WORD_MAP = YAML.load_file(ARI_MAP).each_pair.with_object({}) do |(key, value), acc|
  acc[key.downcase] = value
end
23
24
# Voices that may be picked when the material does not name one, grouped by
# two-letter language code. Each row is [id, lang, neural].
# NOTE(review): these look like text-to-speech voice identifiers, with the
# 'neural' flag presumably marking neural-engine support — confirm against
# the consumer of this script's JSON output.
voice_table = {
  'en' => [
    ['Amy', 'en-GB', true],
    ['Aria', 'en-NZ', true],
    ['Brian', 'en-GB', true],
    ['Emma', 'en-GB', true],
    ['Joanna', 'en-US', true],
    ['Joey', 'en-US', true],
    ['Kendra', 'en-US', true],
    ['Matthew', 'en-US', true],
    ['Nicole', 'en-AU', false],
    ['Olivia', 'en-AU', true],
    ['Raveena', 'en-IN', false],
    ['Salli', 'en-US', true],
    ['Ayanda', 'en-ZA', true],
    ['Geraint', 'en-GB-WLS', false]
  ],
  'es' => [
    ['Miguel', 'es-US', false],
    ['Mia', 'es-MX', false],
    ['Enrique', 'es-ES', false],
    ['Conchita', 'es-ES', false],
    ['Lupe', 'es-US', true]
  ]
}

# Expand every row into the { 'id', 'lang', 'neural' } hash shape that is
# emitted in the JSON output. Only the outer hash is frozen.
APPROVED_VOICES = voice_table.transform_values do |rows|
  rows.map { |id, lang, neural| { 'id' => id, 'lang' => lang, 'neural' => neural } }
end.freeze
49
50
# This is copied directly from the plugins, TODO: make into a module.
# Collect every bibliography entry found under ./topics and ./faqs into a
# single bibliography.
global_bib = BibTeX::Bibliography.new
bib_paths = [Find.find('./topics'), Find.find('./faqs')].lazy.flat_map(&:lazy)
bib_paths.each do |path|
  if FileTest.directory?(path)
    # Don't look any further into hidden (dot-prefixed) directories.
    Find.prune if File.basename(path).start_with?('.')
  elsif path.match?(/bib$/)
    # Convert LaTeX markup in each entry before storing it.
    BibTeX.open(path).each { |entry| global_bib << entry.convert_latex }
  end
end

# Citation processor (plain-text output, English locale) fed with everything
# collected above; used to render citations.
cp = CiteProc::Processor.new(format: 'text', locale: 'en')
cp.import(global_bib.to_citeproc)
68
69
# Do we have these slides? Yes or no.
# A key that is missing, or present but nil, counts as an empty list.
list_for = lambda do |source, key|
  value = source.fetch(key, [])
  value.nil? ? [] : value
end

m_qs = list_for.call(metadata, 'questions')
m_os = list_for.call(metadata, 'objectives')
m_kp = list_for.call(metadata, 'key_points')
m_rq = list_for.call(metadata, 'requirements')
t_rq = list_for.call(topic_metadata, 'requirements')

has_questions = m_qs.length.positive?
has_objectives = m_os.length.positive?
has_keypoints = m_kp.length.positive?
# Requirements may come from the material itself or from its topic.
has_requirements = [m_rq, t_rq].any? { |reqs| reqs.length.positive? }

# Language defaults to 'en' when the key is absent; the voice is optional.
m_lang = metadata.fetch('lang', 'en')
m_voice = metadata['voice']
90
91
# Parse the material for the slide notes.
# File.readlines opens, reads and closes the file in a single call, so no
# file handle stays open for the remainder of the run.
lines = File.readlines(fn).map(&:chomp)

# The structure will be
# ---
# meta
# ---
#
# contents

# Locate the `---` that closes the metadata. The search starts at the second
# line so the opening `---` is never matched. An empty file or a file without
# a closing `---` is reported explicitly instead of failing on nil.
closing_fence = lines.drop(1).index('---')
raise "#{fn}: could not find the closing '---' of the metadata section" if closing_fence.nil?

# +1 because we skipped the 0th entry, +1 again to not include the `---`
end_meta = closing_fence + 2

# Strip off the metadata
contents = lines[end_meta..]
107
108
# This will be our final script: a list of blocks, each block being a list of
# lines. The title always forms the first block.
blocks = [[metadata['title']]]

# When there are requirements, add a fixed sentence: Spanish for 'es'
# material, English for every other language.
if has_requirements
  requirements_intro =
    if m_lang == 'es'
      'Antes de profundizar en el contenido de estas diapositivas, te recomendamos que le des un vistazo a'
    else
      'Before diving into this slide deck, we recommend you to have a look at the following.'
    end
  blocks << [requirements_intro]
end

# Questions and objectives each become one block, when present.
blocks << metadata['questions'] if has_questions
blocks << metadata['objectives'] if has_objectives
119
120
# Accumulate portions between ??? and ---
# A `???` line opens a notes section (and is itself kept as the first line of
# the block); a `---` or `--` line closes it.
current_block = []
in_notes = false
contents.each do |line|
  case line
  when '???'
    in_notes = true
  when '---', '--'
    # Only a separator that ends a notes section produces a block.
    if in_notes
      blocks << current_block
      current_block = []
    end

    in_notes = false
  end

  current_block << line if in_notes
end
# Whatever was collected after the last separator is always appended, even
# when it is empty.
blocks << current_block
139
# Key points, when present, form the second-to-last block.
blocks << metadata['key_points'] if has_keypoints

# The script always ends with a thank-you sentence: Spanish for 'es'
# material, English for every other language.
farewell = m_lang == 'es' ? '¡Gracias por ver este vídeo!' : 'Thank you for watching!'
blocks << [farewell]
146
147
# For each block, cleanup first.
blocks = blocks.map do |block|
  # Trim surrounding whitespace and remove the - prefix from each line.
  # delete_prefix returns a new string, so this is the one place where the
  # prefix is actually removed.
  script_lines = block.map { |x| x.strip.delete_prefix('- ') }
  # Remove the leading ???
  script_lines = script_lines.drop(1) if script_lines.first == '???'

  # Remove blank entries, then normalise every remaining line.
  script_lines.reject(&:empty?).map do |line|
    # Backticks are replaced by double quotes.
    sentence = line.tr('`', '"')
    # If they don't end with punctuation, fix it. This happens before
    # citations are expanded, so a line ending in a cite tag also gets a '.'.
    sentence += '.' unless sentence.end_with?('.', '?', '!')

    sentence.gsub(/{%\s*cite ([^}]*)\s*%}/) do |match|
      # Strip off the {% %} first, whitespace, and then remove cite at the
      # start and restrip again.
      value = match[2..-3].strip[4..].strip
      # Render the citation, the :text format includes ( ) on both sides which
      # we strip off.
      cp.render(:citation, id: value)[1..-2]
    end
  end
end
176
177
# out_subs.write(blocks.map{ |line| line.join(" ") }.join("\n"))
178
# Pool to draw from when the material names no voice: the Spanish voices for
# 'es' material, the English ones for every other language.
voice_pool = APPROVED_VOICES[m_lang == 'es' ? 'es' : 'en']

# The result holds the cleaned script blocks plus the voice: either a random
# entry of the pool, or the value given in the metadata as-is.
res = {
  'blocks' => blocks,
  'voice' => m_voice.nil? ? voice_pool.sample : metadata['voice']
}

print JSON.pretty_generate(res)
192
193