Path: blob/main/bin/ari-extract-script.rb
1677 views
#!/usr/bin/env ruby
# frozen_string_literal: true

# Builds the narration script for one slide deck and prints it as JSON.
#
# Usage: ari-extract-script.rb SLIDES_FILE
#
# SLIDES_FILE must start with YAML front matter delimited by `---` lines.
# Speaker notes are the lines between a `???` line and the next `---` or `--`
# line. The JSON document written to stdout has two keys:
#
#   blocks - array of arrays of sentences, in this order: title, optional
#            "requirements" intro, questions, objectives, one entry per run of
#            speaker notes, key points, closing line.
#   voice  - the `voice` value from the front matter, or a random entry of
#            APPROVED_VOICES for the deck language when none is set.

require 'yaml'
require 'shellwords'
require 'json'
require 'find'
require 'bibtex'
require 'citeproc/ruby'
require 'csl/styles'

ARI_MAP = File.expand_path(File.join(__dir__, 'ari-map.yml'))
# Pronunciation map keyed by lower-cased word.
WORD_MAP = YAML.load_file(ARI_MAP).transform_keys(&:downcase)

APPROVED_VOICES = {
  'en' => [
    { 'id' => 'Amy', 'lang' => 'en-GB', 'neural' => true },
    { 'id' => 'Aria', 'lang' => 'en-NZ', 'neural' => true },
    { 'id' => 'Brian', 'lang' => 'en-GB', 'neural' => true },
    { 'id' => 'Emma', 'lang' => 'en-GB', 'neural' => true },
    { 'id' => 'Joanna', 'lang' => 'en-US', 'neural' => true },
    { 'id' => 'Joey', 'lang' => 'en-US', 'neural' => true },
    { 'id' => 'Kendra', 'lang' => 'en-US', 'neural' => true },
    { 'id' => 'Matthew', 'lang' => 'en-US', 'neural' => true },
    { 'id' => 'Nicole', 'lang' => 'en-AU', 'neural' => false },
    { 'id' => 'Olivia', 'lang' => 'en-AU', 'neural' => true },
    { 'id' => 'Raveena', 'lang' => 'en-IN', 'neural' => false },
    { 'id' => 'Salli', 'lang' => 'en-US', 'neural' => true },
    { 'id' => 'Ayanda', 'lang' => 'en-ZA', 'neural' => true },
    { 'id' => 'Geraint', 'lang' => 'en-GB-WLS', 'neural' => false }
  ],
  'es' => [
    { 'id' => 'Miguel', 'lang' => 'es-US', 'neural' => false },
    { 'id' => 'Mia', 'lang' => 'es-MX', 'neural' => false },
    { 'id' => 'Enrique', 'lang' => 'es-ES', 'neural' => false },
    { 'id' => 'Conchita', 'lang' => 'es-ES', 'neural' => false },
    { 'id' => 'Lupe', 'lang' => 'es-US', 'neural' => true }
  ]
}.freeze

# Matches a `{% cite KEY %}` tag; the capture is the (unstripped) key.
CITE_TAG = /{%\s*cite ([^}]*)\s*%}/.freeze

# Returns the value stored under +key+, or an empty list when the key is
# absent or its value is nil.
def list_from(metadata, key)
  value = metadata.fetch(key, [])
  value.nil? ? [] : value
end

# Collects every entry of every file whose path ends in "bib" below +roots+
# into a single bibliography.
#
# This is copied directly from the plugins, TODO: make into a module.
def load_bibliography(roots)
  bibliography = BibTeX::Bibliography.new
  paths = roots.map { |root| Find.find(root) }.lazy.flat_map(&:lazy)
  paths.each do |path|
    if FileTest.directory?(path)
      # Don't look any further into dot-directories.
      Find.prune if File.basename(path).start_with?('.')
    elsif path.match?(/bib$/)
      BibTeX.open(path).each { |entry| bibliography << entry.convert_latex }
    end
  end
  bibliography
end

# Splits the slide body into runs of speaker notes.
#
# A run starts at a `???` line (which is kept as the run's first element) and
# ends at the next `---` or `--` line. The run that is open when the input
# ends is always appended, even when it is empty.
def collect_notes(contents)
  finished = []
  current = []
  in_notes = false
  contents.each do |line|
    if line == '???'
      in_notes = true
    elsif ['---', '--'].include?(line)
      if in_notes
        finished.push(current)
        current = []
      end
      in_notes = false
    end
    current.push(line) if in_notes
  end
  finished.push(current)
end

# Turns one raw block into speakable sentences: list markers and the leading
# `???` are removed, blank entries dropped, backticks replaced by double
# quotes, terminal punctuation enforced and `{% cite %}` tags rendered through
# +citations+ (a CiteProc processor).
def clean_block(block, citations)
  sentences = block.map { |entry| entry.strip.delete_prefix('- ') }
  sentences = sentences.drop(1) if sentences.first == '???'
  sentences.reject(&:empty?).map do |sentence|
    sentence = sentence.tr('`', '"')
    # If they don't end with punctuation, fix it.
    sentence += '.' unless sentence.end_with?('.', '?', '!')
    sentence.gsub(CITE_TAG) do
      key = Regexp.last_match(1).strip
      # The :text format wraps the citation in ( ), which we strip off.
      citations.render(:citation, id: key)[1..-2]
    end
  end
end

fn = ARGV[0]
abort "Usage: #{File.basename($PROGRAM_NAME)} SLIDES_FILE" if fn.nil?

metadata = YAML.load_file(fn)

# Topic-level metadata lives next to the first two components of the path.
topic_fn = "#{fn.split('/').slice(0, 2).join('/')}/metadata.yaml"
topic_metadata = YAML.load_file(topic_fn)

cp = CiteProc::Processor.new(format: 'text', locale: 'en')
cp.import(load_bibliography(['./topics', './faqs']).to_citeproc)

# Do we have these slides? Yes or no.
questions = list_from(metadata, 'questions')
objectives = list_from(metadata, 'objectives')
key_points = list_from(metadata, 'key_points')
has_requirements = [metadata, topic_metadata].any? do |source|
  !list_from(source, 'requirements').empty?
end

m_lang = metadata.fetch('lang', 'en')
m_voice = metadata.fetch('voice', nil)
spanish = m_lang == 'es'

# Parse the material for the slide notes. File.readlines closes the file
# once it has been read.
lines = File.readlines(fn).map(&:chomp)

# The structure will be
# ---
# meta
# ---
#
# contents
closing_marker = lines.drop(1).index('---')
abort "#{fn}: front matter is not terminated by a `---` line" if closing_marker.nil?

# +1 because we skipped the 0th entry, +1 again to not include the `---`
contents = lines[(closing_marker + 2)..]

# This will be our final script
blocks = [[metadata['title']]]
if has_requirements
  intro = if spanish
            'Antes de profundizar en el contenido de estas diapositivas, te recomendamos que le des un vistazo a'
          else
            'Before diving into this slide deck, we recommend you to have a look at the following.'
          end
  blocks.push([intro])
end
blocks.push(questions) unless questions.empty?
blocks.push(objectives) unless objectives.empty?
blocks.concat(collect_notes(contents))
blocks.push(key_points) unless key_points.empty?
blocks.push([spanish ? '¡Gracias por ver este vídeo!' : 'Thank you for watching!'])

# For each block, cleanup first.
blocks = blocks.map { |block| clean_block(block, cp) }

voice = if m_voice.nil?
          APPROVED_VOICES[spanish ? 'es' : 'en'].sample
        else
          m_voice
        end

# out_subs.write(blocks.map{ |line| line.join(" ") }.join("\n"))
print JSON.pretty_generate({ 'blocks' => blocks, 'voice' => voice })