GitHub Repository: galaxyproject/training-material
Path: blob/main/bin/ari-prep-script.rb
#!/usr/bin/env ruby
# frozen_string_literal: true

# Given a script (script.json in the target directory),
# explode it into a directory of individual lines (.txt, -subtitle.txt),
# synthesize each spoken line into an mp3,
# then re-assemble a script for editly/ffmpeg.
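#
# A rough sketch of the expected script.json layout, inferred from the fields
# this script reads below (the real schema is defined elsewhere in the ARI
# tooling, so treat this as an assumption):
#
#   {
#     "voice":  { "id": "...", "lang": "...", "neural": true,
#                 "endOfSentencePause": 0.2, "endOfSlidePause": 1.04 },
#     "blocks": [ ["Sentence one of slide 1.", "Sentence two."],
#                 ["Sentence one of slide 2."] ]
#   }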
require "#{__dir__}/ari-synthesize.rb"
require 'fileutils'
require 'digest'
require 'json'

dir = ARGV[0]
script = JSON.parse(File.read(File.join(dir, 'script.json')))
engine = ARGV[1]

END_OF_SENTENCE_DURATION = script['voice'].fetch('endOfSentencePause', 0.2)
END_OF_SLIDE_DURATION = script['voice'].fetch('endOfSlidePause', 1.04)

editly = {
  'width' => 1920,
  'height' => 1080,
  'fps' => 30,
  'fast' => ENV.fetch('EDITLY_FAST', 'false') == 'true',
  'outPath' => File.join(dir, 'tmp.mp4'),
  'defaults' => {
    'transition' => {
      'duration' => 0,
    },
  },
  'keepSourceAudio' => true,
  'clips' => []
}
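# The keys above follow editly's JSON config format (canvas size, fps, outPath,
# per-clip defaults, keepSourceAudio, clips); the hash is written out as
# editly.json5 at the end of this script and rendered with `editly --json`.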

# Intro slides.
# Fast: editly --json editly.json5 126,23s user 5,62s system 126% cpu 1:44,08 total
# Slow: editly --json editly.json5 902,71s user 69,27s system 326% cpu 4:57,54 total

def timefmt(t, fmt)
  seconds = t % (24 * 3600)
  hours = seconds.to_i / 3600
  seconds = seconds % 3600
  minutes = seconds.to_i / 60
  seconds = seconds % 60
  (seconds, ms) = seconds.divmod(1)
  ms = 1000 * ms

  if fmt == 'vtt'
    format('%<h>02d:%<m>02d:%<s>02d.%<ms>03d', { h: hours, m: minutes, s: seconds, ms: ms })
  else
    format('%<h>02d:%<m>02d:%<s>02d,%<ms>03d', { h: hours, m: minutes, s: seconds, ms: ms })
  end
end
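# Example (hypothetical values):
#   timefmt(3723.5, 'vtt')  # => "01:02:03.500"
#   timefmt(3723.5, 'srt')  # => "01:02:03,500"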

def split_sentence(sentence, timing)
  res = sentence.split
  chunk_size = (res.length.to_f / (res.length.to_f / 20).ceil).ceil
  chunks = res.each_slice(chunk_size).to_a.length
  res.each_slice(chunk_size).with_index.map do |chunk, idx|
    t0 = timing * (idx / chunks.to_f)
    tf = timing * ((1 + idx) / chunks.to_f)
    [chunk, t0, tf]
  end
end
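# Example (hypothetical values): a 40-word caption spoken over 10.0 s is split
# into two 20-word chunks timed [0.0, 5.0] and [5.0, 10.0]; captions of 20
# words or fewer come back as a single chunk spanning the full duration.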

# For each slide (block) in the script, iterate over its spoken lines.
# The spoken line itself is what gets hashed to name the per-line files.
editly['clips'] = script['blocks'].map.with_index do |phrases, idx|
  # We're inside a single slide.
  parts = phrases.map do |subtitle|
    digest = Digest::MD5.hexdigest subtitle

    # Write out our script bits.
    File.write(File.join(dir, "#{digest}.txt"), subtitle)
    File.write(File.join(dir, "#{digest}-subtitle.txt"), subtitle)

    # Synthesize and copy to the temp dir
    voice = script['voice']

    mp3, json, duration = synthesize(subtitle, engine, voice: voice['id'], lang: voice['lang'], neural: voice['neural'])
    puts "\tSynthesizing: #{mp3} #{subtitle}"
    FileUtils.cp(mp3, File.join(dir, "#{digest}.mp3"))
    FileUtils.cp(json, File.join(dir, "#{digest}.json"))

    [
      {
        :caption => subtitle,
        'duration' => duration,
        'layers' => [{
          'type' => 'audio',
          'path' => File.join(dir, "#{digest}.mp3"),
        }, {
          'type' => 'image',
          'path' => File.join(dir, 'slides.%03d.png' % idx),
          'resizeMode' => 'stretch',
          'zoomDirection' => nil,
        }]
      }, {
        'duration' => END_OF_SENTENCE_DURATION,
        'layers' => [{
          'type' => 'image',
          'path' => File.join(dir, 'slides.%03d.png' % idx),
          'resizeMode' => 'stretch',
          'zoomDirection' => nil,
        }]
      }
    ]
  end
  parts.flatten!
  # Strip out the last end-of-sentence pause; the slide-level pause below replaces it.
  parts = parts[0..-2]

  # Add an end-of-slide pause (END_OF_SLIDE_DURATION) with a fadegrayscale
  # transition into the next slide.
  parts.push({
    'transition' => {
      'name' => 'fadegrayscale',
      'duration' => END_OF_SLIDE_DURATION,
    },
    'duration' => END_OF_SLIDE_DURATION,
    'layers' => [{
      'type' => 'image',
      'path' => File.join(dir, 'slides.%03d.png' % idx),
      'resizeMode' => 'stretch',
      'zoomDirection' => nil,
    }]
  })
  parts.flatten
end.flatten
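# At this point each slide contributes: one clip per sentence (its mp3 over the
# slide image), an END_OF_SENTENCE_DURATION still between sentences, and a final
# END_OF_SLIDE_DURATION still that carries the fadegrayscale transition.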

subtitle_timings = []
offset = 0
editly['clips'].each do |layer|
  if layer.key?(:caption)
    subtitle_timings += split_sentence(layer[:caption], layer['duration']).map do |sen_part, time_prev, time_next|
      [sen_part.join(' '), offset + time_prev, offset + time_next]
    end
    offset += layer['duration']
  elsif layer.key? 'transition'
    # End of slide.
    offset += END_OF_SLIDE_DURATION / 2 # The true transition time.
  else
    offset += layer['duration']
  end
end

# Remove our :caption key.
# editly['clips'].map!{|layer|
#   layer.delete(:caption)
#   layer
# }

File.write(File.join(dir, 'editly.json5'), JSON.generate(editly))

vtt = File.open(File.join(dir, 'out.vtt'), 'w')
srt = File.open(File.join(dir, 'out.srt'), 'w')

vtt.write("WEBVTT\n\n\n")
subtitle_timings.each_with_index do |subtitle, index|
  sub, time_prev, time_next = subtitle

  # Cue counters are 1-based (the SRT format expects numbering to start at 1).
  vtt.write("#{index + 1}\n")
  srt.write("#{index + 1}\n")
  vtt.write("#{timefmt(time_prev, 'vtt')} --> #{timefmt(time_next, 'vtt')}\n")
  srt.write("#{timefmt(time_prev, 'srt')} --> #{timefmt(time_next, 'srt')}\n")
  vtt.write("#{sub}\n\n")
  srt.write("#{sub}\n\n")
end

vtt.close
srt.close
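# The generated out.vtt / out.srt contain one cue per caption chunk, e.g.
# (illustrative values only):
#
#   1
#   00:00:00.000 --> 00:00:04.200
#   Hello and welcome to this tutorial
#
# out.srt uses the same layout with a comma as the millisecond separator.
# The editly.json5 written above is rendered in a separate step, e.g. with
# `editly --json editly.json5` (see the timing notes near the top of this file).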