# Source: GitHub repository galaxyproject/training-material
# Path: blob/main/_plugins/notebook.rb
1
require 'digest'
2
require 'json'
3
require 'fileutils'
4
require 'yaml'
5
require 'base64'
6
7
# Monkey patching Hash with a nil-tolerant variant of +fetch+.
class Hash
  ##
  # Like +fetch(key, default)+, but +default+ is also returned when the key
  # is present and its stored value is +nil+ or +false+.
  #
  # Params:
  # +key+:: The key to look up
  # +default+:: The value to fall back to
  #
  # Returns:
  # The stored value when it is truthy, otherwise +default+
  def fetch2(key, default)
    stored = fetch(key, default)
    return default unless stored

    stored
  end
end
13
14
# Generate Notebooks from Markdown
15
module Gtn
16
##
17
# Notebook generation module, this converts markdown into Jupyter and RMarkdown/Quarto notebooks
18
module Notebooks
19
20
# Colors for the various boxes, based on our 2024 CSS.
# Keys are box class names, values are the CSS color used for the box border.
COLORS = {
  'overview'  => '#8A9AD0',
  'agenda'    => '#86D486',
  'keypoints' => '#FFA1A1',
  'tip'       => '#FFE19E',
  'warning'   => '#de8875',
  'comment'   => '#ffecc1',
  'hands_on'  => '#dfe5f9',
  'question'  => '#8A9AD0',
  'solution'  => '#B8C3EA',
  'details'   => '#ddd',
  'feedback'  => '#86D486',
  'code-in'   => '#86D486',
  'code-out'  => '#fb99d0',
}.freeze
36
37
# Extra CSS declarations per box class, appended after the matching +COLORS+
# border color when blockquotes are styled. Currently only hides the agenda box.
COLORS_EXTRA = { 'agenda' => 'display: none' }.freeze
41
42
# Emoji icons for the various boxes, keyed by box class name.
ICONS = {
  'tip'      => '💡',
  'code-in'  => '⌨️',
  'code-out' => '🖥',
  'question' => '❓',
  'solution' => '👁',
  'warning'  => '⚠️',
  'comment'  => '💬',
  'feedback' => '⁉️',
  'details'  => '💬',
  'hands_on' => '✏️',
}.freeze
55
56
# Font-awesome equivalents of the icons we use for our boxes.
# Maps a Font-awesome class string to the box class name used as key in +ICONS+.
ICONS_FA = {
  'far fa-keyboard'             => 'code-in',
  'fas fa-laptop-code'          => 'code-out',
  'far fa-comment-dots'         => 'comment',
  'fas fa-info-circle'          => 'details',
  'far fa-comments'             => 'feedback',
  'fas fa-pencil-alt'           => 'hands_on',
  'far fa-question-circle'      => 'question',
  'far fa-eye'                  => 'solution',
  'far fa-lightbulb'            => 'tip',
  'fas fa-exclamation-triangle' => 'warning',
}.freeze
69
70
##
# Generate the CSS to be included, by mapping our colors to appropriate classes.
#
# Returns:
# +String+:: One CSS rule per entry of +COLORS+, joined with newlines.
def self.generate_css
  rules = []
  COLORS.each_pair do |box, color|
    rules << ".#{box} { padding: 0 1em; margin: 1em 0.2em; border: 2px solid #{color} }"
  end
  rules.join("\n")
end
76
77
##
# Convert a markdown file into a Jupyter notebook JSON structure.
#
# The text is split on fenced code blocks whose language is one of
# +accepted_languages+: each such block becomes a code cell, and the text
# between them becomes markdown cells. Fences with any other language are left
# inside the surrounding markdown cell.
#
# Params:
# +content+:: The markdown content to convert
# +accepted_languages+:: The languages to accept as code blocks. Code blocks that do not match will not be accepted.
#
# Returns:
# +Hash+:: A JSON structure representing the Jupyter notebook.
#
# Raises:
# +RuntimeError+:: When an accepted opening fence is found while a code block is still open.
def self.convert_notebook_markdown(content, accepted_languages)
  # Both patterns only depend on the language list, so build them once.
  languages = accepted_languages.join('|')
  opening_fence = /^```(#{languages})\s*$/
  tagged_fence = /```(#{languages})/

  # Each chunk is [lines, is_code, language].
  chunks = []
  inside_block = false
  cur_lang = nil
  val = []
  data = content.split("\n")
  data.each.with_index do |line, i|
    m = line.match(opening_fence)
    if m
      if inside_block
        # Print the surrounding lines for debugging; clamp the start so a
        # negative index does not wrap around to the end of the document.
        puts data[[i - 2, 0].max..i + 2]
        raise "[GTN/Notebook] L#{i} Error! we're already in a block:"
      end
      # End the previous block
      chunks.push([val, inside_block, cur_lang])
      val = []

      inside_block = true
      cur_lang = m[1]
    elsif inside_block && line == '```'
      # End of code block
      chunks.push([val, inside_block, cur_lang])
      val = []
      inside_block = false
    else
      val.push(line)
    end
  end
  # final flush
  chunks.push([val, inside_block, cur_lang])

  notebook = {
    'metadata' => {},
    'nbformat' => 4,
    'nbformat_minor' => 5,
  }

  notebook['cells'] = chunks.map.with_index do |(lines, is_code, lang), index|
    source = lines.map { |x| "#{x.rstrip}\n" }
    # Strip the trailing newline from the last line of the cell.
    source[-1] = source[-1].rstrip unless source.empty?

    # Remove any remaining language tagged code blocks, e.g. in
    # tip/solution/etc boxes. These do not render well.
    source = source.map { |x| x.gsub(tagged_fence, '```') }

    res = {
      'id' => "cell-#{index}",
      'source' => source
    }

    if is_code
      res.update({
                   'cell_type' => 'code',
                   'execution_count' => nil,
                   'outputs' => [],
                   'metadata' => {
                     'attributes' => {
                       # The language of this code block (previously this
                       # mistakenly picked the third line of the document).
                       'classes' => [lang],
                       'id' => '',
                     }
                   }
                 })
    else
      res['cell_type'] = 'markdown'
    end
    res
  end
  notebook
end
156
157
##
# Group a document by the first character seen, which extracts blockquotes mostly.
#
# Consecutive lines sharing the same first character are collected into one
# group. A group of +>+ lines that ends with a kramdown attribute line
# (<tt>{: .class}</tt>) is rewritten as a fenced <tt>:::{.class}</tt> block
# with the leading +>+ removed from each line; groups whose attribute line
# matches +agenda+ are dropped entirely.
#
# Params:
# +data+:: The markdown text
#
# Returns:
# +String+:: The rewritten markdown; an empty string when +data+ has no lines.
def self.group_doc_by_first_char(data)
  lines = data.split("\n")
  # Nothing to group. Without this guard the filters below would index into
  # an empty group and raise NoMethodError.
  return '' if lines.empty?

  groups = []
  first_char = nil
  current = []

  # Here we collapse running groups of `>` into single blocks.
  lines.each do |line|
    if first_char.nil?
      # No group started yet. An empty line has no first character, so
      # leading blank lines keep restarting here and are not emitted.
      first_char = line[0]
      current = [line]
    elsif line[0] == first_char || (first_char == '>' && line[0..1] == '{:')
      # Same leading character, or the attribute line closing a blockquote.
      current.push(line)
    else
      # flush
      groups.push(current)
      first_char = line.empty? ? '' : line[0]
      current = [line]
    end
  end
  # final flush
  groups.push(current)

  tagged_quote = ->(group) { group[0][0] == '>' && group[-1][0..1] == '{:' }

  groups.reject! { |group| tagged_quote.call(group) && group[-1].match(/.agenda/) }
  groups.map! do |group|
    next group unless tagged_quote.call(group)

    # "{: .tip}" -> ".tip"
    cls = group[-1][2..-2].strip
    [":::{#{cls}}"] + group[0..-2].map { |c| c.sub(/^>\s*/, '') } + [':::']
  end

  groups.flatten(1).join("\n")
end
205
206
##
# Construct a byline from the metadata
#
# Params:
# +site+:: The Jekyll site object
# +metadata+:: The metadata to construct the byline from, including a contributions or contributors key
#
# Returns:
# +String+:: The byline with markdown hyperlinks to the contributors, comma separated
def self.construct_byline(site, metadata)
  # One markdown link per entry returned by Gtn::Contributors.get_authors;
  # each entry is used verbatim as the hall-of-fame path segment.
  links = Gtn::Contributors.get_authors(metadata).map do |contributor|
    display_name = Gtn::Contributors.fetch_name(site, contributor)
    "[#{display_name}](https://training.galaxyproject.org/hall-of-fame/#{contributor}/)"
  end
  links.join(', ')
end
222
223
##
# Given a notebook, add the metadata cell to the top of the notebook with the
# title, byline, license, questions, objectives and time estimation.
#
# Params:
# +site+:: The Jekyll site object
# +notebook+:: The notebook to add the metadata cell to (modified in place)
# +metadata+:: The page.data to use for metadata.
#
# Returns:
# +Hash+:: The updated notebook with the metadata cell added to the top.
def self.add_metadata_cell(site, notebook, metadata)
  by_line = construct_byline(site, metadata)
  license = metadata.fetch('license', 'CC-BY')
  # fetch2 also covers keys that are present but nil.
  questions = metadata.fetch2('questions', []).map { |q| "- #{q}\n" }
  objectives = metadata.fetch2('objectives', []).map { |o| "- #{o}\n" }

  meta_header = [
    "<div style=\"border: 2px solid #8A9AD0; margin: 1em 0.2em; padding: 0.5em;\">\n\n",
    "# #{metadata['title']}\n",
    "\n",
    "by #{by_line}\n",
    "\n",
    "#{license} licensed content from the [Galaxy Training Network]" \
    "(https://training.galaxyproject.org/)\n",
    "\n",
    # This heading introduces the questions list; it used to read
    # "Objectives", duplicating the heading of the next section.
    "**Questions**\n",
    "\n",
    *questions,
    "\n",
    "**Objectives**\n",
    "\n",
    *objectives,
    "\n",
    "**Time Estimation: #{metadata['time_estimation']}**\n",
    "\n",
    "</div>\n"
  ]

  notebook['cells'].unshift(
    {
      'id' => 'metadata',
      'cell_type' => 'markdown',
      'source' => meta_header
    }
  )
  notebook
end
265
266
##
# Fix an R based Jupyter notebook by setting the kernel to R and stripping out the %%R magic commands.
#
# Params:
# +notebook+:: The notebook Hash (modified in place)
#
# Returns:
# +Hash+:: The same notebook, with R kernel metadata and without leading +%%R+ lines in code cells.
def self.fixRNotebook(notebook)
  # Set the R kernel. This must live under the 'metadata' key (it was
  # previously written to a misspelled 'etadata' key).
  notebook['metadata'] = {
    'kernelspec' => {
      'display_name' => 'R',
      'language' => 'R',
      'name' => 'r'
    },
    'language_info' => {
      'codemirror_mode' => 'r',
      'file_extension' => '.r',
      'mimetype' => 'text/x-r-source',
      'name' => 'R',
      'pygments_lexer' => 'r',
      'version' => '4.1.0'
    }
  }
  # Strip out %%R since we'll use the R kernel
  notebook['cells'].each do |cell|
    next unless cell.fetch('cell_type') == 'code' && cell['source'][0] == "%%R\n"

    cell['source'] = cell['source'].slice(1..-1)
  end
  notebook
end
294
295
##
# Similar to +fixRNotebook+ but for bash: sets the Bash kernel metadata and
# drops a leading +%%bash+ magic line from code cells.
#
# Params:
# +notebook+:: The notebook Hash (modified in place)
#
# Returns:
# +Hash+:: The same notebook
def self.fixBashNotebook(notebook)
  # Set the bash kernel
  kernelspec = {
    'display_name' => 'Bash',
    'language' => 'bash',
    'name' => 'bash'
  }
  language_info = {
    'codemirror_mode' => 'shell',
    'file_extension' => '.sh',
    'mimetype' => 'text/x-sh',
    'name' => 'bash'
  }
  notebook['metadata'] = { 'kernelspec' => kernelspec, 'language_info' => language_info }

  # Strip out %%bash since we'll use the bash kernel
  notebook['cells'].each do |cell|
    next unless cell.fetch('cell_type') == 'code'
    next unless cell['source'][0] == "%%bash\n"

    cell['source'] = cell['source'].slice(1..-1)
  end
  notebook
end
321
322
##
# Similar to +fixRNotebook+ but for Python, bash cells are accepted but must be prefixed with !
#
# Params:
# +notebook+:: The notebook Hash (modified in place)
#
# Returns:
# +Hash+:: The same notebook, where every source line of a cell whose first class is +bash+ starts with +!+
def self.fixPythonNotebook(notebook)
  # TODO
  # prefix bash cells with `!`
  notebook['cells'].each do |cell|
    classes = cell.fetch('metadata', {}).fetch('attributes', {}).fetch('classes', [])
    next unless classes[0] == 'bash'

    cell['source'] = cell['source'].map { |line| "!#{line}" }
  end
  notebook
end
335
336
##
# Ibid, +fixRNotebook+ but for SQL: every code cell gets a +%%sql+ magic line
# prepended, except cells whose source contains +load_ext+.
#
# Params:
# +notebook+:: The notebook Hash (modified in place)
#
# Returns:
# +Hash+:: The same notebook
def self.fixSqlNotebook(notebook)
  # Add in a %%sql at the top of each cell
  notebook['cells'].each do |cell|
    next unless cell.fetch('cell_type') == 'code'
    next if cell['source'].join.include?('load_ext')

    cell['source'] = ["%%sql\n"] + cell['source']
  end
  notebook
end
348
349
##
# Call Jekyll's markdown plugin or failover to Kramdown
#
# Any StandardError raised while converting through Jekyll triggers the
# Kramdown fallback. It is unclear why that failure mode needs supporting,
# but it is kept as is.
#
# Params:
# +site+:: The Jekyll site object
# +text+:: The text to convert to html
#
# Returns:
# +String+:: The HTML representation
def self.markdownify(site, text)
  begin
    converter = site.find_converter_instance(Jekyll::Converters::Markdown)
    converter.convert(text.to_s)
  rescue StandardError
    # Loaded lazily: only needed when the Jekyll converter is unavailable or fails.
    require 'kramdown'
    Kramdown::Document.new(text).to_html
  end
end
368
369
##
# Return true if it's a notebook and the language is correct
#
# TODO: convert to `notebook?` which is more ruby-esque.
#
# +data+:: The page data to check
# +language+:: The language to check for; when nil any notebook language is accepted
#
# Returns:
# +Boolean+:: True if it's a notebook (i.e hands on tutorial, has a notebook key, and the language is correct)
def self.notebook_filter(data, language = nil)
  return false unless data['layout'] == 'tutorial_hands_on'
  return false unless data.key?('notebook')
  return true if language.nil?

  # The page's language is downcased before comparing; +language+ is compared as given.
  data['notebook']['language'].downcase == language
end
384
385
##
# Massage a page into RMarkdown preferred formatting.
#
# Params:
# +site+:: The Jekyll site object
# +page_data+:: The page metadata (page.data)
# +page_content+:: The page content (page.content)
# +page_url+:: The page URL
# +page_last_modified+:: The last modified time of the page
# +fn+:: The source filename of the page
#
# Returns:
# +String+:: The RMarkdown formatted content: a YAML header followed by the document body
#
def self.render_rmarkdown(site, page_data, page_content, page_url, page_last_modified, fn)
  by_line = construct_byline(site, page_data)

  # Replace top level `>` blocks with fenced `:::`, then re-run a second
  # time to catch singly-nested Q&A?
  content = group_doc_by_first_char(group_doc_by_first_char(page_content))

  # Replace zenodo links, the only replacement we do
  zenodo_link = page_data['zenodo_link']
  unless zenodo_link.nil?
    Jekyll.logger.debug "Replacing zenodo links in #{page_url}, #{zenodo_link}"
    content.gsub!(/{{\s*page.zenodo_link\s*}}/, zenodo_link)
  end

  # Swap icon tags and Font-awesome markup for their emoji.
  ICONS.each { |name, emoji| content.gsub!(/{% icon #{name} %}/, emoji) }
  ICONS_FA.each do |fa_class, name|
    content.gsub!(%r{<i class="#{fa_class}" aria-hidden="true"></i>}, ICONS[name])
  end

  content += %(\n\n# References\n\n<div id="refs"></div>\n)

  # https://raw.githubusercontent.com/rstudio/cheatsheets/master/rmarkdown-2.0.pdf
  # https://bookdown.org/yihui/rmarkdown/

  license = page_data.fetch('license', 'CC-BY')
  author = "#{by_line}, #{license} licensed content from the " \
           '[Galaxy Training Network](https://training.galaxyproject.org/)'

  # The bibliography name is built from the 3rd and 5th path segments of fn.
  fnparts = fn.split('/')

  # Every output format gets its own Hash object so the YAML dump does not
  # emit anchors/aliases for shared objects.
  html_notebook = {
    'toc' => true,
    'toc_depth' => 2,
    'css' => 'gtn.css',
    'toc_float' => {
      'collapsed' => false,
      'smooth_scroll' => false,
    },
    # 'theme' => {'bootswatch' => 'journal'}
  }
  word_document = {
    'toc' => true,
    'toc_depth' => 2,
    'latex_engine' => 'xelatex',
  }
  pdf_document = {
    'toc' => true,
    'toc_depth' => 2,
    'latex_engine' => 'xelatex',
  }

  rmddata = {
    'title' => page_data['title'],
    'author' => author,
    'bibliography' => "#{fnparts[2]}-#{fnparts[4]}.bib",
    'output' => {
      'html_notebook' => html_notebook,
      'word_document' => word_document,
      'pdf_document' => pdf_document,
    },
    'date' => page_last_modified.to_s,
    'link-citations' => true,
    'anchor_sections' => true,
    'code_download' => true,
  }
  # Deep copy of the html_notebook settings via a JSON round trip.
  rmddata['output']['html_document'] = JSON.parse(JSON.generate(rmddata['output']['html_notebook']))

  sections = [
    "# Introduction\n",
    content.gsub(/```[Rr]/, '```{r}'),
    "# Key Points\n"
  ]
  sections.concat(page_data.fetch2('key_points', []).map { |k| "- #{k}" })
  sections.push(
    "\n# Congratulations on successfully completing this tutorial!\n",
    'Please [fill out the feedback on the GTN website](https://training.galaxyproject.org/' \
    "training-material#{page_url}#feedback) and check there for further resources!\n"
  )

  # Widen the YAML line width so the author entry is not wrapped.
  front_matter = rmddata.to_yaml(line_width: rmddata['author'].size + 10)
  "#{front_matter}---\n#{sections.join("\n")}"
end
473
474
475
##
# Render a tutorial page as a Jupyter notebook structure.
#
# Params:
# +data+:: The page metadata (page.data)
# +content+:: The page markdown; zenodo link placeholders are replaced in place
# +url+:: The page URL
# +_last_modified+:: Unused
# +notebook_language+:: The notebook language; 'bash', 'sql', 'r' and 'python' get language specific fixes
# +site+:: The Jekyll site object
# +dir+:: Passed through to +renderMarkdownCells+
#
# Returns:
# +Hash+:: The notebook, ending with a key points / feedback cell
def self.render_jupyter_notebook(data, content, url, _last_modified, notebook_language, site, dir)
  # Here we use internal methods to convert the tutorial to a Hash
  # representing the notebook. Python notebooks additionally accept bash blocks.
  accepted_languages = notebook_language == 'python' ? [notebook_language, 'bash'] : [notebook_language]

  unless data['zenodo_link'].nil?
    Jekyll.logger.debug "Replacing zenodo links in #{url}, #{data['zenodo_link']}"
    content.gsub!(/{{\s*page.zenodo_link\s*}}/, data['zenodo_link'])
  end
  notebook = convert_notebook_markdown(content, accepted_languages)
  # This extracts the metadata yaml header and does manual formatting of
  # the header data to make for a nicer notebook.
  notebook = add_metadata_cell(site, notebook, data)

  # Apply language specific conventions
  notebook =
    case notebook_language
    when 'bash' then fixBashNotebook(notebook)
    when 'sql' then fixSqlNotebook(notebook)
    when 'r' then fixRNotebook(notebook)
    when 'python' then fixPythonNotebook(notebook)
    else notebook
    end

  # Here we loop over the markdown cells and render them to HTML. This
  # allows us to get rid of classes like {: .tip} that would be left in
  # the output by Jupyter's markdown renderer, and additionally do any
  # custom CSS which only seems to work when inline on a cell, i.e. we
  # can't setup a style block, so we really need to render the markdown
  # to html.
  notebook = renderMarkdownCells(site, notebook, data, url, dir)

  # Here we add a close to the notebook
  key_points = data.fetch2('key_points', []).map { |k| "- #{k}\n" }
  closing_cell = {
    'cell_type' => 'markdown',
    'id' => 'final-ending-cell',
    'metadata' => { 'editable' => false, 'collapsed' => false },
    'source' => [
      "# Key Points\n\n",
      *key_points,
      "\n# Congratulations on successfully completing this tutorial!\n\n",
      'Please [fill out the feedback on the GTN website](https://training.galaxyproject.org/training-material' \
      "#{url}#feedback) and check there for further resources!\n"
    ]
  }
  notebook['cells'] += [closing_cell]
  notebook
end
525
526
##
# Render every markdown cell of a notebook to HTML and restyle it so that it
# displays reasonably inside Jupyter. Non-markdown cells are left untouched.
#
# Params:
# +site+:: The Jekyll site object, handed to +markdownify+
# +notebook+:: The notebook Hash; its markdown cells are modified in place
# +metadata+:: The page metadata; an optional 'abbreviations' Hash is expanded in the text
# +page_url+:: The page URL, used to build absolute links for images that are not embedded
# +dir+:: Directory against which relative image paths are resolved when embedding
#
# Returns:
# +Hash+:: The same notebook object
def self.renderMarkdownCells(site, notebook, metadata, page_url, dir)
  # Abbreviations are tracked across cells: only the first use is spelled out.
  seen_abbreviations = {}
  notebook['cells'].each do |cell|
    next unless cell.fetch('cell_type') == 'markdown'

    # The source is initially a list of strings, we'll merge it together
    # to make it easier to work with.
    source = cell['source'].join.strip

    # Here we replace individual `s with codeblocks, they screw up
    # rendering otherwise by going through rouge
    source = source.gsub(/ `([^`]*)`([^`])/, ' <code>\1</code>\2')
                   .gsub(/([^`])`([^`]*)` /, '\1<code>\2</code> ')

    # Strip out includes, snippets
    source.gsub!(/{% include .* %}/, '')
    source.gsub!(/{% snippet .* %}/, '')

    cell['source'] = markdownify(site, source)

    # Replace all the broken icons that can't render, because we don't
    # have access to the full render pipeline.
    ICONS.each do |key, val|
      # Replace the new box titles with h3s.
      cell['source'].gsub!(%r{<div class="box-title #{key}-title".*?</span>(.*?)</div>},
                           "<div style=\"font-weight:900;font-size: 125%\">#{val} \\1</div>")
    end

    # Remove the fa-icon spans
    cell['source'].gsub!(%r{<span role="button" class="fold-unfold fa fa-minus-square"></span>}, '')

    # just removing the buttons from solutions since they'll be changed
    # into summary/details in the parent notebook-jupyter.
    cell['source'].gsub!(%r{<button class="gtn-boxify-button solution".*?</button>}, '')

    if metadata.key?('abbreviations')
      metadata['abbreviations'].each do |abbr, defn|
        # gsub! so the expansion is actually written back to the cell.
        cell['source'].gsub!(/\{#{Regexp.escape(abbr.to_s)}\}/) do
          if seen_abbreviations.key?(abbr)
            "<abbr title=\"#{defn}\">#{abbr}</abbr>"
          else
            seen_abbreviations[abbr] = true
            "#{defn} (#{abbr})"
          end
        end
      end
    end

    # Here we give a GTN-ish styling that doesn't try to be too faithful,
    # so we aren't spending time keeping up with changes to GTN css,
    # we're making it 'our own' a bit.
    COLORS.each do |key, color|
      border = COLORS_EXTRA.key?(key) ? "#{color};#{COLORS_EXTRA[key]}" : color

      cell['source'].gsub!(/<blockquote class="#{key}">/,
                           "<blockquote class=\"#{key}\" style=\"border: 2px solid #{border}; margin: 1em 0.2em\">")
    end

    # Images are referenced through relative URLs which is
    # fab, but in a notebook this doesn't make sense as it will live
    # outside of the GTN. We need real URLs.
    #
    # So either we'll embed the images directly via base64 encoding (cool,
    # love it) or we'll link to the production images and folks can live
    # without their images for a bit until it's merged.
    cell['source'].gsub!(/<img src="(\.\.[^"]*)/) do |img|
      # Drop the 10 characters of `<img src="` to get the relative path.
      path = img[10..]
      lowered = img.downcase
      mime =
        if lowered.end_with?('png') then 'image/png'
        elsif lowered.end_with?('jpg', 'jpeg') then 'image/jpeg'
        elsif lowered.end_with?('svg') then 'image/svg+xml'
        end

      if mime
        encoded = Base64.encode64(File.binread(File.join(dir, path)))
        %(<img src="data:#{mime};base64,#{encoded}")
      else
        # Falling back to non-embedded images: resolve the relative path
        # against the page's directory on the production site.
        base = page_url.split('/')[0..-2].join('/')
        "<img src=\"https://training.galaxyproject.org/training-material/#{base}/#{path}"
      end
    end

    # Strip out the highlighting as it is bad on some platforms.
    cell['source'].gsub!(/<pre class="highlight">/, '<pre style="color: inherit; background: transparent">')
    cell['source'].gsub!(/<div class="highlight">/, '<div>')
    cell['source'].gsub!(/<code>/, '<code style="color: inherit">')

    # Leading spaces are sometimes interpreted as <pre>s and end up causing
    # paragraphs to be rendered as code. So we wipe out all leading space
    # in front of tags.
    cell['source'].gsub!(/^\s*</, '<')

    # 'editable' is actually CoCalc specific but oh well.
    cell['metadata'] = { 'editable' => false, 'collapsed' => false }

    # There is some weirdness in the processing of $s in Jupyter. After a
    # certain number of them, it will give up, and just render everything
    # like with a '<pre>'. We escape them to prevent that result.
    cell['source'].gsub!(/\$/, '&#36;')
  end
  notebook
end
640
end
641
end
642
643