Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
quarto-dev
GitHub Repository: quarto-dev/quarto-cli
Path: blob/main/src/resources/jupyter/notebook.py
12921 views
1
# pyright: reportMissingImports=false
2
3
import os
4
import re
5
import atexit
6
import glob
7
import sys
8
import json
9
import pprint
10
import copy
11
import base64
12
13
from pathlib import Path
14
15
from yaml import safe_load as parse_string
16
from yaml import safe_dump
17
18
from log import trace
19
import nbformat
20
from nbclient import NotebookClient
21
from jupyter_client import KernelManager
22
from jupyter_core_utils_vendor import run_sync
23
import asyncio
24
25
# optional import of papermill for params support
26
try:
27
from papermill import translators as papermill_translate
28
except ImportError:
29
papermill_translate = None
30
31
# optional import of jupyter-cache
32
try:
33
from jupyter_cache import get_cache
34
except ImportError:
35
get_cache = None
36
37
NB_FORMAT_VERSION = 4
38
39
40
def get_language_from_nb_metadata(metadata):
    """Return the notebook's language name, or None when none is recorded.

    The kernelspec's ``language`` entry wins; when it is missing or empty the
    ``name`` entry of ``language_info`` is used instead.

    Args:
        metadata: Notebook-level metadata mapping. Only ``.get`` is used, so a
            plain dict works as well as an attribute-access mapping.

    Returns:
        The language string, or None if neither location provides one.
    """
    # `or {}` also covers a key that is present but explicitly set to None
    kernelspec = metadata.get("kernelspec", None) or {}
    ks_lang = kernelspec.get("language", None)
    if ks_lang:
        return ks_lang

    language_info = metadata.get("language_info", None) or {}
    return language_info.get("name", None)
47
48
49
# exception to indicate the kernel needs restarting
50
class RestartKernel(Exception):
    """Raised to signal that the kernel has to be restarted."""
52
53
54
def build_kernel_options(options):
    """Flatten the incoming execution request into a flat options dict.

    Args:
        options: Request mapping. ``format`` and ``resourceDir`` are required;
            ``params``, ``cwd`` and ``quiet`` are optional. ``format`` must
            carry ``execute``, ``metadata`` and ``identifier`` sub-mappings.

    Returns:
        A dict with the keys ``format``, ``resource_dir``, ``params``,
        ``run_path``, ``quiet``, ``eval``, ``allow_errors``, ``fig_width``,
        ``fig_height``, ``fig_format``, ``fig_dpi``, ``interactivity``,
        ``plotly_connected``, ``is_shiny``, ``is_dashboard`` and ``cache``.

    Raises:
        KeyError: If a required entry is missing from ``options`` or from
            ``options["format"]``.
    """
    # local names avoid shadowing the builtins `format` and `eval`
    fmt = options["format"]
    execute = fmt["execute"]
    metadata = fmt["metadata"]

    # shell interactivity: an unset (None) value is forwarded as ""
    interactivity = execute["ipynb-shell-interactivity"]
    if interactivity is None:
        interactivity = ""

    # server: shiny
    is_shiny = (
        "server" in metadata
        and "type" in metadata["server"]
        and metadata["server"]["type"] == "shiny"
    )

    return {
        "format": fmt,
        "resource_dir": options["resourceDir"],
        "params": options.get("params", None),
        "run_path": options.get("cwd", ""),
        "quiet": options.get("quiet", False),
        # evaluation
        "eval": execute["eval"],
        "allow_errors": bool(execute["error"]),
        # figures
        "fig_width": execute["fig-width"],
        "fig_height": execute["fig-height"],
        "fig_format": execute["fig-format"],
        "fig_dpi": execute["fig-dpi"],
        "interactivity": interactivity,
        "plotly_connected": execute["plotly-connected"],
        "is_shiny": is_shiny,
        "is_dashboard": fmt["identifier"]["base-format"] == "dashboard",
        # caching falls back to "user" when the format does not set it
        "cache": execute.get("cache", "user"),
    }
121
122
123
def set_env_vars(options):
    """Publish the figure settings from `options` as QUARTO_FIG_* environment variables.

    The "retina" figure format is exported as "png" with the DPI doubled;
    every other format is exported unchanged.
    """
    env = os.environ
    env["QUARTO_FIG_WIDTH"] = str(options["fig_width"])
    env["QUARTO_FIG_HEIGHT"] = str(options["fig_height"])

    retina = options["fig_format"] == "retina"
    env["QUARTO_FIG_DPI"] = str(
        options["fig_dpi"] * 2 if retina else options["fig_dpi"]
    )
    env["QUARTO_FIG_FORMAT"] = "png" if retina else options["fig_format"]
132
133
134
def retrieve_nb_from_cache(nb, status, input, **kwargs):
    """Connect to the notebook cache and, if possible, satisfy the run from it.

    Args:
        nb: The notebook about to be executed.
        status: Callable that receives progress text.
        input: Destination handed to ``nb_write`` when a cached notebook is used.
        **kwargs: Must contain ``cache``; any other entries are ignored.

    Returns:
        ``True`` when the notebook was served from the cache and written to
        ``input``; otherwise the cache object (caching enabled, but a miss or
        ``cache == "refresh"``), or ``None`` when caching is not enabled.

    Raises:
        ImportError: Caching was requested but jupyter-cache is not installed.
    """
    cache = kwargs["cache"]

    # only True and "refresh" turn caching on
    if not (cache == True or cache == "refresh"):
        trace("not using cache")
        return None

    trace("using cache")
    if not get_cache:
        raise ImportError(
            "The jupyter-cache package is required for cached execution"
        )
    trace("getting cache")
    # Respect env var used to modify default cache dir
    # https://jupyter-cache.readthedocs.io/en/latest/using/cli.html
    nb_cache = get_cache(os.getenv("JUPYTERCACHE", ".jupyter_cache"))

    # "refresh" forces re-execution, so the lookup is skipped in that mode
    if cache != "refresh":
        cached_nb = nb_from_cache(nb, nb_cache)
        if cached_nb:
            # drop the first cell before writing the cached notebook out
            cached_nb.cells.pop(0)
            nb_write(cached_nb, input)
            status("(Notebook read from cache)\n\n")
            trace("(Notebook read from cache)")
            return True  # can persist kernel

    return nb_cache
160
161
162
# check if the kernel needs to be restarted
163
# and records necessary state for the next execution
164
#
165
# TODO why is the state here set on the function?
166
def check_for_kernel_restart(options):
    """Report whether the kernel must be restarted for this request.

    ``python_cmd`` and ``supervisor_pid`` are read from ``options`` in that
    order. The first time a truthy value is seen it is stored as an attribute
    of the same name on ``notebook_execute``; on later calls a differing value
    means a restart is needed.

    Returns:
        ``True`` if a tracked value changed, otherwise ``None``.
    """
    for key in ("python_cmd", "supervisor_pid"):
        requested = options.get(key, None)
        if not requested:
            continue
        if not hasattr(notebook_execute, key):
            # first execution on this kernel: remember the value
            setattr(notebook_execute, key, requested)
        elif getattr(notebook_execute, key) != requested:
            return True
    return None
185
186
187
# execute a notebook
188
def notebook_execute(options, status):
    """Execute the notebook named by ``options["target"]["input"]`` in place.

    The notebook is read, optionally parameterized, prefixed with a setup
    cell, executed cell by cell, and written back to the same file without
    the setup cell.

    Side effects visible in this function: the process working directory is
    changed to the input's parent directory, QUARTO_FIG_* environment
    variables are set, and the input file is rewritten.

    Args:
        options: Execution request; read directly here for
            ``options["target"]["input"]`` and otherwise passed to
            ``check_for_kernel_restart`` and ``build_kernel_options``.
        status: Callable that receives progress text.

    Returns:
        ``True`` when the notebook was served from the cache; otherwise the
        ``kernel_supports_daemonization`` flag computed during execution.

    Raises:
        RestartKernel: When ``check_for_kernel_restart`` reports a change,
            when a recorded kernel dependency differs after the setup cell,
            or when ``quarto`` metadata on the setup cell requests it.
    """
    trace("inside notebook_execute")
    if check_for_kernel_restart(options):
        raise RestartKernel

    # change working directory and strip dir off of paths
    original_input = options["target"]["input"]
    os.chdir(Path(original_input).parent)
    input = Path(original_input).name

    quarto_kernel_setup_options = build_kernel_options(options)
    quarto_kernel_setup_options["input"] = input
    allow_errors = quarto_kernel_setup_options["allow_errors"]
    quiet = quarto_kernel_setup_options["quiet"]
    resource_dir = quarto_kernel_setup_options["resource_dir"]
    eval = quarto_kernel_setup_options["eval"]

    # set environment variables
    set_env_vars(quarto_kernel_setup_options)

    # read the notebook
    nb = nbformat.read(input, as_version=NB_FORMAT_VERSION)

    trace("notebook was read")
    # inject parameters if provided
    if quarto_kernel_setup_options["params"]:
        nb_parameterize(nb, quarto_kernel_setup_options["params"])

    # insert setup cell (always at index 0; later code relies on that position)
    setup_cell = nb_setup_cell(nb, quarto_kernel_setup_options)
    nb.cells.insert(0, setup_cell)

    # the options dict supplies both the `input` and `cache` keyword arguments
    nb_cache = retrieve_nb_from_cache(nb, status, **quarto_kernel_setup_options)
    if nb_cache == True:
        return True  # True indicates notebook read from cache, and hence kernel can be persisted

    # create resources for execution
    resources = dict(
        {
            "metadata": {
                "input": original_input,
            }
        }
    )
    if quarto_kernel_setup_options["run_path"]:
        resources["metadata"]["path"] = quarto_kernel_setup_options["run_path"]

    trace("Will attempt to create notebook")
    # create NotebookClient
    trace("type of notebook: {0}".format(type(nb)))
    client, created = notebook_init(nb, resources, allow_errors)

    # hand the full options dict to the kernel through a comm_open message
    msg = client.kc.session.msg(
        "comm_open",
        {
            "comm_id": "quarto_comm",
            "target_name": "quarto_kernel_setup",
            "data": {"options": quarto_kernel_setup_options},
        },
    )
    client.kc.shell_channel.send(msg)

    trace("NotebookClient created")

    # complete progress if necessary
    if (not quiet) and created:
        status("Done\n")

    # counters start as if the setup cell were cell 1; progress output
    # subtracts one so that the user's first code cell is reported as 1
    current_code_cell = 1
    total_code_cells = 0
    cell_labels = []
    max_label_len = 0

    kernel_supports_daemonization = False

    # inspect `quarto` metadata on a cell and on each of its outputs
    def handle_quarto_metadata(cell):
        def handle_meta_object(obj):
            nonlocal kernel_supports_daemonization
            if hasattr(obj, "quarto"):
                qm = obj["quarto"]
                if qm.get("restart_kernel"):
                    raise RestartKernel
                if qm.get("daemonize"):
                    kernel_supports_daemonization = True
                    trace("Kernel is daemonizable from cell metadata")

        handle_meta_object(cell.get("metadata", {}))
        for output in cell.get("outputs", []):
            handle_meta_object(output.get("metadata", {}))

    for cell in client.nb.cells:
        # compute total code cells (for progress)
        if cell.cell_type == "code":
            total_code_cells += 1
        # map cells to their labels (one entry per cell, so the list can be
        # indexed by cell position below)
        language = get_language_from_nb_metadata(client.nb.metadata)
        label = nb_cell_yaml_options(language, cell).get("label", "")
        cell_labels.append(label)
        # find max label length
        max_label_len = max(max_label_len, len(label))

    # execute the cells
    for index, cell in enumerate(client.nb.cells):
        cell_label = cell_labels[index]
        padding = "." * (max_label_len - len(cell_label))

        # progress (never reported for the setup cell at index 0)
        progress = (not quiet) and cell.cell_type == "code" and index > 0
        if progress:
            status(
                " Cell {0}/{1}: '{2}'{3}...".format(
                    current_code_cell - 1, total_code_cells - 1, cell_label, padding
                )
            )

        # clear cell output
        cell = cell_clear_output(cell)

        # execute cell
        trace("Executing cell {0}".format(index))

        if cell.cell_type == "code":
            cell = cell_execute(
                client,
                cell,
                index,
                current_code_cell,
                eval,
                index > 0,  # add_to_history
            )
            cell.execution_count = current_code_cell
        elif cell.cell_type == "markdown":
            cell = cell_execute_inline(client, cell)

        trace("Executed cell {0}".format(index))

        # if this was the setup cell, see if we need to exit b/c dependencies are out of date
        if index == 0:
            # confirm kernel_deps haven't changed (restart if they have)
            # (kernel_deps only exists on the function after an earlier run
            # in this process has recorded it)
            if hasattr(notebook_execute, "kernel_deps"):
                kernel_deps = nb_kernel_dependencies(cell)
                if kernel_deps:
                    kernel_supports_daemonization = True
                    for path in kernel_deps.keys():
                        if path in notebook_execute.kernel_deps.keys():
                            if notebook_execute.kernel_deps[path] != kernel_deps[path]:
                                raise RestartKernel
                        else:
                            notebook_execute.kernel_deps[path] = kernel_deps[path]

            trace("Handling quarto metadata")
            trace(json.dumps(cell, indent=2))
            # also do it through cell metadata
            handle_quarto_metadata(cell)

            # we are done w/ setup (with no restarts) so it's safe to print 'Executing...'
            if not quiet:
                status("\nExecuting '{0}'\n".format(input))

        # assign cell
        client.nb.cells[index] = cell

        # increment current code cell
        if cell.cell_type == "code":
            current_code_cell += 1

        # end progress
        if progress:
            status("Done\n")
            trace("Done")

    trace("Notebook execution complete")

    # set widgets metadata
    client.set_widgets_metadata()

    # write to the cache (done while the setup cell is still cell 0)
    if nb_cache:
        nb_write(client.nb, input)
        nb_cache.cache_notebook_file(path=Path(input), overwrite=True)

    # remove setup cell (then renumber execution_count)
    client.nb.cells.pop(0)
    for index, cell in enumerate(client.nb.cells):
        if cell.cell_type == "code":
            cell.execution_count = cell.execution_count - 1

    # re-write without setup cell
    nb_write(client.nb, input)

    # execute cleanup cell
    cleanup_cell = nb_cleanup_cell(nb, resource_dir)
    if cleanup_cell:
        kernel_supports_daemonization = True
        # appended only for the duration of the execution, then removed again;
        # this happens after the final nb_write above
        nb.cells.append(cleanup_cell)
        client.execute_cell(
            cell=cleanup_cell, cell_index=len(client.nb.cells) - 1, store_history=False
        )
        nb.cells.pop()

        # record kernel deps after execution (picks up imports that occurred
        # within the notebook cells)
        kernel_deps = nb_kernel_dependencies(cleanup_cell)
        if kernel_deps:
            notebook_execute.kernel_deps = kernel_deps
        else:
            notebook_execute.kernel_deps = {}

    # progress
    if not quiet:
        status("\n")

    # return flag indicating whether we should persist
    return kernel_supports_daemonization
402
403
404
def notebook_init(nb, resources, allow_errors):
    """Return the process-wide NotebookClient, creating it on first use.

    The client is stored as ``notebook_init.client``. On reuse, only the
    client's ``nb`` and ``allow_errors`` are replaced; its resources are kept.

    Args:
        nb: Notebook to attach to the client.
        resources: Resources mapping; ``resources["metadata"]["input"]`` is
            compared against the stored client's value on reuse.
        allow_errors: Value assigned to the client's ``allow_errors``.

    Returns:
        A ``(client, created)`` tuple; ``created`` is True only when a new
        client and kernel were started by this call.

    Raises:
        RestartKernel: On reuse, when the kernelspec name or the input file
            differs from the stored client's.
    """
    if hasattr(notebook_init, "client"):
        existing = notebook_init.client

        # if the kernel has changed we need to force a restart
        if nb.metadata.kernelspec.name != existing.nb.metadata.kernelspec.name:
            raise RestartKernel

        # if the input file has changed we need to force a restart
        if resources["metadata"]["input"] != existing.resources["metadata"]["input"]:
            raise RestartKernel

        # set the new notebook and error policy
        existing.nb = nb
        existing.allow_errors = allow_errors
        return (existing, False)

    trace("Creating NotebookClient")
    # create notebook client
    client = NotebookClient(nb, resources=resources)
    client.allow_errors = allow_errors
    client.record_timing = False
    client.create_kernel_manager()
    client.start_new_kernel()
    client.start_new_kernel_client()

    # kernel_info() may hand back a future or a plain value; cope with both
    async def get_info():
        pending = client.kc.kernel_info()
        if asyncio.isfuture(pending):
            return await pending
        return pending

    info = run_sync(get_info)()

    info_msg = client.wait_for_reply(info)
    client.nb.metadata["language_info"] = info_msg["content"]["language_info"]
    notebook_init.client = client

    # cleanup kernel at process exit
    atexit.register(client._cleanup_kernel)

    return (client, True)
453
454
455
def nb_write(nb, input):
    """Write notebook `nb` to `input` via nbformat at version NB_FORMAT_VERSION."""
    nbformat.write(
        nb,
        input,
        version=NB_FORMAT_VERSION,
    )
457
458
459
def nb_setup_cell(nb, options):
    """Build the "setup" language cell for `nb`.

    `options` is copied rather than mutated; the copy is passed to
    ``nb_language_cell`` as keyword arguments with ``allow_empty`` set to True.
    """
    cell_kwargs = {**options, "allow_empty": True}
    return nb_language_cell("setup", nb, **cell_kwargs)
463
464
465
def nb_cleanup_cell(nb, resource_dir):
    """Build the "cleanup" language cell for `nb`, or None if it has no source."""
    return nb_language_cell(
        "cleanup", nb, resource_dir=resource_dir, allow_empty=False
    )
467
468
469
def nb_language_cell(name, nb, resource_dir, allow_empty, **args):
    """Create the language-specific code cell called `name` for notebook `nb`.

    The source is looked up in ``<resource_dir>/jupyter/lang/<language>/`` as
    the first file matching ``<name>.*``; that file is treated as a
    ``str.format`` template filled from ``args``. When that directory does
    not exist, the file ``quarto_<name>_cell`` inside ``kernelspec.path`` is
    read verbatim instead.

    Args:
        name: Cell name used to locate the source file.
        nb: Notebook whose metadata supplies the kernelspec and language.
        resource_dir: Root directory of the bundled resources.
        allow_empty: When True, a cell is returned even if no source was found.
        **args: Template values. ``run_path`` is replaced by its base64
            encoding (UTF-8) before formatting.

    Returns:
        A new code cell, or None when no source was found and ``allow_empty``
        is false.
    """
    kernelspec = nb.metadata.kernelspec
    language = get_language_from_nb_metadata(nb.metadata)
    trace(json.dumps(nb.metadata, indent=2))
    source = ""
    lang_dir = os.path.join(resource_dir, "jupyter", "lang", language)
    if os.path.isdir(lang_dir):
        cell_file = glob.glob(os.path.join(lang_dir, name + ".*"))
        # base64-encode the run_path given
        args["run_path"] = base64.b64encode(
            args.get("run_path", "").encode("utf-8")
        ).decode("utf-8")
        if len(cell_file) > 0:
            # explicit encoding: do not depend on the platform default
            with open(cell_file[0], "r", encoding="utf-8") as file:
                source = file.read().format(**args)
    else:
        trace(f"No {language} directory found in {lang_dir}")
        trace("Will look for explicit quarto setup cell information in kernelspec dir")
        kernelspec_cell_path = os.path.join(kernelspec.path, f"quarto_{name}_cell")
        try:
            with open(kernelspec_cell_path, "r", encoding="utf-8") as file:
                trace(f"Quarto_{name}_cell file found in {kernelspec.path}")
                trace(kernelspec_cell_path)
                source = file.read()
        except FileNotFoundError:
            # a missing file is expected; the cell simply has no source
            trace(f"No quarto_{name}_cell file found in {kernelspec.path}")
            trace(kernelspec_cell_path)

    # create cell
    if source != "" or allow_empty:
        return nbformat.versions[NB_FORMAT_VERSION].new_code_cell(source=source)
    else:
        return None
504
505
506
def nb_from_cache(nb, nb_cache, nb_meta=("kernelspec", "language_info", "widgets")):
    """Return a copy of `nb` with code cells replaced by their cached versions.

    Args:
        nb: Notebook to look up; it is deep-copied, not modified.
        nb_cache: Cache object providing ``match_cache_notebook`` and
            ``get_cache_bundle``.
        nb_meta: Metadata keys to copy from the cached notebook; ``None``
            replaces the metadata wholesale.

    Returns:
        The merged notebook copy, or None if a KeyError is raised along the
        way (presumably how the cache reports a miss — confirm against
        jupyter-cache).
    """
    try:
        trace("nb_from_cache match")
        cache_record = nb_cache.match_cache_notebook(nb)
        trace("nb_from_cache get buncle")
        cache_nb = nb_cache.get_cache_bundle(cache_record.pk).nb
        merged = copy.deepcopy(nb)

        # selected (execution-oriented) metadata
        trace("nb_from_cache processing metadata")
        if nb_meta is None:
            merged.metadata = cache_nb.metadata
        else:
            for key in nb_meta:
                if key in cache_nb.metadata:
                    merged.metadata[key] = cache_nb.metadata[key]

        # code cells: each one takes the next cell off the front of the cached list
        trace("nb_from_cache processing cells")
        for idx, cell in enumerate(merged.cells):
            if cell.cell_type == "code":
                merged.cells[idx] = cache_nb.cells.pop(0)

        trace("nb_from_cache returning")
        return merged
    except KeyError:
        return None
532
533
534
# This function is called on the executed setup cell and on the executed cleanup cell
535
def nb_kernel_dependencies(setup_cell):
    """Parse the JSON found in the first stdout stream output of a cell.

    Args:
        setup_cell: An executed cell exposing an ``outputs`` sequence.

    Returns:
        The decoded JSON value, or None when the cell has no stdout stream
        output.

    Raises:
        json.JSONDecodeError: If the first stdout stream output is not JSON.
    """
    for output in setup_cell.outputs:
        # check output_type first: only stream outputs are required to have a
        # `name`, so other output kinds must not be asked for it
        if (
            getattr(output, "output_type", None) == "stream"
            and getattr(output, "name", None) == "stdout"
        ):
            return json.loads(output.text)
    return None
539
540
541
def cell_execute(client, cell, index, execution_count, eval_default, store_history):
    """Execute one code cell, honouring its ``eval`` and ``error`` options.

    Args:
        client: Notebook client used to run the cell.
        cell: The code cell to run.
        index: Position of the cell in the notebook.
        execution_count: Execution count passed on to the client.
        eval_default: Value of ``eval`` when the cell does not set it.
        store_history: Passed through to ``client.execute_cell``.

    Returns:
        The cell, executed when ``eval`` compares equal to True and returned
        untouched otherwise.

    Raises:
        CellExecutionError: When errors are not allowed for the cell and one
            of its outputs is an error output.
    """
    language = get_language_from_nb_metadata(client.nb.metadata)
    # read cell options
    cell_options = nb_cell_yaml_options(language, cell)

    # check options for eval and error
    should_eval = cell_options.get("eval", eval_default)
    allow_errors = cell_options.get("error")

    trace(f"cell_execute with eval={should_eval}")
    if allow_errors == True:
        trace(f"cell_execute with allow_errors={allow_errors}")

    # nothing to do unless eval is active
    if not (should_eval == True):
        return cell

    # add 'raises-exception' tag for allow_errors
    if allow_errors:
        if not "metadata" in cell:
            cell["metadata"] = {}
        existing_tags = cell.get("metadata", {}).get("tags", [])
        cell["metadata"]["tags"] = existing_tags + ["raises-exception"]

    # execute (w/o yaml options so that cell magics work), then put the
    # original source back
    original_source = cell.source
    cell.source = nb_strip_yaml_options(client, cell.source)
    cell = client.execute_cell(
        cell=cell,
        cell_index=index,
        execution_count=execution_count,
        store_history=store_history,
    )
    cell.source = original_source

    # if lines_to_next_cell is 0 then fix it to be 1
    if cell.get("metadata", {}).get("lines_to_next_cell", -1) == 0:
        cell["metadata"]["lines_to_next_cell"] = 1

    # remove injected raises-exception
    if allow_errors:
        cell["metadata"]["tags"].remove("raises-exception")
        if len(cell["metadata"]["tags"]) == 0:
            del cell["metadata"]["tags"]

    # Check for display errors in output (respecting both global and cell settings)
    if allow_errors is not None:
        cell_allows_errors = allow_errors
    else:
        cell_allows_errors = client.allow_errors
    if cell_allows_errors:
        return cell

    trace("Cell does not allow errors: checking for uncaught errors")
    for output in cell.outputs:
        if output.get("output_type") != "error":
            continue
        trace(" Uncaught error found in output")
        from nbclient.exceptions import CellExecutionError

        # Use same error raising mechanism as nbclient
        raise CellExecutionError.from_cell_and_msg(
            cell,
            {
                "ename": "UncaughtCellError:" + output.get("ename", "UnnamedError"),
                "evalue": output.get("evalue", ""),
                "traceback": output.get("traceback", []),
            },
        )

    # return cell
    return cell
611
612
613
def cell_execute_inline(client, cell):
    """Evaluate inline expressions in a markdown cell and record the results.

    Inline expressions have the form `` `{<language>} <expr>` ``. They are
    sent to the kernel as ``user_expressions`` of an empty execute request,
    and the successful results are stored, in order, in
    ``cell.metadata["user_expressions"]``. When the cell has no inline
    expressions, any existing ``user_expressions`` metadata is removed.

    Args:
        client: Notebook client providing ``nb``, ``kc`` and ``wait_for_reply``.
        cell: The markdown cell to process.

    Returns:
        The same cell.

    Raises:
        Exception: When the kernel reports an error for the request as a
            whole or for an individual expression.
    """

    # helper to raise an error from a result
    def raise_error(result):
        ename = result.get("ename")
        evalue = result.get("evalue")
        raise Exception(f"{ename}: {evalue}")

    # helper to clear existing user_expressions if they exist
    def clear_user_expressions():
        if "metadata" in cell:
            metadata = cell.get("metadata")
            if "user_expressions" in metadata:
                del metadata["user_expressions"]

    # find expressions in source
    language = get_language_from_nb_metadata(client.nb.metadata)
    source = "".join(cell.source)
    # escape the language so names with regex metacharacters (e.g. "c++")
    # are matched literally
    language_pattern = re.escape(str(language))
    expressions = re.findall(
        rf"(?:^|[^`])`{{{language_pattern}}}[ \t]([^`]+)`", source, re.MULTILINE
    )
    if expressions:
        # send and wait for 'execute' kernel message w/ user_expressions
        kc = client.kc
        user_expressions = {str(idx): expr for idx, expr in enumerate(expressions)}
        msg_id = kc.execute("", user_expressions=user_expressions)
        reply = client.wait_for_reply(msg_id)

        # process reply
        content = reply.get("content")
        if content.get("status") == "ok":
            # build results (check for error on each one)
            results = []
            for key in user_expressions:
                result = content.get("user_expressions").get(key)
                if result.get("status") == "ok":
                    results.append(
                        {"expression": user_expressions.get(key), "result": result}
                    )
                elif result.get("status") == "error":
                    raise_error(result)

            # set results into metadata
            if "metadata" not in cell:
                cell["metadata"] = {}
            cell["metadata"]["user_expressions"] = results

        elif content.get("status") == "error":
            raise_error(content)
    else:
        clear_user_expressions()

    # return cell
    return cell
668
669
670
def cell_clear_output(cell):
    """Reset a code cell's outputs and execution count; other cells pass through.

    For code cells the ``collapsed`` and ``scrolled`` metadata fields are
    removed as well. The cell is modified in place and returned.
    """
    if cell.cell_type != "code":
        return cell

    cell.outputs = []
    cell.execution_count = None
    if "metadata" in cell:
        for field in ("collapsed", "scrolled"):
            cell.metadata.pop(field, None)
    return cell
679
680
681
def nb_parameterize(nb, params):
    """Inject `params` into `nb` as an "injected-parameters" code cell.

    Nothing happens unless the notebook has a cell tagged ``parameters``. The
    injected cell keeps that cell's other tags and its YAML cell options,
    minus ``label`` to avoid duplicate labels. It replaces an existing
    ``injected-parameters`` cell, or is otherwise inserted right after the
    ``parameters`` cell. The parameters are also recorded in
    ``nb.metadata.papermill["parameters"]``.

    Args:
        nb: Notebook to modify in place.
        params: Parameter values handed to papermill's translator.

    Raises:
        ImportError: If papermill is not installed.
    """
    # verify papermill import
    if not papermill_translate:
        raise ImportError(
            "The papermill package is required for processing --execute-params"
        )

    # alias kernel name and language
    kernel_name = nb.metadata.kernelspec.name
    language = get_language_from_nb_metadata(nb.metadata)

    # find params index and note any tags/yaml on it (exit if no params)
    params_index = find_first_tagged_cell_index(nb, "parameters")
    if params_index == -1:
        return
    tagged_cell = nb.cells[params_index]
    params_cell_tags = tagged_cell.get("metadata", {}).get("tags", []).copy()
    params_cell_yaml = nb_cell_yaml_lines(language, tagged_cell.source)
    params_cell_tags.remove("parameters")

    # Generate parameter content based on the kernel_name
    params_content = papermill_translate.translate_parameters(
        kernel_name, language, params, "Injected Parameters"
    )

    # prepend options
    if len(params_cell_yaml):
        # https://github.com/quarto-dev/quarto-cli/issues/10097
        # We need to find and drop `label: ` from the yaml options
        # to avoid label duplication
        # The only way to do this robustly is to parse the yaml
        # and then re-encode it
        #
        # keep the raw lines under their own name so the warning below can
        # always print them, whatever stage the try block reached
        raw_yaml_lines = params_cell_yaml
        try:
            parsed_yaml = parse_string("\n".join(raw_yaml_lines))
            if not isinstance(parsed_yaml, dict):
                raise ValueError("cell options must be a YAML mapping")
            parsed_yaml.pop("label", None)
            params_cell_yaml = safe_dump(parsed_yaml).strip().splitlines()
        except Exception as e:
            sys.stderr.write(str(e) + "\n")
            sys.stderr.write(
                "\nWARNING: Invalid YAML option format in cell:\n"
                + "\n".join(raw_yaml_lines)
                + "\n"
            )
            sys.stderr.flush()
            params_cell_yaml = []

        comment_chars = nb_language_comment_chars(language)
        option_prefix = comment_chars[0] + "| "
        option_suffix = comment_chars[1] if len(comment_chars) > 1 else None

        # wrap one yaml line in the language's cell-option comment syntax
        def enclose(yaml):
            yaml = option_prefix + yaml
            if option_suffix:
                yaml = yaml + option_suffix
            return yaml

        params_content = (
            "\n".join(map(enclose, params_cell_yaml)) + "\n" + params_content
        )

    # create params cell
    params_cell = nbformat.v4.new_code_cell(source=params_content)
    params_cell.metadata["tags"] = ["injected-parameters"] + params_cell_tags

    # find existing injected params index
    injected_params_index = find_first_tagged_cell_index(nb, "injected-parameters")

    # find the right insertion/replace point for the injected params
    if injected_params_index >= 0:
        # Replace the injected cell with a new version
        before = nb.cells[:injected_params_index]
        after = nb.cells[injected_params_index + 1 :]
    else:
        # Add an injected cell after the parameter cell
        before = nb.cells[: params_index + 1]
        after = nb.cells[params_index + 1 :]

    nb.cells = before + [params_cell] + after
    if not nb.metadata.get("papermill"):
        nb.metadata.papermill = {}
    nb.metadata.papermill["parameters"] = params
765
766
767
def find_first_tagged_cell_index(nb, tag):
    """Return the index of the first cell in `nb` whose tags contain `tag`.

    Args:
        nb: Object with a ``cells`` sequence of mapping-like cells.
        tag: Tag to look for in each cell's ``metadata["tags"]``.

    Returns:
        The zero-based index of the first match, or -1 if no cell has the tag.
    """
    for idx, cell in enumerate(nb.cells):
        if tag in cell.get("metadata", {}).get("tags", []):
            return idx
    return -1
775
776
777
def nb_strip_yaml_options(client, source):
    """Return `source` without its leading YAML cell-option lines.

    When there are no option lines the original string is returned as is;
    otherwise the remaining lines are re-joined with "\\n".
    """
    language = get_language_from_nb_metadata(client.nb.metadata)
    option_line_count = len(nb_cell_yaml_lines(language, source))
    if option_line_count == 0:
        return source
    remaining = source.splitlines()[option_line_count:]
    return "\n".join(remaining)
786
787
788
def nb_cell_yaml_options(lang, cell):
    """Parse the YAML cell options at the top of `cell.source`.

    Returns:
        The parsed options dict. An empty dict is returned when the cell has
        no option lines, or (after a warning on stderr) when the options do
        not parse to a dict.
    """
    # go through the lines until we've found all of the yaml
    yaml_lines = nb_cell_yaml_lines(lang, cell.source)
    if len(yaml_lines) == 0:
        return dict()

    # we have yaml, so parse it
    yaml_code = "\n".join(yaml_lines)
    yaml_options = parse_string(yaml_code)
    if type(yaml_options) is dict:
        return yaml_options

    sys.stderr.write(
        "\nWARNING: Invalid YAML option format in cell:\n" + yaml_code + "\n"
    )
    sys.stderr.flush()
    return dict()
807
808
809
def nb_cell_yaml_lines(lang, source):
    """Collect the YAML text of the option lines that open `source`.

    An option line starts with the language's comment opener, optional
    whitespace and ``|``. For languages with a closing comment marker the
    line must also end with it. Collection stops at the first line that is
    not an option line.

    Returns:
        The option lines with the comment syntax removed, in order.
    """
    # determine language comment chars
    comment_chars = nb_language_comment_chars(lang)
    option_pattern = "^" + re.escape(comment_chars[0]) + "\\s*\\| ?"
    option_suffix = comment_chars[1] if len(comment_chars) > 1 else None

    collected = []
    for line in source.splitlines():
        option_match = re.match(option_pattern, line)
        if not option_match:
            break
        if option_suffix and not line.rstrip().endswith(option_suffix):
            break

        # the match is anchored at 0, so its end is the prefix length
        yaml_option = line[option_match.end() :]
        if option_suffix:
            yaml_option = yaml_option.rstrip()[: -len(option_suffix)]
        # strip trailing spaces after : to avoid poyo error
        # (https://github.com/hackebrot/poyo/issues/30)
        yaml_option = re.sub(":\\s+$", ":", yaml_option)
        collected.append(yaml_option)

    # return the lines
    return collected
833
834
835
def nb_language_comment_chars(lang):
    """Return the comment delimiters for `lang` as a list.

    The list holds one element (a line-comment opener) or two (opener and
    closer). Unknown languages fall back to ``["#"]``.
    """
    langs = {
        "r": "#",
        "python": "#",
        "julia": "#",
        "scala": "//",
        "matlab": "%",
        "csharp": "//",
        "fsharp": "//",
        "c": ["/*", "*/"],
        "css": ["/*", "*/"],
        "sas": ["*", ";"],
        "powershell": "#",
        "bash": "#",
        "sql": "--",
        "mysql": "--",
        "psql": "--",
        "lua": "--",
        "cpp": "//",
        "cc": "//",
        "stan": "#",
        "octave": "#",
        "fortran": "!",
        "fortran95": "!",
        "awk": "#",
        "gawk": "#",
        "stata": "*",
        "java": "//",
        "groovy": "//",
        "sed": "#",
        "perl": "#",
        "ruby": "#",
        "tikz": "%",
        "js": "//",
        "d3": "//",
        "node": "//",
        "sass": "//",
        "coffee": "#",
        "go": "//",
        "asy": "//",
        "haskell": "--",
        "dot": "//",
        "apl": "⍝",
        "q": "/",
        "ocaml": ["(*", "*)"],
    }
    chars = langs.get(lang, "#")
    return chars if isinstance(chars, list) else [chars]
888
889