CoCalc -- notebook.py

GitHub Repository: quarto-dev/quarto-cli
Path: blob/main/src/resources/jupyter/notebook.py
¹²⁹²¹ views
1
# pyright: reportMissingImports=false
2

3
import os
4
import re
5
import atexit
6
import glob
7
import sys
8
import json
9
import pprint
10
import copy
11
import base64
12

13
from pathlib import Path
14

15
from yaml import safe_load as parse_string
16
from yaml import safe_dump
17

18
from log import trace
19
import nbformat
20
from nbclient import NotebookClient
21
from jupyter_client import KernelManager
22
from jupyter_core_utils_vendor import run_sync
23
import asyncio
24

25
# optional import of papermill for params support
26
try:
27
    from papermill import translators as papermill_translate
28
except ImportError:
29
    papermill_translate = None
30

31
# optional import of jupyter-cache
32
try:
33
    from jupyter_cache import get_cache
34
except ImportError:
35
    get_cache = None
36

37
NB_FORMAT_VERSION = 4
38

39

40
def get_language_from_nb_metadata(metadata):
    """Return the notebook language recorded in ``metadata``.

    The kernelspec's ``language`` entry wins when it is truthy; otherwise the
    ``name`` entry of ``language_info`` is used (``None`` when there is no
    truthy ``language_info``).
    """
    kernelspec_language = metadata.kernelspec.get("language", None)
    language_info = metadata.get("language_info", None)
    info_name = (
        metadata.language_info.get("name", None) if language_info else None
    )
    return kernelspec_language or info_name
47

48

49
# exception to indicate the kernel needs restarting
50
class RestartKernel(Exception):
    """Raised to signal that the running kernel must be restarted."""
52

53

54
def build_kernel_options(options):
    """Flatten an execution request into the options dict used for kernel setup.

    Args:
        options: Mapping with at least ``"format"`` and ``"resourceDir"``;
            ``"params"``, ``"cwd"`` and ``"quiet"`` are optional. The format
            must provide ``"execute"``, ``"metadata"`` and ``"identifier"``.

    Returns:
        A dict with the keys ``format``, ``resource_dir``, ``params``,
        ``run_path``, ``quiet``, ``eval``, ``allow_errors``, ``fig_width``,
        ``fig_height``, ``fig_format``, ``fig_dpi``, ``interactivity``,
        ``plotly_connected``, ``is_shiny``, ``is_dashboard`` and ``cache``.

    Raises:
        KeyError: If a required key is missing from ``options`` or the format.
    """
    # unpack options (local names avoid shadowing the `format`/`eval` builtins)
    format_config = options["format"]
    resource_dir = options["resourceDir"]
    params = options.get("params", None)
    run_path = options.get("cwd", "")
    quiet = options.get("quiet", False)

    # read variables out of format
    execute = format_config["execute"]

    # evaluation
    eval_default = execute["eval"]
    allow_errors = bool(execute["error"])

    # figures
    fig_width = execute["fig-width"]
    fig_height = execute["fig-height"]
    fig_format = execute["fig-format"]
    fig_dpi = execute["fig-dpi"]

    # shell interactivity (a null setting is passed on as an empty string)
    interactivity = execute["ipynb-shell-interactivity"]
    if interactivity is None:
        interactivity = ""

    # plotly connected
    plotly_connected = execute["plotly-connected"]

    # server: shiny -- only a mapping can carry a "type" entry; any other
    # value (for instance a plain string) simply means "not shiny" instead of
    # failing on string indexing
    metadata = format_config["metadata"]
    server = metadata["server"] if "server" in metadata else None
    is_shiny = isinstance(server, dict) and server.get("type") == "shiny"

    # dashboard
    is_dashboard = format_config["identifier"]["base-format"] == "dashboard"

    # caching ("user" when the format does not say)
    cache = execute.get("cache", "user")

    return {
        "format": format_config,
        "resource_dir": resource_dir,
        "params": params,
        "run_path": run_path,
        "quiet": quiet,
        "eval": eval_default,
        "allow_errors": allow_errors,
        "fig_width": fig_width,
        "fig_height": fig_height,
        "fig_format": fig_format,
        "fig_dpi": fig_dpi,
        "interactivity": interactivity,
        "plotly_connected": plotly_connected,
        "is_shiny": is_shiny,
        "is_dashboard": is_dashboard,
        "cache": cache,
    }
121

122

123
def set_env_vars(options):
    """Export the figure settings in ``options`` as ``QUARTO_FIG_*`` variables.

    For the ``"retina"`` figure format the DPI is doubled and the exported
    format is ``"png"``; every other format is exported unchanged.
    """
    env = os.environ
    env["QUARTO_FIG_WIDTH"] = str(options["fig_width"])
    env["QUARTO_FIG_HEIGHT"] = str(options["fig_height"])

    retina = options["fig_format"] == "retina"
    dpi = options["fig_dpi"] * 2 if retina else options["fig_dpi"]
    env["QUARTO_FIG_DPI"] = str(dpi)
    env["QUARTO_FIG_FORMAT"] = "png" if retina else options["fig_format"]
132

133

134
def retrieve_nb_from_cache(nb, status, input, **kwargs):
    """Connect to the notebook cache and try to satisfy the request from it.

    Returns ``True`` when a cached notebook was written to ``input`` (so the
    kernel can be persisted), the cache object when caching is enabled but
    the notebook still has to be executed, and ``None`` when caching is off.

    Raises:
        ImportError: If caching was requested but jupyter-cache is missing.
    """
    cache = kwargs["cache"]

    # caching disabled: nothing to connect to
    if not (cache == True or cache == "refresh"):
        trace("not using cache")
        return None

    trace("using cache")
    if not get_cache:
        raise ImportError(
            "The jupyter-cache package is required for cached execution"
        )
    trace("getting cache")
    # Respect env var used to modify default cache dir
    # https://jupyter-cache.readthedocs.io/en/latest/using/cli.html
    cache_dir = os.getenv("JUPYTERCACHE", ".jupyter_cache")
    nb_cache = get_cache(cache_dir)

    # 'refresh' forces re-execution, so only look for a hit otherwise
    if cache != "refresh":
        cached_nb = nb_from_cache(nb, nb_cache)
        if cached_nb:
            # drop the first cell before writing the cached notebook out
            cached_nb.cells.pop(0)
            nb_write(cached_nb, input)
            status("(Notebook read from cache)\n\n")
            trace("(Notebook read from cache)")
            return True  # can persist kernel

    return nb_cache
160

161

162
# check if the kernel needs to be restarted
163
# and records necessary state for the next execution
164
#
165
# TODO why is the state here set on the function?
166
def check_for_kernel_restart(options):
    """Report whether the persisted kernel must be restarted.

    ``python_cmd`` and ``supervisor_pid`` from ``options`` are compared with
    the values remembered as attributes on ``notebook_execute``. A value seen
    for the first time is remembered; a value that differs from the
    remembered one yields ``True``. Falls through to ``None`` otherwise.
    """
    # python_cmd first, then supervisor_pid
    for key in ("python_cmd", "supervisor_pid"):
        current = options.get(key, None)
        if not current:
            continue
        if hasattr(notebook_execute, key):
            if getattr(notebook_execute, key) != current:
                return True
        else:
            setattr(notebook_execute, key, current)
    return None
185

186

187
# execute a notebook
188
def notebook_execute(options, status):
    """Execute the notebook named by ``options`` and rewrite it in place.

    A setup cell is inserted at index 0 before execution and removed again
    before the final write; an optional cleanup cell is executed afterwards.

    Args:
        options: Execution request. ``options["target"]["input"]`` is the
            notebook path; the remaining keys are consumed by
            ``check_for_kernel_restart`` and ``build_kernel_options``.
        status: Callable taking a string, used for progress output.

    Returns:
        ``True`` when the notebook was read from the cache; otherwise a flag
        indicating whether the kernel may be persisted.

    Raises:
        RestartKernel: If the kernel has to be restarted before execution can
            continue.

    Side effects: changes the process working directory to the notebook's
    directory and sets the ``QUARTO_FIG_*`` environment variables.
    """
    trace("inside notebook_execute")
    if check_for_kernel_restart(options):
        raise RestartKernel

    # change working directory and strip dir off of paths
    original_input = options["target"]["input"]
    os.chdir(Path(original_input).parent)
    input_name = Path(original_input).name

    quarto_kernel_setup_options = build_kernel_options(options)
    quarto_kernel_setup_options["input"] = input_name
    allow_errors = quarto_kernel_setup_options["allow_errors"]
    quiet = quarto_kernel_setup_options["quiet"]
    resource_dir = quarto_kernel_setup_options["resource_dir"]
    eval_default = quarto_kernel_setup_options["eval"]

    # set environment variables
    set_env_vars(quarto_kernel_setup_options)

    # read the notebook
    nb = nbformat.read(input_name, as_version=NB_FORMAT_VERSION)

    trace("notebook was read")
    # inject parameters if provided
    if quarto_kernel_setup_options["params"]:
        nb_parameterize(nb, quarto_kernel_setup_options["params"])

    # insert setup cell
    setup_cell = nb_setup_cell(nb, quarto_kernel_setup_options)
    nb.cells.insert(0, setup_cell)

    nb_cache = retrieve_nb_from_cache(nb, status, **quarto_kernel_setup_options)
    if nb_cache is True:
        return True  # True indicates notebook read from cache, and hence kernel can be persisted

    # create resources for execution
    resources = {
        "metadata": {
            "input": original_input,
        }
    }
    if quarto_kernel_setup_options["run_path"]:
        resources["metadata"]["path"] = quarto_kernel_setup_options["run_path"]

    trace("Will attempt to create notebook")
    # create NotebookClient
    trace("type of notebook: {0}".format(type(nb)))
    client, created = notebook_init(nb, resources, allow_errors)

    # hand the setup options to the kernel over a comm
    msg = client.kc.session.msg(
        "comm_open",
        {
            "comm_id": "quarto_comm",
            "target_name": "quarto_kernel_setup",
            "data": {"options": quarto_kernel_setup_options},
        },
    )
    client.kc.shell_channel.send(msg)

    trace("NotebookClient created")

    # complete progress if necessary
    if (not quiet) and created:
        status("Done\n")

    current_code_cell = 1
    total_code_cells = 0
    cell_labels = []
    max_label_len = 0

    kernel_supports_daemonization = False

    def handle_quarto_metadata(cell):
        # inspect "quarto" metadata on the cell and on each of its outputs
        def handle_meta_object(obj):
            nonlocal kernel_supports_daemonization
            if hasattr(obj, "quarto"):
                qm = obj["quarto"]
                if qm.get("restart_kernel"):
                    raise RestartKernel
                if qm.get("daemonize"):
                    kernel_supports_daemonization = True
                    trace("Kernel is daemonizable from cell metadata")

        handle_meta_object(cell.get("metadata", {}))
        for output in cell.get("outputs", []):
            handle_meta_object(output.get("metadata", {}))

    # the notebook language does not change while labels are collected, so
    # look it up once instead of once per cell
    language = get_language_from_nb_metadata(client.nb.metadata)
    for cell in client.nb.cells:
        # compute total code cells (for progress)
        if cell.cell_type == "code":
            total_code_cells += 1
        # map cells to their labels
        label = nb_cell_yaml_options(language, cell).get("label", "")
        cell_labels.append(label)
        # find max label length
        max_label_len = max(max_label_len, len(label))

    # execute the cells
    for index, cell in enumerate(client.nb.cells):
        cell_label = cell_labels[index]
        padding = "." * (max_label_len - len(cell_label))

        # progress (index 0 is the injected setup cell, so it is not reported
        # and the displayed counts are shifted down by one)
        progress = (not quiet) and cell.cell_type == "code" and index > 0
        if progress:
            status(
                "  Cell {0}/{1}: '{2}'{3}...".format(
                    current_code_cell - 1, total_code_cells - 1, cell_label, padding
                )
            )

        # clear cell output
        cell = cell_clear_output(cell)

        # execute cell
        trace("Executing cell {0}".format(index))

        if cell.cell_type == "code":
            cell = cell_execute(
                client,
                cell,
                index,
                current_code_cell,
                eval_default,
                index > 0,  # add_to_history
            )
            cell.execution_count = current_code_cell
        elif cell.cell_type == "markdown":
            cell = cell_execute_inline(client, cell)

        trace("Executed cell {0}".format(index))

        # if this was the setup cell, see if we need to exit b/c dependencies are out of date
        if index == 0:
            # confirm kernel_deps haven't changed (restart if they have)
            if hasattr(notebook_execute, "kernel_deps"):
                kernel_deps = nb_kernel_dependencies(cell)
                if kernel_deps:
                    kernel_supports_daemonization = True
                    for path in kernel_deps.keys():
                        if path in notebook_execute.kernel_deps.keys():
                            if notebook_execute.kernel_deps[path] != kernel_deps[path]:
                                raise RestartKernel
                        else:
                            notebook_execute.kernel_deps[path] = kernel_deps[path]

            trace("Handling quarto metadata")
            trace(json.dumps(cell, indent=2))
            # also do it through cell metadata
            handle_quarto_metadata(cell)

            # we are done w/ setup (with no restarts) so it's safe to print 'Executing...'
            if not quiet:
                status("\nExecuting '{0}'\n".format(input_name))

        # assign cell
        client.nb.cells[index] = cell

        # increment current code cell
        if cell.cell_type == "code":
            current_code_cell += 1

        # end progress
        if progress:
            status("Done\n")
            trace("Done")

    trace("Notebook execution complete")

    # set widgets metadata
    client.set_widgets_metadata()

    # write to the cache (the notebook still contains the setup cell here)
    if nb_cache:
        nb_write(client.nb, input_name)
        nb_cache.cache_notebook_file(path=Path(input_name), overwrite=True)

    # remove setup cell (then renumber execution_count)
    client.nb.cells.pop(0)
    for cell in client.nb.cells:
        if cell.cell_type == "code":
            cell.execution_count = cell.execution_count - 1

    # re-write without setup cell
    nb_write(client.nb, input_name)

    # execute cleanup cell
    cleanup_cell = nb_cleanup_cell(nb, resource_dir)
    if cleanup_cell:
        kernel_supports_daemonization = True
        nb.cells.append(cleanup_cell)
        client.execute_cell(
            cell=cleanup_cell, cell_index=len(client.nb.cells) - 1, store_history=False
        )
        nb.cells.pop()

        # record kernel deps after execution (picks up imports that occurred
        # within the notebook cells)
        kernel_deps = nb_kernel_dependencies(cleanup_cell)
        if kernel_deps:
            notebook_execute.kernel_deps = kernel_deps
        else:
            notebook_execute.kernel_deps = {}

    # progress
    if not quiet:
        status("\n")

    # return flag indicating whether we should persist
    return kernel_supports_daemonization
402

403

404
def notebook_init(nb, resources, allow_errors):
    """Return ``(client, created)`` for executing ``nb``.

    The client is stored as an attribute on this function. The first call
    creates it, starts a kernel and records the kernel's ``language_info`` in
    the notebook metadata (``created`` is ``True``). Later calls reuse the
    stored client with the new notebook (``created`` is ``False``).

    Raises:
        RestartKernel: If a client exists but the kernelspec name or the
            input file differs from the one it was created for.
    """
    if hasattr(notebook_init, "client"):
        existing = notebook_init.client

        # if the kernel has changed we need to force a restart
        if nb.metadata.kernelspec.name != existing.nb.metadata.kernelspec.name:
            raise RestartKernel

        # if the input file has changed we need to force a restart
        if resources["metadata"]["input"] != existing.resources["metadata"]["input"]:
            raise RestartKernel

        # set the new notebook and error policy
        existing.nb = nb
        existing.allow_errors = allow_errors
        return (notebook_init.client, False)

    trace("Creating NotebookClient")
    # create notebook client
    client = NotebookClient(nb, resources=resources)
    client.allow_errors = allow_errors
    client.record_timing = False
    client.create_kernel_manager()
    client.start_new_kernel()
    client.start_new_kernel_client()

    async def fetch_kernel_info():
        # kernel_info() may hand back a future or a plain value
        pending = client.kc.kernel_info()
        return (await pending) if asyncio.isfuture(pending) else pending

    info = run_sync(fetch_kernel_info)()

    info_msg = client.wait_for_reply(info)
    client.nb.metadata["language_info"] = info_msg["content"]["language_info"]
    notebook_init.client = client

    # cleanup kernel at process exit
    atexit.register(client._cleanup_kernel)

    return (notebook_init.client, True)
453

454

455
def nb_write(nb, input):
    """Write notebook ``nb`` to ``input`` using notebook format ``NB_FORMAT_VERSION``."""
    nbformat.write(nb, input, version=NB_FORMAT_VERSION)
457

458

459
def nb_setup_cell(nb, options):
    """Build the "setup" code cell for ``nb``.

    ``options`` is copied (the caller's dict is left untouched) and passed on
    as keyword arguments with ``allow_empty`` forced to ``True``.
    """
    cell_options = {**options, "allow_empty": True}
    return nb_language_cell("setup", nb, **cell_options)
463

464

465
def nb_cleanup_cell(nb, resource_dir):
    """Build the "cleanup" code cell for ``nb``, or ``None`` if it has no source."""
    return nb_language_cell(
        "cleanup", nb, resource_dir=resource_dir, allow_empty=False
    )
467

468

469
def nb_language_cell(name, nb, resource_dir, allow_empty, **args):
    """Create the language-specific ``name`` cell ("setup" or "cleanup").

    The source is taken from ``<resource_dir>/jupyter/lang/<language>/<name>.*``
    when that language directory exists; the file is then treated as a
    ``str.format`` template filled from ``args`` (with ``run_path``
    base64-encoded). Otherwise a ``quarto_<name>_cell`` file in the
    kernelspec directory is used verbatim, if present.

    Args:
        name: Cell kind, used to locate the source file.
        nb: Notebook whose metadata provides the kernelspec and language.
        resource_dir: Root directory of the bundled resources.
        allow_empty: Whether to return a cell even when no source was found.
        **args: Values available to the template's format placeholders.

    Returns:
        A new code cell, or ``None`` when no source was found and
        ``allow_empty`` is false.
    """
    kernelspec = nb.metadata.kernelspec
    language = get_language_from_nb_metadata(nb.metadata)
    trace(json.dumps(nb.metadata, indent=2))
    source = ""

    # a notebook without a known language cannot have a bundled lang
    # directory (and os.path.join would reject a None component)
    lang_dir = (
        os.path.join(resource_dir, "jupyter", "lang", language) if language else None
    )
    if lang_dir and os.path.isdir(lang_dir):
        cell_file = glob.glob(os.path.join(lang_dir, name + ".*"))
        # base64-encode the run_path given (a missing or null path encodes as "")
        args["run_path"] = base64.b64encode(
            (args.get("run_path") or "").encode("utf-8")
        ).decode("utf-8")
        if len(cell_file) > 0:
            # read as UTF-8 rather than relying on the platform default encoding
            with open(cell_file[0], "r", encoding="utf-8") as file:
                source = file.read().format(**args)
    else:
        trace(f"No {language} directory found in {lang_dir}")
        trace("Will look for explicit quarto setup cell information in kernelspec dir")
        cell_path = os.path.join(kernelspec.path, f"quarto_{name}_cell")
        try:
            with open(cell_path, "r", encoding="utf-8") as file:
                trace(f"Quarto_{name}_cell file found in {kernelspec.path}")
                trace(cell_path)
                source = file.read()
        except FileNotFoundError:
            # the kernelspec does not ship such a cell; source stays empty
            trace(f"No quarto_{name}_cell file found in {kernelspec.path}")
            trace(cell_path)

    # create cell
    if source != "" or allow_empty:
        return nbformat.versions[NB_FORMAT_VERSION].new_code_cell(source=source)
    else:
        return None
504

505

506
def nb_from_cache(nb, nb_cache, nb_meta=("kernelspec", "language_info", "widgets")):
    """Return a copy of ``nb`` filled in from its cached execution, or ``None``.

    The selected metadata keys (all metadata when ``nb_meta`` is ``None``)
    and every code cell are taken from the cached notebook; other cells are
    kept from ``nb``. A ``KeyError`` raised anywhere during the lookup or
    merge results in ``None``.
    """
    try:
        trace("nb_from_cache match")
        record = nb_cache.match_cache_notebook(nb)
        trace("nb_from_cache get buncle")
        bundle = nb_cache.get_cache_bundle(record.pk)
        cached = bundle.nb
        merged = copy.deepcopy(nb)

        # selected (execution-oriented) metadata
        trace("nb_from_cache processing metadata")
        if nb_meta is None:
            merged.metadata = cached.metadata
        else:
            for key in nb_meta:
                if key in cached.metadata:
                    merged.metadata[key] = cached.metadata[key]

        # code cells: take cached cells in order, one per code cell
        trace("nb_from_cache processing cells")
        for position, cell in enumerate(merged.cells):
            if cell.cell_type == "code":
                merged.cells[position] = cached.cells.pop(0)

        trace("nb_from_cache returning")
        return merged
    except KeyError:
        return None
532

533

534
# This function is only called on setup and cleanup cells
535
def nb_kernel_dependencies(setup_cell):
    """Return the JSON payload of the first stdout stream output of a cell.

    Outputs that are not streams (and therefore carry no ``name``) are
    skipped rather than failing on the missing attribute.

    Returns:
        The decoded JSON value, or ``None`` when the cell has no stdout
        stream output.

    Raises:
        json.JSONDecodeError: If the stdout text is not valid JSON.
    """
    for output in setup_cell.outputs:
        # check the output type first: only stream outputs have a name
        if getattr(output, "output_type", None) != "stream":
            continue
        if getattr(output, "name", None) == "stdout":
            return json.loads(output.text)
    return None
539

540

541
def cell_execute(client, cell, index, execution_count, eval_default, store_history):
    """Execute one code cell, honouring its ``eval`` and ``error`` options.

    The cell's YAML option lines are stripped for the duration of the
    execution and restored afterwards. When the cell's ``error`` option is
    truthy a ``raises-exception`` tag is added for the execution and removed
    again. If errors are not allowed (cell option, falling back to
    ``client.allow_errors``), an ``error`` output raises
    ``CellExecutionError``.

    Returns the cell (unchanged when ``eval`` is not ``True``).
    """
    language = get_language_from_nb_metadata(client.nb.metadata)
    # read cell options
    cell_options = nb_cell_yaml_options(language, cell)

    # check options for eval and error
    should_eval = cell_options.get("eval", eval_default)
    allow_errors = cell_options.get("error")

    trace(f"cell_execute with eval={should_eval}")
    if allow_errors == True:
        trace(f"cell_execute with allow_errors={allow_errors}")

    # nothing to run unless eval is active
    if not (should_eval == True):
        return cell

    # add 'raises-exception' tag for allow_errors
    if allow_errors:
        if "metadata" not in cell:
            cell["metadata"] = {}
        existing_tags = cell.get("metadata", {}).get("tags", [])
        cell["metadata"]["tags"] = existing_tags + ["raises-exception"]

    # execute (w/o yaml options so that cell magics work)
    original_source = cell.source
    cell.source = nb_strip_yaml_options(client, cell.source)
    cell = client.execute_cell(
        cell=cell,
        cell_index=index,
        execution_count=execution_count,
        store_history=store_history,
    )
    cell.source = original_source

    # if lines_to_next_cell is 0 then fix it to be 1
    if cell.get("metadata", {}).get("lines_to_next_cell", -1) == 0:
        cell["metadata"]["lines_to_next_cell"] = 1

    # remove injected raises-exception
    if allow_errors:
        cell["metadata"]["tags"].remove("raises-exception")
        if len(cell["metadata"]["tags"]) == 0:
            del cell["metadata"]["tags"]

    # Check for display errors in output (respecting both global and cell settings)
    errors_permitted = client.allow_errors if allow_errors is None else allow_errors
    if errors_permitted:
        return cell

    trace("Cell does not allow errors: checking for uncaught errors")
    for output in cell.outputs:
        if output.get("output_type") != "error":
            continue
        trace("   Uncaught error found in output")
        from nbclient.exceptions import CellExecutionError

        error_name = output.get("ename", "UnnamedError")
        error_value = output.get("evalue", "")
        error_traceback = output.get("traceback", [])
        # Use same error raising mechanism as nbclient
        raise CellExecutionError.from_cell_and_msg(
            cell,
            {
                "ename": "UncaughtCellError:" + error_name,
                "evalue": error_value,
                "traceback": error_traceback,
            },
        )

    # return cell
    return cell
611

612

613
def cell_execute_inline(client, cell):
    """Evaluate inline `{language} expr` expressions found in a markdown cell.

    The expressions are sent to the kernel as ``user_expressions`` of an
    empty execute request. On success the results are stored in
    ``cell["metadata"]["user_expressions"]`` as a list of
    ``{"expression", "result"}`` dicts. When the cell has no inline
    expressions, any existing ``user_expressions`` metadata is removed.

    Returns:
        The (possibly updated) cell.

    Raises:
        Exception: If the kernel reports an error for the request or for any
            single expression.
    """

    # helper to raise an error from a result
    def raise_error(result):
        ename = result.get("ename")
        evalue = result.get("evalue")
        raise Exception(f"{ename}: {evalue}")

    # helper to clear existing user_expressions if they exist
    def clear_user_expressions():
        if "metadata" in cell:
            metadata = cell.get("metadata")
            if "user_expressions" in metadata:
                del metadata["user_expressions"]

    # find expressions in source; the language name is escaped so that names
    # containing regex metacharacters (e.g. "c++") match literally instead of
    # producing an invalid pattern
    language = get_language_from_nb_metadata(client.nb.metadata)
    language_pattern = re.escape(str(language))
    source = "".join(cell.source)
    expressions = re.findall(
        rf"(?:^|[^`])`{{{language_pattern}}}[ \t]([^`]+)`", source, re.MULTILINE
    )
    if len(expressions):
        # send and wait for 'execute' kernel message w/ user_expressions
        kc = client.kc
        user_expressions = {str(idx): expr for idx, expr in enumerate(expressions)}
        msg_id = kc.execute("", user_expressions=user_expressions)
        reply = client.wait_for_reply(msg_id)

        # process reply
        content = reply.get("content")
        if content.get("status") == "ok":
            # build results (check for error on each one)
            results = []
            for key in user_expressions:
                result = content.get("user_expressions").get(key)
                if result.get("status") == "ok":
                    results.append(
                        {"expression": user_expressions.get(key), "result": result}
                    )
                elif result.get("status") == "error":
                    raise_error(result)

            # set results into metadata
            if "metadata" not in cell:
                cell["metadata"] = {}
            cell["metadata"]["user_expressions"] = results

        elif content.get("status") == "error":
            raise_error(content)
    else:
        clear_user_expressions()

    # return cell
    return cell
668

669

670
def cell_clear_output(cell):
    """Reset a code cell's outputs, execution count and display metadata.

    For code cells the outputs become an empty list, the execution count
    becomes ``None`` and the ``collapsed``/``scrolled`` metadata entries are
    dropped. Other cell types are returned untouched.
    """
    if cell.cell_type != "code":
        return cell

    cell.outputs = []
    cell.execution_count = None
    if "metadata" in cell:
        for stale_key in ("collapsed", "scrolled"):
            cell.metadata.pop(stale_key, None)
    return cell
679

680

681
def nb_parameterize(nb, params):
    """Inject ``params`` into ``nb`` as an ``injected-parameters`` cell.

    Does nothing when the notebook has no cell tagged ``parameters``.
    Otherwise a code cell with the translated parameters is created. It
    carries the parameter cell's other tags and its YAML options (minus
    ``label``, to avoid duplicate labels). It replaces an existing
    ``injected-parameters`` cell or is inserted right after the
    ``parameters`` cell. ``params`` is also recorded in
    ``nb.metadata.papermill["parameters"]``.

    Raises:
        ImportError: If papermill is not installed.
    """
    # verify papermill import
    if not papermill_translate:
        raise ImportError(
            "The papermill package is required for processing --execute-params"
        )

    # alias kernel name and language
    kernel_name = nb.metadata.kernelspec.name
    language = get_language_from_nb_metadata(nb.metadata)

    # find params index and note any tags/yaml on it (exit if no params)
    params_index = find_first_tagged_cell_index(nb, "parameters")
    if params_index == -1:
        return
    source_cell = nb.cells[params_index]
    params_cell_tags = source_cell.get("metadata", {}).get("tags", []).copy()
    params_cell_yaml = nb_cell_yaml_lines(language, source_cell.source)
    params_cell_tags.remove("parameters")

    # Generate parameter content based on the kernel_name
    params_content = papermill_translate.translate_parameters(
        kernel_name, language, params, "Injected Parameters"
    )

    # prepend options
    if len(params_cell_yaml):
        # https://github.com/quarto-dev/quarto-cli/issues/10097
        # We need to find and drop `label: ` from the yaml options
        # to avoid label duplication
        # The only way to do this robustly is to parse the yaml
        # and then re-encode it
        raw_yaml = "\n".join(params_cell_yaml)
        try:
            parsed_yaml = parse_string(raw_yaml)
            if not isinstance(parsed_yaml, dict):
                raise ValueError("cell options must be a YAML mapping")
            parsed_yaml.pop("label", None)
            # nothing to emit when `label` was the only option
            option_lines = (
                safe_dump(parsed_yaml).strip().splitlines() if parsed_yaml else []
            )
        except Exception as e:
            # best effort: report the problem and inject without the options.
            # The raw text is kept in its own variable so the warning can
            # always be printed, whatever the parse produced.
            sys.stderr.write(
                "\nWARNING: Invalid YAML option format in cell:\n"
                + raw_yaml
                + "\n"
                + str(e)
                + "\n"
            )
            sys.stderr.flush()
            option_lines = []

        if option_lines:
            comment_chars = nb_language_comment_chars(language)
            option_prefix = comment_chars[0] + "| "
            option_suffix = comment_chars[1] if len(comment_chars) > 1 else ""
            enclosed = [option_prefix + line + option_suffix for line in option_lines]
            params_content = "\n".join(enclosed) + "\n" + params_content

    # create params cell
    params_cell = nbformat.v4.new_code_cell(source=params_content)
    params_cell.metadata["tags"] = ["injected-parameters"] + params_cell_tags

    # find existing injected params index
    injected_params_index = find_first_tagged_cell_index(nb, "injected-parameters")

    # find the right insertion/replace point for the injected params
    if injected_params_index >= 0:
        # Replace the injected cell with a new version
        before = nb.cells[:injected_params_index]
        after = nb.cells[injected_params_index + 1 :]
    else:
        # Add an injected cell after the parameter cell
        before = nb.cells[: params_index + 1]
        after = nb.cells[params_index + 1 :]

    nb.cells = before + [params_cell] + after
    if not nb.metadata.get("papermill"):
        nb.metadata.papermill = {}
    nb.metadata.papermill["parameters"] = params
765

766

767
def find_first_tagged_cell_index(nb, tag):
    """Return the index of the first cell in ``nb`` tagged with ``tag``.

    Cells without metadata or tags (including null values) are skipped.
    Returns ``-1`` when no cell carries the tag.
    """
    for idx, cell in enumerate(nb.cells):
        # tolerate missing/null metadata and tags
        tags = (cell.get("metadata") or {}).get("tags") or ()
        if tag in tags:
            # stop at the first match instead of scanning every cell
            return idx
    return -1
775

776

777
def nb_strip_yaml_options(client, source):
    """Return ``source`` without its leading YAML option lines.

    When the source has no option lines it is returned unchanged; otherwise
    the remaining lines are re-joined with ``"\\n"``.
    """
    language = get_language_from_nb_metadata(client.nb.metadata)
    option_line_count = len(nb_cell_yaml_lines(language, source))
    if option_line_count <= 0:
        return source
    remaining_lines = source.splitlines()[option_line_count:]
    return "\n".join(remaining_lines)
786

787

788
def nb_cell_yaml_options(lang, cell):
    """Parse the YAML option lines at the top of ``cell`` into a dict.

    Returns an empty dict when the cell has no option lines, or when they do
    not parse to a mapping (a warning is written to stderr in that case).
    """
    # collect the leading option lines
    yaml_lines = nb_cell_yaml_lines(lang, cell.source)
    if not yaml_lines:
        return dict()

    # parse them as a single YAML document
    yaml_code = "\n".join(yaml_lines)
    parsed = parse_string(yaml_code)
    if isinstance(parsed, dict):
        return parsed

    sys.stderr.write(
        "\nWARNING: Invalid YAML option format in cell:\n" + yaml_code + "\n"
    )
    sys.stderr.flush()
    return dict()
807

808

809
def nb_cell_yaml_lines(lang, source):
    """Return the YAML option lines found at the top of ``source``.

    An option line starts with the language's comment prefix followed by
    ``|`` and, for languages with a closing comment marker, ends with that
    marker. Collection stops at the first line that is not an option line.
    The returned lines have the comment markers removed.
    """
    # determine language comment chars
    comment_chars = nb_language_comment_chars(lang)
    option_regex = re.compile("^" + re.escape(comment_chars[0]) + "\\s*\\| ?")
    option_suffix = comment_chars[1] if len(comment_chars) > 1 else None

    collected = []
    for line in source.splitlines():
        matched = option_regex.match(line)
        if not matched:
            break
        if option_suffix and not line.rstrip().endswith(option_suffix):
            break

        # drop the comment prefix (and the closing marker, if any)
        option_text = line[len(matched.group()) :]
        if option_suffix:
            option_text = option_text.rstrip()[: -len(option_suffix)]
        # strip trailing spaces after : to avoid poyo error
        # (https://github.com/hackebrot/poyo/issues/30)
        collected.append(re.sub(":\\s+$", ":", option_text))

    return collected
833

834

835
def nb_language_comment_chars(lang):
    """Return the comment delimiters for ``lang`` as a list.

    The list holds one element (a line-comment prefix) or two elements (an
    opening and a closing marker). Unknown languages fall back to ``["#"]``.
    """
    block_c = ["/*", "*/"]
    delimiters = {
        "r": "#",
        "python": "#",
        "julia": "#",
        "scala": "//",
        "matlab": "%",
        "csharp": "//",
        "fsharp": "//",
        "c": block_c,
        "css": list(block_c),
        "sas": ["*", ";"],
        "powershell": "#",
        "bash": "#",
        "sql": "--",
        "mysql": "--",
        "psql": "--",
        "lua": "--",
        "cpp": "//",
        "cc": "//",
        "stan": "#",
        "octave": "#",
        "fortran": "!",
        "fortran95": "!",
        "awk": "#",
        "gawk": "#",
        "stata": "*",
        "java": "//",
        "groovy": "//",
        "sed": "#",
        "perl": "#",
        "ruby": "#",
        "tikz": "%",
        "js": "//",
        "d3": "//",
        "node": "//",
        "sass": "//",
        "coffee": "#",
        "go": "//",
        "asy": "//",
        "haskell": "--",
        "dot": "//",
        "apl": "⍝",
        "q": "/",
        "ocaml": ["(*", "*)"],
    }
    found = delimiters.get(lang, "#")
    return found if isinstance(found, list) else [found]
888

889
Product

Resources

Company