# Path: blob/main/src/resources/jupyter/notebook.py
# 12921 views
# pyright: reportMissingImports=false
"""Execute a Jupyter notebook cell-by-cell on behalf of Quarto.

The module keeps a single ``NotebookClient`` (and therefore a single kernel)
alive between calls by stashing state as attributes on ``notebook_init`` and
``notebook_execute``; ``RestartKernel`` is raised whenever that retained state
is no longer valid for the requested execution.
"""

import os
import re
import atexit
import glob
import sys
import json
import pprint
import copy
import base64

from pathlib import Path

from yaml import safe_load as parse_string
from yaml import safe_dump

from log import trace
import nbformat
from nbclient import NotebookClient
from jupyter_client import KernelManager
from jupyter_core_utils_vendor import run_sync
import asyncio

# optional import of papermill for params support
try:
    from papermill import translators as papermill_translate
except ImportError:
    papermill_translate = None

# optional import of jupyter-cache
try:
    from jupyter_cache import get_cache
except ImportError:
    get_cache = None

NB_FORMAT_VERSION = 4

# Comment delimiters per language. A plain string is a line-comment prefix;
# a two-item list is a [prefix, suffix] pair for block-style comments.
_LANGUAGE_COMMENT_CHARS = dict(
    r="#",
    python="#",
    julia="#",
    scala="//",
    matlab="%",
    csharp="//",
    fsharp="//",
    c=["/*", "*/"],
    css=["/*", "*/"],
    sas=["*", ";"],
    powershell="#",
    bash="#",
    sql="--",
    mysql="--",
    psql="--",
    lua="--",
    cpp="//",
    cc="//",
    stan="#",
    octave="#",
    fortran="!",
    fortran95="!",
    awk="#",
    gawk="#",
    stata="*",
    java="//",
    groovy="//",
    sed="#",
    perl="#",
    ruby="#",
    tikz="%",
    js="//",
    d3="//",
    node="//",
    sass="//",
    coffee="#",
    go="//",
    asy="//",
    haskell="--",
    dot="//",
    apl="⍝",
    q="/",
    ocaml=["(*", "*)"],
)


def get_language_from_nb_metadata(metadata):
    """Return the notebook language.

    The kernelspec ``language`` wins; ``language_info.name`` is the fallback.
    Returns ``None`` when neither is present.
    """
    ks_lang = metadata.kernelspec.get("language", None)
    li_name = None
    li = metadata.get("language_info", None)
    if li:
        li_name = metadata.language_info.get("name", None)
    return ks_lang or li_name


# exception to indicate the kernel needs restarting
class RestartKernel(Exception):
    pass


def build_kernel_options(options):
    """Flatten the nested Quarto ``options`` into the dict used for kernel setup.

    Raises ``KeyError`` when a required entry (``format``, ``resourceDir`` or
    one of the ``execute`` keys read below) is missing.
    """
    # unpack options
    fmt = options["format"]
    resource_dir = options["resourceDir"]
    params = options.get("params", None)
    run_path = options.get("cwd", "")
    quiet = options.get("quiet", False)

    # read variables out of format
    execute = fmt["execute"]

    # evaluation
    eval_default = execute["eval"]
    allow_errors = bool(execute["error"])

    # figures
    fig_width = execute["fig-width"]
    fig_height = execute["fig-height"]
    fig_format = execute["fig-format"]
    fig_dpi = execute["fig-dpi"]

    # shell interactivity (normalise a missing value to the empty string)
    interactivity = execute["ipynb-shell-interactivity"]
    if interactivity is None:
        interactivity = ""

    # plotly connected
    plotly_connected = execute["plotly-connected"]

    # server: shiny
    metadata = fmt["metadata"]
    is_shiny = (
        "server" in metadata
        and "type" in metadata["server"]
        and metadata["server"]["type"] == "shiny"
    )

    # dashboard
    is_dashboard = fmt["identifier"]["base-format"] == "dashboard"

    # caching
    cache = execute["cache"] if "cache" in execute else "user"

    return {
        "format": fmt,
        "resource_dir": resource_dir,
        "params": params,
        "run_path": run_path,
        "quiet": quiet,
        "eval": eval_default,
        "allow_errors": allow_errors,
        "fig_width": fig_width,
        "fig_height": fig_height,
        "fig_format": fig_format,
        "fig_dpi": fig_dpi,
        "interactivity": interactivity,
        "plotly_connected": plotly_connected,
        "is_shiny": is_shiny,
        "is_dashboard": is_dashboard,
        "cache": cache,
    }


def set_env_vars(options):
    """Export the figure settings as ``QUARTO_FIG_*`` environment variables.

    The ``retina`` format is exported as ``png`` at twice the configured DPI.
    """
    os.environ["QUARTO_FIG_WIDTH"] = str(options["fig_width"])
    os.environ["QUARTO_FIG_HEIGHT"] = str(options["fig_height"])
    if options["fig_format"] == "retina":
        os.environ["QUARTO_FIG_DPI"] = str(options["fig_dpi"] * 2)
        os.environ["QUARTO_FIG_FORMAT"] = "png"
    else:
        os.environ["QUARTO_FIG_DPI"] = str(options["fig_dpi"])
        os.environ["QUARTO_FIG_FORMAT"] = options["fig_format"]


def retrieve_nb_from_cache(nb, status, input, **kwargs):
    """Try to satisfy the execution request from jupyter-cache.

    Returns ``True`` when the notebook was written from the cache, the cache
    object when caching is active but there was no hit (or ``refresh`` was
    requested), and ``None`` when caching is not in use.

    Raises ``ImportError`` if caching is requested without jupyter-cache.
    """
    cache = kwargs["cache"]
    # are we using the cache, if so connect to the cache, and then if we aren't in 'refresh'
    # (forced re-execution) mode then try to satisfy the execution request from the cache
    # NOTE: `==` (not `is`) is kept so truthy-equal values behave as before
    if cache == True or cache == "refresh":  # noqa: E712
        trace("using cache")
        if not get_cache:
            raise ImportError(
                "The jupyter-cache package is required for cached execution"
            )
        trace("getting cache")
        # Respect env var used to modify default cache dir
        # https://jupyter-cache.readthedocs.io/en/latest/using/cli.html
        nb_cache = get_cache(os.getenv("JUPYTERCACHE", ".jupyter_cache"))
        if cache != "refresh":
            cached_nb = nb_from_cache(nb, nb_cache)
            if cached_nb:
                # drop the injected setup cell before writing
                cached_nb.cells.pop(0)
                nb_write(cached_nb, input)
                status("(Notebook read from cache)\n\n")
                trace("(Notebook read from cache)")
                return True  # can persist kernel
    else:
        trace("not using cache")
        nb_cache = None
    return nb_cache


# check if the kernel needs to be restarted
# and records necessary state for the next execution
#
# TODO why is the state here set on the function?
def check_for_kernel_restart(options):
    """Return ``True`` if the retained kernel is stale for ``options``.

    The first time a ``python_cmd`` / ``supervisor_pid`` is seen it is recorded
    on ``notebook_execute``; on later calls a differing value means a restart.
    """
    # if this is a re-execution of a previously loaded kernel,
    # make sure the underlying python version hasn't changed
    python_cmd = options.get("python_cmd", None)
    if python_cmd:
        if hasattr(notebook_execute, "python_cmd"):
            if notebook_execute.python_cmd != python_cmd:
                return True
        else:
            notebook_execute.python_cmd = python_cmd

    # if there is a supervisor_id then abort if it has changed
    supervisor_pid = options.get("supervisor_pid", None)
    if supervisor_pid:
        if hasattr(notebook_execute, "supervisor_pid"):
            if notebook_execute.supervisor_pid != supervisor_pid:
                return True
        else:
            notebook_execute.supervisor_pid = supervisor_pid

    return False


# execute a notebook
def notebook_execute(options, status):
    """Execute the notebook named in ``options`` and rewrite it in place.

    ``status`` is a callable that receives progress text. Returns a truthy
    value when the kernel may be kept alive for a later execution.

    Raises ``RestartKernel`` when retained kernel state is out of date.
    """
    trace("inside notebook_execute")
    if check_for_kernel_restart(options):
        raise RestartKernel

    # change working directory and strip dir off of paths
    original_input = options["target"]["input"]
    os.chdir(Path(original_input).parent)
    input_name = Path(original_input).name

    quarto_kernel_setup_options = build_kernel_options(options)
    quarto_kernel_setup_options["input"] = input_name
    allow_errors = quarto_kernel_setup_options["allow_errors"]
    quiet = quarto_kernel_setup_options["quiet"]
    resource_dir = quarto_kernel_setup_options["resource_dir"]
    eval_default = quarto_kernel_setup_options["eval"]

    # set environment variables
    set_env_vars(quarto_kernel_setup_options)

    # read the notebook
    nb = nbformat.read(input_name, as_version=NB_FORMAT_VERSION)

    trace("notebook was read")
    # inject parameters if provided
    if quarto_kernel_setup_options["params"]:
        nb_parameterize(nb, quarto_kernel_setup_options["params"])

    # insert setup cell
    setup_cell = nb_setup_cell(nb, quarto_kernel_setup_options)
    nb.cells.insert(0, setup_cell)

    nb_cache = retrieve_nb_from_cache(nb, status, **quarto_kernel_setup_options)
    if nb_cache is True:
        # True indicates notebook read from cache, and hence kernel can be persisted
        return True

    # create resources for execution
    resources = {"metadata": {"input": original_input}}
    if quarto_kernel_setup_options["run_path"]:
        resources["metadata"]["path"] = quarto_kernel_setup_options["run_path"]

    trace("Will attempt to create notebook")
    # create NotebookClient
    trace("type of notebook: {0}".format(type(nb)))
    client, created = notebook_init(nb, resources, allow_errors)

    # hand the setup options to the kernel over a comm
    msg = client.kc.session.msg(
        "comm_open",
        {
            "comm_id": "quarto_comm",
            "target_name": "quarto_kernel_setup",
            "data": {"options": quarto_kernel_setup_options},
        },
    )
    client.kc.shell_channel.send(msg)

    trace("NotebookClient created")

    # complete progress if necessary
    if (not quiet) and created:
        status("Done\n")

    current_code_cell = 1
    total_code_cells = 0
    cell_labels = []
    max_label_len = 0

    kernel_supports_daemonization = False

    def handle_quarto_metadata(cell):
        def handle_meta_object(obj):
            nonlocal kernel_supports_daemonization
            if "quarto" in obj:
                qm = obj["quarto"]
                if qm.get("restart_kernel"):
                    raise RestartKernel
                if qm.get("daemonize"):
                    kernel_supports_daemonization = True
                    trace("Kernel is daemonizable from cell metadata")

        handle_meta_object(cell.get("metadata", {}))
        for output in cell.get("outputs", []):
            handle_meta_object(output.get("metadata", {}))

    # the language is the same for every cell, so look it up once
    language = get_language_from_nb_metadata(client.nb.metadata)
    for cell in client.nb.cells:
        # compute total code cells (for progress)
        if cell.cell_type == "code":
            total_code_cells += 1
        # map cells to their labels
        label = nb_cell_yaml_options(language, cell).get("label", "")
        cell_labels.append(label)
        # find max label length
        max_label_len = max(max_label_len, len(label))

    # execute the cells
    for index, cell in enumerate(client.nb.cells):
        cell_label = cell_labels[index]
        padding = "." * (max_label_len - len(cell_label))

        # progress (index 0 is the injected setup cell, which is not reported)
        progress = (not quiet) and cell.cell_type == "code" and index > 0
        if progress:
            status(
                " Cell {0}/{1}: '{2}'{3}...".format(
                    current_code_cell - 1, total_code_cells - 1, cell_label, padding
                )
            )

        # clear cell output
        cell = cell_clear_output(cell)

        # execute cell
        trace("Executing cell {0}".format(index))

        if cell.cell_type == "code":
            cell = cell_execute(
                client,
                cell,
                index,
                current_code_cell,
                eval_default,
                index > 0,  # add_to_history
            )
            cell.execution_count = current_code_cell
        elif cell.cell_type == "markdown":
            cell = cell_execute_inline(client, cell)

        trace("Executed cell {0}".format(index))

        # if this was the setup cell, see if we need to exit b/c dependencies are out of date
        if index == 0:
            # confirm kernel_deps haven't changed (restart if they have)
            if hasattr(notebook_execute, "kernel_deps"):
                kernel_deps = nb_kernel_dependencies(cell)
                if kernel_deps:
                    kernel_supports_daemonization = True
                    recorded_deps = notebook_execute.kernel_deps
                    for path, value in kernel_deps.items():
                        if path in recorded_deps:
                            if recorded_deps[path] != value:
                                raise RestartKernel
                        else:
                            recorded_deps[path] = value

            trace("Handling quarto metadata")
            trace(json.dumps(cell, indent=2))
            # also do it through cell metadata
            handle_quarto_metadata(cell)

            # we are done w/ setup (with no restarts) so it's safe to print 'Executing...'
            if not quiet:
                status("\nExecuting '{0}'\n".format(input_name))

        # assign cell
        client.nb.cells[index] = cell

        # increment current code cell
        if cell.cell_type == "code":
            current_code_cell += 1

        # end progress
        if progress:
            status("Done\n")
            trace("Done")

    trace("Notebook execution complete")

    # set widgets metadata
    client.set_widgets_metadata()

    # write to the cache
    if nb_cache:
        nb_write(client.nb, input_name)
        nb_cache.cache_notebook_file(path=Path(input_name), overwrite=True)

    # remove setup cell (then renumber execution_count)
    client.nb.cells.pop(0)
    for cell in client.nb.cells:
        if cell.cell_type == "code":
            cell.execution_count = cell.execution_count - 1

    # re-write without setup cell
    nb_write(client.nb, input_name)

    # execute cleanup cell
    cleanup_cell = nb_cleanup_cell(nb, resource_dir)
    if cleanup_cell:
        kernel_supports_daemonization = True
        nb.cells.append(cleanup_cell)
        client.execute_cell(
            cell=cleanup_cell, cell_index=len(client.nb.cells) - 1, store_history=False
        )
        nb.cells.pop()

        # record kernel deps after execution (picks up imports that occurred
        # within the notebook cells)
        # NOTE(review): kept under `if cleanup_cell` because there is nothing
        # to read dependencies from when no cleanup cell exists - confirm
        notebook_execute.kernel_deps = nb_kernel_dependencies(cleanup_cell) or {}

    # progress
    if not quiet:
        status("\n")

    # return flag indicating whether we should persist
    return kernel_supports_daemonization


def notebook_init(nb, resources, allow_errors):
    """Return ``(client, created)`` for ``nb``, reusing the retained client.

    On first use a ``NotebookClient`` is created, its kernel started, and the
    client stored on ``notebook_init.client``. On later calls the stored client
    is re-pointed at ``nb``.

    Raises ``RestartKernel`` if the kernelspec name or the input file differs
    from the one the retained client was created for.
    """
    created = False
    if not hasattr(notebook_init, "client"):
        trace("Creating NotebookClient")
        # create notebook client
        client = NotebookClient(nb, resources=resources)
        client.allow_errors = allow_errors
        client.record_timing = False
        client.create_kernel_manager()
        client.start_new_kernel()
        client.start_new_kernel_client()

        # kernel_info() may or may not hand back a future
        async def get_info():
            i = client.kc.kernel_info()
            if asyncio.isfuture(i):
                return await i
            else:
                return i

        info = run_sync(get_info)()

        info_msg = client.wait_for_reply(info)
        client.nb.metadata["language_info"] = info_msg["content"]["language_info"]
        notebook_init.client = client
        created = True

        # cleanup kernel at process exit
        atexit.register(client._cleanup_kernel)

    else:
        # if the kernel has changed we need to force a restart
        if (
            nb.metadata.kernelspec.name
            != notebook_init.client.nb.metadata.kernelspec.name
        ):
            raise RestartKernel

        # if the input file has changed we need to force a restart
        if (
            resources["metadata"]["input"]
            != notebook_init.client.resources["metadata"]["input"]
        ):
            raise RestartKernel

        # set the new notebook, resources, etc.
        notebook_init.client.nb = nb
        notebook_init.client.allow_errors = allow_errors

    return (notebook_init.client, created)


def nb_write(nb, input):
    """Write ``nb`` to the path ``input`` using ``NB_FORMAT_VERSION``."""
    nbformat.write(nb, input, version=NB_FORMAT_VERSION)


def nb_setup_cell(nb, options):
    """Build the setup cell; always returns a cell, even with empty source."""
    options = dict(options)  # copy so the caller's dict is not modified
    options["allow_empty"] = True
    return nb_language_cell("setup", nb, **options)


def nb_cleanup_cell(nb, resource_dir):
    """Build the cleanup cell, or return ``None`` when there is no source."""
    return nb_language_cell("cleanup", nb, resource_dir, False)


def nb_language_cell(name, nb, resource_dir, allow_empty, **args):
    """Create the ``name`` ("setup"/"cleanup") code cell for the notebook language.

    The source comes from ``<resource_dir>/jupyter/lang/<language>/<name>.*``
    (formatted with ``args``) when that language directory exists, otherwise
    from a ``quarto_<name>_cell`` file in the kernelspec path. Returns ``None``
    when no source was found and ``allow_empty`` is false.
    """
    kernelspec = nb.metadata.kernelspec
    language = get_language_from_nb_metadata(nb.metadata)
    trace(json.dumps(nb.metadata, indent=2))
    source = ""
    lang_dir = os.path.join(resource_dir, "jupyter", "lang", language)
    if os.path.isdir(lang_dir):
        cell_file = glob.glob(os.path.join(lang_dir, name + ".*"))
        # base64-encode the run_path given
        args["run_path"] = base64.b64encode(
            args.get("run_path", "").encode("utf-8")
        ).decode("utf-8")
        if len(cell_file) > 0:
            with open(cell_file[0], "r") as file:
                source = file.read().format(**args)
    else:
        trace(f"No {language} directory found in {lang_dir}")
        trace(f"Will look for explicit quarto setup cell information in kernelspec dir")
        try:
            with open(
                os.path.join(kernelspec.path, f"quarto_{name}_cell"), "r"
            ) as file:
                trace(f"Quarto_{name}_cell file found in {kernelspec.path}")
                trace(os.path.join(kernelspec.path, f"quarto_{name}_cell"))
                source = file.read()
        except FileNotFoundError:
            # a missing file just means the kernel ships no such cell
            trace(f"No quarto_{name}_cell file found in {kernelspec.path}")
            trace(os.path.join(kernelspec.path, f"quarto_{name}_cell"))

    # create cell
    if source != "" or allow_empty:
        return nbformat.versions[NB_FORMAT_VERSION].new_code_cell(source=source)
    else:
        return None


def nb_from_cache(nb, nb_cache, nb_meta=("kernelspec", "language_info", "widgets")):
    """Return a copy of ``nb`` with code cells and metadata taken from the cache.

    ``nb_meta`` lists the metadata keys to copy from the cached notebook;
    ``None`` copies the cached metadata wholesale. Returns ``None`` when a
    ``KeyError`` is raised while matching or loading the cache entry.
    """
    try:
        trace("nb_from_cache match")
        cache_record = nb_cache.match_cache_notebook(nb)
        trace("nb_from_cache get buncle")
        cache_bundle = nb_cache.get_cache_bundle(cache_record.pk)
        cache_nb = cache_bundle.nb
        nb = copy.deepcopy(nb)
        # selected (execution-oriented) metadata
        trace("nb_from_cache processing metadata")
        if nb_meta is None:
            nb.metadata = cache_nb.metadata
        else:
            for key in nb_meta:
                if key in cache_nb.metadata:
                    nb.metadata[key] = cache_nb.metadata[key]
        # code cells: replace each one, in order, with the next cached cell
        trace("nb_from_cache processing cells")
        for idx, cell in enumerate(nb.cells):
            if cell.cell_type == "code":
                nb.cells[idx] = cache_nb.cells.pop(0)
        trace("nb_from_cache returning")
        return nb
    except KeyError:
        return None


# This function is only called on setup cells
def nb_kernel_dependencies(setup_cell):
    """Return the JSON decoded from the cell's first stdout stream output.

    Returns ``None`` when ``setup_cell`` is ``None`` or has no stdout stream.
    """
    if setup_cell is None:
        return None
    for output in setup_cell.get("outputs", []):
        # check the type first: only stream outputs carry a `name`
        if output.get("output_type") == "stream" and output.get("name") == "stdout":
            return json.loads(output.text)
    return None


def cell_execute(client, cell, index, execution_count, eval_default, store_history):
    """Execute one code cell, honouring its ``eval`` / ``error`` YAML options.

    Returns the (possibly executed) cell. Raises ``CellExecutionError`` when
    errors are not allowed and the outputs contain an error.
    """
    language = get_language_from_nb_metadata(client.nb.metadata)
    # read cell options
    cell_options = nb_cell_yaml_options(language, cell)

    # check options for eval and error
    do_eval = cell_options.get("eval", eval_default)
    allow_errors = cell_options.get("error")

    trace(f"cell_execute with eval={do_eval}")
    if allow_errors == True:  # noqa: E712
        trace(f"cell_execute with allow_errors={allow_errors}")

    # execute if eval is active
    if do_eval == True:  # noqa: E712
        # add 'raises-exception' tag for allow_errors
        if allow_errors:
            if "metadata" not in cell:
                cell["metadata"] = {}
            tags = cell.get("metadata", {}).get("tags", [])
            cell["metadata"]["tags"] = tags + ["raises-exception"]

        # execute (w/o yaml options so that cell magics work)
        original_source = cell.source
        cell.source = nb_strip_yaml_options(client, cell.source)
        try:
            cell = client.execute_cell(
                cell=cell,
                cell_index=index,
                execution_count=execution_count,
                store_history=store_history,
            )
        finally:
            # always put the yaml options back, even if execution raised
            cell.source = original_source

        # if lines_to_next_cell is 0 then fix it to be 1
        lines_to_next_cell = cell.get("metadata", {}).get("lines_to_next_cell", -1)
        if lines_to_next_cell == 0:
            cell["metadata"]["lines_to_next_cell"] = 1

        # remove injected raises-exception
        if allow_errors:
            cell["metadata"]["tags"].remove("raises-exception")
            if len(cell["metadata"]["tags"]) == 0:
                del cell["metadata"]["tags"]

        # Check for display errors in output (respecting both global and cell settings)
        cell_allows_errors = (
            allow_errors if allow_errors is not None else client.allow_errors
        )
        if not cell_allows_errors:
            trace("Cell does not allow errors: checking for uncaught errors")
            for output in cell.outputs:
                if output.get("output_type") == "error":
                    trace(" Uncaught error found in output")
                    from nbclient.exceptions import CellExecutionError

                    error_name = output.get("ename", "UnnamedError")
                    error_value = output.get("evalue", "")
                    traceback = output.get("traceback", [])
                    # Use same error raising mechanism as nbclient
                    raise CellExecutionError.from_cell_and_msg(
                        cell,
                        {
                            "ename": "UncaughtCellError:" + error_name,
                            "evalue": error_value,
                            "traceback": traceback,
                        },
                    )

    # return cell
    return cell


def cell_execute_inline(client, cell):
    """Evaluate inline `` `{lang} expr` `` expressions in a markdown cell.

    Results are stored in ``cell.metadata.user_expressions``; when the cell has
    no inline expressions any existing ``user_expressions`` entry is removed.
    Raises ``Exception`` if the kernel reports an error.
    """

    # helper to raise an error from a result
    def raise_error(result):
        ename = result.get("ename")
        evalue = result.get("evalue")
        raise Exception(f"{ename}: {evalue}")

    # helper to clear existing user_expressions if they exist
    def clear_user_expressions():
        if "metadata" in cell:
            metadata = cell.get("metadata")
            if "user_expressions" in metadata:
                del metadata["user_expressions"]

    # find expressions in source (escape the language: names like "c++"
    # contain regex metacharacters)
    language = re.escape(f"{get_language_from_nb_metadata(client.nb.metadata)}")
    source = "".join(cell.source)
    expressions = re.findall(
        rf"(?:^|[^`])`{{{language}}}[ \t]([^`]+)`", source, re.MULTILINE
    )
    if expressions:
        # send and wait for 'execute' kernel message w/ user_expressions
        kc = client.kc
        user_expressions = {str(idx): expr for idx, expr in enumerate(expressions)}
        msg_id = kc.execute("", user_expressions=user_expressions)
        reply = client.wait_for_reply(msg_id)

        # process reply
        content = reply.get("content")
        if content.get("status") == "ok":
            # build results (check for error on each one)
            results = []
            for key, expr in user_expressions.items():
                result = content.get("user_expressions").get(key)
                if result.get("status") == "ok":
                    results.append({"expression": expr, "result": result})
                elif result.get("status") == "error":
                    raise_error(result)

            # set results into metadata
            if "metadata" not in cell:
                cell["metadata"] = {}
            cell["metadata"]["user_expressions"] = results

        elif content.get("status") == "error":
            raise_error(content)
    else:
        clear_user_expressions()

    # return cell
    return cell


def cell_clear_output(cell):
    """Reset a code cell's outputs, execution count and display metadata.

    Non-code cells are returned untouched.
    """
    remove_metadata = ["collapsed", "scrolled"]
    if cell.cell_type == "code":
        cell.outputs = []
        cell.execution_count = None
        if "metadata" in cell:
            for field in remove_metadata:
                cell.metadata.pop(field, None)
    return cell


def nb_parameterize(nb, params):
    """Inject ``params`` into ``nb`` as an ``injected-parameters`` cell.

    Does nothing when no cell is tagged ``parameters``. The new cell carries
    the parameter cell's other tags and YAML options (minus ``label``), and
    replaces a previously injected cell when one exists. ``params`` is also
    recorded in ``nb.metadata.papermill``.

    Raises ``ImportError`` if papermill is not installed.
    """
    # verify papermill import
    if not papermill_translate:
        raise ImportError(
            "The papermill package is required for processing --execute-params"
        )

    # alias kernel name and language
    kernel_name = nb.metadata.kernelspec.name
    language = get_language_from_nb_metadata(nb.metadata)

    # find params index and note any tags/yaml on it (exit if no params)
    params_index = find_first_tagged_cell_index(nb, "parameters")
    if params_index == -1:
        return
    params_cell_tags = (
        nb.cells[params_index].get("metadata", {}).get("tags", []).copy()
    )
    raw_yaml_lines = nb_cell_yaml_lines(language, nb.cells[params_index].source)
    params_cell_tags.remove("parameters")

    # Generate parameter content based on the kernel_name
    params_content = papermill_translate.translate_parameters(
        kernel_name, language, params, "Injected Parameters"
    )

    # prepend options
    if raw_yaml_lines:
        # https://github.com/quarto-dev/quarto-cli/issues/10097
        # We need to find and drop `label: ` from the yaml options
        # to avoid label duplication
        # The only way to do this robustly is to parse the yaml
        # and then re-encode it
        try:
            parsed_options = parse_string("\n".join(raw_yaml_lines))
            if not isinstance(parsed_options, dict):
                raise ValueError("cell options must be a YAML mapping")
            parsed_options.pop("label", None)
            # an empty mapping would otherwise be dumped as a stray "{}" line
            params_cell_yaml = (
                safe_dump(parsed_options).strip().splitlines()
                if parsed_options
                else []
            )
        except Exception as e:
            # report against the original lines, not a half-processed value
            sys.stderr.write(str(e) + "\n")
            sys.stderr.write(
                "\nWARNING: Invalid YAML option format in cell:\n"
                + "\n".join(raw_yaml_lines)
                + "\n"
            )
            sys.stderr.flush()
            params_cell_yaml = []

        comment_chars = nb_language_comment_chars(language)
        option_prefix = comment_chars[0] + "| "
        option_suffix = comment_chars[1] if len(comment_chars) > 1 else None

        def enclose(yaml):
            yaml = option_prefix + yaml
            if option_suffix:
                yaml = yaml + option_suffix
            return yaml

        if params_cell_yaml:
            params_content = (
                "\n".join(map(enclose, params_cell_yaml)) + "\n" + params_content
            )

    # create params cell
    params_cell = nbformat.v4.new_code_cell(source=params_content)
    params_cell.metadata["tags"] = ["injected-parameters"] + params_cell_tags

    # find existing injected params index
    injected_params_index = find_first_tagged_cell_index(nb, "injected-parameters")

    # find the right insertion/replace point for the injected params
    if injected_params_index >= 0:
        # Replace the injected cell with a new version
        before = nb.cells[:injected_params_index]
        after = nb.cells[injected_params_index + 1 :]
    else:
        # Add an injected cell after the parameter cell
        before = nb.cells[: params_index + 1]
        after = nb.cells[params_index + 1 :]

    nb.cells = before + [params_cell] + after
    if not nb.metadata.get("papermill"):
        nb.metadata.papermill = {}
    nb.metadata.papermill["parameters"] = params


def find_first_tagged_cell_index(nb, tag):
    """Return the index of the first cell tagged ``tag``, or ``-1`` if none."""
    return next(
        (
            idx
            for idx, cell in enumerate(nb.cells)
            if tag in cell.get("metadata", {}).get("tags", [])
        ),
        -1,
    )


def nb_strip_yaml_options(client, source):
    """Return ``source`` without its leading YAML option comment lines."""
    yaml_lines = nb_cell_yaml_lines(
        get_language_from_nb_metadata(client.nb.metadata), source
    )
    num_yaml_lines = len(yaml_lines)
    if num_yaml_lines > 0:
        return "\n".join(source.splitlines()[num_yaml_lines:])
    else:
        return source


def nb_cell_yaml_options(lang, cell):
    """Parse the cell's leading YAML option comments into a dict.

    Returns an empty dict when there are no options or when they do not parse
    to a mapping (a warning is written to stderr in the latter case).
    """
    # go through the lines until we've found all of the yaml
    yaml_lines = nb_cell_yaml_lines(lang, cell.source)
    if not yaml_lines:
        return dict()

    # if we have yaml then parse it
    yaml_code = "\n".join(yaml_lines)
    yaml_options = parse_string(yaml_code)
    if isinstance(yaml_options, dict):
        return yaml_options

    sys.stderr.write(
        "\nWARNING: Invalid YAML option format in cell:\n" + yaml_code + "\n"
    )
    sys.stderr.flush()
    return dict()


def nb_cell_yaml_lines(lang, source):
    """Return the YAML text of the leading ``<comment>|`` option lines of ``source``.

    Scanning stops at the first line that is not an option line (or, for
    languages with a comment suffix, that does not end with that suffix).
    """
    # determine language comment chars
    comment_chars = nb_language_comment_chars(lang)
    option_pattern = "^" + re.escape(comment_chars[0]) + "\\s*\\| ?"
    option_suffix = comment_chars[1] if len(comment_chars) > 1 else None

    # go through the lines until we've found all of the yaml
    yaml_lines = []
    for line in source.splitlines():
        option_match = re.match(option_pattern, line)
        if option_match:
            if (not option_suffix) or line.rstrip().endswith(option_suffix):
                yaml_option = line[len(option_match.group()) :]
                if option_suffix:
                    yaml_option = yaml_option.rstrip()[: -len(option_suffix)]
                # strip trailing spaces after : to avoid poyo error
                # (https://github.com/hackebrot/poyo/issues/30)
                yaml_option = re.sub(":\\s+$", ":", yaml_option)
                yaml_lines.append(yaml_option)
                continue
        break

    # return the lines
    return yaml_lines


def nb_language_comment_chars(lang):
    """Return the comment delimiters for ``lang`` as a list.

    The list has one item (line-comment prefix) or two (prefix and suffix).
    Unknown languages fall back to ``["#"]``.
    """
    if lang in _LANGUAGE_COMMENT_CHARS:
        chars = _LANGUAGE_COMMENT_CHARS[lang]
        if not isinstance(chars, list):
            chars = [chars]
        return chars
    else:
        return ["#"]