Path: blob/master/internal/book2/create_dummy_notebook.py
1192 views
"""
Create placeholder ("dummy") notebooks for figures listed in a LaTeX .lof file.

For every figure number that ``find_multinotebooks`` selects, a notebook named
``fig_<chapter>_<number>.ipynb`` is written containing a single markdown cell
with the cleaned-up figure caption.

command usage:
python3 internal/book2/create_dummy_notebook.py --lof=internal/book2.lof --book_no=2
"""

import argparse
from email.policy import default  # NOTE(review): appears unused in this file; kept in case of external reliance
from random import choices  # NOTE(review): appears unused in this file; kept in case of external reliance
from TexSoup import TexSoup
import regex as re
import os
import nbformat as nbf
import pandas as pd  # NOTE(review): appears unused in this file; kept in case of external reliance
from glob import glob
from probml_utils.url_utils import (
    extract_scripts_name_from_caption,
    make_url_from_fig_no_and_script_name,
    figure_url_mapping_from_lof,
)

parser = argparse.ArgumentParser(description="create dummy notebook")
parser.add_argument("-lof", "--lof", type=str, help="")
parser.add_argument("-book_no", "--book_no", type=int, default=2, choices=[1, 2], help="")
parser.add_argument("-nb_path", "--nb_path", type=str, default="notebooks/", help="")

args = parser.parse_args()

# Module-level configuration read by the functions below.
lof_file = str(args.lof)
book_no = args.book_no
nb_path = args.nb_path

# Patterns for "{...foo.ipynb}" / "{...foo.py}" groups inside a caption,
# e.g. {https://<path/to/>foo.ipynb}{foo.ipynb}.
_WHOLE_LINK_IPYNB = r"\{\S+\.ipynb\}"
_WHOLE_LINK_PY = r"\{\S+\.py\}"

# Extracts "caption" from "{4.13}{caption}". The dot is escaped so that only a
# literal "." separates the chapter number from the figure number.
_CAPTION_BODY = r"\{\d+\.\d+\}\{(.*)\}"

# LaTeX control sequences stripped from a caption before link rewriting.
_LATEX_NOISE = (
    r"\ignorespaces",
    r" \relax",
    r"\href",
    r"\url",
    r'\cc@accent {"705E}',
    r"\numberline",
    r"\bm",
    r"\DOTSB",
    r"\slimits",
    r"\oset",
)


def convert_to_ipynb(file):
    """
    Return ``file`` with a trailing ``.py`` replaced by ``.ipynb``.

    Names that do not end in ``.py`` (including names that already end in
    ``.ipynb``) are returned unchanged. Only the suffix is examined, so a
    ``.py`` occurring in the middle of the name never truncates it.
    """
    if file.endswith(".py"):
        return file[: -len(".py")] + ".ipynb"
    return file


def find_multinotebooks():
    """
    Return a dict of fig_no -> url for entries whose url contains "fig_".

    The mapping comes from ``figure_url_mapping_from_lof`` for the configured
    ``lof_file`` and ``book_no``. Each selected url is printed, followed by a
    summary count.
    NOTE(review): a "fig_" url presumably marks a figure produced by more than
    one notebook -- confirm against probml_utils.url_utils.
    """
    fig_no_urls_mapping = figure_url_mapping_from_lof(lof_file, "", book_no=book_no)
    multi_notebooks = {}
    for fig_no, url in fig_no_urls_mapping.items():
        if "fig_" in url:
            print(url)
            multi_notebooks[fig_no] = url
    print(f"{len(multi_notebooks)} notebooks have more than one figure")
    return multi_notebooks


def delete_existing_multinotebooks(relative_path=nb_path):
    """
    delete existing notebooks

    Removes every ``*.ipynb`` file whose file name contains "fig_" from
    ``<relative_path>/book<book_no>/<any directory>/``. ``relative_path``
    defaults to the ``--nb_path`` argument so that deletion targets the same
    tree that ``create_multi_notebooks`` writes to.
    """
    pattern = os.path.join(relative_path, f"book{book_no}", "*", "*.ipynb")
    cnt = 0
    for notebook in glob(pattern):
        # Test the file name only, so a "fig_" in a directory name is ignored.
        if "fig_" in os.path.basename(notebook):
            os.remove(notebook)
            print(f"{notebook} deleted!")
            cnt += 1

    print(f"{cnt} notebooks deleted")


def preprocess_caption(captions):
    """
    Build a dict of fig_no -> cleaned caption text for the selected figures.

    ``captions`` is an iterable of parsed ``\\numberline`` nodes; the first
    element of each node's ``contents`` is used as the figure number. Figures
    not returned by ``find_multinotebooks`` are skipped. For the rest, LaTeX
    control sequences are stripped and every ``{...ipynb}`` / ``{...py}`` group
    is replaced by a markdown hyperlink to the corresponding notebook.

    A caption that does not have the ``{N.M}{text}`` shape is reported on
    stdout and left out of the result.
    """
    cleaned_caption = {}

    multi_notebooks = find_multinotebooks()
    for caption in captions:
        fig_no = str(caption.contents[0])

        # only figures selected by find_multinotebooks get a dummy notebook
        if fig_no not in multi_notebooks:
            continue

        text = str(caption)
        for noise in _LATEX_NOISE:
            text = text.replace(noise, "")

        links = re.findall(_WHOLE_LINK_IPYNB, text) + re.findall(_WHOLE_LINK_PY, text)
        for link in links:
            script = extract_scripts_name_from_caption(link)[0]
            script_ipynb = convert_to_ipynb(script)
            url = make_url_from_fig_no_and_script_name(fig_no, script_ipynb, book_no=book_no)
            # in form of markdown hyperlink
            text = text.replace(link, f"[{script_ipynb}]({url})")

        match = re.search(_CAPTION_BODY, text)
        if match is None:
            print(f"warning: could not extract caption for figure {fig_no}; skipped")
            continue

        cleaned_caption[fig_no] = match.group(1).strip()

    return cleaned_caption


def parse_lof(lof_file):
    """
    Return the full text of ``lof_file``, read as UTF-8.
    """
    with open(lof_file, encoding="utf-8") as fp:
        return fp.read()


def make_dummy_notebook_name(fig_no):
    """
    convert 1.11 to fig_1_11.ipynb
    """
    return f"fig_{fig_no.replace('.', '_')}.ipynb"


def create_multi_notebooks(cleaned_captions, relative_path=nb_path):
    """
    create new notebook and add caption to it

    For every ``fig_no -> caption`` pair a notebook with a single markdown cell
    holding the caption is written to
    ``<relative_path>/book<book_no>/<chapter, zero-padded to 2 digits>/``.
    The chapter directory is created when it does not exist yet.
    """
    # https://stackoverflow.com/questions/38193878/how-to-create-modify-a-jupyter-notebook-from-code-python
    cnt = 0
    for fig_no, caption in cleaned_captions.items():
        # the chapter number is the part of fig_no before the first "."
        chapter_no = int(fig_no.split(".")[0])

        notebook_name = make_dummy_notebook_name(fig_no)
        fig_path = os.path.join(relative_path, f"book{book_no}/{chapter_no:02d}", notebook_name)
        print(notebook_name, end="\n")

        nb = nbf.v4.new_notebook()
        nb["cells"] = [nbf.v4.new_markdown_cell(caption)]

        # A chapter seen for the first time has no directory yet.
        os.makedirs(os.path.dirname(fig_path), exist_ok=True)
        with open(fig_path, "w", encoding="utf-8") as f:
            nbf.write(nb, f)
        cnt += 1

    print(f"\n{cnt} notebooks written!")


if __name__ == "__main__":
    # delete existing multinotebooks
    delete_existing_multinotebooks()

    # find multinotebooks
    print(find_multinotebooks())

    # parse lof file
    soup = TexSoup(parse_lof(lof_file))

    # preprocess caption
    cleaned_captions = preprocess_caption(soup.find_all("numberline"))

    # create multinotebooks and write caption
    create_multi_notebooks(cleaned_captions)