# Path: blob/master/internal/book1/figure_no_notebook_mapping.py
# 1192 views
"""1command usage:2python3 internal/book1/figure_no_notebook_mapping.py -lof internal/book1.lof -csv internal/book1/figures_nb_mapping_book1.csv3"""4import argparse5from probml_utils.url_utils import (6dict_to_csv,7extract_scripts_name_from_caption,8)9from TexSoup import TexSoup1011parser = argparse.ArgumentParser(description="create figure url mapping")12parser.add_argument("-lof", "--lof", type=str, help="", default="internal/book2.lof")13parser.add_argument("-csv", "--csv", type=str, help="", default="internal/figures_url_mapping_book2.csv")14args = parser.parse_args()1516BOOK_NO = int(args.lof.split("/")[-1].split(".")[0][-1]) # from internal/book1.lof to 117NOTEBOOKS_MD_URL = "https://probml.github.io/notebooks#"1819def fig_no_nb_mapping(lof_file_path, csv_name, make_url = False):20f"""21create mapping of fig_no to url by parsing lof_file and save mapping in {csv_name}22"""23with open(lof_file_path) as fp:24LoF_File_Contents = fp.read()25soup = TexSoup(LoF_File_Contents)2627# create mapping of fig_no to list of script_name2829url_mapping = {}30for caption in soup.find_all("numberline"):31fig_no = str(caption.contents[0])32extracted_scripts = extract_scripts_name_from_caption(str(caption))33nb = None34if len(extracted_scripts) == 1:35nb = extracted_scripts[0]3637elif len(extracted_scripts) > 1: # use dummy notebooks38chap, fig = fig_no.split(".")39nb = f"fig_{chap}_{fig}.ipynb"4041if nb:42if make_url:43url_mapping[fig_no] = NOTEBOOKS_MD_URL + nb44else:45url_mapping[fig_no] = nb464748if csv_name:49dict_to_csv(url_mapping, csv_name)50print(f"Mapping of {len(url_mapping)} urls is saved in {csv_name}")51return url_mapping525354print(f"Parsing started from {args.lof}...........")5556if args.csv:57figure_mapping = fig_no_nb_mapping(args.lof, args.csv)58figure_mapping = fig_no_nb_mapping(args.lof, args.csv.replace(".csv","")+"_urls.csv", make_url=True)5960else:61figure_mapping = fig_no_nb_mapping(args.lof, "")62figure_mapping = fig_no_nb_mapping(args.lof, 
args.csv.replace("csv","")+"_urls.csv", make_url=True)63646566