Path: blob/master/internal/book2/create_figure_url_mapping.py
1192 views
"""
command usage:
python3 internal/book2/create_figure_url_mapping.py -lof internal/book2.lof -csv internal/figures_url_mapping_book2.csv
"""
import argparse
import regex as re
from probml_utils.url_utils import (
    figure_url_mapping_from_lof,
    dict_to_csv,
    make_url_from_fig_no_and_script_name,
    extract_scripts_name_from_caption,
)
from TexSoup import TexSoup


def _book_no_from_lof_path(lof_path):
    """Return the book number encoded at the end of the .lof file stem.

    For example ``internal/book1.lof`` yields ``1``.

    Raises:
        ValueError: if the file stem does not end in a digit.
    """
    # Same stem the original used: last "/" component, text before the first ".".
    stem = lof_path.split("/")[-1].split(".")[0]
    # Take the whole trailing digit run rather than only the last character,
    # so a multi-digit book number is not silently truncated.
    trailing_digits = re.search(r"\d+$", stem)
    if trailing_digits is None:
        raise ValueError(
            f"Cannot infer the book number from {lof_path!r}: "
            "expected a file name such as 'book1.lof'"
        )
    return int(trailing_digits.group())


def _excluded_csv_name(csv_name):
    """Return the companion csv name carrying the ``_excluded_dummy_nb`` suffix."""
    suffix = ".csv"
    # Strip only a trailing ".csv"; str.replace would also mangle a ".csv"
    # that happens to appear elsewhere in the path (e.g. in a directory name).
    base = csv_name[: -len(suffix)] if csv_name.endswith(suffix) else csv_name
    return base + "_excluded_dummy_nb.csv"


parser = argparse.ArgumentParser(description="create figure url mapping")
parser.add_argument("-lof", "--lof", type=str, help="", default="internal/book2.lof")
parser.add_argument("-csv", "--csv", type=str, help="", default="internal/figures_url_mapping_book2.csv")
args = parser.parse_args()

BOOK_NO = _book_no_from_lof_path(args.lof)  # from internal/book1.lof to 1


def figure_url_mapping_from_lof_dummy_nb_excluded(
    lof_file_path,
    csv_name,
    convert_to_which_url="colab",
    base_url="https://github.com/probml/pyprobml/blob/master/notebooks",
    book_no=1,
):
    """
    Create a mapping of fig_no to urls by parsing the lof file.

    Every ``numberline`` node found in the file is treated as one figure entry:
    its first content item is used as the figure number and its text is scanned
    for script names. Figures whose caption yields no script names get no entry
    in the mapping.

    Args:
        lof_file_path: path of the LaTeX list-of-figures (.lof) file to parse.
        csv_name: path of the csv file to write the mapping to; when empty,
            nothing is written.
        convert_to_which_url: forwarded to make_url_from_fig_no_and_script_name.
        base_url: forwarded to make_url_from_fig_no_and_script_name.
        book_no: forwarded to make_url_from_fig_no_and_script_name.

    Returns:
        dict mapping each figure number (as a string) to the list of urls built
        for the scripts named in its caption.
    """
    # NOTE(review): presumably this differs from probml_utils'
    # figure_url_mapping_from_lof by leaving out figures that have no script
    # (the "dummy notebooks") -- confirm against the library implementation.
    with open(lof_file_path) as fp:
        lof_contents = fp.read()
    soup = TexSoup(lof_contents)

    # create mapping of fig_no to list of urls, one url per script name
    url_mapping = {}
    for caption in soup.find_all("numberline"):
        fig_no = str(caption.contents[0])
        script_names = extract_scripts_name_from_caption(str(caption))
        if not script_names:
            continue
        url_mapping[fig_no] = [
            make_url_from_fig_no_and_script_name(
                fig_no,
                script_name,
                convert_to_which_url=convert_to_which_url,
                base_url=base_url,
                book_no=book_no,
            )
            for script_name in script_names
        ]

    if csv_name:
        dict_to_csv(url_mapping, csv_name)
        print(f"Mapping of {len(url_mapping)} urls is saved in {csv_name}")
    return url_mapping


print(f"Parsing started from {args.lof}...........")

# An empty --csv is passed through unchanged as "", exactly as the original
# else-branch did; the function above skips writing in that case.
excluded_csv = _excluded_csv_name(args.csv) if args.csv else ""

# The first call is kept for the csv it writes; figure_mapping ends up holding
# the mapping returned by the second call.
figure_mapping = figure_url_mapping_from_lof(args.lof, args.csv, book_no=BOOK_NO)
figure_mapping = figure_url_mapping_from_lof_dummy_nb_excluded(args.lof, excluded_csv, book_no=BOOK_NO)