Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
probml
GitHub Repository: probml/pyprobml
Path: blob/master/internal/book2/create_figure_url_mapping.py
1192 views
1
"""
2
command usage:
3
python3 internal/book2/create_figure_url_mapping.py -lof internal/book2.lof -csv internal/figures_url_mapping_book2.csv
4
"""
5
import argparse
6
import regex as re
7
from probml_utils.url_utils import (
8
figure_url_mapping_from_lof,
9
dict_to_csv,
10
make_url_from_fig_no_and_script_name,
11
extract_scripts_name_from_caption,
12
)
13
from TexSoup import TexSoup
14
15
# Command-line interface: the LaTeX list-of-figures file to parse and the
# CSV file the resulting mapping is written to.
parser = argparse.ArgumentParser(description="create figure url mapping")
parser.add_argument(
    "-lof",
    "--lof",
    default="internal/book2.lof",
    type=str,
    help="",
)
parser.add_argument(
    "-csv",
    "--csv",
    default="internal/figures_url_mapping_book2.csv",
    type=str,
    help="",
)
args = parser.parse_args()

# Book number is the last character of the .lof file name's stem,
# e.g. internal/book1.lof -> "book1" -> 1 (only a single digit is read).
_lof_stem = args.lof.rpartition("/")[2].partition(".")[0]
BOOK_NO = int(_lof_stem[-1])
21
22
23
def figure_url_mapping_from_lof_dummy_nb_excluded(
    lof_file_path,
    csv_name,
    convert_to_which_url="colab",
    base_url="https://github.com/probml/pyprobml/blob/master/notebooks",
    book_no=1,
):
    """
    Create a mapping of fig_no to urls by parsing a .lof file.

    Every ``numberline`` entry found in the file is inspected; an entry is
    added to the mapping only when at least one script name can be extracted
    from its caption, so figures without scripts are left out entirely.

    Args:
        lof_file_path: path of the LaTeX list-of-figures file to read.
        csv_name: path of the csv file to save the mapping in; a falsy value
            (e.g. "") skips saving.
        convert_to_which_url: forwarded unchanged to
            make_url_from_fig_no_and_script_name.
        base_url: forwarded unchanged to make_url_from_fig_no_and_script_name.
        book_no: forwarded unchanged to make_url_from_fig_no_and_script_name.

    Returns:
        dict mapping each figure number (as str) to the list of urls built
        for the scripts named in that figure's caption.
    """
    # NOTE: this must be a plain string literal; an f-string here is not
    # recognised as a docstring and would leave __doc__ as None.
    with open(lof_file_path) as fp:
        lof_contents = fp.read()
    soup = TexSoup(lof_contents)

    # create mapping of fig_no to list of urls (one per extracted script)
    url_mapping = {}
    for caption in soup.find_all("numberline"):
        fig_no = str(caption.contents[0])
        extracted_scripts = extract_scripts_name_from_caption(str(caption))
        if len(extracted_scripts) > 0:
            url_mapping[fig_no] = [
                make_url_from_fig_no_and_script_name(
                    fig_no,
                    script_name,
                    convert_to_which_url=convert_to_which_url,
                    base_url=base_url,
                    book_no=book_no,
                )
                for script_name in extracted_scripts
            ]

    # Only write (and report) the csv when a target file name was given.
    if csv_name:
        dict_to_csv(url_mapping, csv_name)
        print(f"Mapping of {len(url_mapping)} urls is saved in {csv_name}")
    return url_mapping
60
61
62
print(f"Parsing started from {args.lof}...........")

# Decide the two output file names up front: the full mapping goes to the
# requested csv, the variant without dummy notebooks goes to a sibling file
# with an "_excluded_dummy_nb" suffix. An empty name means "do not save".
if args.csv:
    _full_csv = args.csv
    _excluded_csv = args.csv.replace(".csv", "") + "_excluded_dummy_nb.csv"
else:
    _full_csv = ""
    _excluded_csv = ""

# Both mappings are always built; figure_mapping ends up holding the second one.
figure_mapping = figure_url_mapping_from_lof(args.lof, _full_csv, book_no=BOOK_NO)
figure_mapping = figure_url_mapping_from_lof_dummy_nb_excluded(
    args.lof,
    _excluded_csv,
    book_no=BOOK_NO,
)
72
73