Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
probml
GitHub Repository: probml/pyprobml
Path: blob/master/internal/common_notebooks_index.ipynb
1191 views
Kernel: Python [conda env:py3713]
from glob import glob import pandas as pd import os import nbformat import probml_utils.url_utils as url_utils
# Root of the local notebooks tree, relative to this notebook's directory.
root_path = "../notebooks"


def get_notebook_path(book_str, chap_no, nb_name):
    """Join root_path/<book>/<chapter>/<notebook> into a filesystem path."""
    return os.path.join(root_path, book_str, chap_no, nb_name)


# NOTE: name (with its typo) kept for compatibility with later cells.
def seperate_stuffs(nb_path):
    """Return the trailing [book, chapter, notebook-name] components of *nb_path*.

    Splitting on os.sep after normpath (instead of a hard-coded "/")
    keeps this correct on Windows paths as well.
    """
    return os.path.normpath(nb_path).split(os.sep)[-3:]
book2 = glob("../notebooks/book1/*/*.ipynb") #+ glob("../notebooks/book2/*/*.ipynb") book2 len(book2)
316
# One row per discovered notebook: (book, chapter, file name).
nb_list = [seperate_stuffs(path) for path in book2]
df_nb_list = pd.DataFrame(nb_list, columns=["book_no", "chap_no", "nb_name"])
df_nb_list
# Group duplicate file names: each row aggregates, per notebook name, the
# lists of books and chapters in which that name appears.
df_nb_list_grp = df_nb_list.groupby("nb_name").agg(list).reset_index()
df_nb_list_grp
def is_query_in_nb(notebook, query):
    """
    Return 1 if *query* occurs in the source of any cell of the notebook
    file at path *notebook*, else 0.  Ints (not bools) because callers
    count()/index() these flags in plain lists.
    """
    nb = nbformat.read(notebook, as_version=4)
    for cell in nb.cells:
        code = cell["source"]
        if query in code:
            return 1
    return 0


def get_n_cells_nb(notebook):
    """
    Return the number of cells in the notebook file at path *notebook*.
    """
    nb = nbformat.read(notebook, as_version=4)
    return len(nb.cells)


def get_original_nb(df_nb_list_grp_ser):
    """
    For one grouped row (parallel lists of books/chapters sharing a
    notebook name), return a parallel list of 0/1 flags: 1 when that copy
    contains the marker text "Source of this notebook".
    """
    nb_name = df_nb_list_grp_ser["nb_name"]
    books = df_nb_list_grp_ser["book_no"]
    chaps = df_nb_list_grp_ser["chap_no"]
    t = []
    for book, chap in zip(books, chaps):
        nb_path = get_notebook_path(book, chap, nb_name)
        is_source = is_query_in_nb(nb_path, "Source of this notebook")
        t.append(is_source)
    return t
# Flag, per copy of each notebook name, whether the "Source of this
# notebook" marker is present (1 = marker found, 0 = presumably the original).
df_nb_list_grp["is_source_present"] = df_nb_list_grp.apply(get_original_nb, axis=1)
df_nb_list_grp
# Delete duplicate notebooks from book2 def del_duplicate_notebook(df_root_ser): is_source = df_root_ser["is_source_present"] nb_name = df_root_ser["nb_name"] # print(is_source) for i in range(len(is_source)): if is_source[i] == 1 and df_root_ser["book_no"][i] == "book1": # delete only book2's duplicate notebook: nb_path = get_notebook_path(df_root_ser["book_no"][i], df_root_ser["chap_no"][i], nb_name) if is_query_in_nb(nb_path, "/pyprobml/"): # delete notebooks who have pyprobml links os.remove(nb_path) print("Deleted: ", nb_path) def del_fig_notebook(df_root_ser): is_source = df_root_ser["is_source_present"] nb_name = df_root_ser["nb_name"] # print(is_source) if "fig_" in nb_name and df_root_ser["book_no"] == "book2": # delete only book2's notebook: nb_path = get_notebook_path(df_root_ser["book_no"], df_root_ser["chap_no"], nb_name) os.remove(nb_path) print("Deleted: ", nb_path) df_root = df_nb_list_grp df_nb_list_grp.apply(del_duplicate_notebook, axis=1) #df_nb_list_grp.apply(del_fig_notebook, axis=1)
Deleted: ../notebooks/book1/13/activation_fun_plot.ipynb Deleted: ../notebooks/book1/04/gauss_infer_1d.ipynb Deleted: ../notebooks/book1/04/gauss_infer_2d.ipynb Deleted: ../notebooks/book1/21/gmm_2d.ipynb Deleted: ../notebooks/book1/17/gprDemoArd.ipynb Deleted: ../notebooks/book1/17/gprDemoChangeHparams.ipynb Deleted: ../notebooks/book1/17/gprDemoNoiseFree.ipynb Deleted: ../notebooks/book1/17/gpr_demo_marglik.ipynb Deleted: ../notebooks/book1/18/hinge_loss_plot.ipynb Deleted: ../notebooks/book1/11/huberLossPlot.ipynb Deleted: ../notebooks/book1/17/huberLossPlot.ipynb Deleted: ../notebooks/book1/10/iris_logreg.ipynb Deleted: ../notebooks/book1/11/linreg_poly_vs_degree.ipynb Deleted: ../notebooks/book1/11/linreg_post_pred_plot.ipynb Deleted: ../notebooks/book1/03/prob.ipynb
0 None 1 None 2 None 3 None 4 None ... 301 None 302 None 303 None 304 None 305 None Length: 306, dtype: object
def get_root_col(df_root_ser, col):
    """
    Resolve column *col* of the single "original" copy — the one whose
    is_source_present flag is 0.  Falls back to the first entry (and
    warns) when no copy has flag 0; returns None implicitly — hence NaN
    after apply — when more than one copy has flag 0 (see the isna()
    check two cells below).
    """
    is_source = df_root_ser["is_source_present"]
    nb_name = df_root_ser["nb_name"]
    if is_source.count(0) == 0:
        print(f"{nb_name} is not in pyprobml!")
        return df_root_ser[col][0]
    elif is_source.count(0) > 1:
        print(f"{nb_name} - multiple copies exist")
    else:
        return df_root_ser[col][is_source.index(0)]


# Collapse the per-name lists down to the original copy's chapter/book.
df_root = df_nb_list_grp
df_root["chap_no"] = df_nb_list_grp.apply(get_root_col, col="chap_no", axis=1)
df_root["book_no"] = df_nb_list_grp.apply(get_root_col, col="book_no", axis=1)
df_root
bootstrap_filter.ipynb is not in pyprobml! bootstrap_filter_maneuver.ipynb is not in pyprobml! ekf_mlp.ipynb is not in pyprobml! ekf_vs_ukf.ipynb is not in pyprobml! gauss-bp-1d-line.ipynb is not in pyprobml! gprDemoArd.ipynb is not in pyprobml! gprDemoChangeHparams.ipynb is not in pyprobml! gprDemoNoiseFree.ipynb is not in pyprobml! pendulum_1d.ipynb is not in pyprobml! rbpf_maneuver.ipynb is not in pyprobml! rbpf_maneuver_demo.ipynb is not in pyprobml! sis_vs_smc.ipynb is not in pyprobml! bootstrap_filter.ipynb is not in pyprobml! bootstrap_filter_maneuver.ipynb is not in pyprobml! ekf_mlp.ipynb is not in pyprobml! ekf_vs_ukf.ipynb is not in pyprobml! gauss-bp-1d-line.ipynb is not in pyprobml! gprDemoArd.ipynb is not in pyprobml! gprDemoChangeHparams.ipynb is not in pyprobml! gprDemoNoiseFree.ipynb is not in pyprobml! pendulum_1d.ipynb is not in pyprobml! rbpf_maneuver.ipynb is not in pyprobml! rbpf_maneuver_demo.ipynb is not in pyprobml! sis_vs_smc.ipynb is not in pyprobml!
# Rows where no unique original could be resolved (get_root_col returned None).
df_root[df_root["book_no"].isna()]
df_root
# Sanity-check the URL converter on one known notebook path.
url_utils.github_to_rawcontent_url("https://github.com/probml/pyprobml/blob/master/notebooks/book2/03/dtheory.ipynb")
'https://raw.githubusercontent.com/probml/pyprobml/master/notebooks/book2/03/dtheory.ipynb'
# check dead urls # df_root["url"] = df_root.apply( # lambda x: url_utils.make_url_from_chapter_no_and_script_name( # chapter_no=int(x["chap_no"]), # script_name=x["nb_name"], # book_no=int(x["book_no"][-1]), # convert_to_which_url="github-raw", # ), # axis=1, # ) # df_root
# url_utils.check_dead_urls(list(df_root["url"]))
# Add colab url df_root["colab_url"] = df_root.apply( lambda x: url_utils.make_url_from_chapter_no_and_script_name( chapter_no=int(x["chap_no"]), script_name=x["nb_name"], book_no=int(x["book_no"][-1]), convert_to_which_url="colab", ), axis=1, ) # Add colab url df_root["github_url"] = df_root.apply( lambda x: url_utils.make_url_from_chapter_no_and_script_name( chapter_no=int(x["chap_no"]), script_name=x["nb_name"], book_no=int(x["book_no"][-1]), convert_to_which_url="github", ), axis=1, ) df_root
# NOTE(review): the "url" column is only created by the commented-out
# "check dead urls" cell above — running this cell without it raises KeyError.
t = df_root["url"][1]
t
'https://raw.githubusercontent.com/probml/pyprobml/master/notebooks/book1/11/LinearRegressionProbML.ipynb'
def enclose_span(text, nb_id):
    """Wrap *text* in an HTML span whose id is the notebook file name."""
    return f"<span id={nb_id}>{text}</span>"


def to_md_url(text, url):
    """Render a markdown link [text](url)."""
    return f"[{text}]({url})"


# to_md_url(enclose_span("GAN_JAX_CelebA_demo.ipynb"), t)


def _colab_md(row):
    # Markdown link labelled "colab", anchored by the notebook name.
    return to_md_url(enclose_span("colab", row["nb_name"]), row["colab_url"])


def _github_md(row):
    # Markdown link labelled "github", anchored by the notebook name.
    return to_md_url(enclose_span("github", row["nb_name"]), row["github_url"])


df_root["md_colab_url"] = df_root.apply(_colab_md, axis=1)
df_root["md_github_url"] = df_root.apply(_github_md, axis=1)
df_root
# Keep only the display columns and give them human-readable headers.
df_final = df_root[["nb_name", "md_colab_url", "md_github_url"]].rename(
    columns={"nb_name": "Notebook", "md_colab_url": "Colab url", "md_github_url": "Github url"}
)
df_final
# Spot-check one rendered markdown colab link.
df_final.iloc[0, 1]
'[<span id=KLfwdReverseMixGauss.ipynb>colab</span>](https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/06/KLfwdReverseMixGauss.ipynb)'

Handle supplementary notebooks

# URL prefixes for notebooks on the master branch of probml/pyprobml.
github_root = "https://github.com/probml/pyprobml/blob/master"
colab_root = "https://colab.research.google.com/github/probml/pyprobml/blob/master"
# Supplementary notebooks live one directory deeper: book/chapter/*/name.ipynb.
# NOTE(review): the name says book2, but the glob covers book1 as well.
supp_book2 = glob("../notebooks/book2/*/*/*.ipynb") + glob("../notebooks/book1/*/*/*.ipynb")
supp_book2
len(supp_book2), supp_book2[0]
(17, '../notebooks/book2/09/supplementary/discretized_ssm.ipynb')
def _relative_repo_path(path):
    """Strip only the leading ".." from a local glob path, giving "/notebooks/...".

    The original used path.replace("..", ""), which would also corrupt any
    ".." appearing elsewhere in the path; slicing the prefix is safe.
    """
    return path[2:] if path.startswith("..") else path


# One [file name, colab URL, github URL] row per supplementary notebook.
nb_github_colab_list = [
    [p.split("/")[-1], colab_root + _relative_repo_path(p), github_root + _relative_repo_path(p)]
    for p in supp_book2
]
df_supp = pd.DataFrame(nb_github_colab_list, columns=df_final.columns)
df_supp
# Replace the raw URLs with markdown links anchored by the notebook name.
df_supp["Colab url"] = df_supp.apply(
    lambda row: to_md_url(enclose_span("colab", row["Notebook"]), row["Colab url"]),
    axis=1,
)
df_supp["Github url"] = df_supp.apply(
    lambda row: to_md_url(enclose_span("github", row["Notebook"]), row["Github url"]),
    axis=1,
)
df_supp
# Spot-check one rendered markdown github link.
df_supp.iloc[0, 2]
'[<span id=discretized_ssm.ipynb>github</span>](https://github.com/probml/pyprobml/blob/master/notebooks/book2/09/supplementary/discretized_ssm.ipynb)'

Combine supplementary + chapters notebooks

# Merge the chapter-level and supplementary tables, sorted case-insensitively
# by notebook name.
df_chap_supp = (
    pd.concat([df_final, df_supp])
    .sort_values(by="Notebook", key=lambda col: col.str.lower())
)
df_chap_supp
# Write the combined index as a markdown table.
# NOTE(review): DataFrame.to_markdown requires the `tabulate` package — confirm installed.
df_chap_supp.to_markdown("../notebooks.md", index=False)
# NOTE(review): stray scratch cell — pd.concat() with no objects raises a
# TypeError/ValueError; safe to delete.
pd.concat()
# Load manually curated external notebook links from a CSV at a path
# relative to the working directory — TODO confirm the file location.
df_external = pd.read_csv("external_links.csv")
df_external
# Scratch demo frames for experimenting with overlapping index labels:
# df1 uses the default RangeIndex [0, 1]; df2 uses explicit labels [1, 2].
rows_a = [[0, 1], [2, 3]]
df1 = pd.DataFrame(rows_a)
df1
rows_b = [[11, 12], [13, 14]]
df2 = pd.DataFrame(rows_b, index=[1, 2])
df2
import numpy as np
array([1])
# Drop from df1 the index labels it shares with df2.  pandas
# Index.intersection gives the same label set as the original
# np.intersect1d round-trip through raw numpy arrays.
overlap = df1.index.intersection(df2.index)
df1.drop(index=overlap)