Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
probml
GitHub Repository: probml/pyprobml
Path: blob/master/internal/transfer_non_figure_notebooks.ipynb
1191 views
Kernel: Python [conda env:pyprobml]
import os import re import fitz from probml_utils.url_utils import check_dead_urls from glob import glob
# Read the text of every page of the book PDF and concatenate the pieces
# (no separator is inserted between pages).
with fitz.open("../../bookv2/vol1/pml1.pdf") as doc:
    page_texts = []
    for page in doc:
        page_texts.append(page.get_text())
    text = "".join(page_texts)

# Remove every newline so the whole document becomes one long line.
text_one_line = "".join(text.split("\n"))
# Collect every substring enclosed by a "sssss<tag>" ... "eeeee<tag>" marker
# pair in the flattened PDF text; one list per tag (py, nb, dl).
py = re.compile("ssssspy(.*?)eeeeepy").findall(text_one_line)
nb = re.compile("sssssnb(.*?)eeeeenb").findall(text_one_line)
dl = re.compile("sssssdl(.*?)eeeeedl").findall(text_one_line)
# chnb = re.findall("ssssschnb(.*?)eeeeechnb", text_one_line)
# Build two lookup tables from the unique "<chapter>/<file>" entries in `dl`:
#   dl_dict_n2c: file name with "_jax.ipynb" removed -> chapter
#   dl_dict_c2n: chapter -> list of those names
dl_dict_c2n = {}
dl_dict_n2c = {}
for entry in set(dl):
    chap, name = entry.split("/")
    raw_name = name.replace("_jax.ipynb", "")
    dl_dict_n2c[raw_name] = chap
    # Start a new list the first time a chapter is seen, then append.
    dl_dict_c2n.setdefault(chap, []).append(raw_name)
# Notebook cell display: show the name -> chapter mapping built above.
dl_dict_n2c
{'resnet': '14', 'transformers': '15', 'rnn': '15', 'densenet': '14', 'lstm': '15', 'entailment_attention_mlp': '15', 'cnn1d_sentiment': '15', 'naive_bayes_mnist': '09', 'multi_head_attention': '15', 'conv2d': '14', 'batchnorm': '14', 'attention': '15', 'image_augmentation': '19', 'nmt': '15', 'bert': '15', 'lenet': '14', 'positional_encoding': '15', 'multi_gpu_training': '13', 'rnn_sentiment': '15', 'word_analogies': '20', 'gru': '15', 'finetune_cnn': '19', 'nmt_attention': '15', 'skipgram': '20'}
# Notebook cell display: show the chapter -> list-of-names mapping built above.
dl_dict_c2n
{'14': ['resnet', 'densenet', 'conv2d', 'batchnorm', 'lenet'], '15': ['transformers', 'rnn', 'lstm', 'entailment_attention_mlp', 'cnn1d_sentiment', 'multi_head_attention', 'attention', 'nmt', 'bert', 'positional_encoding', 'rnn_sentiment', 'gru', 'nmt_attention'], '09': ['naive_bayes_mnist'], '19': ['image_augmentation', 'finetune_cnn'], '13': ['multi_gpu_training'], '20': ['word_analogies', 'skipgram']}

Combine all

# Merge the entries collected for the three marker kinds, dropping duplicates.
all_files = set(py) | set(nb) | set(dl)

# Replace the typographic ligature characters U+FB01 ("fi") and U+FB02 ("fl")
# with their plain two-letter spellings so the entries match real file names.
# NOTE(review): the replace calls as rendered mapped "fi" -> "fi" and
# "fl" -> "fl", which are no-ops; presumably the ligature characters were
# normalised away when the notebook was displayed. Explicit escapes are used
# here so the intent survives copy/paste -- confirm against the original cell.
all_files = [x.replace("\ufb01", "fi").replace("\ufb02", "fl") for x in all_files]

# Notebook cell display: number of collected entries.
len(all_files)
227
# Root of the book1 notebooks in the GitHub web UI.
base_url = "https://github.com/probml/pyprobml/blob/master/notebooks/book1"

# Build one URL per collected entry and check which of them are dead.
# - URLs always use "/" as separator, so they are joined explicitly instead of
#   with os.path.join (which would use "\\" on Windows).
# - Only a trailing ".py" is rewritten to ".ipynb"; a ".py" occurring elsewhere
#   in the path is left untouched.
# The result is consumed positionally by the next cell, which treats a status
# equal to 1 as "dead".
dead_urls = check_dead_urls(
    [base_url + "/" + re.sub(r"\.py$", ".ipynb", x) for x in all_files]
)
import numpy as np
# Keep the entries whose URL check reported status 1.
# `dead_urls` is positionally aligned with `all_files`, so the two are walked
# in lockstep instead of rebuilding list(all_files) for every index lookup.
failed_files = [path for path, status in zip(all_files, dead_urls) if status == 1]
# Names (last path component with ".ipynb" removed) of the notebooks found
# under the local pyprobml book1 chapter folders.
existing_notebooks = [
    nb_path.rsplit("/", 1)[-1].replace(".ipynb", "")
    for nb_path in glob("../../pyprobml/notebooks/book1/*/*.ipynb")
]
# Print how many entries failed the URL check, then display the list itself
# as the cell's result.
print(len(failed_files))
failed_files
17
['07/cholesky_demo.py', '07/einsum_demo.py', '10/perceptron_demo_2d.py', '14/layer_norm_jax.ipynb', '08/sgd_comparison.ipynb', '03/mix_bernoulli_sgd_mnist.py', '11/linsys_solve_demo.py', '05/dtheory.ipynb', '14/cifar10_cnn_lightning.ipynb', '13/mlp_imdb_tf.ipynb', '14/transposed_conv_jax.py', '01/text_preproc_jax.ipynb', '03/sprinkler_pgm.ipynb', '03/correlation2d.py', '01/tfidf_demo.py', '04/beta_credible_int_demo.py', '07/power_method_demo.py']

Moved failed files

import nbformat import shutil
# Source and destination folders (relative to this notebook's directory).
old_nb_path = "../../probml-notebooks/notebooks/"
old_script_path = "../scripts/"
new_nb_path = "../notebooks/book1/"

# Copy every entry that failed the URL check into its book1 chapter folder.
# Entries look like "<chapter>/<file name>".
#   * "*.py" scripts are wrapped into a new notebook with a single code cell.
#   * anything else (existing notebooks) is copied as-is.
for each in failed_files:
    chap, name = each.split("/")

    # Make sure the chapter folder exists before writing into it.
    target_dir = os.path.join(new_nb_path, chap)
    os.makedirs(target_dir, exist_ok=True)

    if name.endswith(".py"):
        old_path = os.path.join(old_script_path, name)
        # Swap only the trailing ".py" for ".ipynb".
        new_path = os.path.join(target_dir, name[: -len(".py")] + ".ipynb")
        # Named `notebook` rather than `nb` so the `nb` list built from the
        # PDF markers earlier in this notebook is not overwritten.
        notebook = nbformat.v4.new_notebook()
        with open(old_path, encoding="utf-8") as f:
            notebook["cells"] = [nbformat.v4.new_code_cell(f.read())]
        nbformat.write(notebook, new_path)
    else:
        old_path = os.path.join(old_nb_path, name)
        new_path = os.path.join(target_dir, name)
        shutil.copy(old_path, new_path)
    print(each, "transfered")
07/cholesky_demo.py transfered 07/einsum_demo.py transfered 10/perceptron_demo_2d.py transfered 14/layer_norm_jax.ipynb transfered 08/sgd_comparison.ipynb transfered 03/mix_bernoulli_sgd_mnist.py transfered 11/linsys_solve_demo.py transfered 05/dtheory.ipynb transfered 14/cifar10_cnn_lightning.ipynb transfered 13/mlp_imdb_tf.ipynb transfered 14/transposed_conv_jax.py transfered 01/text_preproc_jax.ipynb transfered 03/sprinkler_pgm.ipynb transfered 03/correlation2d.py transfered 01/tfidf_demo.py transfered 04/beta_credible_int_demo.py transfered 07/power_method_demo.py transfered