Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
probml
GitHub Repository: probml/pyprobml
Path: blob/master/internal/duplicate_notebooks.py
1191 views
1
from glob import glob
2
import os
3
import shutil
4
import regex as re
5
from pathlib import Path
6
7
# Notebook paths discovered with relative glob patterns, so the results depend
# on the current working directory the script is started from.
book1_notebooks = glob("notebooks/book1/*/*.ipynb")
book2_notebooks = glob("notebooks/book2/*/*.ipynb")
misc_notebooks = glob("notebooks/misc/*.ipynb") + glob("notebooks/misc/*/*.ipynb")
# Prefix that is prepended to a repository-relative notebook path to build a link.
base_url = "https://colab.research.google.com/github/probml/pyprobml/blob/master/"

print(len(book1_notebooks), len(book2_notebooks), len(misc_notebooks))


def get_notebook_name(notebook):
    """Return the file-name component of the path *notebook*.

    ``os.path.basename`` is used instead of splitting on a hard-coded "/" so
    that paths returned by ``glob`` are handled with the platform's own
    separator rules; on POSIX the result is the same as ``split("/")[-1]``.
    """
    return os.path.basename(notebook)


# File names (without directories) of every book1 / book2 notebook.
book1_notebooks_names = set(map(get_notebook_name, book1_notebooks))
book2_notebooks_names = set(map(get_notebook_name, book2_notebooks))
notebook_names = book1_notebooks_names | book2_notebooks_names
19
20
def remove_duplicate_nb_by_name():
    """Move misc notebooks that share a file name with a book notebook.

    Every path in ``misc_notebooks`` whose file name is present in
    ``notebook_names`` (the book1 and book2 file names) is reported on stdout
    and moved into the ``deprecated/`` directory.

    Raises:
        shutil.Error: propagated from ``shutil.move``, e.g. when a file with
            the same name already exists inside ``deprecated/``.
    """
    deprecated_dir = "deprecated/"
    for misc_notebook in misc_notebooks:
        if get_notebook_name(misc_notebook) not in notebook_names:
            continue
        print(f"{misc_notebook} is a duplicate")
        # shutil.move only moves *into* the destination when it is an existing
        # directory, so make sure it exists before the first move.
        os.makedirs(deprecated_dir, exist_ok=True)
        shutil.move(misc_notebook, deprecated_dir)
26
27
def get_path_nb(nb):
    """Look up the book1 path of the notebook whose file name is *nb*.

    Returns the first entry of ``book1_notebooks`` whose file name equals
    *nb*, or ``None`` when no entry matches.
    """
    candidates = (path for path in book1_notebooks if get_notebook_name(path) == nb)
    return next(candidates, None)
31
32
def copy_referred_nb():
    """Copy notebooks referenced by book1 chapter READMEs and relink them.

    Each ``notebooks/book1/*/README.md`` containing a
    ``## Supplementary material`` heading is scanned line by line after that
    heading. A line whose last ``|``-separated field contains ``Notebook`` or
    ``[d2lbook]`` is treated as a notebook reference; the notebook name is the
    last ``/``-separated component of the link in that field. The reference is
    then resolved in this order:

    1. ``notebooks/misc/<name>`` is in ``misc_notebooks``: the file is copied
       into the README's chapter directory and recorded in the result.
    2. The notebook already exists in the README's chapter directory.
    3. ``get_path_nb`` finds the notebook in another book1 chapter.

    In all three cases the last field of the line is replaced by a link built
    from ``base_url`` and the resolved path. Unresolved references are reported
    on stdout and left untouched. A README is rewritten only if at least one
    of its lines changed.

    Returns:
        list[str]: the ``notebooks/misc/...`` paths that were copied, one entry
        per resolved reference (the same path may appear more than once).
    """
    heading = "## Supplementary material"
    misc_paths = set(misc_notebooks)  # constant-time membership in the line loop
    copied_nb = []

    for readme_file in glob("notebooks/book1/*/README.md"):
        print(f"************* {readme_file} **************")
        with open(readme_file, "r", encoding="utf-8") as f:
            content = f.read()

        # partition() cuts at the first heading only and keeps everything after
        # it, so text following a repeated heading is not lost on rewrite.
        head, found, tail = content.partition(heading)
        if not found:
            continue

        chapter_dir = readme_file.replace("README.md", "")
        # Snapshot taken once per README: files copied below always come from
        # misc and are handled by the misc branch, never by this lookup.
        chapter_notebooks = set(glob(f"{chapter_dir}*.ipynb"))

        updated = False
        new_tail_lines = []
        for line in tail.split("\n"):
            last_field = line.split("|")[-1]
            if "Notebook" in last_field or "[d2lbook]" in last_field:
                # Strip the markdown link syntax to get the bare link target.
                link = (
                    last_field.replace("[Notebook]", "")
                    .replace("[d2lbook]", "")
                    .replace("(", "")
                    .replace(")", "")
                    .strip()
                )
                nb_name = link.split("/")[-1]
                nb_misc_file = f"notebooks/misc/{nb_name}"
                nb_dest = f"{chapter_dir}{nb_name}"

                target = None
                if nb_misc_file in misc_paths:
                    # Copy from misc into the current chapter.
                    shutil.copy(nb_misc_file, nb_dest)
                    print(f"{nb_misc_file} -> {nb_dest}")
                    copied_nb.append(nb_misc_file)
                    target = nb_dest
                elif nb_dest in chapter_notebooks:
                    print(f"{nb_dest} exists in current chapter")
                    target = nb_dest
                else:
                    other_chapter_path = get_path_nb(nb_name)
                    if other_chapter_path is not None:
                        print(f"{other_chapter_path} exists in different chapter")
                        target = other_chapter_path
                    else:
                        print(f"{link} not in misc and not in current chapter!!")

                if target is not None:
                    # Replace only the trailing field; str.replace could also
                    # hit an identical substring earlier in the row.
                    row_prefix = line[: len(line) - len(last_field)]
                    url = f"{base_url.rstrip('/')}/{target}"
                    line = f"{row_prefix}[{nb_name}]({url})"
                    updated = True

            new_tail_lines.append(line)

        if updated:
            with open(readme_file, "w", encoding="utf-8") as f:
                # head + heading + tail reproduces the file exactly apart from
                # the rewritten lines (no extra blank lines are introduced).
                f.write(head + heading + "\n".join(new_tail_lines))

    return copied_nb
90
91
92
93
def delete_nb(notebook_list):
    """Delete every file listed in *notebook_list* and print the count.

    Args:
        notebook_list: sized collection of file paths; ``len()`` is called on
            it for the summary message.

    Raises:
        OSError: propagated from ``os.remove`` (for example when a path does
            not exist); files removed before the failure stay removed.
    """
    # A plain loop rather than a list comprehension: the calls are made only
    # for their side effect, so there is no result list worth building.
    for nb in notebook_list:
        os.remove(nb)
    print(f"{len(notebook_list)} deleted!")
96
97
def store_copied_nb(notebooks, fname="internal/ignored_notebooks.txt"):
    """Write the given notebook paths to *fname*, one per line.

    Args:
        notebooks: iterable of path strings.
        fname: output file; it is opened in write mode, so existing content
            is replaced.
    """
    with open(fname, "w", encoding="utf-8") as fp:
        # writelines() consumes the generator directly; no throwaway list of
        # write() return values is built.
        fp.writelines(nb + "\n" for nb in notebooks)
100
101
if __name__ == "__main__":
    print("main")
    # The workflow below is commented out, so running the script only prints
    # "main". Enabled, it would: copy the notebooks referenced by the book1
    # READMEs, report how many were copied (total vs. unique), record them in
    # the ignore list, and finally delete the copied originals.
    # copied_nb = copy_referred_nb()
    # print(len(copied_nb), len(set(copied_nb)))
    # print(copied_nb[:4])
    # store_copied_nb(copied_nb)
    # delete_nb(set(copied_nb))
108
109
'''
# Known issues
1. Links that still point to probml-notebooks/ need to be updated to point to pyprobml/.
'''
113
114