Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
jantic
GitHub Repository: jantic/deoldify
Path: blob/master/fastai/gen_doc/gen_notebooks.py
781 views
1
"`gen_doc.nbdoc` generates notebook documentation from module functions and links to correct places"
2
import pkgutil, inspect, sys,os, importlib,json,enum,warnings,nbformat,re
3
from IPython.core.display import display, Markdown
4
from nbconvert.preprocessors import ExecutePreprocessor
5
from nbformat.sign import NotebookNotary
6
from pathlib import Path
7
from .core import *
8
from .nbdoc import *
9
10
__all__ = ['create_module_page', 'update_module_page', 'import_mod',
11
'link_nb', 'update_notebooks', 'generate_missing_metadata', 'update_nb_metadata']
12
13
def get_empty_notebook():
    "Default notebook skeleton: Python 3 kernel metadata, nbformat 4.2, and no `cells` key."
    #TODO: check python version and nbformat
    kernelspec = dict(display_name='Python 3', language='python', name='python3')
    language_info = dict(
        codemirror_mode=dict(name='ipython', version=3),
        file_extension='.py',
        mimetype='text/x-python',
        name='python',
        nbconvert_exporter='python',
        pygments_lexer='ipython3',
        version='3.6.6',
    )
    # A fresh dict is built on every call, so callers may mutate the result freely.
    return dict(
        metadata=dict(kernelspec=kernelspec, language_info=language_info),
        nbformat=4,
        nbformat_minor=2,
    )
28
29
def get_md_cell(source, metadata=None):
    "Wrap `source` in a markdown cell; `metadata` is attached as-is, or a new empty dict when omitted."
    if metadata is None:
        metadata = {}
    return dict(cell_type='markdown', metadata=metadata, source=source)
34
35
def get_empty_cell(ctype='markdown'):
    "Blank cell of type `ctype`, with empty metadata and an empty source list."
    return dict(cell_type=ctype, metadata={}, source=[])
38
39
def get_code_cell(code, hidden=False):
    "Trusted, not-yet-executed code cell holding `code`; `hidden` is stored as the `hide_input` metadata flag."
    cell_metadata = dict(hide_input=hidden, trusted=True)
    return dict(
        cell_type='code',
        execution_count=0,
        metadata=cell_metadata,
        source=code,
        outputs=[],
    )
46
47
def get_doc_cell(func_name):
    "Hidden code cell whose source is the `show_doc` call for `func_name`."
    return get_code_cell("show_doc({})".format(func_name), True)
51
52
def get_global_vars(mod):
    """Return globally assigned variables of `mod`.

    The source file at `mod.__file__` is parsed and every assignment whose first
    target is a plain name is inspected. Only assignments whose source line starts
    at column 0 with `<name> =` are kept, and `__all__` is skipped.

    Returns a dict mapping each variable name to a markdown string made of the
    back-quoted assignment line followed by `get_source_link(mod, lineno)`.
    """
    # https://stackoverflow.com/questions/8820276/docstring-for-variable/31764368#31764368
    import ast,re
    with open(mod.__file__, 'r') as f: fstr = f.read()
    flines = fstr.splitlines()
    d = {}
    for node in ast.walk(ast.parse(fstr)):
        if not isinstance(node, ast.Assign): continue
        target = node.targets[0]
        if not hasattr(target, 'id'): continue  # skip attribute/subscript/tuple targets
        key, lineno = target.id, target.lineno
        # `ast` line numbers are 1-based while `flines` is 0-based: index with lineno-1,
        # otherwise the line *after* the assignment is examined (and an assignment on
        # the last line of the file raises IndexError).
        codestr = flines[lineno - 1]
        # Anchoring at column 0 is what restricts the result to top-level assignments.
        match = re.match(rf"^({re.escape(key)})\s*=\s*.*", codestr)
        if match and match.group(1) != '__all__': # only top level assignment
            d[key] = f'`{codestr}` {get_source_link(mod, lineno)}'
    return d
67
68
def write_nb(nb, nb_path, mode='w'):
    "Serialize the notebook dict `nb` as nbformat version 4 and write it to `nb_path`, opened with `mode`."
    with open(nb_path, mode) as f:
        node = nbformat.from_dict(nb)
        f.write(nbformat.writes(node, version=4))
70
71
class ExecuteShowDocPreprocessor(ExecutePreprocessor):
    "An ExecutePreprocessor that only executes code cells matching `IMPORT_RE` or `SHOW_DOC_RE`"
    def preprocess_cell(self, cell, resources, index):
        "Execute `cell` when it is a fastai-import or show_doc code cell; otherwise return it untouched."
        is_code = 'source' in cell and cell.cell_type == "code"
        if is_code:
            src = cell['source']
            if IMPORT_RE.search(src) or SHOW_DOC_RE.search(src):
                return super().preprocess_cell(cell, resources, index)
        return cell, resources
78
79
def execute_nb(fname, metadata=None, save=True, show_doc_only=False):
    """Execute notebook `fname`, passing `metadata` as the preprocessor resources.

    With `show_doc_only` the run is restricted by `ExecuteShowDocPreprocessor`.
    When `save` is true the executed notebook is written back to `fname` and signed.
    """
    # Any module used in the notebook that isn't inside must be in the same directory as this script
    with open(fname) as f: nb = nbformat.read(f, as_version=4)
    if show_doc_only: ep_class = ExecuteShowDocPreprocessor
    else: ep_class = ExecutePreprocessor
    ep = ep_class(timeout=600, kernel_name='python3')
    ep.preprocess(nb, metadata or {})
    if not save: return
    with open(fname, 'wt') as f: nbformat.write(nb, f)
    NotebookNotary().sign(nb)
90
91
def _symbol_skeleton(name):
    "Two-cell skeleton for `name`: its show_doc cell followed by a markdown cell holding the back-quoted name."
    doc_cell = get_doc_cell(name)
    placeholder = get_md_cell(f"`{name}`")
    return [doc_cell, placeholder]
92
93
def create_module_page(mod, dest_path, force=False):
    """Create the documentation notebook for module `mod` in path `dest_path`.

    The notebook gets title/introduction placeholders, a hidden import cell, one
    markdown cell per exported global variable, a doc/markdown cell pair per name
    from `get_ft_names`, and a trailing `UNDOC_HEADER` cell. It is then written,
    executed, and its path returned.

    Unless `force` is true the file is opened in exclusive-create mode ('x'), so an
    existing notebook makes the write raise instead of being overwritten.
    """
    nb = get_empty_notebook()
    mod_name = mod.__name__
    strip_name = strip_fastai(mod_name)
    init_cell = [get_md_cell(f'## Title for {strip_name} (use plain english, not module name!)'), get_md_cell('Type an introduction of the package here.')]
    cells = [get_code_cell(f'from fastai.gen_doc.nbdoc import *\nfrom {mod_name} import * ', True)]

    gvar_map = get_global_vars(mod)
    if gvar_map: cells.append(get_md_cell('### Global Variable Definitions:'))
    for name in get_exports(mod):
        if name in gvar_map: cells.append(get_md_cell(gvar_map[name]))

    for ft_name in get_ft_names(mod, include_inner=True):
        if not hasattr(mod, ft_name):
            warnings.warn(f"Module {strip_name} doesn't have a function named {ft_name}.")
            continue
        cells += _symbol_skeleton(ft_name)
    nb['cells'] = init_cell + cells + [get_md_cell(UNDOC_HEADER)]

    doc_path = get_doc_path(mod, dest_path)
    write_nb(nb, doc_path, 'w' if force else 'x')
    execute_nb(doc_path)
    return doc_path
118
119
# Name suffixes skipped by `get_module_names` when no explicit `exclude` is given.
_default_exclude = ['.ipynb_checkpoints', '__pycache__', '__init__.py', 'imports']

def get_module_names(path_dir, exclude=None):
    """Search a given `path_dir` and return all the modules contained inside except those in `exclude`.

    `path_dir` is a `pathlib.Path`. The result starts with `path_dir.name`, followed
    by dotted names for sub-directories (recursively, directories first) and `.py`
    files. Any entry whose name ends with one of the `exclude` strings is skipped;
    `exclude` defaults to `_default_exclude` and is applied at every depth.
    """
    if exclude is None: exclude = _default_exclude
    # `str.endswith` takes a tuple; a name equal to an excluded entry also "ends with" it,
    # so this single check covers both excluded directories and excluded suffixes.
    excluded = tuple(exclude)
    files = sorted(path_dir.glob('*'), key=lambda x: (x.is_dir(), x.name), reverse=True) # directories first
    res = [f'{path_dir.name}']
    for f in files:
        if f.name.endswith(excluded): continue
        if f.suffix == '.py': res.append(f'{path_dir.name}.{f.stem}')
        # Forward `exclude` so a caller-supplied list is honoured in sub-directories too.
        elif f.is_dir(): res += [f'{path_dir.name}.{name}' for name in get_module_names(f, exclude)]
    return res
133
134
def read_nb(fname):
    "Load the notebook stored at `fname`, parsed as nbformat version 4"
    with open(fname,'r') as f:
        raw = f.read()
        return nbformat.reads(raw, as_version=4)
137
138
# Captures the (possibly dotted) name passed as first argument to `show_doc(`.
SHOW_DOC_RE = re.compile(r"show_doc\(([\w\.]*)")
def read_nb_content(cells, mod_name):
    "Map every name documented through `show_doc(...)` in a code cell to that cell's index (last occurrence wins)."
    # `mod_name` is accepted for the callers' sake but is not used here.
    return {name: idx
            for idx, cell in enumerate(cells)
            if cell['cell_type'] == 'code'
            for name in SHOW_DOC_RE.findall(cell['source'])}
147
148
def read_nb_types(cells):
    "Map the name at the start of each markdown cell of the form `name = ...` to that cell's index."
    # The name may be preceded by a `<code>` tag or a backtick.
    found = {}
    for idx, cell in enumerate(cells):
        if cell['cell_type'] != 'markdown': continue
        hit = re.match(r"^(?:<code>|`)?(\w*)\s*=\s*", cell['source'])
        if hit is None: continue
        found[hit.group(1)] = idx
    return found
155
156
def link_markdown_cells(cells, modules):
    "Replace, in place, the source of every markdown cell with `link_docstring(modules, source)`."
    for cell in cells:
        if cell['cell_type'] != 'markdown': continue
        cell['source'] = link_docstring(modules, cell['source'])
161
162
def get_insert_idx(pos_dict, name):
    "Position stored for the first key of `pos_dict` that is not case-insensitively smaller than `name`, or -1 if none."
    for key, pos in pos_dict.items():
        if not str.lower(key) < str.lower(name): return pos
    return -1
168
169
def update_pos(pos_dict, start_key, nbr=2):
    "Shift by `nbr`, in place, the position of every key that compares case-insensitively >= `start_key`; return `pos_dict`."
    shifted = [key for key in pos_dict if str.lower(key) >= str.lower(start_key)]
    for key in shifted:
        pos_dict[key] += nbr
    return pos_dict
174
175
def insert_cells(cells, pos_dict, ft_name, append=False):
    "Add a doc cell plus an empty cell for `ft_name` to `cells` (at its sorted slot, or at the end) and return `(cells, pos_dict)`."
    idx = get_insert_idx(pos_dict, ft_name)
    at_end = append or idx == -1
    if at_end:
        cells += [get_doc_cell(ft_name), get_empty_cell()]
        return cells, pos_dict
    cells.insert(idx, get_doc_cell(ft_name))
    cells.insert(idx+1, get_empty_cell())
    # Two cells went in, so everything from `ft_name` onwards moves down by two.
    pos_dict = update_pos(pos_dict, ft_name, 2)
    return cells, pos_dict
184
185
def get_doc_path(mod, dest_path):
    "Path of the notebook documenting `mod` inside `dest_path`: `strip_fastai` of the module name plus `.ipynb`."
    nb_filename = f'{strip_fastai(mod.__name__)}.ipynb'
    return os.path.join(dest_path, nb_filename)
188
189
def generate_missing_metadata(dest_file):
    """Append an `update_nb_metadata(...)` code cell for `dest_file` to the sibling `jekyll_metadata.ipynb`.

    Nothing is written when either notebook is missing (a message is printed) or
    when the metadata notebook already holds a cell for this file. The generated
    call carries the entries of the notebook's existing `jekyll` metadata.
    """
    fn = Path(dest_file)
    meta_fn = fn.parent/'jekyll_metadata.ipynb'
    if not (fn.exists() and meta_fn.exists()):
        return print('Could not find notebooks:', fn, meta_fn)
    metadata_nb = read_nb(meta_fn)

    if has_metadata_cell(metadata_nb['cells'], fn.name): return
    jmd = read_nb(fn)['metadata'].get('jekyll', {})
    fmt_params = ''.join(f',\n {k}={stringify(v)}' for k,v in jmd.items())
    call_src = f"update_nb_metadata('{Path(fn).name}'{fmt_params})"
    metadata_nb['cells'].append(get_code_cell(call_src, hidden=False))
    write_nb(metadata_nb, meta_fn)
203
204
def update_nb_metadata(nb_path=None, title=None, summary=None, keywords='fastai', overwrite=True, **kwargs):
    "Store the non-None values of `title`, `summary`, `keywords` and `kwargs` as the notebook's `jekyll` metadata, then write and sign it."
    # `overwrite` is part of the signature but is not consulted by this function.
    nb = read_nb(nb_path)
    candidates = dict(title=title, summary=summary, keywords=keywords)
    candidates.update(kwargs)
    data = {}
    for key, value in candidates.items():
        if value is not None: data[key] = value # remove none values
    if not data: return
    nb['metadata']['jekyll'] = data
    write_nb(nb, nb_path)
    NotebookNotary().sign(nb)
213
214
def has_metadata_cell(cells, fn):
    """Return the first cell whose source contains `update_nb_metadata('<fn>'`, or None.

    `fn` is matched literally: it is escaped before being placed in the pattern, so
    the dots of a file name (or any other regex metacharacter) cannot match
    unrelated text or break the search.
    """
    pattern = re.compile(r"update_nb_metadata\('" + re.escape(str(fn)) + "'")
    for c in cells:
        if pattern.search(c['source']): return c
    return None
217
218
def stringify(s):
    """Return `s` as a Python string literal when it is a `str`; return it unchanged otherwise.

    `repr` is used so that quotes, backslashes and newlines inside `s` are escaped
    and the result remains valid source text; for plain strings the output is the
    same single-quoted form as before.
    """
    return repr(s) if isinstance(s, str) else s
219
220
# Captures the dotted fastai module name of a `from fastai... import` statement.
IMPORT_RE = re.compile(r"from (fastai[\.\w_]*)")
def get_imported_modules(cells, nb_module_name=''):
    "Finds all submodules of notebook - sorted by submodules > top level modules > manual imports. This gives notebook imports priority"
    module_names = get_top_level_modules()
    nb_imports = []
    for cell in cells:
        if cell['cell_type'] != 'code': continue
        nb_imports.extend(found.group(1) for found in IMPORT_RE.finditer(cell['source']))
    parts = nb_module_name.split('.')
    # Imports parent modules - a.b.c = [a, a.b, a.b.c]
    parent_modules = ['.'.join(parts[:(x+1)]) for x in range_of(parts)]
    imported = []
    for mod_name in module_names + nb_imports + parent_modules:
        mod = import_mod(mod_name, ignore_errors=True)
        if mod is not None: imported.append(mod)
    return imported
230
231
def get_top_level_modules(num_levels=1):
    "Names of the fastai modules nested at most `num_levels` dots deep, deepest first."
    mod_dir = Path(import_mod('fastai').__file__).parent
    shallow = [name for name in get_module_names(mod_dir) if name.count('.') <= num_levels]
    # Submodules first (sorted by periods)
    return sorted(shallow, key=lambda s: s.count('.'), reverse=True)
235
236
# Markdown headers that open the "new" and "undocumented" sections of a doc notebook.
NEW_FT_HEADER = '## New Methods - Please document or move to the undocumented section'
UNDOC_HEADER = '## Undocumented Methods - Methods moved below this line will intentionally be hidden'
def parse_sections(cells):
    """Split `cells` into `(old_cells, undoc_cells, new_cells)`.

    Cells go to the regular section until a markdown cell starting with
    `UNDOC_HEADER` or `NEW_FT_HEADER` switches the active section; the header cell
    itself belongs to the section it opens. A section that ends up empty is
    replaced by a list holding just its header cell.
    """
    regular, undocumented, fresh = [], [], []
    switches = ((UNDOC_HEADER, undocumented), (NEW_FT_HEADER, fresh))
    active = regular
    for cell in cells:
        if cell['cell_type'] == 'markdown':
            for header, bucket in switches:
                if re.match(header, cell['source']): active = bucket
        active.append(cell)
    if not undocumented: undocumented = [get_md_cell(UNDOC_HEADER)]
    if not fresh: fresh = [get_md_cell(NEW_FT_HEADER)]
    return regular, undocumented, fresh
249
250
def remove_undoc_cells(cells):
    "Keep only the cells that precede the undocumented/new section headers (first result of `parse_sections`)."
    return parse_sections(cells)[0]
253
254
# currently code vbox sub-cells mainly
255
def remove_code_cell_jupyter_widget_state_elem(cells):
    """Drop, in place, the widget-view outputs of every code cell and return `cells`.

    An output is removed when it has a `data` entry containing the
    `application/vnd.jupyter.widget-view+json` mime type. Outputs are read with
    item access only, so plain dicts work as well as nbformat nodes.
    """
    widget_mime = 'application/vnd.jupyter.widget-view+json'
    for c in cells:
        if c['cell_type'] != 'code' or 'outputs' not in c: continue
        c['outputs'] = [out for out in c['outputs']
                        if not ('data' in out and widget_mime in out['data'])]
    return cells
261
262
def update_module_page(mod, dest_path='.'):
    """Update the documentation notebook of a given module and return its path.

    Markdown cells are re-linked, the cells describing exported global variables
    are refreshed (or appended when missing), and a doc cell plus an empty cell is
    added under the `NEW_FT_HEADER` section for every name from `get_ft_names`
    that has no `show_doc` cell yet. New names are added in the order
    `get_ft_names` returns them, so repeated runs produce the same notebook.
    """
    doc_path = get_doc_path(mod, dest_path)
    strip_name = strip_fastai(mod.__name__)
    nb = read_nb(doc_path)
    cells = nb['cells']

    link_markdown_cells(cells, get_imported_modules(cells, mod.__name__))

    type_dict = read_nb_types(cells)
    gvar_map = get_global_vars(mod)
    for name in get_exports(mod):
        if name not in gvar_map: continue
        code = gvar_map[name]
        if name in type_dict: cells[type_dict[name]] = get_md_cell(code)
        else: cells.append(get_md_cell(code))

    pos_dict = read_nb_content(cells, strip_name)
    ft_names = get_ft_names(mod, include_inner=True)
    # De-duplicate while keeping the order of `ft_names`; a set difference would
    # make the order of the appended cells vary from run to run.
    seen, new_fts = set(pos_dict), []
    for ft_name in ft_names:
        if ft_name in seen: continue
        seen.add(ft_name)
        new_fts.append(ft_name)
    if new_fts: print(f'Found new fuctions for {mod}. Please document:\n{new_fts}')
    existing, undoc_cells, new_cells = parse_sections(cells)
    for ft_name in new_fts: new_cells.extend([get_doc_cell(ft_name), get_empty_cell()])
    # `new_cells` always holds at least its header; only rebuild when it has real content.
    if len(new_cells) > 1: nb['cells'] = existing + undoc_cells + new_cells

    write_nb(nb, doc_path)
    return doc_path
289
290
def link_nb(nb_path):
    "Re-link the markdown cells of the notebook at `nb_path`, write it back, then sign the file as re-read from disk."
    nb = read_nb(nb_path)
    cells = nb['cells']
    modules = get_imported_modules(cells, Path(nb_path).stem)
    link_markdown_cells(cells, modules)
    write_nb(nb, nb_path)
    saved = read_nb(nb_path)
    NotebookNotary().sign(saved)
296
297
def get_module_from_notebook(doc_path):
    "Module name for a notebook path: `fastai.` followed by the file stem. Assumes the module lives in the fastai package"
    stem = Path(doc_path).stem
    return 'fastai.' + stem
300
301
def check_nbconvert_version():
    "Assert that the installed nbconvert is at least version 5.4.0."
    import nbconvert
    minimum = (5,4,0)
    assert nbconvert.version_info >= minimum, "Please update nbconvert to >=5.4 for consistent .html output"
304
305
def update_notebooks(source_path, dest_path=None, update_html=True, document_new_fns=False,
                     update_nb_links=True, html_path=None, force=False):
    """`source_path` can be a directory or a file. Assume all modules reside in the fastai directory.

    - Notebook file: optionally add doc cells for new functions (`document_new_fns`),
      make sure it has a jekyll metadata cell, optionally re-link and re-execute its
      show_doc cells (`update_nb_links`), and optionally convert it to html
      (`update_html`). The conversion is skipped when the html file is newer than
      the notebook, unless `force` is set.
    - Name starting with `fastai.`: treated as a module; its notebook is created in
      `dest_path` when missing, then processed as above.
    - Directory: every `*.ipynb` inside is processed as above.

    `force` is forwarded on the recursive calls so that it also applies when the
    entry point is a module name or a directory.
    """
    from .convert2html import convert_nb
    source_path = Path(source_path)

    if source_path.is_file():
        dest_path = source_path.parent if dest_path is None else Path(dest_path)
        html_path = dest_path/'..'/'docs' if html_path is None else Path(html_path)
        doc_path = source_path
        assert source_path.suffix == '.ipynb', 'Must update from notebook or module'
        if document_new_fns:
            mod = import_mod(get_module_from_notebook(source_path))
            if not mod: print('Could not find module for path:', source_path)
            elif mod.__file__.endswith('__init__.py'): pass
            else: update_module_page(mod, dest_path)
        generate_missing_metadata(doc_path)
        if update_nb_links:
            print(f'Updating notebook {doc_path}. Please wait...')
            link_nb(doc_path)
            execute_nb(doc_path, {'metadata': {'path': doc_path.parent}}, show_doc_only=True)
        if update_html:
            check_nbconvert_version()
            html_fn = html_path/doc_path.with_suffix('.html').name
            if not force and html_fn.is_file():
                in_mod = os.path.getmtime(doc_path)
                out_mod = os.path.getmtime(html_fn)
                # html is already newer than the notebook: nothing to regenerate
                if in_mod < out_mod: return
            convert_nb(doc_path, html_path)

    elif (source_path.name.startswith('fastai.')):
        # Do module update
        assert dest_path is not None, 'To update a module, you must specify a destination folder for where notebook resides'
        mod = import_mod(source_path.name)
        if not mod: return print('Could not find module for:', source_path)
        doc_path = Path(dest_path)/(strip_fastai(mod.__name__)+'.ipynb')
        if not doc_path.exists():
            print('Notebook does not exist. Creating:', doc_path)
            create_module_page(mod, dest_path)
        update_notebooks(doc_path, dest_path=dest_path, update_html=update_html, document_new_fns=document_new_fns,
                         update_nb_links=update_nb_links, html_path=html_path, force=force)
    elif source_path.is_dir():
        for f in sorted(Path(source_path).glob('*.ipynb')):
            update_notebooks(f, dest_path=dest_path, update_html=update_html, document_new_fns=document_new_fns,
                             update_nb_links=update_nb_links, html_path=html_path, force=force)
    else: print('Could not resolve source file:', source_path)
351
352