CoCalc -- gen_notebooks.py

GitHub Repository: jantic/deoldify
Path: blob/master/fastai/gen_doc/gen_notebooks.py
781 views
1
"`gen_doc.nbdoc` generates notebook documentation from module functions and links to correct places"
2
import pkgutil, inspect, sys,os, importlib,json,enum,warnings,nbformat,re
3
from IPython.core.display import display, Markdown
4
from nbconvert.preprocessors import ExecutePreprocessor
5
from nbformat.sign import NotebookNotary
6
from pathlib import Path
7
from .core import *
8
from .nbdoc import *
9

10
__all__ = ['create_module_page', 'update_module_page', 'import_mod',
11
           'link_nb', 'update_notebooks', 'generate_missing_metadata', 'update_nb_metadata']
12

13
def get_empty_notebook():
    "Skeleton notebook dict: Python 3 kernel metadata and nbformat 4.2, without a `cells` entry."
    #TODO: check python version and nbformat
    kernelspec = {'display_name': 'Python 3', 'language': 'python', 'name': 'python3'}
    language_info = {
        'codemirror_mode': {'name': 'ipython', 'version': 3},
        'file_extension': '.py',
        'mimetype': 'text/x-python',
        'name': 'python',
        'nbconvert_exporter': 'python',
        'pygments_lexer': 'ipython3',
        'version': '3.6.6',
    }
    # A new dict is built on every call, so callers may mutate the result freely.
    return {'metadata': {'kernelspec': kernelspec, 'language_info': language_info},
            'nbformat': 4,
            'nbformat_minor': 2}
28

29
def get_md_cell(source, metadata=None):
    "Build a markdown cell dict holding `source`; a missing `metadata` becomes a fresh empty dict."
    if metadata is None: metadata = {}
    return dict(cell_type='markdown', metadata=metadata, source=source)
34

35
def get_empty_cell(ctype='markdown'):
    "Build a blank cell dict of type `ctype` with empty metadata and an empty source list."
    cell = {'cell_type': ctype}
    cell['metadata'] = {}
    cell['source'] = []
    return cell
38

39
def get_code_cell(code, hidden=False):
    "Build a trusted code cell dict for `code` with no outputs; `hidden` is stored as its `hide_input` flag."
    cell_meta = {'hide_input': hidden, 'trusted': True}
    return dict(cell_type='code', execution_count=0, metadata=cell_meta, source=code, outputs=[])
46

47
def get_doc_cell(func_name):
    "Hidden code cell whose source is the `show_doc(...)` call for `func_name`."
    show_doc_call = 'show_doc({})'.format(func_name)
    return get_code_cell(show_doc_call, hidden=True)
51

52
def get_global_vars(mod):
    """Return the variables assigned at module level in the source file of `mod`.

    The file at `mod.__file__` is parsed with `ast`. For every assignment whose first target is a
    plain name and whose source line starts with that name at column 0, the result maps the name
    to the back-ticked source line followed by `get_source_link(mod, lineno)`. `__all__` is skipped.
    Only the first physical line of a multi-line assignment is captured.
    """
    # https://stackoverflow.com/questions/8820276/docstring-for-variable/31764368#31764368
    import ast,re
    # Python source defaults to UTF-8, so do not depend on the platform's locale encoding.
    with open(mod.__file__, 'r', encoding='utf-8') as f: fstr = f.read()
    flines = fstr.splitlines()
    d = {}
    for node in ast.walk(ast.parse(fstr)):
        if isinstance(node,ast.Assign) and hasattr(node.targets[0], 'id'):
            key,lineno = node.targets[0].id,node.targets[0].lineno
            # `lineno` is 1-based while `flines` is 0-based: without the -1 the *next* line is
            # inspected (and the last line of the file raises IndexError).
            codestr = flines[lineno-1]
            match = re.match(rf"^({re.escape(key)})\s*=\s*.*", codestr)
            # Anchoring at column 0 rejects indented (function/class level) assignments.
            if match and match.group(1) != '__all__': # only top level assignment
                d[key] = f'`{codestr}` {get_source_link(mod, lineno)}'
    return d
67

68
def write_nb(nb, nb_path, mode='w'):
    "Serialize the notebook dict `nb` as nbformat version 4 and write it to `nb_path`, opened with `mode`."
    with open(nb_path, mode) as out:
        node = nbformat.from_dict(nb)
        out.write(nbformat.writes(node, version=4))
70

71
class ExecuteShowDocPreprocessor(ExecutePreprocessor):
    "An `ExecutePreprocessor` that only runs code cells whose source matches `IMPORT_RE` or `SHOW_DOC_RE`."
    def preprocess_cell(self, cell, resources, index):
        "Delegate `cell` to the parent class when it qualifies; otherwise return `cell` and `resources` as received."
        if 'source' not in cell or cell.cell_type != "code": return cell, resources
        src = cell['source']
        if not (IMPORT_RE.search(src) or SHOW_DOC_RE.search(src)): return cell, resources
        return super().preprocess_cell(cell, resources, index)
78

79
def execute_nb(fname, metadata=None, save=True, show_doc_only=False):
    "Run notebook `fname` in a `python3` kernel, passing `metadata` as preprocessing resources; rewrite and sign it when `save`."
    # Any module used in the notebook that isn't inside must be in the same directory as this script
    with open(fname) as src: nb = nbformat.read(src, as_version=4)
    # `show_doc_only` restricts execution to the cells accepted by `ExecuteShowDocPreprocessor`.
    if show_doc_only: runner = ExecuteShowDocPreprocessor(timeout=600, kernel_name='python3')
    else:             runner = ExecutePreprocessor(timeout=600, kernel_name='python3')
    runner.preprocess(nb, metadata or {})
    if not save: return
    with open(fname, 'wt') as dst: nbformat.write(nb, dst)
    NotebookNotary().sign(nb)
90

91
def _symbol_skeleton(name):
    "Cell pair for `name`: its `show_doc` code cell, then a markdown cell containing the back-ticked name."
    doc_cell = get_doc_cell(name)
    text_cell = get_md_cell(f"`{name}`")
    return [doc_cell, text_cell]
92

93
def create_module_page(mod, dest_path, force=False):
    """Create the documentation notebook for module `mod` in path `dest_path` and return its path.

    The notebook holds, in order: a title cell and an introduction cell, a hidden import cell,
    the global variable cells (if any), a `show_doc`/markdown cell pair for every name returned by
    `get_ft_names` that `mod` actually has, and finally the `UNDOC_HEADER` cell. It is written to
    `get_doc_path(mod, dest_path)` and then executed with `execute_nb`.

    Unless `force` is true the file is opened in exclusive-creation mode (`'x'`), so an existing
    notebook makes `open` raise `FileExistsError` instead of being overwritten.
    """
    nb = get_empty_notebook()
    mod_name = mod.__name__
    strip_name = strip_fastai(mod_name)
    init_cell = [get_md_cell(f'## Title for {strip_name} (use plain english, not module name!)'), get_md_cell('Type an introduction of the package here.')]
    cells = [get_code_cell(f'from fastai.gen_doc.nbdoc import *\nfrom {mod_name} import * ', True)]

    gvar_map = get_global_vars(mod)
    if gvar_map: cells.append(get_md_cell('### Global Variable Definitions:'))
    # Only exported names get a global-variable cell.
    for name in get_exports(mod):
        if name in gvar_map: cells.append(get_md_cell(gvar_map[name]))

    for ft_name in get_ft_names(mod, include_inner=True):
        if not hasattr(mod, ft_name):
            warnings.warn(f"Module {strip_name} doesn't have a function named {ft_name}.")
            continue
        cells += _symbol_skeleton(ft_name)
    nb['cells'] = init_cell + cells + [get_md_cell(UNDOC_HEADER)]

    doc_path = get_doc_path(mod, dest_path)
    write_nb(nb, doc_path, 'w' if force else 'x')
    execute_nb(doc_path)
    return doc_path
118

119
_default_exclude = ['.ipynb_checkpoints', '__pycache__', '__init__.py', 'imports']

def get_module_names(path_dir, exclude=None):
    """Search a given `path_dir` and return all the modules contained inside except those in `exclude`.

    `path_dir` is a `pathlib.Path`. The result starts with the directory's own name, followed by
    `<dir>.<stem>` for each `.py` file and the recursively collected names of each sub-directory,
    all prefixed with `<dir>.`. Entries are visited directories first, then by name descending.
    An entry is skipped when its name ends with any string in `exclude` (default: `_default_exclude`);
    the same `exclude` list applies at every depth.
    """
    if exclude is None: exclude = _default_exclude
    files = sorted(path_dir.glob('*'), key=lambda x: (x.is_dir(), x.name), reverse=True) # directories first
    res = [f'{path_dir.name}']
    for f in files:
        # Suffix match covers both exact directory names and file endings such as `__init__.py`.
        if any(f.name.endswith(ex) for ex in exclude): continue

        if f.suffix == '.py': res.append(f'{path_dir.name}.{f.stem}')
        # Pass `exclude` down so that custom exclusions are honoured in sub-packages too.
        elif f.is_dir(): res += [f'{path_dir.name}.{name}' for name in get_module_names(f, exclude)]
    return res
133

134
def read_nb(fname):
    "Load the notebook stored at `fname`, parsed as nbformat version 4."
    with open(fname,'r') as src:
        raw = src.read()
        return nbformat.reads(raw, as_version=4)
137

138
SHOW_DOC_RE = re.compile(r"show_doc\(([\w\.]*)")
def read_nb_content(cells, mod_name):
    "Map each name passed to `show_doc(` inside a code cell to the index of that cell in `cells`."
    # `mod_name` is accepted but not used here. A name shown in several cells keeps the last index.
    positions = {}
    for idx, cell in enumerate(cells):
        if cell['cell_type'] != 'code': continue
        positions.update((name, idx) for name in SHOW_DOC_RE.findall(cell['source']))
    return positions
147

148
def read_nb_types(cells):
    "Map the name opening each `name = ...` markdown cell (optionally after `<code>` or a backtick) to that cell's index."
    assign_positions = {}
    for idx, cell in enumerate(cells):
        if cell['cell_type'] != 'markdown': continue
        found = re.match(r"^(?:<code>|`)?(\w*)\s*=\s*", cell['source'])
        if found: assign_positions[found.group(1)] = idx
    return assign_positions
155

156
def link_markdown_cells(cells, modules):
    "Replace, in place, the source of each markdown cell in `cells` with `link_docstring(modules, source)`."
    markdown_cells = (c for c in cells if c['cell_type'] == 'markdown')
    for md in markdown_cells: md['source'] = link_docstring(modules, md['source'])
161

162
def get_insert_idx(pos_dict, name):
    "Position stored for the first key of `pos_dict` (in dict order) that is case-insensitively >= `name`, or -1 if none is."
    for key, pos in pos_dict.items():
        if str.lower(key) >= str.lower(name): return pos
    return -1
168

169
def update_pos(pos_dict, start_key, nbr=2):
    "Add `nbr`, in place, to the position of every key that is case-insensitively >= `start_key`; return `pos_dict`."
    for key in pos_dict:
        if str.lower(key) < str.lower(start_key): continue
        # Rebinding the value of an existing key is safe while iterating over the dict.
        pos_dict[key] += nbr
    return pos_dict
174

175
def insert_cells(cells, pos_dict, ft_name, append=False):
    "Add the doc cell and an empty cell for `ft_name` to `cells` (in place) and return `(cells, pos_dict)`."
    idx = get_insert_idx(pos_dict, ft_name)
    if append or idx == -1:
        # No insertion point (or appending was requested): extend the list, positions stay as they are.
        cells += [get_doc_cell(ft_name), get_empty_cell()]
        return cells, pos_dict
    cells.insert(idx, get_doc_cell(ft_name))
    cells.insert(idx+1, get_empty_cell())
    # Two cells were inserted, so the recorded positions from `ft_name` onwards move by 2.
    return cells, update_pos(pos_dict, ft_name, 2)
184

185
def get_doc_path(mod, dest_path):
    "Path inside `dest_path` of the `.ipynb` file named after `strip_fastai(mod.__name__)`."
    nb_filename = f'{strip_fastai(mod.__name__)}.ipynb'
    return os.path.join(dest_path, nb_filename)
188

189
def generate_missing_metadata(dest_file):
    "Append an `update_nb_metadata(...)` cell for notebook `dest_file` to the sibling `jekyll_metadata.ipynb` unless one exists."
    fn = Path(dest_file)
    meta_fn = fn.parent/'jekyll_metadata.ipynb'
    if not (fn.exists() and meta_fn.exists()): return print('Could not find notebooks:', fn, meta_fn)
    metadata_nb = read_nb(meta_fn)

    if has_metadata_cell(metadata_nb['cells'], fn.name): return
    # Carry the notebook's existing `jekyll` metadata over as keyword arguments of the generated call.
    jekyll_meta = read_nb(fn)['metadata'].get('jekyll', {})
    fmt_params = ''.join(f',\n    {k}={stringify(v)}' for k,v in jekyll_meta.items())
    call_src = f"update_nb_metadata('{fn.name}'{fmt_params})"
    metadata_nb['cells'].append(get_code_cell(call_src, hidden=False))
    write_nb(metadata_nb, meta_fn)
203

204
def update_nb_metadata(nb_path=None, title=None, summary=None, keywords='fastai', overwrite=True, **kwargs):
    "Store the non-None `title`, `summary`, `keywords` and `kwargs` as the notebook's `jekyll` metadata, then rewrite and sign it."
    # `overwrite` is accepted but not used here.
    nb = read_nb(nb_path)
    candidates = dict(title=title, summary=summary, keywords=keywords)
    candidates.update(kwargs)
    jekyll = {k: v for k, v in candidates.items() if v is not None} # remove none values
    if not jekyll: return
    nb['metadata']['jekyll'] = jekyll
    write_nb(nb, nb_path)
    NotebookNotary().sign(nb)
213

214
def has_metadata_cell(cells, fn):
    "Return the first cell in `cells` whose source contains `update_nb_metadata('<fn>'`, or None when there is none."
    # `fn` is a literal file name, so escape it: otherwise `.` matches any character and names
    # containing regex metacharacters (`+`, `(`, ...) would never match themselves.
    pattern = re.compile(r"update_nb_metadata\('" + re.escape(str(fn)) + "'")
    for c in cells:
        if pattern.search(c['source']): return c
    return None
217

218
def stringify(s):
    "Wrap `s` in single quotes when it is a string; return any other value unchanged."
    if not isinstance(s, str): return s
    return f"'{s}'"
219

220
IMPORT_RE = re.compile(r"from (fastai[\.\w_]*)")
def get_imported_modules(cells, nb_module_name=''):
    "Import and return, in order: the top-level fastai modules, the `from fastai...` imports of code cells, then `nb_module_name` and its parents."
    top_level = get_top_level_modules()
    nb_imports = []
    for cell in cells:
        for found in IMPORT_RE.finditer(cell['source']):
            if cell['cell_type'] == 'code': nb_imports.append(found.group(1))
    parts = nb_module_name.split('.')
    parent_modules = ['.'.join(parts[:(x+1)]) for x in range_of(parts)] # Imports parent modules - a.b.c = [a, a.b, a.b.c]
    # Names that `import_mod` cannot import come back as None and are dropped.
    imported = (import_mod(name, ignore_errors=True) for name in top_level + nb_imports + parent_modules)
    return [m for m in imported if m is not None]
230

231
def get_top_level_modules(num_levels=1):
    "Names from `get_module_names` on the `fastai` package directory having at most `num_levels` dots, most-dotted first."
    pkg_dir = Path(import_mod('fastai').__file__).parent
    shallow = [n for n in get_module_names(pkg_dir) if n.count('.') <= num_levels]
    shallow.sort(key=lambda s: s.count('.'), reverse=True) # Submodules first (sorted by periods)
    return shallow
235

236
NEW_FT_HEADER = '## New Methods - Please document or move to the undocumented section'
UNDOC_HEADER = '## Undocumented Methods - Methods moved below this line will intentionally be hidden'
def parse_sections(cells):
    "Split `cells` into `(old, undoc, new)` lists, switching list at each markdown cell that starts with a section header."
    sections = {'old': [], 'undoc': [], 'new': []}
    active = 'old'
    for cell in cells:
        if cell['cell_type'] == 'markdown':
            src = cell['source']
            if re.match(UNDOC_HEADER, src): active = 'undoc'
            if re.match(NEW_FT_HEADER, src): active = 'new'
        # The header cell itself is stored as the first cell of the section it opens.
        sections[active].append(cell)
    # A section that is absent from the notebook is returned as just its header cell.
    if not sections['undoc']: sections['undoc'] = [get_md_cell(UNDOC_HEADER)]
    if not sections['new']: sections['new'] = [get_md_cell(NEW_FT_HEADER)]
    return sections['old'], sections['undoc'], sections['new']
249

250
def remove_undoc_cells(cells):
    "Keep only the cells that `parse_sections` places before the undocumented/new section headers."
    return parse_sections(cells)[0]
253

254
# currently code vbox sub-cells mainly
def remove_code_cell_jupyter_widget_state_elem(cells):
    "Drop, in place, every code-cell output whose `data` contains a Jupyter widget view; return `cells`."
    widget_mime = 'application/vnd.jupyter.widget-view+json'
    for cell in cells:
        if cell['cell_type'] != 'code' or 'outputs' not in cell: continue
        kept = []
        for out in cell['outputs']:
            # Outputs are read both as mappings (`in`) and via attribute access (`.data`).
            if 'data' not in out or widget_mime not in out.data: kept.append(out)
        cell['outputs'] = kept
    return cells
261

262
def update_module_page(mod, dest_path='.'):
    """Update the documentation notebook of a given module and return its path.

    Steps, all applied to the notebook at `get_doc_path(mod, dest_path)`:
    1. link the markdown cells against the modules found by `get_imported_modules`;
    2. refresh (or append) the markdown cell of every exported global variable;
    3. append a `show_doc` cell plus an empty cell, under the `NEW_FT_HEADER` section, for every
       name from `get_ft_names` that no existing code cell already passes to `show_doc`;
    4. write the notebook back.
    """
    doc_path = get_doc_path(mod, dest_path)
    strip_name = strip_fastai(mod.__name__)
    nb = read_nb(doc_path)
    cells = nb['cells']

    link_markdown_cells(cells, get_imported_modules(cells, mod.__name__))

    type_dict = read_nb_types(cells)
    gvar_map = get_global_vars(mod)
    for name in get_exports(mod):
        if name not in gvar_map: continue
        code = gvar_map[name]
        # Replace the variable's existing markdown cell when there is one, else add a new cell at the end.
        if name in type_dict: cells[type_dict[name]] = get_md_cell(code)
        else: cells.append(get_md_cell(code))

    pos_dict = read_nb_content(cells, strip_name)
    ft_names = get_ft_names(mod, include_inner=True)
    # Keep the order of `ft_names` (de-duplicated): a set difference would yield an arbitrary
    # order that varies between runs and reshuffles the generated cells.
    new_fts = [name for name in dict.fromkeys(ft_names) if name not in pos_dict]
    if new_fts: print(f'Found new fuctions for {mod}. Please document:\n{new_fts}')
    existing, undoc_cells, new_cells = parse_sections(cells)
    for ft_name in new_fts: new_cells.extend([get_doc_cell(ft_name), get_empty_cell()])
    # `new_cells` always starts with its header; only re-assemble when it holds something more.
    if len(new_cells) > 1: nb['cells'] = existing + undoc_cells + new_cells

    write_nb(nb, doc_path)
    return doc_path
289

290
def link_nb(nb_path):
    "Link the markdown cells of the notebook at `nb_path`, write it back, then sign the re-read file."
    nb = read_nb(nb_path)
    modules = get_imported_modules(nb['cells'], Path(nb_path).stem)
    link_markdown_cells(nb['cells'], modules)
    write_nb(nb, nb_path)
    notary = NotebookNotary()
    notary.sign(read_nb(nb_path))
296

297
def get_module_from_notebook(doc_path):
    "Module name for a notebook path: `fastai.` followed by the file name without its last suffix."
    stem = Path(doc_path).stem
    return 'fastai.' + stem
300

301
def check_nbconvert_version():
    "Assert that the installed `nbconvert` is at least version 5.4.0."
    import nbconvert
    minimum = (5,4,0)
    assert nbconvert.version_info >= minimum, "Please update nbconvert to >=5.4 for consistent .html output"
304

305
def update_notebooks(source_path, dest_path=None, update_html=True, document_new_fns=False,
                     update_nb_links=True, html_path=None, force=False):
    """`source_path` can be a directory or a file. Assume all modules reside in the fastai directory.

    Dispatches on `source_path`:
    - an existing file (must be `.ipynb`): optionally document new functions
      (`document_new_fns`), make sure a metadata cell exists, optionally re-link and re-execute
      the `show_doc` cells (`update_nb_links`), and optionally convert to html (`update_html`);
    - a name starting with `fastai.`: treated as a module name; its notebook is created in
      `dest_path` if missing, then updated as a file;
    - a directory: every `*.ipynb` directly inside it is updated, in sorted order.

    `dest_path` defaults to the notebook's directory and `html_path` to `dest_path/../docs`.
    The html conversion is skipped when the html file is newer than the notebook, unless `force`
    is true; `force` is forwarded to the recursive calls.
    """
    from .convert2html import convert_nb
    source_path = Path(source_path)

    if source_path.is_file():
        dest_path = source_path.parent if dest_path is None else Path(dest_path)
        html_path = dest_path/'..'/'docs' if html_path is None else Path(html_path)
        doc_path = source_path
        assert source_path.suffix == '.ipynb', 'Must update from notebook or module'
        if document_new_fns:
            mod = import_mod(get_module_from_notebook(source_path))
            if not mod: print('Could not find module for path:', source_path)
            # Package `__init__` modules are left untouched.
            elif mod.__file__.endswith('__init__.py'): pass
            else: update_module_page(mod, dest_path)
        generate_missing_metadata(doc_path)
        if update_nb_links:
            print(f'Updating notebook {doc_path}. Please wait...')
            link_nb(doc_path)
            execute_nb(doc_path, {'metadata': {'path': doc_path.parent}}, show_doc_only=True)
        if update_html:
            check_nbconvert_version()
            html_fn = html_path/doc_path.with_suffix('.html').name
            if not force and html_fn.is_file():
                in_mod  = os.path.getmtime(doc_path)
                out_mod = os.path.getmtime(html_fn)
                # The html is already newer than the notebook: nothing to regenerate.
                if in_mod < out_mod: return
            convert_nb(doc_path, html_path)

    elif (source_path.name.startswith('fastai.')):
        # Do module update
        assert dest_path is not None, 'To update a module, you must specify a destination folder for where notebook resides'
        mod = import_mod(source_path.name)
        if not mod: return print('Could not find module for:', source_path)
        doc_path = Path(dest_path)/(strip_fastai(mod.__name__)+'.ipynb')
        if not doc_path.exists():
            print('Notebook does not exist. Creating:', doc_path)
            create_module_page(mod, dest_path)
        # Forward `force`, otherwise it is silently reset to False for the actual update.
        update_notebooks(doc_path, dest_path=dest_path, update_html=update_html, document_new_fns=document_new_fns,
                         update_nb_links=update_nb_links, html_path=html_path, force=force)
    elif source_path.is_dir():
        for f in sorted(Path(source_path).glob('*.ipynb')):
            update_notebooks(f, dest_path=dest_path, update_html=update_html, document_new_fns=document_new_fns,
                             update_nb_links=update_nb_links, html_path=html_path, force=force)
    else: print('Could not resolve source file:', source_path)
351

352
Product

Resources

Company