# Path: blob/main/converter/textbook-converter/textbook_converter/converter.py
# 3855 views
import json
import nbformat
import os
import shutil
import yaml

from pathlib import Path
from nbconvert.writers import FilesWriter

from . import TextbookExporter, mathigon_ximg_regex, html_img_regex


def get_notebook_node(nb_file_path):
    """Return a NotebookNode object read from the given notebook file.

    The notebook is read with ``nbformat.NO_CONVERT``. Any error raised while
    reading is printed and ``None`` is returned instead.
    """
    try:
        return nbformat.read(nb_file_path, nbformat.NO_CONVERT)
    except Exception as err:
        print(f'Error reading notebook: {err}')

    return None


def convert_notebook_node(
    nb_node, file_name, output_dir, section_id='', is_problem_set=False
):
    """Export a notebook node with ``TextbookExporter`` and write the result.

    ``file_name``, ``section_id`` and ``is_problem_set`` seed the
    ``resources['textbook']`` dict; values found under the notebook's own
    ``metadata['textbook']`` override those seeds. The exported output is
    written by a ``FilesWriter`` whose build directory is ``output_dir``.

    Returns ``(body, resources)`` on success. Any error is printed and
    ``(None, None)`` is returned.
    """
    try:
        exporter = TextbookExporter()
        resources = {
            'textbook': {
                'id': file_name,
                'section': section_id,
                'is_problem_set': is_problem_set
            }
        }

        if 'textbook' in nb_node['metadata']:
            # Notebook-level metadata wins over the defaults built above.
            resources['textbook'] = {
                **resources['textbook'],
                **nb_node['metadata']['textbook']
            }

        (body, resources) = exporter.from_notebook_node(nb_node, resources=resources)

        writer = FilesWriter()
        writer.build_directory = output_dir
        writer.write(
            output=body,
            resources=resources,
            notebook_name=file_name
        )

        return (body, resources)
    except Exception as err:
        print(f'Error exporting notebook: {err}')
        return None, None


def _merge_textbook_yaml(resources, key, yaml_file_path):
    """Merge ``resources['textbook'][key]`` into the yaml file at ``yaml_file_path``.

    Does nothing when ``resources`` has no ``'textbook'`` entry or that entry
    has no ``key``. Entries already in the file are kept unless the new data
    defines the same top-level key, in which case the new value wins.
    """
    if 'textbook' not in resources or key not in resources['textbook']:
        return

    new_entries = resources['textbook'][key]
    existing_entries = yml_to_dict(yaml_file_path) or {}
    merged_entries = {**existing_entries, **new_entries}

    # Same round trip the module has always done: serialise to JSON, re-load it
    # with yaml's BaseLoader, then dump the result as yaml.
    content = yaml.load(json.dumps(merged_entries), Loader=yaml.BaseLoader)
    with open(yaml_file_path, 'w', encoding='utf-8') as yaml_file:
        yaml_file.write(f'{yaml.dump(content)}')


def append_to_glossary_yaml(resources, yaml_output_path):
    """Merge the 'glossary' resources into 'glossary.yaml' in ``yaml_output_path``.
    """
    _merge_textbook_yaml(
        resources, 'glossary', os.path.join(yaml_output_path, 'glossary.yaml')
    )


def append_to_notations_yaml(resources, yaml_output_path):
    """Merge the 'formulas' resources into 'notations.yaml' in ``yaml_output_path``.
    """
    _merge_textbook_yaml(
        resources, 'formulas', os.path.join(yaml_output_path, 'notations.yaml')
    )


def append_to_styles(nb_node, output_path):
    """Create 'styles.less' in ``output_path`` if it does not exist yet.

    ``nb_node`` is accepted but not used.
    """
    styles_file_path = os.path.join(output_path, 'styles.less')
    styles_path = Path(styles_file_path).resolve()

    if not styles_path.exists():
        with open(styles_path, 'w', encoding='utf-8') as styles_file:
            styles_file.write('\n@import "../shared/shared";\n')


def append_to_ts(resources, source_path, output_path):
    """Create 'functions.ts' in ``output_path`` and append the notebook functions.

    When the output file does not exist it is copied from
    ``source_path/functions.ts`` if that file exists, otherwise it is created
    with a single shared import line. ``resources['textbook']['functions']``,
    when present, is then appended to the file.
    """
    ts_file_path = os.path.join(output_path, 'functions.ts')
    ts_path = Path(ts_file_path).resolve()

    if not ts_path.exists():
        src_ts_file_path = Path(os.path.join(source_path, 'functions.ts')).resolve()
        if not src_ts_file_path.exists():
            with open(ts_path, 'w', encoding='utf-8') as ts_file:
                ts_file.write('import * as shared from "../shared/shared";\n')
        else:
            shutil.copy(str(src_ts_file_path), str(ts_path))

    if 'textbook' in resources and 'functions' in resources['textbook']:
        with open(ts_path, 'a', encoding='utf-8') as ts_file:
            ts_file.write(f'\n\n{resources["textbook"]["functions"]}')


def append_to_index(resources, output_path):
    """Merge the 'index' resources into 'index.yaml' in ``output_path``.
    """
    _merge_textbook_yaml(
        resources, 'index', os.path.join(output_path, 'index.yaml')
    )


def convert_notebook_file(
    nb_file_path, output_dir=None, shared_dir=None, section_id=None, is_problem_set=False
):
    """Convert notebook file to Mathigon markdown format

    Output goes to ``output_dir`` (default: the notebook's own directory).
    Glossary and notations yaml files go to ``shared_dir`` (default:
    ``<output>/shared``), which is created when missing. Prints a message and
    returns ``None`` when the path does not exist or is not a file.
    """
    nb_path = Path(nb_file_path).resolve()

    if not nb_path.exists():
        print(f'{nb_path} not found')
        return None

    if not nb_path.is_file():
        print(f'{nb_path} is not a file')
        return None

    nb_node = get_notebook_node(str(nb_path))

    if nb_node:
        file_name = nb_path.stem
        output_path = output_dir if output_dir else str(nb_path.parent)
        shared_path = shared_dir if shared_dir else os.path.join(output_path, 'shared')

        if not os.path.exists(shared_path):
            os.makedirs(shared_path, exist_ok=True)

        print('converting', nb_path)

        (body, resources) = convert_notebook_node(
            nb_node,
            file_name,
            output_path,
            section_id,
            is_problem_set=is_problem_set
        )

        # A falsy body means the export failed (or produced nothing), so there
        # is nothing to merge into the side files.
        if body:
            append_to_glossary_yaml(resources, shared_path)
            append_to_notations_yaml(resources, shared_path)
            append_to_ts(resources, str(nb_path.parent), output_path)
            append_to_index(resources, output_path)


def convert_notebook_directory(
    nbs_dir_path,
    output_dir=None,
    shared_dir=None
):
    """Convert every '*.ipynb' file directly inside the directory to Mathigon format

    Prints a message and returns ``None`` when the path does not exist or is
    not a directory.
    """
    nbs_path = Path(nbs_dir_path).resolve()

    if not nbs_path.exists():
        print(f'{nbs_path} not found')
        return None

    if not nbs_path.is_dir():
        print(f'{nbs_path} is not a directory')
        return None

    print(f'converting notebooks in {nbs_path}')
    for nb_file_path in nbs_path.glob('*.ipynb'):
        convert_notebook_file(
            nb_file_path,
            output_dir=output_dir,
            shared_dir=shared_dir
        )


def yml_to_dict(yml_file_path):
    """Return the yaml file content loaded with ``yaml.BaseLoader``.

    Returns ``None`` when the path is not an existing file.
    """
    yml_path = Path(yml_file_path).resolve()

    if not yml_path.is_file():
        return None

    with open(yml_path, encoding='utf-8') as file:
        return yaml.load(file, Loader=yaml.BaseLoader)


def update_image_path(line, source_path):
    """Prefix a relative image src found in ``line`` with '/content/<source_path>/'.

    The src is taken from group 1 of ``mathigon_ximg_regex`` or, failing that,
    group 2 of ``html_img_regex``. Sources starting with '/', 'http' or 'data'
    are left alone, as are lines without a match.
    """
    img_src_path = None
    match = mathigon_ximg_regex.search(line)
    if match is not None:
        img_src_path = match.group(1)
    else:
        match = html_img_regex.search(line)
        if match is not None:
            img_src_path = match.group(2)

    if img_src_path and not img_src_path.startswith(('/', 'http', 'data')):
        return line.replace(img_src_path, f'/content/{source_path}/{img_src_path}')

    return line


def get_order_from_toc(toc_file_path, md_dir_path):
    """Return the chapter title and sections (in order) as defined in toc yaml

    The chapter is the first toc entry whose 'url' is a suffix of
    ``md_dir_path`` (with path separators normalised to '/'). Sections are
    returned as ``(id, url)`` tuples with any leading '/' stripped from the url.

    Returns ``(None, [])`` when the toc file is missing or empty, or when no
    chapter matches, so callers can fall back to another ordering.
    """
    md_path = md_dir_path.replace(os.path.sep, '/')
    chapters = yml_to_dict(toc_file_path) or []

    chapter = next((ch for ch in chapters if md_path.endswith(ch['url'])), None)
    if chapter is None:
        return None, []

    def get_sections(s):
        return (s['id'], s['url'][1:] if s['url'].startswith('/') else s['url'])

    return chapter['title'], [get_sections(s) for s in chapter['sections']]


def standalone(md_dir, section):
    """Turn section into a standalone course

    Renames the section's markdown file to 'content.md', copies the whole
    directory to a sibling directory named after ``section['id']``, then
    removes 'content.md', 'index.yaml' and 'functions.ts' from the original
    directory. Prints a message and returns ``None`` when ``md_dir`` does not
    exist or is not a directory.
    """
    md_dir_path = Path(md_dir).resolve()

    if not md_dir_path.exists():
        print(f'{md_dir_path} not found')
        return None

    if not md_dir_path.is_dir():
        print(f'{md_dir_path} is not a directory')
        return None

    # section md file name
    md_file_name = section['url'].split('/')[-1] + '.md'
    # rename md file to required name: `content.md`
    (md_dir_path / md_file_name).rename(md_dir_path / 'content.md')
    # move section files into own directory
    shutil.copytree(md_dir_path, md_dir_path.parent / section['id'])

    # clean up; 'index.yaml' is only written for notebooks that carry index
    # data, so a missing file is skipped rather than treated as an error
    for leftover_name in ('content.md', 'index.yaml', 'functions.ts'):
        leftover_path = md_dir_path / leftover_name
        if leftover_path.exists():
            leftover_path.unlink()


def merge(md_dir, toc_file_path, output_dir=None):
    """Merge markdown files in directory into single file

    The merged file is 'content.md' inside ``output_dir`` (default:
    ``md_dir``). When the toc yields sections for this directory, files are
    merged in toc order, each under its section id, and relative image paths
    are rewritten. Otherwise every '*.md' file in the directory (except
    'content.md') is merged in sorted order, using the file stem as section id
    and leaving image paths untouched.
    """
    md_dir_path = Path(md_dir).resolve()

    if not md_dir_path.exists():
        print(f'{md_dir_path} not found')
        return None

    if not md_dir_path.is_dir():
        print(f'{md_dir_path} is not a directory')
        return None

    merged_file_name = 'content.md'

    output_path = output_dir if output_dir else str(md_dir_path)
    merged_md_path = os.path.join(output_path, merged_file_name)

    # Assumes section urls in toc corresponds to nb/md file names
    title, sections = get_order_from_toc(toc_file_path, str(md_dir_path))

    # Pair every markdown file with its section id and image directory up
    # front, so the write loop never indexes `sections` by position.
    if sections:
        entries = [
            (
                section_id,
                url.split('/')[0],
                f'{os.path.join(str(md_dir_path), url.split("/")[-1])}.md'
            )
            for section_id, url in sections
            if url != merged_file_name
        ]
    else:
        entries = [
            (md_file.stem, None, md_file)
            for md_file in sorted(md_dir_path.glob('*.md'))
            if md_file.name != merged_file_name
        ]

    with open(merged_md_path, 'w', encoding='utf-8') as out_file:
        if title and sections:
            out_file.write(f'# {title}\n\n')
        for count, (section_id, image_dir, md_path) in enumerate(entries):
            if count > 0:
                out_file.write('\n\n---\n')
            out_file.write(f'\n> section: {section_id}\n\n')
            with open(md_path, encoding='utf-8') as in_file:
                for line in in_file:
                    if image_dir is not None:
                        line = update_image_path(line, image_dir)
                    out_file.write(line)


def convert(
    nb_file_or_dir_path,
    output_dir='',
    shared_dir='shared',
    section_id=None,
    is_problem_set=False
):
    """Convert notebook file or files in directory to Mathigon markdown

    Dispatches to ``convert_notebook_file`` for a file and to
    ``convert_notebook_directory`` otherwise. ``section_id`` and
    ``is_problem_set`` only apply to the single-file case. Prints a message
    and returns ``None`` when the path does not exist.
    """
    nbs_path = Path(nb_file_or_dir_path)

    if not nbs_path.exists():
        print(f'{nbs_path} not found')
        return None

    if nbs_path.is_file():
        convert_notebook_file(
            nb_file_or_dir_path,
            output_dir=output_dir,
            shared_dir=shared_dir,
            section_id=section_id,
            is_problem_set=is_problem_set
        )
    else:
        convert_notebook_directory(
            nb_file_or_dir_path,
            output_dir=output_dir,
            shared_dir=shared_dir
        )