Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
quantum-kittens
GitHub Repository: quantum-kittens/platypus
Path: blob/main/converter/textbook-converter/textbook_converter/converter.py
3855 views
1
import json
2
import nbformat
3
import os
4
import shutil
5
import yaml
6
7
from pathlib import Path
8
from nbconvert.writers import FilesWriter
9
10
from . import TextbookExporter, mathigon_ximg_regex, html_img_regex
11
12
13
def get_notebook_node(nb_file_path):
    """Read the given notebook file and return it as a NotebookNode.

    The file is read with ``nbformat.read`` using ``nbformat.NO_CONVERT``.
    Any exception raised while reading is printed and ``None`` is returned
    instead of propagating the error.
    """
    node = None
    try:
        node = nbformat.read(nb_file_path, nbformat.NO_CONVERT)
    except Exception as read_error:
        print(f'Error reading notebook: {read_error}')
    return node
23
24
25
def convert_notebook_node(
    nb_node, file_name, output_dir, section_id='', is_problem_set=False
):
    """Export a notebook node with TextbookExporter and write the output.

    The exporter receives a ``resources['textbook']`` dict seeded with the
    file name (as ``id``), the section id and the problem-set flag; any
    ``textbook`` entry in the notebook metadata is merged on top of it.
    The exported body is written into ``output_dir`` via ``FilesWriter``.

    Returns the ``(body, resources)`` pair from the exporter, or
    ``(None, None)`` when any step raises (the error is printed).
    """
    try:
        exporter = TextbookExporter()

        textbook_defaults = {
            'id': file_name,
            'section': section_id,
            'is_problem_set': is_problem_set
        }
        resources = {'textbook': textbook_defaults}

        # Values declared in the notebook metadata win over the defaults.
        notebook_metadata = nb_node['metadata']
        if 'textbook' in notebook_metadata:
            resources['textbook'] = {
                **resources['textbook'],
                **notebook_metadata['textbook']
            }

        body, resources = exporter.from_notebook_node(nb_node, resources=resources)

        files_writer = FilesWriter()
        files_writer.build_directory = output_dir
        files_writer.write(output=body, resources=resources, notebook_name=file_name)

        return (body, resources)
    except Exception as export_error:
        print(f'Error exporting notebook: {export_error}')
        return None, None
57
58
59
def append_to_glossary_yaml(resources, yaml_output_path):
    """Merge the 'glossary' entries of ``resources`` into 'glossary.yaml'.

    Does nothing unless ``resources['textbook']['glossary']`` is present.
    Otherwise the entries already stored in ``glossary.yaml`` (inside
    ``yaml_output_path``) are combined with the new ones, new entries
    replacing existing keys, and the file is rewritten.
    """
    if 'textbook' not in resources or 'glossary' not in resources['textbook']:
        return

    target_path = os.path.join(yaml_output_path, 'glossary.yaml')

    incoming = resources["textbook"]["glossary"]
    stored = yml_to_dict(target_path) or {}
    merged = {**stored, **incoming}

    # The merged mapping is serialised to JSON and re-read with BaseLoader
    # before dumping -- presumably to reduce it to plain built-in types.
    normalized = yaml.load(json.dumps(merged), Loader=yaml.BaseLoader)
    with open(target_path, 'w', encoding='utf-8') as target_file:
        target_file.write(yaml.dump(normalized))
72
73
74
def append_to_notations_yaml(resources, yaml_output_path):
    """Merge the 'formulas' entries of ``resources`` into 'notations.yaml'.

    Does nothing unless ``resources['textbook']['formulas']`` is present.
    Otherwise the entries already stored in ``notations.yaml`` (inside
    ``yaml_output_path``) are combined with the new ones, new entries
    replacing existing keys, and the file is rewritten.
    """
    if 'textbook' not in resources or 'formulas' not in resources['textbook']:
        return

    target_path = os.path.join(yaml_output_path, 'notations.yaml')

    incoming = resources["textbook"]["formulas"]
    stored = yml_to_dict(target_path) or {}
    merged = {**stored, **incoming}

    # The merged mapping is serialised to JSON and re-read with BaseLoader
    # before dumping -- presumably to reduce it to plain built-in types.
    normalized = yaml.load(json.dumps(merged), Loader=yaml.BaseLoader)
    with open(target_path, 'w', encoding='utf-8') as target_file:
        target_file.write(yaml.dump(normalized))
87
88
89
def append_to_styles(nb_node, output_path):
    """Create 'styles.less' in ``output_path`` unless it already exists.

    A new file contains only the import of the shared stylesheet. An
    existing file is left untouched. ``nb_node`` is accepted but not used.
    """
    target = Path(os.path.join(output_path, 'styles.less')).resolve()
    if target.exists():
        return

    target.write_text('\n@import "../shared/shared";\n', encoding='utf-8')
98
99
100
def append_to_ts(resources, source_path, output_path):
    """Create 'functions.ts' in ``output_path`` and append notebook functions.

    When the output file does not exist yet it is seeded from
    ``source_path/functions.ts`` if that file exists, otherwise with a
    single line importing the shared module. Afterwards, if
    ``resources['textbook']['functions']`` is present, its value is
    appended to the file after two newlines.
    """
    target = Path(os.path.join(output_path, 'functions.ts')).resolve()

    if not target.exists():
        template = Path(os.path.join(source_path, 'functions.ts')).resolve()
        if template.exists():
            shutil.copy(str(template), str(target))
        else:
            target.write_text(
                'import * as shared from "../shared/shared";\n',
                encoding='utf-8'
            )

    has_functions = 'textbook' in resources and 'functions' in resources['textbook']
    if has_functions:
        with open(target, 'a', encoding='utf-8') as target_file:
            target_file.write(f'\n\n{resources["textbook"]["functions"]}')
117
118
119
def append_to_index(resources, output_path):
    """Merge the 'index' entries of ``resources`` into 'index.yaml'.

    Does nothing unless ``resources['textbook']['index']`` is present.
    Otherwise the entries already stored in ``index.yaml`` (inside
    ``output_path``) are combined with the new ones, new entries replacing
    existing keys, and the file is rewritten.
    """
    if 'textbook' not in resources or 'index' not in resources['textbook']:
        return

    target_path = os.path.join(output_path, 'index.yaml')

    incoming = resources["textbook"]["index"]
    stored = yml_to_dict(target_path) or {}
    merged = {**stored, **incoming}

    # The merged mapping is serialised to JSON and re-read with BaseLoader
    # before dumping -- presumably to reduce it to plain built-in types.
    normalized = yaml.load(json.dumps(merged), Loader=yaml.BaseLoader)
    with open(target_path, 'w', encoding='utf-8') as target_file:
        target_file.write(yaml.dump(normalized))
132
133
134
def convert_notebook_file(
    nb_file_path, output_dir=None, shared_dir=None, section_id=None, is_problem_set=False
):
    """Convert a single notebook file to Mathigon markdown format.

    Output goes to ``output_dir`` (default: the notebook's own directory).
    Glossary and notation yaml files go to ``shared_dir`` (default: a
    'shared' directory inside the output directory), which is created when
    missing. 'functions.ts' and 'index.yaml' are updated in the output
    directory.

    Always returns ``None``. A message is printed when the path does not
    exist or is not a file.
    """
    nb_path = Path(nb_file_path).resolve()

    if not nb_path.exists():
        print(f'{nb_path} not found')
        return None

    if not nb_path.is_file():
        print(f'{nb_path} is not a file')
        return None

    nb_node = get_notebook_node(str(nb_path))
    if not nb_node:
        return None

    notebook_dir = str(nb_path.parent)
    output_path = output_dir or notebook_dir
    shared_path = shared_dir or os.path.join(output_path, 'shared')

    if not os.path.exists(shared_path):
        os.makedirs(shared_path, exist_ok=True)

    print('converting', nb_path)

    body, resources = convert_notebook_node(
        nb_node,
        nb_path.stem,
        output_path,
        section_id,
        is_problem_set=is_problem_set
    )

    # Side files are only updated when the export produced a body.
    if body:
        append_to_glossary_yaml(resources, shared_path)
        append_to_notations_yaml(resources, shared_path)
        append_to_ts(resources, notebook_dir, output_path)
        append_to_index(resources, output_path)
174
175
176
def convert_notebook_directory(
    nbs_dir_path,
    output_dir=None,
    shared_dir=None
):
    """Convert every '*.ipynb' file directly inside a directory.

    Each notebook is passed to ``convert_notebook_file`` with the given
    ``output_dir`` and ``shared_dir``. Always returns ``None``. A message
    is printed when the path does not exist or is not a directory.
    """
    nbs_path = Path(nbs_dir_path).resolve()

    if not nbs_path.exists():
        print(f'{nbs_path} not found')
        return None

    if not nbs_path.is_dir():
        print(f'{nbs_path} is not a directory')
        return None

    print(f'converting notebooks in {nbs_path}')
    for notebook in nbs_path.glob('*.ipynb'):
        convert_notebook_file(notebook, output_dir=output_dir, shared_dir=shared_dir)
200
201
202
def yml_to_dict(yml_file_path):
    """Load a yaml file and return its content.

    The file is parsed with ``yaml.BaseLoader``. Returns ``None`` when
    ``yml_file_path`` does not point to an existing file.
    """
    resolved = Path(yml_file_path).resolve()

    if resolved.is_file():
        with open(resolved, encoding='utf-8') as stream:
            return yaml.load(stream, Loader=yaml.BaseLoader)

    return None
215
216
217
def update_image_path(line, source_path):
    """Rewrite a relative image source in ``line`` to a '/content/...' path.

    The image source is taken from group 1 of ``mathigon_ximg_regex`` or,
    failing that, group 2 of ``html_img_regex``. Sources starting with
    '/', 'http' or 'data' are left alone, as are lines without a match.
    Otherwise every occurrence of the source text in the line is replaced
    by ``/content/<source_path>/<source>``.
    """
    src = None

    ximg_match = mathigon_ximg_regex.search(line)
    if ximg_match is not None:
        src = ximg_match.group(1)
    else:
        html_match = html_img_regex.search(line)
        if html_match is not None:
            src = html_match.group(2)

    # Nothing to rewrite: no image found, or the source is already
    # absolute / remote / inline data.
    if not src or src.startswith(('/', 'http', 'data')):
        return line

    return line.replace(src, f'/content/{source_path}/{src}')
233
234
235
def get_order_from_toc(toc_file_path, md_dir_path):
    """Return the chapter title and sections (in order) as defined in toc yaml.

    The chapter is the first toc entry whose 'url' is a suffix of
    ``md_dir_path`` (with path separators normalised to '/').

    Returns a ``(title, sections)`` tuple where ``sections`` is a list of
    ``(id, url)`` tuples, each url stripped of one leading '/'. When the
    toc file is missing or empty, or no chapter matches the directory,
    ``(None, [])`` is returned so callers can fall back to their own
    ordering instead of hitting a TypeError.
    """
    md_path = md_dir_path.replace(os.path.sep, '/')
    # yml_to_dict returns None for a missing file; treat that as "no chapters".
    chapters = yml_to_dict(toc_file_path) or []

    chapter = next((ch for ch in chapters if md_path.endswith(ch['url'])), None)
    if chapter is None:
        return None, []

    def get_sections(s):
        url = s['url']
        return (s['id'], url[1:] if url.startswith('/') else url)

    return chapter['title'], [get_sections(s) for s in chapter['sections']]
247
248
249
def standalone(md_dir, section):
    """Turn section into a standalone course.

    The section's markdown file (named after the last component of
    ``section['url']``) is renamed to 'content.md', the whole directory is
    copied to a sibling directory named ``section['id']``, and then
    'content.md', 'index.yaml' and 'functions.ts' are removed from the
    original directory.

    Returns ``None``. A message is printed when ``md_dir`` does not exist
    or is not a directory. Errors from the rename or the copy (for example
    a missing markdown file or an existing destination) still propagate.
    """
    md_dir_path = Path(md_dir).resolve()

    if not md_dir_path.exists():
        print(f'{md_dir_path} not found')
        return None

    if not md_dir_path.is_dir():
        print(f'{md_dir_path} is not a directory')
        return None

    # section md file name
    md_file_name = section['url'].split('/')[-1] + '.md'
    # rename md file to required name: `content.md`
    (md_dir_path / md_file_name).rename(md_dir_path / 'content.md')
    # move section files into own directory
    # NOTE(review): the listing result was never used; the call is kept in
    # case it was there to refresh the directory before copying -- confirm.
    os.listdir(md_dir_path)
    shutil.copytree(md_dir_path, md_dir_path.parent / section['id'])

    # clean up
    # 'index.yaml' is only written when a notebook carries index metadata,
    # so a missing file here must not abort after the copy has succeeded.
    for leftover in ('content.md', 'index.yaml', 'functions.ts'):
        try:
            (md_dir_path / leftover).unlink()
        except FileNotFoundError:
            pass
274
275
276
def merge(md_dir, toc_file_path, output_dir=None):
    """Merge markdown files in directory into single file.

    The merged file is 'content.md', written to ``output_dir`` (default:
    the markdown directory itself). File order and section ids come from
    ``get_order_from_toc``; each section's file is expected to be named
    after the last component of its url. When the toc yields no sections,
    every '*.md' file in the directory except 'content.md' is merged in
    sorted order, without section markers or image path rewriting.

    Returns ``None``. A message is printed when ``md_dir`` does not exist
    or is not a directory.
    """
    md_dir_path = Path(md_dir).resolve()

    if not md_dir_path.exists():
        print(f'{md_dir_path} not found')
        return None

    if not md_dir_path.is_dir():
        print(f'{md_dir_path} is not a directory')
        return None

    merged_file_name = 'content.md'

    output_path = output_dir if output_dir else str(md_dir_path)
    merged_md_path = os.path.join(output_path, merged_file_name)

    # Assumes section urls in toc corresponds to nb/md file names
    title, sections = get_order_from_toc(toc_file_path, str(md_dir_path))
    if sections:
        # Pair each path with its own toc entry so a skipped entry can
        # never shift the section ids relative to the files.
        entries = [
            (section, f'{os.path.join(str(md_dir_path), section[1].split("/")[-1])}.md')
            for section in sections
            if section[1] != merged_file_name
        ]
    else:
        # No toc ordering available; sort so the result is reproducible.
        entries = [
            (None, md_path)
            for md_path in sorted(md_dir_path.glob('*.md'))
            if md_path.name != merged_file_name
        ]

    with open(merged_md_path, 'w', encoding='utf-8') as out_file:
        if title and sections:
            out_file.write(f'# {title}\n\n')
        for count, (section, md_path) in enumerate(entries):
            if count > 0:
                out_file.write('\n\n---\n')
                # NOTE(review): the section marker is only emitted for
                # files after the first -- confirm the first section gets
                # its marker elsewhere.
                if section is not None:
                    out_file.write(f'\n> section: {section[0]}\n\n')
            with open(md_path, encoding='utf-8') as in_file:
                for line in in_file:
                    if section is not None:
                        line = update_image_path(line, section[1].split('/')[0])
                    out_file.write(line)
313
314
315
def convert(
    nb_file_or_dir_path,
    output_dir='',
    shared_dir='shared',
    section_id=None,
    is_problem_set=False
):
    """Convert a notebook file, or the notebooks in a directory, to Mathigon markdown.

    A file is handed to ``convert_notebook_file`` together with
    ``section_id`` and ``is_problem_set``. Anything else that exists is
    handed to ``convert_notebook_directory``, which does not receive those
    two arguments. Always returns ``None``. A message is printed when the
    path does not exist.
    """
    target = Path(nb_file_or_dir_path)

    if not target.exists():
        print(f'{target} not found')
        return None

    if not target.is_file():
        convert_notebook_directory(
            nb_file_or_dir_path,
            output_dir=output_dir,
            shared_dir=shared_dir
        )
        return None

    convert_notebook_file(
        nb_file_or_dir_path,
        output_dir=output_dir,
        shared_dir=shared_dir,
        section_id=section_id,
        is_problem_set=is_problem_set
    )
344
345