Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
sagemathinc
GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/smc_pyutil/smc_pyutil/sws2sagews.py
Views: 285
1
#!/usr/bin/env python3
2
# This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
3
# License: AGPLv3 s.t. "Commons Clause" – read LICENSE.md for details
4
5
from __future__ import absolute_import
6
import json, os, sys
7
from .py23 import cPickle, text_type, PY3
8
from uuid import uuid4
9
10
# Delimiter characters of the sagews text format: 'cell' wraps the id/mode
# header that starts a cell, 'output' wraps the id and the messages of an
# output section (see how the converters below assemble their result).
MARKERS = dict(cell=u"\uFE20", output=u"\uFE21")
11
12
13
def uuid():
    """Return a newly generated version-4 UUID converted with ``text_type``."""
    fresh_id = uuid4()
    return text_type(fresh_id)
15
16
17
def process_html(html):
    """Filter an HTML output fragment taken from an sws worksheet.

    A fragment containing the literal ``"div-interact-1"`` (quotes included)
    is assumed to be the rendering of an interact and is replaced by the
    empty string; any other fragment is returned unchanged.
    """
    looks_like_interact = '"div-interact-1"' in html
    return "" if looks_like_interact else html
23
24
25
def process_output(s):
    """Split plain cell output into stdout/stderr message dicts.

    The text is stripped first; blank text yields an empty list.  Everything
    from the first Python traceback header onwards is reported as ``stderr``
    and whatever precedes it (if anything) as ``stdout``.  Text without a
    traceback becomes a single ``stdout`` message.
    """
    text = s.strip()
    if not text:
        return []

    before, header, after = text.partition("Traceback (most recent call last):")
    if not header:
        # No traceback anywhere: it is all ordinary output.
        return [{'stdout': text}]

    messages = [{'stderr': header + after}]
    if before:
        messages.insert(0, {'stdout': before})
    return messages
39
40
41
# Opening/closing delimiters that output_messages() searches for in the raw
# output of a worksheet cell.  'display' is copied into the generated tex
# message for the three math variants.
DISPLAY_MATH = dict(
    open='<html><script type="math/tex; mode=display">',
    close='</script></html>',
    display=True,
)
INLINE_MATH = dict(
    open='<html><script type="math/tex">',
    close='</script></html>',
    display=False,
)
INLINE_MATH_2009 = dict(
    open='<html><span class="math">',
    close='</span></html>',
    display=False,
)
# Generic HTML output (no 'display' entry).
HTML = dict(open='<html>', close='</html>')
# Names of the math marker dicts, in the order output_messages() lists them.
mnames = ['DISPLAY_MATH', 'INLINE_MATH', 'INLINE_MATH_2009']
58
59
60
def output_messages(output):
    """Convert the raw output text of an sws cell into sagews output messages.

    The text is scanned left to right for delimited regions (display math,
    inline math, 2009-style inline math, generic HTML).  Plain text between
    regions goes through ``process_output`` (stdout/stderr messages), math
    regions become ``{'tex': {'tex': ..., 'display': ...}}`` messages and HTML
    regions become ``{'html': ...}`` messages filtered by ``process_html``.

    Returns the JSON encoding of every message, joined by ``MARKERS['output']``.

    Compared with a naive "try each marker type in turn" scan, this version
    - always handles the *earliest* region first, so mixed inline/display
      math keeps its order and is never emitted as raw stdout,
    - looks for a closing delimiter only *after* its opening delimiter,
    - ignores an opener that has no closer, so the text before it is emitted
      exactly once (as part of the remaining plain output).
    """
    # Priority order matters on ties: every math opener starts with '<html>',
    # so at the same position the math marker must win over generic HTML.
    marker_priority = (DISPLAY_MATH, INLINE_MATH, INLINE_MATH_2009, HTML)
    messages = []

    while output:
        regions = []
        for marker in marker_priority:
            start = output.find(marker['open'])
            if start == -1:
                continue
            inner_start = start + len(marker['open'])
            inner_end = output.find(marker['close'], inner_start)
            if inner_end != -1:
                regions.append((start, inner_start, inner_end, marker))
        if not regions:
            # Nothing well-formed left; the remainder is plain output.
            break

        # min() returns the first minimal entry, preserving marker priority.
        start, inner_start, inner_end, marker = min(regions,
                                                    key=lambda r: r[0])
        messages.extend(process_output(output[:start]))
        inner = output[inner_start:inner_end]
        if marker is HTML:
            messages.append({'html': process_html(inner)})
        else:
            messages.append(
                {'tex': {
                    'tex': inner,
                    'display': marker['display']
                }})
        output = output[inner_end + len(marker['close']):]

    messages.extend(process_output(output))
    return MARKERS['output'].join(text_type(json.dumps(x)) for x in messages)
98
99
100
def migrate_input(s):
    """Hook for heuristically adapting cell input to SageMath Cloud.

    No rewriting is performed at present: the input text is handed back
    exactly as it was received.
    """
    migrated = s
    return migrated
104
105
106
def _sws_html_cell(html):
    """Return the sagews text of an input-hidden ``%html`` cell showing ``html``."""
    return (MARKERS['cell'] + uuid() + 'i' + MARKERS['cell'] + u'\n' +
            '%html\n' + html + u'\n' + u'\n' + MARKERS['output'] + uuid() +
            MARKERS['output'] + json.dumps({'html': html}) +
            MARKERS['output'] + u'\n')


def _sws_code_cell(source, output):
    """Return the sagews text of a code cell with the given input and raw output."""
    modes = ''
    if '%auto' in source:
        modes += 'a'
    if '%hide' in source:  # also true for '%hideall'
        modes += 'i'
    if '%hideall' in source:
        modes += 'o'
    return (MARKERS['cell'] + uuid() + modes + MARKERS['cell'] + u'\n' +
            source + u'\n' + MARKERS['output'] + uuid() + MARKERS['output'] +
            output_messages(output) + MARKERS['output'] + u'\n')


def sws_body_to_sagews(body):
    """Translate the text body of an sws worksheet into sagews text.

    ``body`` is the already decoded worksheet text.  It consists of free HTML
    interleaved with cells of the form::

        {{{<header>
        <input>
        ///
        <output>
        }}}

    where the ``///`` separator and the output are optional.  Non-empty HTML
    between cells becomes an input-hidden ``%html`` cell; each sws cell with
    input or output becomes a sagews code cell whose output is converted by
    ``output_messages``.  Returns the concatenated sagews text.

    Text following the last cell (or a body containing no cell at all) is
    converted to a ``%html`` cell rather than dropped, and the input of an
    unterminated final cell is kept.
    """
    pieces = []  # joined once at the end instead of repeated string +=
    pos = 0
    body_end = len(body)

    while pos < body_end:
        cell_start = body.find("{{{", pos)
        # The cell header runs up to the first newline after '{{{'.
        header_end = -1 if cell_start == -1 else body.find(
            "\n", cell_start + 3)
        if header_end == -1:
            # No further complete cell: keep whatever HTML is left.  A
            # dangling '{{{...' header without a newline has no content and
            # is discarded.
            tail = body[pos:] if cell_start == -1 else body[pos:cell_start]
            tail = tail.strip()
            if tail:
                pieces.append(_sws_html_cell(tail))
            break

        html = body[pos:cell_start]

        # NOTE(review): this search is not limited to the current cell; if a
        # cell lacks '///' while a later one has it, the two get merged.
        # Bounding it by '}}}' would break input containing '}}}' -- confirm
        # what real worksheets need before changing the heuristic.
        separator = body.find("///", header_end)
        if separator == -1:
            output = ""
            closing = body.find("}}}", header_end)
            if closing == -1:
                # Unterminated last cell: keep the rest as its input.
                source = body[header_end + 1:]
                pos = body_end
            else:
                source = body[header_end + 1:closing]
                pos = closing + 4  # skip '}}}' and the following newline
        else:
            source = body[header_end + 1:separator]
            closing = body.find("}}}", separator + 4)
            if closing == -1:
                output = ""
                pos = body_end
            else:
                output = body[separator + 4:closing]  # skip '///' + newline
                pos = closing + 4

        html = html.strip()
        source = migrate_input(source.strip())
        output = output.strip()

        if html:
            pieces.append(_sws_html_cell(html))
        if source or output:
            pieces.append(_sws_code_cell(source, output))

    return u"".join(pieces)
165
166
167
def extra_modes(meta):
    """Build an optional leading ``%auto`` cell from worksheet metadata.

    ``meta`` is a dict of worksheet settings.  Two entries are honoured:

    - ``pretty_print``: when truthy, typeset mode is switched on;
    - ``system``: when set to something other than ``'sage'``, it becomes the
      worksheet's default mode.

    The *values* are inspected, not merely the presence of the keys, so a
    stored ``pretty_print: False`` or ``system: None`` adds nothing.

    Returns the sagews text of the extra cell, or ``''`` if nothing is needed.
    """
    lines = u''
    if meta.get('pretty_print'):
        lines += u'typeset_mode(True, display=False)\n'
    system = meta.get('system')
    if system and system != 'sage':
        lines += u'%%default_mode %s\n' % system
    if not lines:
        return ''
    # The 'a' means "auto".
    return (MARKERS['cell'] + uuid() + 'a' + MARKERS['cell'] + u'\n%auto\n' +
            lines)
177
178
179
def write_data_files(t, pfx='sage_worksheet', target="foo.data"):
    """Extract the worksheet's data files from an open tar archive.

    INPUT:
    - ``t`` -- an open ``tarfile.TarFile``
    - ``pfx`` -- top-level directory of the worksheet inside the archive;
      regular files below ``<pfx>/data/`` are extracted
    - ``target`` -- directory that receives the files (created on demand)

    OUTPUT:
    - ``(paths, target)`` where ``paths`` lists the files that were written

    Only regular files are extracted.  Sub-directories are recreated below
    ``target``; member names that would land outside ``target`` (absolute
    paths or ``..`` components) are skipped.
    """
    prefix = '{}/data/'.format(pfx)
    written = []
    for member in t.getmembers():
        name = member.name
        if not (member.isfile() and name.startswith(prefix)):
            continue
        relative = os.path.normpath(name[len(prefix):])
        if (relative in ('', os.curdir, os.pardir)
                or os.path.isabs(relative)
                or relative.startswith(os.pardir + os.sep)):
            # Never write outside of the target directory.
            continue
        dest = os.path.join(target, relative)
        parent = os.path.dirname(dest)
        if parent and not os.path.isdir(parent):
            os.makedirs(parent)
        source = t.extractfile(member)
        try:
            payload = source.read()
        finally:
            source.close()
        with open(dest, 'wb') as fh:
            fh.write(payload)
        written.append(dest)
    return written, target


def sws_to_sagews(filename):
    """
    Convert a Sage Notebook sws file to a SageMath Cloud sagews file.

    INPUT:
    - ``filename`` -- the name of an sws file, say foo.sws

    OUTPUT:
    - creates a file foo.sagews next to the input unless it already exists
      (an existing file is never overwritten; a warning is printed instead)
      and returns the name of that sagews file.  Data files contained in the
      worksheet are extracted by ``write_data_files``.

    Raises ``ValueError`` if the archive contains no worksheet.

    .. NOTE::

        sws files from around 2009 are bzip2 archives with the following layout:
            19/worksheet.txt
            19/data/
            19/conf.sobj
            19/snapshots/1252938265.bz2
            19/snapshots/1252940938.bz2
            19/snapshots/1252940986.bz2
            19/code/
            19/cells/
            19/cells/13/
            19/cells/14/
            ...
        sws files from 2012 and later have a layout like this:
            sage_worksheet/worksheet_conf.pickle
            sage_worksheet/worksheet.html
            sage_worksheet/worksheet.txt
            sage_worksheet/data/fcla.css

    """
    out = ''

    import os, tarfile
    t = tarfile.open(name=filename, mode='r:bz2', bufsize=10240)
    try:
        tfiles = t.getnames()
        fmt_2011 = True
        if 'sage_worksheet/worksheet.html' in tfiles:
            pfx = 'sage_worksheet'
            wkfile = 'sage_worksheet/worksheet.html'
        else:
            # older format files will not have 'sage_worksheet' at top level
            if not tfiles:
                raise ValueError('{} is an empty archive'.format(filename))
            pfx = tfiles[0]
            # tar member names always use '/', whatever the platform
            wkfile = pfx + '/worksheet.txt'
            if wkfile in tfiles:
                fmt_2011 = False  # 2009 format
            else:
                raise ValueError(
                    'could not find sage_worksheet/worksheet.html or {} in {}'.
                    format(wkfile, filename))

        body = t.extractfile(wkfile).read()
        # Argument order is (archive, prefix).
        data_files, data_path = write_data_files(t, pfx)
        if data_files:
            out += MARKERS['cell'] + uuid() + 'ai' + MARKERS[
                'cell'] + u'\n%%hide\n%%auto\nDATA="%s/"\n' % data_path
        if PY3:
            body = body.decode('utf8')
        out += sws_body_to_sagews(body)

        meta = {}
        if fmt_2011:
            try:
                # SECURITY: unpickling executes code chosen by whoever made
                # the sws file -- only convert worksheets from trusted sources.
                meta = cPickle.loads(
                    t.extractfile(
                        'sage_worksheet/worksheet_conf.pickle').read())
            except KeyError:
                # No configuration stored: guess typesetting from the body.
                if INLINE_MATH['open'] in body:
                    meta['pretty_print'] = True
        else:
            if INLINE_MATH_2009['open'] in body:
                meta['pretty_print'] = True
        out = extra_modes(meta) + out
    finally:
        t.close()

    base = os.path.splitext(filename)[0]
    outfile = base + '.sagews'
    if os.path.exists(outfile):
        sys.stderr.write(
            "%s: Warning -- Sage Worksheet '%s' already exists.  Not overwriting.\n"
            % (sys.argv[0], outfile))
        sys.stderr.flush()
    else:
        sys.stdout.write("%s: Creating Sage Worksheet '%s'\n" %
                         (sys.argv[0], outfile))
        sys.stdout.flush()
        with open(outfile, 'wb') as fh:
            fh.write(out.encode('utf8'))
    return outfile
280
281
282
def main():
    """Command line entry point: convert every sws file named in ``sys.argv``.

    Without arguments a usage message is written to stderr and the process
    exits with status 1.  Otherwise each argument is passed to
    ``sws_to_sagews`` in turn.
    """
    if len(sys.argv) == 1:
        # The data directory is named foo.data/ (see write_data_files),
        # not data/ as the message used to claim.
        sys.stderr.write("""
Convert a Sage Notebook sws file to a SageMath Cloud sagews file.

    Usage: %s path/to/filename.sws [path/to/filename2.sws] ...

Creates corresponding file path/to/filename.sagews, if it doesn't exist.
Also, a foo.data/ directory may be created in the current directory, which contains
the contents of the data path in filename.sws.
""" % sys.argv[0])
        sys.exit(1)

    for path in sys.argv[1:]:
        sws_to_sagews(path)
297
298
299
# Run the converter only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
301
302