Path: blob/master/src/smc_pyutil/smc_pyutil/sws2sagews.py
Views: 285
#!/usr/bin/env python3
# This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
# License: AGPLv3 s.t. "Commons Clause" – read LICENSE.md for details
"""Convert Sage Notebook ``.sws`` archives into ``.sagews`` worksheets."""

from __future__ import absolute_import

import json
import os
import sys
import tarfile
from uuid import uuid4

try:
    from .py23 import cPickle, text_type, PY3
except (ImportError, ValueError):
    # The relative import only works when this module is loaded as part of
    # its package.  When the file is executed directly (the __main__ guard
    # at the bottom) fall back to a minimal local shim.
    # NOTE(review): assumed to be equivalent to what py23 exports -- confirm.
    PY3 = sys.version_info[0] >= 3
    if PY3:
        import pickle as cPickle
        text_type = str
    else:
        import cPickle
        text_type = unicode  # noqa: F821 -- Python 2 only

# Characters that delimit cell headers and output blocks in a sagews file.
MARKERS = {'cell': u"\uFE20", 'output': u"\uFE21"}


def uuid():
    """Return a fresh random UUID as a text string."""
    return text_type(uuid4())


def process_html(html):
    """Return ``html`` unchanged, or ``""`` if it looks like an interact."""
    if '"div-interact-1"' in html:
        # probably an interact
        return ""
    return html


def process_output(s):
    """Split plain cell output into stdout/stderr messages.

    Everything from the first ``Traceback (most recent call last):`` onward
    is reported as stderr, whatever precedes it as stdout.  Returns a list
    of single-key dicts (empty if ``s`` is blank).
    """
    s = s.strip()
    if not s:
        return []
    i = s.find("Traceback (most recent call last):")
    if i == -1:
        return [{'stdout': s}]
    before, traceback_text = s[:i], s[i:]
    if before:
        return [{'stdout': before}, {'stderr': traceback_text}]
    return [{'stderr': traceback_text}]


DISPLAY_MATH = {
    'open': '<html><script type="math/tex; mode=display">',
    'close': '</script></html>',
    'display': True
}
INLINE_MATH = {
    'open': '<html><script type="math/tex">',
    'close': '</script></html>',
    'display': False
}
INLINE_MATH_2009 = {
    'open': '<html><span class="math">',
    'close': '</span></html>',
    'display': False
}
HTML = {'open': '<html>', 'close': '</html>'}
# Labels for the math markers above (only useful for debugging output).
mnames = ['DISPLAY_MATH', 'INLINE_MATH', 'INLINE_MATH_2009']

# Markup kinds in priority order: when two kinds start at the same position
# the earlier entry wins, so the specific math markers beat generic HTML.
_MARKUP_KINDS = (DISPLAY_MATH, INLINE_MATH, INLINE_MATH_2009, HTML)


def _next_markup(output):
    """Locate the earliest *terminated* markup region in ``output``.

    Returns ``(start, content_start, content_end, kind)`` or ``None`` when
    no opening marker with a matching closing marker after it exists.
    """
    best = None
    for rank, kind in enumerate(_MARKUP_KINDS):
        start = output.find(kind['open'])
        if start == -1:
            continue
        content_start = start + len(kind['open'])
        # Search for the close marker only *after* the opener.
        content_end = output.find(kind['close'], content_start)
        if content_end == -1:
            continue
        if best is None or (start, rank) < best[:2]:
            best = (start, rank, content_start, content_end, kind)
    if best is None:
        return None
    start, _, content_start, content_end, kind = best
    return start, content_start, content_end, kind


def output_messages(output):
    """Convert raw sws cell output into a sagews output-message string.

    The output is scanned left to right.  Math regions become ``tex``
    messages, ``<html>...</html>`` regions become ``html`` messages and the
    text in between goes through :func:`process_output`.  Markup whose
    closing marker is missing is treated as plain text (and emitted once).

    Returns the JSON-encoded messages joined by ``MARKERS['output']``.
    """
    messages = []

    while output:
        hit = _next_markup(output)
        if hit is None:
            break
        start, content_start, content_end, kind = hit
        messages.extend(process_output(output[:start]))
        content = output[content_start:content_end]
        if kind is HTML:
            messages.append({'html': process_html(content)})
        else:
            messages.append(
                {'tex': {
                    'tex': content,
                    'display': kind['display']
                }})
        output = output[content_end + len(kind['close']):]

    # Whatever is left contains no (terminated) markup.
    messages.extend(process_output(output))

    return MARKERS['output'].join(text_type(json.dumps(x)) for x in messages)


def migrate_input(s):
    """Hook for heuristic tweaks to cell input; currently the identity."""
    # Given the input to a cell, possibly make modifications heuristically to it to make it more
    # Sagemath Cloud friendly.
    return s


def _html_cell(html):
    """Render ``html`` as a hidden-input ``%html`` sagews cell."""
    return u''.join([
        MARKERS['cell'], uuid(), 'i', MARKERS['cell'], u'\n',
        '%html\n',
        html, u'\n',
        u'\n', MARKERS['output'], uuid(), MARKERS['output'],
        json.dumps({'html': html}), MARKERS['output'], u'\n',
    ])


def _code_cell(code, output):
    """Render a compute cell with its converted output as sagews text."""
    modes = ''
    if '%auto' in code:
        modes += 'a'
    if '%hide' in code:
        modes += 'i'
    if '%hideall' in code:
        modes += 'o'
    return u''.join([
        MARKERS['cell'], uuid(), modes, MARKERS['cell'], u'\n',
        code,
        u'\n', MARKERS['output'], uuid(), MARKERS['output'],
        output_messages(output), MARKERS['output'], u'\n',
    ])


def sws_body_to_sagews(body):
    """Translate the text of an sws worksheet into sagews cell text.

    ``body`` is the (already decoded) worksheet text.  Compute cells have
    the form ``{{{header\\ninput\\n///\\noutput\\n}}}``; the ``///`` part is
    optional.  Text between cells -- including text after the last cell --
    becomes ``%html`` cells.  Content of a final, unterminated cell is kept
    rather than discarded.
    """
    # body is already an utf8 string
    cells = []
    end = len(body)
    i = 0
    while i < end:
        j = body.find("{{{", i)
        if j == -1:
            # No further compute cells: the remainder is trailing text.
            trailing = body[i:].strip()
            if trailing:
                cells.append(_html_cell(trailing))
            break

        html = body[i:j].strip()
        if html:
            cells.append(_html_cell(html))

        # The rest of the "{{{..." line is the cell header; skip it.
        k = body.find("\n", j + 3)
        if k == -1:
            break

        sep = body.find("///", k)
        next_open = body.find("\n{{{", k)
        if sep != -1 and next_open != -1 and next_open < sep:
            # That separator belongs to a later cell; this one has none.
            sep = -1

        if sep == -1:
            output = u""
            close = body.find("}}}", k)
            if close == -1:
                code = body[k + 1:]
                i = end
            else:
                code = body[k + 1:close]
                i = close + 4
        else:
            code = body[k + 1:sep]
            close = body.find("}}}", sep + 4)
            if close == -1:
                output = body[sep + 4:]
                i = end
            else:
                output = body[sep + 4:close]
                i = close + 4

        code = migrate_input(code.strip())
        output = output.strip()
        if code or output:
            cells.append(_code_cell(code, output))

    return u"".join(cells)


def extra_modes(meta):
    """Build an auto-run preamble cell from worksheet metadata.

    ``meta`` is a dict; a truthy ``'pretty_print'`` enables typeset mode and
    a ``'system'`` that is set and not ``'sage'`` selects a default mode.
    Returns ``''`` when neither applies.
    """
    s = ''
    # Test the value, not mere key presence, so that an explicit False (or
    # a missing system) does not switch the feature on.
    if meta.get('pretty_print'):
        s += u'typeset_mode(True, display=False)\n'
    system = meta.get('system')
    if system and system != 'sage':
        s += u'%%default_mode %s\n' % system
    if not s:
        return ''
    # The 'a' means "auto".
    return MARKERS['cell'] + uuid() + 'a' + MARKERS['cell'] + u'\n%auto\n' + s


def write_data_files(t, pfx='sage_worksheet'):
    """Extract the worksheet's ``<pfx>/data/`` files from tar archive ``t``.

    INPUT:
    - ``t`` -- an open ``tarfile.TarFile``
    - ``pfx`` -- name of the top-level directory inside the archive

    OUTPUT:
    - ``(paths, target)``: list of files written and the directory they
      were written to (created on demand, relative to the current directory)
    """
    prefix = '{}/data/'.format(pfx)
    # NOTE(review): the target name is fixed, while the usage text in main()
    # talks about a "data/" directory -- confirm which one is intended.
    target = "foo.data"
    out = []
    for member in t.getmembers():
        # Directories, links and devices have no payload to copy.
        if not member.isfile() or not member.name.startswith(prefix):
            continue
        rel = os.path.normpath(member.name[len(prefix):])
        # Never write outside of target, whatever the archive claims.
        if (rel == os.curdir or os.path.isabs(rel) or rel == os.pardir
                or rel.startswith(os.pardir + os.sep)):
            continue
        dest = os.path.join(target, rel)
        parent = os.path.dirname(dest)
        if not os.path.isdir(parent):
            os.makedirs(parent)
        with open(dest, 'wb') as f:
            f.write(t.extractfile(member).read())
        out.append(dest)
    return out, target


def sws_to_sagews(filename):
    """
    Convert a Sage Notebook sws file to a SageMath Cloud sagews file.

    INPUT:
    - ``filename`` -- the name of an sws file, say foo.sws

    OUTPUT:
    - creates a file foo.sagews (an existing file is never overwritten; a
      warning is printed instead) and returns the name of the output file.
      Raises ``ValueError`` if no worksheet is found in the archive.

    .. NOTE::

        sws files from around 2009 are bzip2 archives with the following layout:
            19/worksheet.txt
            19/data/
            19/conf.sobj
            19/snapshots/1252938265.bz2
            19/snapshots/1252940938.bz2
            19/snapshots/1252940986.bz2
            19/code/
            19/cells/
            19/cells/13/
            19/cells/14/
            ...
        sws files from 2012 and later have a layout like this:
            sage_worksheet/worksheet_conf.pickle
            sage_worksheet/worksheet.html
            sage_worksheet/worksheet.txt
            sage_worksheet/data/fcla.css

    """
    out = ''
    meta = {}

    with tarfile.open(name=filename, mode='r:bz2', bufsize=10240) as t:
        tfiles = t.getnames()
        fmt_2011 = True
        if 'sage_worksheet/worksheet.html' in tfiles:
            pfx = 'sage_worksheet'
            wkfile = 'sage_worksheet/worksheet.html'
        else:
            if not tfiles:
                raise ValueError('{} is an empty archive'.format(filename))
            # older format files will not have 'sage_worksheet' at top level
            pfx = tfiles[0].split('/', 1)[0]
            # Archive member names always use '/', independent of the OS.
            wkfile = '{}/worksheet.txt'.format(pfx)
            if wkfile in tfiles:
                fmt_2011 = False  # 2009 format
            else:
                raise ValueError(
                    'could not find sage_worksheet/worksheet.html or {} in {}'.
                    format(wkfile, filename))

        body = t.extractfile(wkfile).read()
        data_files, data_path = write_data_files(t, pfx)
        if data_files:
            out += MARKERS['cell'] + uuid() + 'ai' + MARKERS[
                'cell'] + u'\n%%hide\n%%auto\nDATA="%s/"\n' % data_path
        if PY3:
            body = body.decode('utf8')
        out += sws_body_to_sagews(body)

        if fmt_2011:
            try:
                # SECURITY: unpickling runs arbitrary code embedded in the
                # archive -- only convert worksheets from trusted sources.
                # NOTE(review): a pickle written by Python 2 may need
                # encoding='latin1' under Python 3 -- confirm.
                meta = cPickle.loads(
                    t.extractfile(
                        'sage_worksheet/worksheet_conf.pickle').read())
            except KeyError:
                # No configuration stored: guess from the worksheet text.
                if INLINE_MATH['open'] in body:
                    meta['pretty_print'] = True
        else:
            if INLINE_MATH_2009['open'] in body:
                meta['pretty_print'] = True

    out = extra_modes(meta) + out

    base = os.path.splitext(filename)[0]
    outfile = base + '.sagews'
    if os.path.exists(outfile):
        sys.stderr.write(
            "%s: Warning -- Sage Worksheet '%s' already exists.  Not overwriting.\n"
            % (sys.argv[0], outfile))
        sys.stderr.flush()
    else:
        sys.stdout.write("%s: Creating Sage Worksheet '%s'\n" %
                         (sys.argv[0], outfile))
        sys.stdout.flush()
        with open(outfile, 'wb') as f:
            f.write(out.encode('utf8'))
    return outfile


def main():
    """Command-line entry point: convert every sws file given in argv."""
    if len(sys.argv) == 1:
        sys.stderr.write("""
Convert a Sage Notebook sws file to a SageMath Cloud sagews file.

Usage: %s path/to/filename.sws [path/to/filename2.sws] ...

Creates corresponding file path/to/filename.sagews, if it doesn't exist.
Also, a data/ directory may be created in the current directory, which contains
the contents of the data path in filename.sws.
""" % sys.argv[0])
        sys.exit(1)

    for path in sys.argv[1:]:
        sws_to_sagews(path)


if __name__ == "__main__":
    main()