Path: blob/master/src/smc_pyutil/smc_pyutil/html2sagews.py
Views: 285
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
# License: AGPLv3 s.t. "Commons Clause" – read LICENSE.md for details

# this script converts an html-exported sagews file back to sagews
"""Recover a sagews worksheet from its HTML export.

The HTML file is scanned for a download link (``<a ... download=...>``) whose
``href`` is a ``data:application/octet-stream`` URI; the URL-quoted, base64
encoded payload after the first comma is decoded and written to disk.

Command line: ``html2sagews INPUT.html [OUTPUT]``
"""

from __future__ import print_function, unicode_literals, absolute_import

import sys
import base64

try:
    from .py23 import unquote, PY3
except (ImportError, ValueError, SystemError):
    # The relative import only works when this module is loaded as part of
    # its package.  When the file is executed directly (see the __main__
    # guard at the bottom) there is no parent package, so fall back to the
    # standard library.
    # NOTE(review): presumably equivalent to what py23 provides -- confirm.
    PY3 = sys.version_info[0] >= 3
    if PY3:
        from urllib.parse import unquote
    else:
        from urllib import unquote


def extract(in_fn, out_fn):
    """Decode the worksheet embedded in ``in_fn`` and write it to ``out_fn``.

    :param in_fn: path of the HTML file containing the data-URI download link
    :param out_fn: path of the file to create (overwritten if it exists)
    :raises Exception: if no matching link is found, or if the ``href`` has
        no comma separating the data-URI header from the payload
    """
    print("extracting from '{in_fn}' to '{out_fn}'".format(in_fn=in_fn,
                                                           out_fn=out_fn))
    start = 'href="data:application/octet-stream'

    def get_href():
        """Return the data URI of the first matching link, or None."""
        # "with" closes the input even though we return from inside the loop
        with open(in_fn, 'r') as html:
            for line in html:
                if '<a' in line and start in line and 'download=' in line:
                    i = line.find(start)
                    # the text between the first pair of double quotes after
                    # "href=" is the attribute value
                    return line[i:].split('"', 2)[1]
        return None

    href = get_href()
    if href is None:
        raise Exception("embedded sagews file not found!")
    # "data:<mediatype>;base64,<payload>" -> [header, payload]
    base64str = href.split(',', 1)
    if len(base64str) <= 1:
        raise Exception("unable to parse href data")
    data = base64.b64decode(unquote(base64str[1]))
    if PY3:
        # The payload is decoded as UTF-8, so write it back as UTF-8 too
        # instead of depending on the locale's default encoding.
        with open(out_fn, 'w', encoding='utf8') as out:
            out.write(data.decode('utf8'))
    else:
        with open(out_fn, 'w') as out:
            out.write(data)


def _default_out_fn(in_fn):
    """Derive the output name used when none is given on the command line.

    ``name.sagews.html`` becomes ``name.sagews``; any other input name just
    gets ``.sagews`` appended.
    """
    # detecting a 'filename.sagews.html' pattern
    in_split = in_fn.rsplit('.', 2)
    if len(in_split) >= 3 and in_split[-2] == 'sagews':
        return '.'.join(in_split[:-1])
    return in_fn + '.sagews'


def main():
    """Command line entry point: ``INPUT.html [OUTPUT]`` read from sys.argv.

    :raises Exception: if no input file is given
    """
    if len(sys.argv) <= 1:
        raise Exception(
            "first argument needs to be the converted HTML file (likely '*.sagews.html')"
        )
    in_fn = sys.argv[1]
    if len(sys.argv) == 2:
        out_fn = _default_out_fn(in_fn)
    else:
        out_fn = sys.argv[2]
    extract(in_fn, out_fn)


if __name__ == '__main__':
    main()