Contact
CoCalc Logo Icon
Store Features Docs Share Support News About Sign Up Sign In
sagemathinc
GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/smc_pyutil/smc_pyutil/html2sagews.py
Views: 285
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
4
# This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
5
# License: AGPLv3 s.t. "Commons Clause" – read LICENSE.md for details
6
7
# this script converts an html-exported sagews file back to sagews
8
9
from __future__ import print_function, unicode_literals, absolute_import
10
11
import sys
12
from .py23 import unquote, PY3
13
import base64
14
15
16
def extract(in_fn, out_fn):
    """Recover the sagews file embedded in an HTML export and write it out.

    Scans ``in_fn`` line by line for the first line that contains an ``<a``
    tag, a ``download=`` attribute and an ``href`` starting with
    ``data:application/octet-stream``. Everything after the first comma of
    that href is URL-unquoted, base64-decoded and written to ``out_fn``.

    Args:
        in_fn: path of the HTML file to read.
        out_fn: path of the file to write.

    Raises:
        Exception: if no matching download link is found, or if the href
            contains no comma separating the data-URI header from the payload.
    """
    print("extracting from '{in_fn}' to '{out_fn}'".format(in_fn=in_fn,
                                                           out_fn=out_fn))
    start = 'href="data:application/octet-stream'

    def get_href():
        # Return the href value of the first matching link, or None when the
        # file contains no such link.  The ``with`` block guarantees the
        # input handle is closed even though we return from inside the loop.
        with open(in_fn, 'r') as html:
            for line in html:
                if '<a' in line and start in line and 'download=' in line:
                    i = line.find(start)
                    # line[i:] begins with 'href="...'; the text between the
                    # first and second double quote is the href value.
                    return line[i:].split('"', 2)[1]
        return None

    href = get_href()
    if href is None:
        raise Exception("embedded sagews file not found!")

    # A data URI looks like '<header>,<payload>'; only the payload is needed.
    base64str = href.split(',', 1)
    if len(base64str) <= 1:
        raise Exception("unable to parse href data")

    data = base64.b64decode(unquote(base64str[1]))
    if PY3:
        # b64decode yields bytes; the output file is opened in text mode.
        data = data.decode('utf8')

    # Close (and therefore flush) the output file deterministically instead
    # of relying on garbage collection of an anonymous file object.
    with open(out_fn, 'w') as out:
        out.write(data)
37
38
39
def main():
    """Command-line entry point.

    ``sys.argv[1]`` is the HTML file to read.  ``sys.argv[2]``, when given,
    is the output path.  Otherwise the output path is derived from the input:
    a name of the form ``<name>.sagews.<ext>`` (e.g. ``foo.sagews.html``)
    becomes ``<name>.sagews``; any other name gets ``.sagews`` appended.

    Raises:
        Exception: if no input file argument was given.
    """
    # Local import: only needed here, to derive the default output name.
    import os.path

    if len(sys.argv) <= 1:
        raise Exception(
            "first argument needs to be the converted HTML file (likely '*.sagews.html')"
        )
    in_fn = sys.argv[1]
    if len(sys.argv) == 2:
        # detecting a 'filename.sagews.html' pattern
        # splitext only considers the last path component, so dots in
        # directory names (e.g. 'dir.sagews.d/file') cannot be mistaken for
        # the '.sagews.<ext>' suffix.
        root, ext = os.path.splitext(in_fn)
        if ext and root.endswith('.sagews'):
            out_fn = root
        else:
            out_fn = in_fn + '.sagews'
    else:
        out_fn = sys.argv[2]
    extract(in_fn, out_fn)
55
56
57
# Run the conversion only when this file is executed as a script; importing
# the module must not trigger it.
if __name__ == '__main__':
    main()
59
60