CoCalc -- sagews2pdf.py

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/smc_pyutil/smc_pyutil/sagews2pdf.py
Views: ²⁸⁵
1
#!/usr/bin/env python2
2
# -*- coding: utf-8 -*-
3
# This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
4
# License: AGPLv3 s.t. "Commons Clause" – read LICENSE.md for details
5
"""
6
CONTRIBUTORS:
7

8
  - William Stein   - maintainer and initial author
9
  - Cedric Sodhi    - internationalization and bug fixes
10
  - Tomas Kalvoda   - internationalization
11
  - Harald Schilly  - inkscape svg2pdf, ThreadPool, bug fixes, ...
12

13
"""
14

15
from __future__ import absolute_import, print_function
16
from .py23 import text_type, HTMLParser, quote, py2decodestr, PY2
17
import sys
18
if PY2:
    # This is a Python 2 hack, consciously used to handle the particular
    # situation here, that's all: reloading `sys` makes setdefaultencoding
    # callable again so that implicit str/unicode conversions use UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf8')

# Unicode marker characters that delimit cells and their output messages
# inside a .sagews file (see Cell.__init__ and Worksheet._init_from).
MARKERS = {'cell': u"\uFE20", 'output': u"\uFE21"}

# TeX "magic comment" placed right after the first line of the generated
# document (see Worksheet.latex_preamble); it names xelatex as the engine.
# https://github.com/sagemathinc/cocalc/issues/3910
ENGINE = '''
% !TeX program = xelatex
'''
29

30
# Document-class specific part of the LaTeX preamble, keyed by the value of
# the --style command line option ('classic' or 'modern').  The selected
# entry is concatenated after ENGINE in Worksheet.latex_preamble.
# ATTN styles have to start with a newline
STYLES = {
    'classic':
    r"""
\documentclass{article}
\usepackage{fullpage}
\usepackage[utf8x]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{amsmath}
\usepackage{amssymb}
""",
    'modern':
    r"""
\documentclass[
    paper=A4,
    pagesize,
    fontsize=11pt,
    %headings=small,
    titlepage=false,
    fleqn,
    toc=flat,
    bibliography=totoc, %totocnumbered,
    index=totoc,
    listof=flat]{scrartcl}
\usepackage{scrhack}
\setuptoc{toc}{leveldown}

\usepackage[utf8x]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{xltxtra}  % xelatex

\usepackage[
    left=3cm,
    right=2cm,
    top=2cm,
    bottom=2cm,
    includeheadfoot]{geometry}
\usepackage[automark,headsepline,ilines,komastyle]{scrlayer-scrpage}
\pagestyle{scrheadings}

\usepackage{fixltx2e}

\raggedbottom

% font tweaks
\usepackage{ellipsis,ragged2e,marginnote}
\usepackage{inconsolata}
\renewcommand{\familydefault}{\sfdefault}
\setkomafont{sectioning}{\normalcolor\bfseries}
\setkomafont{disposition}{\normalcolor\bfseries}

\usepackage{mathtools}
\mathtoolsset{showonlyrefs=true}
\usepackage{amssymb}
\usepackage{sfmath}
"""
}
87

88
COMMON = r"""
89
\usepackage[USenglish]{babel}
90
\usepackage{etoolbox}
91
\usepackage{url}
92
\usepackage{hyperref}
93

94
% use includegraphics directly, but beware, that this is actually ...
95
\usepackage{graphicx}
96
% ... adjust box! http://latex-alive.tumblr.com/post/81481408449
97
\usepackage[Export]{adjustbox}
98
\adjustboxset{max size={\textwidth}{0.7\textheight}}
99

100
\usepackage{textcomp}
101
\def\leftqquote{``}\def\rightqqoute{''}
102
\catcode`\"=13
103
\def"{\bgroup\def"{\rightqqoute\egroup}\leftqquote}
104

105
\makeatletter
106
\preto{\@verbatim}{\topsep=0pt \partopsep=0pt }
107
\makeatother
108

109
\usepackage{color}
110
\definecolor{midgray}{rgb}{0.5,0.5,0.5}
111
\definecolor{lightyellow}{rgb}{1,1,.92}
112
\definecolor{dblackcolor}{rgb}{0.0,0.0,0.0}
113
\definecolor{dbluecolor}{rgb}{.01,.02,0.7}
114
\definecolor{dredcolor}{rgb}{1,0,0}
115
\definecolor{dbrowncolor}{rgb}{0.625,0.3125,0}
116
\definecolor{dgraycolor}{rgb}{0.30,0.3,0.30}
117
\definecolor{graycolor}{rgb}{0.35,0.35,0.35}
118

119
\usepackage{listings}
120
\lstdefinelanguage{Sage}[]{Python}
121
{morekeywords={True,False,sage,singular},
122
sensitive=true}
123
\lstset{
124
  showtabs=False,
125
  showspaces=False,
126
  showstringspaces=False,
127
  commentstyle={\ttfamily\color{dbrowncolor}},
128
  keywordstyle={\ttfamily\color{dbluecolor}\bfseries},
129
  stringstyle ={\ttfamily\color{dgraycolor}\bfseries},
130
  numberstyle ={\tiny\color{midgray}},
131
  backgroundcolor=\color{lightyellow},
132
  language = Sage,
133
  basicstyle={\ttfamily},
134
  extendedchars=true,
135
  keepspaces=true,
136
  aboveskip=1em,
137
  belowskip=0.1em,
138
  breaklines=true,
139
  prebreak = \raisebox{0ex}[0ex][0ex]{\ensuremath{\backslash}},
140
  %frame=single
141
}
142

143
% sagemath macros
144
\newcommand{\Bold}[1]{\mathbb{#1}}
145
\newcommand{\ZZ}{\Bold{Z}}
146
\newcommand{\NN}{\Bold{N}}
147
\newcommand{\RR}{\Bold{R}}
148
\newcommand{\CC}{\Bold{C}}
149
\newcommand{\FF}{\Bold{F}}
150
\newcommand{\QQ}{\Bold{Q}}
151
\newcommand{\QQbar}{\overline{\QQ}}
152
\newcommand{\CDF}{\Bold{C}}
153
\newcommand{\CIF}{\Bold{C}}
154
\newcommand{\CLF}{\Bold{C}}
155
\newcommand{\RDF}{\Bold{R}}
156
\newcommand{\RIF}{\Bold{I} \Bold{R}}
157
\newcommand{\RLF}{\Bold{R}}
158
\newcommand{\CFF}{\Bold{CFF}}
159
\newcommand{\GF}[1]{\Bold{F}_{#1}}
160
\newcommand{\Zp}[1]{\ZZ_{#1}}
161
\newcommand{\Qp}[1]{\QQ_{#1}}
162
\newcommand{\Zmod}[1]{\ZZ/#1\ZZ}
163
"""
164

165
# This is part of the preamble above, although this time full of utf8 chars,
# hence the explicit conversion to the text type.
#
# It defines the macros that texifyHTML/tex_escape map relational signs to,
# and a listings "literate" table so accented characters inside code cells
# are typeset via their TeX accent commands.
#
# Fixes relative to the earlier table:
#   - "È" was mapped to the acute accent (\'E); it is a grave accent (\`E).
#   - "€" was mapped to \EUR, which none of the packages loaded in this
#     preamble define; \texteuro is provided by textcomp (loaded in COMMON).
COMMON += text_type(r"""
% mathjax has \lt and \gt
\newcommand{\lt}{<}
\newcommand{\gt}{>}
% also support HTML's &le; and &ge;
\newcommand{\lequal}{≤}
\newcommand{\gequal}{≥}
\newcommand{\notequal}{≠}

% defining utf8 characters for listings
\lstset{literate=
  {á}{{\'a}}1 {é}{{\'e}}1 {í}{{\'i}}1 {ó}{{\'o}}1 {ú}{{\'u}}1
  {Á}{{\'A}}1 {É}{{\'E}}1 {Í}{{\'I}}1 {Ó}{{\'O}}1 {Ú}{{\'U}}1
  {à}{{\`a}}1 {è}{{\`e}}1 {ì}{{\`i}}1 {ò}{{\`o}}1 {ù}{{\`u}}1
  {À}{{\`A}}1 {È}{{\`E}}1 {Ì}{{\`I}}1 {Ò}{{\`O}}1 {Ù}{{\`U}}1
  {ä}{{\"a}}1 {ë}{{\"e}}1 {ï}{{\"i}}1 {ö}{{\"o}}1 {ü}{{\"u}}1
  {Ä}{{\"A}}1 {Ë}{{\"E}}1 {Ï}{{\"I}}1 {Ö}{{\"O}}1 {Ü}{{\"U}}1
  {â}{{\^a}}1 {ê}{{\^e}}1 {î}{{\^i}}1 {ô}{{\^o}}1 {û}{{\^u}}1
  {Â}{{\^A}}1 {Ê}{{\^E}}1 {Î}{{\^I}}1 {Ô}{{\^O}}1 {Û}{{\^U}}1
  {œ}{{\oe}}1 {Œ}{{\OE}}1 {æ}{{\ae}}1 {Æ}{{\AE}}1 {ß}{{\ss}}1
  {ç}{{\c c}}1 {Ç}{{\c C}}1 {ø}{{\o}}1 {å}{{\r a}}1 {Å}{{\r A}}1
  {ã}{{\~a}}1 {Ã}{{\~A}}1 {õ}{{\~o}}1 {Õ}{{\~O}}1
  {€}{{\texteuro}}1 {£}{{\pounds}}1
}

""")
192

193
FOOTER = """
194
%configuration={"latex_command":"xelatex -synctex=1 -interact=nonstopmode 'tmp.tex'"}
195
"""
196

197
import argparse, base64, json, os, shutil, sys, textwrap, tempfile
198
from uuid import uuid4
199

200
# Server that blob URLs are built against (see Cell.latex_output); taken from
# the COCALC_URL environment variable and overridable via --base_url in main().
BASE_URL = os.environ.get("COCALC_URL", "https://cocalc.com")
201

202

203
def escape_path(s):
    """
    Percent-encode the path `s` for use inside a URL.

    The text is UTF-8 encoded and quoted while keeping the same set of
    characters unescaped that JavaScript's URI encoding keeps, see
    http://stackoverflow.com/questions/946170/equivalent-javascript-functions-for-pythons-urllib-quote-and-urllib-unquote
    Afterwards '#' and '?' are escaped explicitly.
    """
    utf8_bytes = text_type(s).encode('utf-8')
    quoted = quote(utf8_bytes, safe='~@#$&()*!+=:;,.?/\'')
    for plain, escaped in (('#', '%23'), ("?", '%3F')):
        quoted = quoted.replace(plain, escaped)
    return quoted
207

208

209
def wrap(s, c=90):
    """
    Re-wrap every line of `s` to a width of `c` columns.

    Existing line breaks are kept; each individual line is wrapped with
    textwrap.wrap and the pieces are joined by newlines again.
    """
    wrapped_lines = []
    for line in s.splitlines():
        pieces = textwrap.wrap(line, c)
        wrapped_lines.append('\n'.join(pieces))
    return '\n'.join(wrapped_lines)
211

212

213
# used in texifyHTML and then again, in tex_escape
# they're mapped to macros, defined in the latex preamble
# Each pair is (HTML entity name, name of the LaTeX macro).
relational_signs = [
    ('gt', 'gt'),
    ('lt', 'lt'),
    ('ge', 'gequal'),
    ('le', 'lequal'),
    ('ne', 'notequal'),
]


def tex_escape(s):
    """
    Escape characters of the plain text `s` that are special in LaTeX.

    Backslashes become ``{\\textbackslash}`` and ``_ ^ % # &`` are escaped.
    Two things are deliberately restored after the backslash replacement:

      - an already escaped dollar (``\\$``) stays ``\\$``; unescaped dollars
        are left untouched,
      - the relational macros listed in `relational_signs` (e.g. ``\\gt``,
        as produced by texifyHTML) are turned back into macro calls,
        followed by a space so that they do not merge with following text.

    Returns the escaped string.
    """
    replacements = [
        # must be first, all later replacements introduce backslashes
        ('\\', '{\\textbackslash}'),
        ('_', r'\_'),
        # A bare "\^" is the circumflex *accent* command and would consume
        # the following character; the empty group yields a literal caret.
        ('^', r'\^{}'),
        (r'{\textbackslash}$', r'\$'),
        ('%', r'\%'),
        ('#', r'\#'),
        ('&', r'\&'),
    ]
    for old, new in replacements:
        s = s.replace(old, new)
    for _, macro in relational_signs:
        s = s.replace(r'{\textbackslash}%s' % macro, r'\%s ' % macro)
    return s
240

241

242
# Parallel computing can be useful for IO bound tasks.
243
def thread_map(callable, inputs, nb_threads=1):
    """
    Computing [callable(args) for args in inputs]
    in parallel using `nb_threads` separate *threads* (default: 1).

    This helps a bit with I/O bound tasks and is rather conservative
    to avoid excessive memory usage.

    The inputs are printed (one per line) before the work starts.

    If an exception is raised by any thread, a RuntimeError exception
    wrapping the first recorded exception is raised instead.  The result
    slot of a failed call is None.
    """
    from multiprocessing.pool import ThreadPool

    # materialize once: the inputs are both logged and mapped over
    inputs = list(inputs)
    print("Doing the following in parallel:\n%s" %
          ('\n'.join('%s' % (x, ) for x in inputs)))
    exceptions = []

    def callable_wrap(x):
        try:
            return callable(x)
        except Exception as msg:
            exceptions.append(msg)

    tp = ThreadPool(nb_threads)
    try:
        results = tp.map(callable_wrap, inputs)
    finally:
        # always release the worker threads, even if map() itself fails
        tp.close()
        tp.join()
    if exceptions:
        raise RuntimeError(exceptions[0])
    return results
269

270

271
# create a subclass and override the handler methods
272

273

274
class Parser(HTMLParser):
    """
    Translate a stream of HTML into LaTeX.

    The LaTeX text is accumulated in `self.result`.  Shell commands needed to
    fetch (and, for SVG, convert) referenced images are appended to the list
    `cmds` handed to the constructor; the caller is expected to run them.
    """

    # tags whose opening simply starts a TeX command, environment or group
    _SIMPLE_OPENERS = {
        'h1': '\\section{',
        'h2': '\\subsection{',
        'h3': '\\subsubsection{',
        # "\textemph" does not exist in LaTeX, "\emph" does
        'i': '\\emph{',
        'div': '\n\n{',
        'p': '\n\n{',
        'ul': '\\begin{itemize}',
        'ol': '\\begin{enumerate}',
        'li': '\\item{',
        'strong': '\\textbf{',
        'em': '\\textit{',
    }

    def __init__(self, cmds):
        HTMLParser.__init__(self)
        self.result = ''
        self._commands = cmds
        self._dont_close_img = False

    def handle_starttag(self, tag, attrs):
        """Append the LaTeX that opens `tag` to `self.result`."""
        if tag in self._SIMPLE_OPENERS:
            self.result += self._SIMPLE_OPENERS[tag]
        elif tag == 'hr':
            self.result += '\n\n' + r'\noindent\makebox[\linewidth]{\rule{\textwidth}{0.4pt}}' + '\n\n'
        elif tag == 'a':
            attrs = dict(attrs)
            if 'href' in attrs:
                self.result += '\\href{%s}{' % attrs['href']
            else:
                self.result += '\\url{'
        elif tag == 'img':
            self._start_img(dict(attrs))
        else:
            self.result += '{'  # fallback

    def _start_img(self, attrs):
        """Emit \\includegraphics for an <img> and queue its download command."""
        href = attrs.get('src')
        if not href:
            # fallback, because there is no (usable) src='...'
            # NOTE(review): "\verbatim" is the begin-code of the verbatim
            # environment, not a command taking an argument -- confirm this
            # fallback renders as intended.
            self.result += '\\verbatim{image: %s}' % str(attrs)
            return

        # href might start with /blobs/ or similar for e.g. octave plots
        # in such a case, there is also a file output and we ignore the image in the html
        if href.startswith('/'):
            self._dont_close_img = True
            return

        _, ext = os.path.splitext(href)
        ext = ext.lower()
        if '?' in ext:
            ext = ext[:ext.index('?')]

        # create a deterministic filename based on the href;
        # hashing needs bytes, so text is encoded first (required on Python 3)
        from hashlib import sha1
        raw_href = href if isinstance(href, bytes) else href.encode('utf-8')
        base = sha1(raw_href).hexdigest()
        filename = base + ext

        # NOTE --no-check-certificate is needed since this query is done inside
        # the cluster, where the cert won't match the local service name.
        c = "rm -f '%s'; wget --no-check-certificate '%s' --output-document='%s'" % (
            filename, href, filename)
        if ext == '.svg':
            # convert to pdf
            c += " && rm -f '%s'; inkscape --without-gui --export-pdf='%s' '%s'" % (
                base + '.pdf', base + '.pdf', filename)
            filename = base + '.pdf'
        self._commands.append(c)

        # the choice of 120 is "informed" but also arbitrary
        # besides that, if we scale it in sagews, we also have to scale it here
        scaling = 1.
        if 'smc-image-scaling' in attrs:
            try:
                # in practice (and if it is set at all) it is most likely 0.66
                scaling = float(attrs['smc-image-scaling'])
            except (TypeError, ValueError):
                pass
        if not (0 < scaling < float('inf')):
            # zero, negative, inf or nan cannot be turned into a resolution
            scaling = 1.
        resolution = int(120. / scaling)
        self.result += '\\includegraphics[resolution=%s]{%s}' % (resolution,
                                                                  filename)

    def handle_endtag(self, tag):
        """Append the LaTeX that closes `tag` to `self.result`."""
        if tag == 'ul':
            self.result += '\\end{itemize}'
        elif tag == 'ol':
            self.result += '\\end{enumerate}'
        elif tag == 'hr':
            pass
        elif tag == 'img':
            # handle_starttag never opens a group for an image, so nothing
            # must be closed here (a "}" would be unbalanced)
            self._dont_close_img = False
        else:
            self.result += '}'  # fallback

    def handle_data(self, data):
        # safe because all math stuff has already been escaped.
        self.result += tex_escape(data)
380

381

382
def sanitize_math_input(s):
    """
    Run `sanitizeInput` over `s` once per supported pair of math delimiters.

    Returns a list of ``(sanitizeInput_result, delimiters)`` stages.  The
    first stage is ``((s, None), None)``; every later stage processes the
    text (element ``[0]``) of the stage before it, so the text of the last
    stage has been processed for all delimiter pairs.  The list is what
    `reconstruct_math` expects as its second argument.
    """
    from .markdown2Mathjax import sanitizeInput
    # it's critical that $$ be first!
    environments = ['equation', 'equation*', 'align', 'align*', 'eqnarray',
                    'eqnarray*', 'math', 'displaymath']
    delims = [('$$', '$$'), ('\\(', '\\)'), ('\\[', '\\]')]
    delims += [('\\begin{%s}' % env, '\\end{%s}' % env)
               for env in environments]

    stages = [((s, None), None)]
    for pair in delims:
        previous_text = stages[-1][0][0]
        stages.append((sanitizeInput(previous_text, equation_delims=pair),
                       pair))

    return stages
400

401

402
def reconstruct_math(s, tmp):
    """
    Undo `sanitize_math_input`: feed `s` through `reconstructMath` for every
    stage recorded in `tmp`, last stage first.

    `tmp` is consumed in place until only its initial entry is left.  Both
    arguments are printed before processing.  Returns the rebuilt string.
    """
    print("s ='%r'" % s)
    print("tmp = '%r'" % tmp)
    from .markdown2Mathjax import reconstructMath
    while len(tmp) > 1:
        stage = tmp[-1]
        s = reconstructMath(s, stage[0][1], equation_delims=stage[1])
        tmp.pop()
    return s
410

411

412
def texifyHTML(s):
    """
    Replace a few HTML entities in `s` by their TeX counterparts.

    Typographic quotes and ``&amp;`` are replaced literally; the relational
    entities listed in `relational_signs` (``&gt;``, ``&le;``, ...) become
    calls of the macros defined in the LaTeX preamble.
    """
    literal_entities = (
        ('&#8220;', '``'),
        ('&#8221;', "''"),
        ('&#8217;', "'"),
        ('&amp;', "&"),
    )
    for entity, tex in literal_entities:
        s = s.replace(entity, tex)
    for entity_name, macro in relational_signs:
        s = s.replace('&%s;' % entity_name, r'\%s' % macro)
    return s
425

426

427
def html2tex(doc, cmds):
    """
    Convert the HTML string `doc` to LaTeX.

    Math is protected via `sanitize_math_input` before parsing and put back
    afterwards with `reconstruct_math`.  Shell commands required to fetch
    referenced images are appended to the list `cmds` by the parser.

    Returns the LaTeX source as a string.
    """
    doc = texifyHTML(doc)
    tmp = sanitize_math_input(doc)
    parser = Parser(cmds)
    # The number of (unescaped) dollars or double-dollars found so far. An even
    # number is assumed to indicate that we're outside of math and thus need to
    # escape.
    parser.dollars_found = 0
    parser.feed(tmp[-1][0][0])
    # feed() may keep incomplete trailing data buffered; close() forces it
    # through the handlers so no text at the end of the document is lost.
    parser.close()
    return reconstruct_math(parser.result, tmp)
437

438

439
def md2html(s):
    """
    Render the markdown string `s` to HTML using markdown2.

    Math is protected with `sanitize_math_input` before rendering and put
    back with `reconstruct_math` afterwards.
    """
    from markdown2 import markdown

    stages = sanitize_math_input(s)
    protected_text = stages[-1][0][0]
    html = markdown(protected_text,
                    extras=[
                        'code-friendly', 'footnotes', 'smarty-pants',
                        'wiki-tables', 'fenced-code-blocks'
                    ])
    return reconstruct_math(html, stages)
449

450

451
def md2tex(doc, cmds):
    """
    Convert the markdown string `doc` to LaTeX by way of HTML.

    `cmds` is passed on to `html2tex`, which appends the shell commands
    needed to fetch referenced images.
    """
    return html2tex(md2html(doc), cmds)
460

461

462
class Cell(object):
    """
    One cell of a sagews worksheet: its input and the list of output messages.

    Attributes set by the constructor:
      raw          -- the unparsed text of the cell
      input_uuid   -- first 36 characters of the cell header
      input_codes  -- rest of the header; 'i' hides the input, 'o' the output
      input        -- the input source ('' if there is none)
      output_uuid  -- id of the output block ('' if there is no output)
      output       -- list of decoded JSON messages ('' if there is no output)
    """

    def __init__(self, s):
        self.raw = s
        # shell commands collected while rendering, see latex()
        self._commands = []

        # str.split always yields at least one piece: the input part
        v = s.split('\n' + MARKERS['output'])
        w = v[0].split(MARKERS['cell'] + '\n')
        n = w[0].lstrip(MARKERS['cell'])
        self.input_uuid = n[:36]
        self.input_codes = n[36:]
        self.input = w[1] if len(w) > 1 else ''

        if len(v) > 1:
            w = v[1].split(MARKERS['output'])
            self.output_uuid = w[0] if len(w) > 0 else ''
            self.output = []
            for x in w[1:]:
                if not x:
                    continue
                try:
                    self.output.append(json.loads(x))
                except ValueError:
                    try:
                        print("**WARNING:** Unable to de-json '%s'" % x)
                    except Exception:
                        # e.g. the message itself cannot be printed
                        print("Unable to de-json some output")
        else:
            self.output = self.output_uuid = ''

    def latex(self):
        """
        Returns the latex representation of this cell along with a list of commands
        that should be executed in the shell in order to obtain remote data files,
        etc., to render this cell.
        """
        self._commands = []
        return self.latex_input() + self.latex_output(), self._commands

    def latex_input(self):
        """Return the input of the cell as a lstlisting ('' if it is blank)."""
        if 'i' in self.input_codes:  # hide input
            return "\\begin{lstlisting}\n\\end{lstlisting}"
        if self.input.strip():
            return "\\begin{lstlisting}\n%s\n\\end{lstlisting}" % self.input
        else:
            return ""

    def latex_output(self):
        """
        Return the LaTeX for all output messages of the cell.

        Shell commands needed to obtain remote files are appended to
        `self._commands`; files available locally are copied into the
        current directory right away.
        """
        print("BASE_URL", BASE_URL)
        s = ''
        if 'o' in self.input_codes:  # hide output
            return s
        for x in self.output:
            if 'stdout' in x:
                s += "\\begin{verbatim}" + wrap(
                    x['stdout']) + "\\end{verbatim}"
            if 'stderr' in x:
                s += "{\\color{dredcolor}\\begin{verbatim}" + wrap(
                    x['stderr']) + "\\end{verbatim}}"
            if 'code' in x:
                # TODO: for now ignoring that not all code is Python...
                s += "\\begin{lstlisting}" + x['code'][
                    'source'] + "\\end{lstlisting}"
            if 'html' in x:
                s += html2tex(x['html'], self._commands)
            if 'md' in x:
                s += md2tex(x['md'], self._commands)
            if 'interact' in x:
                pass
            if 'tex' in x:
                val = x['tex']
                if 'display' in val:
                    s += "$$%s$$" % val['tex']
                else:
                    s += "$%s$" % val['tex']
            if 'file' in x:
                s += self._latex_file(x['file'])

        return s

    def _latex_file(self, val):
        """Return the LaTeX for one 'file' output message `val`."""
        if 'url' in val:
            target = val['url']
            filename = os.path.split(target)[-1]
        else:
            filename = os.path.split(val['filename'])[-1]
            target = "%s/blobs/%s?uuid=%s" % (BASE_URL, escape_path(filename),
                                              val['uuid'])

        base, ext = os.path.splitext(filename)
        ext = ext.lower()[1:]
        if ext in ['jpg', 'jpeg', 'png', 'eps', 'pdf', 'svg']:
            i = target.find("/raw/")
            if i != -1:
                # the file lives in the project: copy it from below $HOME
                # NOTE(review): an svg taken from here is not converted to
                # pdf, unlike a downloaded one -- confirm whether intended.
                src = os.path.join(os.environ['HOME'], target[i + 5:])
                if os.path.abspath(src) != os.path.abspath(filename):
                    try:
                        shutil.copyfile(src, filename)
                    except Exception as msg:
                        print(msg)
            else:
                # Get the file from remote server
                # NOTE(review): filename/target come from the worksheet and
                # are only single-quoted here, not shell-escaped.
                c = "rm -f '%s'; wget --no-check-certificate '%s' --output-document='%s'" % (
                    filename, target, filename)
                if ext == 'svg':
                    # svg cannot be embedded directly: let inkscape convert
                    # it to pdf, see
                    # http://tex.stackexchange.com/questions/2099/how-to-include-svg-diagrams-in-latex
                    c += " && rm -f '%s'; inkscape --without-gui --export-pdf='%s' '%s'" % (
                        base + '.pdf', base + '.pdf', filename)
                    filename = base + '.pdf'
                # queue exactly one command per file (download + conversion)
                self._commands.append(c)
            # omitting [width=\\textwidth] allows figsize to set displayed size
            # see https://github.com/sagemathinc/cocalc/issues/114
            return '{\\centering\n\\includegraphics{%s}\n\\par\n}\n' % filename

        if ext == 'sage3d' and 'uuid' in val:
            # extra_data is a module global that only main() defines
            extra = globals().get('extra_data') or {}
            if 'sage3d' in extra:
                # render a static image, if available
                return self._latex_sage3d(extra['sage3d'], val['uuid'])

        if target.startswith('http'):
            return '\\url{%s}' % target
        return '\\begin{verbatim}[' + target + ']\\end{verbatim}'

    def _latex_sage3d(self, snapshots, uuid):
        """Write the static snapshot stored for `uuid` to a file and include it."""
        if uuid not in snapshots or not snapshots[uuid]:
            return ''
        data = snapshots[uuid].pop()
        width = min(1, 1.2 * data.get('width', 0.5))
        if 'data-url' not in data:
            return ''
        # 'data:image/png;base64,iVBOR...'
        data_url = data['data-url']
        i = data_url.find('/')
        j = data_url.find(";")
        k = data_url.find(',')
        image_ext = data_url[i + 1:j]
        image_data = data_url[k + 1:]
        if data_url[j + 1:k] != 'base64':
            raise ValueError("sage3d snapshot is not a base64 data url")
        filename = str(uuid4()) + "." + image_ext
        # the decoded image is binary data
        with open(filename, 'wb') as image_file:
            image_file.write(base64.b64decode(image_data))
        return '\\includegraphics[width=%s\\textwidth]{%s}\n' % (width,
                                                                  filename)
616

617

618
class Worksheet(object):
    """
    A sagews worksheet, i.e. a sequence of Cell objects, which can be
    rendered as one LaTeX document.
    """

    def __init__(self, filename=None, s=None):
        """
        The worksheet defined by the given filename or UTF unicode string s.

        Raises ValueError if neither `filename` nor `s` is given.
        """
        self._default_title = ''
        if filename:
            self._filename = os.path.abspath(filename)
        else:
            self._filename = None
        if filename is not None:
            self._default_title = filename
            # read inside a context manager so the handle is closed promptly
            with open(filename) as worksheet_file:
                content = worksheet_file.read()
            self._init_from(py2decodestr(content))
        elif s is not None:
            self._init_from(s)
        else:
            raise ValueError("filename or s must be defined")

    def _init_from(self, s):
        """Split the worksheet text at the cell markers into Cell objects."""
        self._cells = [Cell(x) for x in s.split('\n' + MARKERS['cell'])]

    def __getitem__(self, i):
        return self._cells[i]

    def __len__(self):
        return len(self._cells)

    def latex_preamble(self,
                       title='',
                       author='',
                       date='',
                       style='modern',
                       contents=True):
        """
        Return everything of the LaTeX document up to and including
        \\maketitle and, if `contents` is true, \\tableofcontents.

        `style` selects the entry of STYLES; `title`, `author` and `date`
        are escaped with tex_escape.  No \\date is emitted for an empty date.
        """
        # The utf8x instead of utf8 below is because of http://tex.stackexchange.com/questions/83440/inputenc-error-unicode-char-u8-not-set-up-for-use-with-latex, which I needed due to approx symbols, etc. causing trouble.
        from datetime import datetime
        top = u'% generated by smc-sagews2pdf -- {timestamp}'
        top = top.format(timestamp=str(datetime.utcnow()))
        s = top + ENGINE + STYLES[style]
        s += COMMON
        s += text_type(r"\title{%s}" % tex_escape(title) + u"\n")
        s += text_type(r"\author{%s}" % tex_escape(author) + u"\n")
        if date:
            s += text_type(r"\date{%s}" % tex_escape(date) + u"\n")
        s += u"\\begin{document}\n"
        s += u"\\maketitle\n"

        if contents:
            s += u"\\tableofcontents\n"
        return s

    def latex(self,
              title='',
              author='',
              date='',
              style='modern',
              contents=True):
        """
        Return the complete LaTeX document for this worksheet.

        The shell commands collected from all cells (downloads and
        conversions of images) are executed via thread_map before the
        document is assembled, so this has side effects in the current
        working directory.  An empty `title` falls back to the filename the
        worksheet was loaded from.
        """
        if not title:
            title = self._default_title
        commands = []
        tex = []
        for c in self._cells:
            t, cmd = c.latex()
            tex.append(t)
            if cmd:
                commands.extend(cmd)
        if commands:
            thread_map(os.system, commands)
        return self.latex_preamble(title=title,
                                   author=author,
                                   date=date,
                                   style=style,
                                   contents=contents) \
               + '\n'.join(tex) \
               + r"\end{document}" \
               + FOOTER
697

698

699
def sagews_to_pdf(filename,
                  title='',
                  author='',
                  date='',
                  outfile='',
                  contents=True,
                  remove_tmpdir=True,
                  work_dir=None,
                  style='modern'):
    """
    Convert the worksheet `filename` to a PDF by running latexmk/xelatex.

    The LaTeX source is written as ``tmp.tex`` into `work_dir` (a fresh
    temporary directory if None; created if missing) and compiled there.
    The resulting PDF is moved to `outfile`, which defaults to `filename`
    with its extension replaced by ``.pdf``; relative paths are resolved
    against the directory that was current when the function was called.

    If `remove_tmpdir` is true the working directory is deleted afterwards.
    The current working directory is restored before returning.

    Raises subprocess.CalledProcessError if latexmk fails.
    """
    base = os.path.splitext(filename)[0]
    pdf = outfile if outfile else base + ".pdf"
    print("converting: %s --> %s" % (filename, pdf))
    W = Worksheet(filename)
    # remembered up front so that the finally-clause can always go back
    cur = os.path.abspath('.')
    try:
        if work_dir is None:
            work_dir = tempfile.mkdtemp()
        elif not os.path.exists(work_dir):
            os.makedirs(work_dir)
        if not remove_tmpdir:
            print("Temporary directory retained: %s" % work_dir)
        os.chdir(work_dir)
        import codecs
        # W.latex() downloads images into the (now current) working
        # directory; the file must be closed before latexmk reads it.
        with codecs.open('tmp.tex', 'w', 'utf8') as tex_file:
            tex_file.write(
                W.latex(title=title,
                        author=author,
                        date=date,
                        contents=contents,
                        style=style))
        from subprocess import check_call
        check_call('latexmk -pdf -xelatex -f -interaction=nonstopmode tmp.tex',
                   shell=True)
        if os.path.exists('tmp.pdf'):
            shutil.move('tmp.pdf', os.path.join(cur, pdf))
            print("Created", os.path.join(cur, pdf))
    finally:
        # leave the working directory first: a relative work_dir can only be
        # resolved (and removed) from the original directory
        os.chdir(cur)
        if work_dir and remove_tmpdir:
            shutil.rmtree(work_dir)
        else:
            print("Leaving latex files in '%s'" % work_dir)
743

744

745
def main():
    """
    Command line entry point: parse the arguments and convert every given
    worksheet to PDF.

    Sets the module globals `extra_data` (content of --extra_data_file, or
    an empty dict) and `BASE_URL` (from --base_url).  Exits with status 1 as
    soon as the LaTeX run for one of the files fails.
    """
    global extra_data, BASE_URL

    parser = argparse.ArgumentParser(
        description="convert a sagews worksheet to a pdf file via latex",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("filename",
                        nargs='+',
                        help="name of sagews file (required)",
                        type=str)
    parser.add_argument("--author",
                        dest="author",
                        help="author name for printout",
                        type=str,
                        default="")
    parser.add_argument("--title",
                        dest="title",
                        help="title for printout",
                        type=str,
                        default="")
    parser.add_argument("--date",
                        dest="date",
                        help="date for printout",
                        type=str,
                        default="")
    parser.add_argument("--contents",
                        dest="contents",
                        help="include a table of contents 'true' or 'false'",
                        type=str,
                        default='true')
    parser.add_argument(
        "--outfile",
        dest="outfile",
        help=
        "output filename (defaults to input file with sagews replaced by pdf)",
        type=str,
        default="")
    parser.add_argument(
        "--remove_tmpdir",
        dest="remove_tmpdir",
        help=
        "if 'false' do not delete the temporary LaTeX files and print name of temporary directory",
        type=str,
        default='true')
    parser.add_argument(
        "--work_dir",
        dest="work_dir",
        help=
        "if set, then this is used as the working directory where the tex files are generated and it won't be deleted like the temp dir."
    )
    parser.add_argument(
        '--subdir',
        dest="subdir",
        help=
        "if set, the work_dir will be set (or overwritten) to be pointing to a subdirectory named after the file to be converted.",
        default='false')
    parser.add_argument(
        "--extra_data_file",
        dest="extra_data_file",
        help=
        "JSON format file that contains extra data useful in printing this worksheet, e.g., 3d plots",
        type=str,
        default='')
    parser.add_argument("--style",
                        dest="style",
                        help="Styling of the LaTeX document",
                        type=str,
                        choices=['classic', 'modern'],
                        default="modern")
    parser.add_argument(
        "--base_url",
        dest="base_url",
        help=
        "The 'BASE_URL' from where blobs and other files are being downloaded from",
        default=BASE_URL)

    args = parser.parse_args()
    # the boolean options are passed as the strings 'true' / 'false'
    args.contents = args.contents == 'true'
    args.remove_tmpdir = args.remove_tmpdir == 'true'
    args.subdir = args.subdir == 'true'

    if args.extra_data_file:
        with open(args.extra_data_file) as extra_file:
            extra_data = json.loads(extra_file.read())
    else:
        extra_data = {}

    BASE_URL = args.base_url

    remove_tmpdir = args.remove_tmpdir

    curdir = os.path.abspath('.')
    for filename in args.filename:
        os.chdir(curdir)  # stuff below can change away from curdir

        if args.subdir:
            from os.path import dirname, basename, splitext, join
            parent_dir = dirname(filename)
            subdir = '%s-sagews2pdf' % splitext(basename(filename))[0]
            work_dir = join(parent_dir, subdir)
            remove_tmpdir = False
        elif args.work_dir is not None:
            work_dir = os.path.abspath(os.path.expanduser(args.work_dir))
            remove_tmpdir = False
        else:
            work_dir = None

        title = py2decodestr(args.title)
        author = py2decodestr(args.author)

        from subprocess import CalledProcessError
        try:
            sagews_to_pdf(filename,
                          title=title,
                          author=author,
                          date=args.date,
                          outfile=args.outfile,
                          contents=args.contents,
                          remove_tmpdir=remove_tmpdir,
                          work_dir=work_dir,
                          style=args.style)
        # subprocess.check_call might throw
        except CalledProcessError as e:
            sys.stderr.write('CalledProcessError: %s\n' % e)
            # sys.exit instead of the builtin exit(), which only exists when
            # the site module has been loaded
            sys.exit(1)
870

871

872
# Run the command line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()
874

875
Product

Resources

Company