Path: blob/master/src/smc_pyutil/smc_pyutil/ipynb_to_pdf.py
Views: 285
#!/usr/bin/python
# -*- coding: utf-8 -*-

# This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
# License: AGPLv3 s.t. "Commons Clause" – read LICENSE.md for details
"""
Convert ipynb files to pdf using nbconvert's html generating
and headless chromium, instead of using LaTeX.  This is much
faster and more reliable, but potentially doesn't "look" as good,
depending on your tastes.  It also has a dependency on chromium.
"""

# ATTN: make sure to keep dependencies of this in sync with projects/project/configuration.ts

### **
# This script is deprecated as of
#
# https://github.com/sagemathinc/cocalc/pull/5583
#
# which implements much better and more efficient functionality.
# In particular, there is now a project api call jupyter_nbconvert
# e.g., used in packages/frontend/course/export/export-assignment.ts
# that converts notebooks to other formats.  It's much more
# efficient than upstream nbconvert for html and pdf, and doesn't waste
# time importing code each time it is run.
###

from __future__ import absolute_import, print_function
from shutil import which
import os, sys, time, glob
from subprocess import check_call
from itertools import repeat, chain


def sanitize_nbconvert_path(path):
    """Return *path* with glob metacharacters escaped.

    nbconvert treats its positional argument as a glob pattern, so a
    filename containing ``*``, ``?`` or ``[`` has to be escaped before it
    is passed on the command line.
    """
    # same functionality as in packages/util/sanitize-nbconvert.ts
    # https://github.com/jupyter/nbconvert/issues/911
    return glob.escape(path)


def _find_browser():
    """Return the name of the first Chrome/Chromium executable on PATH.

    Raises:
        RuntimeError: if neither ``chromium-browser`` nor ``google-chrome``
            can be found.
    """
    for candidate in ("chromium-browser", "google-chrome"):
        if which(candidate) is not None:
            return candidate
    raise RuntimeError("Neither Chrome nor Chromium installed!")


def ipynb_to_pdf(path):
    """Convert the notebook at *path* to a PDF next to it.

    The notebook is first rendered to a temporary ``<name>.tmp.html`` file
    with ``jupyter nbconvert`` and that HTML is then printed to
    ``<name>.pdf`` by a headless Chrome/Chromium.

    Args:
        path: path of the notebook; must end in ``.ipynb``.

    Raises:
        ValueError: if *path* does not end in ``.ipynb``.
        RuntimeError: if no supported browser is installed.
        subprocess.CalledProcessError: if nbconvert or the browser exits
            with a non-zero status.
    """
    t = time.time()
    print("-" * 70)
    print("Convert %s..." % path)
    if not path.endswith('.ipynb'):
        err = "every path must end in '.ipynb' but '%s' does not" % path
        raise ValueError(err)

    browser = _find_browser()
    print(f"using {browser} to convert to PDF")

    path = os.path.abspath(path)
    # Strip only the "ipynb" suffix so that ``base`` keeps the trailing dot.
    base = path[:-len('ipynb')]
    pdf = base + 'pdf'
    html = base + 'tmp.html'
    check_call([
        "jupyter",
        "nbconvert",
        sanitize_nbconvert_path(path),
        "--to",
        "html",
        "--template",
        "classic",
        "--output=%s" % html,
    ])
    try:
        # --no-sandbox so it works in cocalc-docker (see https://stackoverflow.com/questions/43665276/how-to-run-google-chrome-headless-in-docker); should be OK, given our security model...
        check_call([
            browser,
            "--headless",
            "--disable-gpu",
            "--no-sandbox",
            "--print-to-pdf=%s" % pdf,
            "--run-all-compositor-stages-before-draw",
            "--virtual-time-budget=10000",
            html,
        ])
    finally:
        # Always remove the intermediate HTML, even when the browser fails,
        # so a failed conversion does not leave a stray file behind.
        os.unlink(html)
    print("Converted %s to %s in %s seconds" % (path, pdf, time.time() - t))
    print("-" * 70)


def main(argv=None):
    """Command line entry point: convert every notebook given as argument.

    Args:
        argv: list of notebook paths; defaults to ``sys.argv[1:]``.  With no
            paths at all a short usage message is printed instead.
    """
    paths = sys.argv[1:] if argv is None else list(argv)
    if not paths:
        print("Usage: cc-ipynb-to-pdf [filename1.ipynb] [filename2.ipynb] ...")
        print(
            "Converts filename1.ipynb to filename1.pdf, etc., using nbconvert first"
        )
        print(
            "to convert to HTML, then using headless chromium to convert that to PDF."
        )
        print(
            "This is *vastly* more robust and faster than using nbconvert directly,"
        )
        print("since that uses LaTeX.")
    else:
        for path in paths:
            ipynb_to_pdf(path)


if __name__ == "__main__":
    main()