Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
probml
GitHub Repository: probml/pyprobml
Path: blob/master/notebooks/misc/cmyk-magic.ipynb
1192 views
Kernel: Python 3

Open In Colab

import pdf2image
--------------------------------------------------------------------------- ModuleNotFoundError Traceback (most recent call last) <ipython-input-1-bd56f4c59bef> in <module>() ----> 1 import pdf2image ModuleNotFoundError: No module named 'pdf2image' --------------------------------------------------------------------------- NOTE: If your import is failing due to a missing package, you can manually install dependencies using either !pip or !apt. To view examples of installing some common dependencies, click the "Open Examples" button below. ---------------------------------------------------------------------------
!pip install pdf2image
Collecting pdf2image Downloading pdf2image-1.16.0-py3-none-any.whl (10 kB) Requirement already satisfied: pillow in /usr/local/lib/python3.7/dist-packages (from pdf2image) (7.1.2) Installing collected packages: pdf2image Successfully installed pdf2image-1.16.0
from google.colab import drive drive.mount("/content/drive")
Mounted at /content/drive
!ls /content/drive/MyDrive/MLAPA
cnn-vis-modified.gslides 'Flaxifying (V)AE MLAPP examples.gdoc' GANs.gdraw GenModelsTree.gdraw hier-vae.pdf InfTree.gdraw 'Introduction to Bayesian Machine Learning.gslides' 'Intro to Prob Stats for ML.gslides' 'LSTM (1).gdraw' LSTM.gdraw 'Machine learning: a probabilistic perspective MLAPA.gdoc' mlapa-30jan17.pdf 'MLAPA figures.gslides' 'MLAPA todo.gsheet' 'MLAPA v2.gdoc' MLtriad.gdraw nnet-8jan16.pdf pmlSol-camera-22may12.pdf 'sRGB Color Space Profile.icm' survey-results-google-RG-dec2015.png Text-200416.zip USWebCoatedSWOP.icc
from pdf2image import convert_from_path
!ls
2dgridDAGa.pdf sample_data
!sudo apt-get install poppler-utils
Reading package lists... Done Building dependency tree Reading state information... Done The following NEW packages will be installed: poppler-utils 0 upgraded, 1 newly installed, 0 to remove and 40 not upgraded. Need to get 154 kB of archives. After this operation, 613 kB of additional disk space will be used. Get:1 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 poppler-utils amd64 0.62.0-2ubuntu2.12 [154 kB] Fetched 154 kB in 1s (304 kB/s) debconf: unable to initialize frontend: Dialog debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 76, <> line 1.) debconf: falling back to frontend: Readline debconf: unable to initialize frontend: Readline debconf: (This frontend requires a controlling tty.) debconf: falling back to frontend: Teletype dpkg-preconfigure: unable to re-open stdin: Selecting previously unselected package poppler-utils. (Reading database ... 160837 files and directories currently installed.) Preparing to unpack .../poppler-utils_0.62.0-2ubuntu2.12_amd64.deb ... Unpacking poppler-utils (0.62.0-2ubuntu2.12) ... Setting up poppler-utils (0.62.0-2ubuntu2.12) ... Processing triggers for man-db (2.8.3-2ubuntu0.1) ...
# Render the PDF to a PNG next to it, using the pdftocairo backend.
# pdf2image appends the ".png" extension to `temp_file_path` itself.
input_path = "/content/2dgridDAGa.pdf"
temp_file_path = "/content/temp2dgridDAGa"
out = convert_from_path(
    input_path,
    fmt="png",
    single_file=True,
    use_pdftocairo=True,
    output_file=temp_file_path,
)
!ls
2dgridDAGa.pdf sample_data temp2dgridDAGa.png
import argparse
import concurrent.futures
import functools
import multiprocessing
import os
import shutil
from glob import glob

from pdf2image import convert_from_path
from PIL import Image
from PIL import ImageCms
from tqdm import tqdm

# Raster formats that can be colour-converted directly (matched case-insensitively).
_IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")


def split_file_name(input_path):
    """Split a path into its components.

    Returns:
        A tuple ``(base_name, dir_name, file_name, ext)``: for ``"/a/b/c.pdf"``
        this is ``("c.pdf", "/a/b", "c", ".pdf")``.
    """
    dir_name, base_name = os.path.split(input_path)
    file_name, ext = os.path.splitext(base_name)
    return base_name, dir_name, file_name, ext


def convert(
    input_path,
    output_path,
    color_space="CMYK",
    input_profile_path=None,
    output_profile_path=None,
    quality=100,
    verbose=False,
    overwrite=False,
):
    """Convert an image or a PDF into the colour space of choice.

    PDFs are first rasterised to a temporary PNG next to ``output_path``; the
    PNG is then colour-converted and removed. Files that are neither PDFs nor
    PNG/JPEG images are copied to ``output_path`` unchanged.

    Keyword arguments:
    input_path -- the input path of the file
    output_path -- the output path for the result to be written; its extension
        selects the output image format
    color_space -- the colour space (PIL mode) to convert to, default CMYK
    input_profile_path -- the path to the input ICC profile
    output_profile_path -- the path to the output ICC profile
    quality -- quality setting forwarded to the image writer
    verbose -- print progress and error details
    overwrite -- redo the conversion even if ``output_path`` already exists

    Returns:
        True if the output already existed (and ``overwrite`` is False) or the
        colour conversion succeeded; False if the file was merely copied or
        any error occurred.
    """
    profile_kwargs = dict(
        color_space=color_space,
        input_profile_path=input_profile_path,
        output_profile_path=output_profile_path,
        quality=quality,
    )
    try:
        if not overwrite and os.path.exists(output_path):
            return True

        lowered = input_path.lower()
        if lowered.endswith(".pdf"):
            return _convert_pdf(input_path, output_path, verbose, **profile_kwargs)
        if lowered.endswith(_IMAGE_EXTENSIONS):
            return _convert_profiles(input_path, output_path, **profile_kwargs)

        print(f"{input_path} is not a valid image file, copying it instead to {output_path}.")
        shutil.copy(input_path, output_path)
        return False
    except Exception as e:
        # Deliberately broad: this function reports failure through its
        # return value so that one bad file cannot abort a batch run.
        if verbose:
            print(f"Error in file: {input_path}\n", e)
        return False


def _convert_pdf(input_path, output_path, verbose, **profile_kwargs):
    """Rasterise a PDF to a temporary PNG and colour-convert that PNG."""
    _, dir_name, file_name, _ = split_file_name(output_path)
    out_dir = dir_name or os.curdir
    temp_name = "temp" + file_name
    temp_file_path = os.path.join(out_dir, temp_name + ".png")
    if verbose:
        print(f"rendering {input_path} to {temp_file_path}")
    try:
        # Without output_folder pdf2image only returns the image in memory and
        # writes nothing to disk, so the folder must be passed explicitly.
        # paths_only avoids decoding an image we are about to reopen anyway.
        convert_from_path(
            input_path,
            output_folder=out_dir,
            output_file=temp_name,
            fmt="png",
            single_file=True,
            paths_only=True,
        )
        if not os.path.isfile(temp_file_path):
            raise FileNotFoundError(f"pdf2image did not produce {temp_file_path}")
        return _convert_profiles(temp_file_path, output_path, **profile_kwargs)
    finally:
        # The PNG is only an intermediate artefact; never leave it behind.
        if os.path.isfile(temp_file_path):
            os.remove(temp_file_path)


def _convert_profiles(
    input_path=None,
    output_path=None,
    color_space="CMYK",
    input_profile_path=None,
    output_profile_path=None,
    quality=100,
):
    """Apply an ICC profile-to-profile transform to one image file.

    On failure the input file is copied to ``output_path`` as a best-effort
    fallback.

    Returns:
        True if the converted image was written, False otherwise.
    """
    try:
        with Image.open(input_path) as im:
            converted = ImageCms.profileToProfile(
                im,
                input_profile_path,
                output_profile_path,
                renderingIntent=0,  # 0 is the perceptual rendering intent
                outputMode=color_space,
            )
            converted.save(output_path, quality=int(quality))
        return True
    except Exception as e:
        print(e)
        print(f"cannot convert {input_path}, copying it instead.")
        try:
            shutil.copy(input_path, output_path)
        except (OSError, TypeError) as copy_error:
            # E.g. the input itself is missing; report it instead of masking
            # the original conversion error with a second traceback.
            print(f"could not copy {input_path} to {output_path}: {copy_error}")
        return False
import os
input_path = "/content/2dgridDAGa.pdf"
# Pillow infers the output format from the file extension, so the target
# path needs one; without it the save step cannot pick a writer.
temp_file_path = "/content/2dgridDAGa.jpg"
srgb_profile_path = "/content/drive/MyDrive/MLAPA/sRGB Color Space Profile.icm"
convert(
    input_path,
    temp_file_path,
    color_space="RGB",
    quality=80,
    verbose=True,
    input_profile_path=srgb_profile_path,
    output_profile_path=srgb_profile_path,
)
input /content/2dgridDAGa.pdf output /content/temp2dgridDAGa call convert /content/temp2dgridDAGa.png [Errno 2] No such file or directory: '/content/temp2dgridDAGa.png' cannot convert/content/temp2dgridDAGa.png, copying it instead. exception Error in file: /content/2dgridDAGa.pdf [Errno 2] No such file or directory: '/content/temp2dgridDAGa.png'
False
!ls
2dgridDAGa.pdf drive sample_data