Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Aniket025
GitHub Repository: Aniket025/Medical-Prescription-OCR
Path: blob/master/Model-5/Untitled2.ipynb
427 views
Kernel: Python 2
from __future__ import print_function from base64 import b64encode from os import makedirs, remove from os.path import join, basename from sys import argv import json import requests import glob from unidecode import unidecode
# URL that request_ocr() POSTs the annotate request to (Google Cloud Vision
# `images:annotate` REST method, v1).
ENDPOINT_URL = 'https://vision.googleapis.com/v1/images:annotate'
# NOTE(review): not referenced by the code visible here; presumably the
# directory where JSON results are meant to be written -- confirm.
RESULTS_DIR = 'jsons'
def make_image_data_list(image_filenames):
    """Build the per-image request dicts for a Vision API annotate call.

    image_filenames may be a single filename or a list/tuple of filenames.
    (Previously the argument was handed straight to ``open()``, so only a
    single filename worked even though a list was documented.)

    Returns a list with one dict per image, each holding the base64-encoded
    file content and a single TEXT_DETECTION feature request.
    """
    if isinstance(image_filenames, (list, tuple)):
        filenames = image_filenames
    else:
        # A lone path: wrap it so both call styles share one code path.
        filenames = [image_filenames]

    img_requests = []
    for filename in filenames:
        with open(filename, 'rb') as f:
            # The request body is JSON, which cannot carry raw bytes, so the
            # image content is sent as base64 text.
            ctxt = b64encode(f.read()).decode()
        img_requests.append({
            'image': {'content': ctxt},
            'features': [{
                'type': 'TEXT_DETECTION',
                'maxResults': 1
            }]
        })
    return img_requests


def make_image_data(image_filenames):
    """Return the JSON request body for image_filenames, encoded as bytes."""
    imgdict = make_image_data_list(image_filenames)
    return json.dumps({"requests": imgdict}).encode()


def request_ocr(api_key, image_filenames, timeout=None):
    """POST the image(s) to ENDPOINT_URL and return the requests Response.

    api_key is sent as the ``key`` query parameter.
    image_filenames is a single filename or a list/tuple of filenames.
    timeout is forwarded to ``requests.post``; the default of None keeps the
    original behaviour of waiting indefinitely for the server.

    The response is returned as-is; the HTTP status is not checked here.
    """
    response = requests.post(ENDPOINT_URL,
                             data=make_image_data(image_filenames),
                             params={'key': api_key},
                             headers={'Content-Type': 'application/json'},
                             timeout=timeout)
    return response


def remove_non_ascii(text):
    """Transliterate text to plain ASCII using unidecode.

    text may be UTF-8 encoded bytes (the only input the original accepted)
    or an already-decoded text string. Decoding via ``bytes.decode`` instead
    of the ``unicode`` builtin keeps this working on Python 2 and Python 3.
    """
    if isinstance(text, (bytes, bytearray)):
        text = text.decode("utf-8")
    return unidecode(text)
import os

# The API key is read from the environment instead of being written into the
# notebook: a key committed to a public repository can be used by anyone who
# reads it. A missing variable raises KeyError naming the variable.
api_key = os.environ["GOOGLE_VISION_API_KEY"]
# Single image to run text detection on, relative to the working directory.
image_filenames = "./4.jpg"
response = request_ocr(api_key, image_filenames)
print(type(response))
<class 'requests.models.Response'>
# Show the type of the decoded response body.
print(type(response.text))
<type 'unicode'>
# Parse the response body once; calling response.json() inside the loop
# re-decoded the whole payload for every annotation.
# .get() yields an empty list when the first result carries no
# 'textAnnotations' key instead of raising KeyError.
text_annotations = response.json()['responses'][0].get('textAnnotations', [])

# Element 0 is skipped, exactly as the original index loop did.
# NOTE(review): per the Vision API docs it holds the full detected text
# rather than a single word -- confirm.
# Each description is UTF-8 encoded before being passed to remove_non_ascii,
# as in the original.
entities = [
    remove_non_ascii(annotation['description'].encode("utf-8"))
    for annotation in text_annotations[1:]
]
print(entities)
['Page', '8', 'B.C.', 'ROY', 'TECHNOLOGY', 'HOSPITAL', 'I.I.T.,', 'KHARAGPUR', 'ha', 'lkrabor', 'ly', 'pey', 'the', 'Doctor:', 'Date:', '.0.8.', 'MAR', '2016', 'Observation', 'Prescription', 'Loose', 'Sol', 'Fao', 'to', 'eje', 'BP', '-', '116', 'Rilsenicti', '@', 'd', '-', 'TatoLapauude2ing', ', ', 'jDra', ')', '-', 'Ca', 'nh', 'de', 'Cun ', '. aueno', 'ER.soe']
# Parse the response body once; calling response.json() inside the loop
# re-decoded the whole payload for every annotation.
# .get() yields an empty list when the first result carries no
# 'textAnnotations' key instead of raising KeyError.
bbox_annotations = response.json()['responses'][0].get('textAnnotations', [])

# Collect the bounding-polygon vertices of every annotation after the first
# (element 0 is skipped, exactly as the original index loop did).
bboxes = [
    annotation['boundingPoly']['vertices']
    for annotation in bbox_annotations[1:]
]

# Show the first box as a sample; guard against an empty result so this does
# not raise IndexError.
if bboxes:
    print(bboxes[0])
else:
    print("no bounding boxes: response contained no word annotations")
[{u'y': 341, u'x': 2083}, {u'y': 345, u'x': 2176}, {u'y': 390, u'x': 2174}, {u'y': 386, u'x': 2081}]