CoCalc -- Untitled2.ipynb

GitHub Repository: Aniket025/Medical-Prescription-OCR
Path: blob/master/Model-5/Untitled2.ipynb
⁶²³ views

Kernel: Python 2

In [28]:

from __future__ import print_function

import glob
import json
import os
from base64 import b64encode
from os import makedirs, remove
from os.path import join, basename
from sys import argv

import requests
from unidecode import unidecode

In [2]:

# URL that request_ocr() POSTs the annotate request to.
ENDPOINT_URL = 'https://vision.googleapis.com/v1/images:annotate'
# Presumably the directory where JSON results would be written; no cell
# visible in this notebook references it -- TODO confirm it is still needed.
RESULTS_DIR = 'jsons'

In [29]:

def make_image_data_list(image_filenames):
    """
    Build the per-image request entries for the Vision API.

    image_filenames is either a single filename or a list/tuple of
        filename strings; a lone value is treated as a one-element list
    Returns a list of dicts formatted as the Vision API
        needs them to be: one entry per image, each carrying the
        base64-encoded file content and a TEXT_DETECTION feature
        with maxResults 1
    Raises whatever open() raises when a file cannot be read
    """
    # The body used to hand the argument straight to open(), so only a single
    # path ever worked even though the contract is "a list of filenames".
    # Wrapping anything that is not a list/tuple keeps the single-path call
    # style working while honouring the documented list form.
    if not isinstance(image_filenames, (list, tuple)):
        image_filenames = [image_filenames]

    img_requests = []
    for imgname in image_filenames:
        with open(imgname, 'rb') as f:
            # Raw bytes cannot be embedded in JSON; send them as base64 text.
            ctxt = b64encode(f.read()).decode()
        img_requests.append({
                'image': {'content': ctxt},
                'features': [{
                    'type': 'TEXT_DETECTION',
                    'maxResults': 1
                }]
        })
    return img_requests

def make_image_data(image_filenames):
    """Serialise the Vision API request body for image_filenames.

    The entries built by make_image_data_list() are wrapped under the
    "requests" key, dumped to JSON and encoded, so the return value is
    the encoded JSON document ready to be sent as an HTTP body.
    """
    payload = {"requests": make_image_data_list(image_filenames)}
    serialised = json.dumps(payload)
    return serialised.encode()


def request_ocr(api_key, image_filenames):
    """POST the images to ENDPOINT_URL and return the HTTP response.

    api_key is sent as the `key` query parameter; the request body is the
    JSON produced by make_image_data(image_filenames).
    Returns the requests response object unmodified (no status check).
    """
    body = make_image_data(image_filenames)
    query = {'key': api_key}
    json_headers = {'Content-Type': 'application/json'}
    return requests.post(ENDPOINT_URL, data=body, params=query, headers=json_headers)

def remove_non_ascii(text):
    """Transliterate text to its closest ASCII form using unidecode.

    text may be a UTF-8 encoded byte string (the form existing callers pass)
        or an already-decoded unicode string
    Returns the value unidecode() produces for the decoded text
    Raises UnicodeDecodeError if a byte string is not valid UTF-8
    """
    # unicode(text, encoding=...) raises TypeError for input that is already
    # unicode and the builtin does not exist on Python 3, so decode only when
    # the caller actually handed over bytes.
    if isinstance(text, bytes):
        text = text.decode("utf-8")
    return unidecode(text)

In [4]:

# The API key is a credential, so it is read from the environment instead of
# being committed in the notebook. Export GOOGLE_VISION_API_KEY before running
# this cell; a key that was ever stored in a shared notebook should be treated
# as compromised and revoked.
api_key = os.environ["GOOGLE_VISION_API_KEY"]
image_filenames = "./4.jpg"
response = request_ocr(api_key, image_filenames)
# Stop here on an HTTP error (e.g. a rejected key) rather than failing later
# with a KeyError while indexing into the JSON body.
response.raise_for_status()
print(type(response))

Out[4]:

<class 'requests.models.Response'>

In [5]:

# Inspect the type of the decoded response body; the recorded output for this
# cell shows `unicode` (Python 2 kernel).
print(type(response.text))

Out[5]:

<type 'unicode'>

In [32]:

# Parse the response body once; calling response.json() inside the loop
# re-decoded the whole document on every iteration. A response without any
# detected text has no 'textAnnotations' key, so default to an empty list.
text_annotations = response.json()['responses'][0].get('textAnnotations', [])
# Annotation 0 is skipped, as before -- presumably because the Vision API puts
# the full detected text there and the individual pieces follow (TODO confirm).
# The description is encoded first because remove_non_ascii is fed UTF-8 bytes.
entities = [
    remove_non_ascii(annotation['description'].encode("utf-8"))
    for annotation in text_annotations[1:]
]
print(entities)

Out[32]:

['Page', '8', 'B.C.', 'ROY', 'TECHNOLOGY', 'HOSPITAL', 'I.I.T.,', 'KHARAGPUR', 'ha', 'lkrabor', 'ly', 'pey', 'the', 'Doctor:', 'Date:', '.0.8.', 'MAR', '2016', 'Observation', 'Prescription', 'Loose', 'Sol', 'Fao', 'to', 'eje', 'BP', '-', '116', 'Rilsenicti', '@', 'd', '-', 'TatoLapauude2ing', ', ', 'jDra', ')', '-', 'Ca', 'nh', 'de', 'Cun ', '. aueno', 'ER.soe']

In [38]:

# Parse the response body once instead of on every loop iteration, and treat a
# response with no 'textAnnotations' key (no text detected) as an empty list.
text_annotations = response.json()['responses'][0].get('textAnnotations', [])
# One vertex list per annotation, skipping annotation 0 exactly as the
# entities cell does so both lists line up index-for-index.
bboxes = [
    annotation['boundingPoly']['vertices']
    for annotation in text_annotations[1:]
]

In [47]:

# Show the vertex list of the first collected bounding box; raises IndexError
# if bboxes is empty.
print(bboxes[0])

Out[47]:

[{u'y': 341, u'x': 2083}, {u'y': 345, u'x': 2176}, {u'y': 390, u'x': 2174}, {u'y': 386, u'x': 2081}]

In [ ]:

Product

Resources

Company