Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Aniket025
GitHub Repository: Aniket025/Medical-Prescription-OCR
Path: blob/master/Model-1/OCR.py
427 views
1
2
import numpy as np
3
import pandas as pd
4
import matplotlib.pyplot as plt
5
import tensorflow as tf
6
import cv2
7
8
# Import custom functions, corresponding to notebooks
9
from ocr.normalization import imageNorm, letterNorm
10
from ocr import page, words, charSeg
11
from ocr.helpers import implt, resize
12
from ocr.tfhelpers import Graph
13
from ocr.datahelpers import idx2char
14
15
16
# ### Global Variables
17
18
# In[2]:
19
20
21
# Settings
22
IMG = '1'      # name of the test image loaded from test/<IMG>.jpg: 1, 2, 3
LANG = 'cz'    # language code selecting the model directory: cz, en
# Location of the character classifier for the chosen language
MODEL_LOC = 'models/char-clas/%s/CharClassifier' % LANG
25
26
27
# ## Load Trained Model
28
29
# In[3]:
30
31
32
# Character classifier wrapper built from the model stored at MODEL_LOC;
# used later through charClass.run(...) to predict character indices.
charClass = Graph(MODEL_LOC)
33
34
35
# ## Load image
36
37
# In[4]:
38
39
40
# cv2.imread returns None (it does not raise) when the file is missing or
# cannot be decoded, so fail here with a clear message instead of letting
# cv2.cvtColor raise an opaque error on a None input.
img_path = "test/%s.jpg" % IMG
bgr = cv2.imread(img_path)
if bgr is None:
    raise IOError("Could not read image: " + img_path)

# OpenCV loads images as BGR; convert to RGB for the rest of the pipeline
image = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
implt(image)


# In[5]:


# Crop image and get bounding boxes
crop = page.detection(image)
implt(crop)
bBoxes = words.detection(crop)
51
52
53
# # Simple UI using widgets
54
55
# In[6]:
56
57
58
class Cycler:
    """Cycle through the words and recognise them.

    Attributes:
        boxes: Array of word bounding boxes; each box is unpacked as
            ``(x1, y1, x2, y2)``.
        image: Whole image the bounding boxes index into.
    """

    # Passed as the second argument of imageNorm when normalising a word
    # (presumably the target height in pixels -- confirm against imageNorm).
    height = 60

    def __init__(self, image, boxes):
        self.boxes = boxes    # Array of bounding boxes
        self.image = image    # Whole image

    def recognise(self, img):
        """Recognise the word in ``img``, print it and return it.

        The word image is normalised, padded on both sides, split into
        character candidates at the positions returned by
        ``charSeg.segmentation`` and classified with ``charClass``.

        Args:
            img: Image of a single word.

        Returns:
            The recognised word as a string; empty when no character
            candidate passes the size filter.
        """
        # Pre-processing the word
        img = imageNorm(img, self.height, border=False, tilt=True,
                        hystNorm=True)

        # Pad 30 px of black on the left and right, then separate letters
        img = cv2.copyMakeBorder(img, 0, 0, 30, 30, cv2.BORDER_CONSTANT,
                                 value=[0, 0, 0])
        gaps = charSeg.segmentation(img, RNN=True, debug=True)

        chars = []
        # Each pair of consecutive gap positions delimits one candidate
        for left, right in zip(gaps, gaps[1:]):
            char = img[:, left:right]
            # TODO None type error after threshold
            char, dim = letterNorm(char, is_thresh=True, dim=True)
            # TODO Test different values
            # Skip candidates whose reported dimensions are too small
            if dim[0] > 4 and dim[1] > 4:
                chars.append(char.flatten())

        chars = np.array(chars)
        word = ''
        if len(chars) != 0:
            pred = charClass.run(chars)
            word = ''.join(idx2char(c) for c in pred)

        print("Word: " + word)
        return word

    def idxImage(self, index):
        """Show the word image at ``index`` and recognise it.

        Does nothing when ``index`` is not below ``len(self.boxes)``.
        Returns None.

        Args:
            index: Position of the bounding box in ``self.boxes``.
        """
        if index >= len(self.boxes):
            return

        x1, y1, x2, y2 = self.boxes[index]

        # Cutting out the word image
        img = self.image[y1:y2, x1:x2]
        implt(img, t='Index: ' + str(index))

        self.recognise(img)
106
107
108
# In[7]:
109
110
111
cycler = Cycler(crop, bBoxes)

# Show and recognise every detected word, one bounding box at a time
for i, _ in enumerate(bBoxes):
    cycler.idxImage(i)
117
118