Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Aniket025
GitHub Repository: Aniket025/Medical-Prescription-OCR
Path: blob/master/Model-3/ocr/normalization.py
426 views
1
# -*- coding: utf-8 -*-
2
"""
3
Include functions for normalizing images of words and letters
4
Main functions: imageNorm, letterNorm, imageStandardization
5
"""
6
import numpy as np
7
import cv2
8
import math
9
from .helpers import *
10
11
12
13
def imageStandardization(image):
    """
    Scale an image to zero mean and unit variance.

    Same formula as tf.image.per_image_standardization: the divisor is the
    standard deviation, floored at 1 / sqrt(number of elements) so that a
    uniform image does not divide by zero.
    """
    centered = image - np.mean(image)
    # Lower bound for the divisor, used when the image is (almost) constant
    min_scale = 1.0 / math.sqrt(image.size)
    scale = max(np.std(image), min_scale)
    return centered / scale
16
17
18
def cropAddBorder(img, height, threshold=50, border=True, borderSize=15):
    """
    Trim the empty margins of a word/letter image and optionally pad its sides.

    Pixels that are not above `threshold` are zeroed first. The image is then
    cut to the outermost rows and columns holding more than one nonzero
    pixel; a side on which no such row/column exists is left uncropped.
    With a nonzero `height` the crop is passed through the `resize` helper,
    and with `border` set, `borderSize` black columns are added on the left
    and on the right.
    """
    # Suppress faint pixels so they are not counted as content
    _, img = cv2.threshold(img, threshold, 255, cv2.THRESH_TOZERO)
    rows, cols = img.shape[0], img.shape[1]

    def firstFilled(indices, pick):
        # First index whose row/column has more than one nonzero pixel
        for idx in indices:
            if np.count_nonzero(pick(idx)) > 1:
                return idx
        return None

    def rowAt(i):
        return img[i, :]

    def colAt(i):
        return img[:, i]

    top = firstFilled(range(rows), rowAt)
    lastRow = firstFilled(reversed(range(rows)), rowAt)
    left = firstFilled(range(cols), colAt)
    lastCol = firstFilled(reversed(range(cols)), colAt)

    # Fall back to the full extent on any side where nothing was found
    y0 = 0 if top is None else top
    y1 = rows if lastRow is None else lastRow + 1
    x0 = 0 if left is None else left
    x1 = cols if lastCol is None else lastCol + 1

    cropped = img[y0:y1, x0:x1]
    if height != 0:
        cropped = resize(cropped, height, True)

    if not border:
        return cropped
    # Pad only horizontally: top=0, bottom=0, left/right=borderSize
    return cv2.copyMakeBorder(cropped, 0, 0, borderSize, borderSize,
                              cv2.BORDER_CONSTANT,
                              value=[0, 0, 0])
55
56
57
def wordTilt(img, height, border=True, borderSize=15):
    """
    Estimate the slant of a word image, straighten it, then crop and pad it.

    Hough lines are detected on the Canny edges of `img`. Lines whose theta
    is below 0.7 rad or above 2.6 rad are kept, and when more than one such
    line exists their mean theta is handed to tiltByAngle. The (possibly
    tilted) image is always finished with cropAddBorder using threshold 50.
    """
    edges = cv2.Canny(img, 50, 150, apertureSize=3)
    lines = cv2.HoughLines(edges, 1, np.pi/180, 30)

    if lines is not None:
        # Collect the qualifying angles once; the list serves both the
        # count and the mean (np.sum over a generator is deprecated).
        angles = [line[0][1] for line in lines
                  if line[0][1] < 0.7 or line[0][1] > 2.6]

        # Set min number of valid lines (try higher)
        meanAngle = np.mean(angles) if len(angles) > 1 else 0

        # Averaging angles from both ends of the range can land in the
        # middle, so the mean itself has to be re-checked before tilting.
        if meanAngle != 0 and (meanAngle < 0.7 or meanAngle > 2.6):
            img = tiltByAngle(img, meanAngle, height)
    return cropAddBorder(img, height, 50, border, borderSize)
73
74
75
def tiltByAngle(img, angle, height):
    """
    Shear the image horizontally by the given angle.

    The horizontal displacement between the top and bottom edge is
    tan(angle) * height; the output is widened by its absolute value so the
    sheared content is shifted to the right instead of being cut off.
    """
    shift = np.tan(angle) * height
    width = len(img[0])
    corners = np.float32([[0, 0], [0, height], [width, height], [width, 0]])

    # shift is positive for angle < 0.7 and negative for angle > 2.6.
    # Either the bottom edge or the top edge is moved to the right.
    if shift > 0:
        topOffset, bottomOffset = 0, shift
    else:
        topOffset, bottomOffset = -shift, 0

    target = np.float32([[topOffset, 0],
                         [bottomOffset, height],
                         [width + bottomOffset, height],
                         [width + topOffset, 0]])

    transform = cv2.getPerspectiveTransform(corners, target)
    outSize = (int(width + abs(shift)), height)
    return cv2.warpPerspective(img, transform, outSize)
96
97
98
def sobelDetect(channel):
    """
    Sobel edge magnitude of a single channel.

    Returns sqrt(gx^2 + gy^2) of the horizontal and vertical Sobel
    derivatives, capped at 255 and converted to uint8.
    """
    gradX = cv2.Sobel(channel, cv2.CV_16S, 1, 0)
    gradY = cv2.Sobel(channel, cv2.CV_16S, 0, 1)
    magnitude = np.hypot(gradX, gradY)
    # Cap before the uint8 conversion so large gradients do not wrap around
    return np.uint8(np.minimum(magnitude, 255))
106
107
108
class HysterThresh:
    """
    Hysteresis thresholding of a grayscale image.

    The image is inverted and stretched to the 0..255 range. Two thresholds
    are placed 45 (low) and 65 (high) levels above the most frequent
    intensity. Pixels at or above the high threshold become 255; pixels
    between the thresholds are kept (raised by 255 - high) only when they
    are 8-connected to such a pixel through other in-between pixels.
    Everything else stays 0.
    """

    def __init__(self, img):
        img = 255 - img
        lowest = np.min(img)
        span = float(np.max(img)) - float(lowest)
        if span > 0:
            img = (img - lowest) / span * 255
        else:
            # Uniform image: there is no contrast to stretch. Use an all-zero
            # image instead of dividing by zero and filling it with NaN.
            img = np.zeros(img.shape, dtype=np.float64)
        hist, bins = np.histogram(img.ravel(), 256, [0, 256])

        # Thresholds are relative to the histogram peak
        peak = np.argmax(hist)
        self.high = peak + 65
        self.low = peak + 45
        # Offset added to weak pixels that get accepted
        self.diff = 255 - self.high

        self.img = img
        self.im = np.zeros(img.shape, dtype=img.dtype)

    def getImage(self):
        """ Run the thresholding and return the result as uint8 """
        self.hyster()
        return np.uint8(self.im)

    def hyster_rec(self, r, c):
        """
        Accept all weak pixels connected to position (r, c).

        Implemented with an explicit stack instead of recursion, so long
        connected strokes cannot exceed Python's recursion limit.
        """
        h, w = self.img.shape
        pending = [(r, c)]
        while pending:
            pr, pc = pending.pop()
            for ri in range(max(pr - 1, 0), min(pr + 2, h)):
                for ci in range(max(pc - 1, 0), min(pc + 2, w)):
                    # im == 0 doubles as the "not visited yet" marker
                    if (self.im[ri, ci] == 0
                            and self.high > self.img[ri, ci] >= self.low):
                        self.im[ri, ci] = self.img[ri, ci] + self.diff
                        pending.append((ri, ci))

    def hyster(self):
        """ Mark strong pixels and grow them into neighbouring weak pixels """
        strong = self.img >= self.high
        self.im[strong] = 255
        self.img[strong] = 255
        for r, c in zip(*np.nonzero(strong)):
            self.hyster_rec(int(r), int(c))
144
145
146
def hystImageNorm(image):
    """
    Normalize a word image with hysteresis thresholding.

    The RGB image is converted to grayscale, smoothed with a bilateral
    filter (sigmaColor 15, sigmaSpace 30) and thresholded by HysterThresh.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # A sigmaColor of 10 was used here previously
    smoothed = cv2.bilateralFilter(grayscale, 0, 15, 30)
    thresholder = HysterThresh(smoothed)
    return thresholder.getImage()
152
153
154
def imageNorm(image, height, border=True, tilt=True, borderSize=15, hystNorm=False):
    """
    Preprocess a word image
    => resize, extract edges (or hysteresis threshold), tilt the word

    With `hystNorm` the image goes through hystImageNorm; otherwise it is
    bilateral-filtered, converted to grayscale and reduced to Sobel edges
    with magnitudes not above 50 zeroed. With `tilt` the result is passed
    on to wordTilt, which also crops it and applies the border settings.
    """
    image = resize(image, height, True)

    if hystNorm:
        th = hystImageNorm(image)
    else:
        smoothed = cv2.bilateralFilter(image, 0, 30, 30)
        gray = cv2.cvtColor(smoothed, cv2.COLOR_RGB2GRAY)
        # Keep only the pronounced edges
        _, th = cv2.threshold(sobelDetect(gray), 50, 255, cv2.THRESH_TOZERO)

    if not tilt:
        return th
    return wordTilt(th, height, border, borderSize)
173
174
175
def resizeLetter(img, size = 56):
    """
    Resize the bigger side of the image to the given size.

    The other side is scaled by the same ratio and is never allowed to
    drop below one pixel, so very elongated images do not produce a
    zero-sized target (which cv2.resize rejects).
    """
    rows, cols = img.shape[0], img.shape[1]
    if rows > cols:
        rat = float(size) / rows
        target = (max(1, int(rat * cols)), size)
        return cv2.resize(img, target, interpolation = cv2.INTER_CUBIC)

    # NOTE(review): this branch uses OpenCV's default interpolation while the
    # one above uses INTER_CUBIC; kept as-is so existing outputs do not change.
    rat = float(size) / cols
    target = (size, max(1, int(rat * rows)))
    return cv2.resize(img, target)
184
185
186
def letterNorm(image, is_thresh=True, dim=False):
    """
    Preprocess a letter image: crop it and place it on a 64x64 canvas.

    With `is_thresh` a non-empty image is first cropped by cropAddBorder
    (threshold 80, no border). Images with both sides longer than one pixel
    are scaled by resizeLetter; the result is pasted onto a black 64x64
    uint8 canvas, 4 px from the edge along the longer side and centred
    along the shorter one. With `dim` the shape of the cropped image is
    returned as a second value.

    Empty images yield a blank canvas, and one-pixel strips too long for
    the canvas are shrunk to 56 px instead of raising a broadcast error.
    """
    if is_thresh and image.shape[0] > 0 and image.shape[1] > 0:
        image = cropAddBorder(image, height=0, threshold=80, border=False) # threshold=80

    result = np.zeros((64, 64), np.uint8)
    rows, cols = image.shape[0], image.shape[1]

    if rows == 0 or cols == 0:
        # Nothing to paste
        if dim:
            return result, image.shape
        return result

    resized = image
    if rows > 1 and cols > 1:
        resized = resizeLetter(image)
    elif max(rows, cols) > 64 - 4:
        # One-pixel strip that would overflow the canvas from offset 4:
        # shrink only its long side (cv2 expects (width, height)).
        target = (1, 56) if rows > cols else (56, 1)
        resized = cv2.resize(image, target)

    # Calculate offset for smaller size
    if rows > cols:
        offset = [(result.shape[1] - resized.shape[1]) // 2, 4]
    else:
        offset = [4, (result.shape[0] - resized.shape[0]) // 2]
    # Replace zeros by image
    result[offset[1]:offset[1] + resized.shape[0],
           offset[0]:offset[0] + resized.shape[1]] = resized

    if dim:
        return result, image.shape
    return result
209
210