Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Aniket025
GitHub Repository: Aniket025/Medical-Prescription-OCR
Path: blob/master/Model-5/word_detection.py
427 views
1
import numpy as np
2
import pandas as pd
3
import matplotlib.pyplot as plt
4
import cv2
5
from ocr.helpers import implt, resize, ratio
6
from copy import deepcopy
7
8
#implt(img, 'gray')
9
10
def sobelDetect(channel):
    """Return the Sobel gradient magnitude of a single image channel as uint8.

    The first derivative is taken along x and along y at 16-bit signed depth,
    the two are combined as sqrt(gx^2 + gy^2), and every value above 255 is
    capped at 255 before the cast to uint8.
    """
    grad_x = cv2.Sobel(channel, cv2.CV_16S, 1, 0)
    grad_y = cv2.Sobel(channel, cv2.CV_16S, 0, 1)
    magnitude = np.hypot(grad_x, grad_y)
    # Cap first so the uint8 conversion below never sees a value above 255.
    capped = np.minimum(magnitude, 255)
    return np.uint8(capped)
18
19
20
def edgeDetect(im):
    """Edge detection on a 3-channel image.

    sobelDetect is applied to channels 0, 1 and 2 separately; the result is
    the per-pixel maximum of the three responses.
    """
    responses = [sobelDetect(im[:, :, layer]) for layer in range(3)]
    return np.stack(responses).max(axis=0)
26
27
#implt(edgeImg, 'gray', 'Sobel operator')
28
#implt(bwImage, 'gray', 'Final closing')
29
30
def delLines(gray):
    """Delete page lines.

    Args:
        gray - single-channel image (assumed uint8 - TODO confirm with callers)
    Return:
        image after subtracting the horizontally-opened image and applying a
        17x17 morphological closing
    """
    # Opening with a 1x11 horizontal kernel; presumably this isolates long
    # horizontal strokes such as ruled page lines.
    line_kernel = np.ones((1, 11), np.uint8)
    lines = cv2.morphologyEx(gray, cv2.MORPH_OPEN, line_kernel, iterations=1)
    # Remove the opened structures from the input. cv2.subtract saturates at 0
    # instead of wrapping around for unsigned data.
    without_lines = cv2.subtract(gray, lines)
    # The previous code closed an undefined name (`dil`), which raised
    # NameError on every call; the line-subtracted image is the only
    # intermediate result available, so it is what gets closed here.
    closing = cv2.morphologyEx(without_lines, cv2.MORPH_CLOSE, np.ones((17, 17), np.uint8))
    #implt(closing, 'gray', 'Del Lines')
    return closing
38
39
40
def delBigAreas(img):
    """Outline and mask the mid-sized contours of a BGR image.

    The image is converted to gray, binarised with an inverted Gaussian
    adaptive threshold, and every contour whose area lies strictly between
    200 and 5000 is outlined in green on `img` (in place) and filled into a
    local mask.

    NOTE(review): the function returns nothing and the mask is not used after
    the loop, so the only visible effect is the drawing on `img`.

    Args:
        img - BGR image; modified in place
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # ret, gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 101, 3)
    #implt(gray, 'gray')

    mask = np.zeros(gray.shape, np.uint8)

    # findContours returns (image, contours, hierarchy) on OpenCV 3 but
    # (contours, hierarchy) on OpenCV 4; index -2 is the contour list on both.
    contours = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

    for cnt in contours:
        if 200 < cv2.contourArea(cnt) < 5000:
            cv2.drawContours(img, [cnt], 0, (0, 255, 0), 2)
            cv2.drawContours(mask, [cnt], 0, 255, -1)
56
57
#implt(mask)
58
#implt(img)
59
60
def union(a, b):
    """Return the smallest [x, y, w, h] rectangle that encloses both a and b.

    Both arguments are rectangles in the form [x, y, w, h].
    """
    left = min(a[0], b[0])
    top = min(a[1], b[1])
    right = max(a[0] + a[2], b[0] + b[2])
    bottom = max(a[1] + a[3], b[1] + b[3])
    return [left, top, right - left, bottom - top]
66
67
def isIntersect(a, b):
    """Tell whether rectangles a and b ([x, y, w, h]) overlap or touch.

    The overlap region is computed; the rectangles count as intersecting
    unless its width or its height is negative, so rectangles that merely
    share an edge or a corner are reported as intersecting.
    """
    overlap_w = min(a[0] + a[2], b[0] + b[2]) - max(a[0], b[0])
    overlap_h = min(a[1] + a[3], b[1] + b[3]) - max(a[1], b[1])
    return not (overlap_w < 0 or overlap_h < 0)
75
76
def groupRectangles(rec):
    """
    Union intersecting rectangles
    Args:
        rec - list of rectangles in form [x, y, w, h]; entries are
              overwritten in place as rectangles are merged
    Return:
        list of grouped rectangles
    """
    absorbed = [False] * len(rec)
    grouped = []
    for base in range(len(rec)):
        if absorbed[base]:
            continue
        probe = base + 1
        while probe < len(rec):
            if absorbed[probe] or not isIntersect(rec[base], rec[probe]):
                probe += 1
                continue
            rec[base] = union(rec[base], rec[probe])
            absorbed[probe] = True
            # The base rectangle just grew, so rescan from the first later
            # rectangle: ones skipped earlier may intersect it now.
            probe = base + 1
        grouped.append(rec[base])

    return grouped
100
101
def textDetect(img, original):
    """Text detection using contours.

    Side effects: writes one crop per detected box to
    output/words/normal/<i>.jpg and a listing of the boxes to
    output/words/normal/bounding_boxes_normal.txt.

    Args:
        img - pre-processed single-channel image whose contours are searched
              (assumed binary - TODO confirm with callers)
        original - image the word crops are cut from
    Return:
        integer array with one row [x1, y1, x2, y2] per grouped box, scaled by
        ratio(...); shape (0, 4) when nothing is detected
    """
    # Resize image
    small = resize(img, 2000)
    image = resize(original, 2000)

    # Finding contours
    mask = np.zeros(small.shape, np.uint8)
    # findContours returns 3 values on OpenCV 3 and 2 on OpenCV 4; the last
    # two are (contours, hierarchy) on both.
    cnt, hierarchy = cv2.findContours(np.copy(small), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]

    #implt(img, 'gray')

    bBoxes = []

    # CCOMP hierarchy: [Next, Previous, First Child, Parent]
    # cv2.RETR_CCOMP - contours into 2 levels
    # Go through all contours in first level.
    # Without any contour there is nothing to index, so skip the walk.
    index = 0 if len(cnt) > 0 else -1
    while index >= 0:
        x, y, w, h = cv2.boundingRect(cnt[index])
        # The mask is never cleared, so it accumulates every contour drawn so far
        cv2.drawContours(mask, cnt, index, (255, 255, 255), cv2.FILLED)
        maskROI = mask[y:y+h, x:x+w]
        # Ratio of white pixels to area of bounding rectangle
        r = float(cv2.countNonZero(maskROI)) / (w * h)

        # Limits for text (white pixel ratio, width, height)
        # TODO Test h/w and w/h ratios
        if r > 0.1 and 1600 > w > 10 and 1600 > h > 10 and h/w < 3 and w/h < 10 and (60 // h) * w < 1000:
            bBoxes.append([x, y, w, h])

        # Index of next contour
        index = hierarchy[0][index][0]

    # Group intersecting rectangles
    bBoxes = groupRectangles(bBoxes)

    corners = []
    # The context manager closes (and flushes) the listing file even if
    # writing a crop fails part-way through.
    with open("output/words/normal/bounding_boxes_normal.txt","w") as f:
        for i, (x, y, w, h) in enumerate(bBoxes):
            corners.append([x, y, x+w, y+h])
            cv2.imwrite("output/words/normal/"+str(i)+".jpg",image[y:y+h, x:x+w])
            f.write(str(i) + "\t => \t" + "("+str(x)+","+str(y)+")"+","+"("+str(x+w)+","+str(y+h)+")"+"\n")
            # cv2.rectangle(image, (x, y),(x+w,y+h), (0, 255, 0), 2)
    #implt(image, t='Bounding rectangles')

    # reshape keeps the result two-dimensional, (0, 4), when no box was found
    boundingBoxes = np.array(corners, dtype=np.int64).reshape(-1, 4)

    # Recalculate coordinates to original scale
    return boundingBoxes.dot(ratio(image, small.shape[0])).astype(np.int64)
152
153
def textDetectWatershed(thresh, original):
    """Text detection using watershed algorithm.

    Side effects: writes one crop per detected box to
    ./output/words/watershed/<i>.jpg and a listing of the boxes to
    ./output/words/watershed/bounding_boxes_watershed.txt.

    Args:
        thresh - single-channel thresholded image used to seed the markers
                 (assumed 8-bit binary - TODO confirm with callers)
        original - colour image passed to cv2.watershed and used for the crops
    Return:
        integer array with one row [x1, y1, x2, y2] per box, scaled by
        ratio(original, 3000); shape (0, 4) when nothing is detected
    """
    # According to: http://docs.opencv.org/trunk/d3/db4/tutorial_py_watershed.html
    img = resize(original, 3000)
    thresh = resize(thresh, 3000)
    # noise removal
    kernel = np.ones((3,3),np.uint8)
    opening = cv2.morphologyEx(thresh,cv2.MORPH_OPEN,kernel, iterations = 3)

    # sure background area
    sure_bg = cv2.dilate(opening,kernel,iterations=3)

    # Finding sure foreground area
    dist_transform = cv2.distanceTransform(opening,cv2.DIST_L2,5)
    _, sure_fg = cv2.threshold(dist_transform,0.01*dist_transform.max(),255,0)
    # Finding unknown region
    sure_fg = np.uint8(sure_fg)
    # cv2.imshow("image",sure_fg)
    # cv2.waitKey(10)

    unknown = cv2.subtract(sure_bg,sure_fg)

    # Marker labelling
    _, markers = cv2.connectedComponents(sure_fg)

    # Add one to all labels so that sure background is not 0, but 1
    markers += 1

    # Now, mark the region of unknown with zero
    markers[unknown == 255] = 0

    markers = cv2.watershed(img, markers)

    #implt(markers, t='Markers')
    image = img.copy()
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    bBoxes = []

    for mark in np.unique(markers):
        # mark == 0 --> background
        if mark == 0:
            continue

        # Draw it on mask and detect biggest contour
        mask = np.zeros(gray.shape, dtype="uint8")
        mask[markers == mark] = 255

        # Index -2 is the contour list on both OpenCV 3 and OpenCV 4
        cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
        c = max(cnts, key=cv2.contourArea)

        # Draw a bounding rectangle if it contains text
        x,y,w,h = cv2.boundingRect(c)
        # NOTE(review): `c` is passed directly rather than as `[c]`; kept
        # as-is to preserve the current results - confirm what was intended.
        cv2.drawContours(mask, c, 0, (255, 255, 255), cv2.FILLED)
        maskROI = mask[y:y+h, x:x+w]
        # Ratio of white pixels to area of bounding rectangle
        r = float(cv2.countNonZero(maskROI)) / (w * h)

        # Limits for text
        # WORK ON THIS
        if r > 0.1 and 2000 > w > 15 and 1500 > h > 15:
            bBoxes.append([x, y, w, h])

    # Group intersecting rectangles
    #bBoxes = groupRectangles(bBoxes)

    corners = []
    # The context manager closes (and flushes) the listing file even if
    # writing a crop fails part-way through.
    with open("./output/words/watershed/bounding_boxes_watershed.txt","w") as f:
        for i, (x, y, w, h) in enumerate(bBoxes):
            corners.append([x, y, x+w, y+h])
            cv2.imwrite("./output/words/watershed/"+str(i)+".jpg",image[y:y+h, x:x+w])
            f.write(str(i) + "\t => \t" + "("+str(x)+","+str(y)+")"+","+"("+str(x+w)+","+str(y+h)+")"+"\n")
            # cv2.rectangle(image, (x, y),(x+w,y+h), (0, 255, 0), 2)

    #implt(image)

    # reshape keeps the result two-dimensional, (0, 4), when no box was found
    boundingBoxes = np.array(corners, dtype=np.int64).reshape(-1, 4)

    # Recalculate coordinates to original size
    return boundingBoxes.dot(ratio(original, 3000)).astype(np.int64)
239
240
#print(len(wbBoxes))
241
#print(len(bBoxes))
242
243
##---
244
# image = cv2.cvtColor(cv2.imread("2_1.jpg"), cv2.COLOR_BGR2RGB)
245
# img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
246
247
248
# # Image pre-processing - blur, edges, threshold, closing
249
# blurred = cv2.GaussianBlur(image, (5, 5), 18)
250
# edgeImg = edgeDetect(blurred)
251
# ret, edgeImg = cv2.threshold(edgeImg, 50, 255, cv2.THRESH_BINARY)
252
# bwImage = cv2.morphologyEx(edgeImg, cv2.MORPH_CLOSE, np.ones((20,20), np.uint8))
253
254
255
256
# bBoxes1 = textDetect(bwImage, image)
257
258
# # or
259
260
# wbBoxes = textDetectWatershed(bwImage, image)
261
262