CoCalc -- word_detection.py

GitHub Repository: Aniket025/Medical-Prescription-OCR
Path: blob/master/Model-4/word_detection.py
⁴²⁷ views
1
import numpy as np
2
import pandas as pd
3
import matplotlib.pyplot as plt
4
import cv2
5
from ocr.helpers import implt, resize, ratio
6
from copy import deepcopy
7

8
#implt(img, 'gray')
9

10
def sobelDetect(channel):
    """Return the Sobel gradient magnitude of one image channel.

    Args:
        channel: single image plane, passed unchanged to ``cv2.Sobel``.

    Returns:
        ``uint8`` array holding sqrt(gx^2 + gy^2), with every value above
        255 replaced by 255 before the conversion.
    """
    # First-order derivatives along x and along y, computed with a signed
    # 16-bit output depth (cv2.CV_16S).
    grad_x = cv2.Sobel(channel, cv2.CV_16S, 1, 0)
    grad_y = cv2.Sobel(channel, cv2.CV_16S, 0, 1)

    magnitude = np.hypot(grad_x, grad_y)

    # Saturate instead of letting the uint8 conversion wrap around.
    too_strong = magnitude > 255
    magnitude[too_strong] = 255
    return np.uint8(magnitude)
18

19

20
def edgeDetect(im):
    """
    Edge detection

    ``sobelDetect`` is run separately on channels 0, 1 and 2 of ``im``; the
    result keeps, for every pixel, the largest of the three responses.
    """
    per_channel = [sobelDetect(im[:, :, layer]) for layer in (0, 1, 2)]
    return np.max(np.array(per_channel), axis=0)
26

27
#implt(edgeImg, 'gray', 'Sobel operator')
28
#implt(bwImage, 'gray', 'Final closing')
29

30
def delLines(gray):
    """ Delete page lines

    Long horizontal strokes are isolated with a morphological opening
    (1x11 kernel), subtracted from the input, and the remainder is closed
    with a 17x17 kernel.

    Args:
        gray: single-channel image.
            NOTE(review): presumably a uint8 image in which ink and ruled
            lines are the bright pixels - confirm against the callers.

    Returns:
        The input with the opened (line) component removed, after closing.
    """
    line_kernel = np.ones((1, 11), np.uint8)
    lines = cv2.morphologyEx(gray, cv2.MORPH_OPEN, line_kernel, iterations=1)

    # A grayscale opening never exceeds its input, so this subtraction
    # cannot go below zero.
    without_lines = gray - lines

    # Close the line-free image.  This call used to be given a name that is
    # not defined anywhere in the function, so it raised NameError, and the
    # subtraction result above was never used.
    closing = cv2.morphologyEx(without_lines, cv2.MORPH_CLOSE,
                               np.ones((17, 17), np.uint8))
    #implt(closing, 'gray', 'Del Lines')
    return closing
38

39

40
def delBigAreas(img):
    """ Mark contours whose area is between 200 and 5000 pixels

    The image is converted to grayscale and binarised with an inverted
    Gaussian adaptive threshold (block size 101, constant 3).  Every contour
    whose area lies strictly between 200 and 5000 is outlined in green on
    ``img`` and filled into a local mask.

    Args:
        img: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).
            It is modified in place by the outline drawing.

    Returns:
        None.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                 cv2.THRESH_BINARY_INV, 101, 3)
    #implt(gray, 'gray')

    mask = np.zeros(gray.shape, np.uint8)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x/4.x; [-2] is the contour list on all of
    # them, whereas a fixed three-value unpack fails outside 3.x.
    contours = cv2.findContours(gray, cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]

    for cnt in contours:
        if 200 < cv2.contourArea(cnt) < 5000:
            cv2.drawContours(img, [cnt], 0, (0, 255, 0), 2)
            cv2.drawContours(mask, [cnt], 0, 255, -1)

    # NOTE(review): `mask` is filled but neither returned nor used, so the
    # only visible effect is the green outlines drawn on `img`.  The function
    # looks unfinished - confirm whether the mask should be returned.
    #implt(mask)
    #implt(img)
59

60
def union(a, b):
    """Return the smallest [x, y, w, h] rectangle enclosing both a and b.

    Both arguments are rectangles in [x, y, w, h] form.
    """
    left = min(a[0], b[0])
    top = min(a[1], b[1])
    right = max(a[0] + a[2], b[0] + b[2])
    bottom = max(a[1] + a[3], b[1] + b[3])
    return [left, top, right - left, bottom - top]
66

67
def isIntersect(a, b):
    """Tell whether rectangles a and b ([x, y, w, h]) overlap or touch.

    Rectangles that only share an edge or a corner (zero-sized overlap)
    count as intersecting.
    """
    overlap_w = min(a[0] + a[2], b[0] + b[2]) - max(a[0], b[0])
    overlap_h = min(a[1] + a[3], b[1] + b[3]) - max(a[1], b[1])
    return not (overlap_w < 0 or overlap_h < 0)
75

76
def groupRectangles(rec):
    """
    Union intersecting rectangles
    Args:
        rec - list of rectangles in form [x, y, w, h]
    Return:
        list of grouped rectangles

    The input list is left untouched: the growing rectangle is kept in a
    local variable instead of being written back into ``rec``.
    """
    absorbed = [False] * len(rec)
    final = []
    for i, current in enumerate(rec):
        if absorbed[i]:
            continue
        j = i + 1
        while j < len(rec):
            if not absorbed[j] and isIntersect(current, rec[j]):
                current = union(current, rec[j])
                absorbed[j] = True
                # The rectangle grew, so boxes rejected earlier may now
                # intersect it: rescan everything after position i.
                j = i + 1
            else:
                j += 1
        final.append(current)

    return final
100

101
def textDetect(img, original):
    """ Text detection using contours

    Both inputs are passed through ``resize(..., 2000)``.  The first-level
    contours of the resized ``img`` are filtered by fill ratio, size and
    aspect heuristics, intersecting boxes are merged with
    ``groupRectangles``, and every resulting box is cropped from the resized
    ``original`` and written to ``output/words/normal/<i>.jpg``.  The box
    corners are also listed in
    ``output/words/normal/bounding_boxes_normal.txt``.  ``open`` does not
    create directories, so that directory has to exist already.

    Args:
        img: image handed to ``cv2.findContours`` after resizing.
            NOTE(review): presumably a binary, single-channel image -
            confirm against the callers.
        original: image the word crops are cut from.

    Returns:
        ``int64`` array of shape (n, 4) with rows ``[x1, y1, x2, y2]``,
        multiplied by ``ratio(image, small.shape[0])``.  When no box is
        found the array has shape (0, 4).
    """
    # Resize image
    small = resize(img, 2000)
    image = resize(original, 2000)

    # Finding contours
    mask = np.zeros(small.shape, np.uint8)
    # findContours returns three values on OpenCV 3.x and two on 2.x/4.x;
    # the last two are (contours, hierarchy) on all of them.
    cnt, hierarchy = cv2.findContours(np.copy(small), cv2.RETR_CCOMP,
                                      cv2.CHAIN_APPROX_SIMPLE)[-2:]

    #implt(img, 'gray')

    bBoxes = []

    # CCOMP hierarchy: [Next, Previous, First Child, Parent]
    # cv2.RETR_CCOMP - contours into 2 levels
    # Go through all contours in first level.  With no contours there is
    # nothing to index, so the walk starts at the -1 terminator.
    index = 0 if len(cnt) > 0 else -1
    while index >= 0:
        x, y, w, h = cv2.boundingRect(cnt[index])
        # Fill this contour into the mask; the mask is shared by all
        # contours and is not cleared between iterations.
        cv2.drawContours(mask, cnt, index, (255, 255, 255), cv2.FILLED)
        maskROI = mask[y:y+h, x:x+w]
        # Ratio of white pixels to area of bounding rectangle
        r = float(cv2.countNonZero(maskROI)) / (w * h)

        # Limits for text (white pixel ratio, width, height)
        # TODO Test h/w and w/h ratios
        if r > 0.1 and 1600 > w > 10 and 1600 > h > 10 and h/w < 3 and w/h < 10 and (60 // h) * w < 1000:
            bBoxes += [[x, y, w, h]]

        # Index of next contour on the same level (-1 after the last one)
        index = hierarchy[0][index][0]

    # Group intersecting rectangles
    bBoxes = groupRectangles(bBoxes)

    corners = []
    # The context manager closes the listing even if a write fails.
    with open("output/words/normal/bounding_boxes_normal.txt", "w") as f:
        for i, (x, y, w, h) in enumerate(bBoxes):
            corners.append([x, y, x + w, y + h])
            cv2.imwrite("output/words/normal/"+str(i)+".jpg", image[y:y+h, x:x+w])
            f.write(str(i) + "\t => \t" + "("+str(x)+","+str(y)+")"+","+"("+str(x+w)+","+str(y+h)+")"+"\n")
            # cv2.rectangle(image, (x, y),(x+w,y+h), (0, 255, 0), 2)
    #implt(image, t='Bounding rectangles')

    # reshape keeps the (n, 4) layout when there are no boxes; stacking on a
    # dummy first row and slicing it off yielded a 1-D [0, 0, 0] in that case.
    boundingBoxes = np.array(corners, dtype=np.int64).reshape(-1, 4)

    # Recalculate coordinates to original scale
    # NOTE(review): `image` is the already resized original, whereas
    # textDetectWatershed passes the unresized `original` to ratio().  If
    # ratio() compares the image height with its second argument, this
    # factor is about 1 and the boxes stay in the resized scale - confirm
    # against ocr.helpers.ratio before changing.
    return boundingBoxes.dot(ratio(image, small.shape[0])).astype(np.int64)
152

153
def textDetectWatershed(thresh, original):
    """ Text detection using watershed algorithm

    Both inputs are passed through ``resize(..., 3000)``.  Markers are built
    from the opened threshold image, ``cv2.watershed`` is run on the resized
    ``original``, and the bounding box of the largest contour of every
    segment is kept when it passes the fill-ratio and size limits.  Each box
    is cropped from the resized ``original`` and written to
    ``output/words/watershed/<i>.jpg``; the corners are also listed in
    ``output/words/watershed/bounding_boxes_watershed.txt``.  ``open`` does
    not create directories, so that directory has to exist already.

    Args:
        thresh: image used to derive the sure-background and
            sure-foreground areas.
            NOTE(review): presumably a binary uint8 image with text in
            white - confirm against the callers.
        original: BGR image (it is the image argument of ``cv2.watershed``)
            from which the crops are cut.

    Returns:
        ``int64`` array of shape (n, 4) with rows ``[x1, y1, x2, y2]``,
        multiplied by ``ratio(original, 3000)``.  When no box is found the
        array has shape (0, 4).
    """
    # According to: http://docs.opencv.org/trunk/d3/db4/tutorial_py_watershed.html
    img = resize(original, 3000)
    thresh = resize(thresh, 3000)
    # noise removal
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=3)

    # sure background area
    sure_bg = cv2.dilate(opening, kernel, iterations=3)

    # Finding sure foreground area
    dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
    _, sure_fg = cv2.threshold(dist_transform, 0.01*dist_transform.max(), 255, 0)
    sure_fg = np.uint8(sure_fg)

    # Finding unknown region
    unknown = cv2.subtract(sure_bg, sure_fg)

    # Marker labelling
    _, markers = cv2.connectedComponents(sure_fg)

    # Add one to all labels so that sure background is not 0, but 1
    markers += 1

    # Now, mark the region of unknown with zero
    markers[unknown == 255] = 0

    markers = cv2.watershed(img, markers)
    #implt(markers, t='Markers')

    bBoxes = []

    for mark in np.unique(markers):
        # Labels <= 1 are not words: the background was moved to label 1 by
        # the "+= 1" above, and cv2.watershed marks region boundaries with
        # -1 (see the tutorial linked at the top).  A test for label 0 alone
        # lets both of them through.
        if mark <= 1:
            continue

        # Draw it on mask and detect biggest contour
        mask = np.zeros(markers.shape, dtype="uint8")
        mask[markers == mark] = 255

        # [-2] is the contour list on every OpenCV version
        cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
        if len(cnts) == 0:
            continue
        c = max(cnts, key=cv2.contourArea)

        # Draw a bounding rectangle if it contains text
        x, y, w, h = cv2.boundingRect(c)
        # drawContours expects a list of contours; handing it the bare point
        # array makes every point a separate one-point contour, so nothing
        # is filled.
        cv2.drawContours(mask, [c], 0, (255, 255, 255), cv2.FILLED)
        maskROI = mask[y:y+h, x:x+w]
        # Ratio of white pixels to area of bounding rectangle
        r = float(cv2.countNonZero(maskROI)) / (w * h)

        # Limits for text
        # WORK ON THIS
        if r > 0.1 and 2000 > w > 15 and 1500 > h > 15:
            bBoxes += [[x, y, w, h]]

    # Intersecting rectangles are not grouped here
    #bBoxes = groupRectangles(bBoxes)

    corners = []
    # The context manager closes the listing even if a write fails.
    with open("output/words/watershed/bounding_boxes_watershed.txt", "w") as f:
        for i, (x, y, w, h) in enumerate(bBoxes):
            corners.append([x, y, x + w, y + h])
            cv2.imwrite("output/words/watershed/"+str(i)+".jpg", img[y:y+h, x:x+w])
            f.write(str(i) + "\t => \t" + "("+str(x)+","+str(y)+")"+","+"("+str(x+w)+","+str(y+h)+")"+"\n")
            # cv2.rectangle(img, (x, y),(x+w,y+h), (0, 255, 0), 2)

    #implt(img)

    # reshape keeps the (n, 4) layout when there are no boxes; stacking on a
    # dummy first row and slicing it off yielded a 1-D [0, 0, 0] in that case.
    boundingBoxes = np.array(corners, dtype=np.int64).reshape(-1, 4)

    # Recalculate coordinates to original size
    return boundingBoxes.dot(ratio(original, 3000)).astype(np.int64)
239

240
#print(len(wbBoxes))
241
#print(len(bBoxes))
242

243
##---
244
# image = cv2.cvtColor(cv2.imread("2_1.jpg"), cv2.COLOR_BGR2RGB)
245
# img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
246

247

248
# # Image pre-processing - blur, edges, threshold, closing
249
# blurred = cv2.GaussianBlur(image, (5, 5), 18)
250
# edgeImg = edgeDetect(blurred)
251
# ret, edgeImg = cv2.threshold(edgeImg, 50, 255, cv2.THRESH_BINARY)
252
# bwImage = cv2.morphologyEx(edgeImg, cv2.MORPH_CLOSE, np.ones((20,20), np.uint8))
253

254

255

256
# bBoxes1 = textDetect(bwImage, image)
257

258
# # or
259

260
# wbBoxes = textDetectWatershed(bwImage, image)
261
Product

Resources

Company