Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Aniket025
GitHub Repository: Aniket025/Medical-Prescription-OCR
Path: blob/master/Model-4/preprocess.py
427 views
1
import numpy as np
2
import pandas as pd
3
import matplotlib.pyplot as plt
4
import cv2
5
from ocr.helpers import implt, resize, ratio
6
7
def edgesDet(img, minVal, maxVal):
    """Preprocess ``img`` and return its Canny edge map.

    The image is passed through the ``resize`` helper, converted to
    grayscale, smoothed, binarised, median-filtered and framed with a 5 px
    black border. ``minVal`` and ``maxVal`` are forwarded to ``cv2.Canny``
    as its two thresholds.
    """
    gray = cv2.cvtColor(resize(img), cv2.COLOR_BGR2GRAY)

    # Blur, then binarise with a Gaussian-weighted adaptive threshold.
    smoothed = cv2.bilateralFilter(gray, 9, 75, 75)
    binary = cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        115,
        4,
    )

    # Median blur replaces each pixel by the median of the pixels under the
    # kernel, which removes thin details.
    cleaned = cv2.medianBlur(binary, 11)

    # A black frame lets pages that touch the image side still be detected:
    # a contour cannot touch the side of the image.
    framed = cv2.copyMakeBorder(
        cleaned, 5, 5, 5, 5, cv2.BORDER_CONSTANT, value=[0, 0, 0]
    )

    return cv2.Canny(framed, minVal, maxVal)
24
25
26
def fourCornersSort(pts):
    """Order corner points as top-left, bottom-left, bottom-right, top-right.

    ``pts`` is a 2-D array with one point per row. The row with the smallest
    coordinate sum comes first and the one with the largest sum third; the
    rows with the largest and smallest ``column 1 - column 0`` difference
    fill the second and fourth slots.
    """
    coord_sum = pts.sum(axis=1)
    coord_diff = np.diff(pts, axis=1)

    order = [
        np.argmin(coord_sum),   # top-left
        np.argmax(coord_diff),  # bottom-left
        np.argmax(coord_sum),   # bottom-right
        np.argmin(coord_diff),  # top-right
    ]
    return pts[order]
34
35
36
def contourOffset(cnt, offset):
    """Shift a contour by ``offset`` and clamp negative coordinates to 0.

    Used to undo the 5 px border added before edge detection. The array
    ``cnt`` is modified in place and the same object is returned.
    """
    # In-place add, equivalent to ``cnt += offset``.
    np.add(cnt, offset, out=cnt)
    # Anything pushed below zero is pinned to zero.
    np.maximum(cnt, 0, out=cnt)
    return cnt
41
42
43
def findPageContours(edges, img):
    """Find the four corner points of the page contour in an edge map.

    Args:
        edges: Edge image that includes the 5 px border added by
            ``edgesDet``.
        img: Unused; kept so existing callers keep working.

    Returns:
        A (4, 2) array of corners ordered top-left, bottom-left,
        bottom-right, top-right, with the border offset removed. If no
        suitable contour is found, the corners of the whole (border-free)
        image are returned.
    """
    # Width of the black frame around the edge image.
    border = 5

    # cv2.findContours returns (image, contours, hierarchy) in OpenCV 3.x but
    # (contours, hierarchy) in 2.x and 4.x; the contour list is always the
    # second-to-last element, so index from the end instead of unpacking.
    contours = cv2.findContours(
        edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    # Find the biggest rectangle, otherwise fall back to the image corners.
    height = edges.shape[0]
    width = edges.shape[1]
    MIN_COUNTOUR_AREA = height * width * 0.5
    MAX_COUNTOUR_AREA = (width - 2 * border) * (height - 2 * border)

    maxArea = MIN_COUNTOUR_AREA
    # Fallback corners are inset by the border so that, once the offset
    # below is applied, they coincide with the corners of the real image.
    pageContour = np.array([[border, border],
                            [border, height - border],
                            [width - border, height - border],
                            [width - border, border]])

    for cnt in contours:
        perimeter = cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, 0.03 * perimeter, True)

        # A page has 4 corners and is convex.
        if len(approx) != 4 or not cv2.isContourConvex(approx):
            continue

        area = cv2.contourArea(approx)
        if maxArea < area < MAX_COUNTOUR_AREA:
            maxArea = area
            pageContour = approx

    # approxPolyDP yields shape (4, 1, 2) while the fallback is (4, 2);
    # reshape handles both (indexing with [:, 0] broke the fallback path).
    pageContour = fourCornersSort(pageContour.reshape(-1, 2))
    return contourOffset(pageContour, (-border, -border))
75
76
77
def perspImageTransform(img, sPoints):
    """Warp ``img`` so the quadrilateral ``sPoints`` fills an upright rectangle.

    ``sPoints`` holds four corners that are mapped, in order, to
    (0, 0), (0, height), (width, height) and (width, 0). The output size is
    taken from the longer of each pair of opposite sides.
    """
    # Euclidean lengths of the four sides of the source quadrilateral.
    side_01 = np.linalg.norm(sPoints[0] - sPoints[1])
    side_23 = np.linalg.norm(sPoints[2] - sPoints[3])
    side_12 = np.linalg.norm(sPoints[1] - sPoints[2])
    side_30 = np.linalg.norm(sPoints[3] - sPoints[0])

    height = max(side_01, side_23)
    width = max(side_12, side_30)

    # Destination rectangle, in the same corner order as the source points.
    tPoints = np.array(
        [
            [0, 0],
            [0, height],
            [width, height],
            [width, 0],
        ],
        np.float32,
    )

    # getPerspectiveTransform() needs float32 input.
    source = sPoints if sPoints.dtype == np.float32 else sPoints.astype(np.float32)

    matrix = cv2.getPerspectiveTransform(source, tPoints)
    return cv2.warpPerspective(img, matrix, (int(width), int(height)))
97
98
99
100
101
102
#--
103
104
# Load the input photo. cv2.imread returns None (it does not raise) when the
# file is missing or cannot be decoded, so fail early with a clear message.
image = cv2.imread("test1.jpeg")
if image is None:
    raise FileNotFoundError('Could not read input image "test1.jpeg"')
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Edge detection
# NOTE(review): edgesDet converts with COLOR_BGR2GRAY while `image` is RGB
# here, so the red/blue grayscale weights are swapped -- confirm intended.
imageEdges = edgesDet(image, 200, 250)

# Close gaps between edges (double-page closure => rectangular kernel)
closedEdges = cv2.morphologyEx(imageEdges, cv2.MORPH_CLOSE, np.ones((5, 11)))

pageContour = findPageContours(closedEdges, resize(image))

# Recalculate to original scale
pageContour = pageContour.dot(ratio(image))

newImage = perspImageTransform(image, pageContour)

# Convert back to BGR for saving; imwrite reports failure through its return
# value rather than by raising.
if not cv2.imwrite("2_1.jpg", cv2.cvtColor(newImage, cv2.COLOR_RGB2BGR)):
    raise OSError('Could not write output image "2_1.jpg"')
121
122