Path: blob/master/Model-4/preprocess.py
427 views
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from ocr.helpers import implt, resize, ratio


def edgesDet(img, minVal, maxVal):
    """Preprocess an image and run Canny edge detection on it.

    Pipeline: resize (``ocr.helpers.resize``) -> grayscale -> bilateral
    filter -> adaptive threshold -> median blur -> 5 px black border -> Canny.

    Args:
        img: Colour image; it is converted with ``cv2.COLOR_BGR2GRAY``.
        minVal: Lower hysteresis threshold passed to ``cv2.Canny``.
        maxVal: Upper hysteresis threshold passed to ``cv2.Canny``.

    Returns:
        Edge map of the resized image. Because of the added border it is
        10 px larger than the resized image in each dimension.
    """
    # NOTE(review): the script at the bottom of this file passes an RGB image
    # here, so the BGR grayscale weights are applied to swapped channels --
    # confirm whether that is intended.
    img = cv2.cvtColor(resize(img), cv2.COLOR_BGR2GRAY)

    # Applying blur and threshold
    img = cv2.bilateralFilter(img, 9, 75, 75)
    img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY, 115, 4)

    # Median blur replaces the center pixel by the median of the pixels under
    # the kernel => removes thin details
    img = cv2.medianBlur(img, 11)

    # Add black border - allows detection of pages touching the image border
    # (a contour can't touch the side of the image)
    img = cv2.copyMakeBorder(img, 5, 5, 5, 5, cv2.BORDER_CONSTANT,
                             value=[0, 0, 0])

    return cv2.Canny(img, minVal, maxVal)


def fourCornersSort(pts):
    """Sort corners: top-left, bot-left, bot-right, top-right.

    Args:
        pts: Array of shape (4, 2) holding (x, y) points.

    Returns:
        New (4, 2) array with the points in the order listed above.
    """
    # y - x is largest for the bottom-left and smallest for the top-right
    # corner; x + y is smallest for top-left and largest for bottom-right.
    diff = np.diff(pts, axis=1)
    summ = pts.sum(axis=1)
    return np.array([pts[np.argmin(summ)],
                     pts[np.argmax(diff)],
                     pts[np.argmax(summ)],
                     pts[np.argmin(diff)]])


def contourOffset(cnt, offset):
    """Offset contour because of the 5px border.

    The array is modified in place and also returned; coordinates that
    become negative are clipped to 0.
    """
    cnt += offset
    cnt[cnt < 0] = 0
    return cnt


def findPageContours(edges, img):
    """Find the corner points of the page contour.

    Args:
        edges: Edge map that includes the 5 px border added by ``edgesDet``.
        img: Unused; kept for backward compatibility with existing callers.

    Returns:
        (4, 2) array of corners sorted top-left, bot-left, bot-right,
        top-right, in coordinates of the image without the border. If no
        suitable contour is found, the corners of the whole image are
        returned.
    """
    # Getting contours. OpenCV 3.x returns (image, contours, hierarchy) while
    # 2.x and 4.x return (contours, hierarchy); the contour list is always
    # the second-to-last element.
    contours = cv2.findContours(edges, cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Finding biggest rectangle otherwise return original corners
    height = edges.shape[0]
    width = edges.shape[1]
    MIN_COUNTOUR_AREA = height * width * 0.5
    MAX_COUNTOUR_AREA = (width - 10) * (height - 10)

    maxArea = MIN_COUNTOUR_AREA
    # Fallback: corners of the original image expressed in the bordered
    # coordinate system, so that the -5 offset below maps them exactly onto
    # the image corners.
    pageContour = np.array([[5, 5],
                            [5, height - 5],
                            [width - 5, height - 5],
                            [width - 5, 5]])

    for cnt in contours:
        perimeter = cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, 0.03 * perimeter, True)

        # Page has 4 corners and it is convex
        if len(approx) == 4 and cv2.isContourConvex(approx):
            area = cv2.contourArea(approx)
            if maxArea < area < MAX_COUNTOUR_AREA:
                maxArea = area
                pageContour = approx

    # Sort corners and offset them. approxPolyDP yields shape (4, 1, 2) while
    # the fallback is (4, 2); reshape handles both.
    pageContour = fourCornersSort(pageContour.reshape(4, 2))
    return contourOffset(pageContour, (-5, -5))


def perspImageTransform(img, sPoints):
    """Transform perspective from start points to target points.

    Args:
        img: Image to warp.
        sPoints: (4, 2) array of source corners ordered top-left, bot-left,
            bot-right, top-right.

    Returns:
        Warped image whose size is the longer of each pair of opposite
        sides of the source quadrilateral.
    """
    # Euclidean distance - calculate maximum height and width
    height = max(np.linalg.norm(sPoints[0] - sPoints[1]),
                 np.linalg.norm(sPoints[2] - sPoints[3]))
    width = max(np.linalg.norm(sPoints[1] - sPoints[2]),
                np.linalg.norm(sPoints[3] - sPoints[0]))

    # Create target points
    tPoints = np.array([[0, 0],
                        [0, height],
                        [width, height],
                        [width, 0]], np.float32)

    # getPerspectiveTransform() needs float32
    if sPoints.dtype != np.float32:
        sPoints = sPoints.astype(np.float32)

    M = cv2.getPerspectiveTransform(sPoints, tPoints)
    return cv2.warpPerspective(img, M, (int(width), int(height)))


#--

image = cv2.imread("test1.jpeg")
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise IOError("cv2.imread could not read 'test1.jpeg'")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Edge detection
imageEdges = edgesDet(image, 200, 250)

# Close gaps between edges (rectangular kernel, wider than tall)
closedEdges = cv2.morphologyEx(imageEdges, cv2.MORPH_CLOSE, np.ones((5, 11)))


pageContour = findPageContours(closedEdges, resize(image))

# Recalculate to original scale
pageContour = pageContour.dot(ratio(image))


newImage = perspImageTransform(image, pageContour)
# The image is held as RGB; swap channels back to BGR for cv2.imwrite.
cv2.imwrite("2_1.jpg", cv2.cvtColor(newImage, cv2.COLOR_RGB2BGR))