Handwriting - Page Detection

Finding a page in the image

Import Packages

In [1]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from ocr.helpers import implt, resize, ratio

# Report the versions of the main libraries this notebook runs against
for libLabel, lib in (("OpenCV: ", cv2), ("Numpy: ", np), ("Pandas: ", pd)):
    print(libLabel + lib.__version__)

# Default size of every matplotlib figure plotted below
plt.rcParams['figure.figsize'] = (9.0, 9.0)

Out[1]:

OpenCV: 3.1.0
Numpy: 1.13.1
Pandas: 0.20.3

Finding Page

This part will find the page and remove the unnecessary background.

Global Variables

In [2]:

# Base name (without extension) of the image to process: it is read from
# data/pagedet/<IMG>.jpg and the cropped page is saved to data/textdet/<IMG>.jpg
IMG = "poster"               # Image name/number

In [3]:

# Loading the image and plotting it.
# cv2.imread() does not raise on a missing/unreadable file, it returns None,
# so check explicitly and fail with a message that names the path.
imagePath = "data/pagedet/%s.jpg" % IMG
imageBGR = cv2.imread(imagePath)
if imageBGR is None:
    raise IOError("Could not read image: %s" % imagePath)

# OpenCV loads images in BGR channel order; convert to RGB for plotting
image = cv2.cvtColor(imageBGR, cv2.COLOR_BGR2RGB)
implt(image)

Out[3]:

In [4]:

def edgesDet(img, minVal, maxVal):
    """Preprocess an RGB image and run Canny edge detection on it.

    Pipeline: resize -> grayscale -> bilateral filter -> adaptive threshold
    -> median blur -> 5px black border -> Canny. The thresholded image and
    the blurred + bordered image are plotted with implt() along the way.

    Args:
        img: Colour image in RGB channel order (this notebook converts the
            loaded image from BGR to RGB before calling this function).
        minVal: Lower hysteresis threshold passed to cv2.Canny().
        maxVal: Upper hysteresis threshold passed to cv2.Canny().

    Returns:
        The edge image returned by cv2.Canny(). Because of the added
        border it is 10px wider and 10px taller than the resized input.
    """
    # The input is RGB, so use the RGB conversion code; COLOR_BGR2GRAY would
    # apply the red and blue luminance weights to the wrong channels.
    img = cv2.cvtColor(resize(img), cv2.COLOR_RGB2GRAY)

    # Applying blur and threshold
    img = cv2.bilateralFilter(img, 9, 75, 75)
    img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 115, 4)
    implt(img, 'gray', 'Adaptive Threshold')

    # Median blur replaces the center pixel by the median of the pixels
    # under the kernel => removes thin details
    img = cv2.medianBlur(img, 11)

    # Add black border - allows detection of pages touching the image border
    # (a contour can't touch the side of the image)
    img = cv2.copyMakeBorder(img, 5, 5, 5, 5, cv2.BORDER_CONSTANT, value=[0, 0, 0])
    implt(img, 'gray', 'Median Blur + Border')

    return cv2.Canny(img, minVal, maxVal)

In [5]:

# Edge detection (Canny thresholds: 200 lower, 250 upper)
imageEdges = edgesDet(image, minVal=200, maxVal=250)

# Close gaps between edges; the kernel is a 5x11 rectangle
# (double page closure => rectangular kernel)
closingKernel = np.ones((5, 11))
closedEdges = cv2.morphologyEx(imageEdges, cv2.MORPH_CLOSE, closingKernel)
implt(closedEdges, 'gray', 'Edges')

Out[5]:

In [6]:

def fourCornersSort(pts):
    """Order corner points as top-left, bottom-left, bottom-right, top-right.

    Args:
        pts: Array of (x, y) points, one point per row.

    Returns:
        New array with the rows of pts picked in this order:
        smallest x + y, largest y - x, largest x + y, smallest y - x.
    """
    coordSum = pts.sum(axis=1)        # x + y for every point
    coordDiff = np.diff(pts, axis=1)  # y - x for every point

    order = [np.argmin(coordSum),     # top-left
             np.argmax(coordDiff),    # bottom-left
             np.argmax(coordSum),     # bottom-right
             np.argmin(coordDiff)]    # top-right
    return np.array([pts[idx] for idx in order])


def contourOffset(cnt, offset):
    """Shift contour points by offset and clamp negative coordinates to 0.

    Used to undo the 5px border that was added around the image before
    edge detection.

    Args:
        cnt: Array of contour points.
        offset: Value(s) added to every point, e.g. (-5, -5).

    Returns:
        A new array with the shifted points; the dtype of cnt is kept and
        the array passed in by the caller is left unmodified.
    """
    # Work on a copy so the caller's array is not changed as a side effect;
    # the in-place add on the copy keeps the original dtype.
    shifted = np.array(cnt)
    shifted += offset
    shifted[shifted < 0] = 0
    return shifted


def findPageContours(edges, img):
    """Find the four corner points of the page contour.

    Searches the contours of the edge image for the largest convex
    quadrilateral whose area is more than half of the edge image and less
    than the area of the image without its border. If none qualifies, the
    corners of the image itself are returned.

    Args:
        edges: Edge image that includes the 5px border added before edge
            detection.
        img: Unused; kept so existing calls keep working.

    Returns:
        Array of 4 points sorted top-left, bottom-left, bottom-right,
        top-right, shifted by -5px to remove the border offset.
    """
    BORDER = 5  # width of the border added around the image before Canny

    # Getting contours. Depending on the OpenCV version findContours()
    # returns (contours, hierarchy) or (image, contours, hierarchy);
    # the contour list is the second-to-last item in both cases.
    contours = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Finding biggest rectangle otherwise return original corners
    height, width = edges.shape[:2]
    minContourArea = height * width * 0.5
    maxContourArea = (width - 2 * BORDER) * (height - 2 * BORDER)

    maxArea = minContourArea
    # Fallback: corners of the image inside the border. It has the same
    # (4, 1, 2) layout and int32 dtype as the approxPolyDP() result, so the
    # [:, 0] indexing below works whether or not a page contour was found.
    pageContour = np.array([[[BORDER, BORDER]],
                            [[BORDER, height - BORDER]],
                            [[width - BORDER, height - BORDER]],
                            [[width - BORDER, BORDER]]], dtype=np.int32)

    for cnt in contours:
        perimeter = cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, 0.03 * perimeter, True)

        # Page has 4 corners and it is convex
        if len(approx) != 4 or not cv2.isContourConvex(approx):
            continue

        area = cv2.contourArea(approx)
        if maxArea < area < maxContourArea:
            maxArea = area
            pageContour = approx

    # Sort corners and offset them
    pageContour = fourCornersSort(pageContour[:, 0])
    return contourOffset(pageContour, (-BORDER, -BORDER))

In [7]:

# Corner points of the page, in coordinates of the resized image
resizedContour = findPageContours(closedEdges, resize(image))
print("PAGE CONTOUR:")
print(resizedContour)

# Preview: draw the detected contour in green on a resized copy of the image
contourPreview = cv2.drawContours(resize(image), [resizedContour], -1, (0, 255, 0), 3)
implt(contourPreview)

# Recalculate to original scale
pageContour = resizedContour.dot(ratio(image))

Out[7]:

PAGE CONTOUR:
[[ 48  26]
 [ 29 783]
 [572 792]
 [576  32]]

In [8]:

def perspImageTransform(img, sPoints):
    """Warp the quadrilateral described by sPoints onto an upright rectangle.

    Args:
        img: Source image.
        sPoints: Array of 4 corner points. They are mapped, in order, to the
            top-left, bottom-left, bottom-right and top-right corners of
            the output.

    Returns:
        The warped image; its size is the longer of each pair of opposite
        sides of the quadrilateral, truncated to whole pixels.
    """
    # Euclidean length of each side of the source quadrilateral
    leftSide = np.linalg.norm(sPoints[0] - sPoints[1])
    rightSide = np.linalg.norm(sPoints[2] - sPoints[3])
    bottomSide = np.linalg.norm(sPoints[1] - sPoints[2])
    topSide = np.linalg.norm(sPoints[3] - sPoints[0])

    height = max(leftSide, rightSide)
    width = max(bottomSide, topSide)

    # Target corners, in the same order as the source points
    tPoints = np.array([[0, 0],
                        [0, height],
                        [width, height],
                        [width, 0]], np.float32)

    # getPerspectiveTransform() needs float32 input
    srcPoints = sPoints if sPoints.dtype == np.float32 else sPoints.astype(np.float32)

    M = cv2.getPerspectiveTransform(srcPoints, tPoints)
    return cv2.warpPerspective(img, M, (int(width), int(height)))
    
    
# Crop the page out of the full-size image and plot the result
newImage = perspImageTransform(img=image, sPoints=pageContour)
implt(newImage, t='Result')

Out[8]:

Saving / Exporting image

In [9]:

# Saving the cropped image. newImage is RGB while cv2.imwrite() expects BGR,
# so the channels are swapped back first. The boolean returned by imwrite()
# is the cell output.
outputPath = "data/textdet/%s.jpg" % IMG
cv2.imwrite(outputPath, cv2.cvtColor(newImage, cv2.COLOR_RGB2BGR))

Out[9]:

True

Handwriting - Page Detection

Import Packages

Finding Page

Global Variables

Saving / Exporting image

Product

Resources

Company