# Path: blob/master/Model-4/word_detection.py
# 427 views
import os
from copy import deepcopy

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ocr.helpers import implt, resize, ratio

#implt(img, 'gray')


def _findContours(image, mode, method):
    """
    Run cv2.findContours and return (contours, hierarchy).
    OpenCV 3.x returns (image, contours, hierarchy) while other versions
    return (contours, hierarchy); taking the last two elements works for both.
    """
    contours, hierarchy = cv2.findContours(image, mode, method)[-2:]
    return contours, hierarchy


def _saveWordCrops(image, boxes, outDir, listName):
    """
    Write one JPEG crop per box plus a text file listing the box corners.
    Args:
        image - image the crops are cut from
        boxes - iterable of rectangles in form [x, y, w, h]
        outDir - output directory, including the trailing '/'
        listName - name of the text file created inside outDir
    Return:
        int64 array of shape (len(boxes), 4) with rows [x1, y1, x2, y2];
        the shape is (0, 4) when there are no boxes
    """
    # open() fails when the directory is missing, so create it first.
    if not os.path.isdir(outDir):
        os.makedirs(outDir)

    corners = []
    # The context manager guarantees the list file is flushed and closed.
    with open(outDir + listName, "w") as f:
        for i, (x, y, w, h) in enumerate(boxes):
            corners.append([x, y, x + w, y + h])
            cv2.imwrite(outDir + str(i) + ".jpg", image[y:y+h, x:x+w])
            f.write("{}\t => \t({},{}),({},{})\n".format(i, x, y, x + w, y + h))

    # reshape keeps the result two-dimensional even for an empty list.
    return np.array(corners, dtype=np.int64).reshape(-1, 4)


def sobelDetect(channel):
    """
    The Sobel operator.
    Computes the x and y Sobel derivatives of a single channel and returns
    their combined magnitude, saturated at 255, as a uint8 array.
    """
    sobelX = cv2.Sobel(channel, cv2.CV_16S, 1, 0)
    sobelY = cv2.Sobel(channel, cv2.CV_16S, 0, 1)
    # Combine x, y gradient magnitudes sqrt(x^2 + y^2)
    sobel = np.hypot(sobelX, sobelY)
    sobel[sobel > 255] = 255
    return np.uint8(sobel)


def edgeDetect(im):
    """
    Edge detection.
    The Sobel operator is applied to each of the first three image layers
    and the per-pixel maximum over the layers is returned.
    """
    layers = [sobelDetect(im[:, :, c]) for c in range(3)]
    return np.max(np.array(layers), axis=0)

#implt(edgeImg, 'gray', 'Sobel operator')
#implt(bwImage, 'gray', 'Final closing')


def delLines(gray):
    """
    Delete page lines.
    Horizontal structures are extracted with a 1x11 opening, subtracted from
    the input, and the result is closed with a 17x17 kernel.
    """
    linek = np.ones((1, 11), np.uint8)
    lines = cv2.morphologyEx(gray, cv2.MORPH_OPEN, linek, iterations=1)
    withoutLines = gray - lines
    # The original closed an undefined name `dil` (NameError); the image with
    # the lines subtracted is the only candidate computed here.
    closing = cv2.morphologyEx(withoutLines, cv2.MORPH_CLOSE,
                               np.ones((17, 17), np.uint8))
    #implt(closing, 'gray', 'Del Lines')
    return closing


def delBigAreas(img):
    """
    Mark contours whose area lies strictly between 200 and 5000.
    The image is converted to gray and adaptively thresholded; each matching
    contour is outlined in green on `img` (modified in place) and filled into
    a local mask. Nothing is returned.
    NOTE(review): the mask is built but never used or returned - confirm
    whether the function is unfinished.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # ret, gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                 cv2.THRESH_BINARY_INV, 101, 3)
    #implt(gray, 'gray')

    mask = np.zeros(gray.shape, np.uint8)

    contours, _ = _findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    for cnt in contours:
        if 200 < cv2.contourArea(cnt) < 5000:
            cv2.drawContours(img, [cnt], 0, (0, 255, 0), 2)
            cv2.drawContours(mask, [cnt], 0, 255, -1)

    #implt(mask)
    #implt(img)


def union(a, b):
    """ Smallest rectangle [x, y, w, h] containing both rectangles a and b """
    x = min(a[0], b[0])
    y = min(a[1], b[1])
    w = max(a[0]+a[2], b[0]+b[2]) - x
    h = max(a[1]+a[3], b[1]+b[3]) - y
    return [x, y, w, h]


def isIntersect(a, b):
    """
    Test whether rectangles a and b ([x, y, w, h]) intersect.
    Rectangles that only touch (zero-size overlap) count as intersecting.
    """
    x = max(a[0], b[0])
    y = max(a[1], b[1])
    w = min(a[0]+a[2], b[0]+b[2]) - x
    h = min(a[1]+a[3], b[1]+b[3]) - y
    return not (w < 0 or h < 0)


def groupRectangles(rec):
    """
    Union intersecting rectangles
    Args:
        rec - list of rectangles in form [x, y, w, h]
    Return:
        list of grouped rectangles
    """
    # Work on a shallow copy so the caller's list is not overwritten.
    rects = list(rec)
    tested = [False] * len(rects)
    final = []
    i = 0
    while i < len(rects):
        if not tested[i]:
            j = i + 1
            while j < len(rects):
                if not tested[j] and isIntersect(rects[i], rects[j]):
                    rects[i] = union(rects[i], rects[j])
                    tested[j] = True
                    # The grown rectangle may now reach rectangles that were
                    # skipped before, so restart the scan right after i.
                    j = i
                j += 1
            final += [rects[i]]
        i += 1

    return final


def textDetect(img, original):
    """
    Text detection using contours.
    Args:
        img - pre-processed image the contours are searched in
        original - image the word crops are cut from
    Return:
        int64 array with one row [x1, y1, x2, y2] per detected box, scaled by
        ratio() from ocr.helpers; shape (0, 4) when nothing is detected
    Side effects:
        writes the crops and bounding_boxes_normal.txt to output/words/normal/
    """
    # Resize image
    small = resize(img, 2000)
    image = resize(original, 2000)

    # Finding contours
    mask = np.zeros(small.shape, np.uint8)
    cnt, hierarchy = _findContours(np.copy(small), cv2.RETR_CCOMP,
                                   cv2.CHAIN_APPROX_SIMPLE)

    #implt(img, 'gray')

    bBoxes = []

    # CCOMP hierarchy: [Next, Previous, First Child, Parent]
    # cv2.RETR_CCOMP - contours into 2 levels
    # Go through all contours in first level; with no contours at all there
    # is nothing to index, so the loop is skipped.
    index = 0 if len(cnt) > 0 else -1
    while index >= 0:
        x, y, w, h = cv2.boundingRect(cnt[index])
        # Fill the contour into the mask. The mask is not cleared between
        # iterations, so earlier contours stay drawn.
        cv2.drawContours(mask, cnt, index, (255, 255, 255), cv2.FILLED)
        maskROI = mask[y:y+h, x:x+w]
        # Ratio of white pixels to area of bounding rectangle
        r = float(cv2.countNonZero(maskROI)) / (w * h)

        # Limits for text (white pixel ratio, width, height)
        # TODO Test h/w and w/h ratios
        if r > 0.1 and 1600 > w > 10 and 1600 > h > 10 and h/w < 3 and w/h < 10 and (60 // h) * w < 1000:
            bBoxes += [[x, y, w, h]]

        # Index of next contour
        index = hierarchy[0][index][0]

    # Group intersecting rectangles
    bBoxes = groupRectangles(bBoxes)
    boundingBoxes = _saveWordCrops(image, bBoxes, "output/words/normal/",
                                   "bounding_boxes_normal.txt")
    #implt(image, t='Bounding rectangles')

    # Recalculate coordinates to original scale
    return boundingBoxes.dot(ratio(image, small.shape[0])).astype(np.int64)


def textDetectWatershed(thresh, original):
    """
    Text detection using watershed algorithm.
    Args:
        thresh - binary image used to derive the watershed markers
        original - colour image the watershed runs on and crops are cut from
    Return:
        int64 array with one row [x1, y1, x2, y2] per detected box, scaled by
        ratio() from ocr.helpers; shape (0, 4) when nothing is detected
    Side effects:
        writes the crops and bounding_boxes_watershed.txt to
        output/words/watershed/
    """
    # According to: http://docs.opencv.org/trunk/d3/db4/tutorial_py_watershed.html
    img = resize(original, 3000)
    thresh = resize(thresh, 3000)
    # noise removal
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=3)

    # sure background area
    sure_bg = cv2.dilate(opening, kernel, iterations=3)

    # Finding sure foreground area
    dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
    ret, sure_fg = cv2.threshold(dist_transform, 0.01*dist_transform.max(), 255, 0)
    # Finding unknown region
    sure_fg = np.uint8(sure_fg)
    # cv2.imshow("image",sure_fg)
    # cv2.waitKey(10)

    unknown = cv2.subtract(sure_bg, sure_fg)

    # Marker labelling
    ret, markers = cv2.connectedComponents(sure_fg)

    # Add one to all labels so that sure background is not 0, but 1
    markers += 1

    # Now, mark the region of unknown with zero
    markers[unknown == 255] = 0

    markers = cv2.watershed(img, markers)

    #implt(markers, t='Markers')
    image = img.copy()
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    bBoxes = []

    for mark in np.unique(markers):
        # After the +1 shift and the watershed call the sure background is
        # label 1 and region boundaries are -1; neither is a word candidate.
        if mark <= 1:
            continue

        # Draw it on mask and detect biggest contour
        mask = np.zeros(gray.shape, dtype="uint8")
        mask[markers == mark] = 255

        cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
        if len(cnts) == 0:
            continue
        c = max(cnts, key=cv2.contourArea)

        # Draw a bounding rectangle if it contains text
        x, y, w, h = cv2.boundingRect(c)
        # NOTE(review): `c` is a single contour, not a list of contours, so
        # this call does not fill the region; `[c]` may have been intended.
        # Left unchanged because filling would alter the ratio tested below.
        cv2.drawContours(mask, c, 0, (255, 255, 255), cv2.FILLED)
        maskROI = mask[y:y+h, x:x+w]
        # Ratio of white pixels to area of bounding rectangle
        r = float(cv2.countNonZero(maskROI)) / (w * h)

        # Limits for text
        # WORK ON THIS
        if r > 0.1 and 2000 > w > 15 and 1500 > h > 15:
            bBoxes += [[x, y, w, h]]

    # Group intersecting rectangles
    #bBoxes = groupRectangles(bBoxes)
    boundingBoxes = _saveWordCrops(image, bBoxes, "output/words/watershed/",
                                   "bounding_boxes_watershed.txt")

    #implt(image)

    # Recalculate coordinates to original size
    return boundingBoxes.dot(ratio(original, 3000)).astype(np.int64)

#print(len(wbBoxes))
#print(len(bBoxes))

##---
# image = cv2.cvtColor(cv2.imread("2_1.jpg"), cv2.COLOR_BGR2RGB)
# img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)


# # Image pre-processing - blur, edges, threshold, closing
# blurred = cv2.GaussianBlur(image, (5, 5), 18)
# edgeImg = edgeDetect(blurred)
# ret, edgeImg = cv2.threshold(edgeImg, 50, 255, cv2.THRESH_BINARY)
# bwImage = cv2.morphologyEx(edgeImg, cv2.MORPH_CLOSE, np.ones((20,20), np.uint8))



# bBoxes1 = textDetect(bwImage, image)

# # or

# wbBoxes = textDetectWatershed(bwImage, image)