Path: blob/master/Model-1/OCR-Evaluator.ipynb
Kernel: Python 3
Evaluating OCR Models
TODO - Finish the abstract Cycler class
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
import time
import math
from collections import Counter
import unidecode
from abc import ABC, abstractmethod

# Import Widgets
from ipywidgets import Button, Text, HBox, VBox
from IPython.display import display, clear_output

# Import custom functions, corresponding to notebooks
from ocr import charSeg
from ocr.normalization import letterNorm, imageNorm
# from ocr import charSeg

# Helpers
from ocr.helpers import implt, resize, extendImg
from ocr.datahelpers import loadWordsData, idx2char
from ocr.tfhelpers import Graph
from ocr.viz import printProgressBar
Out[1]:
Loading Segmantation model:
INFO:tensorflow:Restoring parameters from models/gap-clas/CNN-CG
INFO:tensorflow:Restoring parameters from models/gap-clas/RNN/Bi-RNN-new
Global Variables
In [2]:
# Settings
LANG = 'en'
Load Trained Models
In [3]:
charClass_1 = Graph('models/char-clas/' + LANG + '/CharClassifier')
# charClass_2 = Graph('models/char-clas/' + LANG + '/Bi-RNN/model_2', 'prediction')
# charClass_3 = Graph('models/char-clas/' + LANG + '/Bi-RNN/model_1', 'prediction')
wordClass = Graph('models/word-clas/' + LANG + '/WordClassifier2', 'prediction_infer')
wordClass2 = Graph('models/word-clas/' + LANG + '/SeqRNN/Classifier', 'word_prediction')
wordClass3 = Graph('models/word-clas/' + LANG + '/CTC/Classifier2', 'word_prediction')
Out[3]:
INFO:tensorflow:Restoring parameters from models/char-clas/en/CharClassifier
INFO:tensorflow:Restoring parameters from models/word-clas/en/WordClassifier2
INFO:tensorflow:Restoring parameters from models/word-clas/en/SeqRNN/Classifier
INFO:tensorflow:Restoring parameters from models/word-clas/en/CTC/Classifier2
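The Graph helper from ocr.tfhelpers is used as a black box here. Judging by the restore messages above and the run / eval_feed calls further down, it loads a saved checkpoint into its own session and evaluates a named output operation. A rough sketch of that assumed behaviour follows; the class, default operation name and placeholder name are hypothetical, not the actual ocr.tfhelpers implementation.

# Hypothetical sketch of what ocr.tfhelpers.Graph presumably does (TF 1.x style).
# The default operation and the 'x:0' placeholder name are made up for illustration.
class GraphSketch:
    def __init__(self, loc, operation='activation'):
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        with self.graph.as_default():
            saver = tf.train.import_meta_graph(loc + '.meta')
            saver.restore(self.sess, loc)   # emits the "Restoring parameters from ..." lines
            self.op = self.graph.get_tensor_by_name(operation + ':0')

    def run(self, data):
        # Assumed convention: a single input tensor fed by name
        return self.sess.run(self.op, feed_dict={'x:0': data})

    def eval_feed(self, feed):
        # Caller supplies the full feed dictionary (as the word classifiers do below)
        return self.sess.run(self.op, feed_dict=feed)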
Load Images
In [4]:
images, labels = loadWordsData('data/test_words/' + LANG + '_raw', loadGaplines=False)

for i in range(len(images)):
    printProgressBar(i, len(images))
    images[i] = imageNorm(
        cv2.cvtColor(images[i], cv2.COLOR_GRAY2RGB),
        60,
        border=False,
        tilt=True,
        hystNorm=True)

if LANG == 'en':
    for i in range(len(labels)):
        labels[i] = unidecode.unidecode(labels[i])

print()
print('Number of chars:', sum(len(l) for l in labels))
Out[4]:
Loading words...
-> Number of words: 267
|████████████████████████████████████████| 100.0%
Number of chars: 1356
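The sliding-window classifiers below assume every normalized word image is 60 px tall (the 60 passed to imageNorm). A quick sanity check, not part of the original notebook:

# Illustrative check (assumes the 60 passed to imageNorm is the target height)
print('Example image shape:', images[0].shape)
print('All 60 px tall:', all(img.shape[0] == 60 for img in images))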
Testing
In [5]:
# Load words for the Norvig-style unigram spelling corrector
WORDS = {}
with open('data/' + LANG + '_50k.txt') as f:
    for line in f:
        if LANG == 'en':
            WORDS[unidecode.unidecode(line.split(" ")[0])] = int(line.split(" ")[1])
        else:
            WORDS[line.split(" ")[0]] = int(line.split(" ")[1])
WORDS = Counter(WORDS)

def P(word, N=sum(WORDS.values())):
    "Probability of `word`."
    return WORDS[word] / N

def correction(word):
    "Most probable spelling correction for `word`."
    if word in WORDS:
        return word
    return max(candidates(word), key=P)

def candidates(word):
    "Generate possible spelling corrections for `word`."
    return (known([word]) or known(edits1(word)) or known(edits2(word)) or [word])

def known(words):
    "The subset of `words` that appear in the dictionary of WORDS."
    return set(w for w in words if w in WORDS)

def edits1(word):
    "All edits that are one edit away from `word`."
    if LANG == 'cz':
        letters = 'aábcčdďeéěfghiíjklmnňoópqrřsštťuúůvwxyýzž'
    else:
        letters = 'abcdefghijklmnopqrstuvwxyz'
    splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
    deletes = [L + R[1:] for L, R in splits if R]
    transposes = [L + R[1] + R[0] + R[2:] for L, R in splits if len(R) > 1]
    replaces = [L + c + R[1:] for L, R in splits if R for c in letters]
    inserts = [L + c + R for L, R in splits for c in letters]
    return set(deletes + transposes + replaces + inserts)

def edits2(word):
    "All edits that are two edits away from `word`."
    return (e2 for e1 in edits1(word) for e2 in edits1(e1))
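A quick illustration of how the corrector is used during evaluation; the exact outputs depend on the contents of en_50k.txt.

# Illustration only; results depend on data/en_50k.txt.
print(correction('spreads'))   # already in WORDS -> returned as-is
print(correction('spreds'))    # one edit away -> most probable dictionary candidate
print(round(P('the'), 4))      # unigram probability of a common word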
Cycler
In [6]:
class Cycler(ABC):
    """Abstract cycler class"""

    def __init__(self,
                 images,
                 labels,
                 charClass,
                 stats="NO Stats Provided",
                 slider=(60, 15),
                 ctc=False,
                 seq2seq=False,
                 charRNN=False):
        self.images = images
        self.labels = labels
        self.charClass = charClass
        self.slider = slider
        self.totalChars = sum([len(l) for l in labels])
        self.ctc = ctc
        self.seq2seq = seq2seq
        self.charRNN = charRNN
        self.stats = stats

        self.evaluate()

    @abstractmethod
    def recogniseWord(self, img):
        pass

    def countCorrect(self, pred, label, lower=False):
        correct = 0
        for i in range(min(len(pred), len(label))):
            if ((not lower and pred[i] == label[i])
                    or (lower and pred[i] == label.lower()[i])):
                correct += 1
        return correct

    def evaluate(self):
        """Evaluate accuracy of the word classification"""
        print()
        print("STATS:", self.stats)
        print(self.labels[1], ':', self.recogniseWord(self.images[1]))

        start_time = time.time()

        correct = 0
        correctWithCorrection = 0
        for i in range(len(self.images)):
            word = self.recogniseWord(self.images[i])
            correct += self.countCorrect(word, self.labels[i])
            # Correction works only for lowercase letters
            correctWithCorrection += self.countCorrect(
                correction(word.lower()), self.labels[i], lower=True)

        print("Correct/Total: %s / %s" % (correct, self.totalChars))
        print("Accuracy: %s %%" % round(correct / self.totalChars * 100, 4))
        print("Accuracy with correction: %s %%"
              % round(correctWithCorrection / self.totalChars * 100, 4))
        print("--- %s seconds ---" % round(time.time() - start_time, 2))
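Note that countCorrect scores characters position by position, so a single insertion or deletion early in a prediction penalizes everything after it. A standalone illustration of the same idea (re-implemented here for clarity; not used by the classes below):

# Standalone re-implementation of the position-wise scoring, for illustration only.
def count_correct_demo(pred, label):
    return sum(p == l for p, l in zip(pred, label))

print(count_correct_demo('spreods', 'spreads'))  # 6/7: one substitution costs one character
print(count_correct_demo('preads', 'spreads'))   # 0/7: dropping the first letter misaligns every later character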
In [7]:
class WordCycler(Cycler):
    """Cycle through the words and recognise them"""

    def recogniseWord(self, img):
        slider = self.slider

        if self.ctc:
            step = 10
            img = cv2.copyMakeBorder(
                img, 0, 0, self.slider[1]//2, self.slider[1]//2,
                cv2.BORDER_CONSTANT, value=[0, 0, 0])
            img = extendImg(
                img,
                (img.shape[0],
                 max(-(-img.shape[1] // step) * step, self.slider[1] + step)))

            length = (img.shape[1] - slider[1]) // step
            input_seq = np.zeros((1, length, slider[0] * slider[1]), dtype=np.float32)
            input_seq[0][:] = [img[:, loc*step: loc*step + slider[1]].flatten()
                               for loc in range(length)]
            input_seq = input_seq.swapaxes(0, 1)

            pred = self.charClass.eval_feed({'inputs:0': input_seq,
                                             'inputs_length:0': [length],
                                             'keep_prob:0': 1})[0]

            word = ''
            for i in pred:
                # NOTE: `word == 0` compares a str with an int and is always False,
                # so this break never triggers (possibly `i == 0` was intended).
                if word == 0 and i != 0:
                    break
                else:
                    word += idx2char(i)

        else:
            length = img.shape[1] // slider[1]
            input_seq = np.zeros((1, length, slider[0] * slider[1]), dtype=np.float32)
            input_seq[0][:] = [img[:, loc * slider[1]: (loc+1) * slider[1]].flatten()
                               for loc in range(length)]
            input_seq = input_seq.swapaxes(0, 1)

            if self.seq2seq:
                targets = np.zeros((1, 1), dtype=np.int32)
                pred = self.charClass.eval_feed({'encoder_inputs:0': input_seq,
                                                 'encoder_inputs_length:0': [length],
                                                 'decoder_targets:0': targets,
                                                 'keep_prob:0': 1})[0]
            else:
                targets = np.zeros((1, 1, 4096), dtype=np.int32)
                pred = self.charClass.eval_feed({'encoder_inputs:0': input_seq,
                                                 'encoder_inputs_length:0': [length],
                                                 'letter_targets:0': targets,
                                                 'is_training:0': False,
                                                 'keep_prob:0': 1})[0]

            word = ''
            for i in pred:
                # NOTE: `word == 1` is likewise always False (possibly `i == 1`,
                # an end-of-sequence index, was intended).
                if word == 1:
                    break
                else:
                    word += idx2char(i, True)

        return word
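For context, the non-CTC branch above simply slices the word image into fixed-width vertical strips and flattens each strip into one time step. A minimal sketch of that slicing with made-up shapes, using slider=(60, 2) as the Seq2Seq models do below:

# Illustrative only: how a 60 x W word image becomes an RNN input sequence.
demo_img = np.zeros((60, 120), dtype=np.float32)   # hypothetical normalized word image
slider = (60, 2)
length = demo_img.shape[1] // slider[1]            # 60 time steps
seq = np.array([demo_img[:, s * slider[1]:(s + 1) * slider[1]].flatten()
                for s in range(length)])
print(seq.shape)                                   # (60, 120): (time, slider[0] * slider[1])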
In [8]:
class CharCycler(Cycler):
    """Cycle through the words, segment them into characters and recognise them"""

    def recogniseWord(self, img):
        img = cv2.copyMakeBorder(img, 0, 0, 30, 30,
                                 cv2.BORDER_CONSTANT, value=[0, 0, 0])
        gaps = charSeg.segmentation(img, RNN=True)

        chars = []
        for i in range(len(gaps) - 1):
            char = img[:, gaps[i]:gaps[i+1]]
            # TODO None type error after threshold
            char, dim = letterNorm(char, is_thresh=True, dim=True)
            # TODO Test different values
            if dim[0] > 4 and dim[1] > 4:
                chars.append(char.flatten())

        chars = np.array(chars)
        word = ''

        if len(chars) != 0:
            if self.charRNN:
                pred = self.charClass.eval_feed({'inputs:0': [chars],
                                                 'length:0': [len(chars)],
                                                 'keep_prob:0': 1})[0]
            else:
                pred = self.charClass.run(chars)

            for c in pred:
                # word += CHARS[charIdx]
                word += idx2char(c)

        return word
In [9]:
# Cycle through the words with each classifier
WordCycler(images, labels, wordClass, stats='Seq2Seq', slider=(60, 2), seq2seq=True)
WordCycler(images, labels, wordClass2, stats='Seq2Seq2CNN', slider=(60, 2))
WordCycler(images, labels, wordClass3, stats='CTC', slider=(60, 60), ctc=True)
CharCycler(images, labels, charClass_1, stats='Bi-RNN and CNN', charRNN=False)

# Cycler(images,
#        labels,
#        charClass_2,
#        charRNN=True)
# Cycler(images,
#        labels,
#        charClass_3,
#        charRNN=True)
Out[9]:
STATS: Seq2Seq
spreads : zpasobe
Correct/Total: 626 / 1356
Accuracy: 46.1652 %
Accuracy with correction: 45.8702 %
--- 26.22 seconds ---
STATS: Seq2Seq2CNN
spreads : spreads
Correct/Total: 997 / 1356
Accuracy: 73.5251 %
Accuracy with correction: 76.1799 %
--- 34.8 seconds ---
STATS: CTC
spreads : spreads
Correct/Total: 1003 / 1356
Accuracy: 73.9676 %
Accuracy with correction: 78.0973 %
--- 32.03 seconds ---
STATS: Bi-RNN and CNN
spreads : spreads
Correct/Total: 1049 / 1356
Accuracy: 77.3599 %
Accuracy with correction: 80.0147 %
--- 76.62 seconds ---
<__main__.CharCycler at 0x7fbfaa21ad68>