Path: blob/master/Model-1/GapClassifier-BiRNN.ipynb
Kernel: Python 3
CNN with Bidirectional RNN - Gap Classification
Using TensorFlow
TODO
Try with 3px width of slider
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import tensorflow as tf
import cv2

%matplotlib notebook

# Increase size of plots
plt.rcParams['figure.figsize'] = (9.0, 5.0)

# Helpers
from ocr.helpers import implt
from ocr.mlhelpers import TrainingPlot, DataSet
from ocr.imgtransform import coordinates_remap
from ocr.datahelpers import loadWordsData, correspondingShuffle
from ocr.tfhelpers import Graph, create_cell

tf.reset_default_graph()
sess = tf.InteractiveSession()

print("OpenCV: " + cv2.__version__)
print("Numpy: " + np.__version__)
print("TensorFlow: " + tf.__version__)
Out[1]:
OpenCV: 3.1.0
Numpy: 1.13.3
TensorFlow: 1.4.0
Settings
In [2]:
PAD = 0                   # Value for padding images
POS = 1                   # Values of positive and negative labels (1/0)
NEG = 0
POS_SPAN = 1              # Number of positive values around the true position (5 is too high)
POS_WEIGHT = 3            # Weight of positive values in the loss

slider_size = (60, 60)    # Height is fixed to 60 by the data; width should be even
slider_step = 2
N_INPUT = slider_size[0]*slider_size[1]  # Size of the sequence input vector; the RNN input size depends on the CNN

num_buckets = 5
n_classes = 2             # Number of different outputs

rnn_layers = 4
rnn_residual_layers = 2   # Has to be smaller than rnn_layers
rnn_units = 256

learning_rate = 1e-4
dropout = 0.4             # Percentage of dropped out data
train_set = 0.8           # Percentage of training data

TRAIN_STEPS = 500000      # Number of training steps
TEST_ITER = 150
LOSS_ITER = 50
SAVE_ITER = 2000
BATCH_SIZE = 10
# EPOCH = 2000            # "Number" of batches in an epoch

save_loc = 'models/gap-clas/RNN/Bi-RNN-new'
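The slider settings above determine how many sequence steps the RNN sees per word image. As a quick illustration (a hypothetical helper, not part of the project code), the number of steps follows the same formula used by the data iterator further below:

def steps_per_image(img_width, slider=slider_size, step=slider_step):
    # One step per slider position, moving `step` pixels at a time
    return (img_width - slider[1]) // step

# Example: a padded word image 300 px wide -> (300 - 60) // 2 = 120 steps
print(steps_per_image(300))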
Loading Images
In [3]:
images, _, gaplines = loadWordsData(['data/words2/'], loadGaplines=True)
Out[3]:
Loading words...
-> Number of words: 5069
Dataset
In [4]:
# Shuffle data
images, gaplines = correspondingShuffle([images, gaplines])

# Add border and offset gaplines - RUN ONLY ONCE
for i in range(len(images)):
    images[i] = cv2.copyMakeBorder(images[i],
                                   0, 0,
                                   int(slider_size[1]/2), int(slider_size[1]/2),
                                   cv2.BORDER_CONSTANT,
                                   value=0)
    gaplines[i] += int(slider_size[1] / 2)

# Split data into train and test datasets
div = int(train_set * len(images))

trainImages = images[0:div]
testImages = images[div:]
trainGaplines = gaplines[0:div]
testGaplines = gaplines[div:]

print("Training images:", div)
print("Testing images:", len(images) - div)
Out[4]:
Training images: 4055
Testing images: 1014
In [5]:
class BucketDataIterator():
    """ Iterator for feeding the seq2seq model during training """
    def __init__(self,
                 images,
                 gaplines,
                 gap_span,
                 num_buckets=5,
                 slider=(60, 30),
                 slider_step=2,
                 imgprocess=lambda x: x,
                 train=True):
        self.train = train
        length = [(image.shape[1]-slider[1])//slider_step for image in images]

        # Creating indices from gaplines
        indices = gaplines - int(slider[1]/2)
        indices = indices // slider_step

        # Split images into sequences of vectors
        # + one target sequence of labels per image
        images_seq = np.empty(len(images), dtype=object)
        targets_seq = np.empty(len(images), dtype=object)
        for i, img in enumerate(images):
            images_seq[i] = [imgprocess(img[:, loc * slider_step: loc * slider_step + slider[1]].flatten())
                             for loc in range(length[i])]

            targets_seq[i] = np.ones((length[i])) * NEG
            for offset in range(gap_span):
                ind = indices[i] + (-(offset % 2) * offset // 2) + ((1 - offset%2) * offset // 2)
                if ind[0] < 0:
                    ind[0] = 0
                if ind[-1] >= length[i]:
                    ind[-1] = length[i] - 1
                targets_seq[i][ind] = POS

        # Create pandas DataFrame and sort it by image sequence length
        # in_length == out_length
        self.dataFrame = pd.DataFrame({'length': length,
                                       'images': images_seq,
                                       'targets': targets_seq
                                      }).sort_values('length').reset_index(drop=True)

        bsize = int(len(images) / num_buckets)
        self.num_buckets = num_buckets

        # Create buckets by slicing parts by indexes
        self.buckets = []
        for bucket in range(num_buckets-1):
            self.buckets.append(self.dataFrame.iloc[bucket * bsize: (bucket+1) * bsize])
        self.buckets.append(self.dataFrame.iloc[(num_buckets-1) * bsize:])

        self.buckets_size = [len(bucket) for bucket in self.buckets]

        # cursor[i] will be the cursor for the ith bucket
        self.cursor = np.array([0] * num_buckets)
        self.bucket_order = np.random.permutation(num_buckets)
        self.bucket_cursor = 0
        self.shuffle()
        print("Iterator created.")


    def shuffle(self, idx=None):
        """ Shuffle the idx bucket, or each bucket separately """
        for i in [idx] if idx is not None else range(self.num_buckets):
            self.buckets[i] = self.buckets[i].sample(frac=1).reset_index(drop=True)
            self.cursor[i] = 0


    def next_batch(self, batch_size):
        """ Creates the next training batch of size: batch_size
            Returns: image seq, label seq, seq lengths
        """
        i_bucket = self.bucket_order[self.bucket_cursor]
        # Increment cursor and shuffle in case of a new round
        self.bucket_cursor = (self.bucket_cursor + 1) % self.num_buckets
        if self.bucket_cursor == 0:
            self.bucket_order = np.random.permutation(self.num_buckets)

        if self.cursor[i_bucket] + batch_size > self.buckets_size[i_bucket]:
            self.shuffle(i_bucket)

        # Handle too big batch sizes
        if (batch_size > self.buckets_size[i_bucket]):
            batch_size = self.buckets_size[i_bucket]

        res = self.buckets[i_bucket].iloc[self.cursor[i_bucket]:
                                          self.cursor[i_bucket]+batch_size]
        self.cursor[i_bucket] += batch_size

        # Pad input and output sequences with <PAD> to the same (maximum) length
        max_length = max(res['length'])

        input_seq = np.zeros((batch_size, max_length, N_INPUT), dtype=np.float32)
        for i, img in enumerate(res['images']):
            input_seq[i][:res['length'].values[i]] = img

        # Targets are padded according to the maximum-length output sequence
        targets = np.ones([batch_size, max_length], dtype=np.float32) * PAD
        for i, target in enumerate(targets):
            target[:res['length'].values[i]] = res['targets'].values[i]

        return input_seq, targets, res['length'].values


    def next_feed(self, size):
        """ Create a feed dict directly for model training """
        (inputs_,
         targets_,
         length_) = self.next_batch(size)

        return {
            inputs: inputs_,
            targets: targets_,
            length: length_,
            keep_prob: (1.0 - dropout) if self.train else 1.0
        }
In [6]:
# Create iterators for feeding the BiRNN
train_iterator = BucketDataIterator(trainImages,
                                    trainGaplines,
                                    POS_SPAN,
                                    num_buckets,
                                    slider_size,
                                    slider_step,
                                    train=True)
test_iterator = BucketDataIterator(testImages,
                                   testGaplines,
                                   POS_SPAN,
                                   1,
                                   slider_size,
                                   slider_step,
                                   train=False)
Out[6]:
Iterator created.
Iterator created.
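For orientation, a short sketch (not part of the original notebook) of the batch format the iterator produces, using the train_iterator created above:

inputs_, targets_, length_ = train_iterator.next_batch(BATCH_SIZE)
print(inputs_.shape)   # (BATCH_SIZE, max_length, N_INPUT) - flattened slider windows, zero-padded
print(targets_.shape)  # (BATCH_SIZE, max_length)          - POS/NEG labels, PAD beyond each length
print(length_.shape)   # (BATCH_SIZE,)                     - true sequence lengths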
Create classifier
Inputs
In [7]:
# Input placeholders
# N_INPUT -> size of the vector representing one image in the sequence
# Inputs shape (batch_size, max_seq_length, vec_size) - batch major
inputs = tf.placeholder(shape=(None, None, N_INPUT),
                        dtype=tf.float32,
                        name='inputs')
length = tf.placeholder(shape=(None,),
                        dtype=tf.int32,
                        name='length')
# Required for training, not required for application
targets = tf.placeholder(shape=(None, None),
                         dtype=tf.int64,
                         name='targets')
# Dropout value
keep_prob = tf.placeholder(tf.float32, name='keep_prob')
Standardization + CNN
In [8]:
# Helper functions for standard layers
def conv2d(x, W, name=None):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME', name=name)

def max_pool_2x2(x, name=None):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME', name=name)

# 1. Layer - Convolution variables
W_conv1 = tf.get_variable('W_conv1', shape=[5, 5, 1, 2],
                          initializer=tf.contrib.layers.xavier_initializer())
b_conv1 = tf.Variable(tf.constant(0.1, shape=[2]), name='b_conv1')

# 3. Layer - Convolution variables
W_conv2 = tf.get_variable('W_conv2', shape=[5, 5, 2, 4],
                          initializer=tf.contrib.layers.xavier_initializer())
b_conv2 = tf.Variable(tf.constant(0.1, shape=[4]), name='b_conv2')

def CNN(x):
    x = tf.image.per_image_standardization(x)
    x = tf.reshape(x, [1, slider_size[0], slider_size[1], 1])
    # 1. Layer - Convolution
    h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1, name='h_conv1')
    # 2. Layer - Max Pool
    h_pool1 = max_pool_2x2(h_conv1, name='h_pool1')

    # 3. Layer - Convolution
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2, name='h_conv2')
    # 4. Layer - Max Pool
    return max_pool_2x2(h_conv2, name='h_pool2')

# Apply the CNN to every image window of every input sequence
inpts = tf.map_fn(
    lambda seq: tf.map_fn(
        lambda img: tf.reshape(
            CNN(tf.reshape(img, [slider_size[0], slider_size[1], 1])),
            # CNN(tf.reshape(img, [1, slider_size[0], slider_size[1], 1])),
            [-1]),
        seq),
    inputs,
    dtype=tf.float32)
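A quick sanity check (an added sketch, not from the original notebook) of the per-step feature size the CNN hands to the RNN: the SAME-padded convolutions keep the spatial size, and the two 2x2 max-pools reduce the 60x60 window to 15x15 with 4 channels.

h = slider_size[0] // 2 // 2   # 60 -> 30 -> 15 after the two max-pools
w = slider_size[1] // 2 // 2   # 60 -> 30 -> 15
print(h * w * 4)               # 900 features per slider position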
Bi-RNN
In [9]:
# Cells
cell_fw = create_cell(rnn_units,
                      rnn_layers,
                      rnn_residual_layers,
                      is_dropout=True,
                      keep_prob=keep_prob)
cell_bw = create_cell(rnn_units,
                      rnn_layers,
                      rnn_residual_layers,
                      is_dropout=True,
                      keep_prob=keep_prob)
In [10]:
# Bidirectional RNN
bi_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
    cell_fw = cell_fw,
    cell_bw = cell_bw,
    inputs = inpts,
    sequence_length = length,
    dtype = tf.float32)

outputs = tf.concat(bi_outputs, -1, name='outputs')

# pred = tf.matmul(outputs, W)
# pred = tf.scan(lambda a, x: tf.matmul(x, W), outputs, infer_shape=False)
pred = tf.layers.dense(inputs=outputs, units=n_classes, name='pred')
prediction = tf.argmax(pred, axis=-1, name='prediction')
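As a shape check (a sketch, not in the original notebook): concatenating the forward and backward outputs gives 2 * rnn_units = 512 features per step, which the dense layer maps to n_classes = 2 logits per slider position.

print(outputs.get_shape().as_list())  # [None, None, 512]
print(pred.get_shape().as_list())     # [None, None, 2]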
Optimizer
In [11]:
# Define loss and optimizer
weights = tf.multiply(targets, POS_WEIGHT) + 1
loss = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(
    logits=pred,
    labels=targets,
    weights=weights), name='loss')
train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss, name='train_step')

# Evaluate model
correct_pred = tf.equal(prediction, targets)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
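The weighting line gives gap positions (label 1) a loss weight of 1 + POS_WEIGHT = 4, while background and padding positions (label 0) keep weight 1, compensating for the heavy class imbalance. A small illustration (not in the original notebook):

example_targets = np.array([[0, 0, 1, 0, 1, 0]])
print(example_targets * POS_WEIGHT + 1)   # [[1 1 4 1 4 1]]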
Training
In [12]:
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

# Create plot for live stats plotting
trainPlot = TrainingPlot(TRAIN_STEPS, TEST_ITER, LOSS_ITER)

try:
    for i_batch in range(TRAIN_STEPS):
        fd = train_iterator.next_feed(BATCH_SIZE)
        train_step.run(fd)

        if i_batch % LOSS_ITER == 0:
            # Plotting loss
            tmpLoss = loss.eval(fd)
            trainPlot.updateCost(tmpLoss, i_batch // LOSS_ITER)

        if i_batch % TEST_ITER == 0:
            # Plotting accuracy
            fd_test = test_iterator.next_feed(BATCH_SIZE)
            accTest = accuracy.eval(fd_test)
            accTrain = accuracy.eval(fd)
            trainPlot.updateAcc(accTest, accTrain, i_batch // TEST_ITER)

        if i_batch % SAVE_ITER == 0:
            saver.save(sess, save_loc)

except KeyboardInterrupt:
    saver.save(sess, save_loc)
    print('Training interrupted, model saved.')

fd_test = test_iterator.next_feed(2*BATCH_SIZE)
accTest = accuracy.eval(fd_test)
print("Training finished with accuracy:", accTest)
Out[12]:
<IPython.core.display.Javascript object>
Training interrupted, model saved.
Training finished with accuracy: 0.975385
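A minimal inference sketch (an assumption, not part of this notebook; the project also ships an ocr.tfhelpers.Graph wrapper that could be used instead) showing how the saved checkpoint can be restored with plain TensorFlow 1.x and queried through the named tensors defined above:

inference_graph = tf.Graph()
with inference_graph.as_default():
    restorer = tf.train.import_meta_graph(save_loc + '.meta')

with tf.Session(graph=inference_graph) as s:
    restorer.restore(s, save_loc)
    # Dummy input: one sequence of 120 slider positions; real values would come
    # from slicing a padded word image exactly as in BucketDataIterator
    dummy = np.zeros((1, 120, N_INPUT), dtype=np.float32)
    gaps = s.run(inference_graph.get_tensor_by_name('prediction:0'),
                 feed_dict={inference_graph.get_tensor_by_name('inputs:0'): dummy,
                            inference_graph.get_tensor_by_name('length:0'): [120],
                            inference_graph.get_tensor_by_name('keep_prob:0'): 1.0})
    # gaps has shape (1, 120); a 1 marks a predicted letter gap at that slider position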