GitHub Repository: Aniket025/Medical-Prescription-OCR
Path: blob/master/Model-1/GapClassifier.ipynb
Kernel: Python 3

Convolutional Neural Network - Gap / Char Classification

Using TensorFlow

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
import glob
from math import ceil

# Helpers
from ocr.helpers import implt
from ocr.mlhelpers import TrainingPlot, DataSet
from ocr.datahelpers import loadGapData

print("OpenCV: " + cv2.__version__)
print("Numpy: " + np.__version__)
print("TensorFlow: " + tf.__version__)
/home/breta/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
OpenCV: 3.1.0
Numpy: 1.14.1
TensorFlow: 1.6.0
%matplotlib notebook
# Increase size of plots
plt.rcParams['figure.figsize'] = (9.0, 5.0)

Settings

slider = (60, 60)     # Height is fixed to 60 by the data;
                      # width should be less than 120
learning_rate = 5e-5
dropout = 0.5         # Percentage of dropped out data
train_set = 0.8       # Percentage of training data
TRAIN_STEPS = 500000
TEST_ITER = 150
COST_ITER = 50
SAVE_ITER = 2000
BATCH_SIZE = 64

save_loc = 'models/gap-clas/large/CNN-CG'
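
For scale: at 64 images per step, 500000 steps correspond to a few thousand passes over the roughly 10900 training images produced by the split below, which is why the training cell catches KeyboardInterrupt so the run can be stopped early. A quick back-of-the-envelope check (the 10872 figure is the training-set size printed after loading):

# Rough epoch count implied by these settings
steps_per_epoch = 10872 / BATCH_SIZE                     # ~170 steps per pass
print("Approx. epochs: %d" % (TRAIN_STEPS / steps_per_epoch))  # ~2943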

Load Images and Labels from CSV

images, labels = loadGapData('data/gapdet/large/', slider=slider)
Loading gap data...
-> Number of gaps and letters: 13591
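
Before building the model it is worth checking how balanced the two classes are, since the weighted loss in the graph below depends on this ratio. A minimal sketch, assuming labels is a binary 0/1 array (which the POS_WEIGHT formula below implies; which of the two classes means "gap" is defined inside loadGapData and is not shown here):

# Class balance check (assumes binary 0/1 labels)
n_pos = int(np.sum(labels))
n_neg = len(labels) - n_pos
print("Positive (label 1): %d" % n_pos)
print("Negative (label 0): %d" % n_neg)
print("Neg/Pos ratio: %.2f" % (n_neg / n_pos))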
print("Number of images: " + str(len(images))) # Splitting on train and test data div = int(train_set * len(images)) trainData = images[0:div] trainLabels = labels[0:div] evalData = images[div:] evalLabels = labels[div:] print("Training images: %g" % div)
Number of images: 13591
Training images: 10872
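
A quick visual sanity check of one training example. The repo's implt helper (imported above) presumably does something similar, but since its signature is not shown here, this minimal sketch uses matplotlib directly; it assumes each image is a flattened 60x60 grayscale array, consistent with the slider setting and the shape of the x placeholder below:

# Show one training example (assumes flattened 60x60 grayscale images)
idx = 0
plt.figure()
plt.imshow(np.array(trainData[idx]).reshape(slider), cmap='gray')
plt.title("Label: %d" % trainLabels[idx])
plt.show()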

Create classifier

Dataset

# Prepare training and evaluation datasets
trainSet = DataSet(trainData, trainLabels)
evalSet = DataSet(evalData, evalLabels)
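
DataSet comes from the project's ocr.mlhelpers module and its implementation is not shown here. For orientation, a hypothetical minimal stand-in with the one behavior the training loop below relies on, next_batch returning a (data, labels) tuple; this is an assumption about the interface, not the repository's code:

class SimpleDataSet:
    """Hypothetical stand-in for ocr.mlhelpers.DataSet (assumed interface)."""
    def __init__(self, data, labels):
        self.data = np.array(data)
        self.labels = np.array(labels)
        self._pos = 0

    def next_batch(self, batch_size):
        # Reshuffle once a full pass over the data is complete
        if self._pos + batch_size > len(self.data):
            perm = np.random.permutation(len(self.data))
            self.data = self.data[perm]
            self.labels = self.labels[perm]
            self._pos = 0
        batch = (self.data[self._pos:self._pos + batch_size],
                 self.labels[self._pos:self._pos + batch_size])
        self._pos += batch_size
        return batch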

Convolutional Neural Network

Graph

sess = tf.InteractiveSession()

def weights(name, shape):
    return tf.get_variable(name, shape=shape,
                           initializer=tf.contrib.layers.xavier_initializer(),
                           regularizer=tf.contrib.layers.l2_regularizer(scale=SCALE))

def bias(const, shape, name=None):
    return tf.Variable(tf.constant(const, shape=shape), name=name)

# Helper functions for standard layers
def conv2d(x, W, name=None):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME', name=name)

def max_pool_2x2(x, name=None):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME', name=name)

# Regularization scale - FOR TWEAKING
SCALE = 0.001
# Weight of the positive class in the cross entropy (negatives per positive)
POS_WEIGHT = (len(labels) - sum(labels)) / sum(labels)

# Placeholders for data (x) and labels (targets)
x = tf.placeholder(tf.float32, [None, slider[0]*slider[1]], name='x')
targets = tf.placeholder(tf.int64, [None])

# Reshape input data
reshape_images = tf.reshape(x, [-1, slider[0], slider[1], 1])
# Image standardization
x_images = tf.map_fn(
    lambda img: tf.image.per_image_standardization(img), reshape_images)

# 1. Layer - Convolution
W_conv1 = weights('W_conv1', shape=[8, 8, 1, 10])
b_conv1 = bias(0.1, shape=[10], name='b_conv1')
h_conv1 = tf.nn.relu(conv2d(x_images, W_conv1) + b_conv1, name='h_conv1')

# 2. Layer - Max Pool
h_pool1 = max_pool_2x2(h_conv1, name='h_pool1')

# 3. Layer - Convolution
W_conv2 = weights('W_conv2', shape=[5, 5, 10, 20])
b_conv2 = bias(0.1, shape=[20], name='b_conv2')
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2, name='h_conv2')

# 4. Layer - Max Pool
h_pool2 = max_pool_2x2(h_conv2, name='h_pool2')

# 5. Fully Connected layer
# Two 2x2 max-pools halve each spatial dimension twice (60 -> 30 -> 15),
# so the flattened size is ceil(60/4) * ceil(60/4) * 20 = 4500
W_fc1 = weights('W_fc1', shape=[ceil(slider[0]/4)*ceil(slider[1]/4)*20, 1000])
b_fc1 = bias(0.1, shape=[1000], name='b_fc1')
h_conv2_flat = tf.reshape(
    h_pool2, [-1, ceil(slider[0]/4)*ceil(slider[1]/4)*20], name='h_conv2_flat')
h_fc1 = tf.nn.relu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1, name='h_fc1')

# 6. Dropout
keep_prob = tf.placeholder(tf.float32, name='keep_prob')
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob, name='h_fc1_drop')

# 7. Output layer
W_fc2 = weights('W_fc2', shape=[1000, 2])
b_fc2 = bias(0.1, shape=[2], name='b_fc2')
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

# Activation function for real use in the application (bypasses dropout)
activation = tf.argmax(tf.matmul(h_fc1, W_fc2) + b_fc2, 1, name='activation')

# Cost: weighted softmax cross entropy + L2 regularization
# cross_entropy = tf.nn.weighted_cross_entropy_with_logits(logits=y_conv, targets=y_)
# Positive examples are up-weighted to counter the class imbalance;
# targets must be cast to float before multiplying by the float POS_WEIGHT
# (loss_weights renamed from weights to avoid shadowing the helper above)
loss_weights = tf.multiply(tf.cast(targets, tf.float32), POS_WEIGHT) + 1
cross_entropy = tf.losses.sparse_softmax_cross_entropy(
    logits=y_conv, labels=targets, weights=loss_weights)

regularization = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
cost = tf.reduce_mean(cross_entropy) + sum(regularization)

# Optimizer
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost, name='train_step')

# Evaluation
correct_prediction = tf.equal(tf.argmax(y_conv, 1), targets)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
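
The weighting gives every negative example weight 1 and every positive example weight POS_WEIGHT + 1, which roughly rebalances the two classes inside the loss. A quick check of the actual value (illustration only, no new ops are added to the graph):

# Effect of the class weighting: negatives count 1x, positives (POS_WEIGHT + 1)x
print("POS_WEIGHT: %.2f" % POS_WEIGHT)
print("Relative weight of a positive example: %.2f" % (POS_WEIGHT + 1))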

Training

sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

# Graph for live plotting
trainPlot = TrainingPlot(TRAIN_STEPS, TEST_ITER, COST_ITER)

try:
    for i in range(TRAIN_STEPS):
        trainBatch, labelBatch = trainSet.next_batch(BATCH_SIZE)

        if i % COST_ITER == 0:
            # Plotting cost
            tmpCost = cost.eval(feed_dict={x: trainBatch,
                                           targets: labelBatch,
                                           keep_prob: 1.0})
            trainPlot.updateCost(tmpCost, i // COST_ITER)

        if i % TEST_ITER == 0:
            # Plotting accuracy
            evalD, evalL = evalSet.next_batch(500)
            accEval = accuracy.eval(feed_dict={x: evalD,
                                               targets: evalL,
                                               keep_prob: 1.0})
            accTrain = accuracy.eval(feed_dict={x: trainBatch,
                                                targets: labelBatch,
                                                keep_prob: 1.0})
            trainPlot.updateAcc(accEval, accTrain, i // TEST_ITER)

        if i % SAVE_ITER == 0:
            # Saving model
            saver.save(sess, save_loc)

        train_step.run(feed_dict={x: trainBatch,
                                  targets: labelBatch,
                                  keep_prob: 1 - dropout})

except KeyboardInterrupt:
    pass

saver.save(sess, save_loc)

evalD, evalL = evalSet.next_batch(500)
print("Accuracy %g" % accuracy.eval(feed_dict={x: evalD,
                                               targets: evalL,
                                               keep_prob: 1.0}))
sess.close()
[Live training plot: cost and train/eval accuracy curves rendered here]
Accuracy 0.874
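
The accuracy above is measured on a single 500-example batch of the eval set. Since the session is closed, a fuller number can be obtained by restoring the saved checkpoint and scoring the whole eval set. A minimal sketch, relying only on the ops explicitly named in the graph ('x' and 'activation'; dropout is not on the activation path, so keep_prob need not be fed) and assuming evalData holds flattened 60x60 images as above:

# Sketch: restore the checkpoint and score the full eval set
tf.reset_default_graph()
with tf.Session() as eval_sess:
    restorer = tf.train.import_meta_graph(save_loc + '.meta')
    restorer.restore(eval_sess, save_loc)
    graph = tf.get_default_graph()
    x_in = graph.get_tensor_by_name('x:0')
    predict = graph.get_tensor_by_name('activation:0')

    preds = []
    for start in range(0, len(evalData), BATCH_SIZE):
        batch = evalData[start:start + BATCH_SIZE]
        preds.extend(eval_sess.run(predict, feed_dict={x_in: batch}))

    print("Full eval accuracy: %g" % np.mean(np.array(preds) == evalLabels))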