GitHub Repository: Aniket025/Medical-Prescription-OCR
Path: blob/master/Model-1/WordClassifier-Seq2Seq2CNN.ipynb
Kernel: Python 3

Recurrent Neural Network - Word Classification

Using a custom Seq2Seq + CNN model

Implemented in TensorFlow. Seq2Seq is used to generate the sequence of letter images, which are then recognised by an RNN/CNN classifier.

TODO

Remove random border
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.contrib.seq2seq as seq2seq
from tensorflow.python.layers import core as layers_core
from tensorflow.python.ops import math_ops
import time
import math
import unidecode
import cv2

from ocr.datahelpers import loadWordsData, correspondingShuffle, char2idx
from ocr.helpers import implt, extendImg, resize
from ocr.mlhelpers import TrainingPlot
from ocr.normalization import letterNorm, imageStandardization
from ocr.tfhelpers import Graph, create_cell

%matplotlib notebook

# Increase size of images
plt.rcParams['figure.figsize'] = (9.0, 5.0)

tf.reset_default_graph()
sess = tf.InteractiveSession()
print('Tensorflow', tf.__version__)
Tensorflow 1.4.0

Loading images

LANG = 'en'
images, labels, gaplines = loadWordsData(
    ['data/words2/'], loadGaplines=True)

if LANG == 'en':
    for i in range(len(labels)):
        labels[i] = unidecode.unidecode(labels[i])
Loading words... -> Number of words: 5069

Settings

CHARS = 82 if LANG == 'cz' else 52
PAD = 0            # Padding
EOS = 1            # End of seq
LETTER_PAD = -1.0

num_buckets = 5
slider_size = (60, 2)
step_size = 2
N_INPUT = slider_size[0] * slider_size[1]

char_size = CHARS + 2
letter_size = 64 * 64

encoder_layers = 2
encoder_residual_layers = 1    # HAVE TO be smaller than encoder_layers
encoder_units = 256

decoder_layers = 2 * encoder_layers                    # 2* is due to the bidirectional encoder
decoder_residual_layers = 2 * encoder_residual_layers
decoder_units = encoder_units

wordRNN_layers = 2
wordRNN_residual_layers = 1
wordRNN_units = 128

attention_size = 256
add_output_length = 4

learning_rate = 1e-3        # 1e-4
max_gradient_norm = 5.0     # For gradient clipping
dropout = 0.4
train_per = 0.8             # Percentage of training data

TRAIN_STEPS = 1000000       # Number of training steps!
TEST_ITER = 150
LOSS_ITER = 50
SAVE_ITER = 2000
BATCH_SIZE = 20             # 64
EPOCH = 1500                # Number of batches in epoch - not accurate

save_location = 'models/word-clas/' + LANG + '/SeqRNN/Classifier2'
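A quick sanity check on these settings (a toy example, assuming a 100 px wide word image, not data from this notebook): the (60, 2) slider moved 2 px at a time turns a word image into one flattened N_INPUT = 120 vector per encoder time step, using the same length formula as the iterator below.

import numpy as np

# Hypothetical word image: height 60, width 100 (placeholder values only)
img = np.zeros((60, 100))
slider_h, slider_w = 60, 2      # slider_size
step = 2                        # step_size

# Same length formula as used in BucketDataIterator below
in_length = (img.shape[1] + 1 - slider_w) // step

# One flattened 120-dim (N_INPUT) vector per encoder time step
seq = [img[:, loc * step: loc * step + slider_w].flatten()
       for loc in range(in_length)]

print(in_length, seq[0].shape)   # 49 (120,)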

Dataset

# Shuffle data for later splitting
images, labels, gaplines = correspondingShuffle([images, labels, gaplines])

labels_idx = np.empty(len(labels), dtype=object)
for i, label in enumerate(labels):
    labels_idx[i] = [char2idx(c, True) for c in label]
    labels_idx[i].append(EOS)

# Split data on train and test dataset
div = int(train_per * len(images))

trainImages = images[0:div]
testImages = images[div:]

trainGaplines = gaplines[0:div]
testGaplines = gaplines[div:]

trainLabels_idx = labels_idx[0:div]
testLabels_idx = labels_idx[div:]

print("Training images:", div)
print("Testing images:", len(images) - div)
Training images: 4055
Testing images: 1014
def stackImage(img, a, b):
    """ Add blank columns (length a, b) at start and end of image """
    return np.concatenate(
        (np.zeros((img.shape[0], a)),
         np.concatenate((img, np.zeros((img.shape[0], b))), axis=1)),
        axis=1)

# Don't mix train and test images
num_new_images = 0    # 2

trainImagesF = np.empty(len(trainImages) * (num_new_images+1), dtype=object)
trainGaplinesF = np.empty(len(trainImages) * (num_new_images+1), dtype=object)
trainLabelsF_idx = np.empty(len(trainImages) * (num_new_images+1), dtype=object)

for idx, img in enumerate(trainImages):
    add_idx = idx * (num_new_images+1)
    trainImagesF[add_idx] = img
    trainGaplinesF[add_idx] = trainGaplines[idx]
    trainLabelsF_idx[add_idx] = trainLabels_idx[idx]

    for i in range(num_new_images):
        a, b = np.random.randint(1, 16, size=2)
        trainImagesF[add_idx + (i+1)] = stackImage(img, a, b)
        trainGaplinesF[add_idx + (i+1)] = trainGaplines[idx] + a
        trainLabelsF_idx[add_idx + (i+1)] = trainLabels_idx[idx]

print("Total train images", len(trainImagesF))
Total train images 4055
class BucketDataIterator():
    """ Iterator for feeding seq2seq model during training """
    def __init__(self,
                 images,
                 targets,
                 gaplines,
                 num_buckets=5,
                 slider=(60, 30),
                 slider_step=2,
                 train=True):

        self.train = train
        self.slider = slider

        # PADDING of images to slider size; -(a // b) = ceil(a/b)
        for i in range(len(images)):
            images[i] = extendImg(
                images[i],
                (images[i].shape[0],
                 max(-(-images[i].shape[1] // slider_step) * slider_step, 60)))

        in_length = [(image.shape[1] + 1 - slider[1]) // slider_step
                     for image in images]

        # Split images to sequence of vectors
        img_seq = np.empty(len(images), dtype=object)
        for i, img in enumerate(images):
            img_seq[i] = [img[:, loc * slider_step: loc*slider_step + slider[1]].flatten()
                          for loc in range(in_length[i])]

        end_letter = np.ones(letter_size)    # * LETTER_PAD
        # End letter is full white
        np.put(end_letter, [0], [1])

        letter_seq = np.empty(len(images), dtype=object)
        for i, img in enumerate(images):
            letter_seq[i] = [imageStandardization(
                letterNorm(img[:, gaplines[i][x]:gaplines[i][x+1]])).flatten()
                             for x in range(len(gaplines[i])-1)]
            letter_seq[i].append(end_letter)

        # Create pandas dataFrame and sort it by images width (length)
        # letters_length is num_letter + EOS
        self.dataFrame = pd.DataFrame({'in_length': in_length,
                                       'letters_length': [len(g) for g in gaplines],
                                       'words_length': [len(t) for t in targets],
                                       'in_images': img_seq,
                                       'letters': letter_seq,
                                       'words': targets
                                      }).sort_values('in_length').reset_index(drop=True)

        bsize = int(len(images) / num_buckets)
        self.num_buckets = num_buckets

        # Create buckets by slicing parts by indexes
        self.buckets = []
        for bucket in range(num_buckets-1):
            self.buckets.append(self.dataFrame.iloc[bucket * bsize: (bucket+1) * bsize])
        self.buckets.append(self.dataFrame.iloc[(num_buckets-1) * bsize:])

        self.buckets_size = [len(bucket) for bucket in self.buckets]

        # cursor[i] will be the cursor for the ith bucket
        self.cursor = np.array([0] * num_buckets)
        self.bucket_order = np.random.permutation(num_buckets)
        self.bucket_cursor = 0
        self.shuffle()
        print("Iterator created.")


    def shuffle(self, idx=None):
        """ Shuffle idx bucket or each bucket separately """
        for i in [idx] if idx is not None else range(self.num_buckets):
            self.buckets[i] = self.buckets[i].sample(frac=1).reset_index(drop=True)
            self.cursor[i] = 0


    def next_batch(self, batch_size):
        """
        Creates next training batch of size: batch_size
        Returns: (in_images, letters, words,
                  in_length, letter_length, word_length)
        """
        i_bucket = self.bucket_order[self.bucket_cursor]
        # Increment cursor and shuffle in case of new round
        self.bucket_cursor = (self.bucket_cursor + 1) % self.num_buckets
        if self.bucket_cursor == 0:
            self.bucket_order = np.random.permutation(self.num_buckets)

        if self.cursor[i_bucket] + batch_size > self.buckets_size[i_bucket]:
            self.shuffle(i_bucket)

        # Handle too big batch sizes
        if (batch_size > self.buckets_size[i_bucket]):
            batch_size = self.buckets_size[i_bucket]

        res = self.buckets[i_bucket].iloc[self.cursor[i_bucket]:
                                          self.cursor[i_bucket]+batch_size]
        self.cursor[i_bucket] += batch_size

        # Check correct length of later prediction of sequences
        assert np.all(res['in_length'] + add_output_length >= res['letters_length'])

        input_max = max(res['in_length'])
        letters_max = max(res['letters_length'])
        words_max = max(res['words_length'])

        input_seq = np.ones((batch_size, input_max, N_INPUT), dtype=np.float32) * LETTER_PAD
        for i, img in enumerate(res['in_images']):
            input_seq[i][:res['in_length'].values[i]] = img
        input_seq = input_seq.swapaxes(0, 1)    # Time major

        letters = np.ones((batch_size, letters_max, letter_size), dtype=np.float32) * LETTER_PAD
        for i, img in enumerate(res['letters']):
            letters[i][:res['letters_length'].values[i]] = img

        # Need to pad according to the maximum length output sequence
        words = np.zeros([batch_size, words_max], dtype=np.int32)
        for i, word in enumerate(res['words']):
            words[i][:res['words_length'].values[i]] = word

        return (input_seq, letters, words,
                res['in_length'].values,
                res['letters_length'].values,
                res['words_length'].values)


    def next_feed(self, size, words=True, train=None):
        """ Create feed directly for model training """
        if train is None:
            train = self.train
        (encoder_inputs_,
         letter_targets_,
         word_targets_,
         encoder_inputs_length_,
         letter_targets_length_,
         word_targets_length_) = self.next_batch(size)

        return {
            encoder_inputs: encoder_inputs_,
            encoder_inputs_length: encoder_inputs_length_,
            letter_targets: letter_targets_,
            letter_targets_length: letter_targets_length_,
            word_targets: word_targets_,
            word_targets_length: word_targets_length_,
            keep_prob: (1.0 - dropout) if self.train else 1.0,
            is_training: train,
            is_words: words
        }
# Create iterator for feeding RNN
# Create only once, it modifies: labels_idx
train_iterator = BucketDataIterator(trainImagesF,
                                    trainLabelsF_idx,
                                    trainGaplinesF,
                                    num_buckets,
                                    slider_size,
                                    step_size,
                                    train=True)
test_iterator = BucketDataIterator(testImages,
                                   testLabels_idx,
                                   testGaplines,
                                   num_buckets,
                                   slider_size,
                                   step_size,
                                   train=False)
Iterator created.
Iterator created.
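The iterators above sort samples by in_length and slice them into buckets, so a batch drawn from one bucket contains similarly long sequences and wastes little padding. A toy illustration of the idea (hypothetical lengths, not data from this notebook):

import numpy as np
import pandas as pd

# Toy sequence lengths standing in for the sorted 'in_length' column
toy = pd.DataFrame({'in_length': np.sort(np.random.randint(5, 60, size=20))})

n_buckets = 4
bsize = len(toy) // n_buckets
buckets = [toy.iloc[b * bsize:(b + 1) * bsize] for b in range(n_buckets - 1)]
buckets.append(toy.iloc[(n_buckets - 1) * bsize:])

# Average padding per sample when batching inside one bucket vs. over the whole set
per_bucket_pad = [float(b['in_length'].max() - b['in_length'].mean()) for b in buckets]
global_pad = float(toy['in_length'].max() - toy['in_length'].mean())
print(per_bucket_pad, global_pad)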

Placeholders

# Only encoder inputs are time major
# Encoder inputs shape (max_seq_length, batch_size, vec_size)
encoder_inputs = tf.placeholder(shape=(None, None, N_INPUT),
                                dtype=tf.float32,
                                name='encoder_inputs')
encoder_inputs_length = tf.placeholder(shape=(None,),
                                       dtype=tf.int32,
                                       name='encoder_inputs_length')

# Required for letter sep. training
# Contains EOS symbol
letter_targets = tf.placeholder(shape=(None, None, letter_size),
                                dtype=tf.float32,
                                name='letter_targets')
letter_targets_length = tf.placeholder(shape=(None,),
                                       dtype=tf.int32,
                                       name='letter_targets_length')

# Required for word training
word_targets = tf.placeholder(shape=(None, None),
                              dtype=tf.int32,
                              name='word_targets')
word_targets_length = tf.placeholder(shape=(None,),
                                     dtype=tf.int32,
                                     name='word_targets_length')

# Dropout value
keep_prob = tf.placeholder(tf.float32, name='keep_prob')

# Testing control
is_training = tf.placeholder(tf.bool, shape=None, name="is_training")
is_words = tf.placeholder(tf.bool, shape=None, name="is_words")

Decoder Train Feeds

sequence_size, batch_size, _ = tf.unstack(tf.shape(encoder_inputs))

# letter_targets
EOS_SLICE = tf.cast(tf.fill([batch_size, 1, letter_size], EOS), tf.float32)
PAD_SLICE = tf.cast(tf.fill([batch_size, 1, letter_size], LETTER_PAD), tf.float32)    # PAD

# Train inputs with EOS symbol at start of seq
letter_train_inputs = tf.concat([PAD_SLICE, letter_targets], axis=1)    # EOS_SLICE
letter_train_length = letter_targets_length

# Length of infer (test) letter output
# TODO: will have to make shorte...
output_length = tf.minimum(
    tf.reduce_max(encoder_inputs_length) * step_size // 15 + add_output_length,
    23)
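A quick worked example of the output_length cap above (the 120 px width is an assumption; the other values come from the Settings cell):

# Assumed example: a word image padded to width 120 px
width = 120
in_len = (width + 1 - slider_size[1]) // step_size                  # 59 encoder steps
max_letters = min(in_len * step_size // 15 + add_output_length, 23)
print(in_len, max_letters)   # 59 -> min(59*2//15 + 4, 23) = 11 letters at most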

Encoder

enc_cell_fw = create_cell(encoder_units,
                          encoder_layers,
                          encoder_residual_layers,
                          is_dropout=True,
                          keep_prob=keep_prob)
enc_cell_bw = create_cell(encoder_units,
                          encoder_layers,
                          encoder_residual_layers,
                          is_dropout=True,
                          keep_prob=keep_prob)
### CNN ###
SCALE = 0.01    # 0.1

# Functions for initializing convolution and pool layers
def weights(name, shape):
    return tf.get_variable(name,
                           shape=shape,
                           initializer=tf.contrib.layers.xavier_initializer(),
                           regularizer=tf.contrib.layers.l2_regularizer(scale=SCALE))

def bias(const, shape, name=None):
    return tf.Variable(tf.constant(const, shape=shape), name=name)

def conv2d2(x, W, name=None):
    return tf.nn.conv2d(x, W, strides=[1, 2, 1, 1], padding='SAME', name=name)

W_conv1 = weights('W_conv1', shape=[2, 1, 1, 1])
b_conv1 = bias(0.1, shape=[1], name='b_conv1')

def CNN_1(x):
    x = tf.image.per_image_standardization(x)
    img = tf.reshape(x, [1, slider_size[0], slider_size[1], 1])

    h_conv1 = tf.nn.relu(conv2d2(img, W_conv1) + b_conv1, name='h_conv1')

    return h_conv1
inputs = tf.map_fn(
    lambda seq: tf.map_fn(
        lambda img: tf.reshape(
            CNN_1(tf.reshape(img, [slider_size[0], slider_size[1], 1])),
            [-1]),
        seq),
    encoder_inputs,
    dtype=tf.float32)
# inputs = encoder_inputs

# Bidirectional RNN, gives fw and bw outputs separately
enc_outputs, enc_state = tf.nn.bidirectional_dynamic_rnn(
    cell_fw = enc_cell_fw,
    cell_bw = enc_cell_bw,
    inputs = inputs,
    sequence_length = encoder_inputs_length,
    dtype = tf.float32,
    time_major = True)

encoder_outputs = tf.concat(enc_outputs, -1)

if encoder_layers == 1:
    encoder_state = enc_state
else:
    encoder_state = []
    for layer_id in range(encoder_layers):
        encoder_state.append(enc_state[0][layer_id])    # forward
        encoder_state.append(enc_state[1][layer_id])    # backward
    encoder_state = tuple(encoder_state)

Decoder

# attention_states: size [batch_size, max_time, num_units]
attention_states = tf.transpose(encoder_outputs, [1, 0, 2])

# Create an attention mechanism
attention_mechanism = seq2seq.BahdanauAttention(
    attention_size,
    attention_states,    # decoder_units instead of attention_size
    memory_sequence_length=encoder_inputs_length)

decoder_cell = create_cell(decoder_units,
                           decoder_layers,
                           decoder_residual_layers,
                           is_dropout=True,
                           keep_prob=keep_prob)

decoder_cell = seq2seq.AttentionWrapper(
    decoder_cell,
    attention_mechanism,
    attention_layer_size=attention_size)

decoder_initial_state = decoder_cell.zero_state(batch_size, tf.float32).clone(
    cell_state=encoder_state)

TRAIN DECODER

# Helper
helper = seq2seq.TrainingHelper(
    letter_train_inputs, letter_targets_length)

# Decoder
projection_layer = layers_core.Dense(
    letter_size, activation=tf.tanh, use_bias=True)

decoder = seq2seq.BasicDecoder(
    decoder_cell, helper, decoder_initial_state,
    output_layer=projection_layer)

# Dynamic decoding
outputs, final_context_state, _ = seq2seq.dynamic_decode(decoder)

letter_logits_train = outputs.rnn_output
letter_prediction_train = outputs.sample_id

INFERENCE DECODER

### CNN ###
def conv2d(x, W, name=None):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME', name=name)

def max_pool_2x2(x, name=None):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME', name=name)

W_conv1_ = weights('W_conv1_', shape=[16, 16, 1, 4])
b_conv1_ = bias(0.1, shape=[4], name='b_conv1_')
W_conv2_ = weights('W_conv2_', shape=[5, 5, 4, 12])
b_conv2_ = bias(0.1, shape=[12], name='b_conv2_')
W_conv3_ = weights('W_conv3_', shape=[3, 3, 12, 20])
b_conv3_ = bias(0.1, shape=[20], name='b_conv3_')
W_fc1_ = weights('W_fc2_', shape=[8*8*20, char_size])
b_fc1_ = bias(0.1, shape=[char_size], name='b_fc2_')

def CNN_2(x, clas=True):
    if clas:
        b_size, seq_size, _ = tf.unstack(tf.shape(x))
    else:
        b_size, _ = tf.unstack(tf.shape(x))

    imgs = tf.reshape(x, [-1, 64, 64, 1])
    x_imgs = tf.map_fn(
        lambda img: tf.image.per_image_standardization(img), imgs)

    # 1. Layer - Convolution
    h_conv1 = tf.nn.relu(conv2d(x_imgs, W_conv1_) + b_conv1_, name='h_conv1_')
    # 2. Layer - Max Pool
    h_pool1 = max_pool_2x2(h_conv1, name='h_pool1_')

    # 3. Layer - Convolution + Max Pool
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2_) + b_conv2_, name='h_conv2_')
    h_pool2 = max_pool_2x2(h_conv2, name='h_pool2_')

    # 4. Layer - Convolution + Max Pool
    h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3_) + b_conv3_, name='h_conv3_')
    h_pool3 = max_pool_2x2(h_conv3, name='h_pool3_')

    # 5. Reshape filters into flat array
    h_flat = tf.reshape(h_pool3, [-1, 8*8*20])

    # 6. Dropout
    h_flat_drop = tf.nn.dropout(h_flat, keep_prob)

    # 7. Output layer
    out = tf.matmul(h_flat_drop, W_fc1_) + b_fc1_

    if clas:
        return tf.reshape(out, [b_size, seq_size, -1])
    else:
        return tf.reshape(out, [b_size, -1])
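A quick arithmetic check (not from the notebook) that the 8*8*20 flat size above is consistent: three SAME-padded 2x2 max pools shrink the 64x64 letter image to 8x8, and the last convolution has 20 filters.

size = 64
for _ in range(3):             # three max_pool_2x2 layers, stride 2, SAME padding
    size = (size + 1) // 2     # ceil(size / 2)
print(size, size * size * 20)  # 8 1280 -> matches the 8*8*20 reshape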
# Helper without embedding, can add param: 'next_inputs_fn'
helper_infer = seq2seq.InferenceHelper(
    sample_fn=(lambda x: x),
    sample_shape=[letter_size],
    sample_dtype=tf.float32,
    start_inputs=tf.cast(tf.fill([batch_size, letter_size], LETTER_PAD),
                         tf.float32),    # PAD <- EOS, needs float32
    end_fn=(lambda sample_ids:
            # tf.greater(sample_ids[:, 0], 0)))
            tf.equal(tf.argmax(CNN_2(sample_ids, False),
                               axis=-1,
                               output_type=tf.int32),
                     1)))

decoder_infer = seq2seq.BasicDecoder(
    decoder_cell, helper_infer, decoder_initial_state,
    output_layer=projection_layer)

# Dynamic decoding
outputs_infer, final_context_state, final_seq_lengths = seq2seq.dynamic_decode(
    decoder_infer,
    impute_finished=True,
    maximum_iterations=output_length)

letter_prediction_infer = tf.identity(outputs_infer.rnn_output,    # sample_id
                                      name='letter_prediction_infer')

RNN

word_inputs = tf.cond(is_training,
                      lambda: letter_targets,
                      lambda: letter_prediction_infer)
word_inputs_length_ = tf.cond(is_training,
                              lambda: letter_targets_length,
                              lambda: final_seq_lengths)
word_inputs_length = word_inputs_length_    # tf.subtract(word_inputs_length_, 1)

# Input images CNN
word_outputs = CNN_2(word_inputs)

# Word RNN
# cell_RNN = create_cell(wordRNN_units,
#                        wordRNN_layers,
#                        wordRNN_residual_layers,
#                        is_dropout=True,
#                        keep_prob=keep_prob)

# word_outputs, _ = tf.nn.dynamic_rnn(
#     cell = cell_RNN,
#     inputs = word_inputs,
#     sequence_length = word_inputs_length,
#     dtype = tf.float32)

# word_logits = tf.layers.dense(
#     inputs=word_outputs,
#     units=char_size,
#     name='pred')

word_logits = word_outputs
word_prediction = tf.argmax(word_logits,
                            axis=-1,
                            output_type=tf.int32,
                            name='word_prediction')

Optimizer

Weights + Paddings

# Pad test accuracy
letter_test_targets = tf.pad(
    letter_targets,
    [[0, 0], [0, output_length - tf.reduce_max(letter_targets_length)], [0, 0]],
    constant_values=LETTER_PAD,
    mode='CONSTANT')

# Pad prediction to match lengths
letter_pred_infer_pad = tf.pad(
    letter_prediction_infer,
    [[0, 0], [0, output_length - tf.reduce_max(word_inputs_length_)], [0, 0]],
    constant_values=LETTER_PAD,
    mode='CONSTANT')

word_pad_length = tf.maximum(
    tf.reduce_max(word_inputs_length_),
    tf.reduce_max(word_targets_length))

word_logits_pad = tf.pad(
    word_logits,
    [[0, 0], [0, word_pad_length - tf.reduce_max(word_inputs_length_)], [0, 0]],
    constant_values=PAD,
    mode='CONSTANT')

word_pred_pad = tf.pad(
    word_prediction,
    [[0, 0], [0, word_pad_length - tf.reduce_max(word_inputs_length_)]],
    constant_values=PAD,
    mode='CONSTANT')

word_targets_pad = tf.pad(
    word_targets,
    [[0, 0], [0, word_pad_length - tf.reduce_max(word_targets_length)]],
    constant_values=PAD,
    mode='CONSTANT')

# Weights
letter_loss_weights = tf.sequence_mask(
    letter_train_length,
    tf.reduce_max(letter_train_length),
    dtype=tf.float32)

letter_test_weights = tf.sequence_mask(
    letter_train_length,
    output_length,
    dtype=tf.float32)

word_loss_weights = tf.sequence_mask(
    word_targets_length,    # word_inputs_length, try max(targets, inputs)
    word_pad_length,
    dtype=tf.float32)

word_acc_weights = tf.sequence_mask(
    tf.subtract(final_seq_lengths, 1),    # word_inputs_length, try max(targets, inputs)
    word_pad_length,
    dtype=tf.float32)
## Loss
letter_loss = tf.losses.mean_squared_error(
    predictions=letter_logits_train,
    labels=letter_targets,
    weights=tf.stack([letter_loss_weights for i in range(letter_size)], axis=-1))

word_seq_loss = seq2seq.sequence_loss(
    logits=word_logits_pad,
    targets=word_targets_pad,
    weights=word_loss_weights,
    name='word_loss')
regularization = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
word_loss = word_seq_loss + sum(regularization)

loss = tf.cond(is_words,
               lambda: word_loss,
               lambda: letter_loss)

# learning_rate_ = learning_rate
learning_rate_ = tf.cond(tf.logical_and(is_words, tf.logical_not(is_training)),
                         lambda: learning_rate,    # * 0.1,
                         lambda: learning_rate)

## Calculate and clip gradients
params = tf.trainable_variables()
gradients = tf.gradients(loss, params)
clipped_gradients, _ = tf.clip_by_global_norm(
    gradients, max_gradient_norm)

### Optimization
optimizer = tf.train.AdamOptimizer(learning_rate_)
train_step = optimizer.apply_gradients(
    zip(clipped_gradients, params),
    name='train_step')

### Evaluate model
correct_prediction = tf.equal(word_pred_pad, word_targets_pad)

## Advanced accuracy: average only over the elements of the seq, including the EOS symbol
# accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
accuracy = (tf.reduce_sum(tf.cast(correct_prediction, tf.float32) * word_acc_weights)
            / tf.reduce_sum(word_acc_weights))
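The accuracy above is averaged only over positions selected by word_acc_weights, so padding after the EOS symbol does not inflate the score. A small numpy illustration of the same masking idea (toy values, not from this notebook):

import numpy as np

# Toy predictions/targets padded to a common length, with per-sequence valid lengths
pred = np.array([[12, 7, 1, 0, 0],
                 [ 3, 3, 9, 1, 0]])
target = np.array([[12, 8, 1, 0, 0],
                   [ 3, 5, 9, 1, 0]])
lengths = np.array([3, 4])          # valid positions incl. the EOS symbol

# Equivalent of tf.sequence_mask(lengths, maxlen)
mask = (np.arange(pred.shape[1])[None, :] < lengths[:, None]).astype(np.float32)

correct = (pred == target).astype(np.float32)
masked_accuracy = (correct * mask).sum() / mask.sum()
print(masked_accuracy)   # 5 correct of 7 valid positions -> ~0.714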
sess.run(tf.global_variables_initializer())

fd = test_iterator.next_feed(3)
pre = word_logits.eval(fd)
print(pre.shape)
(3, 11, 54)

Training

sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

# Create plot for live stats plotting
trainPlot = TrainingPlot(TRAIN_STEPS, TEST_ITER, LOSS_ITER)

try:
    for i_batch in range(TRAIN_STEPS):
        # Three steps (can overflow): 1. train letters, 2. train words on known letters, 3. combine
        is_words_, is_train_ = (True, True)
        if i_batch < 1000 or i_batch % 2 == 0:    # 2000 # 5000
            is_words_, is_train_ = (False, True)
        # elif i_batch < 3000:    # 12000
        #     is_words_, is_train_ = (True, True)
        elif i_batch % 4 == 3:
            is_words_, is_train_ = (True, False)

        fd = train_iterator.next_feed(BATCH_SIZE, words=is_words_, train=is_train_)
        train_step.run(fd)

        if i_batch % LOSS_ITER == 0:
            # Plotting loss
            tmpLoss = loss.eval(fd)
            trainPlot.updateCost(tmpLoss, i_batch // LOSS_ITER)

        if i_batch % TEST_ITER == 0:
            # Plotting accuracy
            fd_test = test_iterator.next_feed(BATCH_SIZE)
            fd = train_iterator.next_feed(BATCH_SIZE, words=True, train=False)
            accTest = accuracy.eval(fd_test)
            accTrain = accuracy.eval(fd)
            trainPlot.updateAcc(accTest, accTrain, i_batch // TEST_ITER)

        if i_batch % SAVE_ITER == 0:
            saver.save(sess, save_location)

        if i_batch % EPOCH == 0:
            fd_test = test_iterator.next_feed(BATCH_SIZE)
            print('batch %r - loss: %r' % (i_batch,
                                           sess.run(loss, fd_test)))
            predict_, target_ = sess.run([word_prediction, word_targets], fd_test)
            for i, (inp, pred) in enumerate(zip(target_, predict_)):
                print('    expected  > {}'.format(inp))
                print('    predicted > {}'.format(pred))
                if i >= 1:
                    break
            print()

except KeyboardInterrupt:
    print('Training interrupted, model saved.')

saver.save(sess, save_location)
[Live training plot rendered inline]
batch 0 - loss: 5.0267339 expected > [45 48 30 32 1 0 0 0 0 0 0 0] predicted > [50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50] expected > [24 36 41 31 42 50 1 0 0 0 0 0] predicted > [50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50] batch 1500 - loss: 3.0129859 expected > [10 41 46 47 28 39 39 1 0 0] predicted > [46 36 39 39 1 1 1 1] expected > [32 41 32 45 34 52 1 0 0 0] predicted > [46 42 42 1 1 1 1 1] batch 3000 - loss: 2.2141256 expected > [38 32 1 0 0 0] predicted > [29 32 1 1 1] expected > [10 39 39 1 0 0] predicted > [34 32 1 1 1] batch 4500 - loss: 2.4519362 expected > [11 36 41 31 45 36 30 35 1 0] predicted > [11 36 31 31 41 41 31 1 1] expected > [46 32 46 36 47 1 0 0 0 0] predicted > [46 32 46 46 47 47 1 1 1] batch 6000 - loss: 2.2202351 expected > [21 28 47 28 41 28 1 0 0 0] predicted > [21 28 47 32 32 28 1 1] expected > [12 32 49 36 41 1 0 0 0 0] predicted > [12 32 40 40 40 1 1 1] batch 7500 - loss: 1.8579268 expected > [45 28 47 36 42 1 0] predicted > [45 32 47 47 42 1] expected > [50 35 42 1 0 0 0] predicted > [49 32 39 42 1 1] batch 9000 - loss: 1.6952639 expected > [49 42 31 28 1 0 0 0 0] predicted > [49 42 31 28 1 1 1 1 1] expected > [53 32 41 36 47 1 0 0 0] predicted > [30 32 41 36 47 1 1 1 1] batch 10500 - loss: 1.288264 expected > [2 1 0 0 0 0] predicted > [28 1 1 1 1] expected > [20 1 0 0 0 0] predicted > [10 1 1 1 1] batch 12000 - loss: 1.2034814 expected > [20 47 32 39 28 1 0 0 0 0] predicted > [20 47 32 39 28 1 1 1 1] expected > [ 4 35 28 39 38 1 0 0 0 0] predicted > [ 4 38 28 39 38 1 1 1 1] batch 13500 - loss: 1.581913 expected > [10 34 41 28 30 1] predicted > [20 43 32 28 1] expected > [ 3 28 52 1 0 0] predicted > [ 3 28 52 1 1] batch 15000 - loss: 0.97768688 expected > [29 31 36 1 0 0] predicted > [29 47 36 1 1] expected > [47 45 36 43 1 0] predicted > [29 45 43 43 1] batch 16500 - loss: 1.7931079 expected > [38 45 28 47 28 46 36 1 0 0] predicted > [38 45 28 39 28 45 36 1 1 1 1] expected > [39 32 47 48 46 38 28 1 0 0] predicted > [39 32 48 46 39 28 28 28 1 1 1] batch 18000 - loss: 1.3911827 expected > [ 2 39 36 30 32 1 0 0 0 0] predicted > [38 39 40 30 32 32 1 1 1 1] expected > [30 36 46 39 42 1 0 0 0 0] predicted > [28 36 46 39 42 1 1 1 1 1] batch 19500 - loss: 1.6664226 expected > [47 32 35 31 52 1] predicted > [47 32 31 31 52 1] expected > [42 1 0 0 0 0] predicted > [42 1 1 1 1 1] batch 21000 - loss: 1.7441137 expected > [11 36 41 31 45 36 30 35 1 0] predicted > [11 36 41 45 36 45 38 1] expected > [45 36 46 38 1 0 0 0 0 0] predicted > [45 36 28 38 1 1 1 1] batch 22500 - loss: 1.4565325 expected > [46 40 32 47 1 0 0 0 0] predicted > [48 41 32 47 1 1 1 1 1 1] expected > [25 32 41 28 1 0 0 0 0] predicted > [25 32 41 28 1 1 1 1 1 1] batch 24000 - loss: 1.4579525 expected > [34 1 0 0 0 0 0] predicted > [26 1 1 1 1] expected > [43 45 32 47 47 52 1] predicted > [43 42 53 52 1] batch 25500 - loss: 1.1414853 expected > [42 45 34 28 41 1 0 0] predicted > [30 45 34 28 41 1 1] expected > [19 48 31 42 39 33 1 0] predicted > [ 5 28 31 28 39 33 1] batch 27000 - loss: 1.6382754 expected > [28 1 0 0 0] predicted > [28 1 1 1 1 1] expected > [10 39 39 1 0] predicted > [24 1 1 1 1 1] batch 28500 - loss: 1.542223 expected > [43 28 48 46 32 1 0 0 0] predicted > [43 28 47 32 32 1 1 1 1] expected > [53 28 46 47 48 43 52 1 0] predicted > [43 28 46 47 48 41 42 1 1] batch 30000 - loss: 1.039789 expected > [32 51 30 36 47 32 1 0 0] predicted > [32 51 30 36 47 32 1 1 1 1] expected > [40 42 47 42 45 1 0 0 0] predicted > [40 42 47 42 45 1 1 1 1 1] 
batch 31500 - loss: 0.82488751 expected > [12 36 41 42 1 0 0 0 0 0] predicted > [25 36 41 42 1 1 1 1 1 1] expected > [43 45 36 30 32 1 0 0 0 0] predicted > [43 45 36 41 32 1 1 1 1 1] batch 33000 - loss: 2.1014812 expected > [53 28 1 0 0 0 0 0 0] predicted > [53 28 1 1 1 1 1 1 1] expected > [34 45 42 48 43 1 0 0 0] predicted > [34 45 42 28 1 1 1 1 1] batch 34500 - loss: 1.229347 expected > [40 28 47 32 45 36 28 39 1 0 0 0] predicted > [40 28 47 32 45 28 28 1 1 1] expected > [46 28 50 1 0 0 0 0 0 0 0 0] predicted > [46 28 48 48 1 1 1 1 1 1] batch 36000 - loss: 1.4402622 expected > [28 30 1 0 0 0] predicted > [28 30 1 1 1 1] expected > [48 31 1 0 0 0] predicted > [48 31 1 1 1 1] batch 37500 - loss: 1.3146204 expected > [47 52 43 32 1] predicted > [47 52 43 32 1] expected > [28 39 32 1 0] predicted > [28 39 32 1 1] batch 39000 - loss: 1.7878778 expected > [30 28 45 32 1 0 0 0 0 0 0] predicted > [30 28 45 32 1 1 1 1 1 1 1] expected > [46 48 29 46 30 45 36 29 32 1 0] predicted > [46 48 29 46 30 45 29 32 1 1 1] batch 40500 - loss: 0.89473355 expected > [46 48 31 31 32 41 1 0 0 0] predicted > [46 45 31 31 32 41 41 1 1 1] expected > [41 32 46 47 28 30 36 39 42 1] predicted > [41 32 46 47 28 30 36 39 36 1] batch 42000 - loss: 1.211858 expected > [47 50 42 1 0 0 0] predicted > [47 50 42 1 1 1 1] expected > [35 45 28 47 1 0 0] predicted > [35 45 28 47 1 1 1] batch 43500 - loss: 1.6062182 expected > [30 28 37 1 0 0] predicted > [ 4 28 37 1 1 1 1] expected > [37 28 38 42 1 0] predicted > [52 32 38 42 1 1 1] batch 45000 - loss: 0.84628856 expected > [2 1 0 0 0 0] predicted > [2 1 1 1 1 1] expected > [44 48 36 47 32 1] predicted > [34 40 36 47 32 1] batch 46500 - loss: 1.578271 expected > [39 32 53 32 47 1 0] predicted > [29 32 32 47 1 1 1] expected > [50 36 31 32 1 0 0] predicted > [50 36 31 32 1 1 1] batch 48000 - loss: 1.5562747 expected > [45 36 46 38 1 0 0 0 0] predicted > [45 36 46 38 1 1 1 1] expected > [45 28 30 32 38 1 0 0 0] predicted > [45 28 30 32 38 1 1 1] Training interrupted, model saved.
'models/word-clas/en/SeqRNN/Classifier2'
%matplotlib inline

def evalLetters(feed):
    predict_, target_, predict_lengths_, target_lengths_ = sess.run(
        [letter_prediction_infer,
         letter_targets,
         final_seq_lengths,
         letter_targets_length],
        feed)

    for i, (inp, pred) in enumerate(zip(target_, predict_)):
        print("Expected images:", target_lengths_[i])
        for x in range(len(inp)):
            implt(inp[x].reshape((64, 64)), 'gray')

        print("Predicted images:", predict_lengths_[i])
        for x in range(len(pred)):
            implt(pred[x].reshape((64, 64)), 'gray')

        if i >= 0:
            break

fd_test = test_iterator.next_feed(BATCH_SIZE)
fd = train_iterator.next_feed(BATCH_SIZE, words=False, train=False)

evalLetters(fd_test)
evalLetters(fd)
Expected images: 4
[Letter images rendered inline]
Predicted images: 4
[Letter images rendered inline]
Expected images: 4
[Letter images rendered inline]
Predicted images: 4
[Letter images rendered inline]