Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Aniket025
GitHub Repository: Aniket025/Medical-Prescription-OCR
Path: blob/master/Model-1/WordClassifier-Seq2Seq.ipynb
427 views
Kernel: Python 3

Recurrent Neural Network - Word Classification

Using Seq2Seq model

Implemented in TensorFlow. Uses a bidirectional RNN as the encoder and an attention decoder built with tf.contrib.seq2seq.

TODO

REMOVE embeddings; Overlapping sliders; One-shot preprocessing; Train preprocessing with RNN; lower extra steps, bigger hState, preprocessing?
import numpy as np
import pandas as pd
import cv2
# NOTE(review): this binds the top-level `matplotlib` package (not
# `matplotlib.pyplot`) to the name `plt`. In the visible cells only
# `plt.rcParams` is read, which the top-level package provides.
import matplotlib as plt
import tensorflow as tf
import tensorflow.contrib.seq2seq as seq2seq
from tensorflow.python.layers import core as layers_core

import time
import math
import unidecode

# Project-local helpers
from ocr.datahelpers import loadWordsData, correspondingShuffle, char2idx
from ocr.helpers import extendImg
from ocr.mlhelpers import TrainingPlot
from ocr.tfhelpers import Graph, create_cell
from ocr.imgtransform import coordinates_remap
from ocr.normalization import imageNorm

# IPython magic: interactive matplotlib backend for the notebook
%matplotlib notebook
# Increase size of images
plt.rcParams['figure.figsize'] = (9.0, 5.0)

# Start from an empty default graph so re-running the notebook does not
# accumulate ops, then open a session that installs itself as the default
# (this is what lets `.eval()` / `.run()` be called without passing `sess`).
tf.reset_default_graph()
sess = tf.InteractiveSession()
print('Tensorflow', tf.__version__)
Tensorflow 1.4.0

Loading images

# Language of the dataset; the visible cells branch on 'en' and 'cz'
LANG = 'en'

# Load word images with their text labels (gap-line data is not needed here)
images, labels = loadWordsData(['data/words2/'], loadGaplines=False)

if LANG == 'en':
    # Transliterate every label to plain ASCII, writing back in place
    for pos, word in enumerate(labels):
        labels[pos] = unidecode.unidecode(word)
Loading words... -> Number of words: 5069

Settings

# --- Vocabulary ---------------------------------------------------------
char_size = 52 if LANG != 'cz' else 82
PAD = 0                              # Padding
EOS = 1                              # End of seq
vocab_size = char_size + 2           # Number of different chars + <PAD> and <EOS>
input_embedding_size = vocab_size    # Size of vector for embedding chars2vec

# --- Data augmentation and input slicing --------------------------------
num_new_images = 2                   # Number of new images per image
fac_alpha = 2.0                      # Factors for image preprocessing
fac_sigma = 0.08
num_buckets = 5
slider_size = (60, 2)
N_INPUT = slider_size[0] * slider_size[1]   # Size of sequence input vector (60*2)

# --- Network architecture -----------------------------------------------
encoder_layers = 2
decoder_layers = 2 * encoder_layers  # 2* is due to the bidirectional encoder
encoder_residual_layers = 1          # HAVE TO be smaller than encoder_layers
decoder_residual_layers = 2 * encoder_residual_layers
encoder_units = 256
decoder_units = encoder_units
add_output_length = 4                # Extra decoder steps allowed on top of the estimate

# --- Optimisation --------------------------------------------------------
learning_rate = 1e-4
max_gradient_norm = 5.0              # For gradient clipping
dropout = 0.4

# --- Training schedule ---------------------------------------------------
train_per = 0.8                      # Percentage of training data
TRAIN_STEPS = 100000                 # Number of training steps!
TEST_ITER = 150
LOSS_ITER = 50
SAVE_ITER = 2000
BATCH_SIZE = 64
EPOCH = 2000                         # Number of batches in epoch - not accurate

save_location = 'models/word-clas/' + LANG + '/WordClassifier2'

Dataset

# Shuffle images and labels together so the later split is random
images, labels = correspondingShuffle([images, labels])

# Encode each label as a list of character indices. An object array is
# used because the labels have different lengths.
labels_idx = np.empty(len(labels), dtype=object)
for pos, word in enumerate(labels):
    labels_idx[pos] = [char2idx(ch, True) for ch in word]

# Split into train and test parts at the `train_per` boundary
div = int(train_per * len(images))
trainImages, testImages = images[:div], images[div:]
trainLabels_idx, testLabels_idx = labels_idx[:div], labels_idx[div:]

print("Training images:", div)
print("Testing images:", len(images) - div)
Training images: 4055 Testing images: 1014

Dataset extending

# Augment the training set only, so train and test images never mix.
# Each original image is followed by `num_new_images` remapped copies that
# share its label.
copies_per_image = num_new_images + 1
total_train = len(trainImages) * copies_per_image

trainImagesFinal = np.empty(total_train, dtype=object)
trainLabelsFinal_idx = np.empty(total_train, dtype=object)

for idx, img in enumerate(trainImages):
    base = idx * copies_per_image
    # Slot 0 of every group holds the untouched original
    trainImagesFinal[base] = img
    trainLabelsFinal_idx[base] = trainLabels_idx[idx]
    # Remaining slots hold the transformed variants
    for offset in range(1, copies_per_image):
        trainImagesFinal[base + offset] = coordinates_remap(img, fac_alpha, fac_sigma)
        trainLabelsFinal_idx[base + offset] = trainLabels_idx[idx]

print("Transformed train images", len(trainImagesFinal))
Transformed train images 12165
class BucketDataIterator():
    """Iterator feeding the seq2seq model with length-bucketed batches.

    Every image is cut into vertical slices ``slider[1]`` pixels wide and
    each flattened slice becomes one step of the encoder input sequence.
    Samples are sorted by sequence length and divided into ``num_buckets``
    buckets, so a batch only contains sequences of similar length.
    """

    def __init__(self, images, targets, num_buckets=5, slider=(60, 30), train=True):
        """Slice the images into sequences and distribute them into buckets.

        Args:
            images: indexable collection of 2-D image arrays. NOTE: the
                entries are replaced in place by their padded versions.
            targets: per-image target sequences (lists of character indices).
            num_buckets: number of length buckets.
            slider: (height, width) of one slice; the image height is
                expected to equal ``slider[0]``.
            train: when True, ``next_feed`` applies dropout.
        """
        self.train = train

        # First PADDING of images to slider size ( -(a // b) == ceil(a/b))
        self.slider = slider
        for i in range(len(images)):
            images[i] = extendImg(
                images[i],
                (images[i].shape[0],
                 -(-images[i].shape[1] // slider[1]) * slider[1]))
        in_length = [image.shape[1] // slider[1] for image in images]

        # Split images to sequence of vectors
        imgseq = np.empty(len(images), dtype=object)
        for i, img in enumerate(images):
            imgseq[i] = [img[:, loc * slider[1]: (loc + 1) * slider[1]].flatten()
                         for loc in range(in_length[i])]

        # Create pandas dataFrame and sort it by images width (length)
        self.dataFrame = pd.DataFrame({
            'in_length': in_length,
            'out_length': [len(t) for t in targets],
            'images': imgseq,
            'targets': targets
        }).sort_values('in_length').reset_index(drop=True)

        bsize = int(len(images) / num_buckets)
        self.num_buckets = num_buckets

        # Create buckets by slicing parts by indexes; the last bucket also
        # takes the remainder of the integer division
        self.buckets = []
        for bucket in range(num_buckets - 1):
            self.buckets.append(
                self.dataFrame.iloc[bucket * bsize: (bucket + 1) * bsize])
        self.buckets.append(self.dataFrame.iloc[(num_buckets - 1) * bsize:])

        self.buckets_size = [len(bucket) for bucket in self.buckets]

        # cursor[i] will be the cursor for the ith bucket
        self.cursor = np.array([0] * num_buckets)
        self.bucket_order = np.random.permutation(num_buckets)
        self.bucket_cursor = 0
        self.shuffle()
        print("Iterator created.")

    def shuffle(self, idx=None):
        """Shuffle bucket ``idx`` (or every bucket) and rewind its cursor."""
        for i in [idx] if idx is not None else range(self.num_buckets):
            self.buckets[i] = self.buckets[i].sample(frac=1).reset_index(drop=True)
            self.cursor[i] = 0

    def next_batch(self, batch_size):
        """Create the next batch of (at most) ``batch_size`` samples.

        Buckets are visited in a random order that is re-drawn after every
        full round. A bucket is reshuffled (and its unread tail dropped)
        when it cannot supply a whole batch.

        Returns:
            image seq   - float32, (max_in_length, batch, slice_size)
            letter seq  - int32, (max_out_length, batch), zero padded
            image seq lengths, letter seq lengths - 1-D arrays per sample
        """
        i_bucket = self.bucket_order[self.bucket_cursor]
        # Increment cursor and shuffle in case of new round
        self.bucket_cursor = (self.bucket_cursor + 1) % self.num_buckets
        if self.bucket_cursor == 0:
            self.bucket_order = np.random.permutation(self.num_buckets)

        if self.cursor[i_bucket] + batch_size > self.buckets_size[i_bucket]:
            self.shuffle(i_bucket)

        # Handle too big batch sizes
        if batch_size > self.buckets_size[i_bucket]:
            batch_size = self.buckets_size[i_bucket]

        res = self.buckets[i_bucket].iloc[self.cursor[i_bucket]:
                                          self.cursor[i_bucket] + batch_size]
        self.cursor[i_bucket] += batch_size

        # PAD input sequence and output
        # Pad sequences with <PAD> to same length
        input_max = max(res['in_length'])
        output_max = max(res['out_length'])

        # In order to make it work at production: the decoder is only given
        # a limited number of steps, so targets must not be much longer than
        # the inputs
        assert np.all(res['in_length'] + add_output_length >= res['out_length'])

        # Width of one flattened slice. Derived from this iterator's own
        # slider instead of a module-level constant, so the class also works
        # with slider sizes other than the notebook's.
        n_input = self.slider[0] * self.slider[1]

        input_seq = np.zeros((batch_size, input_max, n_input), dtype=np.float32)
        for i, img in enumerate(res['images']):
            input_seq[i][:res['in_length'].values[i]] = img
        # Batch-major -> time-major
        input_seq = input_seq.swapaxes(0, 1)

        # Need to pad according to the maximum length output sequence
        targets = np.zeros([batch_size, output_max], dtype=np.int32)
        for i, target in enumerate(targets):
            # `target` is a view of row i, so this writes into `targets`
            target[:res['out_length'].values[i]] = res['targets'].values[i]
        targets = targets.swapaxes(0, 1)

        return input_seq, targets, res['in_length'].values, res['out_length'].values

    def next_feed(self, size):
        """Create a feed dict directly usable for model training.

        Relies on the module-level placeholders (``encoder_inputs``,
        ``encoder_inputs_length``, ``decoder_targets``,
        ``decoder_targets_length``, ``keep_prob``) and on ``dropout`` being
        defined by the time this method is called.
        """
        (encoder_inputs_,
         decoder_targets_,
         encoder_inputs_length_,
         decoder_targets_length_) = self.next_batch(size)
        return {
            encoder_inputs: encoder_inputs_,
            encoder_inputs_length: encoder_inputs_length_,
            decoder_targets: decoder_targets_,
            decoder_targets_length: decoder_targets_length_,
            keep_prob: (1.0 - dropout) if self.train else 1.0
        }
# Build the feeding iterators for the RNN.
# Create them only once: the constructor writes the padded images back
# into the arrays passed in.
train_iterator = BucketDataIterator(
    images=trainImagesFinal,
    targets=trainLabelsFinal_idx,
    num_buckets=num_buckets,
    slider=slider_size,
    train=True)

test_iterator = BucketDataIterator(
    images=testImages,
    targets=testLabels_idx,
    num_buckets=num_buckets,
    slider=slider_size,
    train=False)
Iterator created. Iterator created.

Placeholders

# Input placeholders
# N_INPUT -> size of the vector representing one image slice in a sequence

# Encoder inputs, time-major: (max_seq_length, batch_size, N_INPUT)
encoder_inputs = tf.placeholder(
    dtype=tf.float32,
    shape=(None, None, N_INPUT),
    name='encoder_inputs')

# Number of valid steps of every sequence in the batch: (batch_size,)
encoder_inputs_length = tf.placeholder(
    dtype=tf.int32,
    shape=(None,),
    name='encoder_inputs_length')

# Targets are required for training, not for testing and application
decoder_targets = tf.placeholder(
    dtype=tf.int32,
    shape=(None, None),
    name='decoder_targets')
decoder_targets_length = tf.placeholder(
    dtype=tf.int32,
    shape=(None,),
    name='decoder_targets_length')

# Dropout keep probability
keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')

Decoder Train Feeds

# Builds the decoder inputs/targets and the loss/accuracy masks from the
# time-major `decoder_targets` placeholder.

# Dynamic (max target length, batch size) of the current feed
sequence_size, batch_size = tf.unstack(tf.shape(decoder_targets))
# Number of decoding steps used at inference time.
# NOTE(review): the divisor 7 is an unexplained heuristic (input slices per
# output character?) - confirm. The tf.pad below needs
# test_length >= max target length + 1, which is not checked here.
test_length = tf.floor_div(tf.reduce_max(encoder_inputs_length), 7) + add_output_length

# One time step filled with <EOS> / <PAD> for the whole batch
EOS_SLICE = tf.ones([1, batch_size], dtype=tf.int32) * EOS
PAD_SLICE = tf.ones([1, batch_size], dtype=tf.int32) * PAD

# Train inputs with EOS symbol at start of seq
decoder_train_inputs = tf.concat([EOS_SLICE, decoder_targets], axis=0)
decoder_train_length = decoder_targets_length + 1

# Train targets with EOS symbol at end of seq: first append one <PAD> step...
decoder_train_targets = tf.concat([decoder_targets, PAD_SLICE], axis=0)
decoder_train_targets_seq_len, _ = tf.unstack(tf.shape(decoder_train_targets))
# ...then build a mask that holds EOS at position `length` of every sequence
decoder_train_targets_eos_mask = tf.one_hot(decoder_train_length - 1,
                                            decoder_train_targets_seq_len,
                                            on_value=EOS, off_value=PAD,
                                            dtype=tf.int32)
# one_hot yields (batch, time); the targets are (time, batch)
decoder_train_targets_eos_mask = tf.transpose(decoder_train_targets_eos_mask, [1, 0])

# hacky way using one_hot to put EOS symbol at the end of target sequence
# (works because the position it is added to holds PAD == 0)
decoder_train_targets = tf.add(decoder_train_targets,
                               decoder_train_targets_eos_mask)

# Pad the targets along time up to `test_length` for the accuracy computation
decoder_test_targets = tf.pad(
    decoder_train_targets,
    [[0, test_length - decoder_train_targets_seq_len], [0, 0]],
    mode='CONSTANT')

# 1.0 for real steps (including EOS), 0.0 for padding; shape (batch, time)
loss_weights = tf.sequence_mask(
    decoder_train_length,
    tf.reduce_max(decoder_train_length),
    dtype=tf.float32)

# Same mask, stretched to the inference length
test_weights = tf.sequence_mask(
    decoder_train_length,
    test_length,
    dtype=tf.float32)

Embeddings

# Character embedding matrix for the decoder, initialised uniformly in [-1, 1)
embedding_init = tf.random_uniform(
    [vocab_size, input_embedding_size], minval=-1.0, maxval=1.0)
embeddings = tf.Variable(embedding_init, dtype=tf.float32)

# Look up the embedding vector of every decoder training input symbol
decoder_train_inputs_embedded = tf.nn.embedding_lookup(
    params=embeddings,
    ids=decoder_train_inputs)

Encoder

# Forward and backward encoder cells share the same configuration
encoder_cell_options = dict(is_dropout=True, keep_prob=keep_prob)

enc_cell_fw = create_cell(
    encoder_units, encoder_layers, encoder_residual_layers,
    **encoder_cell_options)
enc_cell_bw = create_cell(
    encoder_units, encoder_layers, encoder_residual_layers,
    **encoder_cell_options)
# Help functions for standard layers def conv2d(x, W, name=None): return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME', name=name) def max_pool_2x2(x, name=None): return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name) # 1. Layer - Convulation variables W_conv1 = tf.get_variable('W_conv1', shape=[5, 5, 1, 4], initializer=tf.contrib.layers.xavier_initializer()) b_conv1 = tf.Variable(tf.constant(0.1, shape=[4]), name='b_conv1') # 3. Layer - Convulation variables W_conv2 = tf.get_variable('W_conv2', shape=[5, 5, 4, 8], initializer=tf.contrib.layers.xavier_initializer()) b_conv2 = tf.Variable(tf.constant(0.1, shape=[8]), name='b_conv2') def CNN(x): x = tf.image.per_image_standardization(x) x_img = tf.reshape(x, [1, slider_size[0], slider_size[1], 1]) # 1. Layer - Convulation h_conv1 = tf.nn.relu(conv2d(x_img, W_conv1) + b_conv1, name='h_conv1') # 2. Layer - Max Pool h_pool1 = max_pool_2x2(h_conv1, name='h_pool1') # 3. Layer - Convulation h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2, name='h_conv2') # 4. Layer - Max Pool return max_pool_2x2(h_conv2, name='h_pool2') # Input images CNN inputs = tf.map_fn( lambda seq: tf.map_fn( lambda img: tf.reshape( CNN(tf.reshape(img, [slider_size[0], slider_size[1], 1])), [-1]), seq), encoder_inputs, dtype=tf.float32) # Bidirectional RNN, gibe fw and bw outputs separately enc_outputs, enc_state = tf.nn.bidirectional_dynamic_rnn( cell_fw = enc_cell_fw, cell_bw = enc_cell_bw, inputs = inputs, sequence_length = encoder_inputs_length, dtype = tf.float32, time_major = True) encoder_outputs = tf.concat(enc_outputs, -1) if encoder_layers == 1: encoder_state = enc_state else: encoder_state = [] for layer_id in range(encoder_layers): encoder_state.append(enc_state[0][layer_id]) # forward encoder_state.append(enc_state[1][layer_id]) # backward encoder_state = tuple(encoder_state)
# Quick sanity check of the loss mask on a tiny test batch.
# Variables are initialised first so the graph built so far can be run.
sess.run(tf.global_variables_initializer())

fd = test_iterator.next_feed(5)
print(sess.run(loss_weights, feed_dict=fd))
[[ 1. 1. 1. 1.] [ 1. 1. 1. 1.] [ 1. 1. 1. 1.] [ 1. 1. 1. 1.] [ 1. 1. 1. 1.]]

Decoder

# attention_states: size [batch_size, max_time, num_units] attention_states = tf.transpose(encoder_outputs, [1, 0, 2]) # Create an attention mechanism attention_mechanism = tf.contrib.seq2seq.LuongAttention( decoder_units, attention_states, memory_sequence_length=encoder_inputs_length) decoder_cell = create_cell(decoder_units, decoder_layers, decoder_residual_layers, is_dropout=True, keep_prob=keep_prob) decoder_cell = seq2seq.AttentionWrapper( decoder_cell, attention_mechanism, attention_layer_size=decoder_units) decoder_initial_state = decoder_cell.zero_state(batch_size, tf.float32).clone( cell_state=encoder_state) ### TRAIN DECODER ### # Helper helper = seq2seq.TrainingHelper( decoder_train_inputs_embedded, decoder_train_length, time_major=True) # Decoder projection_layer = layers_core.Dense( vocab_size, use_bias=False) decoder = seq2seq.BasicDecoder( decoder_cell, helper, decoder_initial_state, output_layer=projection_layer) # Dynamic decoding # outputs.rnn_output = plain output # outputs.sample_id = tf.argmax(outputs.rnn_output, axis=-1) outputs, final_context_state, _ = seq2seq.dynamic_decode( decoder) logits_train = outputs.rnn_output prediction_train = outputs.sample_id ### INFERENCE DECODER ### # Helper helper_infer = seq2seq.GreedyEmbeddingHelper( embeddings, tf.fill([batch_size], EOS), EOS) # Decoder decoder_infer = seq2seq.BasicDecoder( decoder_cell, helper_infer, decoder_initial_state, output_layer=projection_layer) # Dynamic decoding outputs_infer, final_context_state, final_seq_lengths = seq2seq.dynamic_decode( decoder_infer, impute_finished=True, # maximum_iterations=tf.reduce_max(encoder_inputs_length) + add_output_length) maximum_iterations=test_length) prediction_inference = tf.identity(outputs_infer.sample_id, name='prediction_infer')

Optimizer

# Loss, gradient-clipped Adam update and masked sequence accuracy.

# The decoder outputs are batch-major, so bring the targets to (batch, time)
targets = tf.transpose(decoder_train_targets, [1, 0])
test_targets = tf.transpose(decoder_test_targets, [1, 0])

## Loss: cross-entropy over the steps selected by `loss_weights`
loss = seq2seq.sequence_loss(logits=logits_train,
                             targets=targets,
                             weights=loss_weights,
                             name='loss')

## Calculate and clip gradients (by global norm, to max_gradient_norm)
params = tf.trainable_variables()
gradients = tf.gradients(loss, params)
clipped_gradients, _ = tf.clip_by_global_norm(
    gradients, max_gradient_norm)

### Optimization
optimizer = tf.train.AdamOptimizer(learning_rate)
train_step = optimizer.apply_gradients(
    zip(clipped_gradients, params),
    name='train_step')


### Evaluate model
# Pad prediction to match lengths: inference may stop before `test_length`
# steps, the targets are always `test_length` long
prediction_infer_padded = tf.pad(
    prediction_inference,
    [[0, 0],
     [0, test_length - tf.reduce_max(final_seq_lengths)]],
    mode='CONSTANT')

correct_prediction = tf.equal(prediction_infer_padded,
                              test_targets)
## Advanced accuracy only the elements of seq including EOS symbol
# (the plain mean over all positions would also count the padding)
# accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
accuracy = tf.reduce_sum(tf.cast(correct_prediction, tf.float32)*test_weights)/tf.reduce_sum(test_weights)

Training

# Train the model. Loss and accuracy are plotted live, the model is
# checkpointed every SAVE_ITER steps, and sample predictions are printed
# every EPOCH steps. A checkpoint is also written when training is
# interrupted and when it runs to completion.
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

# Create plot for live stats plotting
trainPlot = TrainingPlot(TRAIN_STEPS, TEST_ITER, LOSS_ITER)

try:
    for i_batch in range(TRAIN_STEPS):
        fd = train_iterator.next_feed(BATCH_SIZE)
        train_step.run(fd)

        if i_batch % LOSS_ITER == 0:
            # Plotting loss (training loss, dropout active)
            tmpLoss = loss.eval(fd)
            trainPlot.updateCost(tmpLoss, i_batch // LOSS_ITER)

        if i_batch % TEST_ITER == 0:
            # Plotting accuracy
            fd_test = test_iterator.next_feed(BATCH_SIZE)
            accTest = accuracy.eval(fd_test)
            # Evaluate the train batch with dropout disabled, so train and
            # test accuracy are measured under the same conditions
            fd_eval = dict(fd)
            fd_eval[keep_prob] = 1.0
            accTrain = accuracy.eval(fd_eval)
            trainPlot.updateAcc(accTest, accTrain, i_batch // TEST_ITER)

        if i_batch % SAVE_ITER == 0:
            saver.save(sess, save_location)

        if i_batch % EPOCH == 0:
            fd_test = test_iterator.next_feed(BATCH_SIZE)
            print('batch %r - loss: %r' % (i_batch, sess.run(loss, fd_test)))
            predict_, target_ = sess.run([prediction_infer_padded, test_targets], fd_test)
            # Show the first two samples of the batch
            for inp, pred in zip(target_[:2], predict_[:2]):
                print(' expected > {}'.format(inp))
                print(' predicted > {}'.format(pred))
            print()

except KeyboardInterrupt:
    saver.save(sess, save_location)
    print('Training interrupted, model saved.')
else:
    # Without this, the steps after the last SAVE_ITER checkpoint are lost
    # when training finishes normally
    saver.save(sess, save_location)
<IPython.core.display.Javascript object>
batch 0 - loss: 3.9249754 expected > [ 3 28 38 32 1 0 0 0 0 0 0 0 0 0 0] predicted > [28 28 1 0 0 0 0 0 0 0 0 0 0 0 0] expected > [35 42 31 41 42 47 28 1 0 0 0 0 0 0 0] predicted > [28 1 0 0 0 0 0 0 0 0 0 0 0 0 0] batch 2000 - loss: 1.6534165 expected > [43 48 39 39 1 0 0 0 0] predicted > [43 28 39 1 0 0 0 0 0] expected > [20 1 0 0 0 0 0 0 0] predicted > [12 1 0 0 0 0 0 0 0] batch 4000 - loss: 1.3383532 expected > [40 36 46 47 42 1 0 0 0 0 0 0 0 0 0] predicted > [49 28 46 47 42 1 0 0 0 0 0 0 0 0 0] expected > [38 45 28 1 0 0 0 0 0 0 0 0 0 0 0] predicted > [47 45 28 40 1 0 0 0 0 0 0 0 0 0 0] batch 6000 - loss: 1.1780998 expected > [53 39 48 47 28 1 0 0 0 0 0 0 0 0 0] predicted > [30 39 28 47 28 1 0 0 0 0 0 0 0 0 0] expected > [47 36 40 32 1 0 0 0 0 0 0 0 0 0 0] predicted > [38 45 42 49 32 1 0 0 0 0 0 0 0 0 0] batch 8000 - loss: 1.1226385 expected > [ 3 45 32 47 28 1 0 0 0 0 0 0 0 0 0] predicted > [ 3 28 30 38 1 0 0 0 0 0 0 0 0 0 0] expected > [21 49 28 45 48 1 0 0 0 0 0 0 0 0 0] predicted > [27 28 45 36 1 0 0 0 0 0 0 0 0 0 0] batch 10000 - loss: 1.3133575 expected > [13 28 31 36 46 39 28 49 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] predicted > [ 3 28 30 35 42 30 39 42 49 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] expected > [16 46 28 40 32 39 28 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] predicted > [16 45 30 41 36 38 36 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] batch 12000 - loss: 1.4228649 expected > [12 42 41 47 36 41 32 41 47 28 39 41 36 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] predicted > [35 42 47 36 40 28 39 36 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] expected > [49 52 53 38 48 40 41 32 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] predicted > [49 52 46 38 48 40 48 41 36 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] batch 14000 - loss: 1.8237103 expected > [52 1 0 0 0 0 0 0 0] predicted > [23 1 0 0 0 0 0 0 0] expected > [45 32 28 39 39 52 1 0 0] predicted > [40 28 39 39 52 1 0 0 0] batch 16000 - loss: 1.9093359 expected > [34 28 34 1 0 0 0 0 0] 
predicted > [40 48 52 1 0 0 0 0 0] expected > [45 52 46 1 0 0 0 0 0] predicted > [40 52 37 1 0 0 0 0 0] batch 18000 - loss: 1.6860849 expected > [48 49 32 46 47 1 0 0 0 0 0 0 0] predicted > [40 42 53 32 47 1 0 0 0 0 0 0 0] expected > [43 32 41 1 0 0 0 0 0 0 0 0 0] predicted > [43 28 45 1 0 0 0 0 0 0 0 0 0] Training interrupted, model saved.
# Print the first two expected/predicted sequences of five test batches
for _ in range(5):
    fd_test = test_iterator.next_feed(BATCH_SIZE)
    predict_, target_ = sess.run(
        [prediction_infer_padded, test_targets], fd_test)
    for inp, pred in zip(target_[:2], predict_[:2]):
        print(' expected > {}'.format(inp))
        print(' predicted > {}'.format(pred))
    print()
expected > [4 1 0 0 0 0 0 0 0] predicted > [4 1 0 0 0 0 0 0 0] expected > [42 29 32 37 31 32 47 32 1] predicted > [31 52 47 32 47 1 0 0 0] expected > [53 28 43 39 28 47 28 1 0 0 0 0 0 0 0] predicted > [31 28 46 39 48 47 28 1 0 0 0 0 0 0 0] expected > [40 36 30 1 0 0 0 0 0 0 0 0 0 0 0] predicted > [41 36 53 1 0 0 0 0 0 0 0 0 0 0 0] expected > [49 52 53 38 48 40 41 32 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] predicted > [49 52 53 38 48 40 48 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] expected > [29 39 36 53 38 42 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] predicted > [29 39 28 30 38 28 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] expected > [45 42 38 1 0 0 0 0 0 0 0 0 0] predicted > [45 42 38 1 0 0 0 0 0 0 0 0 0] expected > [45 28 31 1 0 0 0 0 0 0 0 0 0] predicted > [45 28 31 1 0 0 0 0 0 0 0 0 0] expected > [47 32 45 30 32 40 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] predicted > [47 45 32 30 32 41 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] expected > [46 36 41 30 32 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] predicted > [48 36 41 30 32 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]