Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
aswintechguy
GitHub Repository: aswintechguy/Deep-Learning-Projects
Path: blob/main/Image Segmentation Tutorial - Unet - Pet Dataset/Image Segmentation Tutorial - UNet - Oxford Pet Dataset.ipynb
578 views
Kernel: Python 3 (ipykernel)

Import Modules

!pip install tensorflow_datasets==4.9.3 -q
import tensorflow as tf from tensorflow.keras import layers, models import tensorflow_datasets as tfds import numpy as np import matplotlib.pyplot as plt %matplotlib inline

Load the Dataset

# Load the Oxford-IIIT Pet dataset; with_info=True additionally returns the
# DatasetInfo metadata object alongside the dictionary of splits.
dataset, info = tfds.load('oxford_iiit_pet', with_info=True)
# Echo the metadata (features, splits, citation) as the cell output.
info
tfds.core.DatasetInfo( name='oxford_iiit_pet', full_name='oxford_iiit_pet/3.2.0', description=""" The Oxford-IIIT pet dataset is a 37 category pet image dataset with roughly 200 images for each class. The images have large variations in scale, pose and lighting. All images have an associated ground truth annotation of breed. """, homepage='http://www.robots.ox.ac.uk/~vgg/data/pets/', data_dir='/root/tensorflow_datasets/oxford_iiit_pet/3.2.0', file_format=tfrecord, download_size=773.52 MiB, dataset_size=774.69 MiB, features=FeaturesDict({ 'file_name': Text(shape=(), dtype=string), 'image': Image(shape=(None, None, 3), dtype=uint8), 'label': ClassLabel(shape=(), dtype=int64, num_classes=37), 'segmentation_mask': Image(shape=(None, None, 1), dtype=uint8), 'species': ClassLabel(shape=(), dtype=int64, num_classes=2), }), supervised_keys=('image', 'label'), disable_shuffling=False, splits={ 'test': <SplitInfo num_examples=3669, num_shards=4>, 'train': <SplitInfo num_examples=3680, num_shards=4>, }, citation="""@InProceedings{parkhi12a, author = "Parkhi, O. M. and Vedaldi, A. and Zisserman, A. and Jawahar, C.~V.", title = "Cats and Dogs", booktitle = "IEEE Conference on Computer Vision and Pattern Recognition", year = "2012", }""", )
# Echo the split dictionary; it holds one tf.data dataset per split
# ('train' and 'test').
dataset
{'train': <_PrefetchDataset element_spec={'file_name': TensorSpec(shape=(), dtype=tf.string, name=None), 'image': TensorSpec(shape=(None, None, 3), dtype=tf.uint8, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'segmentation_mask': TensorSpec(shape=(None, None, 1), dtype=tf.uint8, name=None), 'species': TensorSpec(shape=(), dtype=tf.int64, name=None)}>, 'test': <_PrefetchDataset element_spec={'file_name': TensorSpec(shape=(), dtype=tf.string, name=None), 'image': TensorSpec(shape=(None, None, 3), dtype=tf.uint8, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'segmentation_mask': TensorSpec(shape=(None, None, 1), dtype=tf.uint8, name=None), 'species': TensorSpec(shape=(), dtype=tf.int64, name=None)}>}

Preprocessing Steps

def normalize(input_image, input_mask):
    """Scale the image to [0, 1] and shift the mask labels down by one.

    Args:
        input_image: image tensor with pixel values in the 0-255 range.
        input_mask: segmentation mask tensor whose labels start at 1.

    Returns:
        Tuple ``(input_image, input_mask)``: the image cast to float32 and
        divided by 255.0, and the mask with 1 subtracted from every label.
    """
    input_image = tf.cast(input_image, tf.float32) / 255.0
    input_mask = input_mask - 1  # convert to zero based indexing
    return input_image, input_mask


def _resize_sample(sample):
    """Resize a sample's image and segmentation mask to 128x128.

    The image uses the default (bilinear) interpolation. The mask stores
    class labels, so it is resized with nearest-neighbour interpolation:
    bilinear resizing would blend neighbouring labels into fractional values
    that do not correspond to any class. The mask is cast to float32 so the
    dtype matches what bilinear resizing used to return.
    """
    input_image = tf.image.resize(sample['image'], (128, 128))
    input_mask = tf.image.resize(
        sample['segmentation_mask'], (128, 128), method='nearest')
    input_mask = tf.cast(input_mask, tf.float32)
    return input_image, input_mask


def load_train_images(sample):
    """Preprocess one training sample: resize, random flip, normalize.

    Args:
        sample: dataset element with 'image' and 'segmentation_mask' entries.

    Returns:
        Tuple ``(input_image, input_mask)`` of 128x128 tensors.
    """
    # resize the image
    input_image, input_mask = _resize_sample(sample)

    # data augmentation: mirror image and mask together with probability 0.5
    if tf.random.uniform(()) > 0.5:
        input_image = tf.image.flip_left_right(input_image)
        input_mask = tf.image.flip_left_right(input_mask)

    # normalize the images
    input_image, input_mask = normalize(input_image, input_mask)
    return input_image, input_mask


def load_test_images(sample):
    """Preprocess one evaluation sample: resize and normalize, no augmentation.

    Args:
        sample: dataset element with 'image' and 'segmentation_mask' entries.

    Returns:
        Tuple ``(input_image, input_mask)`` of 128x128 tensors.
    """
    # resize the image
    input_image, input_mask = _resize_sample(sample)

    # normalize the images
    input_image, input_mask = normalize(input_image, input_mask)
    return input_image, input_mask
BATCH_SIZE = 64
BUFFER_SIZE = 1000


def _random_flip(input_image, input_mask):
    """Mirror image and mask together left-to-right with probability 0.5."""
    if tf.random.uniform(()) > 0.5:
        input_image = tf.image.flip_left_right(input_image)
        input_mask = tf.image.flip_left_right(input_mask)
    return input_image, input_mask


# Only the deterministic preprocessing (load_test_images: resize + normalize)
# is cached. Caching the output of load_train_images would store the random
# flips drawn during the first pass, so every later epoch would replay the
# same "augmented" samples. The flip is therefore applied after the cache.
train_dataset = dataset['train'].map(
    load_test_images, num_parallel_calls=tf.data.experimental.AUTOTUNE)
train_dataset = (
    train_dataset
    .cache()
    .shuffle(BUFFER_SIZE)
    .map(_random_flip, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(BATCH_SIZE)
    .repeat()
)
train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# The test split gets the same deterministic preprocessing and is batched
# without shuffling or repeating.
test_dataset = dataset['test'].map(
    load_test_images, num_parallel_calls=tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE)

Exploratory Data Analysis

def display_sample(image_list):
    """Show the given images side by side in a single row.

    Panels are titled, in order, 'Input Image', 'True Mask' and
    'Predicted Mask', so ``image_list`` may hold at most three entries.
    """
    panel_titles = ['Input Image', 'True Mask', 'Predicted Mask']
    plt.figure(figsize=(10, 10))
    for position, picture in enumerate(image_list, start=1):
        plt.subplot(1, len(image_list), position)
        plt.title(panel_titles[position - 1])
        plt.imshow(tf.keras.utils.array_to_img(picture))
        plt.axis('off')
    plt.show()
# Preview the first image/mask pair from each of three training batches.
for images, masks in train_dataset.take(3):
    sample_image = images[0]
    sample_mask = masks[0]
    display_sample([sample_image, sample_mask])
Image in a Jupyter notebookImage in a Jupyter notebookImage in a Jupyter notebook

Define U-Net Model

def double_conv_block(x, n_filters):
    """Apply two successive 3x3, same-padded, ReLU-activated convolutions."""
    for _ in range(2):
        x = layers.Conv2D(
            n_filters,
            3,
            padding='same',
            activation='relu',
            kernel_initializer='he_normal',
        )(x)
    return x


def downsample_block(x, n_filters):
    """Encoder stage: double convolution, then max pooling and dropout.

    Returns:
        Tuple ``(f, p)`` where ``f`` is the double-convolution output (kept
        for the skip connection) and ``p`` is ``f`` after 2x max pooling and
        dropout with rate 0.3.
    """
    skip_features = double_conv_block(x, n_filters)
    pooled = layers.MaxPool2D(2)(skip_features)
    return skip_features, layers.Dropout(0.3)(pooled)


def upsample_block(x, conv_features, n_filters):
    """Decoder stage: upsample ``x``, merge the skip features, convolve.

    ``x`` goes through a stride-2 transposed convolution, is concatenated
    with ``conv_features``, then passes through dropout (rate 0.3) and a
    double convolution.
    """
    upsampled = layers.Conv2DTranspose(n_filters, 3, 2, padding='same')(x)
    merged = layers.concatenate([upsampled, conv_features])
    merged = layers.Dropout(0.3)(merged)
    return double_conv_block(merged, n_filters)
def build_unet_model(output_channels, input_shape=(128, 128, 3)):
    """Build the U-Net: four encoder stages, a bottleneck, four decoder stages.

    Args:
        output_channels: number of channels (classes) of the softmax output.
        input_shape: ``(height, width, channels)`` of the input images.
            Defaults to ``(128, 128, 3)``. Height and width must be divisible
            by 16 (or be ``None``): the encoder pools by 2 four times, and
            each decoder stage doubles the size again before concatenating
            with the matching encoder features.

    Returns:
        A ``tf.keras.Model`` named 'U-Net' mapping images to per-pixel class
        probabilities.

    Raises:
        ValueError: if a known height or width is not divisible by 16.
    """
    # Fail early with a clear message instead of a shape mismatch deep inside
    # one of the skip-connection concatenations.
    for dim in input_shape[:2]:
        if dim is not None and dim % 16 != 0:
            raise ValueError(
                'input height and width must be divisible by 16, got '
                f'{tuple(input_shape)}')

    # input layer
    inputs = layers.Input(shape=input_shape)

    # encoder - downsample, remembering each stage's features for the skips
    x = inputs
    skip_features = []
    for n_filters in (64, 128, 256, 512):
        features, x = downsample_block(x, n_filters)
        skip_features.append(features)

    # intermediate block
    x = double_conv_block(x, 1024)

    # decoder - upsample, consuming the skips from deepest to shallowest
    for n_filters, features in zip((512, 256, 128, 64), reversed(skip_features)):
        x = upsample_block(x, features, n_filters)

    # output layer
    outputs = layers.Conv2D(output_channels, 1, padding='same', activation='softmax')(x)

    # unet model
    unet_model = tf.keras.Model(inputs, outputs, name='U-Net')
    return unet_model
# for images, masks in train_dataset.take(1): # sample_image, sample_mask = images[0], masks[0] # sample_mask[60]
# Number of softmax output channels; presumably one per mask class
# (TODO confirm against the dataset's label set).
output_channels = 3

model = build_unet_model(output_channels)

# The loss and metric are given by name and resolved by Keras.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Render the layer graph with tensor shapes; kept as the cell's last
# expression so the notebook displays the result.
tf.keras.utils.plot_model(
    model,
    show_shapes=True,
    expand_nested=False,
    dpi=64,
)
Image in a Jupyter notebook

Train the Model

EPOCHS = 20

# train_dataset repeats indefinitely, so an epoch is defined as the number of
# full batches contained in the train split.
steps_per_epoch = info.splits['train'].num_examples // BATCH_SIZE

# test_dataset is finite and its last batch is partial. Round up so those
# remaining examples are validated too; floor division would skip them
# every epoch.
validation_steps = (info.splits['test'].num_examples + BATCH_SIZE - 1) // BATCH_SIZE

history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    validation_data=test_dataset,
)
Epoch 1/20 57/57 [==============================] - 82s 1s/step - loss: 0.9267 - accuracy: 0.5709 - val_loss: 0.8328 - val_accuracy: 0.5746 Epoch 2/20 57/57 [==============================] - 69s 1s/step - loss: 0.7601 - accuracy: 0.6467 - val_loss: 0.6687 - val_accuracy: 0.7170 Epoch 3/20 57/57 [==============================] - 74s 1s/step - loss: 0.6364 - accuracy: 0.7325 - val_loss: 0.6207 - val_accuracy: 0.7392 Epoch 4/20 57/57 [==============================] - 74s 1s/step - loss: 0.5760 - accuracy: 0.7619 - val_loss: 0.5382 - val_accuracy: 0.7782 Epoch 5/20 57/57 [==============================] - 74s 1s/step - loss: 0.5225 - accuracy: 0.7853 - val_loss: 0.4846 - val_accuracy: 0.8035 Epoch 6/20 57/57 [==============================] - 74s 1s/step - loss: 0.4766 - accuracy: 0.8059 - val_loss: 0.4674 - val_accuracy: 0.8099 Epoch 7/20 57/57 [==============================] - 74s 1s/step - loss: 0.4492 - accuracy: 0.8163 - val_loss: 0.4196 - val_accuracy: 0.8284 Epoch 8/20 57/57 [==============================] - 74s 1s/step - loss: 0.4142 - accuracy: 0.8312 - val_loss: 0.4128 - val_accuracy: 0.8326 Epoch 9/20 57/57 [==============================] - 68s 1s/step - loss: 0.3946 - accuracy: 0.8385 - val_loss: 0.4061 - val_accuracy: 0.8333 Epoch 10/20 57/57 [==============================] - 68s 1s/step - loss: 0.3772 - accuracy: 0.8455 - val_loss: 0.3846 - val_accuracy: 0.8434 Epoch 11/20 57/57 [==============================] - 68s 1s/step - loss: 0.3708 - accuracy: 0.8497 - val_loss: 0.4035 - val_accuracy: 0.8361 Epoch 12/20 57/57 [==============================] - 68s 1s/step - loss: 0.3518 - accuracy: 0.8557 - val_loss: 0.3592 - val_accuracy: 0.8550 Epoch 13/20 57/57 [==============================] - 68s 1s/step - loss: 0.3337 - accuracy: 0.8632 - val_loss: 0.3652 - val_accuracy: 0.8547 Epoch 14/20 57/57 [==============================] - 74s 1s/step - loss: 0.3331 - accuracy: 0.8639 - val_loss: 0.3535 - val_accuracy: 0.8586 Epoch 15/20 57/57 
[==============================] - 68s 1s/step - loss: 0.3224 - accuracy: 0.8678 - val_loss: 0.3461 - val_accuracy: 0.8612 Epoch 16/20 57/57 [==============================] - 68s 1s/step - loss: 0.3328 - accuracy: 0.8649 - val_loss: 0.3445 - val_accuracy: 0.8623 Epoch 17/20 57/57 [==============================] - 74s 1s/step - loss: 0.2966 - accuracy: 0.8773 - val_loss: 0.3502 - val_accuracy: 0.8623 Epoch 18/20 57/57 [==============================] - 68s 1s/step - loss: 0.2947 - accuracy: 0.8778 - val_loss: 0.3976 - val_accuracy: 0.8500 Epoch 19/20 57/57 [==============================] - 68s 1s/step - loss: 0.2855 - accuracy: 0.8814 - val_loss: 0.3323 - val_accuracy: 0.8699 Epoch 20/20 57/57 [==============================] - 68s 1s/step - loss: 0.2823 - accuracy: 0.8830 - val_loss: 0.3303 - val_accuracy: 0.8712

Visualize the Results

def _plot_history_panel(position, train_key, val_key, title, ylabel):
    """Draw one metric's train and validation curves in slot ``position``
    of a 1x2 subplot grid."""
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.legend(['Train', 'Val'], loc='upper left')


plt.figure(figsize=(12, 4))

# plot train & val accuracy
_plot_history_panel(1, 'accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy')

# plot train & val loss
_plot_history_panel(2, 'loss', 'val_loss', 'Model Loss', 'Loss')

plt.tight_layout()
plt.show()
Image in a Jupyter notebook

Test Predictions

def create_mask(pred_mask):
    """Turn batched per-class scores into the label mask of the first sample.

    Takes the argmax over the last axis, adds back a trailing channel axis,
    and returns only element 0 along the first axis.
    """
    pred_mask = tf.argmax(pred_mask, axis=-1)
    pred_mask = pred_mask[..., tf.newaxis]
    return pred_mask[0]


def show_predictions(dataset=None, num=1):
    """Display image, true mask and predicted mask for ``num`` batches.

    Only the first sample of each batch is shown.

    Args:
        dataset: dataset yielding ``(image, mask)`` batches; when ``None``
            the function does nothing.
        num: number of batches to take from ``dataset``.
    """
    # Explicit None check rather than relying on the truthiness of the
    # dataset object.
    if dataset is None:
        return

    for image, mask in dataset.take(num):
        pred_mask = model.predict(image)
        display_sample([image[0], mask[0], create_mask(pred_mask)])
# for image, mask in test_dataset.take(1): # pred_mask = model.predict(image) # pred_mask = tf.argmax(pred_mask, axis=-1) # print(np.array(pred_mask.shape)) # pred_mask = pred_mask[..., tf.newaxis] # print(pred_mask[0].shape)
# Show the prediction for the first sample of each of 10 test batches.
show_predictions(test_dataset, 10)
2/2 [==============================] - 0s 176ms/step
Image in a Jupyter notebook
2/2 [==============================] - 0s 123ms/step
Image in a Jupyter notebook
2/2 [==============================] - 0s 122ms/step
Image in a Jupyter notebook
2/2 [==============================] - 0s 122ms/step
Image in a Jupyter notebook
2/2 [==============================] - 0s 124ms/step
Image in a Jupyter notebook
2/2 [==============================] - 0s 119ms/step
Image in a Jupyter notebook
2/2 [==============================] - 0s 122ms/step
Image in a Jupyter notebook
2/2 [==============================] - 0s 120ms/step
Image in a Jupyter notebook
2/2 [==============================] - 0s 123ms/step
Image in a Jupyter notebook
2/2 [==============================] - 0s 120ms/step
Image in a Jupyter notebook