CoCalc -- Image Segmentation Tutorial - UNet

GitHub Repository: aswintechguy/Deep-Learning-Projects
Path: blob/main/Image Segmentation Tutorial - Unet - Pet Dataset/Image Segmentation Tutorial - UNet - Oxford Pet Dataset.ipynb
⁵⁷⁸ views

Kernel: Python 3 (ipykernel)

Import Modules

In [ ]:

!pip install tensorflow_datasets==4.9.3 -q

In [4]:

import tensorflow as tf
from tensorflow.keras import layers, models
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Load the Dataset

In [5]:

dataset, info = tfds.load('oxford_iiit_pet', with_info=True)

In [6]:

info

Out[6]:

tfds.core.DatasetInfo(
    name='oxford_iiit_pet',
    full_name='oxford_iiit_pet/3.2.0',
    description="""
    The Oxford-IIIT pet dataset is a 37 category pet image dataset with roughly 200
    images for each class. The images have large variations in scale, pose and
    lighting. All images have an associated ground truth annotation of breed.
    """,
    homepage='http://www.robots.ox.ac.uk/~vgg/data/pets/',
    data_dir='/root/tensorflow_datasets/oxford_iiit_pet/3.2.0',
    file_format=tfrecord,
    download_size=773.52 MiB,
    dataset_size=774.69 MiB,
    features=FeaturesDict({
        'file_name': Text(shape=(), dtype=string),
        'image': Image(shape=(None, None, 3), dtype=uint8),
        'label': ClassLabel(shape=(), dtype=int64, num_classes=37),
        'segmentation_mask': Image(shape=(None, None, 1), dtype=uint8),
        'species': ClassLabel(shape=(), dtype=int64, num_classes=2),
    }),
    supervised_keys=('image', 'label'),
    disable_shuffling=False,
    splits={
        'test': <SplitInfo num_examples=3669, num_shards=4>,
        'train': <SplitInfo num_examples=3680, num_shards=4>,
    },
    citation="""@InProceedings{parkhi12a,
      author       = "Parkhi, O. M. and Vedaldi, A. and Zisserman, A. and Jawahar, C.~V.",
      title        = "Cats and Dogs",
      booktitle    = "IEEE Conference on Computer Vision and Pattern Recognition",
      year         = "2012",
    }""",
)

In [7]:

dataset

Out[7]:

{'train': <_PrefetchDataset element_spec={'file_name': TensorSpec(shape=(), dtype=tf.string, name=None), 'image': TensorSpec(shape=(None, None, 3), dtype=tf.uint8, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'segmentation_mask': TensorSpec(shape=(None, None, 1), dtype=tf.uint8, name=None), 'species': TensorSpec(shape=(), dtype=tf.int64, name=None)}>,
 'test': <_PrefetchDataset element_spec={'file_name': TensorSpec(shape=(), dtype=tf.string, name=None), 'image': TensorSpec(shape=(None, None, 3), dtype=tf.uint8, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'segmentation_mask': TensorSpec(shape=(None, None, 1), dtype=tf.uint8, name=None), 'species': TensorSpec(shape=(), dtype=tf.int64, name=None)}>}

Preprocessing Steps

In [8]:

def normalize(input_image, input_mask):
    input_image = tf.cast(input_image, tf.float32) / 255.0
    input_mask = input_mask - 1 # convert to zero based indexing
    return input_image, input_mask

def load_train_images(sample):
    # resize the image
    input_image = tf.image.resize(sample['image'], (128, 128))
    input_mask = tf.image.resize(sample['segmentation_mask'], (128, 128))
    # data augmentation
    if tf.random.uniform(()) > 0.5:
        input_image = tf.image.flip_left_right(input_image)
        input_mask = tf.image.flip_left_right(input_mask)
    # normalize the images
    input_image, input_mask = normalize(input_image, input_mask)
    return input_image, input_mask

def load_test_images(sample):
    # resize the image
    input_image = tf.image.resize(sample['image'], (128, 128))
    input_mask = tf.image.resize(sample['segmentation_mask'], (128, 128))
    # normalize the images
    input_image, input_mask = normalize(input_image, input_mask)
    return input_image, input_mask

In [9]:

train_dataset = dataset['train'].map(load_train_images, num_parallel_calls=tf.data.experimental.AUTOTUNE)
test_dataset = dataset['test'].map(load_test_images, num_parallel_calls=tf.data.experimental.AUTOTUNE)

In [10]:

BATCH_SIZE = 64
BUFFER_SIZE = 1000

train_dataset = train_dataset.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()
train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE)

Explanatory Data Analysis

In [11]:

def display_sample(image_list):
    plt.figure(figsize=(10,10))
    title = ['Input Image', 'True Mask', 'Predicted Mask']

    for i in range(len(image_list)):
        plt.subplot(1, len(image_list), i+1)
        plt.title(title[i])
        plt.imshow(tf.keras.utils.array_to_img(image_list[i]))
        plt.axis('off')

    plt.show()

In [12]:

for images, masks in train_dataset.take(3):
    sample_image, sample_mask = images[0], masks[0]
    display_sample([sample_image, sample_mask])

Out[12]:

Define U-Net Model

In [13]:

def double_conv_block(x, n_filters):
    x = layers.Conv2D(n_filters, 3, padding='same', activation='relu', kernel_initializer='he_normal')(x)
    x = layers.Conv2D(n_filters, 3, padding='same', activation='relu', kernel_initializer='he_normal')(x)
    return x

def downsample_block(x, n_filters):
    f = double_conv_block(x, n_filters)
    p = layers.MaxPool2D(2)(f)
    p = layers.Dropout(0.3)(p)
    return f, p

def upsample_block(x, conv_features, n_filters):
    x = layers.Conv2DTranspose(n_filters, 3, 2, padding='same')(x)
    x = layers.concatenate([x, conv_features])
    x = layers.Dropout(0.3)(x)
    x = double_conv_block(x, n_filters)
    return x

In [24]:

def build_unet_model(output_channels):
    # input layer
    inputs = layers.Input(shape=(128, 128, 3))

    # encoder - downsample
    f1, p1 = downsample_block(inputs, 64)
    f2, p2 = downsample_block(p1, 128)
    f3, p3 = downsample_block(p2, 256)
    f4, p4 = downsample_block(p3, 512)

    # intermediate block
    intermediate_block = double_conv_block(p4, 1024)

    # decoder - upsample
    u6 = upsample_block(intermediate_block, f4, 512)
    u7 = upsample_block(u6, f3, 256)
    u8 = upsample_block(u7, f2, 128)
    u9 = upsample_block(u8, f1, 64)

    # output layer
    outputs = layers.Conv2D(output_channels, 1, padding='same', activation='softmax')(u9)

    # unet model
    unet_model = tf.keras.Model(inputs, outputs, name='U-Net')

    return unet_model

In [25]:

# for images, masks in train_dataset.take(1):
#     sample_image, sample_mask = images[0], masks[0]
# sample_mask[60]

In [26]:

output_channels = 3
model = build_unet_model(output_channels)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [27]:

# plot the model
tf.keras.utils.plot_model(model, show_shapes=True, expand_nested=False, dpi=64)

Out[27]:

Train the Model

In [ ]:

EPOCHS = 20
steps_per_epoch = info.splits['train'].num_examples // BATCH_SIZE
validation_steps = info.splits['test'].num_examples // BATCH_SIZE

history = model.fit(train_dataset, epochs=EPOCHS, steps_per_epoch=steps_per_epoch, validation_steps=validation_steps, validation_data=test_dataset)

Epoch 1/20
57/57 [==============================] - 82s 1s/step - loss: 0.9267 - accuracy: 0.5709 - val_loss: 0.8328 - val_accuracy: 0.5746
Epoch 2/20
57/57 [==============================] - 69s 1s/step - loss: 0.7601 - accuracy: 0.6467 - val_loss: 0.6687 - val_accuracy: 0.7170
Epoch 3/20
57/57 [==============================] - 74s 1s/step - loss: 0.6364 - accuracy: 0.7325 - val_loss: 0.6207 - val_accuracy: 0.7392
Epoch 4/20
57/57 [==============================] - 74s 1s/step - loss: 0.5760 - accuracy: 0.7619 - val_loss: 0.5382 - val_accuracy: 0.7782
Epoch 5/20
57/57 [==============================] - 74s 1s/step - loss: 0.5225 - accuracy: 0.7853 - val_loss: 0.4846 - val_accuracy: 0.8035
Epoch 6/20
57/57 [==============================] - 74s 1s/step - loss: 0.4766 - accuracy: 0.8059 - val_loss: 0.4674 - val_accuracy: 0.8099
Epoch 7/20
57/57 [==============================] - 74s 1s/step - loss: 0.4492 - accuracy: 0.8163 - val_loss: 0.4196 - val_accuracy: 0.8284
Epoch 8/20
57/57 [==============================] - 74s 1s/step - loss: 0.4142 - accuracy: 0.8312 - val_loss: 0.4128 - val_accuracy: 0.8326
Epoch 9/20
57/57 [==============================] - 68s 1s/step - loss: 0.3946 - accuracy: 0.8385 - val_loss: 0.4061 - val_accuracy: 0.8333
Epoch 10/20
57/57 [==============================] - 68s 1s/step - loss: 0.3772 - accuracy: 0.8455 - val_loss: 0.3846 - val_accuracy: 0.8434
Epoch 11/20
57/57 [==============================] - 68s 1s/step - loss: 0.3708 - accuracy: 0.8497 - val_loss: 0.4035 - val_accuracy: 0.8361
Epoch 12/20
57/57 [==============================] - 68s 1s/step - loss: 0.3518 - accuracy: 0.8557 - val_loss: 0.3592 - val_accuracy: 0.8550
Epoch 13/20
57/57 [==============================] - 68s 1s/step - loss: 0.3337 - accuracy: 0.8632 - val_loss: 0.3652 - val_accuracy: 0.8547
Epoch 14/20
57/57 [==============================] - 74s 1s/step - loss: 0.3331 - accuracy: 0.8639 - val_loss: 0.3535 - val_accuracy: 0.8586
Epoch 15/20
57/57 [==============================] - 68s 1s/step - loss: 0.3224 - accuracy: 0.8678 - val_loss: 0.3461 - val_accuracy: 0.8612
Epoch 16/20
57/57 [==============================] - 68s 1s/step - loss: 0.3328 - accuracy: 0.8649 - val_loss: 0.3445 - val_accuracy: 0.8623
Epoch 17/20
57/57 [==============================] - 74s 1s/step - loss: 0.2966 - accuracy: 0.8773 - val_loss: 0.3502 - val_accuracy: 0.8623
Epoch 18/20
57/57 [==============================] - 68s 1s/step - loss: 0.2947 - accuracy: 0.8778 - val_loss: 0.3976 - val_accuracy: 0.8500
Epoch 19/20
57/57 [==============================] - 68s 1s/step - loss: 0.2855 - accuracy: 0.8814 - val_loss: 0.3323 - val_accuracy: 0.8699
Epoch 20/20
57/57 [==============================] - 68s 1s/step - loss: 0.2823 - accuracy: 0.8830 - val_loss: 0.3303 - val_accuracy: 0.8712

Visualize the Results

In [33]:

# plot train & val accuracy
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Val'], loc='upper left')

# plot train & val loss
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Val'], loc='upper left')

plt.tight_layout()
plt.show()

Out[33]:

Test Predictions

In [46]:

def create_mask(pred_mask):
    pred_mask = tf.argmax(pred_mask, axis=-1)
    pred_mask = pred_mask[..., tf.newaxis]
    return pred_mask[0]

def show_predictions(dataset=None, num=1):
    if dataset:
        for image, mask in dataset.take(num):
            pred_mask = model.predict(image)
            display_sample([image[0], mask[0], create_mask(pred_mask)])

In [47]:

# for image, mask in test_dataset.take(1):
#     pred_mask = model.predict(image)
# pred_mask = tf.argmax(pred_mask, axis=-1)
# print(np.array(pred_mask.shape))
# pred_mask = pred_mask[..., tf.newaxis]
# print(pred_mask[0].shape)

In [48]:

show_predictions(test_dataset, 10)

Out[48]:

2/2 [==============================] - 0s 176ms/step

2/2 [==============================] - 0s 123ms/step

2/2 [==============================] - 0s 122ms/step

2/2 [==============================] - 0s 122ms/step

2/2 [==============================] - 0s 124ms/step

2/2 [==============================] - 0s 119ms/step

2/2 [==============================] - 0s 122ms/step

2/2 [==============================] - 0s 120ms/step

2/2 [==============================] - 0s 123ms/step

2/2 [==============================] - 0s 120ms/step

In [16]:

In [16]:

Import Modules

Load the Dataset

Preprocessing Steps

Explanatory Data Analysis

Define U-Net Model

Train the Model

Visualize the Results

Test Predictions

Product

Resources

Company