
This is a companion notebook for the book Deep Learning with Python, Second Edition. For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.

If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.

This notebook was generated for TensorFlow 2.6.

Introduction to deep learning for computer vision

Introduction to convnets

Instantiating a small convnet

from tensorflow import keras
from tensorflow.keras import layers

inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(inputs)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.Flatten()(x)
outputs = layers.Dense(10, activation="softmax")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

Displaying the model's summary

model.summary()

Training the convnet on MNIST images

from tensorflow.keras.datasets import mnist

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape((60000, 28, 28, 1))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((10000, 28, 28, 1))
test_images = test_images.astype("float32") / 255
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
model.fit(train_images, train_labels, epochs=5, batch_size=64)

Evaluating the convnet

test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f"Test accuracy: {test_acc:.3f}")

The convolution operation

Understanding border effects and padding

Understanding convolution strides

The max-pooling operation
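
The book's prose for the three preceding subsections is omitted from this notebook. As a small illustrative sketch (not part of the original notebook), the snippet below prints the output shapes produced by different padding, stride, and pooling settings, which is what those subsections discuss.

from tensorflow import keras
from tensorflow.keras import layers

inputs = keras.Input(shape=(28, 28, 1))
# "valid" padding (the default): a 3x3 window shrinks 28x28 to 26x26
print(layers.Conv2D(32, kernel_size=3)(inputs).shape)
# "same" padding pads the borders so the output keeps the 28x28 size
print(layers.Conv2D(32, kernel_size=3, padding="same")(inputs).shape)
# a stride of 2 skips every other window location, roughly halving width and height
print(layers.Conv2D(32, kernel_size=3, strides=2)(inputs).shape)
# max pooling with pool_size=2 downsamples width and height by a factor of 2
print(layers.MaxPooling2D(pool_size=2)(inputs).shape)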

An incorrectly structured convnet missing its max-pooling layers

inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(inputs)
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.Flatten()(x)
outputs = layers.Dense(10, activation="softmax")(x)
model_no_max_pool = keras.Model(inputs=inputs, outputs=outputs)
model_no_max_pool.summary()

Training a convnet from scratch on a small dataset

The relevance of deep learning for small-data problems

Downloading the data

from google.colab import files
files.upload()
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c dogs-vs-cats
!unzip -qq dogs-vs-cats.zip
!unzip -qq train.zip

Copying images to training, validation, and test directories

import os, shutil, pathlib

original_dir = pathlib.Path("train")
new_base_dir = pathlib.Path("cats_vs_dogs_small")

def make_subset(subset_name, start_index, end_index):
    # Copy images start_index..end_index for each category into
    # cats_vs_dogs_small/<subset_name>/<category>/
    for category in ("cat", "dog"):
        dir = new_base_dir / subset_name / category
        os.makedirs(dir)
        fnames = [f"{category}.{i}.jpg"
                  for i in range(start_index, end_index)]
        for fname in fnames:
            shutil.copyfile(src=original_dir / fname,
                            dst=dir / fname)

make_subset("train", start_index=0, end_index=1000)
make_subset("validation", start_index=1000, end_index=1500)
make_subset("test", start_index=1500, end_index=2500)

Building the model

Instantiating a small convnet for dogs vs. cats classification

from tensorflow import keras
from tensorflow.keras import layers

inputs = keras.Input(shape=(180, 180, 3))
x = layers.Rescaling(1./255)(inputs)
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.Flatten()(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)
model.summary()

Configuring the model for training

model.compile(loss="binary_crossentropy",
              optimizer="rmsprop",
              metrics=["accuracy"])

Data preprocessing

Using image_dataset_from_directory to read images

from tensorflow.keras.utils import image_dataset_from_directory

train_dataset = image_dataset_from_directory(
    new_base_dir / "train",
    image_size=(180, 180),
    batch_size=32)
validation_dataset = image_dataset_from_directory(
    new_base_dir / "validation",
    image_size=(180, 180),
    batch_size=32)
test_dataset = image_dataset_from_directory(
    new_base_dir / "test",
    image_size=(180, 180),
    batch_size=32)
import numpy as np
import tensorflow as tf

random_numbers = np.random.normal(size=(1000, 16))
dataset = tf.data.Dataset.from_tensor_slices(random_numbers)
for i, element in enumerate(dataset):
    print(element.shape)
    if i >= 2:
        break
batched_dataset = dataset.batch(32)
for i, element in enumerate(batched_dataset):
    print(element.shape)
    if i >= 2:
        break
reshaped_dataset = dataset.map(lambda x: tf.reshape(x, (4, 4)))
for i, element in enumerate(reshaped_dataset):
    print(element.shape)
    if i >= 2:
        break
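
Not part of the original notebook: as a hedged illustration of how these Dataset methods compose, the sketch below chains shuffle(), batch(), and prefetch() on the same toy dataset. shuffle() randomizes element order and prefetch() overlaps data preparation with model execution.

pipeline = (dataset
            .shuffle(buffer_size=1000)
            .batch(32)
            .prefetch(tf.data.AUTOTUNE))
for i, element in enumerate(pipeline):
    print(element.shape)  # still batches of shape (32, 16)
    if i >= 2:
        break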

Displaying the shapes of the data and labels yielded by the Dataset

for data_batch, labels_batch in train_dataset:
    print("data batch shape:", data_batch.shape)
    print("labels batch shape:", labels_batch.shape)
    break

Fitting the model using a Dataset

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="convnet_from_scratch.keras",
        save_best_only=True,
        monitor="val_loss")
]
history = model.fit(
    train_dataset,
    epochs=30,
    validation_data=validation_dataset,
    callbacks=callbacks)

Displaying curves of loss and accuracy during training

import matplotlib.pyplot as plt

accuracy = history.history["accuracy"]
val_accuracy = history.history["val_accuracy"]
loss = history.history["loss"]
val_loss = history.history["val_loss"]
epochs = range(1, len(accuracy) + 1)
plt.plot(epochs, accuracy, "bo", label="Training accuracy")
plt.plot(epochs, val_accuracy, "b", label="Validation accuracy")
plt.title("Training and validation accuracy")
plt.legend()
plt.figure()
plt.plot(epochs, loss, "bo", label="Training loss")
plt.plot(epochs, val_loss, "b", label="Validation loss")
plt.title("Training and validation loss")
plt.legend()
plt.show()

Evaluating the model on the test set

test_model = keras.models.load_model("convnet_from_scratch.keras")
test_loss, test_acc = test_model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")

Using data augmentation

Define a data augmentation stage to add to an image model

data_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.2),
    ]
)

Displaying some randomly augmented training images

plt.figure(figsize=(10, 10))
for images, _ in train_dataset.take(1):
    for i in range(9):
        augmented_images = data_augmentation(images)
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(augmented_images[0].numpy().astype("uint8"))
        plt.axis("off")

Defining a new convnet that includes image augmentation and dropout

inputs = keras.Input(shape=(180, 180, 3))
x = data_augmentation(inputs)
x = layers.Rescaling(1./255)(x)
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.Flatten()(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

model.compile(loss="binary_crossentropy",
              optimizer="rmsprop",
              metrics=["accuracy"])

Training the regularized convnet

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="convnet_from_scratch_with_augmentation.keras",
        save_best_only=True,
        monitor="val_loss")
]
history = model.fit(
    train_dataset,
    epochs=100,
    validation_data=validation_dataset,
    callbacks=callbacks)

Evaluating the model on the test set

test_model = keras.models.load_model(
    "convnet_from_scratch_with_augmentation.keras")
test_loss, test_acc = test_model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")

Leveraging a pretrained model

Feature extraction with a pretrained model

Instantiating the VGG16 convolutional base

conv_base = keras.applications.vgg16.VGG16(
    weights="imagenet",
    include_top=False,
    input_shape=(180, 180, 3))
conv_base.summary()

Fast feature extraction without data augmentation

Extracting the VGG16 features and corresponding labels

import numpy as np

def get_features_and_labels(dataset):
    # Run the frozen VGG16 base over every batch and collect the resulting
    # (5, 5, 512) feature maps together with their labels.
    all_features = []
    all_labels = []
    for images, labels in dataset:
        preprocessed_images = keras.applications.vgg16.preprocess_input(images)
        features = conv_base.predict(preprocessed_images)
        all_features.append(features)
        all_labels.append(labels)
    return np.concatenate(all_features), np.concatenate(all_labels)

train_features, train_labels = get_features_and_labels(train_dataset)
val_features, val_labels = get_features_and_labels(validation_dataset)
test_features, test_labels = get_features_and_labels(test_dataset)
train_features.shape

Defining and training the densely connected classifier

inputs = keras.Input(shape=(5, 5, 512))
x = layers.Flatten()(inputs)
x = layers.Dense(256)(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(loss="binary_crossentropy",
              optimizer="rmsprop",
              metrics=["accuracy"])

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="feature_extraction.keras",
        save_best_only=True,
        monitor="val_loss")
]
history = model.fit(
    train_features, train_labels,
    epochs=20,
    validation_data=(val_features, val_labels),
    callbacks=callbacks)

Plotting the results

import matplotlib.pyplot as plt

acc = history.history["accuracy"]
val_acc = history.history["val_accuracy"]
loss = history.history["loss"]
val_loss = history.history["val_loss"]
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, "bo", label="Training accuracy")
plt.plot(epochs, val_acc, "b", label="Validation accuracy")
plt.title("Training and validation accuracy")
plt.legend()
plt.figure()
plt.plot(epochs, loss, "bo", label="Training loss")
plt.plot(epochs, val_loss, "b", label="Validation loss")
plt.title("Training and validation loss")
plt.legend()
plt.show()

Feature extraction together with data augmentation

Instantiating and freezing the VGG16 convolutional base

conv_base = keras.applications.vgg16.VGG16(
    weights="imagenet",
    include_top=False)
conv_base.trainable = False

Printing the list of trainable weights before and after freezing

conv_base.trainable = True
print("This is the number of trainable weights "
      "before freezing the conv base:", len(conv_base.trainable_weights))
conv_base.trainable = False
print("This is the number of trainable weights "
      "after freezing the conv base:", len(conv_base.trainable_weights))

Adding a data augmentation stage and a classifier to the convolutional base

data_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.2),
    ]
)

inputs = keras.Input(shape=(180, 180, 3))
x = data_augmentation(inputs)
x = keras.applications.vgg16.preprocess_input(x)
x = conv_base(x)
x = layers.Flatten()(x)
x = layers.Dense(256)(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(loss="binary_crossentropy",
              optimizer="rmsprop",
              metrics=["accuracy"])
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="feature_extraction_with_data_augmentation.keras",
        save_best_only=True,
        monitor="val_loss")
]
history = model.fit(
    train_dataset,
    epochs=50,
    validation_data=validation_dataset,
    callbacks=callbacks)

Evaluating the model on the test set

test_model = keras.models.load_model(
    "feature_extraction_with_data_augmentation.keras")
test_loss, test_acc = test_model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")

Fine-tuning a pretrained model

conv_base.summary()

Freezing all layers until the fourth from the last

conv_base.trainable = True
for layer in conv_base.layers[:-4]:
    layer.trainable = False
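
Not part of the original notebook: a quick sanity check you can run to confirm which layers of the convolutional base remain trainable after the partial freeze (only the last four should report True).

# List each layer of the VGG16 base together with its trainable flag.
for layer in conv_base.layers:
    print(layer.name, layer.trainable)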

Fine-tuning the model

model.compile(loss="binary_crossentropy",
              optimizer=keras.optimizers.RMSprop(learning_rate=1e-5),
              metrics=["accuracy"])

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="fine_tuning.keras",
        save_best_only=True,
        monitor="val_loss")
]
history = model.fit(
    train_dataset,
    epochs=30,
    validation_data=validation_dataset,
    callbacks=callbacks)
model = keras.models.load_model("fine_tuning.keras")
test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")

Summary