Path: blob/master/chapter08_image-classification.ipynb
Kernel: Python 3
This is a companion notebook for the book Deep Learning with Python, Third Edition. For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.
If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.
The book's contents are available online at deeplearningwithpython.io.
In [0]:
!pip install keras keras-hub --upgrade -q
In [0]:
import os
os.environ["KERAS_BACKEND"] = "jax"
In [0]:
# @title
# Registers a %%backend cell magic: a cell marked with it only runs if the
# active Keras backend matches the backend named on the magic line.
import os
from IPython.core.magic import register_cell_magic

@register_cell_magic
def backend(line, cell):
    current, required = os.environ.get("KERAS_BACKEND", ""), line.split()[-1]
    if current == required:
        get_ipython().run_cell(cell)
    else:
        print(
            f"This cell requires the {required} backend. To run it, change KERAS_BACKEND to "
            f"\"{required}\" at the top of the notebook, restart the runtime, and rerun the notebook."
        )
Image classification
Introduction to convnets
In [0]:
import keras
from keras import layers

inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(inputs)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(10, activation="softmax")(x)
model = keras.Model(inputs=inputs, outputs=outputs)
In [0]:
model.summary(line_length=80)
In [0]:
from keras.datasets import mnist

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape((60000, 28, 28, 1))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((10000, 28, 28, 1))
test_images = test_images.astype("float32") / 255

model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.fit(train_images, train_labels, epochs=5, batch_size=64)
In [0]:
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f"Test accuracy: {test_acc:.3f}")
The convolution operation
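This section is prose-only in the book. As a quick illustration (not a book cell), the sketch below builds a single Conv2D layer and inspects it: the kernel weights have shape (kernel_height, kernel_width, input_channels, filters), and a 3x3 "valid" convolution shrinks a 28x28 input to 26x26.

# Illustrative sketch (not a book cell): a Conv2D layer's kernel and output shape.
import keras
from keras import layers

conv = layers.Conv2D(filters=64, kernel_size=3, activation="relu")
x = keras.ops.zeros((1, 28, 28, 1))  # one dummy grayscale image
y = conv(x)                          # calling the layer builds its weights
print(conv.kernel.shape)             # (3, 3, 1, 64): one 3x3 patch per input channel, 64 filters
print(y.shape)                       # (1, 26, 26, 64): "valid" convolution trims the border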
Understanding border effects and padding
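Also prose-only in the book. A minimal sketch of the border effect: with the default padding="valid" the output shrinks by kernel_size - 1 in each spatial dimension, while padding="same" zero-pads the input so the spatial size is preserved.

# Illustrative sketch (not a book cell): "valid" vs. "same" padding.
import keras
from keras import layers

x = keras.ops.zeros((1, 28, 28, 1))
valid = layers.Conv2D(filters=32, kernel_size=3, padding="valid")(x)
same = layers.Conv2D(filters=32, kernel_size=3, padding="same")(x)
print(valid.shape)  # (1, 26, 26, 32): output shrinks by kernel_size - 1
print(same.shape)   # (1, 28, 28, 32): zero-padding preserves the spatial size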
Understanding convolution strides
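A minimal sketch (not from the book): a stride of 2 evaluates every other window, roughly halving the feature-map width and height, which is an alternative way to downsample besides max pooling.

# Illustrative sketch (not a book cell): strided convolution downsamples the feature map.
import keras
from keras import layers

x = keras.ops.zeros((1, 28, 28, 1))
strided = layers.Conv2D(filters=32, kernel_size=3, strides=2)(x)
print(strided.shape)  # (1, 13, 13, 32): roughly half of 28x28 with "valid" padding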
The max-pooling operation
In [0]:
inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(inputs)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(10, activation="softmax")(x)
model_no_max_pool = keras.Model(inputs=inputs, outputs=outputs)
In [0]:
model_no_max_pool.summary(line_length=80)
Training a convnet from scratch on a small dataset
The relevance of deep learning for small-data problems
Downloading the data
In [0]:
import kagglehub

kagglehub.login()
In [0]:
download_path = kagglehub.competition_download("dogs-vs-cats")
In [0]:
import zipfile

with zipfile.ZipFile(download_path + "/train.zip", "r") as zip_ref:
    zip_ref.extractall(".")
In [0]:
import os, shutil, pathlib

original_dir = pathlib.Path("train")
new_base_dir = pathlib.Path("dogs_vs_cats_small")

def make_subset(subset_name, start_index, end_index):
    # Copy images start_index..end_index-1 of each class into
    # dogs_vs_cats_small/<subset_name>/<category>/.
    for category in ("cat", "dog"):
        dir = new_base_dir / subset_name / category
        os.makedirs(dir)
        fnames = [f"{category}.{i}.jpg" for i in range(start_index, end_index)]
        for fname in fnames:
            shutil.copyfile(src=original_dir / fname, dst=dir / fname)

make_subset("train", start_index=0, end_index=1000)
make_subset("validation", start_index=1000, end_index=1500)
make_subset("test", start_index=1500, end_index=2500)
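As an optional sanity check (not a book cell), you can count the copied files per split; the index ranges above give 2,000 training, 1,000 validation, and 2,000 test images in total.

# Optional sanity check (not a book cell): count images per split and category.
import pathlib

new_base_dir = pathlib.Path("dogs_vs_cats_small")
for subset in ("train", "validation", "test"):
    for category in ("cat", "dog"):
        n = len(list((new_base_dir / subset / category).glob("*.jpg")))
        print(subset, category, n)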
Building your model
In [0]:
import keras
from keras import layers

inputs = keras.Input(shape=(180, 180, 3))
x = layers.Rescaling(1.0 / 255)(inputs)
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=512, kernel_size=3, activation="relu")(x)
x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)
In [0]:
model.summary(line_length=80)
In [0]:
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
Data preprocessing
In [0]:
from keras.utils import image_dataset_from_directory

batch_size = 64
image_size = (180, 180)
train_dataset = image_dataset_from_directory(
    new_base_dir / "train", image_size=image_size, batch_size=batch_size
)
validation_dataset = image_dataset_from_directory(
    new_base_dir / "validation", image_size=image_size, batch_size=batch_size
)
test_dataset = image_dataset_from_directory(
    new_base_dir / "test", image_size=image_size, batch_size=batch_size
)
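image_dataset_from_directory infers labels from the subdirectory names and sorts them alphabetically, so "cat" should map to label 0 and "dog" to label 1. A quick way to confirm (not a book cell):

# Optional check (not a book cell): the inferred class-to-label mapping.
print(train_dataset.class_names)  # ['cat', 'dog'] -> labels 0 and 1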
Understanding TensorFlow Dataset objects
In [0]:
import numpy as np
import tensorflow as tf

random_numbers = np.random.normal(size=(1000, 16))
dataset = tf.data.Dataset.from_tensor_slices(random_numbers)
In [0]:
for i, element in enumerate(dataset):
    print(element.shape)
    if i >= 2:
        break
In [0]:
batched_dataset = dataset.batch(32)
for i, element in enumerate(batched_dataset):
    print(element.shape)
    if i >= 2:
        break
In [0]:
reshaped_dataset = dataset.map(
    lambda x: tf.reshape(x, (4, 4)),
    num_parallel_calls=8,
)
for i, element in enumerate(reshaped_dataset):
    print(element.shape)
    if i >= 2:
        break
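These Dataset methods compose. A common pattern (a sketch, not a book cell) is to chain shuffle, batch, and prefetch, which is essentially what the augmented training pipeline later in this chapter does with map() and prefetch().

# Illustrative sketch (not a book cell): chaining Dataset transformations.
pipeline = (
    dataset
    .shuffle(buffer_size=1000)   # randomize element order
    .batch(32)                   # group elements into batches of 32
    .prefetch(tf.data.AUTOTUNE)  # overlap data preparation with training
)
for element in pipeline.take(1):
    print(element.shape)  # (32, 16)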
Fitting the model
In [0]:
for data_batch, labels_batch in train_dataset:
    print("data batch shape:", data_batch.shape)
    print("labels batch shape:", labels_batch.shape)
    break
In [0]:
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="convnet_from_scratch.keras",
        save_best_only=True,
        monitor="val_loss",
    )
]
history = model.fit(
    train_dataset,
    epochs=50,
    validation_data=validation_dataset,
    callbacks=callbacks,
)
In [0]:
import matplotlib.pyplot as plt

accuracy = history.history["accuracy"]
val_accuracy = history.history["val_accuracy"]
loss = history.history["loss"]
val_loss = history.history["val_loss"]
epochs = range(1, len(accuracy) + 1)
plt.plot(epochs, accuracy, "r--", label="Training accuracy")
plt.plot(epochs, val_accuracy, "b", label="Validation accuracy")
plt.title("Training and validation accuracy")
plt.legend()
plt.figure()
plt.plot(epochs, loss, "r--", label="Training loss")
plt.plot(epochs, val_loss, "b", label="Validation loss")
plt.title("Training and validation loss")
plt.legend()
plt.show()
In [0]:
test_model = keras.models.load_model("convnet_from_scratch.keras")
test_loss, test_acc = test_model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")
Using data augmentation
In [0]:
data_augmentation_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.2),
]

def data_augmentation(images, targets):
    # Apply each random augmentation layer in turn; labels pass through unchanged.
    for layer in data_augmentation_layers:
        images = layer(images)
    return images, targets

augmented_train_dataset = train_dataset.map(
    data_augmentation, num_parallel_calls=8
)
augmented_train_dataset = augmented_train_dataset.prefetch(tf.data.AUTOTUNE)
In [0]:
plt.figure(figsize=(10, 10))
for image_batch, _ in train_dataset.take(1):
    image = image_batch[0]
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        augmented_image, _ = data_augmentation(image, None)
        augmented_image = keras.ops.convert_to_numpy(augmented_image)
        plt.imshow(augmented_image.astype("uint8"))
        plt.axis("off")
In [0]:
inputs = keras.Input(shape=(180, 180, 3))
x = layers.Rescaling(1.0 / 255)(inputs)
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=512, kernel_size=3, activation="relu")(x)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.25)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
In [0]:
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="convnet_from_scratch_with_augmentation.keras",
        save_best_only=True,
        monitor="val_loss",
    )
]
history = model.fit(
    augmented_train_dataset,
    epochs=100,
    validation_data=validation_dataset,
    callbacks=callbacks,
)
In [0]:
test_model = keras.models.load_model(
    "convnet_from_scratch_with_augmentation.keras"
)
test_loss, test_acc = test_model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")
Using a pretrained model
Feature extraction with a pretrained model
In [0]:
import keras_hub

conv_base = keras_hub.models.Backbone.from_preset("xception_41_imagenet")
In [0]:
preprocessor = keras_hub.layers.ImageConverter.from_preset(
    "xception_41_imagenet",
    image_size=(180, 180),
)
Fast feature extraction without data augmentation
In [0]:
def get_features_and_labels(dataset):
    # Run the frozen Xception base over every batch once and collect the
    # resulting feature maps, so a small classifier can be trained on them directly.
    all_features = []
    all_labels = []
    for images, labels in dataset:
        preprocessed_images = preprocessor(images)
        features = conv_base.predict(preprocessed_images, verbose=0)
        all_features.append(features)
        all_labels.append(labels)
    return np.concatenate(all_features), np.concatenate(all_labels)

train_features, train_labels = get_features_and_labels(train_dataset)
val_features, val_labels = get_features_and_labels(validation_dataset)
test_features, test_labels = get_features_and_labels(test_dataset)
In [0]:
train_features.shape
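This should print (2000, 6, 6, 2048): 2,000 training images (1,000 per class), each mapped by the Xception base to a 6x6 feature map with 2,048 channels, which is why the classifier below takes Input(shape=(6, 6, 2048)).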
In [0]:
inputs = keras.Input(shape=(6, 6, 2048))
x = layers.GlobalAveragePooling2D()(inputs)
x = layers.Dense(256, activation="relu")(x)
x = layers.Dropout(0.25)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="feature_extraction.keras",
        save_best_only=True,
        monitor="val_loss",
    )
]
history = model.fit(
    train_features,
    train_labels,
    epochs=10,
    validation_data=(val_features, val_labels),
    callbacks=callbacks,
)
In [0]:
import matplotlib.pyplot as plt

acc = history.history["accuracy"]
val_acc = history.history["val_accuracy"]
loss = history.history["loss"]
val_loss = history.history["val_loss"]
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, "r--", label="Training accuracy")
plt.plot(epochs, val_acc, "b", label="Validation accuracy")
plt.title("Training and validation accuracy")
plt.legend()
plt.figure()
plt.plot(epochs, loss, "r--", label="Training loss")
plt.plot(epochs, val_loss, "b", label="Validation loss")
plt.title("Training and validation loss")
plt.legend()
plt.show()
In [0]:
test_model = keras.models.load_model("feature_extraction.keras")
test_loss, test_acc = test_model.evaluate(test_features, test_labels)
print(f"Test accuracy: {test_acc:.3f}")
Feature extraction together with data augmentation
In [0]:
import keras_hub

conv_base = keras_hub.models.Backbone.from_preset(
    "xception_41_imagenet",
    trainable=False,
)
In [0]:
conv_base.trainable = True
len(conv_base.trainable_weights)
In [0]:
conv_base.trainable = False
len(conv_base.trainable_weights)
In [0]:
inputs = keras.Input(shape=(180, 180, 3))
x = preprocessor(inputs)
x = conv_base(x)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(256)(x)
x = layers.Dropout(0.25)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
In [0]:
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="feature_extraction_with_data_augmentation.keras",
        save_best_only=True,
        monitor="val_loss",
    )
]
history = model.fit(
    augmented_train_dataset,
    epochs=30,
    validation_data=validation_dataset,
    callbacks=callbacks,
)
In [0]:
test_model = keras.models.load_model(
    "feature_extraction_with_data_augmentation.keras"
)
test_loss, test_acc = test_model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")
Fine-tuning a pretrained model
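Fine-tuning requires unfreezing at least part of the convolutional base before recompiling with a low learning rate; the corresponding cell from the book is not reproduced in this notebook. The sketch below is a hedged reconstruction, not the book's exact cell: how many layers to unfreeze is a judgment call, and the layer structure depends on the keras_hub preset.

# Hedged sketch (the book's exact unfreezing cell is not shown here):
# unfreeze the base, then re-freeze all but its last few layers.
conv_base.trainable = True
for layer in conv_base.layers[:-4]:
    layer.trainable = False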
In [0]:
model.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-5),
    metrics=["accuracy"],
)
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="fine_tuning.keras",
        save_best_only=True,
        monitor="val_loss",
    )
]
history = model.fit(
    augmented_train_dataset,
    epochs=30,
    validation_data=validation_dataset,
    callbacks=callbacks,
)
In [0]:
model = keras.models.load_model("fine_tuning.keras")
test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")