GitHub Repository: fchollet/deep-learning-with-python-notebooks
Path: blob/master/chapter05_fundamentals-of-ml.ipynb
Kernel: Python 3

This is a companion notebook for the book Deep Learning with Python, Third Edition. For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.

If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.

The book's contents are available online at deeplearningwithpython.io.

!pip install keras keras-hub --upgrade -q
import os
os.environ["KERAS_BACKEND"] = "jax"
# @title
import os
from IPython.core.magic import register_cell_magic

@register_cell_magic
def backend(line, cell):
    current, required = os.environ.get("KERAS_BACKEND", ""), line.split()[-1]
    if current == required:
        get_ipython().run_cell(cell)
    else:
        print(
            f"This cell requires the {required} backend. To run it, change KERAS_BACKEND to "
            f"\"{required}\" at the top of the notebook, restart the runtime, and rerun the notebook."
        )

Fundamentals of machine learning

Generalization: The goal of machine learning

Underfitting and overfitting

Noisy training data
Ambiguous features
Rare features and spurious correlations
from keras.datasets import mnist
import numpy as np

(train_images, train_labels), _ = mnist.load_data()
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255

train_images_with_noise_channels = np.concatenate(
    [train_images, np.random.random((len(train_images), 784))], axis=1
)
train_images_with_zeros_channels = np.concatenate(
    [train_images, np.zeros((len(train_images), 784))], axis=1
)
import keras
from keras import layers


def get_model():
    model = keras.Sequential(
        [
            layers.Dense(512, activation="relu"),
            layers.Dense(10, activation="softmax"),
        ]
    )
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model


model = get_model()
history_noise = model.fit(
    train_images_with_noise_channels,
    train_labels,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
)

model = get_model()
history_zeros = model.fit(
    train_images_with_zeros_channels,
    train_labels,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
)
import matplotlib.pyplot as plt

val_acc_noise = history_noise.history["val_accuracy"]
val_acc_zeros = history_zeros.history["val_accuracy"]
epochs = range(1, 11)
plt.plot(
    epochs,
    val_acc_noise,
    "b-",
    label="Validation accuracy with noise channels",
)
plt.plot(
    epochs,
    val_acc_zeros,
    "r--",
    label="Validation accuracy with zeros channels",
)
plt.title("Effect of noise channels on validation accuracy")
plt.xlabel("Epochs")
plt.xticks(epochs)
plt.ylabel("Accuracy")
plt.legend()
plt.show()

The nature of generalization in deep learning

(train_images, train_labels), _ = mnist.load_data()
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255

random_train_labels = train_labels[:]
np.random.shuffle(random_train_labels)

model = keras.Sequential(
    [
        layers.Dense(512, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ]
)
model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.fit(
    train_images,
    random_train_labels,
    epochs=100,
    batch_size=128,
    validation_split=0.2,
)
The manifold hypothesis
Interpolation as a source of generalization
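As a toy illustration (not part of the book's notebook): linearly blending two digits in raw pixel space produces implausible in-between images, which is exactly the contrast with the interpolation the book describes, which happens on the learned latent manifold. This sketch reuses the train_images array and the plt import from the cells above.

digit_a = train_images[0].reshape(28, 28)
digit_b = train_images[1].reshape(28, 28)
blend_factors = [0.0, 0.25, 0.5, 0.75, 1.0]
plt.figure(figsize=(10, 2))
for i, t in enumerate(blend_factors):
    plt.subplot(1, len(blend_factors), i + 1)
    # Pixel-space linear interpolation between the two digits
    plt.imshow((1 - t) * digit_a + t * digit_b, cmap="gray")
    plt.axis("off")
plt.show()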
Why deep learning works
Training data is paramount

Evaluating machine-learning models

Training, validation, and test sets

Simple hold-out validation
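The notebook contains no code for this section, so here is a minimal runnable sketch of hold-out validation, reusing the MNIST arrays and the get_model() helper defined earlier in this notebook; the 10,000-sample validation set size and the 5 epochs are arbitrary choices for illustration.

num_validation_samples = 10000
shuffled_indices = np.random.permutation(len(train_images))
val_indices = shuffled_indices[:num_validation_samples]
train_indices = shuffled_indices[num_validation_samples:]

val_images, val_labels = train_images[val_indices], train_labels[val_indices]
partial_train_images = train_images[train_indices]
partial_train_labels = train_labels[train_indices]

model = get_model()
model.fit(partial_train_images, partial_train_labels, epochs=5, batch_size=128)
val_loss, val_acc = model.evaluate(val_images, val_labels)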
K-fold validation
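Similarly, a minimal K-fold validation sketch under the same assumptions: train k models on k different train/validation splits and average their validation scores (k=3 here, chosen arbitrarily).

k = 3
num_validation_samples = len(train_images) // k
validation_scores = []
for fold in range(k):
    val_slice = slice(
        fold * num_validation_samples, (fold + 1) * num_validation_samples
    )
    val_images = train_images[val_slice]
    val_labels = train_labels[val_slice]
    # Training data is everything outside the current validation fold
    partial_train_images = np.concatenate(
        [train_images[: val_slice.start], train_images[val_slice.stop :]], axis=0
    )
    partial_train_labels = np.concatenate(
        [train_labels[: val_slice.start], train_labels[val_slice.stop :]], axis=0
    )
    model = get_model()
    model.fit(partial_train_images, partial_train_labels, epochs=5, batch_size=128)
    _, fold_acc = model.evaluate(val_images, val_labels)
    validation_scores.append(fold_acc)
average_validation_score = np.mean(validation_scores)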
Iterated K-fold validation with shuffling
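Iterated K-fold repeats the procedure above several times, reshuffling the data before each round, and averages the scores across every fold of every round. A sketch reusing k and num_validation_samples from the K-fold sketch above (3 iterations is an arbitrary choice):

num_iterations = 3
all_scores = []
for iteration in range(num_iterations):
    # Reshuffle the data before each round of K-fold validation
    shuffled = np.random.permutation(len(train_images))
    images, labels = train_images[shuffled], train_labels[shuffled]
    for fold in range(k):
        val_slice = slice(
            fold * num_validation_samples, (fold + 1) * num_validation_samples
        )
        model = get_model()
        model.fit(
            np.concatenate([images[: val_slice.start], images[val_slice.stop :]]),
            np.concatenate([labels[: val_slice.start], labels[val_slice.stop :]]),
            epochs=5,
            batch_size=128,
        )
        _, fold_acc = model.evaluate(images[val_slice], labels[val_slice])
        all_scores.append(fold_acc)
average_score = np.mean(all_scores)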

Beating a common-sense baseline
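As a concrete illustration (not from the book's notebook): for MNIST, always predicting the most frequent class gives the floor that any trained model must beat, roughly 11% accuracy.

values, counts = np.unique(train_labels, return_counts=True)
most_frequent_class = values[np.argmax(counts)]
baseline_accuracy = np.max(counts) / len(train_labels)
print(f"Always predicting class {most_frequent_class}: accuracy {baseline_accuracy:.3f}")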

Things to keep in mind about model evaluation

Improving model fit

Tuning key gradient descent parameters

(train_images, train_labels), _ = mnist.load_data()
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255

model = keras.Sequential(
    [
        layers.Dense(512, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ]
)
model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=1.0),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.fit(
    train_images, train_labels, epochs=10, batch_size=128, validation_split=0.2
)
model = keras.Sequential(
    [
        layers.Dense(512, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ]
)
model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-2),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.fit(
    train_images, train_labels, epochs=10, batch_size=128, validation_split=0.2
)

Using better architecture priors

Increasing model capacity

model = keras.Sequential([layers.Dense(10, activation="softmax")])
model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
history_small_model = model.fit(
    train_images, train_labels, epochs=20, batch_size=128, validation_split=0.2
)
import matplotlib.pyplot as plt

val_loss = history_small_model.history["val_loss"]
epochs = range(1, 21)
plt.plot(epochs, val_loss, "b-", label="Validation loss")
plt.title("Validation loss for a model with insufficient capacity")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()
model = keras.Sequential(
    [
        layers.Dense(128, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ]
)
model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
history_large_model = model.fit(
    train_images,
    train_labels,
    epochs=20,
    batch_size=128,
    validation_split=0.2,
)
val_loss = history_large_model.history["val_loss"]
epochs = range(1, 21)
plt.plot(epochs, val_loss, "b-", label="Validation loss")
plt.title("Validation loss for a model with appropriate capacity")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()
model = keras.Sequential(
    [
        layers.Dense(2048, activation="relu"),
        layers.Dense(2048, activation="relu"),
        layers.Dense(2048, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ]
)
model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
history_very_large_model = model.fit(
    train_images,
    train_labels,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
)
val_loss = history_very_large_model.history["val_loss"]
epochs = range(1, 21)
plt.plot(epochs, val_loss, "b-", label="Validation loss")
plt.title("Validation loss for a model with too much capacity")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

Improving generalization

Dataset curation

Feature engineering
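The book illustrates feature engineering with the example of reading the time off a clock face: rather than feeding raw pixels, you can hand the model the coordinates of each hand's tip, or better yet, the corresponding angles, which makes the task nearly trivial. A small sketch of that coordinate-to-angle transformation, with made-up coordinates and using the numpy import from earlier in the notebook:

def hand_angle(x, y):
    # Angle of a clock hand in degrees, measured clockwise from 12 o'clock,
    # given the (x, y) coordinates of its tip relative to the clock center.
    return np.degrees(np.arctan2(x, y)) % 360

# Hypothetical coordinates for the hour and minute hands:
hour_hand_xy = (0.3, 0.52)
minute_hand_xy = (-0.7, 0.0)
print(hand_angle(*hour_hand_xy), hand_angle(*minute_hand_xy))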

Using early stopping
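The standard way to do this in Keras is the EarlyStopping callback, which halts training once the monitored validation metric stops improving and can restore the best weights seen during training. A minimal sketch, reusing the get_model() helper and the MNIST arrays defined earlier in this notebook (the patience value is an arbitrary choice):

early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=2,
    restore_best_weights=True,
)
model = get_model()
model.fit(
    train_images,
    train_labels,
    epochs=50,
    batch_size=128,
    validation_split=0.2,
    callbacks=[early_stopping],
)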

Regularizing your model

Reducing the network's size
from keras.datasets import imdb

(train_data, train_labels), _ = imdb.load_data(num_words=10000)


def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.0
    return results


train_data = vectorize_sequences(train_data)

model = keras.Sequential(
    [
        layers.Dense(16, activation="relu"),
        layers.Dense(16, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)
model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
history_original = model.fit(
    train_data,
    train_labels,
    epochs=20,
    batch_size=512,
    validation_split=0.4,
)
model = keras.Sequential(
    [
        layers.Dense(4, activation="relu"),
        layers.Dense(4, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)
model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
history_smaller_model = model.fit(
    train_data,
    train_labels,
    epochs=20,
    batch_size=512,
    validation_split=0.4,
)
original_val_loss = history_original.history["val_loss"]
smaller_model_val_loss = history_smaller_model.history["val_loss"]
epochs = range(1, 21)
plt.plot(
    epochs,
    original_val_loss,
    "r--",
    label="Validation loss of original model",
)
plt.plot(
    epochs,
    smaller_model_val_loss,
    "b-",
    label="Validation loss of smaller model",
)
plt.title("Original model vs. smaller model (IMDB review classification)")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.xticks(epochs)
plt.legend()
plt.show()
model = keras.Sequential(
    [
        layers.Dense(512, activation="relu"),
        layers.Dense(512, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)
model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
history_larger_model = model.fit(
    train_data,
    train_labels,
    epochs=20,
    batch_size=512,
    validation_split=0.4,
)
original_val_loss = history_original.history["val_loss"]
larger_model_val_loss = history_larger_model.history["val_loss"]
epochs = range(1, 21)
plt.plot(
    epochs,
    original_val_loss,
    "r--",
    label="Validation loss of original model",
)
plt.plot(
    epochs,
    larger_model_val_loss,
    "b-",
    label="Validation loss of larger model",
)
plt.title("Original model vs. larger model (IMDB review classification)")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.xticks(epochs)
plt.legend()
plt.show()
Adding weight regularization
from keras.regularizers import l2

model = keras.Sequential(
    [
        layers.Dense(16, kernel_regularizer=l2(0.002), activation="relu"),
        layers.Dense(16, kernel_regularizer=l2(0.002), activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
)
model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
history_l2_reg = model.fit(
    train_data,
    train_labels,
    epochs=20,
    batch_size=512,
    validation_split=0.4,
)
original_val_loss = history_original.history["val_loss"]
l2_val_loss = history_l2_reg.history["val_loss"]
epochs = range(1, 21)
plt.plot(
    epochs,
    original_val_loss,
    "r--",
    label="Validation loss of original model",
)
plt.plot(
    epochs,
    l2_val_loss,
    "b-",
    label="Validation loss of L2-regularized model",
)
plt.title(
    "Original model vs. L2-regularized model (IMDB review classification)"
)
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.xticks(epochs)
plt.legend()
plt.show()
from keras import regularizers

regularizers.l1(0.001)
regularizers.l1_l2(l1=0.001, l2=0.001)
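These alternatives are used the same way as l2 above: pass them to a layer's kernel_regularizer argument. For example, a hypothetical variant of one of the earlier layers:

layers.Dense(
    16,
    kernel_regularizer=regularizers.l1_l2(l1=0.001, l2=0.001),
    activation="relu",
)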
Adding dropout
model = keras.Sequential(
    [
        layers.Dense(16, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(16, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(1, activation="sigmoid"),
    ]
)
model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
history_dropout = model.fit(
    train_data,
    train_labels,
    epochs=20,
    batch_size=512,
    validation_split=0.4,
)
original_val_loss = history_original.history["val_loss"]
dropout_val_loss = history_dropout.history["val_loss"]
epochs = range(1, 21)
plt.plot(
    epochs,
    original_val_loss,
    "r--",
    label="Validation loss of original model",
)
plt.plot(
    epochs,
    dropout_val_loss,
    "b-",
    label="Validation loss of dropout-regularized model",
)
plt.title(
    "Original model vs. dropout-regularized model (IMDB review classification)"
)
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.xticks(epochs)
plt.legend()
plt.show()