GitHub Repository: fchollet/deep-learning-with-python-notebooks
Path: blob/master/second_edition/chapter04_getting-started-with-neural-networks.ipynb
Kernel: Python 3

This is a companion notebook for the book Deep Learning with Python, Second Edition. For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.

If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.

This notebook was generated for TensorFlow 2.6.

Getting started with neural networks: Classification and regression

Classifying movie reviews: A binary classification example

The IMDB dataset

Loading the IMDB dataset

from tensorflow.keras.datasets import imdb
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=10000)
train_data[0]
train_labels[0]
max([max(sequence) for sequence in train_data])

Decoding reviews back to text

word_index = imdb.get_word_index()
reverse_word_index = dict(
    [(value, key) for (key, value) in word_index.items()])
# Indices are offset by 3 because 0, 1, and 2 are reserved
# for "padding", "start of sequence", and "unknown".
decoded_review = " ".join(
    [reverse_word_index.get(i - 3, "?") for i in train_data[0]])

Preparing the data

Encoding the integer sequences via multi-hot encoding

import numpy as np

def vectorize_sequences(sequences, dimension=10000):
    # Creates an all-zero matrix of shape (len(sequences), dimension)
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        for j in sequence:
            # Sets specific indices of results[i] to 1s
            results[i, j] = 1.
    return results

x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)
x_train[0]
y_train = np.asarray(train_labels).astype("float32")
y_test = np.asarray(test_labels).astype("float32")
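
As an aside (not from the book): NumPy's fancy indexing lets an equivalent vectorizer set all of a review's word indices in one assignment. A minimal sketch, assuming each sequence is a plain list of integers below dimension:

def vectorize_sequences_fast(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        # Fancy indexing sets every listed column of row i to 1 at once.
        results[i, sequence] = 1.
    return results

# Produces the same multi-hot arrays as vectorize_sequences above.
assert np.array_equal(vectorize_sequences_fast(train_data), x_train)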

Building your model

Model definition

from tensorflow import keras
from tensorflow.keras import layers

model = keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])

Compiling the model

model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
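
For intuition (my sketch, not from the book): binary cross-entropy averages -[y * log(p) + (1 - y) * log(1 - p)] over the samples. In NumPy:

def binary_crossentropy_np(y_true, y_pred, eps=1e-7):
    p = np.clip(y_pred, eps, 1 - eps)  # keep log() finite
    return -np.mean(y_true * np.log(p) + (1 - y_true) * np.log(1 - p))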

Validating your approach

Setting aside a validation set

x_val = x_train[:10000]
partial_x_train = x_train[10000:]
y_val = y_train[:10000]
partial_y_train = y_train[10000:]

Training your model

history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val))
history_dict = history.history
history_dict.keys()

Plotting the training and validation loss

import matplotlib.pyplot as plt

history_dict = history.history
loss_values = history_dict["loss"]
val_loss_values = history_dict["val_loss"]
epochs = range(1, len(loss_values) + 1)
# "bo" plots blue dots; "b" plots a solid blue line.
plt.plot(epochs, loss_values, "bo", label="Training loss")
plt.plot(epochs, val_loss_values, "b", label="Validation loss")
plt.title("Training and validation loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

Plotting the training and validation accuracy

plt.clf()
acc = history_dict["accuracy"]
val_acc = history_dict["val_accuracy"]
plt.plot(epochs, acc, "bo", label="Training acc")
plt.plot(epochs, val_acc, "b", label="Validation acc")
plt.title("Training and validation accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

Retraining a model from scratch

model = keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
model.fit(x_train, y_train, epochs=4, batch_size=512)
results = model.evaluate(x_test, y_test)
results

Using a trained model to generate predictions on new data

model.predict(x_test)
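
predict returns a probability per review; a common convention (not shown in the book) is to threshold at 0.5 to obtain class labels:

pred_probs = model.predict(x_test)
pred_labels = (pred_probs > 0.5).astype("int32")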

Further experiments

Wrapping up

Classifying newswires: A multiclass classification example

The Reuters dataset

Loading the Reuters dataset

from tensorflow.keras.datasets import reuters
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(
    num_words=10000)
len(train_data)
len(test_data)
train_data[10]

Decoding newswires back to text

word_index = reuters.get_word_index()
reverse_word_index = dict(
    [(value, key) for (key, value) in word_index.items()])
decoded_newswire = " ".join(
    [reverse_word_index.get(i - 3, "?") for i in train_data[0]])
train_labels[10]

Preparing the data

Encoding the input data

x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)

Encoding the labels

def to_one_hot(labels, dimension=46):
    results = np.zeros((len(labels), dimension))
    for i, label in enumerate(labels):
        results[i, label] = 1.
    return results

y_train = to_one_hot(train_labels)
y_test = to_one_hot(test_labels)
from tensorflow.keras.utils import to_categorical
y_train = to_categorical(train_labels)
y_test = to_categorical(test_labels)
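
A quick check (my addition, not in the book): the built-in utility and the manual encoder above produce identical arrays:

assert np.array_equal(to_one_hot(train_labels), to_categorical(train_labels))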

Building your model

Model definition

model = keras.Sequential([
    layers.Dense(64, activation="relu"),
    layers.Dense(64, activation="relu"),
    layers.Dense(46, activation="softmax")
])

Compiling the model

model.compile(optimizer="rmsprop",
              loss="categorical_crossentropy",
              metrics=["accuracy"])

Validating your approach

Setting aside a validation set

x_val = x_train[:1000]
partial_x_train = x_train[1000:]
y_val = y_train[:1000]
partial_y_train = y_train[1000:]

Training the model

history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val))

Plotting the training and validation loss

loss = history.history["loss"]
val_loss = history.history["val_loss"]
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, "bo", label="Training loss")
plt.plot(epochs, val_loss, "b", label="Validation loss")
plt.title("Training and validation loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

Plotting the training and validation accuracy

plt.clf()
acc = history.history["accuracy"]
val_acc = history.history["val_accuracy"]
plt.plot(epochs, acc, "bo", label="Training accuracy")
plt.plot(epochs, val_acc, "b", label="Validation accuracy")
plt.title("Training and validation accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

Retraining a model from scratch

model = keras.Sequential([
    layers.Dense(64, activation="relu"),
    layers.Dense(64, activation="relu"),
    layers.Dense(46, activation="softmax")
])
model.compile(optimizer="rmsprop",
              loss="categorical_crossentropy",
              metrics=["accuracy"])
model.fit(x_train, y_train, epochs=9, batch_size=512)
results = model.evaluate(x_test, y_test)
results
import copy

# Baseline: accuracy of a classifier that assigns labels at random.
test_labels_copy = copy.copy(test_labels)
np.random.shuffle(test_labels_copy)
hits_array = np.array(test_labels) == np.array(test_labels_copy)
hits_array.mean()

Generating predictions on new data

predictions = model.predict(x_test)
predictions[0].shape
np.sum(predictions[0])
np.argmax(predictions[0])
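
To label every test newswire at once, rather than one sample at a time (a small extension of the calls above):

predicted_labels = np.argmax(predictions, axis=1)
predicted_labels[:10]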

A different way to handle the labels and the loss

y_train = np.array(train_labels)
y_test = np.array(test_labels)
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
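
The sparse variant computes the same quantity directly from integer labels; a minimal equivalence check (my sketch, using Keras's functional loss interface):

from tensorflow.keras.losses import (categorical_crossentropy,
                                     sparse_categorical_crossentropy)
probs = np.array([[0.1, 0.7, 0.2]])
# One-hot target [0, 1, 0] and integer target 1 should give the same loss.
print(float(categorical_crossentropy(np.array([[0., 1., 0.]]), probs)[0]))
print(float(sparse_categorical_crossentropy(np.array([1]), probs)[0]))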

The importance of having sufficiently large intermediate layers

A model with an information bottleneck

model = keras.Sequential([
    layers.Dense(64, activation="relu"),
    layers.Dense(4, activation="relu"),
    layers.Dense(46, activation="softmax")
])
model.compile(optimizer="rmsprop",
              loss="categorical_crossentropy",
              metrics=["accuracy"])
model.fit(partial_x_train,
          partial_y_train,
          epochs=20,
          batch_size=128,
          validation_data=(x_val, y_val))

Further experiments

Wrapping up

Predicting house prices: A regression example

The Boston Housing Price dataset

Loading the Boston housing dataset

from tensorflow.keras.datasets import boston_housing
(train_data, train_targets), (test_data, test_targets) = (
    boston_housing.load_data())
train_data.shape
test_data.shape
train_targets

Preparing the data

Normalizing the data

mean = train_data.mean(axis=0)
train_data -= mean
std = train_data.std(axis=0)
train_data /= std
# Note that the test data is normalized with quantities
# computed on the training data.
test_data -= mean
test_data /= std
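
A sanity check (not in the book): the training features should now have mean ~0 and standard deviation ~1; the test set will be only approximately standardized, since it was scaled with the training statistics:

print(train_data.mean(axis=0).round(6))
print(train_data.std(axis=0).round(6))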

Building your model

Model definition

def build_model():
    # Because we need to instantiate the same model multiple times,
    # we use a function to construct it.
    model = keras.Sequential([
        layers.Dense(64, activation="relu"),
        layers.Dense(64, activation="relu"),
        layers.Dense(1)
    ])
    model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
    return model

Validating your approach using K-fold validation

K-fold validation

k = 4
num_val_samples = len(train_data) // k
num_epochs = 100
all_scores = []
for i in range(k):
    print(f"Processing fold #{i}")
    # Validation data: samples from partition #i
    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]
    # Training data: samples from all other partitions
    partial_train_data = np.concatenate(
        [train_data[:i * num_val_samples],
         train_data[(i + 1) * num_val_samples:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:i * num_val_samples],
         train_targets[(i + 1) * num_val_samples:]],
        axis=0)
    model = build_model()
    # Trains the model in silent mode (verbose=0)
    model.fit(partial_train_data, partial_train_targets,
              epochs=num_epochs, batch_size=16, verbose=0)
    # Evaluates the model on the validation data
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)
all_scores
np.mean(all_scores)
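
A compact summary of the spread across folds (my addition):

print(f"Validation MAE: {np.mean(all_scores):.3f} +/- {np.std(all_scores):.3f}")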

Saving the validation logs at each fold

num_epochs = 500
all_mae_histories = []
for i in range(k):
    print(f"Processing fold #{i}")
    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]
    partial_train_data = np.concatenate(
        [train_data[:i * num_val_samples],
         train_data[(i + 1) * num_val_samples:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:i * num_val_samples],
         train_targets[(i + 1) * num_val_samples:]],
        axis=0)
    model = build_model()
    history = model.fit(partial_train_data, partial_train_targets,
                        validation_data=(val_data, val_targets),
                        epochs=num_epochs, batch_size=16, verbose=0)
    mae_history = history.history["val_mae"]
    all_mae_histories.append(mae_history)

Building the history of successive mean K-fold validation scores

average_mae_history = [
    np.mean([x[i] for x in all_mae_histories]) for i in range(num_epochs)]
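
An equivalent one-liner (my sketch): stack the per-fold histories and average over the fold axis:

average_mae_history = np.mean(all_mae_histories, axis=0)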

Plotting validation scores

plt.plot(range(1, len(average_mae_history) + 1), average_mae_history)
plt.xlabel("Epochs")
plt.ylabel("Validation MAE")
plt.show()

Plotting validation scores, excluding the first 10 data points

truncated_mae_history = average_mae_history[10:]
plt.plot(range(1, len(truncated_mae_history) + 1), truncated_mae_history)
plt.xlabel("Epochs")
plt.ylabel("Validation MAE")
plt.show()
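
An alternative to truncation is to smooth the curve with an exponential moving average, as the book's first edition did; a sketch from memory, so treat the details as approximate (factor controls the smoothing strength):

def smooth_curve(points, factor=0.9):
    smoothed_points = []
    for point in points:
        if smoothed_points:
            previous = smoothed_points[-1]
            smoothed_points.append(previous * factor + point * (1 - factor))
        else:
            smoothed_points.append(point)
    return smoothed_points

plt.plot(range(1, len(average_mae_history) + 1),
         smooth_curve(average_mae_history))
plt.xlabel("Epochs")
plt.ylabel("Smoothed validation MAE")
plt.show()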

Training the final model

model = build_model()
model.fit(train_data, train_targets,
          epochs=130, batch_size=16, verbose=0)
test_mse_score, test_mae_score = model.evaluate(test_data, test_targets)
test_mae_score

Generating predictions on new data

predictions = model.predict(test_data)
predictions[0]
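
Each prediction is a single scalar: the estimated median price in thousands of dollars. To eyeball a few against the ground truth (my addition):

for pred, target in zip(predictions[:5].flatten(), test_targets[:5]):
    print(f"predicted: {pred:.1f}, actual: {target:.1f}")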

Wrapping up

Summary