GitHub Repository: fchollet/deep-learning-with-python-notebooks
Path: blob/master/chapter18_best-practices-for-the-real-world.ipynb

This is a companion notebook for the book Deep Learning with Python, Third Edition. For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.

If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.

The book's contents are available online at deeplearningwithpython.io.

!pip install keras keras-hub --upgrade -q
import os
os.environ["KERAS_BACKEND"] = "jax"
# @title
import os
from IPython.core.magic import register_cell_magic

@register_cell_magic
def backend(line, cell):
    current, required = os.environ.get("KERAS_BACKEND", ""), line.split()[-1]
    if current == required:
        get_ipython().run_cell(cell)
    else:
        print(
            f"This cell requires the {required} backend. To run it, change KERAS_BACKEND to "
            f"\"{required}\" at the top of the notebook, restart the runtime, and rerun the notebook."
        )

Best practices for the real world

Getting the most out of your models

Hyperparameter optimization

Using KerasTuner
!pip install keras-tuner -q
import keras
from keras import layers

def build_model(hp):
    units = hp.Int(name="units", min_value=16, max_value=64, step=16)
    model = keras.Sequential(
        [
            layers.Dense(units, activation="relu"),
            layers.Dense(10, activation="softmax"),
        ]
    )
    optimizer = hp.Choice(name="optimizer", values=["rmsprop", "adam"])
    model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model
import keras_tuner as kt

class SimpleMLP(kt.HyperModel):
    def __init__(self, num_classes):
        self.num_classes = num_classes

    def build(self, hp):
        units = hp.Int(name="units", min_value=16, max_value=64, step=16)
        model = keras.Sequential(
            [
                layers.Dense(units, activation="relu"),
                layers.Dense(self.num_classes, activation="softmax"),
            ]
        )
        optimizer = hp.Choice(name="optimizer", values=["rmsprop", "adam"])
        model.compile(
            optimizer=optimizer,
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"],
        )
        return model

hypermodel = SimpleMLP(num_classes=10)
tuner = kt.BayesianOptimization(
    build_model,
    objective="val_accuracy",
    max_trials=20,
    executions_per_trial=2,
    directory="mnist_kt_test",
    overwrite=True,
)
tuner.search_space_summary()
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape((-1, 28 * 28)).astype("float32") / 255
x_test = x_test.reshape((-1, 28 * 28)).astype("float32") / 255
x_train_full = x_train[:]
y_train_full = y_train[:]
num_val_samples = 10000
x_train, x_val = x_train[:-num_val_samples], x_train[-num_val_samples:]
y_train, y_val = y_train[:-num_val_samples], y_train[-num_val_samples:]
callbacks = [
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=5),
]
tuner.search(
    x_train,
    y_train,
    batch_size=128,
    epochs=100,
    validation_data=(x_val, y_val),
    callbacks=callbacks,
    verbose=2,
)
top_n = 4
best_hps = tuner.get_best_hyperparameters(top_n)
def get_best_epoch(hp):
    model = build_model(hp)
    callbacks = [
        keras.callbacks.EarlyStopping(
            monitor="val_loss", mode="min", patience=10
        )
    ]
    history = model.fit(
        x_train,
        y_train,
        validation_data=(x_val, y_val),
        epochs=100,
        batch_size=128,
        callbacks=callbacks,
    )
    val_loss_per_epoch = history.history["val_loss"]
    best_epoch = val_loss_per_epoch.index(min(val_loss_per_epoch)) + 1
    print(f"Best epoch: {best_epoch}")
    return best_epoch
def get_best_trained_model(hp):
    best_epoch = get_best_epoch(hp)
    model = build_model(hp)
    model.fit(
        x_train_full, y_train_full, batch_size=128, epochs=int(best_epoch * 1.2)
    )
    return model

best_models = []
for hp in best_hps:
    model = get_best_trained_model(hp)
    model.evaluate(x_test, y_test)
    best_models.append(model)
best_models = tuner.get_best_models(top_n)
The art of crafting the right search space
The future of hyperparameter tuning: automated machine learning

Model ensembling
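The book covers ensembling in prose only. As a minimal sketch (not from the book), assuming the best_models list and the MNIST test split from the tuning cells above are still in memory, you can average the models' softmax predictions:

import numpy as np

# Simple, unweighted ensemble: average the softmax predictions of the tuned models.
ensemble_preds = np.mean(
    [model.predict(x_test, verbose=0) for model in best_models], axis=0
)
ensemble_accuracy = np.mean(np.argmax(ensemble_preds, axis=1) == y_test)
print(f"Ensemble test accuracy: {ensemble_accuracy:.4f}")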

Scaling up model training with multiple devices

Multi-GPU training

Data parallelism: Replicating your model on each GPU
Model parallelism: Splitting your model across multiple GPUs

Distributed training in practice

Getting your hands on two or more GPUs
Using data parallelism with JAX
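This subsection is prose-only in the book. The sketch below (not from the book) uses the keras.distribution API on the JAX backend to replicate a model on every available device and split each batch across them; it assumes more than one accelerator is attached:

import keras

# Replicate model weights on every device and shard each batch across them.
data_parallel = keras.distribution.DataParallel(
    devices=keras.distribution.list_devices()
)
keras.distribution.set_distribution(data_parallel)
# Any model created after this call is trained with data parallelism.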
Using model parallelism with JAX
The DeviceMesh API
The LayoutMap API
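The DeviceMesh and LayoutMap subsections are prose-only in the book. The sketch below (not from the book) shows how the two APIs combine into a ModelParallel distribution; the 2 x 4 mesh assumes eight accelerators, and the weight-path patterns are illustrative:

import keras

devices = keras.distribution.list_devices()
# A 2 x 4 grid of devices: the first axis shards the data, the second shards the model.
mesh = keras.distribution.DeviceMesh(
    shape=(2, 4), axis_names=["data", "model"], devices=devices
)
layout_map = keras.distribution.LayoutMap(mesh)
# Shard Dense kernels along their output dimension on the "model" axis;
# any weight not listed here stays fully replicated.
layout_map["dense.*kernel"] = (None, "model")
layout_map["dense.*bias"] = ("model",)

model_parallel = keras.distribution.ModelParallel(
    layout_map=layout_map, batch_dim_name="data"
)
keras.distribution.set_distribution(model_parallel)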

TPU training

Using step fusing to improve TPU utilization
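This subsection is prose-only in the book. As a sketch (not from the book), step fusing is exposed through the steps_per_execution argument of compile(), which runs several training steps per compiled call so the TPU spends less time waiting on the host:

import keras

model = keras.Sequential(
    [
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(10, activation="softmax"),
    ]
)
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
    # Fuse 64 training steps into a single compiled execution.
    steps_per_execution=64,
)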

Speeding up training and inference with lower-precision computation

Understanding floating-point precision
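The book explains the floating-point types in prose. As a small illustration (not from the book), you can compare their machine epsilon and range with NumPy; bfloat16 comes from the ml_dtypes package, which ships as a dependency of JAX:

import numpy as np
import ml_dtypes

# float32: ~7 decimal digits; float16: narrow range; bfloat16: float32 range, lower precision.
for dtype in (np.float32, np.float16, ml_dtypes.bfloat16):
    info = np.finfo(dtype)
    print(f"{info.dtype}: eps={info.eps}, max={info.max}")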
Float16 inference
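This subsection is prose-only in the book. A minimal sketch (not from the book): setting the global dtype policy to "float16" before building a model makes both its weights and its computations run in half precision, which is typically acceptable for inference:

import keras

keras.config.set_dtype_policy("float16")
model = keras.Sequential(
    [
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(10, activation="softmax"),
    ]
)
model.build((None, 28 * 28))
print(model.layers[0].compute_dtype)   # float16
print(model.layers[0].variable_dtype)  # float16
keras.config.set_dtype_policy("float32")  # restore the default for later cells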
Mixed-precision training
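A companion sketch (not from the book): with the "mixed_float16" policy, computations run in float16 while the weights stay in float32, which preserves training stability:

import keras

keras.config.set_dtype_policy("mixed_float16")
model = keras.Sequential(
    [
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(10, activation="softmax"),
    ]
)
model.build((None, 28 * 28))
print(model.layers[0].compute_dtype)   # float16
print(model.layers[0].variable_dtype)  # float32
keras.config.set_dtype_policy("float32")  # restore the default for later cells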
Using loss scaling with mixed precision
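This subsection is prose-only in the book. As a sketch (not from the book), Keras exposes loss scaling through keras.optimizers.LossScaleOptimizer, which wraps an ordinary optimizer and rescales the loss so that small float16 gradients don't underflow to zero:

import keras

optimizer = keras.optimizers.LossScaleOptimizer(keras.optimizers.Adam(1e-3))
# Reuse the mixed-precision model from the sketch above (or any model).
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)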
Beyond mixed precision: float8 training

Faster inference with quantization

from keras import ops

x = ops.array([[0.1, 0.9], [1.2, -0.8]])
kernel = ops.array([[-0.1, -2.2], [1.1, 0.7]])
def abs_max_quantize(value):
    abs_max = ops.max(ops.abs(value), keepdims=True)
    scale = ops.divide(127, abs_max + 1e-7)
    scaled_value = value * scale
    scaled_value = ops.clip(ops.round(scaled_value), -127, 127)
    scaled_value = ops.cast(scaled_value, dtype="int8")
    return scaled_value, scale

int_x, x_scale = abs_max_quantize(x)
int_kernel, kernel_scale = abs_max_quantize(kernel)
int_y = ops.matmul(int_x, int_kernel)
y = ops.cast(int_y, dtype="float32") / (x_scale * kernel_scale)
y
ops.matmul(x, kernel)
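As a follow-up sketch (not from the book): rather than quantizing tensors by hand as above, recent Keras releases can apply int8 post-training quantization to a whole model in one call; the small model below is a stand-in:

import keras

model = keras.Sequential(
    [
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(10, activation="softmax"),
    ]
)
model.build((None, 28 * 28))
# Replaces supported layers' float weights with int8 weights plus scales.
model.quantize("int8")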