GitHub Repository: fchollet/deep-learning-with-python-notebooks
Path: blob/master/second_edition/chapter11_part02_sequence-models.ipynb

This is a companion notebook for the book Deep Learning with Python, Second Edition. For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.

If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.

This notebook was generated for TensorFlow 2.6.

Processing words as a sequence: The sequence model approach

A first practical example

Downloading the data

!curl -O https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
!tar -xf aclImdb_v1.tar.gz
!rm -r aclImdb/train/unsup

Preparing the data

import os, pathlib, shutil, random
from tensorflow import keras

batch_size = 32
base_dir = pathlib.Path("aclImdb")
val_dir = base_dir / "val"
train_dir = base_dir / "train"
# Carve out 20% of the training files per category into a new validation directory
for category in ("neg", "pos"):
    os.makedirs(val_dir / category)
    files = os.listdir(train_dir / category)
    random.Random(1337).shuffle(files)
    num_val_samples = int(0.2 * len(files))
    val_files = files[-num_val_samples:]
    for fname in val_files:
        shutil.move(train_dir / category / fname,
                    val_dir / category / fname)

train_ds = keras.utils.text_dataset_from_directory(
    "aclImdb/train", batch_size=batch_size
)
val_ds = keras.utils.text_dataset_from_directory(
    "aclImdb/val", batch_size=batch_size
)
test_ds = keras.utils.text_dataset_from_directory(
    "aclImdb/test", batch_size=batch_size
)
# Dataset of raw text inputs only (no labels), used below to adapt TextVectorization
text_only_train_ds = train_ds.map(lambda x, y: x)
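
As a quick sanity check (not one of the book's cells), you can peek at the first batch of the raw dataset; the exact review text varies, but the inputs should be string tensors of shape (32,) and the targets integer labels of shape (32,):

for inputs, targets in train_ds.take(1):
    print("inputs.shape:", inputs.shape)    # (32,)
    print("inputs.dtype:", inputs.dtype)    # tf.string
    print("targets.shape:", targets.shape)  # (32,)
    print("targets.dtype:", targets.dtype)  # tf.int32
    print("inputs[0]:", inputs[0])
    print("targets[0]:", targets[0])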

Preparing integer sequence datasets

from tensorflow.keras import layers

# Keep the 20,000 most frequent words; truncate or pad each review to 600 tokens
max_length = 600
max_tokens = 20000
text_vectorization = layers.TextVectorization(
    max_tokens=max_tokens,
    output_mode="int",
    output_sequence_length=max_length,
)
text_vectorization.adapt(text_only_train_ds)

int_train_ds = train_ds.map(
    lambda x, y: (text_vectorization(x), y),
    num_parallel_calls=4)
int_val_ds = val_ds.map(
    lambda x, y: (text_vectorization(x), y),
    num_parallel_calls=4)
int_test_ds = test_ds.map(
    lambda x, y: (text_vectorization(x), y),
    num_parallel_calls=4)
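
After vectorization, each batch should contain integer token sequences of length max_length. A small hedged check (not part of the original cells):

for inputs, targets in int_train_ds.take(1):
    print("inputs.shape:", inputs.shape)    # (32, 600)
    print("inputs.dtype:", inputs.dtype)    # tf.int64
    print("targets.shape:", targets.shape)  # (32,)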

A sequence model built on one-hot encoded vector sequences

import tensorflow as tf

inputs = keras.Input(shape=(None,), dtype="int64")
embedded = tf.one_hot(inputs, depth=max_tokens)
x = layers.Bidirectional(layers.LSTM(32))(embedded)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
model.summary()

Training a first basic sequence model

callbacks = [
    keras.callbacks.ModelCheckpoint("one_hot_bidir_lstm.keras",
                                    save_best_only=True)
]
model.fit(int_train_ds, validation_data=int_val_ds, epochs=10, callbacks=callbacks)
model = keras.models.load_model("one_hot_bidir_lstm.keras")
print(f"Test acc: {model.evaluate(int_test_ds)[1]:.3f}")

Understanding word embeddings

Learning word embeddings with the Embedding layer

Instantiating an Embedding layer

embedding_layer = layers.Embedding(input_dim=max_tokens, output_dim=256)
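
The Embedding layer maps each integer token index to a dense 256-dimensional vector. As an illustrative check (not from the book, using a made-up batch of token indices), calling the freshly initialized layer returns one vector per token:

import tensorflow as tf

sample_ids = tf.constant([[3, 17, 0, 42]])   # batch of 1 sequence with 4 token indices
sample_vectors = embedding_layer(sample_ids)
print(sample_vectors.shape)                  # (1, 4, 256)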

Model that uses an Embedding layer trained from scratch

inputs = keras.Input(shape=(None,), dtype="int64")
embedded = layers.Embedding(input_dim=max_tokens, output_dim=256)(inputs)
x = layers.Bidirectional(layers.LSTM(32))(embedded)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
model.summary()

callbacks = [
    keras.callbacks.ModelCheckpoint("embeddings_bidir_gru.keras",
                                    save_best_only=True)
]
model.fit(int_train_ds, validation_data=int_val_ds, epochs=10, callbacks=callbacks)
model = keras.models.load_model("embeddings_bidir_gru.keras")
print(f"Test acc: {model.evaluate(int_test_ds)[1]:.3f}")

Understanding padding and masking

Using an Embedding layer with masking enabled

inputs = keras.Input(shape=(None,), dtype="int64")
embedded = layers.Embedding(
    input_dim=max_tokens, output_dim=256, mask_zero=True)(inputs)
x = layers.Bidirectional(layers.LSTM(32))(embedded)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
model.summary()

callbacks = [
    keras.callbacks.ModelCheckpoint("embeddings_bidir_gru_with_masking.keras",
                                    save_best_only=True)
]
model.fit(int_train_ds, validation_data=int_val_ds, epochs=10, callbacks=callbacks)
model = keras.models.load_model("embeddings_bidir_gru_with_masking.keras")
print(f"Test acc: {model.evaluate(int_test_ds)[1]:.3f}")
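
To see what mask_zero=True does, here is a small hedged illustration (not one of the book's cells, using a toy layer and a made-up padded batch): an Embedding layer built with mask_zero=True exposes compute_mask(), which flags real tokens versus index-0 padding, and downstream layers such as the LSTM use that mask to skip padded timesteps.

import tensorflow as tf
from tensorflow.keras import layers

demo_embedding = layers.Embedding(input_dim=10, output_dim=4, mask_zero=True)
padded_batch = tf.constant([[5, 3, 0, 0],    # two trailing padding tokens
                            [7, 2, 9, 1]])   # no padding
print(demo_embedding.compute_mask(padded_batch))
# [[ True  True False False]
#  [ True  True  True  True]]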

Using pretrained word embeddings

!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip -q glove.6B.zip

Parsing the GloVe word-embeddings file

import numpy as np

path_to_glove_file = "glove.6B.100d.txt"

embeddings_index = {}
with open(path_to_glove_file) as f:
    for line in f:
        word, coefs = line.split(maxsplit=1)
        coefs = np.fromstring(coefs, "f", sep=" ")
        embeddings_index[word] = coefs

print(f"Found {len(embeddings_index)} word vectors.")
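
Assuming the 100-dimensional file parsed correctly, every entry in embeddings_index should be a length-100 float vector; a quick check (not in the original notebook):

print(embeddings_index["the"].shape)   # (100,)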

Preparing the GloVe word-embeddings matrix

embedding_dim = 100

vocabulary = text_vectorization.get_vocabulary()
word_index = dict(zip(vocabulary, range(len(vocabulary))))

# Copy each word's GloVe vector into the row matching its vocabulary index;
# words missing from GloVe keep an all-zero row
embedding_matrix = np.zeros((max_tokens, embedding_dim))
for word, i in word_index.items():
    if i < max_tokens:
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

# Frozen Embedding layer initialized from the pretrained matrix
embedding_layer = layers.Embedding(
    max_tokens,
    embedding_dim,
    embeddings_initializer=keras.initializers.Constant(embedding_matrix),
    trainable=False,
    mask_zero=True,
)
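
Optionally (a small sketch not in the original notebook), you can check how much of the TextVectorization vocabulary is covered by GloVe; any uncovered word keeps an all-zero embedding row:

hits = sum(
    1 for word, i in word_index.items()
    if i < max_tokens and embeddings_index.get(word) is not None
)
print(f"{hits} of {max_tokens} vocabulary entries have a pretrained GloVe vector.")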

Model that uses a pretrained Embedding layer

inputs = keras.Input(shape=(None,), dtype="int64")
embedded = embedding_layer(inputs)
x = layers.Bidirectional(layers.LSTM(32))(embedded)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
model.summary()

callbacks = [
    keras.callbacks.ModelCheckpoint("glove_embeddings_sequence_model.keras",
                                    save_best_only=True)
]
model.fit(int_train_ds, validation_data=int_val_ds, epochs=10, callbacks=callbacks)
model = keras.models.load_model("glove_embeddings_sequence_model.keras")
print(f"Test acc: {model.evaluate(int_test_ds)[1]:.3f}")