GitHub Repository: probml/pyprobml
Path: blob/master/notebooks/book1/04/imdb_mlp_bow_tf.ipynb
# Movie review classifier using keras. Based on
# https://www.tensorflow.org/tutorials/keras/basic_text_classification

from __future__ import absolute_import, division, print_function

import numpy as np
import matplotlib.pyplot as plt
import os

figdir = "figures"


def savefig(fname):
    if figdir:
        os.makedirs(figdir, exist_ok=True)  # create the output directory if it does not exist yet
        plt.savefig(os.path.join(figdir, fname))


try:
    import tensorflow as tf
except ModuleNotFoundError:
    %pip install -qq tensorflow
    import tensorflow as tf

from tensorflow import keras

print(tf.__version__)
np.random.seed(0)

imdb = keras.datasets.imdb
vocab_size = 10000
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=vocab_size)

print(np.shape(train_data))  # (25000,)
print(len(train_data[0]))
print(len(train_data[1]))
print(train_data[0])  # [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941...]

word_index = imdb.get_word_index()
# The first indices are reserved
word_index = {k: (v + 3) for k, v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2  # unknown
word_index["<UNUSED>"] = 3

reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])


def decode_review(text):
    return " ".join([reverse_word_index.get(i, "?") for i in text])


for i in range(2):
    print("example {}, label {}".format(i, train_labels[i]))
    print(decode_review(train_data[i]))

"""
example 0, label 1
<START> this film was just brilliant casting location scenery story direction everyone's
really suited the part they played and you could just imagine being there robert <UNK> is
an amazing actor and now the same being director <UNK> father came from the same scottish
island as myself so i loved the fact there was a real connection with this film the witty
remarks throughout the film were great it was just brilliant so much that i bought the
film as soon as it was released for <UNK> and would recommend it to everyone to watch and
the fly fishing was amazing really cried at the end it was so sad and you know what they
say if you cry at a film it must have been good and this definitely was also <UNK> to the
two little boy's that played the <UNK> of norman and paul they were just brilliant
children are often left out of the <UNK> list i think because the stars that play them
all grown up are such a big profile for the whole film but these children are amazing and
should be praised for what they have done don't you think the whole story was so lovely
because it was true and was someone's life after all that was shared with us all

example 1, label 0
<START> big hair big boobs bad music and a giant safety pin these are the words to best
describe this terrible movie i love cheesy horror movies and i've seen hundreds but this
had got to be on of the worst ever made the plot is paper thin and ridiculous the acting
is an abomination the script is completely laughable the best is the end showdown with
the cop and how he worked out who the killer is it's just so damn terribly written the
clothes are sickening and funny in equal <UNK> the hair is big lots of boobs <UNK> men
wear those cut <UNK> shirts that show off their <UNK> sickening that men actually wore
them and the music is just <UNK> trash that plays over and over again in almost every
scene there is trashy music boobs and <UNK> taking away bodies and the gym still doesn't
close for <UNK> all joking aside this is a truly bad film whose only charm is to look
back on the disaster that was the 80's and have a good old laugh at how bad everything
was back then
"""
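# A minimal sketch, not in the original notebook: a hypothetical encode_review
# helper that inverts decode_review, mapping raw text to the shifted integer
# indices built above; words missing from word_index are sent to <UNK>.
def encode_review(text):
    return [word_index["<START>"]] + [word_index.get(w, word_index["<UNK>"]) for w in text.lower().split()]


print(encode_review("this film was just brilliant"))
# should match the first few indices of train_data[0]: [1, 14, 22, 16, 43, 530]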
train_data = keras.preprocessing.sequence.pad_sequences(
    train_data, value=word_index["<PAD>"], padding="post", maxlen=256
)
test_data = keras.preprocessing.sequence.pad_sequences(
    test_data, value=word_index["<PAD>"], padding="post", maxlen=256
)
print(train_data[0])

embed_size = 16


def make_model(embed_size):
    tf.random.set_seed(42)
    np.random.seed(42)
    model = keras.Sequential()
    model.add(keras.layers.Embedding(vocab_size, embed_size))
    model.add(keras.layers.GlobalAveragePooling1D())
    model.add(keras.layers.Dense(16, activation=tf.nn.relu))
    model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid))
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["acc"])
    return model


model = make_model(embed_size)
model.summary()

x_val = train_data[:10000]
x_train = train_data[10000:]
y_val = train_labels[:10000]
y_train = train_labels[10000:]

history = model.fit(x_train, y_train, epochs=50, batch_size=512, validation_data=(x_val, y_val), verbose=1)

history_dict = history.history
print(history_dict.keys())

results = model.evaluate(test_data, test_labels)
print(results)

acc = history_dict["acc"]
val_acc = history_dict["val_acc"]
loss = history_dict["loss"]
val_loss = history_dict["val_loss"]
epochs = range(1, len(acc) + 1)

fig, ax = plt.subplots()
plt.plot(epochs, loss, "bo", label="Training loss")
plt.plot(epochs, val_loss, "r-", label="Validation loss")
plt.title("Training and validation loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
savefig("imdb-loss.pdf")
plt.show()

fig, ax = plt.subplots()
plt.plot(epochs, acc, "bo", label="Training acc")
plt.plot(epochs, val_acc, "r-", label="Validation acc")
plt.title("Training and validation accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
savefig("imdb-acc.pdf")
plt.show()

# Now turn on early stopping
# https://chrisalbon.com/deep_learning/keras/neural_network_early_stopping/


class PrintDot(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        if epoch % 100 == 0:
            print("")
        print(".", end="")


callbacks = [
    PrintDot(),
    keras.callbacks.EarlyStopping(monitor="val_acc", patience=2),
    keras.callbacks.ModelCheckpoint(filepath="imdb_keras_best_model.ckpt", monitor="val_acc", save_best_only=True),
]

# Reset parameters to a new random state
model = make_model(embed_size)
history = model.fit(
    x_train, y_train, epochs=50, batch_size=512, validation_data=(x_val, y_val), verbose=0, callbacks=callbacks
)

history_dict = history.history
acc = history_dict["acc"]
val_acc = history_dict["val_acc"]
loss = history_dict["loss"]
val_loss = history_dict["val_loss"]
epochs = range(1, len(acc) + 1)

fig, ax = plt.subplots()
plt.plot(epochs, loss, "bo", label="Training loss")
plt.plot(epochs, val_loss, "r-", label="Validation loss")
plt.title("Training and validation loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
savefig("imdb-loss-early-stop.pdf")
plt.show()
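# EarlyStopping defaults to restore_best_weights=False, so `model` now holds the
# weights from the final epoch run, not necessarily the best one; the best weights
# live in the checkpoint written by ModelCheckpoint. A sketch of reloading and
# re-evaluating it, assuming the full model was saved (the default,
# save_weights_only=False) to the filepath used above:
best_model = keras.models.load_model("imdb_keras_best_model.ckpt")
print(best_model.evaluate(test_data, test_labels, verbose=0))  # [loss, acc] of the best checkpoint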
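# Why this counts as a bag-of-words model: GlobalAveragePooling1D averages the
# embedding vectors over all 256 positions, so word order is discarded before the
# dense layers. A minimal sketch checking the pooling layer against a plain mean,
# assuming the `model` and `train_data` from above (valid here because mask_zero
# is not set on the Embedding layer, so no masking is applied):
emb_out = model.layers[0](train_data[:1])  # embeddings for one review, shape (1, 256, embed_size)
manual_avg = emb_out.numpy().mean(axis=1)  # plain mean over the 256 positions
pool_out = keras.layers.GlobalAveragePooling1D()(emb_out).numpy()
print(np.allclose(manual_avg, pool_out, atol=1e-6))  # True
# Note the mean includes the <PAD> positions; setting mask_zero=True on the
# Embedding layer would exclude them from the average.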