GitHub Repository: probml/pyprobml
Path: blob/master/notebooks/book1/08/autodiff_tf.ipynb
Kernel: Python 3

Open In Colab

Automatic differentiation in TensorFlow 2

We use binary logistic regression as a running example.
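For reference, the loss being differentiated is the average negative log likelihood (binary cross entropy). Writing $\mu_n = \sigma(\mathbf{w}^\top \mathbf{x}_n)$ for the predicted probability of class 1, the loss and its gradient (which we compute below first by hand and then with autodiff) are

$$
\mathrm{NLL}(\mathbf{w}) = -\frac{1}{N}\sum_{n=1}^{N}\Bigl[y_n \log \mu_n + (1-y_n)\log(1-\mu_n)\Bigr],
\qquad
\nabla_{\mathbf{w}}\,\mathrm{NLL}(\mathbf{w}) = \frac{1}{N}\sum_{n=1}^{N}(\mu_n - y_n)\,\mathbf{x}_n .
$$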

# Standard Python libraries
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import time
import numpy as np
import glob
import matplotlib.pyplot as plt
import PIL
import imageio
from IPython import display
import sklearn
import seaborn as sns

sns.set(style="ticks", color_codes=True)

import pandas as pd

pd.set_option("display.precision", 2)  # 2 decimal places
pd.set_option("display.max_rows", 20)
pd.set_option("display.max_columns", 30)
pd.set_option("display.width", 100)  # wide windows
try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
    IS_COLAB = True
except Exception:
    IS_COLAB = False

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras

assert tf.__version__ >= "2.0"
print("tf version {}".format(tf.__version__))

if not tf.config.list_physical_devices('GPU'):
    print("No GPU was detected. DNNs can be very slow without a GPU.")
    if IS_COLAB:
        print("Go to Runtime > Change runtime and select a GPU hardware accelerator.")
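Before the logistic regression example, here is a minimal GradientTape smoke test (a sketch, not part of the original notebook) to confirm autodiff is working: the derivative of $x^3$ at $x = 2$ should be $12$.

# GradientTape smoke test (sketch, not in the original notebook).
x = tf.Variable(2.0)
with tf.GradientTape() as tape:
    y = x ** 3
dy_dx = tape.gradient(y, x)  # analytic answer: 3 * x**2 = 12
print(dy_dx.numpy())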
## Compute gradient of loss "by hand" using numpy

from scipy.special import logsumexp


def BCE_with_logits(logits, targets):
    N = logits.shape[0]
    logits = logits.reshape(N, 1)
    logits_plus = np.hstack([np.zeros((N, 1)), logits])  # e^0=1
    logits_minus = np.hstack([np.zeros((N, 1)), -logits])
    logp1 = -logsumexp(logits_minus, axis=1)
    logp0 = -logsumexp(logits_plus, axis=1)
    logprobs = logp1 * targets + logp0 * (1 - targets)
    return -np.sum(logprobs) / N


def sigmoid(x):
    return 0.5 * (np.tanh(x / 2.0) + 1)


def predict_logit(weights, inputs):
    return np.dot(inputs, weights)  # Already vectorized


def predict_prob(weights, inputs):
    return sigmoid(predict_logit(weights, inputs))


def NLL(weights, batch):
    X, y = batch
    logits = predict_logit(weights, X)
    return BCE_with_logits(logits, y)


def NLL_grad(weights, batch):
    X, y = batch
    N = X.shape[0]
    mu = predict_prob(weights, X)
    g = np.sum(np.dot(np.diag(mu - y), X), axis=0) / N
    return g


np.random.seed(0)
N = 100
D = 5
X = np.random.randn(N, D)
w = 10 * np.random.randn(D)
mu = predict_prob(w, X)
y = np.random.binomial(n=1, p=mu, size=N)

X_test = X
y_test = y

y_pred = predict_prob(w, X_test)
loss = NLL(w, (X_test, y_test))
grad_np = NLL_grad(w, (X_test, y_test))
print("params {}".format(w))
# print("pred {}".format(y_pred))
print("loss {}".format(loss))
print("grad {}".format(grad_np))
params [ 3.8273243  -0.34242281 10.96346846 -2.34215801 -3.47450652]
loss 0.05501843790657687
grad [-0.01360904  0.00325892  0.00844617  0.00848175  0.01390088]
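As a quick sanity check (a sketch, not part of the original notebook), we can compare the hand-derived gradient against a finite-difference approximation using scipy.optimize.approx_fprime; the step size eps and tolerance are arbitrary choices.

# Finite-difference check of the hand-derived gradient (sketch).
from scipy.optimize import approx_fprime

eps = 1e-6
grad_fd = approx_fprime(w, lambda w_: NLL(w_, (X_test, y_test)), eps)
print("max abs difference {:.2e}".format(np.max(np.abs(grad_fd - grad_np))))
assert np.allclose(grad_fd, grad_np, atol=1e-4)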
w_tf = tf.Variable(np.reshape(w, (D, 1)))
x_test_tf = tf.convert_to_tensor(X_test, dtype=np.float64)
y_test_tf = tf.convert_to_tensor(np.reshape(y_test, (-1, 1)), dtype=np.float64)

with tf.GradientTape() as tape:
    logits = tf.linalg.matmul(x_test_tf, w_tf)
    y_pred = tf.math.sigmoid(logits)
    loss_batch = tf.nn.sigmoid_cross_entropy_with_logits(y_test_tf, logits)
    loss_tf = tf.reduce_mean(loss_batch, axis=0)

grad_tf = tape.gradient(loss_tf, [w_tf])
grad_tf = grad_tf[0][:, 0].numpy()
assert np.allclose(grad_np, grad_tf)

print("params {}".format(w_tf))
# print("pred {}".format(y_pred))
print("loss {}".format(loss_tf))
print("grad {}".format(grad_tf))
params <tf.Variable 'Variable:0' shape=(5, 1) dtype=float64, numpy=
array([[ 3.8273243 ],
       [-0.34242281],
       [10.96346846],
       [-2.34215801],
       [-3.47450652]])>
loss [0.05501844]
grad [-0.01360904  0.00325892  0.00844617  0.00848175  0.01390088]
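To show how the tape is typically used for optimization, here is a minimal gradient-descent sketch (not from the original notebook); the zero initialization, learning rate, and number of steps are arbitrary choices, and a fresh tape is created on every iteration because a non-persistent tape can only be used once.

# Plain gradient descent on the same data (sketch; hyperparameters are assumed).
w_fit = tf.Variable(tf.zeros((D, 1), dtype=tf.float64))
learning_rate = 0.5

for step in range(100):
    with tf.GradientTape() as tape:
        logits = tf.linalg.matmul(x_test_tf, w_fit)
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(y_test_tf, logits))
    g = tape.gradient(loss, w_fit)
    w_fit.assign_sub(learning_rate * g)  # w <- w - lr * grad

print("fitted params {}".format(w_fit.numpy().ravel()))
print("final loss {}".format(loss.numpy()))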