GitHub Repository: probml/pyprobml
Path: blob/master/notebooks/book1/11/supplementary/svi_linear_regression_1d_tfp.ipynb
Kernel: Python 3

Open In Colab

```python
# Tensorflow
try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass
import tensorflow as tf
from tensorflow import keras

print("tf version {}".format(tf.__version__))
tf.config.list_physical_devices('GPU')
```
tf version 2.4.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
```python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow.compat.v2 as tf
import tensorflow_probability as tfp

tf.enable_v2_behavior()

sns.reset_defaults()
sns.set_context(context="talk", font_scale=0.7)

tfd = tfp.distributions
```
```python
# @title Synthesize dataset.
w0 = 0.125
b0 = 5.0
x_range = [-20, 60]


def load_dataset(n=150, n_tst=150):
    np.random.seed(43)

    def s(x):
        # Noise scale grows with x: heteroscedastic observation noise.
        g = (x - x_range[0]) / (x_range[1] - x_range[0])
        return 3 * (0.25 + g**2.0)

    x = (x_range[1] - x_range[0]) * np.random.rand(n) + x_range[0]
    eps = np.random.randn(n) * s(x)
    y = (w0 * x * (1.0 + np.sin(x)) + b0) + eps
    x = x[..., np.newaxis]
    x_tst = np.linspace(*x_range, num=n_tst).astype(np.float32)
    x_tst = x_tst[..., np.newaxis]
    return y, x, x_tst


y, x, x_tst = load_dataset()
```
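For reference, the generative process coded above is

$$
y = b_0 + w_0\,x\,\big(1 + \sin x\big) + \varepsilon,
\qquad \varepsilon \sim \mathcal{N}\big(0,\; s(x)^2\big),
\qquad s(x) = 3\big(0.25 + g(x)^2\big),
$$

where $g(x) = (x - x_{\min}) / (x_{\max} - x_{\min})$ rescales $x$ to $[0, 1]$. The noise standard deviation therefore grows with $x$: the data are heteroscedastic by construction, which is what the later models will try to capture.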
### Case 1: No Uncertainty

```python
negloglik = lambda y, rv_y: -rv_y.log_prob(y)

# Build model: a linear layer whose output parameterizes the mean of a
# Normal with fixed unit scale.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1),
    tfp.layers.DistributionLambda(lambda t: tfd.Normal(loc=t, scale=1)),
])

# Do inference.
model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=negloglik)
model.fit(x, y, epochs=1000, verbose=False)

# Profit.
for w in model.weights:
    print(np.squeeze(w.numpy()))
yhat = model(x_tst)
assert isinstance(yhat, tfd.Distribution)

# @title Figure 1: No uncertainty.
w = np.squeeze(model.layers[-2].kernel.numpy())
b = np.squeeze(model.layers[-2].bias.numpy())

plt.figure(figsize=[6, 1.5])  # inches
plt.plot(x, y, "b.", label="observed")
plt.plot(x_tst, yhat.mean(), "r", label="mean", linewidth=4)
plt.ylim(-0.0, 17)
plt.yticks(np.linspace(0, 15, 4)[1:])
plt.xticks(np.linspace(*x_range, num=9))
ax = plt.gca()
ax.xaxis.set_ticks_position("bottom")
ax.yaxis.set_ticks_position("left")
ax.spines["left"].set_position(("data", 0))
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
plt.legend(loc="center left", fancybox=True, framealpha=0.0, bbox_to_anchor=(1.05, 0.5))
plt.show()
```
0.13577648 5.134606
[Figure 1: No uncertainty — observed data (blue dots) with the fitted mean line (red).]
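Why the fit looks like ordinary least squares: with the scale pinned at 1, the per-point loss is

$$
-\log \mathcal{N}\big(y;\,\mu(x),\,1\big) = \tfrac{1}{2}\big(y - \mu(x)\big)^2 + \tfrac{1}{2}\log 2\pi,
$$

so minimizing `negloglik` is squared error up to an additive constant, and the learned slope and intercept printed above land close to the true $w_0 = 0.125$, $b_0 = 5.0$.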
"""### Case 2: Aleatoric Uncertainty""" # Build model. model = tf.keras.Sequential( [ tf.keras.layers.Dense(1 + 1), tfp.layers.DistributionLambda( lambda t: tfd.Normal(loc=t[..., :1], scale=1e-3 + tf.math.softplus(0.05 * t[..., 1:])) ), ] ) # Do inference. model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=negloglik) model.fit(x, y, epochs=1000, verbose=False) # Profit. [print(np.squeeze(w.numpy())) for w in model.weights] yhat = model(x_tst) assert isinstance(yhat, tfd.Distribution) # @title Figure 2: Aleatoric Uncertainty plt.figure(figsize=[6, 1.5]) # inches plt.plot(x, y, "b.", label="observed") m = yhat.mean() s = yhat.stddev() plt.plot(x_tst, m, "r", linewidth=4, label="mean") plt.plot(x_tst, m + 2 * s, "g", linewidth=2, label=r"mean + 2 stddev") plt.plot(x_tst, m - 2 * s, "g", linewidth=2, label=r"mean - 2 stddev") plt.ylim(-0.0, 17) plt.yticks(np.linspace(0, 15, 4)[1:]) plt.xticks(np.linspace(*x_range, num=9)) ax = plt.gca() ax.xaxis.set_ticks_position("bottom") ax.yaxis.set_ticks_position("left") ax.spines["left"].set_position(("data", 0)) ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) # ax.spines['left'].set_smart_bounds(True) # ax.spines['bottom'].set_smart_bounds(True) plt.legend(loc="center left", fancybox=True, framealpha=0.0, bbox_to_anchor=(1.05, 0.5)) # plt.savefig('/tmp/fig2.png', bbox_inches='tight', dpi=300) # save_fig('svi_regression_1d_mean_var.pdf') plt.show()
[0.12663428 0.9923244 ] [ 5.2025266 13.012779 ]
[Figure 2: Aleatoric uncertainty — fitted mean (red) with mean ± 2 stddev bands (green) widening as x grows.]
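The second output unit of the `Dense(1 + 1)` layer parameterizes the noise scale. As a sanity check, here is a minimal sketch (not part of the original notebook) that reconstructs the learned scale function from the weights printed above; the first column of the kernel/bias is the mean head, the second the pre-softplus scale head.

```python
# Read the learned noise scale back off the Dense(1 + 1) weights.
kernel, bias = [np.squeeze(w.numpy()) for w in model.weights]
w_scale, b_scale = kernel[1], bias[1]  # second column: pre-softplus scale
xs = np.array([-20.0, 0.0, 20.0, 40.0, 60.0], dtype=np.float32)
scale = 1e-3 + tf.math.softplus(0.05 * (w_scale * xs + b_scale)).numpy()
print(scale)  # predicted stddev grows with x, mirroring s(x) in the data generator
```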
"""### Case 3: Epistemic Uncertainty""" # Specify the surrogate posterior over `keras.layers.Dense` `kernel` and `bias`. def posterior_mean_field(kernel_size, bias_size=0, dtype=None): n = kernel_size + bias_size c = np.log(np.expm1(1.0)) return tf.keras.Sequential( [ tfp.layers.VariableLayer(2 * n, dtype=dtype), tfp.layers.DistributionLambda( lambda t: tfd.Independent( tfd.Normal(loc=t[..., :n], scale=1e-5 + tf.nn.softplus(c + t[..., n:])), reinterpreted_batch_ndims=1 ) ), ] ) # Specify the prior over `keras.layers.Dense` `kernel` and `bias`. def prior_trainable(kernel_size, bias_size=0, dtype=None): n = kernel_size + bias_size return tf.keras.Sequential( [ tfp.layers.VariableLayer(n, dtype=dtype), tfp.layers.DistributionLambda( lambda t: tfd.Independent(tfd.Normal(loc=t, scale=1), reinterpreted_batch_ndims=1) ), ] ) # Build model. model = tf.keras.Sequential( [ tfp.layers.DenseVariational(1, posterior_mean_field, prior_trainable, kl_weight=1 / x.shape[0]), tfp.layers.DistributionLambda(lambda t: tfd.Normal(loc=t, scale=1)), ] ) # Do inference. model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=negloglik) model.fit(x, y, epochs=1000, verbose=False) # Profit. [print(np.squeeze(w.numpy())) for w in model.weights] yhat = model(x_tst) assert isinstance(yhat, tfd.Distribution) # @title Figure 3: Epistemic Uncertainty plt.figure(figsize=[6, 1.5]) # inches plt.clf() plt.plot(x, y, "b.", label="observed") yhats = [model(x_tst) for _ in range(100)] avgm = np.zeros_like(x_tst[..., 0]) for i, yhat in enumerate(yhats): m = np.squeeze(yhat.mean()) s = np.squeeze(yhat.stddev()) if i < 25: plt.plot(x_tst, m, "r", label="ensemble means" if i == 0 else None, linewidth=0.5) avgm += m plt.plot(x_tst, avgm / len(yhats), "r", label="overall mean", linewidth=4) plt.ylim(-0.0, 17) plt.yticks(np.linspace(0, 15, 4)[1:]) plt.xticks(np.linspace(*x_range, num=9)) ax = plt.gca() ax.xaxis.set_ticks_position("bottom") ax.yaxis.set_ticks_position("left") ax.spines["left"].set_position(("data", 0)) ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) # ax.spines['left'].set_smart_bounds(True) # ax.spines['bottom'].set_smart_bounds(True) plt.legend(loc="center left", fancybox=True, framealpha=0.0, bbox_to_anchor=(1.05, 0.5)) # plt.savefig('/tmp/fig3.png', bbox_inches='tight', dpi=300) # save_fig('svi_regression_1d_post_mean.pdf') plt.show()
[ 0.12679578 5.1012235 -4.138065 -2.390639 ] [0.14064883 5.1075416 ]
[Figure 3: Epistemic uncertainty — 25 sampled regression lines (thin red) and their overall mean (thick red).]
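Stated explicitly, the objective `DenseVariational` minimizes here is the (per-example) negative ELBO

$$
\mathcal{L}(q) = \mathbb{E}_{w \sim q}\big[-\log p(y \mid x, w)\big]
+ \frac{1}{N}\,\mathrm{KL}\big(q(w)\,\|\,p(w)\big),
$$

with $q$ the mean-field surrogate from `posterior_mean_field` and $p$ the prior from `prior_trainable`; the `kl_weight=1 / x.shape[0]` argument supplies the $1/N$ factor so the KL penalty is on the same per-example scale as the mean negative log-likelihood.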
### Case 4: Both Aleatoric and Epistemic Uncertainty

```python
# Build model: DenseVariational weights (epistemic) feeding a Normal with a
# learned, input-dependent scale (aleatoric).
model = tf.keras.Sequential([
    tfp.layers.DenseVariational(1 + 1, posterior_mean_field, prior_trainable, kl_weight=1 / x.shape[0]),
    tfp.layers.DistributionLambda(
        lambda t: tfd.Normal(loc=t[..., :1], scale=1e-3 + tf.math.softplus(0.01 * t[..., 1:]))
    ),
])

# Do inference.
model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=negloglik)
model.fit(x, y, epochs=1000, verbose=False)

# Profit.
for w in model.weights:
    print(np.squeeze(w.numpy()))
yhat = model(x_tst)
assert isinstance(yhat, tfd.Distribution)

# @title Figure 4: Both Aleatoric & Epistemic Uncertainty
plt.figure(figsize=[6, 1.5])  # inches
plt.plot(x, y, "b.", label="observed")

yhats = [model(x_tst) for _ in range(100)]
avgm = np.zeros_like(x_tst[..., 0])
for i, yhat in enumerate(yhats):
    m = np.squeeze(yhat.mean())
    s = np.squeeze(yhat.stddev())
    if i < 15:
        plt.plot(x_tst, m, "r", label="ensemble means" if i == 0 else None, linewidth=1.0)
        plt.plot(x_tst, m + 2 * s, "g", linewidth=0.5, label="ensemble means + 2 ensemble stdev" if i == 0 else None)
        plt.plot(x_tst, m - 2 * s, "g", linewidth=0.5, label="ensemble means - 2 ensemble stdev" if i == 0 else None)
    avgm += m
plt.plot(x_tst, avgm / len(yhats), "r", label="overall mean", linewidth=4)
plt.ylim(-0.0, 17)
plt.yticks(np.linspace(0, 15, 4)[1:])
plt.xticks(np.linspace(*x_range, num=9))
ax = plt.gca()
ax.xaxis.set_ticks_position("bottom")
ax.yaxis.set_ticks_position("left")
ax.spines["left"].set_position(("data", 0))
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
plt.legend(loc="center left", fancybox=True, framealpha=0.0, bbox_to_anchor=(1.05, 0.5))
plt.show()
```
[ 0.14109193 2.7442472 5.2209177 3.9062521 -3.440829 -0.90962136 -2.2121413 -0.02255234] [0.1261001 2.6492155 5.233518 3.8296285]
[Figure 4: Both aleatoric and epistemic uncertainty — sampled means (red) with per-sample ± 2 stddev bands (green) and the overall mean (thick red).]
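To make the two uncertainty sources explicit, here is a minimal sketch (not part of the original notebook) that decomposes the final model's predictive variance using the law of total variance, $\mathrm{Var}[y] = \mathbb{E}_w[\mathrm{Var}[y \mid w]] + \mathrm{Var}_w[\mathbb{E}[y \mid w]]$, estimated over 100 posterior weight draws:

```python
# Decompose predictive variance into aleatoric and epistemic parts.
draws = [model(x_tst) for _ in range(100)]  # each call re-samples the weights
ms = np.stack([np.squeeze(d.mean()) for d in draws])    # (100, n_tst) means
ss = np.stack([np.squeeze(d.stddev()) for d in draws])  # (100, n_tst) stddevs
aleatoric_var = (ss**2).mean(axis=0)  # E_w[Var[y | x, w]]: avg noise variance
epistemic_var = ms.var(axis=0)        # Var_w[E[y | x, w]]: spread of the means
total_std = np.sqrt(aleatoric_var + epistemic_var)
print(total_std[::30])  # total predictive stddev at a few test inputs
```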