Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
probml
GitHub Repository: probml/pyprobml
Path: blob/master/notebooks/book1/02/robust_pdf_plot.ipynb
1193 views
Kernel: pyprobml

Understanding the Effect of Outliers on Different PDFs

import jax.numpy as jnp from jax import random import matplotlib.pyplot as plt from scipy.stats import t, laplace, norm import seaborn as sns import numpy as np try: from probml_utils import savefig, latexify except ModuleNotFoundError: %pip install -qq git+https://github.com/probml/probml-utils.git from probml_utils import savefig, latexify
# Request LaTeX-style figure sizing from probml_utils; the library warns and
# skips this step when the LATEXIFY environment variable is not set.
latexify(width_scale_factor=2, fig_height=2.0)
C:\Users\NITISH SHARMA\.pyenv\pyenv-win\versions\3.9.10\lib\site-packages\probml_utils\plotting.py:26: UserWarning: LATEXIFY environment variable not set, not latexifying warnings.warn("LATEXIFY environment variable not set, not latexifying")
def plot_outlier_effect(
    save_name,
    outlier_pos=0,
    outliers=(),
    bins=7,
    samples_norm_dist=30,
    samples_graph_xaxis=500,
    range_xaxis=(-5, 10),
    range_yaxis=(0, 0.60),
    fig=None,
    ax=None,
):
    """
    Plot Gaussian, Laplace and Student-t fits to a normal sample with optional outliers.

    A fixed-seed sample is drawn from a normal distribution and shown as a
    histogram. If outliers are supplied they are shifted by ``outlier_pos``,
    appended to the sample and drawn as additional histogram bars. Each of
    the three distributions is then fitted to the combined data with
    ``scipy.stats`` and its PDF is drawn over the histogram, which makes the
    different sensitivity of the fits to the outliers visible.

    Parameters
    ----------
    save_name : str
        File name handed to ``savefig``. An empty string (or ``None``)
        disables saving.
    outlier_pos : int or float, default=0
        Offset added to every value in ``outliers``.
    outliers : sequence of float, default=()
        Outlier values to append to the sample. ``None`` is treated as empty.
    bins : int, default=7
        ``bins`` argument forwarded to the histogram of the normal sample.
    samples_norm_dist : int, default=30
        Number of samples drawn from the normal distribution.
    samples_graph_xaxis : int, default=500
        Number of evenly spaced x values at which the PDFs are evaluated.
    range_xaxis : sequence of two numbers, default=(-5, 10)
        Lower and upper end of the x grid; also determines the x ticks.
    range_yaxis : sequence of two numbers, default=(0, 0.6)
        Lower and upper y-axis limit.
    fig : matplotlib figure, optional
        Figure to draw on. Only consulted when ``ax`` is not given, in which
        case the figure's current axes are used.
    ax : matplotlib axes, optional
        Axes to draw on. When both ``fig`` and ``ax`` are omitted a new
        figure and axes are created.

    Returns
    -------
    fig : matplotlib figure
        The figure that contains the plot.
    ax : matplotlib axes
        The axes the plot was drawn on.
    """
    outliers = () if outliers is None else outliers
    # A length check (rather than truthiness) also accepts NumPy arrays.
    n_outliers = len(outliers)

    # The PRNG key is fixed, so every call sees the same normal sample.
    norm_dist_sample = random.normal(random.PRNGKey(42), shape=(samples_norm_dist,))
    n_normal = norm_dist_sample.shape[0]

    # Values of the random variable at which the fitted PDFs are evaluated.
    x_axis = jnp.linspace(range_xaxis[0], range_xaxis[1], samples_graph_xaxis)

    # Honour a caller-supplied figure/axes; create new ones only when needed.
    if ax is None:
        if fig is None:
            fig, ax = plt.subplots()
        else:
            ax = fig.gca()
    elif fig is None:
        fig = ax.figure

    # Every observation (normal sample or outlier) contributes the same mass,
    # so the bar heights of all histograms together sum to one.
    bar_weight = 1 / (n_normal + n_outliers)
    hist_style = {"color": "steelblue", "ec": "steelblue", "rwidth": 0.8}

    # Histogram of the normal sample.
    ax.hist(
        np.array(norm_dist_sample),
        bins,
        weights=np.full(n_normal, bar_weight),
        **hist_style,
    )

    if n_outliers:
        # Histogram of the shifted outliers, one bin per outlier.
        ax.hist(
            np.array(outliers) + outlier_pos,
            n_outliers,
            weights=np.full(n_outliers, bar_weight),
            **hist_style,
        )
        samples = jnp.hstack((norm_dist_sample, jnp.array(outliers) + outlier_pos))
    else:
        samples = norm_dist_sample

    # Fit each distribution to the data and evaluate its PDF on the x grid.
    loc, scale = norm.fit(samples)
    norm_pdf = norm.pdf(x_axis, loc=loc, scale=scale)

    loc, scale = laplace.fit(samples)
    laplace_pdf = laplace.pdf(x_axis, loc=loc, scale=scale)

    dof, loc, scale = t.fit(samples)
    student_t_pdf = t.pdf(x_axis, dof, loc=loc, scale=scale)

    # x ticks every 5 units across the requested range.
    ax.set_xticks(jnp.arange(range_xaxis[0], range_xaxis[1] + 1, 5))

    # y limits as requested, with 5 evenly spaced ticks.
    ax.set_ylim(range_yaxis)
    ax.set_yticks(jnp.linspace(range_yaxis[0], range_yaxis[1], 5))

    # Plot order matters: the label-only legend call below pairs the first
    # three labels with these lines in order, and "data" with the histogram.
    ax.plot(x_axis, norm_pdf, "k-", linewidth=2.0)
    ax.plot(x_axis, student_t_pdf, "r-.", linewidth=2.0)
    ax.plot(x_axis, laplace_pdf, "b:", linewidth=2.0)

    ax.legend(("gaussian", "student T", "laplace", "data"))
    ax.set_xlabel("$x$")
    ax.set_ylabel("$p(x)$")

    # Restrict despine to the axes drawn on instead of pyplot's current figure.
    sns.despine(ax=ax)

    # NOTE(review): savefig is the probml_utils helper; presumably it saves
    # pyplot's current figure - confirm before relying on it with a custom ax.
    if save_name:
        savefig(save_name)

    return fig, ax
# Baseline figure: the three densities fitted to the normal sample alone (no outliers).
plot_outlier_effect(save_name="robust_pdf_plot")
WARNING:absl:No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.) C:\Users\NITISH SHARMA\.pyenv\pyenv-win\versions\3.9.10\lib\site-packages\probml_utils\plotting.py:79: UserWarning: set FIG_DIR environment variable to save figures warnings.warn("set FIG_DIR environment variable to save figures")
(<Figure size 432x288 with 1 Axes>, <AxesSubplot:xlabel='$x$', ylabel='$p(x)$'>)
Image in a Jupyter notebook
# Same sample with three outliers (8, 8.75, 9.5) appended before fitting.
plot_outlier_effect(save_name="robust_pdf_plot_outliers", outliers=[8, 8.75, 9.5])
C:\Users\NITISH SHARMA\.pyenv\pyenv-win\versions\3.9.10\lib\site-packages\probml_utils\plotting.py:79: UserWarning: set FIG_DIR environment variable to save figures warnings.warn("set FIG_DIR environment variable to save figures")
(<Figure size 432x288 with 1 Axes>, <AxesSubplot:xlabel='$x$', ylabel='$p(x)$'>)
Image in a Jupyter notebook
from ipywidgets import interact


@interact(outlier_pos=(-5, 5))
def interactive_plot(outlier_pos):
    """Redraw the outlier figure with the outliers shifted by the slider value."""
    # The empty save_name means nothing is written to disk on each redraw.
    plot_outlier_effect(
        save_name="",
        outlier_pos=outlier_pos,
        outliers=[8, 8.75, 9.5],
    )
interactive(children=(IntSlider(value=0, description='outlier_pos', max=5, min=-5), Output()), _dom_classes=('…