def plot_outlier_effect(
    save_name,
    outlier_pos=0,
    outliers=None,
    bins=7,
    samples_norm_dist=30,
    samples_graph_xaxis=500,
    range_xaxis=(-5, 10),
    range_yaxis=(0, 0.60),
    fig=None,
    ax=None,
):
    """
    Fit and plot Gaussian, Laplace and Student T densities on sampled data.

    Draws ``samples_norm_dist`` values from a standard normal distribution
    using a fixed PRNG key (so the sample is the same on every call),
    optionally appends shifted outliers, fits each of the three
    distributions to the combined data by ``scipy.stats`` ``fit``, and
    draws the fitted PDFs on top of a normalised histogram of the data.

    Args:
    ----------
    save_name : string
        Name handed to ``savefig``; an empty (or ``None``) value skips saving
    outlier_pos : int, default=0
        Offset added to every value in ``outliers``
    outliers : sequence, default=None
        Outlier values to append to the sample; ``None`` or an empty
        sequence means no outliers
    bins : int, default=7
        Number of histogram bins used for the normal sample
    samples_norm_dist : int, default=30
        Number of samples to be taken from the normal distribution
    samples_graph_xaxis : int, default=500
        Number of evenly spaced x values at which the PDFs are evaluated
    range_xaxis : sequence of two numbers, default=(-5, 10)
        Lower and upper end of the x-axis grid
    range_yaxis : sequence of two numbers, default=(0, 0.6)
        Lower and upper limit of the y-axis
    fig : None
        Accepted for interface compatibility; currently ignored because a
        new figure is always created
    ax : None
        Accepted for interface compatibility; currently ignored because a
        new axes is always created

    Returns:
    ----------
    fig : matplotlib figure object
        The newly created figure
    ax : matplotlib axis object
        The axes holding the histogram(s) and the three PDF curves
    """
    # Normalise to a list so that None, tuples and arrays all behave the
    # same in the truthiness and len() checks below.
    outliers = [] if outliers is None else list(outliers)

    norm_dist_sample = random.normal(random.PRNGKey(42), shape=(samples_norm_dist,))
    x_axis = jnp.linspace(range_xaxis[0], range_xaxis[1], samples_graph_xaxis)

    # NOTE: the incoming fig/ax arguments are overwritten here.
    fig, ax = plt.subplots()

    # Each group is (values, number_of_bins); the normal sample is always
    # drawn first, the shifted outliers (if any) second.
    hist_groups = [(np.array(norm_dist_sample), bins)]
    if outliers:
        samples = jnp.hstack((norm_dist_sample, jnp.array(outliers) + outlier_pos))
        hist_groups.append((np.array(outliers) + outlier_pos, len(outliers)))
    else:
        samples = norm_dist_sample

    # Every point is weighted by 1 / (total number of points) so that bar
    # heights are fractions of the whole data set, outliers included.
    total_points = norm_dist_sample.shape[0] + len(outliers)
    for values, n_bins in hist_groups:
        ax.hist(
            values,
            n_bins,
            color="steelblue",
            ec="steelblue",
            weights=np.full(len(values), 1 / total_points),
            rwidth=0.8,
        )

    # Fit each family to the (possibly contaminated) data and evaluate its
    # PDF on the plotting grid.
    loc, scale = norm.fit(samples)
    norm_pdf = norm.pdf(x_axis, loc=loc, scale=scale)

    loc, scale = laplace.fit(samples)
    laplace_pdf = laplace.pdf(x_axis, loc=loc, scale=scale)

    fd, loc, scale = t.fit(samples)
    studentT_pdf = t.pdf(x_axis, fd, loc=loc, scale=scale)

    ax.set_xticks(jnp.arange(range_xaxis[0], range_xaxis[1] + 1, 5))
    ax.set_ylim(range_yaxis)
    ax.set_yticks(jnp.linspace(range_yaxis[0], range_yaxis[1], 5))

    ax.plot(x_axis, norm_pdf, "k-", linewidth=2.0)
    ax.plot(x_axis, studentT_pdf, "r-.", linewidth=2.0)
    ax.plot(x_axis, laplace_pdf, "b:", linewidth=2.0)

    # Labels are listed in the order the curves were plotted above; the
    # trailing "data" label is intended for the histogram bars.
    ax.legend(("gaussian", "student T", "laplace", "data"))
    ax.set_xlabel("$x$")
    ax.set_ylabel("$p(x)$")
    sns.despine()

    # Truthiness check also tolerates save_name=None.
    if save_name:
        savefig(save_name)
    return fig, ax