Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
probml
GitHub Repository: probml/pyprobml
Path: blob/master/deprecated/scripts/anscombes_quartet.py
1192 views
1
# Anscombe's quartet
2
# Author: Drishtii
3
4
import superimport
5
6
import seaborn as sns
7
import matplotlib.pyplot as plt
8
import pyprobml_utils as pml
9
10
# Plot all four Anscombe datasets in a single row of panels, each with its
# own fitted regression line, then save the combined figure.
sns.set_theme(style="ticks")
df = sns.load_dataset("anscombe")

g = sns.lmplot(
    data=df,
    x="x",
    y="y",
    col="dataset",  # one panel per dataset ...
    hue="dataset",  # ... each drawn in its own colour
    col_wrap=4,
    ci=None,  # no confidence band around the fitted line
    palette="muted",
    height=4,
    scatter_kws={"s": 50, "alpha": 1},
    legend_out=True,
    truncate=False,
)
# Use the same x-range in every panel.
g.set(xlim=(2.5, 20.5))
pml.savefig("anscombes_quartet.pdf")
16
17
# One stand-alone figure per dataset: scatter plus fitted line, titled with
# the sample means of x and y. A count / mean / variance table is printed
# for each dataset as well.
names = df['dataset'].unique()
for name in names:
    print(name)

    # Keep only the rows that belong to the current dataset.
    df2 = df[df['dataset'] == name]
    lm = sns.lmplot(data=df2, x="x", y="y", ci=None, truncate=False)

    # Fix the axis ranges so the four figures are directly comparable.
    ax = plt.gca()
    ax.set_xlim(0, 20)
    ax.set_ylim(0, 14)

    mx = df2['x'].to_numpy().mean()
    my = df2['y'].to_numpy().mean()
    ax.set_title(f'{name}, mx={mx:0.3f}, my={my:0.3f}', fontsize=12)

    print(df2[['x', 'y']].agg(['count', 'mean', 'var']))
    pml.savefig(f"anscombes_quartet_{name}.pdf")
31
32
33
# Compare the two different estimators for the variance: the MLE (mean of
# squared deviations, computed by hand and via numpy's default) and the
# unbiased estimator (ddof=1).
# https://github.com/probml/pml-book/issues/264
for d in ['I', 'II', 'III', 'IV']:
    print('dataset ', d)
    subset = df[df['dataset'] == d]

    x = subset['x'].to_numpy()
    x_mle = ((x - x.mean()) ** 2).mean()
    print('var x, MLE = {:.2f}'.format(x_mle))
    print('var x, numpy: {:.2f}'.format(x.var()))
    print('var x, unbiased estimator: {:.2f}\n'.format(x.var(ddof=1)))

    y = subset['y'].to_numpy()
    y_mle = ((y - y.mean()) ** 2).mean()
    print('var y, MLE = {:.2f}'.format(y_mle))
    print('var y, numpy: {:.2f}'.format(y.var()))
    print('var y, unbiased estimator: {:.2f}\n'.format(y.var(ddof=1)))
47
48
49