Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
probml
GitHub Repository: probml/pyprobml
Path: blob/master/deprecated/scripts/ard_classification_demo.py
1192 views
1
# Demo of logistic regression with automatic relevancy determination
2
# to eliminate irrelevant features.
3
4
#https://github.com/AmazaspShumik/sklearn-bayes/blob/master/ipython_notebooks_tutorials/rvm_ard/ard_classification_demo.ipynb
5
6
import superimport
7
8
from ard_linreg_logreg import ClassificationARD
9
from ard_vb_linreg_logreg import VBClassificationARD
10
11
import numpy as np
12
import matplotlib.pyplot as plt
13
from pyprobml_utils import save_fig
14
15
from matplotlib import cm
16
from sklearn.model_selection import train_test_split
17
from sklearn.linear_model import LogisticRegressionCV
18
19
20
def generate_dataset(n_samples = 500, n_features = 100,
                     cov_class_1 = None,
                     cov_class_2 = None,
                     mean_class_1 = (-1,0.4),
                     mean_class_2 = (-1,-0.4)):
    '''Generate a binary classification problem with two relevant features.

    The first two columns of X are drawn from a class-conditional Gaussian
    (one per class); the remaining n_features - 2 columns are standard-normal
    noise, i.e. irrelevant to the labels.

    Parameters
    ----------
    n_samples : int
        Total number of rows in X; split evenly between the two classes.
    n_features : int
        Total number of columns in X (only the first 2 carry signal).
    cov_class_1, cov_class_2 : 2x2 array-like or None
        Covariance of the two relevant features for each class.
        Defaults to [[0.9, 0.1], [1.5, 0.2]].
        NOTE(review): this default is not symmetric, so it is not a valid
        covariance matrix; numpy may warn but still samples. Kept as-is to
        preserve the original demo's output.
    mean_class_1, mean_class_2 : tuple
        Means of the two relevant features for each class.

    Returns
    -------
    X : ndarray of shape (n_samples, n_features)
    Y : ndarray of shape (n_samples,)
        0/1 labels: first half of the rows are class 0, second half class 1.
    '''
    # Avoid mutable (list) default arguments: resolve defaults here instead,
    # so a shared default object can never be mutated across calls.
    if cov_class_1 is None:
        cov_class_1 = [[0.9, 0.1], [1.5, .2]]
    if cov_class_2 is None:
        cov_class_2 = [[0.9, 0.1], [1.5, .2]]
    X = np.random.randn(n_samples, n_features)
    Y = np.ones(n_samples)
    sep = n_samples // 2
    Y[0:sep] = 0
    # Overwrite the first two columns with class-conditional Gaussian samples;
    # the remaining columns stay pure noise.
    X[0:sep, 0:2] = np.random.multivariate_normal(mean=mean_class_1,
                                                  cov=cov_class_1, size=sep)
    X[sep:n_samples, 0:2] = np.random.multivariate_normal(mean=mean_class_2,
                                                          cov=cov_class_2,
                                                          size=n_samples - sep)
    return X, Y

def run_demo(n_samples, n_features):
    '''Fit several regularized logistic-regression models on a synthetic
    dataset (2 relevant features + n_features - 2 noise features) and plot
    each model's predicted probability surface over the two relevant
    dimensions.

    Parameters
    ----------
    n_samples : int
        Number of rows in the generated dataset.
    n_features : int
        Number of feature columns (only the first 2 are relevant).

    Side effects: shows matplotlib figures and saves one pdf per model
    via save_fig.
    '''
    np.random.seed(42)  # reproducible dataset and grid noise
    X, Y = generate_dataset(n_samples, n_features)

    # Scatter plot of the two relevant features, colored by class.
    plt.figure(figsize=(8, 6))
    plt.plot(X[Y == 0, 0], X[Y == 0, 1], "bo", markersize=3)
    plt.plot(X[Y == 1, 0], X[Y == 1, 1], "ro", markersize=3)
    plt.xlabel('feature 1')
    plt.ylabel('feature 2')
    plt.title("Example of dataset")
    plt.show()

    # Split: capital X, Y = training set; lower-case x, y = test set.
    X, x, Y, y = train_test_split(X, Y, test_size=0.4)

    models = []
    names = []

    models.append(ClassificationARD())
    names.append('logreg-ARD-Laplace')

    models.append(VBClassificationARD())
    names.append('logreg-ARD-VB')

    models.append(LogisticRegressionCV(penalty='l2', cv=3))
    names.append('logreg-CV-L2')

    models.append(LogisticRegressionCV(penalty='l1', solver='liblinear', cv=3))
    names.append('logreg-CV-L1')

    nmodels = len(models)
    for i in range(nmodels):
        print('\nfitting {}'.format(names[i]))
        models[i].fit(X, Y)

    # Build a 2-D evaluation grid spanning the test set's first two features.
    # The remaining columns are filled with fresh Gaussian noise so each grid
    # point is a full n_features-dimensional input to the models.
    n_grid = 100
    max_x = np.max(x[:, 0:2], axis=0)
    min_x = np.min(x[:, 0:2], axis=0)
    X1 = np.linspace(min_x[0], max_x[0], n_grid)
    X2 = np.linspace(min_x[1], max_x[1], n_grid)
    x1, x2 = np.meshgrid(X1, X2)
    Xgrid = np.zeros([n_grid**2, 2])
    Xgrid[:, 0] = np.reshape(x1, (n_grid**2,))
    Xgrid[:, 1] = np.reshape(x2, (n_grid**2,))
    Xg = np.random.randn(n_grid**2, n_features)
    Xg[:, 0] = Xgrid[:, 0]
    Xg[:, 1] = Xgrid[:, 1]

    # For each fitted model: contour-plot p(y=1|x) over the grid, overlay the
    # test points, and save the figure to a pdf named after the model.
    for i in range(nmodels):
        pred = models[i].predict_proba(Xg)[:, 1]
        fig, ax = plt.subplots()
        ax.contourf(X1, X2, np.reshape(pred, (n_grid, n_grid)), cmap=cm.coolwarm)
        ax.plot(x[y == 0, 0], x[y == 0, 1], "bo", markersize=5)
        ax.plot(x[y == 1, 0], x[y == 1, 1], "ro", markersize=5)
        # Count nonzero weights to show how sparse each solution is.
        nnz = np.sum(models[i].coef_ != 0)
        ax.set_title('method {}, N={}, D={}, nnz {}'.format(names[i], n_samples, n_features, nnz))
        name = '{}-N{}-D{}.pdf'.format(names[i], n_samples, n_features)
        save_fig(name)
        plt.show()

# Sweep the demo over a grid of (sample count, feature dimensionality)
# settings: one dimensionality, three dataset sizes.
ndims = [100]
ndata = [100, 200, 500]
for n_samples in ndata:
    for n_features in ndims:
        run_demo(n_samples, n_features)