Source: CoCalc share of GitHub repository debakarr/machinelearning
Path: Part 9 - Dimension Reduction/Kernel PCA/kernel_pca.py
# Kernel PCA
#
# Classification demo: reduce two features to two kernel principal
# components, then fit a linear classifier in the reduced space.

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset.
# Features are columns 2-3 (Age, Estimated Salary per the plot labels
# below); the target is column 4 (binary purchase decision).
dataset = pd.read_csv('Social_Network_Ads.csv')
X = dataset.iloc[:, 2:4].values
y = dataset.iloc[:, 4].values
# Splitting the dataset into the Training set and Test set
# (75/25 split, fixed seed for reproducibility).
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0)

# Feature Scaling: standardise each feature to zero mean / unit variance.
# The scaler is fitted on the training split only, then applied to the
# test split, to avoid leaking test statistics into training.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
22
23
# Applying Kernel PCA
24
from sklearn.decomposition import KernelPCA
25
kpca = KernelPCA(n_components = 2, kernel = 'rbf')
26
X_train = kpca.fit_transform(X_train)
27
X_test = kpca.transform(X_test)
# Fitting Logistic Regression to the Training set
# (a linear boundary in kernel-PCA space, which can be non-linear in
# the original feature space).
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix (rows: true class, columns: predicted).
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
40
41
# Visualising the Training set results
42
from matplotlib.colors import ListedColormap
43
X_set, y_set = X_train, y_train
44
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
45
np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
46
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
47
alpha = 0.75, cmap = ListedColormap(('red', 'green')))
48
plt.xlim(X1.min(), X1.max())
49
plt.ylim(X2.min(), X2.max())
50
for i, j in enumerate(np.unique(y_set)):
51
plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
52
c = ListedColormap(('red', 'green'))(i), label = j)
53
plt.title('Logistic Regression (Training set)')
54
plt.xlabel('Age')
55
plt.ylabel('Estimated Salary')
56
plt.legend()
57
plt.show()
58
59
# Visualising the Test set results
60
from matplotlib.colors import ListedColormap
61
X_set, y_set = X_test, y_test
62
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
63
np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
64
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
65
alpha = 0.75, cmap = ListedColormap(('red', 'green')))
66
plt.xlim(X1.min(), X1.max())
67
plt.ylim(X2.min(), X2.max())
68
for i, j in enumerate(np.unique(y_set)):
69
plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
70
c = ListedColormap(('red', 'green'))(i), label = j)
71
plt.title('Logistic Regression (Test set)')
72
plt.xlabel('Age')
73
plt.ylabel('Estimated Salary')
74
plt.legend()
75
plt.show()
76