Path: blob/master/Part 9 - Dimension Reduction/Kernel PCA/kernel_pca.R
1335 views
# Kernel PCA
#
# Pipeline: load the data, split into training/test sets, standardise the two
# feature columns, project them onto two kernel principal components (RBF
# kernel), fit a logistic regression on the components, then evaluate and plot
# the decision regions in component space.

# Importing the dataset
# Only columns 3:5 are kept; below, columns 1:2 are used as features and
# column 3 (Purchased) as the response.
dataset = read.csv('Social_Network_Ads.csv')
dataset = dataset[, 3:5]

# Splitting the dataset into the Training set and Test set
# install.packages('caTools')
library(caTools)
set.seed(123)  # fixed seed so the split is reproducible
split = sample.split(dataset$Purchased, SplitRatio = 0.75)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)

# Feature Scaling
# The test features are standardised with the TRAINING set's centre and scale
# so that both sets go through the same transformation; scaling the test set
# with its own statistics would put it on a different footing from the data
# the models were fitted on.
train_scaled = scale(training_set[, 1:2])
training_set[, 1:2] = train_scaled
test_set[, 1:2] = scale(test_set[, 1:2],
                        center = attr(train_scaled, 'scaled:center'),
                        scale = attr(train_scaled, 'scaled:scale'))

# Applying Kernel PCA
# install.packages('kernlab')
library(kernlab)
# The fitted object is stored under the name `kpca`; R still resolves the call
# kpca(...) to the kernlab function because call lookup skips non-functions.
kpca = kpca(~., data = training_set[-3], kernel = 'rbfdot', features = 2)
# Project both sets onto the two components and re-attach the response.
# as.data.frame() names the component columns V1 and V2.
training_set_pca = as.data.frame(predict(kpca, training_set))
training_set_pca$Purchased = training_set$Purchased
test_set_pca = as.data.frame(predict(kpca, test_set))
test_set_pca$Purchased = test_set$Purchased

# Fitting Logistic Regression to the Training set
classifier = glm(formula = Purchased ~ .,
                 family = binomial,
                 data = training_set_pca)

# Predicting the Test set results
# Probabilities above 0.5 are classified as 1, everything else as 0.
prob_pred = predict(classifier, type = 'response', newdata = test_set_pca[-3])
y_pred = ifelse(prob_pred > 0.5, 1, 0)

# Making the Confusion Matrix
# Rows are the actual classes, columns the predicted classes.
cm = table(test_set_pca[, 3], y_pred)

# Visualising the Training set results
# install.packages('ElemStatLearn')
# NOTE(review): nothing below appears to call into ElemStatLearn (only base
# graphics are used); the library() call is kept so the script's dependencies
# stay as they were.
library(ElemStatLearn)
set = training_set_pca
# Dense grid over the component plane, padded by 1 on every side.
X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set = expand.grid(X1, X2)
# Column names must match the predictors the classifier was trained on.
colnames(grid_set) = c('V1', 'V2')
prob_set = predict(classifier, type = 'response', newdata = grid_set)
y_grid = ifelse(prob_set > 0.5, 1, 0)
plot(set[, -3],
     main = 'Logistic Regression (Training set)',
     xlab = 'PC1', ylab = 'PC2',
     xlim = range(X1), ylim = range(X2))
# Decision boundary, then the predicted class of every grid point as a
# coloured background, then the observations coloured by their true class.
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))

# Visualising the Test set results
# install.packages('ElemStatLearn')
library(ElemStatLearn)
set = test_set_pca
X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set = expand.grid(X1, X2)
colnames(grid_set) = c('V1', 'V2')
prob_set = predict(classifier, type = 'response', newdata = grid_set)
y_grid = ifelse(prob_set > 0.5, 1, 0)
# The axes are the two kernel principal components, not the raw features,
# so they are labelled PC1/PC2 exactly as in the training-set plot.
plot(set[, -3],
     main = 'Logistic Regression (Test set)',
     xlab = 'PC1', ylab = 'PC2',
     xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))