# Path: blob/master/Part 4 - Clustering/K-Means Clustering/kmeans.R
# 1009 views
# K-Means Clustering
#
# Script outline:
#   1. Read the customer data and keep the two columns used for clustering.
#   2. Plot the within-cluster sum of squares (WCSS) for k = 1..10 so the
#      "elbow" can be read off the curve.
#   3. Fit k-means with 5 clusters and draw the resulting partition.

# Loaded up front so a missing package fails before any work is done.
library(cluster)  # provides clusplot()

# Importing the dataset
dataset <- read.csv("Mall_Customers.csv")
# Keep only columns 4 and 5 -- presumably annual income and spending score,
# going by the axis labels used below; verify against the CSV header.
dataset <- dataset[4:5]

# Splitting the dataset into the Training set and Test set
# (template step, intentionally left disabled for this script)
# install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$DependentVariable, SplitRatio = 0.8)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)

# Feature Scaling
# (template step, intentionally left disabled for this script)
# training_set = scale(training_set)
# test_set = scale(test_set)

# Using the elbow method to find the optimal number of clusters
# The seed is fixed because kmeans() picks its starting centres at random.
set.seed(6)
# vapply() runs the fits in order k = 1, 2, ..., 10, so the random stream is
# consumed exactly as in a sequential loop, and the result is a numeric vector
# of length 10 without growing it element by element.
wcss <- vapply(
  1:10,
  function(k) sum(kmeans(dataset, k)$withinss),
  numeric(1)
)
plot(
  1:10,
  wcss,
  type = "b",
  main = "The Elbow Method",
  xlab = "Number of clusters",
  ylab = "WCSS"
)

# Fitting K-Means to the dataset
set.seed(29)
# The fitted model gets its own name so it does not shadow stats::kmeans().
kmeans_fit <- kmeans(x = dataset, centers = 5)
y_kmeans <- kmeans_fit$cluster  # cluster index assigned to each row

# Visualising the clusters
# NOTE(review): clusplot() appears to project the data onto principal
# components before plotting, so the axes may not be the raw variables;
# the labels are kept as in the original -- confirm against the cluster docs.
clusplot(
  dataset,
  y_kmeans,
  lines = 0,
  shade = TRUE,
  color = TRUE,
  labels = 2,
  plotchar = FALSE,
  span = TRUE,
  main = "Clusters of customers",
  xlab = "Annual Income",
  ylab = "Spending Score"
)