# Source: GitHub repository debakarr/machinelearning
# Path: Part 4 - Clustering/K-Means Clustering/kmeans.R
# K-Means Clustering

# Importing the dataset
# Reads the CSV from the current working directory.
dataset <- read.csv("Mall_Customers.csv")
# Keep only columns 4 and 5 as the clustering features (selected by position;
# presumably annual income and spending score, per the plot axis labels used
# later in this script -- confirm against the CSV header).
dataset <- dataset[4:5]

# Splitting the dataset into the Training set and Test set
# (template step, left disabled in this script)
# install.packages('caTools')
# library(caTools)
# set.seed(123)
# split = sample.split(dataset$DependentVariable, SplitRatio = 0.8)
# training_set = subset(dataset, split == TRUE)
# test_set = subset(dataset, split == FALSE)

# Feature Scaling
# (template step, left disabled in this script)
# training_set = scale(training_set)
# test_set = scale(test_set)

# Using the elbow method to find the optimal number of clusters
# Fix the RNG seed so the random initial centres chosen by kmeans() are
# reproducible from run to run.
set.seed(6)
# wcss[i] holds the total within-cluster sum of squares for a fit with i
# clusters. Preallocated so the vector is not regrown on every iteration.
wcss <- numeric(10)
for (i in 1:10) {
  wcss[i] <- sum(kmeans(dataset, i)$withinss)
}
# Plot WCSS against the number of clusters; type = "b" draws both points and
# connecting lines so the "elbow" can be read off visually.
plot(
  1:10,
  wcss,
  type = "b",
  main = "The Elbow Method",
  xlab = "Number of clusters",
  ylab = "WCSS"
)

# Fitting K-Means to the dataset
# Fixed seed so the random initial centres (and hence the fit) are reproducible.
set.seed(29)
# Fit with 5 clusters.
# NOTE: the result is bound to the name `kmeans`, which masks stats::kmeans for
# value lookups. Calls such as kmeans(...) still resolve to the function,
# because R skips non-function bindings when evaluating a call. The name is
# kept so any code that reads `kmeans$...` continues to work.
kmeans <- kmeans(x = dataset, centers = 5)
# Integer cluster assignment (1..5) for each row of `dataset`.
y_kmeans <- kmeans$cluster

# Visualising the clusters
library(cluster)
# Draw the rows of `dataset` grouped by their assignment in `y_kmeans`.
# See ?cluster::clusplot.default for the exact meaning of each display option.
clusplot(
  dataset,
  y_kmeans,
  lines = 0,
  shade = TRUE,
  color = TRUE,
  labels = 2,
  plotchar = FALSE,
  span = TRUE,
  main = "Clusters of customers",
  xlab = "Annual Income",
  ylab = "Spending Score"
)