CoCalc -- kmeans.R

GitHub Repository: debakarr/machinelearning
Path: blob/master/Part 4 - Clustering/K-Means Clustering/kmeans.R
¹⁰⁰⁹ views

1
# K-Means Clustering
2

3
# Importing the dataset
4
dataset = read.csv('Mall_Customers.csv')
5
dataset = dataset[4:5]
6

7
# Splitting the dataset into the Training set and Test set
8
# install.packages('caTools')
9
# library(caTools)
10
# set.seed(123)
11
# split = sample.split(dataset$DependentVariable, SplitRatio = 0.8)
12
# training_set = subset(dataset, split == TRUE)
13
# test_set = subset(dataset, split == FALSE)
14

15
# Feature Scaling
16
# training_set = scale(training_set)
17
# test_set = scale(test_set)
18

19
# Using the elbow method to find the optimal number of clusters
20
set.seed(6)
21
wcss = vector()
22
for (i in 1:10) wcss[i] = sum(kmeans(dataset, i)$withinss)
23
plot(1:10,
24
     wcss,
25
     type = 'b',
26
     main = paste('The Elbow Method'),
27
     xlab = 'Number of clusters',
28
     ylab = 'WCSS')
29

30
# Fitting K-Means to the dataset
31
set.seed(29)
32
kmeans = kmeans(x = dataset, centers = 5)
33
y_kmeans = kmeans$cluster
34

35
# Visualising the clusters
36
library(cluster)
37
clusplot(dataset,
38
         y_kmeans,
39
         lines = 0,
40
         shade = TRUE,
41
         color = TRUE,
42
         labels = 2,
43
         plotchar = FALSE,
44
         span = TRUE,
45
         main = paste('Clusters of customers'),
46
         xlab = 'Annual Income',
47
         ylab = 'Spending Score')
48

Product

Resources

Company