Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
debakarr
GitHub Repository: debakarr/machinelearning
Path: blob/master/Part 4 - Clustering/Hierarchical Clustering/hc.R
1009 views
# Hierarchical Clustering

# Importing the dataset.
# Keep only columns 4:5 — presumably Annual Income and Spending Score
# (the axis labels used in the cluster plot below); verify against the CSV.
dataset <- read.csv("Mall_Customers.csv")
dataset <- dataset[4:5]
# Splitting the dataset into the Training set and Test set
# (not needed for unsupervised clustering — kept as a commented-out template)
# install.packages("caTools")
# library(caTools)
# set.seed(123)
# split <- sample.split(dataset$DependentVariable, SplitRatio = 0.8)
# training_set <- subset(dataset, split == TRUE)
# test_set <- subset(dataset, split == FALSE)

# Feature Scaling (also not applied here)
# training_set <- scale(training_set)
# test_set <- scale(test_set)
# Using the dendrogram to find the optimal number of clusters.
# Ward's criterion ("ward.D") merges the pair of clusters that minimises
# the increase in total within-cluster variance at each step.
dendrogram <- hclust(d = dist(dataset, method = "euclidean"), method = "ward.D")
plot(dendrogram,
     main = "Dendrogram",
     xlab = "Customers",
     ylab = "Euclidean distances")
# Fitting Hierarchical Clustering to the dataset.
# k = 5 is the cluster count read off the dendrogram above;
# cutree() returns one integer cluster label per observation.
hc <- hclust(d = dist(dataset, method = "euclidean"), method = "ward.D")
y_hc <- cutree(hc, k = 5)
# Visualising the clusters.
# clusplot() projects the data onto its first two principal components;
# since dataset has exactly two columns, the plot axes correspond to the
# two original features.
library(cluster)
clusplot(dataset,
         y_hc,
         lines = 0,          # no lines connecting cluster centres
         shade = TRUE,       # shade ellipses by density
         color = TRUE,
         labels = 2,         # label both points and ellipses
         plotchar = FALSE,   # same symbol for every cluster
         span = TRUE,        # draw minimum-area ellipses
         main = "Clusters of customers",
         xlab = "Annual Income",
         ylab = "Spending Score")