Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
debakarr
GitHub Repository: debakarr/machinelearning
Path: blob/master/Part 4 - Clustering/Hierarchical Clustering/hc.R
1009 views
# Hierarchical Clustering

# Importing the dataset.
# Keep only columns 4:5 — presumably Annual Income and Spending Score
# (the axis labels used in the cluster plot below); verify against the CSV.
dataset <- read.csv("Mall_Customers.csv")
dataset <- dataset[4:5]
# Splitting the dataset into the Training set and Test set
# (not needed for unsupervised clustering — kept as a commented-out template)
# install.packages("caTools")
# library(caTools)
# set.seed(123)
# split <- sample.split(dataset$DependentVariable, SplitRatio = 0.8)
# training_set <- subset(dataset, split == TRUE)
# test_set <- subset(dataset, split == FALSE)

# Feature Scaling (also not applied here)
# training_set <- scale(training_set)
# test_set <- scale(test_set)
# Using the dendrogram to find the optimal number of clusters.
# Ward's criterion ("ward.D") merges the pair of clusters that minimises
# the increase in total within-cluster variance at each step.
dendrogram <- hclust(d = dist(dataset, method = "euclidean"), method = "ward.D")
plot(dendrogram,
     main = "Dendrogram",
     xlab = "Customers",
     ylab = "Euclidean distances")
# Fitting Hierarchical Clustering to the dataset.
# k = 5 is the cluster count read off the dendrogram above;
# cutree() returns one integer cluster label per observation.
hc <- hclust(d = dist(dataset, method = "euclidean"), method = "ward.D")
y_hc <- cutree(hc, k = 5)
# Visualising the clusters.
# clusplot() projects the data onto its first two principal components;
# since dataset has exactly two columns, the plot axes correspond to the
# two original features.
library(cluster)
clusplot(dataset,
         y_hc,
         lines = 0,          # no lines connecting cluster centres
         shade = TRUE,       # shade ellipses by density
         color = TRUE,
         labels = 2,         # label both points and ellipses
         plotchar = FALSE,   # same symbol for every cluster
         span = TRUE,        # draw minimum-area ellipses
         main = "Clusters of customers",
         xlab = "Annual Income",
         ylab = "Spending Score")