Path: blob/master/CH05/CH05_SEC04_1_Dendrogram.ipynb
597 views
Kernel: Python 3
In [43]:
# Numerical, plotting and clustering dependencies for the dendrogram demo.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
from scipy.cluster import hierarchy
from scipy.spatial.distance import pdist

# Global figure defaults: larger font and a square 12x12 canvas.
# `plt.rcParams` and the imported `rcParams` are the same matplotlib settings object,
# so both options are set in a single update.
plt.rcParams.update({
    'font.size': 18,
    'figure.figsize': [12, 12],
})
In [44]:
# Training and testing set sizes (points per class)
n1 = 100  # Train
n2 = 50   # Test

# Seed the global NumPy RNG so that Restart & Run All regenerates the same
# point clouds (and therefore the same dendrograms further down).
SEED = 0
np.random.seed(SEED)

# Random ellipse 1 centered at (0,0): unit spread in x, 0.5 spread in y
x = np.random.randn(n1 + n2)
y = 0.5 * np.random.randn(n1 + n2)

# Random ellipse 2 centered at (1,-2) *before* rotation: unit spread in x, 0.2 in y
x2 = np.random.randn(n1 + n2) + 1
y2 = 0.2 * np.random.randn(n1 + n2) - 2

# Rotate ellipse 2 by theta (counter-clockwise 2-D rotation matrix)
theta = np.pi / 4
A = np.array([
    [np.cos(theta), -np.sin(theta)],
    [np.sin(theta),  np.cos(theta)],
])

# Apply A to every (x2, y2) pair; (x3, y3) are the rotated coordinates
x3 = A[0, 0] * x2 + A[0, 1] * y2
y3 = A[1, 0] * x2 + A[1, 1] * y2
In [45]:
# Scatter the first n1 points of each cloud:
# red circles for the axis-aligned ellipse, blue circles for the rotated one.
fig, ax = plt.subplots()
for xs, ys, fmt in ((x, y, 'ro'), (x3, y3, 'bo')):
    ax.plot(xs[:n1], ys[:n1], fmt)
plt.show()
Out[45]:
In [47]:
# Training set: the first n1 points of each class
# (2*n1 = 200 of the 2*(n1+n2) = 300 generated points)
X1 = np.column_stack((x3[:n1], y3[:n1]))  # rotated ellipse, one (x, y) row per point
X2 = np.column_stack((x[:n1], y[:n1]))    # axis-aligned ellipse
Y = np.concatenate((X1, X2))              # X1 rows first, then X2 rows

# Class labels aligned row-for-row with Y: 1 for the X1 rows, 2 for the X2 rows.
# They are concatenated end-to-end (length 2*n1); stacking them as columns would
# give an (n1, 2) array that does not line up with the rows of Y.
# NOTE: the name Z is reassigned to the linkage matrix in the dendrogram cell.
Z = np.concatenate((np.ones(n1), 2 * np.ones(n1)))

# Test set: the remaining n2 points of each class (2*n2 = 100 points)
x1test = np.column_stack((x3[n1:], y3[n1:]))
x2test = np.column_stack((x[n1:], y[n1:]))
In [48]:
## Dendrograms
# Cluster a 100-point subset: the first 50 training rows of each class,
# X1 rows on top of X2 rows.
Y3 = np.vstack((X1[:50, :], X2[:50, :]))

# Condensed pairwise Euclidean distances, then average-linkage agglomeration.
Y2 = pdist(Y3, metric='euclidean')
Z = hierarchy.linkage(Y2, method='average')

# Colour threshold at 85% of the largest value in the third column of the
# linkage matrix (the merge distances).
thresh = 0.85 * Z[:, 2].max()

plt.figure()
dn = hierarchy.dendrogram(Z, p=100, color_threshold=thresh)
plt.gca().set_axis_off()
plt.show()
Out[48]:
In [49]:
# Bar chart of the dendrogram leaf order: bar k has the height of the original
# row index (into the clustered subset) of the k-th leaf, read left to right.
leaf_order = dn['leaves']
n_leaves = len(leaf_order)

# The clustered subset stacks two equally sized groups (first half / second half),
# and both bar positions and leaf ids are 0-indexed, so the last member of the
# first group is n_leaves/2 - 1 and the separator sits half a step above it
# (49.5 for 100 leaves).
boundary = n_leaves / 2 - 0.5

plt.bar(range(n_leaves), leaf_order)
# Horizontal separator: leaf ids below it belong to the first group.
plt.plot([0, n_leaves], [boundary, boundary], 'r:', linewidth=2)
# Vertical separator: splits the left half of the leaf ordering from the right half.
plt.plot([boundary, boundary], [0, n_leaves], 'r:', linewidth=2)
plt.xlabel('Position in dendrogram leaf order')
plt.ylabel('Original observation index')
plt.show()
Out[49]:
In [51]:
# Redraw the same linkage tree with the colour threshold lowered to 25% of the
# largest merge distance (third column of the linkage matrix Z).
max_merge_distance = Z[:, 2].max()
thresh = 0.25 * max_merge_distance

plt.figure()
dn = hierarchy.dendrogram(Z, p=100, color_threshold=thresh)
plt.gca().set_axis_off()
plt.show()
Out[51]:
In [ ]:
In [ ]: