Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupport News Sign UpSign In
| Download
Views: 26
Kernel: Python 3 (Anaconda)
import pandas as pd import numpy as np from sklearn import metrics from sklearn import tree import matplotlib.pyplot as plt
# Column names for the UCI wine dataset: the class label followed by the
# 13 chemical attributes.
# Fixes two defects in the original:
#  1. The 14th column name, "Proline", was missing entirely.
#  2. split("\n") on a string ending in a newline left a trailing '' in
#     the list (visible in the printed output), which then silently
#     became the name of the last CSV column in read_csv.
attr_names = """class
Alcohol
Malic acid
Ash
Alcalinity of ash
Magnesium
Total phenols
Flavanoids
Nonflavanoid phenols
Proanthocyanins
Color intensity
Hue
OD280/OD315 of diluted wines
Proline"""
# splitlines() does not produce a trailing empty entry.
attr_names = attr_names.splitlines()
print(attr_names)
['class', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash', 'Magnesium', 'Total phenols', 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 'Color intensity', 'Hue', 'OD280/OD315 of diluted wines', '']
# Load the UCI wine data; the file has no header row, so column names
# are supplied explicitly via `names=`.
# NOTE(review): the variable is called iris_df but this is the *wine*
# dataset — the name is misleading; consider renaming throughout.
iris_df = pd.read_csv('data/wine.data',sep =',',names=attr_names)
# Confirm we got a DataFrame, then echo it (the notebook renders the
# last expression of a cell).
print(type(iris_df))
iris_df
<class 'pandas.core.frame.DataFrame'>
WARNING: Some output was deleted.
# Class labels in order of first appearance.
cls_names = list(iris_df["class"].unique())
# Assign each row a 1-based class id based on its label's position in
# cls_names, and store it as a new 'class_id' column.
cls_ids = iris_df["class"].apply(lambda label: cls_names.index(label) + 1)
iris_df['class_id'] = cls_ids
#cols=tuple([iris_df[attr].values for attr in attr_names[:-1]]) #X=np.c_[cols] #Y= cls_ids.values #print(X.shape,Y.shape)
# Pull the first 13 columns (class label + 12 attributes) out of the
# DataFrame as NumPy arrays, one variable per column.
X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12 = (
    iris_df[name].values for name in attr_names[:13]
)
# Stack the 12 attribute columns (everything except the class label in
# X0) into an (n_samples, 12) design matrix.
X = np.column_stack((X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12))
print(X)
[[ 14.23 1.71 2.43 ..., 5.64 1.04 3.92] [ 13.2 1.78 2.14 ..., 4.38 1.05 3.4 ] [ 13.16 2.36 2.67 ..., 5.68 1.03 3.17] ..., [ 13.27 4.28 2.26 ..., 10.2 0.59 1.56] [ 13.17 2.59 2.37 ..., 9.3 0.6 1.62] [ 14.13 4.1 2.74 ..., 9.2 0.61 1.6 ]]
# The target vector is the class-label column extracted earlier.
Y = X0
# Sanity-check shapes and contents before fitting.
print(X.shape, Y.shape)
print(X)
print(Y)
(178, 12) (178,) [[ 14.23 1.71 2.43 ..., 5.64 1.04 3.92] [ 13.2 1.78 2.14 ..., 4.38 1.05 3.4 ] [ 13.16 2.36 2.67 ..., 5.68 1.03 3.17] ..., [ 13.27 4.28 2.26 ..., 10.2 0.59 1.56] [ 13.17 2.59 2.37 ..., 9.3 0.6 1.62] [ 14.13 4.1 2.74 ..., 9.2 0.61 1.6 ]] [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3]
#dt=tree.DecisionTreeClassifier()
#dt.fit(X,Y)
#metrics.accuracy_score(Y, dt.predict(X))
#def plot_map2d(clf,X): # x_min, x_max = X[:,0].min(), X[:,0].max() # y_min, y_max = X[:,1].min(), X[:,1].max() # x_range = np.linspace(x_min, x_max, 200) # y_range = np.linspace(y_min, y_max, 200) # xx, yy = np.meshgrid(x_range, y_range) # Z= clf.predict(np.c_[xx.ravel(), yy.ravel()]) # Z= Z.reshape(xx.shape) # plt.imshow(Z, extent=(x_min, x_max, y_min, y_max), aspect="auto", interpolation="bilinear", origin="lower")
#plt.figure(figsize=(15.0,8.0)) #k=0 #for i in range(4): # for j in range(i+1,4): # k+=1 # plt.subplot(2,3,k) # X1, X2 = X[:,i], X[:,j] # X13 = np.c_[X1,X2] # dt=tree.DecisionTreeClassifier() # dt.fit(X13,Y) # plot_map2d(dt, X13) # plt.scatter(X1, X2, c=Y, s=36, edgecolors='k') # plt.xlabel(attr_names[i]) # plt.ylabel(attr_names[j]) # plt.grid(1) #plt.tight_layout() #plt.show()
# Fit a decision tree on the full dataset (no train/test split — this
# notebook only visualizes the tree) and dump it in Graphviz .dot format.
# DecisionTreeClassifier.fit returns the estimator itself, so the calls
# can be chained.
dt = tree.DecisionTreeClassifier().fit(X, Y)
tree.export_graphviz(dt, out_file="dt.dot")
# terminal command to render the exported tree: dot -Tpng -O dt.dot