Kernel: Python 2 (SageMath)
In [3]:
#Load packages
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from scipy import linalg
from sklearn import neighbors
from sklearn import metrics
from sklearn import svm
from sklearn import cross_validation
from sklearn.grid_search import GridSearchCV

#Set plots to inline
%matplotlib inline

#Define plot colors and options
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
labels = ['sr', 'og', '^b']
colors = ['r', 'g', 'b']

#Define classifier decision boundary plotting function
def plot_classifier(x, y, clf, title):
    #Prepare a 200x200 grid covering the range of both features
    gx1, gx2 = np.meshgrid(
        np.arange(min(x[:,0]), max(x[:,0]), (max(x[:,0]) - min(x[:,0]))/200.0),
        np.arange(min(x[:,1]), max(x[:,1]), (max(x[:,1]) - min(x[:,1]))/200.0))
    gx1l = gx1.flatten()
    gx2l = gx2.flatten()
    gx = np.vstack((gx1l, gx2l)).T
    #Compute a prediction for every point in the grid
    gyhat = clf.predict(gx)
    gyhat = gyhat.reshape(gx1.shape)
    #Plot the data points for each class over the decision surface
    for i in [0, 1, 2]:
        plt.plot(x[y==i,0], x[y==i,1], labels[i])
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.pcolormesh(gx1, gx2, gyhat, cmap=cmap_light)
    plt.colorbar()
    plt.axis('tight')
    plt.title(title)
In [4]:
#Load the Iris data set
import sklearn.datasets

iris = sklearn.datasets.load_iris()
X = iris['data']
Y = iris['target']
feature_names = iris['feature_names']
target_names = iris['target_names']
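As a quick sanity check on what was just loaded, the cell below (a minimal sketch, not part of the original analysis) prints the array shapes and the feature and class names.
In [ ]:
#Inspect the loaded data: 150 samples, 4 features, 3 classes
print("X shape: %s" % (X.shape,))
print("Y shape: %s" % (Y.shape,))
print("Features: %s" % (feature_names,))
print("Classes: %s" % (target_names,))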
KNN on Iris Data
In [ ]:
#Select distance metric
metric = 'euclidean'

#Select first two features
X2 = X[:,[0,1]]

#Fit and plot a KNN classifier for a range of K values
for K in [1, 5, 10, 50, 150]:
    #Fit the classifier
    clf = neighbors.KNeighborsClassifier(K, metric=metric)
    clf.fit(X2, Y)
    #Plot the classification function
    plt.figure()
    plot_classifier(X2, Y, clf, "KNN with K=%d" % (K,))
    #Make predictions using the model
    Yhat = clf.predict(X2)
    #Report the training error rate
    Err = 1 - metrics.accuracy_score(Y, Yhat)
    print("Training Error Rate is: %.4f" % (Err,))
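Note that training error is an optimistic guide for choosing K: with K=1 every training point is its own nearest neighbor, so the training error is zero by construction. Below is a minimal sketch of a held-out error estimate instead, assuming the cross_validation module imported above; K=150 is omitted because it would exceed the size of the reduced training set.
In [ ]:
#Hold out 25% of the data and score KNN on the held-out portion for each K
X2tr, X2te, Ytr, Yte = cross_validation.train_test_split(
    X2, Y, test_size=0.25, random_state=0)
for K in [1, 5, 10, 50]:
    clf = neighbors.KNeighborsClassifier(K, metric='euclidean')
    clf.fit(X2tr, Ytr)
    err = 1 - metrics.accuracy_score(Yte, clf.predict(X2te))
    print("K=%3d: held-out error rate %.4f" % (K, err))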
SVM on Iris Data
In [ ]:
#Select first two features
X2 = X[:,[0,1]]

#Select the kernel
kernel = 'rbf'

#Fit and plot an SVM for a grid of C and gamma values
for C in [1, 10, 100]:
    for g in [0.1, 1, 10, 100, 1000]:
        #Fit the classifier
        clf = svm.SVC(C=C, kernel=kernel, gamma=g)
        clf.fit(X2, Y)
        #Plot the classification function
        plt.figure()
        plot_classifier(X2, Y, clf, "SVM with C=%d, Kernel=%s, gamma=%.2f" % (C, kernel, g))
        #Make predictions using the model
        Yhat = clf.predict(X2)
        #Report the training error rate
        Err = 1 - metrics.accuracy_score(Y, Yhat)
        print("Training Error Rate is: %.4f" % (Err,))
        plt.show()
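A rough way to see why large gamma overfits is to count retained support vectors: as gamma grows, the RBF kernel becomes increasingly local and the fit keeps more of the training points as support vectors. The cell below is a sketch using SVC's support_vectors_ attribute, with C fixed at 1 as an illustrative choice.
In [ ]:
#Count support vectors as gamma grows (C fixed at 1 for illustration)
for g in [0.1, 1, 10, 100, 1000]:
    clf = svm.SVC(C=1, kernel='rbf', gamma=g)
    clf.fit(X2, Y)
    print("gamma=%7.1f: %d support vectors" % (g, clf.support_vectors_.shape[0]))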
Cross-Validation on the Iris Data
In [ ]:
#Define the parameter grid
param_grid = [{'C': [0.01, 0.1, 1, 10, 100],
               'kernel': ['rbf'],
               'gamma': [0.1, 1, 10, 100]}]

#Select just the first two features
X2 = X[:,[0,1]]

#Create a learning set/test set split
X2learn, X2test, Ylearn, Ytest = cross_validation.train_test_split(
    X2, Y, test_size=0.25, random_state=42)

#Search for optimal parameters using
#5-fold cross-validation on the learning set
clf = GridSearchCV(svm.SVC(C=1), param_grid, cv=5)
clf.fit(X2learn, Ylearn)

#Print the optimal parameter set
print("Optimal Parameters: %s" % (clf.best_params_,))

#Plot the classification function with the learning set
plt.figure(1)
plot_classifier(X2learn, Ylearn, clf, "SVM with Learning Set")

#Plot the classification function with the test set
plt.figure(2)
plot_classifier(X2test, Ytest, clf, "SVM with Test Set")

#Make predictions on the test set using the optimal model
Yhat = clf.predict(X2test)

#Report the error rate
Err = 1 - metrics.accuracy_score(Ytest, Yhat)
print("Test Error Rate is: %.4f" % (Err,))
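Beyond best_params_, the fitted GridSearchCV object also records the cross-validation score of every parameter combination, which shows how sensitive the result is to the choice of grid point. The sketch below assumes the older sklearn.grid_search API imported above, whose grid_scores_ attribute holds (parameters, mean score, per-fold scores) tuples.
In [ ]:
#Show mean 5-fold CV accuracy for each (C, gamma) pair tried
#(grid_scores_ is specific to the legacy sklearn.grid_search API)
for params, mean_score, cv_scores in clf.grid_scores_:
    print("%.4f (+/-%.4f) for %r" % (mean_score, cv_scores.std(), params))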