Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Path: blob/master/Who's Tweeting_ Trump or Trudeau_/datasets/helper_functions.py
Views: 1229
from matplotlib import pyplot as plt
import numpy as np
import itertools


def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues,
                          figure=0):
    """
    Plot a confusion matrix as an annotated heat map.

    See full source and example:
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    The matrix is drawn on the matplotlib figure identified by `figure`,
    with one text annotation per cell. A short status line saying whether
    normalization was applied is printed to stdout.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix; rows are labelled 'True label' and columns
        'Predicted label' on the plot.
    classes : sequence
        Tick labels, used for both axes.
    normalize : bool
        When True, every row is divided by its own sum before plotting.
        Rows whose sum is zero are left as zeros instead of becoming NaN.
    title : str
        Plot title.
    cmap : matplotlib colormap
        Colormap passed to `plt.imshow`.
    figure : int or str
        Identifier passed to `plt.figure`.

    Returns
    -------
    None
    """
    cm = np.asarray(cm)

    # Normalize BEFORE drawing so the colours and the cell annotations
    # describe the same numbers.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype(float), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.figure(figure)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells above half of the maximum get white text so the annotation
    # stays readable against the darker end of the colormap.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        value = cm[i, j]
        plt.text(j, i, format(value, '.2f') if normalize else value,
                 horizontalalignment="center",
                 color="white" if value > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')


def plot_and_return_top_features(classifier, vectorizer, top_features=20):
    """
    Plot the top features in a binary classification model and remove possible overlap.

    Adapted from https://medium.com/@aneesha/visualising-top-features-in-linear-svm-with-scikit-learn-and-matplotlib-3454ab18a14d
    and https://stackoverflow.com/a/26980472 by @kjam

    The first row of `classifier.coef_` is paired with the vectorizer's
    feature names and sorted ascending. The `top_features` lowest and the
    `top_features` highest pairs are kept; when the two ranges overlap
    (fewer than `2 * top_features` features) each feature appears once.
    The selection is drawn as a bar chart (red for negative coefficients,
    blue otherwise) and shown with `plt.show()`.

    Parameters
    ----------
    classifier : object
        Must expose `coef_`, indexable as `coef_[0]`.
        NOTE(review): presumably a fitted linear scikit-learn model; confirm
        against callers.
    vectorizer : object
        Must expose `get_feature_names_out()` or the older
        `get_feature_names()`.
    top_features : int
        Number of features to keep from each end of the ranking. Values
        below zero are treated as zero.

    Returns
    -------
    list of (coefficient, feature_name) tuples
        Sorted ascending by coefficient.
    """
    # Newer vectorizers only provide get_feature_names_out(); older ones only
    # get_feature_names(). Support both.
    get_names = getattr(vectorizer, 'get_feature_names_out', None)
    if get_names is None:
        get_names = vectorizer.get_feature_names
    feature_names = get_names()

    ranked = sorted(zip(classifier.coef_[0], feature_names))
    keep = max(0, min(top_features, len(ranked)))
    # Start the upper slice no earlier than where the lower slice ended so a
    # feature is never listed twice. Computing the start index explicitly
    # also avoids `ranked[-0:]`, which would select the whole list.
    upper_start = max(keep, len(ranked) - keep)
    top_coefficients = ranked[:keep] + ranked[upper_start:]

    weights = [tc[0] for tc in top_coefficients]
    labels = [tc[1] for tc in top_coefficients]
    positions = np.arange(len(top_coefficients))

    plt.figure(figsize=(15, 5))
    colors = ['red' if c < 0 else 'blue' for c in weights]
    plt.bar(positions, weights, color=colors)
    plt.xticks(positions, labels, rotation=60, ha='right')
    plt.show()
    return top_coefficients