CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
veeralakrishna

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

GitHub Repository: veeralakrishna/DataCamp-Project-Solutions-Python
Path: blob/master/Who's Tweeting_ Trump or Trudeau_/datasets/helper_functions.py
Views: 1229
1
from matplotlib import pyplot as plt
2
import numpy as np
3
import itertools
4
5
6
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues,
                          figure=0):
    """
    Print a status line and plot a confusion matrix as a heat map.

    See full source and example:
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    Normalization can be applied by setting `normalize=True`.

    Parameters
    ----------
    cm : 2-D array-like
        Confusion matrix; rows are indexed as true labels and columns as
        predicted labels (matching the axis labels set below).
    classes : sequence
        Tick labels, one per class, used on both axes.
    normalize : bool
        If True, each row is divided by its sum before plotting.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap passed to ``plt.imshow``.
    figure : int or str
        Identifier of the matplotlib figure to draw into.

    Returns
    -------
    None
        The function draws on the selected figure; it does not call
        ``plt.show()``.
    """
    cm = np.asarray(cm)

    # Normalize BEFORE drawing so that the heat-map colours, the cell text and
    # the text-contrast threshold are all derived from the same values.
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Rows without any true samples are left at 0 instead of becoming NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.figure(figure)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells above half of the maximum are dark, so their text is drawn white.
    thresh = cm.max() / 2. if cm.size else 0.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        value = cm[i, j]
        # Normalized fractions get a short fixed format; raw counts are shown
        # exactly as before.
        label = format(value, '.2f') if normalize else value
        plt.text(j, i, label,
                 horizontalalignment="center",
                 color="white" if value > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Called after the labels are set so that they are included in the layout.
    plt.tight_layout()
41
42
43
def plot_and_return_top_features(classifier, vectorizer, top_features=20):
    """
    Plot the top features in a binary classification model and remove possible overlap.

    Adapted from https://medium.com/@aneesha/visualising-top-features-in-linear-svm-with-scikit-learn-and-matplotlib-3454ab18a14d
    and https://stackoverflow.com/a/26980472 by @kjam

    Parameters
    ----------
    classifier : fitted linear model
        Must expose ``coef_``; only the first row (``coef_[0]``) is used.
    vectorizer : fitted vectorizer
        Must expose ``get_feature_names_out()`` or the older
        ``get_feature_names()``.
    top_features : int
        How many of the lowest and how many of the highest coefficients to
        select.

    Returns
    -------
    list of (coefficient, feature_name) tuples
        The ``top_features`` lowest coefficients in ascending order, followed
        by the ``top_features`` highest ones (also ascending). When the two
        selections would overlap, each feature appears only once.
    """
    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement when present and fall back for older versions.
    if hasattr(vectorizer, 'get_feature_names_out'):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()

    ranked = sorted(zip(classifier.coef_[0], feature_names))

    # Clamp the request so that 0 selects nothing (a bare [-0:] slice would
    # return everything) and oversized requests select each feature once.
    count = max(0, min(int(top_features), len(ranked)))
    lowest = ranked[:count]
    # Start the upper slice no earlier than where the lower slice ended so the
    # two selections never share an entry.
    highest = ranked[max(count, len(ranked) - count):]
    top_coefficients = lowest + highest

    weights = [weight for weight, _ in top_coefficients]
    names = [name for _, name in top_coefficients]
    positions = np.arange(len(top_coefficients))
    colors = ['red' if weight < 0 else 'blue' for weight in weights]

    plt.figure(figsize=(15, 5))
    plt.bar(positions, weights, color=colors)
    plt.xticks(positions, names, rotation=60, ha='right')
    plt.show()
    return top_coefficients
68
69