CoCalc -- helper_functions.py

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

GitHub Repository: veeralakrishna/DataCamp-Project-Solutions-Python
Path: blob/master/Who's Tweeting_ Trump or Trudeau_/datasets/helper_functions.py
Views: 1229

1
from matplotlib import pyplot as plt
2
import numpy as np
3
import itertools
4

5

6
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues,
                          figure=0):
    """
    Print a status line and plot a confusion matrix as an annotated heat map.

    See full source and example:
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix. Rows are labelled 'True label' and columns
        'Predicted label' on the plot. Nested lists are accepted and
        converted with ``np.asarray``.
    classes : sequence
        Tick labels, one per row/column of ``cm``.
    normalize : bool
        When True, every row is divided by its sum before plotting, and the
        cell annotations are shown with two decimals.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap passed to ``plt.imshow``.
    figure : int or str
        Identifier handed to ``plt.figure`` to select/create the figure.

    Returns
    -------
    None. The plot is drawn on the selected figure; ``plt.show()`` is not
    called here.
    """
    cm = np.asarray(cm)

    # Normalize BEFORE drawing so the image, the colorbar, the cell text and
    # the text-colour threshold all describe the same numbers.
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Rows with no samples stay at 0 instead of turning into NaN (which
        # would also poison cm.max() and therefore the colour threshold).
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.figure(figure)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        value = cm[i, j]
        # Only normalized ratios are reformatted; raw counts are passed
        # through untouched, exactly as supplied by the caller.
        label = format(value, '.2f') if normalize else value
        plt.text(j, i, label,
                 horizontalalignment="center",
                 color="white" if value > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Called last so the layout pass can make room for the axis labels.
    plt.tight_layout()
41

42

43
def plot_and_return_top_features(classifier, vectorizer, top_features=20):
    """
    Plot the top features in a binary classification model and remove possible overlap.

    The coefficients in ``classifier.coef_[0]`` are paired with the
    vectorizer's feature names and sorted ascending. The ``top_features``
    lowest and the ``top_features`` highest pairs are shown as a bar chart
    (red for negative coefficients, blue otherwise). When the two selections
    overlap (fewer than ``2 * top_features`` features), each pair appears
    only once.

    Adapted from https://medium.com/@aneesha/visualising-top-features-in-linear-svm-with-scikit-learn-and-matplotlib-3454ab18a14d
    and https://stackoverflow.com/a/26980472 by @kjam

    Parameters
    ----------
    classifier : object
        Must expose ``coef_`` whose first row holds one coefficient per
        feature (presumably a fitted linear binary classifier -- confirm
        against the caller).
    vectorizer : object
        Must expose ``get_feature_names_out()`` or the legacy
        ``get_feature_names()``.
    top_features : int
        Number of features to take from each end of the sorted list.

    Returns
    -------
    list of (coefficient, feature_name) tuples, in plotting order
    (lowest coefficients first).
    """
    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement and fall back for older installations.
    if hasattr(vectorizer, 'get_feature_names_out'):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()

    ranked = sorted(zip(classifier.coef_[0], feature_names))
    topn_class1 = ranked[:top_features]
    # ranked[-0:] would select the whole list, so guard the zero case.
    topn_class2 = ranked[-top_features:] if top_features else []

    # Start from a copy so topn_class1 is not mutated, then add only the
    # pairs from the other end that were not already selected.
    top_coefficients = list(topn_class1)
    already_selected = set(topn_class1)
    for ce in topn_class2:
        if ce not in already_selected:
            top_coefficients.append(ce)

    positions = np.arange(len(top_coefficients))
    weights = [tc[0] for tc in top_coefficients]
    names = [tc[1] for tc in top_coefficients]
    colors = ['red' if c < 0 else 'blue' for c in weights]

    plt.figure(figsize=(15, 5))
    plt.bar(positions, weights, color=colors)
    plt.xticks(positions, names, rotation=60, ha='right')
    plt.show()
    return top_coefficients
68

69

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

Product

Resources

Company

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more, all in one place. Commercial Alternative to JupyterHub.

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.