CoCalc -- ch07.py

GitHub Repository: rasbt/machine-learning-book
Path: blob/main/ch07/ch07.py
¹²⁴⁵ views
1
# coding: utf-8
2

3

4
import sys
5
from python_environment_check import check_packages
6
from scipy.special import comb
7
import math
8
import numpy as np
9
import matplotlib.pyplot as plt
10
from sklearn.base import BaseEstimator
11
from sklearn.base import ClassifierMixin
12
from sklearn.preprocessing import LabelEncoder
13
from sklearn.base import clone
14
from sklearn.pipeline import _name_estimators
15
import operator
16
from sklearn import datasets
17
from sklearn.preprocessing import StandardScaler
18
from sklearn.model_selection import train_test_split
19
from sklearn.linear_model import LogisticRegression
20
from sklearn.tree import DecisionTreeClassifier
21
from sklearn.neighbors import KNeighborsClassifier 
22
from sklearn.pipeline import Pipeline
23
from sklearn.model_selection import cross_val_score
24
from sklearn.metrics import roc_curve
25
from sklearn.metrics import auc
26
from itertools import product
27
from sklearn.model_selection import GridSearchCV
28
import pandas as pd
29
from sklearn.ensemble import BaggingClassifier
30
from sklearn.metrics import accuracy_score
31
from sklearn.ensemble import AdaBoostClassifier
32
import xgboost as xgb
33

34
# # Machine Learning with PyTorch and Scikit-Learn  
35
# # -- Code Examples
36

37
# ## Package version checks
38

39
# Add folder to path in order to load from the check_packages.py script:
40

41

42

43
sys.path.insert(0, '..')
44

45

46
# Check recommended package versions:
47

48

49

50

51

52
d = {
53
    'numpy': '1.21.2',
54
    'scipy': '1.7.0',
55
    'matplotlib': '3.4.3',
56
    'sklearn': '1.0',
57
    'pandas': '1.3.2',
58
    'xgboost': '1.5.0',
59
}
60
check_packages(d)
61

62

63
# # Chapter 7 - Combining Different Models for Ensemble Learning
64

65

66
# ### Overview
67

68
# - [Learning with ensembles](#Learning-with-ensembles)
69
# - [Combining classifiers via majority vote](#Combining-classifiers-via-majority-vote)
70
#     - [Implementing a simple majority vote classifier](#Implementing-a-simple-majority-vote-classifier)
71
#     - [Using the majority voting principle to make predictions](#Using-the-majority-voting-principle-to-make-predictions)
72
#     - [Evaluating and tuning the ensemble classifier](#Evaluating-and-tuning-the-ensemble-classifier)
73
# - [Bagging – building an ensemble of classifiers from bootstrap samples](#Bagging----Building-an-ensemble-of-classifiers-from-bootstrap-samples)
74
#     - [Bagging in a nutshell](#Bagging-in-a-nutshell)
75
#     - [Applying bagging to classify examples in the Wine dataset](#Applying-bagging-to-classify-examples-in-the-Wine-dataset)
76
# - [Leveraging weak learners via adaptive boosting](#Leveraging-weak-learners-via-adaptive-boosting)
77
#     - [How boosting works](#How-boosting-works)
78
#     - [Applying AdaBoost using scikit-learn](#Applying-AdaBoost-using-scikit-learn)
79
# - [Gradient boosting -- training an ensemble based on loss gradients](#Gradient-boosting----training-an-ensemble-based-on-loss-gradients)
80
#   - [Comparing AdaBoost with gradient boosting](#Comparing-AdaBoost-with-gradient-boosting)
81
#   - [Outlining the general gradient boosting algorithm](#Outlining-the-general-gradient-boosting-algorithm)
82
#   - [Explaining the gradient boosting algorithm for classification](#Explaining-the-gradient-boosting-algorithm-for-classification)
83
#   - [Illustrating gradient boosting for classification](#Illustrating-gradient-boosting-for-classification)
84
#   - [Using XGBoost](#Using-XGBoost)
85
# - [Summary](#Summary)
86

87

88

89

90

91

92
# # Learning with ensembles
93

94

95

96

97

98

99

100

101

102

103

104

105

106
def ensemble_error(n_classifier, error):
    """Return the error probability of a majority-vote ensemble.

    The ensemble is modelled as `n_classifier` independent base classifiers
    that all share the same error rate. The result is the upper tail of the
    corresponding binomial distribution: the probability that at least
    ceil(n_classifier / 2) of the base classifiers are wrong.

    Parameters
    ----------
    n_classifier : int
        Number of base classifiers in the ensemble.

    error : float
        Error rate of each individual base classifier.

    Returns
    -------
    float
        Probability that k >= ceil(n_classifier / 2) base classifiers
        are wrong.

    Notes
    -----
    For an even `n_classifier` the sum starts at exactly n_classifier / 2,
    i.e. a tied vote is counted as an ensemble error.

    """
    # Smallest number of wrong base classifiers that is summed over.
    # math.ceil already returns an int in Python 3.
    k_start = math.ceil(n_classifier / 2.)
    probs = [comb(n_classifier, k) * error**k * (1 - error)**(n_classifier - k)
             for k in range(k_start, n_classifier + 1)]
    return sum(probs)
111

112

113

114

115
ensemble_error(n_classifier=11, error=0.25)
116

117

118

119

120

121

122
error_range = np.arange(0.0, 1.01, 0.01)
123
ens_errors = [ensemble_error(n_classifier=11, error=error)
124
              for error in error_range]
125

126

127

128

129

130

131
plt.plot(error_range, 
132
         ens_errors, 
133
         label='Ensemble error', 
134
         linewidth=2)
135

136
plt.plot(error_range, 
137
         error_range, 
138
         linestyle='--',
139
         label='Base error',
140
         linewidth=2)
141

142
plt.xlabel('Base error')
143
plt.ylabel('Base/Ensemble error')
144
plt.legend(loc='upper left')
145
plt.grid(alpha=0.5)
146
#plt.savefig('figures/07_03.png', dpi=300)
147
plt.show()
148

149

150

151
# # Combining classifiers via majority vote
152

153
# ## Implementing a simple majority vote classifier 
154

155

156

157

158
np.argmax(np.bincount([0, 0, 1], 
159
                      weights=[0.2, 0.2, 0.6]))
160

161

162

163

164
ex = np.array([[0.9, 0.1],
165
               [0.8, 0.2],
166
               [0.4, 0.6]])
167

168
p = np.average(ex, 
169
               axis=0, 
170
               weights=[0.2, 0.2, 0.6])
171
p
172

173

174

175

176
np.argmax(p)
177

178

179

180

181

182
# Scikit-learn 0.16 and newer requires reversing the parent classes
# See https://github.com/rasbt/machine-learning-book/discussions/205 for more details
# NOTE(review): the threshold below reads 0.16 -- confirm against the linked
# discussion that this is the intended release.
import re
import sklearn

# Compare (major, minor) as integers. Comparing the raw version strings is
# lexicographic, so e.g. "0.9" >= "0.16" would evaluate to True.
_sklearn_version = tuple(
    int(number) for number in re.findall(r'\d+', sklearn.__version__)[:2]
)
if _sklearn_version >= (0, 16):
    base_classes = (ClassifierMixin, BaseEstimator)
else:
    base_classes = (BaseEstimator, ClassifierMixin)
186

187
# class MajorityVoteClassifier(BaseEstimator, 
188
#                             ClassifierMixin):
189

190
class MajorityVoteClassifier(*base_classes):
    """A majority vote ensemble classifier.

    Parameters
    ----------
    classifiers : array-like, shape = [n_classifiers]
        Different classifiers for the ensemble.

    vote : str, {'classlabel', 'probability'} (default='classlabel')
        If 'classlabel' the prediction is based on the argmax of
        class labels. Else if 'probability', the argmax of
        the sum of probabilities is used to predict the class label
        (recommended for calibrated classifiers).

    weights : array-like, shape = [n_classifiers], optional (default=None)
        If a list of `int` or `float` values are provided, the classifiers
        are weighted by importance; Uses uniform weights if `weights=None`.

    """

    def __init__(self, classifiers, vote='classlabel', weights=None):
        self.classifiers = classifiers
        # Name -> estimator mapping; get_params(deep=True) uses these names
        # as parameter prefixes.
        self.named_classifiers = dict(_name_estimators(classifiers))
        self.vote = vote
        self.weights = weights

    def fit(self, X, y):
        """Fit classifiers.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_examples, n_features]
            Matrix of training examples.

        y : array-like, shape = [n_examples]
            Vector of target class labels.

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
            If `vote` is not 'probability' or 'classlabel', or if `weights`
            is given and its length differs from the number of classifiers.

        """
        if self.vote not in ('probability', 'classlabel'):
            raise ValueError(f"vote must be 'probability' or 'classlabel'"
                             f"; got (vote={self.vote})")

        # Test against None explicitly: the truth value of a NumPy array
        # with more than one element is ambiguous and raises.
        if (self.weights is not None
                and len(self.weights) != len(self.classifiers)):
            raise ValueError(f'Number of classifiers and weights must be equal'
                             f'; got {len(self.weights)} weights,'
                             f' {len(self.classifiers)} classifiers')

        # Use LabelEncoder to ensure class labels start with 0, which
        # is important for np.argmax call in self.predict
        self.lablenc_ = LabelEncoder()
        self.lablenc_.fit(y)
        self.classes_ = self.lablenc_.classes_

        # Fit clones so the estimators passed in by the caller stay unfitted.
        encoded_y = self.lablenc_.transform(y)
        self.classifiers_ = [clone(clf).fit(X, encoded_y)
                             for clf in self.classifiers]
        return self

    def predict(self, X):
        """Predict class labels for X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_examples, n_features]
            Matrix of examples to classify.

        Returns
        -------
        maj_vote : array-like, shape = [n_examples]
            Predicted class labels.

        """
        if self.vote == 'probability':
            maj_vote = np.argmax(self.predict_proba(X), axis=1)
        else:  # 'classlabel' vote
            # Collect results from clf.predict calls; after the transpose
            # each row holds the votes of all classifiers for one example.
            predictions = np.asarray([clf.predict(X)
                                      for clf in self.classifiers_]).T

            # Weighted count of votes per encoded label, row by row.
            maj_vote = np.apply_along_axis(
                lambda votes: np.argmax(np.bincount(votes,
                                                    weights=self.weights)),
                axis=1,
                arr=predictions)

        # Map the encoded labels back to the original class labels.
        maj_vote = self.lablenc_.inverse_transform(maj_vote)
        return maj_vote

    def predict_proba(self, X):
        """Predict class probabilities for X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_examples, n_features]
            Input vectors, where n_examples is the number of examples and
            n_features is the number of features.

        Returns
        -------
        avg_proba : array-like, shape = [n_examples, n_classes]
            Weighted average probability for each class per example.

        """
        probas = np.asarray([clf.predict_proba(X)
                             for clf in self.classifiers_])
        # Average over the classifier axis.
        avg_proba = np.average(probas, axis=0, weights=self.weights)
        return avg_proba

    def get_params(self, deep=True):
        """Get classifier parameter names for GridSearch.

        With `deep=False` the constructor parameters are returned via the
        parent implementation. With `deep=True` the result holds each named
        classifier plus its own parameters under `<name>__<param>` keys.
        """
        if not deep:
            return super().get_params(deep=False)

        out = self.named_classifiers.copy()
        for name, step in self.named_classifiers.items():
            for key, value in step.get_params(deep=True).items():
                out[f'{name}__{key}'] = value
        return out
314

315

316

317
# ## Using the majority voting principle to make predictions
318

319

320

321

322

323
iris = datasets.load_iris()
324
X, y = iris.data[50:, [1, 2]], iris.target[50:]
325
le = LabelEncoder()
326
y = le.fit_transform(y)
327

328
X_train, X_test, y_train, y_test =       train_test_split(X, y, 
329
                        test_size=0.5, 
330
                        random_state=1,
331
                        stratify=y)
332

333

334

335

336

337

338
clf1 = LogisticRegression(penalty='l2', 
339
                          C=0.001,
340
                          solver='lbfgs',
341
                          random_state=1)
342

343
clf2 = DecisionTreeClassifier(max_depth=1,
344
                              criterion='entropy',
345
                              random_state=0)
346

347
clf3 = KNeighborsClassifier(n_neighbors=1,
348
                            p=2,
349
                            metric='minkowski')
350

351
pipe1 = Pipeline([['sc', StandardScaler()],
352
                  ['clf', clf1]])
353
pipe3 = Pipeline([['sc', StandardScaler()],
354
                  ['clf', clf3]])
355

356
clf_labels = ['Logistic regression', 'Decision tree', 'KNN']
357

358
print('10-fold cross validation:\n')
359
for clf, label in zip([pipe1, clf2, pipe3], clf_labels):
360
    scores = cross_val_score(estimator=clf,
361
                             X=X_train,
362
                             y=y_train,
363
                             cv=10,
364
                             scoring='roc_auc')
365
    print(f'ROC AUC: {scores.mean():.2f} '
366
          f'(+/- {scores.std():.2f}) [{label}]')
367

368

369

370

371
# Majority Rule (hard) Voting
372

373
mv_clf = MajorityVoteClassifier(classifiers=[pipe1, clf2, pipe3])
374

375
clf_labels += ['Majority voting']
376
all_clf = [pipe1, clf2, pipe3, mv_clf]
377

378
for clf, label in zip(all_clf, clf_labels):
379
    scores = cross_val_score(estimator=clf,
380
                             X=X_train,
381
                             y=y_train,
382
                             cv=10,
383
                             scoring='roc_auc')
384
    print(f'ROC AUC: {scores.mean():.2f} '
385
          f'(+/- {scores.std():.2f}) [{label}]')
386

387

388

389
# # Evaluating and tuning the ensemble classifier
390

391

392

393

394

395
colors = ['black', 'orange', 'blue', 'green']
396
linestyles = [':', '--', '-.', '-']
397
for clf, label, clr, ls         in zip(all_clf,
398
               clf_labels, colors, linestyles):
399

400
    # assuming the label of the positive class is 1
401
    y_pred = clf.fit(X_train,
402
                     y_train).predict_proba(X_test)[:, 1]
403
    fpr, tpr, thresholds = roc_curve(y_true=y_test,
404
                                     y_score=y_pred)
405
    roc_auc = auc(x=fpr, y=tpr)
406
    plt.plot(fpr, tpr,
407
             color=clr,
408
             linestyle=ls,
409
             label=f'{label} (auc = {roc_auc:.2f})')
410

411
plt.legend(loc='lower right')
412
plt.plot([0, 1], [0, 1],
413
         linestyle='--',
414
         color='gray',
415
         linewidth=2)
416

417
plt.xlim([-0.1, 1.1])
418
plt.ylim([-0.1, 1.1])
419
plt.grid(alpha=0.5)
420
plt.xlabel('False positive rate (FPR)')
421
plt.ylabel('True positive rate (TPR)')
422

423

424
#plt.savefig('figures/07_04', dpi=300)
425
plt.show()
426

427

428

429

430
sc = StandardScaler()
431
X_train_std = sc.fit_transform(X_train)
432

433

434

435

436

437

438
all_clf = [pipe1, clf2, pipe3, mv_clf]
439

440
x_min = X_train_std[:, 0].min() - 1
441
x_max = X_train_std[:, 0].max() + 1
442
y_min = X_train_std[:, 1].min() - 1
443
y_max = X_train_std[:, 1].max() + 1
444

445
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
446
                     np.arange(y_min, y_max, 0.1))
447

448
f, axarr = plt.subplots(nrows=2, ncols=2, 
449
                        sharex='col', 
450
                        sharey='row', 
451
                        figsize=(7, 5))
452

453
for idx, clf, tt in zip(product([0, 1], [0, 1]),
454
                        all_clf, clf_labels):
455
    clf.fit(X_train_std, y_train)
456
    
457
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
458
    Z = Z.reshape(xx.shape)
459

460
    axarr[idx[0], idx[1]].contourf(xx, yy, Z, alpha=0.3)
461
    
462
    axarr[idx[0], idx[1]].scatter(X_train_std[y_train==0, 0], 
463
                                  X_train_std[y_train==0, 1], 
464
                                  c='blue', 
465
                                  marker='^',
466
                                  s=50)
467
    
468
    axarr[idx[0], idx[1]].scatter(X_train_std[y_train==1, 0], 
469
                                  X_train_std[y_train==1, 1], 
470
                                  c='green', 
471
                                  marker='o',
472
                                  s=50)
473
    
474
    axarr[idx[0], idx[1]].set_title(tt)
475

476
plt.text(-3.5, -5., 
477
         s='Sepal width [standardized]', 
478
         ha='center', va='center', fontsize=12)
479
plt.text(-12.5, 4.5, 
480
         s='Petal length [standardized]', 
481
         ha='center', va='center', 
482
         fontsize=12, rotation=90)
483

484
#plt.savefig('figures/07_05', dpi=300)
485
plt.show()
486

487

488

489

490
mv_clf.get_params()
491

492

493

494

495

496

497
params = {'decisiontreeclassifier__max_depth': [1, 2],
498
          'pipeline-1__clf__C': [0.001, 0.1, 100.0]}
499

500
grid = GridSearchCV(estimator=mv_clf,
501
                    param_grid=params,
502
                    cv=10,
503
                    scoring='roc_auc')
504
grid.fit(X_train, y_train)
505

506
# Print the mean and standard deviation of the cross-validation score for
# every parameter combination in the grid. The loop variable is deliberately
# not called `params`, so the parameter grid defined above is not overwritten.
cv_results = grid.cv_results_
for mean_score, std_dev, param_combo in zip(cv_results['mean_test_score'],
                                            cv_results['std_test_score'],
                                            cv_results['params']):
    print(f'{mean_score:.3f} +/- {std_dev:.2f} {param_combo}')
511

512

513

514

515
print(f'Best parameters: {grid.best_params_}')
516
print(f'ROC AUC: {grid.best_score_:.2f}')
517

518

519
# **Note**  
520
# The default setting for `refit` in `GridSearchCV` is `True` (i.e., `GridSearchCV(..., refit=True)`), which means that we can use the fitted `GridSearchCV` estimator to make predictions via the `predict` method, for example:
521
# 
522
#     grid = GridSearchCV(estimator=mv_clf, 
523
#                         param_grid=params, 
524
#                         cv=10, 
525
#                         scoring='roc_auc')
526
#     grid.fit(X_train, y_train)
527
#     y_pred = grid.predict(X_test)
528
# 
529
# In addition, the "best" estimator can directly be accessed via the `best_estimator_` attribute.
530

531

532

533
grid.best_estimator_.classifiers
534

535

536

537

538
mv_clf = grid.best_estimator_
539

540

541

542

543
mv_clf.set_params(**grid.best_estimator_.get_params())
544

545

546

547

548
mv_clf
549

550

551

552
# # Bagging -- Building an ensemble of classifiers from bootstrap samples
553

554

555

556

557

558
# ## Bagging in a nutshell
559

560

561

562

563

564
# ## Applying bagging to classify examples in the Wine dataset
565

566

567

568

569

570
df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/'
571
                      'machine-learning-databases/wine/wine.data',
572
                      header=None)
573

574
df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',
575
                   'Alcalinity of ash', 'Magnesium', 'Total phenols',
576
                   'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',
577
                   'Color intensity', 'Hue', 'OD280/OD315 of diluted wines',
578
                   'Proline']
579

580
# if the Wine dataset is temporarily unavailable from the
581
# UCI machine learning repository, un-comment the following line
582
# of code to load the dataset from a local path:
583

584
# df_wine = pd.read_csv('wine.data', header=None)
585

586
# drop 1 class
587
df_wine = df_wine[df_wine['Class label'] != 1]
588

589
y = df_wine['Class label'].values
590
X = df_wine[['Alcohol', 'OD280/OD315 of diluted wines']].values
591

592

593

594

595

596

597
le = LabelEncoder()
598
y = le.fit_transform(y)
599

600
X_train, X_test, y_train, y_test =            train_test_split(X, y, 
601
                             test_size=0.2, 
602
                             random_state=1,
603
                             stratify=y)
604

605

606

607

608

609
tree = DecisionTreeClassifier(criterion='entropy', 
610
                              max_depth=None,
611
                              random_state=1)
612

613
# Settings shared by both spellings of the base-estimator keyword.
_bag_kwargs = dict(n_estimators=500,
                   max_samples=1.0,
                   max_features=1.0,
                   bootstrap=True,
                   bootstrap_features=False,
                   n_jobs=1,
                   random_state=1)

# scikit-learn 1.2 renamed `base_estimator` to `estimator` and later removed
# the old name. Try the new keyword first and fall back for older releases,
# which reject the unknown keyword with a TypeError.
try:
    bag = BaggingClassifier(estimator=tree, **_bag_kwargs)
except TypeError:
    bag = BaggingClassifier(base_estimator=tree, **_bag_kwargs)
621

622

623

624

625

626

627
tree = tree.fit(X_train, y_train)
628
y_train_pred = tree.predict(X_train)
629
y_test_pred = tree.predict(X_test)
630

631
tree_train = accuracy_score(y_train, y_train_pred)
632
tree_test = accuracy_score(y_test, y_test_pred)
633
print(f'Decision tree train/test accuracies '
634
      f'{tree_train:.3f}/{tree_test:.3f}')
635

636
bag = bag.fit(X_train, y_train)
637
y_train_pred = bag.predict(X_train)
638
y_test_pred = bag.predict(X_test)
639

640
bag_train = accuracy_score(y_train, y_train_pred) 
641
bag_test = accuracy_score(y_test, y_test_pred) 
642
print(f'Bagging train/test accuracies '
643
      f'{bag_train:.3f}/{bag_test:.3f}')
644

645

646

647

648

649

650
x_min = X_train[:, 0].min() - 1
651
x_max = X_train[:, 0].max() + 1
652
y_min = X_train[:, 1].min() - 1
653
y_max = X_train[:, 1].max() + 1
654

655
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
656
                     np.arange(y_min, y_max, 0.1))
657

658
f, axarr = plt.subplots(nrows=1, ncols=2, 
659
                        sharex='col', 
660
                        sharey='row', 
661
                        figsize=(8, 3))
662

663

664
for idx, clf, tt in zip([0, 1],
665
                        [tree, bag],
666
                        ['Decision tree', 'Bagging']):
667
    clf.fit(X_train, y_train)
668

669
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
670
    Z = Z.reshape(xx.shape)
671

672
    axarr[idx].contourf(xx, yy, Z, alpha=0.3)
673
    axarr[idx].scatter(X_train[y_train == 0, 0],
674
                       X_train[y_train == 0, 1],
675
                       c='blue', marker='^')
676

677
    axarr[idx].scatter(X_train[y_train == 1, 0],
678
                       X_train[y_train == 1, 1],
679
                       c='green', marker='o')
680

681
    axarr[idx].set_title(tt)
682

683
axarr[0].set_ylabel('OD280/OD315 of diluted wines', fontsize=12)
684

685
plt.tight_layout()
686
plt.text(0, -0.2,
687
         s='Alcohol',
688
         ha='center',
689
         va='center',
690
         fontsize=12,
691
         transform=axarr[1].transAxes)
692

693
#plt.savefig('figures/07_08.png', dpi=300, bbox_inches='tight')
694
plt.show()
695

696

697

698
# # Leveraging weak learners via adaptive boosting
699

700
# ## How boosting works
701

702

703

704

705

706

707

708

709

710

711

712
# Worked example of one AdaBoost round on ten examples.

# True labels and the predictions of a single weak learner.
y = np.array([1, 1, 1, -1, -1, -1,  1,  1,  1, -1])
yhat = np.array([1, 1, 1, -1, -1, -1, -1, -1, -1, -1])
correct = (y == yhat)

# Every example starts with the same weight.
weights = np.full(y.shape, 0.1)
print(weights)

# Error rate: fraction of misclassified examples.
epsilon = np.mean(np.logical_not(correct))
print(epsilon)

# Coefficient computed from the error rate.
alpha_j = np.log((1 - epsilon) / epsilon) / 2
print(alpha_j)

# Updated weight when prediction and label agree (y * yhat = 1 * 1).
update_if_correct = np.exp(-alpha_j * (1 * 1)) * 0.1
print(update_if_correct)

# Updated weight when they disagree, case y = 1, yhat = -1 ...
update_if_wrong_1 = np.exp(-alpha_j * (1 * -1)) * 0.1
print(update_if_wrong_1)

# ... and case y = -1, yhat = 1.
update_if_wrong_2 = np.exp(-alpha_j * (-1 * 1)) * 0.1
print(update_if_wrong_2)

# Select the matching update for every example.
weights = np.where(correct, update_if_correct, update_if_wrong_1)
print(weights)

# Rescale so that the weights sum to one.
normalized_weights = weights / weights.sum()
print(normalized_weights)
755
print(normalized_weights)
756

757

758
# ## Applying AdaBoost using scikit-learn
759

760

761

762

763

764
tree = DecisionTreeClassifier(criterion='entropy', 
765
                              max_depth=1,
766
                              random_state=1)
767

768
# Settings shared by both spellings of the base-estimator keyword.
_ada_kwargs = dict(n_estimators=500,
                   learning_rate=0.1,
                   random_state=1)

# scikit-learn 1.2 renamed `base_estimator` to `estimator` and later removed
# the old name. Try the new keyword first and fall back for older releases,
# which reject the unknown keyword with a TypeError.
try:
    ada = AdaBoostClassifier(estimator=tree, **_ada_kwargs)
except TypeError:
    ada = AdaBoostClassifier(base_estimator=tree, **_ada_kwargs)
772

773

774

775

776
tree = tree.fit(X_train, y_train)
777
y_train_pred = tree.predict(X_train)
778
y_test_pred = tree.predict(X_test)
779

780
tree_train = accuracy_score(y_train, y_train_pred)
781
tree_test = accuracy_score(y_test, y_test_pred)
782
print(f'Decision tree train/test accuracies '
783
      f'{tree_train:.3f}/{tree_test:.3f}')
784

785
ada = ada.fit(X_train, y_train)
786
y_train_pred = ada.predict(X_train)
787
y_test_pred = ada.predict(X_test)
788

789
ada_train = accuracy_score(y_train, y_train_pred) 
790
ada_test = accuracy_score(y_test, y_test_pred) 
791
print(f'AdaBoost train/test accuracies '
792
      f'{ada_train:.3f}/{ada_test:.3f}')
793

794

795

796

797
x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
798
y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
799
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
800
                     np.arange(y_min, y_max, 0.1))
801

802
f, axarr = plt.subplots(1, 2, sharex='col', sharey='row', figsize=(8, 3))
803

804

805
for idx, clf, tt in zip([0, 1],
806
                        [tree, ada],
807
                        ['Decision tree', 'AdaBoost']):
808
    clf.fit(X_train, y_train)
809

810
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
811
    Z = Z.reshape(xx.shape)
812

813
    axarr[idx].contourf(xx, yy, Z, alpha=0.3)
814
    axarr[idx].scatter(X_train[y_train == 0, 0],
815
                       X_train[y_train == 0, 1],
816
                       c='blue', marker='^')
817
    axarr[idx].scatter(X_train[y_train == 1, 0],
818
                       X_train[y_train == 1, 1],
819
                       c='green', marker='o')
820
    axarr[idx].set_title(tt)
821

822
axarr[0].set_ylabel('OD280/OD315 of diluted wines', fontsize=12)
823

824
plt.tight_layout()
825
plt.text(0, -0.2,
826
         s='Alcohol',
827
         ha='center',
828
         va='center',
829
         fontsize=12,
830
         transform=axarr[1].transAxes)
831

832
# plt.savefig('figures/07_11.png', dpi=300, bbox_inches='tight')
833
plt.show()
834

835

836
# # Gradient boosting -- training an ensemble based on loss gradients
837

838
# ## Comparing AdaBoost with gradient boosting
839

840
# ## Outlining the general gradient boosting algorithm
841

842
# ## Explaining the gradient boosting algorithm for classification
843

844
# ## Illustrating gradient boosting for classification
845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866
# ## Using XGBoost
867

868

869

870

871

872

873

874
xgb.__version__
875

876

877

878

879
model = xgb.XGBClassifier(n_estimators=1000, learning_rate=0.01, max_depth=4, random_state=1, use_label_encoder=False)
880

881

882
gbm = model.fit(X_train, y_train)
883

884
y_train_pred = gbm.predict(X_train)
885
y_test_pred = gbm.predict(X_test)
886

887
gbm_train = accuracy_score(y_train, y_train_pred) 
888
gbm_test = accuracy_score(y_test, y_test_pred) 
889
print(f'XGboost train/test accuracies '
890
      f'{gbm_train:.3f}/{gbm_test:.3f}')
891

892

893

894
# # Summary
895

896
# ...
897

898
# ---
899
# 
900
# Readers may ignore the next cell.
901

902

903

904

905

906

907

908

909

910

911
Product

Resources

Company