# GitHub Repository: rasbt/machine-learning-book
# Path: blob/main/ch07/ch07.py
# coding: utf-8

import sys
from python_environment_check import check_packages
from scipy.special import comb
import math
import numpy as np
import matplotlib.pyplot as plt
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.preprocessing import LabelEncoder
from sklearn.base import clone
from sklearn.pipeline import _name_estimators
import operator
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from itertools import product
from sklearn.model_selection import GridSearchCV
import pandas as pd
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostClassifier
import xgboost as xgb

# # Machine Learning with PyTorch and Scikit-Learn
# # -- Code Examples

# ## Package version checks

# Add folder to path in order to load from the check_packages.py script:

sys.path.insert(0, '..')


# Check recommended package versions:

d = {
    'numpy': '1.21.2',
    'scipy': '1.7.0',
    'matplotlib': '3.4.3',
    'sklearn': '1.0',
    'pandas': '1.3.2',
    'xgboost': '1.5.0',
}
check_packages(d)


# # Chapter 7 - Combining Different Models for Ensemble Learning


# ### Overview

# - [Learning with ensembles](#Learning-with-ensembles)
# - [Combining classifiers via majority vote](#Combining-classifiers-via-majority-vote)
# - [Implementing a simple majority vote classifier](#Implementing-a-simple-majority-vote-classifier)
# - [Using the majority voting principle to make predictions](#Using-the-majority-voting-principle-to-make-predictions)
# - [Evaluating and tuning the ensemble classifier](#Evaluating-and-tuning-the-ensemble-classifier)
# - [Bagging – building an ensemble of classifiers from bootstrap samples](#Bagging----Building-an-ensemble-of-classifiers-from-bootstrap-samples)
# - [Bagging in a nutshell](#Bagging-in-a-nutshell)
# - [Applying bagging to classify examples in the Wine dataset](#Applying-bagging-to-classify-examples-in-the-Wine-dataset)
# - [Leveraging weak learners via adaptive boosting](#Leveraging-weak-learners-via-adaptive-boosting)
# - [How boosting works](#How-boosting-works)
# - [Applying AdaBoost using scikit-learn](#Applying-AdaBoost-using-scikit-learn)
# - [Gradient boosting -- training an ensemble based on loss gradients](#Gradient-boosting----training-an-ensemble-based-on-loss-gradients)
# - [Comparing AdaBoost with gradient boosting](#Comparing-AdaBoost-with-gradient-boosting)
# - [Outlining the general gradient boosting algorithm](#Outlining-the-general-gradient-boosting-algorithm)
# - [Explaining the gradient boosting algorithm for classification](#Explaining-the-gradient-boosting-algorithm-for-classification)
# - [Illustrating gradient boosting for classification](#Illustrating-gradient-boosting-for-classification)
# - [Using XGBoost](#Using-XGBoost)
# - [Summary](#Summary)


# # Learning with ensembles


def ensemble_error(n_classifier, error):
    # Probability that a majority of the n_classifier base classifiers,
    # each with independent error rate `error`, is wrong: the sum of the
    # binomial probabilities for k >= ceil(n_classifier / 2) errors.
    k_start = int(math.ceil(n_classifier / 2.))
    probs = [comb(n_classifier, k) * error**k * (1-error)**(n_classifier - k)
             for k in range(k_start, n_classifier + 1)]
    return sum(probs)


ensemble_error(n_classifier=11, error=0.25)

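
# As a sanity check (not part of the original script), the binomial formula can
# be verified by simulation: draw 11 independent base-classifier errors with
# probability 0.25 each and count how often the majority (6 or more) is wrong.

rng = np.random.default_rng(0)
simulated_errors = rng.random((100_000, 11)) < 0.25   # True where a classifier errs
majority_wrong = simulated_errors.sum(axis=1) >= 6    # majority of 11 is wrong
print(majority_wrong.mean())  # close to ensemble_error(n_classifier=11, error=0.25)
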
error_range = np.arange(0.0, 1.01, 0.01)
ens_errors = [ensemble_error(n_classifier=11, error=error)
              for error in error_range]


plt.plot(error_range,
         ens_errors,
         label='Ensemble error',
         linewidth=2)

plt.plot(error_range,
         error_range,
         linestyle='--',
         label='Base error',
         linewidth=2)

plt.xlabel('Base error')
plt.ylabel('Base/Ensemble error')
plt.legend(loc='upper left')
plt.grid(alpha=0.5)
#plt.savefig('figures/07_03.png', dpi=300)
plt.show()


# # Combining classifiers via majority vote

# ## Implementing a simple majority vote classifier


np.argmax(np.bincount([0, 0, 1],
                      weights=[0.2, 0.2, 0.6]))


ex = np.array([[0.9, 0.1],
               [0.8, 0.2],
               [0.4, 0.6]])

p = np.average(ex,
               axis=0,
               weights=[0.2, 0.2, 0.6])
p


np.argmax(p)

# Scikit-learn 0.16 and newer requires reversing the parent classes
# See https://github.com/rasbt/machine-learning-book/discussions/205 for more details
import sklearn

# Compare the (major, minor) version numerically rather than as raw strings
sklearn_version = tuple(int(part) for part in
                        sklearn.__version__.split('.')[:2] if part.isdigit())
base_classes = ((ClassifierMixin, BaseEstimator) if sklearn_version >= (0, 16)
                else (BaseEstimator, ClassifierMixin))


# class MajorityVoteClassifier(BaseEstimator,
#                              ClassifierMixin):

class MajorityVoteClassifier(*base_classes):
    """ A majority vote ensemble classifier

    Parameters
    ----------
    classifiers : array-like, shape = [n_classifiers]
        Different classifiers for the ensemble

    vote : str, {'classlabel', 'probability'} (default='classlabel')
        If 'classlabel', the prediction is based on the argmax of
        class labels. Else if 'probability', the argmax of
        the sum of probabilities is used to predict the class label
        (recommended for calibrated classifiers).

    weights : array-like, shape = [n_classifiers], optional (default=None)
        If a list of `int` or `float` values is provided, the classifiers
        are weighted by importance; uniform weights are used if `weights=None`.

    """
    def __init__(self, classifiers, vote='classlabel', weights=None):

        self.classifiers = classifiers
        self.named_classifiers = {key: value for key, value
                                  in _name_estimators(classifiers)}
        self.vote = vote
        self.weights = weights

    def fit(self, X, y):
        """ Fit classifiers.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_examples, n_features]
            Matrix of training examples.

        y : array-like, shape = [n_examples]
            Vector of target class labels.

        Returns
        -------
        self : object

        """
        if self.vote not in ('probability', 'classlabel'):
            raise ValueError(f"vote must be 'probability' or 'classlabel'"
                             f"; got (vote={self.vote})")

        if self.weights and len(self.weights) != len(self.classifiers):
            raise ValueError(f'Number of classifiers and weights must be equal'
                             f'; got {len(self.weights)} weights,'
                             f' {len(self.classifiers)} classifiers')

        # Use LabelEncoder to ensure class labels start with 0, which
        # is important for the np.argmax call in self.predict
        self.lablenc_ = LabelEncoder()
        self.lablenc_.fit(y)
        self.classes_ = self.lablenc_.classes_
        self.classifiers_ = []
        for clf in self.classifiers:
            fitted_clf = clone(clf).fit(X, self.lablenc_.transform(y))
            self.classifiers_.append(fitted_clf)
        return self

    def predict(self, X):
        """ Predict class labels for X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_examples, n_features]
            Matrix of training examples.

        Returns
        -------
        maj_vote : array-like, shape = [n_examples]
            Predicted class labels.

        """
        if self.vote == 'probability':
            maj_vote = np.argmax(self.predict_proba(X), axis=1)
        else:  # 'classlabel' vote

            # Collect results from clf.predict calls
            predictions = np.asarray([clf.predict(X)
                                      for clf in self.classifiers_]).T

            maj_vote = np.apply_along_axis(
                lambda x: np.argmax(np.bincount(x, weights=self.weights)),
                axis=1,
                arr=predictions)
        maj_vote = self.lablenc_.inverse_transform(maj_vote)
        return maj_vote

    def predict_proba(self, X):
        """ Predict class probabilities for X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_examples, n_features]
            Training vectors, where n_examples is the number of examples and
            n_features is the number of features.

        Returns
        -------
        avg_proba : array-like, shape = [n_examples, n_classes]
            Weighted average probability for each class per example.

        """
        probas = np.asarray([clf.predict_proba(X)
                             for clf in self.classifiers_])
        avg_proba = np.average(probas, axis=0, weights=self.weights)
        return avg_proba

    def get_params(self, deep=True):
        """ Get classifier parameter names for GridSearch"""
        if not deep:
            return super().get_params(deep=False)
        else:
            out = self.named_classifiers.copy()
            for name, step in self.named_classifiers.items():
                for key, value in step.get_params(deep=True).items():
                    out[f'{name}__{key}'] = value
            return out


# ## Using the majority voting principle to make predictions


iris = datasets.load_iris()
X, y = iris.data[50:, [1, 2]], iris.target[50:]
le = LabelEncoder()
y = le.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                     test_size=0.5,
                                                     random_state=1,
                                                     stratify=y)

clf1 = LogisticRegression(penalty='l2',
                          C=0.001,
                          solver='lbfgs',
                          random_state=1)

clf2 = DecisionTreeClassifier(max_depth=1,
                              criterion='entropy',
                              random_state=0)

clf3 = KNeighborsClassifier(n_neighbors=1,
                            p=2,
                            metric='minkowski')

pipe1 = Pipeline([['sc', StandardScaler()],
                  ['clf', clf1]])
pipe3 = Pipeline([['sc', StandardScaler()],
                  ['clf', clf3]])

clf_labels = ['Logistic regression', 'Decision tree', 'KNN']

print('10-fold cross validation:\n')
for clf, label in zip([pipe1, clf2, pipe3], clf_labels):
    scores = cross_val_score(estimator=clf,
                             X=X_train,
                             y=y_train,
                             cv=10,
                             scoring='roc_auc')
    print(f'ROC AUC: {scores.mean():.2f} '
          f'(+/- {scores.std():.2f}) [{label}]')


# Majority Rule (hard) Voting

mv_clf = MajorityVoteClassifier(classifiers=[pipe1, clf2, pipe3])

clf_labels += ['Majority voting']
all_clf = [pipe1, clf2, pipe3, mv_clf]

for clf, label in zip(all_clf, clf_labels):
    scores = cross_val_score(estimator=clf,
                             X=X_train,
                             y=y_train,
                             cv=10,
                             scoring='roc_auc')
    print(f'ROC AUC: {scores.mean():.2f} '
          f'(+/- {scores.std():.2f}) [{label}]')



# ## Evaluating and tuning the ensemble classifier


colors = ['black', 'orange', 'blue', 'green']
linestyles = [':', '--', '-.', '-']
for clf, label, clr, ls in zip(all_clf,
                               clf_labels, colors, linestyles):

    # assuming the label of the positive class is 1
    y_pred = clf.fit(X_train,
                     y_train).predict_proba(X_test)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_true=y_test,
                                     y_score=y_pred)
    roc_auc = auc(x=fpr, y=tpr)
    plt.plot(fpr, tpr,
             color=clr,
             linestyle=ls,
             label=f'{label} (auc = {roc_auc:.2f})')

plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1],
         linestyle='--',
         color='gray',
         linewidth=2)

plt.xlim([-0.1, 1.1])
plt.ylim([-0.1, 1.1])
plt.grid(alpha=0.5)
plt.xlabel('False positive rate (FPR)')
plt.ylabel('True positive rate (TPR)')


#plt.savefig('figures/07_04', dpi=300)
plt.show()

sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)


all_clf = [pipe1, clf2, pipe3, mv_clf]

x_min = X_train_std[:, 0].min() - 1
x_max = X_train_std[:, 0].max() + 1
y_min = X_train_std[:, 1].min() - 1
y_max = X_train_std[:, 1].max() + 1

xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

f, axarr = plt.subplots(nrows=2, ncols=2,
                        sharex='col',
                        sharey='row',
                        figsize=(7, 5))

for idx, clf, tt in zip(product([0, 1], [0, 1]),
                        all_clf, clf_labels):
    clf.fit(X_train_std, y_train)

    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    axarr[idx[0], idx[1]].contourf(xx, yy, Z, alpha=0.3)

    axarr[idx[0], idx[1]].scatter(X_train_std[y_train==0, 0],
                                  X_train_std[y_train==0, 1],
                                  c='blue',
                                  marker='^',
                                  s=50)

    axarr[idx[0], idx[1]].scatter(X_train_std[y_train==1, 0],
                                  X_train_std[y_train==1, 1],
                                  c='green',
                                  marker='o',
                                  s=50)

    axarr[idx[0], idx[1]].set_title(tt)

plt.text(-3.5, -5.,
         s='Sepal width [standardized]',
         ha='center', va='center', fontsize=12)
plt.text(-12.5, 4.5,
         s='Petal length [standardized]',
         ha='center', va='center',
         fontsize=12, rotation=90)

#plt.savefig('figures/07_05', dpi=300)
plt.show()


mv_clf.get_params()


params = {'decisiontreeclassifier__max_depth': [1, 2],
          'pipeline-1__clf__C': [0.001, 0.1, 100.0]}

grid = GridSearchCV(estimator=mv_clf,
                    param_grid=params,
                    cv=10,
                    scoring='roc_auc')
grid.fit(X_train, y_train)

for r, _ in enumerate(grid.cv_results_['mean_test_score']):
    mean_score = grid.cv_results_['mean_test_score'][r]
    std_dev = grid.cv_results_['std_test_score'][r]
    params = grid.cv_results_['params'][r]
    print(f'{mean_score:.3f} +/- {std_dev:.2f} {params}')


print(f'Best parameters: {grid.best_params_}')
print(f'ROC AUC: {grid.best_score_:.2f}')


# **Note**
# By default, `refit` in `GridSearchCV` is set to `True` (i.e., `GridSearchCV(..., refit=True)`), which means that we can use the fitted `GridSearchCV` estimator to make predictions via the `predict` method, for example:
#
#     grid = GridSearchCV(estimator=mv_clf,
#                         param_grid=params,
#                         cv=10,
#                         scoring='roc_auc')
#     grid.fit(X_train, y_train)
#     y_pred = grid.predict(X_test)
#
# In addition, the "best" estimator can be accessed directly via the `best_estimator_` attribute.

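
# Quick illustration of the note above (not part of the original script):
# since `refit=True`, the fitted `grid` object can be used for prediction
# directly on the held-out test split.

y_pred = grid.predict(X_test)
print(f'Test accuracy of the refit best estimator: '
      f'{accuracy_score(y_test, y_pred):.3f}')
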

grid.best_estimator_.classifiers


mv_clf = grid.best_estimator_


mv_clf.set_params(**grid.best_estimator_.get_params())


mv_clf


# # Bagging -- Building an ensemble of classifiers from bootstrap samples


# ## Bagging in a nutshell

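
# This subsection has no code in the original script; the following short
# sketch (not from the book) shows what a bootstrap sample looks like:
# n indices drawn with replacement, so some examples repeat while others
# are left out ("out-of-bag").

rng = np.random.default_rng(1)
toy_indices = np.arange(8)  # a toy "training set" of 8 example indices
bootstrap_sample = rng.choice(toy_indices, size=toy_indices.shape[0], replace=True)
out_of_bag = np.setdiff1d(toy_indices, bootstrap_sample)
print('Bootstrap sample   :', bootstrap_sample)
print('Out-of-bag examples:', out_of_bag)
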

# ## Applying bagging to classify examples in the Wine dataset


df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/'
                      'machine-learning-databases/wine/wine.data',
                      header=None)

df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',
                   'Alcalinity of ash', 'Magnesium', 'Total phenols',
                   'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',
                   'Color intensity', 'Hue', 'OD280/OD315 of diluted wines',
                   'Proline']

# If the Wine dataset is temporarily unavailable from the
# UCI machine learning repository, uncomment the following line
# of code to load the dataset from a local path:

# df_wine = pd.read_csv('wine.data', header=None)

# drop class 1 (keep only classes 2 and 3)
df_wine = df_wine[df_wine['Class label'] != 1]

y = df_wine['Class label'].values
X = df_wine[['Alcohol', 'OD280/OD315 of diluted wines']].values


le = LabelEncoder()
y = le.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                     test_size=0.2,
                                                     random_state=1,
                                                     stratify=y)

tree = DecisionTreeClassifier(criterion='entropy',
                              max_depth=None,
                              random_state=1)

# scikit-learn 1.2 renamed BaggingClassifier's `base_estimator` parameter to
# `estimator` (the old name was removed in later releases), so pass whichever
# keyword the installed version accepts:
bagging_params = dict(n_estimators=500,
                      max_samples=1.0,
                      max_features=1.0,
                      bootstrap=True,
                      bootstrap_features=False,
                      n_jobs=1,
                      random_state=1)
try:
    bag = BaggingClassifier(estimator=tree, **bagging_params)
except TypeError:
    bag = BaggingClassifier(base_estimator=tree, **bagging_params)


tree = tree.fit(X_train, y_train)
y_train_pred = tree.predict(X_train)
y_test_pred = tree.predict(X_test)

tree_train = accuracy_score(y_train, y_train_pred)
tree_test = accuracy_score(y_test, y_test_pred)
print(f'Decision tree train/test accuracies '
      f'{tree_train:.3f}/{tree_test:.3f}')

bag = bag.fit(X_train, y_train)
y_train_pred = bag.predict(X_train)
y_test_pred = bag.predict(X_test)

bag_train = accuracy_score(y_train, y_train_pred)
bag_test = accuracy_score(y_test, y_test_pred)
print(f'Bagging train/test accuracies '
      f'{bag_train:.3f}/{bag_test:.3f}')


x_min = X_train[:, 0].min() - 1
x_max = X_train[:, 0].max() + 1
y_min = X_train[:, 1].min() - 1
y_max = X_train[:, 1].max() + 1

xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

f, axarr = plt.subplots(nrows=1, ncols=2,
                        sharex='col',
                        sharey='row',
                        figsize=(8, 3))


for idx, clf, tt in zip([0, 1],
                        [tree, bag],
                        ['Decision tree', 'Bagging']):
    clf.fit(X_train, y_train)

    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    axarr[idx].contourf(xx, yy, Z, alpha=0.3)
    axarr[idx].scatter(X_train[y_train == 0, 0],
                       X_train[y_train == 0, 1],
                       c='blue', marker='^')

    axarr[idx].scatter(X_train[y_train == 1, 0],
                       X_train[y_train == 1, 1],
                       c='green', marker='o')

    axarr[idx].set_title(tt)

axarr[0].set_ylabel('OD280/OD315 of diluted wines', fontsize=12)

plt.tight_layout()
plt.text(0, -0.2,
         s='Alcohol',
         ha='center',
         va='center',
         fontsize=12,
         transform=axarr[1].transAxes)

#plt.savefig('figures/07_08.png', dpi=300, bbox_inches='tight')
plt.show()



# # Leveraging weak learners via adaptive boosting

# ## How boosting works


# Toy example of the AdaBoost weight updates for 10 training examples:
y = np.array([1, 1, 1, -1, -1, -1, 1, 1, 1, -1])
yhat = np.array([1, 1, 1, -1, -1, -1, -1, -1, -1, -1])
correct = (y == yhat)
weights = np.full(10, 0.1)
print(weights)

# Weighted error rate of the weak learner (weights are uniform here)
epsilon = np.mean(~correct)
print(epsilon)


# Coefficient (importance) of the weak learner
alpha_j = 0.5 * np.log((1-epsilon) / epsilon)
print(alpha_j)


# Weight update for a correctly classified example (y * yhat = 1)
update_if_correct = 0.1 * np.exp(-alpha_j * 1 * 1)
print(update_if_correct)


# Weight update for a misclassified example (y * yhat = -1)
update_if_wrong_1 = 0.1 * np.exp(-alpha_j * 1 * -1)
print(update_if_wrong_1)


# Same update with the labels flipped; the product y * yhat is unchanged
update_if_wrong_2 = 0.1 * np.exp(-alpha_j * -1 * 1)
print(update_if_wrong_2)


# Apply the updates element-wise
weights = np.where(correct == 1, update_if_correct, update_if_wrong_1)
print(weights)


# Normalize the weights so that they sum to 1
normalized_weights = weights / np.sum(weights)
print(normalized_weights)


# ## Applying AdaBoost using scikit-learn


tree = DecisionTreeClassifier(criterion='entropy',
                              max_depth=1,
                              random_state=1)

# As with BaggingClassifier above, newer scikit-learn versions use `estimator`
# instead of `base_estimator`:
adaboost_params = dict(n_estimators=500,
                       learning_rate=0.1,
                       random_state=1)
try:
    ada = AdaBoostClassifier(estimator=tree, **adaboost_params)
except TypeError:
    ada = AdaBoostClassifier(base_estimator=tree, **adaboost_params)


tree = tree.fit(X_train, y_train)
y_train_pred = tree.predict(X_train)
y_test_pred = tree.predict(X_test)

tree_train = accuracy_score(y_train, y_train_pred)
tree_test = accuracy_score(y_test, y_test_pred)
print(f'Decision tree train/test accuracies '
      f'{tree_train:.3f}/{tree_test:.3f}')

ada = ada.fit(X_train, y_train)
y_train_pred = ada.predict(X_train)
y_test_pred = ada.predict(X_test)

ada_train = accuracy_score(y_train, y_train_pred)
ada_test = accuracy_score(y_test, y_test_pred)
print(f'AdaBoost train/test accuracies '
      f'{ada_train:.3f}/{ada_test:.3f}')


x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

f, axarr = plt.subplots(1, 2, sharex='col', sharey='row', figsize=(8, 3))


for idx, clf, tt in zip([0, 1],
                        [tree, ada],
                        ['Decision tree', 'AdaBoost']):
    clf.fit(X_train, y_train)

    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    axarr[idx].contourf(xx, yy, Z, alpha=0.3)
    axarr[idx].scatter(X_train[y_train == 0, 0],
                       X_train[y_train == 0, 1],
                       c='blue', marker='^')
    axarr[idx].scatter(X_train[y_train == 1, 0],
                       X_train[y_train == 1, 1],
                       c='green', marker='o')
    axarr[idx].set_title(tt)

axarr[0].set_ylabel('OD280/OD315 of diluted wines', fontsize=12)

plt.tight_layout()
plt.text(0, -0.2,
         s='Alcohol',
         ha='center',
         va='center',
         fontsize=12,
         transform=axarr[1].transAxes)

# plt.savefig('figures/07_11.png', dpi=300, bbox_inches='tight')
plt.show()



# # Gradient boosting -- training an ensemble based on loss gradients

# ## Comparing AdaBoost with gradient boosting

# ## Outlining the general gradient boosting algorithm

# ## Explaining the gradient boosting algorithm for classification

# ## Illustrating gradient boosting for classification

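
# The original script contains no code for the gradient boosting subsections
# above. The following is a minimal, hand-rolled sketch -- not the book's
# algorithm verbatim -- of gradient boosting for binary classification with
# the logistic loss, using the Wine training split from above: start from the
# constant log-odds, then repeatedly fit a shallow regression tree to the
# pseudo-residuals y - p and take a small step in that direction. (For
# simplicity, the trees are plain least-squares fits to the residuals,
# without refined leaf values.)

from sklearn.tree import DecisionTreeRegressor

learning_rate = 0.3
n_rounds = 20

# Step 1: constant initial prediction = log-odds of the positive class
p_init = y_train.mean()
scores = np.full(y_train.shape, np.log(p_init / (1.0 - p_init)))

boosting_trees = []  # the ensemble is this list of trees plus the initial log-odds
for _ in range(n_rounds):
    proba = 1.0 / (1.0 + np.exp(-scores))   # current probability estimates
    residuals = y_train - proba             # pseudo-residuals (negative gradient of the log loss)
    stump = DecisionTreeRegressor(max_depth=1, random_state=1)
    stump.fit(X_train, residuals)           # Step 2: fit a weak learner to the residuals
    scores += learning_rate * stump.predict(X_train)  # Step 3: update the additive model
    boosting_trees.append(stump)

train_acc = np.mean((scores > 0).astype(int) == y_train)
print(f'Training accuracy of the boosting sketch: {train_acc:.3f}')
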
# ## Using XGBoost


xgb.__version__


# `use_label_encoder=False` avoids a label-encoder deprecation warning in
# some XGBoost 1.x releases; newer releases deprecate or ignore the argument.
model = xgb.XGBClassifier(n_estimators=1000,
                          learning_rate=0.01,
                          max_depth=4,
                          random_state=1,
                          use_label_encoder=False)

gbm = model.fit(X_train, y_train)

y_train_pred = gbm.predict(X_train)
y_test_pred = gbm.predict(X_test)

gbm_train = accuracy_score(y_train, y_train_pred)
gbm_test = accuracy_score(y_test, y_test_pred)
print(f'XGBoost train/test accuracies '
      f'{gbm_train:.3f}/{gbm_test:.3f}')



# # Summary

# ...

# ---
#
# Readers may ignore the next cell.