# coding: utf-8


import sys
from python_environment_check import check_packages
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from urllib.error import HTTPError  # needed for the Iris download fallback below

# # Machine Learning with PyTorch and Scikit-Learn
# # -- Code Examples

# ## Package version checks

# Add folder to path in order to load from the check_packages.py script:


sys.path.insert(0, '..')


# Check recommended package versions:


d = {
    'numpy': '1.21.2',
    'matplotlib': '3.4.3',
    'pandas': '1.3.2'
}
check_packages(d)

# # Chapter 2 - Training Machine Learning Algorithms for Classification

# ### Overview
#
# - [Artificial neurons – a brief glimpse into the early history of machine learning](#Artificial-neurons-a-brief-glimpse-into-the-early-history-of-machine-learning)
# - [The formal definition of an artificial neuron](#The-formal-definition-of-an-artificial-neuron)
# - [The perceptron learning rule](#The-perceptron-learning-rule)
# - [Implementing a perceptron learning algorithm in Python](#Implementing-a-perceptron-learning-algorithm-in-Python)
# - [An object-oriented perceptron API](#An-object-oriented-perceptron-API)
# - [Training a perceptron model on the Iris dataset](#Training-a-perceptron-model-on-the-Iris-dataset)
# - [Adaptive linear neurons and the convergence of learning](#Adaptive-linear-neurons-and-the-convergence-of-learning)
# - [Minimizing cost functions with gradient descent](#Minimizing-cost-functions-with-gradient-descent)
# - [Implementing an Adaptive Linear Neuron in Python](#Implementing-an-Adaptive-Linear-Neuron-in-Python)
# - [Improving gradient descent through feature scaling](#Improving-gradient-descent-through-feature-scaling)
# - [Large scale machine learning and stochastic gradient descent](#Large-scale-machine-learning-and-stochastic-gradient-descent)
# - [Summary](#Summary)


# # Artificial neurons - a brief glimpse into the early history of machine learning



# ## The formal definition of an artificial neuron



# ## The perceptron learning rule

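# A minimal, hypothetical illustration (not part of the book's code) of a single
# perceptron update, w := w + eta * (y - y_hat) * x and b := b + eta * (y - y_hat),
# on one made-up training example; the Perceptron class below applies exactly this
# rule once per example and epoch. All *_demo names are illustrative only.
eta_demo = 0.1
w_demo, b_demo = np.array([0.0, 0.0]), 0.0
xi_demo, target_demo = np.array([2.0, 3.0]), 0
y_hat_demo = np.where(np.dot(xi_demo, w_demo) + b_demo >= 0.0, 1, 0)   # predicts 1
update_demo = eta_demo * (target_demo - y_hat_demo)                    # -0.1
w_demo, b_demo = w_demo + update_demo * xi_demo, b_demo + update_demo  # w=[-0.2, -0.3], b=-0.1
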
# # Implementing a perceptron learning algorithm in Python

# ## An object-oriented perceptron API


class Perceptron:
    """Perceptron classifier.

    Parameters
    ------------
    eta : float
      Learning rate (between 0.0 and 1.0)
    n_iter : int
      Passes over the training dataset.
    random_state : int
      Random number generator seed for random weight
      initialization.

    Attributes
    -----------
    w_ : 1d-array
      Weights after fitting.
    b_ : Scalar
      Bias unit after fitting.
    errors_ : list
      Number of misclassifications (updates) in each epoch.

    """
    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        """Fit training data.

        Parameters
        ----------
        X : {array-like}, shape = [n_examples, n_features]
          Training vectors, where n_examples is the number of examples and
          n_features is the number of features.
        y : array-like, shape = [n_examples]
          Target values.

        Returns
        -------
        self : object

        """
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=X.shape[1])
        self.b_ = np.float_(0.)

        self.errors_ = []

        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                update = self.eta * (target - self.predict(xi))
                self.w_ += update * xi
                self.b_ += update
                errors += int(update != 0.0)
            self.errors_.append(errors)
        return self

    def net_input(self, X):
        """Calculate net input"""
        return np.dot(X, self.w_) + self.b_

    def predict(self, X):
        """Return class label after unit step"""
        return np.where(self.net_input(X) >= 0.0, 1, 0)


# Side note: the angle between a vector and a positively scaled copy of itself
# is zero, so arccos of their normalized dot product returns 0.0 here.
v1 = np.array([1, 2, 3])
v2 = 0.5 * v1
np.arccos(v1.dot(v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))


# ## Training a perceptron model on the Iris dataset

# ...

# ### Reading-in the Iris data


try:
    s = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
    print('From URL:', s)
    df = pd.read_csv(s,
                     header=None,
                     encoding='utf-8')

except HTTPError:
    s = 'iris.data'
    print('From local Iris path:', s)
    df = pd.read_csv(s,
                     header=None,
                     encoding='utf-8')

df.tail()


# ### Plotting the Iris data


# select setosa and versicolor
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', 0, 1)

# extract sepal length and petal length
X = df.iloc[0:100, [0, 2]].values

# plot data
plt.scatter(X[:50, 0], X[:50, 1],
            color='red', marker='o', label='Setosa')
plt.scatter(X[50:100, 0], X[50:100, 1],
            color='blue', marker='s', label='Versicolor')

plt.xlabel('Sepal length [cm]')
plt.ylabel('Petal length [cm]')
plt.legend(loc='upper left')

# plt.savefig('images/02_06.png', dpi=300)
plt.show()


# ### Training the perceptron model


ppn = Perceptron(eta=0.1, n_iter=10)

ppn.fit(X, y)

plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of updates')

# plt.savefig('images/02_07.png', dpi=300)
plt.show()


# ### A function for plotting decision regions


def plot_decision_regions(X, y, classifier, resolution=0.02):

    # setup marker generator and color map
    markers = ('o', 's', '^', 'v', '<')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # plot the decision surface
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    lab = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    lab = lab.reshape(xx1.shape)
    plt.contourf(xx1, xx2, lab, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # plot class examples
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0],
                    y=X[y == cl, 1],
                    alpha=0.8,
                    c=colors[idx],
                    marker=markers[idx],
                    label=f'Class {cl}',
                    edgecolor='black')


plot_decision_regions(X, y, classifier=ppn)
plt.xlabel('Sepal length [cm]')
plt.ylabel('Petal length [cm]')
plt.legend(loc='upper left')


# plt.savefig('images/02_08.png', dpi=300)
plt.show()


# # Adaptive linear neurons and the convergence of learning

# ...

# ## Minimizing cost functions with gradient descent

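# A minimal, hypothetical sketch (not from the book's code) of one full-batch
# gradient descent step on the mean squared error loss
# L(w, b) = mean((y - (Xw + b))**2); the same update appears inside
# AdalineGD.fit below. All *_demo names are illustrative only.
X_demo = np.array([[1.0, 2.0], [2.0, 0.5], [0.0, 1.0]])
y_demo = np.array([1.0, 0.0, 1.0])
w_demo, b_demo, eta_demo = np.zeros(X_demo.shape[1]), 0.0, 0.1

output_demo = X_demo.dot(w_demo) + b_demo   # linear activation
errors_demo = y_demo - output_demo          # residuals
w_demo += eta_demo * 2.0 * X_demo.T.dot(errors_demo) / X_demo.shape[0]
b_demo += eta_demo * 2.0 * errors_demo.mean()
loss_demo = (errors_demo**2).mean()         # loss before this step
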
# ## Implementing an adaptive linear neuron in Python


class AdalineGD:
    """ADAptive LInear NEuron classifier.

    Parameters
    ------------
    eta : float
      Learning rate (between 0.0 and 1.0)
    n_iter : int
      Passes over the training dataset.
    random_state : int
      Random number generator seed for random weight
      initialization.


    Attributes
    -----------
    w_ : 1d-array
      Weights after fitting.
    b_ : Scalar
      Bias unit after fitting.
    losses_ : list
      Mean squared error loss function values in each epoch.

    """
    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        """ Fit training data.

        Parameters
        ----------
        X : {array-like}, shape = [n_examples, n_features]
          Training vectors, where n_examples is the number of examples and
          n_features is the number of features.
        y : array-like, shape = [n_examples]
          Target values.

        Returns
        -------
        self : object

        """
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=X.shape[1])
        self.b_ = np.float_(0.)
        self.losses_ = []

        for i in range(self.n_iter):
            net_input = self.net_input(X)
            # Please note that the "activation" method has no effect
            # in the code since it is simply an identity function. We
            # could write `output = self.net_input(X)` directly instead.
            # The purpose of the activation is more conceptual, i.e.,
            # in the case of logistic regression (as we will see later),
            # we could change it to a sigmoid function to implement a
            # logistic regression classifier.
            output = self.activation(net_input)
            errors = (y - output)

            # for w_j in range(self.w_.shape[0]):
            #     self.w_[w_j] += self.eta * (2.0 * (X[:, w_j]*errors)).mean()

            self.w_ += self.eta * 2.0 * X.T.dot(errors) / X.shape[0]
            self.b_ += self.eta * 2.0 * errors.mean()
            loss = (errors**2).mean()
            self.losses_.append(loss)
        return self

    def net_input(self, X):
        """Calculate net input"""
        return np.dot(X, self.w_) + self.b_

    def activation(self, X):
        """Compute linear activation"""
        return X

    def predict(self, X):
        """Return class label after unit step"""
        return np.where(self.activation(self.net_input(X)) >= 0.5, 1, 0)


fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))

ada1 = AdalineGD(n_iter=15, eta=0.1).fit(X, y)
ax[0].plot(range(1, len(ada1.losses_) + 1), np.log10(ada1.losses_), marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('log(Mean squared error)')
ax[0].set_title('Adaline - Learning rate 0.1')

ada2 = AdalineGD(n_iter=15, eta=0.0001).fit(X, y)
ax[1].plot(range(1, len(ada2.losses_) + 1), ada2.losses_, marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Mean squared error')
ax[1].set_title('Adaline - Learning rate 0.0001')

# plt.savefig('images/02_11.png', dpi=300)
plt.show()


# ## Improving gradient descent through feature scaling


# standardize features
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()


ada_gd = AdalineGD(n_iter=20, eta=0.5)
ada_gd.fit(X_std, y)

plot_decision_regions(X_std, y, classifier=ada_gd)
plt.title('Adaline - Gradient descent')
plt.xlabel('Sepal length [standardized]')
plt.ylabel('Petal length [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
# plt.savefig('images/02_14_1.png', dpi=300)
plt.show()

plt.plot(range(1, len(ada_gd.losses_) + 1), ada_gd.losses_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Mean squared error')

plt.tight_layout()
# plt.savefig('images/02_14_2.png', dpi=300)
plt.show()


# ## Large scale machine learning and stochastic gradient descent


class AdalineSGD:
    """ADAptive LInear NEuron classifier.

    Parameters
    ------------
    eta : float
      Learning rate (between 0.0 and 1.0)
    n_iter : int
      Passes over the training dataset.
    shuffle : bool (default: True)
      Shuffles training data every epoch if True to prevent cycles.
    random_state : int
      Random number generator seed for random weight
      initialization.


    Attributes
    -----------
    w_ : 1d-array
      Weights after fitting.
    b_ : Scalar
      Bias unit after fitting.
    losses_ : list
      Mean squared error loss function value averaged over all
      training examples in each epoch.


    """
    def __init__(self, eta=0.01, n_iter=10, shuffle=True, random_state=None):
        self.eta = eta
        self.n_iter = n_iter
        self.w_initialized = False
        self.shuffle = shuffle
        self.random_state = random_state

    def fit(self, X, y):
        """ Fit training data.

        Parameters
        ----------
        X : {array-like}, shape = [n_examples, n_features]
          Training vectors, where n_examples is the number of examples and
          n_features is the number of features.
        y : array-like, shape = [n_examples]
          Target values.

        Returns
        -------
        self : object

        """
        self._initialize_weights(X.shape[1])
        self.losses_ = []
        for i in range(self.n_iter):
            if self.shuffle:
                X, y = self._shuffle(X, y)
            losses = []
            for xi, target in zip(X, y):
                losses.append(self._update_weights(xi, target))
            avg_loss = np.mean(losses)
            self.losses_.append(avg_loss)
        return self

    def partial_fit(self, X, y):
        """Fit training data without reinitializing the weights"""
        if not self.w_initialized:
            self._initialize_weights(X.shape[1])
        if y.ravel().shape[0] > 1:
            for xi, target in zip(X, y):
                self._update_weights(xi, target)
        else:
            self._update_weights(X, y)
        return self

    def _shuffle(self, X, y):
        """Shuffle training data"""
        r = self.rgen.permutation(len(y))
        return X[r], y[r]

    def _initialize_weights(self, m):
        """Initialize weights to small random numbers"""
        self.rgen = np.random.RandomState(self.random_state)
        self.w_ = self.rgen.normal(loc=0.0, scale=0.01, size=m)
        self.b_ = np.float_(0.)
        self.w_initialized = True

    def _update_weights(self, xi, target):
        """Apply Adaline learning rule to update the weights"""
        output = self.activation(self.net_input(xi))
        error = (target - output)
        self.w_ += self.eta * 2.0 * xi * error
        self.b_ += self.eta * 2.0 * error
        loss = error**2
        return loss

    def net_input(self, X):
        """Calculate net input"""
        return np.dot(X, self.w_) + self.b_

    def activation(self, X):
        """Compute linear activation"""
        return X

    def predict(self, X):
        """Return class label after unit step"""
        return np.where(self.activation(self.net_input(X)) >= 0.5, 1, 0)


ada_sgd = AdalineSGD(n_iter=15, eta=0.01, random_state=1)
ada_sgd.fit(X_std, y)

plot_decision_regions(X_std, y, classifier=ada_sgd)
plt.title('Adaline - Stochastic gradient descent')
plt.xlabel('Sepal length [standardized]')
plt.ylabel('Petal length [standardized]')
plt.legend(loc='upper left')

plt.tight_layout()
# plt.savefig('figures/02_15_1.png', dpi=300)
plt.show()

plt.plot(range(1, len(ada_sgd.losses_) + 1), ada_sgd.losses_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Average loss')

# plt.savefig('figures/02_15_2.png', dpi=300)
plt.show()


# Online/streaming update: refine the fitted model with one additional example
# without reinitializing the learned weights and bias.
ada_sgd.partial_fit(X_std[0, :], y[0])

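# Hypothetical follow-up (not from the book's code): partial_fit also accepts a
# small batch of new examples, e.g. as data arrives in a stream; the slice used
# here is purely illustrative.
ada_sgd.partial_fit(X_std[:3, :], y[:3])
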
# # Summary

# ...

# ---
#
# Readers may ignore the following cell