Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
rasbt
GitHub Repository: rasbt/machine-learning-book
Path: blob/main/ch13/ch13_part2.py
1245 views
1
# coding: utf-8
2
3
4
import sys
5
from python_environment_check import check_packages
6
import numpy as np
7
import torch
8
import torch.nn as nn
9
import pandas as pd
10
import sklearn
11
import sklearn.model_selection
12
from torch.nn.functional import one_hot
13
from torch.utils.data import DataLoader, TensorDataset
14
import torchvision
15
from torchvision import transforms
16
17
# # Machine Learning with PyTorch and Scikit-Learn
18
# # -- Code Examples
19
20
# ## Package version checks

# Make the parent folder importable so the shared
# python_environment_check.py helper can be loaded:
sys.path.insert(0, '..')

# Verify that the recommended package versions are installed:
d = {
    'numpy': '1.21.2',
    'pandas': '1.3.2',
    'sklearn': '1.0',
    'torch': '1.8',
    'torchvision': '0.9.0',
}
check_packages(d)
43
44
45
# # Chapter 13: Going Deeper -- the Mechanics of PyTorch (Part 2/3)
46
47
# **Outline**
48
#
49
# - [Project one - predicting the fuel efficiency of a car](#Project-one----predicting-the-fuel-efficiency-of-a-car)
50
# - [Working with feature columns](#Working-with-feature-columns)
51
# - [Training a DNN regression model](#Training-a-DNN-regression-model)
52
# - [Project two - classifying MNIST handwritten digits](#Project-two----classifying-MNIST-handwritten-digits)
53
54
55
56
57
58
59
# ## Project one - predicting the fuel efficiency of a car
60
#
61
62
# ### Working with feature columns
63
#
64
#
65
66
67
68
69
70
71
72
# Download the Auto MPG data set from the UCI repository and label its columns.
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year', 'Origin',
]

df = pd.read_csv(
    url,
    names=column_names,
    na_values='?',          # missing entries are encoded as "?" in the raw file
    comment='\t',
    sep=' ',
    skipinitialspace=True,
)
df.tail()

# Count missing values, then drop incomplete rows and re-index.
print(df.isna().sum())

df = df.dropna()
df = df.reset_index(drop=True)
df.tail()
90
91
92
93
94
95
96
# 80/20 train/test split; normalization statistics are computed on the
# training split only, so no test-set information leaks into the features.
df_train, df_test = sklearn.model_selection.train_test_split(
    df, train_size=0.8, random_state=1)
train_stats = df_train.describe().transpose()
train_stats

numeric_column_names = [
    'Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration',
]

# Standardize (z-score) every numeric column in both splits.
df_train_norm, df_test_norm = df_train.copy(), df_test.copy()
for column in numeric_column_names:
    mu = train_stats.loc[column, 'mean']
    sigma = train_stats.loc[column, 'std']
    df_train_norm.loc[:, column] = (df_train_norm.loc[:, column] - mu) / sigma
    df_test_norm.loc[:, column] = (df_test_norm.loc[:, column] - mu) / sigma

df_train_norm.tail()
114
115
116
117
118
# Bucket 'Model Year' into four ordinal bins; with right=True a value equal
# to a boundary falls into the bin on that boundary's right.
boundaries = torch.tensor([73, 76, 79])

for frame in (df_train_norm, df_test_norm):
    years = torch.tensor(frame['Model Year'].values)
    frame['Model Year Bucketed'] = torch.bucketize(years, boundaries, right=True)

# Treat the bucketed year as an additional numeric input feature.
numeric_column_names.append('Model Year Bucketed')
127
128
129
130
131
132
133
# Encode the categorical 'Origin' column and assemble the model inputs.
total_origin = len(set(df_train_norm['Origin']))


def _encode_features(frame):
    """Build the float32 input matrix for one data split.

    One-hot encodes the 'Origin' column (values wrapped into the range
    0..total_origin-1 via modulo) and concatenates the result with the
    numeric feature columns.
    """
    # Pass num_classes explicitly: without it, one_hot infers the width
    # from the split's maximum value, so a split that happens to lack the
    # largest Origin category would produce a narrower matrix than the
    # model was built for.
    origin_encoded = one_hot(
        torch.from_numpy(frame['Origin'].values) % total_origin,
        num_classes=total_origin)
    numeric = torch.tensor(frame[numeric_column_names].values)
    return torch.cat([numeric, origin_encoded], 1).float()


x_train = _encode_features(df_train_norm)
x_test = _encode_features(df_test_norm)

# Regression target: miles per gallon.
y_train = torch.tensor(df_train_norm['MPG'].values).float()
y_test = torch.tensor(df_test_norm['MPG'].values).float()
148
149
150
151
152
153
154
# Wrap the training tensors in a Dataset and draw shuffled mini-batches.
batch_size = 8
train_ds = TensorDataset(x_train, y_train)
torch.manual_seed(1)  # reproducible shuffling order
train_dl = DataLoader(train_ds, batch_size, shuffle=True)
158
159
160
161
162
# Fully connected regression network: two ReLU hidden layers
# (8 and 4 units) followed by a single linear output unit.
hidden_units = [8, 4]
input_size = x_train.shape[1]

all_layers = []
for n_units in hidden_units:
    all_layers += [nn.Linear(input_size, n_units), nn.ReLU()]
    input_size = n_units
all_layers.append(nn.Linear(hidden_units[-1], 1))

model = nn.Sequential(*all_layers)
model
177
178
179
180
181
# Mean-squared-error loss optimized with plain stochastic gradient descent.
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

torch.manual_seed(1)
num_epochs = 200
log_epochs = 20

for epoch in range(num_epochs):
    loss_hist_train = 0
    for x_batch, y_batch in train_dl:
        optimizer.zero_grad()
        pred = model(x_batch)[:, 0]   # drop the trailing output dimension
        loss = loss_fn(pred, y_batch)
        loss.backward()
        optimizer.step()
        loss_hist_train += loss.item()
    # Report the mean per-batch loss every `log_epochs` epochs.
    if epoch % log_epochs == 0:
        print(f'Epoch {epoch} Loss {loss_hist_train/len(train_dl):.4f}')
201
202
203
204
205
206
# Evaluate the regression model on the held-out split; gradients are
# not needed for inference.
with torch.no_grad():
    pred = model(x_test.float())[:, 0]
    loss = loss_fn(pred, y_test)
    mae = nn.L1Loss()(pred, y_test)
    print(f'Test MSE: {loss.item():.4f}')
    print(f'Test MAE: {mae.item():.4f}')
211
212
213
# ## Project two - classifying MNIST hand-written digits
214
215
216
217
218
219
# Download MNIST into the working directory; ToTensor converts each
# image to a float tensor scaled to [0, 1].
image_path = './'
transform = transforms.Compose([transforms.ToTensor()])

mnist_train_dataset = torchvision.datasets.MNIST(
    root=image_path, train=True,
    transform=transform, download=True)

# The test split reuses the files fetched by the call above.
mnist_test_dataset = torchvision.datasets.MNIST(
    root=image_path, train=False,
    transform=transform, download=False)

batch_size = 64
torch.manual_seed(1)  # reproducible shuffling order
train_dl = DataLoader(mnist_train_dataset, batch_size, shuffle=True)
234
235
236
237
238
# MLP classifier: flatten each image, pass it through two ReLU hidden
# layers (32 and 16 units), then a 10-way linear output (one logit per digit).
hidden_units = [32, 16]
image_size = mnist_train_dataset[0][0].shape
input_size = image_size[0] * image_size[1] * image_size[2]

all_layers = [nn.Flatten()]
for n_units in hidden_units:
    all_layers += [nn.Linear(input_size, n_units), nn.ReLU()]
    input_size = n_units
all_layers.append(nn.Linear(hidden_units[-1], 10))

model = nn.Sequential(*all_layers)
model
253
254
255
256
257
# Cross-entropy on the raw logits, optimized with Adam.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

torch.manual_seed(1)
num_epochs = 20

for epoch in range(num_epochs):
    accuracy_hist_train = 0
    for x_batch, y_batch in train_dl:
        optimizer.zero_grad()
        pred = model(x_batch)
        loss = loss_fn(pred, y_batch)
        loss.backward()
        optimizer.step()
        is_correct = (torch.argmax(pred, dim=1) == y_batch).float()
        accuracy_hist_train += is_correct.sum()
    # Turn the running count of correct predictions into an accuracy.
    accuracy_hist_train /= len(train_dl.dataset)
    print(f'Epoch {epoch} Accuracy {accuracy_hist_train:.4f}')
274
275
276
277
278
# Evaluate on the whole test split in a single forward pass. Wrapping it
# in no_grad avoids needlessly building an autograd graph for inference.
# Dividing the raw uint8 pixel data by 255 mirrors the [0, 1] scaling
# that transforms.ToTensor applied during training.
with torch.no_grad():
    pred = model(mnist_test_dataset.data / 255.)
    is_correct = (torch.argmax(pred, dim=1) == mnist_test_dataset.targets).float()
print(f'Test accuracy: {is_correct.mean():.4f}')
281
282
283
284
# ---
285
#
286
# Readers may ignore the next cell.
287
288
289
290
291
292