# Source: GitHub repository probml/pyprobml
# Path: deprecated/scripts/armijo_mnist_demo.py
# We compare Armijo line search to fixed learning rate SGD
# when used to fit a CNN / MLP to MNIST.

# Line-search code is from
# https://github.com/IssamLaradji/stochastic_line_search/blob/master/main.py
import superimport

from armijo_sgd import SGD_Armijo, ArmijoModel
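
# For orientation, a minimal 1-D sketch of the Armijo (sufficient-decrease)
# backtracking rule that this kind of line search is built on (cf. Vaswani et al.,
# 2019, "Painless Stochastic Gradient"): shrink a trial step size eta until the
# loss at the tentative point drops by at least c * eta * g^2. This helper is
# illustrative only -- it is NOT part of the armijo_sgd API, and SGD_Armijo adds
# stochastic safeguards on top of this basic rule.
def armijo_step_size_sketch(f, w, g, eta=1.0, c=0.1, shrink=0.5, max_backtracks=50):
    # f: scalar loss as a function of a scalar parameter w; g: its gradient at w.
    fw = f(w)
    for _ in range(max_backtracks):
        if f(w - eta * g) <= fw - c * eta * g * g:
            return eta
        eta *= shrink
    return eta

# Example: for f(w) = w^2 at w = 1 (gradient g = 2), backtracking accepts eta = 0.5:
# armijo_step_size_sketch(lambda w: w * w, 1.0, 2.0)  # -> 0.5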
# Neural net code is based on various tutorials:
# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#sphx-glr-beginner-blitz-cifar10-tutorial-py
# https://github.com/CSCfi/machine-learning-scripts/blob/master/notebooks/pytorch-mnist-mlp.ipynb


import numpy as np
np.set_printoptions(precision=3)
import matplotlib.pyplot as plt
import pyprobml_utils as pml
import warnings
warnings.filterwarnings('ignore')


import torch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True
print('Using PyTorch version:', torch.__version__, ' Device:', device)

figdir = "../figures"
import os

############
# Get data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets

batch_size = 32
train_dataset = datasets.MNIST('./data',
                               train=True,
                               download=True,
                               transform=transforms.ToTensor())

test_dataset = datasets.MNIST('./data',
                              train=False,
                              transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

for (X_train, y_train) in train_loader:
    print('X_train:', X_train.size(), 'type:', X_train.type())
    print('y_train:', y_train.size(), 'type:', y_train.type())
    break

bs, ncolors, height, width = X_train.shape
nclasses = 10
N_train = train_dataset.data.shape[0]

#####
# Define model

import torch.nn as nn
import torch.nn.functional as F

criterion = nn.CrossEntropyLoss(reduction='mean')
# https://pytorch.org/docs/stable/nn.html#crossentropyloss
# This criterion combines nn.LogSoftmax() and nn.NLLLoss() in one single class,
# so we don't need a LogSoftmax on the final layer.
# (We would need it if we used NLLLoss instead.)
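
# Quick sanity check of the equivalence stated above (safe to delete); the
# underscore-prefixed names are local to this check, not part of the demo.
_logits = torch.randn(4, nclasses)
_targets = torch.randint(0, nclasses, (4,))
_ce = nn.CrossEntropyLoss()(_logits, _targets)
_nll = nn.NLLLoss()(F.log_softmax(_logits, dim=1), _targets)
assert torch.allclose(_ce, _nll)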

# The Armijo method assumes gradient noise goes to zero,
# so it is important that we don't have dropout layers.

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(ncolors, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        #self.dropout = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        # input is 28x28x1
        # conv1(kernel=5, filters=10): 28x28x1 -> 24x24x10
        # max_pool(kernel=2): 24x24x10 -> 12x12x10
        x = F.relu(F.max_pool2d(self.conv1(x), 2))

        # conv2(kernel=5, filters=20): 12x12x10 -> 8x8x20
        # max_pool(kernel=2): 8x8x20 -> 4x4x20
        #x = F.relu(F.max_pool2d(self.dropout(self.conv2(x)), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))

        # flatten: 4x4x20 = 320
        x = x.view(-1, 320)

        # 320 -> 50
        x = F.relu(self.fc1(x))
        #x = F.dropout(x, training=self.training)

        # 50 -> 10
        x = self.fc2(x)

        return x
        #return F.log_softmax(x)

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(ncolors*height*width, 50)
        #self.fc1_drop = nn.Dropout(0.2)
        self.fc2 = nn.Linear(50, 50)
        #self.fc2_drop = nn.Dropout(0.2)
        self.fc3 = nn.Linear(50, nclasses)

    def forward(self, x):
        x = x.view(-1, ncolors*height*width)
        x = F.relu(self.fc1(x))
        #x = self.fc1_drop(x)
        x = F.relu(self.fc2(x))
        #x = self.fc2_drop(x)
        x = self.fc3(x)
        #return F.log_softmax(x, dim=1)
        return x

class Logreg(nn.Module):
    def __init__(self):
        super(Logreg, self).__init__()
        self.fc1 = nn.Linear(ncolors*height*width, nclasses)

    def forward(self, x):
        x = x.view(-1, ncolors*height*width)
        x = self.fc1(x)
        #return F.log_softmax(x, dim=1)
        return x

def make_model(name, seed=0):
    np.random.seed(seed)
    torch.manual_seed(seed)  # also seed torch, so weight initialization is reproducible
    if name == 'CNN':
        net = CNN()
    elif name == 'MLP':
        net = MLP()
    else:
        net = Logreg()
    net = net.to(device)
    return net

###############

# Define each experimental configuration
expts = []
ep = 4
#model = 'Logreg'
model = 'MLP'
#model = 'CNN'
bs = 10
expts.append({'lr': 'armijo', 'bs': bs, 'epochs': ep, 'model': model})
expts.append({'lr': 0.01, 'bs': bs, 'epochs': ep, 'model': model})
expts.append({'lr': 0.1, 'bs': bs, 'epochs': ep, 'model': model})
#expts.append({'lr': 0.5, 'bs': bs, 'epochs': ep, 'model': model})

@torch.no_grad()
def eval_loss(model, loader):
    avg_loss = 0.0
    model.eval()
    for step, (x_batch, y_batch) in enumerate(loader):
        # Copy data to GPU if needed
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        y_pred = model(x_batch)
        loss = criterion(y_pred, y_batch)
        avg_loss += loss.item()
    # Compute average loss per example.
    # Note that the criterion already averages within each batch.
    n_batches = len(loader)
    avg_loss /= n_batches
    return avg_loss

def fit_epoch(model, optimizer, train_loader, loss_history):
    epoch_loss = 0.0
    model.train()
    for step, (x_batch, y_batch) in enumerate(train_loader):
        # Copy data to GPU if needed
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        # Function to (re)evaluate loss and its gradient for this step.
        def closure():
            optimizer.zero_grad()
            y_pred = model(x_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            return loss
        loss = optimizer.step(closure)
        batch_loss = loss.item()
        epoch_loss += batch_loss
        loss_history.append(batch_loss)
    # Compute average loss per example for this epoch.
    # Note that the criterion already averages within each batch.
    n_batches = len(train_loader)
    epoch_loss /= n_batches
    return epoch_loss
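
# Note on the closure convention above: torch.optim optimizers call
# step(closure), which runs the closure (zero grads, recompute the loss,
# backprop) and returns its value. A line-search optimizer such as SGD_Armijo
# may call the closure several times per update to test candidate step sizes,
# which is why the loss must be recomputed inside it. A tiny standalone
# illustration with plain SGD (hypothetical names, left commented out so it
# does not affect the demo):
#
#   w = torch.nn.Parameter(torch.tensor([1.0]))
#   opt = torch.optim.SGD([w], lr=0.1)
#   def quadratic_closure():
#       opt.zero_grad()
#       loss = (w ** 2).sum()
#       loss.backward()
#       return loss
#   loss = opt.step(quadratic_closure)  # returns the loss computed by the closure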

def fit_epoch_armijo(model, optimizer, train_loader, loss_history, step_size_history):
    epoch_loss = 0.0
    for step, (x_batch, y_batch) in enumerate(train_loader):
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        # ArmijoModel.step runs the forward/backward pass and the backtracking
        # line search internally, returning the mini-batch loss and the accepted step size.
        batch_loss, step_size = model.step((x_batch, y_batch))
        epoch_loss += batch_loss
        loss_history.append(batch_loss)
        step_size_history.append(step_size)
    # Average loss per example for this epoch (the criterion averages within each batch).
    n_batches = len(train_loader)
    epoch_loss /= n_batches
    return epoch_loss


results_dict = {}
for expt in expts:
    lr = expt['lr']
    bs = expt['bs']
    max_epochs = expt['epochs']
    model_name = expt['model']
    model = make_model(model_name)
    model.train()  # set to training mode
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=bs,
                                               shuffle=True, num_workers=2)
    n_batches = len(train_loader)
    batch_loss_history = []
    epoch_loss_history = []
    step_size_history = []
    print_every = max(1, int(0.1*max_epochs))
    if lr == 'armijo':
        name = '{}-armijo-bs{}'.format(model_name, bs)
        model = ArmijoModel(model, criterion)
        optimizer = SGD_Armijo(model, batch_size=bs, dataset_size=N_train)
        model.opt = optimizer
        armijo = True
    else:
        name = '{}-lr{:0.3f}-bs{}'.format(model_name, lr, bs)
        optimizer = torch.optim.SGD(model.parameters(), lr=lr)
        armijo = False

    print('starting {}'.format(name))
    for epoch in range(max_epochs):
        if armijo:
            avg_batch_loss = fit_epoch_armijo(model, optimizer, train_loader,
                                              batch_loss_history, step_size_history)
        else:
            avg_batch_loss = fit_epoch(model, optimizer, train_loader, batch_loss_history)
        epoch_loss = eval_loss(model, train_loader)
        epoch_loss_history.append(epoch_loss)
        if epoch % print_every == 0:
            print("epoch {}, loss {}".format(epoch, epoch_loss))

    label = '{}-final-loss{:0.3f}'.format(name, epoch_loss)
    results = {'label': label, 'batch_loss_history': batch_loss_history,
               'epoch_loss_history': epoch_loss_history,
               'step_size_history': step_size_history}
    results_dict[name] = results


plt.figure()
name = 'MLP-armijo-bs10'
results = results_dict[name]
plt.plot(results['step_size_history'])
plt.ylabel('stepsize')
pml.savefig('armijo-mnist-stepsize.pdf')
plt.show()

plt.figure()
for name, results in results_dict.items():
    label = results['label']
    y = results['epoch_loss_history']
    plt.plot(y, label=label)
plt.legend()
pml.savefig('armijo-mnist-epoch-loss.pdf')
plt.show()

# Add smoothed version of batch loss history to results dict
import pandas as pd
for name, results in results_dict.items():
    loss_history = results['batch_loss_history']
    df = pd.Series(loss_history)
    nsteps = len(loss_history)
    smoothed = df.ewm(span=0.1*nsteps).mean()
    results['batch_loss_history_smoothed'] = smoothed
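
# Note on the smoothing above: pandas' ewm(span=s) computes an exponentially
# weighted moving average with decay alpha = 2 / (s + 1), so span = 0.1*nsteps
# smooths each curve over roughly 10% of the training run.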

# Plot smoothed batch-loss curves on one figure
plt.figure()
for name, results in results_dict.items():
    label = results['label']
    y = results['batch_loss_history_smoothed']
    nsteps = len(y)
    x = np.arange(nsteps)
    ndx = np.arange(int(0.2*nsteps), nsteps)  # skip first 20% of steps
    #plt.figure()
    plt.plot(x[ndx], y[ndx], label=label)
plt.legend()
pml.savefig('armijo-mnist-batch-loss.pdf')
plt.show()