Path: blob/master/deprecated/scripts/armijo_mnist_demo.py
# We compare armijo line search to fixed learning rate SGD
# when used to fit a CNN / MLP to MNIST

# Linesearch code is from
# https://github.com/IssamLaradji/stochastic_line_search/blob/master/main.py
import superimport

from armijo_sgd import SGD_Armijo, ArmijoModel

# Neural net code is based on various tutorials
# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#sphx-glr-beginner-blitz-cifar10-tutorial-py
# https://github.com/CSCfi/machine-learning-scripts/blob/master/notebooks/pytorch-mnist-mlp.ipynb

import numpy as np
np.set_printoptions(precision=3)
import matplotlib.pyplot as plt
import pyprobml_utils as pml
import warnings
warnings.filterwarnings('ignore')

import torch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True
print('Using PyTorch version:', torch.__version__, ' Device:', device)

figdir = "../figures"
import os

############
# Get data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets

batch_size = 32
train_dataset = datasets.MNIST('./data',
                               train=True,
                               download=True,
                               transform=transforms.ToTensor())

test_dataset = datasets.MNIST('./data',
                              train=False,
                              transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Inspect the shape and type of one batch.
for (X_train, y_train) in train_loader:
    print('X_train:', X_train.size(), 'type:', X_train.type())
    print('y_train:', y_train.size(), 'type:', y_train.type())
    break

bs, ncolors, height, width = X_train.shape
nclasses = 10
N_train = train_dataset.data.shape[0]

#####
# Define model

import torch.nn as nn
import torch.nn.functional as F

criterion = nn.CrossEntropyLoss(reduction='mean')
# https://pytorch.org/docs/stable/nn.html#crossentropyloss
# This criterion combines nn.LogSoftmax() and nn.NLLLoss() in one single class,
# therefore we don't need the LogSoftmax on the final layer.
# But we do need it if we use NLLLoss.
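# A quick numerical sanity check of the equivalence described above
# (an illustrative addition, not part of the training demo):
# CrossEntropyLoss on raw logits matches NLLLoss on log-softmax outputs.
_logits = torch.randn(4, nclasses)
_targets = torch.randint(0, nclasses, (4,))
_ce = criterion(_logits, _targets)
_nll = nn.NLLLoss(reduction='mean')(F.log_softmax(_logits, dim=1), _targets)
assert torch.allclose(_ce, _nll), 'the two losses should agree'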
# The Armijo method assumes gradient noise goes to zero,
# so it is important that we don't have dropout layers.
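# For intuition, the backtracking rule behind SGD_Armijo can be sketched as
# follows. This is an illustrative, hypothetical helper (not called below;
# the real implementation lives in armijo_sgd.SGD_Armijo): starting from a
# trial step size eta, shrink it by a factor beta until the
# sufficient-decrease (Armijo) condition
#   f(w - eta*g) <= f(w) - c * eta * ||g||^2
# holds for the current mini-batch loss f, parameters w, and gradient g.
def armijo_backtrack(f, w, g, eta=1.0, c=0.1, beta=0.5, max_iters=50):
    fw = f(w)
    g_norm_sq = float((g * g).sum())
    for _ in range(max_iters):
        if f(w - eta * g) <= fw - c * eta * g_norm_sq:
            break  # sufficient decrease achieved
        eta *= beta  # shrink the step and retry
    return eta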
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(ncolors, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        #self.dropout = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        # input is 28x28x1
        # conv1(kernel=5, filters=10) 28x28x1 -> 24x24x10
        # max_pool(kernel=2) 24x24x10 -> 12x12x10
        x = F.relu(F.max_pool2d(self.conv1(x), 2))

        # conv2(kernel=5, filters=20) 12x12x10 -> 8x8x20
        # max_pool(kernel=2) 8x8x20 -> 4x4x20
        #x = F.relu(F.max_pool2d(self.dropout(self.conv2(x)), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))

        # flatten 4x4x20 = 320
        x = x.view(-1, 320)

        # 320 -> 50
        x = F.relu(self.fc1(x))
        #x = F.dropout(x, training=self.training)

        # 50 -> 10
        x = self.fc2(x)

        return x
        #return F.log_softmax(x)

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(ncolors*height*width, 50)
        #self.fc1_drop = nn.Dropout(0.2)
        self.fc2 = nn.Linear(50, 50)
        #self.fc2_drop = nn.Dropout(0.2)
        self.fc3 = nn.Linear(50, nclasses)

    def forward(self, x):
        x = x.view(-1, ncolors*height*width)
        x = F.relu(self.fc1(x))
        #x = self.fc1_drop(x)
        x = F.relu(self.fc2(x))
        #x = self.fc2_drop(x)
        x = self.fc3(x)
        #return F.log_softmax(x, dim=1)
        return x

class Logreg(nn.Module):
    def __init__(self):
        super(Logreg, self).__init__()
        self.fc1 = nn.Linear(ncolors*height*width, nclasses)

    def forward(self, x):
        x = x.view(-1, ncolors*height*width)
        x = self.fc1(x)
        #return F.log_softmax(x, dim=1)
        return x

def make_model(name, seed=0):
    np.random.seed(seed)
    torch.manual_seed(seed)  # numpy seeding alone does not make PyTorch weight init reproducible
    if name == 'CNN':
        net = CNN()
    elif name == 'MLP':
        net = MLP()
    else:
        net = Logreg()
    net = net.to(device)
    return net

###############

# Define each experimental configuration
expts = []
ep = 4
#model = 'Logreg'
model = 'MLP'
#model = 'CNN'
bs = 10
expts.append({'lr':'armijo', 'bs':bs, 'epochs':ep, 'model': model})
expts.append({'lr':0.01, 'bs':bs, 'epochs':ep, 'model': model})
expts.append({'lr':0.1, 'bs':bs, 'epochs':ep, 'model': model})
#expts.append({'lr':0.5, 'bs':bs, 'epochs':ep, 'model': model})

@torch.no_grad()
def eval_loss(model, loader):
    avg_loss = 0.0
    model.eval()
    for step, (x_batch, y_batch) in enumerate(loader):
        # Copy data to GPU if needed
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        y_pred = model(x_batch)
        loss = criterion(y_pred, y_batch)
        avg_loss += loss.item()
    # Compute average loss per example.
    # Note that the criterion already averages within each batch.
    n_batches = len(loader)
    avg_loss /= n_batches
    return avg_loss

def fit_epoch(model, optimizer, train_loader, loss_history):
    epoch_loss = 0.0
    model.train()
    for step, (x_batch, y_batch) in enumerate(train_loader):
        # Copy data to GPU if needed
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        # Function to (re)evaluate loss and its gradient for this step.
        def closure():
            optimizer.zero_grad()
            y_pred = model(x_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            return loss
        loss = optimizer.step(closure)
        batch_loss = loss.item()
        epoch_loss += batch_loss
        loss_history.append(batch_loss)
    # Compute average loss per example for this epoch.
    # Note that the criterion already averages within each batch.
    n_batches = len(train_loader)
    epoch_loss /= n_batches
    return epoch_loss

def fit_epoch_armijo(model, optimizer, train_loader, loss_history, step_size_history):
    # Here model is an ArmijoModel wrapper, whose step method evaluates the
    # batch loss and chooses the step size by backtracking line search.
    epoch_loss = 0.0
    for step, (x_batch, y_batch) in enumerate(train_loader):
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        batch_loss, step_size = model.step((x_batch, y_batch))
        epoch_loss += batch_loss
        loss_history.append(batch_loss)
        step_size_history.append(step_size)
    n_batches = len(train_loader)
    epoch_loss /= n_batches
    return epoch_loss


results_dict = {}
for expt in expts:
    lr = expt['lr']
    bs = expt['bs']
    max_epochs = expt['epochs']
    model_name = expt['model']
    model = make_model(model_name)
    model.train() # set to training mode
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=bs,
                                               shuffle=True, num_workers=2)
    n_batches = len(train_loader)
    batch_loss_history = []
    epoch_loss_history = []
    step_size_history = []
    print_every = max(1, int(0.1*max_epochs))
    if lr == 'armijo':
        name = '{}-armijo-bs{}'.format(model_name, bs)
        model = ArmijoModel(model, criterion)
        optimizer = SGD_Armijo(model, batch_size=bs, dataset_size=N_train)
        model.opt = optimizer
        armijo = True
    else:
        name = '{}-lr{:0.3f}-bs{}'.format(model_name, lr, bs)
        optimizer = torch.optim.SGD(model.parameters(), lr=lr)
        armijo = False

    print('starting {}'.format(name))
    for epoch in range(max_epochs):
        if armijo:
            avg_batch_loss = fit_epoch_armijo(model, optimizer, train_loader,
                                              batch_loss_history, step_size_history)
        else:
            avg_batch_loss = fit_epoch(model, optimizer, train_loader,
                                       batch_loss_history)
        epoch_loss = eval_loss(model, train_loader)
        epoch_loss_history.append(epoch_loss)
        if epoch % print_every == 0:
            print("epoch {}, loss {}".format(epoch, epoch_loss))

    label = '{}-final-loss{:0.3f}'.format(name, epoch_loss)
    results = {'label': label, 'batch_loss_history': batch_loss_history,
               'epoch_loss_history': epoch_loss_history,
               'step_size_history': step_size_history}
    results_dict[name] = results


# Plot the step sizes chosen by the Armijo line search.
plt.figure()
name = 'MLP-armijo-bs10'
results = results_dict[name]
plt.plot(results['step_size_history'])
plt.ylabel('stepsize')
pml.savefig('armijo-mnist-stepsize.pdf')
plt.show()

# Plot the per-epoch training loss of each method.
plt.figure()
for name, results in results_dict.items():
    label = results['label']
    y = results['epoch_loss_history']
    plt.plot(y, label=label)
plt.legend()
pml.savefig('armijo-mnist-epoch-loss.pdf')
plt.show()

# Add smoothed version of batch loss history to results dict
import pandas as pd
for name, results in results_dict.items():
    loss_history = results['batch_loss_history']
    df = pd.Series(loss_history)
    nsteps = len(loss_history)
    smoothed = df.ewm(span=0.1*nsteps).mean()
    results['batch_loss_history_smoothed'] = smoothed

# Plot smoothed batch-loss curves on one figure
plt.figure()
for name, results in results_dict.items():
    label = results['label']
    y = results['batch_loss_history_smoothed']
    nsteps = len(y)
    x = np.arange(nsteps)
    ndx = np.arange(int(0.2*nsteps), nsteps) # skip first 20%
    plt.plot(x[ndx], y[ndx], label=label)
plt.legend()
pml.savefig('armijo-mnist-batch-loss.pdf')
plt.show()
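# Note that test_loader is built above but never used. As a final check
# (an illustrative addition, not part of the original demo), the same
# eval_loss helper can score the most recently trained model on the
# held-out test set.
test_loss = eval_loss(model, test_loader)
print('test loss of last trained model: {:0.3f}'.format(test_loss))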