GitHub Repository: yiming-wange/cs224n-2023-solution
Path: blob/main/a5/src/run.py
import utils
import trainer
import model
import dataset
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
from torch.nn import functional as F
from torch.utils.tensorboard import SummaryWriter
import random
import argparse
random.seed(0)

argp = argparse.ArgumentParser()
argp.add_argument('function', help="Choose pretrain, finetune, or evaluate")
argp.add_argument('variant', help="Choose vanilla or perceiver")
argp.add_argument('--bottleneck_dim', type=int, default=32)
argp.add_argument('pretrain_corpus_path', default=None)
argp.add_argument('--reading_params_path', default=None)
argp.add_argument('--writing_params_path', default=None)
argp.add_argument('--finetune_corpus_path', default=None)
argp.add_argument('--eval_corpus_path', default=None)
argp.add_argument('--outputs_path', default=None)
argp.add_argument('--pretrain_lr', default=6e-3, type=float)
argp.add_argument('--finetune_lr', default=6e-4, type=float)
argp.add_argument('--tb_expt_name', help='debug string for tb log.',
                  default='run')
args = argp.parse_args()
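
# Example invocations (illustrative only; the corpus and parameter file names
# below are placeholders for whatever files the assignment provides):
#   python run.py pretrain vanilla pretrain_corpus.txt \
#       --writing_params_path vanilla.pretrain.params
#   python run.py finetune vanilla pretrain_corpus.txt \
#       --reading_params_path vanilla.pretrain.params \
#       --writing_params_path vanilla.finetune.params \
#       --finetune_corpus_path finetune_corpus.txt
#   python run.py evaluate vanilla pretrain_corpus.txt \
#       --reading_params_path vanilla.finetune.params \
#       --eval_corpus_path eval_corpus.txt \
#       --outputs_path predictions.txt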

# Save the device
device = torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'

# TensorBoard training log
writer = SummaryWriter(log_dir='expt/%s/%s_%s_%d_pt_lr_%f_ft_lr_%f' % (
    args.function,
    args.tb_expt_name,
    args.variant,
    args.bottleneck_dim,
    args.pretrain_lr,
    args.finetune_lr))
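# With the defaults above, e.g. pretraining the vanilla variant, the log
# directory resolves to expt/pretrain/run_vanilla_32_pt_lr_0.006000_ft_lr_0.000600.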

# Keep the block size 128
# Why is the pretraining corpus always required (even if we're not pretraining)?
# It's because we're using it as a hack to always have the same vocabulary
# (that is, the same mapping from character to integer), and we build the
# vocab from the pretraining corpus.
block_size = 128
text = open(args.pretrain_corpus_path, encoding='utf-8').read()
pretrain_dataset = dataset.CharCorruptionDataset(text, block_size)
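# The dataset exposes this vocabulary via pretrain_dataset.stoi
# (character -> integer id) and pretrain_dataset.itos (the inverse mapping);
# both are used below when encoding prompts and decoding samples.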

# We don't suggest you change these hyperparameters, as they're known to work.
# Use them for both the vanilla and the perceiver models.
mconf = model.GPTConfig(pretrain_dataset.vocab_size, pretrain_dataset.block_size,
                        n_layer=4, n_head=8, n_embd=256)
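# This config describes a small character-level GPT: 4 transformer layers,
# 8 attention heads, and 256-dimensional embeddings, with the vocabulary size
# and context length taken from the pretraining dataset.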

"""
Don't change above here; write your code below
"""

# Define the model.
# Note: the model should be moved to the device defined above.

if args.variant == 'vanilla':
    # [part c] Vanilla GPT model
    model = model.GPT(mconf).to(device)
elif args.variant == 'perceiver':
    # [part g] Perceiver variant (sketch): set mconf.perceiver and
    # mconf.bottleneck_dim appropriately, assuming model.GPT reads these
    # attributes from the config.
    mconf.perceiver = True
    mconf.bottleneck_dim = args.bottleneck_dim
    model = model.GPT(mconf).to(device)
else:
    raise ValueError("Unknown model variant")

# Perform pretraining, finetuning, or evaluation
if args.function == 'pretrain':
    assert args.writing_params_path is not None
    # TODO [part f]:
    # - Given:
    #     1. A corpus specified in args.pretrain_corpus_path
    #     2. An output path args.writing_params_path for the model parameters
    # - Goals:
    #     1. Pretrain the model on this corpus
    #     2. Save the resulting model in args.writing_params_path

    # - Make sure to use the following hyperparameters for pretraining:
    #     max_epochs=650
    #     batch_size=128
    #     learning_rate=args.pretrain_lr
    #     lr_decay=True
    #     warmup_tokens=512*20
    #     final_tokens=200*len(pretrain_dataset)*block_size
    #     num_workers=4
    #     writer=writer
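    # (warmup_tokens and final_tokens presumably drive the trainer's
    # learning-rate warmup and decay schedule; see trainer.py.)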
    hyperparameters = {
        "max_epochs": 650,
        "batch_size": 128,
        "learning_rate": args.pretrain_lr,
        "lr_decay": True,
        "warmup_tokens": 512*20,
        "final_tokens": 200*len(pretrain_dataset)*block_size,
        "num_workers": 4,
        "writer": writer
    }

    # Initialize training configuration & run training
    tconf = trainer.TrainerConfig(**hyperparameters)
    trainer.Trainer(model, pretrain_dataset, None, tconf).train()

    # Save the pretrained model parameters to the specified path
    torch.save(model.state_dict(), args.writing_params_path)
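    # The parameters saved here are what --reading_params_path loads later
    # for finetuning or evaluation.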
elif args.function == 'finetune':
    assert args.writing_params_path is not None
    assert args.finetune_corpus_path is not None
    # TODO [part c] [part f]:
    # - Given:
    #     1. A finetuning corpus specified in args.finetune_corpus_path
    #     2. A path args.reading_params_path containing pretrained model
    #        parameters, or None if finetuning without a pretrained model
    #     3. An output path args.writing_params_path for the model parameters
    # - Goals:
    #     1. If args.reading_params_path is specified, load these parameters
    #        into the model
    #     2. Finetune the model on this corpus
    #     3. Save the resulting model in args.writing_params_path
    # - Make sure to use the following hyperparameters:
    #     [part d] Hyperparameters for finetuning WITHOUT a pretrained model:
    #         max_epochs=75
    #         batch_size=256
    #         learning_rate=args.finetune_lr
    #         lr_decay=True
    #         warmup_tokens=512*20
    #         final_tokens=200*len(pretrain_dataset)*block_size
    #         num_workers=4
    #         writer=writer
    #     [part f] Hyperparameters for finetuning WITH a pretrained model:
    #         max_epochs=10
    #         batch_size=256
    #         learning_rate=args.finetune_lr
    #         lr_decay=True
    #         warmup_tokens=512*20
    #         final_tokens=200*len(pretrain_dataset)*block_size
    #         num_workers=4
    #         writer=writer
    # You can use the args.reading_params_path flag to switch between the
    # number of epochs for each case.
    if args.reading_params_path is not None:
        # Finetuning WITH a pretrained model: load the pretrained weights
        model.load_state_dict(torch.load(args.reading_params_path))
        hyperparameters = {
            "max_epochs": 10,
            "batch_size": 256,
            "learning_rate": args.finetune_lr,
            "lr_decay": True,
            "warmup_tokens": 512*20,
            "final_tokens": 200*len(pretrain_dataset)*block_size,
            "num_workers": 4,
            "writer": writer
        }
    else:
        # Finetuning WITHOUT a pretrained model: train from scratch for longer
        hyperparameters = {
            "max_epochs": 75,
            "batch_size": 256,
            "learning_rate": args.finetune_lr,
            "lr_decay": True,
            "warmup_tokens": 512*20,
            "final_tokens": 200*len(pretrain_dataset)*block_size,
            "num_workers": 4,
            "writer": writer
        }

    # Initialize the name dataset from the corpus for finetuning
    finetune_corpus = open(args.finetune_corpus_path).read()
    finetune_dataset = dataset.NameDataset(pretrain_dataset, finetune_corpus)
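    # NameDataset is given pretrain_dataset so (per the vocabulary note above)
    # the finetuning examples are encoded with the same character vocabulary.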

    if args.eval_corpus_path is not None:
        # Initialize the name dataset from the corpus for evaluation
        eval_corpus = open(args.eval_corpus_path).read()
        eval_dataset = dataset.NameDataset(pretrain_dataset, eval_corpus)
    else:
        # If no evaluation corpus is provided, skip evaluation during training
        eval_dataset = None

    # Initialize training configuration & run training
    tconf = trainer.TrainerConfig(**hyperparameters)
    trainer.Trainer(model, finetune_dataset, eval_dataset, tconf).train()

    # Save the finetuned model parameters to the specified path
    torch.save(model.state_dict(), args.writing_params_path)
elif args.function == 'evaluate':
    assert args.outputs_path is not None
    assert args.reading_params_path is not None
    assert args.eval_corpus_path is not None
    model.load_state_dict(torch.load(args.reading_params_path))
    correct = 0
    total = 0
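    # Each line of the eval corpus is expected to be "<input>\t<target place>".
    # For every line we take the input text, append the '⁇' separator used by
    # the corruption scheme, greedily decode 32 characters, and take the text
    # after the appended '⁇' as the model's predicted place name.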
    with open(args.outputs_path, 'w', encoding='utf-8') as fout:
        predictions = []
        for line in tqdm(open(args.eval_corpus_path, encoding='utf-8')):
            x = line.split('\t')[0]
            x = x + '⁇'
            x = torch.tensor([pretrain_dataset.stoi[s]
                              for s in x], dtype=torch.long)[None, ...].to(device)
            pred = utils.sample(model, x, 32, sample=False)[0]
            completion = ''.join([pretrain_dataset.itos[int(i)] for i in pred])
            pred = completion.split('⁇')[1]
            predictions.append(pred)
            fout.write(pred + '\n')
        total, correct = utils.evaluate_places(
            args.eval_corpus_path, predictions)
    if total > 0:
        print('Correct: {} out of {}: {}%'.format(
            correct, total, correct/total*100))
    else:
        print('Predictions written to {}; no targets provided'
              .format(args.outputs_path))