GitHub Repository: yiming-wange/cs224n-2023-solution
Path: blob/main/a5/src/run.py
import utils
import trainer
import model
import dataset
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
from torch.nn import functional as F
from torch.utils.tensorboard import SummaryWriter
import random
import argparse
random.seed(0)

argp = argparse.ArgumentParser()
argp.add_argument('function', help="Choose pretrain, finetune, or evaluate")
argp.add_argument('variant', help="Choose vanilla or perceiver")
argp.add_argument('--bottleneck_dim', type=int, default=32)
argp.add_argument('pretrain_corpus_path', default=None)
argp.add_argument('--reading_params_path', default=None)
argp.add_argument('--writing_params_path', default=None)
argp.add_argument('--finetune_corpus_path', default=None)
argp.add_argument('--eval_corpus_path', default=None)
argp.add_argument('--outputs_path', default=None)
argp.add_argument('--pretrain_lr', default=6e-3, type=float)
argp.add_argument('--finetune_lr', default=6e-4, type=float)
argp.add_argument('--tb_expt_name', help='debug string for tb log.',
                  default='run')
args = argp.parse_args()
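
# Example invocations (illustrative only; the corpus and parameter file names
# below are placeholders for whatever files the assignment provides):
#   python run.py pretrain vanilla pretrain_corpus.txt \
#       --writing_params_path vanilla.pretrain.params
#   python run.py finetune vanilla pretrain_corpus.txt \
#       --reading_params_path vanilla.pretrain.params \
#       --writing_params_path vanilla.finetune.params \
#       --finetune_corpus_path finetune_corpus.txt
#   python run.py evaluate vanilla pretrain_corpus.txt \
#       --reading_params_path vanilla.finetune.params \
#       --eval_corpus_path eval_corpus.txt \
#       --outputs_path predictions.txt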

# Save the device
device = torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'

# TensorBoard training log
writer = SummaryWriter(log_dir='expt/%s/%s_%s_%d_pt_lr_%f_ft_lr_%f' % (
    args.function,
    args.tb_expt_name,
    args.variant,
    args.bottleneck_dim,
    args.pretrain_lr,
    args.finetune_lr))
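# With the defaults above, e.g. pretraining the vanilla variant, the log
# directory resolves to expt/pretrain/run_vanilla_32_pt_lr_0.006000_ft_lr_0.000600.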

# Keep the block size 128
# Why is the pretraining corpus always required (even if we're not pretraining)?
# It's because we're using it as a hack to always have the same vocabulary
# (that is, the same mapping from character to integer), and we build the
# vocab from the pretraining corpus.
block_size = 128
text = open(args.pretrain_corpus_path, encoding='utf-8').read()
pretrain_dataset = dataset.CharCorruptionDataset(text, block_size)
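# The dataset exposes this vocabulary via pretrain_dataset.stoi
# (character -> integer id) and pretrain_dataset.itos (the inverse mapping);
# both are used below when encoding prompts and decoding samples.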

# We don't suggest you change these hyperparameters, as they're known to work.
# Use them for both the vanilla and the perceiver models.
mconf = model.GPTConfig(pretrain_dataset.vocab_size, pretrain_dataset.block_size,
                        n_layer=4, n_head=8, n_embd=256)
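# This config describes a small character-level GPT: 4 transformer layers,
# 8 attention heads, and 256-dimensional embeddings, with the vocabulary size
# and context length taken from the pretraining dataset.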

"""
Don't change above here; write your code below
"""

# Define the model.
# Note: the model should be moved to the device defined above.

if args.variant == 'vanilla':
    # [part c] Vanilla GPT model
    model = model.GPT(mconf).to(device)
elif args.variant == 'perceiver':
    # [part g] Perceiver variant (sketch): set mconf.perceiver and
    # mconf.bottleneck_dim appropriately, assuming model.GPT reads these
    # attributes from the config.
    mconf.perceiver = True
    mconf.bottleneck_dim = args.bottleneck_dim
    model = model.GPT(mconf).to(device)
else:
    raise ValueError("Unknown model variant")

# Perform pretraining, finetuning, or evaluation
if args.function == 'pretrain':
    assert args.writing_params_path is not None
    # TODO [part f]:
    # - Given:
    #     1. A corpus specified in args.pretrain_corpus_path
    #     2. An output path args.writing_params_path for the model parameters
    # - Goals:
    #     1. Pretrain the model on this corpus
    #     2. Save the resulting model in args.writing_params_path

    # - Make sure to use the following hyperparameters for pretraining:
    #     max_epochs=650
    #     batch_size=128
    #     learning_rate=args.pretrain_lr
    #     lr_decay=True
    #     warmup_tokens=512*20
    #     final_tokens=200*len(pretrain_dataset)*block_size
    #     num_workers=4
    #     writer=writer
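    # (warmup_tokens and final_tokens presumably drive the trainer's
    # learning-rate warmup and decay schedule; see trainer.py.)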
    hyperparameters = {
        "max_epochs": 650,
        "batch_size": 128,
        "learning_rate": args.pretrain_lr,
        "lr_decay": True,
        "warmup_tokens": 512*20,
        "final_tokens": 200*len(pretrain_dataset)*block_size,
        "num_workers": 4,
        "writer": writer
    }

    # Initialize training configuration & run training
    tconf = trainer.TrainerConfig(**hyperparameters)
    trainer.Trainer(model, pretrain_dataset, None, tconf).train()

    # Save the pretrained model parameters to the specified path
    torch.save(model.state_dict(), args.writing_params_path)
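    # The parameters saved here are what --reading_params_path loads later
    # for finetuning or evaluation.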
elif args.function == 'finetune':
    assert args.writing_params_path is not None
    assert args.finetune_corpus_path is not None
    # TODO [part c] [part f]:
    # - Given:
    #     1. A finetuning corpus specified in args.finetune_corpus_path
    #     2. A path args.reading_params_path containing pretrained model
    #        parameters, or None if finetuning without a pretrained model
    #     3. An output path args.writing_params_path for the model parameters
    # - Goals:
    #     1. If args.reading_params_path is specified, load these parameters
    #        into the model
    #     2. Finetune the model on this corpus
    #     3. Save the resulting model in args.writing_params_path
    # - Make sure to use the following hyperparameters:
    #     [part d] Hyperparameters for finetuning WITHOUT a pretrained model:
    #         max_epochs=75
    #         batch_size=256
    #         learning_rate=args.finetune_lr
    #         lr_decay=True
    #         warmup_tokens=512*20
    #         final_tokens=200*len(pretrain_dataset)*block_size
    #         num_workers=4
    #         writer=writer
    #     [part f] Hyperparameters for finetuning WITH a pretrained model:
    #         max_epochs=10
    #         batch_size=256
    #         learning_rate=args.finetune_lr
    #         lr_decay=True
    #         warmup_tokens=512*20
    #         final_tokens=200*len(pretrain_dataset)*block_size
    #         num_workers=4
    #         writer=writer
    # You can use the args.reading_params_path flag to switch between the
    # number of epochs for each case.
    if args.reading_params_path is not None:
        # Finetuning WITH a pretrained model: load the pretrained weights
        model.load_state_dict(torch.load(args.reading_params_path))
        hyperparameters = {
            "max_epochs": 10,
            "batch_size": 256,
            "learning_rate": args.finetune_lr,
            "lr_decay": True,
            "warmup_tokens": 512*20,
            "final_tokens": 200*len(pretrain_dataset)*block_size,
            "num_workers": 4,
            "writer": writer
        }
    else:
        # Finetuning WITHOUT a pretrained model: train from scratch for longer
        hyperparameters = {
            "max_epochs": 75,
            "batch_size": 256,
            "learning_rate": args.finetune_lr,
            "lr_decay": True,
            "warmup_tokens": 512*20,
            "final_tokens": 200*len(pretrain_dataset)*block_size,
            "num_workers": 4,
            "writer": writer
        }

    # Initialize the name dataset from the corpus for finetuning
    finetune_corpus = open(args.finetune_corpus_path).read()
    finetune_dataset = dataset.NameDataset(pretrain_dataset, finetune_corpus)
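    # NameDataset is given pretrain_dataset so (per the vocabulary note above)
    # the finetuning examples are encoded with the same character vocabulary.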

    if args.eval_corpus_path is not None:
        # Initialize the name dataset from the corpus for evaluation
        eval_corpus = open(args.eval_corpus_path).read()
        eval_dataset = dataset.NameDataset(pretrain_dataset, eval_corpus)
    else:
        # If no evaluation corpus is provided, skip evaluation during training
        eval_dataset = None

    # Initialize training configuration & run training
    tconf = trainer.TrainerConfig(**hyperparameters)
    trainer.Trainer(model, finetune_dataset, eval_dataset, tconf).train()

    # Save the finetuned model parameters to the specified path
    torch.save(model.state_dict(), args.writing_params_path)
elif args.function == 'evaluate':
    assert args.outputs_path is not None
    assert args.reading_params_path is not None
    assert args.eval_corpus_path is not None
    model.load_state_dict(torch.load(args.reading_params_path))
    correct = 0
    total = 0
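    # Each line of the eval corpus is expected to be "<input>\t<target place>".
    # For every line we take the input text, append the '⁇' separator used by
    # the corruption scheme, greedily decode 32 characters, and take the text
    # after the appended '⁇' as the model's predicted place name.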
    with open(args.outputs_path, 'w', encoding='utf-8') as fout:
        predictions = []
        for line in tqdm(open(args.eval_corpus_path, encoding='utf-8')):
            x = line.split('\t')[0]
            x = x + '⁇'
            x = torch.tensor([pretrain_dataset.stoi[s]
                              for s in x], dtype=torch.long)[None, ...].to(device)
            pred = utils.sample(model, x, 32, sample=False)[0]
            completion = ''.join([pretrain_dataset.itos[int(i)] for i in pred])
            pred = completion.split('⁇')[1]
            predictions.append(pred)
            fout.write(pred + '\n')
        total, correct = utils.evaluate_places(
            args.eval_corpus_path, predictions)
    if total > 0:
        print('Correct: {} out of {}: {}%'.format(
            correct, total, correct/total*100))
    else:
        print('Predictions written to {}; no targets provided'
              .format(args.outputs_path))