GitHub Repository: yiming-wange/cs224n-2023-solution
Path: blob/main/a3/run.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
CS224N 2021-2022: Homework 3
run.py: Run the dependency parser.
Sahil Chopra <[email protected]>
Haoshen Hong <[email protected]>
"""
from datetime import datetime
import os
import pickle
import math
import time
import argparse
import numpy as np
from torch import nn, optim
import torch
from tqdm import tqdm

from parser_model import ParserModel
from utils.parser_utils import minibatches, load_and_preprocess_data, AverageMeter

parser = argparse.ArgumentParser(
    description='Train neural dependency parser in pytorch')
parser.add_argument('-d', '--debug', action='store_true',
                    help='whether to enter debug mode')
args = parser.parse_args()
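
# A quick usage sketch (assumption: the script is run from the assignment's a3/
# directory, alongside parser_model.py and the utils/ package):
#   python run.py           # full training run
#   python run.py --debug   # debug mode, intended as a quick sanity check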

# -----------------
# Primary Functions
# -----------------


def train(parser, train_data, dev_data, output_path, batch_size=1024, n_epochs=10, lr=0.0005):
    """ Train the neural dependency parser.

    @param parser (Parser): Neural Dependency Parser
    @param train_data (): Training set examples (as produced by load_and_preprocess_data)
    @param dev_data (): Dev set examples, used to select the best model after each epoch
    @param output_path (str): Path to which model weights and results are written.
    @param batch_size (int): Number of examples in a single batch
    @param n_epochs (int): Number of training epochs
    @param lr (float): Learning rate
    """
    best_dev_UAS = 0

    ### YOUR CODE HERE (~2-7 lines)
    ### TODO:
    ###     1) Construct Adam Optimizer in variable `optimizer`
    ###     2) Construct the Cross Entropy Loss Function in variable `loss_func` with `mean`
    ###        reduction (default)
    ###
    ### Hint: Use `parser.model.parameters()` to pass the optimizer
    ###       the necessary parameters to tune.
    ### Please see the following docs for support:
    ###     Adam Optimizer: https://pytorch.org/docs/stable/optim.html
    ###     Cross Entropy Loss: https://pytorch.org/docs/stable/nn.html#crossentropyloss
    optimizer = optim.Adam(parser.model.parameters(), lr=lr)
    loss_func = nn.CrossEntropyLoss(reduction='mean')
    ### END YOUR CODE
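
    # Note: `reduction='mean'` is also PyTorch's default, so `nn.CrossEntropyLoss()`
    # with no arguments would behave identically here. `parser.model.parameters()`
    # hands Adam every trainable tensor registered on the ParserModel, so nothing
    # else needs to be passed to the optimizer explicitly.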

    for epoch in range(n_epochs):
        print("Epoch {:} out of {:}".format(epoch + 1, n_epochs))
        dev_UAS = train_for_epoch(
            parser, train_data, dev_data, optimizer, loss_func, batch_size)
        if dev_UAS > best_dev_UAS:
            best_dev_UAS = dev_UAS
            print("New best dev UAS! Saving model.")
            torch.save(parser.model.state_dict(), output_path)
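            # Saving only the state_dict() stores the parameter tensors rather than
            # the whole module; load_state_dict() restores these same best-dev
            # weights before the test-set evaluation at the bottom of this file.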
        print("")


def train_for_epoch(parser, train_data, dev_data, optimizer, loss_func, batch_size):
    """ Train the neural dependency parser for a single epoch.

    Note: In PyTorch we signal training versus evaluation, and thereby whether
    the Dropout layer is applied, by calling `model.train()` before training
    and `model.eval()` before evaluating.

    @param parser (Parser): Neural Dependency Parser
    @param train_data (): Training set examples (as produced by load_and_preprocess_data)
    @param dev_data (): Dev set examples, parsed after the epoch to compute UAS
    @param optimizer (optim.Optimizer): Adam Optimizer
    @param loss_func (nn.CrossEntropyLoss): Cross Entropy Loss Function
    @param batch_size (int): batch size

    @return dev_UAS (float): Unlabeled Attachment Score (UAS) for dev data
    """
    parser.model.train()  # Places model in "train" mode, i.e. apply dropout layer
    n_minibatches = math.ceil(len(train_data) / batch_size)
    loss_meter = AverageMeter()

    with tqdm(total=n_minibatches) as prog:
        for i, (train_x, train_y) in enumerate(minibatches(train_data, batch_size)):
            optimizer.zero_grad()  # remove any baggage in the optimizer
            loss = 0.  # store loss for this batch here
            train_x = torch.from_numpy(train_x).long()
            train_y = torch.from_numpy(train_y.nonzero()[1]).long()
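            # The line above converts `train_y` from one-hot rows to class indices:
            # np.nonzero on the (batch_size, n_classes) label matrix returns
            # (row_indices, col_indices), and taking [1] keeps the column index of
            # the 1 in each row (e.g. [0, 0, 1] -> 2), the integer-label format
            # that nn.CrossEntropyLoss expects.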

            ### YOUR CODE HERE (~4-10 lines)
            ### TODO:
            ###     1) Run train_x forward through model to produce `logits`
            ###     2) Use the `loss_func` parameter to apply the PyTorch CrossEntropyLoss function.
            ###        This will take `logits` and `train_y` as inputs. It will output the CrossEntropyLoss
            ###        between softmax(`logits`) and `train_y`. Remember that softmax(`logits`)
            ###        are the predictions (y^ from the PDF).
            ###     3) Backprop losses
            ###     4) Take a step with the optimizer
            ### Please see the following docs for support:
            ###     Optimizer Step: https://pytorch.org/docs/stable/optim.html#optimizer-step
            logits = parser.model(train_x)
            loss = loss_func(logits, train_y)
            loss.backward()
            optimizer.step()
            ### END YOUR CODE
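
            # Note: nn.CrossEntropyLoss applies log-softmax to `logits` internally,
            # so the model's raw scores are passed straight to the loss and no
            # explicit softmax is needed here. loss.backward() fills in .grad for
            # the parameters handed to Adam above, and optimizer.step() updates them.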
            prog.update(1)
            loss_meter.update(loss.item())

    print("Average Train Loss: {}".format(loss_meter.avg))

    print("Evaluating on dev set")
    parser.model.eval()  # Places model in "eval" mode, i.e. don't apply dropout layer
    dev_UAS, _ = parser.parse(dev_data)
    print("- dev UAS: {:.2f}".format(dev_UAS * 100.0))
    return dev_UAS


if __name__ == "__main__":
    debug = args.debug

    assert torch.__version__.split(".") >= ["1", "0", "0"], \
        "Please install torch version >= 1.0.0"

    print(80 * "=")
    print("INITIALIZING")
    print(80 * "=")
    parser, embeddings, train_data, dev_data, test_data = load_and_preprocess_data(
        debug)

    start = time.time()
    model = ParserModel(embeddings)
    parser.model = model
    print("took {:.2f} seconds\n".format(time.time() - start))

    print(80 * "=")
    print("TRAINING")
    print(80 * "=")
    output_dir = "results/{:%Y%m%d_%H%M%S}/".format(datetime.now())
    output_path = output_dir + "model.weights"
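    # Weights from each run land in a timestamped directory, e.g. (illustrative)
    # results/20230301_120000/model.weights, so repeated runs do not overwrite
    # one another.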

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    train(parser, train_data, dev_data, output_path, batch_size=1024, n_epochs=10, lr=0.0005)

    if not debug:
        print(80 * "=")
        print("TESTING")
        print(80 * "=")
        print("Restoring the best model weights found on the dev set")
        parser.model.load_state_dict(torch.load(output_path))
        print("Final evaluation on test set")
        parser.model.eval()
        UAS, dependencies = parser.parse(test_data)
        print("- test UAS: {:.2f}".format(UAS * 100.0))
        print("Done!")