Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
yiming-wange
GitHub Repository: yiming-wange/cs224n-2023-solution
Path: blob/main/a3/parser_transitions.py
984 views
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
CS224N 2021-2022: Homework 3
5
parser_transitions.py: Algorithms for completing partial parsess.
6
Sahil Chopra <[email protected]>
7
Haoshen Hong <[email protected]>
8
"""
9
10
import sys
11
12
class PartialParse(object):
13
def __init__(self, sentence):
14
"""Initializes this partial parse.
15
16
@param sentence (list of str): The sentence to be parsed as a list of words.
17
Your code should not modify the sentence.
18
"""
19
# The sentence being parsed is kept for bookkeeping purposes. Do NOT alter it in your code.
20
self.sentence = sentence
21
22
### YOUR CODE HERE (3 Lines)
23
### Your code should initialize the following fields:
24
### self.stack: The current stack represented as a list with the top of the stack as the
25
### last element of the list.
26
### self.buffer: The current buffer represented as a list with the first item on the
27
### buffer as the first item of the list
28
### self.dependencies: The list of dependencies produced so far. Represented as a list of
29
### tuples where each tuple is of the form (head, dependent).
30
### Order for this list doesn't matter.
31
###
32
### Note: The root token should be represented with the string "ROOT"
33
### Note: If you need to use the sentence object to initialize anything, make sure to not directly
34
### reference the sentence object. That is, remember to NOT modify the sentence object.
35
self.stack = ['ROOT']
36
self.buffer = sentence.copy()
37
self.dependencies = []
38
### END YOUR CODE
39
40
41
def parse_step(self, transition):
42
"""Performs a single parse step by applying the given transition to this partial parse
43
44
@param transition (str): A string that equals "S", "LA", or "RA" representing the shift,
45
left-arc, and right-arc transitions. You can assume the provided
46
transition is a legal transition.
47
"""
48
### YOUR CODE HERE (~7-12 Lines)
49
### TODO:
50
### Implement a single parsing step, i.e. the logic for the following as
51
### described in the pdf handout:
52
### 1. Shift
53
### 2. Left Arc
54
### 3. Right Arc
55
if transition == 'S':
56
self.stack.append(self.buffer[0])
57
del self.buffer[0]
58
elif transition == 'LA':
59
self.dependencies.append((self.stack[-1], self.stack[-2]))
60
del self.stack[-2]
61
elif transition == 'RA':
62
self.dependencies.append((self.stack[-2], self.stack[-1]))
63
del self.stack[-1]
64
### END YOUR CODE
65
66
def parse(self, transitions):
67
"""Applies the provided transitions to this PartialParse
68
69
@param transitions (list of str): The list of transitions in the order they should be applied
70
71
@return dependencies (list of string tuples): The list of dependencies produced when
72
parsing the sentence. Represented as a list of
73
tuples where each tuple is of the form (head, dependent).
74
"""
75
for transition in transitions:
76
self.parse_step(transition)
77
return self.dependencies
78
79
80
def minibatch_parse(sentences, model, batch_size):
81
"""Parses a list of sentences in minibatches using a model.
82
83
@param sentences (list of list of str): A list of sentences to be parsed
84
(each sentence is a list of words and each word is of type string)
85
@param model (ParserModel): The model that makes parsing decisions. It is assumed to have a function
86
model.predict(partial_parses) that takes in a list of PartialParses as input and
87
returns a list of transitions predicted for each parse. That is, after calling
88
transitions = model.predict(partial_parses)
89
transitions[i] will be the next transition to apply to partial_parses[i].
90
@param batch_size (int): The number of PartialParses to include in each minibatch
91
92
93
@return dependencies (list of dependency lists): A list where each element is the dependencies
94
list for a parsed sentence. Ordering should be the
95
same as in sentences (i.e., dependencies[i] should
96
contain the parse for sentences[i]).
97
"""
98
dependencies = []
99
100
### YOUR CODE HERE (~8-10 Lines)
101
### TODO:
102
### Implement the minibatch parse algorithm. Note that the pseudocode for this algorithm is given in the pdf handout.
103
###
104
### Note: A shallow copy (as denoted in the PDF) can be made with the "=" sign in python, e.g.
105
### unfinished_parses = partial_parses[:].
106
### Here `unfinished_parses` is a shallow copy of `partial_parses`.
107
### In Python, a shallow copied list like `unfinished_parses` does not contain new instances
108
### of the object stored in `partial_parses`. Rather both lists refer to the same objects.
109
### In our case, `partial_parses` contains a list of partial parses. `unfinished_parses`
110
### contains references to the same objects. Thus, you should NOT use the `del` operator
111
### to remove objects from the `unfinished_parses` list. This will free the underlying memory that
112
### is being accessed by `partial_parses` and may cause your code to crash.
113
partial_parses = [PartialParse(sentence) for sentence in sentences]
114
unfinished_parses = partial_parses[:]
115
while len(unfinished_parses) > 0:
116
batch = unfinished_parses[:batch_size]
117
transitions = model.predict(batch)
118
#print(batch_size,transitions)
119
for partial_parse, transition in zip(batch, transitions):
120
#the transition is one step for the partial_parse
121
trans = partial_parse.parse_step(transition)
122
if not partial_parse.buffer and len(partial_parse.stack) == 1:
123
unfinished_parses.remove(partial_parse)
124
dependencies = [parse.dependencies for parse in partial_parses]
125
### END YOUR CODE
126
127
return dependencies
128
129
130
def test_step(name, transition, stack, buf, deps,
131
ex_stack, ex_buf, ex_deps):
132
"""Tests that a single parse step returns the expected output"""
133
pp = PartialParse([])
134
pp.stack, pp.buffer, pp.dependencies = stack, buf, deps
135
136
pp.parse_step(transition)
137
stack, buf, deps = (tuple(pp.stack), tuple(pp.buffer), tuple(sorted(pp.dependencies)))
138
assert stack == ex_stack, \
139
"{:} test resulted in stack {:}, expected {:}".format(name, stack, ex_stack)
140
assert buf == ex_buf, \
141
"{:} test resulted in buffer {:}, expected {:}".format(name, buf, ex_buf)
142
assert deps == ex_deps, \
143
"{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps)
144
print("{:} test passed!".format(name))
145
146
147
def test_parse_step():
148
"""Simple tests for the PartialParse.parse_step function
149
Warning: these are not exhaustive
150
"""
151
test_step("SHIFT", "S", ["ROOT", "the"], ["cat", "sat"], [],
152
("ROOT", "the", "cat"), ("sat",), ())
153
test_step("LEFT-ARC", "LA", ["ROOT", "the", "cat"], ["sat"], [],
154
("ROOT", "cat",), ("sat",), (("cat", "the"),))
155
test_step("RIGHT-ARC", "RA", ["ROOT", "run", "fast"], [], [],
156
("ROOT", "run",), (), (("run", "fast"),))
157
158
159
def test_parse():
160
"""Simple tests for the PartialParse.parse function
161
Warning: these are not exhaustive
162
"""
163
sentence = ["parse", "this", "sentence"]
164
dependencies = PartialParse(sentence).parse(["S", "S", "S", "LA", "RA", "RA"])
165
dependencies = tuple(sorted(dependencies))
166
expected = (('ROOT', 'parse'), ('parse', 'sentence'), ('sentence', 'this'))
167
assert dependencies == expected, \
168
"parse test resulted in dependencies {:}, expected {:}".format(dependencies, expected)
169
assert tuple(sentence) == ("parse", "this", "sentence"), \
170
"parse test failed: the input sentence should not be modified"
171
print("parse test passed!")
172
173
174
class DummyModel(object):
175
"""Dummy model for testing the minibatch_parse function
176
"""
177
def __init__(self, mode = "unidirectional"):
178
self.mode = mode
179
180
def predict(self, partial_parses):
181
if self.mode == "unidirectional":
182
return self.unidirectional_predict(partial_parses)
183
elif self.mode == "interleave":
184
return self.interleave_predict(partial_parses)
185
else:
186
raise NotImplementedError()
187
188
def unidirectional_predict(self, partial_parses):
189
"""First shifts everything onto the stack and then does exclusively right arcs if the first word of
190
the sentence is "right", "left" if otherwise.
191
"""
192
return [("RA" if pp.stack[1] is "right" else "LA") if len(pp.buffer) == 0 else "S"
193
for pp in partial_parses]
194
195
def interleave_predict(self, partial_parses):
196
"""First shifts everything onto the stack and then interleaves "right" and "left".
197
"""
198
return [("RA" if len(pp.stack) % 2 == 0 else "LA") if len(pp.buffer) == 0 else "S"
199
for pp in partial_parses]
200
201
def test_dependencies(name, deps, ex_deps):
202
"""Tests the provided dependencies match the expected dependencies"""
203
deps = tuple(sorted(deps))
204
assert deps == ex_deps, \
205
"{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps)
206
207
208
def test_minibatch_parse():
209
"""Simple tests for the minibatch_parse function
210
Warning: these are not exhaustive
211
"""
212
213
# Unidirectional arcs test
214
sentences = [["right", "arcs", "only"],
215
["right", "arcs", "only", "again"],
216
["left", "arcs", "only"],
217
["left", "arcs", "only", "again"]]
218
deps = minibatch_parse(sentences, DummyModel(), 2)
219
test_dependencies("minibatch_parse", deps[0],
220
(('ROOT', 'right'), ('arcs', 'only'), ('right', 'arcs')))
221
test_dependencies("minibatch_parse", deps[1],
222
(('ROOT', 'right'), ('arcs', 'only'), ('only', 'again'), ('right', 'arcs')))
223
test_dependencies("minibatch_parse", deps[2],
224
(('only', 'ROOT'), ('only', 'arcs'), ('only', 'left')))
225
test_dependencies("minibatch_parse", deps[3],
226
(('again', 'ROOT'), ('again', 'arcs'), ('again', 'left'), ('again', 'only')))
227
228
# Out-of-bound test
229
sentences = [["right"]]
230
deps = minibatch_parse(sentences, DummyModel(), 2)
231
test_dependencies("minibatch_parse", deps[0], (('ROOT', 'right'),))
232
233
# Mixed arcs test
234
sentences = [["this", "is", "interleaving", "dependency", "test"]]
235
deps = minibatch_parse(sentences, DummyModel(mode="interleave"), 1)
236
test_dependencies("minibatch_parse", deps[0],
237
(('ROOT', 'is'), ('dependency', 'interleaving'),
238
('dependency', 'test'), ('is', 'dependency'), ('is', 'this')))
239
print("minibatch_parse test passed!")
240
241
242
if __name__ == '__main__':
243
args = sys.argv
244
if len(args) != 2:
245
raise Exception("You did not provide a valid keyword. Either provide 'part_c' or 'part_d', when executing this script")
246
elif args[1] == "part_c":
247
test_parse_step()
248
test_parse()
249
elif args[1] == "part_d":
250
test_minibatch_parse()
251
else:
252
raise Exception("You did not provide a valid keyword. Either provide 'part_c' or 'part_d', when executing this script")
253
254