GitHub Repository: yiming-wange/cs224n-2023-solution
Path: blob/main/a2/utils/gradcheck.py
#!/usr/bin/env python

import numpy as np
import random


# First implement a gradient checker by filling in the following functions
def gradcheck_naive(f, x, gradientText):
    """ Gradient check for a function f.
    Arguments:
    f -- a function that takes a single argument and outputs the
         loss and its gradients
    x -- the point (numpy array) to check the gradient at
    gradientText -- a string detailing some context about the gradient computation

    Notes:
    Note that gradient checking is a sanity test that only checks whether the
    gradient and loss values produced by your implementation are consistent with
    each other. Gradient check passing on its own doesn’t guarantee that you
    have the correct gradients. It will pass, for example, if both the loss and
    gradient values produced by your implementation are 0s (as is the case when
    you have not implemented anything). Here is a detailed explanation of what
    gradient check is doing if you would like some further clarification:
    http://ufldl.stanford.edu/tutorial/supervised/DebuggingGradientChecking/.
    """
    rndstate = random.getstate()  # save RNG state so f sees the same randomness each time
    random.setstate(rndstate)
    fx, grad = f(x)  # Evaluate function value at original point
    h = 1e-4         # Do not change this!

    # Iterate over all indexes ix in x to check the gradient.
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        x[ix] += h  # increment by h
        random.setstate(rndstate)
        fxh, _ = f(x)  # evaluate f(x + h)
        x[ix] -= 2 * h  # restore to previous value (very important!)
        random.setstate(rndstate)
        fxnh, _ = f(x)  # evaluate f(x - h)
        x[ix] += h  # restore x[ix] to its original value
        numgrad = (fxh - fxnh) / 2 / h  # central difference approximation

        # Compare gradients
        reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
        if reldiff > 1e-5:
            print("Gradient check failed for %s." % gradientText)
            print("First gradient error found at index %s in the vector of gradients" % str(ix))
            print("Your gradient: %f \t Numerical gradient: %f" % (
                grad[ix], numgrad))
            return

        it.iternext()  # Step to next dimension

    print("Gradient check passed! Read the docstring of the `gradcheck_naive`"
          " method in utils.gradcheck.py to understand what the gradient check does.")


def grad_tests_softmax(skipgram, dummy_tokens, dummy_vectors, dataset):
    print("======Skip-Gram with naiveSoftmaxLossAndGradient Test Cases======")

    # first test
    output_loss, output_gradCenterVecs, output_gradOutsideVectors = \
        skipgram("c", 3, ["a", "b", "e", "d", "b", "c"],
                 dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset)

    assert np.allclose(output_loss, 11.16610900153398), \
        "Your loss does not match expected loss."
    expected_gradCenterVecs = [[ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [-1.26947339, -1.36873189, 2.45158957],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ]]
    expected_gradOutsideVectors = [[-0.41045956, 0.18834851, 1.43272264],
                                   [ 0.38202831, -0.17530219, -1.33348241],
                                   [ 0.07009355, -0.03216399, -0.24466386],
                                   [ 0.09472154, -0.04346509, -0.33062865],
                                   [-0.13638384, 0.06258276, 0.47605228]]

    assert np.allclose(output_gradCenterVecs, expected_gradCenterVecs), \
        "Your gradCenterVecs do not match expected gradCenterVecs."
    assert np.allclose(output_gradOutsideVectors, expected_gradOutsideVectors), \
        "Your gradOutsideVectors do not match expected gradOutsideVectors."
    print("The first test passed!")

    # second test
    output_loss, output_gradCenterVecs, output_gradOutsideVectors = \
        skipgram("b", 3, ["a", "b", "e", "d", "b", "c"],
                 dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset)
    assert np.allclose(output_loss, 9.87714910003414), \
        "Your loss does not match expected loss."
    expected_gradCenterVecs = [[ 0., 0., 0. ],
                               [-0.14586705, -1.34158321, -0.29291951],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ]]
    expected_gradOutsideVectors = [[-0.30342672, 0.19808298, 0.19587419],
                                   [-0.41359958, 0.27000601, 0.26699522],
                                   [-0.08192272, 0.05348078, 0.05288442],
                                   [ 0.6981188, -0.4557458, -0.45066387],
                                   [ 0.10083022, -0.06582396, -0.06508997]]

    assert np.allclose(output_gradCenterVecs, expected_gradCenterVecs), \
        "Your gradCenterVecs do not match expected gradCenterVecs."
    assert np.allclose(output_gradOutsideVectors, expected_gradOutsideVectors), \
        "Your gradOutsideVectors do not match expected gradOutsideVectors."
    print("The second test passed!")

    # third test
    output_loss, output_gradCenterVecs, output_gradOutsideVectors = \
        skipgram("a", 3, ["a", "b", "e", "d", "b", "c"],
                 dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset)

    assert np.allclose(output_loss, 10.810758628593335), \
        "Your loss does not match expected loss."
    expected_gradCenterVecs = [[-1.1790274, -1.35861865, 1.53590492],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ]]
    expected_gradOutsideVectors = [[-7.96035953e-01, -1.79609012e-02, 2.07761330e-01],
                                   [ 1.40175316e+00, 3.16276545e-02, -3.65850437e-01],
                                   [-1.99691259e-01, -4.50561933e-03, 5.21184016e-02],
                                   [ 2.02560028e-02, 4.57034715e-04, -5.28671357e-03],
                                   [-4.26281954e-01, -9.61816867e-03, 1.11257419e-01]]

    assert np.allclose(output_gradCenterVecs, expected_gradCenterVecs), \
        "Your gradCenterVecs do not match expected gradCenterVecs."
    assert np.allclose(output_gradOutsideVectors, expected_gradOutsideVectors), \
        "Your gradOutsideVectors do not match expected gradOutsideVectors."
    print("The third test passed!")

    print("All 3 tests passed!")


def grad_tests_negsamp(skipgram, dummy_tokens, dummy_vectors, dataset, negSamplingLossAndGradient):
    print("======Skip-Gram with negSamplingLossAndGradient======")

    # first test
    output_loss, output_gradCenterVecs, output_gradOutsideVectors = \
        skipgram("c", 1, ["a", "b"], dummy_tokens, dummy_vectors[:5,:],
                 dummy_vectors[5:,:], dataset, negSamplingLossAndGradient)

    assert np.allclose(output_loss, 16.15119285363322), \
        "Your loss does not match expected loss."
    expected_gradCenterVecs = [[ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [-4.54650789, -1.85942252, 0.76397441],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ]]
    expected_gradOutsideVectors = [[-0.69148188, 0.31730185, 2.41364029],
                                   [-0.22716495, 0.10423969, 0.79292674],
                                   [-0.45528438, 0.20891737, 1.58918512],
                                   [-0.31602611, 0.14501561, 1.10309954],
                                   [-0.80620296, 0.36994417, 2.81407799]]

    assert np.allclose(output_gradCenterVecs, expected_gradCenterVecs), \
        "Your gradCenterVecs do not match expected gradCenterVecs."
    assert np.allclose(output_gradOutsideVectors, expected_gradOutsideVectors), \
        "Your gradOutsideVectors do not match expected gradOutsideVectors."
    print("The first test passed!")

    # second test
    output_loss, output_gradCenterVecs, output_gradOutsideVectors = \
        skipgram("c", 2, ["a", "b", "c", "a"], dummy_tokens, dummy_vectors[:5,:],
                 dummy_vectors[5:,:], dataset, negSamplingLossAndGradient)
    assert np.allclose(output_loss, 28.653567707668795), \
        "Your loss does not match expected loss."
    expected_gradCenterVecs = [[ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [-6.42994865, -2.16396482, -1.89240934],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ]]
    expected_gradOutsideVectors = [[-0.80413277, 0.36899421, 2.80685192],
                                   [-0.9277269, 0.42570813, 3.23826131],
                                   [-0.7511534, 0.34468345, 2.62192569],
                                   [-0.94807832, 0.43504684, 3.30929863],
                                   [-1.12868414, 0.51792184, 3.93970919]]

    assert np.allclose(output_gradCenterVecs, expected_gradCenterVecs), \
        "Your gradCenterVecs do not match expected gradCenterVecs."
    assert np.allclose(output_gradOutsideVectors, expected_gradOutsideVectors), \
        "Your gradOutsideVectors do not match expected gradOutsideVectors."
    print("The second test passed!")

    # third test
    output_loss, output_gradCenterVecs, output_gradOutsideVectors = \
        skipgram("a", 3, ["a", "b", "e", "d", "b", "c"],
                 dummy_tokens, dummy_vectors[:5,:],
                 dummy_vectors[5:,:], dataset, negSamplingLossAndGradient)
    assert np.allclose(output_loss, 60.648705494891914), \
        "Your loss does not match expected loss."
    expected_gradCenterVecs = [[-17.89425315, -7.36940626, -1.23364121],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ]]
    expected_gradOutsideVectors = [[-6.4780819, -0.14616449, 1.69074639],
                                   [-0.86337952, -0.01948037, 0.22533766],
                                   [-9.59525734, -0.21649709, 2.5043133],
                                   [-6.02261515, -0.13588783, 1.57187189],
                                   [-9.69010072, -0.21863704, 2.52906694]]

    assert np.allclose(output_gradCenterVecs, expected_gradCenterVecs), \
        "Your gradCenterVecs do not match expected gradCenterVecs."
    assert np.allclose(output_gradOutsideVectors, expected_gradOutsideVectors), \
        "Your gradOutsideVectors do not match expected gradOutsideVectors."
    print("The third test passed!")

    print("All 3 tests passed!")
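

# Illustrative addition (not part of the original handout): allow the basic
# quadratic sanity check defined above to be run directly, e.g. `python gradcheck.py`.
# The skip-gram tests are not invoked here because they require the skipgram
# implementation, dummy tokens/vectors, and dataset from the assignment's
# word2vec code.
if __name__ == "__main__":
    sanity_check_quadratic()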