GitHub Repository: yiming-wange/cs224n-2023-solution
Path: blob/main/a2/utils/gradcheck.py
#!/usr/bin/env python

import numpy as np
import random


# First implement a gradient checker by filling in the following functions
def gradcheck_naive(f, x, gradientText):
    """ Gradient check for a function f.
    Arguments:
    f -- a function that takes a single argument and outputs the
         loss and its gradients
    x -- the point (numpy array) to check the gradient at
    gradientText -- a string detailing some context about the gradient computation

    Notes:
    Note that gradient checking is a sanity test that only checks whether the
    gradient and loss values produced by your implementation are consistent with
    each other. Gradient check passing on its own doesn’t guarantee that you
    have the correct gradients. It will pass, for example, if both the loss and
    gradient values produced by your implementation are 0s (as is the case when
    you have not implemented anything). Here is a detailed explanation of what
    gradient check is doing if you would like some further clarification:
    http://ufldl.stanford.edu/tutorial/supervised/DebuggingGradientChecking/.
    """
    rndstate = random.getstate()  # save RNG state so f sees the same randomness each time
    random.setstate(rndstate)
    fx, grad = f(x)  # Evaluate function value at original point
    h = 1e-4         # Do not change this!

    # Iterate over all indexes ix in x to check the gradient.
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        x[ix] += h  # increment by h
        random.setstate(rndstate)
        fxh, _ = f(x)  # evaluate f(x + h)
        x[ix] -= 2 * h  # restore to previous value (very important!)
        random.setstate(rndstate)
        fxnh, _ = f(x)  # evaluate f(x - h)
        x[ix] += h  # restore x[ix] to its original value
        numgrad = (fxh - fxnh) / 2 / h  # central difference approximation

        # Compare gradients
        reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
        if reldiff > 1e-5:
            print("Gradient check failed for %s." % gradientText)
            print("First gradient error found at index %s in the vector of gradients" % str(ix))
            print("Your gradient: %f \t Numerical gradient: %f" % (
                grad[ix], numgrad))
            return

        it.iternext()  # Step to next dimension

    print("Gradient check passed! Read the docstring of the `gradcheck_naive`"
          " method in utils.gradcheck.py to understand what the gradient check does.")


def grad_tests_softmax(skipgram, dummy_tokens, dummy_vectors, dataset):
    print("======Skip-Gram with naiveSoftmaxLossAndGradient Test Cases======")

    # first test
    output_loss, output_gradCenterVecs, output_gradOutsideVectors = \
        skipgram("c", 3, ["a", "b", "e", "d", "b", "c"],
                 dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset)

    assert np.allclose(output_loss, 11.16610900153398), \
        "Your loss does not match expected loss."
    expected_gradCenterVecs = [[ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [-1.26947339, -1.36873189, 2.45158957],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ]]
    expected_gradOutsideVectors = [[-0.41045956, 0.18834851, 1.43272264],
                                   [ 0.38202831, -0.17530219, -1.33348241],
                                   [ 0.07009355, -0.03216399, -0.24466386],
                                   [ 0.09472154, -0.04346509, -0.33062865],
                                   [-0.13638384, 0.06258276, 0.47605228]]

    assert np.allclose(output_gradCenterVecs, expected_gradCenterVecs), \
        "Your gradCenterVecs do not match expected gradCenterVecs."
    assert np.allclose(output_gradOutsideVectors, expected_gradOutsideVectors), \
        "Your gradOutsideVectors do not match expected gradOutsideVectors."
    print("The first test passed!")

    # second test
    output_loss, output_gradCenterVecs, output_gradOutsideVectors = \
        skipgram("b", 3, ["a", "b", "e", "d", "b", "c"],
                 dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset)
    assert np.allclose(output_loss, 9.87714910003414), \
        "Your loss does not match expected loss."
    expected_gradCenterVecs = [[ 0., 0., 0. ],
                               [-0.14586705, -1.34158321, -0.29291951],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ]]
    expected_gradOutsideVectors = [[-0.30342672, 0.19808298, 0.19587419],
                                   [-0.41359958, 0.27000601, 0.26699522],
                                   [-0.08192272, 0.05348078, 0.05288442],
                                   [ 0.6981188, -0.4557458, -0.45066387],
                                   [ 0.10083022, -0.06582396, -0.06508997]]

    assert np.allclose(output_gradCenterVecs, expected_gradCenterVecs), \
        "Your gradCenterVecs do not match expected gradCenterVecs."
    assert np.allclose(output_gradOutsideVectors, expected_gradOutsideVectors), \
        "Your gradOutsideVectors do not match expected gradOutsideVectors."
    print("The second test passed!")

    # third test
    output_loss, output_gradCenterVecs, output_gradOutsideVectors = \
        skipgram("a", 3, ["a", "b", "e", "d", "b", "c"],
                 dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset)

    assert np.allclose(output_loss, 10.810758628593335), \
        "Your loss does not match expected loss."
    expected_gradCenterVecs = [[-1.1790274, -1.35861865, 1.53590492],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ]]
    expected_gradOutsideVectors = [[-7.96035953e-01, -1.79609012e-02, 2.07761330e-01],
                                   [ 1.40175316e+00, 3.16276545e-02, -3.65850437e-01],
                                   [-1.99691259e-01, -4.50561933e-03, 5.21184016e-02],
                                   [ 2.02560028e-02, 4.57034715e-04, -5.28671357e-03],
                                   [-4.26281954e-01, -9.61816867e-03, 1.11257419e-01]]

    assert np.allclose(output_gradCenterVecs, expected_gradCenterVecs), \
        "Your gradCenterVecs do not match expected gradCenterVecs."
    assert np.allclose(output_gradOutsideVectors, expected_gradOutsideVectors), \
        "Your gradOutsideVectors do not match expected gradOutsideVectors."
    print("The third test passed!")

    print("All 3 tests passed!")


def grad_tests_negsamp(skipgram, dummy_tokens, dummy_vectors, dataset, negSamplingLossAndGradient):
    print("======Skip-Gram with negSamplingLossAndGradient======")

    # first test
    output_loss, output_gradCenterVecs, output_gradOutsideVectors = \
        skipgram("c", 1, ["a", "b"], dummy_tokens, dummy_vectors[:5,:],
                 dummy_vectors[5:,:], dataset, negSamplingLossAndGradient)

    assert np.allclose(output_loss, 16.15119285363322), \
        "Your loss does not match expected loss."
    expected_gradCenterVecs = [[ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [-4.54650789, -1.85942252, 0.76397441],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ]]
    expected_gradOutsideVectors = [[-0.69148188, 0.31730185, 2.41364029],
                                   [-0.22716495, 0.10423969, 0.79292674],
                                   [-0.45528438, 0.20891737, 1.58918512],
                                   [-0.31602611, 0.14501561, 1.10309954],
                                   [-0.80620296, 0.36994417, 2.81407799]]

    assert np.allclose(output_gradCenterVecs, expected_gradCenterVecs), \
        "Your gradCenterVecs do not match expected gradCenterVecs."
    assert np.allclose(output_gradOutsideVectors, expected_gradOutsideVectors), \
        "Your gradOutsideVectors do not match expected gradOutsideVectors."
    print("The first test passed!")

    # second test
    output_loss, output_gradCenterVecs, output_gradOutsideVectors = \
        skipgram("c", 2, ["a", "b", "c", "a"], dummy_tokens, dummy_vectors[:5,:],
                 dummy_vectors[5:,:], dataset, negSamplingLossAndGradient)
    assert np.allclose(output_loss, 28.653567707668795), \
        "Your loss does not match expected loss."
    expected_gradCenterVecs = [[ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [-6.42994865, -2.16396482, -1.89240934],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ]]
    expected_gradOutsideVectors = [[-0.80413277, 0.36899421, 2.80685192],
                                   [-0.9277269, 0.42570813, 3.23826131],
                                   [-0.7511534, 0.34468345, 2.62192569],
                                   [-0.94807832, 0.43504684, 3.30929863],
                                   [-1.12868414, 0.51792184, 3.93970919]]

    assert np.allclose(output_gradCenterVecs, expected_gradCenterVecs), \
        "Your gradCenterVecs do not match expected gradCenterVecs."
    assert np.allclose(output_gradOutsideVectors, expected_gradOutsideVectors), \
        "Your gradOutsideVectors do not match expected gradOutsideVectors."
    print("The second test passed!")

    # third test
    output_loss, output_gradCenterVecs, output_gradOutsideVectors = \
        skipgram("a", 3, ["a", "b", "e", "d", "b", "c"],
                 dummy_tokens, dummy_vectors[:5,:],
                 dummy_vectors[5:,:], dataset, negSamplingLossAndGradient)
    assert np.allclose(output_loss, 60.648705494891914), \
        "Your loss does not match expected loss."
    expected_gradCenterVecs = [[-17.89425315, -7.36940626, -1.23364121],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ],
                               [ 0., 0., 0. ]]
    expected_gradOutsideVectors = [[-6.4780819, -0.14616449, 1.69074639],
                                   [-0.86337952, -0.01948037, 0.22533766],
                                   [-9.59525734, -0.21649709, 2.5043133],
                                   [-6.02261515, -0.13588783, 1.57187189],
                                   [-9.69010072, -0.21863704, 2.52906694]]

    assert np.allclose(output_gradCenterVecs, expected_gradCenterVecs), \
        "Your gradCenterVecs do not match expected gradCenterVecs."
    assert np.allclose(output_gradOutsideVectors, expected_gradOutsideVectors), \
        "Your gradOutsideVectors do not match expected gradOutsideVectors."
    print("The third test passed!")

    print("All 3 tests passed!")
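

# Illustrative addition (not part of the original handout): allow the basic
# quadratic sanity check defined above to be run directly, e.g. `python gradcheck.py`.
# The skip-gram tests are not invoked here because they require the skipgram
# implementation, dummy tokens/vectors, and dataset from the assignment's
# word2vec code.
if __name__ == "__main__":
    sanity_check_quadratic()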