# Path: blob/main/a2/utils/gradcheck.py
# (1003 views)
#!/usr/bin/env python
import numpy as np
import random


# First implement a gradient checker by filling in the following functions
def gradcheck_naive(f, x, gradientText):
    """ Gradient check for a function f.

    Arguments:
    f -- a function that takes a single argument and outputs the
         loss and its gradients
    x -- the point (numpy array) to check the gradient at
    gradientText -- a string detailing some context about the gradient computation

    Notes:
    Note that gradient checking is a sanity test that only checks whether the
    gradient and loss values produced by your implementation are consistent with
    each other. Gradient check passing on its own doesn't guarantee that you
    have the correct gradients. It will pass, for example, if both the loss and
    gradient values produced by your implementation are 0s (as is the case when
    you have not implemented anything). Here is a detailed explanation of what
    gradient check is doing if you would like some further clarification:
    http://ufldl.stanford.edu/tutorial/supervised/DebuggingGradientChecking/.
    """
    rndstate = random.getstate()
    random.setstate(rndstate)
    fx, grad = f(x)  # Evaluate function value at original point
    h = 1e-4  # Do not change this!

    # Iterate over all indexes ix in x to check the gradient.
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        # Central difference (f(x+h) - f(x-h)) / 2h.  The random state is
        # restored before every call so any randomness inside f is identical
        # across the two evaluations and the analytic one.
        x[ix] += h  # increment by h
        random.setstate(rndstate)
        fxh, _ = f(x)  # evaluate f(x + h)
        x[ix] -= 2 * h  # restore to previous value (very important!)
        random.setstate(rndstate)
        fxnh, _ = f(x)  # evaluate f(x - h)
        x[ix] += h  # undo the perturbation so x is unchanged overall
        numgrad = (fxh - fxnh) / 2 / h

        # Compare gradients: relative difference, guarded against tiny
        # denominators by the max(1, ...) term.
        reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
        if reldiff > 1e-5:
            print("Gradient check failed for %s." % gradientText)
            print("First gradient error found at index %s in the vector of gradients" % str(ix))
            print("Your gradient: %f \t Numerical gradient: %f" % (
                grad[ix], numgrad))
            return

        it.iternext()  # Step to next dimension

    print("Gradient check passed! Read the docstring of the `gradcheck_naive`"
          " method in utils.gradcheck.py to understand what the gradient check does.")


def _check_case(results, expected_loss, expected_gradCenterVecs,
                expected_gradOutsideVectors, ordinal):
    """Check one skipgram output triple against expected values.

    results -- (loss, gradCenterVecs, gradOutsideVectors) returned by skipgram
    ordinal -- "first" / "second" / "third"; used in the success message
    Raises AssertionError (with the same messages the inline checks used)
    on the first mismatch; otherwise prints the per-test success line.
    """
    output_loss, output_gradCenterVecs, output_gradOutsideVectors = results
    assert np.allclose(output_loss, expected_loss), \
        "Your loss does not match expected loss."
    assert np.allclose(output_gradCenterVecs, expected_gradCenterVecs), \
        "Your gradCenterVecs do not match expected gradCenterVecs."
    assert np.allclose(output_gradOutsideVectors, expected_gradOutsideVectors), \
        "Your gradOutsideVectors do not match expected gradOutsideVectors."
    print("The %s test passed!" % ordinal)


def grad_tests_softmax(skipgram, dummy_tokens, dummy_vectors, dataset):
    """Run three fixed regression cases for skipgram with
    naiveSoftmaxLossAndGradient and assert the returned loss/gradients
    match precomputed reference values."""
    print("======Skip-Gram with naiveSoftmaxLossAndGradient Test Cases======")

    # first test: center word "c"
    expected_gradCenterVecs = [[ 0.,          0.,          0.        ],
                               [ 0.,          0.,          0.        ],
                               [-1.26947339, -1.36873189,  2.45158957],
                               [ 0.,          0.,          0.        ],
                               [ 0.,          0.,          0.        ]]
    expected_gradOutsideVectors = [[-0.41045956,  0.18834851,  1.43272264],
                                   [ 0.38202831, -0.17530219, -1.33348241],
                                   [ 0.07009355, -0.03216399, -0.24466386],
                                   [ 0.09472154, -0.04346509, -0.33062865],
                                   [-0.13638384,  0.06258276,  0.47605228]]
    _check_case(
        skipgram("c", 3, ["a", "b", "e", "d", "b", "c"],
                 dummy_tokens, dummy_vectors[:5, :], dummy_vectors[5:, :], dataset),
        11.16610900153398,
        expected_gradCenterVecs, expected_gradOutsideVectors, "first")

    # second test: center word "b"
    expected_gradCenterVecs = [[ 0.,          0.,          0.        ],
                               [-0.14586705, -1.34158321, -0.29291951],
                               [ 0.,          0.,          0.        ],
                               [ 0.,          0.,          0.        ],
                               [ 0.,          0.,          0.        ]]
    expected_gradOutsideVectors = [[-0.30342672,  0.19808298,  0.19587419],
                                   [-0.41359958,  0.27000601,  0.26699522],
                                   [-0.08192272,  0.05348078,  0.05288442],
                                   [ 0.6981188,  -0.4557458,  -0.45066387],
                                   [ 0.10083022, -0.06582396, -0.06508997]]
    _check_case(
        skipgram("b", 3, ["a", "b", "e", "d", "b", "c"],
                 dummy_tokens, dummy_vectors[:5, :], dummy_vectors[5:, :], dataset),
        9.87714910003414,
        expected_gradCenterVecs, expected_gradOutsideVectors, "second")

    # third test: center word "a"
    expected_gradCenterVecs = [[-1.1790274,  -1.35861865,  1.53590492],
                               [ 0.,          0.,          0.        ],
                               [ 0.,          0.,          0.        ],
                               [ 0.,          0.,          0.        ],
                               [ 0.,          0.,          0.        ]]
    expected_gradOutsideVectors = [[-7.96035953e-01, -1.79609012e-02,  2.07761330e-01],
                                   [ 1.40175316e+00,  3.16276545e-02, -3.65850437e-01],
                                   [-1.99691259e-01, -4.50561933e-03,  5.21184016e-02],
                                   [ 2.02560028e-02,  4.57034715e-04, -5.28671357e-03],
                                   [-4.26281954e-01, -9.61816867e-03,  1.11257419e-01]]
    _check_case(
        skipgram("a", 3, ["a", "b", "e", "d", "b", "c"],
                 dummy_tokens, dummy_vectors[:5, :], dummy_vectors[5:, :], dataset),
        10.810758628593335,
        expected_gradCenterVecs, expected_gradOutsideVectors, "third")

    print("All 3 tests passed!")


def grad_tests_negsamp(skipgram, dummy_tokens, dummy_vectors, dataset, negSamplingLossAndGradient):
    """Run three fixed regression cases for skipgram with
    negSamplingLossAndGradient and assert the returned loss/gradients
    match precomputed reference values."""
    print("======Skip-Gram with negSamplingLossAndGradient======")

    # first test: center word "c", window of 1
    expected_gradCenterVecs = [[ 0.,          0.,          0.        ],
                               [ 0.,          0.,          0.        ],
                               [-4.54650789, -1.85942252,  0.76397441],
                               [ 0.,          0.,          0.        ],
                               [ 0.,          0.,          0.        ]]
    expected_gradOutsideVectors = [[-0.69148188,  0.31730185,  2.41364029],
                                   [-0.22716495,  0.10423969,  0.79292674],
                                   [-0.45528438,  0.20891737,  1.58918512],
                                   [-0.31602611,  0.14501561,  1.10309954],
                                   [-0.80620296,  0.36994417,  2.81407799]]
    _check_case(
        skipgram("c", 1, ["a", "b"], dummy_tokens, dummy_vectors[:5, :],
                 dummy_vectors[5:, :], dataset, negSamplingLossAndGradient),
        16.15119285363322,
        expected_gradCenterVecs, expected_gradOutsideVectors, "first")

    # second test: center word "c", window of 2
    expected_gradCenterVecs = [[ 0.,          0.,          0.        ],
                               [ 0.,          0.,          0.        ],
                               [-6.42994865, -2.16396482, -1.89240934],
                               [ 0.,          0.,          0.        ],
                               [ 0.,          0.,          0.        ]]
    expected_gradOutsideVectors = [[-0.80413277,  0.36899421,  2.80685192],
                                   [-0.9277269,   0.42570813,  3.23826131],
                                   [-0.7511534,   0.34468345,  2.62192569],
                                   [-0.94807832,  0.43504684,  3.30929863],
                                   [-1.12868414,  0.51792184,  3.93970919]]
    _check_case(
        skipgram("c", 2, ["a", "b", "c", "a"], dummy_tokens, dummy_vectors[:5, :],
                 dummy_vectors[5:, :], dataset, negSamplingLossAndGradient),
        28.653567707668795,
        expected_gradCenterVecs, expected_gradOutsideVectors, "second")

    # third test: center word "a", window of 3
    expected_gradCenterVecs = [[-17.89425315, -7.36940626, -1.23364121],
                               [  0.,          0.,          0.        ],
                               [  0.,          0.,          0.        ],
                               [  0.,          0.,          0.        ],
                               [  0.,          0.,          0.        ]]
    expected_gradOutsideVectors = [[-6.4780819,  -0.14616449,  1.69074639],
                                   [-0.86337952, -0.01948037,  0.22533766],
                                   [-9.59525734, -0.21649709,  2.5043133 ],
                                   [-6.02261515, -0.13588783,  1.57187189],
                                   [-9.69010072, -0.21863704,  2.52906694]]
    _check_case(
        skipgram("a", 3, ["a", "b", "e", "d", "b", "c"],
                 dummy_tokens, dummy_vectors[:5, :],
                 dummy_vectors[5:, :], dataset, negSamplingLossAndGradient),
        60.648705494891914,
        expected_gradCenterVecs, expected_gradOutsideVectors, "third")

    print("All 3 tests passed!")