# Path: blob/main/C3 - Unsupervised Learning, Recommenders, Reinforcement Learning/week3/C3W3A1/public_tests.py
from tensorflow.keras.activations import relu, linear
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

import numpy as np


def test_network(target):
    """Check that `target` is the expected 3-layer Q-network.

    Expected architecture: Dense(64, relu) -> Dense(64, relu) ->
    Dense(num_actions, linear), taking a state vector of size 8.

    Args:
        target: a compiled Keras model to validate.

    Raises:
        AssertionError: if the layer count, input shape, layer types,
            unit counts, or activations do not match the expectation.
    """
    num_actions = 4
    state_size = 8

    assert len(target.layers) == 3, f"Wrong number of layers. Expected 3 but got {len(target.layers)}"
    # BUGFIX: the message previously said "[None, 400]" (stale copy-paste);
    # the actual expected input shape is [None, state_size] == [None, 8].
    assert target.input.shape.as_list() == [None, state_size], \
        f"Wrong input shape. Expected [None, {state_size}] but got {target.input.shape.as_list()}"

    expected = [[Dense, [None, 64], relu],
                [Dense, [None, 64], relu],
                [Dense, [None, num_actions], linear]]

    # Walk the layers in order, checking exact type, output shape and activation.
    for i, layer in enumerate(target.layers):
        assert type(layer) == expected[i][0], \
            f"Wrong type in layer {i}. Expected {expected[i][0]} but got {type(layer)}"
        assert layer.output.shape.as_list() == expected[i][1], \
            f"Wrong number of units in layer {i}. Expected {expected[i][1]} but got {layer.output.shape.as_list()}"
        assert layer.activation == expected[i][2], \
            f"Wrong activation in layer {i}. Expected {expected[i][2]} but got {layer.activation}"

    print("\033[92mAll tests passed!")


def test_optimizer(target, ALPHA):
    """Check that `target` is an Adam optimizer with learning rate ALPHA.

    Args:
        target: the optimizer instance to validate.
        ALPHA: the expected learning rate.

    Raises:
        AssertionError: if the optimizer type or learning rate is wrong.
    """
    assert type(target) == Adam, f"Wrong optimizer. Expected: {Adam}, got: {target}"
    assert np.isclose(target.learning_rate.numpy(), ALPHA), \
        f"Wrong alpha. Expected: {ALPHA}, got: {target.learning_rate.numpy()}"
    print("\033[92mAll tests passed!")


def test_compute_loss(target):
    """Exercise the learner's `compute_loss` against stub Q-networks.

    `target` is called as
    target((states, actions, rewards, next_states, done_vals), gamma,
           q_network, target_q_network)
    with seeded random batches of 64 transitions (state size 8,
    4 actions) and with all-ones stub networks, and the returned loss is
    compared against precomputed reference values.

    Args:
        target: the compute_loss function under test.

    Raises:
        AssertionError: if any computed loss deviates from the reference.
    """
    num_actions = 4

    # Stub networks: seeded-random Q-values and constant all-ones Q-values.
    def target_q_network_random(inputs):
        return np.float32(np.random.rand(inputs.shape[0], num_actions))

    def q_network_random(inputs):
        return np.float32(np.random.rand(inputs.shape[0], num_actions))

    def target_q_network_ones(inputs):
        return np.float32(np.ones((inputs.shape[0], num_actions)))

    def q_network_ones(inputs):
        return np.float32(np.ones((inputs.shape[0], num_actions)))

    # Fixed seed so the reference loss values below are reproducible.
    np.random.seed(1)
    states = np.float32(np.random.rand(64, 8))
    actions = np.float32(np.floor(np.random.uniform(0, 1, (64,)) * 4))
    rewards = np.float32(np.random.rand(64,))
    next_states = np.float32(np.random.rand(64, 8))
    done_vals = np.float32((np.random.uniform(0, 1, size=(64,)) > 0.96) * 1)

    loss = target((states, actions, rewards, next_states, done_vals), 0.995,
                  q_network_random, target_q_network_random)
    assert np.isclose(loss, 0.6991737), f"Wrong value. Expected {0.6991737}, got {loss}"

    # Test when episode terminates
    done_vals = np.float32(np.ones((64,)))
    loss = target((states, actions, rewards, next_states, done_vals), 0.995,
                  q_network_ones, target_q_network_ones)
    assert np.isclose(loss, 0.343270182), f"Wrong value. Expected {0.343270182}, got {loss}"

    # Test MSE with parameters A = B
    done_vals = np.float32((np.random.uniform(0, 1, size=(64,)) > 0.96) * 1)
    rewards = np.float32(np.ones((64,)))
    loss = target((states, actions, rewards, next_states, done_vals), 0,
                  q_network_ones, target_q_network_ones)
    assert np.isclose(loss, 0), f"Wrong value. Expected {0}, got {loss}"

    # Test MSE with parameters A = 0 and B = 1
    done_vals = np.float32((np.random.uniform(0, 1, size=(64,)) > 0.96) * 1)
    rewards = np.float32(np.zeros((64,)))
    loss = target((states, actions, rewards, next_states, done_vals), 0,
                  q_network_ones, target_q_network_ones)
    assert np.isclose(loss, 1), f"Wrong value. Expected {1}, got {loss}"

    print("\033[92mAll tests passed!")