import numpy as np
from scipy.special import logsumexp


def BCE_with_logits(logits, targets):
    """Binary cross-entropy of Bernoulli targets, computed directly from logits.

    Uses logsumexp so that log p(y=1) = -log(1 + exp(-logit)) and
    log p(y=0) = -log(1 + exp(logit)) are evaluated stably.
    """
    N = logits.shape[0]
    logits = logits.reshape(N, 1)
    logits_plus = np.hstack([np.zeros((N, 1)), logits])    # rows [0, logit]
    logits_minus = np.hstack([np.zeros((N, 1)), -logits])  # rows [0, -logit]
    logp1 = -logsumexp(logits_minus, axis=1)  # log p(y=1 | logit)
    logp0 = -logsumexp(logits_plus, axis=1)   # log p(y=0 | logit)
    logprobs = logp1 * targets + logp0 * (1 - targets)
    return -np.sum(logprobs) / N
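
# Quick sanity check (a minimal sketch, not part of the routine above): for
# moderate logits the stable computation should match the naive formula
# applied to probabilities p = 1 / (1 + exp(-logit)).
_logits = np.linspace(-5, 5, 11)
_targets = (np.arange(11) % 2).astype(float)
_p = 1.0 / (1.0 + np.exp(-_logits))
_naive = -np.mean(_targets * np.log(_p) + (1 - _targets) * np.log(1 - _p))
assert np.allclose(BCE_with_logits(_logits, _targets), _naive)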


def sigmoid(x):
    # Logistic sigmoid written via tanh, which avoids overflow for large |x|.
    return 0.5 * (np.tanh(x / 2.0) + 1)
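
# The tanh form is algebraically identical to the standard logistic function;
# as an illustrative check (assuming scipy is available, as it is for
# logsumexp above) it should agree with scipy.special.expit.
from scipy.special import expit
_x = np.linspace(-20, 20, 41)
assert np.allclose(sigmoid(_x), expit(_x))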


def predict_logit(weights, inputs):
    return np.dot(inputs, weights)  # linear predictor (log-odds)


def predict_prob(weights, inputs):
    return sigmoid(predict_logit(weights, inputs))  # p(y=1 | inputs, weights)


def NLL(weights, batch):
    # Average negative log-likelihood of an (X, y) batch under the model.
    X, y = batch
    logits = predict_logit(weights, X)
    return BCE_with_logits(logits, y)


def NLL_grad(weights, batch):
    # Gradient of the NLL: (1/N) * X^T (mu - y).
    X, y = batch
    N = X.shape[0]
    mu = predict_prob(weights, X)
    g = np.sum(np.dot(np.diag(mu - y), X), axis=0) / N
    return g


# Generate a synthetic binary dataset from a "true" weight vector.
np.random.seed(0)
N = 100
D = 5
X = np.random.randn(N, D)
w = 10 * np.random.randn(D)
mu = predict_prob(w, X)
y = np.random.binomial(n=1, p=mu, size=N)

# Evaluate the loss and gradient at the generating weights (test = train here).
X_test = X
y_test = y
y_pred = predict_prob(w, X_test)
loss = NLL(w, (X_test, y_test))
grad_np = NLL_grad(w, (X_test, y_test))
print("params {}".format(w))
print("loss {}".format(loss))
print("grad {}".format(grad_np))