Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
rasbt
GitHub Repository: rasbt/machine-learning-book
Path: blob/main/ch11/neuralnet.py
1247 views
1
import numpy as np


##########################
### MODEL
##########################

def sigmoid(z):
9
return 1. / (1. + np.exp(-z))
10
11
12
def int_to_onehot(y, num_labels):
    """Convert integer class labels to a one-hot encoded matrix.

    Generalized to accept any 1-D sequence of integer labels (NumPy
    array, list, tuple), not only arrays with a ``.shape`` attribute.
    The Python loop is replaced by vectorized fancy indexing.

    Parameters
    ----------
    y : array-like of int, shape (n_examples,)
        Class labels in ``[0, num_labels)``.
    num_labels : int
        Number of distinct classes (width of the output matrix).

    Returns
    -------
    ndarray of float, shape (n_examples, num_labels)
        Row i has a 1.0 in column ``y[i]`` and 0.0 elsewhere.
    """
    y = np.asarray(y, dtype=int)
    ary = np.zeros((y.shape[0], num_labels))
    # Set one entry per row in a single vectorized assignment.
    ary[np.arange(y.shape[0]), y] = 1
    return ary
class NeuralNetMLP:
    """Multilayer perceptron with one sigmoid hidden layer and a sigmoid
    output layer, trained against the mean-squared-error loss.

    Weights are stored one row per output unit: ``weight_h`` has shape
    (num_hidden, num_features) and ``weight_out`` has shape
    (num_classes, num_hidden).
    """

    def __init__(self, num_features, num_hidden, num_classes, random_seed=123):
        super().__init__()

        self.num_classes = num_classes

        # Small-Gaussian initialization. The hidden weights are drawn
        # first, then the output weights, so a given seed always
        # reproduces the same parameters.
        rng = np.random.RandomState(random_seed)

        self.weight_h = rng.normal(loc=0.0, scale=0.1,
                                   size=(num_hidden, num_features))
        self.bias_h = np.zeros(num_hidden)

        self.weight_out = rng.normal(loc=0.0, scale=0.1,
                                     size=(num_classes, num_hidden))
        self.bias_out = np.zeros(num_classes)

    def forward(self, x):
        """Run a forward pass.

        Parameters
        ----------
        x : ndarray, shape (n_examples, n_features)

        Returns
        -------
        (hidden_act, output_act)
            Activations of shape (n_examples, n_hidden) and
            (n_examples, n_classes), both needed later by ``backward``.
        """
        # Hidden layer: [n_examples, n_features] -> [n_examples, n_hidden]
        net_hidden = x.dot(self.weight_h.T) + self.bias_h
        hidden_act = 1. / (1. + np.exp(-net_hidden))  # sigmoid

        # Output layer: [n_examples, n_hidden] -> [n_examples, n_classes]
        net_out = hidden_act.dot(self.weight_out.T) + self.bias_out
        output_act = 1. / (1. + np.exp(-net_out))  # sigmoid

        return hidden_act, output_act

    def backward(self, x, a_h, a_out, y):
        """Compute MSE-loss gradients for all parameters.

        Parameters
        ----------
        x : ndarray, shape (n_examples, n_features)
            Input batch used in the forward pass.
        a_h, a_out : ndarray
            Hidden and output activations returned by ``forward``.
        y : ndarray of int, shape (n_examples,)
            Integer class labels.

        Returns
        -------
        (d_w_out, d_b_out, d_w_h, d_b_h)
            Gradients w.r.t. output weights/bias and hidden weights/bias.
        """
        n_examples = y.shape[0]

        # One-hot encode the integer labels.
        y_onehot = np.zeros((n_examples, self.num_classes))
        for row, label in enumerate(y):
            y_onehot[row, label] = 1

        #########################
        ### Output layer weights
        #########################

        # delta_out = dLoss/dOutAct * dOutAct/dOutNet (chain rule through
        # the MSE loss and the output sigmoid), reused for both layers.
        grad_loss_wrt_out_act = 2. * (a_out - y_onehot) / n_examples
        out_act_deriv = a_out * (1. - a_out)  # sigmoid derivative
        delta_out = grad_loss_wrt_out_act * out_act_deriv

        # Output-layer gradients:
        # [n_classes, n_examples] dot [n_examples, n_hidden]
        #   -> [n_classes, n_hidden]
        d_w_out = delta_out.T.dot(a_h)
        d_b_out = delta_out.sum(axis=0)

        #################################
        ### Hidden layer weights
        #################################

        # Backpropagate delta_out through the output weights, then
        # through the hidden sigmoid.
        grad_loss_wrt_hidden_act = delta_out.dot(self.weight_out)
        hidden_act_deriv = a_h * (1. - a_h)  # sigmoid derivative
        delta_h = grad_loss_wrt_hidden_act * hidden_act_deriv

        # Hidden-layer gradients:
        # [n_hidden, n_examples] dot [n_examples, n_features]
        #   -> [n_hidden, n_features]
        d_w_h = delta_h.T.dot(x)
        d_b_h = delta_h.sum(axis=0)

        return d_w_out, d_b_out, d_w_h, d_b_h