Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
greyhatguy007
GitHub Repository: greyhatguy007/Machine-Learning-Specialization-Coursera
Path: blob/main/C2 - Advanced Learning Algorithms/week4/optional labs/utils.py
3586 views
1
from PIL import Image
2
import networkx as nx
3
import matplotlib.pyplot as plt
4
from networkx.drawing.nx_pydot import graphviz_layout
5
import numpy as np
6
from matplotlib.widgets import Slider, Button
7
plt.style.use('./deeplearning.mplstyle')
8
9
def compute_entropy(y):
    """Compute the binary entropy (in bits) of the labels at a node.

    Args:
        y: Sequence or NumPy array of labels. Entries equal to 1 are
            counted as the positive class; every other entry is negative.

    Returns:
        0 when ``y`` is empty or when all entries fall in a single class,
        otherwise ``-p1*log2(p1) - (1-p1)*log2(1-p1)`` where ``p1`` is the
        number of entries equal to 1 divided by ``len(y)``.
    """
    # asarray lets plain lists/tuples work too; boolean-mask indexing on a
    # list would raise a TypeError.
    labels = np.asarray(y)
    total = len(labels)
    if total == 0:
        return 0

    # Fraction of positive examples (count of ones over number of rows).
    p1 = np.count_nonzero(labels == 1) / total

    # A pure node has zero entropy; returning early also avoids log2(0).
    if p1 == 0 or p1 == 1:
        return 0
    return -p1 * np.log2(p1) - (1 - p1) * np.log2(1 - p1)
20
21
22
def split_dataset(X, node_indices, feature):
    """Partition the samples at a node by the value of one feature.

    Args:
        X: Data indexed as ``X[sample][feature]``.
        node_indices: Iterable of sample indices present at this node.
        feature: Index of the feature to split on.

    Returns:
        A ``(left_indices, right_indices)`` pair of lists. A sample goes
        left when its feature value equals 1 and right otherwise; the
        relative order of ``node_indices`` is kept within each list.
    """
    goes_left, goes_right = [], []

    for sample in node_indices:
        bucket = goes_left if X[sample][feature] == 1 else goes_right
        bucket.append(sample)

    return goes_left, goes_right
34
35
36
37
def compute_information_gain(X, y, node_indices, feature):
    """Compute the information gain of splitting a node on one feature.

    Args:
        X: Data indexed as ``X[sample][feature]``.
        y: Label array; it is indexed with lists of sample indices, so it
            is expected to support NumPy-style fancy indexing.
        node_indices: Sample indices present at the node being split.
        feature: Index of the feature to split on.

    Returns:
        The node's entropy minus the size-weighted entropy of the left and
        right children produced by ``split_dataset``. Returns 0 when
        ``node_indices`` is empty.
    """
    node_size = len(node_indices)
    if node_size == 0:
        # An empty node has nothing to split; without this guard the
        # weight computation below divides by zero.
        return 0

    left_indices, right_indices = split_dataset(X, node_indices, feature)

    node_entropy = compute_entropy(y[node_indices])
    left_entropy = compute_entropy(y[left_indices])
    right_entropy = compute_entropy(y[right_indices])

    # Each child's entropy is weighted by its share of the node's samples.
    w_left = len(left_indices) / node_size
    w_right = len(right_indices) / node_size
    weighted_entropy = w_left * left_entropy + w_right * right_entropy

    return node_entropy - weighted_entropy
56
57
def get_best_split(X, y, node_indices):
    """Pick the feature whose split yields the largest information gain.

    Args:
        X: 2-D data array; ``X.shape[1]`` is taken as the feature count.
        y: Label array passed through to ``compute_information_gain``.
        node_indices: Sample indices present at the node being split.

    Returns:
        Index of the feature with the highest strictly positive gain (the
        lowest index wins ties), or -1 if no feature has positive gain.
    """
    # Score every feature in index order first, then scan for the winner.
    gains = [
        compute_information_gain(X, y, node_indices, candidate)
        for candidate in range(X.shape[1])
    ]

    winner = -1
    top_gain = 0
    for candidate, gain in enumerate(gains):
        # Strict comparison: zero-gain features never win, ties keep the first.
        if gain > top_gain:
            winner, top_gain = candidate, gain

    return winner
71
72
73
def build_tree_recursive(X, y, node_indices, branch_name, max_depth, current_depth, tree):
    """Recursively split nodes down to ``max_depth``, printing each step.

    Args:
        X: Data indexed as ``X[sample][feature]``.
        y: Label array passed through to ``get_best_split``.
        node_indices: Sample indices present at the current node.
        branch_name: Label printed for this node (the recursive calls pass
            "Left" and "Right").
        max_depth: Depth at which nodes are reported as leaves.
        current_depth: Depth of the current node.
        tree: List that receives one ``(left_indices, right_indices,
            best_feature)`` tuple per split, in pre-order. Mutated in place.

    Returns:
        ``tree`` for a node that was split, or ``None`` for a node that is
        already at ``max_depth``.

    NOTE(review): ``get_best_split`` returns -1 when no feature has positive
    gain; that value is forwarded unchanged to ``split_dataset``, where it
    indexes the last column. Behaviour kept as-is.
    """
    dashes = "-" * current_depth

    # Depth limit reached: report the leaf and stop.
    if current_depth == max_depth:
        leaf_prefix = " " * current_depth + dashes
        print(leaf_prefix, "%s leaf node with indices" % branch_name, node_indices)
        return

    best_feature = get_best_split(X, y, node_indices)
    print("%s Depth %d, %s: Split on feature: %d" % (dashes, current_depth, branch_name, best_feature))

    left_child, right_child = split_dataset(X, node_indices, best_feature)
    tree.append((left_child, right_child, best_feature))

    # Left subtree is expanded before the right one.
    next_depth = current_depth + 1
    for child_name, child_indices in (("Left", left_child), ("Right", right_child)):
        build_tree_recursive(X, y, child_indices, child_name, max_depth, next_depth, tree)

    return tree
93
94
def generate_node_image(node_indices):
    """Build one image showing all samples of a node side by side.

    Args:
        node_indices: Sample indices; sample ``i`` is read from
            ``images/<i>.png``.

    Returns:
        A PIL RGB image in which the per-sample images are pasted left to
        right along the top edge, then resized so that both dimensions are
        multiplied by ``len(node_indices) / 10``.
    """
    tile_paths = ["images/%d.png" % sample for sample in node_indices]
    tiles = [Image.open(path) for path in tile_paths]

    tile_widths, tile_heights = zip(*(tile.size for tile in tiles))
    strip_width = sum(tile_widths)
    strip_height = max(tile_heights)

    strip = Image.new('RGB', (strip_width, strip_height))

    # Lay the tiles out horizontally, each starting where the previous ended.
    left_edge = 0
    for tile in tiles:
        strip.paste(tile, (left_edge, 0))
        left_edge += tile.size[0]

    # More samples -> larger rendered strip.
    sample_count = len(node_indices)
    scaled_size = (
        int(strip_width * sample_count / 10),
        int(strip_height * sample_count / 10),
    )
    strip = strip.resize(scaled_size)

    return strip
112
113
114
def generate_split_viz(node_indices, left_indices, right_indices, feature):
    """Draw a parent node and its two children for a single split.

    Args:
        node_indices: Sample indices at the parent node.
        left_indices: Sample indices sent to the left child.
        right_indices: Sample indices sent to the right child.
        feature: Index into ("Ear Shape", "Face Shape", "Whiskers") naming
            the feature that was split on.

    Shows the figure with ``plt.show()``; returns nothing.
    """
    graph = nx.DiGraph()

    # Node 0 is the parent, 1 the left child, 2 the right child.
    groups = [node_indices, left_indices, right_indices]
    for node_id, members in enumerate(groups):
        graph.add_node(node_id, image=generate_node_image(members))

    graph.add_edge(0, 1)
    graph.add_edge(0, 2)

    layout = graphviz_layout(graph, prog="dot")

    fig = plt.figure()
    ax = plt.subplot(111)
    ax.set_aspect('equal')
    nx.draw_networkx_edges(graph, layout, ax=ax, arrows=True, arrowsize=40)

    data_to_display = ax.transData.transform
    display_to_figure = fig.transFigure.inverted().transform

    feature_name = ["Ear Shape", "Face Shape", "Whiskers"][feature]
    titles = [
        "Splitting on %s" % feature_name,
        "Left: %s = 1" % feature_name,
        "Right: %s = 0" % feature_name,
    ]

    for node, members, title in zip(graph, groups, titles):
        # Layout position -> display coordinates -> figure-fraction coordinates.
        disp_x, disp_y = data_to_display(layout[node])
        fig_x, fig_y = display_to_figure((disp_x, disp_y))

        # Inset size grows with the number of samples in the node.
        side = len(members) / 9
        half = side / 2.0

        # Centre a square inset axes on the node and draw its image there.
        inset = plt.axes([fig_x - half, fig_y - half, side, side])
        inset.set_aspect('equal')
        inset.imshow(graph.nodes[node]['image'])
        inset.axis('off')
        inset.set_title(title)

    ax.axis('off')
    plt.show()
149
150
151
def generate_tree_viz(root_indices, y, tree):
    """Draw the tree produced by ``build_tree_recursive``.

    Args:
        root_indices: Sample indices at the root node.
        y: Label array; indexed with each leaf's index list, and the
            result is used to index ("Non Cat", "Cat").
        tree: List of ``(left_indices, right_indices, feature)`` tuples.
            Entry ``k`` is attached as the children of graph node ``k``.

    Shows the figure with ``plt.show()``; returns nothing.

    NOTE(review): children of every entry after the first are labelled as
    leaves, which presumably assumes a depth-2 tree -- confirm with callers.
    NOTE(review): a leaf is labelled with ``max`` of its labels, so with
    0/1 labels a single 1 makes it "Cat" -- confirm this is intended.
    """
    G = nx.DiGraph()

    G.add_node(0, image=generate_node_image(root_indices))
    idx = 1   # id of the next graph node to create
    root = 0  # id of the node whose children are being added

    # Sample count per node, in node-creation order; drives inset size.
    num_images = [len(root_indices)]

    feature_name = ["Ear Shape", "Face Shape", "Whiskers"]
    y_name = ["Non Cat", "Cat"]

    decision_names = []
    leaf_names = []

    for i, level in enumerate(tree):
        indices_list = level[:2]
        for indices in indices_list:
            G.add_node(idx, image=generate_node_image(indices))
            G.add_edge(root, idx)

            # For visualization
            num_images.append(len(indices))
            idx += 1
            if i > 0:
                leaf_names.append("Leaf node: %s" % y_name[max(y[indices])])

        decision_names.append("Split on: %s" % feature_name[level[2]])
        root += 1

    # Titles are matched to nodes by position: decision nodes, then leaves.
    node_names = decision_names + leaf_names
    pos = graphviz_layout(G, prog="dot")

    fig = plt.figure(figsize=(14, 10))
    ax = plt.subplot(111)
    ax.set_aspect('equal')
    nx.draw_networkx_edges(G, pos, ax=ax, arrows=True, arrowsize=40)

    trans = ax.transData.transform
    trans2 = fig.transFigure.inverted().transform

    for node_pos, n in enumerate(G):
        xx, yy = trans(pos[n])      # display coordinates
        xa, ya = trans2((xx, yy))   # figure-fraction coordinates
        piesize = num_images[node_pos] / 25
        p2 = piesize / 2.0
        a = plt.axes([xa - p2, ya - p2, piesize, piesize])
        a.set_aspect('equal')
        a.imshow(G.nodes[n]['image'])
        a.axis('off')
        # Nodes without a prepared name stay untitled. An explicit bounds
        # check replaces the bare ``except: pass``, which hid every error.
        if node_pos < len(node_names):
            a.set_title(node_names[node_pos], y=-0.8, fontsize=13, loc="left")

    ax.axis('off')
    plt.show()
210
211
def plot_entropy():
    """Plot binary entropy H(p) with a slider-driven marker.

    Draws H(p) for 201 evenly spaced values of p in [0, 1], adds a red
    marker at (0, 0), and moves that marker to (p, H(p)) whenever the
    slider value changes.

    Returns:
        The ``Slider`` widget; the caller should keep a reference to it.
    """
    def binary_entropy(p):
        # The formula is only evaluated away from the endpoints, where
        # log2 would receive 0.
        if p != 0 and p != 1:
            return -p * np.log2(p) - (1- p)*np.log2(1 - p)
        return 0

    probabilities = np.linspace(0, 1, 201)
    entropies = [binary_entropy(p) for p in probabilities]

    fig, ax = plt.subplots()
    # Leave room at the bottom for the slider axes.
    plt.subplots_adjust(left=0.25, bottom=0.25)
    ax.set_title('p x H(p)')
    ax.set_xlabel('p')
    ax.set_ylabel('H(p)')

    slider_axes = plt.axes([0.25, 0.1, 0.65, 0.03])
    ax.plot(probabilities, entropies)
    marker = ax.scatter(0, 0, color='red', zorder=100, s=70)
    slider = Slider(slider_axes, 'p', 0, 1, valinit=0, valstep=0.05)

    def update(val):
        # Move the marker onto the curve at the selected probability.
        marker.set_offsets((val, binary_entropy(val)))

    slider.on_changed(update)
    return slider
236
#plt.plot()
237