GitHub Repository: greyhatguy007/Machine-Learning-Specialization-Coursera
Path: blob/main/C2 - Advanced Learning Algorithms/week3/C2W3A1/assigment_utils.py
"""
assignment_utils.py
contains routines used by C2_W3 Assignments
"""
import copy
import math
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.patches import FancyArrowPatch
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from matplotlib.widgets import Button, CheckButtons
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_blobs

from ipywidgets import Output
np.set_printoptions(precision=2)

dlc = dict(dlblue='#0096ff', dlorange='#FF9300', dldarkred='#C00000', dlmagenta='#FF40FF', dlpurple='#7030A0', dldarkblue='#0D5BDC')
dlblue = '#0096ff'; dlorange = '#FF9300'; dldarkred = '#C00000'; dlmagenta = '#FF40FF'; dlpurple = '#7030A0'; dldarkblue = '#0D5BDC'
dlcolors = [dlblue, dlorange, dldarkred, dlmagenta, dlpurple]
plt.style.use('./deeplearning.mplstyle')

# --- Assignment ----------------------------------------
def gen_data(m, seed=1, scale=0.7):
    """ generate a data set based on a x^2 with added noise """
    c = 0
    x_train = np.linspace(0, 49, m)
    np.random.seed(seed)
    y_ideal = x_train**2 + c
    y_train = y_ideal + scale * y_ideal*(np.random.sample((m,))-0.5)
    x_ideal = x_train  # for redraw when new data is included in X
    return x_train, y_train, x_ideal, y_ideal

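# Illustrative sketch (not part of the original assignment code): one way gen_data
# might be combined with sklearn's train_test_split to build the train/cv/test
# splits used in the C2_W3 notebook.  The 60/20/20 split and the variable names
# below are assumptions for illustration only.
def _example_gen_data_split(m=40):
    X, y, x_ideal, y_ideal = gen_data(m, seed=1, scale=0.7)
    # hold out 40%, then split the hold-out evenly into cv and test sets
    X_train, X_, y_train, y_ = train_test_split(X, y, test_size=0.40, random_state=1)
    X_cv, X_test, y_cv, y_test = train_test_split(X_, y_, test_size=0.50, random_state=1)
    return X_train, y_train, X_cv, y_cv, X_test, y_test
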
def gen_blobs():
    classes = 6
    m = 800
    std = 0.4
    centers = np.array([[-1, 0], [1, 0], [0, 1], [0, -1], [-2, 1], [-2, -1]])
    X, y = make_blobs(n_samples=m, centers=centers, cluster_std=std, random_state=2, n_features=2)
    return (X, y, centers, classes, std)

class lin_model:
    def __init__(self, degree, regularization=False, lambda_=0):
        if regularization:
            self.linear_model = Ridge(alpha=lambda_)
        else:
            self.linear_model = LinearRegression()
        self.poly = PolynomialFeatures(degree, include_bias=False)
        self.scaler = StandardScaler()

    def fit(self, X_train, y_train):
        ''' fit the polynomial mapping, scaler, and linear model on the training data;
            predict() reuses the fitted mapping and scaling rather than refitting them '''
        X_train_mapped = self.poly.fit_transform(X_train.reshape(-1, 1))
        X_train_mapped_scaled = self.scaler.fit_transform(X_train_mapped)
        self.linear_model.fit(X_train_mapped_scaled, y_train)

    def predict(self, X):
        X_mapped = self.poly.transform(X.reshape(-1, 1))
        X_mapped_scaled = self.scaler.transform(X_mapped)
        yhat = self.linear_model.predict(X_mapped_scaled)
        return(yhat)

    def mse(self, y, yhat):
        err = mean_squared_error(y, yhat)/2   # sklearn's mean_squared_error doesn't include the divide by 2
        return(err)

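# Illustrative sketch (not part of the original assignment code): fitting a
# lin_model of a chosen degree to data from gen_data and scoring it with the
# half mean-squared-error defined above.  The degree, sample count, and split
# size are assumptions for illustration only.
def _example_lin_model(degree=2, m=40):
    X, y, x_ideal, y_ideal = gen_data(m, seed=1, scale=0.7)
    X_train, X_cv, y_train, y_cv = train_test_split(X, y, test_size=0.33, random_state=1)
    lmodel = lin_model(degree)      # unregularized by default
    lmodel.fit(X_train, y_train)
    err_train = lmodel.mse(y_train, lmodel.predict(X_train))
    err_cv = lmodel.mse(y_cv, lmodel.predict(X_cv))
    return err_train, err_cv
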
def plt_train_test(X_train, y_train, X_test, y_test, x, y_pred, x_ideal, y_ideal, degree):
    fig, ax = plt.subplots(1,1, figsize=(4,4))
    fig.canvas.toolbar_visible = False
    fig.canvas.header_visible = False
    fig.canvas.footer_visible = False

    ax.set_title("Poor Performance on Test Data",fontsize = 12)
    ax.set_xlabel("x")
    ax.set_ylabel("y")

    ax.scatter(X_train, y_train, color = "red", label="train")
    ax.scatter(X_test, y_test, color = dlc["dlblue"], label="test")
    ax.set_xlim(ax.get_xlim())
    ax.set_ylim(ax.get_ylim())
    ax.plot(x, y_pred, lw=0.5, label=f"predicted, degree={degree}")
    ax.plot(x_ideal, y_ideal, "--", color = "orangered", label="y_ideal", lw=1)
    ax.legend(loc='upper left')
    plt.tight_layout()
    plt.show()

def plt_optimal_degree(X_train, y_train, X_cv, y_cv, x, y_pred, x_ideal, y_ideal, err_train, err_cv, optimal_degree, max_degree):
    fig, ax = plt.subplots(1,2,figsize=(8,4))
    fig.canvas.toolbar_visible = False
    fig.canvas.header_visible = False
    fig.canvas.footer_visible = False

    ax[0].set_title("predictions vs data",fontsize = 12)
    ax[0].set_xlabel("x")
    ax[0].set_ylabel("y")

    ax[0].plot(x_ideal, y_ideal, "--", color = "orangered", label="y_ideal", lw=1)
    ax[0].scatter(X_train, y_train, color = "red", label="train")
    ax[0].scatter(X_cv, y_cv, color = dlc["dlorange"], label="cv")
    ax[0].set_xlim(ax[0].get_xlim())
    ax[0].set_ylim(ax[0].get_ylim())
    for i in range(0,max_degree):
        ax[0].plot(x, y_pred[:,i], lw=0.5, label=f"{i+1}")
    ax[0].legend(loc='upper left')

    ax[1].set_title("error vs degree",fontsize = 12)
    cpts = list(range(1, max_degree+1))
    ax[1].plot(cpts, err_train[0:], marker='o',label="train error", lw=2, color = dlc["dlblue"])
    ax[1].plot(cpts, err_cv[0:], marker='o',label="cv error", lw=2, color = dlc["dlorange"])
    ax[1].set_ylim(*ax[1].get_ylim())
    ax[1].axvline(optimal_degree, lw=1, color = dlc["dlmagenta"])
    ax[1].annotate("optimal degree", xy=(optimal_degree,80000),xycoords='data',
                   xytext=(0.3, 0.8), textcoords='axes fraction', fontsize=10,
                   arrowprops=dict(arrowstyle="->", connectionstyle="arc3",
                                   color=dlc['dldarkred'], lw=1))
    ax[1].set_xlabel("degree")
    ax[1].set_ylabel("error")
    ax[1].legend()
    fig.suptitle("Find Optimal Degree",fontsize = 12)
    plt.tight_layout()

    plt.show()

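# Illustrative sketch (not part of the original assignment code): the kind of
# degree sweep the notebook runs before calling plt_optimal_degree.  max_degree,
# the sample count, and the split size are assumptions for illustration only.
def _example_degree_sweep(max_degree=10, m=40):
    X, y, x_ideal, y_ideal = gen_data(m, seed=1, scale=0.7)
    X_train, X_cv, y_train, y_cv = train_test_split(X, y, test_size=0.33, random_state=1)
    x = np.linspace(0, int(X.max()), 100)
    err_train = np.zeros(max_degree)
    err_cv = np.zeros(max_degree)
    y_pred = np.zeros((100, max_degree))
    for degree in range(1, max_degree+1):
        lmodel = lin_model(degree)
        lmodel.fit(X_train, y_train)
        err_train[degree-1] = lmodel.mse(y_train, lmodel.predict(X_train))
        err_cv[degree-1] = lmodel.mse(y_cv, lmodel.predict(X_cv))
        y_pred[:, degree-1] = lmodel.predict(x)
    optimal_degree = np.argmin(err_cv) + 1      # degree with the lowest cv error
    plt_optimal_degree(X_train, y_train, X_cv, y_cv, x, y_pred, x_ideal, y_ideal,
                       err_train, err_cv, optimal_degree, max_degree)
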
def plt_tune_regularization(X_train, y_train, X_cv, y_cv, x, y_pred, err_train, err_cv, optimal_reg_idx, lambda_range):
    fig, ax = plt.subplots(1,2,figsize=(8,4))
    fig.canvas.toolbar_visible = False
    fig.canvas.header_visible = False
    fig.canvas.footer_visible = False

    ax[0].set_title("predictions vs data",fontsize = 12)
    ax[0].set_xlabel("x")
    ax[0].set_ylabel("y")

    ax[0].scatter(X_train, y_train, color = "red", label="train")
    ax[0].scatter(X_cv, y_cv, color = dlc["dlorange"], label="cv")
    ax[0].set_xlim(ax[0].get_xlim())
    ax[0].set_ylim(ax[0].get_ylim())
    # ax[0].plot(x, y_pred[:,:], lw=0.5, label=[rf"$\lambda =${i}" for i in lambda_range])
    for i in (0,3,7,9):
        ax[0].plot(x, y_pred[:,i], lw=0.5, label=rf"$\lambda =${lambda_range[i]}")
    ax[0].legend()

    ax[1].set_title("error vs regularization",fontsize = 12)
    ax[1].plot(lambda_range, err_train[:], label="train error", color = dlc["dlblue"])
    ax[1].plot(lambda_range, err_cv[:], label="cv error", color = dlc["dlorange"])
    ax[1].set_xscale('log')
    ax[1].set_ylim(*ax[1].get_ylim())
    opt_x = lambda_range[optimal_reg_idx]
    ax[1].vlines(opt_x, *ax[1].get_ylim(), color = "black", lw=1)
    ax[1].annotate("optimal lambda", (opt_x,150000), xytext=(-80,10), textcoords="offset points",
                   arrowprops={'arrowstyle':'simple'})
    ax[1].set_xlabel("regularization (lambda)")
    ax[1].set_ylabel("error")
    fig.suptitle("Tuning Regularization",fontsize = 12)
    ax[1].text(0.05,0.44,"High\nVariance",fontsize=12, ha='left',transform=ax[1].transAxes,color = dlc["dlblue"])
    ax[1].text(0.95,0.44,"High\nBias", fontsize=12, ha='right',transform=ax[1].transAxes,color = dlc["dlblue"])
    ax[1].legend(loc='upper left')
    plt.tight_layout()
    plt.show()

def tune_m():
    """ tune the number of examples to reduce overfitting """
    m = 50
    m_range = np.array(m*np.arange(1,16))
    num_steps = m_range.shape[0]
    degree = 16
    err_train = np.zeros(num_steps)
    err_cv = np.zeros(num_steps)
    y_pred = np.zeros((100,num_steps))

    for i in range(num_steps):
        X, y, x_ideal, y_ideal = gen_data(m_range[i], 5, 0.7)
        x = np.linspace(0,int(X.max()),100)
        X_train, X_, y_train, y_ = train_test_split(X,y,test_size=0.40, random_state=1)
        X_cv, X_test, y_cv, y_test = train_test_split(X_,y_,test_size=0.50, random_state=1)

        lmodel = lin_model(degree)  # no regularization
        lmodel.fit(X_train, y_train)
        yhat = lmodel.predict(X_train)
        err_train[i] = lmodel.mse(y_train, yhat)
        yhat = lmodel.predict(X_cv)
        err_cv[i] = lmodel.mse(y_cv, yhat)
        y_pred[:,i] = lmodel.predict(x)
    return(X_train, y_train, X_cv, y_cv, x, y_pred, err_train, err_cv, m_range, degree)

def plt_tune_m(X_train, y_train, X_cv, y_cv, x, y_pred, err_train, err_cv, m_range, degree):

    fig, ax = plt.subplots(1,2,figsize=(8,4))
    fig.canvas.toolbar_visible = False
    fig.canvas.header_visible = False
    fig.canvas.footer_visible = False

    ax[0].set_title("predictions vs data",fontsize = 12)
    ax[0].set_xlabel("x")
    ax[0].set_ylabel("y")

    ax[0].scatter(X_train, y_train, color = "red", s=3, label="train", alpha=0.4)
    ax[0].scatter(X_cv, y_cv, color = dlc["dlorange"], s=3, label="cv", alpha=0.4)
    ax[0].set_xlim(ax[0].get_xlim())
    ax[0].set_ylim(ax[0].get_ylim())
    for i in range(0,len(m_range),3):
        ax[0].plot(x, y_pred[:,i], lw=1, label=f"$m =${m_range[i]}")
    ax[0].legend(loc='upper left')
    ax[0].text(0.05,0.5,f"degree = {degree}", fontsize=10, ha='left',transform=ax[0].transAxes,color = dlc["dlblue"])

    ax[1].set_title("error vs number of examples",fontsize = 12)
    ax[1].plot(m_range, err_train[:], label="train error", color = dlc["dlblue"])
    ax[1].plot(m_range, err_cv[:], label="cv error", color = dlc["dlorange"])
    ax[1].set_xlabel("Number of Examples (m)")
    ax[1].set_ylabel("error")
    fig.suptitle("Tuning number of examples",fontsize = 12)
    ax[1].text(0.05,0.5,"High\nVariance", fontsize=12, ha='left',transform=ax[1].transAxes,color = dlc["dlblue"])
    ax[1].text(0.95,0.5,"Good \nGeneralization", fontsize=12, ha='right',transform=ax[1].transAxes,color = dlc["dlblue"])
    ax[1].legend()
    plt.tight_layout()
    plt.show()

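# Illustrative sketch (not part of the original assignment code): tune_m returns
# exactly the tuple plt_tune_m expects, so the "error vs number of examples"
# figure can be driven with two calls.
def _example_tune_m():
    X_train, y_train, X_cv, y_cv, x, y_pred, err_train, err_cv, m_range, degree = tune_m()
    plt_tune_m(X_train, y_train, X_cv, y_cv, x, y_pred, err_train, err_cv, m_range, degree)
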
dkcolors = plt.cm.Paired((1,3,7,9,5,11))
ltcolors = plt.cm.Paired((0,2,6,8,4,10))
dkcolors_map = mpl.colors.ListedColormap(dkcolors)
ltcolors_map = mpl.colors.ListedColormap(ltcolors)

def plt_mc_data(ax, X, y, classes, class_labels=None, map=plt.cm.Paired, legend=False, size=50, m='o'):
    for i in range(classes):
        idx = np.where(y == i)
        col = len(idx[0])*[i]
        label = class_labels[i] if class_labels else "c{}".format(i)
        ax.scatter(X[idx, 0], X[idx, 1], marker=m,
                   c=col, vmin=0, vmax=map.N, cmap=map,
                   s=size, label=label)
    if legend: ax.legend()
    ax.axis('equal')

# Plot a multi-class categorical decision boundary
# This version handles a non-vector prediction (adds a for-loop over points)
def plot_cat_decision_boundary(ax, X, predict, class_labels=None, legend=False, vector=True, color='g', lw=1):

    # create a mesh of points to plot
    pad = 0.5
    x_min, x_max = X[:, 0].min() - pad, X[:, 0].max() + pad
    y_min, y_max = X[:, 1].min() - pad, X[:, 1].max() + pad
    h = max(x_max-x_min, y_max-y_min)/200
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    points = np.c_[xx.ravel(), yy.ravel()]
    #print("points", points.shape)
    # make predictions for each point in mesh
    if vector:
        Z = predict(points)
    else:
        Z = np.zeros((len(points),))
        for i in range(len(points)):
            Z[i] = predict(points[i].reshape(1,2))
    Z = Z.reshape(xx.shape)

    # contour plot highlights boundaries between values - classes in this case
    ax.contour(xx, yy, Z, colors=color, linewidths=lw)
    ax.axis('tight')

def recat(pt, origins):
    """ categorize a point based on distance from origin of clusters """
    nclusters = len(origins)
    min_dist = 10000
    y_new = None
    for j in range(nclusters):
        temp = origins[j] - pt.reshape(2,)
        #print(temp.shape, origins[j].shape)
        dist = np.sqrt(np.dot(temp.T, temp))
        if dist < min_dist:
            y_new = j
            min_dist = dist
    return(y_new)

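# Illustrative sketch (not part of the original assignment code): recat labels a
# point with the index of the nearest blob center, which is how the "ideal"
# decision boundary is drawn below.  The test point is an assumption chosen for
# illustration only.
def _example_recat():
    X, y, centers, classes, std = gen_blobs()
    pt = np.array([[0.9, 0.1]])     # shape (1,2), closest to the center at (1, 0)
    return recat(pt, centers)       # expected to return 1 for this point
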
def plt_train_eq_dist(X_train, y_train, classes, X_cv, y_cv, centers, std):
    css = np.unique(y_train)
    fig, ax = plt.subplots(1,2,figsize=(8,4))
    fig.canvas.toolbar_visible = False
    fig.canvas.header_visible = False
    fig.canvas.footer_visible = False
    plt_mc_data(ax[0], X_train, y_train, classes, map=dkcolors_map, legend=True, size=50)
    plt_mc_data(ax[0], X_cv, y_cv, classes, map=ltcolors_map, legend=True, m="<")
    ax[0].set_title("Training, CV Data")
    for c in css:
        circ = plt.Circle(centers[c], 2*std, color=dkcolors_map(c), clip_on=False, fill=False, lw=0.5)
        ax[0].add_patch(circ)

    # make a model for plotting routines to call
    cat_predict = lambda pt: recat(pt.reshape(1,2), centers)
    plot_cat_decision_boundary(ax[1], X_train, cat_predict, vector=False, color = dlc["dlmagenta"], lw=0.75)
    ax[1].set_title("ideal performance", fontsize=14)

    # add the original data to the decision boundary
    plt_mc_data(ax[1], X_train, y_train, classes, map=dkcolors_map, legend=True, size=50)
    ax[1].set_xlabel('x0'); ax[1].set_ylabel("x1")
    plt.show()

def plt_nn(model_predict, X_train, y_train, classes, X_cv, y_cv, suptitle=""):
    # plot the decision boundary
    fig, ax = plt.subplots(1,2, figsize=(8,4))
    fig.canvas.toolbar_visible = False
    fig.canvas.header_visible = False
    fig.canvas.footer_visible = False
    plot_cat_decision_boundary(ax[0], X_train, model_predict, vector=True)
    ax[0].set_title("training data", fontsize=14)

    # add the original data to the decision boundary
    plt_mc_data(ax[0], X_train, y_train, classes, map=dkcolors_map, legend=True, size=75)
    ax[0].set_xlabel('x0'); ax[0].set_ylabel("x1")

    plot_cat_decision_boundary(ax[1], X_train, model_predict, vector=True)
    ax[1].set_title("cross-validation data", fontsize=14)
    plt_mc_data(ax[1], X_cv, y_cv, classes,
                map=ltcolors_map, legend=True, size=100, m='<')
    ax[1].set_xlabel('x0'); ax[1].set_ylabel("x1")
    fig.suptitle(suptitle, fontsize = 12)
    plt.show()

def eval_cat_err(y, yhat):
    """
    Calculate the categorization error
    Args:
      y    : (ndarray Shape (m,) or (m,1)) target value of each example
      yhat : (ndarray Shape (m,) or (m,1)) predicted value of each example
    Returns:
      err  : (scalar)
    """
    m = len(y)
    incorrect = 0
    for i in range(m):
        if yhat[i] != y[i]:
            incorrect += 1
    err = incorrect/m
    return(err)

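# Illustrative sketch (not part of the original assignment code): eval_cat_err is
# simply the fraction of mismatched labels.  The arrays below are made-up values
# for illustration only.
def _example_eval_cat_err():
    y    = np.array([0, 1, 2, 2, 1])
    yhat = np.array([0, 1, 1, 2, 1])
    return eval_cat_err(y, yhat)    # 1 of 5 labels differs -> 0.2
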
def plot_iterate(lambdas, models, X_train, y_train, X_cv, y_cv):
    err_train = np.zeros(len(lambdas))
    err_cv = np.zeros(len(lambdas))
    for i in range(len(models)):
        err_train[i] = eval_cat_err(y_train, np.argmax(models[i](X_train), axis=1))
        err_cv[i] = eval_cat_err(y_cv, np.argmax(models[i](X_cv), axis=1))

    fig, ax = plt.subplots(1,1,figsize=(6,4))
    fig.canvas.toolbar_visible = False
    fig.canvas.header_visible = False
    fig.canvas.footer_visible = False
    ax.set_title("error vs regularization",fontsize = 12)
    ax.plot(lambdas, err_train, marker='o', label="train error", color = dlc["dlblue"])
    ax.plot(lambdas, err_cv, marker='o', label="cv error", color = dlc["dlorange"])
    ax.set_xscale('log')
    ax.set_ylim(*ax.get_ylim())
    ax.set_xlabel("Regularization (lambda)",fontsize = 14)
    ax.set_ylabel("Error",fontsize = 14)
    ax.legend()
    fig.suptitle("Tuning Regularization",fontsize = 14)
    ax.text(0.05,0.14,"Training Error\nlower than CV",fontsize=12, ha='left',transform=ax.transAxes,color = dlc["dlblue"])
    ax.text(0.95,0.14,"Similar\nTraining, CV", fontsize=12, ha='right',transform=ax.transAxes,color = dlc["dlblue"])
    plt.show()

# not used, but will calculate the error of the equal-distance (nearest-center) categorization
def err_all_equal(X_train, X_cv, X_test, y_train, y_cv, y_test, centers):
    X_all = np.concatenate((X_train, X_cv, X_test), axis=0)
    y_all = np.concatenate((y_train, y_cv, y_test), axis=0)
    m = len(X_all)
    y_eq = np.zeros(m)
    for i in range(m):
        y_eq[i] = recat(X_all[i], centers)
    err_all = eval_cat_err(y_all, y_eq)
    return(err_all)

def plt_compare(X, y, classes, simple, regularized, centers):
    plt.close("all")
    fig, ax = plt.subplots(1,3, figsize=(8,3))
    fig.canvas.toolbar_visible = False
    fig.canvas.header_visible = False
    fig.canvas.footer_visible = False

    # plot simple model
    plot_cat_decision_boundary(ax[0], X, simple, vector=True)
    ax[0].set_title("Simple Model", fontsize=14)
    plt_mc_data(ax[0], X, y, classes, map=dkcolors_map, legend=True, size=75)
    ax[0].set_xlabel('x0'); ax[0].set_ylabel("x1")

    # plot regularized model
    plot_cat_decision_boundary(ax[1], X, regularized, vector=True)
    ax[1].set_title("Regularized Model", fontsize=14)
    plt_mc_data(ax[1], X, y, classes, map=dkcolors_map, legend=True, size=75)
    ax[1].set_xlabel('x0'); ax[1].set_ylabel("x1")

    # plot ideal model
    cat_predict = lambda pt: recat(pt.reshape(1,2), centers)
    plot_cat_decision_boundary(ax[2], X, cat_predict, vector=False)
    ax[2].set_title("Ideal Model", fontsize=14)
    plt_mc_data(ax[2], X, y, classes, map=dkcolors_map, legend=True, size=75)
    ax[2].set_xlabel('x0'); ax[2].set_ylabel("x1")

    err_s = eval_cat_err(y, simple(X))
    err_r = eval_cat_err(y, regularized(X))
    ax[0].text(-2.75, 3, f"err_test={err_s:0.2f}", fontsize=12)
    ax[1].text(-2.75, 3, f"err_test={err_r:0.2f}", fontsize=12)
    m = len(X)
    y_eq = np.zeros(m)
    for i in range(m):
        y_eq[i] = recat(X[i], centers)
    err_eq = eval_cat_err(y, y_eq)
    ax[2].text(-2.75, 3, f"err_test={err_eq:0.2f}", fontsize=12)
    plt.show()

# --- End Assignment ----------------------------------------