Path: blob/main/C1 - Supervised Machine Learning: Regression and Classification/week2/Optional Labs/lab_utils_multi.py
import numpy as np
import copy
import math
from scipy.stats import norm
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
from matplotlib.ticker import MaxNLocator

dlblue = '#0096ff'; dlorange = '#FF9300'; dldarkred='#C00000'; dlmagenta='#FF40FF'; dlpurple='#7030A0';
plt.style.use('./deeplearning.mplstyle')

def load_data_multi():
    data = np.loadtxt("data/ex1data2.txt", delimiter=',')
    X = data[:,:2]
    y = data[:,2]
    return X, y

##########################################################
# Plotting Routines
##########################################################

def plt_house_x(X, y, f_wb=None, ax=None):
    ''' plot house with aXis '''
    if not ax:
        fig, ax = plt.subplots(1,1)
    ax.scatter(X, y, marker='x', c='r', label="Actual Value")

    ax.set_title("Housing Prices")
    ax.set_ylabel('Price (in 1000s of dollars)')
    ax.set_xlabel(f'Size (1000 sqft)')
    if f_wb is not None:
        ax.plot(X, f_wb, c=dlblue, label="Our Prediction")
    ax.legend()


def mk_cost_lines(x, y, w, b, ax):
    ''' makes vertical cost lines'''
    cstr = "cost = (1/2m)*1000*("
    ctot = 0
    label = 'cost for point'
    for p in zip(x,y):
        f_wb_p = w*p[0]+b
        c_p = ((f_wb_p - p[1])**2)/2
        c_p_txt = c_p/1000
        ax.vlines(p[0], p[1], f_wb_p, lw=3, color=dlpurple, ls='dotted', label=label)
        label='' #just one
        cxy = [p[0], p[1] + (f_wb_p-p[1])/2]
        ax.annotate(f'{c_p_txt:0.0f}', xy=cxy, xycoords='data', color=dlpurple,
                    xytext=(5, 0), textcoords='offset points')
        cstr += f"{c_p_txt:0.0f} +"
        ctot += c_p
    ctot = ctot/(len(x))
    cstr = cstr[:-1] + f") = {ctot:0.0f}"
    ax.text(0.15, 0.02, cstr, transform=ax.transAxes, color=dlpurple)


def inbounds(a, b, xlim, ylim):
    xlow, xhigh = xlim
    ylow, yhigh = ylim
    ax, ay = a
    bx, by = b
    if (ax > xlow and ax < xhigh) and (bx > xlow and bx < xhigh) \
        and (ay > ylow and ay < yhigh) and (by > ylow and by < yhigh):
        return(True)
    else:
        return(False)

from mpl_toolkits.mplot3d import axes3d
def plt_contour_wgrad(x, y, hist, ax, w_range=[-100, 500, 5], b_range=[-500, 500, 5],
                      contours=[0.1, 50, 1000, 5000, 10000, 25000, 50000],
                      resolution=5, w_final=200, b_final=100, step=10):
    b0, w0 = np.meshgrid(np.arange(*b_range), np.arange(*w_range))
    z = np.zeros_like(b0)
    n, _ = w0.shape
    for i in range(w0.shape[0]):
        for j in range(w0.shape[1]):
            z[i][j] = compute_cost(x, y, w0[i][j], b0[i][j])

    CS = ax.contour(w0, b0, z, contours, linewidths=2,
                    colors=[dlblue, dlorange, dldarkred, dlmagenta, dlpurple])
    ax.clabel(CS, inline=1, fmt='%1.0f', fontsize=10)
    ax.set_xlabel("w"); ax.set_ylabel("b")
    ax.set_title('Contour plot of cost J(w,b), vs b,w with path of gradient descent')
    w = w_final; b = b_final
    ax.hlines(b, ax.get_xlim()[0], w, lw=2, color=dlpurple, ls='dotted')
    ax.vlines(w, ax.get_ylim()[0], b, lw=2, color=dlpurple, ls='dotted')

    base = hist[0]
    for point in hist[0::step]:
        edist = np.sqrt((base[0] - point[0])**2 + (base[1] - point[1])**2)
        if (edist > resolution or point == hist[-1]):
            if inbounds(point, base, ax.get_xlim(), ax.get_ylim()):
                plt.annotate('', xy=point, xytext=base, xycoords='data',
                             arrowprops={'arrowstyle': '->', 'color': 'r', 'lw': 3},
                             va='center', ha='center')
            base = point
    return

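# Note: several of the plotting routines in this file call compute_cost, which is
# defined in the Regression Routines section further down. Python resolves the name
# at call time, so the forward reference is fine once the module is fully imported.
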
# plots p1 vs p2. Prange is an array of entries [min, max, steps]. In feature scaling lab.
def plt_contour_multi(x, y, w, b, ax, prange, p1, p2, title="", xlabel="", ylabel=""):
    contours = [1e2, 2e2, 3e2, 4e2, 5e2, 6e2, 7e2, 8e2, 1e3, 1.25e3, 1.5e3, 1e4, 1e5, 1e6, 1e7]
    px, py = np.meshgrid(np.linspace(*(prange[p1])), np.linspace(*(prange[p2])))
    z = np.zeros_like(px)
    n, _ = px.shape
    for i in range(px.shape[0]):
        for j in range(px.shape[1]):
            w_ij = w.copy()   # copy so the caller's parameter vector is not modified
            b_ij = b
            if p1 <= 3: w_ij[p1] = px[i,j]
            if p1 == 4: b_ij = px[i,j]
            if p2 <= 3: w_ij[p2] = py[i,j]
            if p2 == 4: b_ij = py[i,j]

            z[i][j] = compute_cost(x, y, w_ij, b_ij)
    CS = ax.contour(px, py, z, contours, linewidths=2,
                    colors=[dlblue, dlorange, dldarkred, dlmagenta, dlpurple])
    ax.clabel(CS, inline=1, fmt='%1.2e', fontsize=10)
    ax.set_xlabel(xlabel); ax.set_ylabel(ylabel)
    ax.set_title(title, fontsize=14)


def plt_equal_scale(X_train, X_norm, y_train):
    fig, ax = plt.subplots(1, 2, figsize=(12, 5))
    prange = [
        [ 0.238-0.045, 0.238+0.045, 50],
        [-25.77326319-0.045, -25.77326319+0.045, 50],
        [-50000, 0, 50],
        [-1500, 0, 50],
        [0, 200000, 50]]
    w_best = np.array([0.23844318, -25.77326319, -58.11084634, -1.57727192])
    b_best = 235
    plt_contour_multi(X_train, y_train, w_best, b_best, ax[0], prange, 0, 1,
                      title='Unnormalized, J(w,b), vs w[0],w[1]',
                      xlabel="w[0] (size(sqft))", ylabel="w[1] (# bedrooms)")
    #
    w_best = np.array([111.1972, -16.75480051, -28.51530411, -37.17305735])
    b_best = 376.949151515151
    prange = [[ 111-50, 111+50, 75],
              [-16.75-50, -16.75+50, 75],
              [-28.5-8, -28.5+8, 50],
              [-37.1-16, -37.1+16, 50],
              [376-150, 376+150, 50]]
    plt_contour_multi(X_norm, y_train, w_best, b_best, ax[1], prange, 0, 1,
                      title='Normalized, J(w,b), vs w[0],w[1]',
                      xlabel="w[0] (normalized size(sqft))", ylabel="w[1] (normalized # bedrooms)")
    fig.suptitle("Cost contour with equal scale", fontsize=18)
    #plt.tight_layout(rect=(0,0,1.05,1.05))
    fig.tight_layout(rect=(0, 0, 1, 0.95))
    plt.show()

def plt_divergence(p_hist, J_hist, x_train, y_train):

    x = np.zeros(len(p_hist))
    y = np.zeros(len(p_hist))
    v = np.zeros(len(p_hist))
    for i in range(len(p_hist)):
        x[i] = p_hist[i][0]
        y[i] = p_hist[i][1]
        v[i] = J_hist[i]

    fig = plt.figure(figsize=(12, 5))
    plt.subplots_adjust(wspace=0)
    gs = fig.add_gridspec(1, 5)
    fig.suptitle(f"Cost escalates when learning rate is too large")
    #===============
    #  First subplot
    #===============
    ax = fig.add_subplot(gs[:2])

    # Print w vs cost to see minimum
    fix_b = 100
    w_array = np.arange(-70000, 70000, 1000)
    cost = np.zeros_like(w_array)

    for i in range(len(w_array)):
        tmp_w = w_array[i]
        cost[i] = compute_cost(x_train, y_train, tmp_w, fix_b)

    ax.plot(w_array, cost)
    ax.plot(x, v, c=dlmagenta)
    ax.set_title("Cost vs w, b set to 100")
    ax.set_ylabel('Cost')
    ax.set_xlabel('w')
    ax.xaxis.set_major_locator(MaxNLocator(2))

    #===============
    # Second Subplot
    #===============

    tmp_b, tmp_w = np.meshgrid(np.arange(-35000, 35000, 500), np.arange(-70000, 70000, 500))
    z = np.zeros_like(tmp_b)
    for i in range(tmp_w.shape[0]):
        for j in range(tmp_w.shape[1]):
            z[i][j] = compute_cost(x_train, y_train, tmp_w[i][j], tmp_b[i][j])

    ax = fig.add_subplot(gs[2:], projection='3d')
    ax.plot_surface(tmp_w, tmp_b, z, alpha=0.3, color=dlblue)
    ax.xaxis.set_major_locator(MaxNLocator(2))
    ax.yaxis.set_major_locator(MaxNLocator(2))

    ax.set_xlabel('w', fontsize=16)
    ax.set_ylabel('b', fontsize=16)
    ax.set_zlabel('\ncost', fontsize=16)
    plt.title('Cost vs (b, w)')
    # Customize the view angle
    ax.view_init(elev=20., azim=-65)
    ax.plot(x, y, v, c=dlmagenta)

    return

# draw derivative line
# y = m*(x - x1) + y1
def add_line(dj_dx, x1, y1, d, ax):
    x = np.linspace(x1-d, x1+d, 50)
    y = dj_dx*(x - x1) + y1
    ax.scatter(x1, y1, color=dlblue, s=50)
    ax.plot(x, y, '--', c=dldarkred, zorder=10, linewidth=1)
    xoff = 30 if x1 == 200 else 10
    ax.annotate(r"$\frac{\partial J}{\partial w}$ =%d" % dj_dx, fontsize=14,
                xy=(x1, y1), xycoords='data',
                xytext=(xoff, 10), textcoords='offset points',
                arrowprops=dict(arrowstyle="->"),
                horizontalalignment='left', verticalalignment='top')

def plt_gradients(x_train, y_train, f_compute_cost, f_compute_gradient):
    #===============
    #  First subplot
    #===============
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))

    # Print w vs cost to see minimum
    fix_b = 100
    w_array = np.linspace(-100, 500, 50)
    w_array = np.linspace(0, 400, 50)
    cost = np.zeros_like(w_array)

    for i in range(len(w_array)):
        tmp_w = w_array[i]
        cost[i] = f_compute_cost(x_train, y_train, tmp_w, fix_b)
    ax[0].plot(w_array, cost, linewidth=1)
    ax[0].set_title("Cost vs w, with gradient; b set to 100")
    ax[0].set_ylabel('Cost')
    ax[0].set_xlabel('w')

    # plot lines for fixed b=100
    for tmp_w in [100, 200, 300]:
        fix_b = 100
        dj_dw, dj_db = f_compute_gradient(x_train, y_train, tmp_w, fix_b)
        j = f_compute_cost(x_train, y_train, tmp_w, fix_b)
        add_line(dj_dw, tmp_w, j, 30, ax[0])

    #===============
    # Second Subplot
    #===============

    tmp_b, tmp_w = np.meshgrid(np.linspace(-200, 200, 10), np.linspace(-100, 600, 10))
    U = np.zeros_like(tmp_w)
    V = np.zeros_like(tmp_b)
    for i in range(tmp_w.shape[0]):
        for j in range(tmp_w.shape[1]):
            U[i][j], V[i][j] = f_compute_gradient(x_train, y_train, tmp_w[i][j], tmp_b[i][j])
    X = tmp_w
    Y = tmp_b
    n = -2
    color_array = np.sqrt(((V-n)/2)**2 + ((U-n)/2)**2)

    ax[1].set_title('Gradient shown in quiver plot')
    Q = ax[1].quiver(X, Y, U, V, color_array, units='width')
    qk = ax[1].quiverkey(Q, 0.9, 0.9, 2, r'$2 \frac{m}{s}$', labelpos='E', coordinates='figure')
    ax[1].set_xlabel("w"); ax[1].set_ylabel("b")

def norm_plot(ax, data):
    scale = (np.max(data) - np.min(data))*0.2
    x = np.linspace(np.min(data)-scale, np.max(data)+scale, 50)
    _, bins, _ = ax.hist(data, x, color="xkcd:azure")
    #ax.set_ylabel("Count")

    mu = np.mean(data)
    std = np.std(data)
    dist = norm.pdf(bins, loc=mu, scale=std)

    axr = ax.twinx()
    axr.plot(bins, dist, color="orangered", lw=2)
    axr.set_ylim(bottom=0)
    axr.axis('off')

def plot_cost_i_w(X, y, hist):
    ws = np.array([p[0] for p in hist["params"]])
    rng = max(abs(ws[:,0].min()), abs(ws[:,0].max()))
    wr = np.linspace(-rng+0.27, rng+0.27, 20)
    cst = [compute_cost(X, y, np.array([wr[i], -32, -67, -1.46]), 221) for i in range(len(wr))]

    fig, ax = plt.subplots(1, 2, figsize=(12, 3))
    ax[0].plot(hist["iter"], (hist["cost"])); ax[0].set_title("Cost vs Iteration")
    ax[0].set_xlabel("iteration"); ax[0].set_ylabel("Cost")
    ax[1].plot(wr, cst); ax[1].set_title("Cost vs w[0]")
    ax[1].set_xlabel("w[0]"); ax[1].set_ylabel("Cost")
    ax[1].plot(ws[:,0], hist["cost"])
    plt.show()


##########################################################
# Regression Routines
##########################################################

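# The routines below all use the linear model f_wb = X @ w + b with squared-error cost
#   J(w,b) = (1/(2*m)) * sum((f_wb - y)**2)
# whose gradients are
#   dJ/dw = (1/m) * X.T @ (f_wb - y)   and   dJ/db = (1/m) * sum(f_wb - y).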
def compute_gradient_matrix(X, y, w, b):
    """
    Computes the gradient for linear regression

    Args:
      X : (array_like Shape (m,n)) variable such as house size
      y : (array_like Shape (m,))  actual value
      w : (array_like Shape (n,))  parameters of the model
      b : (scalar)                 parameter of the model
    Returns
      dj_dw: (array_like Shape (n,)) The gradient of the cost w.r.t. the parameters w.
      dj_db: (scalar)                The gradient of the cost w.r.t. the parameter b.

    """
    m, n = X.shape
    f_wb = X @ w + b
    e = f_wb - y
    dj_dw = (1/m) * (X.T @ e)
    dj_db = (1/m) * np.sum(e)

    return dj_db, dj_dw

# Function to calculate the cost
def compute_cost_matrix(X, y, w, b, verbose=False):
    """
    Computes the cost for linear regression
    Args:
      X : (array_like Shape (m,n)) variable such as house size
      y : (array_like Shape (m,))  actual value
      w : (array_like Shape (n,))  parameters of the model
      b : (scalar)                 parameter of the model
      verbose : (Boolean) If true, print out intermediate value f_wb
    Returns
      cost: (scalar)
    """
    m, n = X.shape

    # calculate f_wb for all examples.
    f_wb = X @ w + b
    # calculate cost
    total_cost = (1/(2*m)) * np.sum((f_wb - y)**2)

    if verbose: print("f_wb:")
    if verbose: print(f_wb)

    return total_cost

# Loop version of multi-variable compute_cost
def compute_cost(X, y, w, b):
    """
    compute cost
    Args:
      X : (ndarray): Shape (m,n) matrix of examples with multiple features
      y : (ndarray): Shape (m,)  target value of each example
      w : (ndarray): Shape (n,)  parameters for prediction
      b : (scalar):              parameter for prediction
    Returns
      cost: (scalar) cost
    """
    m = X.shape[0]
    cost = 0.0
    for i in range(m):
        f_wb_i = np.dot(X[i], w) + b
        cost = cost + (f_wb_i - y[i])**2
    cost = cost/(2*m)
    return(np.squeeze(cost))

def compute_gradient(X, y, w, b):
    """
    Computes the gradient for linear regression
    Args:
      X : (ndarray Shape (m,n)) matrix of examples
      y : (ndarray Shape (m,))  target value of each example
      w : (ndarray Shape (n,))  parameters of the model
      b : (scalar)              parameter of the model
    Returns
      dj_dw : (ndarray Shape (n,)) The gradient of the cost w.r.t. the parameters w.
      dj_db : (scalar)             The gradient of the cost w.r.t. the parameter b.
    """
    m, n = X.shape   #(number of examples, number of features)
    dj_dw = np.zeros((n,))
    dj_db = 0.

    for i in range(m):
        err = (np.dot(X[i], w) + b) - y[i]
        for j in range(n):
            dj_dw[j] = dj_dw[j] + err * X[i, j]
        dj_db = dj_db + err
    dj_dw = dj_dw/m
    dj_db = dj_db/m

    return dj_db, dj_dw
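
# Illustrative sanity check (not part of the original lab utilities): the loop-based
# compute_gradient and the vectorized compute_gradient_matrix should agree on any data.
# The small random problem below is an assumed example used only for the comparison.
def check_gradient_implementations(m=20, n=4, seed=1):
    rng = np.random.default_rng(seed)
    X = rng.random((m, n))
    y = rng.random(m)
    w = rng.random(n)
    b = 0.5
    dj_db_loop, dj_dw_loop = compute_gradient(X, y, w, b)
    dj_db_mat, dj_dw_mat = compute_gradient_matrix(X, y, w, b)
    return bool(np.isclose(dj_db_loop, dj_db_mat) and np.allclose(dj_dw_loop, dj_dw_mat))
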
# This version saves more values and is more verbose than the assignment versions
def gradient_descent_houses(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """
    Performs batch gradient descent to learn w and b. Updates w and b by taking
    num_iters gradient steps with learning rate alpha

    Args:
      X : (array_like Shape (m,n))    matrix of examples
      y : (array_like Shape (m,))     target value of each example
      w_in : (array_like Shape (n,))  Initial values of parameters of the model
      b_in : (scalar)                 Initial value of parameter of the model
      cost_function: function to compute cost
      gradient_function: function to compute the gradient
      alpha : (float) Learning rate
      num_iters : (int) number of iterations to run gradient descent
    Returns
      w : (array_like Shape (n,)) Updated values of parameters of the model after
          running gradient descent
      b : (scalar)                Updated value of parameter of the model after
          running gradient descent
    """

    # number of training examples
    m = len(X)

    # An array to store values at each iteration primarily for graphing later
    hist = {}
    hist["cost"] = []; hist["params"] = []; hist["grads"] = []; hist["iter"] = [];

    w = copy.deepcopy(w_in)  # avoid modifying global w within function
    b = b_in
    save_interval = np.ceil(num_iters/10000)  # prevent resource exhaustion for long runs

    print(f"Iteration Cost          w0       w1       w2       w3       b       djdw0    djdw1    djdw2    djdw3    djdb  ")
    print(f"---------------------|--------|--------|--------|--------|--------|--------|--------|--------|--------|--------|")

    for i in range(num_iters):

        # Calculate the gradient and update the parameters
        dj_db, dj_dw = gradient_function(X, y, w, b)

        # Update Parameters using w, b, alpha and gradient
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        # Save cost J, w, b at each save interval for graphing
        if i == 0 or i % save_interval == 0:
            hist["cost"].append(cost_function(X, y, w, b))
            hist["params"].append([w, b])
            hist["grads"].append([dj_dw, dj_db])
            hist["iter"].append(i)

        # Print cost at intervals: 10 times over the run, or every iteration if num_iters < 10
        if i % math.ceil(num_iters/10) == 0:
            #print(f"Iteration {i:4d}: Cost {cost_function(X, y, w, b):8.2f}   ")
            cst = cost_function(X, y, w, b)
            print(f"{i:9d} {cst:0.5e} {w[0]: 0.1e} {w[1]: 0.1e} {w[2]: 0.1e} {w[3]: 0.1e} {b: 0.1e} {dj_dw[0]: 0.1e} {dj_dw[1]: 0.1e} {dj_dw[2]: 0.1e} {dj_dw[3]: 0.1e} {dj_db: 0.1e}")

    return w, b, hist  # return w, b and history for graphing

def run_gradient_descent(X, y, iterations=1000, alpha=1e-6):

    m, n = X.shape
    # initialize parameters
    initial_w = np.zeros(n)
    initial_b = 0
    # run gradient descent
    w_out, b_out, hist_out = gradient_descent_houses(X, y, initial_w, initial_b,
                                                     compute_cost, compute_gradient_matrix, alpha, iterations)
    print(f"w,b found by gradient descent: w: {w_out}, b: {b_out:0.2f}")

    return(w_out, b_out, hist_out)

# compact extraction of hist data
#x  = hist["iter"]
#J  = np.array([ p    for p in hist["cost"]])
#ws = np.array([ p[0] for p in hist["params"]])
#dj_ws = np.array([ p[0] for p in hist["grads"]])

#bs = np.array([ p[1] for p in hist["params"]])

def run_gradient_descent_feng(X, y, iterations=1000, alpha=1e-6):
    m, n = X.shape
    # initialize parameters
    initial_w = np.zeros(n)
    initial_b = 0
    # run gradient descent
    w_out, b_out, hist_out = gradient_descent(X, y, initial_w, initial_b,
                                              compute_cost, compute_gradient_matrix, alpha, iterations)
    print(f"w,b found by gradient descent: w: {w_out}, b: {b_out:0.4f}")

    return(w_out, b_out)

def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """
    Performs batch gradient descent to learn w and b. Updates w and b by taking
    num_iters gradient steps with learning rate alpha

    Args:
      X : (array_like Shape (m,n))    matrix of examples
      y : (array_like Shape (m,))     target value of each example
      w_in : (array_like Shape (n,))  Initial values of parameters of the model
      b_in : (scalar)                 Initial value of parameter of the model
      cost_function: function to compute cost
      gradient_function: function to compute the gradient
      alpha : (float) Learning rate
      num_iters : (int) number of iterations to run gradient descent
    Returns
      w : (array_like Shape (n,)) Updated values of parameters of the model after
          running gradient descent
      b : (scalar)                Updated value of parameter of the model after
          running gradient descent
    """

    # number of training examples
    m = len(X)

    # An array to store values at each iteration primarily for graphing later
    hist = {}
    hist["cost"] = []; hist["params"] = []; hist["grads"] = []; hist["iter"] = [];

    w = copy.deepcopy(w_in)  # avoid modifying global w within function
    b = b_in
    save_interval = np.ceil(num_iters/10000)  # prevent resource exhaustion for long runs

    for i in range(num_iters):

        # Calculate the gradient and update the parameters
        dj_db, dj_dw = gradient_function(X, y, w, b)

        # Update Parameters using w, b, alpha and gradient
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        # Save cost J, w, b at each save interval for graphing
        if i == 0 or i % save_interval == 0:
            hist["cost"].append(cost_function(X, y, w, b))
            hist["params"].append([w, b])
            hist["grads"].append([dj_dw, dj_db])
            hist["iter"].append(i)

        # Print cost at intervals: 10 times over the run, or every iteration if num_iters < 10
        if i % math.ceil(num_iters/10) == 0:
            #print(f"Iteration {i:4d}: Cost {cost_function(X, y, w, b):8.2f}   ")
            cst = cost_function(X, y, w, b)
            print(f"Iteration {i:9d}, Cost: {cst:0.5e}")
    return w, b, hist  # return w, b and history for graphing

def load_house_data():
    data = np.loadtxt("./data/houses.txt", delimiter=',', skiprows=1)
    X = data[:,:4]
    y = data[:,4]
    return X, y

def zscore_normalize_features(X, rtn_ms=False):
    """
    returns z-score normalized X by column
    Args:
      X      : (numpy array (m,n))
      rtn_ms : (boolean) if True, also return the column means and standard deviations
    Returns
      X_norm : (numpy array (m,n)) input normalized by column
      mu     : (numpy array (n,))  column means (only when rtn_ms is True)
      sigma  : (numpy array (n,))  column standard deviations (only when rtn_ms is True)
    """
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    X_norm = (X - mu)/sigma

    if rtn_ms:
        return(X_norm, mu, sigma)
    else:
        return(X_norm)
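
# ----------------------------------------------------------------------
# Minimal usage sketch (illustrative only; the lab notebooks import these
# utilities directly rather than running this module). Assumes
# ./deeplearning.mplstyle and ./data/houses.txt are present, as in the lab
# folder; the learning rate below is an assumed value that works with
# z-score normalized features.
if __name__ == "__main__":
    X_train, y_train = load_house_data()
    X_norm, mu, sigma = zscore_normalize_features(X_train, rtn_ms=True)
    w_out, b_out, hist = run_gradient_descent(X_norm, y_train, iterations=1000, alpha=1.0e-1)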