Sharedexam_ml.ipynbOpen in CoCalc
Jupyter notebook exam_ml.ipynb

Final Exam

Machine Learning 2015-2

After solving all the questions in the exam save your notebook with the name username.ipynb and submit it to: https://www.dropbox.com/request/KN8GwdAIi0Hl2jk2mg2E


The following code implements a simple one-neuron neural network:

import numpy as np
import pylab as pl
%matplotlib inline

def sigmoid(x):
    return 1.0/(1.0 + np.exp(-x))

def predict(w, x):
    x = np.append(np.array([1]), x)
   
    return sigmoid(np.dot(w, x))


1. (1.0)

Find a weight vector such that the neural network calculates the NOR function:

f(x,y)=¬(xy)f(x,y)=\neg(x\vee y)

Use the following function to test your answer:

def test_prediction(X, Y, w):
    epsilon = 0.001
    for i, x in enumerate(X):
        if np.abs(predict(w, x) - Y[i]) > epsilon:
            
            raise Exception("Prediction error")
    return True

X = [[0, 0],
 [0, 1],
 [1, 0],
 [1, 1]]
Y = [1, 0, 0 ,0]
w = np.array([10, -20, -20])
test_prediction(X, Y, w)
True

2. (1.0)

The following function calculates the loss function of the neural network

def loss(w, x, y):
    return (predict(w, x) - y) ** 2 / 2

Write a function that calculates the gradient of the loss with respect to the weights:

Ew \frac{\partial E}{\partial w}

def dsig(x):
    
    return x*(1-x)
def de_dw(w, x, y):
    delta = np.zeros(len(w))
      
    val=(predict(w,x)-y)*dsig(predict(w,x))
    x = np.append(np.array([1]), x)
    delta=val*x
    # put your code here
    return delta

Use the following functions to test your code:

def num_de_dw(w, x, y, epsilon):
    deltas = np.identity(len(w)) * epsilon
    de = np.zeros(len(w))
    for i in range(len(w)):
        de[i] = (loss(w + deltas[i, :], x, y) - loss(w - deltas[i, :], x, y)) / (2 * epsilon)
    return de

def test_de_dw():
    num_tests = 100
    epsilon = 0.0001
    for i in range(num_tests):
        tw = np.random.randn(3)
        tx = np.random.randn(2)
        ty = np.random.randn(1)
        print (de_dw(tw, tx,ty)) 
        print (num_de_dw(tw, tx, ty, epsilon))
        if np.linalg.norm(de_dw(tw, tx,ty) - num_de_dw(tw, tx, ty, epsilon)) > epsilon:
            raise Exception("de_dw test failed!")

test_de_dw()
[ 0.00361838 0.00278242 -0.0038118 ] [ 0.00361838 0.00278242 -0.0038118 ] [-0.04035732 -0.05140127 0.04998526] [-0.04035732 -0.05140127 0.04998526] [ 0.02188831 0.01112613 0.02292367] [ 0.02188831 0.01112613 0.02292367] [ 0.03404092 -0.00240233 -0.05807317] [ 0.03404092 -0.00240233 -0.05807317] [ 0.52423482 0.14721585 0.14591704] [ 0.52423482 0.14721585 0.14591704] [ 0.22546929 -0.08998641 0.11208553] [ 0.22546929 -0.08998641 0.11208553] [ 0.04533799 0.00898872 -0.011983 ] [ 0.04533799 0.00898872 -0.011983 ] [ 0.09861583 -0.09234696 -0.21875542] [ 0.09861583 -0.09234696 -0.21875542] [ 0.05877143 -0.02940076 0.03116671] [ 0.05877143 -0.02940076 0.03116671] [ 0.11931123 -0.02702918 -0.06524697] [ 0.11931123 -0.02702918 -0.06524697] [ 0.43251664 -0.56064835 -0.80653295] [ 0.43251664 -0.56064835 -0.80653295] [ 0.32120464 0.09854823 -0.21391254] [ 0.32120464 0.09854823 -0.21391254] [ 0.20352592 -0.01110682 0.05591738] [ 0.20352592 -0.01110682 0.05591738] [ 0.01208132 0.00422836 -0.00190306] [ 0.01208132 0.00422836 -0.00190306] [ 0.0193632 -0.03179308 -0.02202115] [ 0.0193632 -0.03179308 -0.02202115] [ 0.04417083 -0.08437178 -0.02233991] [ 0.04417083 -0.08437178 -0.02233991] [ 0.0004245 0.00103905 0.00073962] [ 0.0004245 0.00103905 0.00073962] [-0.09753699 0.03430715 -0.04138716] [-0.09753699 0.03430715 -0.04138716] [-0.1147732 0.07139269 0.16531029] [-0.1147732 0.07139269 0.16531029] [-0.1867174 -0.12744464 -0.30730282] [-0.1867174 -0.12744464 -0.30730281] [ 0.47051809 0.00854824 -0.58196547] [ 0.47051809 0.00854824 -0.58196547] [ 0.20323275 -0.08996641 0.22137566] [ 0.20323275 -0.08996641 0.22137566] [ 0.07807574 -0.01125673 0.03511072] [ 0.07807574 -0.01125673 0.03511072] [-0.1922282 -0.13344952 -0.12512982] [-0.1922282 -0.13344952 -0.12512982] [ 0.44808434 -0.01082755 0.00216098] [ 0.44808434 -0.01082755 0.00216098] [-0.27836925 -0.00201407 0.24415776] [-0.27836925 -0.00201407 0.24415776] [-0.04933149 -0.02702416 -0.01838848] [-0.04933149 -0.02702416 -0.01838848] [-0.0941828 0.14928483 0.1073521 ] [-0.0941828 0.14928483 0.1073521 ] [-0.04820676 -0.0083064 -0.07348709] [-0.04820676 -0.0083064 -0.07348709] [-0.00020326 0.00050782 -0.00023546] [-0.00020326 0.00050782 -0.00023546] [ 0.03939128 -0.03061601 -0.0046337 ] [ 0.03939128 -0.03061601 -0.0046337 ] [ 0.04757414 0.0327343 0.01285949] [ 0.04757414 0.0327343 0.01285949] [ 0.03075601 -0.01185176 -0.02093231] [ 0.03075601 -0.01185176 -0.02093231] [ 0.05934192 -0.02996796 0.07453932] [ 0.05934192 -0.02996796 0.07453932] [-0.09252648 0.12129026 0.14658041] [-0.09252648 0.12129026 0.14658041] [ 0.0091093 0.00203482 -0.00784722] [ 0.0091093 0.00203482 -0.00784722] [ 2.43267959e-02 2.54306941e-02 -8.54035051e-05] [ 2.43267960e-02 2.54306942e-02 -8.54035051e-05] [ 0.5093184 0.33872083 -0.32138307] [ 0.5093184 0.33872083 -0.32138307] [ 0.10051475 -0.09812507 0.03643533] [ 0.10051475 -0.09812507 0.03643533] [ 0.00541466 0.00453131 0.01127696] [ 0.00541466 0.00453131 0.01127696] [ 0.12173473 0.1238322 -0.08829124] [ 0.12173473 0.1238322 -0.08829124] [ 0.02671952 0.00153561 0.00739973] [ 0.02671952 0.00153561 0.00739973] [-0.00034706 0.00012495 -0.00057136] [-0.00034706 0.00012495 -0.00057136] [ 0.20312833 0.02596203 -0.09139195] [ 0.20312833 0.02596203 -0.09139195] [ 0.35024836 -0.93569558 -0.39538334] [ 0.35024836 -0.93569557 -0.39538334] [ 0.13967526 0.18001101 0.05598359] [ 0.13967526 0.18001101 0.05598359] [ 0.24541106 0.39459918 -0.12311136] [ 0.24541106 0.39459918 -0.12311136] [ 0.25317134 0.34654978 -0.0483229 ] [ 0.25317134 0.34654978 -0.0483229 ] [-0.00073854 -0.00194302 0.00084781] [-0.00073854 -0.00194302 0.00084781] [ 0.00560716 0.00442974 -0.00639664] [ 0.00560716 0.00442974 -0.00639664] [-0.10548431 -0.02398795 0.0383357 ] [-0.1054843 -0.02398795 0.0383357 ] [ 0.32818243 -0.25669906 -0.05766182] [ 0.32818243 -0.25669906 -0.05766182] [ 0.14649066 0.22729138 -0.00566705] [ 0.14649066 0.22729138 -0.00566705] [-0.50233951 -1.13963594 -0.02543276] [-0.50233951 -1.13963594 -0.02543276] [ 0.48959341 -0.62647716 -0.01915929] [ 0.48959341 -0.62647716 -0.01915929] [ 0.11578909 0.00761076 -0.02407562] [ 0.11578909 0.00761076 -0.02407562] [ 0.03487706 -0.03315373 0.01437933] [ 0.03487706 -0.03315373 0.01437933] [-0.48640015 -0.30370259 -0.5777591 ] [-0.48640015 -0.30370259 -0.5777591 ] [ 0.03072539 0.01194149 -0.02394993] [ 0.03072539 0.01194149 -0.02394993] [ 0.01576878 -0.00337766 0.01715638] [ 0.01576878 -0.00337766 0.01715638] [ 0.12775916 0.17459785 0.00469759] [ 0.12775916 0.17459785 0.00469759] [ 0.07849305 -0.04365712 0.01372478] [ 0.07849305 -0.04365712 0.01372478] [ 0.03135299 0.04627793 0.04456123] [ 0.03135299 0.04627793 0.04456123] [ 0.03118055 0.04168922 0.0310658 ] [ 0.03118055 0.04168922 0.0310658 ] [ 0.05352351 -0.06348895 0.05953933] [ 0.05352351 -0.06348895 0.05953933] [-0.00840743 0.00787568 -0.00741771] [-0.00840743 0.00787568 -0.00741771] [ 0.03396481 -0.01756546 0.00143522] [ 0.03396481 -0.01756546 0.00143522] [ 0.07865796 -0.11947434 -0.06254752] [ 0.07865796 -0.11947434 -0.06254752] [ 0.31542869 -0.06063275 0.05544078] [ 0.31542869 -0.06063275 0.05544078] [ 0.12891659 -0.06968791 0.01342116] [ 0.12891659 -0.06968791 0.01342116] [ 0.02215945 -0.01510461 -0.02836185] [ 0.02215945 -0.01510461 -0.02836185] [ 0.07126199 -0.0893883 -0.04065034] [ 0.07126199 -0.0893883 -0.04065034] [ 0.17092115 0.02968431 0.30006181] [ 0.17092115 0.02968431 0.30006181] [ 0.00199004 -0.00356849 -0.00129328] [ 0.00199004 -0.00356849 -0.00129328] [-0.19638704 0.17432019 0.23136635] [-0.19638704 0.17432019 0.23136635] [-0.11959387 0.06890331 -0.16004863] [-0.11959387 0.06890331 -0.16004863] [-0.00472371 0.00875916 0.00055854] [-0.00472371 0.00875916 0.00055854] [ 0.35728917 0.25638664 -0.23197925] [ 0.35728917 0.25638664 -0.23197925] [ 0.11248355 -0.01238601 -0.23657722] [ 0.11248355 -0.01238601 -0.23657722] [-0.0161583 0.01224079 0.02569674] [-0.0161583 0.01224079 0.02569674] [ 0.1354122 0.00966235 -0.04328074] [ 0.1354122 0.00966235 -0.04328074] [ 0.27310759 0.01255336 0.08220588] [ 0.27310759 0.01255336 0.08220588] [-0.36315055 0.15160219 -0.49211041] [-0.36315055 0.15160219 -0.49211041] [ 0.29019676 -0.05150968 0.03283823] [ 0.29019676 -0.05150968 0.03283823] [-0.08386687 -0.05946682 0.05886732] [-0.08386687 -0.05946682 0.05886732] [-0.08411782 -0.06067491 0.00202236] [-0.08411782 -0.06067491 0.00202236] [ 0.00345004 0.0112581 -0.00080798] [ 0.00345004 0.0112581 -0.00080798] [ 0.08287697 0.11685906 0.0575813 ] [ 0.08287697 0.11685906 0.0575813 ] [ 0.2493986 0.29747052 0.09162665] [ 0.2493986 0.29747052 0.09162665] [ 0.54638284 -0.32353767 0.43321959] [ 0.54638284 -0.32353767 0.43321959] [ 0.24395516 -0.15690855 0.03607412] [ 0.24395516 -0.15690855 0.03607412] [ 0.28072974 -0.13066481 -0.2314205 ] [ 0.28072974 -0.13066481 -0.2314205 ] [ 0.31145577 -0.17782658 0.1526641 ] [ 0.31145577 -0.17782658 0.1526641 ] [ 0.02545361 -0.01425304 -0.00147651] [ 0.02545361 -0.01425304 -0.00147651] [-0.06792889 0.00584921 0.03167822] [-0.06792889 0.00584921 0.03167822] [-0.03155762 -0.01993758 0.01719192] [-0.03155762 -0.01993758 0.01719192] [ 0.3676183 0.18589928 0.43560384] [ 0.3676183 0.18589928 0.43560384] [-0.11201769 -0.13025339 0.01482716] [-0.11201769 -0.13025339 0.01482716] [ 0.48846115 -0.61975222 -0.01107753] [ 0.48846115 -0.61975222 -0.01107753] [-0.80876316 -0.75776966 1.73373514] [-0.80876316 -0.75776966 1.73373513]

Now, we can use the gradient function to train the neural network using gradient descent

def evaluate(w, X, Y):
    result = 0
    for i, x in enumerate(X):
        result += loss(w, x, Y[i])
    return result

def train(X, Y, epochs, eta, w_ini):
    losses = []
    w = w_ini
    for i in range(epochs):
        delta = np.zeros(len(w))
        for i, x in enumerate(X):
            delta += de_dw(w, x, Y[i])
        w = w - eta * delta
        losses.append(evaluate(w, X, Y))
    return w, losses
X = [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]]
Y = [0, 0, 1, 0]
w, losses = train(X, Y, 50, 10, [0, 0, 0])
pl.plot(losses)
print w
print predict(w, [1, 0])
print predict(w, [0, 1])
[-2.75411492 5.35754647 -5.59430763] 0.93108210251 0.000236713671518

3. (1.0)

Now we will modify the loss function to include a regularization term: E(w,D)=12(xi,yi)D(f(w,xi)yi)2+12βw22 E(w,D)=\frac{1}{2}\sum_{(x_{i},y_{i})\in D}(f(w,x_{i})-y_{i})^{2}+\frac{\text{1}}{2}\beta\left\Vert w\right\Vert _{2}^{2}

where f(w,xi)f(w,x_{i}) is the prediction calculated by the neural network.

To accomplish this you must modify the following functions:

def reg_loss(w, beta, x, y):
    loss = (((predict(w, x) - y) ** 2)/2)+(1/4*beta*np.dot(w,w))
    return loss

def reg_de_dw(w, beta, x, y):
    delta = np.zeros(len(w))
    delta= de_dw(w,x,y)+(1/2*beta*w)
    # put your code here
    return delta

You can use the following functions to test your code:

def reg_num_de_dw(w, beta, x, y, epsilon):
    deltas = np.identity(len(w)) * epsilon
    de = np.zeros(len(w))
    for i in range(len(w)):
        de[i] = (reg_loss(w + deltas[i, :], beta, x, y) - reg_loss(w - deltas[i, :], beta, x, y)) / (2 * epsilon)
    return de

def reg_test_de_dw():
    num_tests = 100
    epsilon = 0.0001
    beta = 1
    for i in range(num_tests):
        tw = np.random.randn(3)
        tx = np.random.randn(2)
        ty = np.random.randn(1)
                 
        if np.linalg.norm(reg_de_dw(tw, beta, tx, ty) - reg_num_de_dw(tw, beta, tx, ty, epsilon)) > epsilon:
            raise Exception("reg_de_dw test failed!")

reg_test_de_dw()

4. (1.0)

Now train the neural network using regularization:

def reg_evaluate(w, beta, X, Y):
    result = 0
    for i, x in enumerate(X):
        result += reg_loss(w,beta, x, Y[i])
    return result

def reg_train(X, Y, epochs, eta, w_ini, beta):
    losses = []
    w = np.array(w_ini)
    for i in range(epochs):
        delta = np.zeros(len(w))
        for i, x in enumerate(X):
            delta += reg_de_dw(w,beta, x, Y[i])
        w = w - eta * delta
        losses.append(reg_evaluate(w,beta, X, Y))
    
    # your code here
    return w, losses
X = [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]]
Y = [0, 0, 1, 0]
wr, losses = reg_train(X, Y, 50, 2, [0, 0, 0], 0.01)
pl.plot(losses)
print wr
print predict(wr, [1, 0])
print predict(wr, [0, 1])
[-1.73530979 3.28436986 -3.55201031] 0.824777934667 0.00502986236723

What is the effect of regularization? Discuss.

5. (1.0)

Here, we will build a kernel version of the previous neural network, i.e., a neural network able to work in a feature space induced by a kernel. To do this we will express the weight vector as a linear combination of vectors in a set XX:

w=xiXαiϕ(xi) w=\sum_{x_{i}\in X}\alpha_{i}\phi(x_{i})

Now, implement this modifying the following functions:

def k_predict(alpha, X, kernel, x):
    
    result = 0
    weight= np.dot(alpha,kernel(X,x))
    x = np.append(np.array([1]), x)
    result= np.dot(weight,x)
    # your code here
    return sigmoid(result)
    
def k_loss(alpha, X, beta, kernel, x, y):
    w=np.dot(alpha,kernel(X,x))
    loss = (((k_predict(alpha,X,kernel, x) - y) ** 2)/2)+(1/4*beta*np.dot(w,w))
    # your code here
    return loss

Test your functions with the following code:

alpha = [1, 0.5, -0.3, -0.4]
Xs = [[0.1, -0.5],
     [0.5, 1.0],
     [-1.0, 0.5],
     [1.0, 1.0]]

def k1(x, y):
    return np.dot(x, y)

def k2(x, y):
    return (np.dot(x, y) + 1) ** 2

X = [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]]
Y = [0, 0, 1, 0]
for i, x in enumerate(X):
    print k_predict(alpha, Xs, k1, x), k_loss(alpha, Xs, 1, k1, x, Y[i])
print "--------"
for i, x in enumerate(X):
    print k_predict(alpha, Xs, k2, x), k_loss(alpha, Xs, 1, k2, x, Y[i])
[ 0.5 0.5 0.5] [ 0.125 0.125 0.125] [ 0.36586441 0.5 0.36586441] [ 0.06692838 0.125 0.06692838] [ 0.5621765 0.5621765 0.5 ] [ 0.09584471 0.09584471 0.125 ] [ 0.42555748 0.42555748 0.42555748] [ 0.09054959 0.09054959 0.09054959] -------- [ 0.68997448 0.5 0.5 ] [ 0.23803239 0.125 0.125 ] [ 0.49375033 0.5 0.49375033] [ 0.12189469 0.125 0.12189469] [ 0.67590153 0.67590153 0.5 ] [ 0.05251991 0.05251991 0.125 ] [ 0.45264238 0.45264238 0.45264238] [ 0.10244256 0.10244256 0.10244256]

6. (optional, extra credit)

Train the kernel neural network using gradient descent.

def k_de_dw(alpha, X, beta, kernel, x, y):
    w=np.dot(alpha,kernel(X,x))
    delta = 0
    
    val=(k_predict(alpha,X,kernel, x) - y)*dsig(k_predict(alpha,X,kernel, x))
    
    x = np.append(np.array([1]), x)
    delta=val*x
    delta=delta+(1/2*beta*w)
    
    # put your code here
    return delta
def k_evaluate(alpha, X, beta, kernel, X_, Y):
    result = 0
    for i, x in enumerate(X_):
        result += k_loss(alpha, X, beta, kernel, x, Y[i])
    return result

def k_train(X, Y, epochs, eta, w_ini, beta,alpha,X_,kernel):
    losses = []
    w = np.array(w_ini)
    for i in range(epochs):
        delta = 0
        for i, x in enumerate(X_):
            delta += k_de_dw(alpha, X, beta, kernel, x, Y[i])
        w = w - eta * delta
        losses.append(k_evaluate(alpha, X, beta, kernel, x, Y))
    
    # your code here
    return w, losses
alpha = [1, 0.5, -0.3, -0.4]
Xs = [[0.1, -0.5],
     [0.5, 1.0],
     [-1.0, 0.5],
     [1.0, 1.0]]

def k1(x, y):
    return np.dot(x, y)

def k2(x, y):
    return (np.dot(x, y) + 1) ** 2

X = [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]]
Y = [0, 0, 1, 0]
wr, losses = k_train(Xs, Y, 50, 2, [0, 0, 0], 0.01,alpha,X,k2)
pl.plot(losses)
print wr
print predict(wr, [1, 0])
print predict(wr, [0, 1])
[-31.2159388 -4.11488599 -23.55637307] 4.52916096464e-16 1.63187614121e-24