Jupyter notebook exam_ml.ipynb

⁹⁰ views

Kernel: Python 2 (SageMath)

Final Exam

Machine Learning 2015-2

After solving all the questions in the exam save your notebook with the name username.ipynb and submit it to: https://www.dropbox.com/request/KN8GwdAIi0Hl2jk2mg2E

The following code implements a simple one-neuron neural network:

In [1]:

import numpy as np
import pylab as pl
%matplotlib inline

def sigmoid(x):
    return 1.0/(1.0 + np.exp(-x))

def predict(w, x):
    x = np.append(np.array([1]), x)
   
    return sigmoid(np.dot(w, x))

1. (1.0)

Find a weight vector such that the neural network calculates the NOR function:

f(x,y)=\neg(x\vee y)

Use the following function to test your answer:

In [2]:

def test_prediction(X, Y, w):
    epsilon = 0.001
    for i, x in enumerate(X):
        if np.abs(predict(w, x) - Y[i]) > epsilon:
            
            raise Exception("Prediction error")
    return True

X = [[0, 0],
 [0, 1],
 [1, 0],
 [1, 1]]
Y = [1, 0, 0 ,0]
w = np.array([10, -20, -20])
test_prediction(X, Y, w)

Out[2]:

True

2. (1.0)

The following function calculates the loss function of the neural network

In [3]:

def loss(w, x, y):
    return (predict(w, x) - y) ** 2 / 2

Write a function that calculates the gradient of the loss with respect to the weights:

\frac{\partial E}{\partial w}

In [4]:

def dsig(x):
    
    return x*(1-x)
def de_dw(w, x, y):
    delta = np.zeros(len(w))
      
    val=(predict(w,x)-y)*dsig(predict(w,x))
    x = np.append(np.array([1]), x)
    delta=val*x
    # put your code here
    return delta

Use the following functions to test your code:

In [5]:

def num_de_dw(w, x, y, epsilon):
    deltas = np.identity(len(w)) * epsilon
    de = np.zeros(len(w))
    for i in range(len(w)):
        de[i] = (loss(w + deltas[i, :], x, y) - loss(w - deltas[i, :], x, y)) / (2 * epsilon)
    return de

def test_de_dw():
    num_tests = 100
    epsilon = 0.0001
    for i in range(num_tests):
        tw = np.random.randn(3)
        tx = np.random.randn(2)
        ty = np.random.randn(1)
        print (de_dw(tw, tx,ty)) 
        print (num_de_dw(tw, tx, ty, epsilon))
        if np.linalg.norm(de_dw(tw, tx,ty) - num_de_dw(tw, tx, ty, epsilon)) > epsilon:
            raise Exception("de_dw test failed!")

test_de_dw()

Out[5]:

[ 0.00361838  0.00278242 -0.0038118 ]
[ 0.00361838  0.00278242 -0.0038118 ]
[-0.04035732 -0.05140127  0.04998526]
[-0.04035732 -0.05140127  0.04998526]
[ 0.02188831  0.01112613  0.02292367]
[ 0.02188831  0.01112613  0.02292367]
[ 0.03404092 -0.00240233 -0.05807317]
[ 0.03404092 -0.00240233 -0.05807317]
[ 0.52423482  0.14721585  0.14591704]
[ 0.52423482  0.14721585  0.14591704]
[ 0.22546929 -0.08998641  0.11208553]
[ 0.22546929 -0.08998641  0.11208553]
[ 0.04533799  0.00898872 -0.011983  ]
[ 0.04533799  0.00898872 -0.011983  ]
[ 0.09861583 -0.09234696 -0.21875542]
[ 0.09861583 -0.09234696 -0.21875542]
[ 0.05877143 -0.02940076  0.03116671]
[ 0.05877143 -0.02940076  0.03116671]
[ 0.11931123 -0.02702918 -0.06524697]
[ 0.11931123 -0.02702918 -0.06524697]
[ 0.43251664 -0.56064835 -0.80653295]
[ 0.43251664 -0.56064835 -0.80653295]
[ 0.32120464  0.09854823 -0.21391254]
[ 0.32120464  0.09854823 -0.21391254]
[ 0.20352592 -0.01110682  0.05591738]
[ 0.20352592 -0.01110682  0.05591738]
[ 0.01208132  0.00422836 -0.00190306]
[ 0.01208132  0.00422836 -0.00190306]
[ 0.0193632  -0.03179308 -0.02202115]
[ 0.0193632  -0.03179308 -0.02202115]
[ 0.04417083 -0.08437178 -0.02233991]
[ 0.04417083 -0.08437178 -0.02233991]
[ 0.0004245   0.00103905  0.00073962]
[ 0.0004245   0.00103905  0.00073962]
[-0.09753699  0.03430715 -0.04138716]
[-0.09753699  0.03430715 -0.04138716]
[-0.1147732   0.07139269  0.16531029]
[-0.1147732   0.07139269  0.16531029]
[-0.1867174  -0.12744464 -0.30730282]
[-0.1867174  -0.12744464 -0.30730281]
[ 0.47051809  0.00854824 -0.58196547]
[ 0.47051809  0.00854824 -0.58196547]
[ 0.20323275 -0.08996641  0.22137566]
[ 0.20323275 -0.08996641  0.22137566]
[ 0.07807574 -0.01125673  0.03511072]
[ 0.07807574 -0.01125673  0.03511072]
[-0.1922282  -0.13344952 -0.12512982]
[-0.1922282  -0.13344952 -0.12512982]
[ 0.44808434 -0.01082755  0.00216098]
[ 0.44808434 -0.01082755  0.00216098]
[-0.27836925 -0.00201407  0.24415776]
[-0.27836925 -0.00201407  0.24415776]
[-0.04933149 -0.02702416 -0.01838848]
[-0.04933149 -0.02702416 -0.01838848]
[-0.0941828   0.14928483  0.1073521 ]
[-0.0941828   0.14928483  0.1073521 ]
[-0.04820676 -0.0083064  -0.07348709]
[-0.04820676 -0.0083064  -0.07348709]
[-0.00020326  0.00050782 -0.00023546]
[-0.00020326  0.00050782 -0.00023546]
[ 0.03939128 -0.03061601 -0.0046337 ]
[ 0.03939128 -0.03061601 -0.0046337 ]
[ 0.04757414  0.0327343   0.01285949]
[ 0.04757414  0.0327343   0.01285949]
[ 0.03075601 -0.01185176 -0.02093231]
[ 0.03075601 -0.01185176 -0.02093231]
[ 0.05934192 -0.02996796  0.07453932]
[ 0.05934192 -0.02996796  0.07453932]
[-0.09252648  0.12129026  0.14658041]
[-0.09252648  0.12129026  0.14658041]
[ 0.0091093   0.00203482 -0.00784722]
[ 0.0091093   0.00203482 -0.00784722]
[  2.43267959e-02   2.54306941e-02  -8.54035051e-05]
[  2.43267960e-02   2.54306942e-02  -8.54035051e-05]
[ 0.5093184   0.33872083 -0.32138307]
[ 0.5093184   0.33872083 -0.32138307]
[ 0.10051475 -0.09812507  0.03643533]
[ 0.10051475 -0.09812507  0.03643533]
[ 0.00541466  0.00453131  0.01127696]
[ 0.00541466  0.00453131  0.01127696]
[ 0.12173473  0.1238322  -0.08829124]
[ 0.12173473  0.1238322  -0.08829124]
[ 0.02671952  0.00153561  0.00739973]
[ 0.02671952  0.00153561  0.00739973]
[-0.00034706  0.00012495 -0.00057136]
[-0.00034706  0.00012495 -0.00057136]
[ 0.20312833  0.02596203 -0.09139195]
[ 0.20312833  0.02596203 -0.09139195]
[ 0.35024836 -0.93569558 -0.39538334]
[ 0.35024836 -0.93569557 -0.39538334]
[ 0.13967526  0.18001101  0.05598359]
[ 0.13967526  0.18001101  0.05598359]
[ 0.24541106  0.39459918 -0.12311136]
[ 0.24541106  0.39459918 -0.12311136]
[ 0.25317134  0.34654978 -0.0483229 ]
[ 0.25317134  0.34654978 -0.0483229 ]
[-0.00073854 -0.00194302  0.00084781]
[-0.00073854 -0.00194302  0.00084781]
[ 0.00560716  0.00442974 -0.00639664]
[ 0.00560716  0.00442974 -0.00639664]
[-0.10548431 -0.02398795  0.0383357 ]
[-0.1054843  -0.02398795  0.0383357 ]
[ 0.32818243 -0.25669906 -0.05766182]
[ 0.32818243 -0.25669906 -0.05766182]
[ 0.14649066  0.22729138 -0.00566705]
[ 0.14649066  0.22729138 -0.00566705]
[-0.50233951 -1.13963594 -0.02543276]
[-0.50233951 -1.13963594 -0.02543276]
[ 0.48959341 -0.62647716 -0.01915929]
[ 0.48959341 -0.62647716 -0.01915929]
[ 0.11578909  0.00761076 -0.02407562]
[ 0.11578909  0.00761076 -0.02407562]
[ 0.03487706 -0.03315373  0.01437933]
[ 0.03487706 -0.03315373  0.01437933]
[-0.48640015 -0.30370259 -0.5777591 ]
[-0.48640015 -0.30370259 -0.5777591 ]
[ 0.03072539  0.01194149 -0.02394993]
[ 0.03072539  0.01194149 -0.02394993]
[ 0.01576878 -0.00337766  0.01715638]
[ 0.01576878 -0.00337766  0.01715638]
[ 0.12775916  0.17459785  0.00469759]
[ 0.12775916  0.17459785  0.00469759]
[ 0.07849305 -0.04365712  0.01372478]
[ 0.07849305 -0.04365712  0.01372478]
[ 0.03135299  0.04627793  0.04456123]
[ 0.03135299  0.04627793  0.04456123]
[ 0.03118055  0.04168922  0.0310658 ]
[ 0.03118055  0.04168922  0.0310658 ]
[ 0.05352351 -0.06348895  0.05953933]
[ 0.05352351 -0.06348895  0.05953933]
[-0.00840743  0.00787568 -0.00741771]
[-0.00840743  0.00787568 -0.00741771]
[ 0.03396481 -0.01756546  0.00143522]
[ 0.03396481 -0.01756546  0.00143522]
[ 0.07865796 -0.11947434 -0.06254752]
[ 0.07865796 -0.11947434 -0.06254752]
[ 0.31542869 -0.06063275  0.05544078]
[ 0.31542869 -0.06063275  0.05544078]
[ 0.12891659 -0.06968791  0.01342116]
[ 0.12891659 -0.06968791  0.01342116]
[ 0.02215945 -0.01510461 -0.02836185]
[ 0.02215945 -0.01510461 -0.02836185]
[ 0.07126199 -0.0893883  -0.04065034]
[ 0.07126199 -0.0893883  -0.04065034]
[ 0.17092115  0.02968431  0.30006181]
[ 0.17092115  0.02968431  0.30006181]
[ 0.00199004 -0.00356849 -0.00129328]
[ 0.00199004 -0.00356849 -0.00129328]
[-0.19638704  0.17432019  0.23136635]
[-0.19638704  0.17432019  0.23136635]
[-0.11959387  0.06890331 -0.16004863]
[-0.11959387  0.06890331 -0.16004863]
[-0.00472371  0.00875916  0.00055854]
[-0.00472371  0.00875916  0.00055854]
[ 0.35728917  0.25638664 -0.23197925]
[ 0.35728917  0.25638664 -0.23197925]
[ 0.11248355 -0.01238601 -0.23657722]
[ 0.11248355 -0.01238601 -0.23657722]
[-0.0161583   0.01224079  0.02569674]
[-0.0161583   0.01224079  0.02569674]
[ 0.1354122   0.00966235 -0.04328074]
[ 0.1354122   0.00966235 -0.04328074]
[ 0.27310759  0.01255336  0.08220588]
[ 0.27310759  0.01255336  0.08220588]
[-0.36315055  0.15160219 -0.49211041]
[-0.36315055  0.15160219 -0.49211041]
[ 0.29019676 -0.05150968  0.03283823]
[ 0.29019676 -0.05150968  0.03283823]
[-0.08386687 -0.05946682  0.05886732]
[-0.08386687 -0.05946682  0.05886732]
[-0.08411782 -0.06067491  0.00202236]
[-0.08411782 -0.06067491  0.00202236]
[ 0.00345004  0.0112581  -0.00080798]
[ 0.00345004  0.0112581  -0.00080798]
[ 0.08287697  0.11685906  0.0575813 ]
[ 0.08287697  0.11685906  0.0575813 ]
[ 0.2493986   0.29747052  0.09162665]
[ 0.2493986   0.29747052  0.09162665]
[ 0.54638284 -0.32353767  0.43321959]
[ 0.54638284 -0.32353767  0.43321959]
[ 0.24395516 -0.15690855  0.03607412]
[ 0.24395516 -0.15690855  0.03607412]
[ 0.28072974 -0.13066481 -0.2314205 ]
[ 0.28072974 -0.13066481 -0.2314205 ]
[ 0.31145577 -0.17782658  0.1526641 ]
[ 0.31145577 -0.17782658  0.1526641 ]
[ 0.02545361 -0.01425304 -0.00147651]
[ 0.02545361 -0.01425304 -0.00147651]
[-0.06792889  0.00584921  0.03167822]
[-0.06792889  0.00584921  0.03167822]
[-0.03155762 -0.01993758  0.01719192]
[-0.03155762 -0.01993758  0.01719192]
[ 0.3676183   0.18589928  0.43560384]
[ 0.3676183   0.18589928  0.43560384]
[-0.11201769 -0.13025339  0.01482716]
[-0.11201769 -0.13025339  0.01482716]
[ 0.48846115 -0.61975222 -0.01107753]
[ 0.48846115 -0.61975222 -0.01107753]
[-0.80876316 -0.75776966  1.73373514]
[-0.80876316 -0.75776966  1.73373513]

Now, we can use the gradient function to train the neural network using gradient descent

In [6]:

def evaluate(w, X, Y):
    result = 0
    for i, x in enumerate(X):
        result += loss(w, x, Y[i])
    return result

def train(X, Y, epochs, eta, w_ini):
    losses = []
    w = w_ini
    for i in range(epochs):
        delta = np.zeros(len(w))
        for i, x in enumerate(X):
            delta += de_dw(w, x, Y[i])
        w = w - eta * delta
        losses.append(evaluate(w, X, Y))
    return w, losses

In [7]:

X = [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]]
Y = [0, 0, 1, 0]
w, losses = train(X, Y, 50, 10, [0, 0, 0])
pl.plot(losses)
print w
print predict(w, [1, 0])
print predict(w, [0, 1])

Out[7]:

[-2.75411492  5.35754647 -5.59430763]
0.93108210251
0.000236713671518

3. (1.0)

Now we will modify the loss function to include a regularization term: $E(w,D)=\frac{1}{2}\sum_{(x_{i},y_{i})\in D}(f(w,x_{i})-y_{i})^{2}+\frac{\text{1}}{2}\beta\left\Vert w\right\Vert _{2}^{2}$

where $f(w,x_{i})$ is the prediction calculated by the neural network.

To accomplish this you must modify the following functions:

In [8]:

def reg_loss(w, beta, x, y):
    loss = (((predict(w, x) - y) ** 2)/2)+(1/4*beta*np.dot(w,w))
    return loss

def reg_de_dw(w, beta, x, y):
    delta = np.zeros(len(w))
    delta= de_dw(w,x,y)+(1/2*beta*w)
    # put your code here
    return delta

You can use the following functions to test your code:

In [9]:

def reg_num_de_dw(w, beta, x, y, epsilon):
    deltas = np.identity(len(w)) * epsilon
    de = np.zeros(len(w))
    for i in range(len(w)):
        de[i] = (reg_loss(w + deltas[i, :], beta, x, y) - reg_loss(w - deltas[i, :], beta, x, y)) / (2 * epsilon)
    return de

def reg_test_de_dw():
    num_tests = 100
    epsilon = 0.0001
    beta = 1
    for i in range(num_tests):
        tw = np.random.randn(3)
        tx = np.random.randn(2)
        ty = np.random.randn(1)
                 
        if np.linalg.norm(reg_de_dw(tw, beta, tx, ty) - reg_num_de_dw(tw, beta, tx, ty, epsilon)) > epsilon:
            raise Exception("reg_de_dw test failed!")

reg_test_de_dw()

4. (1.0)

Now train the neural network using regularization:

In [10]:

def reg_evaluate(w, beta, X, Y):
    result = 0
    for i, x in enumerate(X):
        result += reg_loss(w,beta, x, Y[i])
    return result

def reg_train(X, Y, epochs, eta, w_ini, beta):
    losses = []
    w = np.array(w_ini)
    for i in range(epochs):
        delta = np.zeros(len(w))
        for i, x in enumerate(X):
            delta += reg_de_dw(w,beta, x, Y[i])
        w = w - eta * delta
        losses.append(reg_evaluate(w,beta, X, Y))
    
    # your code here
    return w, losses

In [11]:

X = [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]]
Y = [0, 0, 1, 0]
wr, losses = reg_train(X, Y, 50, 2, [0, 0, 0], 0.01)
pl.plot(losses)
print wr
print predict(wr, [1, 0])
print predict(wr, [0, 1])

Out[11]:

[-1.73530979  3.28436986 -3.55201031]
0.824777934667
0.00502986236723

What is the effect of regularization? Discuss.

5. (1.0)

Here, we will build a kernel version of the previous neural network, i.e., a neural network able to work in a feature space induced by a kernel. To do this we will express the weight vector as a linear combination of vectors in a set $X$ :

w=\sum_{x_{i}\in X}\alpha_{i}\phi(x_{i})

Now, implement this modifying the following functions:

In [12]:

def k_predict(alpha, X, kernel, x):
    
    result = 0
    weight= np.dot(alpha,kernel(X,x))
    x = np.append(np.array([1]), x)
    result= np.dot(weight,x)
    # your code here
    return sigmoid(result)
    
def k_loss(alpha, X, beta, kernel, x, y):
    w=np.dot(alpha,kernel(X,x))
    loss = (((k_predict(alpha,X,kernel, x) - y) ** 2)/2)+(1/4*beta*np.dot(w,w))
    # your code here
    return loss

Test your functions with the following code:

In [13]:

alpha = [1, 0.5, -0.3, -0.4]
Xs = [[0.1, -0.5],
     [0.5, 1.0],
     [-1.0, 0.5],
     [1.0, 1.0]]

def k1(x, y):
    return np.dot(x, y)

def k2(x, y):
    return (np.dot(x, y) + 1) ** 2

X = [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]]
Y = [0, 0, 1, 0]
for i, x in enumerate(X):
    print k_predict(alpha, Xs, k1, x), k_loss(alpha, Xs, 1, k1, x, Y[i])
print "--------"
for i, x in enumerate(X):
    print k_predict(alpha, Xs, k2, x), k_loss(alpha, Xs, 1, k2, x, Y[i])

Out[13]:

[ 0.5  0.5  0.5] [ 0.125  0.125  0.125]
[ 0.36586441  0.5         0.36586441] [ 0.06692838  0.125       0.06692838]
[ 0.5621765  0.5621765  0.5      ] [ 0.09584471  0.09584471  0.125     ]
[ 0.42555748  0.42555748  0.42555748] [ 0.09054959  0.09054959  0.09054959]
--------
[ 0.68997448  0.5         0.5       ] [ 0.23803239  0.125       0.125     ]
[ 0.49375033  0.5         0.49375033] [ 0.12189469  0.125       0.12189469]
[ 0.67590153  0.67590153  0.5       ] [ 0.05251991  0.05251991  0.125     ]
[ 0.45264238  0.45264238  0.45264238] [ 0.10244256  0.10244256  0.10244256]

6. (optional, extra credit)

Train the kernel neural network using gradient descent.

In [14]:

def k_de_dw(alpha, X, beta, kernel, x, y):
    w=np.dot(alpha,kernel(X,x))
    delta = 0
    
    val=(k_predict(alpha,X,kernel, x) - y)*dsig(k_predict(alpha,X,kernel, x))
    
    x = np.append(np.array([1]), x)
    delta=val*x
    delta=delta+(1/2*beta*w)
    
    # put your code here
    return delta

In [15]:

def k_evaluate(alpha, X, beta, kernel, X_, Y):
    result = 0
    for i, x in enumerate(X_):
        result += k_loss(alpha, X, beta, kernel, x, Y[i])
    return result

def k_train(X, Y, epochs, eta, w_ini, beta,alpha,X_,kernel):
    losses = []
    w = np.array(w_ini)
    for i in range(epochs):
        delta = 0
        for i, x in enumerate(X_):
            delta += k_de_dw(alpha, X, beta, kernel, x, Y[i])
        w = w - eta * delta
        losses.append(k_evaluate(alpha, X, beta, kernel, x, Y))
    
    # your code here
    return w, losses

In [16]:

alpha = [1, 0.5, -0.3, -0.4]
Xs = [[0.1, -0.5],
     [0.5, 1.0],
     [-1.0, 0.5],
     [1.0, 1.0]]

def k1(x, y):
    return np.dot(x, y)

def k2(x, y):
    return (np.dot(x, y) + 1) ** 2

X = [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]]
Y = [0, 0, 1, 0]
wr, losses = k_train(Xs, Y, 50, 2, [0, 0, 0], 0.01,alpha,X,k2)
pl.plot(losses)
print wr
print predict(wr, [1, 0])
print predict(wr, [0, 1])

Out[16]:

[-31.2159388   -4.11488599 -23.55637307]
4.52916096464e-16
1.63187614121e-24

In [0]:

In [0]: