Path: blob/main/Trabajo_grupal/WG1/Solution/python_script.py
4511 views
1#######################################23" Homework 1 - solution "4" @author: Roberto Mendoza "5" @date: 12/09/2020 "67#######################################8910import pandas as pd11import numpy as np12import random131415"1. IF statement and Loop"1617vector = random.sample( range(501) , 20)1819for i in range(20):20x = vector[i]2122if x<=100:23y = x**0.524elif (x>100 & x<=300):25y = x - 52627elif x > 300:2829y = 503031vector[i] = y323334vector3536"2. IF statement and Loop, escalar function"373839## Escalar vector4041vector2 = random.sample( range(501) , 100)4243for i in range(100):44x = vector2[i]4546maxi = np.max(vector2)47mini = np.min(vector2)4849y = (x-mini)/(maxi-mini)5051vector2[i] = y5253vector25455## Escalar Matrix5657total = 100*505859matrix = np.random.normal(1,10,total).reshape(100, 50)6061for j in range(matrix.shape[1]):6263maxi = np.max(matrix[:,j])64mini = np.min(matrix[:,j])6566for i in range(matrix.shape[0]):6768x = matrix[i,j]69y = (x-mini)/(maxi-mini)70matrix[i,j] = y7172"3. OLS and Loop"737475np.random.seed(175)7677x1 = np.random.rand(10000) # uniform distribution [0,1]78x2 = np.random.rand(10000) # uniform distribution [0,1]79x3 = np.random.rand(10000) # uniform distribution [0,1]80x4 = np.random.rand(10000) # uniform distribution [0,1]81x5 = np.random.rand(10000) # uniform distribution [0,1]82e = np.random.normal(0,1,10000) # normal distribution mean = 0 and sd = 18384# Poblacional regression (Data Generating Process GDP)858687Y = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + 0.5*x5 + e8889X = np.column_stack((np.ones(10000),x1,x2,x3,x5))9091size = [10,50,80,120,200,500,800,1000,5000]929394def reg(X,Y):95beta = np.linalg.inv(X.T @ X) @ ((X.T) @ Y )96y_est = X @ beta97n = X.shape[0]98k = X.shape[1] - 199nk = n - k100sigma = sum(list( map( lambda x: x**2 , Y - y_est) )) / nk101Var = sigma*np.linalg.inv(X.T @ X)102sd = np.sqrt( np.diag(Var) )103104return beta, sd105106beta_coef = np.zeros(X.shape[1])107est_err = np.zeros(X.shape[1])108109for n in size:110sample = random.sample( range(10000) , n)111v_coeff = reg(X[sample,:], Y[sample])[0]112v_sd = reg(X[sample,:], Y[sample])[1]113beta_coef = np.vstack((beta_coef,v_coeff)) # stack vector114est_err = np.vstack((est_err,v_sd))115116117# stack vector pero la primera fila está llena de zeros, por eso no se considera la primera fila en118# el dataframe siguiente:119120table = pd.DataFrame( {"sample_size": size ,121"Coeff_int": beta_coef[1:,0] , "Standar_error_int" : est_err[1:,0],122"Coeff_x1": beta_coef[1:,1] , "Standar_error_x1" : est_err[1:,1],123"Coeff_x2": beta_coef[1:,2] , "Standar_error_x2" : est_err[1:,2],124"Coeff_x3": beta_coef[1:,3] , "Standar_error_x3" : est_err[1:,3],125"Coeff_x5": beta_coef[1:,4] , "Standar_error_x5" : est_err[1:,4]126})127128"4. OLS and Loop"129130from scipy.stats import t # t - student131import pandas as pd132133np.random.seed(175)134135x1 = np.random.rand(800) # uniform distribution [0,1]136x2 = np.random.rand(800) # uniform distribution [0,1]137x3 = np.random.rand(800) # uniform distribution [0,1]138x4 = np.random.rand(800) # uniform distribution [0,1]139x5 = np.random.rand(800) # uniform distribution [0,1]140x6 = np.random.rand(800) # uniform distribution [0,1]141x7 = np.random.rand(800) # uniform distribution [0,1]142e = np.random.normal(0,1,800) # normal distribution mean = 0 and sd = 1143# Poblacional regression (Data Generating Process GDP)144145Y = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + 0.2*x5+0.5*x6+2*x7+e146147X = np.column_stack((np.ones(800),x1,x2,x3,x4,x5,x6,x7))148149150def ols(X,Y, intercepto = True, robust_sd = False):151152if intercepto:153beta = np.linalg.inv(X.T @ X) @ ((X.T) @ Y )154y_est = X @ beta155n = X.shape[0]156k = X.shape[1] - 1157nk = n - k158159if robust_sd==False:160161sigma = sum(list( map( lambda x: x**2 , Y - y_est) )) / nk162Var = sigma*np.linalg.inv(X.T @ X)163sd = np.sqrt( np.diag(Var) )164t_est = np.absolute(beta/sd)165pvalue = (1 - t.cdf(t_est, df=nk) ) * 2166lower_bound = beta-1.96*sd167upper_bound = beta+1.96*sd168SCR = sum(list( map( lambda x: x**2 , Y - y_est) ))169SCT = sum(list( map( lambda x: x**2 , Y - np.mean(y_est) )))170R2 = 1-SCR/SCT171rmse = (SCR/n)**0.5172table = pd.DataFrame( {"ols": beta , "standar_error" : sd ,173"Pvalue" : pvalue, "Lower_bound":lower_bound,174"Upper_bound":upper_bound} )175fit = {"Root_MSE":rmse, "R2": R2}176177else :178matrix_robust = np.diag(list( map( lambda x: x**2 , Y - y_est)))179Var = np.linalg.inv(X.T @ X) @ X.T @ matrix_robust @ X @ np.linalg.inv(X.T @ X)180sd = np.sqrt( np.diag(Var) )181t_est = np.absolute(beta/sd)182pvalue = (1 - t.cdf(t_est, df=nk) ) * 2183lower_bound = beta-1.96*sd184upper_bound = beta+1.96*sd185SCR = sum(list( map( lambda x: x**2 , Y - y_est) ))186SCT = sum(list( map( lambda x: x**2 , Y - np.mean(y_est) )))187R2 = 1-SCR/SCT188rmse = (SCR/n)**0.5189table = pd.DataFrame( {"ols": beta , "standar_error" : sd ,190"Pvalue" : pvalue, "Lower_bound":lower_bound,191"Upper_bound":upper_bound} )192fit = {"Root_MSE":rmse, "R2": R2}193194else:195196X = X[:,1:] #Nos quedamos desde la segunda columan hasta la ultima (no intercepto)197beta = np.linalg.inv(X.T @ X) @ ((X.T) @ Y )198y_est = X @ beta199n = X.shape[0]200k = X.shape[1] - 1201nk = n - k202203if robust_sd==False:204205sigma = sum(list( map( lambda x: x**2 , Y - y_est) )) / nk206Var = sigma*np.linalg.inv(X.T @ X)207sd = np.sqrt( np.diag(Var) )208t_est = np.absolute(beta/sd)209pvalue = (1 - t.cdf(t_est, df=nk) ) * 2210lower_bound = beta-1.96*sd211upper_bound = beta+1.96*sd212SCR = sum(list( map( lambda x: x**2 , Y - y_est) ))213SCT = sum(list( map( lambda x: x**2 , Y - np.mean(y_est) )))214R2 = 1-SCR/SCT215rmse = (SCR/n)**0.5216table = pd.DataFrame( {"ols": beta , "standar_error" : sd ,217"Pvalue" : pvalue, "Lower_bound":lower_bound,218"Upper_bound":upper_bound} )219fit = {"Root_MSE":rmse, "R2": R2}220221else :222matrix_robust = np.diag(list( map( lambda x: x**2 , Y - y_est)))223Var = np.linalg.inv(X.T @ X) @ X.T @ matrix_robust @ X @ np.linalg.inv(X.T @ X)224sd = np.sqrt( np.diag(Var) )225t_est = np.absolute(beta/sd)226pvalue = (1 - t.cdf(t_est, df=nk) ) * 2227lower_bound = beta-1.96*sd228upper_bound = beta+1.96*sd229SCR = sum(list( map( lambda x: x**2 , Y - y_est) ))230SCT = sum(list( map( lambda x: x**2 , Y - np.mean(y_est) )))231R2 = 1-SCR/SCT232rmse = (SCR/n)**0.5233table = pd.DataFrame( {"ols": beta , "standar_error" : sd ,234"Pvalue" : pvalue, "Lower_bound":lower_bound,235"Upper_bound":upper_bound} )236fit = {"Root_MSE":rmse, "R2": R2}237238return table, fit239240241242243ols(X,Y)244ols(X,Y, intercepto = False, robust_sd = True)245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283