CoCalc -- python

GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_grupal/WG1/Solution/python_script.py
⁴⁵¹¹ views
1

2
#######################################
3

4
" Homework 1 - solution  "
5
" @author: Roberto Mendoza "
6
" @date: 12/09/2020 "
7

8
#######################################
9

10

11
import pandas as pd
12
import numpy as np
13
import random
14

15

16
"1. IF statement and Loop"
17

18
vector = random.sample( range(501) , 20)
19

20
for i in range(20):
21
    x = vector[i]
22
    
23
    if x<=100:
24
        y = x**0.5
25
    elif (x>100 & x<=300):
26
        y = x - 5
27
        
28
    elif x > 300:
29
        
30
        y = 50
31
        
32
    vector[i] = y
33
    
34
    
35
vector   
36
    
37
"2. IF statement and Loop, escalar function"
38

39

40
## Escalar vector
41

42
vector2 = random.sample( range(501) , 100)
43

44
for i in range(100):
45
    x = vector2[i]
46
    
47
    maxi = np.max(vector2)
48
    mini = np.min(vector2)
49
    
50
    y = (x-mini)/(maxi-mini)
51
    
52
    vector2[i] = y
53
    
54
vector2
55

56
## Escalar Matrix
57

58
total = 100*50
59

60
matrix = np.random.normal(1,10,total).reshape(100, 50)
61

62
for j in range(matrix.shape[1]):
63
    
64
    maxi = np.max(matrix[:,j])
65
    mini = np.min(matrix[:,j])    
66
    
67
    for i in range(matrix.shape[0]):
68
        
69
        x = matrix[i,j] 
70
        y = (x-mini)/(maxi-mini)
71
        matrix[i,j] = y
72
        
73
"3. OLS and Loop"
74

75

76
np.random.seed(175)
77

78
x1 = np.random.rand(10000) # uniform distribution  [0,1]
79
x2 = np.random.rand(10000) # uniform distribution [0,1]
80
x3 = np.random.rand(10000) # uniform distribution [0,1]
81
x4 = np.random.rand(10000) # uniform distribution [0,1]
82
x5 = np.random.rand(10000) # uniform distribution [0,1]
83
e = np.random.normal(0,1,10000) # normal distribution mean = 0 and sd = 1
84

85
# Poblacional regression (Data Generating Process GDP)
86

87

88
Y = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + 0.5*x5 + e
89

90
X = np.column_stack((np.ones(10000),x1,x2,x3,x5))        
91

92
size = [10,50,80,120,200,500,800,1000,5000]
93

94

95
def reg(X,Y):
96
         beta = np.linalg.inv(X.T @ X) @ ((X.T) @ Y ) 
97
         y_est =  X @ beta 
98
         n = X.shape[0]
99
         k = X.shape[1] - 1 
100
         nk = n - k    
101
         sigma =  sum(list( map( lambda x: x**2 , Y - y_est)   )) / nk 
102
         Var = sigma*np.linalg.inv(X.T @ X)
103
         sd = np.sqrt( np.diag(Var) )
104
         
105
         return beta, sd
106

107
beta_coef =  np.zeros(X.shape[1])
108
est_err =  np.zeros(X.shape[1])
109

110
for n in size:
111
    sample = random.sample( range(10000) , n)
112
    v_coeff = reg(X[sample,:], Y[sample])[0]
113
    v_sd = reg(X[sample,:], Y[sample])[1]
114
    beta_coef = np.vstack((beta_coef,v_coeff))  # stack vector 
115
    est_err = np.vstack((est_err,v_sd))
116
    
117
    
118
# stack vector pero la primera fila está llena de zeros, por eso no se considera la primera fila en
119
# el dataframe siguiente:
120
    
121
table = pd.DataFrame( {"sample_size": size ,
122
                       "Coeff_int": beta_coef[1:,0] , "Standar_error_int" : est_err[1:,0],
123
                       "Coeff_x1": beta_coef[1:,1] , "Standar_error_x1" : est_err[1:,1],
124
                       "Coeff_x2": beta_coef[1:,2] , "Standar_error_x2" : est_err[1:,2],
125
                       "Coeff_x3": beta_coef[1:,3] , "Standar_error_x3" : est_err[1:,3],
126
                       "Coeff_x5": beta_coef[1:,4] , "Standar_error_x5" : est_err[1:,4]
127
                       }) 
128

129
"4. OLS and Loop"
130

131
from scipy.stats import t # t - student 
132
import pandas as pd 
133

134
np.random.seed(175)
135

136
x1 = np.random.rand(800) # uniform distribution  [0,1]
137
x2 = np.random.rand(800) # uniform distribution [0,1]
138
x3 = np.random.rand(800) # uniform distribution [0,1]
139
x4 = np.random.rand(800) # uniform distribution [0,1]
140
x5 = np.random.rand(800) # uniform distribution [0,1]
141
x6 = np.random.rand(800) # uniform distribution [0,1]
142
x7 = np.random.rand(800) # uniform distribution [0,1]
143
e = np.random.normal(0,1,800) # normal distribution mean = 0 and sd = 1
144
# Poblacional regression (Data Generating Process GDP)
145

146
Y = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + 0.2*x5+0.5*x6+2*x7+e
147

148
X = np.column_stack((np.ones(800),x1,x2,x3,x4,x5,x6,x7))
149

150

151
def ols(X,Y, intercepto = True, robust_sd = False):
152
    
153
    if intercepto:
154
        beta = np.linalg.inv(X.T @ X) @ ((X.T) @ Y )
155
        y_est =  X @ beta 
156
        n = X.shape[0]
157
        k = X.shape[1] - 1 
158
        nk = n - k  
159
        
160
        if robust_sd==False:
161
             
162
            sigma =  sum(list( map( lambda x: x**2 , Y - y_est)   )) / nk 
163
            Var = sigma*np.linalg.inv(X.T @ X)
164
            sd = np.sqrt( np.diag(Var) )
165
            t_est = np.absolute(beta/sd)
166
            pvalue = (1 - t.cdf(t_est, df=nk) ) * 2
167
            lower_bound = beta-1.96*sd
168
            upper_bound = beta+1.96*sd
169
            SCR = sum(list( map( lambda x: x**2 , Y - y_est)   ))
170
            SCT = sum(list( map( lambda x: x**2 , Y - np.mean(y_est)   )))
171
            R2 = 1-SCR/SCT
172
            rmse = (SCR/n)**0.5
173
            table = pd.DataFrame( {"ols": beta , "standar_error" : sd ,
174
                                "Pvalue" : pvalue, "Lower_bound":lower_bound,
175
                                "Upper_bound":upper_bound} ) 
176
            fit = {"Root_MSE":rmse, "R2": R2}
177
            
178
        else :
179
             matrix_robust = np.diag(list( map( lambda x: x**2 , Y - y_est)))
180
             Var = np.linalg.inv(X.T @ X) @ X.T @ matrix_robust @ X @ np.linalg.inv(X.T @ X)
181
             sd = np.sqrt( np.diag(Var) )
182
             t_est = np.absolute(beta/sd)
183
             pvalue = (1 - t.cdf(t_est, df=nk) ) * 2
184
             lower_bound = beta-1.96*sd
185
             upper_bound = beta+1.96*sd
186
             SCR = sum(list( map( lambda x: x**2 , Y - y_est)   ))
187
             SCT = sum(list( map( lambda x: x**2 , Y - np.mean(y_est)   )))
188
             R2 = 1-SCR/SCT
189
             rmse = (SCR/n)**0.5
190
             table = pd.DataFrame( {"ols": beta , "standar_error" : sd ,
191
                                 "Pvalue" : pvalue, "Lower_bound":lower_bound,
192
                                 "Upper_bound":upper_bound} ) 
193
             fit = {"Root_MSE":rmse, "R2": R2}
194
             
195
    else:
196
        
197
        X = X[:,1:]  #Nos quedamos desde la segunda columan hasta la ultima (no intercepto)
198
        beta = np.linalg.inv(X.T @ X) @ ((X.T) @ Y )
199
        y_est =  X @ beta 
200
        n = X.shape[0]
201
        k = X.shape[1] - 1 
202
        nk = n - k  
203
        
204
        if robust_sd==False:
205
             
206
            sigma =  sum(list( map( lambda x: x**2 , Y - y_est)   )) / nk 
207
            Var = sigma*np.linalg.inv(X.T @ X)
208
            sd = np.sqrt( np.diag(Var) )
209
            t_est = np.absolute(beta/sd)
210
            pvalue = (1 - t.cdf(t_est, df=nk) ) * 2
211
            lower_bound = beta-1.96*sd
212
            upper_bound = beta+1.96*sd
213
            SCR = sum(list( map( lambda x: x**2 , Y - y_est)   ))
214
            SCT = sum(list( map( lambda x: x**2 , Y - np.mean(y_est)   )))
215
            R2 = 1-SCR/SCT
216
            rmse = (SCR/n)**0.5
217
            table = pd.DataFrame( {"ols": beta , "standar_error" : sd ,
218
                                "Pvalue" : pvalue, "Lower_bound":lower_bound,
219
                                "Upper_bound":upper_bound} ) 
220
            fit = {"Root_MSE":rmse, "R2": R2}
221
            
222
        else :
223
             matrix_robust = np.diag(list( map( lambda x: x**2 , Y - y_est)))
224
             Var = np.linalg.inv(X.T @ X) @ X.T @ matrix_robust @ X @ np.linalg.inv(X.T @ X)
225
             sd = np.sqrt( np.diag(Var) )
226
             t_est = np.absolute(beta/sd)
227
             pvalue = (1 - t.cdf(t_est, df=nk) ) * 2
228
             lower_bound = beta-1.96*sd
229
             upper_bound = beta+1.96*sd
230
             SCR = sum(list( map( lambda x: x**2 , Y - y_est)   ))
231
             SCT = sum(list( map( lambda x: x**2 , Y - np.mean(y_est)   )))
232
             R2 = 1-SCR/SCT
233
             rmse = (SCR/n)**0.5
234
             table = pd.DataFrame( {"ols": beta , "standar_error" : sd ,
235
                                 "Pvalue" : pvalue, "Lower_bound":lower_bound,
236
                                 "Upper_bound":upper_bound} ) 
237
             fit = {"Root_MSE":rmse, "R2": R2}
238
                  
239
    return table, fit         
240

241

242

243

244
ols(X,Y)
245
ols(X,Y, intercepto = False, robust_sd = True)
246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283
Product

Resources

Company