GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_grupal/WG1/Solution/python_script.py
#######################################

" Homework 1 - solution "
" @author: Roberto Mendoza "
" @date: 12/09/2020 "

#######################################

import pandas as pd
import numpy as np
import random


"1. IF statement and Loop"

vector = random.sample( range(501) , 20)

for i in range(20):

    x = vector[i]

    if x <= 100:
        y = x**0.5
    elif 100 < x <= 300:   # chained comparison; the original (x>100 & x<=300) misparses because & binds before the comparisons
        y = x - 5
    elif x > 300:
        y = 50

    vector[i] = y

vector
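
# A vectorized alternative (a sketch, not part of the original solution):
# np.select applies the same piecewise rule to the whole array at once.
# `vec_alt` is a hypothetical fresh sample used only for illustration.
vec_alt = np.array(random.sample( range(501) , 20), dtype=float)
vec_alt = np.select([vec_alt <= 100, vec_alt <= 300, vec_alt > 300],
                    [vec_alt**0.5, vec_alt - 5, 50.0])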


"2. IF statement and Loop, scaling function"


## Scale vector

vector2 = random.sample( range(501) , 100)

# Compute the max and min once, before the loop; computing them inside the
# loop would use values that have already been overwritten with scaled ones.
maxi = np.max(vector2)
mini = np.min(vector2)

for i in range(100):

    x = vector2[i]
    y = (x - mini)/(maxi - mini)

    vector2[i] = y

vector2

## Scale matrix

total = 100*50

matrix = np.random.normal(1,10,total).reshape(100, 50)

for j in range(matrix.shape[1]):

    maxi = np.max(matrix[:,j])
    mini = np.min(matrix[:,j])

    for i in range(matrix.shape[0]):

        x = matrix[i,j]
        y = (x - mini)/(maxi - mini)
        matrix[i,j] = y

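# A vectorized alternative (a sketch, not part of the original solution):
# NumPy broadcasting applies the same column-wise min-max scaling without
# explicit loops. `matrix_alt` is a hypothetical copy used only for comparison.
matrix_alt = np.random.normal(1, 10, total).reshape(100, 50)
col_min = matrix_alt.min(axis=0)
col_max = matrix_alt.max(axis=0)
matrix_alt_scaled = (matrix_alt - col_min) / (col_max - col_min)
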
"3. OLS and Loop"
74
75
76
np.random.seed(175)
77
78
x1 = np.random.rand(10000) # uniform distribution [0,1]
79
x2 = np.random.rand(10000) # uniform distribution [0,1]
80
x3 = np.random.rand(10000) # uniform distribution [0,1]
81
x4 = np.random.rand(10000) # uniform distribution [0,1]
82
x5 = np.random.rand(10000) # uniform distribution [0,1]
83
e = np.random.normal(0,1,10000) # normal distribution mean = 0 and sd = 1
84
85
# Poblacional regression (Data Generating Process GDP)
86
87
88
Y = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + 0.5*x5 + e
89
90
X = np.column_stack((np.ones(10000),x1,x2,x3,x5))
91
92
size = [10,50,80,120,200,500,800,1000,5000]
93
94
95

def reg(X,Y):

    # OLS coefficients: beta = (X'X)^(-1) X'Y
    beta = np.linalg.inv(X.T @ X) @ ((X.T) @ Y )
    y_est = X @ beta
    n = X.shape[0]
    k = X.shape[1] - 1
    nk = n - k
    # estimated error variance and classical covariance matrix
    sigma = sum(list( map( lambda x: x**2 , Y - y_est) )) / nk
    Var = sigma*np.linalg.inv(X.T @ X)
    sd = np.sqrt( np.diag(Var) )

    return beta, sd

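# Quick sanity check (a sketch, not part of the original solution): the
# closed-form estimate above should agree with NumPy's least-squares solver.
beta_check = np.linalg.lstsq(X, Y, rcond=None)[0]
print(np.allclose(reg(X, Y)[0], beta_check))   # expected: True
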
beta_coef = np.zeros(X.shape[1])
est_err = np.zeros(X.shape[1])

for n in size:
    sample = random.sample( range(10000) , n)
    v_coeff, v_sd = reg(X[sample,:], Y[sample])   # estimate once per sample size
    beta_coef = np.vstack((beta_coef,v_coeff))    # stack vector
    est_err = np.vstack((est_err,v_sd))


# The first row of each stacked array is the row of zeros used to initialize it,
# so that row is dropped when building the following dataframe:

table = pd.DataFrame( {"sample_size": size ,
                       "Coeff_int": beta_coef[1:,0] , "Standard_error_int" : est_err[1:,0],
                       "Coeff_x1": beta_coef[1:,1] , "Standard_error_x1" : est_err[1:,1],
                       "Coeff_x2": beta_coef[1:,2] , "Standard_error_x2" : est_err[1:,2],
                       "Coeff_x3": beta_coef[1:,3] , "Standard_error_x3" : est_err[1:,3],
                       "Coeff_x5": beta_coef[1:,4] , "Standard_error_x5" : est_err[1:,4]
                       })

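# Illustration (a sketch, not part of the original solution): the estimated
# standard errors should shrink roughly at rate 1/sqrt(n) as the sample grows.
print(est_err[1:, 1])   # standard error of the x1 coefficient for each size in `size`
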
"4. OLS and Loop"
130
131
from scipy.stats import t # t - student
132
import pandas as pd
133
134
np.random.seed(175)
135
136
x1 = np.random.rand(800) # uniform distribution [0,1]
137
x2 = np.random.rand(800) # uniform distribution [0,1]
138
x3 = np.random.rand(800) # uniform distribution [0,1]
139
x4 = np.random.rand(800) # uniform distribution [0,1]
140
x5 = np.random.rand(800) # uniform distribution [0,1]
141
x6 = np.random.rand(800) # uniform distribution [0,1]
142
x7 = np.random.rand(800) # uniform distribution [0,1]
143
e = np.random.normal(0,1,800) # normal distribution mean = 0 and sd = 1
144
# Poblacional regression (Data Generating Process GDP)
145
146
Y = 1 + 0.8*x1 + 1.2*x2 + 0.5*x3 + 1.5*x4 + 0.2*x5+0.5*x6+2*x7+e
147
148
X = np.column_stack((np.ones(800),x1,x2,x3,x4,x5,x6,x7))
149
150
151

def ols(X, Y, intercepto = True, robust_sd = False):

    if not intercepto:
        # keep everything from the second column onwards (drop the intercept column)
        X = X[:,1:]

    beta = np.linalg.inv(X.T @ X) @ ((X.T) @ Y )
    y_est = X @ beta
    n = X.shape[0]
    k = X.shape[1] - 1
    nk = n - k

    if robust_sd:
        # White (HC0) heteroskedasticity-robust covariance matrix
        matrix_robust = np.diag(list( map( lambda x: x**2 , Y - y_est)))
        Var = np.linalg.inv(X.T @ X) @ X.T @ matrix_robust @ X @ np.linalg.inv(X.T @ X)
    else:
        # classical (homoskedastic) covariance matrix
        sigma = sum(list( map( lambda x: x**2 , Y - y_est) )) / nk
        Var = sigma*np.linalg.inv(X.T @ X)

    sd = np.sqrt( np.diag(Var) )
    t_est = np.absolute(beta/sd)
    pvalue = (1 - t.cdf(t_est, df=nk) ) * 2
    lower_bound = beta - 1.96*sd
    upper_bound = beta + 1.96*sd

    SCR = sum(list( map( lambda x: x**2 , Y - y_est) ))            # residual sum of squares
    SCT = sum(list( map( lambda x: x**2 , Y - np.mean(y_est) )))   # total sum of squares
    R2 = 1 - SCR/SCT
    rmse = (SCR/n)**0.5

    table = pd.DataFrame( {"ols": beta , "standard_error" : sd ,
                           "Pvalue" : pvalue, "Lower_bound": lower_bound,
                           "Upper_bound": upper_bound} )
    fit = {"Root_MSE": rmse, "R2": R2}

    return table, fit


ols(X,Y)
ols(X,Y, intercepto = False, robust_sd = True)
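
# Cross-check (a sketch, not part of the original solution): statsmodels' OLS
# with cov_type="HC0" uses the same White sandwich estimator as the robust
# branch of ols() above, so the standard errors should match closely.
try:
    import statsmodels.api as sm
    sm_fit = sm.OLS(Y, X).fit(cov_type="HC0")
    print(sm_fit.params)   # compare with ols(X, Y)[0]["ols"]
    print(sm_fit.bse)      # compare with ols(X, Y, robust_sd = True)[0]["standard_error"]
except ImportError:
    pass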