Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_grupal/WG4/Solucion/script_py.py
2835 views
1
# -*- coding: utf-8 -*-
2
"""
3
Estructura de Clase OLS
4
5
@author: Roberto
6
"""
7
8
9
import pandas as pd
10
import numpy as np
11
import pyreadr # Load R dataset
12
import os # for usernanme y set direcotrio
13
14
15
16
class RegClass( object ):
17
18
def __init__( self, X : pd.DataFrame , y : pd.Series , select_vars , sd_robust ):
19
20
21
if not isinstance( X, pd.DataFrame ):
22
raise TypeError( "X must be a pd.DataFrame." )
23
24
if not isinstance( y , pd.Series ):
25
raise TypeError( "y must be a pd.Series." )
26
27
28
# asignamos los atributos
29
30
self.X = X
31
self.y = y
32
self.select_vars = select_vars
33
self.sd_robust = sd_robust
34
35
# Try: ejecuta la seleccion de variables usando .loc (filtro de columnas por nombres)
36
# Entonces, si select_var es una lista de variables, entonces se ejecuta la linea 1
37
# Por otro lado, soi select_vars es una lista de posiciones de columnas, entonces
38
# la linea 1 no podrá ejecutarse y se ejecutará la linea 2.
39
40
try:
41
42
self.X = self.X.loc[ : , self.select_vars ] # 1)
43
44
except Exception:
45
46
self.X = self.X.iloc[:, self.select_vars] # 2)
47
48
49
50
self.X[ 'Intercept' ] = 1
51
52
cols = self.X.columns.tolist()
53
new_cols_orders = [cols[ -1 ]] + cols[ 0:-1 ]
54
self.X = self.X.loc[ : , new_cols_orders ]
55
56
self.X_np = self.X.values
57
self.y_np = y.values.reshape( -1 , 1 )
58
self.columns = self.X.columns.tolist()
59
60
# método que estima el vector de coeficientes
61
62
def coefficients(self):
63
64
X_np = self.X_np
65
y_np = self.y_np
66
67
# beta_ols
68
beta_ols = np.linalg.inv( X_np.T @ X_np ) @ ( X_np.T @ y_np )
69
70
self.beta_OLS = beta_ols
71
72
def output1(self):
73
74
self.coefficients()
75
beta_OLS = self.beta_OLS
76
77
X_np = self.X_np
78
y_np = self.y_np
79
80
81
# errors
82
e = y_np - ( X_np @ beta_OLS )
83
84
# error variance
85
N = X.shape[ 0 ]
86
total_parameters = X.shape[ 1 ]
87
error_var = ( (e.T @ e)[ 0 ] )/( N - total_parameters )
88
89
# Matriz de Varianza y covarianza
90
91
var_OLS = error_var * np.linalg.inv( X_np.T @ X_np )
92
93
# standard errors
94
95
self.beta_se = np.sqrt( np.diag( var_OLS ) )
96
97
# intervalo de confianza
98
99
self.up_bd = beta_OLS.ravel() + 1.96*self.beta_se
100
101
self.lw_bd = beta_OLS.ravel() - 1.96*self.beta_se
102
103
104
105
def output2(self):
106
107
self.coefficients()
108
109
X_np = self.X_np
110
y_np = self.y_np
111
beta_OLS = self.beta_OLS
112
113
114
# errors
115
y_est = X_np @ beta_OLS
116
117
e = y_np - y_est
118
119
e_square = np.array( list( map( lambda x: x**2 , e)) )
120
121
# Matrix de white
122
123
matrix_robust = np.diag(list(e_square.flatten()))
124
125
# Matrix de varianza y covarainza robusta ante heterocedasticidad
126
127
Var_robust = np.linalg.inv(X_np.T @ X_np) @ X_np.T @ matrix_robust @ X_np @ np.linalg.inv(X_np.T @ X_np)
128
129
# standard errors
130
131
self.beta_se = np.sqrt( np.diag( Var_robust ) )
132
133
self.up_bd = beta_OLS.ravel() + 1.96*self.beta_se
134
135
self.lw_bd = beta_OLS.ravel() - 1.96*self.beta_se
136
137
def output3(self):
138
139
self.coefficients()
140
141
y_np = self.y_np
142
X_np = self.X_np
143
N = X_np.shape[ 0 ]
144
y_est = X_np @ self.beta_OLS
145
146
SCR = sum(list( map( lambda x: x**2 , y_np - y_est) ))
147
SCT = sum(list( map( lambda x: x**2 , y_np - np.mean(y_est) )))
148
self.R2 = 1-SCR/SCT
149
self.rmse = (SCR/N)**0.5
150
151
152
def table(self):
153
154
155
self.output3() # ejecutamos el metodo que calcula el R2 y root-mse
156
self.coefficients() # ejectuamos el metodo que estima el vector de coeficientes
157
158
159
if self.sd_robust == True :
160
161
self.output2() # ejecutamos el metodo de errors estandar corregido ante heteroce.
162
163
table_data ={ 'Coef.' : self.beta_OLS.ravel() ,
164
"Std.Err." : self.beta_se.ravel(),
165
"Lower_bound" : self.lw_bd.ravel(),
166
"Upper_bound" : self.up_bd.ravel()
167
}
168
169
170
# defining index names
171
index_names = self.columns
172
173
# defining a pandas dataframe
174
reg_OLS = pd.DataFrame( table_data , index = index_names )
175
176
# output
177
178
179
table_dict = {"Table":reg_OLS, "R2": self.R2, "MSE": self.rmse}
180
181
else:
182
183
self.output1() # ejecutamos el metodo de errors estandar sin corrección ante heterocedasticidad
184
185
table_data ={ 'Coef.' : self.beta_OLS.ravel() ,
186
"Std.Err." : self.beta_se.ravel(),
187
"Lower_bound" : self.lw_bd.ravel(),
188
"Upper_bound" : self.up_bd.ravel()
189
}
190
191
192
# defining index names
193
index_names = self.columns
194
195
# defining a pandas dataframe
196
reg_OLS = pd.DataFrame( table_data , index = index_names )
197
198
# output
199
200
201
table_dict = {"Table":reg_OLS, "R2": self.R2, "MSE": self.rmse}
202
203
204
205
206
return table_dict
207
208
209
210
211
user = os.getlogin() # Username
212
213
# Set directorio
214
215
os.chdir(f"C:/Users/{user}/Documents/GitHub/1ECO35_2022_2/Trabajo_grupal/WG4/Solucion") # Set directorio
216
217
cps2012_env = pyreadr.read_r("../../../data/cps2012.Rdata") # output formato diccionario
218
219
220
cps2012 = cps2012_env[ 'data' ].iloc[0:500] # seleccionamo solo 500 observaciones
221
222
223
Y = cps2012['lnw']
224
X = cps2012.iloc[:,np.arange(1,18)] # No consideramos el logaritmo del salario
225
226
227
Regression1 = RegClass(X, Y , ["female","hsg","cg","exp1","exp2"] , sd_robust = True)
228
Regression1.table()
229
230
231
Regression2 = RegClass(X, Y , ["female","hsg","cg","exp1","exp2"] , sd_robust = False)
232
Regression2.table()
233
234
235
Regression3 = RegClass(X, Y , [1,2,5,8,10] , sd_robust = False)
236
Regression3.table()
237
238
239
Regression4 = RegClass(X, Y , [1,2,5,8,10] , sd_robust = True)
240
Regression4.table()
241
242
243
244
245
246
247
248
249
250
251
252