Path: blob/main/Trabajo_grupal/WG4/Grupo10_py.ipynb
2714 views
Kernel: Python 3 (ipykernel)
In [71]:
import pandas as pd import numpy as np import scipy.stats as stats from scipy.stats import t # t - student import os import pyreadr
In [72]:
# Resolve the OS login name and make that user's Lab4 folder the working
# directory.
user = os.getlogin()  # Username
# The login name is interpolated into the path; the previous f-string had no
# placeholder, so it always pointed at a literal "C:/Users/user/..." folder.
os.chdir(f"C:/Users/{user}/Documentos/Git_Hub/1ECO35_2022_2/Lab4")
In [86]:
# Read the R workspace file, addressed relative to the working directory.
data = pyreadr.read_r(r"../data/cps2012.Rdata")
# Echo the container type returned by the reader (the cell output shows
# collections.OrderedDict).
type(data)
Out[86]:
collections.OrderedDict
In [96]:
# Pull the object stored under the 'data' key of the loaded mapping.
data2012 = data['data']
In [122]:
# Bare expression: displays data2012 as the notebook cell output.
data2012
Out[122]:
In [146]:
# Selecting columns.
# Explanatory variables: the four columns exp1..exp4 of the DataFrame.
W = data2012[['exp1', 'exp2', 'exp3', 'exp4']]
# Vector of the Y variable: the column at position 1 of the DataFrame.
y = data2012.iloc[:, 1]
In [147]:
class RegClass(object):
    """Ordinary least squares with classical and robust standard errors.

    The model is fitted on the regressors passed as ``X`` (not on any global
    DataFrame). Results are exposed both as return values and as attributes:

    * ``beta_ols``        -- coefficient DataFrame (column ``'Coef.'``)
    * ``y_est``, ``e``    -- fitted values and residuals, shape ``(N, 1)``
    * ``SCR``, ``R2``, ``rmse`` -- residual sum of squares, R-squared, root MSE
    * ``var_OLS``         -- classical covariance matrix of the coefficients
    * ``var_OLS_robust``  -- White (HC0) covariance matrix of the coefficients

    Parameters
    ----------
    X : pd.DataFrame
        Regressors, one column per variable. The frame is copied, never
        modified.
    y : pd.Series
        Dependent variable, same number of rows as ``X``.
    robust_sd : bool, default False
        When true, ``Table`` reports heteroskedasticity-robust standard
        errors. As in the original implementation, this flag also controls
        whether an ``'Intercept'`` column of ones is prepended.
        NOTE(review): tying the intercept to ``robust_sd`` looks unintended;
        confirm whether the intercept should always be included.

    Raises
    ------
    TypeError
        If ``X`` is not a DataFrame or ``y`` is not a Series.
    ValueError
        If ``X`` and ``y`` have a different number of rows.
    """

    # Normal-approximation critical value used for the 95% interval bounds.
    _Z_95 = 1.96

    def __init__(self, X: pd.DataFrame, y: pd.Series, robust_sd=False):
        if not isinstance(X, pd.DataFrame):
            raise TypeError("X must be a pd.DataFrame.")
        if not isinstance(y, pd.Series):
            raise TypeError("y must be a pd.Series.")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must have the same number of rows.")

        # Class attributes.
        self.X = X
        self.y = y
        self.robust_sd = robust_sd

        # Work on a copy so adding the intercept never mutates the caller's
        # DataFrame.
        W = X.copy()
        if self.robust_sd:
            W['Intercept'] = 1
            # Put the intercept first, whatever position it ended up in.
            others = [c for c in W.columns if c != 'Intercept']
            W = W.loc[:, ['Intercept'] + others]
        self.W = W

        self.W_np = W.to_numpy(dtype=float)
        self.y_np = y.to_numpy(dtype=float).reshape(-1, 1)
        self.columns = W.columns.tolist()

    def _coef_table(self, beta, beta_se, dof):
        """Build the coefficient table shared by both regression outputs."""
        coef = beta.ravel()
        # A zero standard error (perfect fit) yields inf/nan t-statistics;
        # that is reported as-is instead of emitting runtime warnings.
        with np.errstate(divide='ignore', invalid='ignore'):
            t_stat = coef / beta_se
        # Two-sided p-value from the Student t distribution.
        pvalue = 2 * stats.t.sf(np.abs(t_stat), dof)

        table_data = {
            'Coef.': coef,
            "Std.Err.": beta_se.ravel(),
            "t": t_stat.ravel(),
            "P>|t|": pvalue.ravel(),
            "[0.025": coef - self._Z_95 * beta_se,
            "0.975]": coef + self._Z_95 * beta_se,
        }
        return pd.DataFrame(table_data, index=self.columns)

    def beta_OLS(self):
        """Estimate the coefficients and return them as a one-column frame.

        Solves the normal equations ``(W'W) b = W'y``. The result is stored
        in ``self.beta_ols`` (a distinct name, so this method stays callable).

        Raises
        ------
        numpy.linalg.LinAlgError
            If ``W'W`` is singular.
        """
        W_np = self.W_np
        y_np = self.y_np

        beta_ols = np.linalg.solve(W_np.T @ W_np, W_np.T @ y_np)

        beta_OLS_output = pd.DataFrame(
            beta_ols, index=self.columns, columns=['Coef.']
        )
        self.beta_ols = beta_OLS_output
        return beta_OLS_output

    def R2yMSE(self):
        """Compute R-squared and root MSE; return them as ``(R2, rmse)``.

        Also stores ``y_est``, ``SCR``, ``R2`` and ``rmse`` on the instance.
        ``R2`` is ``nan`` when ``y`` has no variation.
        """
        beta = self.beta_OLS().to_numpy()

        y_est = self.W_np @ beta
        self.y_est = y_est

        error = self.y_np - y_est
        self.SCR = float(np.sum(np.square(error)))
        SCT = float(np.sum(np.square(self.y_np - np.mean(self.y_np))))
        N = self.W_np.shape[0]

        self.R2 = 1 - self.SCR / SCT if SCT > 0 else float('nan')
        self.rmse = (self.SCR / N) ** 0.5
        return self.R2, self.rmse

    def reg_estandar(self):
        """Run OLS with classical (homoskedastic) standard errors.

        Returns a DataFrame indexed by regressor with columns ``'Coef.'``,
        ``'Std.Err.'``, ``'t'``, ``'P>|t|'``, ``'[0.025'`` and ``'0.975]'``.
        Stores the residuals in ``self.e`` and the covariance matrix in
        ``self.var_OLS``.

        Raises
        ------
        ValueError
            If there are not more observations than parameters.
        """
        self.R2yMSE()  # refreshes beta_ols and y_est

        W_np = self.W_np
        beta_OLS = self.beta_ols.values.reshape(-1, 1)

        # Residuals.
        self.e = self.y_np - self.y_est

        # Error variance with the degrees-of-freedom correction.
        N, total_parameters = W_np.shape
        dof = N - total_parameters
        if dof <= 0:
            raise ValueError(
                "Need more observations than parameters to estimate the "
                "error variance."
            )
        error_var = (self.e.T @ self.e).item() / dof

        # Coefficient covariance matrix.
        var_OLS = error_var * np.linalg.inv(W_np.T @ W_np)
        self.var_OLS = pd.DataFrame(
            var_OLS, index=self.columns, columns=self.columns
        )

        # Standard errors.
        beta_se = np.sqrt(np.diag(var_OLS))

        return self._coef_table(beta_OLS, beta_se, dof)

    def reg_robusta(self):
        """Run OLS with White (HC0) heteroskedasticity-robust standard errors.

        Returns a table with the same layout as ``reg_estandar`` and stores
        the robust covariance matrix in ``self.var_OLS_robust``.
        """
        # Populates beta_ols, e and the classical covariance matrix.
        self.reg_estandar()

        W_np = self.W_np
        beta_OLS = self.beta_ols.values.reshape(-1, 1)
        N, total_parameters = W_np.shape

        # Sandwich estimator (W'W)^-1 W' diag(e_i^2) W (W'W)^-1. The squared
        # residuals are broadcast over the columns instead of materialising
        # an N x N diagonal matrix.
        bread = np.linalg.inv(W_np.T @ W_np)
        meat = (W_np * np.square(self.e)).T @ W_np
        var_OLS_robust = bread @ meat @ bread

        self.var_OLS_robust = pd.DataFrame(
            var_OLS_robust, index=self.columns, columns=self.columns
        )

        # Standard errors.
        beta_se_robust = np.sqrt(np.diag(var_OLS_robust))

        return self._coef_table(beta_OLS, beta_se_robust, N - total_parameters)

    def Table(self, **Kargs):
        """Summarise the fit as a DataFrame or as a dictionary.

        Keyword arguments
        -----------------
        Output : {"DataFrame", "Diccionario"}, default "DataFrame"
            ``"DataFrame"`` returns the coefficients and standard errors;
            ``"Diccionario"`` returns a dict that additionally holds the
            flattened covariance matrix and the interval bounds.

        Robust standard errors and covariance are used when the instance was
        created with ``robust_sd=True``; classical ones otherwise.

        Raises
        ------
        ValueError
            If ``Output`` is not one of the supported values.
        """
        output = Kargs.get('Output', "DataFrame")
        if output not in ("DataFrame", "Diccionario"):
            raise ValueError(
                "Output must be 'DataFrame' or 'Diccionario'."
            )

        if self.robust_sd:
            table = self.reg_robusta()
            variance = self.var_OLS_robust
        else:
            table = self.reg_estandar()
            variance = self.var_OLS

        coef = table['Coef.'].to_numpy()
        beta_sd = table["Std.Err."].to_numpy()

        if output == "DataFrame":
            return pd.DataFrame(
                {"OLS": coef, "standar_error": beta_sd},
                index=self.columns,
            )

        return {
            "OLS": coef,
            "standar_error": beta_sd,
            "variance": variance.to_numpy().flatten(),
            "lower_bound": table["[0.025"].to_numpy(),
            "upper_bound": table["0.975]"].to_numpy(),
        }
In [148]:
# Build the regression object from W and y; robust_sd keeps its default (False).
A = RegClass( W, y )