Path: blob/main/Trabajo_grupal/WG4/Solucion/script_py.py
2835 views
# -*- coding: utf-8 -*-1"""2Estructura de Clase OLS34@author: Roberto5"""678import pandas as pd9import numpy as np10import pyreadr # Load R dataset11import os # for usernanme y set direcotrio12131415class RegClass( object ):1617def __init__( self, X : pd.DataFrame , y : pd.Series , select_vars , sd_robust ):181920if not isinstance( X, pd.DataFrame ):21raise TypeError( "X must be a pd.DataFrame." )2223if not isinstance( y , pd.Series ):24raise TypeError( "y must be a pd.Series." )252627# asignamos los atributos2829self.X = X30self.y = y31self.select_vars = select_vars32self.sd_robust = sd_robust3334# Try: ejecuta la seleccion de variables usando .loc (filtro de columnas por nombres)35# Entonces, si select_var es una lista de variables, entonces se ejecuta la linea 136# Por otro lado, soi select_vars es una lista de posiciones de columnas, entonces37# la linea 1 no podrá ejecutarse y se ejecutará la linea 2.3839try:4041self.X = self.X.loc[ : , self.select_vars ] # 1)4243except Exception:4445self.X = self.X.iloc[:, self.select_vars] # 2)46474849self.X[ 'Intercept' ] = 15051cols = self.X.columns.tolist()52new_cols_orders = [cols[ -1 ]] + cols[ 0:-1 ]53self.X = self.X.loc[ : , new_cols_orders ]5455self.X_np = self.X.values56self.y_np = y.values.reshape( -1 , 1 )57self.columns = self.X.columns.tolist()5859# método que estima el vector de coeficientes6061def coefficients(self):6263X_np = self.X_np64y_np = self.y_np6566# beta_ols67beta_ols = np.linalg.inv( X_np.T @ X_np ) @ ( X_np.T @ y_np )6869self.beta_OLS = beta_ols7071def output1(self):7273self.coefficients()74beta_OLS = self.beta_OLS7576X_np = self.X_np77y_np = self.y_np787980# errors81e = y_np - ( X_np @ beta_OLS )8283# error variance84N = X.shape[ 0 ]85total_parameters = X.shape[ 1 ]86error_var = ( (e.T @ e)[ 0 ] )/( N - total_parameters )8788# Matriz de Varianza y covarianza8990var_OLS = error_var * np.linalg.inv( X_np.T @ X_np )9192# standard errors9394self.beta_se = np.sqrt( np.diag( var_OLS ) )9596# intervalo de confianza9798self.up_bd = beta_OLS.ravel() + 1.96*self.beta_se99100self.lw_bd = beta_OLS.ravel() - 1.96*self.beta_se101102103104def output2(self):105106self.coefficients()107108X_np = self.X_np109y_np = self.y_np110beta_OLS = self.beta_OLS111112113# errors114y_est = X_np @ beta_OLS115116e = y_np - y_est117118e_square = np.array( list( map( lambda x: x**2 , e)) )119120# Matrix de white121122matrix_robust = np.diag(list(e_square.flatten()))123124# Matrix de varianza y covarainza robusta ante heterocedasticidad125126Var_robust = np.linalg.inv(X_np.T @ X_np) @ X_np.T @ matrix_robust @ X_np @ np.linalg.inv(X_np.T @ X_np)127128# standard errors129130self.beta_se = np.sqrt( np.diag( Var_robust ) )131132self.up_bd = beta_OLS.ravel() + 1.96*self.beta_se133134self.lw_bd = beta_OLS.ravel() - 1.96*self.beta_se135136def output3(self):137138self.coefficients()139140y_np = self.y_np141X_np = self.X_np142N = X_np.shape[ 0 ]143y_est = X_np @ self.beta_OLS144145SCR = sum(list( map( lambda x: x**2 , y_np - y_est) ))146SCT = sum(list( map( lambda x: x**2 , y_np - np.mean(y_est) )))147self.R2 = 1-SCR/SCT148self.rmse = (SCR/N)**0.5149150151def table(self):152153154self.output3() # ejecutamos el metodo que calcula el R2 y root-mse155self.coefficients() # ejectuamos el metodo que estima el vector de coeficientes156157158if self.sd_robust == True :159160self.output2() # ejecutamos el metodo de errors estandar corregido ante heteroce.161162table_data ={ 'Coef.' : self.beta_OLS.ravel() ,163"Std.Err." : self.beta_se.ravel(),164"Lower_bound" : self.lw_bd.ravel(),165"Upper_bound" : self.up_bd.ravel()166}167168169# defining index names170index_names = self.columns171172# defining a pandas dataframe173reg_OLS = pd.DataFrame( table_data , index = index_names )174175# output176177178table_dict = {"Table":reg_OLS, "R2": self.R2, "MSE": self.rmse}179180else:181182self.output1() # ejecutamos el metodo de errors estandar sin corrección ante heterocedasticidad183184table_data ={ 'Coef.' : self.beta_OLS.ravel() ,185"Std.Err." : self.beta_se.ravel(),186"Lower_bound" : self.lw_bd.ravel(),187"Upper_bound" : self.up_bd.ravel()188}189190191# defining index names192index_names = self.columns193194# defining a pandas dataframe195reg_OLS = pd.DataFrame( table_data , index = index_names )196197# output198199200table_dict = {"Table":reg_OLS, "R2": self.R2, "MSE": self.rmse}201202203204205return table_dict206207208209210user = os.getlogin() # Username211212# Set directorio213214os.chdir(f"C:/Users/{user}/Documents/GitHub/1ECO35_2022_2/Trabajo_grupal/WG4/Solucion") # Set directorio215216cps2012_env = pyreadr.read_r("../../../data/cps2012.Rdata") # output formato diccionario217218219cps2012 = cps2012_env[ 'data' ].iloc[0:500] # seleccionamo solo 500 observaciones220221222Y = cps2012['lnw']223X = cps2012.iloc[:,np.arange(1,18)] # No consideramos el logaritmo del salario224225226Regression1 = RegClass(X, Y , ["female","hsg","cg","exp1","exp2"] , sd_robust = True)227Regression1.table()228229230Regression2 = RegClass(X, Y , ["female","hsg","cg","exp1","exp2"] , sd_robust = False)231Regression2.table()232233234Regression3 = RegClass(X, Y , [1,2,5,8,10] , sd_robust = False)235Regression3.table()236237238Regression4 = RegClass(X, Y , [1,2,5,8,10] , sd_robust = True)239Regression4.table()240241242243244245246247248249250251252