Path: blob/main/Trabajo_grupal/WG4/Grupo 5_Tarea 4.py
2714 views
# -*- coding: utf-8 -*-1"""2Created on Fri Sep 23 17:53:59 202234@author: Usuario5"""67import numpy as np8import pandas as pd9from pandas import DataFrame, Series10import statistics11import inspect12from scipy.stats import t # t - student13import os14import pyreadr1516user = os.getlogin() # Username17print(user)1819# Extraemos los datos del directorio2021os.chdir(f"C:/Users/{user}/Documentos/GitHub/1ECO35_2022_2/Lab5")22cps2012_env = pyreadr.read_r("../data/cps2012.Rdata")232425#Para crear W hay que hacer algo con listas26W = ['lnw','female','widowed', 'divorced', 'separated', 'nevermarried', 'hsd08', 'hsd911', 'hsg', 'cg', 'ad', 'mw', 'so', 'we', 'exp1', 'exp2', 'exp3', 'exp4', 'weight', 'ne', 'sc']27#W = [1.2.3]2829class OLS(object):3031def __init__(self, X:pd.DataFrame ,Y:pd.Series , W, robust_sd=False):3233self.X = X34self.Y = Y35self.robust_sd = robust_sd36self.W = W3738#Método 139def Determinarcoeficientes(self): #Método 14041self.columns = self.X.columns.tolist() # nombre de la base de datos como objeto lista4243self.n = self.X.shape[0] # numero de observaciones, # self.n "Se crea un nuevo atributo"44k = self.X.shape[1] + 1 #numero de variables y el intercepto45self.X1 = np.column_stack((np.ones(self.n ), self.X.to_numpy() )) # self.X.to_numpy() # DataFrame to numpy46self.Y1 = self.Y.to_numpy().reshape(self.n ,1) #reshape(-1 ,1)4748self.beta = np.linalg.inv(self.X1.T @ self.X1) @ ((self.X1.T) @ self.Y1 )49self.nk = self.n - k5051#Método 252def Errorvarcovintcof(self):5354if self.robust_sd:5556self.y_est = self.X1 @ self.beta57self.error = self.Y1 - self.y_est58sigma_1 = sum(list( map( lambda x: x**2 , self.error) )) / self.nk59self.Var = sigma_1*np.linalg.inv(self.X.T @ self.X) #Matríz de varianzas y covarianzas caso no robusto60self.sd = np.sqrt( np.diag(self.Var) ) #Desviación estandar o errores estandar61self.límite_inferior = self.beta-1.96*self.sd62self.límite_superior = self.beta+1.96*self.sd6364#Método 365else:6667self.y_est = self.X1 @ self.beta68self.error = self.Y1 - self.y_est69matrix_robust = np.diag(list( map( lambda x: x**2 , self.error)))70self.Var = np.linalg.inv(self.X.T @ self.X) @ self.X.T @ matrix_robust @ self.X @ np.linalg.inv(self.X.T @ self.X)71self.sd = np.sqrt( np.diag(self.Var) )7273self.límite_inferior = self.beta-1.96*self.sd74self.límite_superior = self.beta+1.96*self.sd7576#Método 477def R2yMSE(self):7879self.Determinarcoeficientes() # run function80self.Errorvarcovintcof()8182#SCR = sum(list( map( lambda x: x**2 , self.error)))83#SCT = sum(list( map( lambda x: x**2 , self.Y - np.mean(self.y_est))))84#R2 = 1 - self.SCR/self.SCT8586#y_est = self.y_est87#error = self.error88self.SCR = np.sum(np.square(self.error))89self.rmse = (self.SCR/self.n)**0.590SCT = np.sum(np.square(self.Y1 - np.mean(self.Y1)))9192self.R2 = 1 - self.SCR/SCT9394return self.R29596#Método 597def Table(self, **Kargs):9899W = ['lnw','female','widowed', 'divorced', 'separated', 'nevermarried', 'hsd08', 'hsd911', 'hsg', 'cg', 'ad', 'mw', 'so', 'we', 'exp1', 'exp2', 'exp3', 'exp4', 'weight', 'ne', 'sc']100# Lo agregamos, pero no lo usamos. Lo lamento Roberto, se nos acababa el tiempo.101102self.R2yMSE()103self.Determinarcoeficientes()104r2= self.R2105scr = self.SCR106sigma = scr / self.nk107Var = self.Var #sigma*np.linalg.inv(self.X1.T @ self.X1)108sd = self.sd #np.sqrt( np.diag(Var) )109lower_bound = self.límite_inferior110upper_bound = self.límite_superior111rmse = self.rmse #(scr/self.n)**0.5112113if (Kargs['Output'] == "DataFrame"):114115df = pd.DataFrame( {"OLS": self.beta.flatten() , "standar_error": sd.flatten() , "Lower_bound": lower_bound.flatten() , "Upper_bound": upper_bound.flatten() })116117#self.beta.flatten() # multy-array a simple array118119elif (Kargs['Output'] == "Diccionario"):120121df = pd.DataFrame({"OLS": self.beta.flatten() , "standar_error": sd.flatten() , "Lower_bound": lower_bound.flatten() , "Upper_bound":upper_bound.flatten() })122123df_1 = { "Root_MSE":rmse.flatten() } #R2"R2": R2.flatten()124125df_2 = { "R2": r2.flatten() }126127return df128129cps2012_env # es un diccionario. En la llave "data" está la base de datos130cps2012 = cps2012_env[ 'data' ] # extrae información almacenada en la llave data del diccionario cps2012_env131dt = cps2012.describe()132cps2012.shape133134variance_cols = cps2012.var().to_numpy() # to numpy135136Dataset = cps2012.iloc[ : , np.where( variance_cols != 0 )[0] ]137138X = Dataset.iloc[:,1:10]139Y = Dataset[['lnw']]140141Reg1 = OLS(X,Y, W, robust_sd=True)142143#Comprobaciones144Reg1.Determinarcoeficientes()145Reg1.X146Reg1.beta147148Reg1.Errorvarcovintcof()149Reg1.error150Reg1.Var151Reg1.límite_inferior152Reg1.límite_superior153154Reg1.R2yMSE()155Reg1.rmse156157Reg1.Table(Output = "Diccionario")158159160