Path: blob/main/Trabajo_grupal/WG5/Grupo9_py5.1.py
2714 views
#!/usr/bin/env python
# coding: utf-8
"""OLS regression on the CPS 2012 wage data (notebook export).

Defines ``OLSRegClass``, a small ordinary-least-squares estimator with
classical or heteroskedasticity-robust (White/HC0) standard errors, and,
when run as a script, loads ``data/cps2012.Rdata`` and instantiates it.
"""

import inspect
import os
import statistics

import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from scipy.stats import t


# Variable names of the CPS 2012 data set used by the exercise.
lista = ['lnw', 'female', 'widowed', 'nevermarried', 'divorced', 'separated',
         'hsd08', 'hsd911', 'hsg', 'cg', 'ad', 'mw', 'so', 'we', 'exp1',
         'exp2', 'exp3', 'exp4', 'weight', 'ne', 'sc']


class OLSRegClass(object):
    """Ordinary least squares with an automatically added intercept.

    Parameters
    ----------
    X : pd.DataFrame
        Regressors, one column per variable (without the constant).
    Y : pd.Series
        Dependent variable; a single-column DataFrame is accepted as well.
    lista : list
        Variable names, stored unchanged on the instance.
    RobustStandardError : bool, default False
        If True, standard errors use the White (HC0) sandwich estimator;
        otherwise the classical homoskedastic estimator.

    Notes
    -----
    No ``__slots__`` is declared: the methods create many attributes
    (``n``, ``X1``, ``beta``, ``SCR``, ...) that a restrictive slot list
    would reject with ``AttributeError``.
    """

    def __init__(self, X: pd.DataFrame, Y: pd.Series, lista,
                 RobustStandardError=False):
        # Stored as ``self.X`` because every method reads ``self.X``.
        self.X = X
        self.Y = Y
        self.lista = lista
        self.RobustStandardError = RobustStandardError

    def Coeficientes(self):
        """Fit the model and store coefficients, fitted values and residuals.

        Sets ``columns``, ``n``, ``X1`` (design matrix with a leading column
        of ones), ``Y1`` (n x 1), ``beta`` ((k, 1), intercept first), ``nk``
        (degrees of freedom), ``y_est`` and ``error``.
        """
        self.columns = self.X.columns.tolist()
        self.n = self.X.shape[0]  # number of observations
        k = self.X.shape[1] + 1  # regressors plus the intercept

        self.X1 = np.column_stack(
            (np.ones(self.n), self.X.to_numpy(dtype=float))
        )
        self.Y1 = np.asarray(self.Y, dtype=float).reshape(self.n, 1)

        # Kept so the covariance estimators do not invert X'X again.
        self._xtx_inv = np.linalg.inv(self.X1.T @ self.X1)
        self.beta = self._xtx_inv @ (self.X1.T @ self.Y1)
        self.nk = self.n - k

        # Computed here so R2yMSE can run right after a fit.
        self.y_est = self.X1 @ self.beta
        self.error = self.Y1 - self.y_est

    def R2yMSE(self):
        """Refit the model and store ``SCR``, ``SCT``, ``rmse`` and ``R2``.

        ``SCR`` is the residual sum of squares, ``SCT`` the total sum of
        squares around the mean of Y, ``rmse`` is ``sqrt(SCR / n)`` and
        ``R2`` is ``1 - SCR / SCT`` (NaN when Y is constant).
        """
        self.Coeficientes()

        self.SCR = float(np.sum(self.error ** 2))
        self.SCT = float(np.sum((self.Y1 - self.Y1.mean()) ** 2))
        self.rmse = (self.SCR / self.n) ** 0.5
        self.R2 = 1 - self.SCR / self.SCT if self.SCT else float("nan")

    def Error_var_cov_intcof(self):
        """Store the coefficient covariance, standard errors and 95% bounds.

        Uses the existing fit if there is one, otherwise fits first. Sets
        ``Var`` (k x k), ``sd`` (k,), ``límite_inferior`` and
        ``límite_superior`` (k,), the bounds being ``beta -/+ 1.96 * sd``.

        Raises
        ------
        ValueError
            For the classical estimator when there are no residual degrees
            of freedom (``n - k <= 0``).
        """
        if not hasattr(self, "beta"):
            self.Coeficientes()

        if self.RobustStandardError:
            # White/HC0 sandwich. X' diag(e^2) X is formed by scaling the
            # rows of X, which avoids materialising an n x n matrix.
            meat = self.X1.T @ (self.X1 * self.error ** 2)
            self.Var = self._xtx_inv @ meat @ self._xtx_inv
        else:
            if self.nk <= 0:
                raise ValueError(
                    "not enough observations to estimate the error variance"
                )
            sigma = float(np.sum(self.error ** 2)) / self.nk
            self.Var = sigma * self._xtx_inv

        self.sd = np.sqrt(np.diag(self.Var))
        # beta is (k, 1) and sd is (k,): flatten first, otherwise numpy
        # broadcasts the difference to a k x k matrix.
        coef = self.beta.flatten()
        self.límite_inferior = coef - 1.96 * self.sd
        self.límite_superior = coef + 1.96 * self.sd

    def _Table(self, *Kargs, Output="DataFrame"):
        """Fit the model and return the regression table.

        Parameters
        ----------
        *Kargs
            Optional; if given, the first positional value is used as
            ``Output``.
        Output : {"DataFrame", "Diccionario"}, default "DataFrame"
            Container type of the result.

        Returns
        -------
        pd.DataFrame or dict
            Coefficients, standard errors (robust when the instance was
            built with ``RobustStandardError=True``), two-sided t p-values,
            95% bounds, root MSE and R2. The DataFrame has one row per
            coefficient, with the scalar statistics repeated on every row.

        Raises
        ------
        ValueError
            If ``Output`` is not one of the supported values.
        """
        if Kargs:
            Output = Kargs[0]
        if Output not in ("DataFrame", "Diccionario"):
            raise ValueError(
                f"Output must be 'DataFrame' or 'Diccionario', got {Output!r}"
            )

        self.R2yMSE()  # fresh fit plus SCR / rmse / R2
        self.Error_var_cov_intcof()  # reuses that fit

        coef = self.beta.flatten()
        t_est = np.absolute(coef / self.sd)
        # Survival function: same value as 1 - cdf, without the
        # cancellation error for very small p-values.
        pvalue = 2 * t.sf(t_est, df=self.nk)

        resultados = {
            "OLS": coef,
            "standar_error": self.sd,
            "Pvalue": pvalue,
            "Lower_bound": self.límite_inferior,
            "Upper_bound": self.límite_superior,
            "Root_MSE": self.rmse,
            "R2": self.R2,
        }

        if Output == "Diccionario":
            return resultados
        return pd.DataFrame(resultados, index=["Intercept", *self.columns])


if __name__ == "__main__":
    # pyreadr is a third-party package the script already relied on but
    # never imported; it is only needed to read the .Rdata file.
    import pyreadr

    user = os.getlogin()  # current username

    # Set the working directory to the course repository.
    os.chdir(f"C:/Users/{user}/Documents/GitHub/1ECO35_2022_2")

    # read_r returns a dict-like object keyed by R object name.
    cps2012_env = pyreadr.read_r("data/cps2012.Rdata")
    cps2012 = cps2012_env['data']  # extract the data frame
    dat = cps2012.describe()

    # Drop the columns with zero variance.
    variance_cols = cps2012.var().to_numpy()
    Dataset = cps2012.iloc[:, np.where(variance_cols != 0)[0]]
    X = Dataset.iloc[:, 1:10]
    Y = Dataset[['lnw']]
    OLSRegClass(X, Y, lista, RobustStandardError=True)