GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_grupal/WG4/Grupo_4_jupyter (1).ipynb
Kernel: Python 3 (ipykernel)

Work Group 4:

Members:

Ascencios, Seidy - 20191622

Morales, Luana - 2019120

Oré, Flavia - 20191215

Quintero, Marcela - 20191445

import pandas as pd
import numpy as np
import scipy.stats as stats
from scipy.stats import t  # Student's t distribution
import os
!pip install pyreadr
import pyreadr
Requirement already satisfied: pyreadr in d:\anaconda\lib\site-packages (0.4.6)
Requirement already satisfied: pandas>=1.2.0 in d:\anaconda\lib\site-packages (from pyreadr) (1.2.4)
Requirement already satisfied: numpy>=1.16.5 in d:\anaconda\lib\site-packages (from pandas>=1.2.0->pyreadr) (1.20.1)
Requirement already satisfied: pytz>=2017.3 in d:\anaconda\lib\site-packages (from pandas>=1.2.0->pyreadr) (2021.1)
Requirement already satisfied: python-dateutil>=2.7.3 in d:\anaconda\lib\site-packages (from pandas>=1.2.0->pyreadr) (2.8.1)
Requirement already satisfied: six>=1.5 in d:\anaconda\lib\site-packages (from python-dateutil>=2.7.3->pandas>=1.2.0->pyreadr) (1.15.0)

Create an OLS class with the following characteristics. Attributes:

A DataFrame of explanatory variables.
The vector of the variable Y.
A list for selecting variables from the DataFrame (either by position or by variable name).
A boolean variable for robust standard errors.
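A minimal sketch of the interface described above; the names data, y, columns and robust are illustrative assumptions, not prescribed by the assignment:

import pandas as pd

class OLSInterfaceSketch:
    """Illustrative skeleton only: the four attributes requested by the prompt."""
    def __init__(self, data: pd.DataFrame, y: pd.Series, columns: list, robust: bool = False):
        self.X = data[columns]    # explanatory variables, selected by name (use data.iloc[:, columns] for positions)
        self.Y = y                # dependent variable
        self.columns = columns    # selection list
        self.robust = robust      # True -> robust standard errors, False -> classical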

# Load the database:
user = os.getlogin()  # username
os.chdir(f"C:/Users/{user}/Documents/GitHub/1ECO35_2022_2/Lab4")
cps2012_env = pyreadr.read_r("../data/cps2012.Rdata")
# The DataFrame must be extracted from the dictionary returned by pyreadr:
cps2012 = cps2012_env['data']
dt = cps2012.describe()
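Note: pyreadr.read_r returns a dict-like object keyed by the names of the objects stored in the .Rdata file; listing the keys (an optional check, not in the original notebook) confirms that 'data' is the right key before extracting the DataFrame:

print(list(cps2012_env.keys()))  # expected to include 'data'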
# The database looks like this:
print(cps2012)
         year       lnw  female  widowed  divorced  separated  nevermarried  \
0      2012.0  1.909543     1.0      0.0       0.0        0.0           0.0
1      2012.0  1.365773     1.0      0.0       0.0        0.0           0.0
2      2012.0  2.540223     0.0      0.0       0.0        0.0           0.0
3      2012.0  1.801091     1.0      0.0       0.0        0.0           0.0
4      2012.0  3.349904     0.0      0.0       0.0        0.0           0.0
...       ...       ...     ...      ...       ...        ...           ...
29212  2012.0  3.978513     0.0      0.0       0.0        0.0           1.0
29213  2012.0  3.142265     1.0      0.0       0.0        0.0           1.0
29214  2012.0  2.725619     1.0      0.0       0.0        0.0           0.0
29215  2012.0  3.142265     0.0      0.0       0.0        0.0           0.0
29216  2012.0  2.433613     1.0      0.0       0.0        0.0           0.0

       hsd08  hsd911  hsg  ...   so   we  exp1  exp2    exp3     exp4  weight  \
0        0.0     0.0  0.0  ...  0.0  0.0  22.0  4.84  10.648  23.4256  569.43
1        0.0     1.0  0.0  ...  0.0  0.0  30.0  9.00  27.000  81.0000  625.96
2        0.0     0.0  1.0  ...  0.0  0.0  19.0  3.61   6.859  13.0321  264.22
3        0.0     0.0  1.0  ...  0.0  0.0  14.0  1.96   2.744   3.8416  256.81
4        0.0     0.0  0.0  ...  0.0  0.0  15.0  2.25   3.375   5.0625  256.81
...      ...     ...  ...  ...  ...  ...   ...   ...     ...      ...     ...
29212    0.0     0.0  0.0  ...  0.0  1.0  27.0  7.29  19.683  53.1441  301.43
29213    0.0     0.0  0.0  ...  0.0  1.0  16.0  2.56   4.096   6.5536  206.36
29214    0.0     0.0  0.0  ...  0.0  1.0  21.0  4.41   9.261  19.4481  272.35
29215    0.0     0.0  0.0  ...  0.0  1.0   2.0  0.04   0.008   0.0016  282.87
29216    0.0     0.0  0.0  ...  0.0  1.0   2.0  0.04   0.008   0.0016  282.87

       married     ne     sc
0         True   True   True
1         True   True  False
2         True   True  False
3         True   True  False
4         True   True  False
...        ...    ...    ...
29212    False  False  False
29213    False  False  False
29214     True  False   True
29215     True  False  False
29216     True  False  False

[29217 rows x 23 columns]
# We create the OLS class:
class OLS_G4:

    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def coeficientes(self):
        self.n = self.X.shape[0]                                      # number of observations (new attribute)
        X1 = np.column_stack((np.ones(self.n), self.X.to_numpy()))    # DataFrame to numpy, with intercept column
        Y1 = self.Y.to_numpy().reshape(self.n, 1)                     # Y as a column vector
        self.X1 = X1
        self.Y1 = Y1
        self.beta = np.linalg.inv(X1.T @ X1) @ (X1.T @ Y1)            # OLS estimator
        self.nk = self.n - X1.shape[1]                                # degrees of freedom: n - (number of coefficients)
        self.Y_est = self.X1 @ self.beta                              # fitted values

    def estandar(self):
        self.coeficientes()
        error = self.Y1 - self.Y_est
        sigma2 = (error.T @ error).item() / self.nk                   # estimated error variance
        Var1 = sigma2 * np.linalg.inv(self.X1.T @ self.X1)            # classical variance of beta
        self.sd1 = np.sqrt(np.diag(Var1))                             # standard errors
        self.lower_bound1 = self.beta.flatten() - 1.96 * self.sd1     # 95% confidence interval
        self.upper_bound1 = self.beta.flatten() + 1.96 * self.sd1

    def robust(self):
        self.coeficientes()
        error = self.Y1 - self.Y_est
        matrix_robust = np.diag((error ** 2).flatten())               # diagonal matrix of squared residuals
        XtX_inv = np.linalg.inv(self.X1.T @ self.X1)
        Var2 = XtX_inv @ self.X1.T @ matrix_robust @ self.X1 @ XtX_inv  # HC0 sandwich variance
        self.sd2 = np.sqrt(np.diag(Var2))                             # robust standard errors
        self.lower_bound2 = self.beta.flatten() - 1.96 * self.sd2
        self.upper_bound2 = self.beta.flatten() + 1.96 * self.sd2

    def R2_RMSE(self):
        self.coeficientes()                                           # run function
        error = self.Y1 - self.Y_est
        self.SCR = np.sum(np.square(error))                           # residual sum of squares
        SCT = np.sum(np.square(self.Y1 - np.mean(self.Y1)))           # total sum of squares
        self.R2 = 1 - self.SCR / SCT
        self.rmse = (self.SCR / self.n) ** 0.5

    def Table(self, **Kargs):
        self.R2_RMSE()
        self.robust()
        self.estandar()
        t_est = np.absolute(self.beta.flatten() / self.sd1)           # t statistics (computed but not reported)
        if Kargs['Output'] == "DataFrame":
            df = pd.DataFrame({"coeficientes": self.beta.flatten(),
                               "error-estandar": self.sd1.flatten(),
                               "límite-superior": self.upper_bound1.flatten(),
                               "límite-inferior": self.lower_bound1.flatten()})
        elif Kargs['Output'] == "Diccionario":
            df = {"R^2": self.R2, "Root-MSE": self.rmse}
        return df
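For reference, estandar() implements the classical OLS variance and robust() the heteroskedasticity-consistent (HC0) sandwich estimator. In matrix notation, with residuals $\hat{e} = Y - X\hat{\beta}$ and $k$ estimated coefficients (intercept included):

$$\hat{\beta} = (X'X)^{-1}X'Y, \qquad \widehat{\mathrm{Var}}(\hat{\beta}) = \hat{\sigma}^2 (X'X)^{-1}, \qquad \hat{\sigma}^2 = \frac{\hat{e}'\hat{e}}{n-k},$$

$$\widehat{\mathrm{Var}}_{HC0}(\hat{\beta}) = (X'X)^{-1}\, X'\,\mathrm{diag}(\hat{e}_i^{\,2})\, X\, (X'X)^{-1}.$$

Both methods build 95% confidence intervals with the normal approximation $\hat{\beta}_j \pm 1.96\,\mathrm{se}(\hat{\beta}_j)$.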
# flatten(): from a multi-dimensional array to a 1-D array
cps2012.shape
variance_cols = cps2012.var().to_numpy()                    # column variances as a numpy array
Dataset = cps2012.iloc[:, np.where(variance_cols != 0)[0]]  # keep only columns with non-zero variance
X = Dataset.iloc[:, 1:3]
Y = Dataset[['lnw']]
Reg1 = OLS_G4(X, Y)
Reg1.X
Reg1.coeficientes()
Reg1.beta
Reg1.R2_RMSE()
Reg1.R2
Reg1.R2_RMSE()
Reg1.rmse
0.6495008509803024
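As an optional cross-check (not part of the assignment, and assuming statsmodels is available in the environment), the same regression can be fitted with statsmodels and its classical and HC0 standard errors compared with the results of OLS_G4:

import statsmodels.api as sm

X_sm = sm.add_constant(X)                      # add the intercept column
res = sm.OLS(Y, X_sm).fit()                    # classical standard errors
res_hc0 = sm.OLS(Y, X_sm).fit(cov_type="HC0")  # robust (HC0) standard errors

print(res.params.to_numpy())    # should be close to Reg1.beta.flatten()
print(res.bse.to_numpy())       # compare with Reg1.sd1 (after Reg1.estandar())
print(res_hc0.bse.to_numpy())   # compare with Reg1.sd2 (after Reg1.robust())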
Reg1.Table(Output = "DataFrame")
Reg1.Table(Output = "Diccionario")
[[ 2.9094181 ]
 [-0.25887621]
 [-0.17751519]]