Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_grupal/WG5/Grupo9_py5.1.py
2714 views
1
#!/usr/bin/env python
2
# coding: utf-8
3
4
# In[ ]:
5
6
7
import numpy as np
8
import pandas as pd
9
from pandas import DataFrame, Series
10
import statistics
11
import inspect
12
import os
13
from scipy.stats import t
14
15
16
# In[ ]:
17
18
19
# Login name of the account running the script; it selects the per-user
# folder in the hard-coded Windows path below.
user = os.getlogin()

# Move into the local clone of the repository so that the relative
# "data/..." path used below resolves.
os.chdir(f"C:/Users/{user}/Documents/GitHub/1ECO35_2022_2")

# The original notes describe the result of read_r as dictionary-like: the
# objects stored in the .Rdata file are looked up by key.
# NOTE(review): `pyreadr` is not imported in the visible import block --
# confirm it is imported before this line, otherwise it raises NameError.
cps2012_env = pyreadr.read_r("data/cps2012.Rdata")

# Bare expression left over from the notebook export (it displayed the object
# in the notebook); it has no effect when the file runs as a script.
cps2012_env

# The data set itself is stored under the 'data' key.
cps2012 = cps2012_env['data']

# Summary statistics of the columns of the data set.
dat = cps2012.describe()
44
45
46
# In[ ]:
47
48
49
#Creamos la lista
50
# List of names later handed to OLSRegClass as its `lista` argument.
# Presumably these are column names of cps2012 -- confirm against the data.
lista = [
    'lnw',
    'female',
    'widowed',
    'nevermarried',
    'divorced',
    'separated',
    'hsd08',
    'hsd911',
    'hsg',
    'cg',
    'ad',
    'mw',
    'so',
    'we',
    'exp1',
    'exp2',
    'exp3',
    'exp4',
    'weight',
    'ne',
    'sc',
]
51
52
53
# In[ ]:
54
55
56
#Creamos la clase
57
class OLSRegClass(object):
    """Ordinary least squares regression with an automatically added intercept.

    Parameters
    ----------
    X : pd.DataFrame
        Regressors, one column per variable. A column of ones is prepended
        internally, so X must not already contain an intercept.
    Y : pd.Series
        Dependent variable. A single-column DataFrame is accepted as well.
    lista :
        List of names supplied by the caller. It is only stored on the
        instance; no method reads it.
    RobustStandardError : bool, default False
        When True, standard errors come from the White
        heteroskedasticity-robust (sandwich) covariance matrix; otherwise
        from the classical ``sigma^2 (X'X)^-1`` matrix.

    Notes
    -----
    No ``__slots__`` is declared: the methods create many result attributes
    (``beta``, ``SCR``, ``Var`` ...) that a restrictive slot list would reject.
    """

    def __init__(self, X: pd.DataFrame, Y: pd.Series, lista, RobustStandardError=False):
        # Stored under the public name `X` because every method reads self.X.
        self.X = X
        self.Y = Y
        self.lista = lista
        self.RobustStandardError = RobustStandardError

    def Coeficientes(self):
        """Estimate the OLS coefficients.

        Sets ``columns``, ``n``, ``nk`` (degrees of freedom), ``X1`` (design
        matrix with intercept), ``Y1`` (n x 1 response), ``beta`` (k x 1
        coefficients, intercept first), ``y_est`` (fitted values) and
        ``error`` (residuals).
        """
        self.columns = self.X.columns.tolist()
        self.n = self.X.shape[0]  # number of observations
        k = self.X.shape[1] + 1  # regressors plus the intercept

        self.X1 = np.column_stack((np.ones(self.n), self.X.to_numpy(dtype=float)))
        # reshape makes a Series and a one-column DataFrame behave the same.
        self.Y1 = self.Y.to_numpy(dtype=float).reshape(self.n, 1)

        self.beta = np.linalg.inv(self.X1.T @ self.X1) @ (self.X1.T @ self.Y1)
        self.nk = self.n - k

        # Computed here so that every other method can rely on them.
        self.y_est = self.X1 @ self.beta
        self.error = self.Y1 - self.y_est

    def R2yMSE(self):
        """Compute goodness-of-fit measures.

        Sets ``SCR`` (residual sum of squares), ``SCT`` (total sum of squares
        around the mean of Y), ``rmse`` (``sqrt(SCR / n)``) and ``R2``
        (``1 - SCR / SCT``), all as plain floats.
        """
        self.Coeficientes()

        self.SCR = float(np.sum(self.error ** 2))
        self.SCT = float(np.sum((self.Y1 - self.Y1.mean()) ** 2))
        self.rmse = (self.SCR / self.n) ** 0.5
        self.R2 = 1 - self.SCR / self.SCT

    def Error_var_cov_intcof(self):
        """Compute the coefficient covariance matrix and 95% intervals.

        Sets ``Var`` (k x k covariance matrix), ``sd`` (length-k standard
        errors) and ``límite_inferior`` / ``límite_superior`` (k x 1 bounds
        ``beta -/+ 1.96 * sd``). The covariance matrix is the White sandwich
        estimator when ``RobustStandardError`` is true and the classical
        homoskedastic one otherwise.
        """
        self.Coeficientes()

        bread = np.linalg.inv(self.X1.T @ self.X1)
        squared_error = self.error ** 2  # shape (n, 1)

        if self.RobustStandardError:
            # X' diag(e^2) X without materialising the n x n diagonal matrix:
            # scaling row i of X1 by e_i^2 gives the same product.
            meat = self.X1.T @ (self.X1 * squared_error)
            self.Var = bread @ meat @ bread
        else:
            sigma = float(squared_error.sum()) / self.nk
            self.Var = sigma * bread

        self.sd = np.sqrt(np.diag(self.Var))

        # sd is 1-D while beta is a column; reshape so the arithmetic stays
        # element-wise instead of broadcasting to a k x k matrix.
        margin = 1.96 * self.sd.reshape(-1, 1)
        self.límite_inferior = self.beta - margin
        self.límite_superior = self.beta + margin

    def _Table(self, *Kargs, **Kwargs):
        """Return the regression results as a DataFrame or a dictionary.

        The output type is chosen with ``Output="DataFrame"`` (default) or
        ``Output="Diccionario"``; it may also be given as the first
        positional argument. Standard errors, p-values and bounds follow the
        instance's ``RobustStandardError`` setting.

        Returns
        -------
        pd.DataFrame or dict
            Entries ``OLS``, ``standar_error``, ``Pvalue``, ``Lower_bound``,
            ``Upper_bound`` (one value per coefficient, intercept first) plus
            ``Root_MSE`` and ``R2`` (scalars, repeated on every row of the
            DataFrame).

        Raises
        ------
        ValueError
            If the requested output is neither "DataFrame" nor "Diccionario".
        """
        if "Output" in Kwargs:
            output = Kwargs["Output"]
        elif Kargs:
            output = Kargs[0]
        else:
            output = "DataFrame"

        if output not in ("DataFrame", "Diccionario"):
            raise ValueError(
                f"Output must be 'DataFrame' or 'Diccionario', got {output!r}"
            )

        self.R2yMSE()
        self.Error_var_cov_intcof()

        beta = self.beta.flatten()
        sd = self.sd
        t_est = np.absolute(beta / sd)
        # Two-sided p-value; sf is the upper tail, i.e. 1 - cdf.
        pvalue = 2 * t.sf(t_est, df=self.nk)

        results = {
            "OLS": beta,
            "standar_error": sd,
            "Pvalue": pvalue,
            "Lower_bound": self.límite_inferior.flatten(),
            "Upper_bound": self.límite_superior.flatten(),
            "Root_MSE": self.rmse,
            "R2": self.R2,
        }

        if output == "DataFrame":
            # The scalar entries are broadcast to every coefficient row.
            return pd.DataFrame(results)
        return results
167
168
# Variance of each column, as a plain numpy array.
variance_cols = cps2012.var().to_numpy()

# Keep only the columns whose variance is not zero, selected by position.
Dataset = cps2012.iloc[:, np.flatnonzero(variance_cols != 0)]

# Regressors: the columns at positions 1 through 9 of the filtered data.
X = Dataset.iloc[:, 1:10]

# Dependent variable; the double brackets keep it a one-column DataFrame.
Y = Dataset[['lnw']]

# Build the regression object with robust standard errors requested.
# The instance is not bound to a name, so it is discarded after this line.
OLSRegClass(X, Y, lista, RobustStandardError=True)
173
174
175
# In[ ]:
176
177
178
179
180
181