Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Lab10/script_coef_plot_r.R
2714 views
1
################ Clase 11 Coef Plot ############################
2
## Curso: Laboratorio de R y Python ###########################
3
## @author: Roberto Mendoza
4
5
# clear environment
6
7
rm(list=ls(all=TRUE))
8
9
10
# Cargamos librerias ----
11
12
#install.packages("librarian")
13
14
# librarian::shelf
15
16
librarian::shelf(tidyverse, sandwich, lmtest, xtable,haven)
17
18
# sandwich: libreria de modelos lineales.
19
# lmtest: errores estandar robustos,
20
# xtable: exportar matriz o datafrma, table a latex
21
22
user <- Sys.getenv("USERNAME") # username
23
24
setwd( paste0("C:/Users/",user,"/Documents/GitHub/1ECO35_2022_2/Lab10") ) # set directorio
25
26
27
# ubimos la base de datos
28
29
data <- read_dta("../data/Pesos/peso.dta")
30
31
# Typo de variables
32
33
as.list(sapply(data, class))
34
35
36
# Dummy variable
37
38
data <- data %>% mutate(Dummy = ifelse(cigs > 0 , 1 , 0 ) ) %>%
39
mutate(Dummy1 = case_when(Dummy == 0 ~ "No smoking",
40
Dummy == 1 ~ "Smoking"))
41
42
# bwghtlbs: peso del bebe en libras
43
# fill: desagregar los grpaficos
44
# Hitograma del peso del bebé ----
45
46
data %>% ggplot() + aes(x=bwghtlbs, fill = Dummy1) +
47
geom_histogram( color="black", alpha=0.6, size=0.5) +
48
scale_fill_manual(values=c("#69b3a2", "#404080"), name=NULL) + # Set colores usando código
49
labs(x = " ", y = "Absolute frequency", title = "Smoking status and newborn weight (lbs)", size = 10) +
50
theme_classic() + # Fondo blanco
51
theme(text=element_text(size =15), plot.title = element_text(hjust = 0.5)) +
52
theme(legend.position = c(0.7,0.8)) +
53
scale_x_continuous(limits = c(0,18), expand = c(0, 0)) +
54
scale_y_continuous(limits = c(0,300), expand = c(0, 0))
55
56
# fill permite desagregar según los valores de Dummy1
57
# plot.title = element_text(hjust = 0.5) Permite centrar el título
58
59
ggsave("../plots/hist_pesos.png"
60
, height = 8 # alto
61
, width = 12 # ancho
62
, dpi = 320 # resolución (calidad de la imagen)
63
)
64
65
66
67
# Histograma en frecuencia relativa ----
68
69
data %>% ggplot(aes(x=bwghtlbs, fill = Dummy1)) +
70
geom_histogram( aes(y = ..density..), color="black", alpha=0.3, position = 'identity', size=0.6) +
71
scale_fill_manual(values=c("blue", "red"), name=NULL) + # for legend section
72
labs(x = " ", y = "Relative frequency", title = "Smoking status and newborn weight (lbs)", size = 15) +
73
theme_classic() +
74
theme(text=element_text(size =15), plot.title = element_text(hjust = 0.5)) +
75
theme(legend.position = c(0.85,0.9)) +
76
scale_x_continuous(limits = c(0,18), expand = c(0, 0)) +
77
scale_y_continuous(limits = c(0,0.4), expand = c(0, 0))
78
79
80
81
#y = ..density.. (frecuencia relativa)
82
83
ggsave("../plots/hist_relative_pesos.png"
84
, height = 8 # alto
85
, width = 12 # ancho
86
, dpi = 320 # resolución (calidad de la imagen)
87
)
88
89
# Densidad ----
90
91
data %>% ggplot(aes(x=bwghtlbs, fill = Dummy1 , colour=Dummy1)) +
92
geom_density(alpha=0.5, color = "black", size=0.6) +
93
scale_fill_manual(values=c("blue", "red"), name=NULL) + # Color
94
ggtitle("Smoking status and newborn weight (lbs)") +
95
theme_classic() +
96
theme(text=element_text(size =15), plot.title = element_text(hjust = 0.5)) +
97
labs(x = "",
98
y = "Kernel Density") +
99
theme(legend.position = c(0.85,0.9)) +
100
scale_x_continuous(limits = c(0,18), expand = c(0, 0)) +
101
scale_y_continuous(limits = c(0,0.4), expand = c(0, 0))
102
103
104
# scale limita el intervalo de los ejes
105
106
# alpha : grado de transparencia
107
108
# Linear regression -----
109
110
# lbwght : logaritmo del peso del bebé
111
# Dummy: toma 1 si la mujer fumó durante el embarazo
112
113
# First Model ~
114
# ~ : alt + 126
115
# el intercept se añade por default
116
117
118
model1 <- lm(lbwght ~ Dummy, data = data)
119
120
# lmtest: coeftest
121
122
model1.tab <- coeftest(model1, vcov=vcovHC(model1, type='HC1'))
123
124
# HC: heteerocedasticidad, HC1: matriz varianza y cov de Huber - White
125
126
model1_coef <- model1.tab[2,1]
127
128
model1_coef_se = model1.tab[2,2]
129
130
# HC1: standar error robust aginst heterocedasticity
131
132
# Intervalo de confianza
133
# intervalo de cofianza ajsutado por heterocedasticidad
134
model1_lower = coefci(model1, df = Inf, vcov. = vcovHC, type = "HC1")[2,1]
135
136
model1_upper = coefci(model1, df = Inf, vcov. = vcovHC, type = "HC1")[2,2]
137
138
# Second Model, se añado los años de educación de la madre
139
140
model2 <- lm(lbwght ~ Dummy + motheduc, data = data)
141
142
model2.tab <- coeftest(model2, vcov=vcovHC(model2, type='HC1'))
143
144
model2_coef <- model2.tab[2,1]
145
146
model2_coef_se = model2.tab[2,2]
147
148
model2_lower = coefci(model2, df = Inf, vcov. = vcovHC, type = "HC1")[2,1]
149
model2_upper = coefci(model2, df = Inf, vcov. = vcovHC, type = "HC1")[2,2]
150
151
# type: HC1: robust standar error Huber-White
152
153
# Third Model
154
155
model3 <- lm(lbwght ~ Dummy + motheduc + lfaminc + white + Dummy:(motheduc + lfaminc + white), data = data)
156
157
model3.tab <- coeftest(model3, vcov=vcovHC(model3, type='HC'))
158
159
model3_coef <- model3.tab[2,1]
160
161
model3_coef_se = model3.tab[2,2]
162
163
model3_lower = coefci(model3, df = Inf, vcov. = vcovHC, type = "HC")[2,1]
164
model3_upper = coefci(model3, df = Inf, vcov. = vcovHC, type = "HC")[2,2]
165
166
# Model Matrix
167
168
m <- lm(lbwght ~ -1 + Dummy:(motheduc + lfaminc + white) , data)
169
X <- as.data.frame( model.matrix(m) )
170
171
172
# Tabla de resultaods
173
174
table<- matrix(0, 3, 4) # 3 filas y 4 columnas
175
176
177
table[1,1]<- model1_coef
178
table[1,2]<- model1_coef_se
179
180
table[2,1]<- model2_coef
181
table[2,2]<- model2_coef_se
182
183
table[3,1]<- model3_coef
184
table[3,2]<- model3_coef_se
185
186
table[1,3]<- model1_lower
187
table[1,4]<- model1_upper
188
189
table[2,3]<- model2_lower
190
table[2,4]<- model2_upper
191
192
table[3,3]<- model3_lower
193
table[3,4]<- model3_upper
194
195
colnames(table)<- c("Estimate","se","lower_bound","upper_bound")
196
rownames(table)<- c("OLS baseline", "OLS with controls", "OLS interactive model")
197
198
# Exportación a Latex
199
xtable(table)
200
201
202
tab <- as.data.frame(table)
203
204
# table de matriz a dataframe (tab)
205
206
# Coef-plot
207
208
theme_set(theme_bw(20)) # tema con fondo blanco y cuadro con bordes en negro
209
210
options(repr.plot.width = 8, repr.plot.height =5) # tamaño del gráfico
211
212
# aes: ejes
213
214
tab %>% ggplot(aes(x=rownames(tab), y=Estimate)) +
215
geom_point(size=2, color = 'black') +
216
geom_errorbar(aes(ymin=lower_bound, ymax=upper_bound) , width = 0.05,color="black", size = 0.8) +
217
labs(x="", y="") + ggtitle("Smoking Coefficient (95% CI)") +
218
theme(text=element_text(size =15), plot.title = element_text(hjust = 0.5)) +
219
geom_hline(yintercept=0, linetype="dashed", color = "black", size=1) +
220
scale_x_discrete(limits = c("OLS baseline","OLS with controls", "OLS interactive model")) + # order x - axis
221
theme(panel.grid.major = element_blank(), # borras las cuadrículas en el fondo
222
panel.grid.minor = element_blank())
223
224
# geom_errorbar solicita el limite inferior y superior
225
# width : ancho de la abrra superior
226
# scale_x_discrete: Nombre de modelos en eje inferior
227
# geom_hline: añadir lines horizontal
228
# panel.grid.major = element_blank(), panel.grid.minor = element_blank() borra las cuadrículas en el fondo
229
230
ggsave("../plots/Coef_plot.png"
231
, height = 8 # alto
232
, width = 12 # ancho
233
, dpi = 320 # resolución (calidad de la imagen)
234
)
235
236
237
238
239
240
241
242
243
244
245
246
247