Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_final/grupo1/Grupo1_Pregunta1_OLS_R.R
2714 views
1
install.packages("rlang")
2
install.packages("Metrics")
3
install.packages("xtable")
4
install.packages("tidyverse")
5
6
library(ggplot2)
7
library(Metrics)
8
library(haven)
9
library(dplyr)
10
library(stargazer)
11
library(plm)
12
library(lmtest)
13
library(sandwich)
14
library(estimatr)
15
library(lfe)
16
library(caret)
17
library(texreg)
18
library(mfx)
19
library(fastDummies)
20
library(lubridate)
21
library(tidyverse)
22
23
librarian::shelf(plm,lfe,caret,texreg,mfx,fastDummies,estimatr,dplyr,stargazer,tidyverse,sandwich,lmtest,xtable,haven)
24
25
#Seteamos el directorio
26
user <- Sys.getenv("USERNAME") # username
27
28
setwd( paste0("C:/Users/",user,"/Documents/GitHub/1ECO35_2022_2") ) # set directorio
29
30
31
DATA <- read_dta("Trabajo_final/datos/mss_repdata.dta")
32
DATA <-as.data.frame(DATA)
33
34
#Me quedo con las variables necesarias para realizar la tabla de estad�sticos
35
dat_2 <- DATA %>% dplyr::select(NDVI_g,tot_100,trade_pGDP,pop_den_rur,land_crop,va_agr,va_ind_manf) %>% as.data.frame()
36
37
#Tabla de estad�sticas de variables seleccionadas
38
list_var <- c("Tasa de variaci�n del �ndice de vegetaci�n", "T�rminos de intercambio",
39
"Porcentaje de las exportaciones recpecto al PBI", "Densidad poblacional rural",
40
"Porcentaje de tierra cultivable en uso", "Valor agregado del sector agricultura respecto PBI",
41
"Valor agregado del sector manufacturero respecto PBI")
42
43
stargazer(dat_2, title = "Descriptive Statistics", digits = 2, covariate.labels = list_var,
44
summary.stat = c("mean", "sd", "n"),min.max = F,
45
notes.append = FALSE, notes.align = 'l')
46
47
#Se crean dummies para cada pa�s
48
49
DATA <- dummy_cols(DATA, select_columns = 'ccode')
50
51
index <- grep("ccode_", colnames(DATA))
52
53
# Se crea la variable a�o a partir de 1981 en adelante
54
DATA$time_year <- DATA$year - 1980
55
56
list_vars_mod <- names(DATA)[index]
57
list_vars_mod[1]
58
59
#Creamos la variable a�o x pa�s.
60
61
i = 1
62
63
while(i < 42){
64
65
var <- paste0(list_vars_mod[i],"_","time")
66
DATA[var] <- DATA[list_vars_mod[i]]*DATA["time_year"]
67
68
i = i + 1
69
}
70
71
72
# Modelo (1)
73
74
75
index_country_time <- grep("_time$", colnames(DATA))
76
country_time_trend <-names(DATA)[index_country_time]
77
index_country <- grep("^ccode_\\d+$", colnames(DATA))
78
79
country_fe <-names(DATA)[index_country]
80
81
82
model1_formula <- as.formula(
83
paste("any_prio",
84
"~",
85
paste("GPCP_g","GPCP_g_l", paste(country_time_trend, collapse = "+"),
86
paste(country_fe, collapse = "+")
87
, sep="+")
88
)
89
)
90
91
92
ols_model1 <- lm_robust(model1_formula, data = DATA,
93
clusters = ccode, se_type = "stata")
94
95
summary(ols_model1)
96
tidy(ols_model1)
97
glance(ols_model1)
98
99
#Usamos LM y luego coestest para hallar los errores standar robusto"
100
101
102
m1 <- lm(model1_formula, data = DATA)
103
104
lm_rmse1 <- round(rmse(m1$fitted.values, DATA$any_prio) ,2 )
105
106
robust_model1 <- coeftest(m1 ,
107
vcov = vcovCL,
108
type = "HC1",
109
cluster = ~ ccode)
110
111
112
113
sd_robust_model1 <- robust_model1[,2]
114
115
116
117
# Modelo (2)
118
119
model2_formula <- as.formula(
120
paste("war_prio",
121
"~",
122
paste("GPCP_g","GPCP_g_l", paste(country_time_trend, collapse = "+"),
123
paste(country_fe, collapse = "+")
124
, sep="+")
125
)
126
)
127
128
129
ols_model2 <- lm_robust(model2_formula, data = DATA,
130
clusters = ccode, se_type = "stata")
131
132
summary(ols_model2)
133
134
#Usando LM y luego coestest para los error standar robusto"
135
136
137
m2 <- lm(model2_formula, data = DATA)
138
139
lm_rmse2 <- round(rmse(m2$fitted.values, DATA$war_prio ) ,2 )
140
141
robust_model2 <- coeftest(m2 ,
142
vcov = vcovCL,
143
type = "HC1",
144
cluster = ~ ccode)
145
146
147
148
sd_robust_model2 <- robust_model2[,2]
149
150
#Exportando la tabla
151
152
153
stargazer( m1, m2,
154
se=list(sd_robust_model1, sd_robust_model2),
155
dep.var.labels = c(""),
156
title = "Rainfall and Civil Conflict (Reduced-Form)",
157
keep = c("GPCP_g","GPCP_g_l"),
158
covariate.labels=c("Growth in rainfall, t","Growth in rainfall, t-1"),
159
align = T, no.space = T,
160
add.lines=list(c("Country fixed effects","yes","yes"),
161
c("Country-specific time trends","yes","yes"),
162
c("Root mean square error",lm_rmse1,lm_rmse2)),
163
keep.stat = c("rsq","n"),
164
notes.append = FALSE, notes.align = "l",
165
notes ="Huber robust standard errors are in parentheses. Regression disturbance terms are clustered at the country level.A country-specific year time trend is included in all specifications (coefficient estimates not reported). * Significantly different from zero at 90 percent confidence. ** Significantly different from zero at 95 percent confidence. *** Significantly different from zero at 99 percent confidence.", style = "qje"
166
)
167
168
169
170
##### Coef-plot
171
172
# Primero calculamos los intervalos de confianza para ambos modelos
173
174
#Modelos 1. c.i : confidence interval
175
176
model1_lower = coefci(m1, df = Inf,
177
vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,1]
178
179
model1_upper = coefci(m1, df = Inf,
180
vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,2]
181
182
#Modelo 2. c.i : confidence interval
183
184
model2_lower = coefci(m2, df = Inf,
185
vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,1]
186
187
model2_upper = coefci(m2, df = Inf,
188
vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,2]
189
190
191
#Extraemos los coeficientes de la variable gpcp_g de ambos modelos
192
193
model1_coef<- ols_model1$coefficients[2]
194
195
model2_coef<- ols_model2$coefficients[2]
196
197
# Creamos una tabla de resultados que recopile los coeficientes y los intervalos de confianza
198
199
table<- matrix(0, 2, 3)
200
201
table[1,1]<- model1_coef
202
table[1,2]<- model1_lower
203
table[1,3]<- model1_upper
204
205
table[2,1]<- model2_coef
206
table[2,2]<- model2_lower
207
table[2,3]<- model2_upper
208
209
#Le damos nombres a las filas y columnas
210
colnames(table)<- c("Estimate","lower_bound","upper_bound")
211
rownames(table)<- c("OLS 1", "OLS 2")
212
213
214
# Lo convertimos a dataframe para poder ploterarlo
215
tab <- as.data.frame(table)
216
217
#Ploteamos el gr�fico
218
219
theme_set(theme_bw(20)) # tema con fondo blanco y cuadro con bordes en negro
220
221
options(repr.plot.width = 8, repr.plot.height =8) # tama�o del gr�fico
222
223
224
tab %>% ggplot(aes(x=rownames(tab), y=Estimate)) +
225
geom_point(size=2, color = 'red') +
226
geom_errorbar(aes(ymin=lower_bound, ymax=upper_bound) , width = 0.1,color="blue", size = 0.9) +
227
labs(x="", y="") + ggtitle("Stimated Coefficient of growth in rainfall (95% CI)") +
228
theme(text=element_text(size =8), plot.title = element_text(hjust = 0.5)) +
229
geom_hline(yintercept=0, linetype="dashed", color = "black", size=1) +
230
scale_x_discrete(limits = c("OLS 1","OLS 2")) + # order x - axis
231
theme(panel.grid.major = element_blank(), # borramos las cuadr�culas en el fondo
232
panel.grid.minor = element_blank())
233
234
# Guardamos la imagen del coefplot en la ruta especificada
235
236
ggsave("Trabajo_final/grupo1/plots/Coef_plot.png"
237
, height = 8 # alto
238
, width = 12 # ancho
239
, dpi = 320 # resoluci�n (calidad de la imagen)
240
)
241
242
243
244