Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_final/grupo4/Grupo 4 - Trabajo Final_R.R
2714 views
1
## Grupo 4
2
3
# Flavia Or� - 20191215
4
# Seidy Ascencios - 20191622
5
# Luana Morales - 20191240
6
# Marcela Quintero - 20191445
7
8
9
# clear environment
10
11
rm(list=ls(all=TRUE))
12
13
# Load libraries
14
15
install.packages("librarian")
16
17
18
librarian::shelf(
19
tidyverse # dplyr, tidyr, stringr, ggplot2, etc in unique library
20
, haven # to read datset: .dta (stata), .spss, .dbf
21
, fastDummies # for dummies
22
, stargazer # for summary and econometrics tables
23
, sandwich # for linear models
24
, lmtest # for robust standar error
25
, estimatr # for iv, cluster, robust standar error (LM_robust)
26
, lfe # for fixed effects, cluster standar error
27
, caret # for easy machine learning workflow (mse, rmse)
28
, texreg # library for export table
29
, mfx # probit , logit model marginal effects
30
)
31
32
33
# set directorio
34
35
user <- Sys.getenv("USERNAME") # username
36
37
setwd( paste0("C:/Users/",user,"/Documents/GitHub/1ECO35_2022_2/data") )
38
39
# Pregunta 1.1
40
41
# subimos la base de datos
42
43
repdata <- read_dta("../data/dataverse_files/mss_repdata.dta")
44
45
str(repdata)
46
47
lapply(repdata, class)
48
49
names(repdata)
50
51
attributes(repdata)
52
53
summary(repdata)
54
55
56
# Observamos las etiquetas de variable de los valores de las variables
57
58
lapply(repdata, attr, 'label')
59
60
lapply(repdata, attr, 'labels')
61
62
repdata$ccode %>% attr('label') # var label
63
64
# dummy por pa�s para efectos fijos
65
66
67
repdata <- dummy_cols(repdata, select_columns = 'ccode')
68
69
#
70
71
index <- grep("ccode_", colnames(repdata))
72
73
# D*time_var
74
75
repdata$time_year <- repdata$year - 1978 # creando la variable temporal
76
77
list_vars <- names(repdata)[index]
78
79
list_vars[1]
80
81
i = 1
82
83
while(i < 42){
84
85
var <- paste0(list_vars[i],"_","time")
86
repdata[var] <- repdata[list_vars[i]]*repdata["time_year"]
87
88
i = i + 1
89
}
90
91
# TABLE 1: DESCRIPTIVE STATISTICS ----
92
93
94
table1 <- repdata %>% dplyr::select(NDVI_g, tot_100_g, trade_pGDP,
95
pop_den_rur, land_crop, va_agr, va_ind_manf) %>% as.data.frame()
96
97
98
stargazer(table1)
99
100
101
# summary.stat = c("n", "p75", "sd") ordenar las columnas de los estadisticos
102
103
list_vars <- c("Tasa de variaci�n de �ndices de vegetaci�n", "T�rminos de intercambio","Porcentaje de las exportaciones recpecto al PBI","Densidad poblacional rural",
104
"Porcentaje de tierra cultivable en uso","Valor agregado del sector agr�cola respecto PBI","Valor agregado del sector manufacturero respecto PBI")
105
106
stargazer(table1, title = "Descriptive Statistics", digits = 2, # decimales con 2 digitos
107
covariate.labels = list_vars, # Lista de etiquetas
108
summary.stat = c("mean", "sd", "n"), # se especifica el orden de los estad�sticos
109
min.max = F, # borrar el estad�stico de maximo y minimo
110
notes = "Note.-The source of most characteristics is the World Bank's World Development Indicators (WDI)."
111
, notes.append = FALSE, # TRUE append the significance levels
112
notes.align = 'l')
113
114
115
116
117
118
# Pregunta 1.2
119
120
# Anteriormente creamos el country_time_trends, ahora crearemos los efectos fijos por pa�s (country fixed effect):
121
122
index_country_time <- grep("_time$", colnames(repdata))
123
country_time_trend <-names(repdata)[index_country_time]
124
125
126
#Usando ccode como una variable tipo factor (variable categ�rica)
127
128
repdata$ccode_factor <- as.factor(repdata$ccode)
129
130
class(repdata$ccode_factor)
131
132
class(repdata$ccode)
133
134
135
#Planteamos el modelo 1 donde la variable "y" es Civil conflict > 25 deaths, Usando LM
136
137
138
formula_model1 <- as.formula(
139
paste("any_prio",
140
"~",
141
paste("GPCP_g","GPCP_g_l","ccode_factor",
142
paste(country_time_trend, collapse = "+")
143
, sep="+")
144
)
145
)
146
147
m1 <- lm(formula_model1, data = repdata)
148
149
lm_rmse1 <- round(RMSE(m1$fitted.values, repdata$any_prio ) ,2 )
150
151
152
robust_model1 <- coeftest(m1,
153
vcov = vcovCL,
154
type = "HC1",
155
cluster = ~ ccode)
156
157
sd_robust_model1 <- robust_model1[,2]
158
159
coefci(m1, df = Inf,
160
vcov. = vcovCL, cluster = ~ ccode, type = "HC1")
161
162
163
#Planteamos el Modelo 2 donde la variable "y" es Civil conflict > 1000, usando LM
164
165
formula_model2 <- as.formula(
166
paste("war_prio",
167
"~",
168
paste("GPCP_g","GPCP_g_l","ccode_factor",
169
paste(country_time_trend, collapse = "+")
170
, sep="+")
171
)
172
)
173
174
m2 <- lm(formula_model2, data = repdata)
175
176
lm_rmse2 <- round(RMSE(m1$fitted.values, repdata$war_prio ) ,2 )
177
178
179
robust_model2 <- coeftest(m1,
180
vcov = vcovCL,
181
type = "HC1",
182
cluster = ~ ccode)
183
184
sd_robust_model2 <- robust_model2[,2]
185
186
coefci(m1, df = Inf,
187
vcov. = vcovCL, cluster = ~ ccode, type = "HC1")
188
189
190
#latex
191
stargazer( m1, m2,
192
se=list(sd_robust_model1, sd_robust_model2),
193
dep.var.labels = c(""),
194
title = "Rainfall and Civil Conflict (Reduced-Form)",
195
keep = c("GPCP_g","GPCP_g_l"),
196
covariate.labels=c("Growth in rainfall, t","Growth in rainfall, t-1"),
197
align = T, no.space = T,
198
add.lines=list(c("Country fixed effects", "yes","yes"),
199
c("Country-specific time trends", "yes","yes"),
200
c("Root mean square error",lm_rmse1,lm_rmse2)),
201
keep.stat = c("rsq","n"),
202
notes.append = FALSE, notes.align = "l",
203
notes ="Huber robust standard errors are in parentheses", style = "qje"
204
)
205
206
207
208
209
# Pregunta 1.3
210
211
# Primer modelo para any_prio
212
213
model1 <- lm(any_prio~ GPCP_g +GPCP_g_l +ccode_factor, data = repdata)
214
215
model1.tab <- coeftest(model1, vcov=vcovHC(model1, type='HC1'))
216
217
model1_coef <- model1.tab[2,1]
218
219
model1_coef_se = model1.tab[2,2]
220
221
model1_lower = coefci(model1, df = Inf, vcov. = vcovHC, type = "HC1")[2,1]
222
223
model1_upper = coefci(model1, df = Inf, vcov. = vcovHC, type= "HC1")[2,2]
224
225
# Segundo modelo para war_prio
226
227
model2 <- lm(war_prio ~ GPCP_g +GPCP_g_l +ccode_factor, data = repdata)
228
229
model2.tab <- coeftest(model2, vcov=vcovHC(model2, type='HC1'))
230
231
model2_coef <- model2.tab[2,1]
232
233
model2_coef_se = model2.tab[2,2]
234
235
model2_lower = coefci(model2,df = Inf, vcov. = vcovHC, type = "HC1")[2,1]
236
237
model2_upper = coefci(model2,df = Inf, vcov. = vcovHC, type = "HC1")[2,2]
238
239
#Tabla de resultados
240
241
table<- matrix(0, 2, 4) # 3 filas y 4 columnas
242
243
table[1,1] = model1_coef
244
table[1,2] = model1_coef_se
245
table[1,3] = model1_lower
246
table[1,4] = model1_upper
247
248
table[2,1] = model2_coef
249
table[2,2] = model2_coef_se
250
table[2,3] = model2_lower
251
table[2,4] = model2_upper
252
253
colnames(table)<- c("Estimate","se","lower_bound","upper_bound")
254
rownames(table)<- c(" Civil Conflict >=25 Deaths", "Civil Conflict >=1000 Deaths")
255
256
# Exportaci�n a Latex
257
258
tab <- as.data.frame(table)
259
260
# table de matriz a dataframe (tab)
261
262
# Coef-plot
263
264
options(repr.plot.width = 8, repr.plot.height =5)
265
266
# aes: ejes
267
268
tab %>% ggplot(aes(x=rownames(tab), y=Estimate)) +
269
geom_point(size=2, color = 'red') +
270
geom_errorbar(aes(ymin=lower_bound, ymax=upper_bound) , width = 0.05,color="blue", size = 0.8) +
271
labs(x="", y="") + ggtitle("GPCP_g Coefficient (95% CI)") +
272
theme(text=element_text(size =15), plot.title = element_text(hjust = 0.5)) +
273
geom_hline(yintercept=0, linetype="dashed", color = "black", size=1) +
274
scale_x_discrete(limits = c(" Civil Conflict >=25 Deaths", "Civil Conflict >=1000 Deaths")) +
275
theme(panel.grid.major = element_blank(),
276
panel.grid.minor = element_blank())
277
278
ggsave("../plots/Coef_plot.png"
279
, height = 8
280
, width = 12
281
, dpi = 320
282
)
283
284