CoCalc -- Grupo 4 - Trabajo Final

GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_final/grupo4/Grupo 4 - Trabajo Final_R.R
⁴⁶⁸⁷ views
1
## Grupo 4
2

3
# Flavia Or� - 20191215
4
# Seidy Ascencios - 20191622
5
# Luana Morales - 20191240
6
# Marcela Quintero - 20191445
7

8

9
# clear environment
10

11
rm(list=ls(all=TRUE))
12

13
# Load libraries 
14

15
install.packages("librarian")
16

17

18
librarian::shelf(
19
  tidyverse  # dplyr, tidyr, stringr, ggplot2, etc in unique library
20
  , haven   # to read datset: .dta (stata), .spss, .dbf
21
  , fastDummies  # for dummies
22
  , stargazer  # for summary and econometrics tables
23
  , sandwich  # for linear models
24
  , lmtest # for robust standar error
25
  , estimatr # for iv, cluster, robust standar error (LM_robust)
26
  , lfe  # for fixed effects, cluster standar error
27
  , caret # for easy machine learning workflow (mse, rmse)
28
  , texreg # library for export table
29
  , mfx # probit , logit model marginal effects
30
)
31

32

33
# set directorio
34

35
user <- Sys.getenv("USERNAME")  # username
36

37
setwd( paste0("C:/Users/",user,"/Documents/GitHub/1ECO35_2022_2/data") ) 
38

39
# Pregunta 1.1
40

41
# subimos la base de datos
42

43
repdata <- read_dta("../data/dataverse_files/mss_repdata.dta")
44

45
str(repdata)
46

47
lapply(repdata, class)
48

49
names(repdata)
50

51
attributes(repdata)
52

53
summary(repdata)
54

55

56
# Observamos las etiquetas de variable de los valores de las variables
57

58
lapply(repdata, attr, 'label')
59

60
lapply(repdata, attr, 'labels')
61

62
repdata$ccode %>% attr('label') # var label
63

64
# dummy por pa�s para efectos fijos
65

66

67
repdata  <- dummy_cols(repdata, select_columns = 'ccode')
68

69
#
70

71
index <- grep("ccode_", colnames(repdata))
72

73
# D*time_var 
74

75
repdata$time_year <- repdata$year - 1978 # creando la variable temporal
76

77
list_vars <- names(repdata)[index]
78

79
list_vars[1]
80

81
i = 1
82

83
while(i < 42){
84
  
85
  var <- paste0(list_vars[i],"_","time")
86
  repdata[var]  <- repdata[list_vars[i]]*repdata["time_year"]
87
  
88
  i = i + 1
89
}
90

91
# TABLE 1: DESCRIPTIVE STATISTICS ----
92

93

94
table1 <- repdata %>% dplyr::select(NDVI_g, tot_100_g, trade_pGDP,
95
                                    pop_den_rur, land_crop, va_agr, va_ind_manf) %>% as.data.frame()
96

97

98
stargazer(table1)
99

100

101
# summary.stat = c("n", "p75", "sd") ordenar las columnas de los estadisticos
102

103
list_vars <- c("Tasa de variaci�n de �ndices de vegetaci�n", "T�rminos de intercambio","Porcentaje de las exportaciones recpecto al PBI","Densidad poblacional rural",
104
               "Porcentaje de tierra cultivable en uso","Valor agregado del sector agr�cola respecto PBI","Valor agregado del sector manufacturero respecto PBI")
105

106
stargazer(table1, title = "Descriptive Statistics", digits = 2, # decimales con 2 digitos
107
          covariate.labels = list_vars,  # Lista de etiquetas
108
          summary.stat = c("mean", "sd", "n"), # se especifica el orden de los estad�sticos
109
          min.max = F, # borrar el estad�stico de maximo y minimo
110
          notes = "Note.-The source of most characteristics is the World Bank's World Development Indicators (WDI)."
111
          , notes.append = FALSE, # TRUE append the significance levels
112
          notes.align = 'l')
113

114

115

116

117

118
# Pregunta 1.2
119

120
# Anteriormente creamos el country_time_trends, ahora crearemos los efectos fijos por pa�s (country fixed effect):
121

122
index_country_time <- grep("_time$", colnames(repdata))
123
country_time_trend <-names(repdata)[index_country_time]
124

125

126
#Usando ccode como una variable tipo factor (variable categ�rica)
127

128
repdata$ccode_factor <- as.factor(repdata$ccode)
129

130
class(repdata$ccode_factor)
131

132
class(repdata$ccode)
133

134

135
#Planteamos el modelo 1 donde la variable "y" es Civil conflict > 25 deaths, Usando LM
136

137

138
formula_model1 <- as.formula(
139
  paste("any_prio",
140
        "~",
141
        paste("GPCP_g","GPCP_g_l","ccode_factor",
142
              paste(country_time_trend, collapse = "+")
143
              , sep="+")
144
  )
145
)
146

147
m1 <- lm(formula_model1, data = repdata)
148

149
lm_rmse1 <- round(RMSE(m1$fitted.values, repdata$any_prio ) ,2 )
150

151

152
robust_model1 <- coeftest(m1,
153
                          vcov = vcovCL,
154
                          type = "HC1",
155
                          cluster = ~ ccode)
156

157
sd_robust_model1 <- robust_model1[,2]
158

159
coefci(m1, df = Inf, 
160
       vcov. = vcovCL, cluster = ~ ccode, type = "HC1")
161

162

163
#Planteamos el Modelo 2 donde la variable "y" es Civil conflict > 1000, usando LM
164

165
formula_model2 <- as.formula(
166
  paste("war_prio",
167
        "~",
168
        paste("GPCP_g","GPCP_g_l","ccode_factor",
169
              paste(country_time_trend, collapse = "+")
170
              , sep="+")
171
  )
172
)
173

174
m2 <- lm(formula_model2, data = repdata)
175

176
lm_rmse2 <- round(RMSE(m1$fitted.values, repdata$war_prio ) ,2 )
177

178

179
robust_model2 <- coeftest(m1,
180
                          vcov = vcovCL,
181
                          type = "HC1",
182
                          cluster = ~ ccode)
183

184
sd_robust_model2 <- robust_model2[,2]
185

186
coefci(m1, df = Inf, 
187
       vcov. = vcovCL, cluster = ~ ccode, type = "HC1")
188

189

190
#latex
191
stargazer( m1, m2,
192
           se=list(sd_robust_model1, sd_robust_model2),
193
           dep.var.labels = c(""),
194
           title = "Rainfall and Civil Conflict (Reduced-Form)",
195
           keep = c("GPCP_g","GPCP_g_l"),
196
           covariate.labels=c("Growth in rainfall, t","Growth in rainfall, t-1"),
197
           align = T, no.space = T,
198
           add.lines=list(c("Country fixed effects", "yes","yes"),
199
                          c("Country-specific time trends", "yes","yes"),
200
                          c("Root mean square error",lm_rmse1,lm_rmse2)),
201
           keep.stat = c("rsq","n"),
202
           notes.append = FALSE, notes.align = "l",
203
           notes ="Huber robust standard errors are in parentheses", style = "qje"
204
           )
205

206

207

208

209
# Pregunta 1.3
210

211
# Primer modelo para any_prio
212

213
model1 <- lm(any_prio~ GPCP_g +GPCP_g_l +ccode_factor, data = repdata)
214

215
model1.tab <- coeftest(model1, vcov=vcovHC(model1, type='HC1'))
216

217
model1_coef <- model1.tab[2,1]
218

219
model1_coef_se = model1.tab[2,2]
220

221
model1_lower = coefci(model1, df = Inf, vcov. = vcovHC, type = "HC1")[2,1]
222

223
model1_upper = coefci(model1, df = Inf, vcov. = vcovHC, type= "HC1")[2,2]
224

225
# Segundo modelo para war_prio
226

227
model2 <- lm(war_prio ~ GPCP_g +GPCP_g_l +ccode_factor, data = repdata)
228

229
model2.tab <- coeftest(model2, vcov=vcovHC(model2, type='HC1'))
230

231
model2_coef <- model2.tab[2,1]
232

233
model2_coef_se = model2.tab[2,2]
234

235
model2_lower = coefci(model2,df = Inf, vcov. = vcovHC, type = "HC1")[2,1]
236

237
model2_upper = coefci(model2,df = Inf, vcov. = vcovHC, type = "HC1")[2,2]
238

239
#Tabla de resultados
240

241
table<- matrix(0, 2, 4) # 3 filas y 4 columnas
242

243
table[1,1] = model1_coef
244
table[1,2] = model1_coef_se 
245
table[1,3] = model1_lower
246
table[1,4] = model1_upper 
247

248
table[2,1] = model2_coef
249
table[2,2] = model2_coef_se  
250
table[2,3] = model2_lower
251
table[2,4] = model2_upper 
252

253
colnames(table)<- c("Estimate","se","lower_bound","upper_bound")
254
rownames(table)<- c(" Civil Conflict >=25 Deaths", "Civil Conflict >=1000 Deaths")
255

256
# Exportaci�n a Latex
257

258
tab <- as.data.frame(table)
259

260
# table de matriz a dataframe (tab)
261

262
# Coef-plot
263

264
options(repr.plot.width = 8, repr.plot.height =5) 
265

266
# aes: ejes
267

268
tab  %>% ggplot(aes(x=rownames(tab), y=Estimate)) +
269
  geom_point(size=2, color = 'red') +
270
  geom_errorbar(aes(ymin=lower_bound, ymax=upper_bound) , width = 0.05,color="blue", size = 0.8) +
271
  labs(x="", y="") + ggtitle("GPCP_g Coefficient (95% CI)")  +
272
  theme(text=element_text(size =15), plot.title = element_text(hjust = 0.5)) +
273
  geom_hline(yintercept=0, linetype="dashed", color = "black", size=1) +
274
  scale_x_discrete(limits = c(" Civil Conflict >=25 Deaths", "Civil Conflict >=1000 Deaths")) + 
275
  theme(panel.grid.major = element_blank(), 
276
        panel.grid.minor = element_blank())
277

278
ggsave("../plots/Coef_plot.png"
279
       , height = 8  
280
       , width = 12  
281
       , dpi = 320  
282
)
283

284
Product

Resources

Company