CoCalc -- Trabajo Final R.R

GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_final/grupo2/Trabajo Final R.R
⁴⁶⁸⁴ views
1
##########################################################
2
########Trabajo Final del curso de R y Python ############
3
##########################################################
4

5
################ EJERCICIO 1 (a)(b)(c)####################
6

7
################### Grupo 2###############################
8

9
#rm(list=ls(all=TRUE))
10

11
####Instalamos los paquetes y librerias que usaremos para este ejercicio ----
12

13
librarian::shelf(
14
    tidyverse  # dplyr, tidyr, stringr, ggplot2, etc in unique library
15
    , haven   # to read datset: .dta (stata), .spss, .dbf
16
    , fastDummies  # for dummies
17
    , stargazer  # for summary and econometrics tables
18
    , sandwich  # for linear models
19
    , lmtest # for robust standar error
20
    , estimatr # for iv, cluster, robust standar error (LM_robust)
21
    , lfe  # for fixed effects, cluster standar error
22
    , caret # for easy machine learning workflow (mse, rmse)
23
    , texreg # library for export table
24
    , mfx # probit , logit model marginal effects
25
)
26

27
##Instalamos metrics
28

29
install.packages("Metrics")
30
library(Metrics)
31

32

33
###Instalamos el paquete caret aparte###
34
install.packages('caret', dependencies = TRUE)
35
library(caret)
36
library(estimatr)
37
##Acedemos a la base de datos###
38
user <- Sys.getenv("USERNAME")  # username
39

40
setwd( paste0("C:/Users/",user,"/Documents/GitHub/1ECO35_2022_2/Lab10") ) # set directorio
41

42
##Nombramos la data como "repdata"
43
repdata <- read_dta("../data/dataverse_files/mss_repdata.dta")
44

45
##Exploramos la data y vemos sus caracteristicas
46
str(repdata)
47

48
lapply(repdata, class)
49

50
names(repdata)
51

52
attributes(repdata)
53

54
summary(repdata)
55

56
# Observamos las etiquetas de variable de los valores de las variables
57

58
lapply(repdata, attr, 'label')
59

60
lapply(repdata, attr, 'labels')
61

62
repdata$ccode %>% attr('label') # var label
63

64
# dummy por país para efectos fijos que nos servira más adelante
65

66
repdata  <- dummy_cols(repdata, select_columns = 'ccode')
67

68
#
69
index <- grep("ccode_", colnames(repdata))
70

71
# Creamos la variable temporal
72

73
repdata$time_year <- repdata$year - 1981 # creando la variable temporal
74

75
list_vars <- names(repdata)[index]
76

77
list_vars[1]
78

79
i = 1
80

81
while(i < 42){
82

83
    var <- paste0(list_vars[i],"_","time")
84
    repdata[var]  <- repdata[list_vars[i]]*repdata["time_year"]
85

86
    i = i + 1
87
}
88

89
####Con todos estos elementos, ya tenemos la base lista
90
####para ser utilizada 
91

92
#######################
93
####EJERCICIO 1a#######
94
#######################
95

96
# Realizamos una tabla descriptiva de estadisticas de las siguientes variables
97

98
#NDVI g (Tasa de variacion del ındice de vegetacion),
99
#tot 100 (terminos de intercambio), 
100
#trade pGDP (porcentaje de las exportaciones respecto al PBI) 
101
#pop den rur (Densidad poblacional rural),
102
#land crop (porcentaje de tierra cultivable en uso), 
103
#va agr (Valor agregado del sector agrıcultura/ PBI) 
104
#va ind manf (Valor agregado del sector manufactura/ PBI)
105

106
# CREAMOS TABLE 1: 
107

108
table1 <- repdata %>% dplyr::select(NDVI_g, tot_100, trade_pGDP, 
109
                                    pop_den_rur, land_crop, va_agr, 
110
                                    va_ind_manf ) %>% as.data.frame()
111

112
##Ajustamos la tabla 1 con lo que me piden
113

114
stargazer(table1, title = "Estadísticas Descriptivas", digits = 2,
115
          covariate.labels = c("Tasa de variacion del ındice de vegetacion",
116
          "Terminos de intercambio","% Exportaciones respecto al PBI","Densidad poblacional rural",
117
          "% Tierra cultivable en uso","Valor agregado del sector agrıcultura/ PBI",
118
          "Valor agregado del sector manufactura/ PBI",
119
          "Doyle and Sambanis (2000)","Fearon and Laitin (2003)"),
120
          min.max = F)
121

122

123
#######################
124
####EJERCICIO 1b#######
125
#######################
126

127
##Replicar la tabla 3 (pag 737)
128

129
#Establecemos el modelo OLS simple
130

131
ols_model <- lm(gdp_g ~ GPCP_g + GPCP_g_l, data = repdata)
132

133
attributes(ols_model)
134

135
#Con este comando podemos ver los coeficientes del modelo
136
ols_model$coefficients  
137

138
#con este comando vemos los valores del vector Y estimado
139
ols_model$fitted.values 
140

141
# Para conocer bien el modelo hacemos un summary table
142

143
summary(ols_model)
144

145
summary(ols_model)$call
146

147
summary(ols_model)$coef
148

149
# Aplicamos el Test de significancia individual 
150
#usando Huber white robust standard errors 
151

152
coeftest(ols_model, vcov = vcovHC(ols_model, "HC1")) # Huber-White robust (STATA)
153

154

155
# Creamos el sd_robust y el cluster
156

157
robust_model <- coeftest(ols_model,
158
                          vcov = vcovCL,  # Matrix varianza-covarianza cluster (CL)
159
                          type = "HC1",
160
                          cluster = ~ ccode)
161

162
sd_robust <- robust_model[,2]
163

164

165
#### Primer Modelo ----
166

167
# OLS,con efectos fijos o country-time trend
168
# errores estandar robustas (Huber robust)
169
# Los residuos estan clusterizados (agrupados) a nivel país
170
# Using cluster and robust standar error
171

172
ols_model1 <- lm_robust(gdp_g ~ GPCP_g + GPCP_g_l, data = repdata,
173
              clusters = ccode, se_type = "stata")
174

175

176
summary(ols_model1)
177

178
attributes(ols_model1)
179

180
ols_model1$r.squared
181
ols_model1$fitted.values
182

183
#Estimamos los root mean squeare error
184

185
rmse1 <- rmse(ols_model1$fitted.values, repdata$gdp_g )
186

187
rmse(ols_model1$fitted.values, repdata$gdp_g )^2 # mean square error
188

189

190
# Aqui vemos los coeficientes y otras funciones de nuestro ols model1
191

192

193
glance(ols_model1)
194

195
tidy(ols_model1)
196

197
tidy(ols_model1)
198

199
htmlreg(ols_model1)
200

201
tidy(ols_model1)[2,2] # coeficiente
202
tidy(ols_model1)[2,6] # limite inferior
203
tidy(ols_model1)[2,7] # limite superior 
204

205
##Dado que stargazer no es compatible, utilizamos comando LM
206

207
m1 <- lm(gdp_g ~ GPCP_g + GPCP_g_l, data = repdata)
208

209
lm_rmse1 <- round(rmse(m1$fitted.values, repdata$gdp_g ) ,2 )
210
print(lm_rmse1)
211

212
robust_model1 <- coeftest(m1,
213
vcov = vcovCL,  # Matrix varianza-covarianza cluster (CL)
214
type = "HC1",
215
cluster = ~ ccode)
216

217
##Para acceder a la columna de errores estandar -->sd_robust_model1
218
sd_robust_model1 <- robust_model1[,2]
219

220
# Hallamos el  intervalo de confianza --> model1_lower
221

222
model1_lower = coefci(m1, df = Inf, 
223
                      vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,1]
224

225
model1_upper = coefci(m1, df = Inf,
226
                      vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,2]
227

228
tidy(m1, conf.int = TRUE)
229

230
##Agrego el contrytime trends
231

232
#Primero deinimos las variables de control
233
control_vars <- c("y_0", "polity2l", "ethfrac", "relfrac", "Oil", "lmtnest","lpopl1")
234

235
#Vemos que las variables terminan en "_time"
236
names(repdata)
237

238
#Procedemos a crear el countrytrend
239
index_country_time <- grep("_time$", colnames(repdata))
240

241
country_time_trend <-names(repdata)[index_country_time]
242

243

244
model1_formula <- as.formula(
245
  
246
  paste("gdp_g",
247
        "~",
248
        paste("GPCP_g","GPCP_g_l", paste(country_time_trend, collapse = "+"),
249
              paste(control_vars, collapse = "+")
250
              ,sep="+")
251
  )
252
  
253
)
254

255
# Usamos nuevamente lm para correr el modelo 1 
256

257
m1 <- lm(model1_formula, data = repdata)
258

259
lm_rmse1 <- round(rmse(m1$fitted.values, repdata$gdp_g ) ,2 )
260

261
robust_model1 <- coeftest(m1,
262
                          vcov = vcovCL,
263
                          type = "HC1",
264
                          cluster = ~ ccode)
265

266
sd_robust_model1 <- robust_model1[,2]
267

268
coefci(m1, df = Inf, 
269
       vcov. = vcovCL, cluster = ~ ccode, type = "HC1")
270

271
##Ahora añadimos efectos fijos
272
##############################
273

274
# Usando ccode como una variable tipo factor (variable categórica)
275

276
repdata$ccode_factor <- as.factor(repdata$ccode)
277

278
class(repdata$ccode_factor)
279

280
class(repdata$ccode)
281

282

283
model1_formula <- as.formula(
284
    paste("any_prio",
285
          "~",
286
          paste("GPCP_g","GPCP_g_l","ccode_factor",
287
                paste(country_time_trend, collapse = "+")
288
                , sep="+")
289
    )
290
)
291

292

293
ols_model1 <- lm_robust(model1_formula, data = repdata,
294
                        clusters = ccode, se_type = "stata")
295

296
summary(ols_model1)
297
tidy(ols_model1)
298
glance(ols_model1)
299

300
"Usando LM y luego coestest para los error standar robusto"
301

302
m1 <- lm(model1_formula, data = repdata)
303

304
lm_rmse1 <- round(rmse(m1$fitted.values, repdata$gdp_g ) ,2 )
305

306
robust_model1 <- coeftest(m1 ,
307
                          vcov = vcovCL,
308
                          type = "HC1",
309
                          cluster = ~ ccode)
310

311
##Saco la columna de errores standar
312
sd_robust_model1 <- robust_model1[,2]
313

314
#c.i : confidence interval 
315

316
coefci(m1, df = Inf, 
317
       vcov. = vcovCL, cluster = ~ ccode, type = "HC1")
318

319

320
#### Segundo Modelo CON WARPRIOR----
321

322
# OLS,con efectos fijos o country-time trend
323
# errores estandar robustas (Huber robust)
324
# Los residuos estan clusterizados (agrupados) a nivel país
325
# Using cluster and robust standar error
326

327
"Usando LM y luego coestest para los error standar robusto"
328

329
model2_formula <- as.formula(
330
    paste("war_prio",
331
          "~",
332
          paste("GPCP_g","GPCP_g_l","ccode_factor",
333
                paste(country_time_trend, collapse = "+")
334
                , sep="+")
335
    )
336
)
337

338
m2 <- lm(model2_formula, data = repdata)
339

340
lm_rmse2 <- round(rmse(m2$fitted.values, repdata$gdp_g ) ,2 )
341

342
robust_model2 <- coeftest(m2,
343
                          vcov = vcovCL,
344
                          type = "HC1",
345
                          cluster = ~ ccode)
346

347
sd_robust_model2 <- robust_model2[,2]
348

349
coefci(m2, df = Inf, 
350
       vcov. = vcovCL, cluster = ~ ccode, type = "HC1")
351

352
##Intervalos de confianza
353

354

355
model2_lower = coefci(m2, df = Inf, 
356
                      vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,1]
357

358
model2_upper = coefci(m2, df = Inf,
359
                      vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,2]
360

361

362

363
# RMSE manual
364

365
sq_resid <- m2$residuals**2
366
lm_rmse5 <- round( mean(sq_resid)^0.5, 2)
367

368

369
#############################################
370
#CREAMOS TABLA para pasarla al overleaf######
371
#############################################
372

373
# Export using Stagezer
374

375
stargazer(m1, m2,type="latex",
376
          dep.var.caption = "Dependent variable" ,
377
          dep.var.labels = c("",""),
378
          se=list(sd_robust_model1, sd_robust_model2),
379
          title = "Rainfall and civil conflict(Reduce-form)",
380
          keep = c("GPCP_g","GPCP_g_l"),
381
          covariate.labels=c("Growth in rainfall, t","Growth in rainfall, t-1"),
382
          align = T, no.space = T,
383
          add.lines=list(c("Country fixed effects", "yes","yes"),
384
                          c("Country-specific time trends","yes","yes"),
385
                          c("Root mean square error",lm_rmse1,lm_rmse2)),
386
          keep.stat = c("rsq","n"),
387
          notes.append = FALSE, notes.align = "c",
388
          notes ="Regressions disturbance terms are clustered at the country 
389
          trend", style = "qje")
390

391
##################################
392
####EJERCICIO 1b: COEFTPLOT#######
393
##################################
394

395
#Instalamos los paquetes
396
install.packages("librarian")
397

398
librarian::shelf(
399
  tidyverse
400
  , WDI
401
  , forcats
402
  , writexl
403
  
404
)
405

406

407
#Volvemos a estimar el robust_model1 
408

409
robust_model1 <- coeftest(m1,
410
                          vcov = vcovCL,  
411
                          type = "HC1",
412
                          cluster = ~ ccode)
413

414
# Hallamos el  intervalo de confianza --> model1_lower y model1_upper
415

416
model1_lower = coefci(m1, df = Inf, 
417
                      vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,1]
418

419
model1_upper = coefci(m1, df = Inf,
420
                      vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,2]
421

422
tidy(m1, conf.int = TRUE)
423

424
#Sacamos los coeficientes 
425

426
model1_coef <- robust_model1[2,1]
427
model1_coef_se = robust_model2[2,2]
428

429
#Volvemos a estimar el robust_model2 
430
robust_model2 <- coeftest(m2,
431
                          vcov = vcovCL,  # Matrix varianza-covarianza cluster (CL)
432
                          type = "HC1",
433
                          cluster = ~ ccode)
434

435
# Hallamos el  intervalo de confianza --> model2_lower y model2_upper
436

437
model2_lower = coefci(m2, df = Inf, 
438
                      vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,1]
439

440
model2_upper = coefci(m2, df = Inf,
441
                      vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,2]
442

443
tidy(m2, conf.int = TRUE)
444

445
#Sacamos los coeficientes
446
model2_coef <- robust_model2[2,1]
447
model2_coef_se = robust_model2[2,2]
448

449
# Creamos la tabla con los componentes anteriores para usarla de base
450
#en nuestro coeftplot
451

452
table<- matrix(0, 2, 4) # 2 filas y 4 columnas
453

454

455
table[1,1]<- model1_coef
456
table[1,2]<- model1_coef_se
457

458
table[2,1]<- model2_coef
459
table[2,2]<- model2_coef_se
460

461
table[1,3]<- model1_lower
462
table[1,4]<- model1_upper
463

464
table[2,3]<- model2_lower
465
table[2,4]<- model2_upper
466

467
#En el coeftplot, nombramos nuestros modelos como model OLS1 y OLS2
468

469
colnames(table)<- c("Estimate","se","lower_bound","upper_bound")
470
rownames(table)<- c("OLS 1", "OLS 2")
471

472

473
# Para lograr la exportación a Latex utilizamos el comando "xtable"
474
install.packages("xtable")
475
library(xtable)
476
xtable(table)
477

478
# Pasamos la table creada a dataframe como (tab)
479
tab <- as.data.frame(table)
480

481

482
# Actualizamos unas caracteristicas que serviran para el Coeff-plot
483

484
theme_set(theme_bw(20)) # tema con fondo blanco y cuadro con bordes en negro
485

486
options(repr.plot.width = 8, repr.plot.height =5)  # tamaño del gráfico
487

488
# Finalmente creamos el coeftplot
489

490
tab  %>% ggplot(aes(x=rownames(tab), y=Estimate)) +
491
  geom_point(size=2, color = 'black') +
492
  geom_errorbar(aes(ymin=lower_bound, ymax=upper_bound) , width = 0.05,color="black", size = 0.8) +
493
  labs(x="", y="") + ggtitle("Coefplot (95% CI)")  +
494
  theme(text=element_text(size =15), plot.title = element_text(hjust = 0.5)) +
495
  geom_hline(yintercept=0, linetype="dashed", color = "black", size=1) +
496
  scale_x_discrete(limits = c("OLS 1","OLS 2")) + # order x - axis
497
  theme(panel.grid.major = element_blank(), # borras las cuadrículas en el fondo
498
        panel.grid.minor = element_blank())
499

500
Product

Resources

Company