CoCalc -- scrip_linear_model

GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Lab10/scrip_linear_model_r.R
⁴⁵²⁹ views
1
################  Clase 10 linear models ############################
2
## Curso: Laboratorio de R y Python #################################
3
## @author: Roberto Mendoza
4

5
# clear environment
6

7
rm(list=ls(all=TRUE))
8

9
# Load libraries ----
10

11

12
librarian::shelf(
13
    tidyverse  # dplyr, tidyr, stringr, ggplot2, etc in unique library
14
    , haven   # to read datset: .dta (stata), .spss, .dbf
15
    , fastDummies  # for dummies
16
    , stargazer  # for summary and econometrics tables
17
    , sandwich  # for linear models
18
    , lmtest # for robust standar error
19
    , estimatr # for iv, cluster, robust standar error (LM_robust)
20
    , lfe  # for fixed effects, cluster standar error
21
    , caret # for easy machine learning workflow (mse, rmse)
22
    , texreg # library for export table
23
    , mfx # probit , logit model marginal effects
24
)
25

26

27

28
user <- Sys.getenv("USERNAME")  # username
29

30
setwd( paste0("C:/Users/",user,"/Documents/GitHub/1ECO35_2022_2/Lab10") ) # set directorio
31

32

33
repdata <- read_dta("../data/dataverse_files/mss_repdata.dta")
34

35

36
str(repdata)
37

38
lapply(repdata, class)
39

40
names(repdata)
41

42
attributes(repdata)
43

44
summary(repdata)
45

46
# Observamos las etiquetas de variable de los valores de las variables
47

48
lapply(repdata, attr, 'label')
49

50
lapply(repdata, attr, 'labels')
51

52
repdata$ccode %>% attr('label') # var label
53

54
# dummy por país para efectos fijos
55

56

57
repdata  <- dummy_cols(repdata, select_columns = 'ccode')
58

59
#
60

61
index <- grep("ccode_", colnames(repdata))
62

63
# D*time_var 
64

65
repdata$time_year <- repdata$year - 1978 # creando la variable temporal
66

67
list_vars <- names(repdata)[index]
68

69
list_vars[1]
70

71
i = 1
72

73
while(i < 42){
74

75
    var <- paste0(list_vars[i],"_","time")
76
    repdata[var]  <- repdata[list_vars[i]]*repdata["time_year"]
77

78
    i = i + 1
79
}
80

81

82
# TABLE 1: DESCRIPTIVE STATISTICS ----
83

84

85
table1 <- repdata %>% dplyr::select(any_prio, any_prio_on, any_prio_off,
86
                             war_prio, war_prio_on, war_prio_off, war_col, war_inc, war)
87

88

89
stargazer(table1)
90

91
# No obtenemos resultado pues la libreria exige que la base de datos sea DataFrame
92

93
table1 <- repdata %>% dplyr::select(any_prio, any_prio_on, any_prio_off,
94
                             war_prio, war_prio_on, war_prio_off, war_col, war_inc, war,
95
                             GPCP, GPCP_g, GPCP_g_l,gdp_g, gdp_g_l,
96
        y_0, polity2l, polity2l_6, ethfrac, relfrac, Oil, lmtnest, lpopl1, tot_100_g) %>% as.data.frame()
97

98

99
stargazer(table1)
100

101

102

103
# Ajuste de algunos argumentos digits:
104

105

106

107
stargazer(table1, title = "Descriptive Statistics", digits = 2,
108
          covariate.labels = c("Civil conflict with ≥25 deaths: (PRIO/
109
Uppsala)","Onset","Offset","Civil conflict with ≥1,000 deaths:
110
PRIO/Uppsala","Onset","Offset","Collier and Hoeffler (2002)",
111
                               "Doyle and Sambanis (2000)","Fearon and Laitin (2003)"))
112

113

114
stargazer(table1, title = "Descriptive Statistics", digits = 2,
115
          covariate.labels = c("Civil conflict with ≥25 deaths: (PRIO/
116
Uppsala)","Onset","Offset","Civil conflict with ≥1,000 deaths:
117
PRIO/Uppsala","Onset","Offset","Collier and Hoeffler (2002)",
118
                               "Doyle and Sambanis (2000)","Fearon and Laitin (2003)"),
119
          min.max = F)
120

121

122

123
# summary.stat = c("n", "p75", "sd") ordenar las columnas de los estadisticos
124

125
list_vars <- c("Civil conflict with ≥25 deaths: (PRIO/
126
Uppsala)","Onset","Offset","Civil conflict with ≥1,000 deaths:
127
PRIO/Uppsala","Onset","Offset","Collier and Hoeffler (2002)",
128
               "Doyle and Sambanis (2000)","Fearon and Laitin (2003)",
129
"Annual rainfall (mm), GPCP measure",
130
"Annual growth in rainfall, time t",
131
"Annual growth in rainfall, time t-1",
132
"Annual economic growth rate, time t",
133
"Annual economic growth rate, time t-1",
134
"Log(GDP per capita), 1979",
135
"Democracy level (Polity IV score, -10 to 10), time t-1",
136
"Democracy indicator (Polity IV score > 15),
137
time t-1",
138
"Ethnolinguistic fractionalization (source:
139
Atlas Marodov Mira)",
140
"Religious fractionalization (source: CIA
141
Factbook)",
142
"Oil-exporting country (source: WDI)",
143
"Log(mountainous) (source: Fearon and
144
Laitin 2003)",
145
"Log(national population), time t-1
146
(source: WDI)",
147
"Growth in terms of trade, time t (source:
148
WDI)"
149
)
150

151
stargazer(table1, title = "Descriptive Statistics", digits = 2, # decimales con 2 digitos
152
          covariate.labels = list_vars,  # Lista de etiquetas
153
          summary.stat = c("mean", "sd", "n"), # se especifica el orden de los estadísticos
154
          min.max = F, # borrar el estadístico de maximo y minimo
155
          notes = "Note.—The source of most characteristics is the World Bank’s World Development Indicators (WDI)."
156
          , notes.append = FALSE, # TRUE append the significance levels
157
          notes.align = 'l')
158

159

160

161

162
# TABLE 2: First stage ----
163

164
# OLS simple
165

166
ols_model <- lm(gdp_g ~ GPCP_g + GPCP_g_l, data = repdata)
167

168
attributes(ols_model)
169

170
ols_model$coefficients  # coeficientes
171

172
ols_model$fitted.values  # Y estimado
173

174
# summary table como en stata
175

176
summary(ols_model)
177

178
summary(ols_model)$call
179

180
summary(ols_model)$coef
181

182
# Test de significancia individual usando Huber robust standard errors
183

184
coeftest(ols_model, vcov = vcovHC(ols_model, "HC")) # Clasical white robust
185
coeftest(ols_model, vcov = vcovHC(ols_model, "HC0"))
186

187
coeftest(ols_model, vcov = vcovHC(ols_model, "HC1")) # Huber-White robust (STATA)
188
coeftest(ols_model, vcov = vcovHC(ols_model, "HC2")) # Eicker-Huber-White robust
189
coeftest(ols_model, vcov = vcovHC(ols_model, "HC3"))
190
coeftest(ols_model, vcov = vcovHC(ols_model, "HC4"))
191

192

193
# vcovHC Matrix varianza-covarianza robusta ante heterocedasticidad
194

195
# robust se and cluster
196

197
robust_model <- coeftest(ols_model,
198
                          vcov = vcovCL,  # Matrix varianza-covarianza cluster (CL)
199
                          type = "HC1",
200
                          cluster = ~ ccode)
201

202
sd_robust <- robust_lm[,2]
203

204
#### Primer Modelo ----
205

206
# OLS, sin efectos fijos o country-time trend
207
# errores estandar robustas (Huber robust)
208
# Los residuos estan clusterizados (agrupados) a nivel país
209
# Using cluster and robust standar error
210

211
ols_model1 <- lm_robust(gdp_g ~ GPCP_g + GPCP_g_l, data = repdata,
212
              clusters = ccode, se_type = "stata")
213

214

215
summary(ols_model1)
216

217
attributes(ols_model1)
218

219
ols_model1$r.squared
220

221
rmse1 <- RMSE(ols_model1$fitted.values, repdata$gdp_g ) # root mean squeare error
222

223
RMSE(ols_model1$fitted.values, repdata$gdp_g )^2 # mean square error
224

225

226
# using tdyr functions
227

228

229
glance(ols_model1)
230

231
tidy(ols_model1)
232

233
tidy(ols_model1)
234

235
htmlreg(ols_model1)
236

237
tidy(ols_model1)[2,2] # coeficiente
238
tidy(ols_model1)[2,6] # limite inferior
239
tidy(ols_model1)[2,7] # limite superior 
240

241

242
"Usando LM y luego coestest para los error standar robusto"
243

244
m1 <- lm(gdp_g ~ GPCP_g + GPCP_g_l, data = repdata)
245

246
lm_rmse1 <- round(RMSE(m1$fitted.values, repdata$gdp_g ) ,2 )
247

248
robust_model1 <- coeftest(m1,
249
vcov = vcovCL,  # Matrix varianza-covarianza cluster (CL)
250
type = "HC1",
251
cluster = ~ ccode)
252

253
sd_robust_model1 <- robust_model1[,2]
254

255
# intervalo de confianza 
256

257
model1_lower = coefci(m1, df = Inf, 
258
                      vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,1]
259

260
model1_upper = coefci(m1, df = Inf,
261
                      vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,2]
262

263
tidy(m1, conf.int = TRUE)
264

265
#### Segundo Modelo ----
266
# No efectos fijos (country), Si country-time trends
267
# errores estandar robustas (Huber-white robust)
268
# termino de perturbación están clusterizados (agrupados) a nivel país
269
# Se añade variables de control
270

271
control_vars <- c("y_0", "polity2l", "ethfrac", "relfrac", "Oil", "lmtnest","lpopl1")
272

273
# Usando as.formula pues el modelo se hace extenso
274

275
names(repdata)
276

277
index_country_time <- grep("_time$", colnames(repdata))
278

279
country_time_trend <-names(repdata)[index_country_time]
280

281

282
model2_formula <- as.formula(
283
  
284
    paste("gdp_g",
285
          "~",
286
          paste("GPCP_g","GPCP_g_l", paste(country_time_trend, collapse = "+"),
287
                paste(control_vars, collapse = "+")
288
                ,sep="+")
289
          )
290
    
291
    )
292

293
# Usando LM_robust 
294

295
ols_model2 <- lm_robust(model2_formula, data = repdata,
296
                        clusters = ccode, se_type = "stata")
297

298

299
summary(ols_model2)
300

301

302

303
glance(ols_model2)
304

305
tidy(ols_model2)
306

307
rmse2 <- RMSE(ols_model2$fitted.values, repdata$gdp_g ) # root mean squeare error
308

309

310
htmlreg(list(ols_model1,ols_model2))
311

312
"Usando LM y luego coestest para error standar robusto"
313

314

315
m2 <- lm(model2_formula, data = repdata)
316

317
lm_rmse2 <- round(RMSE(m2$fitted.values, repdata$gdp_g ) ,2 )
318

319
robust_model2 <- coeftest(m2,
320
                          vcov = vcovCL,
321
                          type = "HC1",
322
                          cluster = ~ ccode)
323

324
sd_robust_model2 <- robust_model2[,2]
325

326
coefci(m2, df = Inf, 
327
       vcov. = vcovCL, cluster = ~ ccode, type = "HC1")
328

329

330
#### Tercer Modelo ----
331
# Si efectos fijos (country), Si country-time trends
332
# errores estandar robustas (Huber robust)
333
# termino de perturbación están clusterizados (agrupados) a nivel país
334

335
# 1.0 Uso de lm_robust
336

337
index_country_time <- grep("_time$", colnames(repdata))
338

339
country_time_trend <-names(repdata)[index_country_time]
340

341
index_country <- grep("^ccode_\\d+$", colnames(repdata))
342

343
country_fe <-names(repdata)[index_country]
344

345
model3_formula <- as.formula(
346
    paste("gdp_g",
347
          "~",
348
          paste("GPCP_g","GPCP_g_l", paste(country_time_trend, collapse = "+"),
349
                paste(country_fe, collapse = "+")
350
                , sep="+")
351
    )
352
)
353

354

355
ols_model3 <- lm_robust(model3_formula, data = repdata,
356
                        clusters = ccode, se_type = "stata")
357

358

359
summary(ols_model3)
360

361
glance(ols_model3)
362

363
tidy(ols_model3)
364

365
rmse3 <- RMSE(ols_model3$fitted.values, repdata$gdp_g ) # root mean squeare error
366

367

368

369
rmse3 <- RMSE(ols_model3$fitted.values, repdata$gdp_g ) # root mean squeare error
370

371
# 2. Incluyendo ccode como argumento de efectos fijos en LM_robust
372

373
model3_formula <- as.formula(
374
    paste("gdp_g",
375
          "~",
376
          paste("GPCP_g","GPCP_g_l",
377
                paste(country_time_trend, collapse = "+")
378
                , sep="+")
379
    )
380
)
381

382

383
ols_model3 <- lm_robust(model3_formula, data = repdata,
384
                        clusters = ccode, se_type = "stata", fixed_effects = ~ ccode)
385

386

387
summary(ols_model3)
388
tidy(ols_model3)
389
glance(ols_model3)
390

391
rmse3 <- RMSE(ols_model3$fitted.values, repdata$gdp_g ) # root mean squeare error
392

393
# !! Usar para el trabajo final 
394

395
# 3. Usando ccode como una variable tipo factor (variable categórica)
396

397
repdata$ccode_factor <- as.factor(repdata$ccode)
398

399
class(repdata$ccode_factor)
400

401
class(repdata$ccode)
402

403

404
model3_formula <- as.formula(
405
    paste("gdp_g",
406
          "~",
407
          paste("GPCP_g","GPCP_g_l", "ccode_factor",
408
                paste(country_time_trend, collapse = "+")
409
                , sep="+")
410
    )
411
)
412

413

414
ols_model3 <- lm_robust(model3_formula, data = repdata,
415
                        clusters = ccode, se_type = "stata")
416

417
summary(ols_model3)
418
tidy(ols_model3)
419
glance(ols_model3)
420

421
"Usando LM y luego coestest para los error standar robusto"
422

423

424
m3 <- lm(model3_formula, data = repdata)
425

426
lm_rmse3 <- round(RMSE(m3$fitted.values, repdata$gdp_g ) ,2 )
427

428
robust_model3 <- coeftest(m3 ,
429
                          vcov = vcovCL,
430
                          type = "HC1",
431
                          cluster = ~ ccode)
432

433

434

435
sd_robust_model3 <- robust_model3[,2]
436
#c.i : confidence interval 
437

438
coefci(m3, df = Inf, 
439
       vcov. = vcovCL, cluster = ~ ccode, type = "HC1")
440

441
#### Cuarto modelo ----
442
# Si efectos fijos (country), Si country-time trends
443
# errores estandar robustas (Huber robust)
444
# termino de perturbación están clusterizados (agrupados) a nivel país
445
# Se añade Growth in rainfall del siguiente año
446

447
model4_formula <- as.formula(
448
    paste("gdp_g",
449
          "~",
450
          paste("GPCP_g","GPCP_g_l","GPCP_g_fl",
451
                paste(country_time_trend, collapse = "+")
452
                , sep="+")
453
    )
454
)
455

456

457
ols_model4 <- lm_robust(model4_formula, data = repdata,
458
                        clusters = ccode, se_type = "stata", fixed_effects = ~ ccode)
459

460

461
summary(ols_model4)
462
tidy(ols_model4)
463
glance(ols_model4)
464

465
rmse4 <- RMSE(ols_model4$fitted.values, repdata$gdp_g ) # root mean squeare error
466

467

468
"Usando LM y luego coestest para los error standar robusto"
469

470
model4_formula <- as.formula(
471
    paste("gdp_g",
472
          "~",
473
          paste("GPCP_g","GPCP_g_l","GPCP_g_fl","ccode_factor",
474
                paste(country_time_trend, collapse = "+")
475
                , sep="+")
476
    )
477
)
478

479
m4 <- lm(model4_formula, data = repdata)
480

481
lm_rmse4 <- round(RMSE(m4$fitted.values, repdata$gdp_g ) ,2 )
482

483

484
robust_model4 <- coeftest(m4,
485
                          vcov = vcovCL,
486
                          type = "HC1",
487
                          cluster = ~ ccode)
488

489
sd_robust_model4 <- robust_model4[,2]
490

491
coefci(m4, df = Inf, 
492
       vcov. = vcovCL, cluster = ~ ccode, type = "HC1")
493

494
#### Quinto modelo ----
495
# Si efectos fijos (country), Si country-time trends
496
# errores estandar robustas (Huber robust)
497
# termino de perturbación están clusterizados (agrupados) a nivel país
498
# Se añade Growth in terms of trade
499

500
model5_formula <- as.formula(
501
    paste("gdp_g",
502
          "~",
503
          paste("GPCP_g","GPCP_g_l","tot_100_g",
504
                paste(country_time_trend, collapse = "+")
505
                , sep="+")
506
    )
507
)
508

509

510
ols_model5 <- lm_robust(model5_formula, data = repdata,
511
                        clusters = ccode, se_type = "stata", fixed_effects = ~ ccode)
512

513

514
summary(ols_model5)
515
tidy(ols_model5)
516
glance(ols_model5)
517

518
rmse5 <- RMSE(ols_model5$fitted.values, repdata$gdp_g ) # root mean squeare error
519

520

521
"Usando LM y luego coestest para los error standar robusto"
522

523
model5_formula <- as.formula(
524
    paste("gdp_g",
525
          "~",
526
          paste("GPCP_g","GPCP_g_l","tot_100_g","ccode_factor",
527
                paste(country_time_trend, collapse = "+")
528
                , sep="+")
529
    )
530
)
531

532
m5 <- lm(model5_formula, data = repdata)
533

534
robust_model5 <- coeftest(m5,
535
                          vcov = vcovCL,
536
                          type = "HC1",
537
                          cluster = ~ ccode)
538

539
sd_robust_model5 <- robust_model5[,2]
540

541
coefci(m5, df = Inf, 
542
       vcov. = vcovCL, cluster = ~ ccode, type = "HC1")
543

544
# RMSE manual
545

546
sq_resid <- m5$residuals**2
547
lm_rmse5 <- round( mean(sq_resid)^0.5, 2)
548

549
# Export tables ----
550

551
#
552
# stargazer::stargazer(ols_model1, ols_model2, ols_model3, ols_model4, ols_model5)
553
#
554
#
555
# stargazer::stargazer(ols_model5, se = starprep(ols_model5))
556

557
texreg(list(ols_model1, ols_model2, ols_model3, ols_model4, ols_model5),
558
       custom.coef.map = list("GPCP_g"="Growth in rainfall, t",
559
                              "GPCP_g_l"="Growth in rainfall, t-1",
560
                              "GPCP_g_fl" = "Growth in rainfall,t+1",
561
                              "tot_100_g" = "Growth in terms of trade, t" ,
562
                              "y_0" = "Log(GDP per capita), 1979",
563
                              "polity2l" = "Democracy (Polity IV), t-1",
564
                              "ethfrac" = "Ethnolinguistic fractionalization",
565
                              "relfrac" = "Religious fractionalization",
566
                              "Oil" = "Oil-exporting country",
567
                              "lmtnest" = "Log(mountainous)",
568
                              "lpopl1" = "Log(national population), t-1"
569
                              ), digits = 3,
570
       stars = c(0.01, 0.05, 0.1),
571
       custom.gof.rows = list("Country fixed effects" = c("no", "no", "yes", "yes", "yes"),
572
                              "Country-specific time trends" = c("no", "yes", "yes", "yes", "yes"),
573
                              "RMSE" = c(rmse1,rmse2,rmse3,rmse4,rmse5)),
574
       caption = "Dependent Variable: Economic Growth Rate, t")
575

576
# digits: 3 digitos
577
# stars = c(0.01, 0.05, 0.1), *** 1%, ** 5%, ** 10%
578

579
# Export using Stagezer
580

581

582
stargazer( m1, m2, m3, m4, m5,
583
           se=list(sd_robust_model1, sd_robust_model2, sd_robust_model3,
584
                   sd_robust_model4, sd_robust_model5),
585
           dep.var.labels = c(""),
586
           title = "Rainfall and Economic Growth (First-Stage)",
587
           keep = c("GPCP_g","GPCP_g_l","GPCP_g_fl","tot_100_g","y_0",
588
                    "polity2l","ethfrac","relfrac","Oil",
589
                    "lmtnest","lpopl1"),
590
           covariate.labels=c("Growth in rainfall, t","Growth in rainfall, t-1",
591
                              "Growth in rainfall, t+1",
592
                              "Growth in terms of trade, t","Log(GDP per capita), 1979",
593
                              "Democracy (Polity IV), t-1","Ethnolinguistic fractionalization",
594
                              "Religious fractionalization","Oil-exporting country",
595
                              "Log(mountainous)","Log(national population), t-1"),
596
           align = T, no.space = T,
597
           add.lines=list(c("Country fixed effects", "no", "no","yes","yes","yes"),
598
                c("Country-specific time trends","no", "yes","yes","yes","yes"),
599
                c("Root mean square error",lm_rmse1,lm_rmse2,lm_rmse3,lm_rmse4,lm_rmse5)),
600
           keep.stat = c("rsq","n"),
601
           notes.append = FALSE, notes.align = "l",
602
            notes ="Huber robust standard errors are in parentheses", style = "qje"
603
           )
604

605
# qje: quataerly journal of economics
606

607
# TABL4: OLS VERSUS 2SLS ----
608

609
prob_formula <- as.formula(
610
    paste("any_prio",
611
          "~",
612
          paste("gdp_g","gdp_g_l",
613
                paste(control_vars, collapse = "+"),
614
                "time_year"
615
                , sep="+")
616
    )
617
)
618

619

620
# Logit
621

622
logit <- glm(prob_formula, data = repdata, family = binomial)
623

624
attributes(logit)
625

626
# cluster terminos de perturbación y robust standar error
627

628
coeftest(logit, vcov. = vcovCL(logit, cluster = ~ccode, type = "HC"))
629

630
# Probit
631

632
probit <- glm(prob_formula, data = repdata, family = binomial(link = "probit"))
633

634
attributes(probit)
635

636
coeftest(probit, vcov. = vcovCL(probit, cluster = ~ccode, type = "HC"))
637

638

639
# Usar la libreria mfx base Econometrics Greene
640

641
# Logit model
642

643
###  Logit, Probit models ----
644

645
logit <- logitmfx(prob_formula, data = repdata, atmean = TRUE, robust = T)
646

647
logit <- logitmfx(prob_formula, data = repdata, atmean = TRUE, robust = T,
648
                    clustervar1 = "ccode")
649

650

651
# Probit model
652

653
probit <- probitmfx(prob_formula, data = repdata, atmean = TRUE, robust = T)
654

655
probit <- probitmfx(prob_formula, data = repdata, atmean = TRUE, robust = T,
656
                    clustervar1 = "ccode")
657

658

659
summary(probit)
660

661
# Export table
662

663
texreg(list(logit, probit))
664

665
htmlreg(list(logit, probit)) # export to html
666

667
###  Modelo 2 ----
668

669
model2_formula <- as.formula(
670
    paste("any_prio",
671
          "~",
672
          paste("gdp_g","gdp_g_l",
673
                paste(control_vars, collapse = "+"),
674
                "time_year"
675
                , sep="+")
676
    )
677
)
678

679

680
ols_model2 <- lm_robust(model2_formula, data = repdata,
681
                        clusters = ccode, se_type = "stata")
682

683
summary(ols_model2)
684

685
glance(ols_model2)
686

687
tidy(ols_model2)
688

689
rmse2 <- round( RMSE(ols_model2$fitted.values, repdata$any_prio ), 2)
690

691
###  Modelo 3 ----
692

693
model3_formula <- as.formula(
694
    paste("any_prio",
695
          "~",
696
          paste("gdp_g","gdp_g_l",
697
                paste(control_vars, collapse = "+"),
698
                paste(country_time_trend, collapse = "+")
699
                , sep="+")
700
    )
701
)
702

703

704
ols_model3 <- lm_robust(model3_formula, data = repdata,
705
                        clusters = ccode, se_type = "stata")
706

707
summary(ols_model3)
708

709
glance(ols_model3)
710

711
tidy(ols_model3)
712

713
rmse3 <- round(RMSE(ols_model3$fitted.values, repdata$any_prio ), 2)
714

715
###  Modelo 4 ----
716

717
model4_formula <- as.formula(
718
    paste("any_prio",
719
          "~",
720
          paste("gdp_g","gdp_g_l",
721
                paste(country_time_trend, collapse = "+")
722
                , sep="+")
723
    )
724
)
725

726

727
ols_model4 <- lm_robust(model4_formula, data = repdata,
728
                        clusters = ccode, se_type = "stata", fixed_effects = ~ ccode)
729

730
summary(ols_model4)
731

732
glance(ols_model4)
733

734
tidy(ols_model4)
735

736
rmse4 <- round( RMSE(ols_model4$fitted.values, repdata$any_prio ), 2)
737

738

739
###  Modelo 5 IV ----
740

741
model5_formula <- as.formula(
742
    paste(
743
    paste("any_prio",
744
          "~",
745
          paste("gdp_g","gdp_g_l",
746
                paste(country_time_trend, collapse = "+")
747
                , paste(control_vars, collapse = "+")
748
                , sep="+")
749
    )
750
    , paste("GPCP_g", "GPCP_g_l",
751
            paste(country_time_trend, collapse = "+")
752
            , paste(control_vars, collapse = "+")
753
            , sep="+")
754
    , sep=" | "
755
    )
756
)
757

758

759

760
ols_model5 <- iv_robust(model5_formula, data = repdata,
761
                        clusters = ccode, se_type = "stata")
762

763
summary(ols_model5)
764

765
glance(ols_model5)
766

767
tidy(ols_model5)
768

769
rmse5 <- round(RMSE(ols_model5$fitted.values, repdata$any_prio ), 2)
770

771

772
### Modelo 6 IV ----
773

774
model6_formula <- as.formula(
775
    paste(
776
        paste("any_prio",
777
              "~",
778
              paste("gdp_g","gdp_g_l",
779
                    paste(country_time_trend, collapse = "+")
780
                    , sep="+")
781
        )
782
        , paste("GPCP_g", "GPCP_g_l",
783
                paste(country_time_trend, collapse = "+")
784
                , sep="+")
785
        , sep=" | "
786
    )
787
)
788

789

790
ols_model6 <- iv_robust(model6_formula, data = repdata,
791
                        clusters = ccode, se_type = "stata",
792
                        fixed_effects = ~ ccode)
793

794
summary(ols_model6)
795

796
glance(ols_model6)
797

798
tidy(ols_model6)
799

800
rmse6 <- round(RMSE(ols_model6$fitted.values, repdata$any_prio ), 2)
801

802

803
### Modelo 7 IV ----
804

805
model7_formula <- as.formula(
806
    paste(
807
        paste("war_prio",
808
              "~",
809
              paste("gdp_g","gdp_g_l",
810
                    paste(country_time_trend, collapse = "+")
811
                    , sep="+")
812
        )
813
        , paste("GPCP_g", "GPCP_g_l",
814
                paste(country_time_trend, collapse = "+")
815
                , sep="+")
816
        , sep=" | "
817
    )
818
)
819

820

821
ols_model7 <- iv_robust(model7_formula, data = repdata,
822
                        clusters = ccode, se_type = "stata",
823
                        fixed_effects = ~ ccode)
824

825
summary(ols_model7)
826

827
glance(ols_model7)
828

829
tidy(ols_model7)
830

831
rmse7 <- round(RMSE(ols_model7$fitted.values, repdata$war_prio ) ,2 )
832

833

834
# Export table
835

836
texreg(list(logit, probit, ols_model2, ols_model3, ols_model4,
837
            ols_model5, ols_model6, ols_model7),
838
       custom.coef.map = list("gdp_g"="Economic growth rate, t",
839
                              "gdp_g_l"="Economic growth rate, t-1",
840
                              "y_0" = "Log(GDP per capita), 1979",
841
                              "polity2l" = "Democracy (Polity IV), t-1",
842
                              "ethfrac" = "Ethnolinguistic fractionalization",
843
                              "relfrac" = "Religious fractionalization",
844
                              "Oil" = "Oil-exporting country",
845
                              "lmtnest" = "Log(mountainous)",
846
                              "lpopl1" = "Log(national population), t-1"
847

848
       ), digits = 3,
849
       stars = c(0.01, 0.05, 0.1),
850
       custom.gof.rows = list("Country fixed effects" = c("no","no", "no","no", "yes", "no","yes", "yes"),
851
                              "Country-specific time trends" = c("no","no", "no","no", "yes", "yes","yes", "yes"),
852
                              "RMSE" = c("","",rmse2,rmse3,rmse4,rmse5,rmse6,rmse7)),
853
       caption = "Economic Growth and Civil Conflict")
854

855

856

857
# Links referencias ----
858

859
## glmnet library para modelos de machine learning lineales
860

861
"https://glmnet.stanford.edu/articles/glmnet.html"
862

863
## Stagezer
864

865
"chrome-extension://efaidnbmnnnibpcajpcglclefindmkaj/https://www.princeton.edu/~otorres/NiceOutputR.pdf"
866

867
"chrome-extension://efaidnbmnnnibpcajpcglclefindmkaj/https://cran.r-project.org/web/packages/stargazer/stargazer.pdf"
868

869

870
# Usando fle library -----------------------------------------
871

872
# Modelo 3 de TABLE 2
873

874
model3_formula <- as.formula(
875
    paste("gdp_g",
876
          "~",
877
          paste("GPCP_g","GPCP_g_l",
878
                paste(country_time_trend, collapse = "+")
879
                , sep="+")," |ccode|0| ccode"
880
    )
881
)
882

883

884

885
# | ccode | 0 | ccode, primer ccode: efectos fijos
886
# segundo ccode: cluster el termino de perturbación a nivel país
887

888
ols_model3 <- felm( model3_formula, data = repdata )
889
attributes(ols_model3)
890

891
rmse3 <- RMSE(ols_model3$fitted.values, repdata$gdp_g ) # root mean squeare error
892

893
summary(ols_model3)
894

895
tidy(ols_model3)
896

897
glance(ols_model3)
898

899
ols_model3$rse  # just robust standar error no cluster terminos de perturbación
900

901

902
# Modelo 5 de TABLE 4
903

904
model5_formula <- as.formula(
905
    paste(
906
        paste("any_prio",
907
              "~",
908
              paste("gdp_g","gdp_g_l",
909
                    paste(country_time_trend, collapse = "+")
910
                    , paste(control_vars, collapse = "+")
911
                    , sep="+")
912
        )
913
        , "|0|(gdp_g|gdp_g_l ~ GPCP_g + GPCP_g_l)| ccode"
914
    )
915
)
916

917
ols_model5_fle <- felm( model5_formula, data = repdata )
918

919
summary(ols_model5_fle)
920

921
glance(ols_model5_fle)
922

923
tidy(ols_model5_fle)
924

925
rmse5_fle <- RMSE(ols_model5_fle$fitted.values, repdata$any_prio )
926

927
# Modelo 6 de TABLE 4
928

929
model6_formula <- as.formula(
930
    paste(
931
        paste("any_prio",
932
              "~",
933
              paste("gdp_g","gdp_g_l",
934
                    paste(country_time_trend, collapse = "+")
935
                    , sep="+")
936
        )
937
        , "|ccode|(gdp_g|gdp_g_l ~ GPCP_g + GPCP_g_l)| ccode"
938
    )
939
)
940

941
ols_model6_fle <- felm( model6_formula, data = repdata )
942

943
summary(ols_model6_fle)
944

945
glance(ols_model6_fle)
946

947
tidy(ols_model6_fle)
948

949
rmse6_fle <- round( RMSE(ols_model6_fle$fitted.values, repdata$any_prio ), 2)
950

951

952

953

954

955

956

957

958

959

960

961

962
Product

Resources

Company