Path: blob/main/Trabajo_final/grupo4/Grupo 4 - Trabajo Final_R.R
2714 views
## Grupo 412# Flavia Or� - 201912153# Seidy Ascencios - 201916224# Luana Morales - 201912405# Marcela Quintero - 20191445678# clear environment910rm(list=ls(all=TRUE))1112# Load libraries1314install.packages("librarian")151617librarian::shelf(18tidyverse # dplyr, tidyr, stringr, ggplot2, etc in unique library19, haven # to read datset: .dta (stata), .spss, .dbf20, fastDummies # for dummies21, stargazer # for summary and econometrics tables22, sandwich # for linear models23, lmtest # for robust standar error24, estimatr # for iv, cluster, robust standar error (LM_robust)25, lfe # for fixed effects, cluster standar error26, caret # for easy machine learning workflow (mse, rmse)27, texreg # library for export table28, mfx # probit , logit model marginal effects29)303132# set directorio3334user <- Sys.getenv("USERNAME") # username3536setwd( paste0("C:/Users/",user,"/Documents/GitHub/1ECO35_2022_2/data") )3738# Pregunta 1.13940# subimos la base de datos4142repdata <- read_dta("../data/dataverse_files/mss_repdata.dta")4344str(repdata)4546lapply(repdata, class)4748names(repdata)4950attributes(repdata)5152summary(repdata)535455# Observamos las etiquetas de variable de los valores de las variables5657lapply(repdata, attr, 'label')5859lapply(repdata, attr, 'labels')6061repdata$ccode %>% attr('label') # var label6263# dummy por pa�s para efectos fijos646566repdata <- dummy_cols(repdata, select_columns = 'ccode')6768#6970index <- grep("ccode_", colnames(repdata))7172# D*time_var7374repdata$time_year <- repdata$year - 1978 # creando la variable temporal7576list_vars <- names(repdata)[index]7778list_vars[1]7980i = 18182while(i < 42){8384var <- paste0(list_vars[i],"_","time")85repdata[var] <- repdata[list_vars[i]]*repdata["time_year"]8687i = i + 188}8990# TABLE 1: DESCRIPTIVE STATISTICS ----919293table1 <- repdata %>% dplyr::select(NDVI_g, tot_100_g, trade_pGDP,94pop_den_rur, land_crop, va_agr, va_ind_manf) %>% as.data.frame()959697stargazer(table1)9899100# summary.stat = c("n", "p75", "sd") ordenar las columnas de los estadisticos101102list_vars <- c("Tasa de variaci�n de �ndices de vegetaci�n", "T�rminos de intercambio","Porcentaje de las exportaciones recpecto al PBI","Densidad poblacional rural",103"Porcentaje de tierra cultivable en uso","Valor agregado del sector agr�cola respecto PBI","Valor agregado del sector manufacturero respecto PBI")104105stargazer(table1, title = "Descriptive Statistics", digits = 2, # decimales con 2 digitos106covariate.labels = list_vars, # Lista de etiquetas107summary.stat = c("mean", "sd", "n"), # se especifica el orden de los estad�sticos108min.max = F, # borrar el estad�stico de maximo y minimo109notes = "Note.-The source of most characteristics is the World Bank's World Development Indicators (WDI)."110, notes.append = FALSE, # TRUE append the significance levels111notes.align = 'l')112113114115116117# Pregunta 1.2118119# Anteriormente creamos el country_time_trends, ahora crearemos los efectos fijos por pa�s (country fixed effect):120121index_country_time <- grep("_time$", colnames(repdata))122country_time_trend <-names(repdata)[index_country_time]123124125#Usando ccode como una variable tipo factor (variable categ�rica)126127repdata$ccode_factor <- as.factor(repdata$ccode)128129class(repdata$ccode_factor)130131class(repdata$ccode)132133134#Planteamos el modelo 1 donde la variable "y" es Civil conflict > 25 deaths, Usando LM135136137formula_model1 <- as.formula(138paste("any_prio",139"~",140paste("GPCP_g","GPCP_g_l","ccode_factor",141paste(country_time_trend, collapse = "+")142, sep="+")143)144)145146m1 <- lm(formula_model1, data = repdata)147148lm_rmse1 <- round(RMSE(m1$fitted.values, repdata$any_prio ) ,2 )149150151robust_model1 <- coeftest(m1,152vcov = vcovCL,153type = "HC1",154cluster = ~ ccode)155156sd_robust_model1 <- robust_model1[,2]157158coefci(m1, df = Inf,159vcov. = vcovCL, cluster = ~ ccode, type = "HC1")160161162#Planteamos el Modelo 2 donde la variable "y" es Civil conflict > 1000, usando LM163164formula_model2 <- as.formula(165paste("war_prio",166"~",167paste("GPCP_g","GPCP_g_l","ccode_factor",168paste(country_time_trend, collapse = "+")169, sep="+")170)171)172173m2 <- lm(formula_model2, data = repdata)174175lm_rmse2 <- round(RMSE(m1$fitted.values, repdata$war_prio ) ,2 )176177178robust_model2 <- coeftest(m1,179vcov = vcovCL,180type = "HC1",181cluster = ~ ccode)182183sd_robust_model2 <- robust_model2[,2]184185coefci(m1, df = Inf,186vcov. = vcovCL, cluster = ~ ccode, type = "HC1")187188189#latex190stargazer( m1, m2,191se=list(sd_robust_model1, sd_robust_model2),192dep.var.labels = c(""),193title = "Rainfall and Civil Conflict (Reduced-Form)",194keep = c("GPCP_g","GPCP_g_l"),195covariate.labels=c("Growth in rainfall, t","Growth in rainfall, t-1"),196align = T, no.space = T,197add.lines=list(c("Country fixed effects", "yes","yes"),198c("Country-specific time trends", "yes","yes"),199c("Root mean square error",lm_rmse1,lm_rmse2)),200keep.stat = c("rsq","n"),201notes.append = FALSE, notes.align = "l",202notes ="Huber robust standard errors are in parentheses", style = "qje"203)204205206207208# Pregunta 1.3209210# Primer modelo para any_prio211212model1 <- lm(any_prio~ GPCP_g +GPCP_g_l +ccode_factor, data = repdata)213214model1.tab <- coeftest(model1, vcov=vcovHC(model1, type='HC1'))215216model1_coef <- model1.tab[2,1]217218model1_coef_se = model1.tab[2,2]219220model1_lower = coefci(model1, df = Inf, vcov. = vcovHC, type = "HC1")[2,1]221222model1_upper = coefci(model1, df = Inf, vcov. = vcovHC, type= "HC1")[2,2]223224# Segundo modelo para war_prio225226model2 <- lm(war_prio ~ GPCP_g +GPCP_g_l +ccode_factor, data = repdata)227228model2.tab <- coeftest(model2, vcov=vcovHC(model2, type='HC1'))229230model2_coef <- model2.tab[2,1]231232model2_coef_se = model2.tab[2,2]233234model2_lower = coefci(model2,df = Inf, vcov. = vcovHC, type = "HC1")[2,1]235236model2_upper = coefci(model2,df = Inf, vcov. = vcovHC, type = "HC1")[2,2]237238#Tabla de resultados239240table<- matrix(0, 2, 4) # 3 filas y 4 columnas241242table[1,1] = model1_coef243table[1,2] = model1_coef_se244table[1,3] = model1_lower245table[1,4] = model1_upper246247table[2,1] = model2_coef248table[2,2] = model2_coef_se249table[2,3] = model2_lower250table[2,4] = model2_upper251252colnames(table)<- c("Estimate","se","lower_bound","upper_bound")253rownames(table)<- c(" Civil Conflict >=25 Deaths", "Civil Conflict >=1000 Deaths")254255# Exportaci�n a Latex256257tab <- as.data.frame(table)258259# table de matriz a dataframe (tab)260261# Coef-plot262263options(repr.plot.width = 8, repr.plot.height =5)264265# aes: ejes266267tab %>% ggplot(aes(x=rownames(tab), y=Estimate)) +268geom_point(size=2, color = 'red') +269geom_errorbar(aes(ymin=lower_bound, ymax=upper_bound) , width = 0.05,color="blue", size = 0.8) +270labs(x="", y="") + ggtitle("GPCP_g Coefficient (95% CI)") +271theme(text=element_text(size =15), plot.title = element_text(hjust = 0.5)) +272geom_hline(yintercept=0, linetype="dashed", color = "black", size=1) +273scale_x_discrete(limits = c(" Civil Conflict >=25 Deaths", "Civil Conflict >=1000 Deaths")) +274theme(panel.grid.major = element_blank(),275panel.grid.minor = element_blank())276277ggsave("../plots/Coef_plot.png"278, height = 8279, width = 12280, dpi = 320281)282283284