Path: blob/main/Trabajo_final/grupo7/PREGUNTA_1_R.R
2714 views
rm(list=ls(all=TRUE))123# cargando librer�as ----45librarian::shelf(6tidyverse # dplyr, tidyr, stringr, ggplot2, etc in unique library7, haven # to read datset: .dta (stata), .spss, .dbf8, fastDummies # for dummies9, stargazer # for summary and econometrics tables10, sandwich # for linear models11, lmtest # for robust standar error12, estimatr # for iv, cluster, robust standar error (LM_robust)13, lfe # for fixed effects, cluster standar error14, caret # for easy machine learning workflow (mse, rmse)15, texreg # library for export table16, mfx # probit , logit model marginal effects17, xtable18)192021# directorio y base de datos ----22user <- Sys.getenv("USERNAME") # username23setwd( paste0("C:/Users/",user,"/Documents/GitHub/Jose_Pastor_r_py_jl/Python & R & Julia/Trabajo Final") ) # set directorio24repdata <- read_dta("mss_repdata.dta")252627#############################################28# exportar en latex una tabla de estad�sticas (media, desviaci�n est�ndar y total de observaciones) ----29table1 <- repdata %>% dplyr::select(NDVI_g, tot_100, trade_pGDP,30pop_den_rur, land_crop, va_agr,31va_ind_manf) %>% as.data.frame()3233list_vars <- c("NDVI_g", "tot_100", "trade_pGDP", "pop_den_rur", "land_crop", "va_agr", "va_ind_manf")3435stargazer(table1, title = "Descriptive Statistics", digits = 2, # decimales con 2 digitos36covariate.labels = list_vars, # Lista de etiquetas37summary.stat = c("mean", "sd", "n"), # se especifica el orden de los estadísticos38min.max = F, # borrar el estad�stico de maximo y minimo39notes = "Source: World Bank's World Development Indicators (WDI)."40,notes.append = FALSE, # TRUE append the significance levels41notes.align = 'l') # notes.align = 'l' : notas a la izquierda424344#####################################45# Replicar la tabla 3 (pag 737). ----464748# creando dummy por pa�s para efectos fijos49repdata <- dummy_cols(repdata, select_columns = 'ccode')505152# creando variable temporal para que vaya de 1, 2, as�...53repdata$time_year <- repdata$year - 1978545556# creando variable trend effect57index <- grep("ccode_", colnames(repdata))58list_vars <- names(repdata)[index]59list_vars[1]6061i = 162while(i < 42){63var <- paste0(list_vars[i],"_","time")64repdata[var] <- repdata[list_vars[i]]*repdata["time_year"]65i = i + 166}6768repdata$ccode_factor <- as.factor(repdata$ccode) # variable categ�rica6970index_country_time <- grep("_time$", colnames(repdata)) # grep() devuelve las posiciones71country_time_trend <-names(repdata)[index_country_time]72737475#####################76#### Modelo 1 #####7778# Usando as.formula pues la f�rmula del modelo se hace extensa, luego de paste lo convertir� en f�rmula79model1 <- as.formula(80paste("any_prio", "~", paste("GPCP_g", "GPCP_g_l", "ccode_factor", paste(country_time_trend, collapse = "+"),81sep="+")82)83)848586# Usando LM y luego coestest para error standar robusto87m1 <- lm(model1, data = repdata)88lm_rmse1 <- round(RMSE(m1$fitted.values, repdata$gdp_g ), 2 )89robust_model1 <- coeftest(m1,90vcov = vcovCL,91type = "HC1",92cluster = ~ ccode)9394sd_robust_model1 <- robust_model1[,2]9596coefci(m1, df = Inf, vcov. = vcovCL, cluster = ~ ccode, type = "HC1")979899100101102#####################103#### Modelo 2 #####104105# Usando as.formula pues la f�rmula del modelo se hace extensa, luego con paste se convertir� en f�rmula106model2 <- as.formula(107paste("war_prio", "~", paste("GPCP_g", "GPCP_g_l", "ccode_factor", paste(country_time_trend, collapse = "+"),108sep="+")109)110)111112# Usando LM y luego coestest para error standar robusto113m2 <- lm(model2, data = repdata)114lm_rmse2 <- round(RMSE(m2$fitted.values, repdata$gdp_g ), 2 )115robust_model2 <- coeftest(m2,116vcov = vcovCL,117type = "HC1",118cluster = ~ ccode)119120sd_robust_model2 <- robust_model2[,2]121122coefci(m2, df = Inf,123vcov. = vcovCL, cluster = ~ ccode, type = "HC1")124125126127128# Exportando tabla a latex129130stargazer( m1, m2, # los 5 modelos anterioremente calculados131se=list(sd_robust_model1, sd_robust_model2),132dep.var.labels = c(""),133title = "Rainfall and Civil Conflict (Reduced-Form)",134keep = c("GPCP_g","GPCP_g_l"), # las variables que mostrar� en la tabla135covariate.labels=c("Growth in rainfall, t","Growth in rainfall, t-1",136"Growth in rainfall, t+1"), # label de las variables que mostrar� en la tabla137align = T, no.space = T,138add.lines=list(c("Country fixed effects", "yes","yes"),139c("Country-specific time trends","yes","yes"),140c("Root mean square error",lm_rmse1,lm_rmse2)),141keep.stat = c("rsq","n"),142notes.append = FALSE, notes.align = "l",143notes ="Huber robust standard errors are in parentheses",144style = "qje" # estilo de tabla como los journals of economics145)146147148149150###################################################################################################151# Graficar el Coeft plot del coeficiente estimado que corresponde a la variable GPCP g.152# El gr�fico debe incluir el coeficiente e intervalo de confianza respectivo de cada modelo.153154155######### Coefplot Modelo 1 ####################156model1 <- lm(model1, data = repdata)157158model1.tab <- coeftest(model1, vcov=vcovHC(model1, type='HC1')) # vcovHC : heterocedasticidad159# type='HC1': matriz var y cov de Huber-White160model1.tab161162model1_coef <- model1.tab[2,1]163model1_coef_se = model1.tab[2,2]164165model1_lower = coefci(model1, df = Inf, vcov. = vcovHC, type = "HC1")[2,1] # fila 2, columna 1166model1_upper = coefci(model1, df = Inf, vcov. = vcovHC, type = "HC1")[2,2]167168169170######## Coefplot Modelo 2 ###################171model2 <- lm(model2, data = repdata)172173# lmtest: coeftest (errores standar robustos para heterocedasticidad)174model2.tab <- coeftest(model2, vcov=vcovHC(model2, type='HC1')) # vcovHC : heterocedasticidad175# type='HC1': matriz var y cov de Huber-White176model2.tab177178model2_coef <- model2.tab[2,1]179model2_coef_se = model2.tab[2,2]180181model2_lower = coefci(model2, df = Inf, vcov. = vcovHC, type = "HC1")[2,1] # fila 2, columna 1182model2_upper = coefci(model2, df = Inf, vcov. = vcovHC, type = "HC1")[2,2]183184185# Tabla de resultados186table<- matrix(0, 2, 4) # matriz llena de ceros con 3 filas y 4 columnas187188table[1,1]<- model1_coef189table[1,2]<- model1_coef_se190191table[2,1]<- model2_coef192table[2,2]<- model2_coef_se193194table[1,3]<- model1_lower195table[1,4]<- model1_upper196197table[2,3]<- model2_lower198table[2,4]<- model2_upper199200colnames(table)<- c("Estimate","se","lower_bound","upper_bound")201rownames(table)<- c("OLS any_prio", "OLS war_prio")202203# de matriz a dataframe (tab), para poder graficar204tab = as.data.frame(table)205206# Exportaci�n a Latex (overleaf)207xtable(table)208209210211### Coef-plot ###212213theme_set(theme_bw(20)) # tema con fondo blanco y cuadro con bordes en negro214215# aes: ejes216options(repr.plot.width = 8, repr.plot.height =5) # tama�o del gr�fico217218tab %>% ggplot(aes(x=rownames(tab), y=Estimate)) +219geom_point(size=2, color = 'red') +220geom_errorbar(aes(ymin=lower_bound, ymax=upper_bound), width = 0.05, color="black", size = 0.8) +221labs(x="", y="") + ggtitle("Global Precipitation Climatology Project coefficient (95% CI)") +222theme(text=element_text(size =15), plot.title = element_text(hjust = 0.5)) +223geom_hline(yintercept=0, linetype="dashed", color = "black", size=1) +224scale_x_discrete(limits = c("OLS any_prio", "OLS war_prio")) + # order x - axis225theme(panel.grid.major = element_blank(), # borra las cuadr�culas en el fondo226panel.grid.minor = element_blank())227228229ggsave("Coef_plot.png"230, height = 8 # alto231, width = 12 # ancho232, dpi = 320 # resoluci�n233)234235236237238239240241