Path: blob/main/Trabajo_final/grupo2/Trabajo Final R.R
2714 views
##########################################################1########Trabajo Final del curso de R y Python ############2##########################################################34################ EJERCICIO 1 (a)(b)(c)####################56################### Grupo 2###############################78#rm(list=ls(all=TRUE))910####Instalamos los paquetes y librerias que usaremos para este ejercicio ----1112librarian::shelf(13tidyverse # dplyr, tidyr, stringr, ggplot2, etc in unique library14, haven # to read datset: .dta (stata), .spss, .dbf15, fastDummies # for dummies16, stargazer # for summary and econometrics tables17, sandwich # for linear models18, lmtest # for robust standar error19, estimatr # for iv, cluster, robust standar error (LM_robust)20, lfe # for fixed effects, cluster standar error21, caret # for easy machine learning workflow (mse, rmse)22, texreg # library for export table23, mfx # probit , logit model marginal effects24)2526##Instalamos metrics2728install.packages("Metrics")29library(Metrics)303132###Instalamos el paquete caret aparte###33install.packages('caret', dependencies = TRUE)34library(caret)35library(estimatr)36##Acedemos a la base de datos###37user <- Sys.getenv("USERNAME") # username3839setwd( paste0("C:/Users/",user,"/Documents/GitHub/1ECO35_2022_2/Lab10") ) # set directorio4041##Nombramos la data como "repdata"42repdata <- read_dta("../data/dataverse_files/mss_repdata.dta")4344##Exploramos la data y vemos sus caracteristicas45str(repdata)4647lapply(repdata, class)4849names(repdata)5051attributes(repdata)5253summary(repdata)5455# Observamos las etiquetas de variable de los valores de las variables5657lapply(repdata, attr, 'label')5859lapply(repdata, attr, 'labels')6061repdata$ccode %>% attr('label') # var label6263# dummy por país para efectos fijos que nos servira más adelante6465repdata <- dummy_cols(repdata, select_columns = 'ccode')6667#68index <- grep("ccode_", colnames(repdata))6970# Creamos la variable temporal7172repdata$time_year <- repdata$year - 1981 # creando la variable temporal7374list_vars <- names(repdata)[index]7576list_vars[1]7778i = 17980while(i < 42){8182var <- paste0(list_vars[i],"_","time")83repdata[var] <- repdata[list_vars[i]]*repdata["time_year"]8485i = i + 186}8788####Con todos estos elementos, ya tenemos la base lista89####para ser utilizada9091#######################92####EJERCICIO 1a#######93#######################9495# Realizamos una tabla descriptiva de estadisticas de las siguientes variables9697#NDVI g (Tasa de variacion del ındice de vegetacion),98#tot 100 (terminos de intercambio),99#trade pGDP (porcentaje de las exportaciones respecto al PBI)100#pop den rur (Densidad poblacional rural),101#land crop (porcentaje de tierra cultivable en uso),102#va agr (Valor agregado del sector agrıcultura/ PBI)103#va ind manf (Valor agregado del sector manufactura/ PBI)104105# CREAMOS TABLE 1:106107table1 <- repdata %>% dplyr::select(NDVI_g, tot_100, trade_pGDP,108pop_den_rur, land_crop, va_agr,109va_ind_manf ) %>% as.data.frame()110111##Ajustamos la tabla 1 con lo que me piden112113stargazer(table1, title = "Estadísticas Descriptivas", digits = 2,114covariate.labels = c("Tasa de variacion del ındice de vegetacion",115"Terminos de intercambio","% Exportaciones respecto al PBI","Densidad poblacional rural",116"% Tierra cultivable en uso","Valor agregado del sector agrıcultura/ PBI",117"Valor agregado del sector manufactura/ PBI",118"Doyle and Sambanis (2000)","Fearon and Laitin (2003)"),119min.max = F)120121122#######################123####EJERCICIO 1b#######124#######################125126##Replicar la tabla 3 (pag 737)127128#Establecemos el modelo OLS simple129130ols_model <- lm(gdp_g ~ GPCP_g + GPCP_g_l, data = repdata)131132attributes(ols_model)133134#Con este comando podemos ver los coeficientes del modelo135ols_model$coefficients136137#con este comando vemos los valores del vector Y estimado138ols_model$fitted.values139140# Para conocer bien el modelo hacemos un summary table141142summary(ols_model)143144summary(ols_model)$call145146summary(ols_model)$coef147148# Aplicamos el Test de significancia individual149#usando Huber white robust standard errors150151coeftest(ols_model, vcov = vcovHC(ols_model, "HC1")) # Huber-White robust (STATA)152153154# Creamos el sd_robust y el cluster155156robust_model <- coeftest(ols_model,157vcov = vcovCL, # Matrix varianza-covarianza cluster (CL)158type = "HC1",159cluster = ~ ccode)160161sd_robust <- robust_model[,2]162163164#### Primer Modelo ----165166# OLS,con efectos fijos o country-time trend167# errores estandar robustas (Huber robust)168# Los residuos estan clusterizados (agrupados) a nivel país169# Using cluster and robust standar error170171ols_model1 <- lm_robust(gdp_g ~ GPCP_g + GPCP_g_l, data = repdata,172clusters = ccode, se_type = "stata")173174175summary(ols_model1)176177attributes(ols_model1)178179ols_model1$r.squared180ols_model1$fitted.values181182#Estimamos los root mean squeare error183184rmse1 <- rmse(ols_model1$fitted.values, repdata$gdp_g )185186rmse(ols_model1$fitted.values, repdata$gdp_g )^2 # mean square error187188189# Aqui vemos los coeficientes y otras funciones de nuestro ols model1190191192glance(ols_model1)193194tidy(ols_model1)195196tidy(ols_model1)197198htmlreg(ols_model1)199200tidy(ols_model1)[2,2] # coeficiente201tidy(ols_model1)[2,6] # limite inferior202tidy(ols_model1)[2,7] # limite superior203204##Dado que stargazer no es compatible, utilizamos comando LM205206m1 <- lm(gdp_g ~ GPCP_g + GPCP_g_l, data = repdata)207208lm_rmse1 <- round(rmse(m1$fitted.values, repdata$gdp_g ) ,2 )209print(lm_rmse1)210211robust_model1 <- coeftest(m1,212vcov = vcovCL, # Matrix varianza-covarianza cluster (CL)213type = "HC1",214cluster = ~ ccode)215216##Para acceder a la columna de errores estandar -->sd_robust_model1217sd_robust_model1 <- robust_model1[,2]218219# Hallamos el intervalo de confianza --> model1_lower220221model1_lower = coefci(m1, df = Inf,222vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,1]223224model1_upper = coefci(m1, df = Inf,225vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,2]226227tidy(m1, conf.int = TRUE)228229##Agrego el contrytime trends230231#Primero deinimos las variables de control232control_vars <- c("y_0", "polity2l", "ethfrac", "relfrac", "Oil", "lmtnest","lpopl1")233234#Vemos que las variables terminan en "_time"235names(repdata)236237#Procedemos a crear el countrytrend238index_country_time <- grep("_time$", colnames(repdata))239240country_time_trend <-names(repdata)[index_country_time]241242243model1_formula <- as.formula(244245paste("gdp_g",246"~",247paste("GPCP_g","GPCP_g_l", paste(country_time_trend, collapse = "+"),248paste(control_vars, collapse = "+")249,sep="+")250)251252)253254# Usamos nuevamente lm para correr el modelo 1255256m1 <- lm(model1_formula, data = repdata)257258lm_rmse1 <- round(rmse(m1$fitted.values, repdata$gdp_g ) ,2 )259260robust_model1 <- coeftest(m1,261vcov = vcovCL,262type = "HC1",263cluster = ~ ccode)264265sd_robust_model1 <- robust_model1[,2]266267coefci(m1, df = Inf,268vcov. = vcovCL, cluster = ~ ccode, type = "HC1")269270##Ahora añadimos efectos fijos271##############################272273# Usando ccode como una variable tipo factor (variable categórica)274275repdata$ccode_factor <- as.factor(repdata$ccode)276277class(repdata$ccode_factor)278279class(repdata$ccode)280281282model1_formula <- as.formula(283paste("any_prio",284"~",285paste("GPCP_g","GPCP_g_l","ccode_factor",286paste(country_time_trend, collapse = "+")287, sep="+")288)289)290291292ols_model1 <- lm_robust(model1_formula, data = repdata,293clusters = ccode, se_type = "stata")294295summary(ols_model1)296tidy(ols_model1)297glance(ols_model1)298299"Usando LM y luego coestest para los error standar robusto"300301m1 <- lm(model1_formula, data = repdata)302303lm_rmse1 <- round(rmse(m1$fitted.values, repdata$gdp_g ) ,2 )304305robust_model1 <- coeftest(m1 ,306vcov = vcovCL,307type = "HC1",308cluster = ~ ccode)309310##Saco la columna de errores standar311sd_robust_model1 <- robust_model1[,2]312313#c.i : confidence interval314315coefci(m1, df = Inf,316vcov. = vcovCL, cluster = ~ ccode, type = "HC1")317318319#### Segundo Modelo CON WARPRIOR----320321# OLS,con efectos fijos o country-time trend322# errores estandar robustas (Huber robust)323# Los residuos estan clusterizados (agrupados) a nivel país324# Using cluster and robust standar error325326"Usando LM y luego coestest para los error standar robusto"327328model2_formula <- as.formula(329paste("war_prio",330"~",331paste("GPCP_g","GPCP_g_l","ccode_factor",332paste(country_time_trend, collapse = "+")333, sep="+")334)335)336337m2 <- lm(model2_formula, data = repdata)338339lm_rmse2 <- round(rmse(m2$fitted.values, repdata$gdp_g ) ,2 )340341robust_model2 <- coeftest(m2,342vcov = vcovCL,343type = "HC1",344cluster = ~ ccode)345346sd_robust_model2 <- robust_model2[,2]347348coefci(m2, df = Inf,349vcov. = vcovCL, cluster = ~ ccode, type = "HC1")350351##Intervalos de confianza352353354model2_lower = coefci(m2, df = Inf,355vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,1]356357model2_upper = coefci(m2, df = Inf,358vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,2]359360361362# RMSE manual363364sq_resid <- m2$residuals**2365lm_rmse5 <- round( mean(sq_resid)^0.5, 2)366367368#############################################369#CREAMOS TABLA para pasarla al overleaf######370#############################################371372# Export using Stagezer373374stargazer(m1, m2,type="latex",375dep.var.caption = "Dependent variable" ,376dep.var.labels = c("",""),377se=list(sd_robust_model1, sd_robust_model2),378title = "Rainfall and civil conflict(Reduce-form)",379keep = c("GPCP_g","GPCP_g_l"),380covariate.labels=c("Growth in rainfall, t","Growth in rainfall, t-1"),381align = T, no.space = T,382add.lines=list(c("Country fixed effects", "yes","yes"),383c("Country-specific time trends","yes","yes"),384c("Root mean square error",lm_rmse1,lm_rmse2)),385keep.stat = c("rsq","n"),386notes.append = FALSE, notes.align = "c",387notes ="Regressions disturbance terms are clustered at the country388trend", style = "qje")389390##################################391####EJERCICIO 1b: COEFTPLOT#######392##################################393394#Instalamos los paquetes395install.packages("librarian")396397librarian::shelf(398tidyverse399, WDI400, forcats401, writexl402403)404405406#Volvemos a estimar el robust_model1407408robust_model1 <- coeftest(m1,409vcov = vcovCL,410type = "HC1",411cluster = ~ ccode)412413# Hallamos el intervalo de confianza --> model1_lower y model1_upper414415model1_lower = coefci(m1, df = Inf,416vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,1]417418model1_upper = coefci(m1, df = Inf,419vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,2]420421tidy(m1, conf.int = TRUE)422423#Sacamos los coeficientes424425model1_coef <- robust_model1[2,1]426model1_coef_se = robust_model2[2,2]427428#Volvemos a estimar el robust_model2429robust_model2 <- coeftest(m2,430vcov = vcovCL, # Matrix varianza-covarianza cluster (CL)431type = "HC1",432cluster = ~ ccode)433434# Hallamos el intervalo de confianza --> model2_lower y model2_upper435436model2_lower = coefci(m2, df = Inf,437vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,1]438439model2_upper = coefci(m2, df = Inf,440vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,2]441442tidy(m2, conf.int = TRUE)443444#Sacamos los coeficientes445model2_coef <- robust_model2[2,1]446model2_coef_se = robust_model2[2,2]447448# Creamos la tabla con los componentes anteriores para usarla de base449#en nuestro coeftplot450451table<- matrix(0, 2, 4) # 2 filas y 4 columnas452453454table[1,1]<- model1_coef455table[1,2]<- model1_coef_se456457table[2,1]<- model2_coef458table[2,2]<- model2_coef_se459460table[1,3]<- model1_lower461table[1,4]<- model1_upper462463table[2,3]<- model2_lower464table[2,4]<- model2_upper465466#En el coeftplot, nombramos nuestros modelos como model OLS1 y OLS2467468colnames(table)<- c("Estimate","se","lower_bound","upper_bound")469rownames(table)<- c("OLS 1", "OLS 2")470471472# Para lograr la exportación a Latex utilizamos el comando "xtable"473install.packages("xtable")474library(xtable)475xtable(table)476477# Pasamos la table creada a dataframe como (tab)478tab <- as.data.frame(table)479480481# Actualizamos unas caracteristicas que serviran para el Coeff-plot482483theme_set(theme_bw(20)) # tema con fondo blanco y cuadro con bordes en negro484485options(repr.plot.width = 8, repr.plot.height =5) # tamaño del gráfico486487# Finalmente creamos el coeftplot488489tab %>% ggplot(aes(x=rownames(tab), y=Estimate)) +490geom_point(size=2, color = 'black') +491geom_errorbar(aes(ymin=lower_bound, ymax=upper_bound) , width = 0.05,color="black", size = 0.8) +492labs(x="", y="") + ggtitle("Coefplot (95% CI)") +493theme(text=element_text(size =15), plot.title = element_text(hjust = 0.5)) +494geom_hline(yintercept=0, linetype="dashed", color = "black", size=1) +495scale_x_discrete(limits = c("OLS 1","OLS 2")) + # order x - axis496theme(panel.grid.major = element_blank(), # borras las cuadrículas en el fondo497panel.grid.minor = element_blank())498499500