Path: blob/main/Trabajo_final/grupo6/grupo6_pregunta1.R
2714 views
################ Trabajo Final ############################1## Curso: Laboratorio de R y Python #################################2## Grupo 634# clear environment56rm(list=ls(all=TRUE))78# Load libraries ----91011librarian::shelf(12tidyverse # dplyr, tidyr, stringr, ggplot2, etc in unique library13, haven # to read datset: .dta (stata), .spss, .dbf14, fastDummies # for dummies15, stargazer # for summary and econometrics tables16, sandwich # for linear models17, lmtest # for robust standar error18, estimatr # for iv, cluster, robust standar error (LM_robust)19, lfe # for fixed effects, cluster standar error20, caret # for easy machine learning workflow (mse, rmse)21, texreg # library for export table22, mfx # probit , logit model marginal effects23)2425user <- Sys.getenv("USERNAME") # username2627setwd( paste0("C:/Users/",user,"/Documents/GitHub/1ECO35_2022_2/Lab10") ) # set directorio2829repdata <- read_dta("../data/dataverse_files/mss_repdata.dta")303132str(repdata)3334lapply(repdata, class)3536names(repdata)3738attributes(repdata)3940summary(repdata)4142################# T A B L A 1: ESTADISTICOS DESCRIPTIVOS ########################4344#construyendo la tabla de las variables solicitadas4546repdata <- read_dta("../data/dataverse_files/mss_repdata.dta")4748table1 <- repdata %>% dplyr::select(NDVI_g, tot_100, trade_pGDP, pop_den_rur, land_crop, va_agr, va_ind_manf)495051stargazer(table1)5253# No obtenemos resultado pues la libreria exige que la base de datos sea DataFrame5455table1 <- repdata %>% dplyr::select(NDVI_g, tot_100, trade_pGDP, pop_den_rur, land_crop, va_agr, va_ind_manf) %>% as.data.frame()565758stargazer(table1)5960#etiquetas6162stargazer(table1, title = "Estadísticos Descriptivos", digits = 2,63covariate.labels = c("Tasa de variación del índice de vegetación","Términos de intercambio","Porcentajes de las exportaciones respecto al PBI","Densidad poblacional rural",64"Porcentaje de tierra cultivable en uso","Valor agregado del sector agrícola respecto al PBI","Valor agregado del secctor manufacturero respecto al PBI"))65stargazer(table1, title = "Estadísticos Descriptivos", digits = 2,66covariate.labels = c("Tasa de variación del índice de vegetación","Términos de intercambio","Porcentajes de las exportaciones respecto al PBI","Densidad poblacional rural",67"Porcentaje de tierra cultivable en uso","Valor agregado del sector agrícola respecto al PBI","Valor agregado del secctor manufacturero respecto al PBI"),68min.max = F)6970#sum, ordenar las columnas de los estadisticos7172list_vars <- c("Tasa de variación del índice de vegetación (NDVI_g)","Términos de intercambio (tot_100)","Porcentajes de las exportaciones respecto al PBI (trade_pGDP)","Densidad poblacional rural (pop73_den_rur)",74"Porcentaje de tierra cultivable en uso (land_crop)","Valor agregado del sector agrícola respecto al PBI (va_agr)","Valor agregado del secctor manufacturero respecto al PBI (va_ind_manf)")757677stargazer(table1, title = "Estadísticos descriptivos", digits = 2, # decimales con 2 digitos78covariate.labels = list_vars, # Lista de etiquetas79summary.stat = c("mean", "sd", "n"), # se especifica el orden de los estadÃ?sticos80min.max = F, # borrar el estadÃ?stico de maximo y minimo81notes = "Note.— Se tomó las variables indicadas."82, notes.append = FALSE, # TRUE append the significance levels83notes.align = 'l')8485################# T A B L A 2: REGRESIÓN MCO SIMPLE ########################8687#modelo18889index_country_time <- grep("_time$", colnames(repdata))9091country_time_trend <-names(repdata)[index_country_time]9293index_country <- grep("^ccode_\\d+$", colnames(repdata))9495country_fe <-names(repdata)[index_country]9697ols_model <- lm( any_prio~ GPCP_g + GPCP_g_l,clusters = ccode, data = repdata, se_type = "stata", fixed_effects = ~ ccode)9899attributes(ols_model)100101ols_model$coefficients # coeficientes102103ols_model$fitted.values # Y estimado104105# summary table como en stata106107summary(ols_model)108109summary(ols_model)$call110111summary(ols_model)$coef112113# Test de significancia individual usando Huber robust standard errors114115coeftest(ols_model, vcov = vcovHC(ols_model, "HC")) # Clasical white robust116coeftest(ols_model, vcov = vcovHC(ols_model, "HC0"))117118coeftest(ols_model, vcov = vcovHC(ols_model, "HC1")) # Huber-White robust (STATA)119coeftest(ols_model, vcov = vcovHC(ols_model, "HC2")) # Eicker-Huber-White robust120coeftest(ols_model, vcov = vcovHC(ols_model, "HC3"))121coeftest(ols_model, vcov = vcovHC(ols_model, "HC4"))122123124# vcovHC Matrix varianza-covarianza robusta ante heterocedasticidad125126# robust se and cluster127128robust_model <- coeftest(ols_model,129vcov = vcovCL, # Matrix varianza-covarianza cluster (CL)130type = "HC1",131cluster = ~ ccode)132133sd_robust <- robust_model[,2]134135#rms136137rmse1 <- RMSE(ols_model$fitted.values, repdata$any_prio ) # root mean squeare error138139RMSE(ols_model$fitted.values, repdata$any_prio )^2 # mean square error140141# using tdyr functions142143144glance(ols_model)145146tidy(ols_model)147148tidy(ols_model)149150htmlreg(ols_model)151152tidy(ols_model)[2,2] # coeficiente153tidy(ols_model)[2,6] # limite inferior154tidy(ols_model)[2,7] # limite superior155156157"Usando LM y luego coestest para los error standar robusto"158159m1 <- lm(any_prio ~ GPCP_g + GPCP_g_l, data = repdata)160161lm_rmse1 <- round(RMSE(m1$fitted.values, repdata$any_prio ) ,2 )162163robust_model <- coeftest(m1,164vcov = vcovCL, # Matrix varianza-covarianza cluster (CL)165type = "HC1",166cluster = ~ ccode)167168sd_robust_model <- robust_model[,2]169170# intervalo de confianza171172model1_lower = coefci(m1, df = Inf,173vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,1]174175model1_upper = coefci(m1, df = Inf,176vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,2]177178tidy(m1, conf.int = TRUE)179180181# segundo modelo182183index_country_time <- grep("_time$", colnames(repdata))184185country_time_trend <-names(repdata)[index_country_time]186187index_country <- grep("^ccode_\\d+$", colnames(repdata))188189country_fe <-names(repdata)[index_country]190191ols_model2 <- lm( war_prio~ GPCP_g + GPCP_g_l,clusters = ccode, data = repdata, se_type = "stata", fixed_effects = ~ ccode)192193attributes(ols_model2)194195ols_model2$coefficients # coeficientes196197ols_model2$fitted.values # Y estimado198199# summary table como en stata200201summary(ols_model2)202203summary(ols_model2)$call204205summary(ols_model2)$coef206207# Test de significancia individual usando Huber robust standard errors208209coeftest(ols_model, vcov = vcovHC(ols_model2, "HC")) # Clasical white robust210coeftest(ols_model, vcov = vcovHC(ols_model2, "HC0"))211212coeftest(ols_model, vcov = vcovHC(ols_model2, "HC1")) # Huber-White robust (STATA)213coeftest(ols_model, vcov = vcovHC(ols_model2, "HC2")) # Eicker-Huber-White robust214coeftest(ols_model, vcov = vcovHC(ols_model2, "HC3"))215coeftest(ols_model, vcov = vcovHC(ols_model2, "HC4"))216217218# vcovHC Matrix varianza-covarianza robusta ante heterocedasticidad219220# robust se and cluster221222robust_model2 <- coeftest(ols_model2,223vcov = vcovCL, # Matrix varianza-covarianza cluster (CL)224type = "HC1",225cluster = ~ ccode)226227sd_robust2 <- robust_model2[,2]228229#rms230231rmse2 <- RMSE(ols_model2$fitted.values, repdata$war_prio ) # root mean squeare error232233RMSE(ols_model2$fitted.values, repdata$war_prio )^2 # mean square error234235# using tdyr functions236237238glance(ols_model2)239240tidy(ols_model2)241242tidy(ols_model2)243244htmlreg(ols_model2)245246tidy(ols_model2)[2,2] # coeficiente247tidy(ols_model2)[2,6] # limite inferior248tidy(ols_model2)[2,7] # limite superior249250251"Usando LM y luego coestest para los error standar robusto"252253m2 <- lm(war_prio ~ GPCP_g + GPCP_g_l, data = repdata)254255lm_rmse2 <- round(RMSE(m2$fitted.values, repdata$war_prio ) ,2 )256257robust_model2 <- coeftest(m2,258vcov = vcovCL, # Matrix varianza-covarianza cluster (CL)259type = "HC1",260cluster = ~ ccode)261262sd_robust_model2 <- robust_model2[,2]263264# intervalo de confianza265266model2_lower = coefci(m2, df = Inf,267vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,1]268269model2_upper = coefci(m2, df = Inf,270vcov. = vcovCL, cluster = ~ ccode, type = "HC1")[2,2]271272tidy(m2, conf.int = TRUE)273274275# stargazer::stargazer(ols_model5, se = starprep(ols_model5))276277texreg(list(ols_model, ols_model2),278custom.coef.map = list("GPCP_g"="Growth in rainfall, t",279"GPCP_g_l"="Growth in rainfall, t-1"), digits = 3,280stars = c(0.01, 0.05, 0.1),281custom.gof.rows = list("Country fixed effects" = c("yes", "yes"),282"Country-specific time trends" = c("yes", "yes"),283"RMSE" = c(rmse1,rmse2)),284caption = "Dependent Variable: Economic Growth Rate, t")285286287##################################################288#Gráfico de Coefplot del coeficiente para GPCP_g #289##################################################290291#tabla de resultados292293table<- matrix(0, 3, 4)294295#296297298library(coefplot)299coefplot(ols_model, ols_model2) +300theme_minimal() +301labs(title="Estimación de coeficientes con error estandar",302x="Estimación",303y="Variable",304caption="Elaboración propia con datos de mss_repdata")305306307ggsave("../plots/Coef_plot.png"308, height = 8 # alto309, width = 12 # ancho310, dpi = 320 # resolución (calidad de la imagen)311)312313314