# Path: blob/main/Trabajo_grupal/WG6/Grupo_9_r_Pregunta4.R
# (2714 views)
#### Group 9 #### Question 4 ####
#
# Computes two department-level (region-level) indicators from ENAHO 2020:
#   1. 'juntos': survey-weighted mean of 'p710_04' (module 700).
#   2. 'promedio_gasto_salud': survey-weighted mean of gru51hd / gashog2d
#      (sumaria module).
#
# NOTE: the bare string literals below ("Set Directorio", ...) are the
# original section banners; they are kept verbatim because they are echoed
# when the script is run interactively.

library(haven)   # read Stata files (.dta)
library(dplyr)   # data cleaning
library(stringr) # regular-expression helpers
library(srvyr)   # declare the sampling design of a survey
library(survey)


"Set Directorio"
#---------------
user <- Sys.getenv("USERNAME") # username

# The relative data paths used below are resolved against this directory.
setwd(paste0("C:/Users/", user, "/Documents/GitHub/1ECO35_2022_2/Grupo9"))


"Load dataset de ENAHO"
#----------------------

enaho34 <- data.frame(
  read_dta("../../enaho/2020/737-Modulo34/737-Modulo34/sumaria-2020.dta")
)

enaho37 <- data.frame(
  read_dta("../../enaho/2020/737-Modulo37/737-Modulo37/enaho01-2020-700.dta")
)


# Shared helpers ----

# Department (region) names keyed by the first two characters of 'ubigeo'.
region_names <- c(
  "01" = "Amazonas",
  "02" = "Ancash",
  "03" = "Apurimac",
  "04" = "Arequipa",
  "05" = "Ayacucho",
  "06" = "Cajamarca",
  "07" = "Callao",
  "08" = "Cusco",
  "09" = "Huancavelica",
  "10" = "Huanuco",
  "11" = "Ica",
  "12" = "Junin",
  "13" = "La Libertad",
  "14" = "Lambayeque",
  "15" = "Lima",
  "16" = "Loreto",
  "17" = "Madre de Dios",
  "18" = "Moquegua",
  "19" = "Pasco",
  "20" = "Piura",
  "21" = "Puno",
  "22" = "San Martín",
  "23" = "Tacna",
  "24" = "Tumbes",
  "25" = "Ucayali"
)

# Add 'ubigeo_dep' (first two characters of 'ubigeo') and 'region' (the
# matching department name) to a data frame that has a 'ubigeo' column.
# Codes without an entry in 'region_names' get NA, as with the original
# case_when() that had no default branch.
add_region <- function(df) {
  df$ubigeo_dep <- substr(df$ubigeo, 1, 2)
  df$region <- unname(region_names[df$ubigeo_dep])
  df
}

# Declare the survey design shared by both modules.
# ids: cluster (conglome), strata: stratum (estrato),
# weights: expansion factor (factor07).
# 'weights' is spelled out in full instead of relying on partial matching
# of 'weight'.
declare_design <- function(df) {
  as_survey_design(
    df,
    ids = conglome,
    strata = estrato,
    weights = factor07
  )
}


"Porcentaje de hogares a nivel departamental (o región) que se beneficia del programa 'juntos'"
#-----------------------------------------------------------------------------------------------

# Keep the variables of interest
enaho_merge_37 <- enaho37[, c("conglome", "vivienda", "hogar", "ubigeo",
                              "estrato", "p710_04", "factor07")]

# Create the variables 'ubigeo_dep' and 'region'
enaho_merge_37 <- add_region(enaho_merge_37)

# Declare the design once, after 'region' exists, so it can be grouped on
survey_enaho_37 <- declare_design(enaho_merge_37)

# Build the 'juntos' indicator: weighted mean of 'p710_04' by region.
# NOTE(review): interpreting this mean as a share of beneficiary households
# assumes 'p710_04' is coded 0/1 -- confirm against the ENAHO codebook.
enaho_37 <- survey_enaho_37 %>%
  group_by(region) %>%
  summarise(juntos = survey_mean(p710_04, na.rm = TRUE))

# Answer:
View(enaho_37[, c("region", "juntos")])

"************************ 2da parte del ejercicio *********************"


"El promedio del porcentaje de gasto en salud realizado por los hogares a nivel de región (o departamentos)"
#----------------------------------------------------------------------------------------------------------

# Keep the variables of interest
enaho_merge_34 <- enaho34[, c("conglome", "vivienda", "hogar", "ubigeo",
                              "estrato", "gru51hd", "gashog2d", "factor07")]

# Create the variables 'ubigeo_dep' and 'region'
enaho_merge_34 <- add_region(enaho_merge_34)

# Declare the design once, after 'region' exists, so it can be grouped on
survey_enaho_34 <- declare_design(enaho_merge_34)

# Build the 'promedio_gasto_salud' indicator: regional weighted mean of the
# household ratio gru51hd / gashog2d.
# na.rm = TRUE matches the first indicator, so that a single missing ratio
# does not turn a whole region's mean into NA.
ind2 <- survey_enaho_34 %>%
  mutate(porcentaje_gasto_salud = gru51hd / gashog2d) %>%
  group_by(region) %>% # indicator at the regional level
  summarise(
    promedio_gasto_salud = survey_mean(porcentaje_gasto_salud, na.rm = TRUE)
  )

# Answer:
View(ind2[, c("region", "promedio_gasto_salud")])