# Path: blob/main/Trabajo_grupal/WG6/Grupo_3_R.R
# 2714 views
# TASK 6
#
# Works with ENAHO survey files (.dta) for 2019 and 2020:
#   1. merges the "sumaria" file with module 100 per year and stacks both years,
#   2. computes an hourly wage from module 500,
#   3. flags / filters people aged 65 or older from module 200,
#   4. computes two department-level survey indicators with srvyr.

pacman::p_load(haven, dplyr, stringr, fastDummies, srvyr)

# NOTE(review): this looks up an environment variable literally named "fdcc0"
# and the result is not used anywhere below. Presumably the Windows user name
# was intended -- confirm before relying on `user`.
user <- Sys.getenv("fdcc0")

# Root folders of the raw data. The defaults are the paths the script has
# always used; set ENAHO_DIR / ENAHO_ALT_DIR to run it on another machine.
enaho_dir <- Sys.getenv(
  "ENAHO_DIR",
  unset = "C:/Users/fdcc0/Desktop/PUCP/2022-2/R-PYTHON/TAREA 6/enaho"
)
enaho_alt_dir <- Sys.getenv("ENAHO_ALT_DIR", unset = "D:/PYTHON")

# Every read below uses an absolute path, so the working directory is not
# needed here; the call is kept in case code outside this section relies on it.
setwd(enaho_dir)

# Element-wise sum of two numeric vectors where a missing component counts as
# zero. The result is NA only when BOTH components are missing.
sum_skip_na <- function(first, second) {
  total <- rowSums(cbind(as.numeric(first), as.numeric(second)), na.rm = TRUE)
  total[is.na(first) & is.na(second)] <- NA_real_
  total
}

#

enaho01_2019 <- read_dta(
  file.path(enaho_dir, "2019/687-Modulo01/687-Modulo01/enaho01-2019-100.dta")
)

enaho01_2020 <- read_dta(
  file.path(enaho_dir, "2020/737-Modulo01/737-Modulo01/enaho01-2020-100.dta")
)

enaho34_2019 <- read_dta(
  file.path(enaho_dir, "2019/687-Modulo34/687-Modulo34/sumaria-2019.dta")
)

enaho34_2020 <- read_dta(
  file.path(enaho_dir, "2020/737-Modulo34/737-Modulo34/sumaria-2020.dta")
)

# NOTE(review): the temporal deflator is loaded but never applied below.
deflactor_temporal <- read_dta(
  file.path(
    enaho_alt_dir,
    "2020/737-Modulo34/737-Modulo34",
    "ConstVarGasto-Metodologia actualizada/Gasto2020/Bases",
    "deflactores_base2020_new.dta"
  )
)

# 1. MERGE DATASET

# Left joins: every row of the sumaria file is kept.
enaho_merge2019 <- merge(
  enaho34_2019, enaho01_2019,
  by = c("conglome", "vivienda", "hogar"),
  all.x = TRUE
)

enaho_merge2020 <- merge(
  enaho34_2020, enaho01_2020,
  by = c("conglome", "vivienda", "hogar"),
  all.x = TRUE
)

# Stack the 2019 and 2020 data sets.
# append() on two data frames concatenates them as lists of columns, so `$`
# would only ever see the 2019 columns; bind_rows() stacks the rows instead.
enaho_append <- bind_rows(enaho_merge2019, enaho_merge2020)

# Monthly income and expenditure: inghog1d / gashog2d divided by
# 12 * mieperho.
enaho_append$ingreso_mensual <-
  enaho_append$inghog1d / (12 * enaho_append$mieperho)

enaho_append$gasto_mensual <-
  enaho_append$gashog2d / (12 * enaho_append$mieperho)

# Deflating the variables (spatial and temporal deflator)

# Spatial
# NOTE(review): the values are MULTIPLIED by `ld`; if `ld` is a price
# deflator the usual operation would be a division -- confirm against the
# survey methodology before changing.
enaho_append$ingreso_mensual_defl <-
  enaho_append$ingreso_mensual * enaho_append$ld

enaho_append$gasto_mensual_defl <-
  enaho_append$gasto_mensual * enaho_append$ld

# Temporal
# TODO: not implemented; `deflactor_temporal` is loaded above but unused.


##############################3#

# 2. Hourly wage of the dependent worker

enaho01_500 <- read_dta(
  file.path(enaho_alt_dir, "2020/737-Modulo05/737-Modulo05/enaho01a-2020-500.dta")
)

# Annual wage from the first and second job.
# A plain `+` yields NA whenever one of the two components is missing, which
# the NA -> 0 replacement below would then turn into a wage of zero.
enaho01_500$ingreso_anual <-
  sum_skip_na(enaho01_500$i524e1, enaho01_500$i538e1)

# Hours worked per week (same NA handling as above).
enaho01_500$horas_trab_sem <-
  sum_skip_na(enaho01_500$i513t, enaho01_500$i518)

# Hourly wage of the dependent worker (52 weeks per year).
enaho01_500$salarioxhora <-
  enaho01_500$ingreso_anual / (enaho01_500$horas_trab_sem * 52)

# Replace NA (including NaN from 0 / 0) with zero.
enaho01_500$salarioxhora[is.na(enaho01_500$salarioxhora)] <- 0


# GROUPBY

# People aged 65 or older who could take part in the Juntos programme.
# NOTE(review): the header names "Juntos" but the flag is the same 65+ age
# rule used for "Pension 65" below -- confirm which programme is meant.
enaho01_200_2019 <- read_dta(
  file.path(enaho_alt_dir, "2019/687-Modulo02/687-Modulo02/enaho01-2019-200.dta")
)
enaho01_200_2020 <- read_dta(
  file.path(enaho_alt_dir, "2020/737-Modulo02/737-Modulo02/enaho01-2020-200.dta")
)

# Row-wise stack of both years (see the note on append() above).
enaho_200_append <- bind_rows(enaho01_200_2019, enaho01_200_2020)

enaho_200_append$mayor_65 <- enaho_200_append$p208a >= 65


# GROUPBY

# People aged 65 or older who could take part in the Pension 65 programme.

enaho01_200_2019 <- read_dta(
  file.path(enaho_dir, "2019/687-Modulo02/687-Modulo02/enaho01-2019-200.dta")
)
enaho01_200_2020 <- read_dta(
  file.path(enaho_dir, "2020/737-Modulo02/737-Modulo02/enaho01-2020-200.dta")
)

enaho01_200_2019 <- enaho01_200_2019[, c(
  "conglome", "vivienda", "hogar", "codperso",
  "ubigeo", "dominio", "estrato", "p208a", "p209",
  "p207", "p203", "p201p", "p204", "facpob07"
)]

# Overwrites the section-1 `enaho_merge2019` with a sumaria x module-200 join.
enaho_merge2019 <- merge(
  enaho34_2019, enaho01_200_2019,
  by = c("conglome", "vivienda", "hogar"),
  all.x = TRUE
)

# Columns are referenced by name inside the pipeline rather than through the
# outer `enaho_merge2019$...`, which only worked while the row order and count
# of both objects happened to match.
enaho_merge2019 <- enaho_merge2019 %>%
  select(conglome, vivienda, hogar, codperso, pobreza, p208a) %>%
  mutate(dummy_pobreza = ifelse(pobreza == 3, 0, 1)) %>% # 0 when pobreza == 3
  filter(p208a >= 65)

# INDICATORS

# Find the percentage of households at the department level that benefit
# from the programme.

enaho01_37_2020 <- read_dta(
  file.path(enaho_dir, "2020/737-Modulo37/737-Modulo37/enaho01-2020-700.dta")
)

# Department code: first two characters of `ubigeo`.
enaho01_37_2020$cod_departamento <- str_sub(enaho01_37_2020$ubigeo, 1, 2)

survey_enaho37 <- enaho01_37_2020 %>%
  as_survey_design(
    ids = conglome,
    strata = estrato,
    weights = factor07 # spelled out; `weight =` relied on partial matching
  )

indicador1 <- survey_enaho37 %>%
  group_by(cod_departamento) %>%
  summarise(beneficiario = survey_mean(p710_04))

# Show the average share of health expenditure at the region level.

enaho34_2020 <- read_dta(
  file.path(enaho_dir, "2020/737-Modulo34/737-Modulo34/sumaria-2020.dta")
)

# Department code: first two characters of `ubigeo`.
enaho34_2020$cod_departamento <- str_sub(enaho34_2020$ubigeo, 1, 2)

survey_enaho34 <- enaho34_2020 %>%
  as_survey_design(
    ids = conglome,
    strata = estrato,
    weights = factor07
  )

# NOTE(review): the header asks for the average, but the code computes the
# survey MEDIAN of a ratio (not multiplied by 100) and stores it in a column
# named `beneficiario`. Statistic and column name are kept as they were so
# downstream code keeps working -- confirm the intended measure.
indicador2 <- survey_enaho34 %>%
  mutate(gastosalud = gru51hd / gashog2d) %>%
  group_by(cod_departamento) %>%
  summarise(beneficiario = survey_median(gastosalud))