# Path: blob/main/Trabajo_grupal/WG6/Grupo_4_r.R
# 2714 views
############################### WG # 6 ######################################

# Group 4

# Seidy Ascencios - 20191622
# Luana Morales - 20191240
# Marcela Quintero - 20191445
# Flavia Oré - 20191215

##############################################################################
#                                                                            #
#                        QUESTION 1 (PREGUNTA 1)                             #
#                                                                            #
##############################################################################

# Purpose: build a 2019-2020 ENAHO household panel by merging module 34
# (sumaria) with module 01 for each year, appending both years, attaching
# the deflator table and computing monthly per-capita income and spending.

# install.packages("stringr")

library(haven)       # read SPSS, Stata, dbf, ... files
library(dplyr)       # data wrangling
library(stringr)     # string helpers / regular expressions
library(fastDummies) # dummy-variable creation
library(srvyr)       # declare the sampling design of a survey
library(survey)

# The bare strings below are the original author's section markers; they are
# kept as-is because top-level expressions are auto-printed when the script
# is run.
"1) Set Directorio"

user <- Sys.getenv("USERNAME")

# Every relative path below is resolved against this directory.
# NOTE(review): hard-coded Windows layout; confirm it matches the machine
# the script is run on.
setwd(paste0("C:/Users/", user, "/Documents/GitHub/1ECO35_2022_2/Trabajo_grupal/WG6"))

# Drop the columns located at `positions` from `df`.
#
# Unlike `df[, -positions]`, this keeps every column when `positions` is
# empty (a negative empty index selects *zero* columns) and never collapses
# the result to a vector.
drop_columns_at <- function(df, positions) {
  df[, setdiff(seq_len(ncol(df)), positions), drop = FALSE]
}


"ENAHO 2020"

"2) Load dataset de ENAHO"

# Each file is read once and converted to a base data.frame (the original
# read every file twice and discarded the first copy).
enaho01 <- data.frame(
  read_dta("../../../../enaho/2020/737-Modulo01/737-Modulo01/enaho01-2020-100.dta")
)

enaho34 <- data.frame(
  read_dta("../../../../enaho/2020/737-Modulo34/737-Modulo34/sumaria-2020.dta")
)


"4) Merge section"

"Left merge"

# enaho34: master data
# enaho01: using data

enaho_merge <- merge(
  enaho34, enaho01,
  by = c("conglome", "vivienda", "hogar"),
  all.x = TRUE
)

# Columns present in both tables get the suffixes ".x" (master) and ".y"
# (using). Only the ".y" copies are dropped. The dot is escaped so that the
# pattern matches a literal ".y" at the end of the name, not "any character
# followed by y".
index <- grep("\\.y$", colnames(enaho_merge))

merge_base_2020 <- drop_columns_at(enaho_merge, index)


"ENAHO 2019"

enaho01_1 <- read_dta("../../../../enaho/2019/687-Modulo01/687-Modulo01/enaho01-2019-100.dta")

enaho34_1 <- read_dta("../../../../enaho/2019/687-Modulo34/687-Modulo34/sumaria-2019.dta")


"4) Merge section"

"Left merge"

# enaho34_1: master data
# enaho01_1: using data

enaho_merge_1 <- merge(
  enaho34_1, enaho01_1,
  by = c("conglome", "vivienda", "hogar"),
  all.x = TRUE
)

# Same clean-up as for 2020: remove the ".y" duplicates.
index_1 <- grep("\\.y$", colnames(enaho_merge_1))

merge_base_2019 <- drop_columns_at(enaho_merge_1, index_1)


colnames(merge_base_2020)

#----------------------- Append -----------------------------------

merge_append <- bind_rows(merge_base_2019, merge_base_2020) # bind_rows from dplyr

# Strip the ".x" suffix from the year and ubigeo columns.
merge_append <- merge_append %>%
  dplyr::rename(aÑo = aÑo.x, ubigeo = ubigeo.x)

# Inspect the years present in the appended data. This runs after the rename
# so that the column is addressed by its exact name instead of relying on
# `$` partial matching against "aÑo.x".
unique(merge_append$aÑo)

# substr() extracts characters from a string: the first two characters of
# ubigeo are stored as ubigeo_dep.
merge_append$ubigeo_dep <- substr(merge_append$ubigeo, 1, 2)

#----------------------- Deflate -----------------------------------

deflactores_base2020_new <- read_dta("../../../../enaho/2020/737-Modulo34/737-Modulo34/ConstVarGasto-Metodologia actualizada/Gasto2020/Bases/deflactores_base2020_new.dta")

# Rename the deflator table's year column so that it shares a key name with
# merge_append.
deflactores_base2020_new <- deflactores_base2020_new %>%
  dplyr::rename(aÑo = aniorec)

names(deflactores_base2020_new)

"4) Merge section deflactores"

# Left merge keyed on dpto and year (aniorec, renamed to aÑo above).
# merge_append: master data
# deflactores_base2020_new: using data
#
# NOTE(review): no visible step creates a `dpto` column in merge_append (only
# `ubigeo_dep` is derived above); confirm the survey data already carries
# `dpto`, otherwise this merge cannot find its key.
# NOTE(review): suffixes = c("", "") leaves any non-key column shared by both
# tables with a duplicated name; confirm that is intended.
enaho_merge_defla <- merge(
  merge_append, deflactores_base2020_new,
  by = c("dpto", "aÑo"),
  all.x = TRUE, suffixes = c("", "")
)

colnames(enaho_merge_defla)

# Monthly per-capita values: the annual household amount divided by
# 12 * household members (mieperho) * ld * i00.
# NOTE(review): ld and i00 are presumably the spatial and temporal deflators;
# verify against the ENAHO documentation.
enaho_merge_defla <- enaho_merge_defla %>%
  mutate(
    ingreso_month_pc = inghog1d / (12 * mieperho * ld * i00),
    gasto_month_pc = gashog2d / (12 * mieperho * ld * i00)
  )


#------------------------------------------------------------------------------#
#                                                                              #
#                   QUESTION 2 (PREGUNTA 2) - GROUP BY                         #
#                                                                              #
#------------------------------------------------------------------------------#

# Load the packages required for this question.
# Packages required for Question 2 (pension-eligibility dummy).

library(haven)
library(dplyr)
library(stringr)
library(fastDummies)
library(srvyr)
library(survey)

# Set the working directory; the relative paths below are resolved against it.
# NOTE(review): hard-coded Windows layout; confirm it matches the machine the
# script is run on.
user <- Sys.getenv("USERNAME")

setwd(paste0("C:/Users/", user, "/Documents/GitHub/1ECO35_2022_2/Trabajo_grupal/WG6"))

# Read module 02 once, as a base data.frame.
# The original first call was written as read_dta(r"../.."), which is a
# malformed raw-string literal in R (raw strings need delimiters, e.g.
# r"(...)") and stops the script at parse time; a plain string is used.
# NOTE(review): this section reaches the data through "../../../enaho" while
# the Question 1 section uses "../../../../enaho" from the same working
# directory; confirm which depth is correct.
enaho_2 <- data.frame(
  read_dta("../../../enaho/2020/737-Modulo02/737-Modulo02/enaho01-2020-200.dta")
)

# Inspect the data.
enaho_2$dominio

# Inspect the column names before selecting variables.
names(enaho_2)

# Keep the household keys plus p208a (used below as the age variable).
hogares <- enaho_2[, c("conglome", "vivienda", "hogar", "p208a")]

# Module 34 (sumaria) supplies the poverty status that module 02 lacks.
enaho34 <- data.frame(
  read_dta("../../../enaho/2020/737-Modulo34/737-Modulo34/sumaria-2020.dta")
)

names(enaho34)

# Keep only the household keys and the poverty status.
hogares34 <- enaho34[, c("conglome", "vivienda", "hogar", "pobreza")]

# Merge both tables on the household keys (merge() keeps matching rows only
# by default).
enaho_merge <- merge(
  hogares, hogares34,
  by = c("conglome", "vivienda", "hogar")
)

print(enaho_merge)

# Dummy equal to 1 when p208a >= 65 AND pobreza < 3 (per the original
# author's comment: the household is poor and has a member aged 65 or more),
# 0 otherwise, NA when it cannot be determined.
#
# The original wrote `(A) & (B)*1`. Because `*` binds tighter than `&`, that
# multiplied only the second condition and the final `&` turned the result
# back into TRUE/FALSE. The conjunction is now evaluated first and then
# converted to 0/1.
enaho_merge$dummypension <- as.integer(
  enaho_merge[["p208a"]] >= 65 & enaho_merge[["pobreza"]] < 3
)

# Show the dummy: 1 when both conditions hold, 0 when they do not.
print(enaho_merge["dummypension"])


#------------------------------------------------------------------------------#
#                                                                              #
#                   QUESTION 4 (PREGUNTA 4) - indicators                       #
#                                                                              #
#------------------------------------------------------------------------------#

# Install (if missing) and load the required packages.

pacman::p_load(haven,dplyr, stringr, fastDummies,srvyr )

library(reshape)
library(haven)
library(dplyr)
library(srvyr)
library(survey)

# Load the data sets.
user <- Sys.getenv("USERNAME")

setwd(paste0("C:/Users/", user, "/Documents/data_enaho"))

# NOTE(review): the two paths below are absolute and point at one specific
# user's OneDrive folder, so the setwd() above has no effect on them; confirm
# where the data is meant to be read from.
enaho.700 <- read_dta("C:/Users/seibe/OneDrive/Documents/DATA_STATA/enaho01-2020-700.dta")
View(enaho.700)

enaho.sumaria <- read_dta("C:/Users/seibe/OneDrive/Documents/DATA_STATA/sumaria-2020-12g.dta")
View(enaho.sumaria)

# Declare the sampling design of each data set. The argument is spelled
# `weights` in full rather than relying on partial matching of `weight`.
# NOTE(review): these design objects are not used by the indicators below,
# which are plain unweighted means; confirm whether weighted estimates were
# intended.
survey_enaho <- enaho.700 %>%
  as_survey_design(ids = conglome, strata = estrato, weights = factor07)

View(survey_enaho)

survey_enaho_2 <- enaho.sumaria %>%
  as_survey_design(ids = conglome, strata = estrato, weights = factor07)

View(survey_enaho_2)

# Two-digit code -> region name. One table shared by both data sets, so the
# mapping cannot drift between them. "Cusco" was misspelled "CUsco" in the
# original labels.
region_names <- c(
  "01" = "Amazonas",
  "02" = "Ancash",
  "03" = "Apurimac",
  "04" = "Arequipa",
  "05" = "Ayacucho",
  "06" = "Cajamarca",
  "07" = "Callao",
  "08" = "Cusco",
  "09" = "Huancavelica",
  "10" = "Huanuco",
  "11" = "Ica",
  "12" = "Junin",
  "13" = "La Libertad",
  "14" = "Lambayeque",
  "15" = "Lima",
  "16" = "Loreto",
  "17" = "Madre de Dios",
  "18" = "Moquegua",
  "19" = "Pasco",
  "20" = "Piura",
  "21" = "Puno",
  "22" = "San Martin",
  "23" = "Tacna",
  "24" = "Tumbes",
  "25" = "Ucayali"
)

# Replace `ubigeo` by its first two characters and add the matching `region`
# name; codes that are not in `region_names` get NA.
add_region <- function(datos) {
  datos$ubigeo <- substr(datos$ubigeo, 1, 2)
  datos$region <- unname(region_names[datos$ubigeo])
  datos
}

# Create the region variable in both samples.

enaho.700 <- add_region(enaho.700)

View(enaho.700[, c("region")])

enaho.sumaria <- add_region(enaho.sumaria)

View(enaho.sumaria[, c("region")])

# Mean of p710_04 by region (per the original author's comment: the
# percentage of households in each region that benefit from the programme).

bene_prog <- enaho.700 %>%
  group_by(ubigeo, region) %>%
  summarise(porc_bene_pro = mean(p710_04, na.rm = TRUE), .groups = "keep")

View(bene_prog)

# Average ratio gru51hd / gashog2d by region (per the original author's
# comment: the share of household spending that goes to health). The column
# keeps its original name, gasto_anual_hogar, although it stores a ratio.

enaho.sumaria <- mutate(enaho.sumaria, gasto_anual_hogar = gru51hd / gashog2d)

View(enaho.sumaria[, c("gasto_anual_hogar", "region")])

gasto_salud <- enaho.sumaria %>%
  group_by(ubigeo, region) %>%
  summarise(porc_gasto_salud = mean(gasto_anual_hogar, na.rm = TRUE), .groups = "keep")

View(gasto_salud)