# Path: blob/main/Trabajo_grupal/WG6/Grupo_8_R.R
# 2714 views
# Homework 6
# Group 8

# ---------------------------------------------------------------------------- #
# QUESTION 1 ----
# Merge ENAHO module 01 with the "sumaria" module for 2019 and 2020, append
# both years, and build per-capita monthly income/expenditure variables,
# including spatially deflated versions.

# NOTE(review): the original script started with rm(list = ls()); it was
# dropped because wiping the caller's workspace is a side effect a script
# should not impose. Restart the R session for a clean environment instead.

# Install pacman only when it is missing instead of on every run.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

# All packages used anywhere in this script are loaded here.
pacman::p_load(haven, dplyr, stringr, fastDummies, srvyr, survey)

# MERGE DATASET

# NOTE(review): hard-coded, machine-specific working directory; every relative
# path in Question 1 is resolved against it.
setwd("C:/Users/Alexander/Documents/2020/737-Modulo01/737-Modulo01")

# Household identifier shared by every module.
hh_keys <- c("conglome", "vivienda", "hogar")

# Columns kept from sumaria. "ubigeo" is included because the department code
# used for the temporal-deflator merge below is derived from it (the original
# selection dropped it and then tried to read it back).
sumaria_cols <- c(
  hh_keys, "ubigeo", "ld", "mieperho", "inghog1d", "gashog2d", "aÑo"
)

# First, the 2020 data.
enaho01_2020 <- read_dta(
  "../../../2020/737-Modulo01/737-Modulo01/enaho01-2020-100.dta"
)
enaho34_2020 <- read_dta(
  "../../../2020/737-Modulo34/737-Modulo34/sumaria-2020.dta"
)

enaho01_2020 <- enaho01_2020[, hh_keys]
enaho34_2020 <- enaho34_2020[, sumaria_cols]

enaho_merge_2020 <- merge(
  enaho01_2020, enaho34_2020,
  by = hh_keys,
  all.x = TRUE
)

# Then, the 2019 data.
enaho01_2019 <- read_dta(
  "../../../2019/687-Modulo01/687-Modulo01/enaho01-2019-100.dta"
)
enaho34_2019 <- read_dta(
  "../../../2019/687-Modulo34/687-Modulo34/sumaria-2019.dta"
)

enaho01_2019 <- enaho01_2019[, hh_keys]
enaho34_2019 <- enaho34_2019[, sumaria_cols]

enaho_merge_2019 <- merge(
  enaho01_2019, enaho34_2019,
  by = hh_keys,
  all.x = TRUE
)

# Append both years.
enaho_append <- bind_rows(enaho_merge_2019, enaho_merge_2020)
unique(enaho_append$aÑo)

# Temporal deflator table (base 2020).
deflactores_2020 <- read_dta(
  "../../../2020/737-Modulo34/737-Modulo34/ConstVarGasto-Metodologia actualizada/Gasto2020/Bases/deflactores_base2020_new.dta"
)

# Monthly per-capita income and expenditure: annual household totals divided
# by 12 months and by the number of household members.
enaho_append$ingreso_mensual <-
  enaho_append$inghog1d / (12 * enaho_append$mieperho)
enaho_append$gasto_mensual <-
  enaho_append$gashog2d / (12 * enaho_append$mieperho)

# Deflating the variables (spatial and temporal deflator).

# Spatial deflator: deflating means DIVIDING by ld. The original multiplied by
# ld, which contradicts the ingr_Per2 / gast_Per2 variables created below.
enaho_append$ingreso_mensual_defl <-
  enaho_append$ingreso_mensual / enaho_append$ld
enaho_append$gasto_mensual_defl <-
  enaho_append$gasto_mensual / enaho_append$ld

# Department code = first two characters of ubigeo; it is the key used to
# merge with deflactores_2020.
enaho_append$departamento <- substr(enaho_append$ubigeo, 1, 2)

class(enaho_append$departamento)
class(deflactores_2020$dpto)

# Convert to numeric before merging against deflactores_2020$dpto.
enaho_append$departamento <- as.numeric(enaho_append$departamento)
class(enaho_append$departamento)

# Merge enaho_append with the temporal deflator.
enaho_merge_def_temporal_2020 <- merge(
  enaho_append, deflactores_2020,
  by.x = c("departamento", "aÑo"),
  by.y = c("dpto", "aniorec"),
  all.x = TRUE
)

unique(enaho_merge_def_temporal_2020$aÑo)
colnames(enaho_merge_def_temporal_2020)

# Divide household income and expenditure by, respectively: household size
# (mieperho), the spatial deflator (ld), 12 months, and the temporal deflator
# (i00). The original wrote `data$ 12` for the third variable, which is a
# syntax error; the intended divisor is the constant 12.
enaho_merge_def_temporal_2020 <- enaho_merge_def_temporal_2020 %>%
  mutate(
    # Income
    ingr_Per1 = inghog1d / mieperho,
    ingr_Per2 = inghog1d / ld,
    ingr_Per3 = inghog1d / 12,
    ingr_Per4 = inghog1d / i00,
    # Expenditure
    gast_Per1 = gashog2d / mieperho,
    gast_Per2 = gashog2d / ld,
    gast_Per3 = gashog2d / 12,
    gast_Per4 = gashog2d / i00
  )


# ---------------------------------------------------------------------------- #
# QUESTION 2 ----
# Hourly wage from ENAHO module 05 (employment and income).

# NOTE(review): hard-coded, machine-specific working directory.
setwd("C:/Users/Alexander/Documents/2020/737-Modulo05/737-Modulo05")

# The original used two pipelines that both ended in a dangling `%>%`, and the
# second one re-read the file, so the columns created by the first were not
# available to it. Everything is computed in a single pipeline here.
# NOTE(review): the section title says "dependent worker" while the output
# column name ends in "_indep"; the name is kept unchanged for compatibility.
# Confirm which worker type i524e1 / i538e1 refer to.
enaho05_2020 <- read_dta(
  "../../../2020/737-Modulo05/737-Modulo05/enaho01a-2020-500.dta"
) %>%
  mutate(
    # Annual income: sum of i524e1 and i538e1.
    enaho05_2020_ingr_anual = i524e1 + i538e1,
    # Hours: sum of i513t and i518.
    # presumably weekly hours, given the * 52 below; verify in the dictionary
    enaho05_2020_hrs_en_principal_y_2do_empleo = i513t + i518,
    # Hourly wage = annual income / (hours * 52).
    enaho_2020_salario_x_hora_trabajador_indep =
      enaho05_2020_ingr_anual /
        (enaho05_2020_hrs_en_principal_y_2do_empleo * 52)
  )


# ---------------------------------------------------------------------------- #
# QUESTION 3 ----
# GROUP BY: maximum age per household and a dummy built from it.

# Set the working directory from the current user name.
user <- Sys.getenv("USERNAME")

setwd(paste0("C:/Users/", user, "/Documents/GitHub/1ECO35_2022_2/Lab7"))

# Load the data sets.
enaho02 <- read_dta("../../../enaho/2020/737-Modulo02/enaho01-2020-200.dta")

# Maximum of p208a within each household; .groups = "drop" returns an
# ungrouped table.
base1 <- enaho02 %>%
  group_by(conglome, vivienda, hogar) %>%
  summarise(edad_max = max(p208a), .groups = "drop")

enaho34 <- read_dta("../../../enaho/2020/737-Modulo34/sumaria-2020.dta")

# Merge. base1 is part of the list so that edad_max exists in merge1; the
# original merged only enaho34 and then referenced the missing edad_max.
num <- list(base1, enaho34)

# Left-join every table in `num` onto enaho02, one after another. Columns that
# clash with an already-present name receive the ".y" suffix.
merge1 <- Reduce(
  function(acc, tbl) {
    merge(
      acc, tbl,
      by = c("conglome", "vivienda", "hogar"),
      all.x = TRUE, suffixes = c("", ".y")
    )
  },
  num,
  enaho02
)

names(merge1)

# Dummy variable: 1 when the household's maximum age is at most 65, else 0.
pension <- merge1 %>%
  mutate(g1 = ifelse(edad_max <= 65, 1, 0))


# ---------------------------------------------------------------------------- #
# QUESTION 4 ----
# INDICES

user <- Sys.getenv("USERNAME")

setwd(paste0("C:/Users/", user, "/Documents/GitHub/1ECO35_2022_2/Lab7"))

# Region name for each two-digit department code (first two characters of
# ubigeo). Code "24" was misspelled "Tumber" in the original.
region_lookup <- c(
  "01" = "Amazonas",
  "02" = "Ancash",
  "03" = "Apurimac",
  "04" = "Arequipa",
  "05" = "Ayacucho",
  "06" = "Cajamarca",
  "07" = "Callao",
  "08" = "Cusco",
  "09" = "Huancavelica",
  "10" = "Huanuco",
  "11" = "Ica",
  "12" = "Junin",
  "13" = "La Libertad",
  "14" = "Lambayeque",
  "15" = "Lima",
  "16" = "Loreto",
  "17" = "Madre de Dios",
  "18" = "Moquegua",
  "19" = "Pasco",
  "20" = "Piura",
  "21" = "Puno",
  "22" = "San Martin",
  "23" = "Tacna",
  "24" = "Tumbes",
  "25" = "Ucayali"
)

# Add `ubigeo_dep` (two-digit department code) and `region` (department name)
# to a data frame that has a `ubigeo` column. Codes missing from `lookup`
# yield NA in `region`.
add_region <- function(datos, lookup = region_lookup) {
  datos$ubigeo_dep <- substr(datos$ubigeo, 1, 2)
  datos$region <- unname(lookup[datos$ubigeo_dep])
  datos
}

# EXERCISE 1
# Load the data set.
enaho37 <- read_dta("../../../enaho/2020/737-Modulo37/enaho01-2020-700.dta")

enaho37 <- add_region(enaho37)

# Declare the survey design. The original passed `dep =` and `pension =`,
# which are not among the documented parameters of as_survey_design().
# NOTE(review): ids = 1 declares an unclustered, equal-probability design.
# For design-based estimates supply the real cluster, stratum and weight
# columns (presumably conglome / estrato / factor07 - confirm they exist in
# this module before switching).
survey_enaho37 <- enaho37 %>%
  as_survey_design(ids = 1) %>%
  # Strip value labels so the survey estimators receive a plain numeric.
  mutate(pension_num = as.numeric(p710_04))

# Per-region count, total and mean of p710_04. The original grouped by
# vivienda and then immediately by region; the second group_by replaces the
# first, so only region is kept. vartype = NULL suppresses the standard-error
# columns so the output keeps the original three columns.
survey_enaho37 <- survey_enaho37 %>%
  group_by(region) %>%
  summarise(
    count = unweighted(n()),
    pension1 = survey_total(pension_num, vartype = NULL),
    promedio = survey_mean(pension_num, vartype = NULL)
  )

# EXERCISE 2
# Load the data set.
enaho34 <- read_dta("../../../enaho/2020/737-Modulo34/sumaria-2020.dta")

# Build the regions from ubigeo.
enaho34 <- add_region(enaho34)

# Declare the survey design (same remark as in Exercise 1: the original
# `dep =`, `salud =` and `gasto =` are not as_survey_design() parameters).
survey_enaho34 <- enaho34 %>%
  as_survey_design(ids = 1)

# Share of household expenditure given by gru51hd / gashog2d.
survey_enaho34 <- survey_enaho34 %>%
  mutate(gasto_salud = as.numeric(gru51hd) / as.numeric(gashog2d))

# Mean share by region, using the survey estimator instead of a plain mean().
survey_enaho34 <- survey_enaho34 %>%
  group_by(region) %>%
  summarise(gasto_salud_promedio = survey_mean(gasto_salud, vartype = NULL))