library(haven)
library(dplyr)
library(stringr)
library(fastDummies)
library(srvyr)
library(survey)
"1.0 Set Directorio"
# Build the lab folder path from the USERNAME environment variable and make it
# the working directory; the relative data paths used below resolve against it.
user <- Sys.getenv("USERNAME")
setwd(
  paste0("C:/Users/", user, "/Documents/GitHub/1ECO35_2022_2/Lab7")
)
"2.0 Load dataset de ENAHO"
# Read ENAHO 2020 module 01 from disk once. read_dta() returns a tibble whose
# columns carry the Stata metadata as attributes.
enaho01 <- read_dta("../../../enaho/2020/737-Modulo01/737-Modulo01/enaho01-2020-100.dta")
enaho01$dominio
# Convert the tibble that is already in memory to a base data.frame instead of
# parsing the same .dta file a second time.
enaho01 <- data.frame(enaho01)
enaho01
enaho01$dominio
# Inspect the 'labels' and 'label' attributes attached to individual columns.
attr(enaho01$estrato, 'labels')
attr(enaho01$factor07, 'label')
names(enaho01)
# ENAHO 2020 module 02, loaded as a base data.frame.
enaho02 <- read_dta(
  "../../../enaho/2020/737-Modulo02/737-Modulo02/enaho01-2020-200.dta"
) %>%
  data.frame()
names(enaho02)
# Number of distinct values held by the facpob07 / factor07 and conglome
# columns of each module.
enaho02$facpob07 %>% unique() %>% length()
enaho02$conglome %>% unique() %>% length()
enaho01$factor07 %>% unique() %>% length()
enaho01$conglome %>% unique() %>% length()
# Column total of facpob07, then the distinct conglome values of module 01.
enaho02$facpob07 %>% sum()
enaho01$conglome %>% unique()
"Módulo02"
# Load the remaining ENAHO 2020 modules, each converted to a base data.frame.
# NOTE(review): enaho02 is read here again from the same file that was already
# loaded earlier in the script; the second read looks redundant - confirm.
enaho02 <- read_dta(
  "../../../enaho/2020/737-Modulo02/737-Modulo02/enaho01-2020-200.dta"
) %>%
  data.frame()
enaho03 <- read_dta(
  "../../../enaho/2020/737-Modulo03/737-Modulo03/enaho01a-2020-300.dta"
) %>%
  data.frame()
enaho04 <- read_dta(
  "../../../enaho/2020/737-Modulo04/737-Modulo04/enaho01a-2020-400.dta"
) %>%
  data.frame()
enaho05 <- read_dta(
  "../../../enaho/2020/737-Modulo05/737-Modulo05/enaho01a-2020-500.dta"
) %>%
  data.frame()
# Module 34 is the "sumaria" file; module 37 is the "700" file.
enaho34 <- read_dta(
  "../../../enaho/2020/737-Modulo34/737-Modulo34/sumaria-2020.dta"
) %>%
  data.frame()
enaho37 <- read_dta(
  "../../../enaho/2020/737-Modulo37/737-Modulo37/enaho01-2020-700.dta"
) %>%
  data.frame()
# Trim modules 02, 03 and 05 to the merge keys plus the variables kept for the
# analysis. Module 04 is left untouched.
enaho02 <- enaho02[, c(
  "conglome", "vivienda", "hogar", "codperso",
  "ubigeo", "dominio", "estrato", "p208a", "p209",
  "p207", "p203", "p201p", "p204", "facpob07"
)]
# "p301b" and "p301c" used to be listed twice here. Selecting a column name
# twice from a data.frame produces extra copies named "p301b.1" / "p301c.1",
# so each column is now requested exactly once.
# NOTE(review): if the repeated names stood in for other variables, add them.
enaho03 <- enaho03[, c(
  "conglome", "vivienda", "hogar", "codperso",
  "p301a", "p301b", "p301c", "p300a"
)]
enaho05 <- enaho05[, c(
  "conglome", "vivienda", "hogar", "codperso",
  "i524e1", "i538e1", "p558a5", "i513t", "i518",
  "p507", "p511a", "p512b", "p513a1", "p505", "p506", "d544t", "d556t1",
  "d556t2", "d557t", "d558t", "ocu500", "i530a", "i541a"
)]
"Left merge"
# Left merge: keep every row of enaho02 and attach the matching enaho01 columns
# through the three shared key columns.
enaho_merge <- merge(
  x = enaho02, y = enaho01,
  by = c("conglome", "vivienda", "hogar"),
  all.x = TRUE
)
# Same idea between modules 02 and 05, now with codperso added to the keys.
enaho_02_05 <- merge(
  x = enaho02, y = enaho05,
  by = c("conglome", "vivienda", "hogar", "codperso"),
  all.x = TRUE
)
# Right merge: keep every row of enaho05 instead. This overwrites the result
# of the previous statement.
enaho_02_05 <- merge(
  x = enaho02, y = enaho05,
  by = c("conglome", "vivienda", "hogar", "codperso"),
  all.y = TRUE
)
# Inner merge written explicitly: rows without a match on either side are
# dropped.
enaho_merge_inner <- merge(
  x = enaho02, y = enaho01,
  by = c("conglome", "vivienda", "hogar"),
  all.x = FALSE, all.y = FALSE
)
# Same inner merge relying on merge()'s defaults.
enaho_merge_inner <- merge(
  x = enaho02, y = enaho01,
  by = c("conglome", "vivienda", "hogar")
)
enaho_merge_02_05 <- merge(
  x = enaho02, y = enaho05,
  by = c("conglome", "vivienda", "hogar", "codperso")
)
# Outer merge: keep unmatched rows from both sides.
enaho_merge_outer <- merge(
  x = enaho02, y = enaho05,
  by = c("conglome", "vivienda", "hogar", "codperso"),
  all.x = TRUE, all.y = TRUE
)
# Equivalent shorthand: `all` sets all.x and all.y together.
enaho_merge_outer_2 <- merge(
  x = enaho02, y = enaho05,
  by = c("conglome", "vivienda", "hogar", "codperso"),
  all = TRUE
)
# Left merge with merge()'s default suffixes for the non-key columns that
# exist in both inputs.
enaho_merge <- merge(
  x = enaho02, y = enaho01,
  by = c("conglome", "vivienda", "hogar"),
  all.x = TRUE
)
names(enaho_merge)
# Both suffixes empty: clashing non-key columns from the two inputs are not
# renamed.
enaho_merge <- merge(
  x = enaho02, y = enaho01,
  by = c("conglome", "vivienda", "hogar"),
  all.x = TRUE, suffixes = c("", "")
)
# Keep the enaho02 name unchanged and tag the enaho01 copy with ".y".
enaho_merge <- merge(
  x = enaho02, y = enaho01,
  by = c("conglome", "vivienda", "hogar"),
  all.x = TRUE, suffixes = c("", ".y")
)
names(enaho_merge)
# Give the key columns of enaho05 different names so the next merge has to
# pair keys through by.x / by.y.
enaho05 <- dplyr::rename(
  enaho05,
  Conglo = conglome, viv = vivienda, hog = hogar, cod = codperso
)
# Outer merge pairing each enaho02 key with its renamed enaho05 counterpart.
enaho_02_05 <- merge(
  x = enaho02, y = enaho05,
  by.x = c("conglome", "vivienda", "hogar", "codperso"),
  by.y = c("Conglo", "viv", "hog", "cod"),
  all = TRUE
)
# Put the original key names back on enaho05 for the merges that follow.
enaho05 <- dplyr::rename(
  enaho05,
  conglome = Conglo, vivienda = viv, hogar = hog, codperso = cod
)
# Starting from enaho01, left-merge each listed module in turn on the
# conglome / vivienda / hogar keys. Columns that clash with ones already
# present get the ".y" suffix.
num <- list(enaho34, enaho37)
merge_hog <- enaho01
for (i in num) {
  merge_hog <- merge(
    x = merge_hog, y = i,
    by = c("conglome", "vivienda", "hogar"),
    all.x = TRUE, suffixes = c("", ".y")
  )
}
names(merge_hog)
# Starting from enaho02, left-merge modules 03, 04 and 05 in turn, this time
# with codperso included in the keys. Clashing columns from the module being
# added get the ".y" suffix.
num <- list(enaho03, enaho04, enaho05)
merge_ind <- enaho02
for (i in num) {
  merge_ind <- merge(
    x = merge_ind, y = i,
    by = c("conglome", "vivienda", "hogar", "codperso"),
    all.x = TRUE, suffixes = c("", ".y")
  )
}
names(merge_ind)
# Attach the merged conglome / vivienda / hogar level columns (merge_hog) to
# every row of merge_ind.
merge_base <- merge(
  merge_ind, merge_hog,
  by = c("conglome", "vivienda", "hogar"),
  all.x = TRUE, suffixes = c("", ".y")
)
colnames(merge_base)
# Positions of the duplicated columns that merge() tagged with ".y".
# The dot is escaped so that only a literal ".y" ending matches; the previous
# pattern ".y$" matched any character followed by "y".
index <- grep("\\.y$", colnames(merge_base))
# Keep every other column. Selecting by the complement (rather than negating
# `index`) still returns all columns when nothing matched, and drop = FALSE
# keeps the result a data.frame even if a single column remains.
merge_base_2020 <- merge_base[
  , setdiff(seq_along(merge_base), index),
  drop = FALSE
]
colnames(merge_base_2020)
# First two characters of ubigeo, alone and padded with "0000".
merge_base_2020$ubigeo_dep <- substr(merge_base_2020$ubigeo, 1, 2)
merge_base_2020$ubigeo_dep_2 <- paste0(
  substr(merge_base_2020$ubigeo, 1, 2),
  "0000"
)
# Keep only the four codes of interest and give each one a readable name.
merge_base_2020 <- merge_base_2020 %>%
  filter(ubigeo_dep %in% c("15", "03", "04", "12")) %>%
  mutate(
    region = case_when(
      ubigeo_dep == "03" ~ "Apurimac",
      ubigeo_dep == "04" ~ "Arequipa",
      ubigeo_dep == "12" ~ "Junin",
      ubigeo_dep == "15" ~ "Lima"
    )
  )
"ENAHO 2019"
# Load the ENAHO 2019 modules into the same object names used for 2020,
# overwriting them.
# NOTE(review): these paths sit under "datos/2019" while the 2020 files are
# read from "enaho/2020" - confirm both folder layouts really exist.
enaho01 <- read_dta(
  "../../../datos/2019/687-Modulo01/687-Modulo01/enaho01-2019-100.dta"
) %>%
  data.frame()
enaho02 <- read_dta(
  "../../../datos/2019/687-Modulo02/687-Modulo02/enaho01-2019-200.dta"
) %>%
  data.frame()
enaho03 <- read_dta(
  "../../../datos/2019/687-Modulo03/687-Modulo03/enaho01a-2019-300.dta"
) %>%
  data.frame()
enaho04 <- read_dta(
  "../../../datos/2019/687-Modulo04/687-Modulo04/enaho01a-2019-400.dta"
) %>%
  data.frame()
enaho05 <- read_dta(
  "../../../datos/2019/687-Modulo05/687-Modulo05/enaho01a-2019-500.dta"
) %>%
  data.frame()
enaho34 <- read_dta(
  "../../../datos/2019/687-Modulo34/687-Modulo34/sumaria-2019.dta"
) %>%
  data.frame()
enaho37 <- read_dta(
  "../../../datos/2019/687-Modulo37/687-Modulo37/enaho01-2019-700.dta"
) %>%
  data.frame()
# Trim the 2019 modules 02, 03 and 05 to the merge keys plus the variables
# kept for the analysis. Module 04 is left untouched.
enaho02 <- enaho02[, c(
  "conglome", "vivienda", "hogar", "codperso",
  "ubigeo", "dominio", "estrato", "p208a", "p209",
  "p207", "p203", "p201p", "p204", "facpob07"
)]
# "p301b" and "p301c" used to be listed twice here. Selecting a column name
# twice from a data.frame produces extra copies named "p301b.1" / "p301c.1",
# so each column is now requested exactly once.
# NOTE(review): if the repeated names stood in for other variables, add them.
enaho03 <- enaho03[, c(
  "conglome", "vivienda", "hogar", "codperso",
  "p301a", "p301b", "p301c", "p300a"
)]
enaho05 <- enaho05[, c(
  "conglome", "vivienda", "hogar", "codperso",
  "i524e1", "i538e1", "p558a5", "i513t", "i518",
  "p507", "p511a", "p512b", "p513a1", "p505", "p506", "d544t",
  "d556t1",
  "d556t2", "d557t", "d558t", "ocu500", "i530a", "i541a"
)]
# 2019: starting from enaho01, left-merge the listed modules on the
# conglome / vivienda / hogar keys; clashing columns get the ".y" suffix.
num <- list(enaho34, enaho37)
merge_hog <- enaho01
for (i in num) {
  merge_hog <- merge(
    x = merge_hog, y = i,
    by = c("conglome", "vivienda", "hogar"),
    all.x = TRUE, suffixes = c("", ".y")
  )
}
# 2019: starting from enaho02, left-merge modules 03, 04 and 05 with codperso
# added to the keys.
num <- list(enaho03, enaho04, enaho05)
merge_ind <- enaho02
for (i in num) {
  merge_ind <- merge(
    x = merge_ind, y = i,
    by = c("conglome", "vivienda", "hogar", "codperso"),
    all.x = TRUE, suffixes = c("", ".y")
  )
}
# Attach the merged conglome / vivienda / hogar level columns (merge_hog) to
# every row of merge_ind for 2019.
merge_base <- merge(
  merge_ind, merge_hog,
  by = c("conglome", "vivienda", "hogar"),
  all.x = TRUE, suffixes = c("", ".y")
)
# Positions of the duplicated columns that merge() tagged with ".y".
# The dot is escaped so that only a literal ".y" ending matches; the previous
# pattern ".y$" matched any character followed by "y".
index <- grep("\\.y$", colnames(merge_base))
# Keep every other column. Selecting by the complement (rather than negating
# `index`) still returns all columns when nothing matched, and drop = FALSE
# keeps the result a data.frame even if a single column remains.
merge_base_2019 <- merge_base[
  , setdiff(seq_along(merge_base), index),
  drop = FALSE
]
# Stack the 2020 rows on top of the 2019 rows.
# NOTE(review): merge_base_2020 has already been filtered by ubigeo_dep and has
# the extra ubigeo_dep / ubigeo_dep_2 / region columns, while merge_base_2019
# has not - confirm the mismatch is intended.
merge_append <- merge_base_2020 %>%
  bind_rows(merge_base_2019)
# Distinct values of the year column in the stacked data.
merge_append$aÑo %>% unique()
# Save the stacked data as a Stata file.
write_dta(data = merge_append, path = "../data/append_enaho_r.dta")
# Add income, expenditure and poverty indicators to the 2020 data in a single
# mutate(); later expressions reuse the columns created just above them.
merge_base_2020 <- merge_base_2020 %>%
  dplyr::mutate(
    # inghog1d and gashog2d divided by 12 and by mieperho
    # (presumably annual household totals and household size - confirm).
    ingreso_month_pc = inghog1d / (12 * mieperho),
    gasto_month_pc = gashog2d / (12 * mieperho),
    # Numeric and text flags for expenditure falling below `linea`.
    dummy_pobre = ifelse(gasto_month_pc < linea, 1, 0),
    pobre = ifelse(gasto_month_pc < linea, "pobre", "No pobre"),
    # Text version of the three codes found in `pobreza`.
    pc_pobre = case_when(
      pobreza == 1 ~ "Pobre extremo",
      pobreza == 2 ~ "Pobre",
      pobreza == 3 ~ "No pobre"
    )
  )
# Toy example: comparing NA with a number yields NA, so ifelse() leaves Dummy
# missing on the first row instead of assigning 0 or 1.
var1 <- c(NA, 2, 3)
var2 <- c(400, 1, 5)
base <- tibble(var1 = var1, var2 = var2)
mutate(base, Dummy = ifelse(var1 < var2, 1, 0))
# Number of missing values in gashog2d.
merge_base_2020$gashog2d %>% is.na() %>% sum()
# One indicator column per value of p301a (named p301a_<value>).
merge_base_2020 <- merge_base_2020 %>%
  dummy_cols(select_columns = 'p301a')
# Opens the data viewer, so this line needs an interactive session.
View(merge_base_2020[, c("p301a","p301a_1","p301a_2","p301a_3","p301a_4","p301a_5")])
# Frequency counts with dplyr and with base table().
merge_base_2020 %>% count(pobreza, sort = TRUE)
merge_base_2020 %>% count(pc_pobre, sort = FALSE)
table(merge_base_2020$pc_pobre)
table(merge_base_2020$p301a)
# Absolute and percentage frequency of each non-missing p301a value, ordered
# from most to least frequent.
merge_base_2020 %>%
  dplyr::filter(!is.na(p301a)) %>%
  group_by(p301a) %>%
  summarise(Freq.abs = n()) %>%
  mutate(Freq.relative = (Freq.abs / sum(Freq.abs)) * 100) %>%
  arrange(desc(Freq.relative))
# Summaries per conglome / vivienda / hogar group without removing NAs: any
# group containing a missing p301a gets NA results.
df1 <- merge_base_2020 %>%
  group_by(conglome, vivienda, hogar) %>%
  summarise(
    edu_min = min(p301a),
    sup_educ = sum(p301a_10),
    total_miembros = n(),
    edu_max = max(p301a),
    .groups = "keep"
  )
# Same summaries ignoring NAs. `.groups` is left unset here, so summarise()
# applies its default grouping of the result.
df1_no_missing <- merge_base_2020 %>%
  group_by(conglome, vivienda, hogar) %>%
  summarise(
    edu_min = min(p301a, na.rm = TRUE),
    sup_educ = sum(p301a_10, na.rm = TRUE),
    total_miembros = n(),
    edu_max = max(p301a, na.rm = TRUE)
  )
# NA-free version that also keeps the full grouping on the result.
df2 <- merge_base_2020 %>%
  group_by(conglome, vivienda, hogar) %>%
  summarise(
    edu_min = min(p301a, na.rm = TRUE),
    sup_educ = sum(p301a_10, na.rm = TRUE),
    total_miembros = n(),
    edu_max = max(p301a, na.rm = TRUE),
    .groups = "keep"
  )
# Mean of dummy_pobre for each ubigeo_dep / region pair.
df3 <- merge_base_2020 %>%
  group_by(ubigeo_dep, region) %>%
  summarise(
    index_poverty = mean(dummy_pobre, na.rm = TRUE),
    .groups = "keep"
  )
class(merge_base_2020$p505)
# srvyr survey design using conglome as ids, estrato as strata and facpob07 as
# weights. The argument is spelled `weights`, its documented name; the
# previous `weight =` was not the documented name and depended on R's partial
# argument matching to reach it.
survey_enaho <- merge_base_2020 %>%
  as_survey_design(ids = conglome, strata = estrato, weights = facpob07)
names(merge_base_2020)
# Restrict to p208a between 10 and 65, flag four p208a bands, then estimate by
# region the survey-weighted mean of each band flag and of two p301a dummies.
ind1 <- survey_enaho %>%
  dplyr::filter(p208a >= 10 & p208a <= 65) %>%
  mutate(
    g1 = ifelse(p208a >= 10 & p208a <= 20, 1, 0),
    g2 = ifelse(p208a > 20 & p208a <= 30, 1, 0),
    g3 = ifelse(p208a > 30 & p208a <= 40, 1, 0),
    g4 = ifelse(p208a > 40 & p208a <= 65, 1, 0)
  ) %>%
  group_by(region) %>%
  summarise(
    gp1 = survey_mean(g1),
    gp2 = survey_mean(g2),
    gp3 = survey_mean(g3),
    gp4 = survey_mean(g4),
    g_sec = survey_mean(p301a_6, na.rm = TRUE),
    g_uni_co = survey_mean(p301a_10, na.rm = TRUE)
  )
merge_base_2020$estrato
merge_base_2020$dominio
# Same design built with survey::svydesign(); this overwrites the srvyr object.
# `ids` is written in full instead of the partially matched `id`.
survey_enaho <- svydesign(
  ids = ~conglome,
  weights = ~facpob07,
  strata = ~estrato,
  data = merge_base_2020
)