# Book a Demo!
# CoCalc Logo Icon
# StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
# robertopucp
# GitHub Repository: robertopucp/1eco35_2022_2
# Path: blob/main/Trabajo_grupal/WG6/Solución/solucion_r.R
# 2835 views
1
################ Solución R ############################
2
3
4
# Start from an empty global environment (hidden objects included).
rm(list = ls(all.names = TRUE))

# Load the libraries used in this script.
librarian::shelf(tidyverse, haven, srvyr)


# 1.0 Merge datasets ----------------------------------------------

# Account name taken from the USERNAME environment variable.
user <- Sys.getenv("USERNAME")

# Point the working directory at the local copy of the repository; every
# relative data path used below is resolved against it.
setwd(paste0("C:/Users/", user, "/Documents/GitHub/1ECO35_2022_2"))
18
19
20
# 2019 ENAHO inputs: the Modulo01 file and the "sumaria" file (Modulo34).
enaho_19_01 <- haven::read_dta(
  "../../datos/2019/687-Modulo01/687-Modulo01/enaho01-2019-100.dta"
)

enaho_19_34 <- haven::read_dta(
  "../../datos/2019/687-Modulo34/687-Modulo34/sumaria-2019.dta"
)

# Deflator table (base 2020) used later to build the deflated variables.
deflactor <- haven::read_dta(
  "../../datos/2020/737-Modulo34/737-Modulo34/ConstVarGasto-Metodologia actualizada/Gasto2020/Bases/deflactores_base2020_new.dta"
)

# Interactive checks: class of every deflator column and of the year column,
# which matter for the join keys built further down.
sapply(deflactor, class)

class(enaho_19_01$aÑo)
33
34
## Year 2019 ##

# Sumaria: keep only the join keys and the variables needed later.
enaho_19_34 <- dplyr::select(
  enaho_19_34,
  conglome, vivienda, hogar, mieperho, inghog1d, gashog2d, ld
)

# Module 1 (dwelling and household characteristics): rename the year column
# while selecting, then attach the sumaria variables to each row.
enaho_merge_19 <- enaho_19_01 |>
  dplyr::select(year = aÑo, conglome, vivienda, hogar, ubigeo) |>
  dplyr::left_join(enaho_19_34, by = c("conglome", "vivienda", "hogar"))
47
48
49
## Year 2020 ##

# NOTE(review): this path points at Modulo02 / enaho01-2020-200.dta, whereas
# the 2019 counterpart reads Modulo01 / enaho01-2019-100.dta, and the very same
# 2020 file is read again later in this script as enaho_20_02. Confirm whether
# the 2020 Modulo01 file was intended here.
enaho_20_01 <- haven::read_dta(
  "../../datos/2020/737-Modulo02/737-Modulo02/enaho01-2020-200.dta"
)

enaho_20_34 <- haven::read_dta(
  "../../datos/2020/737-Modulo34/737-Modulo34/sumaria-2020.dta"
)

# Sumaria 2020: same subset of columns that was kept for 2019.
enaho_20_34 <- dplyr::select(
  enaho_20_34,
  conglome, vivienda, hogar, mieperho, inghog1d, gashog2d, ld
)

# Build the 2020 table like the 2019 one, stack 2019 underneath it, attach the
# deflator by year and department, and compute the deflated variables.
enaho_append <- enaho_20_01 |>
  dplyr::select(year = aÑo, conglome, vivienda, hogar, ubigeo) |>
  dplyr::left_join(enaho_20_34, by = c("conglome", "vivienda", "hogar")) |>
  dplyr::bind_rows(enaho_merge_19) |>
  dplyr::mutate(
    # First two characters of ubigeo, converted to numeric (department code).
    dep = as.numeric(substr(ubigeo, 1, 2)),
    # year arrives as a string; convert it so it can match the deflator key.
    year = as.numeric(year)
  ) |>
  dplyr::left_join(deflactor, by = c("year" = "aniorec", "dep" = "dpto")) |>
  dplyr::mutate(
    # Household totals divided by 12 * ld * i00 * mieperho.
    ing_pc_real = inghog1d / (12 * ld * i00 * mieperho),
    gas_pc_real = gashog2d / (12 * ld * i00 * mieperho)
  )
72
73
74
75
# Hourly wage ---------------------------------------------

enaho_20_05 <- read_dta("../../datos/2020/737-Modulo05/737-Modulo05/enaho01a-2020-500.dta")

# Hourly wage = (i524e1 + i538e1) / (52 * (i513t + i518)), where a missing
# component counts as zero in each sum.
#
# The sums are computed with rowSums() on whole columns instead of rowwise()
# + sum(): same values, but vectorised, and the result is an ordinary tibble
# rather than a rowwise data frame that later verbs would silently treat row
# by row.
#
# hour_wage is set to NA whenever it is zero or not finite. This covers the
# 0/0 case (NaN) and also positive income over zero hours (Inf), which the
# previous `%in% c(0, NaN)` test let through.
enaho_20_05 <- enaho_20_05 |>
  select(conglome, vivienda, hogar, i524e1, i538e1, i513t, i518) |>
  mutate(
    suma_ingreso = rowSums(
      cbind(as.numeric(i524e1), as.numeric(i538e1)),
      na.rm = TRUE
    ),
    total_horas = rowSums(
      cbind(as.numeric(i513t), as.numeric(i518)),
      na.rm = TRUE
    ),
    hour_wage = suma_ingreso / (52 * total_horas),
    hour_wage = if_else(
      is.finite(hour_wage) & hour_wage != 0,
      hour_wage,
      NA_real_
    )
  )
89
90
91
# Group by -------------------------------------------------------------

enaho_20_02 <- haven::read_dta(
  "../../datos/2020/737-Modulo02/737-Modulo02/enaho01-2020-200.dta"
)

# Re-read the full sumaria file: the copy loaded earlier was cut down to a few
# columns, and pobreza, estrato, ubigeo, gru51hd and factor07 are used below.
enaho_20_34 <- haven::read_dta(
  "../../datos/2020/737-Modulo34/737-Modulo34/sumaria-2020.dta"
)

# Maximum age (p208a) within each conglome/vivienda/hogar combination, one row
# per combination. .groups = "keep" leaves the result grouped by those keys.
df <- enaho_20_02 |>
  dplyr::group_by(conglome, vivienda, hogar) |>
  dplyr::summarise(edad_max = max(p208a, na.rm = TRUE), .groups = "keep")

# Same summary table with the grouping removed.
df2 <- dplyr::ungroup(df)

# Maximum age repeated on every original row of the selected columns
# (mutate() instead of summarise()); the result stays grouped.
df3 <- enaho_20_02 |>
  dplyr::select(conglome, vivienda, hogar, p208a) |>
  dplyr::group_by(conglome, vivienda, hogar) |>
  dplyr::mutate(edad_max = max(p208a, na.rm = TRUE))
114
115
116
117
# Merge with the sumaria file (module 34)

# Flag rows whose maximum age is at least 65 and whose pobreza code is 1 or 2.
# `%in%` never returns NA, so a missing pobreza is treated as "not 1 or 2" and
# yields 0; the flag is NA only when edad_max is NA while pobreza is 1 or 2.
enaho_pension <- df |>
  dplyr::left_join(enaho_20_34, by = c("conglome", "vivienda", "hogar")) |>
  dplyr::mutate(
    hogar_benf_pen = ifelse((pobreza %in% c(1, 2)) & edad_max >= 65, 1, 0)
  )

# Frequency of the flag.
enaho_pension |>
  dplyr::pull(hogar_benf_pen) |>
  table()

# Original author's note: the counts were checked against the Python solution.
129
130
# Indicators ----

# Social programmes

enaho_20_37 <- haven::read_dta(
  "../../datos/2020/737-Modulo37/737-Modulo37/enaho01-2020-700.dta"
)

# Keep the join keys plus p710_04 only.
enaho_20_37 <- dplyr::select(enaho_20_37, conglome, vivienda, hogar, p710_04)

# Sumaria columns + p710_04, plus three derived columns:
#   dep          first two characters of ubigeo
#   health_spend gru51hd as a percentage of gashog2d
#   region       department name looked up from dep (NA when dep has no entry)
enaho_20 <- enaho_20_34 |>
  dplyr::select(
    conglome, vivienda, hogar, estrato, ubigeo, gru51hd, gashog2d, factor07
  ) |>
  dplyr::left_join(enaho_20_37, by = c("conglome", "vivienda", "hogar")) |>
  dplyr::mutate(
    dep = substr(ubigeo, 1, 2),
    health_spend = (gru51hd / gashog2d) * 100,
    # Named lookup vector indexed by the two-character code; unname() drops
    # the codes that indexing would otherwise attach as element names.
    region = unname(
      c(
        "01" = "Amazonas",
        "02" = "Ancash",
        "03" = "Apurimac",
        "04" = "Arequipa",
        "05" = "Ayacucho",
        "06" = "Cajamarca",
        "07" = "Callao",
        "08" = "Cusco",
        "09" = "Huancavelica",
        "10" = "Huanuco",
        "11" = "Ica",
        "12" = "Junin",
        "13" = "La Libertad",
        "14" = "Lambayeque",
        "15" = "Lima",
        "16" = "Loreto",
        "17" = "Madre de Dios",
        "18" = "Moquegua",
        "19" = "Pasco",
        "20" = "Piura",
        "21" = "Puno",
        "22" = "San Martin",
        "23" = "Tacna",
        "24" = "Tumbes",
        "25" = "Ucayali"
      )[dep]
    )
  )
171
172
# Declare the survey design: conglome as the sampling-unit id, estrato as the
# stratum and factor07 as the weight. The original comment notes that factor07
# is the household-level expansion factor.
# The argument is spelled `weights` in full; the previous `weight =` only
# reached it through partial argument matching.
survey_enaho <- enaho_20 |>
  as_survey_design(ids = conglome, strata = estrato, weights = factor07)

# Inspect what the design object carries.
attributes(survey_enaho)

# Design-based means by region:
#   percent_juntos  mean of p710_04 multiplied by 100
#   percent_health  mean of health_spend (already a percentage)
table_ind <- survey_enaho |>
  group_by(region) |>
  summarise(
    percent_juntos = survey_mean(p710_04, na.rm = TRUE) * 100,
    percent_health = survey_mean(health_spend, na.rm = TRUE)
  )
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222