Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_grupal/WG6/Grupo_4_r.R
2714 views
1
2
############################### WG # 6 ######################################
3
4
# Grupo 4
5
6
# Seidy Ascencios - 20191622
7
# Luana Morales - 20191240
8
# Marcela Quintero - 20191445
9
# Flavia Oré - 20191215
10
11
##############################################################################
12
# #
13
# PREGUNTA 1 #
14
# #
15
##############################################################################
16
17
18
#install.packages("stringr")
19
20
21
library(haven) # leer archivos spss, stata, dbf, etc
22
library(dplyr) # limpieza de datos
23
library(stringr) # grep for regular expression
24
library(fastDummies) # crear dummy
25
library(srvyr) # libreria para declarar el diseño muestral de una encuesta
26
library(survey)
27
28
"1) Set Directorio"
29
30
# Read the USERNAME environment variable and move into the WG6 folder under
# that user's Documents; the relative data paths used below start from here.
user <- Sys.getenv("USERNAME")
setwd(file.path("C:/Users", user, "Documents/GitHub/1ECO35_2022_2/Trabajo_grupal/WG6"))
33
34
35
"ENAHO 2020"
36
37
"2) Load dataset de ENAHO"
38
39
# Load the two 2020 files (enaho01-2020-100 and sumaria-2020). Each file is
# read once with haven::read_dta() and converted to a base data.frame.
enaho01 <- data.frame(
  read_dta("../../../../enaho/2020/737-Modulo01/737-Modulo01/enaho01-2020-100.dta")
)

enaho34 <- data.frame(
  read_dta("../../../../enaho/2020/737-Modulo34/737-Modulo34/sumaria-2020.dta")
)
54
55
56
"4) Merge section"
57
58
"Left merge"
59
60
#enaho34: master data
61
#enaho01: using data
62
63
# Left join: keep every row of enaho34 (master) and attach the matching
# enaho01 (using) columns, matching on conglome, vivienda and hogar.
enaho_merge <- merge(enaho34, enaho01,
  by = c("conglome", "vivienda", "hogar"),
  all.x = TRUE
)

# Positions of the columns whose name ends in a literal ".y" (the suffix
# merge() gives to the second table's copy of a shared column). The dot is
# escaped; an unescaped "." would match any character before the final "y".
index <- grep("\\.y$", colnames(enaho_merge))

# Keep every column except those positions. setdiff() is used rather than
# negative indexing because `[, -integer(0)]` would select no columns at all
# when nothing matches.
merge_base_2020 <- enaho_merge[, setdiff(seq_along(enaho_merge), index), drop = FALSE]
74
75
76
"ENAHO 2019"
77
78
79
enaho01_1 <- read_dta("../../../../enaho/2019/687-Modulo01/687-Modulo01/enaho01-2019-100.dta")
80
81
82
enaho34_1 <- read_dta("../../../../enaho/2019/687-Modulo34/687-Modulo34/sumaria-2019.dta")
83
84
85
"4) Merge section"
86
87
"Left merge"
88
89
#enaho34_1: master data
90
#enaho01_1: using data
91
92
# Left join for 2019: keep every row of enaho34_1 (master) and attach the
# matching enaho01_1 (using) columns, matching on conglome, vivienda and hogar.
enaho_merge_1 <- merge(enaho34_1, enaho01_1,
  by = c("conglome", "vivienda", "hogar"),
  all.x = TRUE
)

# Positions of the columns whose name ends in a literal ".y" (the suffix
# merge() gives to the second table's copy of a shared column). The dot is
# escaped; an unescaped "." would match any character before the final "y".
index_1 <- grep("\\.y$", colnames(enaho_merge_1))

# Keep every column except those positions. setdiff() is used rather than
# negative indexing because `[, -integer(0)]` would select no columns at all
# when nothing matches.
merge_base_2019 <- enaho_merge_1[, setdiff(seq_along(enaho_merge_1), index_1), drop = FALSE]
103
104
105
colnames(merge_base_2020)
106
107
#----------------------- Append -----------------------------------
108
109
# Stack the 2019 and 2020 merged tables row-wise (dplyr::bind_rows).
merge_append <- bind_rows(merge_base_2019, merge_base_2020)

# Show the distinct values of the year column.
# NOTE(review): the rename below implies the column is still called aÑo.x
# here, so this `$` access presumably relies on partial matching - confirm.
unique(merge_append$aÑo)

# Drop the ".x" suffix from the year and ubigeo columns.
merge_append <- dplyr::rename(merge_append, aÑo = aÑo.x, ubigeo = ubigeo.x)

# substr() extracts characters from a string: keep the first two of ubigeo.
merge_append["ubigeo_dep"] <- substr(merge_append$ubigeo, 1, 2)
122
123
#----------------------- Deflactar -----------------------------------
124
125
# Load the deflator table and rename its aniorec column to aÑo so it carries
# the same key name that the merge below uses.
deflactores_base2020_new <- read_dta(
  "../../../../enaho/2020/737-Modulo34/737-Modulo34/ConstVarGasto-Metodologia actualizada/Gasto2020/Bases/deflactores_base2020_new.dta"
) %>%
  dplyr::rename(aÑo = aniorec)

# List the column names of the deflator table.
names(deflactores_base2020_new)
132
133
"4) Merge section deflactores"
134
135
136
# merge usando como llaves a las variables dpto y aniorec.
137
#merge_append: master data
138
#deflactores_base2020_new: using data
139
140
141
# Left join the deflators onto the stacked table, matching on dpto and aÑo.
# Empty suffixes mean columns shared by both tables keep identical names.
# NOTE(review): the visible code only creates `ubigeo_dep` on merge_append;
# confirm that a `dpto` column exists there and that both keys have the same
# type in the two tables.
enaho_merge_defla <- merge(merge_append, deflactores_base2020_new,
  by = c("dpto", "aÑo"),
  all.x = TRUE, suffixes = c("", "")
)

colnames(enaho_merge_defla)

# Monthly per-capita income and spending: the household totals inghog1d and
# gashog2d divided by 12, by mieperho, and by the ld and i00 columns.
enaho_merge_defla <- enaho_merge_defla %>%
  mutate(
    ingreso_month_pc = inghog1d / (12 * mieperho * ld * i00),
    gasto_month_pc = gashog2d / (12 * mieperho * ld * i00)
  )
152
153
154
155
156
157
#------------------------------------------------------------------------------#
158
# #
159
# PREGUNTA 2 - GROUP BY #
160
# #
161
#------------------------------------------------------------------------------#
162
163
#Importamos los programas necesarios
164
165
166
library(haven)
167
library(dplyr)
168
library(stringr)
169
library(fastDummies)
170
library(srvyr)
171
library(survey)
172
173
# Seteamos el directorio
174
175
# Read the USERNAME environment variable and move into the WG6 folder under
# that user's Documents; the relative data paths used below start from here.
user <- Sys.getenv("USERNAME")
setwd(file.path("C:/Users", user, "Documents/GitHub/1ECO35_2022_2/Trabajo_grupal/WG6"))
178
179
#Leemos la base de datos
180
181
# Load enaho01-2020-200.dta once, as a base data.frame. The path is a plain
# string: a raw string would need delimiters, e.g. r"(...)".
# NOTE(review): Question 1 reaches the enaho folder with four ".." levels from
# the same working directory, while this section uses three - confirm which
# depth is correct.
enaho_2 <- data.frame(
  read_dta("../../../enaho/2020/737-Modulo02/737-Modulo02/enaho01-2020-200.dta")
)

# Look at the dominio column.
enaho_2$dominio

# Look at the column names before selecting variables.
names(enaho_2)

# Keep only the columns used below: the three merge keys and p208a.
hogares <- enaho_2[, c("conglome", "vivienda", "hogar", "p208a")]
202
203
204
#Hacemos un merge con el modulo 34 para obtener los datos que nos faltan (pobreza)
205
206
#Para ello primero cargamos la base de datos (modulo 34) y obtenemos sus labels
207
208
209
# Load sumaria-2020.dta as a base data.frame and list its column names.
enaho34 <- data.frame(
  read_dta("../../../enaho/2020/737-Modulo34/737-Modulo34/sumaria-2020.dta")
)

names(enaho34)

# Keep only the three merge keys and the pobreza column (a plain column
# selection; no grouping is involved).
hogares34 <- enaho34[, c("conglome", "vivienda", "hogar", "pobreza")]

# Merge hogares with hogares34 on conglome, vivienda and hogar. With no
# all/all.x/all.y argument, only rows with a match in both tables are kept.
enaho_merge <- merge(hogares, hogares34,
  by = c("conglome", "vivienda", "hogar")
)

print(enaho_merge)
227
228
229
230
# Build the dummy that flags rows where p208a is at least 65 and pobreza is
# below 3 (per the original note: a poor household with a member aged 65 or
# older).
# The whole condition is wrapped in as.numeric() so the column holds 1/0.
# Writing `(a) & (b) * 1` does not achieve that: `*` binds tighter than `&`,
# so only the second term is multiplied and the result stays logical.
# `[[` extracts each column as a vector, so the result is a plain vector.
enaho_merge["dummypension"] <- as.numeric(
  enaho_merge[["p208a"]] >= 65 & enaho_merge[["pobreza"]] < 3
)

# Show the dummy: 1 when both conditions hold, 0 when they do not.
print(enaho_merge["dummypension"])
240
241
242
243
244
245
246
247
#------------------------------------------------------------------------------#
248
# #
249
# PREGUNTA 4 - indicadores #
250
# #
251
#------------------------------------------------------------------------------#
252
253
#Insatalamos los paquetes necesarios
254
255
pacman::p_load(haven,dplyr, stringr, fastDummies,srvyr )
256
257
library(reshape)
258
library(haven)
259
library(dplyr)
260
library (srvyr)
261
library(survey)
262
263
#insertamos las bases de datos
264
# Move into the data_enaho folder under the current user's Documents.
# NOTE(review): the two reads below use absolute paths under C:/Users/seibe,
# so they do not depend on this working directory or on `user`.
user <- Sys.getenv("USERNAME")
setwd(file.path("C:/Users", user, "Documents/data_enaho"))

# Load enaho01-2020-700.dta and open it in the data viewer.
enaho.700 <- read_dta("C:/Users/seibe/OneDrive/Documents/DATA_STATA/enaho01-2020-700.dta")
View(enaho.700)

# Load sumaria-2020-12g.dta and open it in the data viewer.
enaho.sumaria <- read_dta("C:/Users/seibe/OneDrive/Documents/DATA_STATA/sumaria-2020-12g.dta")
View(enaho.sumaria)
273
274
#Especificamos el diseño muestral
275
# Declare the sampling design of each table: conglome as the cluster id,
# estrato as the stratum and factor07 as the weight. The argument is spelled
# `weights` in full instead of relying on partial matching of `weight`.
# NOTE(review): in the visible part of this script the regional means further
# down are computed on the plain tables, not on these design objects.
survey_enaho <- enaho.700 %>%
  as_survey_design(ids = conglome, strata = estrato, weights = factor07)

View(survey_enaho)

survey_enaho_2 <- enaho.sumaria %>%
  as_survey_design(ids = conglome, strata = estrato, weights = factor07)

View(survey_enaho_2)
284
285
#Creamos la variable region para ambas muestras
286
287
# Lookup from the two-character code at the start of ubigeo to the region
# name. It is defined once and applied to both tables so the labels cannot
# drift apart.
region_by_code <- c(
  "01" = "Amazonas",
  "02" = "Ancash",
  "03" = "Apurimac",
  "04" = "Arequipa",
  "05" = "Ayacucho",
  "06" = "Cajamarca",
  "07" = "Callao",
  "08" = "Cusco",
  "09" = "Huancavelica",
  "10" = "Huanuco",
  "11" = "Ica",
  "12" = "Junin",
  "13" = "La Libertad",
  "14" = "Lambayeque",
  "15" = "Lima",
  "16" = "Loreto",
  "17" = "Madre de Dios",
  "18" = "Moquegua",
  "19" = "Pasco",
  "20" = "Piura",
  "21" = "Puno",
  "22" = "San Martin",
  "23" = "Tacna",
  "24" = "Tumbes",
  "25" = "Ucayali"
)

# Module 700: overwrite ubigeo with its first two characters, then add the
# region name. Codes missing from the lookup give NA.
enaho.700["ubigeo"] <- substr(enaho.700$ubigeo, 1, 2)

enaho.700 <- enaho.700 %>%
  mutate(region = unname(region_by_code[ubigeo]))

View(enaho.700[, c("region")])

# Sumaria: same two steps.
enaho.sumaria["ubigeo"] <- substr(enaho.sumaria$ubigeo, 1, 2)

enaho.sumaria <- enaho.sumaria %>%
  mutate(region = unname(region_by_code[ubigeo]))

View(enaho.sumaria[, c("region")])
349
350
# Se halla el porcentaje que hogares a nivel departamental (o region) que se beneficia del programa.
351
352
# Mean of p710_04 within each (ubigeo, region) group, ignoring NAs.
# NOTE(review): this is a plain mean() on the table, so the factor07 weights
# are not applied; it reads as a share only if p710_04 is coded 0/1 - confirm.
bene_prog <- enaho.700 %>%
  group_by(ubigeo, region) %>%
  summarise(porc_bene_pro = mean(p710_04, na.rm = TRUE), .groups = "keep")

View(bene_prog)

# Ratio of gru51hd to gashog2d for each row (described in this script as the
# share of household spending that goes to health).
enaho.sumaria <- mutate(enaho.sumaria, gasto_anual_hogar = gru51hd / gashog2d)

View(enaho.sumaria[, c("gasto_anual_hogar", "region")])

# Mean of that ratio within each (ubigeo, region) group, ignoring NAs
# (also unweighted).
gasto_salud <- enaho.sumaria %>%
  group_by(ubigeo, region) %>%
  summarise(porc_gasto_salud = mean(gasto_anual_hogar, na.rm = TRUE), .groups = "keep")

View(gasto_salud)
368
369