Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_grupal/WG6/Grupo_8_R.R
2714 views
1
#Tarea 6
2
#Grupo 8
3
4
5
#----------------------------------------------------------------------------------------------#
6
7
#PREGUNTA 1
8
9
# Session setup for Question 1.
#
# `rm(list = ls())` was dropped: it wipes every object in the caller's
# workspace whenever this script is sourced, and nothing below relies on an
# empty environment.

# Install pacman only when it is missing, instead of re-installing it on
# every run.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

# p_load() installs (if needed) and attaches each listed package.
pacman::p_load(haven, dplyr, stringr, fastDummies)

# MERGE DATASET

# NOTE(review): machine-specific absolute path. The relative paths used by the
# read_dta() calls that follow are resolved from this directory.
setwd("C:/Users/Alexander/Documents/2020/737-Modulo01/737-Modulo01")
18
19
# Question 1: build a 2019-2020 household panel from ENAHO module 01 and the
# "sumaria" module, attach the temporal deflator, and derive income and
# expenditure variables.

# Columns that identify a household in both modules.
llaves_hogar <- c("conglome", "vivienda", "hogar")

# Columns kept from the sumaria module. `ubigeo` is kept as well because the
# department code is derived from it further below; without it that step has
# nothing to read.
vars_sumaria <- c(
  llaves_hogar, "ubigeo", "ld", "mieperho", "inghog1d", "gashog2d", "aÑo"
)

# First, the 2020 data.
enaho01_2020 <- read_dta("../../../2020/737-Modulo01/737-Modulo01/enaho01-2020-100.dta")
enaho34_2020 <- read_dta("../../../2020/737-Modulo34/737-Modulo34/sumaria-2020.dta")

enaho01_2020 <- enaho01_2020[, llaves_hogar]
enaho34_2020 <- enaho34_2020[, vars_sumaria]

# Left join: every household of module 01 is kept.
enaho_merge_2020 <- merge(
  enaho01_2020, enaho34_2020,
  by = llaves_hogar,
  all.x = TRUE
)

# Then, the 2019 data.
enaho01_2019 <- read_dta("../../../2019/687-Modulo01/687-Modulo01/enaho01-2019-100.dta")
enaho34_2019 <- read_dta("../../../2019/687-Modulo34/687-Modulo34/sumaria-2019.dta")

enaho01_2019 <- enaho01_2019[, llaves_hogar]
enaho34_2019 <- enaho34_2019[, vars_sumaria]

enaho_merge_2019 <- merge(
  enaho01_2019, enaho34_2019,
  by = llaves_hogar,
  all.x = TRUE
)

# Append both years.
enaho_append <- bind_rows(enaho_merge_2019, enaho_merge_2020)
unique(enaho_append$aÑo)

# Temporal deflator table (2020 base).
deflactores_2020 <- read_dta("../../../2020/737-Modulo34/737-Modulo34/ConstVarGasto-Metodologia actualizada/Gasto2020/Bases/deflactores_base2020_new.dta")

# Monthly income and expenditure per household member.
enaho_append$ingreso_mensual <-
  enaho_append$inghog1d / (12 * enaho_append$mieperho)
enaho_append$gasto_mensual <-
  enaho_append$gashog2d / (12 * enaho_append$mieperho)

# Spatial deflator: deflating means dividing by `ld`, which is also what the
# per-variable divisions at the end of this block do. The previous version
# multiplied here.
enaho_append$ingreso_mensual_defl <-
  enaho_append$ingreso_mensual / enaho_append$ld
enaho_append$gasto_mensual_defl <-
  enaho_append$gasto_mensual / enaho_append$ld

# Department code = first two characters of ubigeo, converted to numeric
# before it is used as a join key against `dpto`.
enaho_append$departamento <- as.numeric(substr(enaho_append$ubigeo, 1, 2))

class(enaho_append$departamento)
class(deflactores_2020$dpto)

# Merge the appended panel with the temporal deflator by department and year.
enaho_merge_def_temporal_2020 <- merge(
  enaho_append, deflactores_2020,
  by.x = c("departamento", "aÑo"),
  by.y = c("dpto", "aniorec"),
  all.x = TRUE
)

unique(enaho_merge_def_temporal_2020$aÑo)
colnames(enaho_merge_def_temporal_2020)

# Divide income and expenditure by mieperho, ld, 12 and i00, one variable per
# divisor. The "/ 12" variables previously read `$ 12`, which does not parse.
enaho_merge_def_temporal_2020 <- enaho_merge_def_temporal_2020 %>%
  mutate(
    # Income
    ingr_Per1 = inghog1d / mieperho,
    ingr_Per2 = inghog1d / ld,
    ingr_Per3 = inghog1d / 12,
    ingr_Per4 = inghog1d / i00,
    # Expenditure
    gast_Per1 = gashog2d / mieperho,
    gast_Per2 = gashog2d / ld,
    gast_Per3 = gashog2d / 12,
    gast_Per4 = gashog2d / i00
  )
101
102
103
#----------------------------------------------------------------------------------------------#
104
105
#PREGUNTA2
106
107
#SALARIO POR HORA DEL TRABAJADOR DEPENDIENTE
108
109
# Question 2: hourly wage from ENAHO module 05 (2020).

# NOTE(review): machine-specific absolute path; the relative path below is
# resolved from this directory.
setwd("C:/Users/Alexander/Documents/2020/737-Modulo05/737-Modulo05")

# The file is read once and all three variables are created in a single
# mutate(). The previous version re-read the file before computing the wage,
# so the two helper columns did not exist at that point, and both pipelines
# ended in a dangling `%>%`.
enaho05_2020 <- read_dta("../../../2020/737-Modulo05/737-Modulo05/enaho01a-2020-500.dta") %>%
  mutate(
    # Annual income: i524e1 + i538e1. `+` yields NA when either term is NA.
    enaho05_2020_ingr_anual = i524e1 + i538e1,
    # Hours: i513t + i518.
    enaho05_2020_hrs_en_principal_y_2do_empleo = i513t + i518,
    # Hourly wage = annual income / (hours * 52). The column keeps its
    # original "_indep" name so existing references keep working.
    enaho_2020_salario_x_hora_trabajador_indep =
      enaho05_2020_ingr_anual /
        (enaho05_2020_hrs_en_principal_y_2do_empleo * 52)
  )
119
120
121
#----------------------------------------------------------------------------------------------#
122
123
#PREGUNTA3
124
#GROUP BY
125
126
#Librerías
127
library(haven) # leer archivos spss, stata, dbf, etc
128
library(dplyr) # limpieza de datos
129
library(stringr) # grep for regular expression
130
library(fastDummies) # crear dummy
131
library(srvyr) # libreria para declarar el diseño muestral de una encuesta
132
library(survey)
133
134
135
#Seteamos ubicación
136
# Question 3: maximum age per household (group by) and a dummy built on it.

# Set the working directory from the current Windows user name.
user <- Sys.getenv("USERNAME")
setwd(paste0("C:/Users/", user, "/Documents/GitHub/1ECO35_2022_2/Lab7"))

# Columns that identify a household.
llaves <- c("conglome", "vivienda", "hogar")

# Load the data sets.
enaho02 <- read_dta("../../../enaho/2020/737-Modulo02/enaho01-2020-200.dta")

# One row per household with the largest p208a value found in it.
# `.groups = "drop"` returns an ungrouped tibble so it can be merged safely.
base1 <- enaho02 %>%
  group_by(conglome, vivienda, hogar) %>%
  summarise(edad_max = max(p208a), .groups = "drop")

enaho34 <- read_dta("../../../enaho/2020/737-Modulo34/sumaria-2020.dta")

# Merge. `base1` is now part of the list: it is the only table that carries
# `edad_max`, and the dummy below needs that column in `merge1`.
num <- list(base1, enaho34)
merge1 <- enaho02

for (tabla in num) {
  merge1 <- merge(
    merge1, tabla,
    by = llaves,
    all.x = TRUE,
    # Columns repeated in the right-hand table get a ".y" suffix.
    suffixes = c("", ".y")
  )
}

names(merge1)

# Dummy variable: 1 when edad_max <= 65, 0 otherwise.
# NOTE(review): confirm that "<= 65" is the intended direction of the dummy.
pension <- merge1 %>%
  mutate(g1 = ifelse(edad_max <= 65, 1, 0))
165
166
167
#----------------------------------------------------------------------------------------------#
168
#PREGUNTA4
169
170
#INDICES
171
172
# Question 4: regional indices computed with a declared survey design.

# Set the working directory from the current Windows user name.
user <- Sys.getenv("USERNAME")
setwd(paste0("C:/Users/", user, "/Documents/GitHub/1ECO35_2022_2/Lab7"))

# Department name for each two-digit ubigeo prefix. Shared by both exercises
# so the mapping is written only once ("Tumber" corrected to "Tumbes").
nombres_departamento <- c(
  "01" = "Amazonas",
  "02" = "Ancash",
  "03" = "Apurimac",
  "04" = "Arequipa",
  "05" = "Ayacucho",
  "06" = "Cajamarca",
  "07" = "Callao",
  "08" = "Cusco",
  "09" = "Huancavelica",
  "10" = "Huanuco",
  "11" = "Ica",
  "12" = "Junin",
  "13" = "La Libertad",
  "14" = "Lambayeque",
  "15" = "Lima",
  "16" = "Loreto",
  "17" = "Madre de Dios",
  "18" = "Moquegua",
  "19" = "Pasco",
  "20" = "Piura",
  "21" = "Puno",
  "22" = "San Martin",
  "23" = "Tacna",
  "24" = "Tumbes",
  "25" = "Ucayali"
)

# Adds `ubigeo_dep` (first two characters of `ubigeo`) and `region` (the
# department name, NA when the code is not in the table) to a data frame.
agregar_region <- function(datos) {
  datos$ubigeo_dep <- substr(datos$ubigeo, 1, 2)
  datos$region <- unname(nombres_departamento[datos$ubigeo_dep])
  datos
}

# EXERCISE 1

# Load the data set and derive the region from ubigeo.
enaho37 <- agregar_region(
  read_dta("../../../enaho/2020/737-Modulo37/enaho01-2020-700.dta")
)

# Declare the survey design. The previous call passed `dep =` and
# `pension =`, which are not parameters of as_survey_design(), so no
# clusters, strata or weights were declared.
# NOTE(review): assumes this module carries the usual ENAHO design columns
# `estrato` and `factor07` -- confirm against the data dictionary.
survey_enaho37 <- enaho37 %>%
  mutate(p710_04 = as.numeric(p710_04)) %>%
  as_survey_design(ids = conglome, strata = estrato, weights = factor07)

# Mean of p710_04 by region, using survey estimators. The earlier
# group_by(vivienda) was removed: the following group_by(region) replaced it.
survey_enaho37 <- survey_enaho37 %>%
  group_by(region) %>%
  summarise(
    count = unweighted(n()),
    pension1 = survey_total(p710_04, na.rm = TRUE),
    promedio = survey_mean(p710_04, na.rm = TRUE)
  )

# EXERCISE 2

# Load the data set and derive the region from ubigeo.
enaho34 <- agregar_region(
  read_dta("../../../enaho/2020/737-Modulo34/sumaria-2020.dta")
)

# Declare the survey design (same assumption about `estrato` and `factor07`
# as in exercise 1 -- confirm).
survey_enaho34 <- enaho34 %>%
  as_survey_design(ids = conglome, strata = estrato, weights = factor07)

# Share of gru51hd in total household expenditure (gashog2d).
survey_enaho34 <- survey_enaho34 %>%
  dplyr::mutate(gasto_salud = as.numeric(gru51hd) / as.numeric(gashog2d))

# Survey-weighted mean of that share by region.
survey_enaho34 <- survey_enaho34 %>%
  group_by(region) %>%
  summarise(gasto_salud_promedio = survey_mean(gasto_salud, na.rm = TRUE))
258