Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Lab8/script_reshape_r.R
2714 views
1
################ laboratorio 7 ############################
2
## Curso: Laboratorio de R y Python ###########################
3
## @author: Roberto Mendoza
4
## Clean dataset
5
6
7
library(reshape)
8
library(haven)
9
library(dplyr)
10
11
#------- Reshape -----------
12
13
"1.0 Set Directorio"
14
15
# Account name taken from the USERNAME environment variable.
user <- Sys.getenv("USERNAME")

# Make the Lab8 folder of the course repository the working directory, so
# the relative data path used later resolves from there.
setwd(
  paste0(
    "C:/Users/",
    user,
    "/Documents/GitHub/1ECO35_2022_2/Lab8"
  )
)
18
19
20
# Load the panel dataset (Stata file; strings decoded as latin1).
panel <- read_dta(
  "../../../datos/panel/743-Modulo1478/sumaria-2016-2020-panelf.dta",
  encoding = "latin1"
)

# Keep only rows with hpanel1620 == 1 (households interviewed in every
# round from 2016 to 2020). The filter runs before the names are
# lower-cased, so the column is addressed by its name as stored in the file.
# which() turns the comparison into row positions and thereby drops rows
# where hpanel1620 is NA; indexing with the raw logical vector would insert
# an all-NA row for each of them.
panel <- panel[which(panel$hpanel1620 == 1), ]

# Lower-case every variable name.
colnames(panel) <- tolower(colnames(panel))
33
34
# Select the variables of interest ----

# Inspect the available column names.
colnames(panel)

# One regex alternative per variable stub to keep. The pattern is assembled
# from a vector so that no line break can end up inside it: when the pattern
# was written as a single string literal spanning two source lines, the
# newline became part of the "(inghog1d)" alternative and that variable was
# never selected.
keep_stubs <- c(
  "(año)", "(^conglome)", "(vivienda)", "(hogar)", "(estrato_)",
  "(mieperho)", "(gashog2d)", "(inghog1d)", "(pobreza_)", "(factor07)"
)

# Positions of the matching columns.
index <- grep(paste(keep_stubs, collapse = "|"), colnames(panel))

print(colnames(panel)[index])

# Rename the year variables and the two identifier columns. `index` holds
# column positions, so renaming before subsetting does not affect which
# columns are kept.
panel <- panel %>%
  dplyr::rename(
    "year_16" = "año_16",
    "year_17" = "año_17",
    "year_18" = "año_18",
    "year_19" = "año_19",
    "year_20" = "año_20",
    "cong" = "conglome",
    "viv" = "vivienda"
  )

panel <- panel[, index]

# The number of columns kept is whatever the pattern matched; nothing below
# depends on a fixed count.

# Household identifier: one running number per row. seq_len() also behaves
# correctly when the filtered panel has zero rows.
panel$hog <- seq_len(nrow(panel))

# Put the three identifier columns first.
panel <- panel %>%
  select(cong, viv, hog, everything())

# Wide -> long ----
# NOTE(review): the argument names used here (idvar, varying, timevar,
# times, direction) are those of base R's stats::reshape(); presumably that
# is the function this call resolves to -- confirm.

id_cols <- c("cong", "viv", "hog")

# Every non-identifier column is treated as time-varying and is split into
# stub and year at the "_" separator. Deriving the set from the column names
# (instead of a fixed 4:48 range) keeps the call correct whatever number of
# columns was selected above.
wide_cols <- setdiff(colnames(panel), id_cols)

new_panel <- reshape(
  data = panel,
  idvar = id_cols,
  varying = wide_cols,
  sep = "_",
  timevar = "time_var",
  times = c(16, 17, 18, 19, 20),
  direction = "long"
)

# Drop the helper identifier columns and the generated time index.
new_panel$cong <- NULL
new_panel$viv <- NULL
new_panel$hog <- NULL
new_panel$time_var <- NULL

# Sort for visual inspection of the panel.
# NOTE(review): this relies on the long data containing `conglome`,
# `vivienda`, `hogar` and `year` columns, i.e. the wide file presumably
# carries conglome_YY / vivienda_YY / hogar_YY variables -- confirm.
new_panel <- new_panel[
  order(
    new_panel$conglome,
    new_panel$vivienda,
    new_panel$hogar,
    new_panel$year
  ),
]
85
86
87
88
89