Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Trabajo_grupal/WG7/Grupo_8_R.R
2714 views
1
#TAREA 7
2
#Grupo 8
3
4
# Clear the workspace so the script starts from an empty global environment.
# NOTE(review): this removes every object of whoever sources the script; it is
# kept because the original script does it, but a fresh R session is safer.
rm(list = ls(all.names = TRUE))

# Install the required packages only when they are missing, instead of
# downloading them again on every run.
required_pkgs <- c("readxl", "lubridate", "tidyverse")
is_installed <- vapply(
  required_pkgs,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}
rm(required_pkgs, is_installed)

# lubridate provides dmy(), which the date-cleaning step calls. It is attached
# first so that the packages below keep the masking order they had before.
library(lubridate)
library(reshape)
library(haven)
library(dplyr)
library(readxl)
library(stringr)
14
15
16
#------- Reshape -----------

# 1.0 Set working directory
# (This label used to be a bare string literal, i.e. an expression that R
# evaluates and auto-prints when the script is run; it is now a comment.)

# Value of the USERNAME environment variable, used to build the path below.
user <- Sys.getenv("USERNAME")

# NOTE(review): hard-coded per-user location on drive C:. The relative path
# used to load the data is resolved against this directory.
setwd(paste0("C:/Users/", user, "/Documents/GitHub/1ECO35_2022_2/Lab8"))

# Load the dataset from the Excel workbook.
data <- read_excel("../data/crime_data/data_administrativa.xlsx")

# Lower-case every column name.
colnames(data) <- tolower(colnames(data))
31
32
# Clean `nombre` in four steps, keeping every intermediate column
# (inst1..inst4). gsub() is vectorised, so each step processes the whole
# column at once instead of being called row by row through apply(), which
# also coerced the data frame to a matrix first.
data$inst1 <- gsub("[0-9]", "", data[["nombre"]]) # remove digits
data$inst2 <- gsub("\\-", "", data[["inst1"]])    # remove hyphens
data$inst3 <- gsub("\\.", "", data[["inst2"]])    # remove dots
data$inst4 <- gsub("\\/", "", data[["inst3"]])    # remove slashes
45
46
# Clean `born_date` in three steps, keeping every intermediate column
# (date1..date3). str_replace() is vectorised and replaces only the first
# occurrence in each value, exactly as the row-by-row apply() version did.
data$date1 <- str_replace(data[["born_date"]], "00:00", "") # time stamp
data$date2 <- str_replace(data[["date1"]], '"#%', "")       # stray symbols
data$date3 <- str_replace(data[["date2"]], "!", "")         # stray "!"

# Parse the cleaned text as a day-month-year date.
# dmy() is called through its namespace because the script never attaches
# lubridate, so a bare dmy() fails with "could not find function".
data <- data |>
  dplyr::mutate(date = lubridate::dmy(date3))
64
65
# Clean `age`: keep the first run of digits found in each value (NA when a
# value contains no digit). The result is stored as text in `age1`.
# (The header above this step used to say "born_date", a copy-paste leftover.)
# str_extract() is vectorised, so no row-by-row apply() is needed.
data$age1 <- str_extract(data[["age"]], "[0-9]+")
70
71
# Normalise the labels in `rank`, keeping every intermediate column
# (rank1..rank4). Each step rewrites one variant; `rank4` holds the final
# cleaned label. str_replace() is vectorised, so the whole column is handled
# in one call instead of row by row through apply().
data$rank1 <- str_replace(data[["rank"]], "extorsionador", "extorsion")
data$rank2 <- str_replace(data[["rank1"]], "novto", "novato")
data$rank3 <- str_replace(data[["rank2"]], "noato", "novato")
data$rank4 <- str_replace(data[["rank3"]], "principiante", "novato")
87
# Create one 0/1 indicator per rank category and store the result in `data1`.
# The comparison uses `rank4`, the cleaned label built just above: comparing
# against the raw `rank` column left every row with a misspelled or
# alternative label (e.g. "extorsionador", "novto") at 0 in all indicators.
# Rows whose rank is NA get NA, as ifelse() propagates missing values.
data1 <- data %>%
  dplyr::mutate(
    dum1 = ifelse(rank4 == "lider de la banda criminal", 1, 0),
    dum2 = ifelse(rank4 == "cabecilla local", 1, 0),
    dum3 = ifelse(rank4 == "cabecilla regional", 1, 0),
    dum4 = ifelse(rank4 == "sicario", 1, 0),
    dum5 = ifelse(rank4 == "extorsion", 1, 0),
    dum6 = ifelse(rank4 == "miembro", 1, 0),
    dum7 = ifelse(rank4 == "novato", 1, 0)
  )
110
111
# Extract the user name (the part before "@") from `correo_abogado`.
# The capture group accepts any character except "@" and whitespace, so local
# parts such as "nombre.apellido" or "nombre-apellido" are returned whole; the
# previous `\\w+` stopped at "." or "-" and returned only the last fragment.
# str_match() is vectorised: column 2 of its result is the first capture
# group, and rows without a match yield NA.
data1$user <- str_match(data1[["correo_abogado"]], "([^@\\s]+)@")[, 2]
116
117
# Extract the trailing "digits-digits" group from `dni`.
# The pattern is unchanged; its leading `\\.*` can only match literal dots.
# str_match() is vectorised: column 2 of its result is the capture group, and
# rows that do not end in "digits-digits" yield NA.
data1$dni1 <- str_match(data1[["dni"]], "\\.*(\\d+\\-\\d+)$")[, 2]
122
# Create `crimen`: the text that follows the word "por"/"Por" in
# `observaciones`, up to the first character that is not a word character,
# whitespace or "*".
# Changes to the pattern:
#   * `[P/p]` also matched a literal "/"; `[Pp]` matches only the two cases.
#   * `\\b` requires "por" to start a word, so it no longer matches the tail
#     of a longer word.
#   * the leading `\\.*+` was dropped: it could only consume literal dots and
#     never changed what the group captured.
# str_match() is vectorised; column 2 is the capture group (NA if no match).
data1$crimen <- str_match(
  data1[["observaciones"]],
  "\\b[Pp]or\\s([\\w*\\s]*)"
)[, 2]
126
# Create `n_hijos`: the digits that directly follow "tiene "/"Tiene " in
# `observaciones`, stored as text.
# The pattern is unchanged. Because the group is `[0-9]*`, a "tiene" that is
# not followed by a digit yields an empty string; rows without "tiene" yield
# NA.
# str_match() is vectorised; column 2 of its result is the capture group.
data1$n_hijos <- str_match(
  data1[["observaciones"]],
  "\\d*[Tt]iene\\s([0-9]*)"
)[, 2]
130
131
# Create `edad_inicio` in two steps.
# Step 1 (`edad_inicio1`): the run of word characters, whitespace and "*"
# that precedes " años"/" Años" in `observaciones`.
# Changes to the pattern:
#   * `[A/a]` also matched a literal "/"; `[Aa]` matches only the two cases.
#   * the leading `\\.*+` was dropped: it could only consume literal dots and
#     never changed what the group captured.
# str_match() is vectorised; column 2 is the capture group (NA if no match).
data1$edad_inicio1 <- str_match(
  data1[["observaciones"]],
  "([\\w*\\s]*)\\s[Aa]ños"
)[, 2]

# Step 2 (`edad_inicio`): the first run of digits inside that text, as text.
# NOTE(review): if the text before "años" holds more than one number, the
# first one is returned - confirm that this is the intended one.
data1$edad_inicio <- str_extract(data1[["edad_inicio1"]], "[0-9]+")
139