# Path: blob/main/Trabajo_grupal/WG7/Grupo_8_R.R
# 2714 views
# TAREA 7
# Grupo 8
#
# Cleans the administrative crime dataset (data_administrativa.xlsx):
# strips noise from names and birth dates, parses the birth date,
# normalises the `rank` labels, builds one 0/1 dummy per rank, and pulls
# several fields out of free-text columns with regular expressions.
#
# Results are left in the global environment as `data` (cleaned input,
# including every intermediate column) and `data1` (`data` plus the dummies
# and the extracted fields).

# Install the required packages only when they are missing, instead of
# re-installing them on every run. `local()` keeps the helper variables out
# of the global environment.
local({
  needed <- c("readxl", "lubridate", "tidyverse")
  is_installed <- vapply(
    needed,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1)
  )
  if (any(!is_installed)) {
    install.packages(needed[!is_installed])
  }
})

library(reshape)
library(haven)
library(dplyr)
library("readxl")
library(stringr)
library(lubridate) # provides dmy(); it was installed but never attached

#------- Reshape -----------

# 1.0 Set working directory

user <- Sys.getenv("USERNAME") # username

# NOTE(review): hard-coded, machine-specific path; kept because the relative
# data path below is resolved against it.
setwd(paste0("C:/Users/", user, "/Documents/GitHub/1ECO35_2022_2/Lab8"))

# load dataset

data <- read_excel("../data/crime_data/data_administrativa.xlsx")

# lower-case variable names
colnames(data) <- tolower(colnames(data))

# Every cleaning step below is vectorised over the whole column. The
# intermediate columns (inst1..inst4, date1..date3, ...) are kept so that any
# later code relying on them keeps working.

# clean names: drop digits, hyphens, dots and slashes, one step per column
data$inst1 <- gsub("[0-9]", "", data$nombre)
data$inst2 <- gsub("\\-", "", data$inst1)
data$inst3 <- gsub("\\.", "", data$inst2)
data$inst4 <- gsub("\\/", "", data$inst3)

# clean born_date: remove the first occurrence of each piece of noise
data$date1 <- str_replace(data$born_date, "00:00", "")
data$date2 <- str_replace(data$date1, '"#%', "")
data$date3 <- str_replace(data$date2, "!", "")

# born_date as a proper Date (day-month-year)
data <- data |>
  dplyr::mutate(date = dmy(date3))

# clean age: keep the first run of digits
data$age1 <- str_extract(data$age, "[0-9]+")

# clean rank: map misspellings and synonyms onto the canonical labels
data$rank1 <- str_replace(data$rank, "extorsionador", "extorsion")
data$rank2 <- str_replace(data$rank1, "novto", "novato")
data$rank3 <- str_replace(data$rank2, "noato", "novato")
data$rank4 <- str_replace(data$rank3, "principiante", "novato")

# create dummies
# Built from the cleaned `rank4`, not the raw `rank`: the raw column still
# holds "extorsionador", "novto", "noato" and "principiante", which would
# never equal the canonical labels compared against here.
# Each dummy is 1 on a match, 0 otherwise, and NA when the rank is missing.
data1 <- data |>
  dplyr::mutate(
    dum1 = as.numeric(rank4 == "lider de la banda criminal"),
    dum2 = as.numeric(rank4 == "cabecilla local"),
    dum3 = as.numeric(rank4 == "cabecilla regional"),
    dum4 = as.numeric(rank4 == "sicario"),
    dum5 = as.numeric(rank4 == "extorsion"),
    dum6 = as.numeric(rank4 == "miembro"),
    dum7 = as.numeric(rank4 == "novato")
  )

# str_match() returns a matrix whose column 1 is the full match and column 2
# the first capture group; `[, 2]` keeps the captured text (NA if no match).

# extract the user name from the e-mail address (word characters before "@")
data1$user <- str_match(data1$correo_abogado, "(\\w+)\\@.*")[, 2]

# extract dni: trailing "<digits>-<digits>"
data1$dni1 <- str_match(data1$dni, "\\.*(\\d+\\-\\d+)$")[, 2]

# create variable crimen: text following "Por "/"por "
# NOTE(review): the class [P/p] also matches a literal "/"; [Pp] was probably
# intended. Pattern left unchanged.
data1$crimen <- str_match(
  data1$observaciones,
  "\\.*+[P/p]or\\s([\\w*\\s]*)"
)[, 2]

# create variable n_hijos: digits following "Tiene "/"tiene "
data1$n_hijos <- str_match(
  data1$observaciones,
  "\\d*[Tt]iene\\s([0-9]*)"
)[, 2]

# create variable edad_inicio: text preceding "años", then its first number
# NOTE(review): the class [A/a] also matches a literal "/"; [Aa] was probably
# intended. Pattern left unchanged.
data1$edad_inicio1 <- str_match(
  data1$observaciones,
  "\\.*+([\\w*\\s]*)\\s[A/a]ños"
)[, 2]

data1$edad_inicio <- str_extract(data1$edad_inicio1, "[0-9]+")