# Lab 3, part 1: attach packages, load the Netflix titles data set and explore
# its structure, its identifier column and its missing values.

# The bare string below is an inline note kept from the original script (it is
# echoed when run). It says: packages only need to be installed once; after
# that they are simply loaded with library().
'Solo es necesario cargar una vez los paquetes, luego simplemente debemos llamarlo:'
library(dplyr)
library(tidyr)
library(readxl)

# Show the working directory before trying to change it.
getwd()

# Build the lab folder from the account name. USERNAME is normally only
# defined on Windows, so switch directories only when the folder really exists
# instead of aborting the whole script on other machines.
user <- Sys.getenv("USERNAME")
print(user)
lab_dir <- paste0("C:/Users/", user, "/Documents/GitHub/1ECO35_2022_2/Lab3")
if (dir.exists(lab_dir)) {
  setwd(lab_dir)
} else {
  warning(
    "Lab folder not found: ", lab_dir,
    "; relative paths will be resolved from ", getwd(),
    call. = FALSE
  )
}

# Read the file once, treating empty fields and the literal "NA" as missing.
netflix <- read.csv(
  "../data/netflix_titles.csv",
  encoding = "UTF-8",
  na.strings = c("", "NA")
)

# Size and type of the loaded object.
dim(netflix)
class(netflix)

# Columns are not free-standing objects: a bare `release_year` is not in scope
# at top level. Reach columns through the data frame, or evaluate an expression
# inside it with with(). attach() is avoided because the attached copy goes
# stale as soon as `netflix` is reassigned.
print(netflix$release_year)
with(netflix, print(release_year))

# Structure and per-column classes.
str(netflix)
lapply(netflix, class)
str(lapply)
sapply(netflix, class)
summary(netflix)

# Check whether show_id identifies each row uniquely.
unique(netflix$show_id)
length(unique(netflix$show_id))
duplicated(netflix$show_id)
sum(duplicated(netflix$show_id))

# Three ways to select a column and the class each one returns.
class(netflix["director"])    # one-column data.frame
netflix["director"]
class(netflix[, "director"])  # vector (the single column is dropped out)
class(netflix$director)       # vector

# Inline note kept from the original script: R has two general notions of
# "missing", NA and NULL.
" En R, tenemos dos formas de missing, en general, NA y Null "
unique(netflix$director)
unique(netflix$director)[1]
any(is.na(netflix["director"]))
# is.null() answers for the object as a whole, not element by element, so it
# does not detect NA entries inside the column.
any(is.null(netflix["director"]))
any(is.na(netflix$director))
sum(is.na(netflix$director))
# Lab 3, part 2: dropping / filling missing values and selecting rows and
# columns by name or position.

# Drop every row that has a missing value in any column (result only printed).
drop_na(netflix)

# Same operation, this time stored in netflix2.
netflix2 <- drop_na(netflix)

# Drop only the rows whose director is missing (overwrites netflix2).
netflix2 <- drop_na(netflix, director)

# Replace missing directors with a placeholder label in the main table.
netflix <- replace_na(netflix, list(director = "Sin director"))

# Inline note kept from the original script: in R the modified object has to
# be assigned to a (new) name for the change to be kept, here netflix2.
"En R debe asignarse el objeto alterado a uno nuevo. En este caso a Netflix2"

# Keep three columns, selected by name.
netflix2 <- netflix[, c("director", "release_year", "show_id")]
View(netflix2)

# First 100 rows, two columns by name.
View(netflix[1:100, c("director", "release_year")])

# Rows 1, 10 and 100, columns by name and then by position.
View(netflix[c(1, 10, 100), c("director", "release_year")])
View(netflix[c(1, 10, 100), c(1, 5)])

# First 100 rows, columns 1 to 5 plus column 10.
View(netflix[1:100, c(1:5, 10)])

# All column names, then the second one.
names(netflix)
names(netflix)[2]
# Lab 3, part 3: create new columns and reorder the rows.

# Two columns of uniform random draws between 1 and 10, one per row.
netflix$number <- runif(n = nrow(netflix), min = 1, max = 10)
netflix[["number2"]] <- runif(n = nrow(netflix), min = 1, max = 10)

# Sum of two existing columns, written with two different accessors.
netflix$new <- netflix$release_year + netflix$number
netflix[["new2"]] <- with(netflix, release_year + number)

# Sort by release year, ascending.
netflix <- netflix[with(netflix, order(release_year)), ]

# Sort by release year, descending (negating the key reverses the order).
netflix <- netflix[with(netflix, order(-release_year)), ]

# Sort by release year, then by number2, both ascending.
netflix <- netflix[with(netflix, order(release_year, number2)), ]

# Sort by release year ascending and number2 descending.
netflix <- netflix[with(netflix, order(release_year, -number2)), ]
# Lab 3, part 4: filtering rows, first with base R indexing and then with
# dplyr::filter().
#
# Inside filter() columns are referred to by their bare names. Writing
# netflix$col there bypasses data masking: it reads the global `netflix`
# instead of the table flowing through the pipe, which gives wrong rows as
# soon as the two differ.

# Base R: which() turns the logical condition into row positions and leaves
# out rows where the condition is NA.
netflix2 <- netflix[which(netflix$release_year < 2011 & netflix$number > 5), ]
View(netflix2)

# Same filter with dplyr (result only printed).
netflix %>%
  filter(release_year < 2011, number > 5)

# Titles from a single country.
netflix2 <- netflix[which(netflix$country == "Peru"), ]
View(netflix2)
netflix2 <- netflix[which(netflix$country == "Mexico"), ]
View(netflix2)

# Either of two countries.
netflix2 <- netflix %>%
  filter(country == "Brazil" | country == "Peru")
View(netflix2)

# Several conditions that must all hold; comma-separated conditions in
# filter() are combined with AND.
netflix2 <- netflix %>%
  filter(
    type == "Movie",
    country == "United States",
    release_year > 2019
  )

# The bare string below is an inline note kept from the original script; it
# introduces the %in% operator for matching against several values.
"
If you want to filter by multiple values of a categorical variable,
the easiest way is to use the %in% method "
# Lab 3, part 5: matching against a set of values, renaming and dropping
# columns. Each statement overwrites data_frame with a new result.

# Rows whose country is one of the listed values.
data_frame <- netflix[with(netflix, country %in% c("Peru", "Chile")), ]

# The complement: rows whose country is not in the list.
data_frame <- netflix[!with(netflix, country %in% c("Peru", "Chile")), ]

# Rename two columns (new_name = old_name).
data_frame <- rename(netflix, Titulo = title, Duration_movie = duration)

# Keep every column except the two listed.
data_frame <- netflix[, !(colnames(netflix) %in% c("show_id", "director"))]
# Lab 3, part 6: export the working table.
# NOTE(review): the original passed `base`, which is never defined in this
# script, so each call failed with "object 'base' not found". `netflix` is
# assumed to be the table that was meant - confirm.
write.csv(netflix, "../data/new_base.csv")

# NOTE(review): write.csv() always produces CSV text, so this file is not a
# real Excel workbook despite its .xlsx extension. Writing a true workbook
# needs a writer package (e.g. writexl), which this script does not load; the
# path is left unchanged so existing consumers of the file are not broken.
write.csv(netflix, "../data/new_base.xlsx")