Path: blob/master/association_rule/R/apriori.R
2615 views
# Association rule mining on the Titanic data set with arules::apriori,
# followed by a small sanity check on a toy 0/1 matrix.

library(DT) # for interactive data frame
library(arules)
library(data.table)

# Directory that holds the data file. The file is read through its full path
# so the session's working directory is left untouched (no setwd()).
wdpath <- normalizePath('/Users/ethen/machine-learning/association_rule/R')

# the .rdata file is expected to define `titanic.raw`, which is used below
load(file.path(wdpath, 'titanic.raw.rdata'))
dt <- data.table(titanic.raw)
titanic <- as(dt, 'transactions')
summary(itemFrequency(titanic))

# train apriori
rules <- apriori(
  titanic,

  # the min/max len denotes the min/max number of items in an itemset
  parameter = list(support = 0.05, confidence = 0.7, minlen = 2, maxlen = 5),

  # for appearance we can specify that we only want rules whose rhs
  # contains "Survived" only (we then specify the default parameter
  # as 'lhs' to tell the algorithm that every other variable that
  # has not been specified can go in the left hand side)
  appearance = list(rhs = c('Survived=No', 'Survived=Yes'), default = 'lhs'),

  # don't print the algorithm's training message
  control = list(verbose = FALSE)
)

# converting the rules' info, such as left and right hand side, and all the
# quality measures, including support, confidence and lift, to a data.table
# (sorted by decreasing lift)
# http://stackoverflow.com/questions/25730000/converting-object-of-class-rules-to-data-frame-in-r
rules_dt <- data.table(
  lhs = labels(lhs(rules)),
  rhs = labels(rhs(rules)),
  quality(rules)
)[order(-lift), ]

# -------------------------------------------------------------------------
# not included

# a scatter plot using support and confidence on the x and y axes,
# with the lift used as the color of the points
library(cowplot)
library(ggplot2)

ggplot(rules_dt, aes(support, confidence, color = lift)) +
  geom_point() +
  labs(title = sprintf('scatter plot for %d rules', nrow(rules_dt)))

# confirm that the toy python code's result matches R's apriori;
# the matrix is filled row by row (5 rows, 6 columns of 0/1 values)
X <- matrix(
  c(
    1, 1, 0, 0, 0, 0,
    1, 0, 1, 1, 1, 0,
    0, 1, 1, 1, 0, 1,
    1, 1, 1, 1, 0, 0,
    1, 1, 1, 0, 0, 1
  ),
  ncol = 6, byrow = TRUE
)

# NOTE: this reuses (and therefore overwrites) the `rules` object fitted on
# the Titanic data above; `rules_dt` has already been built at this point
rules <- apriori(
  X,

  # the min/max len denotes the min/max number of items in an itemset
  parameter = list(support = 0.5, confidence = 0.5, minlen = 2, maxlen = 5),

  # don't print the algorithm's training message
  control = list(verbose = FALSE)
)