# CoCalc -- apriori.R
#
# GitHub Repository: ethen8181/machine-learning
# Path: blob/master/association_rule/R/apriori.R
# 2615 views
library(DT) # for interactive data frame
library(arules)
library(data.table)

# Resolve the project directory up front; mustWork = TRUE makes a missing
# directory fail here with a clear message instead of a warning followed by
# a less obvious setwd() error.
wdpath <- normalizePath(
    '/Users/ethen/machine-learning/association_rule/R',
    mustWork = TRUE
)
setwd(wdpath)

# load() is expected to restore the `titanic.raw` object used below
load('titanic.raw.rdata')
dt <- data.table(titanic.raw)

# coerce the table to arules' 'transactions' class, then summarise
# the frequency of every item
titanic <- as(dt, 'transactions')
summary( itemFrequency(titanic) )

# train apriori on the titanic transactions
rules <- apriori(
    titanic,

    # minlen/maxlen denote the min/max number of items in an itemset
    parameter = list(support = 0.05, confidence = 0.7, minlen = 2, maxlen = 5),

    # for appearance we can specify that we only want rules whose rhs
    # contains "Survived" only; we then specify the default parameter
    # as 'lhs' to tell the algorithm that every other variable that
    # has not been specified can go in the left hand side
    appearance = list( rhs = c('Survived=No', 'Survived=Yes'), default = 'lhs' ),

    # don't print the algorithm's training message
    control = list(verbose = FALSE)
)


# convert the rules' info, such as the left and right hand side and all the
# quality measures, including support, confidence and lift, to a data.table
# sorted by decreasing lift
# http://stackoverflow.com/questions/25730000/converting-object-of-class-rules-to-data-frame-in-r
rules_dt <- data.table(
    lhs = labels( lhs(rules) ),
    rhs = labels( rhs(rules) ),
    quality(rules)
)[ order(-lift), ]

# -------------------------------------------------------------------------
# not included

# a scatter plot using support and confidence on the x and y axes,
# with the lift used as the color of the points
library(cowplot)
library(ggplot2)

# the title reports how many rules are being plotted
ggplot( rules_dt, aes(support, confidence, color = lift) ) +
    geom_point() +
    labs( title = sprintf( 'scatter plot for %d rules', nrow(rules_dt) ) )


# confirm that the toy python code's result matches R's apriori
# X is a 5 x 6 matrix of 0/1 flags, filled row by row
# (presumably one row per transaction and one column per item -- confirm
# against the python example)
X <- matrix(c(1, 1, 0, 0, 0, 0,
              1, 0, 1, 1, 1, 0,
              0, 1, 1, 1, 0, 1,
              1, 1, 1, 1, 0, 0,
              1, 1, 1, 0, 0, 1), ncol = 6, byrow = TRUE)

# note: this overwrites the `rules` object trained on the titanic data
rules <- apriori(
    X,

    # minlen/maxlen denote the min/max number of items in an itemset
    parameter = list( support = 0.5, confidence = 0.5, minlen = 2, maxlen = 5 ),

    # don't print the algorithm's training message
    control = list( verbose = FALSE )
)

# Product
#
# Resources
#
# Company