Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ethen8181
GitHub Repository: ethen8181/machine-learning
Path: blob/master/association_rule/R/apriori.R
2615 views
1
library(DT) # for interactive data frame
2
library(arules)
3
library(data.table)
4
# Directory that holds the Titanic data set. mustWork = TRUE makes a wrong or
# missing path fail right here with a clear error instead of a mere warning.
wdpath <- normalizePath(
  '/Users/ethen/machine-learning/association_rule/R',
  mustWork = TRUE
)

# load() restores the objects stored in the .rdata file; the code below
# expects it to provide `titanic.raw`. The file is addressed through its full
# path, so the session's working directory is left untouched (no setwd()).
load(file.path(wdpath, 'titanic.raw.rdata'))

# arules mines 'transactions' objects, so coerce the raw table into one;
# the resulting items are labelled variable=value (e.g. 'Survived=No')
dt <- data.table(titanic.raw)
titanic <- as(dt, 'transactions')

# summary statistics of the per-item frequencies
summary(itemFrequency(titanic))
11
12
# Mine association rules from the titanic transactions with apriori
rules <- apriori(
  data = titanic,

  # minlen / maxlen bound the number of items allowed in an itemset;
  # support and confidence are the thresholds passed on to the algorithm
  parameter = list(
    support = 0.05,
    confidence = 0.7,
    minlen = 2,
    maxlen = 5
  ),

  # only keep rules whose right hand side is one of the two "Survived" items;
  # default = 'lhs' tells the algorithm that every item not listed here
  # may appear on the left hand side
  appearance = list(
    rhs = c('Survived=No', 'Survived=Yes'),
    default = 'lhs'
  ),

  # do not print the algorithm's training messages
  control = list(verbose = FALSE)
)
28
29
30
# Flatten the rules object into a data.table: one row per rule holding the
# left and right hand side labels next to all of the quality measures
# (including support, confidence and lift)
# http://stackoverflow.com/questions/25730000/converting-object-of-class-rules-to-data-frame-in-r
rules_dt <- data.table(
  lhs = labels(lhs(rules)),
  rhs = labels(rhs(rules)),
  quality(rules)
)

# rules with the highest lift come first
rules_dt <- rules_dt[order(-lift)]
36
37
# -------------------------------------------------------------------------
38
# not included
39
40
# Scatter plot of the mined rules: support on the x axis, confidence on the
# y axis, and each point colored by the rule's lift
library(cowplot)
library(ggplot2)

ggplot(
  data = rules_dt,
  mapping = aes(x = support, y = confidence, color = lift)
) +
  geom_point() +
  # the title reports how many rules are being plotted
  labs(title = sprintf('scatter plot for %d rules', nrow(rules_dt)))
48
49
50
# confirm that the toy python code's result matches R's apriori

# toy 0/1 matrix with 5 rows and 6 columns, filled row by row
# (presumably rows are transactions and columns are items, 1 = item present;
# confirm against the python example)
X <- matrix(
  c(1, 1, 0, 0, 0, 0,
    1, 0, 1, 1, 1, 0,
    0, 1, 1, 1, 0, 1,
    1, 1, 1, 1, 0, 0,
    1, 1, 1, 0, 0, 1),
  ncol = 6,
  byrow = TRUE
)

# NOTE: this overwrites the `rules` object mined from the titanic data above
rules <- apriori(
  X,

  # minlen / maxlen bound the number of items allowed in an itemset
  parameter = list(support = 0.5, confidence = 0.5, minlen = 2, maxlen = 5),

  # do not print the algorithm's training messages
  control = list(verbose = FALSE)
)
66
67
68