Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ElmerCSC
GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/elmergrid/src/metis-5.1.0/GKlib/itemsets.c
3206 views
1
/*!
2
* \file
3
* \brief Frequent/Closed itemset discovery routines
4
*
5
* This file contains the code for finding frequent/closed itemests. These routines
6
* are implemented using a call-back mechanism to deal with the discovered itemsets.
7
*
8
* \date 6/13/2008
9
* \author George Karypis
10
* \version\verbatim $Id: itemsets.c 11075 2011-11-11 22:31:52Z karypis $ \endverbatim
11
*/
12
13
#include <GKlib.h>
14
15
/*-------------------------------------------------------------*/
16
/*! Data structures for use within this module */
17
/*-------------------------------------------------------------*/
18
typedef struct {
19
int minfreq; /* the minimum frequency of a pattern */
20
int maxfreq; /* the maximum frequency of a pattern */
21
int minlen; /* the minimum length of the requested pattern */
22
int maxlen; /* the maximum length of the requested pattern */
23
int tnitems; /* the initial range of the item space */
24
25
/* the call-back function */
26
void (*callback)(void *stateptr, int nitems, int *itemids, int ntrans, int *transids);
27
void *stateptr; /* the user-supplied pointer to pass to the callback */
28
29
/* workspace variables */
30
int *rmarker;
31
gk_ikv_t *cand;
32
} isparams_t;
33
34
35
/*-------------------------------------------------------------*/
36
/*! Prototypes for this module */
37
/*-------------------------------------------------------------*/
38
void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat,
39
int preflen, int *prefix);
40
gk_csr_t *itemsets_project_matrix(isparams_t *param, gk_csr_t *mat, int cid);
41
42
43
44
/*************************************************************************/
45
/*! The entry point of the frequent itemset discovery code */
46
/*************************************************************************/
47
void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind,
48
int minfreq, int maxfreq, int minlen, int maxlen,
49
void (*process_itemset)(void *stateptr, int nitems, int *itemids,
50
int ntrans, int *transids),
51
void *stateptr)
52
{
53
ssize_t i;
54
gk_csr_t *mat, *pmat;
55
isparams_t params;
56
int *pattern;
57
58
/* Create the matrix */
59
mat = gk_csr_Create();
60
mat->nrows = ntrans;
61
mat->ncols = tranind[gk_iargmax(tranptr[ntrans], tranind)]+1;
62
mat->rowptr = gk_zcopy(ntrans+1, tranptr, gk_zmalloc(ntrans+1, "gk_find_frequent_itemsets: mat.rowptr"));
63
mat->rowind = gk_icopy(tranptr[ntrans], tranind, gk_imalloc(tranptr[ntrans], "gk_find_frequent_itemsets: mat.rowind"));
64
mat->colids = gk_iincset(mat->ncols, 0, gk_imalloc(mat->ncols, "gk_find_frequent_itemsets: mat.colids"));
65
66
/* Setup the parameters */
67
params.minfreq = minfreq;
68
params.maxfreq = (maxfreq == -1 ? mat->nrows : maxfreq);
69
params.minlen = minlen;
70
params.maxlen = (maxlen == -1 ? mat->ncols : maxlen);
71
params.tnitems = mat->ncols;
72
params.callback = process_itemset;
73
params.stateptr = stateptr;
74
params.rmarker = gk_ismalloc(mat->nrows, 0, "gk_find_frequent_itemsets: rmarker");
75
params.cand = gk_ikvmalloc(mat->ncols, "gk_find_frequent_itemsets: cand");
76
77
/* Perform the initial projection */
78
gk_csr_CreateIndex(mat, GK_CSR_COL);
79
pmat = itemsets_project_matrix(&params, mat, -1);
80
gk_csr_Free(&mat);
81
82
pattern = gk_imalloc(pmat->ncols, "gk_find_frequent_itemsets: pattern");
83
itemsets_find_frequent_itemsets(&params, pmat, 0, pattern);
84
85
gk_csr_Free(&pmat);
86
gk_free((void **)&pattern, &params.rmarker, &params.cand, LTERM);
87
88
}
89
90
91
92
/*************************************************************************/
93
/*! The recursive routine for DFS-based frequent pattern discovery */
94
/*************************************************************************/
95
void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat,
96
int preflen, int *prefix)
97
{
98
ssize_t i;
99
gk_csr_t *cmat;
100
101
/* Project each frequent column */
102
for (i=0; i<mat->ncols; i++) {
103
prefix[preflen] = mat->colids[i];
104
105
if (preflen+1 >= params->minlen)
106
(*params->callback)(params->stateptr, preflen+1, prefix,
107
mat->colptr[i+1]-mat->colptr[i], mat->colind+mat->colptr[i]);
108
109
if (preflen+1 < params->maxlen) {
110
cmat = itemsets_project_matrix(params, mat, i);
111
itemsets_find_frequent_itemsets(params, cmat, preflen+1, prefix);
112
gk_csr_Free(&cmat);
113
}
114
}
115
116
}
117
118
119
/******************************************************************************/
120
/*! This function projects a matrix w.r.t. to a particular column.
121
It performs the following steps:
122
- Determines the length of each column that is remaining
123
- Sorts the columns in increasing length
124
- Creates a column-based version of the matrix with the proper
125
column ordering and renamed rowids.
126
*/
127
/*******************************************************************************/
128
gk_csr_t *itemsets_project_matrix(isparams_t *params, gk_csr_t *mat, int cid)
129
{
130
ssize_t i, j, k, ii, pnnz;
131
int nrows, ncols, pnrows, pncols;
132
ssize_t *colptr, *pcolptr;
133
int *colind, *colids, *pcolind, *pcolids, *rmarker;
134
gk_csr_t *pmat;
135
gk_ikv_t *cand;
136
137
nrows = mat->nrows;
138
ncols = mat->ncols;
139
colptr = mat->colptr;
140
colind = mat->colind;
141
colids = mat->colids;
142
143
rmarker = params->rmarker;
144
cand = params->cand;
145
146
147
/* Allocate space for the projected matrix based on what you know thus far */
148
pmat = gk_csr_Create();
149
pmat->nrows = pnrows = (cid == -1 ? nrows : colptr[cid+1]-colptr[cid]);
150
151
152
/* Mark the rows that will be kept and determine the prowids */
153
if (cid == -1) { /* Initial projection */
154
gk_iset(nrows, 1, rmarker);
155
}
156
else { /* The other projections */
157
for (i=colptr[cid]; i<colptr[cid+1]; i++)
158
rmarker[colind[i]] = 1;
159
}
160
161
162
/* Determine the length of each column that will be left in the projected matrix */
163
for (pncols=0, pnnz=0, i=cid+1; i<ncols; i++) {
164
for (k=0, j=colptr[i]; j<colptr[i+1]; j++) {
165
k += rmarker[colind[j]];
166
}
167
if (k >= params->minfreq && k <= params->maxfreq) {
168
cand[pncols].val = i;
169
cand[pncols++].key = k;
170
pnnz += k;
171
}
172
}
173
174
/* Sort the columns in increasing order */
175
gk_ikvsorti(pncols, cand);
176
177
178
/* Allocate space for the remaining fields of the projected matrix */
179
pmat->ncols = pncols;
180
pmat->colids = pcolids = gk_imalloc(pncols, "itemsets_project_matrix: pcolids");
181
pmat->colptr = pcolptr = gk_zmalloc(pncols+1, "itemsets_project_matrix: pcolptr");
182
pmat->colind = pcolind = gk_imalloc(pnnz, "itemsets_project_matrix: pcolind");
183
184
185
/* Populate the projected matrix */
186
pcolptr[0] = 0;
187
for (pnnz=0, ii=0; ii<pncols; ii++) {
188
i = cand[ii].val;
189
for (j=colptr[i]; j<colptr[i+1]; j++) {
190
if (rmarker[colind[j]])
191
pcolind[pnnz++] = colind[j];
192
}
193
194
pcolids[ii] = colids[i];
195
pcolptr[ii+1] = pnnz;
196
}
197
198
199
/* Reset the rmarker array */
200
if (cid == -1) { /* Initial projection */
201
gk_iset(nrows, 0, rmarker);
202
}
203
else { /* The other projections */
204
for (i=colptr[cid]; i<colptr[cid+1]; i++)
205
rmarker[colind[i]] = 0;
206
}
207
208
209
return pmat;
210
}
211
212