Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ElmerCSC
GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/elmergrid/src/metis-5.1.0/GKlib/test/fis.c
3206 views
1
/*!
\file
\brief A simple frequent itemset discovery program to test GKlib's routines

\date 6/12/2008
\author George
\version \verbatim $Id: fis.c 11075 2011-11-11 22:31:52Z karypis $ \endverbatim
*/
9
10
#include <GKlib.h>
11
12
/*************************************************************************/
/*! Run-time parameters and state of the program. A single instance is
    allocated by the command-line parser and is also handed to the
    itemset callback as its opaque state pointer. */
/*************************************************************************/
typedef struct {
  ssize_t minlen;      /* minimum pattern length (-minlen) */
  ssize_t maxlen;      /* maximum pattern length (-maxlen) */
  ssize_t minfreq;     /* minimum pattern frequency (-minfreq) */
  ssize_t maxfreq;     /* maximum pattern frequency (-maxfreq) */
  char *filename;      /* name of the input transaction file */
  int silent;          /* non-zero: do not print the discovered itemsets */
  ssize_t nitemsets;   /* number of itemsets reported so far */
  char *clabelfile;    /* name of the column-label file (-clabels), or NULL */
  char **clabels;      /* one label string per matrix column */
} params_t;
24
25
/*************************************************************************/
/*! Codes identifying each command-line option. They are stored in the
    option table and dispatched on by the command-line parser. */
/*************************************************************************/
#define CMD_MINLEN       1    /* -minlen=int */
#define CMD_MAXLEN       2    /* -maxlen=int */
#define CMD_MINFREQ      3    /* -minfreq=int */
#define CMD_MAXFREQ      4    /* -maxfreq=int */
#define CMD_SILENT       5    /* -silent */
#define CMD_CLABELFILE   6    /* -clabels=filename */
#define CMD_HELP         10   /* -help */
35
36
37
/*************************************************************************/
38
/*! Local variables */
39
/*************************************************************************/
40
static struct gk_option long_options[] = {
41
{"minlen", 1, 0, CMD_MINLEN},
42
{"maxlen", 1, 0, CMD_MAXLEN},
43
{"minfreq", 1, 0, CMD_MINFREQ},
44
{"maxfreq", 1, 0, CMD_MAXFREQ},
45
{"silent", 0, 0, CMD_SILENT},
46
{"clabels", 1, 0, CMD_CLABELFILE},
47
{"help", 0, 0, CMD_HELP},
48
{0, 0, 0, 0}
49
};
50
51
52
/*-------------------------------------------------------------------*/
/* Full help text, one output line per row. The empty string in the  */
/* last row marks the end of the text for the printing loop.         */
/*-------------------------------------------------------------------*/
static char helpstr[][100] = {
  " ",
  "Usage: fis [options] <mat-file>",
  " ",
  " Required parameters",
  " mat-file",
  " The name of the file storing the transactions. The file is in ",
  " Cluto's .mat format.",
  " ",
  " Optional parameters",
  " -minlen=int",
  " Specifies the minimum length of the patterns. [default: 1]",
  " ",
  " -maxlen=int",
  " Specifies the maximum length of the patterns. [default: none]",
  " ",
  " -minfreq=int",
  " Specifies the minimum frequency of the patterns. [default: 10]",
  " ",
  " -maxfreq=int",
  " Specifies the maximum frequency of the patterns. [default: none]",
  " ",
  " -silent",
  " Does not print the discovered itemsets.",
  " ",
  " -clabels=filename",
  " Specifies the name of the file that stores the column labels.",
  " ",
  " -help",
  " Prints this message.",
  ""
};
87
88
/* Short usage reminder, printed when the positional arguments are wrong.
   As with the full help text, the empty string ends the list. */
static char shorthelpstr[][100] = {
  " ",
  " Usage: fis [options] <mat-file>",
  " use 'fis -help' for a summary of the options.",
  ""
};
94
95
96
97
/*************************************************************************/
98
/*! Function prototypes */
99
/*************************************************************************/
100
void print_init_info(params_t *params, gk_csr_t *mat);
101
void print_final_info(params_t *params);
102
params_t *parse_cmdline(int argc, char *argv[]);
103
void print_an_itemset(void *stateptr, int nitems, int *itemind,
104
int ntrans, int *tranind);
105
106
107
/*************************************************************************/
108
/*! the entry point */
109
/**************************************************************************/
110
int main(int argc, char *argv[])
111
{
112
ssize_t i;
113
char line[8192];
114
FILE *fpin;
115
params_t *params;
116
gk_csr_t *mat;
117
118
params = parse_cmdline(argc, argv);
119
params->nitemsets = 0;
120
121
/* read the data */
122
mat = gk_csr_Read(params->filename, GK_CSR_FMT_CLUTO, 1, 1);
123
gk_csr_CreateIndex(mat, GK_CSR_COL);
124
125
/* read the column labels */
126
params->clabels = (char **)gk_malloc(mat->ncols*sizeof(char *), "main: clabels");
127
if (params->clabelfile == NULL) {
128
for (i=0; i<mat->ncols; i++) {
129
sprintf(line, "%zd", i);
130
params->clabels[i] = gk_strdup(line);
131
}
132
}
133
else {
134
fpin = gk_fopen(params->clabelfile, "r", "main: fpin");
135
for (i=0; i<mat->ncols; i++) {
136
if (fgets(line, 8192, fpin) == NULL)
137
errexit("Failed on fgets.\n");
138
params->clabels[i] = gk_strdup(gk_strtprune(line, " \n\t"));
139
}
140
gk_fclose(fpin);
141
}
142
143
144
print_init_info(params, mat);
145
146
gk_find_frequent_itemsets(mat->nrows, mat->rowptr, mat->rowind,
147
params->minfreq, params->maxfreq, params->minlen, params->maxlen,
148
&print_an_itemset, (void *)params);
149
150
printf("Total itemsets found: %zd\n", params->nitemsets);
151
152
print_final_info(params);
153
}
154
155
156
157
/*************************************************************************/
158
/*! This function prints run parameters */
159
/*************************************************************************/
160
void print_init_info(params_t *params, gk_csr_t *mat)
161
{
162
printf("*******************************************************************************\n");
163
printf(" fis\n\n");
164
printf("Matrix Information ---------------------------------------------------------\n");
165
printf(" input file=%s, [%d, %d, %zd]\n",
166
params->filename, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
167
168
printf("\n");
169
printf("Options --------------------------------------------------------------------\n");
170
printf(" minlen=%zd, maxlen=%zd, minfeq=%zd, maxfreq=%zd\n",
171
params->minlen, params->maxlen, params->minfreq, params->maxfreq);
172
173
printf("\n");
174
printf("Finding patterns... -----------------------------------------------------\n");
175
}
176
177
178
/*************************************************************************/
179
/*! This function prints final statistics */
180
/*************************************************************************/
181
void print_final_info(params_t *params)
182
{
183
printf("\n");
184
printf("Memory Usage Information -----------------------------------------------------\n");
185
printf(" Maximum memory used: %10zd bytes\n", (ssize_t) gk_GetMaxMemoryUsed());
186
printf(" Current memory used: %10zd bytes\n", (ssize_t) gk_GetCurMemoryUsed());
187
printf("********************************************************************************\n");
188
}
189
190
191
/*************************************************************************/
192
/*! This is the entry point of the command-line argument parser */
193
/*************************************************************************/
194
params_t *parse_cmdline(int argc, char *argv[])
195
{
196
int i;
197
int c, option_index;
198
params_t *params;
199
200
params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
201
202
/* initialize the params data structure */
203
params->minlen = 1;
204
params->maxlen = -1;
205
params->minfreq = 10;
206
params->maxfreq = -1;
207
params->silent = 0;
208
params->filename = NULL;
209
params->clabelfile = NULL;
210
211
212
/* Parse the command line arguments */
213
while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
214
switch (c) {
215
case CMD_MINLEN:
216
if (gk_optarg) params->minlen = atoi(gk_optarg);
217
break;
218
case CMD_MAXLEN:
219
if (gk_optarg) params->maxlen = atoi(gk_optarg);
220
break;
221
case CMD_MINFREQ:
222
if (gk_optarg) params->minfreq = atoi(gk_optarg);
223
break;
224
case CMD_MAXFREQ:
225
if (gk_optarg) params->maxfreq = atoi(gk_optarg);
226
break;
227
228
case CMD_SILENT:
229
params->silent = 1;
230
break;
231
232
case CMD_CLABELFILE:
233
if (gk_optarg) params->clabelfile = gk_strdup(gk_optarg);
234
break;
235
236
case CMD_HELP:
237
for (i=0; strlen(helpstr[i]) > 0; i++)
238
printf("%s\n", helpstr[i]);
239
exit(0);
240
break;
241
case '?':
242
default:
243
printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
244
exit(0);
245
}
246
}
247
248
if (argc-gk_optind != 1) {
249
printf("Unrecognized parameters.");
250
for (i=0; strlen(shorthelpstr[i]) > 0; i++)
251
printf("%s\n", shorthelpstr[i]);
252
exit(0);
253
}
254
255
params->filename = gk_strdup(argv[gk_optind++]);
256
257
if (!gk_fexists(params->filename))
258
errexit("input file %s does not exist.\n", params->filename);
259
260
return params;
261
}
262
263
264
265
/*************************************************************************/
266
/*! This is the callback function for the itemset discovery routine */
267
/*************************************************************************/
268
void print_an_itemset(void *stateptr, int nitems, int *itemids, int ntrans,
269
int *transids)
270
{
271
ssize_t i;
272
params_t *params;
273
274
params = (params_t *)stateptr;
275
params->nitemsets++;
276
277
if (!params->silent) {
278
printf("%4zd %4d %4d => ", params->nitemsets, nitems, ntrans);
279
for (i=0; i<nitems; i++)
280
printf(" %s", params->clabels[itemids[i]]);
281
printf("\n");
282
for (i=0; i<ntrans; i++)
283
printf(" %d\n", transids[i]);
284
printf("\n");
285
}
286
}
287
288