Path: blob/devel/elmergrid/src/metis-5.1.0/GKlib/test/fis.c
3206 views
/*!1\file2\brief A simple frequent itemset discovery program to test GKlib's routines34\date 6/12/20085\author George6\version \verbatim $Id: fis.c 11075 2011-11-11 22:31:52Z karypis $ \endverbatim7*/89#include <GKlib.h>1011/*************************************************************************/12/*! Data structures for the code */13/*************************************************************************/14typedef struct {15ssize_t minlen, maxlen;16ssize_t minfreq, maxfreq;17char *filename;18int silent;19ssize_t nitemsets;20char *clabelfile;21char **clabels;22} params_t;2324/*************************************************************************/25/*! Constants */26/*************************************************************************/27#define CMD_MINLEN 128#define CMD_MAXLEN 229#define CMD_MINFREQ 330#define CMD_MAXFREQ 431#define CMD_SILENT 532#define CMD_CLABELFILE 633#define CMD_HELP 10343536/*************************************************************************/37/*! Local variables */38/*************************************************************************/39static struct gk_option long_options[] = {40{"minlen", 1, 0, CMD_MINLEN},41{"maxlen", 1, 0, CMD_MAXLEN},42{"minfreq", 1, 0, CMD_MINFREQ},43{"maxfreq", 1, 0, CMD_MAXFREQ},44{"silent", 0, 0, CMD_SILENT},45{"clabels", 1, 0, CMD_CLABELFILE},46{"help", 0, 0, CMD_HELP},47{0, 0, 0, 0}48};495051/*-------------------------------------------------------------------*/52/* Mini help */53/*-------------------------------------------------------------------*/54static char helpstr[][100] = {55" ",56"Usage: fis [options] <mat-file>",57" ",58" Required parameters",59" mat-file",60" The name of the file storing the transactions. The file is in ",61" Cluto's .mat format.",62" ",63" Optional parameters",64" -minlen=int",65" Specifies the minimum length of the patterns. [default: 1]",66" ",67" -maxlen=int",68" Specifies the maximum length of the patterns. [default: none]",69" ",70" -minfreq=int",71" Specifies the minimum frequency of the patterns. [default: 10]",72" ",73" -maxfreq=int",74" Specifies the maximum frequency of the patterns. [default: none]",75" ",76" -silent",77" Does not print the discovered itemsets.",78" ",79" -clabels=filename",80" Specifies the name of the file that stores the column labels.",81" ",82" -help",83" Prints this message.",84""85};8687static char shorthelpstr[][100] = {88" ",89" Usage: fis [options] <mat-file>",90" use 'fis -help' for a summary of the options.",91""92};93949596/*************************************************************************/97/*! Function prototypes */98/*************************************************************************/99void print_init_info(params_t *params, gk_csr_t *mat);100void print_final_info(params_t *params);101params_t *parse_cmdline(int argc, char *argv[]);102void print_an_itemset(void *stateptr, int nitems, int *itemind,103int ntrans, int *tranind);104105106/*************************************************************************/107/*! the entry point */108/**************************************************************************/109int main(int argc, char *argv[])110{111ssize_t i;112char line[8192];113FILE *fpin;114params_t *params;115gk_csr_t *mat;116117params = parse_cmdline(argc, argv);118params->nitemsets = 0;119120/* read the data */121mat = gk_csr_Read(params->filename, GK_CSR_FMT_CLUTO, 1, 1);122gk_csr_CreateIndex(mat, GK_CSR_COL);123124/* read the column labels */125params->clabels = (char **)gk_malloc(mat->ncols*sizeof(char *), "main: clabels");126if (params->clabelfile == NULL) {127for (i=0; i<mat->ncols; i++) {128sprintf(line, "%zd", i);129params->clabels[i] = gk_strdup(line);130}131}132else {133fpin = gk_fopen(params->clabelfile, "r", "main: fpin");134for (i=0; i<mat->ncols; i++) {135if (fgets(line, 8192, fpin) == NULL)136errexit("Failed on fgets.\n");137params->clabels[i] = gk_strdup(gk_strtprune(line, " \n\t"));138}139gk_fclose(fpin);140}141142143print_init_info(params, mat);144145gk_find_frequent_itemsets(mat->nrows, mat->rowptr, mat->rowind,146params->minfreq, params->maxfreq, params->minlen, params->maxlen,147&print_an_itemset, (void *)params);148149printf("Total itemsets found: %zd\n", params->nitemsets);150151print_final_info(params);152}153154155156/*************************************************************************/157/*! This function prints run parameters */158/*************************************************************************/159void print_init_info(params_t *params, gk_csr_t *mat)160{161printf("*******************************************************************************\n");162printf(" fis\n\n");163printf("Matrix Information ---------------------------------------------------------\n");164printf(" input file=%s, [%d, %d, %zd]\n",165params->filename, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);166167printf("\n");168printf("Options --------------------------------------------------------------------\n");169printf(" minlen=%zd, maxlen=%zd, minfeq=%zd, maxfreq=%zd\n",170params->minlen, params->maxlen, params->minfreq, params->maxfreq);171172printf("\n");173printf("Finding patterns... -----------------------------------------------------\n");174}175176177/*************************************************************************/178/*! This function prints final statistics */179/*************************************************************************/180void print_final_info(params_t *params)181{182printf("\n");183printf("Memory Usage Information -----------------------------------------------------\n");184printf(" Maximum memory used: %10zd bytes\n", (ssize_t) gk_GetMaxMemoryUsed());185printf(" Current memory used: %10zd bytes\n", (ssize_t) gk_GetCurMemoryUsed());186printf("********************************************************************************\n");187}188189190/*************************************************************************/191/*! This is the entry point of the command-line argument parser */192/*************************************************************************/193params_t *parse_cmdline(int argc, char *argv[])194{195int i;196int c, option_index;197params_t *params;198199params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");200201/* initialize the params data structure */202params->minlen = 1;203params->maxlen = -1;204params->minfreq = 10;205params->maxfreq = -1;206params->silent = 0;207params->filename = NULL;208params->clabelfile = NULL;209210211/* Parse the command line arguments */212while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {213switch (c) {214case CMD_MINLEN:215if (gk_optarg) params->minlen = atoi(gk_optarg);216break;217case CMD_MAXLEN:218if (gk_optarg) params->maxlen = atoi(gk_optarg);219break;220case CMD_MINFREQ:221if (gk_optarg) params->minfreq = atoi(gk_optarg);222break;223case CMD_MAXFREQ:224if (gk_optarg) params->maxfreq = atoi(gk_optarg);225break;226227case CMD_SILENT:228params->silent = 1;229break;230231case CMD_CLABELFILE:232if (gk_optarg) params->clabelfile = gk_strdup(gk_optarg);233break;234235case CMD_HELP:236for (i=0; strlen(helpstr[i]) > 0; i++)237printf("%s\n", helpstr[i]);238exit(0);239break;240case '?':241default:242printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);243exit(0);244}245}246247if (argc-gk_optind != 1) {248printf("Unrecognized parameters.");249for (i=0; strlen(shorthelpstr[i]) > 0; i++)250printf("%s\n", shorthelpstr[i]);251exit(0);252}253254params->filename = gk_strdup(argv[gk_optind++]);255256if (!gk_fexists(params->filename))257errexit("input file %s does not exist.\n", params->filename);258259return params;260}261262263264/*************************************************************************/265/*! This is the callback function for the itemset discovery routine */266/*************************************************************************/267void print_an_itemset(void *stateptr, int nitems, int *itemids, int ntrans,268int *transids)269{270ssize_t i;271params_t *params;272273params = (params_t *)stateptr;274params->nitemsets++;275276if (!params->silent) {277printf("%4zd %4d %4d => ", params->nitemsets, nitems, ntrans);278for (i=0; i<nitems; i++)279printf(" %s", params->clabels[itemids[i]]);280printf("\n");281for (i=0; i<ntrans; i++)282printf(" %d\n", transids[i]);283printf("\n");284}285}286287288