示例#1
0
文件: istree.cpp 项目: flouvat/ABS
ISTREE* ist_create (int itemcnt, int supp)
{                               /* --- create an item set tree */
  ISTREE *ist;                  /* created item set tree */
  ISNODE **lvl;                 /* vector of tree levels */
  ISNODE *root;                 /* root node of the tree */
  int    *buf;                  /* buffer vector */

  assert((itemcnt >= 0) && (supp >= 1));  /* check function arguments */

  /* --- allocate memory --- */ 
  ist = (ISTREE*)malloc(sizeof(ISTREE) +(itemcnt-1) *sizeof(int));
  if (!ist) return NULL;        /* allocate the tree body */
  ist->levels = lvl = (ISNODE**)malloc(BLKSIZE *sizeof(ISNODE*));
  if (!lvl) { free(ist); return NULL; }
  ist->buf    = buf = (int*)    malloc(BLKSIZE *sizeof(int));
  if (!buf) { free(lvl); free(ist); return NULL; }
  lvl[0] = ist->curr = root =   /* allocate a root node */
    (ISNODE*)calloc(1, sizeof(ISNODE) +(itemcnt-1) *sizeof(int));
  if (!root){ free(buf); free(lvl); free(ist); return NULL; }

  /* --- initialize structures --- */
  ist->lvlvsz  = BLKSIZE;       /* copy parameters to the structure */
  ist->lvlcnt  = 1;
  ist->tacnt   = 0;
  ist->supp    = supp;
  ist_init(ist);                /* initialize item set extraction */
  root->parent = root->succ = NULL;
  root->offset = root->id   = 0;
  root->chcnt  = 0;             /* initialize the root node */
  root->size   = itemcnt;
  return ist;                   /* return created item set tree */
}  /* ist_create() */
示例#2
0
ISTREE* ist_create (int itemcnt, double supp, double conf,
                    int rsdef, const char *apps)
{                               /* --- create an item set tree */
	ISTREE *ist;                  /* created item set tree */
	ISNODE **lvls;                /* level vector */
	int    *path;                 /* path vector */
	ISNODE *root;                 /* root node */
	char   *a;                    /* to traverse appearances vector */

	assert(itemcnt >= 0);         /* check the function arguments */
	assert((supp >= 0) && (supp <= 1) && (conf >= 0) && (conf <= 1));

	/* --- allocate memory --- */
	ist = (ISTREE*)malloc(sizeof(ISTREE) +(itemcnt-1) *sizeof(char));
	if (!ist) return NULL;        /* allocate tree body */
	ist->levels = lvls = (ISNODE**)malloc(BLKSIZE *sizeof(ISNODE*));
	if (!lvls) { free(ist); return NULL; }
	ist->path   = path = (int*)    malloc(BLKSIZE *sizeof(int));
	if (!path) { free(lvls); free(ist); return NULL; }
	lvls[0] = ist->curr = root =  /* allocate root node */
		(ISNODE*)calloc(1, sizeof(ISNODE) +itemcnt*2*sizeof(float));
	root->cnts=(float*)((char*)root+sizeof(ISNODE));
	root->occ_square = (float*)root->cnts+itemcnt;
	if (!root) { free(path); free(lvls); free(ist); return NULL; }

	/* --- initialize structures --- */
	ist->vecsize = BLKSIZE;       /* copy parameters to structure */
	ist->height  = 1;    ist->setcnt = 0;
	ist->supp    = supp; ist->conf   = conf;
	ist->rsdef   = rsdef & IST_BOTH;
	ist_init(ist, 1, EM_NONE, 1.0);
	root->parent = root->succ  = NULL;
	root->offs   = root->chcnt = root->id = 0;
	root->size   = itemcnt;
	a = ist->apps;                /* copy item appearances */
	if (apps) { while (--itemcnt >= 0) *a++ = *apps++ & IST_BOTH; }
	else      { while (--itemcnt >= 0) *a++ = IST_BOTH;           }
	return ist;                   /* return created item set tree */
}  /* ist_create() */
示例#3
0
int do_apriori (int argc, char *argv[])
{                               /* --- main function */
  int    i, k = 0, n;           /* loop variables, counters */
  char   *s;                    /* to traverse the options */
  char   **optarg = NULL;       /* option argument */
  char   *fn_in   = NULL;       /* name of input  file */
  char   *fn_out  = NULL;       /* name of output file */
  char   *fn_app  = NULL;       /* name of item appearances file */
  char   *blanks  = NULL;       /* blanks */
  char   *fldseps = NULL;       /* field  separators */
  char   *recseps = NULL;       /* record separators */
  char   *comment = NULL;       /* comment indicators */
  char   *used    = NULL;       /* item usage vector */
  double supp     = 0.1;        /* minimal support    (in percent) */
  double smax     = 1.0;        /* maximal support    (in percent) */
  double conf     = 0.8;        /* minimal confidence (in percent) */
  int    mode     = IST_BODY;   /* search mode (rule support def.) */
  int    target   = 'r';        /* target type (sets/rules/h.edges) */
  int    arem     = 0;          /* additional rule evaluation measure */
  int    lift     = 0;          /* flag for printing the lift */
  double minval   = 0.1;        /* minimal evaluation measure value */
  double lftval   = 0;          /* lift value (confidence/prior) */
  int    minlen   = 1;          /* minimal rule length */
  int    maxlen   = INT_MAX;    /* maximal rule length */
  int    load     = 1;          /* flag for loading transactions */
  int    sort     = 2;          /* flag for item sorting and recoding */
  double filter   = 0.1;        /* item usage filtering parameter */
  int    tree     = 1;          /* flag for transaction tree */
  int    heap     = 1;          /* flag for heap sort vs. quick sort */
  int    c2scf    = 0;          /* flag for conv. to scanable form */
  char   *sep     = " ";        /* item separator for output */
  char   *fmt     = "%.1f";     /* output format for support/conf. */
  int    sout     = 1;          /* flag for abs./rel. support output */
  int    ext      = 0;          /* flag for extended support output */
  int    aval     = 0;          /* flag for add. eval. measure value */
  int    maxcnt   = 0;          /* maximal number of items per set */
  int    tacnt;                 /* number of transactions */
  int    frq;                   /* frequency of an item set */
  int    *map, *set;            /* identifier map, item set */
  int    verbose  = 0;          /* flag for verboseness */
  const  char *name;            /* buffer for item names */
  static char buf[4*TS_SIZE+4]; /* buffer for formatting */
  clock_t     t, tt, tc, x;     /* timer for measurements */

  #ifndef QUIET                 /* if not quiet version */
  prgname = argv[0];            /* get program name for error msgs. */

  /* --- print usage message --- */
  if (argc > 1) {               /* if arguments are given */
    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
    fprintf(stderr, VERSION); } /* print a startup message */
  else {                        /* if no arguments given */
    printf("usage: %s [options] infile outfile [appfile]\n", argv[0]);
    printf("%s\n", DESCRIPTION);
    printf("%s\n", VERSION);
    printf("-t#      target type (default: association rules)\n"
           "         (s: item sets, c: closed item sets,"
                    " m: maximal item sets,\n"
           "          r: association rules,"
                    " h: association hyperedges)\n");
    printf("-m#      minimal number of items per set/rule/hyperedge "
                    "(default: %d)\n", minlen);
    printf("-n#      maximal number of items per set/rule/hyperedge "
                    "(default: no limit)\n");
    printf("-s#      minimal support    of a     set/rule/hyperedge "
                    "(default: %g%%)\n", supp *100);
    printf("-S#      maximal support    of a     set/rule/hyperedge "
                    "(default: %g%%)\n", smax *100);
    printf("-c#      minimal confidence of a         rule/hyperedge "
                    "(default: %g%%)\n", conf *100);
    printf("-o       use original definition of the support of a rule "
                    "(body & head)\n");
    printf("-k#      item separator for output "
                    "(default: \"%s\")\n", sep);
    printf("-p#      output format for support/confidence "
                    "(default: \"%s\")\n", fmt);
    printf("-x       extended support output "
                    "(print both rule support types)\n");
    printf("-a       print absolute support "
                    "(number of transactions)\n");
    printf("-y       print lift value (confidence divided by prior)\n");
    printf("-e#      additional evaluation measure (default: none)\n");
    printf("-!       print a list of additional evaluation measures\n");
    printf("-d#      minimal value of additional evaluation measure "
                    "(default: %g%%)\n", minval *100);
    printf("-v       print value of additional "
                    "rule evaluation measure\n");
    printf("-g       write output in scanable form "
                    "(quote certain characters)\n");
    printf("-l       do not load transactions into memory "
                    "(work on input file)\n");
    printf("-q#      sort items w.r.t. their frequency (default: %d)\n"
           "         (1: ascending, -1: descending, 0: do not sort,\n"
           "          2: ascending, -2: descending w.r.t. "
                    "transaction size sum)\n", sort);
    printf("-u#      filter unused items from transactions "
                    "(default: %g)\n", filter);
    printf("         (0: do not filter items w.r.t. usage in sets,\n"
           "         <0: fraction of removed items for filtering,\n"
           "         >0: take execution times ratio into account)\n");
    printf("-h       do not organize transactions as a prefix tree\n");
    printf("-j       use quicksort to sort the transactions "
                    "(default: heapsort)\n");
    printf("-z       minimize memory usage "
                    "(default: maximize speed)\n");
    printf("-b/f/r#  blank characters, field and record separators\n"
           "         (default: \" \\t\\r\", \" \\t\", \"\\n\")\n");
    printf("-C#      comment characters (default: \"#\")\n");
    printf("-V       verbose\n");

    printf("infile   file to read transactions from\n");
    printf("outfile  file to write item sets/association rules"
                    "/hyperedges to\n");
    printf("appfile  file stating item appearances (optional)\n");
    return 0;                   /* print a usage message */
  }                             /* and abort the program */
  #endif  /* #ifndef QUIET */

  /* --- evaluate arguments --- */
  for (i = 1; i < argc; i++) {  /* traverse arguments */
    s = argv[i];                /* get option argument */
    if (optarg) { *optarg = s; optarg = NULL; continue; }
    if ((*s == '-') && *++s) {  /* -- if argument is an option */
      while (*s) {              /* traverse options */
        switch (*s++) {         /* evaluate switches */
          case '!': help();                         break;
          case 't': target = (*s) ? *s++ : 'r';     break;
          case 'm': minlen = (int)strtol(s, &s, 0); break;
          case 'n': maxlen = (int)strtol(s, &s, 0); break;
          case 's': supp   = 0.01*strtod(s, &s);    break;
          case 'S': smax   = 0.01*strtod(s, &s);    break;
          case 'c': conf   = 0.01*strtod(s, &s);    break;
          case 'o': mode  |= IST_BOTH;              break;
          case 'k': optarg = &sep;                  break;
          case 'p': optarg = &fmt;                  break;
          case 'x': ext    = 1;                     break;
          case 'a': sout  |= 2;                     break;
          case 'y': lift   = 1;                     break;
          case 'e': arem   = (*s) ? *s++ : 0;       break;
          case 'd': minval = 0.01*strtod(s, &s);    break;
          case 'v': aval   = 1;                     break;
          case 'g': c2scf  = 1;                     break;
          case 'l': load   = 0;                     break;
          case 'q': sort   = (int)strtol(s, &s, 0); break;
          case 'u': filter =      strtod(s, &s);    break;
          case 'h': tree   = 0;                     break;
          case 'j': heap   = 0;                     break;
          case 'z': mode  |= IST_MEMOPT;            break;
          case 'b': optarg = &blanks;               break;
          case 'f': optarg = &fldseps;              break;
          case 'r': optarg = &recseps;              break;
          case 'C': optarg = &comment;              break;
          case 'V': verbose = 1;                    break;
          default : error(E_OPTION, *--s);          break;
        }                       /* set option variables */
        if (optarg && *s) { *optarg = s; optarg = NULL; break; }
      } }                       /* get option argument */
    else {                      /* -- if argument is no option */
      switch (k++) {            /* evaluate non-options */
        case  0: fn_in  = s;      break;
        case  1: fn_out = s;      break;
        case  2: fn_app = s;      break;
        default: error(E_ARGCNT); break;
      }                         /* note filenames */
    }
  }
  if (optarg) error(E_OPTARG);  /* check option argument */
  if ((k < 2) || (k > 3))       /* and the number of arguments */
    error(E_ARGCNT);            /* (either in/out or in/out/app) */
  if ((!fn_in || !*fn_in) && (fn_app && !*fn_app))
    error(E_STDIN);             /* stdin must not be used twice */
  switch (target) {             /* check and translate target type */
    case 's': target = TT_SET;               break;
    case 'c': target = TT_CLSET;             break;
    case 'm': target = TT_MFSET;             break;
    case 'r': target = TT_RULE;              break;
    case 'h': target = TT_HEDGE;             break;
    case 'g': target = TT_GROUP;             break;
    default : error(E_TARGET, (char)target); break;
  }
  if (supp > 1)                 /* check the minimal support */
    error(E_SUPP, supp);        /* (< 0: absolute number) */
  if ((conf  <  0) || (conf > 1))
    error(E_CONF, conf);        /* check the minimal confidence */
  if (minlen <= 0) error(E_RULELEN, minlen);  /* check the limits */
  if (maxlen <= 0) error(E_RULELEN, maxlen);  /* for the rule length */
  switch (arem) {               /* check and translate measure */
    case  0 : case '0': arem = EM_NONE;     break;
    case 'd': case '1': arem = EM_DIFF;     break;
    case 'q': case '2': arem = EM_QUOT;     break;
    case 'a': case '3': arem = EM_AIMP;     break;
    case 'i': case '4': arem = EM_INFO;     break;
    case 'c': case '5': arem = EM_CHI2;     break;
    case 'p': case '6': arem = EM_PVAL;     break;
    default : error(E_MEASURE, (char)arem); break;
  }
  if (target <= TT_MFSET) {     /* in item set mode neutralize */
    mode |= IST_BOTH; conf = 1;}/* rule specific settings */
  if (arem == EM_NONE)          /* if no add. rule eval. measure, */
    aval = 0;                   /* clear the corresp. output flag */
  if ((filter <= -1) || (filter >= 1)) filter = 0;

  /* --- create item set and transaction set --- */
  itemset = is_create(-1);      /* create an item set and */
  if (!itemset) error(E_NOMEM); /* set the special characters */
  is_chars(itemset, blanks, fldseps, recseps, comment);
  if (load) {                   /* if to load the transactions */
    taset = tas_create(itemset);
    if (!taset) error(E_NOMEM); /* create a transaction set */
  }                             /* to store the transactions */
  MSG(fprintf(stderr, "\n"));   /* terminate the startup message */

  /* --- read item appearances --- */
  if (fn_app) {                 /* if item appearances are given */
    t = clock();                /* start the timer */
    if (*fn_app)                /* if an app. file name is given, */
      in = fopen(fn_app, "r");  /* open the item appearances file */
    else {                      /* if no app. file name is given, */
      in = stdin; fn_app = "<stdin>"; }   /* read from std. input */
    MSG(fprintf(stderr, "reading %s ... ", fn_app));
    if (!in) error(E_FOPEN, fn_app);
    k = is_readapp(itemset,in); /* read the item appearances */
    if (k  != 0) error(k, fn_app, RECCNT(itemset), BUFFER(itemset));
    if (in != stdin)            /* if not read from standard input, */
      fclose(in);               /* close the input file */
    MSG(fprintf(stderr, "[%d item(s)]", is_cnt(itemset)));
    MSG(fprintf(stderr, " done [%.2fs].\n", SEC_SINCE(t)));
  }                             /* print a log message */

  /* --- read transactions --- */
  t = clock();                  /* start the timer */
  if (fn_in && *fn_in)          /* if an input file name is given, */
    in = fopen(fn_in, "r");     /* open input file for reading */
  else {                        /* if no input file name is given, */
    in = stdin; fn_in = "<stdin>"; }   /* read from standard input */
  MSG(fprintf(stderr, "reading %s ... \n", fn_in));
  if (!in) error(E_FOPEN, fn_in);
  while (1) {                   /* transaction read loop */
    k = is_read(itemset, in);   /* read the next transaction */
    if (k < 0) error(k, fn_in, RECCNT(itemset), BUFFER(itemset));
    if (k > 0) break;           /* check for error and end of file */
    k = is_tsize(itemset);      /* update the maximal */
    if (k > maxcnt) maxcnt = k; /* transaction size */
    if (taset && (tas_add(taset, NULL, 0) != 0))
      error(E_NOMEM);           /* add the loaded transaction */
  }                             /* to the transaction set */
  if (taset) {                  /* if transactions have been loaded */
    if (in != stdin) fclose(in);/* if not read from standard input, */
    in = NULL;                  /* close the input file */
  }                             /* clear the file variable */
  n     = is_cnt(itemset);      /* get the number of items */
  tacnt = is_gettac(itemset);   /* and the number of transactions */
  MSG(fprintf(stderr, "[%d item(s), %d transaction(s)]", n, tacnt));
  MSG(fprintf(stderr, " done [%.2fs].", SEC_SINCE(t)));
  if ((n <= 0) || (tacnt <= 0)) error(E_NOTAS);
  MSG(fprintf(stderr, "\n"));   /* check for at least one transaction */
  if (supp >= 0)                /* if relative support is given */
    supp = ceil(tacnt *supp);   /* compute absolute support */
  else {                        /* if absolute support is given, */
    supp = ceil(-100  *supp);   /* make the support value positive */
    if (!(sout & 2)) sout = 2;  /* switch to absolute support output */
  }                             /* do the same with the max. support */
  smax = floor(((smax >= 0) ? tacnt : -100) *smax);

  /* --- sort and recode items --- */
  MSG(fprintf(stderr, "filtering, sorting and recoding items ... "));
  t   = clock();                /* start the timer */
  map = (int*)malloc(is_cnt(itemset) *sizeof(int));
  if (!map) error(E_NOMEM);     /* create an item identifier map */
  k = (int)((mode & IST_HEAD) ? supp : ceil(supp *conf));
  n = is_recode(itemset, k, sort, map);
  if (taset) {                  /* sort and recode the items and */
    tas_recode(taset, map,n);   /* recode the loaded transactions */
    maxcnt = tas_max(taset);    /* get the new maximal t.a. size */
  }                             /* (may be smaller than before) */
  free(map);                    /* delete the item identifier map */
  MSG(fprintf(stderr, "[%d item(s)] ", n));
  MSG(fprintf(stderr, "done [%.2fs].", SEC_SINCE(t)));
  if (n <= 0) error(E_NOFREQ);  /* print a log message and */
  MSG(fprintf(stderr, "\n"));   /* check the number of items */
  if (maxlen > maxcnt)          /* clamp the set/rule length */
    maxlen = maxcnt;            /* to the maximum set size */

  /* --- create a transaction tree --- */
  tt = 0;                       /* init. the tree construction time */
  if (tree && taset) {          /* if transactions were loaded */
    MSG(fprintf(stderr, "creating transaction tree ... "));
    t = clock();                /* start the timer */
    tatree = tat_create(taset, heap); 
    if (!tatree) error(E_NOMEM);/* create a transaction tree */
    if (filter == 0) {          /* if a tree rebuild is not needed, */
      tas_delete(taset, 0); taset = NULL; }  /* delete transactions */
    tt = clock() -t;            /* note the time for the construction */
    MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)));
  }                             /* print a log message */

  /* --- create an item set tree --- */
  t = clock(); tc = 0;          /* start the timer */
  istree = ist_create(itemset, mode, (int)supp, conf);
  if (!istree) error(E_NOMEM);  /* create an item set tree */

  /* --- check item subsets --- */
  if (filter) {                 /* if to filter unused items */
    used = (char*)malloc(is_cnt(itemset) *sizeof(char));
    if (!used) error(E_NOMEM);  /* create a flag vector */
  }                             /* for the items */
  MSG(fprintf(stderr, "checking subsets of size 1"));
  while (ist_height(istree) < maxlen) {
    if (filter != 0) {          /* if to filter w.r.t. item usage, */
      i = ist_check(istree, used);     /* check current item usage */
      if (i < maxlen) maxlen = i;      /* update the maximum size */
      if (ist_height(istree) >= i) break;
    }                           /* check the tree height */
    k = ist_addlvl(istree);     /* while max. height is not reached, */
    if (k <  0) error(E_NOMEM); /* add a level to the item set tree */
    if (k != 0) break;          /* if no level was added, abort */
    MSG(fprintf(stderr, " %d", ist_height(istree)));
    if (tatree) {               /* if a transaction tree was created */
      if (((filter < 0)         /* if to filter w.r.t. item usage */
      &&   (i < -filter *n))    /* and enough items were removed */
      ||  ((filter > 0)         /* or counting time is long enough */
      &&   (i < n) && (i *(double)tt < filter *n *tc))) {
        n = i; x = clock();     /* note the new number of items */
        tas_filter(taset, used);/* and remove unnecessary items */
        tat_delete(tatree);     /* delete the transaction tree */
        tatree = tat_create(taset, heap);
        if (!tatree) error(E_NOMEM);
        tt = clock() -x;        /* rebuild the transaction tree and */
      }                         /* note the new construction time */
      x  = clock();             /* count the transaction tree */
      ist_countx(istree, tatree);
      tc = clock() -x; }        /* note the new count time */
    else if (taset) {           /* if transactions were loaded */
      if (((filter < 0)         /* if to filter w.r.t. item usage */
      &&   (i <= -filter *n))   /* and enough items were removed */
      ||  ((filter > 0)         /* or counting time is long enough */
      &&   (i *(double)tt <= filter *n *tc))) {
        n = i; x = clock();     /* note the new number of items */
        tas_filter(taset, used);/* and remove unnecessary items */
        tt = clock() -t;        /* from the transactions */
      }                         /* note the filtering time */
      for (i = tacnt; --i >= 0;)/* traverse and count transactions */
        ist_count(istree, tas_tract(taset, i), tas_tsize(taset, i));
      tc = clock() -t; }        /* note the new count time */
    else {                      /* if to work on the input file, */
      rewind(in);               /* reset the file position */
      for (maxcnt = 0; (i = is_read(itemset, in)) == 0; ) {
        if (filter != 0)        /* (re)read the transactions and */
          is_filter(itemset, used);  /* remove unnecessary items */
        k = is_tsize(itemset);  /* update the maximum size */
        if (k > maxcnt) maxcnt = k;  /* of a transaction */
        ist_count(istree, is_tract(itemset), k);
      }                         /* count the transaction in the tree */
      if (i < 0) error(i, fn_in, RECCNT(itemset), BUFFER(itemset));
      if (maxcnt < maxlen)      /* update the maximal rule length */
        maxlen = maxcnt;        /* according to the max. t.a. size */
    }                           /* (may be smaller than before) */
  }
  if (!taset && !tatree) {      /* if transactions were not loaded */
    if (in != stdin) fclose(in);/* if not read from standard input, */
    in = NULL;                  /* close the input file */
  }                             /* clear the file variable */
  MSG(fprintf(stderr, " done [%.2fs].\n", SEC_SINCE(t)));

  /* --- filter found item sets --- */
  if ((target == TT_CLSET) || (target == TT_MFSET)) {
    MSG(fprintf(stderr, "filtering %s item sets ... ",
        (target == TT_MFSET) ? "maximal" : "closed"));
    t = clock();                /* filter the item sets */
    ist_filter(istree, (target == TT_MFSET) ? IST_MAXFRQ : IST_CLOSED);
    MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)));
  }                             /* (filter takes longer than print) */

  /* --- sort transactions --- */
  if (target <= TT_MFSET) {     /* if to find frequent item sets */
    if (!taset)                 /* transactions must be loaded */
      ext = 0;                  /* for extended support output */
    else if (ext) {             /* if extended output is requested */
      MSG(fprintf(stderr, "sorting transactions ... "));
      t = clock();              /* start the timer */
      tas_sort(taset, heap);    /* sort the transactions */
      MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)));
    }                           /* (sorting is necessary to find the */
  }                             /* number of identical transactions) */

  /* --- print item sets/rules/hyperedges --- */
  t = clock();                  /* start the timer */
  if (fn_out && *fn_out)        /* if an output file name is given, */
    out = fopen(fn_out, "w");   /* open the output file */
  else {                        /* if no output file name is given, */
    out = stdout; fn_out = "<stdout>"; }    /* write to std. output */
  MSG(fprintf(stderr, "writing %s ... ", fn_out));
  if (!out) error(E_FOPEN, fn_out);
  ist_init(istree, minlen, arem, minval);
  set = is_tract(itemset);      /* get the transaction buffer */
  if (target <= TT_MFSET) {     /* if to find frequent item sets */
    for (n = 0; 1; ) {          /* extract item sets from the tree */
      k = ist_set(istree, set, &frq, &conf);
      if (k <= 0) break;        /* get the next frequent item set */
      if (frq > smax) continue; /* check against maximal support */
      for (i = 0; i < k; i++) { /* traverse the set's items */
        name = is_name(itemset, set[i]);
        if (c2scf) { sc_format(buf, name, 0); name = buf; }
        fputs(name, out);       /* print the name of the next item */
        fputs((i < k-1) ? sep : " ", out);
      }                         /* print a separator */
      fputs(" (", out);         /* print the item set's support */
      if (sout & 1) { fprintf(out, fmt, (frq/(double)tacnt) *100);
                      if (sout & 2) fputc('/', out); }
      if (sout & 2) { fprintf(out, "%d", frq); }
      if (ext) {                /* if to print the extended support */
        frq = tas_occur(taset, set, k);
        fputs(", ", out);       /* get the number of occurrences */
        fprintf(out, fmt, (frq/(double)tacnt) *100);
        if (sout & 2) fprintf(out, "/%d", frq);
      }                         /* print the extended support data */
      if (aval) { fputs(", ", out); fprintf(out, fmt, conf *100); }
      fputs(")\n", out);        /* print the add. eval. measure, */
      n++;                      /* terminate the support output, */
    } }                         /* and count the item set */
  else if (target == TT_RULE) { /* if to find association rules, */
    for (n = 0; 1; ) {          /* extract rules from tree */
      k = ist_rule(istree, set, &frq, &conf, &lftval, &minval);
      if (k <= 0) break;        /* get the next association rule */
      if (frq > smax) continue; /* check against maximal support */
      for (i = 0; i < k; i++) { /* traverse the rule's items */
        name = is_name(itemset, set[i]);
        if (c2scf) { sc_format(buf, name, 0); name = buf; }
        fputs(name, out);       /* print the next item */
        fputs((i <= 0) ? " <- " : ((i < k-1) ? sep : " "), out);
      }                         /* print a separator */
      fputs(" (", out);         /* print the rule evaluation */
      if (sout & 1) supp = frq/(double)tacnt;
      if (ext && !(mode & IST_HEAD)) {
        if (sout & 1) { fprintf(out, fmt, supp *conf *100);
                        if (sout & 2) fputc('/', out); }
        if (sout & 2) { fprintf(out, "%d", (int)(frq *conf +0.5));}
        fputs(", ", out);       /* print the support of the rule */
      }                         /* from  the support of the body */
      if (sout & 1) { fprintf(out, fmt, supp *100);
                      if (sout & 2) fputc('/', out); }
      if (sout & 2) { fprintf(out, "%d", frq); }
      fputs(", ", out);         /* print the rule support */
      if (ext && (mode & IST_HEAD)) {
        if (sout & 1) { fprintf(out, fmt, (supp/conf) *100);
                        if (sout & 2) fputc('/', out); }
        if (sout & 2) { fprintf(out, "%d", (int)(frq /conf +0.5));}
        fputs(", ", out);       /* print the support of the body */
      }                         /* from  the support of the rule */
      fprintf(out, fmt, conf *100); /* print the rule confidence */
      if (lift) { fputs(", ", out); fprintf(out, fmt, lftval *100); }
      if (aval) { fputs(", ", out); fprintf(out, fmt, minval *100); }
      fputs(")\n", out);        /* print the value of the additional */
      n++;                      /* rule evaluation measure and */
    } }                         /* count the association rule */
  else if (target == TT_HEDGE){ /* if to find association hyperedges */
    for (n = 0; 1; ) {          /* extract hyperedges from tree */
      k = ist_hedge(istree, set, &frq, &conf, &minval);
      if (k <= 0) break;        /* get the next hyperedge */
      if (frq > smax) continue; /* check against maximal support */
      for (i = 0; i < k; i++) { /* traverse the edge's items */
        name = is_name(itemset, set[i]);
        if (c2scf) { sc_format(buf, name, 0); name = buf; }
        fputs(name, out);       /* print the name of the next item */
        fputs((i < k-1) ? sep : " ", out);
      }                         /* print a separator */
      fputs(" (", out);         /* print the hyperedge evaluation */
      if (sout & 1) { fprintf(out, fmt, (frq/(double)tacnt) *100);
                      if (sout & 2) fputc('/', out); }
      if (sout & 2) { fprintf(out, "%d", frq); }
      fputs(", ", out); fprintf(out, fmt, conf *100);
      if (aval) { fputs(", ", out); fprintf(out, fmt, minval *100); }
      fputs(")\n", out);        /* print support and confidence */
      n++;                      /* of the hyperedge and */
    } }                         /* count the hyperedge */
  else {                        /* if to find association groups */
    for (n = 0; 1; ) {          /* extract groups from tree */
      k = ist_group(istree, set, &frq, &minval);
      if (k <= 0) break;        /* get the next group */
      if (frq > smax) continue; /* check against maximal support */
      for (i = 0; i < k; i++) { /* traverse the group's items */
        name = is_name(itemset, set[i]);
        if (c2scf) { sc_format(buf, name, 0); name = buf; }
        fputs(name, out);       /* print the name of the next item */
        fputs((i < k-1) ? sep : " ", out);
      }                         /* print a separator */
      fputs(" (", out);         /* print the group evaluation */
      if (sout & 1) { fprintf(out, fmt, (frq/(double)tacnt) *100);
                      if (sout & 2) fputc('/', out); }
      if (sout & 2) { fprintf(out, "%d", frq); }
      if (aval) { fputs(", ", out); fprintf(out, fmt, minval *100); }
      fputs(")\n", out);        /* print support and add. measure */
      n++;                      /* and count the group */
    }
  }  /* if (target <= TT_MFSET) .. else .. */
  if (fflush(out) != 0) error(E_FWRITE, fn_out);
  if (out != stdout) fclose(out);
  out = NULL;                   /* close the output file */
  MSG(fprintf(stderr, "[%d %s(s)] done ", n, ttypes[target]));
  MSG(fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)));
  #ifdef BENCH
  printf("number of support counters: %d\n", istree->sccnt);
  printf("necessary support counters: %d\n", istree->scnec);
  printf("number of child pointers  : %d\n", istree->cpcnt);
  printf("necessary child pointers  : %d\n", istree->cpnec);
  printf("allocated memory (bytes)  : %d\n", istree->bytes);
  #endif

  /* --- clean up --- */
  #ifndef NDEBUG                /* if this is a debug version */
  free(used);                   /* delete the item app. vector */
  ist_delete(istree);           /* delete the item set tree, */
  if (tatree) tat_delete(tatree);     /* the transaction tree, */
  if (taset)  tas_delete(taset, 0);   /* the transaction set, */
  is_delete(itemset);                 /* and the item set */
  #endif
  #ifdef STORAGE                /* if storage debugging */
  showmem("at end of program"); /* check memory usage */
  #endif
  return 0;                     /* return 'ok' */
}  /* main() */
int main (int argc, char *argv[])
{                              
  int     i, k = 0, n;          
  char    *s;                   
  char    **optarg = NULL;     
  char    *fn_in   = NULL;      
  char    *fn_out  = NULL;    
  char    *fn_app  = NULL;     
  char    *blanks  = NULL;   
  char    *fldseps = NULL;     
  char    *recseps = NULL;      
  char    *comment = NULL;      
  char    *isep    = " ";      
  char    *impl    = " <- ";   
  char    *dflt    = "  (%1S)"; 
  char    *format  = dflt;      
  int     target   = 's';     
  int     min      = 1;        
  int     max      = INT_MAX;   
  double  supp     = 0.1;       
  double  smax     = 1.0;     
  double  conf     = 0.8;       
  int     dir      = 0;        
  int     eval     = 0;       
  int     aggm     = 0;        
  double  minval   = 0.1;       
  int     prune    = 0;         
  double  filter   = 0.1;       
  int     sort     = 2;         
  int     tree     = 1;         
  int     heap     = 1;         
  int     post     = 0;        
  int     report   = 0;       
  int     mode     = APP_BODY|IST_PERFECT;  
  int     size;               
  int     wgt;                  
  int     frq, body, head;     
  int     *items;             
  clock_t t, tt, tc, x;         

  #ifndef QUIET               
  prgname = argv[0];           

  if (argc > 1) {          
    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
    fprintf(stderr, VERSION); } 
  else {                      
    printf("usage: %s [options] infile outfile\n", argv[0]);
    printf("%s\n", DESCRIPTION);
    printf("%s\n", VERSION);
    printf("-t#      target type                              "
                    "(default: %c)\n", target);
    printf("         (s: frequent item sets, c: closed item sets,\n"
           "          m: maximal item sets,  r: association rules)\n");
    printf("-m#      minimum number of items per set/rule     "
                    "(default: %d)\n", min);
    printf("-n#      maximum number of items per set/rule     "
                    "(default: no limit)\n");
    printf("-s#      minimum support of a set/rule     "
                    "(default: %g%%)\n", supp *100);
    printf("-S#      maximum support of a set/rule     "
                    "(default: %g%%)\n", smax *100);
    printf("         (positive: percentage, "
                     "negative: absolute number)\n");
    printf("-c#      minimum confidence of a     rule         "
                    "(default: %g%%)\n", conf *100);
    printf("infile   file to read transactions from\n");
    printf("outfile  file to write item sets to\n");
    return 0;                
  }                            
  #endif  
  for (i = 1; i < argc; i++) {  
    s = argv[i];                
    if (optarg) { *optarg = s; optarg = NULL; continue; }
    if ((*s == '-') && *++s) {  
      while (*s) {             
        switch (*s++) {        
          case '!': help();                         break;
          case 't': target = (*s) ? *s++ : 's';     break;
          case 'm': min    = (int)strtol(s, &s, 0); break;
          case 'n': max    = (int)strtol(s, &s, 0); break;
          case 's': supp   = 0.01*strtod(s, &s);    break;
          case 'S': smax   = 0.01*strtod(s, &s);    break;
          case 'c': conf   = 0.01*strtod(s, &s);    break;
          case 'o': mode  |= APP_BOTH;              break;
          case 'e': eval   = (*s) ? *s++ : 0;       break;
          case 'a': aggm   = (*s) ? *s++ : 0;       break;
          case 'd': minval = 0.01*strtod(s, &s);    break;
          case 'p': prune  = (int)strtol(s, &s, 0); break;
          case 'g': report = ISR_SCAN;              break;
          case 'k': optarg = &isep;                 break;
          case 'i': optarg = &impl;                 break;
          case 'v': optarg = &format;               break;
          case 'l': dir    = (int)strtol(s, &s, 0); break;
          case 'q': sort   = (int)strtol(s, &s, 0); break;
          case 'u': filter =      strtod(s, &s);    break;
          case 'h': tree   = 0;                     break;
          case 'j': heap   = 0;                     break;
          case 'x': mode  &= ~IST_PERFECT;          break;
          case 'y': post   = 1;                     break;
          case 'b': optarg = &blanks;               break;
          case 'f': optarg = &fldseps;              break;
          case 'r': optarg = &recseps;              break;
          case 'C': optarg = &comment;              break;
          default : error(E_OPTION, *--s);          break;
        }                       
        if (optarg && *s) { *optarg = s; optarg = NULL; break; }
      } }                       
    else {                     
      switch (k++) {            
        case  0: fn_in  = s;      break;
        case  1: fn_out = s;      break;
        case  2: fn_app = s;      break;
        default: error(E_ARGCNT); break;
      }                         
    }
  }
  if (optarg) error(E_OPTARG);  
  if ((k < 2) || (k > 3))       
    error(E_ARGCNT);           
  if ((!fn_in || !*fn_in) && (fn_app && !*fn_app))
    error(E_STDIN);             
  switch (target) {             
    case 's': target = TT_SET;               break;
    case 'c': target = TT_CLOSED;            break;
    case 'm': target = TT_MAXIMAL;           break;
    case 'r': target = TT_RULE;              break;
    default : error(E_TARGET, (char)target); break;
  }
  if (min < 0) error(E_SIZE, min); 
  if (max < 0) error(E_SIZE, max); 
  if (supp  > 1)                
    error(E_SUPP, supp);        
  if ((conf  < 0) || (conf > 1))
    error(E_CONF, conf);       
  switch (eval) {              
    case 'x': case 0: eval = IST_NONE;      break;
    case 'c': eval = IST_CONF;              break;
    case 'd': eval = IST_DIFF;              break;
    case 'l': eval = IST_LIFT;              break;
    case 'a': eval = IST_LD21;              break;
    case 'q': eval = IST_QUOT;              break;
    case 'n': eval = IST_CHI2;              break;
    case 'p': eval = IST_PVAL;              break;
    case 'i': eval = IST_INFO;              break;
    case 'g': eval = IST_PGST;              break;
    case 'b': eval = IST_LOGQ;              break;
    default : error(E_MEASURE, (char)eval); break;
  }
  switch (aggm) {            
    case 'x': case 0: aggm = IST_NONE;      break;
    case 'm': aggm = IST_MIN;               break;
    case 'n': aggm = IST_MAX;               break;
    case 'a': aggm = IST_AVG;               break;
    default : error(E_MEASURE, (char)aggm); break;
  }
  if ((target > TT_SET)         
  || ((eval > IST_NONE) && (eval < IST_LOGQ)))
    mode &= ~IST_PERFECT;      
  if (target <= TT_MAXIMAL) {  
    mode |= APP_BOTH; conf = 1;}
  if ((filter <= -1) || (filter >= 1))
    filter = 0;                 

  ibase = ib_create(-1);       
  if (!ibase) error(E_NOMEM);  
  ib_chars(ibase, blanks, fldseps, recseps, comment);
  MSG(stderr, "\n");          

  if (fn_app) {                
    t = clock();                
    if (*fn_app)            
      in = fopen(fn_app, "r");  
    else {                      
      in = stdin; fn_app = "<stdin>"; }   
    MSG(stderr, "reading %s ... ", fn_app);
    if (!in) error(E_FOPEN, fn_app);
    k = ib_readapp(ibase, in); 
    if (k  != 0) error(k, fn_app, RECCNT(ibase), BUFFER(ibase));
    if (in != stdin) fclose(in);
    in = NULL;                  
    MSG(stderr, "[%d item(s)]", ib_cnt(ibase));
    MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));
  }                           

  t = clock();                 
  if (fn_in && *fn_in)        
    in = fopen(fn_in, "r");     
  else {                       
    in = stdin; fn_in = "<stdin>"; }   
  MSG(stderr, "reading %s ... ", fn_in);
  if (!in) error(E_FOPEN, fn_in);
  tabag = tb_create(ibase);     
  if (!tabag) error(E_NOMEM);   
  while (1) {                
    k = ib_read(ibase, in);     
    if (k) { if (k > 0) break; 
      error(k, fn_in, RECCNT(ibase), BUFFER(ibase)); }
    if (tb_add(tabag, NULL) != 0) error(E_NOMEM);
  }                            
  if (in != stdin) fclose(in);  
  in  = NULL;                  
  n   = ib_cnt(ibase);          
  k   = tb_cnt(tabag);         
  wgt = tb_wgt(tabag);          
  MSG(stderr, "[%d item(s), ", n);
  if (k == wgt) MSG(stderr,    "%d transaction(s)]", k);
  else          MSG(stderr, "%d/%d transaction(s)]", k, wgt);
  MSG(stderr, " done [%.2fs].", SEC_SINCE(t));
  if ((n <= 0) || (wgt <= 0))  
    error(E_NOTRANS);           
  MSG(stderr, "\n");            
  if (format == dflt) {       
    if (target != TT_RULE) format = (supp < 0) ? "  (%a)" : "  (%1S)";
    else format = (supp < 0) ? "  (%b, %1C)" : "  (%1X, %1C)";
  }                            
  supp = ceil (((supp < 0) ? -100 : wgt) *supp);
  smax = floor(((smax < 0) ? -100 : wgt) *smax);

  
  t = clock();                  
  MSG(stderr, "filtering, sorting and recoding items ... ");
  map = (int*)malloc(n *sizeof(int));
  if (!map) error(E_NOMEM);     
  k = (int)((mode & APP_HEAD) ? supp : ceil(supp *conf));
  n = ib_recode(ibase, k, sort, map);
  tb_recode(tabag, map);       
  tb_itsort(tabag, 1, heap);    
  free(map); map = NULL;        
  MSG(stderr, "[%d item(s)] done [%.2fs].", n, SEC_SINCE(t));
  if (n <= 0) error(E_NOFREQ); 
  MSG(stderr, "\n");            
  k   = tb_max(tabag);         
  if (max > k) max = k;         


  t = clock();                  
  MSG(stderr, "reducing transactions ... ");
  tb_filter(tabag, min, NULL);  
  tb_sort(tabag, 1, heap);      
  k = tb_reduce(tabag);         
  if (k == wgt) MSG(stderr,    "[%d transaction(s)]", k);
  else          MSG(stderr, "[%d/%d transaction(s)]", k, wgt);
  MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));

  
  tt = 0;                      
  if (tree) {                   
    t = clock();               
    MSG(stderr, "building transaction tree ... ");
    tatree = tt_create(tabag);  
    if (!tatree) error(E_NOMEM);
    if (filter == 0) {          
      tb_delete(tabag, 0);      
      tabag = NULL;             
    }
    MSG(stderr, "[%d node(s)]", tt_nodecnt(tatree));
    MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));
    tt = clock() -t;            
  }                             


  t = clock(); tc = 0;         
  istree = ist_create(ibase, mode, (int)supp, (int)smax, conf);
  if (!istree) error(E_NOMEM);  
  ist_seteval(istree, eval, aggm, minval, prune);

  /* --- check item subsets --- */
  MSG(stderr, "checking subsets of size 1");
  map = (int*)malloc(n *sizeof(int));
  if (!map) error(E_NOMEM);     
  while (1) {                   
    size = ist_height(istree);  
    if (size >= max) break;     
    if ((filter != 0)        
    &&  (ist_check(istree, map) <= size))
      break;                  
    if (post)                  
      ist_prune(istree);       
    k = ist_addlvl(istree);     
    if (k) { if (k > 0) break;
             error(E_NOMEM);  } 
    if (((filter < 0)           
    &&   (i < -filter *n))      
    ||  ((filter > 0)          
    &&   (i < n) && (i *(double)tt < filter *n *tc))) {
      n = i;                   
      x = clock();             
      tb_filter(tabag, size+1, map);
      tb_sort(tabag, 0, heap);  
      tb_reduce(tabag);         
      if (tatree) {             
        tt_delete(tatree, 0);   
        tatree = tt_create(tabag);
        if (!tatree) error(E_NOMEM);
      }                         
      tt = clock() -x;          
    }
    MSG(stderr, " %d", ++size); 
    x = clock();             
    if (tatree) ist_countx(istree, tatree);
    else        ist_countb(istree, tabag);
    tc = clock() -x;           
  }                             
  free(map); map = NULL;        
  MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));

  if ((target == TT_CLOSED) || (target == TT_MAXIMAL)) {
    t = clock();               
    MSG(stderr, "filtering for %s item sets ... ",
        (target == TT_MAXIMAL) ? "maximal" : "closed");
    k = target | ((prune < 0) ? IST_EVAL : 0);
    ist_mark(istree, k);        
    MSG(stderr, "done [%.2fs].\n", SEC_SINCE(t));
  }      

  t = clock();                  
  if (fn_out && *fn_out)        
    out = fopen(fn_out, "w");  
  else {                        
    out = stdout; fn_out = "<stdout>"; }    
  MSG(stderr, "writing %s ... ", fn_out);
  if (!out) error(E_FOPEN, fn_out);
  if (eval == IST_LOGQ) report |= ISR_LOGS;
  if ((target == TT_CLOSED) || (target == TT_MAXIMAL))
    report |= ISR_CLOSED;      
  isrep = isr_create(ibase, out, report, isep, impl);
  if (!isrep) error(E_NOMEM);  
  isr_setfmt (isrep, format);   
  isr_setsize(isrep,  min, max);
  ist_setsize(istree, min, max, dir);
  ist_init   (istree);          
  items = t_items(ib_tract(ibase));
  if ((target <= TT_MAXIMAL)    
  &&  (dir == 0)) {            
    if      (eval == IST_LOGQ)  
      isr_seteval(isrep, isr_logq,  NULL,   minval);
    else if (eval >  IST_NONE)  
      isr_seteval(isrep, ist_evalx, istree, minval);
    n = ist_report(istree, isrep); } 
  else if (target <= TT_MAXIMAL) { 
    for (n = 0; 1; ) {         
      k = ist_set(istree, items, &frq, &minval);
      if (k < 0) break;         
      if (k > 0) fputs(isr_name(isrep, items[0]), out);
      for (i = 0; ++i < k; ) {  
        fputs(isep, out); fputs(isr_name(isrep, items[i]), out); }
      if (format)               
        isr_sinfo(isrep, frq, minval);
      fputc('\n', out); n++;    
    } }                       
  else if (target == TT_RULE) { 
    for (n = 0; 1; ) {          
      k = ist_rule(istree, items, &frq, &body, &head, &minval);
      if (k < 0) break;         
      fputs(isr_name(isrep, items[0]), out);
      fputs(impl, out);         
      if (k > 1) fputs(isr_name(isrep, items[1]), out);
      for (i = 1; ++i < k; ) {  
        fputs(isep, out); fputs(isr_name(isrep, items[i]), out); }
      if (format)               
        isr_rinfo(isrep, frq, body, head, minval);
      fputc('\n', out); n++;    
    }                           
  }  /
  if (fflush(out) != 0) error(E_FWRITE, fn_out);
示例#5
0
文件: apriori.c 项目: crubeido/test
int main (int argc, char *argv[])
{                               /* --- main function */
  int     i, k = 0, n;          /* loop variables, counters */
  char    *s;                   /* to traverse the options */
  char    **optarg = NULL;      /* option argument */
  char    *fn_in   = NULL;      /* name of input  file */
  char    *fn_out  = NULL;      /* name of output file */
  char    *fn_app  = NULL;      /* name of item appearances file */
  char    *blanks  = NULL;      /* blanks */
  char    *fldseps = NULL;      /* field  separators */
  char    *recseps = NULL;      /* record separators */
  char    *comment = NULL;      /* comment indicators */
  char    *isep    = " ";       /* item separator for output */
  char    *impl    = " <- ";    /* implication sign for ass. rules */
  char    *dflt    = "  (%1S)"; /* default format for check */
  char    *format  = dflt;      /* format for information output */
  int     target   = 's';       /* target type (sets/rules/h.edges) */
  int     min      = 1;         /* minimum rule/item set size */
  int     max      = INT_MAX;   /* maximum rule/item set size */
  double  supp     = 10;        /* minimum support    (in percent) */
  double  smax     = 100;       /* maximum support    (in percent) */
  double  conf     = 80;        /* minimum confidence (in percent) */
  int     dir      = 0;         /* direction for size sorting */
  int     eval     = 0;         /* additional evaluation measure */
  int     zero     = 0;         /* flag for zero eval. below expect. */
  int     aggm     = 0;         /* aggregation mode for eval. measure */
  double  minval   = 10;        /* minimum evaluation measure value */
  int     prune    = 0;         /* (min. size for) evaluation pruning */
  double  filter   = 0.1;       /* item usage filtering parameter */
  int     sort     = 2;         /* flag for item sorting and recoding */
  int     tree     = 1;         /* flag for transaction tree */
  int     heap     = 1;         /* flag for heap sort vs. quick sort */
  int     post     = 0;         /* flag for a-posteriori pruning */
  int     report   = 0;         /* other flags for reporting */
  int     mode     = APP_BODY|IST_PERFECT;    /* search mode */
  int     size;                 /* current item set size */
  int     wgt;                  /* total transaction weight */
  int     frq, body, head;      /* frequency of an item set */
  int     *items;               /* item set (for reporting) */
  clock_t t, tt, tc, x;         /* timers for measurements */

  #ifndef QUIET                 /* if not quiet version */
  prgname = argv[0];            /* get program name for error msgs. */

  /* --- print usage message --- */
  if (argc > 1) {               /* if arguments are given */
    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
    fprintf(stderr, VERSION); } /* print a startup message */
  else {                        /* if no arguments are given */
    printf("usage: %s [options] infile outfile [appfile]\n", argv[0]);
    printf("%s\n", DESCRIPTION);
    printf("%s\n", VERSION);
    printf("-t#      target type                              "
                    "(default: %c)\n", target);
    printf("         (s: frequent item sets, c: closed item sets,\n"
           "          m: maximal item sets,  r: association rules)\n");
    printf("-m#      minimum number of items per set/rule     "
                    "(default: %d)\n", min);
    printf("-n#      maximum number of items per set/rule     "
                    "(default: no limit)\n");
    printf("-s#      minimum support    of a     set/rule     "
                    "(default: %g%%)\n", supp);
    printf("-S#      maximum support    of a     set/rule     "
                    "(default: %g%%)\n", smax);
    printf("         (positive: percentage, "
                     "negative: absolute number)\n");
    printf("-c#      minimum confidence of a     rule         "
                    "(default: %g%%)\n", conf);
    printf("-o       use the original rule support definition "
                    "(body & head)\n");
    printf("-e#      additional evaluation measure            "
                    "(default: none)\n");
    printf("-a#      aggregation mode for evaluation measure  "
                    "(default: none)\n");
    printf("-z       zero evaluation below expected support   "
                    "(default: evaluate all)\n");
    printf("-d#      minimum value of add. evaluation measure "
                    "(default: %g%%)\n", minval);
    printf("-p#      (min. size for) pruning with evaluation  "
                    "(default: no pruning)\n");
    printf("         (< 0: backward,   > 0: forward)\n");
    printf("-l#      sort item sets in output by their size   "
                    "(default: no sorting)\n");
    printf("         (< 0: descending, > 0: ascending)\n");
    printf("-g       write item names in scanable form "
                    "(quote certain characters)\n");
    printf("-k#      item separator for output                "
                    "(default: \"%s\")\n", isep);
    printf("-i#      implication sign for association rules   "
                    "(default: \"%s\")\n", impl);
    printf("-v#      output format for set/rule information   "
                    "(default: \"%s\")\n", format);
    printf("-q#      sort items w.r.t. their frequency        "
                    "(default: %d)\n", sort);
    printf("         (1: ascending, -1: descending, 0: do not sort,\n"
           "          2: ascending, -2: descending w.r.t. "
                    "transaction size sum)\n");
    printf("-u#      filter unused items from transactions    "
                    "(default: %g)\n", filter);
    printf("         (0: do not filter items w.r.t. usage in sets,\n"
           "         <0: fraction of removed items for filtering,\n"
           "         >0: take execution times ratio into account)\n");
    printf("-j       use quicksort to sort the transactions   "
                    "(default: heapsort)\n");
    printf("-x       do not prune the search "
                    "with perfect extensions\n");
    printf("-y       a-posteriori pruning of infrequent item sets\n");
    printf("-h       do not organize transactions as a prefix tree\n");
    printf("-b#      blank   characters                       "
                    "(default: \" \\t\\r\")\n");
    printf("-f#      field   separators                       "
                    "(default: \" \\t,\")\n");
    printf("-r#      record  separators                       "
                    "(default: \"\\n\")\n");
    printf("-C#      comment characters                       "
                    "(default: \"#\")\n");
    printf("-!       print additional option information\n");
    printf("infile   file to read transactions from\n");
    printf("outfile  file to write item sets/association rules"
                    "/hyperedges to\n");
    printf("appfile  file stating item appearances (optional)\n");
    return 0;                   /* print a usage message */
  }                             /* and abort the program */
  #endif  /* #ifndef QUIET */
  /* free option characters: w [A-Z]\[SC] */

  /* --- evaluate arguments --- */
  for (i = 1; i < argc; i++) {  /* traverse the arguments */
    s = argv[i];                /* get an option argument */
    if (optarg) { *optarg = s; optarg = NULL; continue; }
    if ((*s == '-') && *++s) {  /* -- if argument is an option */
      while (*s) {              /* traverse the options */
        switch (*s++) {         /* evaluate the options */
          case '!': help();                         break;
          case 't': target = (*s) ? *s++ : 's';     break;
          case 'm': min    = (int)strtol(s, &s, 0); break;
          case 'n': max    = (int)strtol(s, &s, 0); break;
          case 's': supp   =      strtod(s, &s);    break;
          case 'S': smax   =      strtod(s, &s);    break;
          case 'c': conf   =      strtod(s, &s);    break;
          case 'o': mode  |= APP_BOTH;              break;
          case 'e': eval   = (*s) ? *s++ : 0;       break;
          case 'z': zero   = IST_ZERO;              break;
          case 'a': aggm   = (*s) ? *s++ : 0;       break;
          case 'd': minval =      strtod(s, &s);    break;
          case 'p': prune  = (int)strtol(s, &s, 0); break;
          case 'g': report = ISR_SCAN;              break;
          case 'k': optarg = &isep;                 break;
          case 'i': optarg = &impl;                 break;
          case 'v': optarg = &format;               break;
          case 'l': dir    = (int)strtol(s, &s, 0); break;
          case 'q': sort   = (int)strtol(s, &s, 0); break;
          case 'u': filter =      strtod(s, &s);    break;
          case 'h': tree   = 0;                     break;
          case 'j': heap   = 0;                     break;
          case 'x': mode  &= ~IST_PERFECT;          break;
          case 'y': post   = 1;                     break;
          case 'b': optarg = &blanks;               break;
          case 'f': optarg = &fldseps;              break;
          case 'r': optarg = &recseps;              break;
          case 'C': optarg = &comment;              break;
          default : error(E_OPTION, *--s);          break;
        }                       /* set the option variables */
        if (optarg && *s) { *optarg = s; optarg = NULL; break; }
      } }                       /* get an option argument */
    else {                      /* -- if argument is no option */
      switch (k++) {            /* evaluate non-options */
        case  0: fn_in  = s;      break;
        case  1: fn_out = s;      break;
        case  2: fn_app = s;      break;
        default: error(E_ARGCNT); break;
      }                         /* note filenames */
    }
  }
  if (optarg) error(E_OPTARG);  /* check the option argument */
  if ((k < 2) || (k > 3))       /* and the number of arguments */
    error(E_ARGCNT);            /* (either in/out or in/out/app) */
  if ((!fn_in || !*fn_in) && (fn_app && !*fn_app))
    error(E_STDIN);             /* stdin must not be used twice */
  switch (target) {             /* check and translate target type */
    case 's': target = TT_ALL;               break;
    case 'c': target = TT_CLOSED;            break;
    case 'm': target = TT_MAXIMAL;           break;
    case 'r': target = TT_RULE;              break;
    default : error(E_TARGET, (char)target); break;
  }
  if (min < 0) error(E_SIZE, min); /* check the limits */
  if (max < 0) error(E_SIZE, max); /* for the set size */
  if (supp > 100)               /* check the minimum support */
    error(E_SUPP, supp);        /* (< 0: absolute support) */
  if ((conf < 0) || (conf > 100))
    error(E_CONF, conf);        /* check the minimum confidence */
  switch (eval) {               /* check and translate measure */
    case 'x': case 0: eval = IST_NONE;      break;
    case 'c': eval = IST_CONF;              break;
    case 'd': eval = IST_CONF_DIFF;         break;
    case 'l': eval = IST_LIFT;              break;
    case 'a': eval = IST_LIFT_DIFF;         break;
    case 'q': eval = IST_LIFT_QUOT;         break;
    case 'v': eval = IST_CVCT;              break;
    case 'e': eval = IST_CVCT_DIFF;         break;
    case 'r': eval = IST_CVCT_QUOT;         break;
    case 'f': eval = IST_CERT;              break;
    case 'n': eval = IST_CHI2;              break;
    case 'p': eval = IST_CHI2_PVAL;         break;
    case 'i': eval = IST_INFO;              break;
    case 'g': eval = IST_INFO_PVAL;         break;
    case 'b': eval = IST_LOGQ;              break;
    default : error(E_MEASURE, (char)eval); break;
  }
  switch (aggm) {               /* check and translate agg. mode */
    case 'x': case 0: aggm = IST_NONE;      break;
    case 'm': aggm = IST_MIN;               break;
    case 'n': aggm = IST_MAX;               break;
    case 'a': aggm = IST_AVG;               break;
    default : error(E_MEASURE, (char)aggm); break;
  }
  if ((target > TT_ALL)         /* if individual set counters needed */
  || ((eval > IST_NONE) && (eval < IST_LOGQ)))
    mode &= ~IST_PERFECT;       /* remove perfect extension pruning */
  if (target <= TT_MAXIMAL) {   /* remove rule specific settings */
    mode |= APP_BOTH; conf = 100; } 
  if ((filter <= -1) || (filter >= 1))
    filter = 0;                 /* check and adapt the filter option */
  if (dir)                      /* if to sort output by size, */
    mode &= ~IST_PERFECT;       /* do not use perfect ext. pruning */

  /* --- create item base --- */
  ibase = ib_create(0, 0);      /* create an item base and */
  if (!ibase) error(E_NOMEM);   /* set the special characters */
  ib_chars(ibase, blanks, fldseps, recseps, "", comment);
  MSG(stderr, "\n");            /* terminate the startup message */

  /* --- read item appearance indicators --- */
  if (fn_app) {                 /* if item appearances are given */
    t = clock();                /* start the timer for the reading */
    if (*fn_app)                /* if an app. file name is given, */
      in = fopen(fn_app, "r");  /* open the item appearances file */
    else {                      /* if no app. file name is given, */
      in = stdin; fn_app = "<stdin>"; }   /* read from std. input */
    MSG(stderr, "reading %s ... ", fn_app);
    if (!in) error(E_FOPEN, fn_app);
    k = ib_readapp(ibase, in);  /* read the item appearances */
    if (k  != 0) error(k, fn_app, RECCNT(ibase), BUFFER(ibase));
    if (in != stdin) fclose(in);/* if not read from standard input, */
    in = NULL;                  /* close the input file */
    MSG(stderr, "[%d item(s)]", ib_cnt(ibase));
    MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));
  }                             /* print a log message */

  /* --- read transactions --- */
  t = clock();                  /* start the timer for the reading */
  if (fn_in && *fn_in)          /* if an input file name is given, */
    in = fopen(fn_in, "r");     /* open input file for reading */
  else {                        /* if no input file name is given, */
    in = stdin; fn_in = "<stdin>"; }   /* read from standard input */
  MSG(stderr, "reading %s ... ", fn_in);
  if (!in) error(E_FOPEN, fn_in);
  tabag = tb_create(ibase, 0);  /* create a transaction bag/multiset */
  if (!tabag) error(E_NOMEM);   /* to store the transactions */
  while (1) {                   /* transaction read loop */
    k = ib_read(ibase, in);     /* read the next transaction */
    if (k) { if (k > 0) break;  /* check for error and end of file */
      error(k, fn_in, RECCNT(ibase), BUFFER(ibase)); }
    if (tb_add(tabag, NULL) != 0) error(E_NOMEM);
  }                             /* add transaction to bag/multiset */
  if (in != stdin) fclose(in);  /* if not read from standard input, */
  in  = NULL;                   /* close the input file */
  n   = ib_cnt(ibase);          /* get the number of items */
  k   = tb_cnt(tabag);          /* get the number of transactions */
  wgt = tb_wgt(tabag);          /* the total transaction weight */
  MSG(stderr, "[%d item(s), ", n);
  if (k == wgt) MSG(stderr,    "%d transaction(s)]", k);
  else          MSG(stderr, "%d/%d transaction(s)]", k, wgt);
  MSG(stderr, " done [%.2fs].", SEC_SINCE(t));
  if ((n <= 0) || (wgt <= 0))   /* check for at least one item */
    error(E_NOTRANS);           /* and at least one transaction */
  MSG(stderr, "\n");            /* terminate the log message */
  if (format == dflt) {         /* if default info. format is used */
    if (target != TT_RULE) format = (supp < 0) ? "  (%a)" : "  (%1S)";
    else format = (supp < 0) ? "  (%b, %1C)" : "  (%1X, %1C)";
  }                             /* set default according to target */
  supp  = ceil ((supp >= 0) ? 0.01 *supp *wgt : -supp);
  smax  = floor((smax >= 0) ? 0.01 *smax *wgt : -smax);
  conf *= 0.01;                 /* transform support and confidence */

  /* --- sort and recode items --- */
  t = clock();                  /* compute absolute support values */
  MSG(stderr, "filtering, sorting and recoding items ... ");
  map = (int*)malloc(n *sizeof(int));
  if (!map) error(E_NOMEM);     /* create an item identifier map */
  k = (int)((mode & APP_HEAD) ? supp : ceil(supp *conf));
  n = ib_recode(ibase, k, sort, map);
  tb_recode(tabag, map);        /* recode the items and transactions */
  tb_itsort(tabag, 1, heap);    /* and sort items in transactions */
  free(map); map = NULL;        /* delete the item identifier map */
  MSG(stderr, "[%d item(s)] done [%.2fs].", n, SEC_SINCE(t));
  if (n <= 0) error(E_NOFREQ);  /* print a log message and */
  MSG(stderr, "\n");            /* check the number of items */
  k   = tb_max(tabag);          /* clamp the set/rule length to */
  if (max > k) max = k;         /* the maximum transaction size */

  /* --- reduce transactions --- */
  t = clock();                  /* start the timer for the reduction */
  MSG(stderr, "reducing transactions ... ");
  tb_filter(tabag, min, NULL);  /* remove items of short transactions */
  tb_sort(tabag, 1, heap);      /* sort the trans. lexicographically */
  k = tb_reduce(tabag);         /* reduce transactions to unique ones */
  if (k == wgt) MSG(stderr,    "[%d transaction(s)]", k);
  else          MSG(stderr, "[%d/%d transaction(s)]", k, wgt);
  MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));

  /* --- create transaction tree --- */
  tt = 0;                       /* init. the tree construction time */
  if (tree) {                   /* if to use a transaction tree */
    t = clock();                /* start the timer for construction */
    MSG(stderr, "building transaction tree ... ");
    tatree = tt_create(tabag);  /* create a transaction tree */
    if (!tatree) error(E_NOMEM);
    if (filter == 0) {          /* if not to filter items, */
      tb_delete(tabag, 0);      /* delete the transaction bag */
      tabag = NULL;             /* (redundant data storage) */
    }
    MSG(stderr, "[%d node(s)]", tt_nodecnt(tatree));
    MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));
    tt = clock() -t;            /* note the time for the construction */
  }                             /* of the transaction tree */

  /* --- create item set tree --- */
  t = clock(); tc = 0;          /* start the timer for the search */
  istree = ist_create(ibase, mode, (int)supp, (int)smax, conf);
  if (!istree) error(E_NOMEM);  /* create an item set tree */
  ist_seteval(istree, eval|zero, aggm, 0.01*minval, prune);

  /* --- check item subsets --- */
  MSG(stderr, "checking subsets of size 1");
  map = (int*)malloc(n *sizeof(int));
  if (!map) error(E_NOMEM);     /* create a filter map */
  while (1) {                   /* traverse the item set sizes */
    size = ist_height(istree);  /* get the current item set size and */
    if (size >= max) break;     /* abort if maximal size is reached */
    if ((filter != 0)           /* if to filter w.r.t. item usage */
    &&  (ist_check(istree, map) <= size))
      break;                    /* check which items are still used */
    if (post)                   /* if a-posteriori pruning requested, */
      ist_prune(istree);        /* prune infrequent item sets */
    k = ist_addlvl(istree);     /* while max. height is not reached, */
    if (k) { if (k > 0) break;  /* add a level to the item set tree */
             error(E_NOMEM);  } /* if no level was added, abort */
    if (((filter < 0)           /* if to filter w.r.t. item usage */
    &&   (i < -filter *n))      /* and enough items were removed */
    ||  ((filter > 0)           /* or counting time is long enough */
    &&   (i < n) && (i *(double)tt < filter *n *tc))) {
      n = i;                    /* note the new number of items */
      x = clock();              /* start the timer for filtering */
      tb_filter(tabag, size+1, map);
      tb_sort(tabag, 0, heap);  /* remove unnec. items and trans. */
      tb_reduce(tabag);         /* and reduce trans. to unique ones */
      if (tatree) {             /* if a transaction tree was created */
        tt_delete(tatree, 0);   /* delete the transaction tree */
        tatree = tt_create(tabag);
        if (!tatree) error(E_NOMEM);
      }                         /* rebuild the transaction tree */
      tt = clock() -x;          /* note the filter/rebuild time */
    }
    MSG(stderr, " %d", ++size); /* print the current item set size */
    x = clock();                /* start the timer for counting */
    if (tatree) ist_countx(istree, tatree);
    else        ist_countb(istree, tabag);
    tc = clock() -x;            /* count the transaction tree/bag */
  }                             /* and compute the new counting time */
  free(map); map = NULL;        /* delete the filter map */
  MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));

  /* --- filter found item sets --- */
  if ((target == TT_CLOSED) || (target == TT_MAXIMAL)) {
    t = clock();                /* start the timer for filtering */
    MSG(stderr, "filtering for %s item sets ... ",
        (target == TT_MAXIMAL) ? "maximal" : "closed");
    k = target | ((prune < 0) ? IST_EVAL : 0);
    ist_mark(istree, k);        /* filter closed/maximal item sets */
    MSG(stderr, "done [%.2fs].\n", SEC_SINCE(t));
  }                             /* print a log message */

  /* --- print item sets/rules/hyperedges --- */
  t = clock();                  /* start the timer for the output */
  if (fn_out && *fn_out)        /* if an output file name is given, */
    out = fopen(fn_out, "w");   /* open the output file */
  else {                        /* if no output file name is given, */
    out = stdout; fn_out = "<stdout>"; }    /* write to std. output */
  MSG(stderr, "writing %s ... ", fn_out);
  if (!out) error(E_FOPEN, fn_out);
  if (eval == IST_LOGQ) report |= ISR_LOGS;
  if ((target == TT_CLOSED) || (target == TT_MAXIMAL))
    report |= ISR_NOEXP;        /* combine the report mode flags */
  isrep = isr_create(ibase, out, report, isep, impl);
  if (!isrep) error(E_NOMEM);   /* create an item set reporter */
  isr_setfmt (isrep, format);   /* and configure it */
  isr_setsize(isrep,  min, max);
  ist_setsize(istree, min, max, dir);
  ist_init   (istree);          /* initialize the extraction */
  items = t_items(ib_tract(ibase));
  if ((target <= TT_MAXIMAL)    /* if to find frequent item sets */
  &&  (dir == 0)) {             /* and not to sort them by size */
    if      (eval == IST_LOGQ)  /* if to compute an add. evaluation */
      isr_seteval(isrep, isr_logq,  NULL,   0.01*minval);
    else if (eval >  IST_NONE)  /* set the add. evaluation function */
      isr_seteval(isrep, ist_evalx, istree, 0.01*minval);
    n = ist_report(istree, isrep); } /* report the item sets */
  else if (target <= TT_MAXIMAL) { /* if to find frequent item sets */
    for (n = 0; 1; ) {          /* extract item sets from the tree */
      k = ist_set(istree, items, &frq, &minval);
      if (k < 0) break;         /* get the next frequent item set */
      if (k > 0) fputs(isr_name(isrep, items[0]), out);
      for (i = 0; ++i < k; ) {  /* print the item names */
        fputs(isep, out); fputs(isr_name(isrep, items[i]), out); }
      if (format)               /* if requested, print information */
        isr_sinfo(isrep, frq, minval);
      fputc('\n', out); n++;    /* terminate the output line and */
    } }                         /* count the reported item set */
  else if (target == TT_RULE) { /* if to find association rules, */
    for (n = 0; 1; ) {          /* extract rules from tree */
      k = ist_rule(istree, items, &frq, &body, &head, &minval);
      if (k < 0) break;         /* get the next association rule */
      fputs(isr_name(isrep, items[0]), out);
      fputs(impl, out);         /* print name of rule head item */
      if (k > 1) fputs(isr_name(isrep, items[1]), out);
      for (i = 1; ++i < k; ) {  /* print names of items in rule body */
        fputs(isep, out); fputs(isr_name(isrep, items[i]), out); }
      if (format)               /* if requested, print information */
        isr_rinfo(isrep, frq, body, head, minval);
      fputc('\n', out); n++;    /* terminate the output line and */
    }                           /* count the reported ass. rule */
  }  /* if (target <= TT_MAXIMAL) .. else .. */
  if (fflush(out) != 0) error(E_FWRITE, fn_out);
  if (out != stdout) fclose(out);
  out = NULL;                   /* close the output file */
  MSG(stderr, "[%d %s(s)] done ", n,
              (target == TT_RULE) ? "rule" : "set");
  MSG(stderr, "[%.2fs].\n", SEC_SINCE(t));
  #ifdef BENCH                  /* if benchmark version, */
  ist_stats(istree);            /* show the search statistics */
  #endif                        /* (especially memory usage) */

  /* --- clean up --- */
  #ifndef NDEBUG                /* if this is a debug version */
  isr_delete(isrep, 0);             /* the item set reporter, */
  ist_delete(istree);               /* the item set tree, */
  if (tatree) tt_delete(tatree, 0); /* the transaction tree, */
  if (tabag)  tb_delete(tabag,  0); /* the transaction bag, */
  ib_delete(ibase);                 /* and the item base */
  #endif
  #ifdef STORAGE                /* if storage debugging */
  showmem("at end of program"); /* check memory usage */
  #endif
  return 0;                     /* return 'ok' */
}  /* main() */