コード例 #1
0
ファイル: apriori_wrapper.c プロジェクト: Banta/apriori-rails
/*----------------------------------------------------------------------
  do_apriori -- run a complete apriori analysis from an argument vector

  The function is organised like a command line program:
    1. print a startup/usage message (unless compiled with QUIET),
    2. evaluate the options and file names in argv[1..argc-1],
    3. read the (optional) item appearances and the transactions,
    4. sort/recode the items and (optionally) build a transaction tree,
    5. grow the item set tree level by level while counting support,
    6. write item sets, rules, hyperedges or groups to the output file,
    7. release all resources.

  Parameters
    argc  number of entries in argv
    argv  argument vector; argv[0] is used as the program name

  Returns
    0 after a successful run and also after only printing the usage
    message (no arguments given, non-QUIET build).

  Notes
    - The objects itemset, taset, tatree, istree, in, out and prgname
      are not declared here; they are file-scope objects shared with
      the rest of the file.
    - All failures are reported through error().
      NOTE(review): error() is defined elsewhere; the code below is
      written as if it does not return -- confirm.
    - Because this is a callable wrapper rather than a program's main
      function, the clean-up at the end is executed in every build and
      the shared pointers are reset, so a later call does not see
      dangling pointers.
----------------------------------------------------------------------*/
int do_apriori (int argc, char *argv[])
{                               /* --- main function */
  int    i, k = 0, n;           /* loop variables, counters */
  char   *s;                    /* to traverse the options */
  char   **optarg = NULL;       /* option argument */
  char   *fn_in   = NULL;       /* name of input  file */
  char   *fn_out  = NULL;       /* name of output file */
  char   *fn_app  = NULL;       /* name of item appearances file */
  char   *blanks  = NULL;       /* blanks */
  char   *fldseps = NULL;       /* field  separators */
  char   *recseps = NULL;       /* record separators */
  char   *comment = NULL;       /* comment indicators */
  char   *used    = NULL;       /* item usage vector */
  double supp     = 0.1;        /* minimal support    (in percent) */
  double smax     = 1.0;        /* maximal support    (in percent) */
  double conf     = 0.8;        /* minimal confidence (in percent) */
  int    mode     = IST_BODY;   /* search mode (rule support def.) */
  int    target   = 'r';        /* target type (sets/rules/h.edges) */
  int    arem     = 0;          /* additional rule evaluation measure */
  int    lift     = 0;          /* flag for printing the lift */
  double minval   = 0.1;        /* minimal evaluation measure value */
  double lftval   = 0;          /* lift value (confidence/prior) */
  int    minlen   = 1;          /* minimal rule length */
  int    maxlen   = INT_MAX;    /* maximal rule length */
  int    load     = 1;          /* flag for loading transactions */
  int    sort     = 2;          /* flag for item sorting and recoding */
  double filter   = 0.1;        /* item usage filtering parameter */
  int    tree     = 1;          /* flag for transaction tree */
  int    heap     = 1;          /* flag for heap sort vs. quick sort */
  int    c2scf    = 0;          /* flag for conv. to scanable form */
  char   *sep     = " ";        /* item separator for output */
  char   *fmt     = "%.1f";     /* output format for support/conf. */
  int    sout     = 1;          /* flag for abs./rel. support output */
  int    ext      = 0;          /* flag for extended support output */
  int    aval     = 0;          /* flag for add. eval. measure value */
  int    maxcnt   = 0;          /* maximal number of items per set */
  int    tacnt;                 /* number of transactions */
  int    frq;                   /* frequency of an item set */
  int    *map, *set;            /* identifier map, item set */
  int    verbose  = 0;          /* flag for verboseness */
  const  char *name;            /* buffer for item names */
  static char buf[4*TS_SIZE+4]; /* buffer for formatting */
  clock_t     t, tt, tc, x;     /* timer for measurements */

  #ifndef QUIET                 /* if not quiet version */
  prgname = argv[0];            /* get program name for error msgs. */

  /* --- print usage message --- */
  if (argc > 1) {               /* if arguments are given */
    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
    /* print the version through "%s" so that it is never */
    /* interpreted as a format string */
    fprintf(stderr, "%s", VERSION); } /* print a startup message */
  else {                        /* if no arguments given */
    printf("usage: %s [options] infile outfile [appfile]\n", argv[0]);
    printf("%s\n", DESCRIPTION);
    printf("%s\n", VERSION);
    printf("-t#      target type (default: association rules)\n"
           "         (s: item sets, c: closed item sets,"
                    " m: maximal item sets,\n"
           "          r: association rules,"
                    " h: association hyperedges)\n");
    printf("-m#      minimal number of items per set/rule/hyperedge "
                    "(default: %d)\n", minlen);
    printf("-n#      maximal number of items per set/rule/hyperedge "
                    "(default: no limit)\n");
    printf("-s#      minimal support    of a     set/rule/hyperedge "
                    "(default: %g%%)\n", supp *100);
    printf("-S#      maximal support    of a     set/rule/hyperedge "
                    "(default: %g%%)\n", smax *100);
    printf("-c#      minimal confidence of a         rule/hyperedge "
                    "(default: %g%%)\n", conf *100);
    printf("-o       use original definition of the support of a rule "
                    "(body & head)\n");
    printf("-k#      item separator for output "
                    "(default: \"%s\")\n", sep);
    printf("-p#      output format for support/confidence "
                    "(default: \"%s\")\n", fmt);
    printf("-x       extended support output "
                    "(print both rule support types)\n");
    printf("-a       print absolute support "
                    "(number of transactions)\n");
    printf("-y       print lift value (confidence divided by prior)\n");
    printf("-e#      additional evaluation measure (default: none)\n");
    printf("-!       print a list of additional evaluation measures\n");
    printf("-d#      minimal value of additional evaluation measure "
                    "(default: %g%%)\n", minval *100);
    printf("-v       print value of additional "
                    "rule evaluation measure\n");
    printf("-g       write output in scanable form "
                    "(quote certain characters)\n");
    printf("-l       do not load transactions into memory "
                    "(work on input file)\n");
    printf("-q#      sort items w.r.t. their frequency (default: %d)\n"
           "         (1: ascending, -1: descending, 0: do not sort,\n"
           "          2: ascending, -2: descending w.r.t. "
                    "transaction size sum)\n", sort);
    printf("-u#      filter unused items from transactions "
                    "(default: %g)\n", filter);
    printf("         (0: do not filter items w.r.t. usage in sets,\n"
           "         <0: fraction of removed items for filtering,\n"
           "         >0: take execution times ratio into account)\n");
    printf("-h       do not organize transactions as a prefix tree\n");
    printf("-j       use quicksort to sort the transactions "
                    "(default: heapsort)\n");
    printf("-z       minimize memory usage "
                    "(default: maximize speed)\n");
    printf("-b/f/r#  blank characters, field and record separators\n"
           "         (default: \" \\t\\r\", \" \\t\", \"\\n\")\n");
    printf("-C#      comment characters (default: \"#\")\n");
    printf("-V       verbose\n");

    printf("infile   file to read transactions from\n");
    printf("outfile  file to write item sets/association rules"
                    "/hyperedges to\n");
    printf("appfile  file stating item appearances (optional)\n");
    return 0;                   /* print a usage message */
  }                             /* and abort the program */
  #endif  /* #ifndef QUIET */

  /* --- evaluate arguments --- */
  /* optarg points to the string variable that is waiting for its    */
  /* value: it is filled either from the rest of the current option  */
  /* word or, if that is empty, from the next argument.              */
  for (i = 1; i < argc; i++) {  /* traverse arguments */
    s = argv[i];                /* get option argument */
    if (optarg) { *optarg = s; optarg = NULL; continue; }
    if ((*s == '-') && *++s) {  /* -- if argument is an option */
      while (*s) {              /* traverse options */
        switch (*s++) {         /* evaluate switches */
          case '!': help();                         break;
          case 't': target = (*s) ? *s++ : 'r';     break;
          case 'm': minlen = (int)strtol(s, &s, 0); break;
          case 'n': maxlen = (int)strtol(s, &s, 0); break;
          case 's': supp   = 0.01*strtod(s, &s);    break;
          case 'S': smax   = 0.01*strtod(s, &s);    break;
          case 'c': conf   = 0.01*strtod(s, &s);    break;
          case 'o': mode  |= IST_BOTH;              break;
          case 'k': optarg = &sep;                  break;
          case 'p': optarg = &fmt;                  break;
          case 'x': ext    = 1;                     break;
          case 'a': sout  |= 2;                     break;
          case 'y': lift   = 1;                     break;
          case 'e': arem   = (*s) ? *s++ : 0;       break;
          case 'd': minval = 0.01*strtod(s, &s);    break;
          case 'v': aval   = 1;                     break;
          case 'g': c2scf  = 1;                     break;
          case 'l': load   = 0;                     break;
          case 'q': sort   = (int)strtol(s, &s, 0); break;
          case 'u': filter =      strtod(s, &s);    break;
          case 'h': tree   = 0;                     break;
          case 'j': heap   = 0;                     break;
          case 'z': mode  |= IST_MEMOPT;            break;
          case 'b': optarg = &blanks;               break;
          case 'f': optarg = &fldseps;              break;
          case 'r': optarg = &recseps;              break;
          case 'C': optarg = &comment;              break;
          case 'V': verbose = 1;                    break;
          default : error(E_OPTION, *--s);          break;
        }                       /* set option variables */
        if (optarg && *s) { *optarg = s; optarg = NULL; break; }
      } }                       /* get option argument */
    else {                      /* -- if argument is no option */
      switch (k++) {            /* evaluate non-options */
        case  0: fn_in  = s;      break;
        case  1: fn_out = s;      break;
        case  2: fn_app = s;      break;
        default: error(E_ARGCNT); break;
      }                         /* note filenames */
    }
  }
  if (optarg) error(E_OPTARG);  /* check option argument */
  if ((k < 2) || (k > 3))       /* and the number of arguments */
    error(E_ARGCNT);            /* (either in/out or in/out/app) */
  if ((!fn_in || !*fn_in) && (fn_app && !*fn_app))
    error(E_STDIN);             /* stdin must not be used twice */
  switch (target) {             /* check and translate target type */
    case 's': target = TT_SET;               break;
    case 'c': target = TT_CLSET;             break;
    case 'm': target = TT_MFSET;             break;
    case 'r': target = TT_RULE;              break;
    case 'h': target = TT_HEDGE;             break;
    case 'g': target = TT_GROUP;             break;
    default : error(E_TARGET, (char)target); break;
  }
  if (supp > 1)                 /* check the minimal support */
    error(E_SUPP, supp);        /* (< 0: absolute number) */
  if ((conf  <  0) || (conf > 1))
    error(E_CONF, conf);        /* check the minimal confidence */
  if (minlen <= 0) error(E_RULELEN, minlen);  /* check the limits */
  if (maxlen <= 0) error(E_RULELEN, maxlen);  /* for the rule length */
  switch (arem) {               /* check and translate measure */
    case  0 : case '0': arem = EM_NONE;     break;
    case 'd': case '1': arem = EM_DIFF;     break;
    case 'q': case '2': arem = EM_QUOT;     break;
    case 'a': case '3': arem = EM_AIMP;     break;
    case 'i': case '4': arem = EM_INFO;     break;
    case 'c': case '5': arem = EM_CHI2;     break;
    case 'p': case '6': arem = EM_PVAL;     break;
    default : error(E_MEASURE, (char)arem); break;
  }
  if (target <= TT_MFSET) {     /* in item set mode neutralize */
    mode |= IST_BOTH; conf = 1;}/* rule specific settings */
  if (arem == EM_NONE)          /* if no add. rule eval. measure, */
    aval = 0;                   /* clear the corresp. output flag */
  /* a filter parameter outside (-1,1) switches the filtering off */
  if ((filter <= -1) || (filter >= 1)) filter = 0;

  /* --- create item set and transaction set --- */
  itemset = is_create(-1);      /* create an item set and */
  if (!itemset) error(E_NOMEM); /* set the special characters */
  is_chars(itemset, blanks, fldseps, recseps, comment);
  if (load) {                   /* if to load the transactions */
    taset = tas_create(itemset);
    if (!taset) error(E_NOMEM); /* create a transaction set */
  }                             /* to store the transactions */
  MSG(fprintf(stderr, "\n"));   /* terminate the startup message */

  /* --- read item appearances --- */
  if (fn_app) {                 /* if item appearances are given */
    t = clock();                /* start the timer */
    if (*fn_app)                /* if an app. file name is given, */
      in = fopen(fn_app, "r");  /* open the item appearances file */
    else {                      /* if no app. file name is given, */
      in = stdin; fn_app = "<stdin>"; }   /* read from std. input */
    MSG(fprintf(stderr, "reading %s ... ", fn_app));
    if (!in) error(E_FOPEN, fn_app);
    k = is_readapp(itemset,in); /* read the item appearances */
    if (k  != 0) error(k, fn_app, RECCNT(itemset), BUFFER(itemset));
    if (in != stdin)            /* if not read from standard input, */
      fclose(in);               /* close the input file */
    MSG(fprintf(stderr, "[%d item(s)]", is_cnt(itemset)));
    MSG(fprintf(stderr, " done [%.2fs].\n", SEC_SINCE(t)));
  }                             /* print a log message */

  /* --- read transactions --- */
  t = clock();                  /* start the timer */
  if (fn_in && *fn_in)          /* if an input file name is given, */
    in = fopen(fn_in, "r");     /* open input file for reading */
  else {                        /* if no input file name is given, */
    in = stdin; fn_in = "<stdin>"; }   /* read from standard input */
  MSG(fprintf(stderr, "reading %s ... \n", fn_in));
  if (!in) error(E_FOPEN, fn_in);
  while (1) {                   /* transaction read loop */
    k = is_read(itemset, in);   /* read the next transaction */
    if (k < 0) error(k, fn_in, RECCNT(itemset), BUFFER(itemset));
    if (k > 0) break;           /* check for error and end of file */
    k = is_tsize(itemset);      /* update the maximal */
    if (k > maxcnt) maxcnt = k; /* transaction size */
    if (taset && (tas_add(taset, NULL, 0) != 0))
      error(E_NOMEM);           /* add the loaded transaction */
  }                             /* to the transaction set */
  /* the file is kept open if the transactions were not loaded, */
  /* because it is then re-read for every level of the tree */
  if (taset) {                  /* if transactions have been loaded */
    if (in != stdin) fclose(in);/* if not read from standard input, */
    in = NULL;                  /* close the input file */
  }                             /* clear the file variable */
  n     = is_cnt(itemset);      /* get the number of items */
  tacnt = is_gettac(itemset);   /* and the number of transactions */
  MSG(fprintf(stderr, "[%d item(s), %d transaction(s)]", n, tacnt));
  MSG(fprintf(stderr, " done [%.2fs].", SEC_SINCE(t)));
  if ((n <= 0) || (tacnt <= 0)) error(E_NOTAS);
  MSG(fprintf(stderr, "\n"));   /* check for at least one transaction */
  if (supp >= 0)                /* if relative support is given */
    supp = ceil(tacnt *supp);   /* compute absolute support */
  else {                        /* if absolute support is given, */
    supp = ceil(-100  *supp);   /* make the support value positive */
    if (!(sout & 2)) sout = 2;  /* switch to absolute support output */
  }                             /* do the same with the max. support */
  smax = floor(((smax >= 0) ? tacnt : -100) *smax);

  /* --- sort and recode items --- */
  MSG(fprintf(stderr, "filtering, sorting and recoding items ... "));
  t   = clock();                /* start the timer */
  map = (int*)malloc(is_cnt(itemset) *sizeof(int));
  if (!map) error(E_NOMEM);     /* create an item identifier map */
  k = (int)((mode & IST_HEAD) ? supp : ceil(supp *conf));
  n = is_recode(itemset, k, sort, map);
  if (taset) {                  /* sort and recode the items and */
    tas_recode(taset, map,n);   /* recode the loaded transactions */
    maxcnt = tas_max(taset);    /* get the new maximal t.a. size */
  }                             /* (may be smaller than before) */
  free(map);                    /* delete the item identifier map */
  MSG(fprintf(stderr, "[%d item(s)] ", n));
  MSG(fprintf(stderr, "done [%.2fs].", SEC_SINCE(t)));
  if (n <= 0) error(E_NOFREQ);  /* print a log message and */
  MSG(fprintf(stderr, "\n"));   /* check the number of items */
  if (maxlen > maxcnt)          /* clamp the set/rule length */
    maxlen = maxcnt;            /* to the maximum set size */

  /* --- create a transaction tree --- */
  tt = 0;                       /* init. the tree construction time */
  if (tree && taset) {          /* if transactions were loaded */
    MSG(fprintf(stderr, "creating transaction tree ... "));
    t = clock();                /* start the timer */
    tatree = tat_create(taset, heap); 
    if (!tatree) error(E_NOMEM);/* create a transaction tree */
    if (filter == 0) {          /* if a tree rebuild is not needed, */
      tas_delete(taset, 0); taset = NULL; }  /* delete transactions */
    tt = clock() -t;            /* note the time for the construction */
    MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)));
  }                             /* print a log message */

  /* --- create an item set tree --- */
  t = clock(); tc = 0;          /* start the timer */
  istree = ist_create(itemset, mode, (int)supp, conf);
  if (!istree) error(E_NOMEM);  /* create an item set tree */

  /* --- check item subsets --- */
  if (filter) {                 /* if to filter unused items */
    used = (char*)malloc(is_cnt(itemset) *sizeof(char));
    if (!used) error(E_NOMEM);  /* create a flag vector */
  }                             /* for the items */
  MSG(fprintf(stderr, "checking subsets of size 1"));
  while (ist_height(istree) < maxlen) {
    /* i is only assigned here if filter != 0; all later uses of i  */
    /* as the item usage count are guarded by filter < 0 or > 0     */
    if (filter != 0) {          /* if to filter w.r.t. item usage, */
      i = ist_check(istree, used);     /* check current item usage */
      if (i < maxlen) maxlen = i;      /* update the maximum size */
      if (ist_height(istree) >= i) break;
    }                           /* check the tree height */
    k = ist_addlvl(istree);     /* while max. height is not reached, */
    if (k <  0) error(E_NOMEM); /* add a level to the item set tree */
    if (k != 0) break;          /* if no level was added, abort */
    MSG(fprintf(stderr, " %d", ist_height(istree)));
    if (tatree) {               /* if a transaction tree was created */
      if (((filter < 0)         /* if to filter w.r.t. item usage */
      &&   (i < -filter *n))    /* and enough items were removed */
      ||  ((filter > 0)         /* or counting time is long enough */
      &&   (i < n) && (i *(double)tt < filter *n *tc))) {
        n = i; x = clock();     /* note the new number of items */
        tas_filter(taset, used);/* and remove unnecessary items */
        tat_delete(tatree);     /* delete the transaction tree */
        tatree = tat_create(taset, heap);
        if (!tatree) error(E_NOMEM);
        tt = clock() -x;        /* rebuild the transaction tree and */
      }                         /* note the new construction time */
      x  = clock();             /* count the transaction tree */
      ist_countx(istree, tatree);
      tc = clock() -x; }        /* note the new count time */
    else if (taset) {           /* if transactions were loaded */
      /* NOTE(review): x is set below, but both tt and tc are taken */
      /* relative to t (the start of the whole subset check), unlike */
      /* the transaction tree branch above -- confirm the intent.    */
      if (((filter < 0)         /* if to filter w.r.t. item usage */
      &&   (i <= -filter *n))   /* and enough items were removed */
      ||  ((filter > 0)         /* or counting time is long enough */
      &&   (i *(double)tt <= filter *n *tc))) {
        n = i; x = clock();     /* note the new number of items */
        tas_filter(taset, used);/* and remove unnecessary items */
        tt = clock() -t;        /* from the transactions */
      }                         /* note the filtering time */
      for (i = tacnt; --i >= 0;)/* traverse and count transactions */
        ist_count(istree, tas_tract(taset, i), tas_tsize(taset, i));
      tc = clock() -t; }        /* note the new count time */
    else {                      /* if to work on the input file, */
      rewind(in);               /* reset the file position */
      for (maxcnt = 0; (i = is_read(itemset, in)) == 0; ) {
        if (filter != 0)        /* (re)read the transactions and */
          is_filter(itemset, used);  /* remove unnecessary items */
        k = is_tsize(itemset);  /* update the maximum size */
        if (k > maxcnt) maxcnt = k;  /* of a transaction */
        ist_count(istree, is_tract(itemset), k);
      }                         /* count the transaction in the tree */
      if (i < 0) error(i, fn_in, RECCNT(itemset), BUFFER(itemset));
      if (maxcnt < maxlen)      /* update the maximal rule length */
        maxlen = maxcnt;        /* according to the max. t.a. size */
    }                           /* (may be smaller than before) */
  }
  if (!taset && !tatree) {      /* if transactions were not loaded */
    if (in != stdin) fclose(in);/* if not read from standard input, */
    in = NULL;                  /* close the input file */
  }                             /* clear the file variable */
  MSG(fprintf(stderr, " done [%.2fs].\n", SEC_SINCE(t)));

  /* --- filter found item sets --- */
  if ((target == TT_CLSET) || (target == TT_MFSET)) {
    MSG(fprintf(stderr, "filtering %s item sets ... ",
        (target == TT_MFSET) ? "maximal" : "closed"));
    t = clock();                /* filter the item sets */
    ist_filter(istree, (target == TT_MFSET) ? IST_MAXFRQ : IST_CLOSED);
    MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)));
  }                             /* (filter takes longer than print) */

  /* --- sort transactions --- */
  if (target <= TT_MFSET) {     /* if to find frequent item sets */
    if (!taset)                 /* transactions must be loaded */
      ext = 0;                  /* for extended support output */
    else if (ext) {             /* if extended output is requested */
      MSG(fprintf(stderr, "sorting transactions ... "));
      t = clock();              /* start the timer */
      tas_sort(taset, heap);    /* sort the transactions */
      MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)));
    }                           /* (sorting is necessary to find the */
  }                             /* number of identical transactions) */

  /* --- print item sets/rules/hyperedges --- */
  t = clock();                  /* start the timer */
  if (fn_out && *fn_out)        /* if an output file name is given, */
    out = fopen(fn_out, "w");   /* open the output file */
  else {                        /* if no output file name is given, */
    out = stdout; fn_out = "<stdout>"; }    /* write to std. output */
  MSG(fprintf(stderr, "writing %s ... ", fn_out));
  if (!out) error(E_FOPEN, fn_out);
  ist_init(istree, minlen, arem, minval);
  set = is_tract(itemset);      /* get the transaction buffer */
  /* in all four output loops below: sout bit 0 selects the relative */
  /* support (in percent), sout bit 1 the absolute support           */
  if (target <= TT_MFSET) {     /* if to find frequent item sets */
    for (n = 0; 1; ) {          /* extract item sets from the tree */
      k = ist_set(istree, set, &frq, &conf);
      if (k <= 0) break;        /* get the next frequent item set */
      if (frq > smax) continue; /* check against maximal support */
      for (i = 0; i < k; i++) { /* traverse the set's items */
        name = is_name(itemset, set[i]);
        if (c2scf) { sc_format(buf, name, 0); name = buf; }
        fputs(name, out);       /* print the name of the next item */
        fputs((i < k-1) ? sep : " ", out);
      }                         /* print a separator */
      fputs(" (", out);         /* print the item set's support */
      if (sout & 1) { fprintf(out, fmt, (frq/(double)tacnt) *100);
                      if (sout & 2) fputc('/', out); }
      if (sout & 2) { fprintf(out, "%d", frq); }
      if (ext) {                /* if to print the extended support */
        frq = tas_occur(taset, set, k);
        fputs(", ", out);       /* get the number of occurrences */
        fprintf(out, fmt, (frq/(double)tacnt) *100);
        if (sout & 2) fprintf(out, "/%d", frq);
      }                         /* print the extended support data */
      if (aval) { fputs(", ", out); fprintf(out, fmt, conf *100); }
      fputs(")\n", out);        /* print the add. eval. measure, */
      n++;                      /* terminate the support output, */
    } }                         /* and count the item set */
  else if (target == TT_RULE) { /* if to find association rules, */
    for (n = 0; 1; ) {          /* extract rules from tree */
      k = ist_rule(istree, set, &frq, &conf, &lftval, &minval);
      if (k <= 0) break;        /* get the next association rule */
      if (frq > smax) continue; /* check against maximal support */
      for (i = 0; i < k; i++) { /* traverse the rule's items */
        name = is_name(itemset, set[i]);
        if (c2scf) { sc_format(buf, name, 0); name = buf; }
        fputs(name, out);       /* print the next item */
        fputs((i <= 0) ? " <- " : ((i < k-1) ? sep : " "), out);
      }                         /* print a separator */
      fputs(" (", out);         /* print the rule evaluation */
      if (sout & 1) supp = frq/(double)tacnt;
      if (ext && !(mode & IST_HEAD)) {
        if (sout & 1) { fprintf(out, fmt, supp *conf *100);
                        if (sout & 2) fputc('/', out); }
        if (sout & 2) { fprintf(out, "%d", (int)(frq *conf +0.5));}
        fputs(", ", out);       /* print the support of the rule */
      }                         /* from  the support of the body */
      if (sout & 1) { fprintf(out, fmt, supp *100);
                      if (sout & 2) fputc('/', out); }
      if (sout & 2) { fprintf(out, "%d", frq); }
      fputs(", ", out);         /* print the rule support */
      if (ext && (mode & IST_HEAD)) {
        if (sout & 1) { fprintf(out, fmt, (supp/conf) *100);
                        if (sout & 2) fputc('/', out); }
        if (sout & 2) { fprintf(out, "%d", (int)(frq /conf +0.5));}
        fputs(", ", out);       /* print the support of the body */
      }                         /* from  the support of the rule */
      fprintf(out, fmt, conf *100); /* print the rule confidence */
      if (lift) { fputs(", ", out); fprintf(out, fmt, lftval *100); }
      if (aval) { fputs(", ", out); fprintf(out, fmt, minval *100); }
      fputs(")\n", out);        /* print the value of the additional */
      n++;                      /* rule evaluation measure and */
    } }                         /* count the association rule */
  else if (target == TT_HEDGE){ /* if to find association hyperedges */
    for (n = 0; 1; ) {          /* extract hyperedges from tree */
      k = ist_hedge(istree, set, &frq, &conf, &minval);
      if (k <= 0) break;        /* get the next hyperedge */
      if (frq > smax) continue; /* check against maximal support */
      for (i = 0; i < k; i++) { /* traverse the edge's items */
        name = is_name(itemset, set[i]);
        if (c2scf) { sc_format(buf, name, 0); name = buf; }
        fputs(name, out);       /* print the name of the next item */
        fputs((i < k-1) ? sep : " ", out);
      }                         /* print a separator */
      fputs(" (", out);         /* print the hyperedge evaluation */
      if (sout & 1) { fprintf(out, fmt, (frq/(double)tacnt) *100);
                      if (sout & 2) fputc('/', out); }
      if (sout & 2) { fprintf(out, "%d", frq); }
      fputs(", ", out); fprintf(out, fmt, conf *100);
      if (aval) { fputs(", ", out); fprintf(out, fmt, minval *100); }
      fputs(")\n", out);        /* print support and confidence */
      n++;                      /* of the hyperedge and */
    } }                         /* count the hyperedge */
  else {                        /* if to find association groups */
    for (n = 0; 1; ) {          /* extract groups from tree */
      k = ist_group(istree, set, &frq, &minval);
      if (k <= 0) break;        /* get the next group */
      if (frq > smax) continue; /* check against maximal support */
      for (i = 0; i < k; i++) { /* traverse the group's items */
        name = is_name(itemset, set[i]);
        if (c2scf) { sc_format(buf, name, 0); name = buf; }
        fputs(name, out);       /* print the name of the next item */
        fputs((i < k-1) ? sep : " ", out);
      }                         /* print a separator */
      fputs(" (", out);         /* print the group evaluation */
      if (sout & 1) { fprintf(out, fmt, (frq/(double)tacnt) *100);
                      if (sout & 2) fputc('/', out); }
      if (sout & 2) { fprintf(out, "%d", frq); }
      if (aval) { fputs(", ", out); fprintf(out, fmt, minval *100); }
      fputs(")\n", out);        /* print support and add. measure */
      n++;                      /* and count the group */
    }
  }  /* if (target <= TT_MFSET) .. else .. */
  if (fflush(out) != 0) error(E_FWRITE, fn_out);
  if (out != stdout) fclose(out);
  out = NULL;                   /* close the output file */
  MSG(fprintf(stderr, "[%d %s(s)] done ", n, ttypes[target]));
  MSG(fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)));
  #ifdef BENCH
  printf("number of support counters: %d\n", istree->sccnt);
  printf("necessary support counters: %d\n", istree->scnec);
  printf("number of child pointers  : %d\n", istree->cpcnt);
  printf("necessary child pointers  : %d\n", istree->cpnec);
  printf("allocated memory (bytes)  : %d\n", istree->bytes);
  #endif

  /* --- clean up --- */
  /* This function returns to its caller instead of ending the      */
  /* process, so everything is released in every build. The shared  */
  /* pointers are reset so that a later call never tests or uses a  */
  /* pointer to an object that has already been deleted.            */
  free(used);                   /* delete the item usage vector */
  ist_delete(istree);           /* delete the item set tree, */
  istree = NULL;
  if (tatree) {                 /* the transaction tree, */
    tat_delete(tatree); tatree = NULL; }
  if (taset)  {                 /* the transaction set, */
    tas_delete(taset, 0); taset = NULL; }
  is_delete(itemset);           /* and the item set */
  itemset = NULL;
  #ifdef STORAGE                /* if storage debugging */
  showmem("at end of program"); /* check memory usage */
  #endif
  return 0;                     /* return 'ok' */
}  /* do_apriori() */
コード例 #2
0
/*
 * Program entry point: find frequent item sets with a 16 items machine.
 *
 * Steps (in this order):
 *   1. print a startup message, or a usage message if no arguments are given
 *   2. evaluate the command line options and the file name arguments
 *   3. optionally read an item selection (-R)
 *   4. read the transaction database from the input file
 *   5. filter, sort and recode the items; sort and reduce the transactions
 *   6. mine the item sets ('repeat' times) and report them
 *   7. optionally write a pattern spectrum (only if ISR_PATSPEC is defined)
 *   8. clean up via the CLEANUP and SHOWMEM macros
 *
 * argc/argv: the usual command line arguments.
 * Returns 0 on success and after printing the usage message.
 * Failures are reported through error(); the code below is written as if
 * error() does not return -- NOTE(review): confirm against its definition.
 */
int main (int argc, char *argv[])
{                               /* --- main function */
  int     i, k = 0;             /* loop variables */
  long    r;                    /* counter for the repetitions */
  char    *s;                   /* to traverse the options */
  CCHAR   **optarg = NULL;      /* option argument */
  CCHAR   *fn_inp  = NULL;      /* name of input  file */
  CCHAR   *fn_out  = NULL;      /* name of output file */
  CCHAR   *fn_sel  = NULL;      /* name of item selection file */
  #ifdef ISR_PATSPEC            /* if to allow a pattern spectrum */
  CCHAR   *fn_psp  = NULL;      /* name of pattern spectrum file */
  #endif
  CCHAR   *recseps = NULL;      /* record  separators */
  CCHAR   *fldseps = NULL;      /* field   separators */
  CCHAR   *blanks  = NULL;      /* blank   characters */
  CCHAR   *comment = NULL;      /* comment characters */
  CCHAR   *hdr     = "";        /* record header  for output */
  CCHAR   *sep     = " ";       /* item separator for output */
  CCHAR   *dflt    = "  (%1S)"; /* default format for check */
  CCHAR   *format  = dflt;      /* format for information output */
  int     target   = 's';       /* target type (closed/maximal) */
  ITEM    min      =  1;        /* minimum size of an item set */
  ITEM    max      = 16;        /* maximum size of an item set */
  double  supp     = 10;        /* minimum support (in percent) */
  int     eval     = 'x';       /* additional evaluation measure */
  double  minval   = 10;        /* minimum evaluation measure value */
  int     sort     =  2;        /* flag for item sorting and recoding */
  int     dir      = +1;        /* item processing order */
  long    repeat   =  1;        /* number of repetitions */
  int     mtar     =  0;        /* mode for transaction reading */
  int     mrep     =  0;        /* mode for item set reporting */
  int     stats    =  0;        /* flag for item set statistics */
  ITEM    m;                    /* number of items */
  TID     n;                    /* number of transactions */
  SUPP    w;                    /* total transaction weight */
  clock_t t;                    /* timer for measurements */
  ISEVALFN *evalfn = (ISEVALFN*)0; /* evaluation function */

  #ifndef QUIET                 /* if not quiet version */
  prgname = argv[0];            /* get program name for error msgs. */

  /* --- print usage message --- */
  if (argc > 1) {               /* if arguments are given */
    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
    fprintf(stderr, VERSION); } /* print a startup message */
  else {                        /* if no arguments given */
    printf("usage: %s [options] infile [outfile]\n", argv[0]);
    printf("%s\n", DESCRIPTION);
    printf("%s\n", VERSION);
    printf("-t#      target type                              "
                    "(default: %c)\n", target);
    printf("         (s: frequent, c: closed, m: maximal item sets, "
                     "g: generators)\n");
    printf("-m#      minimum number of items per item set     "
                    "(default: %"ITEM_FMT")\n", min);
    printf("-n#      maximum number of items per item set     "
                    "(default: %"ITEM_FMT")\n", max);
    printf("-s#      minimum support of an item set           "
                    "(default: %g%%)\n", supp);
    printf("         (positive: percentage, "
                     "negative: absolute number)\n");
    printf("-e#      additional evaluation measure            "
                    "(default: none)\n");
    printf("-d#      minimum value of add. evaluation measure "
                    "(default: %g%%)\n", minval);
    printf("-q#      sort items w.r.t. their frequency        "
                    "(default: %d)\n", sort);
    printf("         (1: ascending, -1: descending, 0: do not sort,\n"
           "          2: ascending, -2: descending w.r.t. "
                    "transaction size sum)\n");
    printf("-u#      item processing order/search direction   "
                    "(default: %d)\n", dir);
    printf("         (fixed to -1 for closed/maximal item sets\n"
           "          fixed to +1 for generators, free otherwise)\n");
    printf("-x#      number of repetitions (for benchmarking) "
                    "(default: 1)\n");
    printf("-R#      read an item selection from a file\n");
    #ifdef ISR_PATSPEC
    printf("-P#      write a pattern spectrum to a file\n");
    #endif
    printf("-Z       print item set statistics (counts per size)\n");
    printf("-g       write output in scanable form "
                    "(quote certain characters)\n");
    printf("-h#      record header  for output                "
                    "(default: \"%s\")\n", hdr);
    printf("-k#      item separator for output                "
                    "(default: \"%s\")\n", sep);
    printf("-v#      output format for item set information   "
                    "(default: \"%s\")\n", format);
    printf("-w       transaction weight in last field         "
                    "(default: only items)\n");
    printf("-r#      record/transaction separators            "
                    "(default: \"\\n\")\n");
    printf("-f#      field /item        separators            "
                    "(default: \" \\t,\")\n");
    printf("-b#      blank   characters                       "
                    "(default: \" \\t\\r\")\n");
    printf("-C#      comment characters                       "
                    "(default: \"#\")\n");
    printf("-!       print additional option information\n");
    printf("infile   file to read transactions from           "
                    "[required]\n");
    printf("outfile  file to write frequent item sets to      "
                    "[optional]\n");
    return 0;                   /* print a usage message */
  }                             /* and abort the program */
  #endif  /* #ifndef QUIET */
  /* free option characters: acilopy [A-Z]\[CT] */

  /* --- evaluate arguments --- */
  for (i = 1; i < argc; i++) {  /* traverse arguments */
    s = argv[i];                /* get option argument */
    /* a pending string option takes the whole next argument as value */
    if (optarg) { *optarg = s; optarg = NULL; continue; }
    if ((*s == '-') && *++s) {  /* -- if argument is an option */
      while (*s) {              /* traverse options */
        switch (*s++) {         /* evaluate switches */
          case '!': help();                          break;
          case 't': target = (*s) ? *s++ : 's';      break;
          case 'm': min    = (ITEM)strtol(s, &s, 0); break;
          case 'n': max    = (ITEM)strtol(s, &s, 0); break;
          case 's': supp   =       strtod(s, &s);    break;
          case 'e': eval   = (*s) ? *s++ : 0;        break;
          case 'd': minval =       strtod(s, &s);    break;
          case 'q': sort   = (int) strtol(s, &s, 0); break;
          case 'u': dir    = (int) strtol(s, &s, 0); break;
          case 'x': repeat =       strtol(s, &s, 0); break;
          case 'R': optarg = &fn_sel;                break;
          #ifdef ISR_PATSPEC
          case 'P': optarg = &fn_psp;                break;
          #endif
          case 'Z': stats  = 1;                      break;
          case 'g': mrep   = ISR_SCAN;               break;
          case 'h': optarg = &hdr;                   break;
          case 'k': optarg = &sep;                   break;
          case 'v': optarg = &format;                break;
          case 'w': mtar  |= TA_WEIGHT;              break;
          case 'r': optarg = &recseps;               break;
          case 'f': optarg = &fldseps;               break;
          case 'b': optarg = &blanks;                break;
          case 'C': optarg = &comment;               break;
          default : error(E_OPTION, *--s);           break;
        }                       /* set option variables */
        /* a string option value may follow directly in the same argument */
        if (optarg && *s) { *optarg = s; optarg = NULL; break; }
      } }                       /* get option argument */
    else {                      /* -- if argument is no option */
      switch (k++) {            /* evaluate non-options */
        case  0: fn_inp = s;      break;
        case  1: fn_out = s;      break;
        default: error(E_ARGCNT); break;
      }                         /* note filenames */
    }
  }
  if (optarg)     error(E_OPTARG);      /* check (option) arguments */
  if (k    < 1)   error(E_ARGCNT);      /* and number of arguments */
  if (min  < 0)   error(E_SIZE, min);   /* check the size limits: */
  if (max  < 0)   error(E_SIZE, max);   /* sizes must not be negative */
  if (max  > 16)  error(E_SIZE, max);   /* and at most 16 items per set */
  if (supp > 100) error(E_SUPPORT, supp);  /* check the minimum support */
  if (repeat < 1) error(E_REPEAT, repeat); /* and the repetitions */
  if ((!fn_inp || !*fn_inp) && (fn_sel && !*fn_sel))
    error(E_STDIN);             /* stdin must not be used twice */
  switch (target) {             /* check and translate target type */
    case 's': target = ISR_ALL;              break;
    case 'c': target = ISR_CLOSED;           break;
    case 'm': target = ISR_MAXIMAL;          break;
    case 'g': target = ISR_GENERA;           break;
    default : error(E_TARGET, (char)target); break;
  }                             /* (get target type code) */
  switch (eval) {               /* check and translate measure */
    case 'x': evalfn = (ISEVALFN*)0;         break;
    case 'b': evalfn = isr_logrto;           break;
    default : error(E_MEASURE, (char)eval);  break;
  }                             /* (get evaluation measure code) */
  if ((format == dflt) && (supp < 0))
    format = "  (%a)";          /* adapt the default info. format */
  MSG(stderr, "\n");            /* terminate the startup message */

  /* --- read item selection --- */
  ibase = ib_create(0, 0);      /* create an item base */
  if (!ibase) error(E_NOMEM);   /* to manage the items */
  tread = trd_create();         /* create a transaction reader */
  if (!tread) error(E_NOMEM);   /* and configure the characters */
  trd_allchs(tread, recseps, fldseps, blanks, "", comment);
  if (fn_sel) {                 /* if an item selection is given */
    t = clock();                /* start timer, open input file */
    if (trd_open(tread, NULL, fn_sel) != 0)
      error(E_FOPEN, trd_name(tread));
    MSG(stderr, "reading %s ... ", trd_name(tread));
    m = ib_readsel(ibase,tread);/* read the given item selection */
    if (m < 0) error((int)-m, ib_errmsg(ibase, NULL, 0));
    trd_close(tread);           /* close the input file */
    MSG(stderr, "[%"ITEM_FMT" item(s)]", m);
    MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));
  }                             /* print a log message */

  /* --- read transaction database --- */
  tabag = tbg_create(ibase);    /* create a transaction bag */
  if (!tabag) error(E_NOMEM);   /* to store the transactions */
  t = clock();                  /* start timer, open input file */
  if (trd_open(tread, NULL, fn_inp) != 0)
    error(E_FOPEN, trd_name(tread));
  MSG(stderr, "reading %s ... ", trd_name(tread));
  k = tbg_read(tabag, tread, mtar);
  if (k < 0)                    /* read the transaction database */
    error(-k, tbg_errmsg(tabag, NULL, 0));
  trd_delete(tread, 1);         /* close the input file and */
  tread = NULL;                 /* delete the table reader */
  m = ib_cnt(ibase);            /* get the number of items, */
  n = tbg_cnt(tabag);           /* the number of transactions, */
  w = tbg_wgt(tabag);           /* the total transaction weight */
  MSG(stderr, "[%"ITEM_FMT" item(s), %"TID_FMT, m, n);
  if (w != (SUPP)n) MSG(stderr, "/%"SUPP_FMT, w);
  MSG(stderr, " transaction(s)] done [%.2fs].", SEC_SINCE(t));
  if ((m <= 0) || (n <= 0))     /* check for at least one item */
    error(E_NOITEMS);           /* and at least one transaction */
  MSG(stderr, "\n");            /* compute absolute support value: */
  /* non-negative supp is a percentage of the total weight, */
  /* negative supp is (the negation of) an absolute number */
  supp = ceilsupp((supp >= 0) ? 0.01 *supp *(double)w : -supp);

  /* --- sort and recode items --- */
  t = clock();                  /* start timer, print log message */
  MSG(stderr, "filtering, sorting and recoding items ... ");
  m = tbg_recode(tabag, (SUPP)supp, -1, 16, -sort);
  if (m <  0) error(E_NOMEM);   /* recode items and transactions */
  if (m <= 0) error(E_NOITEMS); /* and check the number of items */
  MSG(stderr, "[%"ITEM_FMT" item(s)]", m);
  MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));

  /* --- sort and reduce transactions --- */
  t = clock();                  /* start timer, print log message */
  MSG(stderr, "sorting and reducing transactions ... ");
  tbg_filter(tabag,min,NULL,0); /* remove items of short transactions */
  tbg_itsort(tabag, +1, 1);     /* sort items in transactions and */
  tbg_sort  (tabag, +1, 1);     /* sort the trans. lexicographically */
  n = tbg_reduce(tabag, 0);     /* reduce transactions to unique ones */
  tbg_pack(tabag, 16);          /* pack items with codes < 16 */
  MSG(stderr, "[%"TID_FMT, n);  /* print number of transactions */
  if (w != (SUPP)n) MSG(stderr, "/%"SUPP_FMT, w);
  MSG(stderr, " transaction(s)] done [%.2fs].\n", SEC_SINCE(t));

  /* --- find frequent item sets --- */
  t = clock();                  /* start the timer */
  if (eval == 'b') mrep |= ISR_LOGS;
  report = isr_create(ibase, target|mrep, -1, hdr, sep, NULL);
  if (!report) error(E_NOMEM);  /* create an item set reporter */
  isr_setfmt (report, format);  /* and configure it: set flags, */
  isr_setsize(report, min, max);/* info. format and size range, */
  if (evalfn)                   /* and the evaluation function */
    isr_seteval(report, evalfn, NULL, +1, 0.01*minval);
  #ifdef ISR_PATSPEC            /* if to allow a pattern spectrum */
  if (fn_psp && (isr_addpsp(report, NULL) < 0))
    error(E_NOMEM);             /* add a pattern spectrum if req. */
  #endif
  if (isr_open(report, NULL, fn_out) != 0)
    error(E_FOPEN, isr_name(report));  /* open the output file */
  MSG(stderr, "writing %s ... ", isr_name(report));
  /* the target type overrides a user-specified processing order */
  if      (target == ISR_GENERA)              dir = +1;
  else if (target & (ISR_CLOSED|ISR_MAXIMAL)) dir = -1;
  fim16 = m16_create(dir, (SUPP)supp, report);
  if (!fim16) error(E_NOMEM);   /* create a 16 items machine */
  /* 'repeat' is a long, so count with a long: an int counter would */
  /* overflow (undefined behavior) if repeat exceeds INT_MAX */
  for (r = 0; r < repeat; r++){ /* repeated mining loop */
    isr_reset(report);          /* (re)init. the output counters */
    m16_addtbg(fim16, tabag);   /* add trans. bag to 16 items machine */
    k = m16_mine(fim16);        /* find frequent item sets */
    if (k < 0) error(E_NOMEM);  /* with 16 items machine */
  }
  if (isr_report(report) < 0)   /* report the empty set (if needed) */
    error(E_NOMEM);
  if (isr_close(report) != 0)   /* close the output file */
    error(E_FWRITE, isr_name(report));
  MSG(stderr, "[%"SIZE_FMT" set(s)]", isr_repcnt(report));
  MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));
  if (stats) isr_prstats(report, stdout, 0);

  /* --- write pattern spectrum --- */
  #ifdef ISR_PATSPEC            /* if to allow a pattern spectrum */
  if (fn_psp) {                 /* if to write a pattern spectrum */
    psp    = isr_getpsp(report);/* get the pattern spectrum */
    twrite = twr_create();      /* create a table writer and */
    if (!twrite) error(E_NOMEM);/* open the output file */
    if (twr_open(twrite, NULL, fn_psp) != 0)
      error(E_FOPEN,  twr_name(twrite));
    if (psp_report(psp, twrite) != 0)
      error(E_FWRITE, twr_name(twrite));
    twr_delete(twrite, 1);      /* write the pattern spectrum, */
    twrite = NULL;              /* delete the table writer, and */
  }                             /* clear the writer variable */
  #endif

  /* --- clean up --- */
  CLEANUP;                      /* clean up memory and close files */
  SHOWMEM;                      /* show (final) memory usage */
  return 0;                     /* return 'ok' */
}  /* main() */
コード例 #3
0
int main (int argc, char *argv[])
{                              
  int     i, k = 0, n;          
  char    *s;                   
  char    **optarg = NULL;     
  char    *fn_in   = NULL;      
  char    *fn_out  = NULL;    
  char    *fn_app  = NULL;     
  char    *blanks  = NULL;   
  char    *fldseps = NULL;     
  char    *recseps = NULL;      
  char    *comment = NULL;      
  char    *isep    = " ";      
  char    *impl    = " <- ";   
  char    *dflt    = "  (%1S)"; 
  char    *format  = dflt;      
  int     target   = 's';     
  int     min      = 1;        
  int     max      = INT_MAX;   
  double  supp     = 0.1;       
  double  smax     = 1.0;     
  double  conf     = 0.8;       
  int     dir      = 0;        
  int     eval     = 0;       
  int     aggm     = 0;        
  double  minval   = 0.1;       
  int     prune    = 0;         
  double  filter   = 0.1;       
  int     sort     = 2;         
  int     tree     = 1;         
  int     heap     = 1;         
  int     post     = 0;        
  int     report   = 0;       
  int     mode     = APP_BODY|IST_PERFECT;  
  int     size;               
  int     wgt;                  
  int     frq, body, head;     
  int     *items;             
  clock_t t, tt, tc, x;         

  #ifndef QUIET               
  prgname = argv[0];           

  if (argc > 1) {          
    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
    fprintf(stderr, VERSION); } 
  else {                      
    printf("usage: %s [options] infile outfile\n", argv[0]);
    printf("%s\n", DESCRIPTION);
    printf("%s\n", VERSION);
    printf("-t#      target type                              "
                    "(default: %c)\n", target);
    printf("         (s: frequent item sets, c: closed item sets,\n"
           "          m: maximal item sets,  r: association rules)\n");
    printf("-m#      minimum number of items per set/rule     "
                    "(default: %d)\n", min);
    printf("-n#      maximum number of items per set/rule     "
                    "(default: no limit)\n");
    printf("-s#      minimum support of a set/rule     "
                    "(default: %g%%)\n", supp *100);
    printf("-S#      maximum support of a set/rule     "
                    "(default: %g%%)\n", smax *100);
    printf("         (positive: percentage, "
                     "negative: absolute number)\n");
    printf("-c#      minimum confidence of a     rule         "
                    "(default: %g%%)\n", conf *100);
    printf("infile   file to read transactions from\n");
    printf("outfile  file to write item sets to\n");
    return 0;                
  }                            
  #endif  
  for (i = 1; i < argc; i++) {  
    s = argv[i];                
    if (optarg) { *optarg = s; optarg = NULL; continue; }
    if ((*s == '-') && *++s) {  
      while (*s) {             
        switch (*s++) {        
          case '!': help();                         break;
          case 't': target = (*s) ? *s++ : 's';     break;
          case 'm': min    = (int)strtol(s, &s, 0); break;
          case 'n': max    = (int)strtol(s, &s, 0); break;
          case 's': supp   = 0.01*strtod(s, &s);    break;
          case 'S': smax   = 0.01*strtod(s, &s);    break;
          case 'c': conf   = 0.01*strtod(s, &s);    break;
          case 'o': mode  |= APP_BOTH;              break;
          case 'e': eval   = (*s) ? *s++ : 0;       break;
          case 'a': aggm   = (*s) ? *s++ : 0;       break;
          case 'd': minval = 0.01*strtod(s, &s);    break;
          case 'p': prune  = (int)strtol(s, &s, 0); break;
          case 'g': report = ISR_SCAN;              break;
          case 'k': optarg = &isep;                 break;
          case 'i': optarg = &impl;                 break;
          case 'v': optarg = &format;               break;
          case 'l': dir    = (int)strtol(s, &s, 0); break;
          case 'q': sort   = (int)strtol(s, &s, 0); break;
          case 'u': filter =      strtod(s, &s);    break;
          case 'h': tree   = 0;                     break;
          case 'j': heap   = 0;                     break;
          case 'x': mode  &= ~IST_PERFECT;          break;
          case 'y': post   = 1;                     break;
          case 'b': optarg = &blanks;               break;
          case 'f': optarg = &fldseps;              break;
          case 'r': optarg = &recseps;              break;
          case 'C': optarg = &comment;              break;
          default : error(E_OPTION, *--s);          break;
        }                       
        if (optarg && *s) { *optarg = s; optarg = NULL; break; }
      } }                       
    else {                     
      switch (k++) {            
        case  0: fn_in  = s;      break;
        case  1: fn_out = s;      break;
        case  2: fn_app = s;      break;
        default: error(E_ARGCNT); break;
      }                         
    }
  }
  if (optarg) error(E_OPTARG);  
  if ((k < 2) || (k > 3))       
    error(E_ARGCNT);           
  if ((!fn_in || !*fn_in) && (fn_app && !*fn_app))
    error(E_STDIN);             
  switch (target) {             
    case 's': target = TT_SET;               break;
    case 'c': target = TT_CLOSED;            break;
    case 'm': target = TT_MAXIMAL;           break;
    case 'r': target = TT_RULE;              break;
    default : error(E_TARGET, (char)target); break;
  }
  if (min < 0) error(E_SIZE, min); 
  if (max < 0) error(E_SIZE, max); 
  if (supp  > 1)                
    error(E_SUPP, supp);        
  if ((conf  < 0) || (conf > 1))
    error(E_CONF, conf);       
  switch (eval) {              
    case 'x': case 0: eval = IST_NONE;      break;
    case 'c': eval = IST_CONF;              break;
    case 'd': eval = IST_DIFF;              break;
    case 'l': eval = IST_LIFT;              break;
    case 'a': eval = IST_LD21;              break;
    case 'q': eval = IST_QUOT;              break;
    case 'n': eval = IST_CHI2;              break;
    case 'p': eval = IST_PVAL;              break;
    case 'i': eval = IST_INFO;              break;
    case 'g': eval = IST_PGST;              break;
    case 'b': eval = IST_LOGQ;              break;
    default : error(E_MEASURE, (char)eval); break;
  }
  switch (aggm) {            
    case 'x': case 0: aggm = IST_NONE;      break;
    case 'm': aggm = IST_MIN;               break;
    case 'n': aggm = IST_MAX;               break;
    case 'a': aggm = IST_AVG;               break;
    default : error(E_MEASURE, (char)aggm); break;
  }
  if ((target > TT_SET)         
  || ((eval > IST_NONE) && (eval < IST_LOGQ)))
    mode &= ~IST_PERFECT;      
  if (target <= TT_MAXIMAL) {  
    mode |= APP_BOTH; conf = 1;}
  if ((filter <= -1) || (filter >= 1))
    filter = 0;                 

  ibase = ib_create(-1);       
  if (!ibase) error(E_NOMEM);  
  ib_chars(ibase, blanks, fldseps, recseps, comment);
  MSG(stderr, "\n");          

  if (fn_app) {                
    t = clock();                
    if (*fn_app)            
      in = fopen(fn_app, "r");  
    else {                      
      in = stdin; fn_app = "<stdin>"; }   
    MSG(stderr, "reading %s ... ", fn_app);
    if (!in) error(E_FOPEN, fn_app);
    k = ib_readapp(ibase, in); 
    if (k  != 0) error(k, fn_app, RECCNT(ibase), BUFFER(ibase));
    if (in != stdin) fclose(in);
    in = NULL;                  
    MSG(stderr, "[%d item(s)]", ib_cnt(ibase));
    MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));
  }                           

  t = clock();                 
  if (fn_in && *fn_in)        
    in = fopen(fn_in, "r");     
  else {                       
    in = stdin; fn_in = "<stdin>"; }   
  MSG(stderr, "reading %s ... ", fn_in);
  if (!in) error(E_FOPEN, fn_in);
  tabag = tb_create(ibase);     
  if (!tabag) error(E_NOMEM);   
  while (1) {                
    k = ib_read(ibase, in);     
    if (k) { if (k > 0) break; 
      error(k, fn_in, RECCNT(ibase), BUFFER(ibase)); }
    if (tb_add(tabag, NULL) != 0) error(E_NOMEM);
  }                            
  if (in != stdin) fclose(in);  
  in  = NULL;                  
  n   = ib_cnt(ibase);          
  k   = tb_cnt(tabag);         
  wgt = tb_wgt(tabag);          
  MSG(stderr, "[%d item(s), ", n);
  if (k == wgt) MSG(stderr,    "%d transaction(s)]", k);
  else          MSG(stderr, "%d/%d transaction(s)]", k, wgt);
  MSG(stderr, " done [%.2fs].", SEC_SINCE(t));
  if ((n <= 0) || (wgt <= 0))  
    error(E_NOTRANS);           
  MSG(stderr, "\n");            
  if (format == dflt) {       
    if (target != TT_RULE) format = (supp < 0) ? "  (%a)" : "  (%1S)";
    else format = (supp < 0) ? "  (%b, %1C)" : "  (%1X, %1C)";
  }                            
  supp = ceil (((supp < 0) ? -100 : wgt) *supp);
  smax = floor(((smax < 0) ? -100 : wgt) *smax);

  
  t = clock();                  
  MSG(stderr, "filtering, sorting and recoding items ... ");
  map = (int*)malloc(n *sizeof(int));
  if (!map) error(E_NOMEM);     
  k = (int)((mode & APP_HEAD) ? supp : ceil(supp *conf));
  n = ib_recode(ibase, k, sort, map);
  tb_recode(tabag, map);       
  tb_itsort(tabag, 1, heap);    
  free(map); map = NULL;        
  MSG(stderr, "[%d item(s)] done [%.2fs].", n, SEC_SINCE(t));
  if (n <= 0) error(E_NOFREQ); 
  MSG(stderr, "\n");            
  k   = tb_max(tabag);         
  if (max > k) max = k;         


  t = clock();                  
  MSG(stderr, "reducing transactions ... ");
  tb_filter(tabag, min, NULL);  
  tb_sort(tabag, 1, heap);      
  k = tb_reduce(tabag);         
  if (k == wgt) MSG(stderr,    "[%d transaction(s)]", k);
  else          MSG(stderr, "[%d/%d transaction(s)]", k, wgt);
  MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));

  
  tt = 0;                      
  if (tree) {                   
    t = clock();               
    MSG(stderr, "building transaction tree ... ");
    tatree = tt_create(tabag);  
    if (!tatree) error(E_NOMEM);
    if (filter == 0) {          
      tb_delete(tabag, 0);      
      tabag = NULL;             
    }
    MSG(stderr, "[%d node(s)]", tt_nodecnt(tatree));
    MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));
    tt = clock() -t;            
  }                             


  t = clock(); tc = 0;         
  istree = ist_create(ibase, mode, (int)supp, (int)smax, conf);
  if (!istree) error(E_NOMEM);  
  ist_seteval(istree, eval, aggm, minval, prune);

  /* --- check item subsets --- */
  MSG(stderr, "checking subsets of size 1");
  map = (int*)malloc(n *sizeof(int));
  if (!map) error(E_NOMEM);     
  while (1) {                   
    size = ist_height(istree);  
    if (size >= max) break;     
    if ((filter != 0)        
    &&  (ist_check(istree, map) <= size))
      break;                  
    if (post)                  
      ist_prune(istree);       
    k = ist_addlvl(istree);     
    if (k) { if (k > 0) break;
             error(E_NOMEM);  } 
    if (((filter < 0)           
    &&   (i < -filter *n))      
    ||  ((filter > 0)          
    &&   (i < n) && (i *(double)tt < filter *n *tc))) {
      n = i;                   
      x = clock();             
      tb_filter(tabag, size+1, map);
      tb_sort(tabag, 0, heap);  
      tb_reduce(tabag);         
      if (tatree) {             
        tt_delete(tatree, 0);   
        tatree = tt_create(tabag);
        if (!tatree) error(E_NOMEM);
      }                         
      tt = clock() -x;          
    }
    MSG(stderr, " %d", ++size); 
    x = clock();             
    if (tatree) ist_countx(istree, tatree);
    else        ist_countb(istree, tabag);
    tc = clock() -x;           
  }                             
  free(map); map = NULL;        
  MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));

  if ((target == TT_CLOSED) || (target == TT_MAXIMAL)) {
    t = clock();               
    MSG(stderr, "filtering for %s item sets ... ",
        (target == TT_MAXIMAL) ? "maximal" : "closed");
    k = target | ((prune < 0) ? IST_EVAL : 0);
    ist_mark(istree, k);        
    MSG(stderr, "done [%.2fs].\n", SEC_SINCE(t));
  }      

  t = clock();                  
  if (fn_out && *fn_out)        
    out = fopen(fn_out, "w");  
  else {                        
    out = stdout; fn_out = "<stdout>"; }    
  MSG(stderr, "writing %s ... ", fn_out);
  if (!out) error(E_FOPEN, fn_out);
  if (eval == IST_LOGQ) report |= ISR_LOGS;
  if ((target == TT_CLOSED) || (target == TT_MAXIMAL))
    report |= ISR_CLOSED;      
  isrep = isr_create(ibase, out, report, isep, impl);
  if (!isrep) error(E_NOMEM);  
  isr_setfmt (isrep, format);   
  isr_setsize(isrep,  min, max);
  ist_setsize(istree, min, max, dir);
  ist_init   (istree);          
  items = t_items(ib_tract(ibase));
  if ((target <= TT_MAXIMAL)    
  &&  (dir == 0)) {            
    if      (eval == IST_LOGQ)  
      isr_seteval(isrep, isr_logq,  NULL,   minval);
    else if (eval >  IST_NONE)  
      isr_seteval(isrep, ist_evalx, istree, minval);
    n = ist_report(istree, isrep); } 
  else if (target <= TT_MAXIMAL) { 
    for (n = 0; 1; ) {         
      k = ist_set(istree, items, &frq, &minval);
      if (k < 0) break;         
      if (k > 0) fputs(isr_name(isrep, items[0]), out);
      for (i = 0; ++i < k; ) {  
        fputs(isep, out); fputs(isr_name(isrep, items[i]), out); }
      if (format)               
        isr_sinfo(isrep, frq, minval);
      fputc('\n', out); n++;    
    } }                       
  else if (target == TT_RULE) { 
    for (n = 0; 1; ) {          
      k = ist_rule(istree, items, &frq, &body, &head, &minval);
      if (k < 0) break;         
      fputs(isr_name(isrep, items[0]), out);
      fputs(impl, out);         
      if (k > 1) fputs(isr_name(isrep, items[1]), out);
      for (i = 1; ++i < k; ) {  
        fputs(isep, out); fputs(isr_name(isrep, items[i]), out); }
      if (format)               
        isr_rinfo(isrep, frq, body, head, minval);
      fputc('\n', out); n++;    
    }                           
  }  /
  if (fflush(out) != 0) error(E_FWRITE, fn_out);
コード例 #4
0
ファイル: dom.c プロジェクト: CauanCabral/SpamFilter
int main (int argc, char *argv[])
{                               /* --- main function */
  int  i, k = 0;                /* loop variables, counter */
  char *s;                      /* to traverse the options */
  char **optarg = NULL;         /* option argument */
  char *fn_hdr  = NULL;         /* name of table header file */
  char *fn_tab  = NULL;         /* name of table file */
  char *fn_dom  = NULL;         /* name of domains file */
  char *blanks  = NULL;         /* blanks */
  char *fldseps = NULL;         /* field  separators */
  char *recseps = NULL;         /* record separators */
  char *nullchs = NULL;         /* null value characters */
  char *comment = NULL;         /* comment characters */
  int  flags    = 0;            /* table file read flags */
  int  sort     = 0;            /* flag for domain sorting */
  int  atdet    = 0;            /* flag for automatic type determ. */
  int  ivals    = AS_IVALS;     /* flag for numeric intervals */
  int  maxlen   = 0;            /* maximal output line length */
  int  attid;                   /* loop variable for attributes */
  ATT  *att;                    /* to traverse attributes */
  clock_t t;                    /* timer for measurement */

  prgname = argv[0];            /* get program name for error msgs. */

  /* --- print startup/usage message --- */
  if (argc > 1) {               /* if arguments are given */
    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
    fprintf(stderr, VERSION); } /* print a startup message */
  else {                        /* if no argument is given */
    printf("usage: %s [options] "
                     "[-d|-h hdrfile] tabfile domfile\n", argv[0]);
    printf("%s\n", DESCRIPTION);
    printf("%s\n", VERSION);
    printf("-s       sort domains alphabetically "
                    "(default: order of appearance)\n");
    printf("-S       sort domains numerically/alphabetically\n");
    printf("-a       automatic type determination "
                    "(default: all nominal)\n");
    printf("-i       do not print intervals for numeric attributes\n");
    printf("-l#      output line length (default: no limit)\n");
    printf("-b#      blank   characters    (default: \" \\t\\r\")\n");
    printf("-f#      field   separators    (default: \" \\t\")\n");
    printf("-r#      record  separators    (default: \"\\n\")\n");
    printf("-C#      comment characters    (default: \"#\")\n");
    printf("-u#      null value characters (default: \"?*\")\n");
    printf("-n       number of tuple occurrences in last field\n");
    printf("-d       use default header "
                    "(field names = field numbers)\n");
    printf("-h       read table header (field names) from hdrfile\n");
    printf("hdrfile  file containing table header (field names)\n");
    printf("tabfile  table file to read "
                    "(field names in first record)\n");
    printf("domfile  file to write domain descriptions to\n");
    return 0;                   /* print a usage message */
  }                             /* and abort the program */

  /* --- evaluate arguments --- */
  for (i = 1; i < argc; i++) {  /* traverse the arguments */
    s = argv[i];                /* get option argument */
    if (optarg) { *optarg = s; optarg = NULL; continue; }
    if ((*s == '-') && *++s) {  /* -- if argument is an option */
      while (1) {               /* traverse characters */
        switch (*s++) {         /* evaluate option */
          case 's': sort   = 1;                     break;
          case 'S': sort   = 2;                     break;
          case 'a': atdet  = 1;                     break;
          case 'i': ivals  = 0;                     break;
          case 'l': maxlen = (int)strtol(s, &s, 0); break;
  	  case 'b': optarg = &blanks;               break;
          case 'f': optarg = &fldseps;              break;
          case 'r': optarg = &recseps;              break;
          case 'u': optarg = &nullchs;              break;
          case 'C': optarg = &comment;              break;
          case 'n': flags |= AS_WEIGHT;             break;
          case 'd': flags |= AS_DFLT;               break;
          case 'h': optarg = &fn_hdr;               break;
          default : error(E_OPTION, *--s);          break;
        }                       /* set option variables */
        if (!*s) break;         /* if at end of string, abort loop */
        if (optarg) { *optarg = s; optarg = NULL; break; }
      } }                       /* get option argument */
    else {                      /* -- if argument is no option */
      switch (k++) {            /* evaluate non-option */
        case  0: fn_tab = s;      break;
        case  1: fn_dom = s;      break;
        default: error(E_ARGCNT); break;
      }                         /* note filenames */
    }
  }
  if (optarg) error(E_OPTARG);  /* check option argument */
  if (k != 2) error(E_ARGCNT);  /* check number of arguments */
  if (fn_hdr && (strcmp(fn_hdr, "-") == 0))
    fn_hdr = "";                /* convert "-" to "" */
  if (fn_hdr)                   /* set header flags */
    flags = AS_ATT | (flags & ~AS_DFLT);

  /* --- determine attributes and domains --- */
  attset = as_create("domains", att_delete);
  if (!attset) error(E_NOMEM);  /* create an attribute set */
  as_chars(attset, recseps, fldseps, blanks, nullchs, comment);
  fprintf(stderr, "\n");        /* set delimiter characters */
  i = io_tab(attset, fn_hdr, fn_tab, flags, 1);
  if (i != 0) error(-i);        /* read the table */

  /* --- convert/sort domains --- */
  if (atdet) {                  /* if automatic type determination */
    for (attid = as_attcnt(attset); --attid >= 0; )
      att_conv(as_att(attset, attid), AT_AUTO, NULL);
  }                             /* try to convert attributes */
  if (sort) {                   /* if to sort domains (values) */
    for (attid = as_attcnt(attset); --attid >= 0; ) {
      att = as_att(attset, attid);
      if (att_type(att) != AT_NOM) continue;
      att_valsort(att, (sort > 1) ? numcmp : strcmp, NULL, 0);
    }                           /* traverse nominal attributes */
  }                             /* and sort their domains */

  /* --- write output file --- */
  t = clock();                  /* start the timer */
  if (fn_dom && *fn_dom)        /* if a domain file name is given, */
    out = fopen(fn_dom, "w");   /* open domain file for writing */
  else {                        /* if no domain file name is given, */
    out = stdout; fn_dom = "<stdout>"; }         /* write to stdout */
  fprintf(stderr, "writing %s ... ", fn_dom);
  if (!out) error(E_FOPEN, fn_dom);
  if (as_desc(attset, out, AS_TITLE|ivals, maxlen) != 0)
    error(E_FWRITE, fn_dom);    /* write domain descriptions */
  if (out != stdout) {          /* if not written to stdout, */
    i = fclose(out); out = NULL;/* close the output file */
    if (i != 0) error(E_FWRITE, fn_dom);
  }                             /* print a success message */
  fprintf(stderr, "[%d attribute(s)] done", as_attcnt(attset));
  fprintf(stderr, " [%.2fs].\n", SEC_SINCE(t));

  /* --- clean up --- */
  #ifndef NDEBUG
  as_delete(attset);            /* delete attribute set */
  #endif
  #ifdef STORAGE
  showmem("at end of program"); /* check memory usage */
  #endif
  return 0;                     /* return 'ok' */
}  /* main() */
コード例 #5
0
ファイル: apriori.c プロジェクト: crubeido/test
/*----------------------------------------------------------------------
  Entry point of the apriori program.
  Steps, in order: evaluate the command line, optionally read item
  appearances, read the transactions, recode/sort the items, reduce
  the transactions, optionally build a transaction tree, grow the item
  set tree level by level (with optional item usage filtering),
  optionally mark closed/maximal item sets, and finally write the item
  sets or association rules to the output file (or stdout).
  Returns 0 on success; failures are reported through error().
----------------------------------------------------------------------*/
int main (int argc, char *argv[])
{                               /* --- main function */
  int     i, k = 0, n;          /* loop variables, counters */
  char    *s;                   /* to traverse the options */
  char    **optarg = NULL;      /* option argument */
  char    *fn_in   = NULL;      /* name of input  file */
  char    *fn_out  = NULL;      /* name of output file */
  char    *fn_app  = NULL;      /* name of item appearances file */
  char    *blanks  = NULL;      /* blanks */
  char    *fldseps = NULL;      /* field  separators */
  char    *recseps = NULL;      /* record separators */
  char    *comment = NULL;      /* comment indicators */
  char    *isep    = " ";       /* item separator for output */
  char    *impl    = " <- ";    /* implication sign for ass. rules */
  char    *dflt    = "  (%1S)"; /* default format for check */
  char    *format  = dflt;      /* format for information output */
  int     target   = 's';       /* target type (sets/rules/h.edges) */
  int     min      = 1;         /* minimum rule/item set size */
  int     max      = INT_MAX;   /* maximum rule/item set size */
  double  supp     = 10;        /* minimum support    (in percent) */
  double  smax     = 100;       /* maximum support    (in percent) */
  double  conf     = 80;        /* minimum confidence (in percent) */
  int     dir      = 0;         /* direction for size sorting */
  int     eval     = 0;         /* additional evaluation measure */
  int     zero     = 0;         /* flag for zero eval. below expect. */
  int     aggm     = 0;         /* aggregation mode for eval. measure */
  double  minval   = 10;        /* minimum evaluation measure value */
  int     prune    = 0;         /* (min. size for) evaluation pruning */
  double  filter   = 0.1;       /* item usage filtering parameter */
  int     sort     = 2;         /* flag for item sorting and recoding */
  int     tree     = 1;         /* flag for transaction tree */
  int     heap     = 1;         /* flag for heap sort vs. quick sort */
  int     post     = 0;         /* flag for a-posteriori pruning */
  int     report   = 0;         /* other flags for reporting */
  int     mode     = APP_BODY|IST_PERFECT;    /* search mode */
  int     size;                 /* current item set size */
  int     wgt;                  /* total transaction weight */
  int     frq, body, head;      /* frequency of an item set */
  int     *items;               /* item set (for reporting) */
  clock_t t, tt, tc, x;         /* timers for measurements */

  #ifndef QUIET                 /* if not quiet version */
  prgname = argv[0];            /* get program name for error msgs. */

  /* --- print usage message --- */
  if (argc > 1) {               /* if arguments are given */
    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
    fprintf(stderr, VERSION); } /* print a startup message */
  else {                        /* if no arguments are given */
    printf("usage: %s [options] infile outfile [appfile]\n", argv[0]);
    printf("%s\n", DESCRIPTION);
    printf("%s\n", VERSION);
    printf("-t#      target type                              "
                    "(default: %c)\n", target);
    printf("         (s: frequent item sets, c: closed item sets,\n"
           "          m: maximal item sets,  r: association rules)\n");
    printf("-m#      minimum number of items per set/rule     "
                    "(default: %d)\n", min);
    printf("-n#      maximum number of items per set/rule     "
                    "(default: no limit)\n");
    printf("-s#      minimum support    of a     set/rule     "
                    "(default: %g%%)\n", supp);
    printf("-S#      maximum support    of a     set/rule     "
                    "(default: %g%%)\n", smax);
    printf("         (positive: percentage, "
                     "negative: absolute number)\n");
    printf("-c#      minimum confidence of a     rule         "
                    "(default: %g%%)\n", conf);
    printf("-o       use the original rule support definition "
                    "(body & head)\n");
    printf("-e#      additional evaluation measure            "
                    "(default: none)\n");
    printf("-a#      aggregation mode for evaluation measure  "
                    "(default: none)\n");
    printf("-z       zero evaluation below expected support   "
                    "(default: evaluate all)\n");
    printf("-d#      minimum value of add. evaluation measure "
                    "(default: %g%%)\n", minval);
    printf("-p#      (min. size for) pruning with evaluation  "
                    "(default: no pruning)\n");
    printf("         (< 0: backward,   > 0: forward)\n");
    printf("-l#      sort item sets in output by their size   "
                    "(default: no sorting)\n");
    printf("         (< 0: descending, > 0: ascending)\n");
    printf("-g       write item names in scanable form "
                    "(quote certain characters)\n");
    printf("-k#      item separator for output                "
                    "(default: \"%s\")\n", isep);
    printf("-i#      implication sign for association rules   "
                    "(default: \"%s\")\n", impl);
    printf("-v#      output format for set/rule information   "
                    "(default: \"%s\")\n", format);
    printf("-q#      sort items w.r.t. their frequency        "
                    "(default: %d)\n", sort);
    printf("         (1: ascending, -1: descending, 0: do not sort,\n"
           "          2: ascending, -2: descending w.r.t. "
                    "transaction size sum)\n");
    printf("-u#      filter unused items from transactions    "
                    "(default: %g)\n", filter);
    printf("         (0: do not filter items w.r.t. usage in sets,\n"
           "         <0: fraction of removed items for filtering,\n"
           "         >0: take execution times ratio into account)\n");
    printf("-j       use quicksort to sort the transactions   "
                    "(default: heapsort)\n");
    printf("-x       do not prune the search "
                    "with perfect extensions\n");
    printf("-y       a-posteriori pruning of infrequent item sets\n");
    printf("-h       do not organize transactions as a prefix tree\n");
    printf("-b#      blank   characters                       "
                    "(default: \" \\t\\r\")\n");
    printf("-f#      field   separators                       "
                    "(default: \" \\t,\")\n");
    printf("-r#      record  separators                       "
                    "(default: \"\\n\")\n");
    printf("-C#      comment characters                       "
                    "(default: \"#\")\n");
    printf("-!       print additional option information\n");
    printf("infile   file to read transactions from\n");
    printf("outfile  file to write item sets/association rules"
                    "/hyperedges to\n");
    printf("appfile  file stating item appearances (optional)\n");
    return 0;                   /* print a usage message */
  }                             /* and abort the program */
  #endif  /* #ifndef QUIET */
  /* free option characters: w [A-Z]\[SC] */

  /* --- evaluate arguments --- */
  for (i = 1; i < argc; i++) {  /* traverse the arguments */
    s = argv[i];                /* get an option argument */
    if (optarg) { *optarg = s; optarg = NULL; continue; }
    if ((*s == '-') && *++s) {  /* -- if argument is an option */
      while (*s) {              /* traverse the options */
        switch (*s++) {         /* evaluate the options */
          case '!': help();                         break;
          case 't': target = (*s) ? *s++ : 's';     break;
          case 'm': min    = (int)strtol(s, &s, 0); break;
          case 'n': max    = (int)strtol(s, &s, 0); break;
          case 's': supp   =      strtod(s, &s);    break;
          case 'S': smax   =      strtod(s, &s);    break;
          case 'c': conf   =      strtod(s, &s);    break;
          case 'o': mode  |= APP_BOTH;              break;
          case 'e': eval   = (*s) ? *s++ : 0;       break;
          case 'z': zero   = IST_ZERO;              break;
          case 'a': aggm   = (*s) ? *s++ : 0;       break;
          case 'd': minval =      strtod(s, &s);    break;
          case 'p': prune  = (int)strtol(s, &s, 0); break;
          case 'g': report = ISR_SCAN;              break;
          case 'k': optarg = &isep;                 break;
          case 'i': optarg = &impl;                 break;
          case 'v': optarg = &format;               break;
          case 'l': dir    = (int)strtol(s, &s, 0); break;
          case 'q': sort   = (int)strtol(s, &s, 0); break;
          case 'u': filter =      strtod(s, &s);    break;
          case 'h': tree   = 0;                     break;
          case 'j': heap   = 0;                     break;
          case 'x': mode  &= ~IST_PERFECT;          break;
          case 'y': post   = 1;                     break;
          case 'b': optarg = &blanks;               break;
          case 'f': optarg = &fldseps;              break;
          case 'r': optarg = &recseps;              break;
          case 'C': optarg = &comment;              break;
          default : error(E_OPTION, *--s);          break;
        }                       /* set the option variables */
        if (optarg && *s) { *optarg = s; optarg = NULL; break; }
      } }                       /* get an option argument */
    else {                      /* -- if argument is no option */
      switch (k++) {            /* evaluate non-options */
        case  0: fn_in  = s;      break;
        case  1: fn_out = s;      break;
        case  2: fn_app = s;      break;
        default: error(E_ARGCNT); break;
      }                         /* note filenames */
    }
  }
  if (optarg) error(E_OPTARG);  /* check the option argument */
  if ((k < 2) || (k > 3))       /* and the number of arguments */
    error(E_ARGCNT);            /* (either in/out or in/out/app) */
  if ((!fn_in || !*fn_in) && (fn_app && !*fn_app))
    error(E_STDIN);             /* stdin must not be used twice */
  switch (target) {             /* check and translate target type */
    case 's': target = TT_ALL;               break;
    case 'c': target = TT_CLOSED;            break;
    case 'm': target = TT_MAXIMAL;           break;
    case 'r': target = TT_RULE;              break;
    default : error(E_TARGET, (char)target); break;
  }
  if (min < 0) error(E_SIZE, min); /* check the limits */
  if (max < 0) error(E_SIZE, max); /* for the set size */
  if (supp > 100)               /* check the minimum support */
    error(E_SUPP, supp);        /* (< 0: absolute support) */
  if ((conf < 0) || (conf > 100))
    error(E_CONF, conf);        /* check the minimum confidence */
  switch (eval) {               /* check and translate measure */
    case 'x': case 0: eval = IST_NONE;      break;
    case 'c': eval = IST_CONF;              break;
    case 'd': eval = IST_CONF_DIFF;         break;
    case 'l': eval = IST_LIFT;              break;
    case 'a': eval = IST_LIFT_DIFF;         break;
    case 'q': eval = IST_LIFT_QUOT;         break;
    case 'v': eval = IST_CVCT;              break;
    case 'e': eval = IST_CVCT_DIFF;         break;
    case 'r': eval = IST_CVCT_QUOT;         break;
    case 'f': eval = IST_CERT;              break;
    case 'n': eval = IST_CHI2;              break;
    case 'p': eval = IST_CHI2_PVAL;         break;
    case 'i': eval = IST_INFO;              break;
    case 'g': eval = IST_INFO_PVAL;         break;
    case 'b': eval = IST_LOGQ;              break;
    default : error(E_MEASURE, (char)eval); break;
  }
  switch (aggm) {               /* check and translate agg. mode */
    case 'x': case 0: aggm = IST_NONE;      break;
    case 'm': aggm = IST_MIN;               break;
    case 'n': aggm = IST_MAX;               break;
    case 'a': aggm = IST_AVG;               break;
    default : error(E_MEASURE, (char)aggm); break;
  }
  if ((target > TT_ALL)         /* if individual set counters needed */
  || ((eval > IST_NONE) && (eval < IST_LOGQ)))
    mode &= ~IST_PERFECT;       /* remove perfect extension pruning */
  if (target <= TT_MAXIMAL) {   /* remove rule specific settings */
    mode |= APP_BOTH; conf = 100; }
  if ((filter <= -1) || (filter >= 1))
    filter = 0;                 /* check and adapt the filter option */
  if (dir)                      /* if to sort output by size, */
    mode &= ~IST_PERFECT;       /* do not use perfect ext. pruning */

  /* --- create item base --- */
  ibase = ib_create(0, 0);      /* create an item base and */
  if (!ibase) error(E_NOMEM);   /* set the special characters */
  ib_chars(ibase, blanks, fldseps, recseps, "", comment);
  MSG(stderr, "\n");            /* terminate the startup message */

  /* --- read item appearance indicators --- */
  if (fn_app) {                 /* if item appearances are given */
    t = clock();                /* start the timer for the reading */
    if (*fn_app)                /* if an app. file name is given, */
      in = fopen(fn_app, "r");  /* open the item appearances file */
    else {                      /* if no app. file name is given, */
      in = stdin; fn_app = "<stdin>"; }   /* read from std. input */
    MSG(stderr, "reading %s ... ", fn_app);
    if (!in) error(E_FOPEN, fn_app);
    k = ib_readapp(ibase, in);  /* read the item appearances */
    if (k  != 0) error(k, fn_app, RECCNT(ibase), BUFFER(ibase));
    if (in != stdin) fclose(in);/* if not read from standard input, */
    in = NULL;                  /* close the input file */
    MSG(stderr, "[%d item(s)]", ib_cnt(ibase));
    MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));
  }                             /* print a log message */

  /* --- read transactions --- */
  t = clock();                  /* start the timer for the reading */
  if (fn_in && *fn_in)          /* if an input file name is given, */
    in = fopen(fn_in, "r");     /* open input file for reading */
  else {                        /* if no input file name is given, */
    in = stdin; fn_in = "<stdin>"; }   /* read from standard input */
  MSG(stderr, "reading %s ... ", fn_in);
  if (!in) error(E_FOPEN, fn_in);
  tabag = tb_create(ibase, 0);  /* create a transaction bag/multiset */
  if (!tabag) error(E_NOMEM);   /* to store the transactions */
  while (1) {                   /* transaction read loop */
    k = ib_read(ibase, in);     /* read the next transaction */
    if (k) { if (k > 0) break;  /* check for error and end of file */
      error(k, fn_in, RECCNT(ibase), BUFFER(ibase)); }
    if (tb_add(tabag, NULL) != 0) error(E_NOMEM);
  }                             /* add transaction to bag/multiset */
  if (in != stdin) fclose(in);  /* if not read from standard input, */
  in  = NULL;                   /* close the input file */
  n   = ib_cnt(ibase);          /* get the number of items */
  k   = tb_cnt(tabag);          /* get the number of transactions */
  wgt = tb_wgt(tabag);          /* the total transaction weight */
  MSG(stderr, "[%d item(s), ", n);
  if (k == wgt) MSG(stderr,    "%d transaction(s)]", k);
  else          MSG(stderr, "%d/%d transaction(s)]", k, wgt);
  MSG(stderr, " done [%.2fs].", SEC_SINCE(t));
  if ((n <= 0) || (wgt <= 0))   /* check for at least one item */
    error(E_NOTRANS);           /* and at least one transaction */
  MSG(stderr, "\n");            /* terminate the log message */
  if (format == dflt) {         /* if default info. format is used */
    if (target != TT_RULE) format = (supp < 0) ? "  (%a)" : "  (%1S)";
    else format = (supp < 0) ? "  (%b, %1C)" : "  (%1X, %1C)";
  }                             /* set default according to target */
  supp  = ceil ((supp >= 0) ? 0.01 *supp *wgt : -supp);
  smax  = floor((smax >= 0) ? 0.01 *smax *wgt : -smax);
  conf *= 0.01;                 /* transform support and confidence */

  /* --- sort and recode items --- */
  t = clock();                  /* compute absolute support values */
  MSG(stderr, "filtering, sorting and recoding items ... ");
  map = (int*)malloc(n *sizeof(int));
  if (!map) error(E_NOMEM);     /* create an item identifier map */
  k = (int)((mode & APP_HEAD) ? supp : ceil(supp *conf));
  n = ib_recode(ibase, k, sort, map);
  tb_recode(tabag, map);        /* recode the items and transactions */
  tb_itsort(tabag, 1, heap);    /* and sort items in transactions */
  free(map); map = NULL;        /* delete the item identifier map */
  MSG(stderr, "[%d item(s)] done [%.2fs].", n, SEC_SINCE(t));
  if (n <= 0) error(E_NOFREQ);  /* print a log message and */
  MSG(stderr, "\n");            /* check the number of items */
  k   = tb_max(tabag);          /* clamp the set/rule length to */
  if (max > k) max = k;         /* the maximum transaction size */

  /* --- reduce transactions --- */
  t = clock();                  /* start the timer for the reduction */
  MSG(stderr, "reducing transactions ... ");
  tb_filter(tabag, min, NULL);  /* remove items of short transactions */
  tb_sort(tabag, 1, heap);      /* sort the trans. lexicographically */
  k = tb_reduce(tabag);         /* reduce transactions to unique ones */
  if (k == wgt) MSG(stderr,    "[%d transaction(s)]", k);
  else          MSG(stderr, "[%d/%d transaction(s)]", k, wgt);
  MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));

  /* --- create transaction tree --- */
  tt = 0;                       /* init. the tree construction time */
  if (tree) {                   /* if to use a transaction tree */
    t = clock();                /* start the timer for construction */
    MSG(stderr, "building transaction tree ... ");
    tatree = tt_create(tabag);  /* create a transaction tree */
    if (!tatree) error(E_NOMEM);
    if (filter == 0) {          /* if not to filter items, */
      tb_delete(tabag, 0);      /* delete the transaction bag */
      tabag = NULL;             /* (redundant data storage) */
    }
    MSG(stderr, "[%d node(s)]", tt_nodecnt(tatree));
    MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));
    tt = clock() -t;            /* note the time for the construction */
  }                             /* of the transaction tree */

  /* --- create item set tree --- */
  t = clock(); tc = 0;          /* start the timer for the search */
  istree = ist_create(ibase, mode, (int)supp, (int)smax, conf);
  if (!istree) error(E_NOMEM);  /* create an item set tree */
  ist_seteval(istree, eval|zero, aggm, 0.01*minval, prune);

  /* --- check item subsets --- */
  MSG(stderr, "checking subsets of size 1");
  map = (int*)malloc(n *sizeof(int));
  if (!map) error(E_NOMEM);     /* create a filter map */
  while (1) {                   /* traverse the item set sizes */
    size = ist_height(istree);  /* get the current item set size and */
    if (size >= max) break;     /* abort if maximal size is reached */
    if (filter != 0) {          /* if to filter w.r.t. item usage */
      /* Keep the result of ist_check() in i: the filter decision   */
      /* below compares i with n and stores it as the new number of */
      /* items. Without this assignment i would still hold argc     */
      /* from the argument loop. (Presumably ist_check() returns    */
      /* the number of items still in use -- confirm in istree.)    */
      i = ist_check(istree, map);
      if (i <= size) break;     /* check which items are still used */
    }
    if (post)                   /* if a-posteriori pruning requested, */
      ist_prune(istree);        /* prune infrequent item sets */
    k = ist_addlvl(istree);     /* while max. height is not reached, */
    if (k) { if (k > 0) break;  /* add a level to the item set tree */
             error(E_NOMEM);  } /* if no level was added, abort */
    if (((filter < 0)           /* if to filter w.r.t. item usage */
    &&   (i < -filter *n))      /* and enough items were removed */
    ||  ((filter > 0)           /* or counting time is long enough */
    &&   (i < n) && (i *(double)tt < filter *n *tc))) {
      n = i;                    /* note the new number of items */
      x = clock();              /* start the timer for filtering */
      tb_filter(tabag, size+1, map);
      tb_sort(tabag, 0, heap);  /* remove unnec. items and trans. */
      tb_reduce(tabag);         /* and reduce trans. to unique ones */
      if (tatree) {             /* if a transaction tree was created */
        tt_delete(tatree, 0);   /* delete the transaction tree */
        tatree = tt_create(tabag);
        if (!tatree) error(E_NOMEM);
      }                         /* rebuild the transaction tree */
      tt = clock() -x;          /* note the filter/rebuild time */
    }
    MSG(stderr, " %d", ++size); /* print the current item set size */
    x = clock();                /* start the timer for counting */
    if (tatree) ist_countx(istree, tatree);
    else        ist_countb(istree, tabag);
    tc = clock() -x;            /* count the transaction tree/bag */
  }                             /* and compute the new counting time */
  free(map); map = NULL;        /* delete the filter map */
  MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));

  /* --- filter found item sets --- */
  if ((target == TT_CLOSED) || (target == TT_MAXIMAL)) {
    t = clock();                /* start the timer for filtering */
    MSG(stderr, "filtering for %s item sets ... ",
        (target == TT_MAXIMAL) ? "maximal" : "closed");
    k = target | ((prune < 0) ? IST_EVAL : 0);
    ist_mark(istree, k);        /* filter closed/maximal item sets */
    MSG(stderr, "done [%.2fs].\n", SEC_SINCE(t));
  }                             /* print a log message */

  /* --- print item sets/rules/hyperedges --- */
  t = clock();                  /* start the timer for the output */
  if (fn_out && *fn_out)        /* if an output file name is given, */
    out = fopen(fn_out, "w");   /* open the output file */
  else {                        /* if no output file name is given, */
    out = stdout; fn_out = "<stdout>"; }    /* write to std. output */
  MSG(stderr, "writing %s ... ", fn_out);
  if (!out) error(E_FOPEN, fn_out);
  if (eval == IST_LOGQ) report |= ISR_LOGS;
  if ((target == TT_CLOSED) || (target == TT_MAXIMAL))
    report |= ISR_NOEXP;        /* combine the report mode flags */
  isrep = isr_create(ibase, out, report, isep, impl);
  if (!isrep) error(E_NOMEM);   /* create an item set reporter */
  isr_setfmt (isrep, format);   /* and configure it */
  isr_setsize(isrep,  min, max);
  ist_setsize(istree, min, max, dir);
  ist_init   (istree);          /* initialize the extraction */
  items = t_items(ib_tract(ibase));
  if ((target <= TT_MAXIMAL)    /* if to find frequent item sets */
  &&  (dir == 0)) {             /* and not to sort them by size */
    if      (eval == IST_LOGQ)  /* if to compute an add. evaluation */
      isr_seteval(isrep, isr_logq,  NULL,   0.01*minval);
    else if (eval >  IST_NONE)  /* set the add. evaluation function */
      isr_seteval(isrep, ist_evalx, istree, 0.01*minval);
    n = ist_report(istree, isrep); } /* report the item sets */
  else if (target <= TT_MAXIMAL) { /* if to find frequent item sets */
    for (n = 0; 1; ) {          /* extract item sets from the tree */
      k = ist_set(istree, items, &frq, &minval);
      if (k < 0) break;         /* get the next frequent item set */
      if (k > 0) fputs(isr_name(isrep, items[0]), out);
      for (i = 0; ++i < k; ) {  /* print the item names */
        fputs(isep, out); fputs(isr_name(isrep, items[i]), out); }
      if (format)               /* if requested, print information */
        isr_sinfo(isrep, frq, minval);
      fputc('\n', out); n++;    /* terminate the output line and */
    } }                         /* count the reported item set */
  else if (target == TT_RULE) { /* if to find association rules, */
    for (n = 0; 1; ) {          /* extract rules from tree */
      k = ist_rule(istree, items, &frq, &body, &head, &minval);
      if (k < 0) break;         /* get the next association rule */
      fputs(isr_name(isrep, items[0]), out);
      fputs(impl, out);         /* print name of rule head item */
      if (k > 1) fputs(isr_name(isrep, items[1]), out);
      for (i = 1; ++i < k; ) {  /* print names of items in rule body */
        fputs(isep, out); fputs(isr_name(isrep, items[i]), out); }
      if (format)               /* if requested, print information */
        isr_rinfo(isrep, frq, body, head, minval);
      fputc('\n', out); n++;    /* terminate the output line and */
    }                           /* count the reported ass. rule */
  }  /* if (target <= TT_MAXIMAL) .. else .. */
  if (fflush(out) != 0) error(E_FWRITE, fn_out);
  if (out != stdout) fclose(out);
  out = NULL;                   /* close the output file */
  MSG(stderr, "[%d %s(s)] done ", n,
              (target == TT_RULE) ? "rule" : "set");
  MSG(stderr, "[%.2fs].\n", SEC_SINCE(t));
  #ifdef BENCH                  /* if benchmark version, */
  ist_stats(istree);            /* show the search statistics */
  #endif                        /* (especially memory usage) */

  /* --- clean up --- */
  #ifndef NDEBUG                /* if this is a debug version */
  isr_delete(isrep, 0);             /* the item set reporter, */
  ist_delete(istree);               /* the item set tree, */
  if (tatree) tt_delete(tatree, 0); /* the transaction tree, */
  if (tabag)  tb_delete(tabag,  0); /* the transaction bag, */
  ib_delete(ibase);                 /* and the item base */
  #endif
  #ifdef STORAGE                /* if storage debugging */
  showmem("at end of program"); /* check memory usage */
  #endif
  return 0;                     /* return 'ok' */
}  /* main() */