int main (int argc, char *argv[])
{                              
  int     i, k = 0, n;          
  char    *s;                   
  char    **optarg = NULL;     
  char    *fn_in   = NULL;      
  char    *fn_out  = NULL;    
  char    *fn_app  = NULL;     
  char    *blanks  = NULL;   
  char    *fldseps = NULL;     
  char    *recseps = NULL;      
  char    *comment = NULL;      
  char    *isep    = " ";      
  char    *impl    = " <- ";   
  char    *dflt    = "  (%1S)"; 
  char    *format  = dflt;      
  int     target   = 's';     
  int     min      = 1;        
  int     max      = INT_MAX;   
  double  supp     = 0.1;       
  double  smax     = 1.0;     
  double  conf     = 0.8;       
  int     dir      = 0;        
  int     eval     = 0;       
  int     aggm     = 0;        
  double  minval   = 0.1;       
  int     prune    = 0;         
  double  filter   = 0.1;       
  int     sort     = 2;         
  int     tree     = 1;         
  int     heap     = 1;         
  int     post     = 0;        
  int     report   = 0;       
  int     mode     = APP_BODY|IST_PERFECT;  
  int     size;               
  int     wgt;                  
  int     frq, body, head;     
  int     *items;             
  clock_t t, tt, tc, x;         

  #ifndef QUIET               
  prgname = argv[0];           

  if (argc > 1) {          
    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
    fprintf(stderr, VERSION); } 
  else {                      
    printf("usage: %s [options] infile outfile\n", argv[0]);
    printf("%s\n", DESCRIPTION);
    printf("%s\n", VERSION);
    printf("-t#      target type                              "
                    "(default: %c)\n", target);
    printf("         (s: frequent item sets, c: closed item sets,\n"
           "          m: maximal item sets,  r: association rules)\n");
    printf("-m#      minimum number of items per set/rule     "
                    "(default: %d)\n", min);
    printf("-n#      maximum number of items per set/rule     "
                    "(default: no limit)\n");
    printf("-s#      minimum support of a set/rule     "
                    "(default: %g%%)\n", supp *100);
    printf("-S#      maximum support of a set/rule     "
                    "(default: %g%%)\n", smax *100);
    printf("         (positive: percentage, "
                     "negative: absolute number)\n");
    printf("-c#      minimum confidence of a     rule         "
                    "(default: %g%%)\n", conf *100);
    printf("infile   file to read transactions from\n");
    printf("outfile  file to write item sets to\n");
    return 0;                
  }                            
  #endif  
  for (i = 1; i < argc; i++) {  
    s = argv[i];                
    if (optarg) { *optarg = s; optarg = NULL; continue; }
    if ((*s == '-') && *++s) {  
      while (*s) {             
        switch (*s++) {        
          case '!': help();                         break;
          case 't': target = (*s) ? *s++ : 's';     break;
          case 'm': min    = (int)strtol(s, &s, 0); break;
          case 'n': max    = (int)strtol(s, &s, 0); break;
          case 's': supp   = 0.01*strtod(s, &s);    break;
          case 'S': smax   = 0.01*strtod(s, &s);    break;
          case 'c': conf   = 0.01*strtod(s, &s);    break;
          case 'o': mode  |= APP_BOTH;              break;
          case 'e': eval   = (*s) ? *s++ : 0;       break;
          case 'a': aggm   = (*s) ? *s++ : 0;       break;
          case 'd': minval = 0.01*strtod(s, &s);    break;
          case 'p': prune  = (int)strtol(s, &s, 0); break;
          case 'g': report = ISR_SCAN;              break;
          case 'k': optarg = &isep;                 break;
          case 'i': optarg = &impl;                 break;
          case 'v': optarg = &format;               break;
          case 'l': dir    = (int)strtol(s, &s, 0); break;
          case 'q': sort   = (int)strtol(s, &s, 0); break;
          case 'u': filter =      strtod(s, &s);    break;
          case 'h': tree   = 0;                     break;
          case 'j': heap   = 0;                     break;
          case 'x': mode  &= ~IST_PERFECT;          break;
          case 'y': post   = 1;                     break;
          case 'b': optarg = &blanks;               break;
          case 'f': optarg = &fldseps;              break;
          case 'r': optarg = &recseps;              break;
          case 'C': optarg = &comment;              break;
          default : error(E_OPTION, *--s);          break;
        }                       
        if (optarg && *s) { *optarg = s; optarg = NULL; break; }
      } }                       
    else {                     
      switch (k++) {            
        case  0: fn_in  = s;      break;
        case  1: fn_out = s;      break;
        case  2: fn_app = s;      break;
        default: error(E_ARGCNT); break;
      }                         
    }
  }
  if (optarg) error(E_OPTARG);  
  if ((k < 2) || (k > 3))       
    error(E_ARGCNT);           
  if ((!fn_in || !*fn_in) && (fn_app && !*fn_app))
    error(E_STDIN);             
  switch (target) {             
    case 's': target = TT_SET;               break;
    case 'c': target = TT_CLOSED;            break;
    case 'm': target = TT_MAXIMAL;           break;
    case 'r': target = TT_RULE;              break;
    default : error(E_TARGET, (char)target); break;
  }
  if (min < 0) error(E_SIZE, min); 
  if (max < 0) error(E_SIZE, max); 
  if (supp  > 1)                
    error(E_SUPP, supp);        
  if ((conf  < 0) || (conf > 1))
    error(E_CONF, conf);       
  switch (eval) {              
    case 'x': case 0: eval = IST_NONE;      break;
    case 'c': eval = IST_CONF;              break;
    case 'd': eval = IST_DIFF;              break;
    case 'l': eval = IST_LIFT;              break;
    case 'a': eval = IST_LD21;              break;
    case 'q': eval = IST_QUOT;              break;
    case 'n': eval = IST_CHI2;              break;
    case 'p': eval = IST_PVAL;              break;
    case 'i': eval = IST_INFO;              break;
    case 'g': eval = IST_PGST;              break;
    case 'b': eval = IST_LOGQ;              break;
    default : error(E_MEASURE, (char)eval); break;
  }
  switch (aggm) {            
    case 'x': case 0: aggm = IST_NONE;      break;
    case 'm': aggm = IST_MIN;               break;
    case 'n': aggm = IST_MAX;               break;
    case 'a': aggm = IST_AVG;               break;
    default : error(E_MEASURE, (char)aggm); break;
  }
  if ((target > TT_SET)         
  || ((eval > IST_NONE) && (eval < IST_LOGQ)))
    mode &= ~IST_PERFECT;      
  if (target <= TT_MAXIMAL) {  
    mode |= APP_BOTH; conf = 1;}
  if ((filter <= -1) || (filter >= 1))
    filter = 0;                 

  ibase = ib_create(-1);       
  if (!ibase) error(E_NOMEM);  
  ib_chars(ibase, blanks, fldseps, recseps, comment);
  MSG(stderr, "\n");          

  if (fn_app) {                
    t = clock();                
    if (*fn_app)            
      in = fopen(fn_app, "r");  
    else {                      
      in = stdin; fn_app = "<stdin>"; }   
    MSG(stderr, "reading %s ... ", fn_app);
    if (!in) error(E_FOPEN, fn_app);
    k = ib_readapp(ibase, in); 
    if (k  != 0) error(k, fn_app, RECCNT(ibase), BUFFER(ibase));
    if (in != stdin) fclose(in);
    in = NULL;                  
    MSG(stderr, "[%d item(s)]", ib_cnt(ibase));
    MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));
  }                           

  t = clock();                 
  if (fn_in && *fn_in)        
    in = fopen(fn_in, "r");     
  else {                       
    in = stdin; fn_in = "<stdin>"; }   
  MSG(stderr, "reading %s ... ", fn_in);
  if (!in) error(E_FOPEN, fn_in);
  tabag = tb_create(ibase);     
  if (!tabag) error(E_NOMEM);   
  while (1) {                
    k = ib_read(ibase, in);     
    if (k) { if (k > 0) break; 
      error(k, fn_in, RECCNT(ibase), BUFFER(ibase)); }
    if (tb_add(tabag, NULL) != 0) error(E_NOMEM);
  }                            
  if (in != stdin) fclose(in);  
  in  = NULL;                  
  n   = ib_cnt(ibase);          
  k   = tb_cnt(tabag);         
  wgt = tb_wgt(tabag);          
  MSG(stderr, "[%d item(s), ", n);
  if (k == wgt) MSG(stderr,    "%d transaction(s)]", k);
  else          MSG(stderr, "%d/%d transaction(s)]", k, wgt);
  MSG(stderr, " done [%.2fs].", SEC_SINCE(t));
  if ((n <= 0) || (wgt <= 0))  
    error(E_NOTRANS);           
  MSG(stderr, "\n");            
  if (format == dflt) {       
    if (target != TT_RULE) format = (supp < 0) ? "  (%a)" : "  (%1S)";
    else format = (supp < 0) ? "  (%b, %1C)" : "  (%1X, %1C)";
  }                            
  supp = ceil (((supp < 0) ? -100 : wgt) *supp);
  smax = floor(((smax < 0) ? -100 : wgt) *smax);

  
  t = clock();                  
  MSG(stderr, "filtering, sorting and recoding items ... ");
  map = (int*)malloc(n *sizeof(int));
  if (!map) error(E_NOMEM);     
  k = (int)((mode & APP_HEAD) ? supp : ceil(supp *conf));
  n = ib_recode(ibase, k, sort, map);
  tb_recode(tabag, map);       
  tb_itsort(tabag, 1, heap);    
  free(map); map = NULL;        
  MSG(stderr, "[%d item(s)] done [%.2fs].", n, SEC_SINCE(t));
  if (n <= 0) error(E_NOFREQ); 
  MSG(stderr, "\n");            
  k   = tb_max(tabag);         
  if (max > k) max = k;         


  t = clock();                  
  MSG(stderr, "reducing transactions ... ");
  tb_filter(tabag, min, NULL);  
  tb_sort(tabag, 1, heap);      
  k = tb_reduce(tabag);         
  if (k == wgt) MSG(stderr,    "[%d transaction(s)]", k);
  else          MSG(stderr, "[%d/%d transaction(s)]", k, wgt);
  MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));

  
  tt = 0;                      
  if (tree) {                   
    t = clock();               
    MSG(stderr, "building transaction tree ... ");
    tatree = tt_create(tabag);  
    if (!tatree) error(E_NOMEM);
    if (filter == 0) {          
      tb_delete(tabag, 0);      
      tabag = NULL;             
    }
    MSG(stderr, "[%d node(s)]", tt_nodecnt(tatree));
    MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));
    tt = clock() -t;            
  }                             


  t = clock(); tc = 0;         
  istree = ist_create(ibase, mode, (int)supp, (int)smax, conf);
  if (!istree) error(E_NOMEM);  
  ist_seteval(istree, eval, aggm, minval, prune);

  /* --- check item subsets --- */
  MSG(stderr, "checking subsets of size 1");
  map = (int*)malloc(n *sizeof(int));
  if (!map) error(E_NOMEM);     
  while (1) {                   
    size = ist_height(istree);  
    if (size >= max) break;     
    if ((filter != 0)        
    &&  (ist_check(istree, map) <= size))
      break;                  
    if (post)                  
      ist_prune(istree);       
    k = ist_addlvl(istree);     
    if (k) { if (k > 0) break;
             error(E_NOMEM);  } 
    if (((filter < 0)           
    &&   (i < -filter *n))      
    ||  ((filter > 0)          
    &&   (i < n) && (i *(double)tt < filter *n *tc))) {
      n = i;                   
      x = clock();             
      tb_filter(tabag, size+1, map);
      tb_sort(tabag, 0, heap);  
      tb_reduce(tabag);         
      if (tatree) {             
        tt_delete(tatree, 0);   
        tatree = tt_create(tabag);
        if (!tatree) error(E_NOMEM);
      }                         
      tt = clock() -x;          
    }
    MSG(stderr, " %d", ++size); 
    x = clock();             
    if (tatree) ist_countx(istree, tatree);
    else        ist_countb(istree, tabag);
    tc = clock() -x;           
  }                             
  free(map); map = NULL;        
  MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));

  if ((target == TT_CLOSED) || (target == TT_MAXIMAL)) {
    t = clock();               
    MSG(stderr, "filtering for %s item sets ... ",
        (target == TT_MAXIMAL) ? "maximal" : "closed");
    k = target | ((prune < 0) ? IST_EVAL : 0);
    ist_mark(istree, k);        
    MSG(stderr, "done [%.2fs].\n", SEC_SINCE(t));
  }      

  t = clock();                  
  if (fn_out && *fn_out)        
    out = fopen(fn_out, "w");  
  else {                        
    out = stdout; fn_out = "<stdout>"; }    
  MSG(stderr, "writing %s ... ", fn_out);
  if (!out) error(E_FOPEN, fn_out);
  if (eval == IST_LOGQ) report |= ISR_LOGS;
  if ((target == TT_CLOSED) || (target == TT_MAXIMAL))
    report |= ISR_CLOSED;      
  isrep = isr_create(ibase, out, report, isep, impl);
  if (!isrep) error(E_NOMEM);  
  isr_setfmt (isrep, format);   
  isr_setsize(isrep,  min, max);
  ist_setsize(istree, min, max, dir);
  ist_init   (istree);          
  items = t_items(ib_tract(ibase));
  if ((target <= TT_MAXIMAL)    
  &&  (dir == 0)) {            
    if      (eval == IST_LOGQ)  
      isr_seteval(isrep, isr_logq,  NULL,   minval);
    else if (eval >  IST_NONE)  
      isr_seteval(isrep, ist_evalx, istree, minval);
    n = ist_report(istree, isrep); } 
  else if (target <= TT_MAXIMAL) { 
    for (n = 0; 1; ) {         
      k = ist_set(istree, items, &frq, &minval);
      if (k < 0) break;         
      if (k > 0) fputs(isr_name(isrep, items[0]), out);
      for (i = 0; ++i < k; ) {  
        fputs(isep, out); fputs(isr_name(isrep, items[i]), out); }
      if (format)               
        isr_sinfo(isrep, frq, minval);
      fputc('\n', out); n++;    
    } }                       
  else if (target == TT_RULE) { 
    for (n = 0; 1; ) {          
      k = ist_rule(istree, items, &frq, &body, &head, &minval);
      if (k < 0) break;         
      fputs(isr_name(isrep, items[0]), out);
      fputs(impl, out);         
      if (k > 1) fputs(isr_name(isrep, items[1]), out);
      for (i = 1; ++i < k; ) {  
        fputs(isep, out); fputs(isr_name(isrep, items[i]), out); }
      if (format)               
        isr_rinfo(isrep, frq, body, head, minval);
      fputc('\n', out); n++;    
    }                           
  }  /
  if (fflush(out) != 0) error(E_FWRITE, fn_out);
Пример #2
0
int main (int argc, char *argv[])
{                               /* --- main function */
  int     i, k = 0, n;          /* loop variables, counters */
  char    *s;                   /* to traverse the options */
  char    **optarg = NULL;      /* option argument */
  char    *fn_in   = NULL;      /* name of input  file */
  char    *fn_out  = NULL;      /* name of output file */
  char    *fn_app  = NULL;      /* name of item appearances file */
  char    *blanks  = NULL;      /* blanks */
  char    *fldseps = NULL;      /* field  separators */
  char    *recseps = NULL;      /* record separators */
  char    *comment = NULL;      /* comment indicators */
  char    *isep    = " ";       /* item separator for output */
  char    *impl    = " <- ";    /* implication sign for ass. rules */
  char    *dflt    = "  (%1S)"; /* default format for check */
  char    *format  = dflt;      /* format for information output */
  int     target   = 's';       /* target type (sets/rules/h.edges) */
  int     min      = 1;         /* minimum rule/item set size */
  int     max      = INT_MAX;   /* maximum rule/item set size */
  double  supp     = 10;        /* minimum support    (in percent) */
  double  smax     = 100;       /* maximum support    (in percent) */
  double  conf     = 80;        /* minimum confidence (in percent) */
  int     dir      = 0;         /* direction for size sorting */
  int     eval     = 0;         /* additional evaluation measure */
  int     zero     = 0;         /* flag for zero eval. below expect. */
  int     aggm     = 0;         /* aggregation mode for eval. measure */
  double  minval   = 10;        /* minimum evaluation measure value */
  int     prune    = 0;         /* (min. size for) evaluation pruning */
  double  filter   = 0.1;       /* item usage filtering parameter */
  int     sort     = 2;         /* flag for item sorting and recoding */
  int     tree     = 1;         /* flag for transaction tree */
  int     heap     = 1;         /* flag for heap sort vs. quick sort */
  int     post     = 0;         /* flag for a-posteriori pruning */
  int     report   = 0;         /* other flags for reporting */
  int     mode     = APP_BODY|IST_PERFECT;    /* search mode */
  int     size;                 /* current item set size */
  int     wgt;                  /* total transaction weight */
  int     frq, body, head;      /* frequency of an item set */
  int     *items;               /* item set (for reporting) */
  clock_t t, tt, tc, x;         /* timers for measurements */

  #ifndef QUIET                 /* if not quiet version */
  prgname = argv[0];            /* get program name for error msgs. */

  /* --- print usage message --- */
  if (argc > 1) {               /* if arguments are given */
    fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION);
    fprintf(stderr, VERSION); } /* print a startup message */
  else {                        /* if no arguments are given */
    printf("usage: %s [options] infile outfile [appfile]\n", argv[0]);
    printf("%s\n", DESCRIPTION);
    printf("%s\n", VERSION);
    printf("-t#      target type                              "
                    "(default: %c)\n", target);
    printf("         (s: frequent item sets, c: closed item sets,\n"
           "          m: maximal item sets,  r: association rules)\n");
    printf("-m#      minimum number of items per set/rule     "
                    "(default: %d)\n", min);
    printf("-n#      maximum number of items per set/rule     "
                    "(default: no limit)\n");
    printf("-s#      minimum support    of a     set/rule     "
                    "(default: %g%%)\n", supp);
    printf("-S#      maximum support    of a     set/rule     "
                    "(default: %g%%)\n", smax);
    printf("         (positive: percentage, "
                     "negative: absolute number)\n");
    printf("-c#      minimum confidence of a     rule         "
                    "(default: %g%%)\n", conf);
    printf("-o       use the original rule support definition "
                    "(body & head)\n");
    printf("-e#      additional evaluation measure            "
                    "(default: none)\n");
    printf("-a#      aggregation mode for evaluation measure  "
                    "(default: none)\n");
    printf("-z       zero evaluation below expected support   "
                    "(default: evaluate all)\n");
    printf("-d#      minimum value of add. evaluation measure "
                    "(default: %g%%)\n", minval);
    printf("-p#      (min. size for) pruning with evaluation  "
                    "(default: no pruning)\n");
    printf("         (< 0: backward,   > 0: forward)\n");
    printf("-l#      sort item sets in output by their size   "
                    "(default: no sorting)\n");
    printf("         (< 0: descending, > 0: ascending)\n");
    printf("-g       write item names in scanable form "
                    "(quote certain characters)\n");
    printf("-k#      item separator for output                "
                    "(default: \"%s\")\n", isep);
    printf("-i#      implication sign for association rules   "
                    "(default: \"%s\")\n", impl);
    printf("-v#      output format for set/rule information   "
                    "(default: \"%s\")\n", format);
    printf("-q#      sort items w.r.t. their frequency        "
                    "(default: %d)\n", sort);
    printf("         (1: ascending, -1: descending, 0: do not sort,\n"
           "          2: ascending, -2: descending w.r.t. "
                    "transaction size sum)\n");
    printf("-u#      filter unused items from transactions    "
                    "(default: %g)\n", filter);
    printf("         (0: do not filter items w.r.t. usage in sets,\n"
           "         <0: fraction of removed items for filtering,\n"
           "         >0: take execution times ratio into account)\n");
    printf("-j       use quicksort to sort the transactions   "
                    "(default: heapsort)\n");
    printf("-x       do not prune the search "
                    "with perfect extensions\n");
    printf("-y       a-posteriori pruning of infrequent item sets\n");
    printf("-h       do not organize transactions as a prefix tree\n");
    printf("-b#      blank   characters                       "
                    "(default: \" \\t\\r\")\n");
    printf("-f#      field   separators                       "
                    "(default: \" \\t,\")\n");
    printf("-r#      record  separators                       "
                    "(default: \"\\n\")\n");
    printf("-C#      comment characters                       "
                    "(default: \"#\")\n");
    printf("-!       print additional option information\n");
    printf("infile   file to read transactions from\n");
    printf("outfile  file to write item sets/association rules"
                    "/hyperedges to\n");
    printf("appfile  file stating item appearances (optional)\n");
    return 0;                   /* print a usage message */
  }                             /* and abort the program */
  #endif  /* #ifndef QUIET */
  /* free option characters: w [A-Z]\[SC] */

  /* --- evaluate arguments --- */
  for (i = 1; i < argc; i++) {  /* traverse the arguments */
    s = argv[i];                /* get an option argument */
    if (optarg) { *optarg = s; optarg = NULL; continue; }
    if ((*s == '-') && *++s) {  /* -- if argument is an option */
      while (*s) {              /* traverse the options */
        switch (*s++) {         /* evaluate the options */
          case '!': help();                         break;
          case 't': target = (*s) ? *s++ : 's';     break;
          case 'm': min    = (int)strtol(s, &s, 0); break;
          case 'n': max    = (int)strtol(s, &s, 0); break;
          case 's': supp   =      strtod(s, &s);    break;
          case 'S': smax   =      strtod(s, &s);    break;
          case 'c': conf   =      strtod(s, &s);    break;
          case 'o': mode  |= APP_BOTH;              break;
          case 'e': eval   = (*s) ? *s++ : 0;       break;
          case 'z': zero   = IST_ZERO;              break;
          case 'a': aggm   = (*s) ? *s++ : 0;       break;
          case 'd': minval =      strtod(s, &s);    break;
          case 'p': prune  = (int)strtol(s, &s, 0); break;
          case 'g': report = ISR_SCAN;              break;
          case 'k': optarg = &isep;                 break;
          case 'i': optarg = &impl;                 break;
          case 'v': optarg = &format;               break;
          case 'l': dir    = (int)strtol(s, &s, 0); break;
          case 'q': sort   = (int)strtol(s, &s, 0); break;
          case 'u': filter =      strtod(s, &s);    break;
          case 'h': tree   = 0;                     break;
          case 'j': heap   = 0;                     break;
          case 'x': mode  &= ~IST_PERFECT;          break;
          case 'y': post   = 1;                     break;
          case 'b': optarg = &blanks;               break;
          case 'f': optarg = &fldseps;              break;
          case 'r': optarg = &recseps;              break;
          case 'C': optarg = &comment;              break;
          default : error(E_OPTION, *--s);          break;
        }                       /* set the option variables */
        if (optarg && *s) { *optarg = s; optarg = NULL; break; }
      } }                       /* get an option argument */
    else {                      /* -- if argument is no option */
      switch (k++) {            /* evaluate non-options */
        case  0: fn_in  = s;      break;
        case  1: fn_out = s;      break;
        case  2: fn_app = s;      break;
        default: error(E_ARGCNT); break;
      }                         /* note filenames */
    }
  }
  if (optarg) error(E_OPTARG);  /* check the option argument */
  if ((k < 2) || (k > 3))       /* and the number of arguments */
    error(E_ARGCNT);            /* (either in/out or in/out/app) */
  if ((!fn_in || !*fn_in) && (fn_app && !*fn_app))
    error(E_STDIN);             /* stdin must not be used twice */
  switch (target) {             /* check and translate target type */
    case 's': target = TT_ALL;               break;
    case 'c': target = TT_CLOSED;            break;
    case 'm': target = TT_MAXIMAL;           break;
    case 'r': target = TT_RULE;              break;
    default : error(E_TARGET, (char)target); break;
  }
  if (min < 0) error(E_SIZE, min); /* check the limits */
  if (max < 0) error(E_SIZE, max); /* for the set size */
  if (supp > 100)               /* check the minimum support */
    error(E_SUPP, supp);        /* (< 0: absolute support) */
  if ((conf < 0) || (conf > 100))
    error(E_CONF, conf);        /* check the minimum confidence */
  switch (eval) {               /* check and translate measure */
    case 'x': case 0: eval = IST_NONE;      break;
    case 'c': eval = IST_CONF;              break;
    case 'd': eval = IST_CONF_DIFF;         break;
    case 'l': eval = IST_LIFT;              break;
    case 'a': eval = IST_LIFT_DIFF;         break;
    case 'q': eval = IST_LIFT_QUOT;         break;
    case 'v': eval = IST_CVCT;              break;
    case 'e': eval = IST_CVCT_DIFF;         break;
    case 'r': eval = IST_CVCT_QUOT;         break;
    case 'f': eval = IST_CERT;              break;
    case 'n': eval = IST_CHI2;              break;
    case 'p': eval = IST_CHI2_PVAL;         break;
    case 'i': eval = IST_INFO;              break;
    case 'g': eval = IST_INFO_PVAL;         break;
    case 'b': eval = IST_LOGQ;              break;
    default : error(E_MEASURE, (char)eval); break;
  }
  switch (aggm) {               /* check and translate agg. mode */
    case 'x': case 0: aggm = IST_NONE;      break;
    case 'm': aggm = IST_MIN;               break;
    case 'n': aggm = IST_MAX;               break;
    case 'a': aggm = IST_AVG;               break;
    default : error(E_MEASURE, (char)aggm); break;
  }
  if ((target > TT_ALL)         /* if individual set counters needed */
  || ((eval > IST_NONE) && (eval < IST_LOGQ)))
    mode &= ~IST_PERFECT;       /* remove perfect extension pruning */
  if (target <= TT_MAXIMAL) {   /* remove rule specific settings */
    mode |= APP_BOTH; conf = 100; } 
  if ((filter <= -1) || (filter >= 1))
    filter = 0;                 /* check and adapt the filter option */
  if (dir)                      /* if to sort output by size, */
    mode &= ~IST_PERFECT;       /* do not use perfect ext. pruning */

  /* --- create item base --- */
  ibase = ib_create(0, 0);      /* create an item base and */
  if (!ibase) error(E_NOMEM);   /* set the special characters */
  ib_chars(ibase, blanks, fldseps, recseps, "", comment);
  MSG(stderr, "\n");            /* terminate the startup message */

  /* --- read item appearance indicators --- */
  if (fn_app) {                 /* if item appearances are given */
    t = clock();                /* start the timer for the reading */
    if (*fn_app)                /* if an app. file name is given, */
      in = fopen(fn_app, "r");  /* open the item appearances file */
    else {                      /* if no app. file name is given, */
      in = stdin; fn_app = "<stdin>"; }   /* read from std. input */
    MSG(stderr, "reading %s ... ", fn_app);
    if (!in) error(E_FOPEN, fn_app);
    k = ib_readapp(ibase, in);  /* read the item appearances */
    if (k  != 0) error(k, fn_app, RECCNT(ibase), BUFFER(ibase));
    if (in != stdin) fclose(in);/* if not read from standard input, */
    in = NULL;                  /* close the input file */
    MSG(stderr, "[%d item(s)]", ib_cnt(ibase));
    MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));
  }                             /* print a log message */

  /* --- read transactions --- */
  t = clock();                  /* start the timer for the reading */
  if (fn_in && *fn_in)          /* if an input file name is given, */
    in = fopen(fn_in, "r");     /* open input file for reading */
  else {                        /* if no input file name is given, */
    in = stdin; fn_in = "<stdin>"; }   /* read from standard input */
  MSG(stderr, "reading %s ... ", fn_in);
  if (!in) error(E_FOPEN, fn_in);
  tabag = tb_create(ibase, 0);  /* create a transaction bag/multiset */
  if (!tabag) error(E_NOMEM);   /* to store the transactions */
  while (1) {                   /* transaction read loop */
    k = ib_read(ibase, in);     /* read the next transaction */
    if (k) { if (k > 0) break;  /* check for error and end of file */
      error(k, fn_in, RECCNT(ibase), BUFFER(ibase)); }
    if (tb_add(tabag, NULL) != 0) error(E_NOMEM);
  }                             /* add transaction to bag/multiset */
  if (in != stdin) fclose(in);  /* if not read from standard input, */
  in  = NULL;                   /* close the input file */
  n   = ib_cnt(ibase);          /* get the number of items */
  k   = tb_cnt(tabag);          /* get the number of transactions */
  wgt = tb_wgt(tabag);          /* the total transaction weight */
  MSG(stderr, "[%d item(s), ", n);
  if (k == wgt) MSG(stderr,    "%d transaction(s)]", k);
  else          MSG(stderr, "%d/%d transaction(s)]", k, wgt);
  MSG(stderr, " done [%.2fs].", SEC_SINCE(t));
  if ((n <= 0) || (wgt <= 0))   /* check for at least one item */
    error(E_NOTRANS);           /* and at least one transaction */
  MSG(stderr, "\n");            /* terminate the log message */
  if (format == dflt) {         /* if default info. format is used */
    if (target != TT_RULE) format = (supp < 0) ? "  (%a)" : "  (%1S)";
    else format = (supp < 0) ? "  (%b, %1C)" : "  (%1X, %1C)";
  }                             /* set default according to target */
  supp  = ceil ((supp >= 0) ? 0.01 *supp *wgt : -supp);
  smax  = floor((smax >= 0) ? 0.01 *smax *wgt : -smax);
  conf *= 0.01;                 /* transform support and confidence */

  /* --- sort and recode items --- */
  t = clock();                  /* compute absolute support values */
  MSG(stderr, "filtering, sorting and recoding items ... ");
  map = (int*)malloc(n *sizeof(int));
  if (!map) error(E_NOMEM);     /* create an item identifier map */
  k = (int)((mode & APP_HEAD) ? supp : ceil(supp *conf));
  n = ib_recode(ibase, k, sort, map);
  tb_recode(tabag, map);        /* recode the items and transactions */
  tb_itsort(tabag, 1, heap);    /* and sort items in transactions */
  free(map); map = NULL;        /* delete the item identifier map */
  MSG(stderr, "[%d item(s)] done [%.2fs].", n, SEC_SINCE(t));
  if (n <= 0) error(E_NOFREQ);  /* print a log message and */
  MSG(stderr, "\n");            /* check the number of items */
  k   = tb_max(tabag);          /* clamp the set/rule length to */
  if (max > k) max = k;         /* the maximum transaction size */

  /* --- reduce transactions --- */
  t = clock();                  /* start the timer for the reduction */
  MSG(stderr, "reducing transactions ... ");
  tb_filter(tabag, min, NULL);  /* remove items of short transactions */
  tb_sort(tabag, 1, heap);      /* sort the trans. lexicographically */
  k = tb_reduce(tabag);         /* reduce transactions to unique ones */
  if (k == wgt) MSG(stderr,    "[%d transaction(s)]", k);
  else          MSG(stderr, "[%d/%d transaction(s)]", k, wgt);
  MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));

  /* --- create transaction tree --- */
  tt = 0;                       /* init. the tree construction time */
  if (tree) {                   /* if to use a transaction tree */
    t = clock();                /* start the timer for construction */
    MSG(stderr, "building transaction tree ... ");
    tatree = tt_create(tabag);  /* create a transaction tree */
    if (!tatree) error(E_NOMEM);
    if (filter == 0) {          /* if not to filter items, */
      tb_delete(tabag, 0);      /* delete the transaction bag */
      tabag = NULL;             /* (redundant data storage) */
    }
    MSG(stderr, "[%d node(s)]", tt_nodecnt(tatree));
    MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));
    tt = clock() -t;            /* note the time for the construction */
  }                             /* of the transaction tree */

  /* --- create item set tree --- */
  t = clock(); tc = 0;          /* start the timer for the search */
  istree = ist_create(ibase, mode, (int)supp, (int)smax, conf);
  if (!istree) error(E_NOMEM);  /* create an item set tree */
  ist_seteval(istree, eval|zero, aggm, 0.01*minval, prune);

  /* --- check item subsets --- */
  MSG(stderr, "checking subsets of size 1");
  map = (int*)malloc(n *sizeof(int));
  if (!map) error(E_NOMEM);     /* create a filter map */
  while (1) {                   /* traverse the item set sizes */
    size = ist_height(istree);  /* get the current item set size and */
    if (size >= max) break;     /* abort if maximal size is reached */
    if ((filter != 0)           /* if to filter w.r.t. item usage */
    &&  (ist_check(istree, map) <= size))
      break;                    /* check which items are still used */
    if (post)                   /* if a-posteriori pruning requested, */
      ist_prune(istree);        /* prune infrequent item sets */
    k = ist_addlvl(istree);     /* while max. height is not reached, */
    if (k) { if (k > 0) break;  /* add a level to the item set tree */
             error(E_NOMEM);  } /* if no level was added, abort */
    if (((filter < 0)           /* if to filter w.r.t. item usage */
    &&   (i < -filter *n))      /* and enough items were removed */
    ||  ((filter > 0)           /* or counting time is long enough */
    &&   (i < n) && (i *(double)tt < filter *n *tc))) {
      n = i;                    /* note the new number of items */
      x = clock();              /* start the timer for filtering */
      tb_filter(tabag, size+1, map);
      tb_sort(tabag, 0, heap);  /* remove unnec. items and trans. */
      tb_reduce(tabag);         /* and reduce trans. to unique ones */
      if (tatree) {             /* if a transaction tree was created */
        tt_delete(tatree, 0);   /* delete the transaction tree */
        tatree = tt_create(tabag);
        if (!tatree) error(E_NOMEM);
      }                         /* rebuild the transaction tree */
      tt = clock() -x;          /* note the filter/rebuild time */
    }
    MSG(stderr, " %d", ++size); /* print the current item set size */
    x = clock();                /* start the timer for counting */
    if (tatree) ist_countx(istree, tatree);
    else        ist_countb(istree, tabag);
    tc = clock() -x;            /* count the transaction tree/bag */
  }                             /* and compute the new counting time */
  free(map); map = NULL;        /* delete the filter map */
  MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t));

  /* --- filter found item sets --- */
  if ((target == TT_CLOSED) || (target == TT_MAXIMAL)) {
    t = clock();                /* start the timer for filtering */
    MSG(stderr, "filtering for %s item sets ... ",
        (target == TT_MAXIMAL) ? "maximal" : "closed");
    k = target | ((prune < 0) ? IST_EVAL : 0);
    ist_mark(istree, k);        /* filter closed/maximal item sets */
    MSG(stderr, "done [%.2fs].\n", SEC_SINCE(t));
  }                             /* print a log message */

  /* --- print item sets/rules/hyperedges --- */
  t = clock();                  /* start the timer for the output */
  if (fn_out && *fn_out)        /* if an output file name is given, */
    out = fopen(fn_out, "w");   /* open the output file */
  else {                        /* if no output file name is given, */
    out = stdout; fn_out = "<stdout>"; }    /* write to std. output */
  MSG(stderr, "writing %s ... ", fn_out);
  if (!out) error(E_FOPEN, fn_out);
  if (eval == IST_LOGQ) report |= ISR_LOGS;
  if ((target == TT_CLOSED) || (target == TT_MAXIMAL))
    report |= ISR_NOEXP;        /* combine the report mode flags */
  isrep = isr_create(ibase, out, report, isep, impl);
  if (!isrep) error(E_NOMEM);   /* create an item set reporter */
  isr_setfmt (isrep, format);   /* and configure it */
  isr_setsize(isrep,  min, max);
  ist_setsize(istree, min, max, dir);
  ist_init   (istree);          /* initialize the extraction */
  items = t_items(ib_tract(ibase));
  if ((target <= TT_MAXIMAL)    /* if to find frequent item sets */
  &&  (dir == 0)) {             /* and not to sort them by size */
    if      (eval == IST_LOGQ)  /* if to compute an add. evaluation */
      isr_seteval(isrep, isr_logq,  NULL,   0.01*minval);
    else if (eval >  IST_NONE)  /* set the add. evaluation function */
      isr_seteval(isrep, ist_evalx, istree, 0.01*minval);
    n = ist_report(istree, isrep); } /* report the item sets */
  else if (target <= TT_MAXIMAL) { /* if to find frequent item sets */
    for (n = 0; 1; ) {          /* extract item sets from the tree */
      k = ist_set(istree, items, &frq, &minval);
      if (k < 0) break;         /* get the next frequent item set */
      if (k > 0) fputs(isr_name(isrep, items[0]), out);
      for (i = 0; ++i < k; ) {  /* print the item names */
        fputs(isep, out); fputs(isr_name(isrep, items[i]), out); }
      if (format)               /* if requested, print information */
        isr_sinfo(isrep, frq, minval);
      fputc('\n', out); n++;    /* terminate the output line and */
    } }                         /* count the reported item set */
  else if (target == TT_RULE) { /* if to find association rules, */
    for (n = 0; 1; ) {          /* extract rules from tree */
      k = ist_rule(istree, items, &frq, &body, &head, &minval);
      if (k < 0) break;         /* get the next association rule */
      fputs(isr_name(isrep, items[0]), out);
      fputs(impl, out);         /* print name of rule head item */
      if (k > 1) fputs(isr_name(isrep, items[1]), out);
      for (i = 1; ++i < k; ) {  /* print names of items in rule body */
        fputs(isep, out); fputs(isr_name(isrep, items[i]), out); }
      if (format)               /* if requested, print information */
        isr_rinfo(isrep, frq, body, head, minval);
      fputc('\n', out); n++;    /* terminate the output line and */
    }                           /* count the reported ass. rule */
  }  /* if (target <= TT_MAXIMAL) .. else .. */
  if (fflush(out) != 0) error(E_FWRITE, fn_out);
  if (out != stdout) fclose(out);
  out = NULL;                   /* close the output file */
  MSG(stderr, "[%d %s(s)] done ", n,
              (target == TT_RULE) ? "rule" : "set");
  MSG(stderr, "[%.2fs].\n", SEC_SINCE(t));
  #ifdef BENCH                  /* if benchmark version, */
  ist_stats(istree);            /* show the search statistics */
  #endif                        /* (especially memory usage) */

  /* --- clean up --- */
  #ifndef NDEBUG                /* if this is a debug version */
  isr_delete(isrep, 0);             /* the item set reporter, */
  ist_delete(istree);               /* the item set tree, */
  if (tatree) tt_delete(tatree, 0); /* the transaction tree, */
  if (tabag)  tb_delete(tabag,  0); /* the transaction bag, */
  ib_delete(ibase);                 /* and the item base */
  #endif
  #ifdef STORAGE                /* if storage debugging */
  showmem("at end of program"); /* check memory usage */
  #endif
  return 0;                     /* return 'ok' */
}  /* main() */