/*
 * isr_reportx -- report the current item set together with a list of
 *                transaction identifiers.
 *
 * rep   item set reporter to use (must not be NULL; checked by assert)
 * tids  array of transaction identifiers, stored in the reporter only
 *       for the duration of the call to isr_report()
 * n     number of transaction identifiers in tids
 *
 * Returns whatever isr_report() returns for the current item set.
 * The reporter's tids pointer is reset to NULL before returning;
 * the tidcnt field keeps the value n.
 */
int isr_reportx (ISREPORT *rep, int *tids, int n)
{
  int result;                   /* result of the reporting call */

  assert(rep);                  /* a reporter is mandatory */
  rep->tidcnt = n;              /* note the number of transaction ids */
  rep->tids   = tids;           /* and make the id array available */
  result = isr_report(rep);     /* do the actual reporting */
  rep->tids   = NULL;           /* do not keep a reference to the ids */
  return result;                /* pass on the result of isr_report() */
}  /* isr_reportx() */
int main (int argc, char *argv[]) { /* --- main function */ int i, k = 0; /* loop variables */ char *s; /* to traverse the options */ CCHAR **optarg = NULL; /* option argument */ CCHAR *fn_inp = NULL; /* name of input file */ CCHAR *fn_out = NULL; /* name of output file */ CCHAR *fn_sel = NULL; /* name of item selection file */ #ifdef ISR_PATSPEC /* if to allow a pattern spectrum */ CCHAR *fn_psp = NULL; /* name of pattern spectrum file */ #endif CCHAR *recseps = NULL; /* record separators */ CCHAR *fldseps = NULL; /* field separators */ CCHAR *blanks = NULL; /* blank characters */ CCHAR *comment = NULL; /* comment characters */ CCHAR *hdr = ""; /* record header for output */ CCHAR *sep = " "; /* item separator for output */ CCHAR *dflt = " (%1S)"; /* default format for check */ CCHAR *format = dflt; /* format for information output */ int target = 's'; /* target type (closed/maximal) */ ITEM min = 1; /* minimum size of an item set */ ITEM max = 16; /* maximum size of an item set */ double supp = 10; /* minimum support (in percent) */ int eval = 'x'; /* additional evaluation measure */ double minval = 10; /* minimum evaluation measure value */ int sort = 2; /* flag for item sorting and recoding */ int dir = +1; /* item processing order */ long repeat = 1; /* number of repetitions */ int mtar = 0; /* mode for transaction reading */ int mrep = 0; /* mode for item set reporting */ int stats = 0; /* flag for item set statistics */ ITEM m; /* number of items */ TID n; /* number of transactions */ SUPP w; /* total transaction weight */ clock_t t; /* timer for measurements */ ISEVALFN *evalfn = (ISEVALFN*)0; /* evaluation function */ #ifndef QUIET /* if not quiet version */ prgname = argv[0]; /* get program name for error msgs. 
*/ /* --- print usage message --- */ if (argc > 1) { /* if arguments are given */ fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION); fprintf(stderr, VERSION); } /* print a startup message */ else { /* if no arguments given */ printf("usage: %s [options] infile [outfile]\n", argv[0]); printf("%s\n", DESCRIPTION); printf("%s\n", VERSION); printf("-t# target type " "(default: %c)\n", target); printf(" (s: frequent, c: closed, m: maximal item sets, " "g: generators)\n"); printf("-m# minimum number of items per item set " "(default: %"ITEM_FMT")\n", min); printf("-n# maximum number of items per item set " "(default: %"ITEM_FMT")\n", max); printf("-s# minimum support of an item set " "(default: %g%%)\n", supp); printf(" (positive: percentage, " "negative: absolute number)\n"); printf("-e# additional evaluation measure " "(default: none)\n"); printf("-d# minimum value of add. evaluation measure " "(default: %g%%)\n", minval); printf("-q# sort items w.r.t. their frequency " "(default: %d)\n", sort); printf(" (1: ascending, -1: descending, 0: do not sort,\n" " 2: ascending, -2: descending w.r.t. 
" "transaction size sum)\n"); printf("-u# item processing order/search direction " "(default: %d)\n", dir); printf(" (fixed to -1 for closed/maximal item sets\n" " fixed to +1 for generators, free otherwise)\n"); printf("-x# number of repetitions (for benchmarking) " "(default: 1)\n"); printf("-R# read an item selection from a file\n"); #ifdef ISR_PATSPEC printf("-P# write a pattern spectrum to a file\n"); #endif printf("-Z print item set statistics (counts per size)\n"); printf("-g write output in scanable form " "(quote certain characters)\n"); printf("-h# record header for output " "(default: \"%s\")\n", hdr); printf("-k# item separator for output " "(default: \"%s\")\n", sep); printf("-v# output format for item set information " "(default: \"%s\")\n", format); printf("-w transaction weight in last field " "(default: only items)\n"); printf("-r# record/transaction separators " "(default: \"\\n\")\n"); printf("-f# field /item separators " "(default: \" \\t,\")\n"); printf("-b# blank characters " "(default: \" \\t\\r\")\n"); printf("-C# comment characters " "(default: \"#\")\n"); printf("-! print additional option information\n"); printf("infile file to read transactions from " "[required]\n"); printf("outfile file to write frequent item sets to " "[optional]\n"); return 0; /* print a usage message */ } /* and abort the program */ #endif /* #ifndef QUIET */ /* free option characters: acilopy [A-Z]\[CT] */ /* --- evaluate arguments --- */ for (i = 1; i < argc; i++) { /* traverse arguments */ s = argv[i]; /* get option argument */ if (optarg) { *optarg = s; optarg = NULL; continue; } if ((*s == '-') && *++s) { /* -- if argument is an option */ while (*s) { /* traverse options */ switch (*s++) { /* evaluate switches */ case '!': help(); break; case 't': target = (*s) ? *s++ : 's'; break; case 'm': min = (ITEM)strtol(s, &s, 0); break; case 'n': max = (ITEM)strtol(s, &s, 0); break; case 's': supp = strtod(s, &s); break; case 'e': eval = (*s) ? 
*s++ : 0; break; case 'd': minval = strtod(s, &s); break; case 'q': sort = (int) strtol(s, &s, 0); break; case 'u': dir = (int) strtol(s, &s, 0); break; case 'x': repeat = strtol(s, &s, 0); break; case 'R': optarg = &fn_sel; break; #ifdef ISR_PATSPEC case 'P': optarg = &fn_psp; break; #endif case 'Z': stats = 1; break; case 'g': mrep = ISR_SCAN; break; case 'h': optarg = &hdr; break; case 'k': optarg = &sep; break; case 'v': optarg = &format; break; case 'w': mtar |= TA_WEIGHT; break; case 'r': optarg = &recseps; break; case 'f': optarg = &fldseps; break; case 'b': optarg = &blanks; break; case 'C': optarg = &comment; break; default : error(E_OPTION, *--s); break; } /* set option variables */ if (optarg && *s) { *optarg = s; optarg = NULL; break; } } } /* get option argument */ else { /* -- if argument is no option */ switch (k++) { /* evaluate non-options */ case 0: fn_inp = s; break; case 1: fn_out = s; break; default: error(E_ARGCNT); break; } /* note filenames */ } } if (optarg) error(E_OPTARG); /* check (option) arguments */ if (k < 1) error(E_ARGCNT); /* and number of arguments */ if (min < 0) error(E_SIZE, min); /* check the size limits */ if (max < 0) error(E_SIZE, max); /* and the minimum support */ if (max > 16) error(E_SIZE, max); /* and the minimum support */ if (supp > 100) error(E_SUPPORT, supp); if (repeat < 1) error(E_REPEAT, repeat); if ((!fn_inp || !*fn_inp) && (fn_sel && !*fn_sel)) error(E_STDIN); /* stdin must not be used twice */ switch (target) { /* check and translate target type */ case 's': target = ISR_ALL; break; case 'c': target = ISR_CLOSED; break; case 'm': target = ISR_MAXIMAL; break; case 'g': target = ISR_GENERA; break; default : error(E_TARGET, (char)target); break; } /* (get target type code) */ switch (eval) { /* check and translate measure */ case 'x': evalfn = (ISEVALFN*)0; break; case 'b': evalfn = isr_logrto; break; default : error(E_MEASURE, (char)eval); break; } /* (get evaluation measure code) */ if ((format == dflt) && 
(supp < 0)) format = " (%a)"; /* adapt the default info. format */ MSG(stderr, "\n"); /* terminate the startup message */ /* --- read item selection --- */ ibase = ib_create(0, 0); /* create an item base */ if (!ibase) error(E_NOMEM); /* to manage the items */ tread = trd_create(); /* create a transaction reader */ if (!tread) error(E_NOMEM); /* and configure the characters */ trd_allchs(tread, recseps, fldseps, blanks, "", comment); if (fn_sel) { /* if item appearances are given */ t = clock(); /* start timer, open input file */ if (trd_open(tread, NULL, fn_sel) != 0) error(E_FOPEN, trd_name(tread)); MSG(stderr, "reading %s ... ", trd_name(tread)); m = ib_readsel(ibase,tread);/* read the given item selection */ if (m < 0) error((int)-m, ib_errmsg(ibase, NULL, 0)); trd_close(tread); /* close the input file */ MSG(stderr, "[%"ITEM_FMT" item(s)]", m); MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t)); } /* print a log message */ /* --- read transaction database --- */ tabag = tbg_create(ibase); /* create a transaction bag */ if (!tabag) error(E_NOMEM); /* to store the transactions */ t = clock(); /* start timer, open input file */ if (trd_open(tread, NULL, fn_inp) != 0) error(E_FOPEN, trd_name(tread)); MSG(stderr, "reading %s ... 
", trd_name(tread)); k = tbg_read(tabag, tread, mtar); if (k < 0) /* read the transaction database */ error(-k, tbg_errmsg(tabag, NULL, 0)); trd_delete(tread, 1); /* close the input file and */ tread = NULL; /* delete the table reader */ m = ib_cnt(ibase); /* get the number of items, */ n = tbg_cnt(tabag); /* the number of transactions, */ w = tbg_wgt(tabag); /* the total transaction weight */ MSG(stderr, "[%"ITEM_FMT" item(s), %"TID_FMT, m, n); if (w != (SUPP)n) MSG(stderr, "/%"SUPP_FMT, w); MSG(stderr, " transaction(s)] done [%.2fs].", SEC_SINCE(t)); if ((m <= 0) || (n <= 0)) /* check for at least one item */ error(E_NOITEMS); /* and at least one transaction */ MSG(stderr, "\n"); /* compute absolute support value */ supp = ceilsupp((supp >= 0) ? 0.01 *supp *(double)w : -supp); /* --- sort and recode items --- */ t = clock(); /* start timer, print log message */ MSG(stderr, "filtering, sorting and recoding items ... "); m = tbg_recode(tabag, (SUPP)supp, -1, 16, -sort); if (m < 0) error(E_NOMEM); /* recode items and transactions */ if (m <= 0) error(E_NOITEMS); /* and check the number of items */ MSG(stderr, "[%"ITEM_FMT" item(s)]", m); MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t)); /* --- sort and reduce transactions --- */ t = clock(); /* start timer, print log message */ MSG(stderr, "sorting and reducing transactions ... "); tbg_filter(tabag,min,NULL,0); /* remove items of short transactions */ tbg_itsort(tabag, +1, 1); /* sort items in transactions and */ tbg_sort (tabag, +1, 1); /* sort the trans. 
lexicographically */ n = tbg_reduce(tabag, 0); /* reduce transactions to unique ones */ tbg_pack(tabag, 16); /* pack items with codes < 16 */ MSG(stderr, "[%"TID_FMT, n); /* print number of transactions */ if (w != (SUPP)n) MSG(stderr, "/%"SUPP_FMT, w); MSG(stderr, " transaction(s)] done [%.2fs].\n", SEC_SINCE(t)); /* --- find frequent item sets --- */ t = clock(); /* start the timer */ if (eval == 'b') mrep |= ISR_LOGS; report = isr_create(ibase, target|mrep, -1, hdr, sep, NULL); if (!report) error(E_NOMEM); /* create an item set reporter */ isr_setfmt (report, format); /* and configure it: set flags, */ isr_setsize(report, min, max);/* info. format and size range, */ if (evalfn) /* and the evaluation function */ isr_seteval(report, evalfn, NULL, +1, 0.01*minval); #ifdef ISR_PATSPEC /* if to allow a pattern spectrum */ if (fn_psp && (isr_addpsp(report, NULL) < 0)) error(E_NOMEM); /* add a pattern spectrum if req. */ #endif if (isr_open(report, NULL, fn_out) != 0) error(E_FOPEN, isr_name(report)); /* open the output file */ MSG(stderr, "writing %s ... ", isr_name(report)); if (target == ISR_GENERA) dir = +1; else if (target & (ISR_CLOSED|ISR_MAXIMAL)) dir = -1; fim16 = m16_create(dir, (SUPP)supp, report); if (!fim16) error(E_NOMEM); /* create a 16 items machine */ for (i = 0; i < repeat; i++){ /* repeated mining loop */ isr_reset(report); /* (re)init. the output counters */ m16_addtbg(fim16, tabag); /* add trans. 
bag to 16 items machine */ k = m16_mine(fim16); /* find frequent item sets */ if (k < 0) error(E_NOMEM); /* with 16 items machine */ } if (isr_report(report) < 0) /* report the empty set (if needed) */ error(E_NOMEM); if (isr_close(report) != 0) /* close the output file */ error(E_FWRITE, isr_name(report)); MSG(stderr, "[%"SIZE_FMT" set(s)]", isr_repcnt(report)); MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t)); if (stats) isr_prstats(report, stdout, 0); /* --- write pattern spectrum --- */ #ifdef ISR_PATSPEC /* if to allow a pattern spectrum */ if (fn_psp) { /* if to write a pattern spectrum */ psp = isr_getpsp(report);/* get the pattern spectrum */ twrite = twr_create(); /* create a table writer and */ if (!twrite) error(E_NOMEM);/* open the output file */ if (twr_open(twrite, NULL, fn_psp) != 0) error(E_FOPEN, twr_name(twrite)); if (psp_report(psp, twrite) != 0) error(E_FWRITE, twr_name(twrite)); twr_delete(twrite, 1); /* write the pattern spectrum, */ twrite = NULL; /* delete the table writer, and */ } /* clear the writer variable */ #endif /* --- clean up --- */ CLEANUP; /* clean up memory and close files */ SHOWMEM; /* show (final) memory usage */ return 0; /* return 'ok' */ } /* main() */
/*
 * rec_neg -- recursive search that processes the items in descending
 *            order of their index.
 *
 * fim   current level of the 16 items machine; projections for the
 *       items with index >= 2 are written to the next level (fim+1)
 * n     number of items to process (items 0 .. n-1)
 * mask  bit mask of the items that may still be used (bit i <-> item i)
 *
 * Items n-1 down to 2 are handled by projecting the transactions of
 * the item to fim+1 and recursing there. Items 1 and 0 are evaluated
 * directly from the support counters and the weight with index 3.
 * Before a successful return the weights 1..3, the supports of items
 * 0 and 1 and the ends of their transaction lists are reset.
 *
 * Returns 0 on success and -1 as soon as isr_add(), isr_report() or a
 * recursive call reports an error (negative result).
 */
static int rec_neg (FIM16 *fim, int n, BITTA mask)
{
  int   item;                   /* index of the item being processed */
  int   res;                    /* call result, also used as bit index */
  SUPP  supp;                   /* support of the current item */
  BITTA sub;                    /* single transaction or filtered mask */

  assert(fim && (n >= 0));      /* check the function arguments */

  /* --- items n-1 down to 2 --- */
  for (item = n-1; item >= 2; item--) {
    supp = fim->supps[item];    /* skip infrequent/eliminated items */
    if (supp < fim->smin)
      continue;
    res = isr_add(fim->report, fim->map[item], supp);
    if (res < 0)                /* adding the item failed */
      return -1;
    if (res == 0) {             /* the item needs no processing: */
      clear(fim, item);         /* only delete its transactions */
      continue;
    }
    assert(mask & (1 << item)); /* item must not be excluded */
    if (fim->ends[item] -fim->btas[item] > 1) {
      /* more than one transaction: project to the next level, */
      /* count and filter there and search recursively */
      project(fim, item, mask, fim+1);
      count(fim+1, item);
      sub = filter(fim+1, item, supp);
      res = rec_neg(fim+1, item, sub);
      if (res < 0)
        return -1;
    }
    else {
      /* at most one transaction: empty the list, clear the weight */
      /* of the transaction and add its items as perfect extensions */
      fim->ends[item] = fim->btas[item];
      sub = *fim->btas[item];
      fim->wgts[sub] = 0;
      for (res = 0; (unsigned int)(1 << res) <= (unsigned int)sub; res++) {
        if (sub & (1 << res))
          isr_addpex(fim->report, fim->map[res]);
      }
    }
    if (isr_report(fim->report) < 0)
      return -1;                /* report the current item set */
    isr_remove(fim->report, 1); /* and remove the item again */
  }

  /* --- item 1 (together with item 0 given item 1) --- */
  if ((mask & 2) && (fim->supps[1] >= fim->smin)) {
    res = isr_add(fim->report, fim->map[1], fim->supps[1]);
    if (res < 0)
      return -1;
    if (res > 0) {              /* if item 1 needs processing */
      /* wgts[3] is used as the support of items 0 and 1 together */
      if ((mask & 1) && (fim->wgts[3] >= fim->smin)) {
        if (fim->wgts[3] >= fim->supps[1]) {
          /* item 0 is a perfect extension of item 1 */
          isr_addpex(fim->report, fim->map[0]);
        }
        else {
          res = isr_add(fim->report, fim->map[0], fim->wgts[3]);
          if (res < 0)
            return -1;
          if (res > 0) {        /* report the pair, drop item 0 */
            if (isr_report(fim->report) < 0)
              return -1;
            isr_remove(fim->report, 1);
          }
        }
      }
      if (isr_report(fim->report) < 0)
        return -1;              /* report the set ending in item 1 */
      isr_remove(fim->report, 1);
    }
  }

  /* --- item 0 alone --- */
  if ((mask & 1) && (fim->supps[0] >= fim->smin)) {
    res = isr_add(fim->report, fim->map[0], fim->supps[0]);
    if (res < 0)
      return -1;
    if (res > 0) {              /* if item 0 needs processing */
      if (isr_report(fim->report) < 0)
        return -1;
      isr_remove(fim->report, 1);
    }
  }

  /* --- reset the state of items 0 and 1 --- */
  memset(fim->wgts+1, 0, 3*sizeof(SUPP));
  memset(fim->supps,  0, 2*sizeof(SUPP));
  memcpy(fim->ends, fim->btas, 2*sizeof(BITTA*));
  return 0;                     /* return 'ok' */
}  /* rec_neg() */
/*
 * rec_pos -- recursive search that processes the items in ascending
 *            order of their index.
 *
 * fim   16 items machine; unlike rec_neg() all projections are made
 *       within this same machine level
 * n     number of items to process (items 0 .. n-1)
 * mask  bit mask of the items that may still be used (bit i <-> item i)
 *
 * Items 0 to 3 are evaluated directly on the weight table: for item 2
 * (item 3) the upper part of the table is folded down onto the entries
 * 1..3 (1..7), the supports of the lower items are recomputed from it
 * and the function recurses with n = 2 (n = 3). Items with index >= 4
 * are handled with project()/count()/filter() and a recursive call.
 * The item mask has to be checked for the low items, because they may
 * have been identified as perfect extensions on higher recursion
 * levels. Weights, supports and list ends of the directly handled
 * items are reset before the function continues or returns.
 *
 * Returns 0 on success and -1 as soon as isr_add(), isr_report() or a
 * recursive call reports an error (negative result).
 */
static int rec_pos (FIM16 *fim, int n, BITTA mask)
{
  int   item;                   /* index of the item being processed */
  int   res;                    /* call result, also used as bit index */
  SUPP  supp;                   /* support of an item */
  BITTA sub;                    /* single transaction or filtered mask */

  assert(fim && (n >= 0));      /* check the function arguments */

  /* --- item 0 alone --- */
  if ((mask & 1) && (fim->supps[0] >= fim->smin)) {
    res = isr_add(fim->report, fim->map[0], fim->supps[0]);
    if (res < 0)
      return -1;
    if (res > 0) {              /* if item 0 needs processing */
      if (isr_report(fim->report) < 0)
        return -1;
      isr_remove(fim->report, 1);
    }
  }

  /* --- item 1 (together with item 0 given item 1) --- */
  if ((mask & 2) && (fim->supps[1] >= fim->smin)) {
    res = isr_add(fim->report, fim->map[1], fim->supps[1]);
    if (res < 0)
      return -1;
    if (res > 0) {              /* if item 1 needs processing */
      /* wgts[3] is used as the support of items 0 and 1 together */
      if ((mask & 1) && (fim->wgts[3] >= fim->smin)) {
        if (fim->wgts[3] >= fim->supps[1]) {
          /* item 0 is a perfect extension of item 1 */
          isr_addpex(fim->report, fim->map[0]);
        }
        else {
          res = isr_add(fim->report, fim->map[0], fim->wgts[3]);
          if (res < 0)
            return -1;
          if (res > 0) {        /* report the pair, drop item 0 */
            if (isr_report(fim->report) < 0)
              return -1;
            isr_remove(fim->report, 1);
          }
        }
      }
      if (isr_report(fim->report) < 0)
        return -1;              /* report the set ending in item 1 */
      isr_remove(fim->report, 1);
    }
  }

  if (n <= 2) {                 /* if only two items to process, */
    /* reset the state of items 0 and 1 and stop here */
    memset(fim->wgts+1, 0, 3*sizeof(SUPP));
    memset(fim->supps,  0, 2*sizeof(SUPP));
    memcpy(fim->ends, fim->btas, 2*sizeof(BITTA*));
    return 0;
  }

  /* --- item 2 --- */
  if ((mask & 4) && (fim->supps[2] >= fim->smin)) {
    res = isr_add(fim->report, fim->map[2], fim->supps[2]);
    if (res < 0)
      return -1;
    if (res > 0) {              /* if item 2 needs processing */
      sub = 0;                  /* start with an empty item mask */
      /* fold the weights 5..7 down onto the weights 1..3 */
      fim->wgts[3] = fim->wgts[7];
      fim->wgts[2] = fim->wgts[6];
      fim->wgts[1] = fim->wgts[5];
      /* item 0: perfect extension, frequent or to be ignored */
      supp = fim->wgts[3] +fim->wgts[1];
      fim->supps[0] = supp;
      if      (supp >= fim->supps[2])
        isr_addpex(fim->report, fim->map[0]);
      else if (supp >= fim->smin)
        sub = (BITTA)(sub | (mask & 1));
      /* item 1: perfect extension, frequent or to be ignored */
      supp = fim->wgts[3] +fim->wgts[2];
      fim->supps[1] = supp;
      if      (supp >= fim->supps[2])
        isr_addpex(fim->report, fim->map[1]);
      else if (supp >= fim->smin)
        sub = (BITTA)(sub | (mask & 2));
      res = rec_pos(fim, 2, sub);  /* search items 0 and 1 */
      if (res < 0)
        return -1;
      if (isr_report(fim->report) < 0)
        return -1;              /* report the set ending in item 2 */
      isr_remove(fim->report, 1);
    }
  }

  if (n <= 3) {                 /* if only three items to process, */
    /* reset the state of items 0, 1 and 2 and stop here */
    memset(fim->wgts+1, 0, 7*sizeof(SUPP));
    memset(fim->supps,  0, 3*sizeof(SUPP));
    memcpy(fim->ends, fim->btas, 3*sizeof(BITTA*));
    return 0;
  }

  /* --- item 3 --- */
  if ((mask & 8) && (fim->supps[3] >= fim->smin)) {
    res = isr_add(fim->report, fim->map[3], fim->supps[3]);
    if (res < 0)
      return -1;
    if (res > 0) {              /* if item 3 needs processing */
      sub = 0;                  /* start with an empty item mask */
      /* fold the weights 9..15 down: 12..15 are copied to 4..7, */
      /* the entries 1..3 get the sums 9+13, 10+14 and 11+15 */
      fim->wgts[4] = fim->wgts[12];
      fim->wgts[7] = fim->wgts[15];
      fim->wgts[3] = fim->wgts[11] +fim->wgts[7];
      fim->wgts[6] = fim->wgts[14];
      fim->wgts[2] = fim->wgts[10] +fim->wgts[6];
      fim->wgts[5] = fim->wgts[13];
      fim->wgts[1] = fim->wgts[ 9] +fim->wgts[5];
      /* item 0: perfect extension, frequent or to be ignored */
      supp = fim->wgts[3] +fim->wgts[1];
      fim->supps[0] = supp;
      if      (supp >= fim->supps[3])
        isr_addpex(fim->report, fim->map[0]);
      else if (supp >= fim->smin)
        sub = (BITTA)(sub | (mask & 1));
      /* item 1: perfect extension, frequent or to be ignored */
      supp = fim->wgts[3] +fim->wgts[2];
      fim->supps[1] = supp;
      if      (supp >= fim->supps[3])
        isr_addpex(fim->report, fim->map[1]);
      else if (supp >= fim->smin)
        sub = (BITTA)(sub | (mask & 2));
      /* item 2: perfect extension, frequent or to be ignored */
      supp = fim->wgts[7] +fim->wgts[6] +fim->wgts[5] +fim->wgts[4];
      fim->supps[2] = supp;
      if      (supp >= fim->supps[3])
        isr_addpex(fim->report, fim->map[2]);
      else if (supp >= fim->smin)
        sub = (BITTA)(sub | (mask & 4));
      res = rec_pos(fim, 3, sub);  /* search items 0, 1 and 2 */
      if (res < 0)
        return -1;
      if (isr_report(fim->report) < 0)
        return -1;              /* report the set ending in item 3 */
      isr_remove(fim->report, 1);
    }
  }

  /* --- reset the state of items 0 to 3 --- */
  memset(fim->wgts+1, 0, 15*sizeof(SUPP));
  memset(fim->supps,  0,  4*sizeof(SUPP));
  memcpy(fim->ends, fim->btas, 4*sizeof(BITTA*));

  /* --- items 4 to n-1 --- */
  for (item = 4; item < n; item++) {
    supp = fim->supps[item];    /* skip infrequent/eliminated items */
    if (supp < fim->smin)
      continue;
    res = isr_add(fim->report, fim->map[item], supp);
    if (res < 0)                /* adding the item failed */
      return -1;
    if (res == 0) {             /* the item needs no processing: */
      clear(fim, item);         /* only delete its transactions */
      continue;
    }
    assert(mask & (1 << item)); /* item must not be excluded */
    if (fim->ends[item] -fim->btas[item] > 1) {
      /* more than one transaction: project within this machine, */
      /* count and filter the items and search recursively */
      project(fim, item, mask, fim);
      count(fim, item);
      sub = filter(fim, item, supp);
      res = rec_pos(fim, item, sub);
      if (res < 0)
        return -1;
    }
    else {
      /* at most one transaction: empty the list, clear the weight */
      /* of the transaction and add its items as perfect extensions */
      fim->ends[item] = fim->btas[item];
      sub = *fim->btas[item];
      fim->wgts[sub] = 0;
      for (res = 0; (unsigned int)(1 << res) <= (unsigned int)sub; res++) {
        if (sub & (1 << res))
          isr_addpex(fim->report, fim->map[res]);
      }
    }
    if (isr_report(fim->report) < 0)
      return -1;                /* report the current item set */
    isr_remove(fim->report, 1); /* and remove the item again */
  }
  return 0;                     /* return 'ok' */
}  /* rec_pos() */