static void error (int code, ...) { #ifndef QUIET va_list args; const char *msg; assert(prgname); if (code < E_UNKNOWN) code = E_UNKNOWN; if (code < 0) { msg = errmsgs[-code]; if (!msg) msg = errmsgs[-E_UNKNOWN]; fprintf(stderr, "\n%s: ", prgname); va_start(args, code); vfprintf(stderr, msg, args); va_end(args); } #endif #ifndef NDEBUG if (map) free(map); if (isrep) isr_delete(isrep, 0); if (istree) ist_delete(istree); if (tatree) tt_delete(tatree, 0); if (tabag) tb_delete(tabag, 0); if (ibase) ib_delete(ibase); if (in && (in != stdin)) fclose(in); if (out && (out != stdout)) fclose(out); #endif #ifdef STORAGE showmem("at end of program"); #endif exit(code); } /* error() */
static void error (int code, ...) { /* --- print an error message */ #ifndef QUIET /* if not quiet version */ va_list args; /* list of variable arguments */ const char *msg; /* error message */ assert(prgname); /* check the program name */ if (code < E_UNKNOWN) code = E_UNKNOWN; if (code < 0) { /* if to report an error, */ msg = errmsgs[-code]; /* get the error message */ if (!msg) msg = errmsgs[-E_UNKNOWN]; fprintf(stderr, "\n%s: ", prgname); va_start(args, code); /* get variable arguments */ vfprintf(stderr, msg, args);/* print error message */ va_end(args); /* end argument evaluation */ } #endif #ifndef NDEBUG /* if debug version */ if (map) free(map); /* clean up memory */ if (isrep) isr_delete(isrep, 0); /* and close files */ if (istree) ist_delete(istree); if (tatree) tt_delete(tatree, 0); if (tabag) tb_delete(tabag, 0); if (ibase) ib_delete(ibase); if (in && (in != stdin)) fclose(in); if (out && (out != stdout)) fclose(out); #endif #ifdef STORAGE /* if storage debugging */ showmem("at end of program"); /* check memory usage */ #endif exit(code); /* abort the program */ } /* error() */
/* test test_tt_setbyte() */ void test_tt_setbyte(void) { int i, rc; int rows = 10; int cols = rows * 8; tag_table *tt = NULL; char *row_ptr, *octet_ptr; char octet1 = (char)0x82; char octet2 = (char)0x91; char octet; /* create the tt object */ rc = tt_create(&tt, rows, cols); CU_ASSERT_EQUAL_FATAL(rc, TT_SUCCESS); CU_ASSERT_PTR_NOT_NULL_FATAL(tt); /* systematically set various bits within the table */ for (i = 0; i < rows; i++) { rc = tt_setbyte(tt, i, i, octet1); if (rc != TT_SUCCESS) CU_FAIL("tt_setbits() returned an error code"); rc = tt_setbyte(tt, i, (i + 1) % 8, octet1); if (rc != TT_SUCCESS) CU_FAIL("tt_setbits() returned an error code"); rc = tt_setbyte(tt, i, (i + 1) % 8, octet2); if (rc != TT_SUCCESS) CU_FAIL("tt_setbits() returned an error code"); rc = tt_setbyte(tt, i, (i + 2) % 8, octet2); if (rc != TT_SUCCESS) CU_FAIL("tt_setbits() returned an error code"); } /* check those bits */ for (i = 0; i < rows; i++) { rc = tt_getrow(tt, i, &row_ptr); if (rc != TT_SUCCESS || row_ptr == NULL) CU_FAIL("tt_getrow() failed"); octet_ptr = row_ptr + i; octet = *octet_ptr; if (octet != octet1) CU_FAIL("matching of first octet failed"); octet_ptr = row_ptr + ((i + 1) % 8); octet = *octet_ptr; if (octet != octet2) CU_FAIL("matching of second octet failed"); octet_ptr = row_ptr + ((i + 2) % 8); octet = *octet_ptr; if (octet != octet2) CU_FAIL("matching of third octet failed"); } /* destroy tt object */ rc = tt_delete(&tt); CU_ASSERT_EQUAL(rc, TT_SUCCESS); CU_ASSERT_PTR_NULL(tt); }
/* test test_tt_getcount_col() */ void test_tt_getcount_col(void) { int i, j, rc; int count = 999; int rows = 10; int cols = rows * 8; tag_table *tt = NULL; char octet1 = (char)0x00; char octet2 = (char)0xff; /* create the tt object */ rc = tt_create(&tt, rows, cols); CU_ASSERT_EQUAL_FATAL(rc, TT_SUCCESS); CU_ASSERT_PTR_NOT_NULL_FATAL(tt); /* check count for newly created table */ for (i = 0; i < cols; i++) { rc = tt_getcount_col(tt, i, &count); if (rc != TT_SUCCESS) CU_FAIL("tt_getcount() returned with an error"); if (count != 0) CU_FAIL("count != 0"); } /* systematically set various bits within the table */ for (i = 0; i < rows; i++) { for (j = 0; j < (int)tt->row_size; j++) { if (i % 2 == 0) { rc = tt_setbyte(tt, i, j, octet1); if (rc != TT_SUCCESS) CU_FAIL("tt_setbits() returned an error code"); } else { rc = tt_setbyte(tt, i, j, octet2); if (rc != TT_SUCCESS) CU_FAIL("tt_setbits() returned an error code"); } } } /* recheck count for table */ for (i = 0; i < cols; i++) { rc = tt_getcount_col(tt, i, &count); if (rc != TT_SUCCESS) CU_FAIL("tt_getcount() returned with an error"); if (count != (rows / 2)) CU_FAIL("count != rows/2"); } /* destroy tt object */ rc = tt_delete(&tt); CU_ASSERT_EQUAL(rc, TT_SUCCESS); CU_ASSERT_PTR_NULL(tt); }
/*! * \brief Tag messages in the board * \param[in] node address of CommQueue node * \return Return Code * * This routine is to be executed during the firs communication stage * (::MB_COMM_OLD_PRE_TAGGING). * * Pre: * - node->mb is valid * - board->locked == ::MB_TRUE * - board->syncCompleted == ::MB_FALSE * - node->stage == ::MB_COMM_OLD_PRE_TAGGING * - node->flag_fdrFallback = ::MB_FALSE * - node->flag_shareOutbuf = ::MB_FALSE * - node->incount == \c NULL * - node->outcount == \c NULL * - node->inbuf == \c NULL * - node->outbuf == \c NULL * - node->sendreq == \c NULL * - node->recvreq == \c NULL * - node->board == \c NULL * * Steps: * -# Get pointer to board object and cache it in node->board * -# Allocate memory for node->outcount * -# If node->mb->filter == \c NULL or node->board->data->count_current = 0 * -# set node->outcount[*] = node->board->data->count_current * -# set node->flag_shareOutbuf = ::MB_TRUE * -# If node->board->filter != \c NULL * -# Use node->board->filter to build tag table in node->board->tt * -# Allocate memory for node->outcount * -# Initialise values in node->outcount[] based on contents of node->board->tt. * Keep count of outcount total as we go along. If total > node->board->data->count_current, * fallback to full data replication * - clear tag table * - set node->outcount[*] = node->board->data->count_current * - set node->flag_fdrFallback = ::MB_TRUE * - set node->flag_shareOutbuf = ::MB_TRUE * -# set node->stage to ::MB_COMM_OLD_READY_FOR_PROP * * Post: * - node->stage == ::MB_COMM_OLD_READY_FOR_PROP * - node->outcount != \c NULL * - node->board != \c NULL * - if (node->board->filter != NULL) * - if (node->flag_fdrFallback == ::MB_TRUE) node->board->tt == \c NULL * - if (node->flag_fdrFallback == ::MB_FALSE) node->board->tt != \c NULL * - if node->board->filter == \c NULL or node->fdr_fallback == \c ::MB_TRUE * - node->flag_shareOutbuf == ::MB_TRUE */ int MBI_CommRoutine_OLD_TagMessages(struct MBIt_commqueue *node) { char window; int rc, i, j, c, w, p; int total_tagged, mcount; void *msg; MBIt_TagTable *tt; pl_address_node *pl_itr; /* check that initial values are set properly */ assert(node->stage == MB_COMM_OLD_PRE_TAGGING); assert(node->flag_fdrFallback == MB_FALSE); assert(node->flag_shareOutbuf == MB_FALSE); assert(node->incount == NULL); assert(node->outcount == NULL); assert(node->inbuf == NULL); assert(node->outbuf == NULL); assert(node->recvreq == NULL); assert(node->sendreq == NULL); assert(node->board == NULL); /* get reference to board object and cache ptr in node */ node->board = (MBIt_Board *)MBI_getMBoardRef(node->mb); assert(node->board != NULL); if (node->board == NULL) return MB_ERR_INVALID; P_INFO("COMM: Preparing (Board %d) for sync process", node->mb); /* check board state */ assert(node->board->locked == MB_TRUE); assert(node->board->syncCompleted == MB_FALSE); /* get message count */ /* ignore messages that have already been synced */ mcount = (int)node->board->data->count_current - (int)node->board->synced_cursor; /* allocate memory for outcount */ node->outcount = (int *)calloc((size_t)MBI_CommSize, sizeof(int)); assert(node->outcount != NULL); if (node->outcount == NULL) return MB_ERR_MEMALLOC; /* determined number of messages to send to remote procs */ if (mcount == 0 || MBI_CommSize == 1) /* nothing to send */ { /* outcount already initialised to 0 (calloc) */ /*for (i = 0; i < MBI_CommSize; i++) node->outcount[i] = 0;*/ } else if (node->board->filter == (MBIt_filterfunc)NULL) /* no filter */ { /* send all messages to all procs (except self) */ for (i = 0; i < MBI_CommSize; i++) { node->outcount[i] = (i == MBI_CommRank) ? 0 : mcount; } /* outgoing buffer can be shared */ node->flag_shareOutbuf = MB_TRUE; } else /* filter assigned */ { P_INFO("COMM: (Board %d) is filtered. Tagging messages", (int)node->mb); /* create tag_table and assign to board */ rc = tt_create(&tt, mcount, MBI_CommSize); assert(rc == TT_SUCCESS); if (rc != TT_SUCCESS) { if (rc == TT_ERR_MEMALLOC) return MB_ERR_MEMALLOC; else return MB_ERR_INTERNAL; } node->board->tt = tt; /* assign to board */ /* initialise counters */ i = j = 0; total_tagged = 0; /* loop thru messages and fill up tag table */ for (pl_itr = PL_ITERATOR(node->board->data); pl_itr; pl_itr = pl_itr->next) { assert(i < (int)node->board->data->count_current); /* skip messages that have already been synced */ if (i < (int)node->board->synced_cursor) { i++; continue; } /* get reference to message from iterator */ msg = PL_NODEDATA(pl_itr); assert(msg != NULL); if (msg == NULL) return MB_ERR_INTERNAL; /* c : offset within byte buffer (window) * w : window offset within table row */ c = w = 0; SETZEROS(window); /* run filter on message per MPI task */ for (p = 0; p < MBI_CommSize; p++) { if (p != MBI_CommRank) { /* if message accepted by filter */ if (1 == (*node->board->filter)(msg, p)) { /* set bit within our byte buffer */ SETBIT(window, c); /* update outcount */ node->outcount[p]++; total_tagged++; } } /* move index within window */ c++; /* when window full, write to table and shift window */ if (c == 8) { /* write byte buffer to table */ rc = tt_setbyte(node->board->tt, j, w, window); assert(rc == TT_SUCCESS); /* move window */ w += 1; /* reset byte buffer */ SETZEROS(window); c = 0; } } /* write remaining byte buffer */ if (w < (int)node->board->tt->row_size) { rc = tt_setbyte(node->board->tt, j, w, window); assert(rc == TT_SUCCESS); } /* increment counter */ i++; j++; } assert(node->outcount[MBI_CommRank] == 0); /* Should we fall back to full data replication? */ if (total_tagged > mcount) { P_INFO("COMM: (Board %d) Tagged messages <%d> exceeds message count <%d>. " "Delegating filtering to recipient", (int)node->mb, total_tagged, mcount); /* we don't need the tagtable any more */ node->board->tt = NULL; rc = tt_delete(&tt); assert(rc == TT_SUCCESS); /* send all messages to all remote procs */ node->flag_fdrFallback = MB_TRUE; /* fallback to full data replication */ node->flag_shareOutbuf = MB_TRUE; /* use shared buffer */ for (i = 0; i < MBI_CommSize; i++) { if (node->outcount[i] != 0) node->outcount[i] = mcount; } } } /* move on to next stage */ P_INFO("COMM: (Board %d) moving to MB_COMM_OLD_READY_FOR_PROP stage", node->mb); node->stage = MB_COMM_OLD_READY_FOR_PROP; return MB_SUCCESS; }
/*! * \brief Start propagation of messages * \param[in] node address of CommQueue node * \return Return Code * * This routine is to be executed during the ::MB_COMM_OLD_PRE_PROPAGATION stage. * * Steps: * -# Allocate memory for node->inbuf (based on node->incount) * -# Issue MPI_Irecv() for each non-0 counts. node->pending_in++ * -# Allocate memory for node->outbuf * -# Set up non-blocking sends * - If node->flag_shareOutbuf == ::MB_TRUE * - Allocate memory for node->outbuf[0] + 1 byte for header * - if node->board->filter != \c NULL Set delayed_filtering flag in header to ::MB_TRUE * - if node->board->filter == \c NULL Set delayed_filtering flag in header to ::MB_FALSE * - Issue MPI_Issend() to all remote procs. node->pending_out++ * - If node->flag_shareOutbuf == ::MB_FLASE * - Ensure that node->board->filter != \c NULL and * node->flag_fdrFallback == ::MB_FALSE * - For each remote node i, if node->outcount[i] != 0 * - Allocate memory for node->outbuf[i] + 1 byte for header * - Set delayed_filtering flag in header to ::MB_FALSE * - Copy tagged messages for proc i to buffer * - delete node->board->tt * - Issue MPI_Issend(). node->pending_out++ * -# free node->outcount * -# Set node->stage == ::MB_COMM_OLD_PROPAGATION * * Post: * -# node->stage == ::MB_COMM_OLD_PROPAGATION * -# node->outcount == \c NULL * -# node->outbuf != \c NULL * -# node->inbuf != \c NULL * -# node->board->tt == \c NULL * */ int MBI_CommRoutine_OLD_InitPropagation(struct MBIt_commqueue *node) { int mcount; int w, b, p; int i, j, rc, tag, bufloc; void *msg; char *outptr, *row; char *header_byte; char **loc; char window; size_t msgsize; pl_address_node *pl_itr; #ifdef _EXTRA_CHECKS int *msg_copied; msg_copied = (int*)calloc((size_t)MBI_CommSize, sizeof(int)); #endif assert(node->stage == MB_COMM_OLD_PRE_PROPAGATION); assert(node->outcount != NULL); assert(node->incount != NULL); assert(node->sendreq != NULL); assert(node->recvreq != NULL); assert(node->pending_in == 0); assert(node->pending_out == 0); assert(node->inbuf == NULL); assert(node->outbuf == NULL); assert(node->board != NULL); /* generate unique tag from this board */ assert(node->mb <= MBI_TAG_BASE); tag = MBI_TAG_MSGDATA | node->mb; assert(tag < MBI_TAG_MAX); /* get message size and count */ msgsize = node->board->data->elem_size; mcount = (int)node->board->data->count_current - (int)node->board->synced_cursor; /* Allocate memory for input buffers */ node->inbuf = (void **)malloc(sizeof(void*) * MBI_CommSize); assert(node->inbuf != NULL); if (node->inbuf == NULL) return MB_ERR_MEMALLOC; /* Allocate memory for output buffers */ node->outbuf = (void **)malloc(sizeof(void*) * MBI_CommSize); assert(node->outbuf != NULL); if (node->outbuf == NULL) return MB_ERR_MEMALLOC; /* ------- issue receives --------- */ assert(node->incount[MBI_CommRank] == 0); for (i = 0; i < MBI_CommSize; i++) { if (node->incount[i] == 0) { /* no comms from this proc */ node->inbuf[i] = NULL; node->recvreq[i] = MPI_REQUEST_NULL; P_INFO("COMM: (Board %d) no data expected from P%d", (int)node->mb, i); } else { /* allocate memory for input buffer */ node->inbuf[i] = (void*)malloc(1 + (msgsize * node->incount[i])); assert(node->inbuf[i] != NULL); if (node->inbuf[i] == NULL) return MB_ERR_MEMALLOC; /* issue non-blocking receive */ rc = MPI_Irecv(node->inbuf[i], 1 + (int)(msgsize * node->incount[i]), MPI_BYTE, i, tag, MBI_CommWorld, &(node->recvreq[i])); assert(rc == MPI_SUCCESS); if (rc != MPI_SUCCESS) return MB_ERR_MPI; P_INFO("COMM: (Board %d) expecting %d messages from P%d", (int)node->mb, node->incount[i], i); /* increment counter */ node->pending_in++; } } /* ----------- build output buffers ----------------- */ for (i = 0; i < MBI_CommSize; i++) node->outbuf[i] = NULL; /* create output buffers and copy in messages */ if (MBI_CommSize == 1 || mcount == 0) { /* nothing to do if only one proc or no messages */ } else if (node->flag_shareOutbuf == MB_TRUE) { #ifdef _EXTRA_CHECKS /* if filter is assigned, buffer sharing only occurs during * fallback to full data replication */ if (node->board->filter != (MBIt_filterfunc)NULL && node->board->data->count_current != 0) { assert(node->flag_fdrFallback == MB_TRUE); if (node->flag_fdrFallback != MB_TRUE) return MB_ERR_INTERNAL; for (i = 0; i< MBI_CommSize; i++) { if (i == MBI_CommRank) { assert(node->outcount[i] == 0); } else { assert(node->outcount[i] == mcount || node->outcount[i] == 0); } } } #endif /* allocate shared buffer */ node->outbuf[0] = (void*)malloc(1 + /* one byte for header info */ (msgsize * mcount)); assert(node->outbuf[0] != NULL); if (node->outbuf[0] == NULL) return MB_ERR_MEMALLOC; /* set header byte */ header_byte = (char*)(node->outbuf[0]); *header_byte = ALLZEROS; if (node->flag_fdrFallback == MB_TRUE) /* set flag for FDR */ *header_byte = *header_byte | MBI_COMM_HEADERBYTE_FDR; /* location of message buffer is one byte after header */ outptr = (char*)(node->outbuf[0]) + 1; /* copy messages into output buffer */ i = j = 0; for (pl_itr = PL_ITERATOR(node->board->data); pl_itr; pl_itr = pl_itr->next) { /* skip messages that have already been synced */ if (i < (int)node->board->synced_cursor) { i++; continue; } /* get reference to message object */ msg = PL_NODEDATA(pl_itr); assert(msg != NULL); /* copy into buffer */ memcpy(outptr + (j*msgsize), msg, msgsize); /* increment counters */ i++; j++; } assert(i == (int)node->board->data->count_current); assert(j == (int)node->board->data->count_current - (int)node->board->synced_cursor); } else /* messages are tagged */ { assert(node->board->filter != (MBIt_filterfunc)NULL); assert(node->flag_fdrFallback == MB_FALSE); /* array of pointers to store next location in output buffer */ loc = (char **)malloc(sizeof(char*) * MBI_CommSize); /* initialise output buffers */ assert(node->outcount[MBI_CommRank] == 0); for (i = 0; i < MBI_CommSize; i++) { if (node->outcount[i] == 0) { loc[i] = NULL; } else { /* allocate memory for output buffers */ node->outbuf[i] = (void*)malloc(1 + (msgsize * node->outcount[i])); assert(node->outbuf[i] != NULL); if (node->outbuf[i] == NULL) return MB_ERR_MEMALLOC; /* set header byte */ header_byte = (char*)(node->outbuf[i]); *header_byte = ALLZEROS; /* move loc to first message, after header */ loc[i] = (char*)(node->outbuf[i]) + 1; } } /* copy in tagged messages */ i = j = 0; for (pl_itr = PL_ITERATOR(node->board->data); pl_itr; pl_itr = pl_itr->next) { /* skip messages that have already been synced */ if (i < (int)node->board->synced_cursor) { i++; continue; } /* get reference to message object */ msg = PL_NODEDATA(pl_itr); assert(msg != NULL); /* get ptr to row in tag table */ rc = tt_getrow(node->board->tt, j, &row); assert(rc == TT_SUCCESS); assert(row != NULL); if (rc != TT_SUCCESS || row == NULL) return MB_ERR_INTERNAL; /* w: window index within row (in units of bytes) * b: bit index within window (in units of bits) * p: process (mpi task) represented by w&b */ for (w = 0; w < (int)node->board->tt->row_size; w++) { window = *(row + w); b = 0; while (window != ALLZEROS) { if (MSB_IS_SET(window)) { /* determine which MPI task this refers to */ p = (w * 8) + b; assert(p >= 0); assert(p < MBI_CommSize); assert(p != MBI_CommRank); assert(node->outcount[p] != 0); assert(loc[p] != NULL); #ifdef _EXTRA_CHECKS /* keep track of messages copied into each buffer */ msg_copied[p] ++; assert(msg_copied[p] <= node->outcount[p]); #endif /* copy message to appropriate output buffer */ memcpy(loc[p], msg, msgsize); /* move to next free location in buffer */ loc[p] += msgsize; } /* shift bit and repeat */ window = window << 1; b++; } } /* on to next message */ i++; j++; } assert(i == (int)node->board->data->count_current); assert(j == (int)node->board->data->count_current - (int)node->board->synced_cursor); free(loc); /* tag table no longer needed */ rc = tt_delete(&(node->board->tt)); assert(rc == TT_SUCCESS); assert(node->board->tt == NULL); #ifdef _EXTRA_CHECKS for (i = 0; i < MBI_CommSize; i++) { assert(msg_copied[i] == node->outcount[i]); } #endif } /* ----------- issue sends ----------------- */ for (i = 0; i < MBI_CommSize; i++) { if (node->outcount[i] == 0) { node->sendreq[i] = MPI_REQUEST_NULL; } else { /* choose output bufer */ bufloc = (node->flag_shareOutbuf == MB_TRUE) ? 0 : i; assert(node->outbuf[bufloc] != NULL); /* issue non-blocking send */ rc = MPI_Issend(node->outbuf[bufloc], 1 + (int)(node->outcount[i] * msgsize), MPI_BYTE, i, tag, MBI_CommWorld, &(node->sendreq[i])); assert(rc == MPI_SUCCESS); if (rc != MPI_SUCCESS) return MB_ERR_MPI; P_INFO("COMM: (Board %d) sending %d messages to P%d", (int)node->mb, node->outcount[i], i); /* increment counter */ node->pending_out++; } } #ifdef _EXTRA_CHECKS free(msg_copied); #endif /* outcount no longer needed */ free(node->outcount); node->outcount = NULL; /* move on to next stage */ P_INFO("COMM: (Board %d) moving to MB_COMM_OLD_PROPAGATION stage", node->mb); node->stage = MB_COMM_OLD_PROPAGATION; return MB_SUCCESS; }
int main (int argc, char *argv[]) { int i, k = 0, n; char *s; char **optarg = NULL; char *fn_in = NULL; char *fn_out = NULL; char *fn_app = NULL; char *blanks = NULL; char *fldseps = NULL; char *recseps = NULL; char *comment = NULL; char *isep = " "; char *impl = " <- "; char *dflt = " (%1S)"; char *format = dflt; int target = 's'; int min = 1; int max = INT_MAX; double supp = 0.1; double smax = 1.0; double conf = 0.8; int dir = 0; int eval = 0; int aggm = 0; double minval = 0.1; int prune = 0; double filter = 0.1; int sort = 2; int tree = 1; int heap = 1; int post = 0; int report = 0; int mode = APP_BODY|IST_PERFECT; int size; int wgt; int frq, body, head; int *items; clock_t t, tt, tc, x; #ifndef QUIET prgname = argv[0]; if (argc > 1) { fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION); fprintf(stderr, VERSION); } else { printf("usage: %s [options] infile outfile\n", argv[0]); printf("%s\n", DESCRIPTION); printf("%s\n", VERSION); printf("-t# target type " "(default: %c)\n", target); printf(" (s: frequent item sets, c: closed item sets,\n" " m: maximal item sets, r: association rules)\n"); printf("-m# minimum number of items per set/rule " "(default: %d)\n", min); printf("-n# maximum number of items per set/rule " "(default: no limit)\n"); printf("-s# minimum support of a set/rule " "(default: %g%%)\n", supp *100); printf("-S# maximum support of a set/rule " "(default: %g%%)\n", smax *100); printf(" (positive: percentage, " "negative: absolute number)\n"); printf("-c# minimum confidence of a rule " "(default: %g%%)\n", conf *100); printf("infile file to read transactions from\n"); printf("outfile file to write item sets to\n"); return 0; } #endif for (i = 1; i < argc; i++) { s = argv[i]; if (optarg) { *optarg = s; optarg = NULL; continue; } if ((*s == '-') && *++s) { while (*s) { switch (*s++) { case '!': help(); break; case 't': target = (*s) ? *s++ : 's'; break; case 'm': min = (int)strtol(s, &s, 0); break; case 'n': max = (int)strtol(s, &s, 0); break; case 's': supp = 0.01*strtod(s, &s); break; case 'S': smax = 0.01*strtod(s, &s); break; case 'c': conf = 0.01*strtod(s, &s); break; case 'o': mode |= APP_BOTH; break; case 'e': eval = (*s) ? *s++ : 0; break; case 'a': aggm = (*s) ? *s++ : 0; break; case 'd': minval = 0.01*strtod(s, &s); break; case 'p': prune = (int)strtol(s, &s, 0); break; case 'g': report = ISR_SCAN; break; case 'k': optarg = &isep; break; case 'i': optarg = &impl; break; case 'v': optarg = &format; break; case 'l': dir = (int)strtol(s, &s, 0); break; case 'q': sort = (int)strtol(s, &s, 0); break; case 'u': filter = strtod(s, &s); break; case 'h': tree = 0; break; case 'j': heap = 0; break; case 'x': mode &= ~IST_PERFECT; break; case 'y': post = 1; break; case 'b': optarg = &blanks; break; case 'f': optarg = &fldseps; break; case 'r': optarg = &recseps; break; case 'C': optarg = &comment; break; default : error(E_OPTION, *--s); break; } if (optarg && *s) { *optarg = s; optarg = NULL; break; } } } else { switch (k++) { case 0: fn_in = s; break; case 1: fn_out = s; break; case 2: fn_app = s; break; default: error(E_ARGCNT); break; } } } if (optarg) error(E_OPTARG); if ((k < 2) || (k > 3)) error(E_ARGCNT); if ((!fn_in || !*fn_in) && (fn_app && !*fn_app)) error(E_STDIN); switch (target) { case 's': target = TT_SET; break; case 'c': target = TT_CLOSED; break; case 'm': target = TT_MAXIMAL; break; case 'r': target = TT_RULE; break; default : error(E_TARGET, (char)target); break; } if (min < 0) error(E_SIZE, min); if (max < 0) error(E_SIZE, max); if (supp > 1) error(E_SUPP, supp); if ((conf < 0) || (conf > 1)) error(E_CONF, conf); switch (eval) { case 'x': case 0: eval = IST_NONE; break; case 'c': eval = IST_CONF; break; case 'd': eval = IST_DIFF; break; case 'l': eval = IST_LIFT; break; case 'a': eval = IST_LD21; break; case 'q': eval = IST_QUOT; break; case 'n': eval = IST_CHI2; break; case 'p': eval = IST_PVAL; break; case 'i': eval = IST_INFO; break; case 'g': eval = IST_PGST; break; case 'b': eval = IST_LOGQ; break; default : error(E_MEASURE, (char)eval); break; } switch (aggm) { case 'x': case 0: aggm = IST_NONE; break; case 'm': aggm = IST_MIN; break; case 'n': aggm = IST_MAX; break; case 'a': aggm = IST_AVG; break; default : error(E_MEASURE, (char)aggm); break; } if ((target > TT_SET) || ((eval > IST_NONE) && (eval < IST_LOGQ))) mode &= ~IST_PERFECT; if (target <= TT_MAXIMAL) { mode |= APP_BOTH; conf = 1;} if ((filter <= -1) || (filter >= 1)) filter = 0; ibase = ib_create(-1); if (!ibase) error(E_NOMEM); ib_chars(ibase, blanks, fldseps, recseps, comment); MSG(stderr, "\n"); if (fn_app) { t = clock(); if (*fn_app) in = fopen(fn_app, "r"); else { in = stdin; fn_app = "<stdin>"; } MSG(stderr, "reading %s ... ", fn_app); if (!in) error(E_FOPEN, fn_app); k = ib_readapp(ibase, in); if (k != 0) error(k, fn_app, RECCNT(ibase), BUFFER(ibase)); if (in != stdin) fclose(in); in = NULL; MSG(stderr, "[%d item(s)]", ib_cnt(ibase)); MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t)); } t = clock(); if (fn_in && *fn_in) in = fopen(fn_in, "r"); else { in = stdin; fn_in = "<stdin>"; } MSG(stderr, "reading %s ... ", fn_in); if (!in) error(E_FOPEN, fn_in); tabag = tb_create(ibase); if (!tabag) error(E_NOMEM); while (1) { k = ib_read(ibase, in); if (k) { if (k > 0) break; error(k, fn_in, RECCNT(ibase), BUFFER(ibase)); } if (tb_add(tabag, NULL) != 0) error(E_NOMEM); } if (in != stdin) fclose(in); in = NULL; n = ib_cnt(ibase); k = tb_cnt(tabag); wgt = tb_wgt(tabag); MSG(stderr, "[%d item(s), ", n); if (k == wgt) MSG(stderr, "%d transaction(s)]", k); else MSG(stderr, "%d/%d transaction(s)]", k, wgt); MSG(stderr, " done [%.2fs].", SEC_SINCE(t)); if ((n <= 0) || (wgt <= 0)) error(E_NOTRANS); MSG(stderr, "\n"); if (format == dflt) { if (target != TT_RULE) format = (supp < 0) ? " (%a)" : " (%1S)"; else format = (supp < 0) ? " (%b, %1C)" : " (%1X, %1C)"; } supp = ceil (((supp < 0) ? -100 : wgt) *supp); smax = floor(((smax < 0) ? -100 : wgt) *smax); t = clock(); MSG(stderr, "filtering, sorting and recoding items ... "); map = (int*)malloc(n *sizeof(int)); if (!map) error(E_NOMEM); k = (int)((mode & APP_HEAD) ? supp : ceil(supp *conf)); n = ib_recode(ibase, k, sort, map); tb_recode(tabag, map); tb_itsort(tabag, 1, heap); free(map); map = NULL; MSG(stderr, "[%d item(s)] done [%.2fs].", n, SEC_SINCE(t)); if (n <= 0) error(E_NOFREQ); MSG(stderr, "\n"); k = tb_max(tabag); if (max > k) max = k; t = clock(); MSG(stderr, "reducing transactions ... "); tb_filter(tabag, min, NULL); tb_sort(tabag, 1, heap); k = tb_reduce(tabag); if (k == wgt) MSG(stderr, "[%d transaction(s)]", k); else MSG(stderr, "[%d/%d transaction(s)]", k, wgt); MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t)); tt = 0; if (tree) { t = clock(); MSG(stderr, "building transaction tree ... "); tatree = tt_create(tabag); if (!tatree) error(E_NOMEM); if (filter == 0) { tb_delete(tabag, 0); tabag = NULL; } MSG(stderr, "[%d node(s)]", tt_nodecnt(tatree)); MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t)); tt = clock() -t; } t = clock(); tc = 0; istree = ist_create(ibase, mode, (int)supp, (int)smax, conf); if (!istree) error(E_NOMEM); ist_seteval(istree, eval, aggm, minval, prune); /* --- check item subsets --- */ MSG(stderr, "checking subsets of size 1"); map = (int*)malloc(n *sizeof(int)); if (!map) error(E_NOMEM); while (1) { size = ist_height(istree); if (size >= max) break; if ((filter != 0) && (ist_check(istree, map) <= size)) break; if (post) ist_prune(istree); k = ist_addlvl(istree); if (k) { if (k > 0) break; error(E_NOMEM); } if (((filter < 0) && (i < -filter *n)) || ((filter > 0) && (i < n) && (i *(double)tt < filter *n *tc))) { n = i; x = clock(); tb_filter(tabag, size+1, map); tb_sort(tabag, 0, heap); tb_reduce(tabag); if (tatree) { tt_delete(tatree, 0); tatree = tt_create(tabag); if (!tatree) error(E_NOMEM); } tt = clock() -x; } MSG(stderr, " %d", ++size); x = clock(); if (tatree) ist_countx(istree, tatree); else ist_countb(istree, tabag); tc = clock() -x; } free(map); map = NULL; MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t)); if ((target == TT_CLOSED) || (target == TT_MAXIMAL)) { t = clock(); MSG(stderr, "filtering for %s item sets ... ", (target == TT_MAXIMAL) ? "maximal" : "closed"); k = target | ((prune < 0) ? IST_EVAL : 0); ist_mark(istree, k); MSG(stderr, "done [%.2fs].\n", SEC_SINCE(t)); } t = clock(); if (fn_out && *fn_out) out = fopen(fn_out, "w"); else { out = stdout; fn_out = "<stdout>"; } MSG(stderr, "writing %s ... ", fn_out); if (!out) error(E_FOPEN, fn_out); if (eval == IST_LOGQ) report |= ISR_LOGS; if ((target == TT_CLOSED) || (target == TT_MAXIMAL)) report |= ISR_CLOSED; isrep = isr_create(ibase, out, report, isep, impl); if (!isrep) error(E_NOMEM); isr_setfmt (isrep, format); isr_setsize(isrep, min, max); ist_setsize(istree, min, max, dir); ist_init (istree); items = t_items(ib_tract(ibase)); if ((target <= TT_MAXIMAL) && (dir == 0)) { if (eval == IST_LOGQ) isr_seteval(isrep, isr_logq, NULL, minval); else if (eval > IST_NONE) isr_seteval(isrep, ist_evalx, istree, minval); n = ist_report(istree, isrep); } else if (target <= TT_MAXIMAL) { for (n = 0; 1; ) { k = ist_set(istree, items, &frq, &minval); if (k < 0) break; if (k > 0) fputs(isr_name(isrep, items[0]), out); for (i = 0; ++i < k; ) { fputs(isep, out); fputs(isr_name(isrep, items[i]), out); } if (format) isr_sinfo(isrep, frq, minval); fputc('\n', out); n++; } } else if (target == TT_RULE) { for (n = 0; 1; ) { k = ist_rule(istree, items, &frq, &body, &head, &minval); if (k < 0) break; fputs(isr_name(isrep, items[0]), out); fputs(impl, out); if (k > 1) fputs(isr_name(isrep, items[1]), out); for (i = 1; ++i < k; ) { fputs(isep, out); fputs(isr_name(isrep, items[i]), out); } if (format) isr_rinfo(isrep, frq, body, head, minval); fputc('\n', out); n++; } } / if (fflush(out) != 0) error(E_FWRITE, fn_out);
int main (int argc, char *argv[]) { /* --- main function */ int i, k = 0, n; /* loop variables, counters */ char *s; /* to traverse the options */ char **optarg = NULL; /* option argument */ char *fn_in = NULL; /* name of input file */ char *fn_out = NULL; /* name of output file */ char *fn_app = NULL; /* name of item appearances file */ char *blanks = NULL; /* blanks */ char *fldseps = NULL; /* field separators */ char *recseps = NULL; /* record separators */ char *comment = NULL; /* comment indicators */ char *isep = " "; /* item separator for output */ char *impl = " <- "; /* implication sign for ass. rules */ char *dflt = " (%1S)"; /* default format for check */ char *format = dflt; /* format for information output */ int target = 's'; /* target type (sets/rules/h.edges) */ int min = 1; /* minimum rule/item set size */ int max = INT_MAX; /* maximum rule/item set size */ double supp = 10; /* minimum support (in percent) */ double smax = 100; /* maximum support (in percent) */ double conf = 80; /* minimum confidence (in percent) */ int dir = 0; /* direction for size sorting */ int eval = 0; /* additional evaluation measure */ int zero = 0; /* flag for zero eval. below expect. */ int aggm = 0; /* aggregation mode for eval. measure */ double minval = 10; /* minimum evaluation measure value */ int prune = 0; /* (min. size for) evaluation pruning */ double filter = 0.1; /* item usage filtering parameter */ int sort = 2; /* flag for item sorting and recoding */ int tree = 1; /* flag for transaction tree */ int heap = 1; /* flag for heap sort vs. quick sort */ int post = 0; /* flag for a-posteriori pruning */ int report = 0; /* other flags for reporting */ int mode = APP_BODY|IST_PERFECT; /* search mode */ int size; /* current item set size */ int wgt; /* total transaction weight */ int frq, body, head; /* frequency of an item set */ int *items; /* item set (for reporting) */ clock_t t, tt, tc, x; /* timers for measurements */ #ifndef QUIET /* if not quiet version */ prgname = argv[0]; /* get program name for error msgs. */ /* --- print usage message --- */ if (argc > 1) { /* if arguments are given */ fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION); fprintf(stderr, VERSION); } /* print a startup message */ else { /* if no arguments are given */ printf("usage: %s [options] infile outfile [appfile]\n", argv[0]); printf("%s\n", DESCRIPTION); printf("%s\n", VERSION); printf("-t# target type " "(default: %c)\n", target); printf(" (s: frequent item sets, c: closed item sets,\n" " m: maximal item sets, r: association rules)\n"); printf("-m# minimum number of items per set/rule " "(default: %d)\n", min); printf("-n# maximum number of items per set/rule " "(default: no limit)\n"); printf("-s# minimum support of a set/rule " "(default: %g%%)\n", supp); printf("-S# maximum support of a set/rule " "(default: %g%%)\n", smax); printf(" (positive: percentage, " "negative: absolute number)\n"); printf("-c# minimum confidence of a rule " "(default: %g%%)\n", conf); printf("-o use the original rule support definition " "(body & head)\n"); printf("-e# additional evaluation measure " "(default: none)\n"); printf("-a# aggregation mode for evaluation measure " "(default: none)\n"); printf("-z zero evaluation below expected support " "(default: evaluate all)\n"); printf("-d# minimum value of add. evaluation measure " "(default: %g%%)\n", minval); printf("-p# (min. size for) pruning with evaluation " "(default: no pruning)\n"); printf(" (< 0: backward, > 0: forward)\n"); printf("-l# sort item sets in output by their size " "(default: no sorting)\n"); printf(" (< 0: descending, > 0: ascending)\n"); printf("-g write item names in scanable form " "(quote certain characters)\n"); printf("-k# item separator for output " "(default: \"%s\")\n", isep); printf("-i# implication sign for association rules " "(default: \"%s\")\n", impl); printf("-v# output format for set/rule information " "(default: \"%s\")\n", format); printf("-q# sort items w.r.t. their frequency " "(default: %d)\n", sort); printf(" (1: ascending, -1: descending, 0: do not sort,\n" " 2: ascending, -2: descending w.r.t. " "transaction size sum)\n"); printf("-u# filter unused items from transactions " "(default: %g)\n", filter); printf(" (0: do not filter items w.r.t. usage in sets,\n" " <0: fraction of removed items for filtering,\n" " >0: take execution times ratio into account)\n"); printf("-j use quicksort to sort the transactions " "(default: heapsort)\n"); printf("-x do not prune the search " "with perfect extensions\n"); printf("-y a-posteriori pruning of infrequent item sets\n"); printf("-h do not organize transactions as a prefix tree\n"); printf("-b# blank characters " "(default: \" \\t\\r\")\n"); printf("-f# field separators " "(default: \" \\t,\")\n"); printf("-r# record separators " "(default: \"\\n\")\n"); printf("-C# comment characters " "(default: \"#\")\n"); printf("-! print additional option information\n"); printf("infile file to read transactions from\n"); printf("outfile file to write item sets/association rules" "/hyperedges to\n"); printf("appfile file stating item appearances (optional)\n"); return 0; /* print a usage message */ } /* and abort the program */ #endif /* #ifndef QUIET */ /* free option characters: w [A-Z]\[SC] */ /* --- evaluate arguments --- */ for (i = 1; i < argc; i++) { /* traverse the arguments */ s = argv[i]; /* get an option argument */ if (optarg) { *optarg = s; optarg = NULL; continue; } if ((*s == '-') && *++s) { /* -- if argument is an option */ while (*s) { /* traverse the options */ switch (*s++) { /* evaluate the options */ case '!': help(); break; case 't': target = (*s) ? *s++ : 's'; break; case 'm': min = (int)strtol(s, &s, 0); break; case 'n': max = (int)strtol(s, &s, 0); break; case 's': supp = strtod(s, &s); break; case 'S': smax = strtod(s, &s); break; case 'c': conf = strtod(s, &s); break; case 'o': mode |= APP_BOTH; break; case 'e': eval = (*s) ? *s++ : 0; break; case 'z': zero = IST_ZERO; break; case 'a': aggm = (*s) ? *s++ : 0; break; case 'd': minval = strtod(s, &s); break; case 'p': prune = (int)strtol(s, &s, 0); break; case 'g': report = ISR_SCAN; break; case 'k': optarg = &isep; break; case 'i': optarg = &impl; break; case 'v': optarg = &format; break; case 'l': dir = (int)strtol(s, &s, 0); break; case 'q': sort = (int)strtol(s, &s, 0); break; case 'u': filter = strtod(s, &s); break; case 'h': tree = 0; break; case 'j': heap = 0; break; case 'x': mode &= ~IST_PERFECT; break; case 'y': post = 1; break; case 'b': optarg = &blanks; break; case 'f': optarg = &fldseps; break; case 'r': optarg = &recseps; break; case 'C': optarg = &comment; break; default : error(E_OPTION, *--s); break; } /* set the option variables */ if (optarg && *s) { *optarg = s; optarg = NULL; break; } } } /* get an option argument */ else { /* -- if argument is no option */ switch (k++) { /* evaluate non-options */ case 0: fn_in = s; break; case 1: fn_out = s; break; case 2: fn_app = s; break; default: error(E_ARGCNT); break; } /* note filenames */ } } if (optarg) error(E_OPTARG); /* check the option argument */ if ((k < 2) || (k > 3)) /* and the number of arguments */ error(E_ARGCNT); /* (either in/out or in/out/app) */ if ((!fn_in || !*fn_in) && (fn_app && !*fn_app)) error(E_STDIN); /* stdin must not be used twice */ switch (target) { /* check and translate target type */ case 's': target = TT_ALL; break; case 'c': target = TT_CLOSED; break; case 'm': target = TT_MAXIMAL; break; case 'r': target = TT_RULE; break; default : error(E_TARGET, (char)target); break; } if (min < 0) error(E_SIZE, min); /* check the limits */ if (max < 0) error(E_SIZE, max); /* for the set size */ if (supp > 100) /* check the minimum support */ error(E_SUPP, supp); /* (< 0: absolute support) */ if ((conf < 0) || (conf > 100)) error(E_CONF, conf); /* check the minimum confidence */ switch (eval) { /* check and translate measure */ case 'x': case 0: eval = IST_NONE; break; case 'c': eval = IST_CONF; break; case 'd': eval = IST_CONF_DIFF; break; case 'l': eval = IST_LIFT; break; case 'a': eval = IST_LIFT_DIFF; break; case 'q': eval = IST_LIFT_QUOT; break; case 'v': eval = IST_CVCT; break; case 'e': eval = IST_CVCT_DIFF; break; case 'r': eval = IST_CVCT_QUOT; break; case 'f': eval = IST_CERT; break; case 'n': eval = IST_CHI2; break; case 'p': eval = IST_CHI2_PVAL; break; case 'i': eval = IST_INFO; break; case 'g': eval = IST_INFO_PVAL; break; case 'b': eval = IST_LOGQ; break; default : error(E_MEASURE, (char)eval); break; } switch (aggm) { /* check and translate agg. mode */ case 'x': case 0: aggm = IST_NONE; break; case 'm': aggm = IST_MIN; break; case 'n': aggm = IST_MAX; break; case 'a': aggm = IST_AVG; break; default : error(E_MEASURE, (char)aggm); break; } if ((target > TT_ALL) /* if individual set counters needed */ || ((eval > IST_NONE) && (eval < IST_LOGQ))) mode &= ~IST_PERFECT; /* remove perfect extension pruning */ if (target <= TT_MAXIMAL) { /* remove rule specific settings */ mode |= APP_BOTH; conf = 100; } if ((filter <= -1) || (filter >= 1)) filter = 0; /* check and adapt the filter option */ if (dir) /* if to sort output by size, */ mode &= ~IST_PERFECT; /* do not use perfect ext. pruning */ /* --- create item base --- */ ibase = ib_create(0, 0); /* create an item base and */ if (!ibase) error(E_NOMEM); /* set the special characters */ ib_chars(ibase, blanks, fldseps, recseps, "", comment); MSG(stderr, "\n"); /* terminate the startup message */ /* --- read item appearance indicators --- */ if (fn_app) { /* if item appearances are given */ t = clock(); /* start the timer for the reading */ if (*fn_app) /* if an app. file name is given, */ in = fopen(fn_app, "r"); /* open the item appearances file */ else { /* if no app. file name is given, */ in = stdin; fn_app = "<stdin>"; } /* read from std. input */ MSG(stderr, "reading %s ... ", fn_app); if (!in) error(E_FOPEN, fn_app); k = ib_readapp(ibase, in); /* read the item appearances */ if (k != 0) error(k, fn_app, RECCNT(ibase), BUFFER(ibase)); if (in != stdin) fclose(in);/* if not read from standard input, */ in = NULL; /* close the input file */ MSG(stderr, "[%d item(s)]", ib_cnt(ibase)); MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t)); } /* print a log message */ /* --- read transactions --- */ t = clock(); /* start the timer for the reading */ if (fn_in && *fn_in) /* if an input file name is given, */ in = fopen(fn_in, "r"); /* open input file for reading */ else { /* if no input file name is given, */ in = stdin; fn_in = "<stdin>"; } /* read from standard input */ MSG(stderr, "reading %s ... ", fn_in); if (!in) error(E_FOPEN, fn_in); tabag = tb_create(ibase, 0); /* create a transaction bag/multiset */ if (!tabag) error(E_NOMEM); /* to store the transactions */ while (1) { /* transaction read loop */ k = ib_read(ibase, in); /* read the next transaction */ if (k) { if (k > 0) break; /* check for error and end of file */ error(k, fn_in, RECCNT(ibase), BUFFER(ibase)); } if (tb_add(tabag, NULL) != 0) error(E_NOMEM); } /* add transaction to bag/multiset */ if (in != stdin) fclose(in); /* if not read from standard input, */ in = NULL; /* close the input file */ n = ib_cnt(ibase); /* get the number of items */ k = tb_cnt(tabag); /* get the number of transactions */ wgt = tb_wgt(tabag); /* the total transaction weight */ MSG(stderr, "[%d item(s), ", n); if (k == wgt) MSG(stderr, "%d transaction(s)]", k); else MSG(stderr, "%d/%d transaction(s)]", k, wgt); MSG(stderr, " done [%.2fs].", SEC_SINCE(t)); if ((n <= 0) || (wgt <= 0)) /* check for at least one item */ error(E_NOTRANS); /* and at least one transaction */ MSG(stderr, "\n"); /* terminate the log message */ if (format == dflt) { /* if default info. format is used */ if (target != TT_RULE) format = (supp < 0) ? " (%a)" : " (%1S)"; else format = (supp < 0) ? " (%b, %1C)" : " (%1X, %1C)"; } /* set default according to target */ supp = ceil ((supp >= 0) ? 0.01 *supp *wgt : -supp); smax = floor((smax >= 0) ? 0.01 *smax *wgt : -smax); conf *= 0.01; /* transform support and confidence */ /* --- sort and recode items --- */ t = clock(); /* compute absolute support values */ MSG(stderr, "filtering, sorting and recoding items ... "); map = (int*)malloc(n *sizeof(int)); if (!map) error(E_NOMEM); /* create an item identifier map */ k = (int)((mode & APP_HEAD) ? supp : ceil(supp *conf)); n = ib_recode(ibase, k, sort, map); tb_recode(tabag, map); /* recode the items and transactions */ tb_itsort(tabag, 1, heap); /* and sort items in transactions */ free(map); map = NULL; /* delete the item identifier map */ MSG(stderr, "[%d item(s)] done [%.2fs].", n, SEC_SINCE(t)); if (n <= 0) error(E_NOFREQ); /* print a log message and */ MSG(stderr, "\n"); /* check the number of items */ k = tb_max(tabag); /* clamp the set/rule length to */ if (max > k) max = k; /* the maximum transaction size */ /* --- reduce transactions --- */ t = clock(); /* start the timer for the reduction */ MSG(stderr, "reducing transactions ... "); tb_filter(tabag, min, NULL); /* remove items of short transactions */ tb_sort(tabag, 1, heap); /* sort the trans. lexicographically */ k = tb_reduce(tabag); /* reduce transactions to unique ones */ if (k == wgt) MSG(stderr, "[%d transaction(s)]", k); else MSG(stderr, "[%d/%d transaction(s)]", k, wgt); MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t)); /* --- create transaction tree --- */ tt = 0; /* init. the tree construction time */ if (tree) { /* if to use a transaction tree */ t = clock(); /* start the timer for construction */ MSG(stderr, "building transaction tree ... "); tatree = tt_create(tabag); /* create a transaction tree */ if (!tatree) error(E_NOMEM); if (filter == 0) { /* if not to filter items, */ tb_delete(tabag, 0); /* delete the transaction bag */ tabag = NULL; /* (redundant data storage) */ } MSG(stderr, "[%d node(s)]", tt_nodecnt(tatree)); MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t)); tt = clock() -t; /* note the time for the construction */ } /* of the transaction tree */ /* --- create item set tree --- */ t = clock(); tc = 0; /* start the timer for the search */ istree = ist_create(ibase, mode, (int)supp, (int)smax, conf); if (!istree) error(E_NOMEM); /* create an item set tree */ ist_seteval(istree, eval|zero, aggm, 0.01*minval, prune); /* --- check item subsets --- */ MSG(stderr, "checking subsets of size 1"); map = (int*)malloc(n *sizeof(int)); if (!map) error(E_NOMEM); /* create a filter map */ while (1) { /* traverse the item set sizes */ size = ist_height(istree); /* get the current item set size and */ if (size >= max) break; /* abort if maximal size is reached */ if ((filter != 0) /* if to filter w.r.t. item usage */ && (ist_check(istree, map) <= size)) break; /* check which items are still used */ if (post) /* if a-posteriori pruning requested, */ ist_prune(istree); /* prune infrequent item sets */ k = ist_addlvl(istree); /* while max. height is not reached, */ if (k) { if (k > 0) break; /* add a level to the item set tree */ error(E_NOMEM); } /* if no level was added, abort */ if (((filter < 0) /* if to filter w.r.t. item usage */ && (i < -filter *n)) /* and enough items were removed */ || ((filter > 0) /* or counting time is long enough */ && (i < n) && (i *(double)tt < filter *n *tc))) { n = i; /* note the new number of items */ x = clock(); /* start the timer for filtering */ tb_filter(tabag, size+1, map); tb_sort(tabag, 0, heap); /* remove unnec. items and trans. */ tb_reduce(tabag); /* and reduce trans. to unique ones */ if (tatree) { /* if a transaction tree was created */ tt_delete(tatree, 0); /* delete the transaction tree */ tatree = tt_create(tabag); if (!tatree) error(E_NOMEM); } /* rebuild the transaction tree */ tt = clock() -x; /* note the filter/rebuild time */ } MSG(stderr, " %d", ++size); /* print the current item set size */ x = clock(); /* start the timer for counting */ if (tatree) ist_countx(istree, tatree); else ist_countb(istree, tabag); tc = clock() -x; /* count the transaction tree/bag */ } /* and compute the new counting time */ free(map); map = NULL; /* delete the filter map */ MSG(stderr, " done [%.2fs].\n", SEC_SINCE(t)); /* --- filter found item sets --- */ if ((target == TT_CLOSED) || (target == TT_MAXIMAL)) { t = clock(); /* start the timer for filtering */ MSG(stderr, "filtering for %s item sets ... ", (target == TT_MAXIMAL) ? "maximal" : "closed"); k = target | ((prune < 0) ? IST_EVAL : 0); ist_mark(istree, k); /* filter closed/maximal item sets */ MSG(stderr, "done [%.2fs].\n", SEC_SINCE(t)); } /* print a log message */ /* --- print item sets/rules/hyperedges --- */ t = clock(); /* start the timer for the output */ if (fn_out && *fn_out) /* if an output file name is given, */ out = fopen(fn_out, "w"); /* open the output file */ else { /* if no output file name is given, */ out = stdout; fn_out = "<stdout>"; } /* write to std. output */ MSG(stderr, "writing %s ... ", fn_out); if (!out) error(E_FOPEN, fn_out); if (eval == IST_LOGQ) report |= ISR_LOGS; if ((target == TT_CLOSED) || (target == TT_MAXIMAL)) report |= ISR_NOEXP; /* combine the report mode flags */ isrep = isr_create(ibase, out, report, isep, impl); if (!isrep) error(E_NOMEM); /* create an item set reporter */ isr_setfmt (isrep, format); /* and configure it */ isr_setsize(isrep, min, max); ist_setsize(istree, min, max, dir); ist_init (istree); /* initialize the extraction */ items = t_items(ib_tract(ibase)); if ((target <= TT_MAXIMAL) /* if to find frequent item sets */ && (dir == 0)) { /* and not to sort them by size */ if (eval == IST_LOGQ) /* if to compute an add. evaluation */ isr_seteval(isrep, isr_logq, NULL, 0.01*minval); else if (eval > IST_NONE) /* set the add. evaluation function */ isr_seteval(isrep, ist_evalx, istree, 0.01*minval); n = ist_report(istree, isrep); } /* report the item sets */ else if (target <= TT_MAXIMAL) { /* if to find frequent item sets */ for (n = 0; 1; ) { /* extract item sets from the tree */ k = ist_set(istree, items, &frq, &minval); if (k < 0) break; /* get the next frequent item set */ if (k > 0) fputs(isr_name(isrep, items[0]), out); for (i = 0; ++i < k; ) { /* print the item names */ fputs(isep, out); fputs(isr_name(isrep, items[i]), out); } if (format) /* if requested, print information */ isr_sinfo(isrep, frq, minval); fputc('\n', out); n++; /* terminate the output line and */ } } /* count the reported item set */ else if (target == TT_RULE) { /* if to find association rules, */ for (n = 0; 1; ) { /* extract rules from tree */ k = ist_rule(istree, items, &frq, &body, &head, &minval); if (k < 0) break; /* get the next association rule */ fputs(isr_name(isrep, items[0]), out); fputs(impl, out); /* print name of rule head item */ if (k > 1) fputs(isr_name(isrep, items[1]), out); for (i = 1; ++i < k; ) { /* print names of items in rule body */ fputs(isep, out); fputs(isr_name(isrep, items[i]), out); } if (format) /* if requested, print information */ isr_rinfo(isrep, frq, body, head, minval); fputc('\n', out); n++; /* terminate the output line and */ } /* count the reported ass. rule */ } /* if (target <= TT_MAXIMAL) .. else .. */ if (fflush(out) != 0) error(E_FWRITE, fn_out); if (out != stdout) fclose(out); out = NULL; /* close the output file */ MSG(stderr, "[%d %s(s)] done ", n, (target == TT_RULE) ? "rule" : "set"); MSG(stderr, "[%.2fs].\n", SEC_SINCE(t)); #ifdef BENCH /* if benchmark version, */ ist_stats(istree); /* show the search statistics */ #endif /* (especially memory usage) */ /* --- clean up --- */ #ifndef NDEBUG /* if this is a debug version */ isr_delete(isrep, 0); /* the item set reporter, */ ist_delete(istree); /* the item set tree, */ if (tatree) tt_delete(tatree, 0); /* the transaction tree, */ if (tabag) tb_delete(tabag, 0); /* the transaction bag, */ ib_delete(ibase); /* and the item base */ #endif #ifdef STORAGE /* if storage debugging */ showmem("at end of program"); /* check memory usage */ #endif return 0; /* return 'ok' */ } /* main() */