void gk_find_frequent_itemsets(int ntrans, int *tranptr, int *tranind, int minfreq, int maxfreq, int minlen, int maxlen, void (*process_itemset)(void *stateptr, int nitems, int *itemids, int ntrans, int *transids), void *stateptr) { ssize_t i; gk_csr_t *mat, *pmat; isparams_t params; int *pattern; /* Create the matrix */ mat = gk_csr_Create(); mat->nrows = ntrans; mat->ncols = tranind[gk_iargmax(tranptr[ntrans], tranind)]+1; mat->rowptr = gk_zmalloc(ntrans+1, "gk_find_frequent_itemsets: mat.rowptr"); for (i=0; i<ntrans+1; i++) mat->rowptr[i] = tranptr[i]; mat->rowind = gk_icopy(tranptr[ntrans], tranind, gk_imalloc(tranptr[ntrans], "gk_find_frequent_itemsets: mat.rowind")); mat->colids = gk_iincset(mat->ncols, 0, gk_imalloc(mat->ncols, "gk_find_frequent_itemsets: mat.colids")); /* Setup the parameters */ params.minfreq = minfreq; params.maxfreq = (maxfreq == -1 ? mat->nrows : maxfreq); params.minlen = minlen; params.maxlen = (maxlen == -1 ? mat->ncols : maxlen); params.tnitems = mat->ncols; params.callback = process_itemset; params.stateptr = stateptr; params.rmarker = gk_ismalloc(mat->nrows, 0, "gk_find_frequent_itemsets: rmarker"); params.cand = gk_ikvmalloc(mat->ncols, "gk_find_frequent_itemsets: cand"); /* Perform the initial projection */ gk_csr_CreateIndex(mat, GK_CSR_COL); pmat = itemsets_project_matrix(¶ms, mat, -1); gk_csr_Free(&mat); pattern = gk_imalloc(pmat->ncols, "gk_find_frequent_itemsets: pattern"); itemsets_find_frequent_itemsets(¶ms, pmat, 0, pattern); gk_csr_Free(&pmat); gk_free((void **)&pattern, ¶ms.rmarker, ¶ms.cand, LTERM); }
gk_graph_t *gk_graph_Prune(gk_graph_t *graph, int what, int minf, int maxf) { ssize_t i, j, nnz; int nrows, ncols; ssize_t *rowptr, *nrowptr; int *rowind, *nrowind, *collen; float *rowval, *nrowval; gk_graph_t *ngraph; ngraph = gk_graph_Create(); nrows = ngraph->nrows = graph->nrows; ncols = ngraph->ncols = graph->ncols; rowptr = graph->rowptr; rowind = graph->rowind; rowval = graph->rowval; nrowptr = ngraph->rowptr = gk_zmalloc(nrows+1, "gk_graph_Prune: nrowptr"); nrowind = ngraph->rowind = gk_imalloc(rowptr[nrows], "gk_graph_Prune: nrowind"); nrowval = ngraph->rowval = gk_fmalloc(rowptr[nrows], "gk_graph_Prune: nrowval"); switch (what) { case GK_CSR_COL: collen = gk_ismalloc(ncols, 0, "gk_graph_Prune: collen"); for (i=0; i<nrows; i++) { for (j=rowptr[i]; j<rowptr[i+1]; j++) { ASSERT(rowind[j] < ncols); collen[rowind[j]]++; } } for (i=0; i<ncols; i++) collen[i] = (collen[i] >= minf && collen[i] <= maxf ? 1 : 0); nrowptr[0] = 0; for (nnz=0, i=0; i<nrows; i++) { for (j=rowptr[i]; j<rowptr[i+1]; j++) { if (collen[rowind[j]]) { nrowind[nnz] = rowind[j]; nrowval[nnz] = rowval[j]; nnz++; } } nrowptr[i+1] = nnz; } gk_free((void **)&collen, LTERM); break; case GK_CSR_ROW: nrowptr[0] = 0; for (nnz=0, i=0; i<nrows; i++) { if (rowptr[i+1]-rowptr[i] >= minf && rowptr[i+1]-rowptr[i] <= maxf) { for (j=rowptr[i]; j<rowptr[i+1]; j++, nnz++) { nrowind[nnz] = rowind[j]; nrowval[nnz] = rowval[j]; } } nrowptr[i+1] = nnz; } break; default: gk_graph_Free(&ngraph); gk_errexit(SIGERR, "Unknown prunning type of %d\n", what); return NULL; } return ngraph; }