void gk_find_frequent_itemsets(int ntrans, int *tranptr, int *tranind, int minfreq, int maxfreq, int minlen, int maxlen, void (*process_itemset)(void *stateptr, int nitems, int *itemids, int ntrans, int *transids), void *stateptr) { ssize_t i; gk_csr_t *mat, *pmat; isparams_t params; int *pattern; /* Create the matrix */ mat = gk_csr_Create(); mat->nrows = ntrans; mat->ncols = tranind[gk_iargmax(tranptr[ntrans], tranind)]+1; mat->rowptr = gk_zmalloc(ntrans+1, "gk_find_frequent_itemsets: mat.rowptr"); for (i=0; i<ntrans+1; i++) mat->rowptr[i] = tranptr[i]; mat->rowind = gk_icopy(tranptr[ntrans], tranind, gk_imalloc(tranptr[ntrans], "gk_find_frequent_itemsets: mat.rowind")); mat->colids = gk_iincset(mat->ncols, 0, gk_imalloc(mat->ncols, "gk_find_frequent_itemsets: mat.colids")); /* Setup the parameters */ params.minfreq = minfreq; params.maxfreq = (maxfreq == -1 ? mat->nrows : maxfreq); params.minlen = minlen; params.maxlen = (maxlen == -1 ? mat->ncols : maxlen); params.tnitems = mat->ncols; params.callback = process_itemset; params.stateptr = stateptr; params.rmarker = gk_ismalloc(mat->nrows, 0, "gk_find_frequent_itemsets: rmarker"); params.cand = gk_ikvmalloc(mat->ncols, "gk_find_frequent_itemsets: cand"); /* Perform the initial projection */ gk_csr_CreateIndex(mat, GK_CSR_COL); pmat = itemsets_project_matrix(¶ms, mat, -1); gk_csr_Free(&mat); pattern = gk_imalloc(pmat->ncols, "gk_find_frequent_itemsets: pattern"); itemsets_find_frequent_itemsets(¶ms, pmat, 0, pattern); gk_csr_Free(&pmat); gk_free((void **)&pattern, ¶ms.rmarker, ¶ms.cand, LTERM); }
/*
 * Recursive step of the frequent-itemset search.
 *
 * For every column of the (already projected) matrix `mat`, the column's
 * id is appended to the current prefix. The extended pattern is reported
 * through params->callback once it has at least params->minlen items, and
 * the search recurses on the matrix projected on that column while the
 * pattern is still shorter than params->maxlen.
 *
 *   params  - search limits, callback and callback state.
 *   mat     - projected matrix; its colids/colptr/colind arrays are read.
 *   preflen - number of items already stored in prefix.
 *   prefix  - pattern buffer; slot prefix[preflen] is overwritten here.
 */
void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat,
         int preflen, int *prefix)
{
  const int newlen = preflen + 1;   /* pattern length after adding one item */
  gk_csr_t *child;
  ssize_t col;

  for (col = 0; col < mat->ncols; col++) {
    /* extend the current pattern with this column's item */
    prefix[preflen] = mat->colids[col];

    /* report it: the column's index span supplies the transaction count
       and the transaction id list */
    if (newlen >= params->minlen) {
      params->callback(params->stateptr, newlen, prefix,
          mat->colptr[col+1] - mat->colptr[col],
          &mat->colind[mat->colptr[col]]);
    }

    /* pattern already at maximum length: do not grow it further */
    if (newlen >= params->maxlen)
      continue;

    child = itemsets_project_matrix(params, mat, col);
    itemsets_find_frequent_itemsets(params, child, newlen, prefix);
    gk_csr_Free(&child);
  }
}
/*
 * Reads a CSR matrix of documents, and for every row (query document)
 * finds its most similar rows under cosine similarity, optionally writing
 * the results to params->outfile.
 *
 * Fields of params that are used:
 *   infstem   - name passed to gk_csr_Read.
 *   outfile   - output file name; when NULL nothing is written.
 *   nnbrs     - forwarded to gk_csr_GetSimilarRows (number of neighbors).
 *   minsim    - forwarded to gk_csr_GetSimilarRows (similarity threshold).
 *   verbosity - values > 0 print a progress line per query.
 *   timer_1   - wall-clock timer wrapped around the query loop.
 *
 * Each output line is "<query row> <hit value> <hit key>", i.e. hits[j].val
 * followed by hits[j].key printed with three decimals.
 */
void ComputeNeighbors(params_t *params)
{
  int i, j, nhits;
  gk_csr_t *mat;
  int32_t *marker;
  gk_fkv_t *hits, *cand;
  FILE *fpout;

  printf("Reading data for %s...\n", params->infstem);

  mat = gk_csr_Read(params->infstem, GK_CSR_FMT_CSR, 1, 0);

  /* rowptr entries are not plain int; cast explicitly and print with %zd so
     that the conversion specifier always matches the argument type */
  printf("#docs: %d, #nnz: %zd.\n", mat->nrows, (ssize_t)mat->rowptr[mat->nrows]);

  /* compact the column-space of the matrices */
  gk_csr_CompactColumns(mat);

  /* perform auxiliary normalizations/pre-computations based on similarity */
  gk_csr_Normalize(mat, GK_CSR_ROW, 2);

  /* create the inverted index */
  gk_csr_CreateIndex(mat, GK_CSR_COL);

  /* create the output file */
  fpout = (params->outfile ? gk_fopen(params->outfile, "w", "ComputeNeighbors: fpout") : NULL);

  /* allocate memory for the necessary working arrays */
  hits   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: hits");
  marker = gk_i32smalloc(mat->nrows, -1, "ComputeNeighbors: marker");
  cand   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: cand");

  /* find the best neighbors for each query document */
  gk_startwctimer(params->timer_1);
  for (i=0; i<mat->nrows; i++) {
    if (params->verbosity > 0)
      printf("Working on query %7d\n", i);

    /* find the neighbors of the ith document */
    nhits = gk_csr_GetSimilarRows(mat,
                 mat->rowptr[i+1]-mat->rowptr[i],
                 mat->rowind+mat->rowptr[i],
                 mat->rowval+mat->rowptr[i],
                 GK_CSR_COS, params->nnbrs, params->minsim, hits,
                 marker, cand);

    /* write the results in the file */
    if (fpout) {
      /* the hit value is cast explicitly so the %zd specifier is correct
         whatever integer type the key-value struct uses for it */
      for (j=0; j<nhits; j++)
        fprintf(fpout, "%8d %8zd %.3f\n", i, (ssize_t)hits[j].val, hits[j].key);
    }
  }
  gk_stopwctimer(params->timer_1);

  /* cleanup and exit */
  if (fpout)
    gk_fclose(fpout);

  gk_free((void **)&hits, &marker, &cand, LTERM);
  gk_csr_Free(&mat);
}
/*
 * Releases a vault: frees the matrix it holds and then the vault structure
 * itself. Passing a null pointer is a harmless no-op, mirroring free().
 *
 * Note that only the local copy of the pointer is reset by gk_free; the
 * caller's pointer must not be used after this call.
 */
void FreeVault(vault_t *vault)
{
  /* nothing to release; also avoids dereferencing a null vault below */
  if (!vault)
    return;

  gk_csr_Free(&vault->mat);
  gk_free((void **)&vault, LTERM);
}
int main(int argc, char *argv[]) { ssize_t i, j, niter; params_t *params; gk_csr_t *mat; FILE *fpout; /* get command-line options */ params = parse_cmdline(argc, argv); /* read the data */ mat = gk_csr_Read(params->infile, GK_CSR_FMT_METIS, 1, 1); /* display some basic stats */ print_init_info(params, mat); if (params->ntvs != -1) { /* compute the pr for different randomly generated restart-distribution vectors */ float **prs; prs = gk_fAllocMatrix(params->ntvs, mat->nrows, 0.0, "main: prs"); /* generate the random restart vectors */ for (j=0; j<params->ntvs; j++) { for (i=0; i<mat->nrows; i++) prs[j][i] = RandomInRange(931); gk_fscale(mat->nrows, 1.0/gk_fsum(mat->nrows, prs[j], 1), prs[j], 1); niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, prs[j]); printf("tvs#: %zd; niters: %zd\n", j, niter); } /* output the computed pr scores */ fpout = gk_fopen(params->outfile, "w", "main: outfile"); for (i=0; i<mat->nrows; i++) { for (j=0; j<params->ntvs; j++) fprintf(fpout, "%.4e ", prs[j][i]); fprintf(fpout, "\n"); } gk_fclose(fpout); gk_fFreeMatrix(&prs, params->ntvs, mat->nrows); } else if (params->ppr != -1) { /* compute the personalized pr from the specified vertex */ float *pr; pr = gk_fsmalloc(mat->nrows, 0.0, "main: pr"); pr[params->ppr-1] = 1.0; niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr); printf("ppr: %d; niters: %zd\n", params->ppr, niter); /* output the computed pr scores */ fpout = gk_fopen(params->outfile, "w", "main: outfile"); for (i=0; i<mat->nrows; i++) fprintf(fpout, "%.4e\n", pr[i]); gk_fclose(fpout); gk_free((void **)&pr, LTERM); } else { /* compute the standard pr */ int jmax; float diff, maxdiff; float *pr; pr = gk_fsmalloc(mat->nrows, 1.0/mat->nrows, "main: pr"); niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr); printf("pr; niters: %zd\n", niter); /* output the computed pr scores */ fpout = gk_fopen(params->outfile, "w", "main: outfile"); for (i=0; i<mat->nrows; i++) { for 
(jmax=i, maxdiff=0.0, j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) { if ((diff = fabs(pr[i]-pr[mat->rowind[j]])) > maxdiff) { maxdiff = diff; jmax = mat->rowind[j]; } } fprintf(fpout, "%.4e %10zd %.4e %10d\n", pr[i], mat->rowptr[i+1]-mat->rowptr[i], maxdiff, jmax+1); } gk_fclose(fpout); gk_free((void **)&pr, LTERM); } gk_csr_Free(&mat); /* display some final stats */ print_final_info(params); }