void gk_find_frequent_itemsets(int ntrans, int *tranptr, int *tranind, int minfreq, int maxfreq, int minlen, int maxlen, void (*process_itemset)(void *stateptr, int nitems, int *itemids, int ntrans, int *transids), void *stateptr) { ssize_t i; gk_csr_t *mat, *pmat; isparams_t params; int *pattern; /* Create the matrix */ mat = gk_csr_Create(); mat->nrows = ntrans; mat->ncols = tranind[gk_iargmax(tranptr[ntrans], tranind)]+1; mat->rowptr = gk_zmalloc(ntrans+1, "gk_find_frequent_itemsets: mat.rowptr"); for (i=0; i<ntrans+1; i++) mat->rowptr[i] = tranptr[i]; mat->rowind = gk_icopy(tranptr[ntrans], tranind, gk_imalloc(tranptr[ntrans], "gk_find_frequent_itemsets: mat.rowind")); mat->colids = gk_iincset(mat->ncols, 0, gk_imalloc(mat->ncols, "gk_find_frequent_itemsets: mat.colids")); /* Setup the parameters */ params.minfreq = minfreq; params.maxfreq = (maxfreq == -1 ? mat->nrows : maxfreq); params.minlen = minlen; params.maxlen = (maxlen == -1 ? mat->ncols : maxlen); params.tnitems = mat->ncols; params.callback = process_itemset; params.stateptr = stateptr; params.rmarker = gk_ismalloc(mat->nrows, 0, "gk_find_frequent_itemsets: rmarker"); params.cand = gk_ikvmalloc(mat->ncols, "gk_find_frequent_itemsets: cand"); /* Perform the initial projection */ gk_csr_CreateIndex(mat, GK_CSR_COL); pmat = itemsets_project_matrix(¶ms, mat, -1); gk_csr_Free(&mat); pattern = gk_imalloc(pmat->ncols, "gk_find_frequent_itemsets: pattern"); itemsets_find_frequent_itemsets(¶ms, pmat, 0, pattern); gk_csr_Free(&pmat); gk_free((void **)&pattern, ¶ms.rmarker, ¶ms.cand, LTERM); }
int main(int argc, char** argv) { int rank, p; int n; int nb; int prow; char ** a; float *b; float * pb; gk_csr_t *mat; int * nEachData; int ndata; int i, j; if (argc != 3) { printf("error: invalid arguments\n"); exit(-1); } mat = (gk_csr_t *) malloc(sizeof(gk_csr_t)); n = numberOfLines(argv[1]); nb = numberOfLines(argv[2]); mat->nrows = nb; mat->ncols = nb; a = getDataA(argv[1], n); b = getDataB(argv[2], nb); mat->rowptr = (int *) calloc(nb + 1, sizeof(int)); mat->rowval = (float *) malloc(sizeof(float) * n); mat->rowind = (int *) malloc(sizeof(int) * n); if (parseInput(a, mat, n, nb) == -1) { printf("ERROR: Failed to parse data.\n"); exit(-1); } gk_csr_CreateIndex(mat, GK_CSR_COL); float * result = (float *) calloc(nb, sizeof(float)); for (i = 0; i < nb; i++) { float output = 0.0; int total = mat->colptr[i+1] - mat->colptr[i]; int * startind = mat->colind + mat->colptr[i]; float * startval = mat->colval + mat->colptr[i]; for (j = 0; j < total; j++) { result[startind[j]] += b[i] * startval[j]; // printf("%f : val %f\n", b[startind[j]] ,startval[j]); } } /* float * result = (float *) malloc(sizeof(float) * nb); for (i = 0; i < nb; i++) { float output = 0.0; int total = mat->rowptr[i+1] - mat->rowptr[i]; int * startind = mat->rowind + mat->rowptr[i]; float * startval = mat->rowval + mat->rowptr[i]; for (j = 0; j < total; j++) { output += b[startind[j]] * startval[j]; printf("%f : val %f\n", b[startind[j]] ,startval[j]); } result[i] = output; } */ for (i = 0; i < nb; i++) { printf("%f\n", result[i]); } /* for (i = 0; i < nb; i++) { int total = mat->rowptr[i+1] - mat->rowptr[i]; int * startind = mat->rowind + mat->rowptr[i]; float * startval = mat->rowval + mat->rowptr[i]; for (j = 0; j < total; j++) { printf("row %d col %d : val %f\n",i, startind[j] ,startval[j]); } } printf("cols\n\n"); for (i = 0; i < nb; i++) { int total = mat->colptr[i+1] - mat->colptr[i]; int * startind = mat->colind + mat->colptr[i]; float * startval = mat->colval + mat->colptr[i]; for (j = 0; 
j < total; j++) { printf("col %d row %d : val %f\n",i, startind[j] ,startval[j]); } } */ return 0; }
/*
 * Reads a sparse document matrix and, for every row, finds its most similar
 * rows, optionally writing the matches to a file.
 *
 * Fields of params used here:
 *   infstem   - name passed to gk_csr_Read to load the matrix.
 *   outfile   - output file name; when NULL no results are written.
 *   nnbrs     - neighbor count passed to gk_csr_GetSimilarRows.
 *   minsim    - similarity threshold passed to gk_csr_GetSimilarRows.
 *   verbosity - when > 0 a progress line is printed per query.
 *   timer_1   - wall-clock timer wrapped around the query loop.
 *
 * Each output line is "<query row> <hit value> <hit key>", where the value
 * and key come from the gk_fkv_t entries filled in by gk_csr_GetSimilarRows.
 */
void ComputeNeighbors(params_t *params)
{
  int i, j, nhits;
  gk_csr_t *mat;
  int32_t *marker;
  gk_fkv_t *hits, *cand;
  FILE *fpout;

  printf("Reading data for %s...\n", params->infstem);

  mat = gk_csr_Read(params->infstem, GK_CSR_FMT_CSR, 1, 0);

  /* The row-pointer entry may be wider than int; convert explicitly so the
     argument always matches the conversion specifier. */
  printf("#docs: %d, #nnz: %lld.\n", mat->nrows,
         (long long)mat->rowptr[mat->nrows]);

  /* compact the column-space of the matrices */
  gk_csr_CompactColumns(mat);

  /* perform auxiliary normalizations/pre-computations based on similarity */
  gk_csr_Normalize(mat, GK_CSR_ROW, 2);

  /* create the inverted index */
  gk_csr_CreateIndex(mat, GK_CSR_COL);

  /* create the output file */
  fpout = (params->outfile
              ? gk_fopen(params->outfile, "w", "ComputeNeighbors: fpout")
              : NULL);

  /* allocate memory for the necessary working arrays; marker is created
     with gk_i32smalloc using -1 as its fill value */
  hits   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: hits");
  marker = gk_i32smalloc(mat->nrows, -1, "ComputeNeighbors: marker");
  cand   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: cand");

  /* find the best neighbors for each query document */
  gk_startwctimer(params->timer_1);

  for (i=0; i<mat->nrows; i++) {
    if (params->verbosity > 0)
      printf("Working on query %7d\n", i);

    /* find the neighbors of the ith document; the query is row i itself,
       passed as its (length, indices, values) triple */
    nhits = gk_csr_GetSimilarRows(mat,
                 mat->rowptr[i+1]-mat->rowptr[i],
                 mat->rowind+mat->rowptr[i],
                 mat->rowval+mat->rowptr[i],
                 GK_CSR_COS, params->nnbrs, params->minsim,
                 hits, marker, cand);

    /* write the results in the file */
    if (fpout) {
      for (j=0; j<nhits; j++) {
        /* same explicit widening as above for the hit's integer value */
        fprintf(fpout, "%8d %8lld %.3f\n", i, (long long)hits[j].val,
                hits[j].key);
      }
    }
  }

  gk_stopwctimer(params->timer_1);

  /* cleanup and exit */
  if (fpout)
    gk_fclose(fpout);

  gk_free((void **)&hits, &marker, &cand, LTERM);
  gk_csr_Free(&mat);

  return;
}