/*
 * Rank the vertices of adjMat by their personalized PageRank score with
 * respect to `user` and copy the best `nsim` of them into `topUsers`.
 *
 * adjMat   - graph in CSR form; adjMat->nrows is used as the vertex count.
 * user     - index of the vertex that receives all of the restart mass;
 *            must satisfy 0 <= user < adjMat->nrows.
 * topUsers - output array; the caller must provide room for at least
 *            min(nsim, number of other vertices with a positive score)
 *            entries. Each entry has .key = score and .val = vertex index.
 * nsim     - maximum number of similar users requested.
 *
 * Returns the number of entries written to topUsers. Returns 0 without
 * touching topUsers when `user` is out of range or the score array cannot
 * be allocated.
 */
int getTopSimUsers(gk_csr_t *adjMat, int user, gk_fkv_t *topUsers, int nsim) {
  float *pr;          /* restart distribution on entry, scores afterwards */
  gk_fkv_t *pRanks;   /* (score, vertex) pairs for the candidate vertices */
  int i, j, count;
  int iter = 0;

  /* Guard the pr[user] write below against an out-of-bounds index. */
  if (user < 0 || user >= adjMat->nrows) {
    fprintf(stderr, "getTopSimUsers: user %d is out of range\n", user);
    return 0;
  }

  /*
   * The restart distribution must be zero everywhere except at `user`.
   * calloc gives the zero fill; plain malloc would hand indeterminate
   * values to the PageRank computation for every other vertex.
   */
  pr = calloc((size_t)adjMat->nrows, sizeof *pr);
  if (pr == NULL) {
    fprintf(stderr, "getTopSimUsers: failed to allocate %d scores\n",
            (int)adjMat->nrows);
    return 0;
  }
  pr[user] = 1.0;

  /*
   * NOTE(review): 0.5, 0.000001 and 100 are presumably the restart/damping
   * factor, the convergence tolerance and the iteration cap -- confirm
   * against the gk_rw_PageRank documentation.
   */
  iter = gk_rw_PageRank(adjMat, 0.5, 0.000001, 100, pr);
  fprintf(stderr, "Iter: %d\n", iter);

  /* First pass: count the vertices other than `user` with a positive score. */
  count = 0;
  for (i = 0; i < adjMat->nrows; i++) {
    if (pr[i] > 0 && i != user) {
      count++;
    }
  }

  /* Second pass: collect those vertices as (score, vertex index) pairs. */
  pRanks = gk_fkvmalloc(count, "store page ranks");
  for (i = 0, j = 0; i < adjMat->nrows; i++) {
    if (pr[i] > 0 && i != user) {
      pRanks[j].key = pr[i];
      pRanks[j].val = i;
      j++;
    }
  }

  /* Never report more users than were actually found. */
  nsim = gk_min(nsim, count);

  /* Select the nsim best candidates, order them, and hand them back. */
  gk_dfkvkselect(count, nsim, pRanks);
  gk_fkvsortd(nsim, pRanks);
  gk_fkvcopy(nsim, pRanks, topUsers);

  free(pr);
  /*
   * NOTE(review): pRanks comes from gk_fkvmalloc but is released with plain
   * free(); confirm whether gk_free() is the required counterpart.
   */
  free(pRanks);

  return nsim;
}
/*
 * Read the matrix named by params->infstem, preprocess it, and for every row
 * look up its most similar rows with gk_csr_GetSimilarRows (GK_CSR_COS
 * measure, limited by params->nnbrs and params->minsim).
 *
 * When params->outfile is set, each hit is written to that file as
 * "<query row> <hit row> <similarity>". The neighbor search is bracketed by
 * the params->timer_1 wall-clock timer.
 */
void ComputeNeighbors(params_t *params)
{
  gk_csr_t *docs;
  gk_fkv_t *results, *scratch;
  int32_t *seen;
  FILE *out = NULL;
  int query, hit, nfound;

  printf("Reading data for %s...\n", params->infstem);
  docs = gk_csr_Read(params->infstem, GK_CSR_FMT_CSR, 1, 0);

  printf("#docs: %d, #nnz: %d.\n", docs->nrows, docs->rowptr[docs->nrows]);

  /* Preprocessing: compact the column space, normalize the rows, and build
   * the column (inverted) index. */
  gk_csr_CompactColumns(docs);
  gk_csr_Normalize(docs, GK_CSR_ROW, 2);
  gk_csr_CreateIndex(docs, GK_CSR_COL);

  /* The output file is optional; without it the hits are computed but not
   * written anywhere. */
  if (params->outfile)
    out = gk_fopen(params->outfile, "w", "ComputeNeighbors: fpout");

  /* Working arrays, each with one slot per row of the matrix. */
  results = gk_fkvmalloc(docs->nrows, "ComputeNeighbors: hits");
  seen    = gk_i32smalloc(docs->nrows, -1, "ComputeNeighbors: marker");
  scratch = gk_fkvmalloc(docs->nrows, "ComputeNeighbors: cand");

  gk_startwctimer(params->timer_1);

  for (query = 0; query < docs->nrows; query++) {
    if (params->verbosity > 0)
      printf("Working on query %7d\n", query);

    /* The query is the row itself: its length, column indices and values. */
    nfound = gk_csr_GetSimilarRows(docs,
                 docs->rowptr[query+1] - docs->rowptr[query],
                 docs->rowind + docs->rowptr[query],
                 docs->rowval + docs->rowptr[query],
                 GK_CSR_COS, params->nnbrs, params->minsim,
                 results, seen, scratch);

    if (!out)
      continue;

    /* One output line per neighbor of this query. */
    for (hit = 0; hit < nfound; hit++)
      fprintf(out, "%8d %8d %.3f\n", query, results[hit].val, results[hit].key);
  }

  gk_stopwctimer(params->timer_1);

  /* Release the output file, the working arrays, and the matrix. */
  if (out)
    gk_fclose(out);

  gk_free((void **)&results, &seen, &scratch, LTERM);
  gk_csr_Free(&docs);
}