/*
 * Allocates a vault and loads the document matrix named by params->infstem.
 *
 * The matrix is read with gk_csr_Read() using GK_CSR_FMT_CSR, and the vault's
 * ndocs field is set to the number of rows of the matrix that was read.
 * Only the mat and ndocs fields are assigned here; any other members that
 * vault_t may have are not touched by this function.
 *
 * The existence of the input file is checked with ASSERT before reading.
 *
 * params: run parameters; only params->infstem is read.
 *
 * Returns the vault allocated with gk_malloc(). Nothing in this function
 * releases it, so the caller is responsible for the returned pointer.
 */
vault_t *ReadData(params_t *params)
{
  vault_t *vault;

  vault = (vault_t *)gk_malloc(sizeof(*vault), "ReadData: vault");

  /* read the document vectors */
  printf("Reading documents...\n");
  ASSERT(gk_fexists(params->infstem));

  vault->mat   = gk_csr_Read(params->infstem, GK_CSR_FMT_CSR, 1, 0);
  vault->ndocs = vault->mat->nrows;

  return vault;
}
int main(int argc, char *argv[]) { int i, j, k, countTopUsers; //file containing matrix of form CSR char *ipCSRAdjFileName; //list of users on which to apply page rank char *usersFileName; //number of cpus int cpucount; //to store graph adjacency matrix gk_csr_t *adjMat; int numUsers; //araay of user id int *users; gk_fkv_t **topUsers; int *topUserCount; int minSimUsers; char *opFile = "GraphRead.txt"; if (argc < 4) { //not wnough arguments passed printf("\n Not enough arguments passed. \n"); return -1; } //parse commandline arguments ipCSRAdjFileName = argv[1]; usersFileName = argv[2]; minSimUsers = atoi(argv[3]); cpucount = atoi(argv[4]); //printf("\nBuilding adjacency matrix...\n"); //read the adjacency matrix adjMat = gk_csr_Read(ipCSRAdjFileName, GK_CSR_FMT_CSR, 0, 0); //gk_csr_Write(adjMat, opFile, GK_CSR_FMT_CSR, 0, 0); fprintf(stderr, "\nMatrix building completed...\n"); //get the number of users numUsers = getLineCount(usersFileName); users = getUsers(usersFileName, numUsers); //maintain storage of top similar users of cpucount users topUsers = (gk_fkv_t**) malloc(sizeof(gk_fkv_t*) * cpucount); for (i = 0; i < cpucount; i++) { topUsers[i] = (gk_fkv_t*) malloc(sizeof(gk_fkv_t) * minSimUsers); } //storage for top users count of chunk topUserCount = (int *) malloc(sizeof(int) * cpucount); //apply the personalized page rank for each user for (i = 0; i < numUsers; i+=cpucount) { #pragma omp parallel default(none) private(j) shared(users, topUsers, topUserCount, adjMat) \ firstprivate(i, minSimUsers, numUsers, cpucount) { #pragma omp for for (j = 0; j < cpucount; j++) { if (i+j < numUsers) { //find top users for users[i+j] //get the top rank vertices from personalized page rank iteration topUserCount[j] = getTopSimUsers(adjMat, users[i+j], topUsers[j], minSimUsers); } } } //write the values for users chunks for (j = 0; j < cpucount; j++) { if (i+j < numUsers) { //user printf("%d", users[i+j]); for (k = 0; k < topUserCount[j]; k ++) { //print the top similar 
users with corresponding pr printf("\t%d:%f", topUsers[j][k].val, topUsers[j][k].key); } printf("\n"); } } } return 0; }
/*
 * Reads the matrix named by params->infstem and, for every row, queries
 * gk_csr_GetSimilarRows() with that row's own entries using GK_CSR_COS,
 * params->nnbrs and params->minsim.
 *
 * When params->outfile is set, each hit is written to that file as
 * "<query row> <hits[].val> <hits[].key>"; otherwise the hits are computed
 * but not written anywhere. The query loop is bracketed by
 * gk_startwctimer()/gk_stopwctimer() on params->timer_1.
 *
 * params: run parameters; reads infstem, outfile, verbosity, nnbrs and
 *         minsim, and updates timer_1.
 */
void ComputeNeighbors(params_t *params)
{
  gk_csr_t *mat;
  gk_fkv_t *hits, *cand;
  int32_t *marker;
  FILE *fpout = NULL;
  int qrow, h, nfound;

  printf("Reading data for %s...\n", params->infstem);

  mat = gk_csr_Read(params->infstem, GK_CSR_FMT_CSR, 1, 0);

  printf("#docs: %d, #nnz: %d.\n", mat->nrows, mat->rowptr[mat->nrows]);

  /* preprocessing: compact the column space, normalize the rows, and
   * build the column-wise (inverted) index */
  gk_csr_CompactColumns(mat);
  gk_csr_Normalize(mat, GK_CSR_ROW, 2);
  gk_csr_CreateIndex(mat, GK_CSR_COL);

  /* the output file is optional */
  if (params->outfile)
    fpout = gk_fopen(params->outfile, "w", "ComputeNeighbors: fpout");

  /* working arrays, each sized by the number of rows */
  hits   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: hits");
  marker = gk_i32smalloc(mat->nrows, -1, "ComputeNeighbors: marker");
  cand   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: cand");

  /* query every row against the matrix */
  gk_startwctimer(params->timer_1);

  for (qrow = 0; qrow < mat->nrows; qrow++) {
    if (params->verbosity > 0)
      printf("Working on query %7d\n", qrow);

    /* the query is the row's own slice of rowind/rowval */
    nfound = gk_csr_GetSimilarRows(mat,
                 mat->rowptr[qrow+1] - mat->rowptr[qrow],
                 mat->rowind + mat->rowptr[qrow],
                 mat->rowval + mat->rowptr[qrow],
                 GK_CSR_COS, params->nnbrs, params->minsim,
                 hits, marker, cand);

    /* nothing to record without an output file */
    if (!fpout)
      continue;

    for (h = 0; h < nfound; h++)
      fprintf(fpout, "%8d %8d %.3f\n", qrow, hits[h].val, hits[h].key);
  }

  gk_stopwctimer(params->timer_1);

  /* release everything acquired above */
  if (fpout)
    gk_fclose(fpout);

  gk_free((void **)&hits, &marker, &cand, LTERM);

  gk_csr_Free(&mat);

  return;
}
int main(int argc, char *argv[]) { ssize_t i, j, niter; params_t *params; gk_csr_t *mat; FILE *fpout; /* get command-line options */ params = parse_cmdline(argc, argv); /* read the data */ mat = gk_csr_Read(params->infile, GK_CSR_FMT_METIS, 1, 1); /* display some basic stats */ print_init_info(params, mat); if (params->ntvs != -1) { /* compute the pr for different randomly generated restart-distribution vectors */ float **prs; prs = gk_fAllocMatrix(params->ntvs, mat->nrows, 0.0, "main: prs"); /* generate the random restart vectors */ for (j=0; j<params->ntvs; j++) { for (i=0; i<mat->nrows; i++) prs[j][i] = RandomInRange(931); gk_fscale(mat->nrows, 1.0/gk_fsum(mat->nrows, prs[j], 1), prs[j], 1); niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, prs[j]); printf("tvs#: %zd; niters: %zd\n", j, niter); } /* output the computed pr scores */ fpout = gk_fopen(params->outfile, "w", "main: outfile"); for (i=0; i<mat->nrows; i++) { for (j=0; j<params->ntvs; j++) fprintf(fpout, "%.4e ", prs[j][i]); fprintf(fpout, "\n"); } gk_fclose(fpout); gk_fFreeMatrix(&prs, params->ntvs, mat->nrows); } else if (params->ppr != -1) { /* compute the personalized pr from the specified vertex */ float *pr; pr = gk_fsmalloc(mat->nrows, 0.0, "main: pr"); pr[params->ppr-1] = 1.0; niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr); printf("ppr: %d; niters: %zd\n", params->ppr, niter); /* output the computed pr scores */ fpout = gk_fopen(params->outfile, "w", "main: outfile"); for (i=0; i<mat->nrows; i++) fprintf(fpout, "%.4e\n", pr[i]); gk_fclose(fpout); gk_free((void **)&pr, LTERM); } else { /* compute the standard pr */ int jmax; float diff, maxdiff; float *pr; pr = gk_fsmalloc(mat->nrows, 1.0/mat->nrows, "main: pr"); niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr); printf("pr; niters: %zd\n", niter); /* output the computed pr scores */ fpout = gk_fopen(params->outfile, "w", "main: outfile"); for (i=0; i<mat->nrows; i++) { for 
(jmax=i, maxdiff=0.0, j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) { if ((diff = fabs(pr[i]-pr[mat->rowind[j]])) > maxdiff) { maxdiff = diff; jmax = mat->rowind[j]; } } fprintf(fpout, "%.4e %10zd %.4e %10d\n", pr[i], mat->rowptr[i+1]-mat->rowptr[i], maxdiff, jmax+1); } gk_fclose(fpout); gk_free((void **)&pr, LTERM); } gk_csr_Free(&mat); /* display some final stats */ print_final_info(params); }