Ejemplo n.º 1
0
/*
 * Allocates a vault and fills it with the matrix stored in the file named
 * by params->infstem.
 *
 * params - run parameters; only params->infstem is read here.
 *
 * Returns the newly allocated vault with its `mat` and `ndocs` fields set
 * (ndocs is the number of rows of the matrix that was read). No other field
 * of the vault is initialized by this function.
 */
vault_t *ReadData(params_t *params)
{
  vault_t *vault = gk_malloc(sizeof *vault, "ReadData: vault");

  /* load the document vectors */
  printf("Reading documents...\n");

  /* the input file is expected to be present before it is read */
  ASSERT(gk_fexists(params->infstem));
  vault->mat = gk_csr_Read(params->infstem, GK_CSR_FMT_CSR, 1, 0);

  /* one document per matrix row */
  vault->ndocs = vault->mat->nrows;

  return vault;
}
Ejemplo n.º 2
0
/*
 * Prints, for every user listed in a users file, that user's top similar
 * users as reported by getTopSimUsers().
 *
 * Command line:
 *   argv[1]  file holding the adjacency matrix in CSR form
 *   argv[2]  file listing the users to process
 *   argv[3]  capacity of each per-user result buffer (minSimUsers)
 *   argv[4]  number of users processed per parallel chunk (cpucount)
 *
 * Output: one line per user on stdout, "<user>\t<val>:<key>\t<val>:<key>...".
 *
 * Returns 0 on success and -1 when the arguments are missing or invalid or
 * when the scratch buffers cannot be allocated.
 */
int main(int argc, char *argv[]) {

  int i, j, k;
  int status = -1;

  //file containing matrix of form CSR
  char *ipCSRAdjFileName;

  //list of users on which to apply page rank
  char *usersFileName;

  //number of cpus; also the number of users handled per chunk
  int cpucount;

  //to store graph adjacency matrix
  gk_csr_t *adjMat;

  int numUsers;

  //array of user ids
  int *users;

  //one result buffer and one result count per slot of a chunk
  gk_fkv_t **topUsers = NULL;
  int *topUserCount = NULL;

  int minSimUsers;

  //argv[1]..argv[4] are all read below, so five entries are required
  if (argc < 5) {
    //not enough arguments passed
    printf("\n Not enough arguments passed. \n");
    return -1;
  }

  //parse commandline arguments
  ipCSRAdjFileName = argv[1];
  usersFileName = argv[2];
  minSimUsers = atoi(argv[3]);
  cpucount = atoi(argv[4]);

  //a chunk size below 1 would never advance the outer loop, and negative
  //sizes would turn into huge allocation requests
  if (minSimUsers < 0 || cpucount < 1) {
    printf("\n Invalid arguments passed. \n");
    return -1;
  }

  //read the adjacency matrix
  adjMat = gk_csr_Read(ipCSRAdjFileName, GK_CSR_FMT_CSR, 0, 0);

  fprintf(stderr, "\nMatrix building completed...\n");
  //get the number of users
  numUsers = getLineCount(usersFileName);
  users = getUsers(usersFileName, numUsers);

  //maintain storage of top similar users of cpucount users;
  //calloc keeps unfilled slots NULL so cleanup can free them blindly
  topUsers = calloc((size_t)cpucount, sizeof *topUsers);

  //storage for top users count of chunk
  topUserCount = malloc(sizeof *topUserCount * (size_t)cpucount);

  if (topUsers == NULL || topUserCount == NULL) {
    fprintf(stderr, "\nOut of memory.\n");
    goto cleanup;
  }

  for (i = 0; i < cpucount; i++) {
    topUsers[i] = malloc(sizeof *topUsers[i] * (size_t)minSimUsers);
    //a zero-sized request may legitimately yield NULL
    if (topUsers[i] == NULL && minSimUsers > 0) {
      fprintf(stderr, "\nOut of memory.\n");
      goto cleanup;
    }
  }

  //apply the personalized page rank for each user, one chunk at a time
  for (i = 0; i < numUsers; i+=cpucount) {

#pragma omp parallel default(none) private(j) shared(users, topUsers, topUserCount, adjMat) \
  firstprivate(i, minSimUsers, numUsers, cpucount)
    {
#pragma omp for
      for (j = 0; j < cpucount; j++) {
        if (i+j < numUsers) {
          //find top users for users[i+j]
          //get the top rank vertices from personalized page rank iteration
          topUserCount[j] = getTopSimUsers(adjMat, users[i+j], topUsers[j], minSimUsers);
        }
      }
    }

    //write the values for users chunks (serially, so lines keep input order)
    for (j = 0; j < cpucount; j++) {
      if (i+j < numUsers) {
        //user
        printf("%d", users[i+j]);
        for (k = 0; k < topUserCount[j]; k ++) {
          //print the top similar users with corresponding pr;
          //the type of .val is declared outside this file, so it is cast
          //to make the format specifier match whatever width it has
          printf("\t%lld:%f", (long long)topUsers[j][k].val, topUsers[j][k].key);
        }
        printf("\n");
      }
    }

  }

  status = 0;

cleanup:
  //adjMat and users are left to process exit: their matching release
  //routines are not visible from this function
  if (topUsers != NULL) {
    for (i = 0; i < cpucount; i++) {
      free(topUsers[i]);
    }
  }
  free(topUsers);
  free(topUserCount);

  return status;
}
Ejemplo n.º 3
0
Archivo: sd.c Proyecto: shizunge/XU_1
/*
 * Reads the matrix named by params->infstem and, for every row, looks up its
 * most similar rows with gk_csr_GetSimilarRows() using GK_CSR_COS.
 *
 * Fields of params read here:
 *   infstem   - input file name
 *   outfile   - output file name; when it is NULL no results are written
 *   nnbrs     - passed through to gk_csr_GetSimilarRows()
 *   minsim    - passed through to gk_csr_GetSimilarRows()
 *   verbosity - when > 0 a progress line is printed per query row
 *   timer_1   - wall-clock timer wrapped around the search loop
 *
 * Output lines have the form "<query row> <hit val> <hit key>".
 */
void ComputeNeighbors(params_t *params)
{
  int i, j, nhits;
  gk_csr_t *mat;
  int32_t *marker;
  gk_fkv_t *hits, *cand;
  FILE *fpout;

  printf("Reading data for %s...\n", params->infstem);

  mat = gk_csr_Read(params->infstem, GK_CSR_FMT_CSR, 1, 0);

  /* rowptr's element type is declared outside this file; the cast keeps the
     format specifier correct whatever its width */
  printf("#docs: %d, #nnz: %lld.\n", mat->nrows, (long long)mat->rowptr[mat->nrows]);

  /* compact the column-space of the matrices */
  gk_csr_CompactColumns(mat);

  /* perform auxiliary normalizations/pre-computations based on similarity */
  gk_csr_Normalize(mat, GK_CSR_ROW, 2);

  /* create the inverted index */
  gk_csr_CreateIndex(mat, GK_CSR_COL);

  /* create the output file */
  fpout = (params->outfile ? gk_fopen(params->outfile, "w", "ComputeNeighbors: fpout") : NULL);

  /* allocate memory for the necessary working arrays, one slot per row */
  hits   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: hits");
  marker = gk_i32smalloc(mat->nrows, -1, "ComputeNeighbors: marker");
  cand   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: cand");


  /* find the best neighbors for each query document */
  gk_startwctimer(params->timer_1);
  for (i=0; i<mat->nrows; i++) {
    if (params->verbosity > 0)
      printf("Working on query %7d\n", i);

    /* find the neighbors of the ith document; the query is row i itself,
       passed as (length, indices, values) slices of the matrix arrays */
    nhits = gk_csr_GetSimilarRows(mat,
                 mat->rowptr[i+1]-mat->rowptr[i],
                 mat->rowind+mat->rowptr[i],
                 mat->rowval+mat->rowptr[i],
                 GK_CSR_COS, params->nnbrs, params->minsim, hits,
                 marker, cand);

    /* write the results in the file */
    if (fpout) {
      for (j=0; j<nhits; j++) {
        /* .val's type is declared outside this file; cast so the specifier
           matches regardless of its width */
        fprintf(fpout, "%8d %8lld %.3f\n", i, (long long)hits[j].val, hits[j].key);
      }
    }
  }
  gk_stopwctimer(params->timer_1);


  /* cleanup and exit */
  if (fpout) gk_fclose(fpout);

  gk_free((void **)&hits, &marker, &cand, LTERM);

  gk_csr_Free(&mat);

  return;
}
Ejemplo n.º 4
0
/*
 * Reads a graph (GK_CSR_FMT_METIS) from params->infile, runs
 * gk_rw_PageRank() on it and writes the resulting scores to params->outfile.
 *
 * The mode is chosen from the parsed command line:
 *   ntvs != -1 : ntvs runs, each started from a random normalized vector;
 *                one output line per vertex, one column per run.
 *   ppr  != -1 : a single run started from a vector that is 1.0 at vertex
 *                ppr (1-based) and 0.0 elsewhere; one score per line.
 *   otherwise  : a single run started from the uniform vector; each output
 *                line holds the score, the row length, the largest absolute
 *                score difference to a neighbor and that neighbor (1-based).
 *
 * Returns 0 on success and 1 when ppr does not name a vertex of the graph.
 */
int main(int argc, char *argv[])
{
  ssize_t i, j, niter;
  params_t *params;
  gk_csr_t *mat;
  FILE *fpout;

  /* get command-line options */
  params = parse_cmdline(argc, argv);

  /* read the data */
  mat = gk_csr_Read(params->infile, GK_CSR_FMT_METIS, 1, 1);

  /* display some basic stats */
  print_init_info(params, mat);



  if (params->ntvs != -1) {
    /* compute the pr for different randomly generated restart-distribution vectors */
    float **prs;

    prs = gk_fAllocMatrix(params->ntvs, mat->nrows, 0.0, "main: prs");

    /* generate the random restart vectors */
    for (j=0; j<params->ntvs; j++) {
      for (i=0; i<mat->nrows; i++)
        prs[j][i] = RandomInRange(931);
      /* scale the vector so that its entries sum to one */
      gk_fscale(mat->nrows, 1.0/gk_fsum(mat->nrows, prs[j], 1), prs[j], 1);

      niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, prs[j]);
      printf("tvs#: %zd; niters: %zd\n", j, niter);
    }

    /* output the computed pr scores */
    fpout = gk_fopen(params->outfile, "w", "main: outfile");
    for (i=0; i<mat->nrows; i++) {
      for (j=0; j<params->ntvs; j++)
        fprintf(fpout, "%.4e ", prs[j][i]);
      fprintf(fpout, "\n");
    }
    gk_fclose(fpout);

    gk_fFreeMatrix(&prs, params->ntvs, mat->nrows);
  }
  else if (params->ppr != -1) {
    /* compute the personalized pr from the specified vertex */
    float *pr;

    /* ppr indexes pr[] as ppr-1 below, so it must lie in 1..nrows;
       anything else would write outside the vector */
    if (params->ppr < 1 || params->ppr > mat->nrows) {
      fprintf(stderr, "main: ppr vertex %d is outside the graph.\n", params->ppr);
      gk_csr_Free(&mat);
      return 1;
    }

    pr = gk_fsmalloc(mat->nrows, 0.0, "main: pr");

    pr[params->ppr-1] = 1.0;

    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
    printf("ppr: %d; niters: %zd\n", params->ppr, niter);

    /* output the computed pr scores */
    fpout = gk_fopen(params->outfile, "w", "main: outfile");
    for (i=0; i<mat->nrows; i++)
      fprintf(fpout, "%.4e\n", pr[i]);
    gk_fclose(fpout);

    gk_free((void **)&pr, LTERM);
  }
  else {
    /* compute the standard pr */
    int jmax;
    float diff, maxdiff;
    float *pr;

    pr = gk_fsmalloc(mat->nrows, 1.0/mat->nrows, "main: pr");

    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
    printf("pr; niters: %zd\n", niter);

    /* output the computed pr scores */
    fpout = gk_fopen(params->outfile, "w", "main: outfile");
    for (i=0; i<mat->nrows; i++) {
      /* find the neighbor of i whose score differs most from i's own;
         jmax stays at i when the row is empty or all differences are zero */
      jmax = i;
      maxdiff = 0.0;
      for (j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) {
        diff = fabs(pr[i]-pr[mat->rowind[j]]);
        if (diff > maxdiff) {
          maxdiff = diff;
          jmax = mat->rowind[j];
        }
      }
      /* vertices are reported 1-based */
      fprintf(fpout, "%.4e %10zd %.4e %10d\n", pr[i],
          mat->rowptr[i+1]-mat->rowptr[i], maxdiff, jmax+1);
    }
    gk_fclose(fpout);

    gk_free((void **)&pr, LTERM);
  }

  gk_csr_Free(&mat);

  /* display some final stats */
  print_final_info(params);

  return 0;
}