Пример #1
0
/*
 * Ranks the vertices of adjMat by the score that gk_rw_PageRank assigns them
 * when the restart distribution is concentrated entirely on `user`, and
 * copies the best-scoring ones into topUsers.
 *
 * adjMat   - matrix handed to gk_rw_PageRank; adjMat->nrows is used as the
 *            number of vertices.
 * user     - index of the query vertex; it is never reported as its own
 *            neighbour.
 * topUsers - output array; must have room for at least min(nsim, #hits)
 *            entries. Each entry gets key = score and val = vertex index,
 *            ordered by gk_fkvsortd (presumably descending key -- confirm
 *            against GKlib).
 * nsim     - maximum number of entries wanted.
 *
 * Returns the number of entries written to topUsers. Returns 0 when `user`
 * is out of range, when the work array cannot be allocated, when nsim <= 0,
 * or when no other vertex ends up with a positive score.
 */
int getTopSimUsers(gk_csr_t *adjMat, int user, gk_fkv_t *topUsers, int nsim) {
  float *pr;          /* restart distribution in, per-vertex scores out */
  gk_fkv_t *pRanks;   /* (score, vertex) pairs with a positive score */
  int i, j, count;
  int iter = 0;

  /* An out-of-range user would make the pr[user] store below write outside
     the array, so treat it as "no similar users". */
  if (user < 0 || user >= adjMat->nrows) {
    return 0;
  }

  /* calloc rather than malloc: every vertex other than `user` must start at
     zero, otherwise PageRank and the "> 0" scans below read garbage. */
  pr = calloc((size_t)adjMat->nrows, sizeof *pr);
  if (pr == NULL) {
    fprintf(stderr, "getTopSimUsers: out of memory\n");
    return 0;
  }

  /* put the whole restart mass on the query user */
  pr[user] = 1.0f;

  /* The constants are passed through unchanged; presumably they are the
     restart/damping factor, the convergence tolerance and the iteration
     cap -- confirm against the GKlib gk_rw_PageRank documentation. */
  iter = gk_rw_PageRank(adjMat, 0.5, 0.000001, 100, pr);
  fprintf(stderr, "Iter: %d\n", iter);

  /* first pass: count vertices (other than user) with a positive score so
     the key/value array can be sized exactly */
  count = 0;
  for (i = 0; i < adjMat->nrows; i++) {
    if (pr[i] > 0 && i != user) {
      count++;
    }
  }

  pRanks = gk_fkvmalloc(count, "store page ranks");

  /* second pass: collect (score, vertex) pairs using the same predicate */
  for (i = 0, j = 0; i < adjMat->nrows; i++) {
    if (pr[i] > 0 && i != user) {
      pRanks[j].key = pr[i];
      pRanks[j].val = i;
      j++;
    }
  }

  nsim = gk_min(nsim, count);
  if (nsim > 0) {
    /* move the nsim best entries to the front, then order just that prefix */
    gk_dfkvkselect(count, nsim, pRanks);
    gk_fkvsortd(nsim, pRanks);
    gk_fkvcopy(nsim, pRanks, topUsers);
  } else {
    /* nothing to report; also keeps a negative nsim from being used as a
       count by the select/sort/copy routines */
    nsim = 0;
  }

  free(pr);
  /* NOTE(review): pRanks comes from gk_fkvmalloc; if GKlib allocation
     tracking is in use, gk_free may be the matching release call -- confirm. */
  free(pRanks);

  return nsim;
}
Пример #2
0
Файл: sd.c Проект: shizunge/XU_1
/*
 * Reads the CSR matrix named by params->infstem and, for every row, asks
 * gk_csr_GetSimilarRows for that row's most similar rows. When
 * params->outfile is set, each hit is written to that file as a line of the
 * form "<query row> <hit row> <similarity>".
 *
 * Fields of params used here: infstem (input name), outfile (optional output
 * path; NULL disables writing), verbosity (> 0 prints per-query progress),
 * nnbrs and minsim (forwarded to gk_csr_GetSimilarRows), and timer_1 (wall
 * clock timer wrapped around the query loop, file writes included).
 */
void ComputeNeighbors(params_t *params)
{
  int i, j, nhits;
  gk_csr_t *mat;
  int32_t *marker;
  gk_fkv_t *hits, *cand;
  FILE *fpout;

  printf("Reading data for %s...\n", params->infstem);

  mat = gk_csr_Read(params->infstem, GK_CSR_FMT_CSR, 1, 0);

  /* rowptr entries may be wider than int (ssize_t in GKlib), so widen
     explicitly instead of passing them to %d */
  printf("#docs: %d, #nnz: %lld.\n", mat->nrows, (long long)mat->rowptr[mat->nrows]);

  /* compact the column-space of the matrices */
  gk_csr_CompactColumns(mat);

  /* perform auxiliary normalizations/pre-computations based on similarity
     (presumably scales each row to unit 2-norm -- confirm against GKlib) */
  gk_csr_Normalize(mat, GK_CSR_ROW, 2);

  /* create the inverted index */
  gk_csr_CreateIndex(mat, GK_CSR_COL);

  /* create the output file; fpout stays NULL when no output was requested */
  fpout = (params->outfile ? gk_fopen(params->outfile, "w", "ComputeNeighbors: fpout") : NULL);

  /* allocate memory for the necessary working arrays, one slot per row;
     marker starts filled with -1 */
  hits   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: hits");
  marker = gk_i32smalloc(mat->nrows, -1, "ComputeNeighbors: marker");
  cand   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: cand");


  /* find the best neighbors for each query document */
  gk_startwctimer(params->timer_1);
  for (i=0; i<mat->nrows; i++) {
    if (params->verbosity > 0)
      printf("Working on query %7d\n", i);

    /* find the neighbors of the ith document: the query is row i itself,
       passed as (length, column indices, values) slices of the CSR arrays */
    nhits = gk_csr_GetSimilarRows(mat,
                 mat->rowptr[i+1]-mat->rowptr[i],
                 mat->rowind+mat->rowptr[i],
                 mat->rowval+mat->rowptr[i],
                 GK_CSR_COS, params->nnbrs, params->minsim, hits,
                 marker, cand);

    /* write the results in the file; val may be wider than int, so widen it
       explicitly to match the conversion specifier */
    if (fpout) {
      for (j=0; j<nhits; j++)
        fprintf(fpout, "%8d %8lld %.3f\n", i, (long long)hits[j].val, hits[j].key);
    }
  }
  gk_stopwctimer(params->timer_1);


  /* cleanup and exit */
  if (fpout) gk_fclose(fpout);

  gk_free((void **)&hits, &marker, &cand, LTERM);

  gk_csr_Free(&mat);
}