예제 #1
0
파일: itemsets.c 프로젝트: certik/libmesh
/*
 * Enumerates the frequent itemsets of a transaction database.
 *
 * The transactions arrive in a CSR-like layout: tranptr has ntrans+1
 * entries and tranind holds tranptr[ntrans] item ids. The data is copied
 * into a gk_csr_t, a column index is built on it, it is projected once
 * with itemsets_project_matrix(), and the recursive search
 * itemsets_find_frequent_itemsets() is started on the projected matrix.
 *
 *   minfreq, maxfreq  stored in the search parameters; maxfreq == -1 is
 *                     replaced by the number of transactions
 *   minlen, maxlen    stored in the search parameters; maxlen == -1 is
 *                     replaced by the number of columns
 *   process_itemset   stored as the search callback
 *   stateptr          opaque pointer stored next to the callback
 *
 * NOTE(review): the column count is taken from tranind at the index
 * returned by gk_iargmax (presumably the largest item id), so tranind[0]
 * is read even when tranptr[ntrans] is 0 -- confirm callers never pass an
 * empty database.
 */
void gk_find_frequent_itemsets(int ntrans, int *tranptr, int *tranind, 
        int minfreq, int maxfreq, int minlen, int maxlen, 
        void (*process_itemset)(void *stateptr, int nitems, int *itemids, 
                                int ntrans, int *transids),
        void *stateptr)
{
  ssize_t row;
  gk_csr_t *tmat, *projected;
  isparams_t params;
  int *itemset;

  /* Build a private CSR copy of the transactions */
  tmat = gk_csr_Create();
  tmat->nrows = ntrans;
  tmat->ncols = tranind[gk_iargmax(tranptr[ntrans], tranind)]+1;

  tmat->rowptr = gk_zmalloc(ntrans+1, "gk_find_frequent_itemsets: mat.rowptr");
  for (row=0; row<=ntrans; row++)
    tmat->rowptr[row] = tranptr[row];

  tmat->rowind = gk_imalloc(tranptr[ntrans], "gk_find_frequent_itemsets: mat.rowind");
  gk_icopy(tranptr[ntrans], tranind, tmat->rowind);

  tmat->colids = gk_imalloc(tmat->ncols, "gk_find_frequent_itemsets: mat.colids");
  gk_iincset(tmat->ncols, 0, tmat->colids);

  /* Fill in the search parameters; -1 selects the matrix dimension */
  params.minfreq = minfreq;
  params.maxfreq = maxfreq;
  if (maxfreq == -1)
    params.maxfreq = tmat->nrows;

  params.minlen = minlen;
  params.maxlen = maxlen;
  if (maxlen == -1)
    params.maxlen = tmat->ncols;

  params.tnitems  = tmat->ncols;
  params.callback = process_itemset;
  params.stateptr = stateptr;

  /* Work arrays used by the projection routine */
  params.rmarker = gk_ismalloc(tmat->nrows, 0, "gk_find_frequent_itemsets: rmarker");
  params.cand    = gk_ikvmalloc(tmat->ncols, "gk_find_frequent_itemsets: cand");

  /* Initial projection; the unprojected copy is not needed afterwards */
  gk_csr_CreateIndex(tmat, GK_CSR_COL);
  projected = itemsets_project_matrix(&params, tmat, -1);
  gk_csr_Free(&tmat);

  /* The prefix buffer has one slot per column of the projected matrix */
  itemset = gk_imalloc(projected->ncols, "gk_find_frequent_itemsets: pattern");
  itemsets_find_frequent_itemsets(&params, projected, 0, itemset);

  /* Release everything allocated here */
  gk_csr_Free(&projected);
  gk_free((void **)&itemset, &params.rmarker, &params.cand, LTERM);
}
예제 #2
0
파일: itemsets.c 프로젝트: certik/libmesh
/*
 * Recursive step of the frequent-itemset search.
 *
 * For every column of mat, the column's id is appended to the current
 * prefix (prefix[0..preflen-1]). If the extended prefix has at least
 * params->minlen items, the callback is invoked with the prefix and the
 * column's slice of mat->colind. While the extended prefix is shorter
 * than params->maxlen, the matrix is projected on that column and the
 * search recurses on the result.
 *
 * prefix must have room for at least preflen+1 entries.
 */
void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat, 
         int preflen, int *prefix)
{
  const int newlen = preflen+1;   /* length of the prefix once extended */
  ssize_t col;
  gk_csr_t *projected;

  for (col=0; col<mat->ncols; col++) {
    /* extend the prefix with this column's item id */
    prefix[preflen] = mat->colids[col];

    /* report the itemset once it is long enough */
    if (newlen >= params->minlen) {
      (*params->callback)(params->stateptr, newlen, prefix, 
           mat->colptr[col+1]-mat->colptr[col], 
           mat->colind+mat->colptr[col]);
    }

    /* stop growing this branch once the maximum length is reached */
    if (newlen >= params->maxlen)
      continue;

    projected = itemsets_project_matrix(params, mat, col);
    itemsets_find_frequent_itemsets(params, projected, newlen, prefix);
    gk_csr_Free(&projected);
  }
}
예제 #3
0
파일: sd.c 프로젝트: shizunge/XU_1
/*
 * Reads the matrix named by params->infstem and, for every row, queries
 * gk_csr_GetSimilarRows() with that row's own entries (GK_CSR_COS,
 * params->nnbrs, params->minsim). When params->outfile is set, each hit
 * is written to that file as "<query row> <hit value> <hit key>"; when
 * it is not set, the hits are computed but not written anywhere.
 *
 * The query loop is bracketed by params->timer_1.
 */
void ComputeNeighbors(params_t *params)
{
  int i, j, nhits;
  gk_csr_t *mat;
  int32_t *marker;
  gk_fkv_t *hits, *cand;
  FILE *fpout;

  printf("Reading data for %s...\n", params->infstem);

  mat = gk_csr_Read(params->infstem, GK_CSR_FMT_CSR, 1, 0);

  /* rowptr entries may be wider than int; cast so that the conversion
     specifier matches the argument whatever the element type is */
  printf("#docs: %d, #nnz: %lld.\n", mat->nrows, 
      (long long)mat->rowptr[mat->nrows]);

  /* compact the column-space of the matrices */
  gk_csr_CompactColumns(mat);

  /* perform auxiliary normalizations/pre-computations based on similarity */
  gk_csr_Normalize(mat, GK_CSR_ROW, 2);

  /* create the inverted index */
  gk_csr_CreateIndex(mat, GK_CSR_COL);

  /* create the output file */
  fpout = (params->outfile ? gk_fopen(params->outfile, "w", "ComputeNeighbors: fpout") : NULL);

  /* allocate memory for the necessary working arrays */
  hits   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: hits");
  marker = gk_i32smalloc(mat->nrows, -1, "ComputeNeighbors: marker");
  cand   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: cand");


  /* find the best neighbors for each query document */
  gk_startwctimer(params->timer_1);
  for (i=0; i<mat->nrows; i++) {
    if (params->verbosity > 0)
      printf("Working on query %7d\n", i);

    /* find the neighbors of the ith document */ 
    nhits = gk_csr_GetSimilarRows(mat, 
                 mat->rowptr[i+1]-mat->rowptr[i], 
                 mat->rowind+mat->rowptr[i], 
                 mat->rowval+mat->rowptr[i], 
                 GK_CSR_COS, params->nnbrs, params->minsim, hits, 
                 marker, cand);

    /* write the results in the file; the hit value is cast for the same
       reason as above, so the printed text is unchanged */
    if (fpout) {
      for (j=0; j<nhits; j++) 
        fprintf(fpout, "%8d %8lld %.3f\n", i, (long long)hits[j].val, 
            hits[j].key);
    }
  }
  gk_stopwctimer(params->timer_1);


  /* cleanup and exit */
  if (fpout) gk_fclose(fpout);

  gk_free((void **)&hits, &marker, &cand, LTERM);

  gk_csr_Free(&mat);

  return;
}
예제 #4
0
파일: all_common.c 프로젝트: WDavidX/par
/*
 * Releases a vault: frees the matrix it holds and then the vault
 * structure itself. Passing a null pointer is a no-op.
 */
void FreeVault(vault_t *vault)
{
  /* nothing to release, and vault->mat must not be touched */
  if (!vault)
    return;

  gk_csr_Free(&vault->mat);

  gk_free((void **)&vault, LTERM);
}
예제 #5
0
int main(int argc, char *argv[])
{
  ssize_t i, j, niter;
  params_t *params;
  gk_csr_t *mat;
  FILE *fpout;
 
  /* get command-line options */
  params = parse_cmdline(argc, argv);

  /* read the data */
  mat = gk_csr_Read(params->infile, GK_CSR_FMT_METIS, 1, 1);

  /* display some basic stats */
  print_init_info(params, mat);



  if (params->ntvs != -1) {
    /* compute the pr for different randomly generated restart-distribution vectors */
    float **prs;

    prs = gk_fAllocMatrix(params->ntvs, mat->nrows, 0.0, "main: prs");

    /* generate the random restart vectors */
    for (j=0; j<params->ntvs; j++) {
      for (i=0; i<mat->nrows; i++)
        prs[j][i] = RandomInRange(931);
      gk_fscale(mat->nrows, 1.0/gk_fsum(mat->nrows, prs[j], 1), prs[j], 1);

      niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, prs[j]);
      printf("tvs#: %zd; niters: %zd\n", j, niter);
    }

    /* output the computed pr scores */
    fpout = gk_fopen(params->outfile, "w", "main: outfile");
    for (i=0; i<mat->nrows; i++) {
      for (j=0; j<params->ntvs; j++) 
        fprintf(fpout, "%.4e ", prs[j][i]);
      fprintf(fpout, "\n");
    }
    gk_fclose(fpout);

    gk_fFreeMatrix(&prs, params->ntvs, mat->nrows);
  }
  else if (params->ppr != -1) {
    /* compute the personalized pr from the specified vertex */
    float *pr;

    pr = gk_fsmalloc(mat->nrows, 0.0, "main: pr");

    pr[params->ppr-1] = 1.0;

    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
    printf("ppr: %d; niters: %zd\n", params->ppr, niter);

    /* output the computed pr scores */
    fpout = gk_fopen(params->outfile, "w", "main: outfile");
    for (i=0; i<mat->nrows; i++) 
      fprintf(fpout, "%.4e\n", pr[i]);
    gk_fclose(fpout);

    gk_free((void **)&pr, LTERM);
  }
  else {
    /* compute the standard pr */
    int jmax;
    float diff, maxdiff;
    float *pr;

    pr = gk_fsmalloc(mat->nrows, 1.0/mat->nrows, "main: pr");

    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
    printf("pr; niters: %zd\n", niter);

    /* output the computed pr scores */
    fpout = gk_fopen(params->outfile, "w", "main: outfile");
    for (i=0; i<mat->nrows; i++) {
      for (jmax=i, maxdiff=0.0, j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) {
        if ((diff = fabs(pr[i]-pr[mat->rowind[j]])) > maxdiff) {
          maxdiff = diff;
          jmax = mat->rowind[j];
        }
      }
      fprintf(fpout, "%.4e %10zd %.4e %10d\n", pr[i], 
          mat->rowptr[i+1]-mat->rowptr[i], maxdiff, jmax+1);
    }
    gk_fclose(fpout);

    gk_free((void **)&pr, LTERM);
  }

  gk_csr_Free(&mat);

  /* display some final stats */
  print_final_info(params);
}