Exemple #1
0
/*
 * Finds frequent itemsets in a collection of transactions.
 *
 * The transactions are copied into a CSR matrix (one row per transaction,
 * one column per item id), projected once with itemsets_project_matrix(),
 * and then searched with itemsets_find_frequent_itemsets().
 *
 * ntrans          - number of transactions.
 * tranptr         - ntrans+1 offsets into tranind; copied into the CSR row
 *                   pointer, so tranptr[ntrans] is the total item count.
 * tranind         - item ids of all transactions (tranptr[ntrans] entries).
 * minfreq,maxfreq - frequency bounds stored in the search parameters;
 *                   maxfreq == -1 selects the number of transactions.
 * minlen,maxlen   - length bounds stored in the search parameters;
 *                   maxlen == -1 selects the number of item columns.
 * process_itemset - callback stored in the search parameters.
 * stateptr        - opaque pointer stored next to the callback.
 *
 * When there are no transactions or no items the function returns without
 * doing anything (and without touching tranind).
 */
void gk_find_frequent_itemsets(int ntrans, int *tranptr, int *tranind, 
        int minfreq, int maxfreq, int minlen, int maxlen, 
        void (*process_itemset)(void *stateptr, int nitems, int *itemids, 
                                int ntrans, int *transids),
        void *stateptr)
{
  ssize_t i;
  gk_csr_t *mat, *pmat;
  isparams_t params;
  int *pattern;

  /* Nothing to mine on empty input; this also keeps the column-count
     computation below from reading tranind[0] of an empty array. */
  if (ntrans <= 0 || tranptr[ntrans] <= 0)
    return;

  /* Create the matrix */
  mat = gk_csr_Create();
  mat->nrows  = ntrans;
  /* number of columns = largest item id + 1 */
  mat->ncols  = tranind[gk_iargmax(tranptr[ntrans], tranind)]+1;
  mat->rowptr = gk_zmalloc(ntrans+1, "gk_find_frequent_itemsets: mat.rowptr");
  /* element-wise copy: rowptr comes from gk_zmalloc while tranptr is int* */
  for (i=0; i<ntrans+1; i++)
    mat->rowptr[i] = tranptr[i];
  mat->rowind = gk_icopy(tranptr[ntrans], tranind, gk_imalloc(tranptr[ntrans], "gk_find_frequent_itemsets: mat.rowind"));
  mat->colids = gk_iincset(mat->ncols, 0, gk_imalloc(mat->ncols, "gk_find_frequent_itemsets: mat.colids"));

  /* Setup the parameters */
  params.minfreq  = minfreq;
  params.maxfreq  = (maxfreq == -1 ? mat->nrows : maxfreq);
  params.minlen   = minlen;
  params.maxlen   = (maxlen == -1 ? mat->ncols : maxlen);
  params.tnitems  = mat->ncols;
  params.callback = process_itemset;
  params.stateptr = stateptr;
  params.rmarker  = gk_ismalloc(mat->nrows, 0, "gk_find_frequent_itemsets: rmarker");
  params.cand     = gk_ikvmalloc(mat->ncols, "gk_find_frequent_itemsets: cand");

  /* Perform the initial projection; the full matrix is not needed afterwards */
  gk_csr_CreateIndex(mat, GK_CSR_COL);
  pmat = itemsets_project_matrix(&params, mat, -1);
  gk_csr_Free(&mat);

  /* Scratch array for the itemset being built, one slot per projected column */
  pattern = gk_imalloc(pmat->ncols, "gk_find_frequent_itemsets: pattern");
  itemsets_find_frequent_itemsets(&params, pmat, 0, pattern); 

  /* Release everything allocated here */
  gk_csr_Free(&pmat);
  gk_free((void **)&pattern, &params.rmarker, &params.cand, LTERM);

}
Exemple #2
0
/*
 * Reads a sparse matrix and a dense vector from the two files named on the
 * command line, builds the column index of the matrix, and prints one
 * accumulated value per matrix row.
 *
 * argv[1] - matrix data file; its n lines are loaded by getDataA() and
 *           turned into CSR arrays by parseInput().
 * argv[2] - vector data file; its nb lines are loaded by getDataB().
 *
 * For every column i and every stored entry (r, v) of that column the loop
 * adds b[i] * v to result[r] -- presumably the product A * b, assuming
 * colind holds row indices after gk_csr_CreateIndex(); confirm against the
 * library.
 *
 * Returns 0 on success; exits with status -1 on bad arguments, allocation
 * failure, or a parse error.
 */
int main(int argc, char** argv)
{
	int n;
	int nb;
	char **a;
	float *b;
	gk_csr_t *mat;
	float *result;
	int i, j;

	if (argc != 3)
	{
		printf("error: invalid arguments\n");
		exit(-1);
	}

	/* Zero the whole struct so that every pointer field we do not set
	   ourselves starts out NULL before the library sees it. */
	mat = calloc(1, sizeof *mat);
	if (mat == NULL)
	{
		printf("ERROR: Out of memory.\n");
		exit(-1);
	}

	n = numberOfLines(argv[1]);
	nb = numberOfLines(argv[2]);
	if (n < 0 || nb < 0)
	{
		/* a negative count can never be a valid array size */
		printf("ERROR: Failed to read input files.\n");
		exit(-1);
	}
	mat->nrows = nb;
	mat->ncols = nb;
	a = getDataA(argv[1], n);
	b = getDataB(argv[2], nb);

	/* Size each array from the field's own element type so the allocation
	   stays correct whatever type gk_csr_t declares for it. */
	mat->rowptr = calloc((size_t)nb + 1, sizeof *mat->rowptr);
	mat->rowval = malloc((size_t)n * sizeof *mat->rowval);
	mat->rowind = malloc((size_t)n * sizeof *mat->rowind);
	if (mat->rowptr == NULL ||
	    (n > 0 && (mat->rowval == NULL || mat->rowind == NULL)))
	{
		printf("ERROR: Out of memory.\n");
		exit(-1);
	}

	if (parseInput(a, mat, n, nb) == -1)
	{
		printf("ERROR: Failed to parse data.\n");
		exit(-1);
	}

	/* Build the column-wise view (colptr/colind/colval) of the matrix. */
	gk_csr_CreateIndex(mat, GK_CSR_COL);

	/* result starts at zero because the loop below only accumulates. */
	result = calloc((size_t)nb, sizeof *result);
	if (nb > 0 && result == NULL)
	{
		printf("ERROR: Out of memory.\n");
		exit(-1);
	}

	for (i = 0; i < nb; i++)
	{
		int total = mat->colptr[i+1] - mat->colptr[i];
		int * startind = mat->colind + mat->colptr[i];
		float * startval = mat->colval + mat->colptr[i];
		for (j = 0; j < total; j++)
		{
			result[startind[j]] += b[i] * startval[j];
		}
	}

	for (i = 0; i < nb; i++)
	{
		printf("%f\n", result[i]);
	}

	/* The matrix and input buffers are left for the OS to reclaim at exit. */
	free(result);

	return 0;
}
Exemple #3
0
/*
 * Uses every row of the input matrix as a query against the matrix itself
 * and optionally writes the rows that gk_csr_GetSimilarRows() returns.
 *
 * Fields of params that are read here:
 *   infstem  - name passed to gk_csr_Read() to load the matrix.
 *   outfile  - output file name; when NULL nothing is written.
 *   nnbrs    - passed to gk_csr_GetSimilarRows() (neighbor count).
 *   minsim   - passed to gk_csr_GetSimilarRows() (similarity threshold).
 *   verbosity- when > 0 a progress line is printed per query.
 *   timer_1  - wall-clock timer wrapped around the query loop.
 *
 * Each output line is "<query row> <hit val> <hit key>" with the key
 * printed to three decimals.
 */
void ComputeNeighbors(params_t *params)
{
  int i, j, nhits;
  gk_csr_t *mat;
  int32_t *marker;
  gk_fkv_t *hits, *cand;
  FILE *fpout;

  printf("Reading data for %s...\n", params->infstem);

  mat = gk_csr_Read(params->infstem, GK_CSR_FMT_CSR, 1, 0);

  /* rowptr entries may be wider than int (NOTE(review): ssize_t in GKlib --
     confirm); go through long long so the conversion always matches. */
  printf("#docs: %d, #nnz: %lld.\n", mat->nrows, (long long)mat->rowptr[mat->nrows]);

  /* compact the column-space of the matrices */
  gk_csr_CompactColumns(mat);

  /* perform auxiliary normalizations/pre-computations based on similarity */
  gk_csr_Normalize(mat, GK_CSR_ROW, 2);

  /* create the inverted index */
  gk_csr_CreateIndex(mat, GK_CSR_COL);

  /* create the output file */
  fpout = (params->outfile ? gk_fopen(params->outfile, "w", "ComputeNeighbors: fpout") : NULL);

  /* allocate memory for the necessary working arrays */
  hits   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: hits");
  marker = gk_i32smalloc(mat->nrows, -1, "ComputeNeighbors: marker");
  cand   = gk_fkvmalloc(mat->nrows, "ComputeNeighbors: cand");


  /* find the best neighbors for each query document */
  gk_startwctimer(params->timer_1);
  for (i=0; i<mat->nrows; i++) {
    if (params->verbosity > 0)
      printf("Working on query %7d\n", i);

    /* find the neighbors of the ith document: the query is row i itself,
       given as its length, column indices and values */ 
    nhits = gk_csr_GetSimilarRows(mat, 
                 mat->rowptr[i+1]-mat->rowptr[i], 
                 mat->rowind+mat->rowptr[i], 
                 mat->rowval+mat->rowptr[i], 
                 GK_CSR_COS, params->nnbrs, params->minsim, hits, 
                 marker, cand);

    /* write the results in the file */
    if (fpout) {
      /* hits[j].val is cast for the same reason as rowptr above; the
         printed text is unchanged for in-range values */
      for (j=0; j<nhits; j++) 
        fprintf(fpout, "%8d %8lld %.3f\n", i, (long long)hits[j].val, hits[j].key);
    }
  }
  gk_stopwctimer(params->timer_1);


  /* cleanup and exit */
  if (fpout) gk_fclose(fpout);

  gk_free((void **)&hits, &marker, &cand, LTERM);

  gk_csr_Free(&mat);

  return;
}