예제 #1
0
파일: nn.c 프로젝트: GarfieldEr007/yael
/*
 * Multi-threaded k-nearest-neighbour search that returns the distance table.
 *
 * A buffer of k*npt floats is obtained from fvec_new and handed to
 * knn_full_thread as its distance output, together with `vw` as the index
 * output. The first argument passed to knn_full_thread is the distance type,
 * fixed to 2 here (presumably squared L2, judging by the original local name
 * "vwdis2" -- confirm against knn_full_thread).
 *
 * npt, nclust, d, k, codebook, coords, vw and n_thread are forwarded
 * unchanged to knn_full_thread.
 *
 * Returns the buffer obtained from fvec_new. It is not released here, so the
 * caller is responsible for releasing it.
 */
float *knn_thread (int npt, int nclust, int d, int k,
		   const float *codebook, const float *coords,
		   int *vw, int n_thread) 
{
  float *dist_out = fvec_new (k * npt);

  knn_full_thread (2, npt, nclust, d, k,
                   codebook, coords, NULL,
                   vw, dist_out, n_thread);

  return dist_out;
}
예제 #2
0
파일: nn.c 프로젝트: GarfieldEr007/yael
/*
 * Multi-threaded 1-nearest-neighbour search that returns the summed distance.
 *
 * Calls knn_full_thread with k = 1 and distance type 2, writing the index of
 * the nearest neighbour of each of the npt points into `vw`. The per-point
 * distances go into a temporary buffer that is summed with fvec_sum and then
 * freed before returning.
 *
 * Returns the sum over all npt points of the distance reported by
 * knn_full_thread.
 */
double nn_thread (int npt, int nclust, int d,
		  const float *codebook, const float *coords,
		  int *vw, int n_thread)
{
  float *nearest_dis = fvec_new (npt);
  double total_error;

  knn_full_thread (2, npt, nclust, d, 1,
                   codebook, coords, NULL,
                   vw, nearest_dis, n_thread);

  total_error = fvec_sum (nearest_dis, npt);
  free (nearest_dis);

  return total_error;
}
예제 #3
0
/// Learn `ncenter` centroids with k-means on the learning set `lds`, then
/// assign every vector of the base set `ds` to its nearest centroid.
///
/// Side effects on the object: `centroid` is replaced by a freshly allocated
/// ncenter*d buffer, one entry per base vector is appended to `assign`, and
/// `member` is rebuilt from `assign` via list2inverted.
///
/// @param ds     base data set (reads ds->d, ds->n, ds->data)
/// @param lds    learning data set (reads lds->n, lds->data)
/// @param niter  iteration count forwarded to kmeans
/// @param nth    thread count forwarded to kmeans and knn_full_thread
/// @param seed   random seed forwarded to kmeans
/// @param nredo  number of restarts forwarded to kmeans
void Clustering::generate_cluster(fDataSet *ds, fDataSet *lds, int niter, int nth, int seed, int nredo)
{
	int	i;
	int	d = ds->d;
	int	n = ds->n;
	int	n_l = lds->n;

	/// allocate storage space for necessary data
	float	*tmp_dis = fvec_new_set(n, -1);
	centroid = fvec_new_set(ncenter*d, 0);
	int 	*tmp_assign = ivec_new_set(n, -1);


	/// learning for centroids
	printf("-------------- kmeans on learning set -------------\nn_l-%d, nt-%d\n", n_l, nth);
	float quantierror = kmeans(d, n_l, ncenter, niter, lds->data, nth, seed, nredo, centroid, NULL, NULL, NULL);
	printf(">>> finished clustering learning, quantization error: %f\n", quantierror);


	/// find 1-nn among all centroids for each base vector: query=basedata, dataset=centroids
	knn_full_thread (
				2,				// euclidean distance
				n, ncenter, d,
				1,				// 1-nn
				centroid, ds->data, NULL, tmp_assign, tmp_dis, nth);

	/// extract the assign and the member belongness
	for(i = 0; i < n; i++){
		assign.push_back(tmp_assign[i]);
	}

	member = list2inverted(assign, ncenter);
	puts(">>> finished cluster assign and member points extraction");

	puts("member count for each cluster");
	for(i = 0;  i < ncenter; i++){
		// size() is not an int (presumably size_t); convert explicitly so the
		// argument really matches the "%d" conversion.
		printf("%d - %d\n", i, static_cast<int>(member[i].size()));
	}

	/// disallocation
	FREE(tmp_dis);
	FREE(tmp_assign);
}
예제 #4
0
/// Register, for every cluster, the other clusters that appear among the
/// 2nd..g-th nearest centroids of the points whose nearest centroid it is.
///
/// For each base vector the g nearest centroids are computed; the first one
/// is taken as the vector's own cluster and each of the remaining g-1 is
/// appended (once) to `neighbor[own cluster]`.
///
/// `d` and `g` are not declared in this function -- presumably class members;
/// confirm against the class definition.
///
/// @param ds   base data set (reads ds->n and ds->data)
/// @param nth  thread count forwarded to knn_full_thread
/// @return     average number of registered neighbours per cluster
float ANC::neighbor_cluster_estimation(const fDataSet *ds, int nth)
{
	/// check for necessary data: centroids, basedata
	// NOTE(review): ASSERTINFO is given the *failure* condition here, so it
	// appears to fire when its argument is true -- confirm against the macro.
	ASSERTINFO(ds == NULL || centroid == NULL || ds->data == NULL, "IPP");

	/// prepare for necessary variables
	neighbor.resize(ncenter);
	int	i, iclu = -1, ineighbor = -1;
	int	K = ncenter;
	int	n = ds->n;
	int	*tmp_assign = ivec_new_set(n * g, -1);
	float	*tmp_dis = fvec_new_0(n * g);
	// ncenter x ncenter table: entry [a*ncenter+b] is 1 once b has been
	// registered as a neighbour of a, so each pair is pushed only once
	int *neighbor_flag = ivec_new_set(ncenter*ncenter, 0);

	/// find g-nn among all centroids for each base vector: query=basedata, dataset=centroids
	knn_full_thread (
				2,		// euclidean distance
				n, K, d,
				g,		// g-nn
				centroid, ds->data, NULL, tmp_assign, tmp_dis, nth);

	// extract neighbor clusters for each cluster
	for(i = 0; i < n; i++)
	{
		iclu = tmp_assign[i*g];					// current cluster = current point's 1-NN
		for (int ig = 1; ig < g; ig++){
			ineighbor = tmp_assign[i*g+ig];				// current neighbor cluster = current point's (ig+1)-th NN
			if(0 == neighbor_flag[iclu*ncenter+ineighbor]){
				neighbor[iclu].push_back(ineighbor);
				neighbor_flag[iclu*ncenter+ineighbor] = 1;
			}
		}
	}

	puts("end neighbor");

	// check number of neighbor cluster
	for(i = 0; i < K; i++){
		ASSERTINFO(neighbor[i].size() == 0, "warning: there is a cluster who has no neighbors");
	}
	puts(">>> finished neighbor cluster registration");

	///### display neighbor cluster count
	puts(">>> neighbor cluster");
	int sum_neighbor = 0;
	for(i = 0;  i < K; i++){
		// printf("\n%d - %d\t", i, neighbor[i].size());
		sum_neighbor += neighbor[i].size();
	}
	float avg_neighbor = sum_neighbor / (float)K;

	/*
	if(K <= 10){
		for(i = 0;  i < K; i++){
			printf("\n%d - %d\t", i, neighbor[i].size());
			for(ineighbor = 0; ineighbor < neighbor[i].size(); ineighbor++){
				printf("%d ", neighbor[i][ineighbor]);
			}
		}
	}*/


	/// disallocate space
	FREE(tmp_assign);
	FREE(tmp_dis);
	// the flag table was previously never released, leaking
	// ncenter*ncenter ints on every call
	FREE(neighbor_flag);

	return avg_neighbor;
}
예제 #5
0
파일: knn.c 프로젝트: GarfieldEr007/yael
/*
 * Command-line k-nearest-neighbour search.
 *
 * Parses the options below, reads the database and query vectors with
 * my_fvec_read, runs knn_full_thread followed by knn_reorder_shortlist, and
 * writes the resulting distances and neighbour indices to two output files.
 *
 *   -h | --help        print usage
 *   -silence/-verbose  set verbosity to 0 / 2 (default 1)
 *   -k, -d, -nt        number of neighbours, dimension, number of threads
 *   -nb, -nq           number of database / query vectors
 *   -b/-bb/-bt  file   database file in fvec / bvec / text format
 *   -q/-qb/-qt  file   query file in fvec / bvec / text format
 *   -onn/-onnt  file   index output in ivec / text format (default "nn.out")
 *   -odis/-odist file  distance output in fvec / text format (default "dis.out")
 *
 * Arguments that match none of the options are ignored, as is an option
 * given as the last argument without its value.
 *
 * Returns 0 on completion; failures are reported through assert.
 */
int main (int argc, char ** argv)
{
  int i;
  int k = 10;
  int d = 0;
  int nb = 0;
  int nq = 0;
  int nt = count_cpu();
  int verbose = 1;
  int ret = 0;

  int fmt_b = FMT_FVEC;
  int fmt_q = FMT_FVEC;
  int fmt_nn = FMT_IVEC;
  int fmt_dis = FMT_FVEC;

  const char * fb_name = NULL;    /* database filename */
  const char * fq_name = NULL;    /* query filename */
  const char * fnn_name = "nn.out";   /* nn idx filename */
  const char * fdis_name = "dis.out";  /* nn dis filename */

  if (argc == 1)
    usage (argv[0]);

  for (i = 1 ; i < argc ; i++) {
    char *a = argv[i];

    if (!strcmp (a, "-h") || !strcmp (a, "--help"))
      usage (argv[0]);
    else if (!strcmp (a, "-silence")) {
      verbose = 0;
    }
    else if (!strcmp (a, "-verbose")) {
      verbose = 2;
    }
    /* Numeric options: sscanf must convert exactly one item. Comparing with 1
       (instead of testing for non-zero) also rejects the EOF return value. */
    else if (!strcmp (a, "-k") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &k);
      assert (ret == 1);
    }
    else if (!strcmp (a, "-d") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &d);
      assert (ret == 1);
    }
    else if (!strcmp (a, "-nt") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &nt);
      assert (ret == 1);
    }
    else if (!strcmp (a, "-nb") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &nb);
      assert (ret == 1);
    }
    else if (!strcmp (a, "-nq") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &nq);
      assert (ret == 1);
    }
    /* Database file, one option per input format */
    else if (!strcmp (a, "-b") && i+1 < argc) {
      fb_name = argv[++i];
      fmt_b = FMT_FVEC;
    }
    else if (!strcmp (a, "-bb") && i+1 < argc) {
      fb_name = argv[++i];
      fmt_b = FMT_BVEC;
    }
    else if (!strcmp (a, "-bt") && i+1 < argc) {
      fb_name = argv[++i];
      fmt_b = FMT_TEXT;
    }
    /* Query file, one option per input format */
    else if (!strcmp (a, "-q") && i+1 < argc) {
      fq_name = argv[++i];
      fmt_q = FMT_FVEC;
    }
    else if (!strcmp (a, "-qb") && i+1 < argc) {
      fq_name = argv[++i];
      fmt_q = FMT_BVEC;
    }
    else if (!strcmp (a, "-qt") && i+1 < argc) {
      fq_name = argv[++i];
      fmt_q = FMT_TEXT;
    }
    /* Output files for neighbour indices and distances */
    else if (!strcmp (a, "-onn") && i+1 < argc) {
      fnn_name = argv[++i];
      fmt_nn = FMT_IVEC;
    }
    else if (!strcmp (a, "-onnt") && i+1 < argc) {
      fnn_name = argv[++i];
      fmt_nn = FMT_TEXT;
    }
    else if (!strcmp (a, "-odis") && i+1 < argc) {
      fdis_name = argv[++i];
      fmt_dis = FMT_FVEC;
    }
    else if (!strcmp (a, "-odist") && i+1 < argc) {
      fdis_name = argv[++i];
      fmt_dis = FMT_TEXT;
    }
  }

  /* both input files are mandatory */
  assert (fb_name && fq_name);

  fprintf (stderr, "k = %d\nd = %d\nnt = %d\n", k, d, nt);

  if (verbose) {
    fprintf (stderr, "fb = %s  (fmt = %s)\n", fb_name,
             (fmt_b == FMT_FVEC ? "fvec" : (fmt_b == FMT_BVEC ? "bvec" : "txt")));
    fprintf (stderr, "fq = %s  (fmt = %s)\n", fq_name,
             (fmt_q == FMT_FVEC ? "fvec" : (fmt_q == FMT_BVEC ? "bvec" : "txt")));
    fprintf (stderr, "fnn = %s  (fmt = %s)\n", fnn_name,
             (fmt_nn == FMT_IVEC ? "ivec" : "txt"));
    fprintf (stderr, "fdis = %s  (fmt = %s)\n", fdis_name,
             (fmt_dis == FMT_FVEC ? "fvec" : "txt"));
  }


  /* read the input vectors for database and queries */
  float * vb = my_fvec_read (fb_name, fmt_b, verbose, &nb, &d);
  float * vq = my_fvec_read (fq_name, fmt_q, verbose, &nq, &d);


  /* Search: k results per query, stored contiguously per query */
  int * idx = ivec_new (k * nq);
  float * dis = fvec_new (k * nq);

  knn_full_thread (2, nq, nb, d, k, vb, vq, NULL, idx, dis, nt);
  knn_reorder_shortlist (nq, nb, d, k, vb, vq, idx, dis);

  /* write the distance output file */
  if (fmt_dis == FMT_FVEC)
    ret = fvecs_write (fdis_name, k, nq, dis);
  else if (fmt_dis == FMT_TEXT)
    ret = fvecs_write_txt (fdis_name, k, nq, dis);
  /* "0 && msg" is always false, so the assertion really fires and shows the
     message; the former "0 || msg" was always true and could never fail */
  else assert (0 && "Unknow output format\n");
  assert (ret == nq);

  /* write the nearest-neighbour index output file */
  if (fmt_nn == FMT_IVEC)
    ret = ivecs_write (fnn_name, k, nq, idx);
  else if (fmt_nn == FMT_TEXT)
    ret = ivecs_write_txt (fnn_name, k, nq, idx);
  else assert (0 && "Unknow output format\n");
  assert (ret == nq);

  free (idx);
  free (dis);
  free (vb);
  free (vq);
  return 0;
}
예제 #6
0
/// Register, for every cluster, the clusters that are the second-nearest
/// centroid of at least one point whose nearest centroid it is.
///
/// For each base vector the two nearest centroids are computed; the first is
/// taken as the vector's own cluster and the second is appended (once) to
/// `neighbor[own cluster]`. The per-cluster neighbour counts and their
/// average are printed to stdout.
///
/// @param ds   base data set (reads ds->n, ds->d and ds->data)
/// @param nth  thread count forwarded to knn_full_thread
void Clustering::neighbor_cluster_estimation(const fDataSet *ds, int nth)
{
	/// check for necessary data: centroids, basedata
	// NOTE(review): ASSERTINFO is given the *failure* condition here, so it
	// appears to fire when its argument is true -- confirm against the macro.
	ASSERTINFO(ds == NULL || centroid == NULL || ds->data == NULL, "IPP");

	/// prepare for necessary variables
	neighbor.resize(ncenter);
	int	i, iclu = -1, ineighbor = -1;
	int	K = ncenter;
	int	n = ds->n;
	int	d = ds->d;
	int	*tmp_assign = ivec_new_set(n * 2, -1);
	float	*tmp_dis = fvec_new_0(n * 2);
	// ncenter x ncenter table: entry [a*ncenter+b] is 1 once b has been
	// registered as a neighbour of a, so each pair is pushed only once
	int *neighbor_flag = ivec_new_set(ncenter*ncenter, 0);

	/// find k-nn among all centroids for each base vector: query=basedata, dataset=centroids, k=2 for neighbor cluster
	knn_full_thread (
				2,		// euclidean distance
				n, K, d,
				2,		// 2-nn
				centroid, ds->data, NULL, tmp_assign, tmp_dis, nth);

	// extract neighbor clusters for each cluster
	for(i = 0; i < n; i++)
	{
		iclu = tmp_assign[i*2];					// current cluster = current point's 1-NN
		ineighbor = tmp_assign[i*2+1];				// current neighbor cluster = current point's 2-NN
		if(0 == neighbor_flag[iclu*ncenter+ineighbor]){
			neighbor[iclu].push_back(ineighbor);
			neighbor_flag[iclu*ncenter+ineighbor] = 1;
		}

	}
	puts(">>> finished neighbor cluster registration");

	///### display neighbor cluster count
	puts(">>> neighbor cluster");
	int sum_neighbor = 0;
	for(i = 0;  i < K; i++){
		// size() is not an int (presumably size_t); convert explicitly so the
		// argument really matches the "%d" conversion.
		printf("\n%d - %d\t", i, static_cast<int>(neighbor[i].size()));
		sum_neighbor += neighbor[i].size();
	}
	printf("\naveragely %lf neighbors\n", sum_neighbor / (float)K);

	/*
	if(K <= 500){
		for(i = 0;  i < K; i++){
			printf("\n%d - %d\t", i, neighbor[i].size());
			int ineighbor;
			for(ineighbor = 0; ineighbor < neighbor[i].size(); ineighbor++){
				printf("%d ", neighbor[i][ineighbor]);
			}
		}
	}
	*/
	// the average is printed a second time, as in the original output
	printf("\naveragely %lf neighbors\n", sum_neighbor / (float)K);

	/// disallocate space
	FREE(tmp_assign);
	FREE(tmp_dis);
	// the flag table was previously never released, leaking
	// ncenter*ncenter ints on every call
	FREE(neighbor_flag);
}
예제 #7
0
/*
 * MATLAB MEX gateway for a k-nearest-neighbour search.
 *
 * Inputs (prhs):
 *   [0] database vectors, single precision, one vector per column (d x n)
 *   [1] query vectors, single precision, one vector per column (d x nq)
 *   [2] optional: k, number of neighbours to return (default 1)
 *   [3] optional: distance type passed to the search routine (default 2)
 *   [4] optional: number of threads (default 1)
 *
 * Outputs (plhs), both k x nq:
 *   [0] int32 indices of the neighbours, converted to 1-based
 *   [1] single-precision distances as returned by the search routine
 *
 * Errors are raised through mexErrMsgTxt.
 */
void mexFunction (int nlhs, mxArray *plhs[],
                  int nrhs, const mxArray*prhs[])

{
  if (nrhs < 2 || nrhs > 5) 
    mexErrMsgTxt ("Invalid number of input arguments");
  
  if (nlhs != 2)
    mexErrMsgTxt ("2 output arguments required");

  int d = mxGetM (prhs[0]);
  int n = mxGetN (prhs[0]);
  int nq = mxGetN (prhs[1]);
  int nt = 1;

  if (mxGetM (prhs[1]) != d)
      mexErrMsgTxt("Dimension of base and query vectors are not consistent");
  
  
  if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS 
      || mxGetClassID(prhs[1]) != mxSINGLE_CLASS )
    mexErrMsgTxt ("need single precision array"); 


  /* mxGetData is the type-agnostic accessor; mxGetPr is meant for double
     arrays only and is rejected for other classes by the interleaved-complex
     MEX API. The class check above guarantees the float interpretation. */
  float *b = (float*) mxGetData (prhs[0]);  /* database vectors */
  float *v = (float*) mxGetData (prhs[1]);  /* query vectors */
  int k = 1; 
  int distype = 2;

  if (nrhs >= 3)
    k = (int) mxGetScalar(prhs[2]);

  if (nrhs >= 4)
    distype = (int) mxGetScalar(prhs[3]);

  /* In practice, the following is not used (all threads by default) */
  if (nrhs >= 5)
    nt = (int) mxGetScalar(prhs[4]); 

  if (n < k) 
    mexErrMsgTxt("fewer vectors than number to be returned");    


  /* output: neighbour indices (int32) and distances (single), both k x nq */

  plhs[0] = mxCreateNumericMatrix (k, nq, mxINT32_CLASS, mxREAL);
  int *assign = (int*) mxGetData (plhs[0]);
  
  plhs[1] = mxCreateNumericMatrix (k, nq, mxSINGLE_CLASS, mxREAL);
  float *dis = (float*) mxGetData (plhs[1]);

  /* With Matlab, we have to avoid using threads for the L2 distance, 
     because this one makes a call to MKL, which is not thread-safe */
  if (distype == 2 || distype == 16)
    knn_full (distype, nq, n, d, k, b, v, NULL, assign, dis);
  else
    knn_full_thread (distype, nq, n, d, k, b, v, NULL, assign, dis, nt);

  /* post-processing: convert the 0-based indices to 1-based Matlab indices */

  int i;

  for (i = 0 ; i < nq * k ; i++)
    assign[i]++;
}