/**
 * Locates an eye centre by repeatedly splitting the pixels of an eye image
 * into two k-means clusters and keeping the darker one.
 *
 * Every pixel becomes one feature row: its three colour channels (taken after
 * equalizeImage() and a median blur) followed by the columns returned by
 * matrixPointCoordinates() scaled by coordinateWeight. On each pass the
 * cluster whose centre has the smaller channel sum survives; index_img counts
 * how many passes each pixel survived ("layers").
 *
 * @param image             3-channel input; converted with CV_BGR2GRAY.
 * @param coordinateWeight  scale applied to the coordinate feature columns.
 * @param kmeansIterations  column count of the bookkeeping matrix; the split
 *                          runs at most kmeansIterations - 1 times.
 * @param kmeansRepeats     used both as the k-means iteration cap and as the
 *                          number of k-means attempts.
 * @param blurSize          aperture of the initial median blur; also passed as
 *                          the kernel argument of the two erode() calls.
 * @return Point(x, y) where x is the arg-max column of the summed symmetry
 *         score of the layer-weighted image and y comes from
 *         findMassCenter_BinaryBiggestBlob() on the selected layer mask.
 */
Point findEyeCenterByColorSegmentation(const Mat& image, float coordinateWeight, int kmeansIterations, int kmeansRepeats, int blurSize)  {

    Mat img, gray_img;
    Mat colorpoints, kmeansPoints;

    img = equalizeImage(image);

    medianBlur(img, img, blurSize);
    cvtColor(image, gray_img, CV_BGR2GRAY);
    gray_img = imcomplement(gray_img);

    // Flatten every channel into a column so that each pixel is one row.
    vector<Mat> layers(3);
    split(img, layers);
    for (size_t i = 0; i < layers.size(); i++) {
        layers[i] = layers[i].reshape(1,1).t();
    }
    hconcat(layers, colorpoints);

    // add coordinates
    colorpoints.convertTo(colorpoints, CV_32FC1);
    Mat coordinates = matrixPointCoordinates(img.rows,img.cols,false) *coordinateWeight;
    hconcat(colorpoints, coordinates, kmeansPoints);

    // locIndex(i, it) is the original linear pixel index of the i-th point
    // that is still alive after pass `it`; -1 marks unused slots.
    Mat locIndex(img.size().area(),kmeansIterations,CV_32FC1,Scalar::all(-1));
    linspace(0, img.size().area(), 1).copyTo(locIndex.col(0));
    Mat index_img(img.rows,img.cols,CV_32FC1,Scalar::all(0));
    Mat bestLabels, centers, clustered , colorsum , minColorPtIndex;
    for(int it = 1 ; it < kmeansIterations ; it++) {
        if (kmeansPoints.rows < 2) {
            break;  // cannot split fewer than two points into two clusters
        }
        kmeans(kmeansPoints,2,bestLabels,TermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, kmeansRepeats, 0.001),kmeansRepeats,KMEANS_PP_CENTERS,centers);
        reduce(centers.colRange(0, 3), colorsum, 1, CV_REDUCE_SUM);

        // Keep the cluster whose centre has the smaller colour sum.
        if (colorsum.at<float>(0) < colorsum.at<float>(1)) {
            findNonZero(bestLabels==0, minColorPtIndex);
        }
        else {
            findNonZero(bestLabels==1, minColorPtIndex);
        }

        // findNonZero yields (x, y) pairs; the y column is the row index.
        minColorPtIndex = minColorPtIndex.reshape(1).col(1);

        for (int  i = 0; i <minColorPtIndex.rows ; i ++) {
            locIndex.at<float>(i,it) = locIndex.at<float>(minColorPtIndex.at<int>(i),it-1);
        }
        Mat temp;
        for (int  i = 0; i <minColorPtIndex.rows ; i ++) {
            temp.push_back(kmeansPoints.row(minColorPtIndex.at<int>(i)));
        }
        temp.copyTo(kmeansPoints);
        temp.release();
        for (int i = 0 ; i < minColorPtIndex.rows ; i ++) {
            int r, c;
            ind2sub(locIndex.at<float>(i,it), index_img.cols, index_img.rows, r, c);
            index_img.at<float>(r,c) +=1;
        }
    }
//    imagesc("layered",mat2gray(index_img));
    Mat layerweighted_img = index_img.mul(index_img);
    layerweighted_img = mat2gray(layerweighted_img);
    gray_img.convertTo(gray_img, CV_32FC1,1/255.0);
    Mat composed  = gray_img.mul(layerweighted_img);
    Mat score = calculateImageSymmetryScore(composed);
    Mat scoresum;
    reduce(score.rowRange(0, composed.cols/6), scoresum, 0, CV_REDUCE_SUM,CV_32FC1);
//    plotVectors("live", scoresum.t());
    double minVal , maxVal;
    Point minLoc, maxLoc;
    minMaxLoc(scoresum,&minVal,&maxVal,&minLoc,&maxLoc);
    const int initialHC = maxLoc.x;

    int bestx = 0;
    Mat bestIndex_img = index_img >=1;

    // Capture the deepest layer once. The per-layer minMaxLoc() below must not
    // write into the variable that bounds this loop, otherwise the number of
    // layers examined would depend on the score values.
    double minLayer = 0, maxLayer = 0;
    minMaxLoc(index_img,&minLayer,&maxLayer);
    const int layerCount = static_cast<int>(maxLayer);

    for (int i = 1 ; i <= layerCount; i++) {
        Mat indexlayer_img = index_img >=i;
        medianBlur(indexlayer_img, indexlayer_img, 5);
        erode(indexlayer_img, indexlayer_img, blurSize);
        erode(indexlayer_img, indexlayer_img, blurSize);
        indexlayer_img = removeSmallBlobs(indexlayer_img);

        indexlayer_img = fillHoleInBinary(indexlayer_img);
        indexlayer_img = fillConvexHulls(indexlayer_img);
        Mat layerScore = calculateImageSymmetryScore(indexlayer_img);
        Mat layerScoreSum;
        reduce(layerScore.rowRange(0, indexlayer_img.cols/6), layerScoreSum, 0, CV_REDUCE_SUM,CV_32FC1);

        double layerMin = 0, layerMax = 0;
        Point layerMinLoc, layerMaxLoc;
        minMaxLoc(layerScoreSum,&layerMin,&layerMax,&layerMinLoc,&layerMaxLoc);

        // Prefer the layer whose symmetry peak is closest to the initial
        // estimate, provided the mask covers between 1/6 and 2/5 of the image.
        if (abs(layerMaxLoc.x - initialHC) < abs(bestx - initialHC)) {
            const double maskPixels = sum(indexlayer_img)[0]/255;
            if (maskPixels < indexlayer_img.size().area()/5*2 &&
                maskPixels > indexlayer_img.size().area()/6) {
                bestx = layerMaxLoc.x;
                bestIndex_img = indexlayer_img.clone();
            }
        }
    }

    Point massCenter = findMassCenter_BinaryBiggestBlob(bestIndex_img);

    return Point(initialHC,massCenter.y);
}
예제 #2
0
파일: example2.c 프로젝트: pramsey/kmeans
/*
 * Demo driver: builds k clusters of nptsincluster synthetic 2-D points each,
 * picks k random input points as starting centres, runs kmeans() and prints
 * the iteration count and timing. Returns 0 on success, 1 when a buffer
 * cannot be allocated.
 */
int
main(int nargs, char **args)
{
	kmeans_config config;
	kmeans_result result;
	int i, j;
	int spread = 3;
	point *pts;
	point *init;
	int print_results = 0;
	time_t start;
	long elapsed;

	int nptsincluster = 10000;
	int k = 10;
	int num_objs = k * nptsincluster;

	srand(time(NULL));

	/* Constants */
	config.k = k;
	config.num_objs = num_objs;
	config.max_iterations = 200;
	config.distance_method = pt_distance;
	config.centroid_method = pt_centroid;

	/* Inputs for K-means */
	config.objs = calloc(num_objs, sizeof(Pointer));
	config.centers = calloc(k, sizeof(Pointer));
	config.clusters = calloc(num_objs, sizeof(int));

	/* Storage for raw data */
	pts = calloc(num_objs, sizeof(point));
	init = calloc(k, sizeof(point));

	if (!config.objs || !config.centers || !config.clusters || !pts || !init)
	{
		fprintf(stderr, "out of memory\n");
		free(config.objs);
		free(config.clusters);
		free(config.centers);
		free(init);
		free(pts);
		return 1;
	}

	/* Create test data! */
	/* Populate with K gaussian clusters of data */
	/* NOTE(review): the textbook Box-Muller transform uses the natural
	 * logarithm; log2 is kept here exactly as in the original demo. */
	for (j = 0; j < k; j++) {
		for (i = 0; i < nptsincluster; i++)
		{
			double u1 = 1.0 * random() / RAND_MAX;
			double u2 = 1.0 * random() / RAND_MAX;
			double z1 = spread * j + sqrt(-2*log2(u1))*cos(2*M_PI*u2);
			double z2 = spread * j + sqrt(-2*log2(u1))*sin(2*M_PI*u2);
			int n = j*nptsincluster + i;

			/* Populate raw data */
			pts[n].x = z1;
			pts[n].y = z2;

			/* Pointer to raw data */
			config.objs[n] = &(pts[n]);
		}
	}

	/* Populate the initial means vector with random start points */
	for (i = 0; i < k; i++)
	{
		/* rand() can return RAND_MAX, which would scale to num_objs itself
		 * and index one past the end of pts[]; clamp to the last element. */
		int r = lround(num_objs * (1.0 * rand() / RAND_MAX));
		if (r >= num_objs)
			r = num_objs - 1;

		/* Populate raw data */
		init[i] = pts[r];
		/* Pointers to raw data */
		config.centers[i] = &(init[i]);

		if (print_results)
			printf("center[%d]\t%g\t%g\n", i, init[i].x, init[i].y);
	}

	/* run k-means! */
	start = time(NULL);
	result = kmeans(&config);
	elapsed = (long)(time(NULL) - start);

	printf("\n");
	printf("Iteration count: %d\n", config.total_iterations);
	printf("     Time taken: %ld seconds\n", elapsed);
	/* time() has one-second resolution, so a fast run measures 0 seconds */
	if (elapsed > 0)
		printf(" Iterations/sec: %.3g\n", (1.0*config.total_iterations)/elapsed);
	else
		printf(" Iterations/sec: n/a\n");
	printf("\n");

	/* print results */
	if (print_results)
	{
		for (i = 0; i < num_objs; i++)
		{
			point *pt = (point*)(config.objs[i]);

			if (config.objs[i])
				printf("%g\t%g\t%d\n", pt->x, pt->y, config.clusters[i]);
			else
				printf("N\tN\t%d\n", config.clusters[i]);
		}
	}

	free(config.objs);
	free(config.clusters);
	free(config.centers);

	free(init);
	free(pts);

	return 0;
}
예제 #3
0
파일: gmm.c 프로젝트: czxxjtu/videosearch
/*
 * Fits a GMM: kmeans() fills the means, the weights are set to 1/k, every
 * sigma entry is set to the mean of the values kmeans() wrote into its `dis`
 * output, and then up to niter EM rounds are run.
 *
 *   di, ni, ki  dimension, number of vectors, number of Gaussians
 *   niter       round cap for both k-means and EM; 0 is replaced by 10000
 *   v           input vectors, forwarded untouched to kmeans() and the EM steps
 *   nt, seed, nredo  forwarded to kmeans(); nt is also given to the EM steps
 *   flags       GMM_FLAGS_PURE_KMEANS skips the EM loop entirely; the value is
 *               also forwarded to the EM steps
 *
 * Returns the model allocated by gmm_new().
 */
gmm_t * gmm_learn (int di, int ni, int ki, int niter,
                   const float * v, int nt, int seed, int nredo,
                   int flags)
{
    long dim = di;
    long n_gauss = ki;
    long n_pts = ni;

    double prev_key;
    double cur_key = 666;
    int round;

    if (niter == 0)
        niter = 10000;

    /* p(ci|x) for all i, then the model itself */
    float * posterior = fvec_new_0 (n_pts * n_gauss);
    gmm_t * model = gmm_new (dim, n_gauss);

    /* k-means initialisation of the means */
    int * km_nassign = ivec_new (n_pts);  /* only printed for debugging */
    float * km_dis = fvec_new (n_pts);
    kmeans (dim, n_pts, n_gauss, niter, v, nt, seed, nredo,
            model->mu, km_dis, NULL, km_nassign);

    fflush (stderr);
    fprintf (stderr, "assign = ");
    ivec_print (km_nassign, n_gauss);
    fprintf (stderr, "\n");
    free (km_nassign);

    /* uniform weights and one shared value for every sigma entry */
    fvec_set (model->w, n_gauss, 1.0 / n_gauss);
    double init_sigma = fvec_sum (km_dis, n_pts) / n_pts;
    printf ("sigma at initialization = %.3f\n", init_sigma);
    fvec_set (model->sigma, n_gauss * dim, init_sigma);
    free (km_dis);

    fprintf (stdout, "<><><><> GMM  <><><><><>\n");

    if (flags & GMM_FLAGS_PURE_KMEANS)
        niter = 0;

    /* EM rounds; stop early once the sum of all means stops changing */
    round = 1;
    while (round <= niter) {

        gmm_compute_p_thread (n_pts, v, model, posterior, flags, nt);
        fflush (stdout);

        gmm_handle_empty (n_pts, v, model, posterior);

        gmm_compute_params (n_pts, v, posterior, model, flags, nt);
        fflush (stdout);

        prev_key = cur_key;
        cur_key = fvec_sum (model->mu, n_gauss * dim);

        printf ("keys %5d: %.6f -> %.6f\n", round, prev_key, cur_key);
        fflush (stdout);

        if (cur_key == prev_key)
            break;

        round++;
    }
    fprintf (stderr, "\n");

    free (posterior);

    return model;
}
예제 #4
0
/*
 * MEX gateway around kmeans().
 *
 * Inputs : prhs[0]  single-precision d-by-n matrix (one vector per column)
 *          prhs[1]  scalar k
 *          then name/value pairs: 'niter', 'redo', 'seed', 'verbose', 'init'
 *          ('init' must be 0 for random vectors or 1 for Berkeley).
 * Outputs: plhs[0]  d-by-k single centroids
 *          nlhs == 2 : plhs[1] is the int32 assignment (made 1-based)
 *          nlhs >= 3 : plhs[1] is the single `dis` output and plhs[2] the
 *                      int32 assignment (made 1-based)
 *          nlhs == 4 : plhs[3] is the k-by-1 int32 `nassign` output
 */
void mexFunction (int nlhs, mxArray *plhs[],
                  int nrhs, const mxArray*prhs[])

{
  if (nrhs < 2 || nrhs % 2 != 0) 
    mexErrMsgTxt("even nb of input arguments required.");
  else if (nlhs > 4 || nlhs < 1) 
    mexErrMsgTxt("1 to 4 output arguments are expected.");

  int flags = 0;
  int d = mxGetM (prhs[0]);
  int n = mxGetN (prhs[0]);
  long seed = 0L;
  
  if(mxGetClassID(prhs[0])!=mxSINGLE_CLASS)
    mexErrMsgTxt("need single precision array.");

  float *v = (float*) mxGetPr (prhs[0]);
  int k = (int) mxGetScalar (prhs[1]);

  int niter = 50, redo = 1, nt = 1, verbose = 1;
  int init_type = 0;  /* random selection by default */

  {
    int i;
    /* nrhs is even, so every option name at index i has a value at i+1 */
    for(i = 2 ; i < nrhs ; i += 2) {
      char varname[256];
      if (mxGetClassID(prhs[i]) != mxCHAR_CLASS) 
        mexErrMsgTxt ("variable name required");         

      if (mxGetString (prhs[i], varname, 256) != 0)
        mexErrMsgTxt ("Could not convert string data");

      if (!strcmp(varname, "niter")) 
        niter = (int) mxGetScalar (prhs[i+1]);

      else if (!strcmp(varname,"redo")) 
        redo = (int) mxGetScalar (prhs[i+1]);

      else if (!strcmp(varname,"seed")) 
        seed = (int) mxGetScalar (prhs[i+1]);

      else if (!strcmp(varname,"verbose")) 
        verbose = (int) mxGetScalar (prhs[i+1]);

      else if (!strcmp(varname,"init")) {
        init_type = (int) mxGetScalar (prhs[i+1]);
        /* report bad user input as a MEX error instead of aborting the host */
        if (init_type != 0 && init_type != 1)
          mexErrMsgTxt ("init must be 0 or 1");
      }

      else 
        mexErrMsgTxt("unknown variable name");  
    }
  }
  
  if (init_type == 1)  /* Berkeley */
    flags = flags | KMEANS_INIT_BERKELEY;
  else if (init_type == 0) /* random vectors */
    flags = flags | KMEANS_INIT_RANDOM;
  

  flags |= nt;

  if (verbose > 0) {
    printf("Input: %d vectors of dimension %d\nk=%d niter=%d "
           "redo=%d verbose=%d seed=%ld",
           n, d, k, niter, redo, verbose, seed);
    /* the sample values touch v[d+1]; only print them when they exist */
    if ((long) d * n >= (long) d + 2)
      printf(" v1=[%g %g ...], v2=[%g %g... ]", v[0], v[1], v[d], v[d+1]);
    printf("\n");
  }
  else
    flags |= KMEANS_QUIET;
  

  if(n < k) {
    mexErrMsgTxt("fewer points than centroids");    
  }


  /* output: centroids, then (depending on nlhs) distances / assignment / counts */

  plhs[0] = mxCreateNumericMatrix (d, k, mxSINGLE_CLASS, mxREAL);
  float *centroids = (float*) mxGetPr (plhs[0]);

  float * dis = NULL;
  int * assign = NULL;
  int * nassign = NULL;

  if (nlhs == 2) {
    plhs[1] = mxCreateNumericMatrix (n, 1, mxINT32_CLASS, mxREAL);
    assign = (int*) mxGetPr (plhs[1]);
  }
  else if (nlhs >= 3) {
    plhs[1] = mxCreateNumericMatrix (n, 1, mxSINGLE_CLASS, mxREAL);
    dis = (float*) mxGetPr (plhs[1]);
    plhs[2] = mxCreateNumericMatrix (n, 1, mxINT32_CLASS, mxREAL);
    assign = (int*) mxGetPr (plhs[2]);
  }

  if (nlhs >=4)  {
    plhs[3] = mxCreateNumericMatrix (k, 1, mxINT32_CLASS, mxREAL);
    nassign = (int*) mxGetPr (plhs[3]);
  }

/*  [centroids_tmp, dis, assign] = yael_kmeans (vs, ks, 'niter', 100, 'verbose', 0); */
  kmeans (d, n, k, niter, v, flags, seed, 
          redo, centroids, dis, assign, nassign);

  /* post-processing: Matlab starts from 1 */
  if (assign) {
    int i;
    for (i = 0 ; i < n ; i++)
      assign[i]++;
  }
}
예제 #5
0
/*
 * Builds an adaptive hierarchical clustering tree in `ahct`.
 *
 * The root cluster holds every point index of `ds`. Clusters are processed in
 * insertion order: a cluster with ni points gets the branch factor
 * min(bf, round(ni / rho)); below 2 it is marked as a leaf, otherwise its
 * points are split with kmeans() and one child per centroid is appended to
 * `ahct`, so the loop visits the children later.
 *
 *   ahct  output tree (array of Cluster), appended to
 *   bf    maximum branch factor
 *   rho   divisor used to derive the per-cluster branch factor from its size
 *   ds    dataset read through ds->n, ds->d and ds->data
 */
void ahc_clustering(DyArray *ahct, int bf, int rho, const fDataSet *ds){
	ASSERTINFO(ahct == NULL || bf <= 0 || rho <= 0 || ds == NULL, "IPP");

	int		n 	= ds->n;
	int		d 	= ds->d;
	Cluster	_clu, clu, *pclu = NULL;
	int		i;
	float	qerror;							// value returned by kmeans(); not used further
	int		iclu, bfi, ni, ori_id;			// the pointer, branch factor and volume of the i-th cluster
	int 	*nassign = ivec_new_set(bf, 0);
	int 	*assign = NULL;
	float	*cent = fvec_new(d*bf);
	float	*mem_points = NULL;
	DyArray	*member = (DyArray*)malloc(sizeof(DyArray)*bf);

	/* initialize the first cluster (root) to add it to the ahc tree */
	Cluster_init(&clu, n);
	for(i = 0; i < n; i++){
		clu.idx[i] = i;
	}
	clu.type = ClusterType_Root;
	DyArray_add(ahct, (void*)&clu, 1);

	/* begin the loop of adaptive hierarchical clustering */
	iclu = 0;
	while(iclu < ahct->count){
		/* deal with the i-th cluster */
		// figure out the adaptive branch factor of the i-th cluster
		pclu = (Cluster*)DyArray_get(ahct, iclu, 1);
		ni = pclu->npts;
		bfi = i_min(bf, (int)round(ni / (float)rho));

		// deal with the cluster according to its size
		if(bfi < 2){
			/*
			 *	this is a leaf cluster
			 *	- mark it, release the children
			 *	* not necessary to store real data points
			 */
			pclu->type = ClusterType_Leaf;
		}else{
			printf("----------------- cluster %d, bfi-%d:\n", iclu, bfi);

			/*
			 * this is an inner cluster
			 * - divide it
			 */
			memcpy(&_clu, pclu, sizeof(Cluster));

			// extract data points from the original dataset according to the idx
			mem_points = fvec_new(ni * d);
			for(i = 0; i < ni; i++){
				// memcpy counts bytes: copy d floats, not d bytes
				memcpy(mem_points+i*d, ds->data+_clu.idx[i]*d, sizeof(float)*d);
			}

			// divide this cluster
			assign = ivec_new(ni);

			// NOTE(review): looks like leftover debug output for cluster 30 — confirm it is still wanted
			if(iclu == 30){
				ivec_print(_clu.idx, _clu.npts);
			}

			qerror = kmeans(	d, ni, bfi, CLUSTERING_NITER, mem_points,
								CLUSTERING_NTHREAD | KMEANS_QUIET | KMEANS_INIT_BERKELEY, CLUSTERING_SEED, CLUSTERING_NREDO,
								cent, NULL, assign, nassign);
			(void)qerror;

			// prepare space for members' ids
			for(i = 0; i < bfi; i++){
				DyArray_init(&member[i], sizeof(int), nassign[i]);
			}
			// extract member points' ids for each children cluster
			for(i = 0; i < ni; i++){
				ori_id = _clu.idx[i];
				DyArray_add(&member[assign[i]], (void*)&ori_id, 1);
			}

			// fulfill the type, centroids and the children of this cluster, add them to the ahct
			_clu.type = ClusterType_Inner;
			_clu.cents = fvec_new(d * bfi);
			memcpy(_clu.cents, cent, sizeof(float)*d*bfi);

			DyArray_init(&_clu.children, sizeof(int), bfi);
			for(i = 0; i < bfi; i++){
				Cluster_init(&clu, nassign[i]);
				memcpy(clu.idx, (int*)member[i].elem, sizeof(int)*nassign[i]);

				DyArray_add(&_clu.children, (void*)&ahct->count, 1);	/* the i-th child's position */
				DyArray_add(ahct, (void*)&clu, 1);						/* add the i-th child to the ahct */
			}

			/* as per the elems of ahct may change when expanding the space
			 * we decide to get the brand new address of the element
			 */
			pclu = (Cluster*)DyArray_get(ahct, iclu, 1);
			memcpy(pclu, &_clu, sizeof(Cluster));


			/* report */
			ivec_print(nassign, bfi);
			ivec_print((int*)_clu.children.elem, _clu.children.count);

			/* unset or release */
			FREE(mem_points);
			FREE(assign);
			for(i = 0; i < bfi; i++){
				DyArray_unset(&member[i]);
			}
		}

		// move to next cluster
		iclu++;
	}

	FREE(nassign);
	FREE(cent);
	FREE(member);
	pclu = NULL;
}
예제 #6
0
/*
 * Pixel-pipe entry point of the colour-transfer module; what it does depends
 * on data->flag:
 *  - ACQUIRE: on the preview pipe only, capture the input histogram, store its
 *    inverse in data->hist and the kmeans() cluster statistics in
 *    data->mean / data->var, then advance the flags; the input is copied to
 *    the output unchanged.
 *  - APPLY: remap channel 0 through the stored histogram and channels 1 and 2
 *    through the mapped cluster statistics; channel 3 is copied through.
 *  - anything else: copy the input to the output.
 *
 * ivoid / ovoid are read and written as float buffers with piece->colors
 * floats per pixel.
 */
void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid,
             const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  // FIXME: this returns nan!!
  dt_iop_colortransfer_data_t *data = (dt_iop_colortransfer_data_t *)piece->data;
  float *in = (float *)ivoid;
  float *out = (float *)ovoid;
  const int ch = piece->colors;

  if(data->flag == ACQUIRE)
  {
    if(piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW)
    {
      // only get stuff from the preview pipe, rest stays untouched.
      int hist[HISTN];
      // get histogram of L
      capture_histogram(in, roi_in, hist);
      // invert histogram of L
      invert_histogram(hist, data->hist);

      // get n clusters
      kmeans(in, roi_in, data->n, data->mean, data->var);

      // notify gui that commit_params should let stuff flow back!
      data->flag = ACQUIRED;
      dt_iop_colortransfer_params_t *p = (dt_iop_colortransfer_params_t *)self->params;
      p->flag = ACQUIRE2;
    }
    // acquisition never alters the image: pass the input through
    memcpy(out, in, (size_t)sizeof(float) * ch * roi_out->width * roi_out->height);
  }
  else if(data->flag == APPLY)
  {
    // apply histogram of L and clustering of (a,b)
    int hist[HISTN];
    capture_histogram(in, roi_in, hist);
#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(static) shared(roi_out, data, in, out, hist)
#endif
    for(int k = 0; k < roi_out->height; k++)
    {
      // j is the float offset of the first channel of pixel (i, k)
      size_t j = (size_t)ch * roi_out->width * k;
      for(int i = 0; i < roi_out->width; i++)
      {
        // L: match histogram
        out[j] = data->hist[hist[(int)CLAMP(HISTN * in[j] / 100.0, 0, HISTN - 1)]];
        out[j] = CLAMP(out[j], 0, 100);
        j += ch;
      }
    }

    // cluster input buffer
    // (variable-length arrays sized by data->n, one (mean, var) pair per cluster)
    float mean[data->n][2], var[data->n][2];
    kmeans(in, roi_in, data->n, mean, var);

    // get mapping from input clusters to target clusters
    int mapio[data->n];
    get_cluster_mapping(data->n, mean, data->mean, mapio);

// for all pixels: find input cluster, transfer to mapped target cluster
#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(static) shared(roi_out, data, mean, var, mapio, in, out)
#endif
    for(int k = 0; k < roi_out->height; k++)
    {
      // declared inside the parallel loop body, so each iteration has its own weights
      float weight[MAXN];
      size_t j = (size_t)ch * roi_out->width * k;
      for(int i = 0; i < roi_out->width; i++)
      {
        const float L = in[j];
        const float Lab[3] = { L, in[j + 1], in[j + 2] };
// a, b: subtract mean, scale nvar/var, add nmean
#if 0 // single cluster, gives color banding
        const int ki = get_cluster(in + j, data->n, mean);
        out[j+1] = 100.0/out[j] * ((Lab[1] - mean[ki][0])*data->var[mapio[ki]][0]/var[ki][0] + data->mean[mapio[ki]][0]);
        out[j+2] = 100.0/out[j] * ((Lab[2] - mean[ki][1])*data->var[mapio[ki]][1]/var[ki][1] + data->mean[mapio[ki]][1]);
#else // fuzzy weighting
        get_clusters(in + j, data->n, mean, weight);
        out[j + 1] = out[j + 2] = 0.0f;
        // weighted sum over all clusters of the per-cluster transfer
        // NOTE(review): var[c][*] is a divisor here; a zero variance would
        // yield inf/nan, possibly what the FIXME above refers to — confirm
        for(int c = 0; c < data->n; c++)
        {
          out[j + 1] += weight[c] * ((Lab[1] - mean[c][0]) * data->var[mapio[c]][0] / var[c][0]
                                     + data->mean[mapio[c]][0]);
          out[j + 2] += weight[c] * ((Lab[2] - mean[c][1]) * data->var[mapio[c]][1] / var[c][1]
                                     + data->mean[mapio[c]][1]);
        }
#endif
        // fourth channel is copied through unchanged
        out[j + 3] = in[j + 3];
        j += ch;
      }
    }
  }
  else
  {
    // neither acquiring nor applying: pass the input through
    memcpy(out, in, (size_t)sizeof(float) * ch * roi_out->width * roi_out->height);
  }
}
int main(int argc, char *argv[])
{
    ArrayXXd data;

  
    // Creating dummy arrays for the covariates and the observations.
    // They're not used because we compute our Likelihood directly. 

    ArrayXd covariates;
    ArrayXd observations;
    
    
    // -------------------------------------------------------------------
    // ----- First step. Set up the models for the inference problem ----- 
    // -------------------------------------------------------------------

    // Set up a dummy model. This won't be used because we're computing
    // the Likelihood directly, but the Likelihood nevertheless expects a model in 
    // its constructor.
    
    ZeroModel model(covariates);


    // -------------------------------------------------------
    // ----- Second step. Set up all prior distributions -----
    // -------------------------------------------------------

    int Ndimensions = 3;        // Number of free parameters (dimensions) of the problem
    vector<Prior*> ptrPriors(1);
    ArrayXd parametersMinima(Ndimensions);
    ArrayXd parametersMaxima(Ndimensions);
    parametersMinima.fill(-20);         
    parametersMaxima.fill(20);
    UniformPrior uniformPrior(parametersMinima, parametersMaxima);
    ptrPriors[0] = &uniformPrior;
    

    // -----------------------------------------------------------------
    // ----- Third step. Set up the likelihood function to be used -----
    // -----------------------------------------------------------------
    
    SingleNDGaussianLikelihood likelihood(observations, model, Ndimensions);


    // -------------------------------------------------------------------------------
    // ----- Fourth step. Set up the K-means clusterer using an Euclidean metric -----
    // -------------------------------------------------------------------------------

    EuclideanMetric myMetric;
    int minNclusters = 1;
    int maxNclusters = 10;
    int Ntrials = 10;
    double relTolerance = 0.01;

    KmeansClusterer kmeans(myMetric, minNclusters, maxNclusters, Ntrials, relTolerance); 


    // ---------------------------------------------------------------------
    // ----- Sixth step. Configure and start nested sampling inference -----
    // ---------------------------------------------------------------------
    
    bool printOnTheScreen = true;                   // Print results on the screen
    int initialNobjects = 500;                      // Initial number of active points evolving within the nested sampling process.
    int minNobjects = 500;                          // Minimum number of active points allowed in the nesting process.
    int maxNdrawAttempts = 5000;                    // Maximum number of attempts when trying to draw a new sampling point.
    int NinitialIterationsWithoutClustering = 1000; // The first N iterations, we assume that there is only 1 cluster.
    int NiterationsWithSameClustering = 50;         // Clustering is only happening every X iterations.
    double initialEnlargementFraction = 2.0;        // Fraction by which each axis in an ellipsoid has to be enlarged.
                                                    // It can be a number >= 0, where 0 means no enlargement.
    double shrinkingRate = 0.8;                     // Exponent for remaining prior mass in ellipsoid enlargement fraction.
                                                    // It is a number between 0 and 1. The smaller the slower the shrinkage
                                                    // of the ellipsoids.
    double terminationFactor = 0.01;                // Termination factor for nesting loop.


    // Start the computation

    MultiEllipsoidSampler nestedSampler(printOnTheScreen, ptrPriors, likelihood, myMetric, kmeans, 
                                        initialNobjects, minNobjects, initialEnlargementFraction, shrinkingRate);
        
    double tolerance = 1.e2;
    double exponent = 0.4;
    PowerlawReducer livePointsReducer(nestedSampler, tolerance, exponent, terminationFactor);
    //FerozReducer livePointsReducer(nestedSampler, tolerance);

    ostringstream numberString;
    numberString << Ndimensions;
    string outputPathPrefix = "demoSingle" + numberString.str() + "DGaussian_";
    nestedSampler.run(livePointsReducer, NinitialIterationsWithoutClustering, NiterationsWithSameClustering, 
                      maxNdrawAttempts, terminationFactor, outputPathPrefix);

    nestedSampler.outputFile << "# List of configuring parameters used for the ellipsoidal sampler and X-means" << endl;
    nestedSampler.outputFile << "# Row #1: Minimum Nclusters" << endl;
    nestedSampler.outputFile << "# Row #2: Maximum Nclusters" << endl;
    nestedSampler.outputFile << "# Row #3: Initial Enlargement Fraction" << endl;
    nestedSampler.outputFile << "# Row #4: Shrinking Rate" << endl;
    nestedSampler.outputFile << minNclusters << endl;
    nestedSampler.outputFile << maxNclusters << endl;
    nestedSampler.outputFile << initialEnlargementFraction << endl;
    nestedSampler.outputFile << shrinkingRate << endl;
    nestedSampler.outputFile.close();


    // -------------------------------------------------------
    // ----- Last step. Save the results in output files -----
    // -------------------------------------------------------
   
    Results results(nestedSampler);
    results.writeParametersToFile("parameter");
    results.writeLogLikelihoodToFile("logLikelihood.txt");
    results.writeEvidenceInformationToFile("evidenceInformation.txt");
    results.writePosteriorProbabilityToFile("posteriorDistribution.txt");

    double credibleLevel = 68.3;
    bool writeMarginalDistributionToFile = true;
    results.writeParametersSummaryToFile("parameterSummary.txt", credibleLevel, writeMarginalDistributionToFile);


    // That's it!

    return EXIT_SUCCESS;
}
예제 #8
0
/*
 * Command-line driver: reads the initial centroids and the data points from
 * two files, runs kmeans(), prints the elapsed time, and writes the partition
 * (and optionally the final centroids) to the given output files.
 *
 *   argv[1]  centroid file          argv[2]  data file
 *   argv[3]  partition output       argv[4]  optional final centroid output
 *   argv[5]  optional iteration count (DEFAULT_ITERATION otherwise)
 */
int main(int argc, char** argv)
{
	// Kmeans
	int class_n, data_n, iteration_n;
	float *centroids, *data;
	int* partitioned;
	FILE *io_file;
	struct timespec start, end, spent;

	// Check parameters
	if (argc < 4) {
		fprintf(stderr, "usage: %s <centroid file> <data file> <paritioned result> [<final centroids>] [<iteration number>]\n", argv[0]);
		exit(EXIT_FAILURE);
	}

	// Read initial centroid data
	io_file = fopen(argv[1], "rb");
	if (io_file == NULL) {
		fprintf(stderr, "File open error %s\n", argv[1]);
		exit(EXIT_FAILURE);
	}
	class_n = read_data(io_file, &centroids);
	fclose(io_file);

	// Read input data
	io_file = fopen(argv[2], "rb");
	if (io_file == NULL) {
		fprintf(stderr, "File open error %s\n", argv[2]);
		exit(EXIT_FAILURE);
	}
	data_n = read_data(io_file, &data);
	fclose(io_file);

	iteration_n = argc > 5 ? atoi(argv[5]) : DEFAULT_ITERATION;


	partitioned = (int*)malloc(sizeof(int)*data_n);
	if (partitioned == NULL) {
		fprintf(stderr, "Memory allocation error\n");
		exit(EXIT_FAILURE);
	}


	clock_gettime(CLOCK_MONOTONIC, &start);

	// Run Kmeans algorithm
	kmeans(iteration_n, class_n, data_n, (Point*)centroids, (Point*)data, partitioned);

	clock_gettime(CLOCK_MONOTONIC, &end);

	timespec_subtract(&spent, &end, &start);

	printf("Time spent: %ld.%09ld\n", spent.tv_sec, spent.tv_nsec);

	// Write classified result
	// (the output files get the same NULL check as the inputs, so a bad path
	// is reported instead of crashing inside fwrite)
	io_file = fopen(argv[3], "wb");
	if (io_file == NULL) {
		fprintf(stderr, "File open error %s\n", argv[3]);
		exit(EXIT_FAILURE);
	}
	fwrite(&data_n, sizeof(data_n), 1, io_file);
	fwrite(partitioned, sizeof(int), data_n, io_file); 
	fclose(io_file);


	// Write final centroid data
	if (argc > 4) {
		io_file = fopen(argv[4], "wb");
		if (io_file == NULL) {
			fprintf(stderr, "File open error %s\n", argv[4]);
			exit(EXIT_FAILURE);
		}
		fwrite(&class_n, sizeof(class_n), 1, io_file);
		fwrite(centroids, sizeof(Point), class_n, io_file); 
		fclose(io_file);
	}

	// Free allocated buffers
	free(centroids);
	free(data);
	free(partitioned);

	return 0;
}
예제 #9
0
/*
 * Clusters `ngeoms` geometries into `k` groups with 2-D k-means.
 *
 * Point inputs are used directly; every other geometry is represented by its
 * centroid. NULL or empty geometries (and geometries with an empty centroid)
 * get a NULL entry and do not take part in seeding. Seeds are chosen by
 * spreading k points along the diagonal of the bounding box of the inputs and
 * snapping each one to the closest, not yet used, input point.
 *
 * Returns an lwalloc'ed array of ngeoms cluster ids that the caller must free,
 * or NULL after reporting an error through lwerror().
 */
int *
lwgeom_cluster_2d_kmeans(const LWGEOM **geoms, int ngeoms, int k)
{
	int i;
	int num_centroids = 0;
	int num_valid = 0;
	int seeds_ok = 1;
	LWGEOM **centroids;
	POINT2D *centers_raw;
	const POINT2D *cp;
	POINT2D min = { DBL_MAX,   DBL_MAX };
	POINT2D max = { -DBL_MAX, -DBL_MAX };
	double dx, dy;
	kmeans_config config;
	kmeans_result result;
	int *seen;
	int sidx = 0;

	assert(k>0);
	assert(ngeoms>0);
	assert(geoms);

	/* Initialize our static structs */
	memset(&config, 0, sizeof(kmeans_config));
	memset(&result, 0, sizeof(kmeans_result));

	if (ngeoms<k)
	{
		lwerror("%s: number of geometries is less than the number of clusters requested", __func__);
		/* lwerror may return, depending on the installed handler */
		return NULL;
	}

	/* We'll hold the temporary centroid objects here */
	centroids = lwalloc(sizeof(LWGEOM*) * ngeoms);
	memset(centroids, 0, sizeof(LWGEOM*) * ngeoms);

	/* The vector of cluster means. We have to allocate a */
	/* chunk of memory for these because we'll be mutating them */
	/* in the kmeans algorithm */
	centers_raw = lwalloc(sizeof(POINT2D) * k);
	memset(centers_raw, 0, sizeof(POINT2D) * k);

	/* K-means configuration setup */
	config.objs = lwalloc(sizeof(Pointer) * ngeoms);
	config.num_objs = ngeoms;
	config.clusters = lwalloc(sizeof(int) * ngeoms);
	config.centers = lwalloc(sizeof(Pointer) * k);
	config.k = k;
	config.max_iterations = 0;
	config.distance_method = lwkmeans_pt_distance;
	config.centroid_method = lwkmeans_pt_centroid;

	/* Clean the memory */
	memset(config.objs, 0, sizeof(Pointer) * ngeoms);
	memset(config.clusters, 0, sizeof(int) * ngeoms);
	memset(config.centers, 0, sizeof(Pointer) * k);

	/* Prepare the list of object pointers for K-means */
	for (i = 0; i < ngeoms; i++)
	{
		const LWGEOM *geom = geoms[i];
		LWPOINT *lwpoint;

		/* Null/empty geometries get a NULL pointer */
		if ((!geom) || lwgeom_is_empty(geom))
		{
			config.objs[i] = NULL;
			continue;
		}

		/* If the input is a point, use its coordinates */
		/* If its not a point, convert it to one via centroid */
		if (lwgeom_get_type(geom) != POINTTYPE)
		{
			LWGEOM *centroid = lwgeom_centroid(geom);
			if ((!centroid) || lwgeom_is_empty(centroid))
			{
				/* not usable as an input point; release it right away */
				if (centroid)
					lwgeom_free(centroid);
				config.objs[i] = NULL;
				continue;
			}
			centroids[num_centroids++] = centroid;
			lwpoint = lwgeom_as_lwpoint(centroid);
		}
		else
		{
			lwpoint = lwgeom_as_lwpoint(geom);
		}

		/* Store a pointer to the POINT2D we are interested in */
		cp = getPoint2d_cp(lwpoint->point, 0);
		config.objs[i] = (Pointer)cp;
		num_valid++;

		/* Since we're already here, let's calculate the extrema of the set */
		if (cp->x < min.x) min.x = cp->x;
		if (cp->y < min.y) min.y = cp->y;
		if (cp->x > max.x) max.x = cp->x;
		if (cp->y > max.y) max.y = cp->y;
	}

	/* k distinct seeds need k usable points; with fewer, a seed slot would */
	/* end up on a NULL entry and be dereferenced below */
	if (num_valid < k)
	{
		lwerror("%s: number of non-empty geometries is less than the number of clusters requested", __func__);
		seeds_ok = 0;
	}

	/*
	* We map a uniform assignment of points in the area covered by the set
	* onto actual points in the set
	*/
	dx = (max.x - min.x)/k;
	dy = (max.y - min.y)/k;
	seen = lwalloc(sizeof(int)*config.k);
	memset(seen, 0, sizeof(int)*config.k);
	for (i = 0; seeds_ok && i < k; i++)
	{
		int closest;
		POINT2D p;
		int j;

		/* Calculate a point in the range */
		p.x = min.x + dx * (i + 0.5);
		p.y = min.y + dy * (i + 0.5);

		/* Find the data point closest to the calculated point */
		closest = lwkmeans_pt_closest(config.objs, config.num_objs, &p);

		/* If something is terrible wrong w/ data, cannot find a closest */
		if (closest < 0)
		{
			lwerror("unable to calculate cluster seed points, too many NULLs or empties?");
			seeds_ok = 0;
			break;
		}

		/* Ensure we aren't already using that point as a seed, and that */
		/* stepping to the next index never leaves us on a NULL entry. */
		/* Terminates because num_valid >= k > sidx. */
		j = 0;
		while (j < sidx || config.objs[closest] == NULL)
		{
			if (config.objs[closest] == NULL || seen[j] == closest)
			{
				closest = (closest + 1) % config.num_objs;
				j = 0;
			}
			else
			{
				j++;
			}
		}
		seen[sidx++] = closest;

		/* Copy the point coordinates into the initial centers array */
		/* This is ugly, but the centers array is an array of */
		/* pointers to points, not an array of points */
		centers_raw[i] = *((POINT2D*)config.objs[closest]);
		config.centers[i] = &(centers_raw[i]);
	}

	if (seeds_ok)
		result = kmeans(&config);

	/* Before error handling, might as well clean up all the inputs. */
	/* config.objs points into the centroid geometries, so those are */
	/* released only now that kmeans() is done with them. */
	for (i = 0; i < num_centroids; i++)
		lwgeom_free(centroids[i]);
	lwfree(config.objs);
	lwfree(config.centers);
	lwfree(centers_raw);
	lwfree(centroids);
	lwfree(seen);

	/* Seeding failed; the error has already been reported */
	if (!seeds_ok)
	{
		lwfree(config.clusters);
		return NULL;
	}

	/* Good result */
	if (result == KMEANS_OK)
		return config.clusters;

	/* Bad result, not going to need the answer */
	lwfree(config.clusters);
	if (result == KMEANS_EXCEEDED_MAX_ITERATIONS)
	{
		lwerror("%s did not converge after %d iterations", __func__, config.max_iterations);
		return NULL;
	}

	/* Unknown error */
	return NULL;
}
예제 #10
0
/**
 * Clusters the rows of P into K groups with k-means and optionally marks
 * outliers.
 *
 * When removeOutliers is set, every point lying farther than a fixed distance
 * (0.1, in the units of P) from the mean of its own cluster gets the label -1.
 * The per-cluster standard deviation that the previous version computed was
 * never used for the decision (that test was commented out), so it is no
 * longer computed.
 *
 * @param P              one point per row; the first three columns are read
 *                       as coordinates (assumes an N x 3 matrix — TODO confirm
 *                       against callers). Converted to CV_32F locally.
 * @param K              number of clusters.
 * @param removeOutliers enable the distance-based outlier marking.
 * @return N x 1 CV_32S label matrix with values in [0, K) or -1.
 */
Mat TableObjectDetector::clusterObjects(Mat P, int K, bool removeOutliers) {
    Mat L;
    const int attempts = 5;
    P.convertTo(P, CV_32F);
    kmeans(P, K, L, TermCriteria(CV_TERMCRIT_ITER|CV_TERMCRIT_EPS, 10000, 0.0001), attempts, KMEANS_PP_CENTERS);

    if (!removeOutliers) {
        return L;
    }

    const float maxDist = 0.1f;

    // Centroid of each cluster: column-wise mean of its member rows.
    Mat clusterCentroids = Mat::zeros(K, 3, CV_32F);
    for (int k = 0; k < K; k++) {
        Mat members;
        for (int i = 0; i < L.rows; i++) {
            if (L.at<int>(i) == k) {
                members.push_back(P.row(i));
            }
        }
        // An empty cluster keeps the zero centroid; no point refers to it.
        if (!members.empty()) {
            Mat centroidRow = clusterCentroids.row(k);
            reduce(members, centroidRow, 0, CV_REDUCE_AVG);
        }
    }

    // One pass over the points: the distance only depends on the point's own
    // label, so the outlier decision can be taken immediately.
    for (int i = 0; i < L.rows; i++) {
        const int label = L.at<int>(i);
        const float* pt = P.ptr<float>(i);
        const float* centroid = clusterCentroids.ptr<float>(label);

        const double dx = pt[0] - centroid[0];
        const double dy = pt[1] - centroid[1];
        const double dz = pt[2] - centroid[2];
        const float dist = static_cast<float>(std::sqrt(dx*dx + dy*dy + dz*dz));

        if (dist > maxDist) {
            L.at<int>(i) = -1;
        }
    }

    return L;
}
예제 #11
0
/**
 * Run hierarchical k-means with a distance threshold of 0.15 meters.
 *
 * Starting from one cluster holding every row of P, the cluster on top of the
 * work stack is accepted when all of its points lie within the threshold of
 * its mean; otherwise it is split in two with k-means and both halves are
 * pushed back. The search stops after 30 steps, when max_clusters clusters
 * have been accepted, or when the stack is empty.
 *
 * @param P            one point per row, three coordinate columns; converted
 *                     to double internally.
 * @param max_clusters maximum number of accepted clusters.
 * @return P.rows x 1 CV_32S labels. Points whose cluster was never accepted
 *         keep the initial label 0, which is also the id of the first
 *         accepted cluster.
 */
Mat TableObjectDetector::clusterObjectsHierarchical(cv::Mat P, int max_clusters) {
    Mat L = Mat::zeros	(P.rows, 1, CV_32S);
    const double dthresh = 0.15;
    const int num_iter = 30;
    int currentClusterNum = 0;
    std::stack<Cluster*> c_stack;

    // Initialize with a single cluster containing all datapoints.
    // convertTo (rather than copyTo) fills the first three columns in place
    // whatever the depth of P; copyTo would silently reallocate its
    // temporary destination and leave D unset when P is not CV_64F.
    Mat D = Mat(P.rows, 4, CV_64F);
    Mat coords = D.colRange(0, 3);
    P.convertTo(coords, CV_64F);
    // ID each point so we return them in the same order
    for (int i=0; i<D.rows; i++) {
        D.at<double>(i, 3) = i;
    }
    Cluster* root = new Cluster();
    root->setData(D);
    c_stack.push(root);

    // Run hierarchical k-means
    for (int t=0; t<num_iter; t++) {
        if (currentClusterNum == max_clusters || c_stack.empty()) {
            break;
        }

        Cluster* current = c_stack.top();
        c_stack.pop();
        Mat data = current->getData();

        // Nothing to label or split in an empty cluster.
        if (data.empty()) {
            delete current;
            continue;
        }

        // Calculate cluster centroid
        Mat Cmean; reduce(data, Cmean, 0, CV_REDUCE_AVG);
        // Calculate max distance
        double maxDist = 0;
        for (int i=0; i<data.rows; i++) {
            const double dx = data.at<double>(i, 0) - Cmean.at<double>(0);
            const double dy = data.at<double>(i, 1) - Cmean.at<double>(1);
            const double dz = data.at<double>(i, 2) - Cmean.at<double>(2);
            const double dist = sqrt(dx*dx + dy*dy + dz*dz);

            if (dist > maxDist) {
                maxDist = dist;
            }
        }

        // Check to see if this cluster satisfies the conditions
        if (maxDist < dthresh) {
            for (int i=0; i<data.rows; i++) {
                const int idx = (int)data.at<double>(i, 3);
                L.at<int>(idx) = currentClusterNum;
            }
            currentClusterNum++;
        } else {
            // Too spread out: split on the coordinates only (k-means needs
            // CV_32F input); the id column travels with the rows.
            Mat L_iter;
            const int attempts = 5;
            Mat D32;
            data.colRange(0, 3).convertTo(D32, CV_32F);
            kmeans(D32, 2, L_iter, TermCriteria(CV_TERMCRIT_ITER|CV_TERMCRIT_EPS, 10000, 0.0001), attempts, KMEANS_PP_CENTERS);

            Mat D0 = Mat(0, 3, CV_64F);
            Mat D1 = Mat(0, 3, CV_64F);

            for (int i=0; i<L_iter.rows; i++) {
                if (L_iter.at<int>(i) == 0) {
                    D0.push_back(data.row(i));
                } else {
                    D1.push_back(data.row(i));
                }
            }

            Cluster* C0 = new Cluster();
            C0->setData(D0);
            Cluster* C1 = new Cluster();
            C1->setData(D1);

            c_stack.push(C0);
            c_stack.push(C1);
        }

        // This cluster has been fully processed; it was allocated with new.
        delete current;
    }

    // Release whatever is still waiting on the stack when the search stops.
    while (!c_stack.empty()) {
        delete c_stack.top();
        c_stack.pop();
    }

    return L;
}
int main(int argc, char* argv[])
{

  const char* program_name = "auto_heir_decomp_sparse";
  bool optsOK = true;
  gmx::initForCommandLine(&argc,&argv);
  copyright(program_name);
  cout << "   Reads the symmetric CSC format sparse matrix from" << endl;
  cout << "   input-file, and heirarchically decomposes the " << endl;
  cout << "   Laplacian matrix until relaxation time convergence" << endl;
  cout << "   criteria are met as the following reference:" << endl;
  cout << "   [1] B. Nadler and M. Galun, \"Fundamental Limitations" << endl;
  cout << "   of Spectral Clustering,\" in Advances in Neural Information" << endl;
  cout << "   Processing Systems 19, 2007, pp. 1017–1024." << endl;
  cout << "   eigenvalues/vectors of the normalized laplacian" << endl;
  cout << endl;
  cout << "   Use -h or --help to see the complete list of options." << endl;
  cout << endl;

  // Option vars...
  int k_a;
  double sigma;
  int nev = 2;
  double c1 = 1.2;
  double c2 = 2.0;
  double K;
  bool pSet = false;
  string ssm_filename;
  string output_filename;
  string ndx_filename;
  string residuals_filename;

  // Declare the supported options.
  po::options_description cmdline_options;
  po::options_description program_options("Program options");
  program_options.add_options()
    ("help,h", "show this help message and exit")
    ("sigma,s", po::value<double>(&sigma)->default_value(1.0), "Input:  Kernel sigma (double)")
    ("relaxation,r", po::value<double>(&c1)->default_value(1.2), "Input:  Relaxation cutoff parameter, c1 (double)")
    ("partition,p", po::value<double>(&c2)->default_value(2.0), "Input:  Partition cutoff parameter, c2 (double)")
    ("ssm-file,f", po::value<string>(&ssm_filename)->default_value("distances.ssm"), "Input:  Symmetric sparse matrix file (string:filename)")
    ("output,o", po::value<string>(&output_filename)->default_value("clusters.dat"), "Output:  Cluster assignment file (string:filename)")
    ("ndx,n", po::value<string>(&ndx_filename)->default_value("clusters.ndx"), "Output: Cluster assignment index file (string:filename)")    
    ;
  cmdline_options.add(program_options);

  po::variables_map vm;
  po::store(po::parse_command_line(argc, argv, cmdline_options), vm);
  po::notify(vm);    

  if (vm.count("help")) {
    cout << "usage: " << program_name << " [options]" << endl;
    cout << cmdline_options << endl;
    return 1;
  }

  if (!optsOK) {
    return -1;
  }

  cout << "Running with the following options:" << endl;
  cout << "sigma =      " << sigma << endl;
  cout << "ssm-file =   " << ssm_filename << endl;
  cout << "output =     " << output_filename << endl;
  cout << "ndx =        " << ndx_filename << endl;
  cout << endl;

  // Stacks
  vector<vector<int> > work;
  vector<vector<int> > completed;

  // Defining variables;
  double  *Ax;  // Array for residual calculation
  double  residual = 0.0;
  double  max_residual = 0.0;

  // SSM Matrix
  CSC_matrix A(ssm_filename);
  Ax = new double[A.n];

  // File output streams
  ofstream output;
  ofstream ndx;

  // EPS
  double eps = getEPS();

  // Open files
  output.open(output_filename.c_str());
  ndx.open(ndx_filename.c_str());

  // Get affinities
  affinity(A,sigma);

  // Setup work
  work.resize(1);
  work[0].resize(A.n);
  for (int x = 0; x < A.n; x++)
    work[0][x] = x;

  while (work.size()) {
    vector<int> current = work[work.size()-1];
    work.pop_back();
    CSC_matrix current_A;
    A.syslice(current,current_A);
    normalize(current_A);

    double* d = NULL; // values
    double* Z = NULL; // vectors
    double nevm = runARPACK(nev,current_A,d,Z); 
    cout << "Number of converged eigenvalues/vectors found: "
	 <<  nevm << endl;
  
    int *labels = new int[current_A.n];
    kmeans(current_A.n,nev,nev,Z,labels);
    // kmeans(current_A.n,1,nev,&Z[A.n],labels);
    
    // Get slice indices
    vector<int> islice1 = select(current,0,labels);
    vector<int> islice2 = select(current,1,labels);

    if (islice1.size() == 0 || islice2.size() == 0) {
      cout << "Defunct partition..." << endl;
    }

    // Get slices
    CSC_matrix slice1;
    A.syslice(islice1,slice1);
    normalize(slice1);
    CSC_matrix slice2;
    A.syslice(islice2,slice2);
    normalize(slice2);
    
    double *slice1_d = NULL;
    double *slice1_Z = NULL;
    int nev1 = runARPACK(nev,slice1,slice1_d,slice1_Z);
    cout << "Number of converged eigenvalues/vectors found: "
	 <<  nev1 << endl;
    
    double *slice2_d = NULL;
    double *slice2_Z = NULL;
    int nev2 = runARPACK(nev,slice2,slice2_d,slice2_Z);
    cout << "Number of converged eigenvalues/vectors found: "
	 << nev2 << endl;

    double current_t,slice1_t,slice2_t,ratio;
    current_t = (1.0/(1.0-d[0]));
    slice1_t = (1.0/(1.0-slice1_d[0]));
    slice2_t = (1.0/(1.0-slice2_d[0]));
    ratio = slice1_t / slice2_t;
    if (ratio < 1.0)
      ratio = slice2_t / slice1_t;

    cout << "Main:   " << current_t << endl;
    cout << "Slice1: " << slice1_t << endl;
    cout << "Slice2: " << slice2_t << endl;
    cout << "Relaxation: " << c1*(slice1_t + slice2_t) << endl;
    cout << "Partition:  " << ratio << endl;

    if (nev1+nev2+nevm!=6) {
      cout << "No convergence. Skipping decomposition..." << endl;
      completed.push_back(current);
    }
    else {
      if (current_t < c1*(slice1_t + slice2_t)) {
	completed.push_back(current);
      }
      else if (ratio > c2) {
	if (slice1_t > slice2_t) {
	  work.push_back(islice1);
	  completed.push_back(islice2);
	}
	else {
	  work.push_back(islice2);
	  completed.push_back(islice1);
	}
      }
      else {
	work.push_back(islice1);
	work.push_back(islice2);
      }
    }

    delete [] labels;
    delete [] slice1_d;
    delete [] slice1_Z;
    delete [] slice2_d;
    delete [] slice2_Z;
    delete [] d;
    delete [] Z;
  }

  int *clusters = new int[A.n];
  cout << "Number of clusters: " << completed.size() << endl;
  cout << endl;

  for (int x = 0; x < completed.size(); x++) {
    int idx = 0;
    ndx << "[cluster_" << x+1 << "]" << endl;
    for (int y = 0; y < completed[x].size(); y++) {
      ndx << completed[x][y]+1 << " ";
      clusters[completed[x][y]] = x+1;
      if (++idx > 19) {
	ndx << endl;
	idx = 0;
      }
    }
    ndx << endl;
    ndx << endl;
  }

  for (int x = 0; x < A.n; x++)
    output << clusters[x] << endl;

  ndx.close();
  output.close();

  delete [] clusters;
  delete [] Ax;
  return 0;

} // main.
int CalcCodebooks(const char *desc_path_char, const char *desc_type_char, const char *cb_path_char) {
	cv::Mat					desc_set, labels_set, centers;			
	int						desc_dim;	
	fs::path				cb_path, desc_path;
	fs::directory_iterator	eod;
	boost::smatch			what;	
	boost::regex			filter(".*dat");
	DescType				cb_desc_type = DESCRIPTORS_NUM;
	bool					valid_desc_type = false;
	RandGen					rand_gen;
	MatWriter				mw;	

	// decode codebook type
	desc_path = fs::path(desc_path_char)/fs::path(desc_type_char);
	CHECK(fs::exists(desc_path));
	for (int desc_type = 0; desc_type < DESCRIPTORS_NUM; desc_type++) {
		if (std::string(desc_type_char) == ActionClassifier::desc_names_[desc_type]) {
			cb_desc_type = static_cast<DescType>(desc_type);
			valid_desc_type = true;
			break;
		}
	}
	CHECK(valid_desc_type);
	cb_path = fs::path(cb_path_char);
	// if doesn't exist create a folder for codebooks
	try {
		if (fs::is_directory(cb_path) == false)
			fs::create_directories(cb_path);
	} catch(boost::filesystem::filesystem_error e) { 
		LOGF() << "Failed to create codebooks folder: " << cb_path << std::endl;
		return -1;
	}	
	cb_path = cb_path/fs::path("cb_" + std::string(ActionClassifier::desc_names_[cb_desc_type]) + ".dat");

	if (fs::exists(cb_path)) {
		LOGI() << std::string(desc_type_char) << "code-book already exists." << std::endl;
		return 0;
	}

	// iterate through different actions at different rotations until sufficient number of descriptors is collected
	LOGI() << "Collecting descriptors for " << desc_type_char << " codebook" << std::endl;	
	while (desc_set.rows < CB_MAX_DESCRIPTOR_NUM) {
		for(fs::directory_iterator dir_iter(desc_path) ; (dir_iter != eod) && (desc_set.rows < CB_MAX_DESCRIPTOR_NUM); ++dir_iter) {			
			if ((fs::is_regular_file(*dir_iter)) && (boost::regex_match(dir_iter->path().string(), what, filter))) {
				MatReader mr;
				cv::Mat frame_desc;
				int desc_cnt = 0;

				if (rand_gen.RandInt(100) > CB_ANGLE_REJECTION_RATE)
					continue;			
				
				LOGI() << "Extracting descriptors from " << fs::basename(dir_iter->path()) << std::endl;				
                if (mr.Init(dir_iter->path().string()) != 0) {
                    LOGW() << "Failed to open " << fs::basename(dir_iter->path()) << std::endl;
                    continue;
                }
                                    
				while ((mr.Read(&frame_desc) == 0) && (desc_set.rows < CB_MAX_DESCRIPTOR_NUM)){
					for (int desc_idx = 0; desc_idx < frame_desc.rows; desc_idx++) {
						if (rand_gen.RandInt(100) > CB_DESCRIPTOR_REJECTION_RATE)
							continue;
						desc_set.push_back(frame_desc.row(desc_idx));
						desc_cnt ++;						

						if (desc_set.rows == CB_MAX_DESCRIPTOR_NUM)	{
							break;
						}
					}
				}
				LOGI() << desc_cnt << " descriptors added, total descriptors num: " << desc_set.rows << std::endl;
			}
		}
	}	

	// calculate codebook words
	desc_dim = desc_set.cols;
	LOGI() << "kmeans clustering, max iterations: " << CB_KMEAS_MAX_ITER << std::endl;
	kmeans(desc_set, BOW_SIZE, labels_set, cv::TermCriteria(CV_TERMCRIT_ITER|CV_TERMCRIT_EPS, CB_KMEAS_MAX_ITER, 0.01*desc_dim), 5, cv::KMEANS_PP_CENTERS, centers);
	LOGI() << "kmeans complete" << std::endl;

	CHECK(mw.Init(cb_path.string()) == 0);
	mw.Write(centers);

	return 0;
}
예제 #14
0
/*
 * MPI k-means driver.
 *
 * Options (all seven are required, hence argc == 15):
 *   -p type   passed through to kmeans()
 *   -i file   input data file
 *   -o file   output file
 *   -k n      number of clusters
 *   -t x      threshold passed to kmeans()
 *   -l n      total number of input lines
 *   -d n      number of dimensions per line
 *
 * Each process reads its share of the input through kmeans_read(). Rank 0
 * seeds the centroids with its first ncluster rows and broadcasts them.
 * kmeans() is then run, the result is written with kmeans_write(), and
 * rank 0 prints the maximum wall-clock and clustering times over all ranks.
 */
int main(int argc, char **argv) {
	extern char *optarg;
	extern int optind, optopt;
	int c, ncluster = 4, rank, nworker, nline = 0, totalLine, ndim = 0, i = 0, j = 0;
	int type = 0;
	char *inFile = NULL, *outFile = NULL;
	float thres = 0.01;

	float **data;	// input data in this process
	float **centroid;	// all cluster centroids
	int *label;	// for each data point, find its new class label

	double stime, etime, 	// whole system time
		stimeCluster, etimeCluster;	// maximum cluster time among all processes
	double elapse, elapseWhole, elapseCluster, elapseClusterWhole;

	if(argc != 15) {
		printUsage(argv[0]);
		exit(EXIT_FAILURE);
	}

	while ((c = getopt(argc, argv, "p:i:o:k:t:l:d:")) != EOF) {
		switch (c) {
		case 'p':
			type = atoi(optarg);
			break;
		case 'i':
			inFile = optarg;
			break;
		case 'o':
			outFile = optarg;
			break;
		case 'k':
			ncluster = atoi(optarg);
			break;
		case 't':
			thres = (float) atof(optarg);
			break;
		case 'l':
			nline = atoi(optarg);	// integer count: atoi, not atof
			break;
		case 'd':
			ndim = atoi(optarg);
			break;
		default:
			printUsage(argv[0]);
			exit(EXIT_FAILURE);
		}
	}

	// Stray non-option arguments, or a required value that was never given
	// (e.g. an option repeated in place of another), is a usage error.
	if (optind < argc || inFile == NULL || outFile == NULL
			|| nline <= 0 || ndim <= 0 || ncluster <= 0) {
		printUsage(argv[0]);
		exit(EXIT_FAILURE);
	}

	MPI_Init(&argc, &argv);
	// MPI_Wtime is only valid once MPI has been initialised.
	stime = MPI_Wtime();
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &nworker);
	printf("rank:%d init done\n", rank);

	// read input data (each process has a portion of all data)
	// IMP: nline changed from total lines for all input to subtotal lines for a specific process
	totalLine = nline;
	data = kmeans_read(inFile, &nline, ndim, MPI_COMM_WORLD);
	printf("rank:%d read data done. nline:%d\n", rank, nline);

	// rank 0 seeds the centroids from its first ncluster rows, so it must own that many
	if(rank == 0 && nline < ncluster) {
		fprintf(stderr, "rank 0 holds %d lines but %d clusters were requested\n", nline, ncluster);
		MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
	}

	// initialize cluster centers: one contiguous block plus a row-pointer table
	centroid = (float **) malloc(ncluster * sizeof(float *));	// pointer to each line
	centroid[0] = (float *) malloc(ncluster * ndim * sizeof(float));
	for(i = 1; i < ncluster; i++)
		centroid[i] = centroid[i-1] + ndim;
	if(rank == 0) {
		for(i = 0; i < ncluster; i++)
			for(j = 0; j < ndim; j++)
				centroid[i][j] = data[i][j];
	}
	printf("rank:%d init cluster center done\n", rank);

	// broadcast the centroid to all other processes
	MPI_Bcast(centroid[0], ncluster*ndim, MPI_FLOAT, 0, MPI_COMM_WORLD);

	// do kmeans calculation
	stimeCluster = MPI_Wtime();
	label = (int *) malloc(nline * sizeof(int));

	kmeans(type, data, ncluster, ndim, nline, thres, label, centroid, MPI_COMM_WORLD);
	printf("rank:%d kmeans done\n", rank);
	etimeCluster = MPI_Wtime();

	kmeans_write(outFile, nline, totalLine, ncluster, ndim , centroid, label, 0, MPI_COMM_WORLD);

	// NOTE(review): `data` comes from kmeans_read() and is never released here;
	// its allocation layout is not visible, so it is left to process exit.
	free(label);
	free(centroid[0]);
	free(centroid);
	etime = MPI_Wtime();
	elapse = etime - stime;
	elapseCluster = etimeCluster-stimeCluster;

	// get the maximum time among processes
	MPI_Reduce(&elapse, &elapseWhole, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
	MPI_Reduce(&elapseCluster, &elapseClusterWhole, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
	// performance report
	printf("Done! Rank: %d\tTime: %f\n", rank, etime-stime);
	if(rank == 0)	// report the reduced maxima, not rank 0's local clustering time
		printf("System time: %f secs\tClustering time: %fsecs\n", elapseWhole, elapseClusterWhole);

	MPI_Finalize();
	return EXIT_SUCCESS;
}
예제 #15
0
int main (){
    char file[50];//name of file
    int nclus;//number of clusters
    //float *Cone, *Ctwo, *temp, *temp2;
    
    
    /*--------------------------------------------------------------------------
    -------------get input parameters
    --------------------------------------------------------------------------*/
    
    
    //get name of file to open
    printf("what is the name of your file?");    
    if (scanf("%s", &file[0]) != 1){ //read in value and make sure it is valid name
               printf("error: filemane invalid!\n");
               exit(EXIT_FAILURE); 
    }
    printf("filename is %s\n", file);
    
    //get number of clusters
    printf("how many clusters do you want to find?"); 
     if (scanf("%d", &nclus) != 1){ //read in value and make sure it is valid number
               printf("error: number invalid!\n");
               exit(EXIT_FAILURE); 
    }
    printf("number of k clusters is %d\n", nclus);
        
        
    /*--------------------------------------------------------------------------
    -------------open and read input file
    --------------------------------------------------------------------------*/
        
    //open input file 
    FILE *fp;
    if ((fp = fopen(file,"r")) == NULL){
       printf("Error:cannot read the file %s\n", file);        
    }else{
       printf("success opening file %s\n", file); 
    }
    
    //read number of items and attributes
    int items, attributes;
    fscanf(fp,"%d",&items);
    fscanf(fp,"%d",&attributes);
    printf ("items %d attributes %d\n", items, attributes);
    getchar();
    
    //Create nodes    
    Node *head = (Node *) malloc (sizeof(Node)); 
    Node *node = (Node *) malloc (sizeof(Node));
    
    float *temp  = (float *)malloc(items*attributes*sizeof(float));
    //read file
    printf("reading file\n");
    int i, j; //counters
    for (i=0; i < items; i++){//for each item
        printf("reading item %d\n",i);      
          for (j=0; j < attributes; j++){
              fscanf(fp,"%f",&temp[(attributes)*i+j]);              
              //printf("read attribute %d, %f\n",j,temp[(attributes)*i+j]);
              
          }     

          node = addData(nclus+1,&temp[(attributes)*i]);  
          append(head,node);
    }
       
       
    /*--------------------------------------------------------------------------
    -------------get initial means
    --------------------------------------------------------------------------*/
        
    float *means = (float *)malloc(nclus*attributes*sizeof(float));
    //float *means_b = (float *)malloc(nclus*attributes*sizeof(float)); 
    
    int index;
    node = head;
    //printf("node...%f %f\n", node->att[0], node->att[1]); 
    for (i=0;i<nclus;i++){
        node = node->next;
        //printf("node...%f %f\n", node->att[0], node->att[1]); 
        printf("\n\ninitial center for cluster %d:\n",i);
        for (j=0;j<attributes;j++){ 
            index =  (attributes)*(i)+j;    
            means[index] = node->att[j];       
            printf("attribute %d: %f\n", j, node->att[j]);    
        }
    }
    printf("press enter\n");
    getchar();
    
   /*--------------------------------------------------------------------------
    -------------run k-means algorithm
    --------------------------------------------------------------------------*/
   
   //declare variables for k-means algo
   float *att_sum = (float *)malloc(nclus*attributes*sizeof(float)); //sum of each column for each cluster
   float *sqerr_sum = (float *)malloc(nclus*sizeof(float)); //keep track of rmse
   float *rmse = (float *)malloc(nclus*sizeof(float)); //keep track of rmse
   float *rmsediff = (float *)malloc(nclus*sizeof(float)); //differences in rmse
   int *cnt = (int *)malloc(nclus*sizeof(int)); //number of elements in each cluster
   
   
   float *last_rmse = (float *)malloc(nclus*sizeof(float));  
   int test = 0;
   int count = 0;
   do{//repeat until test = 1 (RMSE converges)
      test = kmeans(head,&means[0],nclus,&last_rmse[0], attributes,
           &att_sum[0], &sqerr_sum[0], &rmse[0], &rmsediff[0], &cnt[0]);
      printf("press enter for next iteration\n");
      getchar();
      //printf("test %d\n", test); 
      count ++;
      if (count > 100) 
         test = 1; 
   }while(test == 0);
          
    
   /*--------------------------------------------------------------------------
    -------------print results in output file
    --------------------------------------------------------------------------*/
    FILE *out;
    if ((out = fopen("output_gbm.csv","w")) == NULL){
       printf("Error:cannot read the file output.txt\n");        
    }else{
       printf("success opening output file output.txt for output\n"); 
    }
    node = head;
    while(node->next != NULL){
        node = node->next;      

         for (j=0;j<attributes;j++) { 
             fprintf(out, "%f,", node->att[j]);
         }
         fprintf(out,"%d\n",node->kclus);
    }
    fclose(out);
    fclose (fp) ; 
    
    
    /*--------------------------------------------------------------------------
    -------------run k-nearest neighbor algorithm
    --------------------------------------------------------------------------*/
    int response;
    printf("if you want to run k-nearest neighbor type the number 1 and press enter, otherwise press any other key: "); 
     if (scanf("%d", &response) != 1){ //read in value and make sure it is valid number
               printf("error: number invalid!\n");
               exit(EXIT_FAILURE); 
    }
    printf("response is %d\n", response);
    //run k-nn algorithm
    
    if (response == 1){
         knn(head, nclus); //knn call function
    }
    
    
    free(last_rmse);
    free(means);
    free(att_sum); 
    free(sqerr_sum); 
    free(rmse); 
    free(rmsediff); 
    free(cnt);
    

    return 1; 
}
예제 #16
0
/*----< kmeans_clustering() >---------------------------------------------*/
/*
 * Runs k-means on a flattened feature array.
 *
 *   feature    in: npoints rows of nfeatures floats, row-major
 *   nfeatures  number of attributes per point
 *   npoints    number of points
 *   nclusters  requested number of clusters (clamped to npoints)
 *   threshold  iteration stops once the delta returned by kmeans() is not
 *              above this value (or after roughly 500 iterations)
 *
 * Returns the row-pointer table of the cluster centres, cast to the declared
 * float* return type. Entry 0 of the table points at the contiguous
 * nclusters*nfeatures storage.
 * NOTE(review): the table is a float**, so callers presumably cast the
 * result back and free both table[0] and the table -- confirm.
 */
float* kmeans_clustering(float  *feature,    /* in: [npoints][nfeatures] */
                         int     nfeatures,
                         int     npoints,
                         int     nclusters,
                         float   threshold)
{
    int      i, j, n = 0;				/* counters */
	int		 loop=0, temp;
    int     *new_centers_len;	/* [nclusters]: no. of points in each cluster */
    float    delta;				/* if the point moved */
    float  **clusters;			/* out: [nclusters][nfeatures] */
    float  **new_centers;		/* [nclusters][nfeatures] */
	int     *membership;		/* which cluster a data point belongs to */
	int     *membership_new;	/* newly assignment membership */

	int     *initial;			/* used to hold the index of points not yet selected
								   prevents the "birthday problem" of dual selection (?)
								   considered holding initial cluster indices, but changed due to
								   possible, though unlikely, infinite loops */
	int      initial_points;
	int		 c = 0;

	/* nclusters should never be > npoints
	   that would guarantee a cluster without points */
	if (nclusters > npoints)
		nclusters = npoints;

	/* allocate memory for membership and membership_new */
    membership = (int*) malloc(npoints * sizeof(int));
    membership_new = (int*) malloc(npoints * sizeof(int));

    /* allocate space for and initialize returning variable clusters[] */
    clusters    = (float**) malloc(nclusters *             sizeof(float*));
    clusters[0] = (float*)  malloc(nclusters * nfeatures * sizeof(float));
    for (i=1; i<nclusters; i++)
        clusters[i] = clusters[i-1] + nfeatures;

	/* initialize the random clusters */
	initial = (int *) malloc (npoints * sizeof(int));
	for (i = 0; i < npoints; i++)
	{
		initial[i] = i;
	}
	initial_points = npoints;

    /* pick cluster centers (the random draw is disabled, so n simply counts up) */
    for (i=0; i<nclusters && initial_points >= 0; i++) {
		//n = (int)rand() % initial_points;

		/* a point's row starts at index*nfeatures in the [npoints][nfeatures] array */
        for (j=0; j<nfeatures; j++)
            clusters[i][j] = feature[initial[n]*nfeatures+j];	// remapped

		/* swap the selected index to the end (not really necessary,
		   could just move the end up) */
		temp = initial[n];
		initial[n] = initial[initial_points-1];
		initial[initial_points-1] = temp;
		initial_points--;
		n++;
    }

	/* the index pool is only needed while seeding */
	free(initial);

#pragma acc data create(membership[0:npoints], membership_new[0:npoints])
{
	/* initialize the membership and membership_new to -1 for all */
	#pragma acc kernels present(membership)
    for (i=0; i < npoints; i++)
		membership[i] = membership_new[i] = -1;

    /* allocate space for and initialize new_centers_len and new_centers */
    new_centers_len = (int*) calloc(nclusters, sizeof(int));

    new_centers    = (float**) malloc(nclusters *            sizeof(float*));
    new_centers[0] = (float*)  calloc(nclusters * nfeatures, sizeof(float));
    for (i=1; i<nclusters; i++)
        new_centers[i] = new_centers[i-1] + nfeatures;

	/* iterate until convergence */
	do {
		delta = (float) kmeans(feature,			/* in: [npoints][nfeatures] */
							   nfeatures,		/* number of attributes for each point */
							   npoints,			/* number of data points */
							   nclusters,		/* number of clusters */
							   membership,		/* which cluster the point belongs to */
        					   membership_new,  /* newly assignment membership */
							   clusters,		/* out: [nclusters][nfeatures] */
							   new_centers_len,	/* out: number of points in each cluster */
							   new_centers		/* sum of points in each cluster */
							   );

		/* replace old cluster centers with new_centers */
		/* CPU side of reduction */
		for (i=0; i<nclusters; i++) {
			for (j=0; j<nfeatures; j++) {
				/* an empty cluster keeps its previous centre */
				if (new_centers_len[i] > 0)
					clusters[i][j] = new_centers[i][j] / new_centers_len[i];	/* take average i.e. sum/n */
				new_centers[i][j] = 0.0;	/* set back to 0 */
			}
			new_centers_len[i] = 0;			/* set back to 0 */
		}
		c++;
    } while ((delta > threshold) && (loop++ < 500));	/* makes sure loop terminates */
	printf("iterated %d times\n", c);
} /* end acc data */

    free(new_centers[0]);
    free(new_centers);
    free(new_centers_len);
    free(membership);
    free(membership_new);

    /* explicit cast: the declared return type is float*, the table is float** */
    return (float*) clusters;
}
예제 #17
0
//从所有的feature 文件中读出所有的feature 。
void fImgSvm::createFeatureDict()
{
    static char * dirname = "/home/fuxiang/code-fuxiang90/cpp/opencv/opencv-start/feature";
    DIR *dp;
    struct dirent *entry;
    struct stat statbuf;
    chdir("/home/fuxiang/code-fuxiang90/cpp/opencv/opencv-start");
    if( (dp = opendir( "/home/fuxiang/code-fuxiang90/cpp/opencv/opencv-start/feature" ) ) == NULL ) {
        fprintf(stderr, "cannot open directory: %s\n", dirname);
        //return ;
    }

    //double darray[SIFTN] ;
    vector < vector<double > > featurevec;
    int featuresum = 0;
    chdir(dirname);
    while( (entry = readdir(dp)) != NULL) {
        if( S_ISDIR(statbuf.st_mode)  ) {
            continue;
        } else {

            //strcpy(filename[filenum++], entry->d_name);
            string filename = entry->d_name;
            //filename = ""
            if(filename == "." || filename == "..")
                continue;
            int pos =  filename.find(".feature") ;
            if(pos == -1 )
                continue;

            mfimgfeature.GetSiftFeatureFile(filename ,featurevec);
        }
    }
    int nfeature = featurevec.size();

    SGMatrix<float64_t> data(SIFTN, nfeature) ;
    for (int i = 0; i < nfeature; ++i)  {
        for (int j = 0; j < SIFTN; j++) {
            data(j ,i) = featurevec[i][j];
        }
    }

    CDenseFeatures<float64_t>* centers;
    kmeans(data,centers,nfeature);

//    int cnClusterNumber = mwordnum;
//    CvMat *pszLabels = cvCreateMat(nfeature, 1, CV_32SC1);
//    CvMat szSamples, *pszClusterCenters ;
//    pszClusterCenters =  cvCreateMat(cnClusterNumber, SIFTN, CV_32FC1);
//    cvInitMatHeader ( &szSamples,nfeature,SIFTN, CV_32FC1, pszDiscriptor);
//    cvKMeans2(&szSamples, cnClusterNumber, pszLabels,cvTermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0 ),1, (CvRNG *)0, 0, pszClusterCenters);
    //cvKMeans2(&szSamples, cnClusterNumber, pszLabels, pszClusterCenters);
    ofstream fout("dict");
    for(int i = 0 ; i < mwordnum ; i ++) {
        vector<double > d ;
        for(int j = 0 ; j < SIFTN ; j ++) {
            fout << data(j,i) << " " ;
            d.push_back(data(j,i) );
        }
        dictmap.insert( map<int ,vector<double> >::value_type(i ,d) );

        fout << endl;
    }
    fout.close();

    //SG_UNREF(data);
    //SG_UNREF(centers);
}
/*
 * Fits a K-component mixture to `data` and stores the results back in it.
 *
 * Steps: seed the responsibilities aadZ and parameters aadLambda with
 * kmeans(); run up to 100 EM iterations (calc_z / optimise_lambda_k) until
 * the negative log-likelihood changes by no more than 1e-6; invert each
 * component's Hessian to obtain per-parameter error terms and the
 * log-determinant; finally fill data->NLE, LogDet, fit_laplace, fit_bic and
 * fit_aic and emit the results through group_output() / mixture_output().
 *
 *   data   problem description (N, S, K, verbose) and result fields
 *   rseed  seed for the GSL random number generator handed to kmeans()
 */
void dirichlet_fit_main(struct data_t *data, int rseed)
{
    const int N = data->N, S = data->S, K = data->K;
    int i, j, k;

    gsl_rng *ptGSLRNG;
    gsl_rng_env_setup();
    gsl_set_error_handler_off();
    ptGSLRNG = gsl_rng_alloc(gsl_rng_default);
    gsl_rng_set(ptGSLRNG, rseed);

    /* allocate matrices: one contiguous block each plus a row-pointer table */
    double **aadZ, **aadLambda, **aadErr, *adW;
    adW = (double *) calloc(K, sizeof(double));

    aadZ = (double **) calloc(K, sizeof(double *));
    aadLambda = (double **) calloc(K, sizeof(double *));
    aadErr = (double **) calloc(K, sizeof(double*));

    aadZ[0] = (double *) calloc(K * N, sizeof(double));
    aadLambda[0] = (double *) calloc(K * S, sizeof(double));
    aadErr[0] = (double *) calloc(K * S, sizeof(double));

    for (k = 1; k < K; k++) {
        aadZ[k] = aadZ[0] + k * N;
        aadLambda[k] = aadLambda[0] + k * S;
        aadErr[k] = aadErr[0] + k * S;
    }

    /* soft k means initialiser */
    kmeans(data, ptGSLRNG, adW, aadZ, aadLambda);
    for (k = 0; k < K; k++) {
        adW[k] = 0.0;
        for (i = 0; i < N; i++)
            adW[k] += aadZ[k][i];
    }

    if (data->verbose)
        Rprintf("  Expectation Maximization setup\n");
    for (k = 0; k < K; k++) {
        for (j = 0; j < S; j++) {
            /* move to log scale; non-positive values are floored at -10 */
            const double x = aadLambda[k][j];
            aadLambda[k][j] = (x > 0.0) ? log(x) : -10;
        }
        optimise_lambda_k(aadLambda[k], data, aadZ[k]);
    }

    /* simple EM algorithm */
    int iter = 0;
    double dNLL = 0.0, dNew, dChange = BIG_DBL;

    if (data->verbose)
        Rprintf("  Expectation Maximization\n");
    while (dChange > 1.0e-6 && iter < 100) {
        calc_z(aadZ, data, adW, aadLambda); /* latent var expectation */
        for (k = 0; k < K; k++) /* mixture components, given pi */
            optimise_lambda_k(aadLambda[k], data, aadZ[k]);
        for (k = 0; k < K; k++) { /* current likelihood & weights */
            adW[k] = 0.0;
            for(i = 0; i < N; i++)
                adW[k] += aadZ[k][i];
        }

        dNew = neg_log_likelihood(adW, aadLambda, data);
        dChange = fabs(dNLL - dNew);
        dNLL = dNew;
        iter++;
        R_CheckUserInterrupt();
        if (data->verbose && (iter % 10) == 0)
            Rprintf("    iteration %d change %f\n", iter, dChange);
    }

    /* hessian */
    if (data->verbose)
        Rprintf("  Hessian\n");
    gsl_matrix *ptHessian = gsl_matrix_alloc(S, S),
        *ptInverseHessian = gsl_matrix_alloc(S, S);
    gsl_permutation *p = gsl_permutation_alloc(S);
    double dLogDet = 0., dTemp;
    int signum;

    for (k = 0; k < K; k++) {
        data->adPi = aadZ[k];
        if (k > 0)
            dLogDet += 2.0 * log(N) - log(adW[k]);
        hessian(ptHessian, aadLambda[k], data);

        /* ptHessian is overwritten in place by its LU factors */
        gsl_linalg_LU_decomp(ptHessian, p, &signum);
        gsl_linalg_LU_invert(ptHessian, p, ptInverseHessian);
        for (j = 0; j < S; j++) {
            aadErr[k][j] = gsl_matrix_get(ptInverseHessian, j, j);
            /* diagonal of the LU factors accumulates log|det| */
            dTemp = gsl_matrix_get(ptHessian, j, j);
            dLogDet += log(fabs(dTemp));
        }
    }

    gsl_matrix_free(ptHessian);
    gsl_matrix_free(ptInverseHessian);
    gsl_permutation_free(p);

    /* results */
    double dP = K * S + K - 1;
    data->NLE = dNLL; data->LogDet = dLogDet;
    data->fit_laplace = dNLL + 0.5 * dLogDet - 0.5 * dP * log(2. * M_PI);
    data->fit_bic = dNLL + 0.5 * log(N) * dP;
    data->fit_aic = dNLL + dP;

    group_output(data, aadZ);
    mixture_output(data, adW, aadLambda, aadErr);

    free(aadErr[0]); free(aadErr);
    free(aadLambda[0]); free(aadLambda);
    free(aadZ[0]); free(aadZ);
    free(adW);
    /* the generator allocated with gsl_rng_alloc above */
    gsl_rng_free(ptGSLRNG);
}
예제 #19
0
/// Clusters the given descriptors with kmeans(), using this trainer's
/// clusterCount, termcrit, attempts and flags, and returns the matrix that
/// kmeans() fills through its final argument.
Mat BOWKMeansTrainer::cluster( const Mat& _descriptors ) const
{
    Mat centers;
    Mat assignments;   // per-descriptor labels; kmeans() needs the slot, the result is discarded

    kmeans( _descriptors, clusterCount, assignments, termcrit, attempts, flags, centers );

    return centers;
}
예제 #20
0
// Reduces `contacts` to at most maxClusters entries with k-means.
//
// Every contact is embedded as a 7-vector (position, normal scaled by
// clusterNormalScale, depth). Cluster centers are seeded deterministically
// from evenly spaced input points and refined with a budget of 20
// iterations. Each center becomes one output contact with its normal
// re-normalized. If a center's normal has zero or non-finite length, the
// first member of that cluster is used instead. Clusters with no members
// are dropped. Does nothing when there are already no more than maxClusters
// contacts.
void ClusterContacts(vector<dContactGeom>& contacts,int maxClusters,Real clusterNormalScale)
{
  if((int)contacts.size() <= maxClusters) return;

  // Build the 7-D sample for every contact
  const size_t numContacts = contacts.size();
  vector<Vector> pts(numContacts);
  for(size_t c=0;c<numContacts;c++) {
    const dContactGeom& src = contacts[c];
    pts[c].resize(7);
    for(int a=0;a<3;a++) {
      pts[c][a] = src.pos[a];
      pts[c][3+a] = src.normal[a]*clusterNormalScale;
    }
    pts[c][6] = src.depth;
  }

  Statistics::KMeans kmeans(pts,maxClusters);
  // deterministic seeding: evenly spaced samples (randomized alternative: kmeans.RandomInitialCenters())
  const size_t numCenters = kmeans.centers.size();
  for(size_t c=0;c<numCenters;c++)
    kmeans.centers[c] = kmeans.data[(c*pts.size())/numCenters];
  int iterationBudget=20;
  kmeans.Iterate(iterationBudget);

  contacts.resize(kmeans.centers.size());
  vector<int> emptyClusters;   // indices of clusters without any member
  for(size_t c=0;c<contacts.size();c++) {
    dContactGeom& out = contacts[c];
    for(int a=0;a<3;a++) {
      out.pos[a] = kmeans.centers[c][a];
      out.normal[a] = kmeans.centers[c][3+a]/clusterNormalScale;
    }
    Real centerLen = Vector3(out.normal[0],out.normal[1],out.normal[2]).length();

    if(!FuzzyZero(centerLen) && IsFinite(centerLen)) {
      // usual case: the averaged normal is usable, just re-normalize it
      for(int a=0;a<3;a++)
        out.normal[a] /= centerLen;
      out.depth = kmeans.centers[c][6];
      continue;
    }

    printf("ODESimulator: Warning, clustered normal became zero/infinite\n");
    // fall back to the first point assigned to this cluster
    int member = -1;
    for(size_t k=0;k<kmeans.labels.size() && member < 0;k++) {
      if(kmeans.labels[k] == (int)c)
        member = (int)k;
    }
    if(member < 0) {
      // no point carries this label -- degenerate cluster
      emptyClusters.push_back(c);
      continue;
    }
    for(int a=0;a<3;a++) {
      out.pos[a] = pts[member][a];
      out.normal[a] = pts[member][3+a];
    }
    // the stored normal is still scaled; dividing by its length undoes that
    Real memberLen = Vector3(out.normal[0],out.normal[1],out.normal[2]).length();
    for(int a=0;a<3;a++)
      out.normal[a] /= memberLen;
    out.depth = pts[member][6];
  }

  // erase from the highest index down so earlier indices stay valid
  for(size_t r=emptyClusters.size();r>0;r--)
    contacts.erase(contacts.begin()+emptyClusters[r-1]);
}
// Program entry point: brackets a single kmeans(TIME) call between the
// simulation set-up and tear-down hooks.
int main()
{
    // Set-up hook; must run before the workload.
    s4_init_simulation();

    // The actual workload. TIME is defined outside this block.
    kmeans(TIME);

    // Tear-down hook, run once the workload has returned.
    s4_wrapup_simulation();

    return 0;
}
std::vector<cv::Rect> 
SoftPPWordSplitter::split(const CCGroup &grp)
{
    cv::Rect bb = grp.get_rect();
    // generate the projection profile sums
    cv::Mat sums(1, bb.width, CV_32FC1, cv::Scalar(0));
    ProjectionProfileComputer pp_computer(cv::Size(bb.width, 1), bb.x);
    for (int i = 0; i < grp.ccs.size(); i++) {
        sums = pp_computer.compute(grp.ccs[i].pixels, sums);
    }

    int threshold = pp_computer.compute_threshold(sums);
    if (_verbose) {
        std::cout << "Projection Profile Threshold: " << threshold << std::endl;
    }
    cv::Mat gaps = sums < threshold;

    // now shrink each bounding rect on the border with the gaps matrix
    std::vector<cv::Rect> original_rects(grp.ccs.size());
    std::transform(
        grp.ccs.begin(), grp.ccs.end(), 
        original_rects.begin(), 
        [](const CC &cc) -> cv::Rect { return cc.rect; });
    std::sort(
        original_rects.begin(),
        original_rects.end(), 
        [](const cv::Rect &a, const cv::Rect &b) -> bool { return a.x < b.x; });
    RectShrinker shrinker(0.10, bb.x);
    std::vector<cv::Rect> shrinked_rects(shrinker.shrink(original_rects, gaps));
    
    //cv::Mat img(grp.get_image());
    //cv::imshow("RECTS-wo-rects", img);
    //cv::waitKey(0);
    //for (cv::Rect r : shrinked_rects) {
    //    cv::rectangle(img, r.tl(), r.br(), cv::Scalar(128));
    //}
    //cv::imshow("RECTS", img);
    //cv::waitKey(0);

    std::vector<bool> collide(bb.width, false);
    for (int i = 0; i < shrinked_rects.size(); i++) {
        for (int j = shrinked_rects[i].x; j < shrinked_rects[i].x + shrinked_rects[i].width; j++) {
            collide[j-bb.x] = true;
        }
    }

    //std::vector<bool> collide(bb.width, false);
    //for (int i = 0; i < ccs.size(); i++) {
    //    for (int j = ccs[i].rect.x; j < ccs[i].rect.x + ccs[i].rect.width; j++) {
    //        collide[j-bb.x] = true;
    //    }
    //}

    std::vector<float> heights(grp.ccs.size(), 0.0);
    std::transform(
        grp.ccs.begin(),
        grp.ccs.end(), 
        heights.begin(),
        [] (const CC &c) -> float { return c.rect.height; });
    float mean_height = cv::sum(heights)[0] / heights.size();

    // Now find the rects from this binary mask.
    // This merges overlapping/touching CCs into a single component
    std::vector<cv::Rect> rects;
    cv::Rect last_rect(bb.x, bb.y, 1, bb.height);
    
    for (int i = 0; i < collide.size(); i++) {
        if (collide[i]) {
            last_rect.width += 1;
        } else {
            if (last_rect.width > 0) {
                rects.push_back(last_rect);
            }
            last_rect = cv::Rect(bb.x + i, bb.y, 0, bb.height);
        }
    }
    if (last_rect.width > 0) {
        rects.push_back(last_rect);
    }

    if (_verbose)
        std::cout << "#Rects: " << rects.size() << std::endl;

    if (rects.size() <= 2) {
        std::vector<cv::Rect> result;
        result.push_back(bb);
        return result;
    }

    // find the dists
    std::vector<float> dists;
    for (int i = 1; i < rects.size(); i++) {
        dists.push_back(rects[i].tl().x - rects[i-1].br().x);
    }

    //  kmeans
    cv::Mat dist_mat(dists.size(), 1, CV_32FC1);
    for (size_t i = 0; i < dists.size(); i++) {
        dist_mat.at<float>(i,0) = dists[i];
    }
    cv::Mat centers;
    cv::Mat labels;//(dists.size(),1, CV_32SC1, cv::Scalar(0));
    /*
    float min = *std::min_element(dists.begin(), dists.end());
    float max = *std::max_element(dists.begin(), dists.end());
    for (size_t i = 0; i < dists.size(); i++) {
        labels.at<int>(i,0) = std::abs(dists[i] - min) < std::abs(dists[i] - max) ? 0 : 1;
    }
    */

    if (_verbose) 
        std::cout << dist_mat << std::endl;
    kmeans(dist_mat, 2, labels, cv::TermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 100, .01), 5, cv::KMEANS_PP_CENTERS, centers);

    if (_verbose)
        std::cout << centers << std::endl;

    std::vector<float> cpy(dists);
    std::sort(cpy.begin(), cpy.end());
    float median = cpy[cpy.size() / 2];
    if (cpy.size() % 2 == 0) {
        median = cpy[cpy.size() / 2] + cpy[cpy.size() / 2 - 1];
        median = median / 2.0f;
    }
    float medval = median;

    float height = std::abs(centers.at<float>(0,0) - centers.at<float>(1,0)) / mean_height;
    median = std::abs(centers.at<float>(0,0) - centers.at<float>(1,0)) / (median + 1e-10);
    if (_verbose) {
        std::cout << dists.size() << " " << medval << " " << median << " " << height << std::endl;
    }
    // liblinear: 92% ACC: (10-F)
    // ./train -v 10 -B 1 -w1 2 -c 100 dists_cleaned.dat   
    // do we have a single cluster?!
    //if (dists.size() > 3 && median * 0.84320891 + height * 0.3127415 < 1.23270849 ||
    //    dists.size() <= 3 && height < 0.43413942) {
    if (median * 0.33974138 + height * 0.47850904 < 0.56307525) {
        std::vector<cv::Rect> result;
        result.push_back(bb);
        return result;
    }

    // get the index of the smallest center
    int small_center = centers.at<float>(0,0) < centers.at<float>(1,0) ? 0 : 1;

    // count the distance to cluster assignments
    int cnt[2] = {0,0};
    for (int i = 0; i < labels.rows; i++) {
        cnt[labels.at<int>(i,0)]++;
    }
    // we have more word gaps than letter gaps -> don't split!
    if (cnt[small_center] < cnt[1-small_center]) {
        std::vector<cv::Rect> result;
        result.push_back(bb);
        return result;
    }

    // start from left to right and iteratively merge rects if the
    // distance between them is clustered into the smaller center
    last_rect = rects[0];
    std::vector<cv::Rect> word_candidates;
    for (int i = 1; i < rects.size(); i++) {
        if (_allow_single_letters) {
            if (labels.at<int>(i-1,0) == small_center) {
                // extend the last rect
                last_rect = last_rect | rects[i];
            } else {
                // do not extend it!
                word_candidates.push_back(last_rect);
                last_rect = rects[i];
            }
        } else {
            if (labels.at<int>(i-1,0) == small_center) {
                // extend the last rect
                last_rect = last_rect | rects[i];
            } else if (i < labels.rows && labels.at<int>(i,0) == small_center) {
                // do not extend it!
                word_candidates.push_back(last_rect);
                last_rect = rects[i];
            } else {
                last_rect = last_rect | rects[i];
            }
        }
    }
    word_candidates.push_back(last_rect);

    // for each rect, find the original connected component rects
    std::vector<cv::Rect> words;
    for (cv::Rect candidate : word_candidates) {
        std::vector<cv::Rect> word;
        for (size_t i = 0; i < grp.ccs.size(); i++) {
            cv::Rect intersect(grp.ccs[i].rect & candidate);
            if (float (intersect.width * intersect.height) / float (grp.ccs[i].rect.width * grp.ccs[i].rect.height) >= 0.8f) {
                cv::Rect r = grp.ccs[i].rect;
                // set the text height correctly
                r.y = bb.y;
                r.height = bb.height;
                word.push_back(r);
            }
        }

        if (_verbose) {
            std::cout << "Accumulated: " << word.size() << " rects!" << std::endl;
        }
        if (word.empty()) continue;
        assert(!word.empty());
        cv::Rect r = word[0];
        for (size_t i = 1; i < word.size(); i++) {
            r = r | word[i];
        }
        words.push_back(r);
    }
    
    return words;
}