Exemple #1
0
/*
 * Learn a Gaussian mixture model from the points in v.
 *
 * Steps, in order:
 *   1. kmeans() is run on v and writes its centroids into g->mu.
 *   2. Every weight g->w[i] is set to 1/k and every entry of g->sigma is set
 *      to the mean of the per-point values kmeans() wrote into its "dis"
 *      output.
 *   3. Unless GMM_FLAGS_PURE_KMEANS is set in flags, up to niter rounds of
 *      gmm_compute_p_thread / gmm_handle_empty / gmm_compute_params are run.
 *      The loop stops early as soon as the sum of all entries of g->mu is
 *      exactly the same as after the previous round.
 *
 * di, ni, ki : forwarded to kmeans() and gmm_new() as d, n and k
 *              (presumably dimension, number of points and number of
 *              mixture components -- confirm against those helpers)
 * niter      : iteration limit, used both for kmeans() and for the loop in
 *              step 3; 0 is replaced by 10000
 * v          : input data, forwarded unchanged to the helpers
 * nt, seed, nredo : forwarded to kmeans(); nt is also forwarded to the
 *              gmm_compute_* helpers
 * flags      : forwarded to the gmm_compute_* helpers; also tested for
 *              GMM_FLAGS_PURE_KMEANS
 *
 * Returns the structure obtained from gmm_new().  Progress messages are
 * written to stdout and stderr.
 */
gmm_t * gmm_learn (int di, int ni, int ki, int niter,
                   const float * v, int nt, int seed, int nredo,
                   int flags)
{
    /* long copies of the sizes, so that products such as n * k are
       evaluated in long arithmetic */
    long d = di;
    long k = ki;
    long n = ni;

    /* convergence key of the previous and of the current round; the start
       value only shows up in the first "keys" line */
    double prev_key;
    double cur_key = 666;

    if (niter == 0)
        niter = 10000;

    /* the GMM parameters */
    float * p = fvec_new_0 (n * k);      /* p(ci|x) for all i */
    gmm_t * g = gmm_new (d, k);

    /* initialize the GMM: k-means + variance estimation */
    int * counts = ivec_new (n);   /* only used for the trace printed below */
    float * kdis = fvec_new (n);
    kmeans (d, n, k, niter, v, nt, seed, nredo, g->mu, kdis, NULL, counts);

    fflush (stderr);
    fputs ("assign = ", stderr);
    ivec_print (counts, k);
    fputs ("\n", stderr);
    free (counts);

    /* initialization of the GMM parameters assuming a diagonal matrix:
       uniform weights, and one shared value for every sigma entry */
    fvec_set (g->w, k, 1.0 / k);
    double sig = fvec_sum (kdis, n) / n;
    printf ("sigma at initialization = %.3f\n", sig);
    fvec_set (g->sigma, k * d, sig);
    free (kdis);

    /* start the EM algorithm */
    fputs ("<><><><> GMM  <><><><><>\n", stdout);

    /* pure k-means requested: skip the loop below entirely */
    if (flags & GMM_FLAGS_PURE_KMEANS)
        niter = 0;

    int iter = 1;
    while (iter <= niter) {

        gmm_compute_p_thread (n, v, g, p, flags, nt);
        fflush (stdout);

        gmm_handle_empty (n, v, g, p);

        gmm_compute_params (n, v, p, g, flags, nt);
        fflush (stdout);

        /* the sum of all entries of mu serves as the convergence key */
        prev_key = cur_key;
        cur_key = fvec_sum (g->mu, k * d);

        printf ("keys %5d: %.6f -> %.6f\n", iter, prev_key, cur_key);
        fflush (stdout);

        /* convergence reached -> leave */
        if (cur_key == prev_key)
            break;

        iter++;
    }
    fputs ("\n", stderr);

    free (p);

    return g;
}
Exemple #2
0
/*
 * Build an adaptive hierarchical clustering (AHC) tree over the points of ds.
 *
 * The tree is stored flat in ahct.  The first cluster added is the root,
 * which holds every point index 0..n-1; each split appends its children at
 * the end of ahct.  Clusters are visited in array order until no unvisited
 * cluster is left.
 *
 * For a cluster holding ni points the branch factor is
 *     bfi = min(bf, round(ni / rho))
 *   - bfi <  2 : the cluster is marked ClusterType_Leaf and left as is;
 *   - bfi >= 2 : kmeans() splits it into bfi children.  The cluster is
 *                marked ClusterType_Inner and receives a copy of the bfi
 *                centroids (cents) and the positions of its children inside
 *                ahct (children).
 *
 * ahct : array of Cluster elements that receives the tree
 *        (presumably initialised and empty on entry -- confirm with caller)
 * bf   : upper bound on the branch factor of any cluster
 * rho  : divisor applied to the cluster size when deriving bfi
 * ds   : dataset; ds->n points of ds->d values each are read from ds->data
 *
 * Progress is reported on stdout.
 */
void ahc_clustering(DyArray *ahct, int bf, int rho, const fDataSet *ds){
	ASSERTINFO(ahct == NULL || bf <= 0 || rho <= 0 || ds == NULL, "IPP");

	int		n 	= ds->n;
	int		d 	= ds->d;
	Cluster	_clu, clu, *pclu = NULL;
	int		i;
	int		iclu, bfi, ni, ori_id;					// the position, branch factor and volume of the i-th cluster
	int 	*nassign = ivec_new_set(bf, 0);			// per-child point counts filled by kmeans (bfi <= bf entries used)
	int 	*assign = NULL;							// child index of every point of the cluster being split
	float	*cent = fvec_new(d*bf);					// centroid buffer, sized for the largest possible split
	float	*mem_points = NULL;						// contiguous copy of the points of the cluster being split
	DyArray	*member = (DyArray*)malloc(sizeof(DyArray)*bf);

	/* initialize the first cluster (root) to add it to the ahc tree */
	Cluster_init(&clu, n);
	for(i = 0; i < n; i++){
		clu.idx[i] = i;
	}
	clu.type = ClusterType_Root;
	DyArray_add(ahct, (void*)&clu, 1);

	/* begin the loop of adaptive hierarchical clustering */
	iclu = 0;
	while(iclu < ahct->count){
		/* deal with the i-th cluster */
		// figure out the adaptive branch factor of the i-th cluster
		pclu = (Cluster*)DyArray_get(ahct, iclu, 1);
		ni = pclu->npts;
		bfi = i_min(bf, (int)round(ni / (float)rho));

		// deal with the cluster according to its size
		if(bfi < 2){
			/*
			 *	this is a leaf cluster
			 *	- mark it, it gets no children
			 *	* not necessary to store real data points
			 */
			pclu->type = ClusterType_Leaf;
		}else{
			printf("----------------- cluster %d, bfi-%d:\n", iclu, bfi);

			/*
			 * this is an inner cluster
			 * - divide it
			 *
			 * work on a shallow copy: adding children to ahct below may
			 * move its elements, which would invalidate pclu
			 */
			memcpy(&_clu, pclu, sizeof(Cluster));

			// extract data points from the original dataset according to the idx
			// (memcpy counts bytes, so each d-value row is d * sizeof(element) long;
			//  offsets are computed in size_t to avoid int overflow on large datasets)
			mem_points = fvec_new((long)ni * d);
			for(i = 0; i < ni; i++){
				memcpy(	mem_points + (size_t)i * d,
						ds->data + (size_t)_clu.idx[i] * d,
						sizeof(*mem_points) * d);
			}

			// divide this cluster
			assign = ivec_new(ni);

			kmeans(	d, ni, bfi, CLUSTERING_NITER, mem_points,
					CLUSTERING_NTHREAD | KMEANS_QUIET | KMEANS_INIT_BERKELEY, CLUSTERING_SEED, CLUSTERING_NREDO,
					cent, NULL, assign, nassign);

			// prepare space for members' ids
			for(i = 0; i < bfi; i++){
				DyArray_init(&member[i], sizeof(int), nassign[i]);
			}
			// extract member points' ids for each children cluster
			for(i = 0; i < ni; i++){
				ori_id = _clu.idx[i];
				DyArray_add(&member[assign[i]], (void*)&ori_id, 1);
			}

			// fulfill the type, centroids and the children of this cluster, add them to the ahct
			_clu.type = ClusterType_Inner;
			_clu.cents = fvec_new(d * bfi);
			memcpy(_clu.cents, cent, sizeof(float)*d*bfi);

			DyArray_init(&_clu.children, sizeof(int), bfi);
			for(i = 0; i < bfi; i++){
				Cluster_init(&clu, nassign[i]);
				memcpy(clu.idx, (int*)member[i].elem, sizeof(int)*nassign[i]);

				DyArray_add(&_clu.children, (void*)&ahct->count, 1);	/* the i-th child's position */
				DyArray_add(ahct, (void*)&clu, 1);						/* add the i-th child to the ahct */
			}

			/* as per the elems of ahct may change when expanding the space
			 * we decide to get the brand new address of the element
			 */
			pclu = (Cluster*)DyArray_get(ahct, iclu, 1);
			memcpy(pclu, &_clu, sizeof(Cluster));


			/* report */
			ivec_print(nassign, bfi);
			ivec_print((int*)_clu.children.elem, _clu.children.count);

			/* unset or release */
			FREE(mem_points);
			FREE(assign);
			for(i = 0; i < bfi; i++){
				DyArray_unset(&member[i]);
			}
		}

		// move to next cluster
		iclu++;
	}

	FREE(nassign);
	FREE(cent);
	FREE(member);
}