Exemplo n.º 1
0
/* Allocate a new online-PCA accumulator for vectors of dimension d.
 *
 * The point counter n starts at 0.  mu (d floats) and cov (d*d floats) come
 * from fvec_new_0; eigvec (d*d floats) and eigval (d floats) come from
 * fvec_new and are not filled in here.  The d*d sizes are computed in long
 * arithmetic so that they do not wrap for large d.
 *
 * Returns the new structure, or NULL if the structure itself could not be
 * allocated.
 */
pca_online_t * pca_online_new (int d)
{
  pca_online_t * pca = (pca_online_t *) malloc (sizeof (*pca));
  if (!pca)
    return NULL;

  pca->d = d;
  pca->n = 0;
  pca->mu = fvec_new_0 (d);
  pca->cov = fvec_new_0 (d*(long)d);
  pca->eigvec = fvec_new (d*(long)d);
  pca->eigval = fvec_new (d);
  return pca;
}
Exemplo n.º 2
0
/* Compute the d x d covariance matrix of n vectors of dimension d.
 *
 *   d                dimension of each vector
 *   n                number of vectors
 *   v                input vectors: v[i * d + j] is component j of vector i
 *   avg              optional output (may be NULL); when non-NULL and
 *                    assume_centered is 0 it receives the per-dimension mean
 *   assume_centered  when non-zero no sums are computed, avg is left
 *                    untouched and the mean-correction term stays at the
 *                    initial content of the fvec_new_0 buffer
 *
 * Returns a newly allocated d*d matrix, owned by the caller, containing
 *     (1/n) * sum_i v_i v_i^T  -  (1/n^2) * s s^T      with s = sum_i v_i
 *
 * All size and scale computations are carried out in long / double so that
 * d*d, n*d and n*n cannot overflow int (n*n already wraps for n > 46340).
 */
float *fmat_new_covariance (int d, int n, const float *v, float *avg, int assume_centered)
{
  long i, j;
  long dl = d, nl = n;

  float *cov = fvec_new_0 (dl * dl);

  if (!assume_centered) {

    /* accumulate directly into avg when the caller wants the mean */
    float *sums = avg ? avg : fvec_new (d);
    fvec_0 (sums, d);

    for (i = 0; i < nl; i++)
      for (j = 0; j < dl; j++)
        sums[j] += v[i * dl + j];

    /* cov <- s s^T ; it is scaled by beta = -1/n^2 in the sgemm call below */
    for (i = 0; i < dl; i++)
      for (j = 0; j < dl; j++)
        cov[i + j * dl] = sums[i] * sums[j];

    if (avg)
      for (i = 0; i < dl; i++) avg[i] /= n;
    else
      free (sums);
  }

  FINTEGER di = d, ni = n;

  /* transpose input matrix: vt[i * n + j] is component i of vector j */
  float *vt = fvec_new (nl * dl);
  for (i = 0; i < dl; i++)
    for (j = 0; j < nl; j++)
      vt[i * nl + j] = v[j * dl + i];

  float alpha = 1.0 / n;
  float beta = -1.0 / ((double) n * (double) n);

  /* cov <- alpha * vt^T * vt + beta * cov */
  sgemm_ ("T", "N", &di, &di, &ni, &alpha, vt, &ni, vt, &ni, &beta, cov, &di);

  free (vt);

  return cov;
}
Exemplo n.º 3
0
/* Expand a sparse vector into a newly allocated dense vector.
 *
 *   idx  indices of the nz stored entries
 *   v    values of the nz stored entries (v[i] goes to position idx[i])
 *   nz   number of stored entries
 *   n    length of the dense result
 *
 * The result is obtained from fvec_new_0 (n) and returned to the caller.
 * Entries whose index lies outside [0, n) are skipped so that they can never
 * write outside the result; if an index occurs several times the last
 * occurrence wins.
 */
float * spfvec_to_fvec (int * idx, float * v, int nz, int n)
{
  int i;
  float * ret = fvec_new_0 (n);

  for (i = 0 ; i < nz ; i++) {
    int bin = idx[i];
    if (bin < 0 || bin >= n)  /* ignore bad bins */
      continue;
    ret[bin] = v[i];
  }

  return ret;
}
Exemplo n.º 4
0
/* Give points to mixture components that received no posterior mass.
 *
 *   n  number of vectors
 *   v  vectors, component l of vector i at v[i*d+l]
 *   g  mixture; d, k, mu and sigma are read
 *   p  posteriors, p[j+i*k] for component j and vector i; modified in place
 *
 * For each component j whose summed posterior is exactly 0, a donor component
 * with positive mass is searched by stepping through the components with a
 * large prime stride.  Along the donor's dimension selected by fvec_arg_max
 * on its sigma row, every point with positive donor posterior that lies below
 * the donor's mu has its posterior moved from the donor to j.  A donor is
 * used at most once per call.  Progress is reported on stdout.
 */
void gmm_handle_empty(int n, const float *v, gmm_t *g, float *p) {
    long d = g->d;
    long k = g->k;

    long zero_count = fvec_count_occurrences(p, k*n, 0);
    printf("nb of 0 probabilities: %ld / (%ld*%d) = %.1f %%\n",
           zero_count, k, n, zero_count*100.0/(k*n));

    int i, j;

    /* total posterior mass received by each component */
    float *mass = fvec_new_0(k);
    for (i = 0 ; i < n ; i++) {
        const float *row = p + i*k;
        for (j = 0 ; j < k ; j++)
            mass[j] += row[j];
    }

    int bigprime = 1000003;

    for (j = 0 ; j < k ; j++) {
        if (mass[j] != 0)
            continue;

        printf("center %d is empty....", j);
        fflush(stdout);

        /* look for a non-empty component, hopping by a large prime stride */
        int donor = j;
        for (i = 0; i < k; i++) {
            donor = (donor + bigprime) % k;
            if (mass[donor] > 0)
                break;
        }
        assert(i<k || !"could not find centroid to split, veeeery bad input data");

        /* dimension to split: the arg-max of the donor's sigma row */
        int split_dim = fvec_arg_max(g->sigma + d * donor, d);

        /* hand over the donor's points lying below its mean on that dimension */
        int moved = 0, candidates = 0;
        for (i = 0; i < n; i++) {
            float *from = p + donor + i*k;
            if (!(*from > 0))
                continue;
            candidates++;
            if (v[i*d + split_dim] < g->mu[donor*d + split_dim]) {
                p[j + i*k] = *from;
                *from = 0;
                moved++;
            }
        }

        printf("split %d at dim %d (variance %g, transferred %d/%d pts)\n",
               donor, split_dim, g->sigma[d*donor + split_dim], moved, candidates);

        mass[donor] = -1; /* avoid further splits */
    }

    free(mass);
}
Exemplo n.º 5
0
/* Allocate an nrow x ncol float matrix through fvec_new_0.
 * The element count is formed in long arithmetic before the call. */
float *fmat_new_0 (int nrow, int ncol)
{
  long count = (long)nrow * ncol;
  return fvec_new_0 (count);
}
Exemplo n.º 6
0
void HBPlus::inner_lb_distance_OnePerPoint(const fDataSet *ds)
{
    int i, j, nci, otheri;
    float dis = 0;
    float *xcenter = fvec_new(d);
    float *ocenter = fvec_new(d);
    float *x = fvec_new(d);
    // distance between each centroid pair
    float *centroid_dis_map = fvec_new_0(ncenter*ncenter);
    innerLB = (DoubleIndex **)malloc(sizeof(DoubleIndex*)*ncenter);
    for(i = 0; i < ncenter; i++){
        innerLB[i] = NULL;
    }

    /// prepare distances between each two centroids
    for(i = 0; i < ncenter; i++)
    {
        memcpy(xcenter, centroid+i*d, sizeof(float)*d);
        for(j = 0; j <= i; j++)
        {
            memcpy(ocenter, centroid+j*d, sizeof(float)*d);
            dis = odistance(xcenter, ocenter, d);
            centroid_dis_map[i*ncenter+j] = dis;
            if(i != j)
            {
                centroid_dis_map[j*ncenter+i] = dis;
            }
        }
    }

    // initialize the storing space for inner distance of each member point
    for(nci = 0; nci < ncenter; nci++)
    {
        /// cnt_member_points
        int cnt_member = member[nci].size();
        innerLB[nci] = (DoubleIndex*)malloc(sizeof(DoubleIndex) * cnt_member);
        for(i = 0; i < cnt_member; i++)
        {
            innerLB[nci][i].id = -1;
            innerLB[nci][i].val = FLOAT_MAX;
        }
    }

    for(nci = 0; nci < ncenter; nci++)
    {
        /* in each centroid */
        memcpy(xcenter, centroid+nci*d, sizeof(float)*d);   // the current centroid
        int cnt_member = member[nci].size();    // cnt member points
        
        /* for each member points */
        for(i = 0; i < cnt_member; i++){
            memcpy(x, ds->data+member[nci][i]*d, sizeof(float)*d);
            
            /* for each other centroid */
            for(otheri = 0; otheri < ncenter; otheri++)
            {
                if(otheri != nci)
                {
                    memcpy(ocenter, centroid+otheri*d, sizeof(float)*d);
                    dis = (odistance_square(x, ocenter, d) - odistance_square(x, xcenter, d)) / (2*centroid_dis_map[nci*ncenter+otheri]);
                    if(f_bigger(innerLB[nci][i].val, dis))
                    {// update using smaller distance
                        innerLB[nci][i].val = dis;
                        innerLB[nci][i].id = member[nci][i];          // id is the data point
                    }
                }
            }
        }
        // sort member data points along the innerLB distance in the nci-th cluster
        DI_MergeSort(innerLB[nci], 0, cnt_member-1);
    }

    free(centroid_dis_map); centroid_dis_map = NULL;
    free(ocenter); ocenter = NULL;
    free(xcenter); xcenter = NULL;
    free(x); x = NULL;
}
Exemplo n.º 7
0
/* Learn a mixture model from n vectors of dimension d.
 *
 *   di, ni, ki  dimension, number of vectors, number of components
 *   niter       maximum number of iterations (0 is replaced by 10000); the
 *               same value is handed to kmeans
 *   v           input vectors
 *   nt, seed, nredo  forwarded to kmeans; nt is also forwarded to the
 *               gmm_compute_* helpers
 *   flags       GMM_FLAGS_PURE_KMEANS skips the iterative refinement; the
 *               flags are otherwise forwarded to the gmm_compute_* helpers
 *
 * The model is initialised from kmeans: mu receives the centroids, every
 * weight is set to 1/k and every sigma entry to the mean of the distances
 * returned by kmeans.  Each iteration then calls gmm_compute_p_thread,
 * gmm_handle_empty and gmm_compute_params.  Iteration stops early when the
 * sum of all mu entries is bit-for-bit unchanged between two iterations.
 *
 * Returns the model created by gmm_new.  Progress goes to stdout and stderr.
 */
gmm_t * gmm_learn (int di, int ni, int ki, int niter,
                   const float * v, int nt, int seed, int nredo,
                   int flags)
{
    const long d = di, k = ki, n = ni;

    int iter;
    double prev_key, key = 666;

    if (niter == 0)
        niter = 10000;

    /* posteriors p(ci|x) for every point, and the model itself */
    float * p = fvec_new_0 (n * k);
    gmm_t * g = gmm_new (d, k);

    /* k-means initialisation; nassign is only printed for debugging */
    int * nassign = ivec_new (n);
    float * dis = fvec_new (n);
    kmeans (d, n, k, niter, v, nt, seed, nredo, g->mu, dis, NULL, nassign);

    fflush (stderr);
    fprintf (stderr, "assign = ");
    ivec_print (nassign, k);
    fprintf (stderr, "\n");
    free (nassign);

    /* uniform weights, one shared variance estimate for every entry */
    fvec_set (g->w, k, 1.0 / k);
    double sig = fvec_sum (dis, n) / n;
    printf ("sigma at initialization = %.3f\n", sig);
    fvec_set (g->sigma, k * d, sig);
    free (dis);

    fputs ("<><><><> GMM  <><><><><>\n", stdout);

    if (flags & GMM_FLAGS_PURE_KMEANS)
        niter = 0;

    iter = 1;
    while (iter <= niter) {

        gmm_compute_p_thread (n, v, g, p, flags, nt);
        fflush (stdout);

        gmm_handle_empty (n, v, g, p);

        gmm_compute_params (n, v, p, g, flags, nt);
        fflush (stdout);

        /* the sum of the means serves as a cheap fingerprint of the model */
        prev_key = key;
        key = fvec_sum (g->mu, k * d);

        printf ("keys %5d: %.6f -> %.6f\n", iter, prev_key, key);
        fflush (stdout);

        /* fingerprint unchanged -> treat as converged */
        if (key == prev_key)
            break;

        iter++;
    }
    fprintf (stderr, "\n");

    free (p);

    return g;
}
Exemplo n.º 8
0
/**
 * Register the neighbor clusters of every cluster and return their average count.
 *
 * knn_full_thread is asked for the g nearest centroids of every data point.
 * For each point, the first returned centroid is taken as its own cluster and
 * the remaining g-1 are appended (once each) to that cluster's neighbor list.
 *
 * @param ds   data set providing n points in ds->data
 * @param nth  thread count forwarded to knn_full_thread
 * @return     total number of registered neighbors divided by the number of clusters
 */
float ANC::neighbor_cluster_estimation(const fDataSet *ds, int nth)
{
	/// check for necessary data: centroids, basedata
	ASSERTINFO(ds == NULL || centroid == NULL || ds->data == NULL, "IPP");

	/// prepare for necessary variables
	neighbor.resize(ncenter);
	int	i, iclu = -1, ineighbor = -1;
	int	K = ncenter;
	int	n = ds->n;
	int	*tmp_assign = ivec_new_set(n * g, -1);
	float	*tmp_dis = fvec_new_0(n * g);
	// neighbor_flag[a*ncenter+b] != 0 once b has been registered as a neighbor of a
	int *neighbor_flag = ivec_new_set(ncenter*ncenter, 0);

	/// find k-nn among all centroids for each base vector: query=basedata, dataset=centroids
	knn_full_thread (	
				2,		// euclidean distance
				n, K, d, 
				g,		// g-nn
				centroid, ds->data, NULL, tmp_assign, tmp_dis, nth);

	// extract neighbor clusters for each cluster
	for(i = 0; i < n; i++)
	{
		iclu = tmp_assign[i*g];					// current cluster = current point's 1-NN
		for (int ig = 1; ig < g; ig++){
			ineighbor = tmp_assign[i*g+ig];				// neighbor cluster = current point's (ig+1)-th NN
			if(0 == neighbor_flag[iclu*ncenter+ineighbor]){
				neighbor[iclu].push_back(ineighbor);
				neighbor_flag[iclu*ncenter+ineighbor] = 1;
			}
		}
	}

	puts("end neighbor");

	// check number of neighbor cluster
	for(i = 0; i < K; i++){
		ASSERTINFO(neighbor[i].size() == 0, "warning: there is a cluster who has no neighbors");
	}
	puts(">>> finished neighbor cluster registration");

	///### neighbor cluster count
	puts(">>> neighbor cluster");
	int sum_neighbor = 0;
	for(i = 0;  i < K; i++){
		sum_neighbor += neighbor[i].size();
	}
	float avg_neighbor = sum_neighbor / (float)K;

	/// disallocate space (neighbor_flag was previously leaked)
	FREE(tmp_assign);
	FREE(tmp_dis);
	FREE(neighbor_flag);

	return avg_neighbor;
}
Exemplo n.º 9
0
Arquivo: vlad.c Projeto: atroudi/V3V_2
/* Aggregate n local descriptors into one descriptor built around k centroids.
 *
 *   k, d       number of centroids and their dimension
 *   centroids  k*d floats, centroid i at centroids + i*d
 *   n          number of input vectors
 *   v          n*d floats, vector i at v + i*d
 *   flags      selects the variant (see below)
 *   desc       output buffer, written by this function; its required size
 *              depends on flags: k*d floats, except k*d*3 for flags==6 and
 *              k*d*2 for flags==15 and flags==17
 *
 * Except for flags 11 and 12, every vector is assigned to one centroid with
 * nn() and the per-centroid block of desc is:
 *   5      sum of the raw vectors
 *   6, 7   per dimension, 3 (flags==6) or 1 (flags==7) quantiles of the
 *          assigned values minus the centroid, times the number of points
 *   8, 9   sum of residuals divided by their norm (8) or by the square root
 *          of their norm (9); zero residuals are skipped
 *   10     sum of the raw vectors, then fvec_normalize (.., 2.0) per centroid
 *   13     sum of squared residuals
 *   14     square root of the summed squared deviation of the residuals from
 *          their per-centroid mean
 *   15     first k*d floats: sum of residuals; last k*d floats: as for 14
 *   17     first k*d floats: sum of positive residual parts; last k*d
 *          floats: sum of the magnitudes of the non-positive parts
 *   other  sum of residuals, followed by: division by the cluster size (1),
 *          fvec_normalize per centroid (2), an assertion failure (3, 4), or
 *          scaling by the square root of the cluster size (16)
 * For flags 11 and 12 each vector is assigned to its 4 (resp. 2) nearest
 * centroids with knn() and its residual is added to each of them.
 */
void vlad_compute(int k, int d, const float *centroids, int n, const float *v,int flags, float *desc) 
{

	int i,j,l,n_quantile,i0,i1,ai,a,ma,ni;
	int *perm ;
	float un , diff;
	float *tab,*u,*avg,*sum,*mom2,*dists;
	int *hist,*assign;


	if(flags<11 || flags>=13) 
	{
		/* hard assignment: assign[i] is the centroid chosen for vector i */
		assign=ivec_new(n);

		nn(n,k,d,centroids,v,assign,NULL,NULL);    

		if(flags==6 || flags==7) 
		{
			n_quantile = flags==6 ? 3 : 1;
			fvec_0(desc,k*d*n_quantile);
			perm      = ivec_new(n);
			tab       = fvec_new(n);
			/* perm orders the vectors by assigned centroid, so each cluster
			   occupies the contiguous range [i0, i1) of perm */
			ivec_sort_index(assign,n,perm);
			i0=0;
			for(i=0;i<k;i++) 
			{
				i1=i0;
				while(i1<n && assign[perm[i1]]==i) 
				{
					i1++;
				}

				/* empty cluster: its block of desc stays 0 */
				if(i1==i0) continue;

				for(j=0;j<d;j++) 
				{        
					for(l=i0;l<i1;l++)
					{
						tab[l-i0]=v[perm[l]*d+j];
					}
					ni=i1-i0;
					fvec_sort(tab,ni);
					for(l=0;l<n_quantile;l++) 
					{
						desc[(i*d+j)*n_quantile+l]=(tab[(l*ni+ni/2)/n_quantile]-centroids[i*d+j])*ni;
					}
				}

				i0=i1;
			}
			free(perm);
			free(tab);
		} 
		else if(flags==5) 
		{
			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=v[i*d+j];
				}
			}

		} 
		else if(flags==8 || flags==9) 
		{
			fvec_0(desc,k*d);

			u   = fvec_new(d);

			for(i=0;i<n;i++) 
			{
				/* u = residual of vector i with respect to its centroid */
				fvec_cpy(u,v+i*d,d);
				fvec_sub(u,centroids+assign[i]*d,d);
				un=(float)sqrt(fvec_norm2sqr(u,d));

				/* a zero residual contributes nothing and cannot be normalised */
				if(un==0) continue;
				if(flags==8) 
				{        
					fvec_div_by(u,d,un);
				} else if(flags==9) 
				{
					fvec_div_by(u,d,sqrt(un));
				}

				fvec_add(desc+assign[i]*d,u,d);

			}
			free(u);
		} 
		else if(flags==10) 
		{
			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=v[i*d+j];
				}
			}

			for(i=0;i<k;i++) 
			{
				fvec_normalize(desc+i*d,d,2.0);  
			}

		} 
		else if(flags==13) 
		{

			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=(float)sqr(v[i*d+j]-centroids[assign[i]*d+j]);
				}
			}     

		} 
		else if(flags==14) 
		{
			/* avg = mean residual per centroid (left at 0 for empty clusters) */
			avg = fvec_new_0(k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					avg[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
				}
			}

			hist=ivec_new_histogram(k,assign,n);

			for(i=0;i<k;i++) 
			{
				if(hist[i]>0) 
				{
					for(j=0;j<d;j++) 
					{
						avg[i*d+j]/=hist[i];
					}
				}
			}

			free(hist);

			fvec_0(desc,k*d);
			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=(float)(sqr(v[i*d+j]-centroids[assign[i]*d+j]-avg[assign[i]*d+j]));
				}
			}

			fvec_sqrt(desc,k*d);

			free(avg);
		}  
		else if(flags==15) 
		{
			/* desc = [ sum of residuals | second-moment part ] */
			fvec_0(desc,k*d*2);
			sum = desc;

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					sum[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
				}
			}

			hist = ivec_new_histogram(k,assign,n);

			mom2 = desc+k*d;

			for(i=0;i<n;i++) 
			{
				ai=assign[i];
				for(j=0;j<d;j++) 
				{
					/* hist[ai] >= 1 here because vector i itself is assigned to ai */
					mom2[ai*d+j]+=(float)(sqr(v[i*d+j]-centroids[ai*d+j]-sum[ai*d+j]/hist[ai]));
				}
			}
			fvec_sqrt(mom2,k*d);
			free(hist);


		} 
		else if(flags==17) 
		{
			fvec_0(desc,k*d*2);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					diff=v[i*d+j]-centroids[assign[i]*d+j];
					if(diff>0)
					{
						desc[assign[i]*d+j]+=diff;
					}
					else 
					{
						/* non-positive part goes to the second half, as a magnitude */
						desc[assign[i]*d+j+k*d]-=diff;
					}
				}
			}

		} 
		else 
		{
			/* default: plain sum of residuals, optionally post-processed below */
			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
				}
			}


			if(flags==1) 
			{
				hist=ivec_new_histogram(k,assign,n);
				/* printf("unbalance factor=%g\n",ivec_unbalanced_factor(hist,k)); */

				for(i=0;i<k;i++) 
				{
					/* an empty cluster keeps its all-zero block; dividing by
					   hist[i]==0 would turn it into NaN (0/0) */
					if(hist[i]>0) 
					{
						for(j=0;j<d;j++) 
						{
							desc[i*d+j]/=hist[i];    
						}
					}
				}
				free(hist);
			}

			if(flags==2) 
			{
				for(i=0;i<k;i++) 
				{
					fvec_normalize(desc+i*d,d,2.0);
				}
			}

			if(flags==3 || flags==4) 
			{
				assert(!"not implemented");
			}

			if(flags==16) 
			{
				hist=ivec_new_histogram(k,assign,n);
				for(i=0;i<k;i++) 
				{
					if(hist[i]>0) 
					{
						/* NOTE(review): the value returned by fvec_norm is not used;
						   if a normalisation was intended this may need
						   fvec_normalize instead -- confirm before changing */
						fvec_norm(desc+i*d,d,2);
						fvec_mul_by(desc+i*d,d,sqrt(hist[i]));
					}
				}
				free(hist);
			}


		}
		free(assign);
	} 
	else if(flags==11 || flags==12) 
	{
		/* multiple assignment: ma nearest centroids per vector */
		ma=flags==11 ? 4 : 2;
		assign=ivec_new(n*ma);

		dists=knn(n,k,d,ma,centroids,v,assign,NULL,NULL);    

		fvec_0(desc,k*d);

		for(i=0;i<n;i++) 
		{
			for(j=0;j<d;j++) 
			{
				for(a=0;a<ma;a++) 
				{
					desc[assign[ma*i+a]*d+j]+=v[i*d+j]-centroids[assign[ma*i+a]*d+j];
				}
			}
		} 

		free(dists);

		free(assign);
	}

}
Exemplo n.º 10
0
Arquivo: gmm.c Projeto: Erotemic/yael
/* Compute the spatial part of a Fisher-style descriptor.
 *
 *   N, K, D  number of points, number of components, dimension of ll
 *   Q        posteriors, Q[n * K + k] for point n and component k
 *   sgmm     2*D floats: the first D are used as mm[], the next D as S[]
 *   ll       D*N floats, passed to sgemm_ as an N x D matrix with leading
 *            dimension N
 *   sdesc    output, 2*K*D floats
 *
 * With Qm[k] the mean of Q over the points, and A = Q*ll/N, B = Q*(ll^2)/N
 * the two K x D products obtained from a single sgemm_ call, the output is
 *   sdesc[d + k*D]       =  A[k,d] - Qm[k]*mm[d]
 *   sdesc[K*D + d + k*D] = -B[k,d] + 2*A[k,d]*mm[d] + Qm[k]*(S[d] - mm[d]^2)
 */
void gmm_fisher_spatial(int N, int K, int D, 
                        const float *Q, 
                        const float *sgmm, 
                        const float *ll, 
                        float *sdesc) {
  long k, d, n, i;

  /* mean posterior of every component */
  float *Q_mean = fvec_new_0(K);
  for(n = 0; n < N; n++) {
    const float *Q_row = Q + n * K;
    for(k = 0; k < K; k++)
      Q_mean[k] += Q_row[k];
  }
  for(k = 0; k < K; k++)
    Q_mean[k] /= N;

  /* one buffer holding ll followed by its element-wise square */
  float *stacked = fvec_new(D * 2 * N);
  fvec_cpy(stacked, ll, D * N);
  float *squares = stacked + D * N;
  for(i = 0; i < D * N; i++)
    squares[i] = ll[i] * ll[i];

  /* Q_ll (K x 2D) = (1/N) * Q (K x N) * stacked (N x 2D) */
  FINTEGER mi = K, ni = 2 * D, ki = N;
  float one_over_N = 1.0 / N, zero = 0;
  float *Q_ll = fvec_new(K * 2 * D);
  float *Q_ll_2 = Q_ll + K * D;   /* second half: product with the squares */
  sgemm_("N", "N", &mi, &ni, &ki,
         &one_over_N, Q, &mi,
         stacked, &ki,
         &zero, Q_ll, &mi);
  free(stacked);

  const float *mm = sgmm;
  const float *S = sgmm + D;
  float *d_mm = sdesc;
  float *d_S = sdesc + K * D;

  /* first half of the descriptor */
  for(d = 0; d < D; d++) {
    for(k = 0; k < K; k++)
      d_mm[d + k * D] = Q_ll[K * d + k] - Q_mean[k] * mm[d];
  }

  /* second half of the descriptor */
  for(d = 0; d < D; d++) {
    float dfact = S[d] - mm[d] * mm[d];
    for(k = 0; k < K; k++)
      d_S[d + k * D] = -Q_ll_2[K * d + k] + 2 * Q_ll[K * d + k] * mm[d] + Q_mean[k] * dfact;
  }

  free(Q_ll);
  free(Q_mean);
}
Exemplo n.º 11
0
Arquivo: gmm.c Projeto: Erotemic/yael
/* Compute a Fisher descriptor of n vectors from already-computed posteriors.
 *
 *   n           number of vectors
 *   v           vectors, component l of vector i at v[i*d+l]
 *   g           mixture; d, k, w, mu and sigma are read
 *   flags       GMM_FLAGS_W, GMM_FLAGS_MU, GMM_FLAGS_SIGMA and
 *               GMM_FLAGS_1SIGMA select the parts that are produced;
 *               GMM_FLAGS_NO_NORM changes/disables the normalisation of the
 *               mu and sigma parts
 *   p           posteriors, p[i*k+j] for vector i and component j
 *   dp_dlambda  output; the selected parts are written back to back in the
 *               order: weights (k-1 values), mu (d*k values), sigma part
 *
 * The number of values written is checked with an assert against
 * gmm_fisher_sizeof (g, flags).
 */
void gmm_fisher_from_posteriors(int n, const float *v, const gmm_t * g, int flags, const float *p, 
                                float *dp_dlambda) {
  
  long d=g->d, k=g->k;
  long i,j,l;
  long ii=0;   /* write position in dp_dlambda */

  /* tables shared by the fast mu and sigma paths; allocated on first use */
  float * vp = NULL; /* v*p */
  float * sum_pj = NULL; /* sum of p's for a given j */  


/* accessors: i = vector, j = component, l = dimension */
#define P(j,i) p[(i)*k+(j)]
#define V(l,i) v[(i)*d+(l)]
#define MU(l,j) g->mu[(j)*d+(l)]
#define SIGMA(l,j) g->sigma[(j)*d+(l)]
#define VP(l,j) vp[(j)*d+(l)]

  if(flags & GMM_FLAGS_W) {

    /* weight part: k-1 values, component 0 is used as the reference */

    float *accus = fvec_new_0(k); 
    
    for(i=0;i<n;i++) 
      for(j=1;j<k;j++) 
        accus[j] += P(j,i)/g->w[j] - P(0,i)/g->w[0];
    
    for(j=1;j<k;j++) {        
      double accu=accus[j];
      
      /* normalization */
      double f=n*(1/g->w[j]+1/g->w[0]);
      
      dp_dlambda[ii++]=accu/sqrt(f);
    }
    free(accus);
    
  } 

  if(flags & GMM_FLAGS_MU) {
    /* mu part: d*k values starting at the current write position */
    float *dp_dmu=dp_dlambda+ii;

#define DP_DMU(l,j) dp_dmu[(j)*d+(l)]
    
    if(0) { /* simple and slow */
    
      /* disabled reference implementation of the fast path below */
      for(j=0;j<k;j++) {
        for(l=0;l<d;l++) {
          double accu=0;
          
          for(i=0;i<n;i++) 
            accu += P(j,i) * (V(l,i)-MU(l,j)) / SIGMA(l,j);
          
          DP_DMU(l,j)=accu;
        }
      }
      
    } else { /* complicated and fast */

      /* precompute  tables that may be useful for sigma too */
      /* for this path to match the slow one, VP(l,j) must equal
         sum_i P(j,i) * V(l,i) after the fmat_mul_tr call */
      vp = fvec_new(k * d);
      fmat_mul_tr(v,p,d,k,n,vp);

      sum_pj = fvec_new_0(k);
      for(i=0;i<n;i++) 
        for(j=0;j<k;j++) 
          sum_pj[j] += P(j,i);        

      for(j=0;j<k;j++) {
        for(l=0;l<d;l++)
          DP_DMU(l,j) = (VP(l,j) - MU(l,j) * sum_pj[j]) / SIGMA(l,j);
      }

    }

    /* normalization */
    if(!(flags & GMM_FLAGS_NO_NORM)) {
      for(j=0;j<k;j++) 
        for(l=0;l<d;l++) {
          float nf = sqrt(n*g->w[j]/SIGMA(l,j));
          /* entries whose factor is not positive are left unnormalised */
          if(nf > 0) DP_DMU(l,j) /= nf;                
        }        
    }
#undef DP_DMU
    ii+=d*k;
  }

  if(flags & (GMM_FLAGS_SIGMA | GMM_FLAGS_1SIGMA)) {

    
    if(flags & GMM_FLAGS_1SIGMA) { /* fast not implemented for 1 sigma */

      /* per component: d per-dimension values when GMM_FLAGS_SIGMA is also
         set, followed by one value aggregated over all dimensions */
      for(j=0;j<k;j++) {
        double accu2=0;
        for(l=0;l<d;l++) {
          double accu=0;
        
          for(i=0;i<n;i++) 
            accu += P(j,i) * (sqr(V(l,i)-MU(l,j)) / SIGMA(l,j) - 1) / sqrt(SIGMA(l,j));
        
          if(flags & GMM_FLAGS_SIGMA) {

            double f=flags & GMM_FLAGS_NO_NORM ? 1.0 : 2*n*g->w[j]/SIGMA(l,j);
          
            dp_dlambda[ii++]=accu/sqrt(f);
          } 
          accu2+=accu;        
        }

        /* always true inside this branch */
        if(flags & GMM_FLAGS_1SIGMA) {
          double f=flags & GMM_FLAGS_NO_NORM ? 1.0 : 2*d*n*g->w[j]/SIGMA(0,j);
          dp_dlambda[ii++]=accu2/sqrt(f);        
        }

      }  
    
    } else { /* fast and complicated */
      assert(flags & GMM_FLAGS_SIGMA);
      /* sigma part: d*k values starting at the current write position */
      float *dp_dsigma = dp_dlambda + ii;

      /* build the shared tables if the mu part did not already do so */
      if(!vp) {
        vp = fvec_new(k * d);
        fmat_mul_tr(v,p,d,k,n,vp);
      }

      if(!sum_pj) {
        /* note: accumulated in double here, in float in the mu part */
        sum_pj = fvec_new(k);
        for(j=0;j<k;j++) {        
          double sum=0;        
          for(i=0;i<n;i++) sum += P(j,i);        
          sum_pj[j] = sum;
        }
      }
      /* v2 = element-wise square of v; v2p is built from it the same way vp
         is built from v */
      float *v2 = fvec_new(n * d);
      for(i = n*d-1 ; i >= 0; i--) v2[i] = v[i] * v[i];
      float *v2p = fvec_new(k * d);

      fmat_mul_tr(v2,p,d,k,n,v2p);
      free(v2);


#define V2P(l,j) v2p[(j)*d+(l)]
#define DP_DSIGMA(i,j) dp_dsigma[(i)+(j)*d]
      for(j=0;j<k;j++) {

        for(l=0;l<d;l++) {
          double accu;

          /* accu = V2P - 2*MU*VP + sum_pj*(MU^2 - SIGMA), built term by term */
          accu = V2P(l, j);

          accu += VP(l, j) * (- 2 * MU(l,j));

          accu += sum_pj[j] * (sqr(MU(l,j))  - SIGMA(l,j));

          /* normalization */

          double f;

          if(flags & GMM_FLAGS_NO_NORM) {
            f = pow(SIGMA(l,j), -1.5);
          } else {
            f = 1 / (SIGMA(l,j) * sqrt(2*n*g->w[j]));
          }

          DP_DSIGMA(l,j) = accu * f;

        }

      }  
      
      free(v2p);

#undef DP_DSIGMA
#undef V2P
      ii += d * k;
    }

  }
  
  /* the number of values written must match the advertised descriptor size */
  assert(ii==gmm_fisher_sizeof(g,flags));
  /* NOTE(review): VP is defined above but is not #undef'd here, so it stays
     defined for the rest of the translation unit */
#undef P
#undef V
#undef MU
#undef SIGMA
  free(sum_pj);
  free(vp);
}
Exemplo n.º 12
0
/**
 * Register the neighbor clusters of every cluster and print their counts.
 *
 * knn_full_thread is asked for the 2 nearest centroids of every data point.
 * For each point, the first returned centroid is taken as its own cluster and
 * the second one is appended (once) to that cluster's neighbor list.
 * The per-cluster neighbor count and the average are printed on stdout.
 *
 * @param ds   data set providing n points of dimension d in ds->data
 * @param nth  thread count forwarded to knn_full_thread
 */
void Clustering::neighbor_cluster_estimation(const fDataSet *ds, int nth)
{
	/// check for necessary data: centroids, basedata
	ASSERTINFO(ds == NULL || centroid == NULL || ds->data == NULL, "IPP");

	/// prepare for necessary variables
	neighbor.resize(ncenter);
	int	i, iclu = -1, ineighbor = -1;
	int	K = ncenter;
	int	n = ds->n;
	int	d = ds->d;
	int	*tmp_assign = ivec_new_set(n * 2, -1);
	float	*tmp_dis = fvec_new_0(n * 2);
	// neighbor_flag[a*ncenter+b] != 0 once b has been registered as a neighbor of a
	int *neighbor_flag = ivec_new_set(ncenter*ncenter, 0);

	/// find k-nn among all centroids for each base vector: query=basedata, dataset=centroids, k=2 for neighbor cluster
	knn_full_thread (	
				2,		// euclidean distance
				n, K, d, 
				2,		// 2-nn
				centroid, ds->data, NULL, tmp_assign, tmp_dis, nth);

	// extract neighbor clusters for each cluster
	for(i = 0; i < n; i++)
	{
		iclu = tmp_assign[i*2];					// current cluster = current point's 1-NN
		ineighbor = tmp_assign[i*2+1];				// current neighbor cluster = current point's 2-NN
		if(0 == neighbor_flag[iclu*ncenter+ineighbor]){
			neighbor[iclu].push_back(ineighbor);
			neighbor_flag[iclu*ncenter+ineighbor] = 1;
		}

	}
	puts(">>> finished neighbor cluster registration");

	///### display neighbor cluster count
	puts(">>> neighbor cluster");
	int sum_neighbor = 0;
	for(i = 0;  i < K; i++){
		// size() is not an int: convert explicitly so the argument matches %d
		printf("\n%d - %d\t", i, (int)neighbor[i].size());
		sum_neighbor += neighbor[i].size();
	}
	printf("\naveragely %lf neighbors\n", sum_neighbor / (float)K);

	printf("\naveragely %lf neighbors\n", sum_neighbor / (float)K);

	/// disallocate space (neighbor_flag was previously leaked)
	FREE(tmp_assign);
	FREE(tmp_dis);
	FREE(neighbor_flag);
}