Example #1
0
/*
 * Fills dp_dlambda with the derivative sections selected by `flags` for the
 * n input vectors v under the mixture g, and optionally reports the mean
 * soft assignment of every mixture component.
 *
 *   n, v       : number of input vectors and their values, V(l,i) = v[i*d+l]
 *   g          : mixture; g->d, g->k, g->w, g->mu and g->sigma are read
 *   flags      : GMM_FLAGS_W, GMM_FLAGS_MU, GMM_FLAGS_SIGMA / GMM_FLAGS_1SIGMA
 *                select the sections (written in that order);
 *                GMM_FLAGS_NO_NORM disables the normalisation of the mu and
 *                sigma sections
 *   dp_dlambda : output buffer; the final assert checks that exactly
 *                gmm_fisher_sizeof(g,flags) floats were produced
 *   word_total_soft_assignment : when non-NULL receives k floats, the mean of
 *                P(j,i) over the n vectors (0 when n == 0)
 */
void gmm_fisher_save_soft_assgn(int n, const float *v, const gmm_t * g, int flags,
                                float *dp_dlambda,
                                float *word_total_soft_assignment) {
    long d=g->d, k=g->k;
    float *p = fvec_new(n * k);
    long i,j,l;
    long ii=0;                 /* write position in dp_dlambda */

    float * vp = NULL; /* v*p */
    float * sum_pj = NULL; /* sum of p's for a given j */

    /* GMM_FLAGS_W is always forced on for the computation of p */
    gmm_compute_p(n,v,g,p,flags | GMM_FLAGS_W);

#define P(j,i) p[(i)*k+(j)]
#define V(l,i) v[(i)*d+(l)]
#define MU(l,j) g->mu[(j)*d+(l)]
#define SIGMA(l,j) g->sigma[(j)*d+(l)]
#define VP(l,j) vp[(j)*d+(l)]

    // Save total soft assignment per centroid
    if (word_total_soft_assignment != NULL) {
        for (j=0; j<k; j++) {
            double sum=0;
            for (i=0; i<n; i++) {
                sum += P(j,i);
            }
            if (n != 0) {
                word_total_soft_assignment[j] = (float)(sum/n);
            } else {
                word_total_soft_assignment[j] = 0.0;
            }
        }
    }

    /* weight section: k-1 entries, component 0 serves as the reference */
    if(flags & GMM_FLAGS_W) {

        for(j=1; j<k; j++) {
            double accu=0;

            for(i=0; i<n; i++)
                accu+= P(j,i)/g->w[j] - P(0,i)/g->w[0];

            /* normalization */
            double f=n*(1/g->w[j]+1/g->w[0]);

            dp_dlambda[ii++]=accu/sqrt(f);
        }
    }

    /* mean section: d*k entries, DP_DMU(l,j) = dp_dmu[j*d+l] */
    if(flags & GMM_FLAGS_MU) {
        float *dp_dmu=dp_dlambda+ii;

#define DP_DMU(l,j) dp_dmu[(j)*d+(l)]

        if(0) { /* simple and slow */

            for(j=0; j<k; j++) {
                for(l=0; l<d; l++) {
                    double accu=0;

                    for(i=0; i<n; i++)
                        accu += P(j,i) * (V(l,i)-MU(l,j)) / SIGMA(l,j);

                    DP_DMU(l,j)=accu;
                }
            }

        } else { /* complicated and fast */

            /* precompute  tables that may be useful for sigma too */
            vp = fvec_new(k * d);
            fmat_mul_tr(v,p,d,k,n,vp);

            sum_pj = fvec_new(k);
            for(j=0; j<k; j++) {
                double sum=0;
                for(i=0; i<n; i++) sum += P(j,i);
                sum_pj[j] = sum;
            }

            for(j=0; j<k; j++) {
                for(l=0; l<d; l++)
                    DP_DMU(l,j) = (VP(l,j) - MU(l,j) * sum_pj[j]) / SIGMA(l,j);
            }

        }
        /* normalization */
        if(!(flags & GMM_FLAGS_NO_NORM)) {
            for(j=0; j<k; j++)
                for(l=0; l<d; l++) {
                    float nf = sqrt(n*g->w[j]/SIGMA(l,j));
                    /* entries whose factor is not positive are left unscaled */
                    if(nf > 0) DP_DMU(l,j) /= nf;
                }
        }
#undef DP_DMU
        ii+=d*k;
    }

    if(flags & (GMM_FLAGS_SIGMA | GMM_FLAGS_1SIGMA)) {


        if(flags & GMM_FLAGS_1SIGMA) { /* fast not implemented for 1 sigma */

            for(j=0; j<k; j++) {
                double accu2=0;      /* sum over all dimensions for component j */
                for(l=0; l<d; l++) {
                    double accu=0;

                    for(i=0; i<n; i++)
                        accu += P(j,i) * (sqr(V(l,i)-MU(l,j)) / SIGMA(l,j) - 1) / sqrt(SIGMA(l,j));

                    /* per-dimension entries are emitted only if SIGMA is requested too */
                    if(flags & GMM_FLAGS_SIGMA) {

                        double f=flags & GMM_FLAGS_NO_NORM ? 1.0 : 2*n*g->w[j]/SIGMA(l,j);

                        dp_dlambda[ii++]=accu/sqrt(f);
                    }
                    accu2+=accu;
                }

                /* one pooled entry per component, normalised with SIGMA(0,j) */
                if(flags & GMM_FLAGS_1SIGMA) {
                    double f=flags & GMM_FLAGS_NO_NORM ? 1.0 : 2*d*n*g->w[j]/SIGMA(0,j);
                    dp_dlambda[ii++]=accu2/sqrt(f);
                }

            }

        } else { /* fast and complicated */
            assert(flags & GMM_FLAGS_SIGMA);
            float *dp_dsigma = dp_dlambda + ii;

            /* reuse the tables of the mu section when it already built them */
            if(!vp) {
                vp = fvec_new(k * d);
                fmat_mul_tr(v,p,d,k,n,vp);
            }

            if(!sum_pj) {
                sum_pj = fvec_new(k);
                for(j=0; j<k; j++) {
                    double sum=0;
                    for(i=0; i<n; i++) sum += P(j,i);
                    sum_pj[j] = sum;
                }
            }
            /* v2 = element-wise square of v, only needed to build v2p */
            float *v2 = fvec_new(n * d);
            for(i = n*d-1 ; i >= 0; i--) v2[i] = v[i] * v[i];
            float *v2p = fvec_new(k * d);
            fmat_mul_tr(v2,p,d,k,n,v2p);
            free(v2);

#define V2P(l,j) v2p[(j)*d+(l)]
#define DP_DSIGMA(i,j) dp_dsigma[(i)+(j)*d]
            for(j=0; j<k; j++) {

                for(l=0; l<d; l++) {
                    double accu;

                    accu = V2P(l, j);

                    accu += VP(l, j) * (- 2 * MU(l,j));

                    accu += sum_pj[j] * (sqr(MU(l,j))  - SIGMA(l,j));

                    /* normalization */

                    double f;

                    if(flags & GMM_FLAGS_NO_NORM) {
                        f = pow(SIGMA(l,j), -1.5);
                    } else {
                        f = 1 / (SIGMA(l,j) * sqrt(2*n*g->w[j]));
                    }

                    DP_DSIGMA(l,j) = accu * f;

                }

            }

            free(v2p);

#undef DP_DSIGMA
#undef V2P
            ii += d * k;
        }

    }

    assert(ii==gmm_fisher_sizeof(g,flags));
#undef P
#undef V
#undef MU
#undef SIGMA
#undef VP   /* was left defined, leaking the helper macro past this function */
    free(p);
    free(sum_pj);
    free(vp);
}
Example #2
0
/* Allocating variant of fmat_pca_from_covariance(): obtains a d*d float
   buffer from fvec_new(), passes it as the output argument (cov and singvals
   are forwarded unchanged) and returns it.
   NOTE(review): presumably the caller releases the result with free(), as is
   done for other fvec_new() buffers -- confirm against fvec_new(). */
float *fmat_new_pca_from_covariance(int d,const float *cov, float *singvals) 
{
  float *result;

  result = fvec_new(d*d);
  fmat_pca_from_covariance (d, cov, singvals, result);

  return result;
}
Example #3
0
/*
 * Command-line driver: reads a database file and a query file, runs
 * knn_full_thread() followed by knn_reorder_shortlist(), then writes the
 * neighbour indexes and the distances to two output files.
 *
 * Options (each value option needs a following argument, otherwise it is
 * silently skipped):
 *   -h / --help            print usage
 *   -silence / -verbose    verbosity 0 / 2 (default 1)
 *   -k -d -nt -nb -nq      integer parameters
 *   -b / -bb / -bt         database file in fvec / bvec / text format
 *   -q / -qb / -qt         query file in fvec / bvec / text format
 *   -onn / -onnt           index output file in ivec / text format
 *   -odis / -odist         distance output file in fvec / text format
 *
 * Returns 0; failures are reported through assert().
 */
int main (int argc, char ** argv)
{
  int i;
  int k = 10;
  int d = 0;
  int nb = 0;
  int nq = 0;
  int nt = count_cpu();
  int verbose = 1;
  int ret = 0;

  int fmt_b = FMT_FVEC;
  int fmt_q = FMT_FVEC;
  int fmt_nn = FMT_IVEC;
  int fmt_dis = FMT_FVEC;

  const char * fb_name = NULL;    /* database filename */
  const char * fq_name = NULL;    /* query filename */
  const char * fnn_name = "nn.out";   /* nn idx filename */
  const char * fdis_name = "dis.out";  /* nn dis filename */

  if (argc == 1)
    usage (argv[0]);

  for (i = 1 ; i < argc ; i++) {
    char *a = argv[i];

    if (!strcmp (a, "-h") || !strcmp (a, "--help"))
      usage (argv[0]);
    else if (!strcmp (a, "-silence")) {
      verbose = 0;
    }
    else if (!strcmp (a, "-verbose")) {
      verbose = 2;
    }
    /* sscanf returns EOF (-1) on input failure, so test for exactly one
       converted item instead of "non-zero" */
    else if (!strcmp (a, "-k") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &k);
      assert (ret == 1);
    }
    else if (!strcmp (a, "-d") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &d);
      assert (ret == 1);
    }
    else if (!strcmp (a, "-nt") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &nt);
      assert (ret == 1);
    }
    else if (!strcmp (a, "-nb") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &nb);
      assert (ret == 1);
    }
    else if (!strcmp (a, "-nq") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &nq);
      assert (ret == 1);
    }
    else if (!strcmp (a, "-b") && i+1 < argc) {
      fb_name = argv[++i];
      fmt_b = FMT_FVEC;
    }
    else if (!strcmp (a, "-bb") && i+1 < argc) {
      fb_name = argv[++i];
      fmt_b = FMT_BVEC;
    }
    else if (!strcmp (a, "-bt") && i+1 < argc) {
      fb_name = argv[++i];
      fmt_b = FMT_TEXT;
    }
    else if (!strcmp (a, "-q") && i+1 < argc) {
      fq_name = argv[++i];
      fmt_q = FMT_FVEC;
    }
    else if (!strcmp (a, "-qb") && i+1 < argc) {
      fq_name = argv[++i];
      fmt_q = FMT_BVEC;
    }
    else if (!strcmp (a, "-qt") && i+1 < argc) {
      fq_name = argv[++i];
      fmt_q = FMT_TEXT;
    }
    else if (!strcmp (a, "-onn") && i+1 < argc) {
      fnn_name = argv[++i];
      fmt_nn = FMT_IVEC;
    }
    else if (!strcmp (a, "-onnt") && i+1 < argc) {
      fnn_name = argv[++i];
      fmt_nn = FMT_TEXT;
    }
    else if (!strcmp (a, "-odis") && i+1 < argc) {
      fdis_name = argv[++i];
      fmt_dis = FMT_FVEC;
    }
    else if (!strcmp (a, "-odist") && i+1 < argc) {
      fdis_name = argv[++i];
      fmt_dis = FMT_TEXT;
    }
  }

  /* both input files are mandatory */
  assert (fb_name && fq_name);

  fprintf (stderr, "k = %d\nd = %d\nnt = %d\n", k, d, nt);

  if (verbose) {
    fprintf (stderr, "fb = %s  (fmt = %s)\n", fb_name, 
	     (fmt_b == FMT_FVEC ? "fvec" : (fmt_b == FMT_BVEC ? "bvec" : "txt")));
    fprintf (stderr, "fq = %s  (fmt = %s)\n", fq_name, 
	     (fmt_q == FMT_FVEC ? "fvec" : (fmt_q == FMT_BVEC ? "bvec" : "txt")));
    fprintf (stderr, "fnn = %s  (fmt = %s)\n", fnn_name, 
	     (fmt_nn == FMT_IVEC ? "ivec" : "txt"));
    fprintf (stderr, "fdis = %s  (fmt = %s)\n", fdis_name, 
	     (fmt_dis == FMT_FVEC ? "fvec" : "txt"));
  }


  /* read the input vectors for database and queries */
  float * vb = my_fvec_read (fb_name, fmt_b, verbose, &nb, &d);
  float * vq = my_fvec_read (fq_name, fmt_q, verbose, &nq, &d);


  /* Search */
  int * idx = ivec_new (k * nq);
  float * dis = fvec_new (k * nq);

  knn_full_thread (2, nq, nb, d, k, vb, vq, NULL, idx, dis, nt);
  knn_reorder_shortlist (nq, nb, d, k, vb, vq, idx, dis);

  /* write the distance output file */
  if (fmt_dis == FMT_FVEC)
    ret = fvecs_write (fdis_name, k, nq, dis);
  else if (fmt_dis == FMT_TEXT)
    ret = fvecs_write_txt (fdis_name, k, nq, dis);
  /* "0 || string" is always true; "0 && string" actually fires */
  else assert (0 && "Unknow output format\n");
  assert (ret == nq);
  
  /* write the neighbour index output file */
  if (fmt_nn == FMT_IVEC)
    ret = ivecs_write (fnn_name, k, nq, idx);
  else if (fmt_nn == FMT_TEXT)
    ret = ivecs_write_txt (fnn_name, k, nq, idx);
  else assert (0 && "Unknow output format\n");
  assert (ret == nq);
  
  free (idx);
  free (dis);
  free (vb);
  free (vq);
  return 0;
}
Example #4
0
/*
 * Builds a gmm_t for the ni vectors of dimension di stored in v, using ki
 * components.
 *
 * The means are initialised by kmeans(); all weights are set to 1/k and all
 * sigma entries to the average of the distances returned by kmeans().  Then
 * up to niter rounds of gmm_compute_p_thread / gmm_handle_empty /
 * gmm_compute_params are run (niter == 0 means 10000; no round at all when
 * GMM_FLAGS_PURE_KMEANS is set).  The loop stops early as soon as the sum of
 * all mean coordinates is exactly the same as in the previous round.
 *
 * nt, seed and nredo are forwarded to kmeans(); nt and flags are forwarded to
 * the per-round helpers.  Progress is printed on stdout and stderr.
 * Returns the object obtained from gmm_new().
 */
gmm_t * gmm_learn (int di, int ni, int ki, int niter,
                   const float * v, int nt, int seed, int nredo,
                   int flags)
{
    long dim = di;
    long ncomp = ki;
    long npts = ni;

    int round;
    double prev_checksum;
    double checksum = 666;   /* arbitrary start value, only ever printed/compared */

    if (niter == 0)
        niter = 10000;

    /* per-point / per-component table, zero-initialised */
    float * post = fvec_new_0 (npts * ncomp);
    gmm_t * model = gmm_new (dim, ncomp);

    /* k-means initialisation of the means */
    int * cluster_sizes = ivec_new (npts);
    float * kmeans_dis = fvec_new (npts);
    kmeans (dim, npts, ncomp, niter, v, nt, seed, nredo,
            model->mu, kmeans_dis, NULL, cluster_sizes);

    fflush (stderr);
    fprintf (stderr, "assign = ");
    ivec_print (cluster_sizes, ncomp);
    fprintf (stderr, "\n");
    free (cluster_sizes);

    /* uniform weights, one shared variance value everywhere */
    fvec_set (model->w, ncomp, 1.0 / ncomp);
    double sig = fvec_sum (kmeans_dis, npts) / npts;
    printf ("sigma at initialization = %.3f\n", sig);
    fvec_set (model->sigma, ncomp * dim, sig);
    free (kmeans_dis);

    fprintf (stdout, "<><><><> GMM  <><><><><>\n");

    if (flags & GMM_FLAGS_PURE_KMEANS)
        niter = 0;

    round = 1;
    while (round <= niter) {

        gmm_compute_p_thread (npts, v, model, post, flags, nt);
        fflush (stdout);

        gmm_handle_empty (npts, v, model, post);

        gmm_compute_params (npts, v, post, model, flags, nt);
        fflush (stdout);

        /* the sum of the means serves as the convergence signature */
        prev_checksum = checksum;
        checksum = fvec_sum (model->mu, ncomp * dim);

        printf ("keys %5d: %.6f -> %.6f\n", round, prev_checksum, checksum);
        fflush (stdout);

        if (checksum == prev_checksum)
            break;

        round++;
    }
    fprintf (stderr, "\n");

    free (post);

    return model;
}
Example #5
0
/* Returns a buffer obtained from fvec_new(n) into which the n floats of v
   have been copied. */
float * fvec_new_cpy (const float * v, long n) {
  float *copy;

  copy = fvec_new(n);
  memcpy (copy, v, n * sizeof (float));

  return copy;
}
Example #6
0
/* Allocates n floats with fvec_new() and has fvec_randn() fill them before
   returning the buffer. */
float * fvec_new_randn (long n)
{
  float * samples;

  samples = fvec_new (n);
  fvec_randn (samples, n);

  return samples;
}
Example #7
0
/*
 * Builds an adaptive hierarchical clustering of the data set `ds` into the
 * dynamic array `ahct` (one Cluster record per node).
 *
 *   ahct : output array; the root (holding every point id) is appended
 *          first, children are appended as their parent gets divided
 *   bf   : maximal branching factor of a node
 *   rho  : a node with ni points is split into min(bf, round(ni/rho))
 *          children; when that number is below 2 the node becomes a leaf
 *   ds   : input points, ds->n vectors of ds->d floats in ds->data
 *
 * Nodes are processed in the order they were appended, so the loop ends
 * once every appended cluster has been visited.
 */
void ahc_clustering(DyArray *ahct, int bf, int rho, const fDataSet *ds){
	ASSERTINFO(ahct == NULL || bf <= 0 || rho <= 0 || ds == NULL, "IPP");

	int		n 	= ds->n;
	int		d 	= ds->d;
	Cluster	_clu, clu, *pclu = NULL, *p0clu = NULL;
	int		i;
	float	qerror;
	int		iclu, bfi, ni, ichild, ori_id;			// the pointer, branch factor and volume of the i-th cluster
	int 	*nassign = ivec_new_set(bf, 0);
	int 	*assign = NULL;
	float	*cent = fvec_new(d*bf);
	float	*mem_points = NULL;
	DyArray	*member = (DyArray*)malloc(sizeof(DyArray)*bf);

	/* initialize the first cluster (root) to add it to the ahc tree */
	Cluster_init(&clu, n);
	for(i = 0; i < n; i++){
		clu.idx[i] = i;
	}
	clu.type = ClusterType_Root;
	DyArray_add(ahct, (void*)&clu, 1);

	/* begin the loop of adaptive hierarchical clustering */
	iclu = 0;
	while(iclu < ahct->count){
		/* deal with the i-th cluster */
		// figure out the adaptive branch factor of the i-th cluster
		pclu = (Cluster*)DyArray_get(ahct, iclu, 1);
		ni = pclu->npts;
		bfi = i_min(bf, (int)round(ni / (float)rho));

		// deal with the cluster according to its size
		if(bfi < 2){
			/*
			 *	this is a leaf cluster
			 *	- mark it, release the children
			 *	* not necessary to store real data points
			 */
			pclu->type = ClusterType_Leaf;
		}else{
			printf("----------------- cluster %d, bfi-%d:\n", iclu, bfi);

			/*
			 * this is an inner cluster
			 * - divide it
			 */
			memcpy(&_clu, pclu, sizeof(Cluster));

			// extract data points from the original dataset according to the idx
			mem_points = fvec_new(ni * d);
			for(i = 0; i < ni; i++){
				/* memcpy counts bytes: copy d floats, not d bytes */
				memcpy(mem_points+i*d, ds->data+_clu.idx[i]*d, sizeof(float)*d);
			}

			// divide this cluster
			assign = ivec_new(ni);

			/* leftover debugging output for cluster 30, kept as-is */
			if(iclu == 30){
				ivec_print(_clu.idx, _clu.npts);
			}

			qerror = kmeans(	d, ni, bfi, CLUSTERING_NITER, mem_points,
								CLUSTERING_NTHREAD | KMEANS_QUIET | KMEANS_INIT_BERKELEY, CLUSTERING_SEED, CLUSTERING_NREDO,
								cent, NULL, assign, nassign);

			// prepare space for members' ids
			for(i = 0; i < bfi; i++){
				DyArray_init(&member[i], sizeof(int), nassign[i]);
			}
			// extract member points' ids for each children cluster
			for(i = 0; i < ni; i++){
				ori_id = _clu.idx[i];
				DyArray_add(&member[assign[i]], (void*)&ori_id, 1);
			}

			// fulfill the type, centroids and the children of this cluster, add them to the ahct
			_clu.type = ClusterType_Inner;
			_clu.cents = fvec_new(d * bfi);
			memcpy(_clu.cents, cent, sizeof(float)*d*bfi);

			DyArray_init(&_clu.children, sizeof(int), bfi);
			for(i = 0; i < bfi; i++){
				Cluster_init(&clu, nassign[i]);
				memcpy(clu.idx, (int*)member[i].elem, sizeof(int)*nassign[i]);

				DyArray_add(&_clu.children, (void*)&ahct->count, 1);	/* the i-th child's position */
				DyArray_add(ahct, (void*)&clu, 1);						/* add the i-th child to the ahct */
			}

			/* as per the elems of ahct may change when expanding the space
			 * we decide to get the brand new address of the element
			 */
			pclu = (Cluster*)DyArray_get(ahct, iclu, 1);
			memcpy(pclu, &_clu, sizeof(Cluster));


			/* report */
			ivec_print(nassign, bfi);
			ivec_print((int*)_clu.children.elem, _clu.children.count);

			/* unset or release */
			FREE(mem_points);
			FREE(assign);
			for(i = 0; i < bfi; i++){
				DyArray_unset(&member[i]);
			}
		}

		// move to next cluster
		iclu++;
	}

	FREE(nassign);
	FREE(cent);
	FREE(member);
	pclu = NULL;
}
Example #8
0
        int m, float alpha, float *R,
        int nk, DoubleIndex **knnset, Cost *cost, int lb_type);


(Clustering *c, fDataSet *queryset, int m, float alpha, float *R, float *r_centroid, char *folder, int nk, DoubleIndex **knnset, Cost *cost)
{
    char    filename[256];
    int     nq = queryset->n,
            qi, i, set_i;
    int     cid, point_num;
    float   knn_R;
    float   *set;
    int     *set_id;
    int     set_num;
    float   *set_vector = NULL;
    float   *query = fvec_new(d);
    DoubleIndex candidate;
    DoubleIndex *lowerbound = (DoubleIndex*)malloc(sizeof(DoubleIndex)*c->ncenter);
                                                                // lower bounds between query and all centers
    Cost costi;
    struct timeval tvb, tve, tvb_lb, tve_lb, tvb_io, tve_io;
    for(qi = 0; qi < nq; qi++)
    {
        /// initialize the cost recorder
        CostInit(&costi);
        gettimeofday(&tvb, NULL);

        /// the qi-th query
        memcpy(query, queryset->data+qi*d, sizeof(float)*d);
        knnset[qi] = (DoubleIndex*)malloc(sizeof(DoubleIndex)*nk);
        /// calculate the lower bounds between query and all clusters to get the right order
Example #9
0
/* n1 = pts */
/*
 * Single-neighbour search: for every index r in [0,n1) stores in vw[r] the
 * index (in [0,n2), or -1 if nothing is below 1e30) of the smallest entry of
 * its distance row and in vwdis[r] that smallest value.
 *
 * Distances are produced tile by tile (at most BLOCK_N1 x BLOCK_N2 values)
 * by compute_cross_distances() when distance_type == 2, otherwise by
 * compute_cross_distances_alt().  When vw_weights is non-NULL each distance
 * towards column c is multiplied by vw_weights[c] before the comparison.
 */
static void nn_single_full (int distance_type,
			    int n1, int n2, int d,
			    const float *mat2, const float *mat1, 
			    const float *vw_weights,                             
			    int *vw, float *vwdis)
{
  int tile1 = MIN (n1, BLOCK_N1);
  int tile2 = MIN (n2, BLOCK_N2);

  /* scratch buffer holding one tile of distances */
  float *tile = fvec_new (tile1 * tile2);

  long base1, base2, r, c;

  for (base1 = 0; base1 < n1; base1 += tile1) {

    int rows = MIN (tile1, n1 - base1);

    /* reset the running minima of this band of rows */
    for (r = 0; r < rows; r++) {
      vw[base1 + r] = -1;
      vwdis[base1 + r] = 1e30;
    }

    for (base2 = 0; base2 < n2; base2 += tile2) {

      int cols = MIN (tile2, n2 - base2);

      if (distance_type != 2)
        compute_cross_distances_alt (distance_type, d, cols, rows,
                                     mat2 + base2 * d, mat1 + base1 * d, tile);
      else
        compute_cross_distances (d, cols, rows,
                                 mat2 + base2 * d, mat1 + base1 * d, tile);

      /* optional per-column weighting */
      if (vw_weights) {
        for (r = 0; r < rows; r++) {
          float *row = tile + r * cols;
          for (c = 0; c < cols; c++)
            row[c] *= vw_weights[base2 + c];
        }
      }

      /* fold this tile into the running minima */
      for (r = 0; r < rows; r++) {
        const float *row = tile + r * cols;
        int best = vw[base1 + r];
        float best_dis = vwdis[base1 + r];

        for (c = 0; c < cols; c++) {
          if (row[c] < best_dis) {
            best_dis = row[c];
            best = c + base2;
          }
        }

        vw[base1 + r] = best;
        vwdis[base1 + r] = best_dis;
      }
    }
  }

  free (tile);
}
Example #10
0
void knn_full (int distance_type,int n1, int n2, int d, int k,
	       const float *mat2, const float *mat1,
	       const float *vw_weights,
	       int *vw, float *vwdis)
{
  assert (k <= n2);

  if(k==1) {
    nn_single_full(distance_type, n1, n2, d, mat2, mat1, vw_weights, vw, vwdis);
    return;
  }

  
  int step1 = MIN (n1, BLOCK_N1), step2 = MIN (n2, BLOCK_N2);

  float *dists = fvec_new (step1 * step2);


  /* allocate all heaps at once */
  long oneh = fbinheap_sizeof(k);
  // oneh=(oneh+7) & ~7; /* round up to 8 bytes */
  char *minbuf = malloc (oneh * step1);

#define MINS(i) ((fbinheap_t*)(minbuf + oneh * i))
  
  long i1,i2,j1,j2;
  for (i1 = 0; i1 < n1; i1 += step1) {  

    int m1 = MIN (step1, n1 - i1);

    /* clear mins */
    for (j1 = 0; j1 < m1; j1++) 
      fbinheap_init(MINS(j1),k);
        

    for (i2 = 0; i2 < n2 ; i2 += step2) {     
      
      int m2 = MIN (step2, n2 - i2);
      
      
      if(distance_type==2)       
        compute_cross_distances (d, m2, m1, mat2+i2*d, mat1+i1*d, dists);
      else 
        compute_cross_distances_alt (distance_type, d, m2, m1, mat2+i2*d, mat1+i1*d, dists);    

      if(vw_weights) {
        for(j1=0;j1<m1;j1++) for (j2 = 0; j2 < m2; j2++)
          dists[j1 * m2 + j2] *= vw_weights[j2 + i2];        
      }

      /* update mins */

      for(j1=0;j1<m1;j1++) {
        float *dline=dists+j1*m2; 
        fbinheap_addn_label_range(MINS(j1),m2,i2,dline);
      }      

    }  

    for (j1 = 0; j1 < m1; j1++) {
      fbinheap_t *mh = MINS(j1);
      assert (mh->k == k);
      fbinheap_sort(mh, vw + (i1+j1) * k, vwdis + (i1+j1) * k);
    }
  }

#undef MINS
  free (minbuf);
  free(dists);
}
Example #11
0
File: vlad.c Project: atroudi/V3V_2
/*
 * Aggregates the n vectors of v (d floats each) into the descriptor `desc`
 * with respect to k centroids (d floats each).  `flags` selects the variant:
 *
 *   11, 12 : every vector is assigned to its 4 (flags 11) or 2 (flags 12)
 *            centroids returned by knn(); residuals v - centroid are summed
 *   other  : every vector is assigned to one centroid by nn(), then
 *      5      sum of the assigned vectors
 *      6, 7   per cluster and dimension, 3 (flags 6) or 1 (flags 7) quantiles
 *             of the assigned values minus the centroid, times cluster size
 *             (desc holds k*d*n_quantile floats)
 *      8, 9   sum of residuals divided by their norm (8) or by the square
 *             root of their norm (9); zero residuals are skipped
 *      10     sum of the assigned vectors, then fvec_normalize(.., 2.0)
 *      13     sum of squared residuals
 *      14     square root of the summed squared deviation of the residuals
 *             from the mean residual of the cluster
 *      15     desc[0..k*d): summed residuals; desc[k*d..2*k*d): as flags 14
 *      17     positive and negative residual parts kept apart (2*k*d floats)
 *      else   sum of residuals, post-processed for flags 1 (divided by the
 *             cluster size), 2 (fvec_normalize), 16 (scaled by the square
 *             root of the cluster size); flags 3 and 4 assert
 *
 * Unless noted otherwise desc receives k*d floats.
 */
void vlad_compute(int k, int d, const float *centroids, int n, const float *v,int flags, float *desc) 
{

	int i,j,l,n_quantile,i0,i1,ai,a,ma,ni;
	int *perm ;
	float un , diff;
	float *tab,*u,*avg,*sum,*mom2,*dists;
	int *hist,*assign;


	if(flags<11 || flags>=13) 
	{
		assign=ivec_new(n);

		nn(n,k,d,centroids,v,assign,NULL,NULL);    

		if(flags==6 || flags==7) 
		{
			n_quantile = flags==6 ? 3 : 1;
			fvec_0(desc,k*d*n_quantile);
			perm      = ivec_new(n);
			tab       = fvec_new(n);
			/* perm orders the vectors by assigned centroid, so every cluster
			   is the contiguous range [i0,i1) of perm */
			ivec_sort_index(assign,n,perm);
			i0=0;
			for(i=0;i<k;i++) 
			{
				i1=i0;
				while(i1<n && assign[perm[i1]]==i) 
				{
					i1++;
				}

				/* empty cluster: its entries stay 0 */
				if(i1==i0) continue;

				for(j=0;j<d;j++) 
				{        
					for(l=i0;l<i1;l++)
					{
						tab[l-i0]=v[perm[l]*d+j];
					}
					ni=i1-i0;
					fvec_sort(tab,ni);
					for(l=0;l<n_quantile;l++) 
					{
						desc[(i*d+j)*n_quantile+l]=(tab[(l*ni+ni/2)/n_quantile]-centroids[i*d+j])*ni;
					}
				}

				i0=i1;
			}
			free(perm);
			free(tab);
		} 
		else if(flags==5) 
		{
			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=v[i*d+j];
				}
			}

		} 
		else if(flags==8 || flags==9) 
		{
			fvec_0(desc,k*d);

			u   = fvec_new(d);

			for(i=0;i<n;i++) 
			{
				fvec_cpy(u,v+i*d,d);
				fvec_sub(u,centroids+assign[i]*d,d);
				un=(float)sqrt(fvec_norm2sqr(u,d));

				/* a vector sitting exactly on its centroid contributes nothing */
				if(un==0) continue;
				if(flags==8) 
				{        
					fvec_div_by(u,d,un);
				} else if(flags==9) 
				{
					fvec_div_by(u,d,sqrt(un));
				}

				fvec_add(desc+assign[i]*d,u,d);

			}
			free(u);
		} 
		else if(flags==10) 
		{
			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=v[i*d+j];
				}
			}

			for(i=0;i<k;i++) 
			{
				fvec_normalize(desc+i*d,d,2.0);  
			}

		} 
		else if(flags==13) 
		{

			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=(float)sqr(v[i*d+j]-centroids[assign[i]*d+j]);
				}
			}     

		} 
		else if(flags==14) 
		{
			/* avg = mean residual of every non-empty cluster */
			avg = fvec_new_0(k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					avg[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
				}
			}

			hist=ivec_new_histogram(k,assign,n);

			for(i=0;i<k;i++) 
			{
				if(hist[i]>0) 
				{
					for(j=0;j<d;j++) 
					{
						avg[i*d+j]/=hist[i];
					}
				}
			}

			free(hist);

			fvec_0(desc,k*d);
			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=(float)(sqr(v[i*d+j]-centroids[assign[i]*d+j]-avg[assign[i]*d+j]));
				}
			}

			fvec_sqrt(desc,k*d);

			free(avg);
		}  
		else if(flags==15) 
		{
			fvec_0(desc,k*d*2);
			/* first half of desc: summed residuals */
			sum = desc;

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					sum[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
				}
			}

			hist = ivec_new_histogram(k,assign,n);

			/* second half of desc: deviation around the mean residual;
			   hist[ai] counts at least vector i itself */
			mom2 = desc+k*d;

			for(i=0;i<n;i++) 
			{
				ai=assign[i];
				for(j=0;j<d;j++) 
				{
					mom2[ai*d+j]+=(float)(sqr(v[i*d+j]-centroids[ai*d+j]-sum[ai*d+j]/hist[ai]));
				}
			}
			fvec_sqrt(mom2,k*d);
			free(hist);


		} 
		else if(flags==17) 
		{
			fvec_0(desc,k*d*2);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					diff=v[i*d+j]-centroids[assign[i]*d+j];
					if(diff>0)
					{
						desc[assign[i]*d+j]+=diff;
					}
					else 
					{
						desc[assign[i]*d+j+k*d]-=diff;
					}
				}
			}

		} 
		else 
		{
			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
				}
			}


			if(flags==1) 
			{
				hist=ivec_new_histogram(k,assign,n);
				/* printf("unbalance factor=%g\n",ivec_unbalanced_factor(hist,k)); */

				for(i=0;i<k;i++) 
				{
					/* an empty cluster would yield 0/0 = NaN; leave its zeros
					   untouched, as the flags==14 branch does */
					if(hist[i]>0) 
					{
						for(j=0;j<d;j++) 
						{
							desc[i*d+j]/=hist[i];    
						}
					}
				}
				free(hist);
			}

			if(flags==2) 
			{
				for(i=0;i<k;i++) 
				{
					fvec_normalize(desc+i*d,d,2.0);
				}
			}

			if(flags==3 || flags==4) 
			{
				assert(!"not implemented");
			}

			if(flags==16) 
			{
				hist=ivec_new_histogram(k,assign,n);
				for(i=0;i<k;i++) 
				{
					if(hist[i]>0) 
					{
						/* NOTE(review): the value returned by fvec_norm() is
						   discarded; fvec_normalize() may have been intended
						   -- confirm before changing */
						fvec_norm(desc+i*d,d,2);
						fvec_mul_by(desc+i*d,d,sqrt(hist[i]));
					}
				}
				free(hist);
			}


		}
		free(assign);
	} 
	else if(flags==11 || flags==12) 
	{
		/* multiple assignment: ma centroids per vector */
		ma=flags==11 ? 4 : 2;
		assign=ivec_new(n*ma);

		dists=knn(n,k,d,ma,centroids,v,assign,NULL,NULL);    

		fvec_0(desc,k*d);

		for(i=0;i<n;i++) 
		{
			for(j=0;j<d;j++) 
			{
				for(a=0;a<ma;a++) 
				{
					desc[assign[ma*i+a]*d+j]+=v[i*d+j]-centroids[assign[ma*i+a]*d+j];
				}
			}
		} 

		free(dists);

		free(assign);
	}

}
Example #12
0
/*
 * Demonstration program for the symmetric eigen-solvers: builds a random
 * symmetric 10x10 matrix a and a matrix b = b0*b0', then prints the results
 * of eigs_sym(), eigs_sym_part() (first d2 = 5 pairs) and geigs_sym().
 * Returns 0; solver failures are caught by assert().
 */
int main()
{
  int i, j, k, d = 10,d2=5;
  float * a = fvec_new (d * d);
  float * b = fvec_new (d * d);
  float * b0 = fvec_new (d * d);
#define B0(i,j) b0[(i)+(j)*d]
#define A(i,j) a[(i)+(j)*d]
#define B(i,j) b[(i)+(j)*d] 
  float * lambda = fvec_new (d);
  float * v = fvec_new (d * d);
  float *v_part=fvec_new (d * d2);

  /* a is filled symmetrically, b0 with independent random values */
  for (i = 0 ; i < d ; i++)
    for (j = 0 ; j  <= i ; j++) {
      A(i,j) = A(j,i) = drand48(); 
      B0(i,j)=drand48();
      B0(j,i)=drand48();
      
/*      B(i,j) = B(j,i) = drand48(); */
    }
  /* make a positive definite b (with b=b0*b0') */
  for (i = 0 ; i < d ; i++)
    for (j = 0 ; j  < d ; j++) {
      double accu=0;
      for(k=0;k<d;k++) 
        accu+=B0(i,k)*B0(j,k);
      B(i,j)=accu;
    }

  printf ("a = ");
  fmat_print(a,d,d);

  printf ("\nb = "); 
  fmat_print(b,d,d);

  printf ("Solution of the eigenproblem Av=lambda v\n");
  
  printf ("\n");
  int ret=eigs_sym (d, a, lambda, v);
  assert(ret==0);
  printf ("\n");


  printf("Eigenvectors:\n");
  fmat_print(v,d,d);

  fprintf(stdout, "lambda = ");
  fvec_print (lambda, d);
  printf ("\n");

  printf("Partial eigenvalues/vectors:\n");

  printf ("\n");
  /* the return value is compared with d2 as the number of converged pairs */
  ret=eigs_sym_part (d, a, d2, lambda, v_part);
  assert(ret>0);
  if(ret<d2) 
    printf("!!! only %d / %d eigenvalues converged\n",ret,d2);

  printf ("\n");

  printf("Eigenvectors:\n");
  fmat_print(v_part,d,d2);
  
  fprintf(stdout, "lambda = ");
  fvec_print (lambda, d2);
  printf ("\n");

   
  printf ("Solution of the generalized eigenproblem Av=lambda B v\n");

  printf ("\n");
  ret=geigs_sym (d, a, b, lambda, v);
  assert(ret==0);
  printf ("\n");

  fmat_print(v,d,d);

  fprintf(stdout, "lambda = ");
  fvec_print (lambda, d);
  printf ("\n");

  /* release every buffer, including b, b0 and v_part which used to leak */
  free (a);
  free (b);
  free (b0);
  free (lambda);
  free (v);
  free (v_part);

  return 0;
}
Example #13
0
File: gmm.c Project: Erotemic/yael
/*
 * Writes 2*K*D floats to sdesc from the N x K table Q (Q[n*K + k]), the
 * D*N values of ll and the 2*D values of sgmm.
 *
 *   sdesc[d + k*D]         = Q_ll[K*d + k]  - Q_sum[k] * mm[d]
 *   sdesc[K*D + d + k*D]   = -Q_ll_2[K*d + k] + 2 * Q_ll[K*d + k] * mm[d]
 *                            + Q_sum[k] * (S[d] - mm[d]^2)
 *
 * with mm = sgmm[0..D), S = sgmm[D..2D), Q_sum[k] the mean of Q over n, and
 * Q_ll / Q_ll_2 the two halves of the sgemm_ product (scaled by 1/N) of Q
 * with ll and with the element-wise square of ll.
 */
void gmm_fisher_spatial(int N, int K, int D, 
                        const float *Q, 
                        const float *sgmm, 
                        const float *ll, 
                        float *sdesc) {
  long row, comp, dim, idx;

  /* per-column mean of Q */
  float *q_mean = fvec_new_0(K);
  for (row = 0; row < N; row++) {
    const float *q_row = Q + row * K;
    for (comp = 0; comp < K; comp++)
      q_mean[comp] += q_row[comp];
  }
  for (comp = 0; comp < K; comp++)
    q_mean[comp] /= N;

  /* one buffer holding ll followed by ll squared element-wise */
  float *stacked = fvec_new(D * 2 * N);
  float *squares = stacked + D * N;
  fvec_cpy(stacked, ll, D * N);
  for (idx = 0; idx < D * N; idx++)
    squares[idx] = ll[idx] * ll[idx];

  /* compute Q.T * stacked, scaled by 1/N, in a single sgemm_ call */
  FINTEGER rows_q = K, cols_out = 2 * D, inner = N;
  float scale = 1.0 / N, beta = 0;
  float *proj = fvec_new(K * 2 * D);
  float *proj_sq = proj + K * D;      /* second half: product with the squares */
  sgemm_("N", "N", &rows_q, &cols_out, &inner,
         &scale, Q, &rows_q,
         stacked, &inner,
         &beta, proj, &rows_q);
  free(stacked);

  const float *mean = sgmm;
  const float *second = sgmm + D;
  float *out_mean = sdesc;
  float *out_var = sdesc + K * D;

  /* first half of the descriptor */
  for (dim = 0; dim < D; dim++) {
    for (comp = 0; comp < K; comp++)
      out_mean[dim + comp * D] = proj[K * dim + comp] - q_mean[comp] * mean[dim];
  }

  /* second half of the descriptor */
  for (dim = 0; dim < D; dim++) {
    float dfact = second[dim] - mean[dim] * mean[dim];
    for (comp = 0; comp < K; comp++)
      out_var[dim + comp * D] = -proj_sq[K * dim + comp] + 2 * proj[K * dim + comp] * mean[dim] + q_mean[comp] * dfact;
  }

  free(proj);
  free(q_mean);
}
Example #14
0
/* Returns a buffer of nrow*ncol floats obtained from fvec_new(); the element
   count is computed in long arithmetic. */
float *fmat_new (int nrow, int ncol)
{
  long nelem = nrow * (long)ncol;

  return fvec_new (nelem);
}
Example #15
0
/**
 * Runs a nk-nearest-neighbour search for every vector of queryset.
 *
 * For each query the clusters are visited in the order of lb[] (filled by
 * lowerbound_crosspoint() or lowerbound() depending on lb_type); the visit
 * stops as soon as a cluster's lower bound exceeds the current radius
 * knn_R.  The members of a visited cluster are taken from baseset and pushed
 * into a heap of capacity nk.
 *
 * @param baseset   base vectors, indexed by the ids stored in member[]
 * @param queryset  query vectors (queryset->n vectors of d floats)
 * @param folder    only used by the disabled on-disk code path
 * @param nk        number of neighbours kept per query
 * @param knnset    output; knnset[qi] is malloc'ed here (nk entries) and
 *                  receives heap.length results
 * @param cost      accumulates the per-query costs, then is scaled by 1/nq
 * @param lb_type   Algorithm_Search_CrossLB or Algorithm_Search
 */
void ANC::search(const fDataSet *baseset, const fDataSet *queryset, char *folder, int nk, DoubleIndex **knnset, Cost *cost, int lb_type)
{
	char filename[256];
	int nq = queryset->n,
		qi, i, set_i;
	int cid;
	float knn_R;
	float *set;
	int *set_id;
	int set_num;
	// scratch vector for one candidate; allocated once for all queries
	// (it used to be re-allocated per query and freed only once -> leak)
	float *set_vector = fvec_new(d);
	float *query = fvec_new(d);
	DoubleIndex candidate;
	DoubleIndex *lb = (DoubleIndex*)malloc(sizeof(DoubleIndex)*ncenter);
																// lower bounds between query and all centers
	
    Cost costi;
	struct timeval tvb, tve, tvb_lb, tve_lb, tvb_io, tve_io;

	for(qi = 0; qi < nq; qi++)
	{
		/// initialize the cost recorder
		CostInit(&costi);
		gettimeofday(&tvb, NULL);

		/// the qi-th query
		memcpy(query, queryset->data+qi*d, sizeof(float)*d);
		knnset[qi] = (DoubleIndex*)malloc(sizeof(DoubleIndex)*nk);
		/// calculate and sort the lower bounds between query and all clusters to get the right order
		gettimeofday(&tvb_lb, NULL);
		if((int)Algorithm_Search_CrossLB == lb_type){
			lowerbound_crosspoint(lb, query);
		}else if((int)Algorithm_Search == lb_type){
			lowerbound(lb, query, true);	
		}
		
		gettimeofday(&tve_lb, NULL);
		costi.lowerbound = timediff(tvb_lb, tve_lb);

		/// search for knn
		knn_R = FLOAT_MAX;
		i = 0;
		Heap heap(nk);
		while(i < ncenter)
		{
			cid = lb[i].id;
			// the i-th cluster
			if(f_bigger(lb[i].val, knn_R))
			{
				break;
			}
			// knn_R > lb[i], means there are candidates in the i-th cluster
			set_num = member[cid].size();
			set = fvec_new(set_num*d);
			set_id = ivec_new(set_num);
			
            /* we do not test the time cost of disk page for speed, we do not really load the data 
            sprintf(filename, "%s/%d.cluster", folder, cid);
			gettimeofday(&tvb_io, NULL);
			HB_ClusterFromFile(filename, set_num, d, set, set_id);
			gettimeofday(&tve_io, NULL);
            costi.io = costi.io + timediff(tvb_io, tve_io);
            */

            /* instead, we extract member points directly from the base set */
            for(int mi = 0; mi < set_num; mi++){
                int pts_id = member[cid][mi];
                set_id[mi] = pts_id;
                memcpy(set+mi*d, baseset->data+pts_id*d, sizeof(float)*d);
            }

            // update cost
			costi.page = costi.page + 1;
			costi.point = costi.point + set_num;

			for(set_i = 0; set_i < set_num; set_i++)
			{// calculate real distance between all candidates and query
				candidate.id = set_id[set_i];
				memcpy(set_vector, set+set_i*d, sizeof(float)*d);
				candidate.val = odistance(query, set_vector, d);
				if(heap.length < heap.MaxNum || f_bigger(heap.elem[0].val, candidate.val))
				{// heap is not full or new value is smaller, insert
					heap.max_insert(&candidate);
				}
			}
			// NOTE(review): the radius is taken from heap.elem[0] even when the
			// heap holds fewer than nk entries -- confirm this is intended
			knn_R = heap.elem[0].val;
			i++;
			// free
			free(set); set = NULL;
			free(set_id); set_id = NULL;
		}// end of search loop
		// printf("%d ", i);//
		memcpy(knnset[qi], heap.elem, sizeof(DoubleIndex)*heap.length);

		gettimeofday(&tve, NULL);
		costi.cpu = timediff(tvb, tve);
		costi.search = costi.cpu - costi.lowerbound - costi.io;

		/// sum new cost
		CostCombine(cost, &costi);
	}

	CostMultiply(cost, 1/(float)nq);

	free(set_vector); set_vector = NULL;
	free(query); query = NULL;
	free(lb); lb = NULL;
}
Example #16
0
/*
 * Learns a hierarchical k-means structure with nlevel levels and branching
 * factor bf from n points of dimension d.
 *
 * At level l every node of the level (1, bf, bf*bf, ... of them) clusters
 * the points currently assigned to it with kmeans() into bf centroids, which
 * are stored in hkm->centroids[l].  Each point's node number is then refined
 * to node*bf + child.  The program exits with status 1 if a node has no
 * point.
 *
 * The thread count given to kmeans() comes from count_cpu(); the nt
 * parameter is not used.  When clust_assign_out is non-NULL it receives a
 * malloc'ed copy of the final node number of every point.
 * Returns the structure created by hkm_new().
 */
hkm_t *hkm_learn (int n, int d, int nlevel, int bf,
		  const float *points, int nb_iter_max, int nt, int verbose, 
		  int **clust_assign_out)
{
  int level, node, pt;
  int nb_nodes = 1;                 /* number of nodes at the current level */
  hkm_t *hkm = hkm_new (d, nlevel, bf);

  /* node number of every point; everything starts in node 0 */
  int *node_of = calloc (sizeof (int), n);

  /* points gathered node by node */
  float *gathered = fvec_new (n * d);

  for (level = 0; level < nlevel; level++) {

    /* order the points by node so that each node owns a contiguous range */
    int *order = malloc (sizeof (*order) * n);
    ivec_sort_index (node_of, n, order);

    for (pt = 0; pt < n; pt++)
      memmove (gathered + d * pt, points + d * order[pt],
               sizeof (*points) * d);

    int first = 0;                  /* start of the current node's range */
    for (node = 0; node < nb_nodes; node++) {

      /* length of the run of points belonging to this node */
      int count = 0;
      while (first + count < n && node_of[order[first + count]] == node)
        count++;

      if (verbose)
        fprintf (stderr, "[Level %d | Parent %d] nassign=%d | pos=%d", level, node, count, first);

      if (count == 0) {
        fprintf (stderr, "# Problem2: no enough vectors in a node\n");
        exit (1);
      }

      /* cluster the points of this node */
      int *child_of = ivec_new (count);
      float *node_centroids = fvec_new (bf * d);
      int ncpu = count_cpu();
      int km_flags = ncpu | KMEANS_INIT_RANDOM | KMEANS_QUIET;
      float err = kmeans (d, count, bf, nb_iter_max, gathered + d * first, km_flags,
                          0, 1, node_centroids, NULL, child_of, NULL);
      if (verbose)
        fprintf (stderr, "-> err = %.3f\n", err);

      memcpy (hkm->centroids[level] + d * node * bf, node_centroids,
              d * bf * sizeof (*node_centroids));

      /* refine the node number of the points just clustered */
      for (pt = 0; pt < count; pt++) {
        int original = order[first + pt];
        node_of[original] = node_of[original] * bf + child_of[pt];
      }

      free (node_centroids);
      free (child_of);
      first += count;
    }

    nb_nodes *= bf;
    free (order);
  }

  if (clust_assign_out) {
    *clust_assign_out = (int *) malloc (n * sizeof (int));
    memcpy (*clust_assign_out, node_of, n * sizeof (int));
  }

  free (node_of);
  free (gathered);
  return hkm;
}
Example #17
0
/**
 * Compute one lower-bound distance per cluster for `query`, sort the bounds
 * and dump them to "lowerbound.txt".
 *
 * For cluster C the bound starts at FLOAT_ZERO.  Every neighbour N listed in
 * neighbor[C] for which f_bigger(sqdist(q, C), sqdist(q, N)) holds contributes
 * crosspoint_distance(query, centroid of C, centroid of N, d, dist(q, C));
 * the bound kept for C is the largest contribution seen.
 *
 * @param lb     output array; entries 0..ncenter-1 are written and then
 *               reordered by DI_MergeSort.
 * @param query  query vector, handed to the distance helpers together with
 *               the dimension `d`.
 */
void ANC::lowerbound_crosspoint(DoubleIndex *lb, const float *query){
	int i, nci, id_n;
	float max_dis, temp_dis, sdis_q_c, sdis_q_nc;
	float *center = fvec_new(d);					// scratch copy of the current centroid
	float *ocenter = fvec_new(d);					// scratch copy of a neighbour centroid
	DoubleIndex *sqdis_query_centroid;				// square distance between query and all centroids


	/// prepare the query centroid square distances
	sqdis_query_centroid = (DoubleIndex*)malloc(sizeof(DoubleIndex)*ncenter);
	for(nci = 0; nci < ncenter; nci++)
	{
		sqdis_query_centroid[nci].id = nci;
		memcpy(center, centroid+nci*d, sizeof(float)*d);
		sqdis_query_centroid[nci].val = odistance_square(query, center, d);
	}

	/// figure out lower bounds for each cluster
	for(nci = 0; nci < ncenter; nci++){
		// size() is unsigned; take it once as int so the loop test compares like with like
		const int n_neighbor = (int)neighbor[nci].size();

		sdis_q_c = sqdis_query_centroid[nci].val;			// square dis between (q and C)
		memcpy(center, centroid+nci*d, sizeof(float)*d);		// centroid of C

		max_dis = FLOAT_ZERO;
		for(i = 0; i < n_neighbor; i++){				// all neighbor clusters
			id_n = neighbor[nci][i];
			sdis_q_nc = sqdis_query_centroid[id_n].val;			// square dis between (q and neighbor cluster)
			if(f_bigger(sdis_q_c, sdis_q_nc)){				// separating hyperplane
				memcpy(ocenter, centroid+id_n*d, sizeof(float)*d);	// centroid of the neighbor cluster

				temp_dis = crosspoint_distance(query, center, ocenter, d, sqrt(sdis_q_c));

				if(f_bigger(temp_dis, max_dis))
				{// a larger lower bound distance
					max_dis = temp_dis;
				}
			}
		}

		lb[nci].id = nci;
		lb[nci].val = max_dis;
	}

	/// sort lower bounds
	DI_MergeSort(lb, 0, ncenter-1);

	/// ### store into files
	// NOTE(review): the stream returned by open_file is used unchecked, as before;
	// confirm that open_file never returns NULL.
	FILE *fp = open_file("lowerbound.txt", "w+");
	for(i = 0; i < ncenter; i++){
		fprintf(fp, " %d-%f", lb[i].id, lb[i].val);
	}
	fputc('\n', fp);
	fclose(fp);

	free(center); center = NULL;
	free(ocenter); ocenter = NULL;
	free(sqdis_query_centroid); sqdis_query_centroid = NULL;
}
Example #18
0
/**
 * Fill innerLB: for every cluster, one (point id, distance) entry per member
 * point, sorted per cluster with DI_MergeSort.
 *
 * For member point x of cluster C and every other centroid O the value
 *     (sqdist(x, O) - sqdist(x, C)) / (2 * dist(C, O))
 * is computed; the smallest value (as decided by f_bigger) is stored together
 * with the point's id.  Entries start at id = -1 / val = FLOAT_MAX and keep
 * those values when no candidate replaces them (e.g. ncenter == 1).
 *
 * NOTE(review): two coincident centroids give dist(C, O) == 0 and therefore a
 * division by zero; this is unchanged from the previous behaviour.
 *
 * @param ds  data set; ds->data is read as rows of `d` floats indexed by the
 *            ids found in member[cluster].
 */
void HBPlus::inner_lb_distance_OnePerPoint(const fDataSet *ds)
{
    int i, j, nci, otheri;
    float dis = 0;
    float *xcenter = fvec_new(d);
    float *ocenter = fvec_new(d);
    float *x = fvec_new(d);
    // distance between each centroid pair
    float *centroid_dis_map = fvec_new_0(ncenter*ncenter);
    innerLB = (DoubleIndex **)malloc(sizeof(DoubleIndex*)*ncenter);
    for(i = 0; i < ncenter; i++){
        innerLB[i] = NULL;
    }

    /// prepare distances between each two centroids
    for(i = 0; i < ncenter; i++)
    {
        memcpy(xcenter, centroid+i*d, sizeof(float)*d);
        for(j = 0; j <= i; j++)
        {
            memcpy(ocenter, centroid+j*d, sizeof(float)*d);
            dis = odistance(xcenter, ocenter, d);
            // the map is symmetric: fill both triangles from one computation
            centroid_dis_map[i*ncenter+j] = dis;
            if(i != j)
            {
                centroid_dis_map[j*ncenter+i] = dis;
            }
        }
    }

    // initialize the storing space for inner distance of each member point
    for(nci = 0; nci < ncenter; nci++)
    {
        /// cnt_member_points
        int cnt_member = member[nci].size();
        innerLB[nci] = (DoubleIndex*)malloc(sizeof(DoubleIndex) * cnt_member);
        for(i = 0; i < cnt_member; i++)
        {
            innerLB[nci][i].id = -1;
            innerLB[nci][i].val = FLOAT_MAX;
        }
    }

    for(nci = 0; nci < ncenter; nci++)
    {
        /* in each centroid */
        memcpy(xcenter, centroid+nci*d, sizeof(float)*d);   // the current centroid
        int cnt_member = member[nci].size();    // cnt member points

        /* for each member points */
        for(i = 0; i < cnt_member; i++){
            // offset computed in size_t: point_id * d can exceed INT_MAX on large data sets
            memcpy(x, ds->data + (size_t)member[nci][i]*d, sizeof(float)*d);

            // squared distance to the owning centroid does not depend on otheri,
            // so it is evaluated once per point instead of once per other centroid
            const float sqdis_x_own = odistance_square(x, xcenter, d);

            /* for each other centroid */
            for(otheri = 0; otheri < ncenter; otheri++)
            {
                if(otheri != nci)
                {
                    memcpy(ocenter, centroid+otheri*d, sizeof(float)*d);
                    dis = (odistance_square(x, ocenter, d) - sqdis_x_own) / (2*centroid_dis_map[nci*ncenter+otheri]);
                    if(f_bigger(innerLB[nci][i].val, dis))
                    {// update using smaller distance
                        innerLB[nci][i].val = dis;
                        innerLB[nci][i].id = member[nci][i];          // id is the data point
                    }
                }
            }
        }
        // sort member data points along the innerLB distance in the nci-th cluster
        DI_MergeSort(innerLB[nci], 0, cnt_member-1);
    }

    free(centroid_dis_map); centroid_dis_map = NULL;
    free(ocenter); ocenter = NULL;
    free(xcenter); xcenter = NULL;
    free(x); x = NULL;
}
Example #19
0
/*
 * Re-estimate the parameters of the mixture g from n points and their
 * assignment table.
 *
 * v is read as n rows of d floats and p as n rows of k floats
 * (p[i * k + j] pairs point i with gaussian j), as the reference branch
 * below spells out.  g->w, g->mu and g->sigma are overwritten in place:
 *   - w[j]     accumulates the p values of gaussian j, and is normalised by
 *              fvec_normalize (g->w, k, 1) at the end;
 *   - mu[j]    accumulates the p-weighted points, then is divided by w[j];
 *   - sigma[j] accumulates the p-weighted squared deviation from the
 *              *previous* mean (mu_old), then is divided by w[j].
 *
 * flags:    only GMM_FLAGS_1SIGMA is examined here; when set, every entry of
 *           a gaussian's sigma is replaced by fvec_sum(sigma_j, d) / d.
 * n_thread: <= 1 selects compute_sum_dcov, anything larger selects
 *           compute_sum_dcov_thread.
 *
 * Side effects: prints the weights and "imfac" on stdout, plus a warning when
 * some sigma entries had to be raised to min_sigma; asserts that the sum of
 * the new means is finite.
 */
static void gmm_compute_params (int n, const float * v, const float * p,
                                gmm_t * g,
                                int flags,
                                int n_thread)
{
    long i, j;

    long d=g->d, k=g->k;
    /* sigma is measured around the means of the previous iteration */
    float * mu_old = fvec_new_cpy (g->mu, k * d);

    fvec_0 (g->w, k);
    fvec_0 (g->mu, k * d);
    fvec_0 (g->sigma, k * d);

    if(0) {
        /* slow and simple: disabled reference implementation of the sums */
        float * vtmp = fvec_new (d);   /* scratch row, only needed here */

        for (j = 0 ; j < k ; j++) {
            double dtmp = 0;
            for (i = 0 ; i < n ; i++) {
                /* contribution to the gaussian weight */
                dtmp += p[i * k + j];
                /* contribution to mu */

                fvec_cpy (vtmp, v + i * d, d);
                fvec_mul_by (vtmp, d, p[i * k + j]);
                fvec_add (g->mu + j * d, vtmp, d);

                /* contribution to the variance */
                fvec_cpy (vtmp, v + i * d, d);
                fvec_sub (vtmp, mu_old + j * d, d);
                fvec_sqr (vtmp, d);
                fvec_mul_by (vtmp, d, p[i * k + j]);
                fvec_add (g->sigma + j * d, vtmp, d);

            }
            g->w[j] = dtmp;
        }

        free (vtmp);

    } else {
        /* fast and complicated */

        if(n_thread<=1)
            compute_sum_dcov(n,k,d,v,mu_old,p,g->mu,g->sigma,g->w);
        else
            compute_sum_dcov_thread(n,k,d,v,mu_old,p,g->mu,g->sigma,g->w,n_thread);
    }

    if(flags & GMM_FLAGS_1SIGMA) {
        /* one shared value per gaussian: spread the average over all dimensions */
        for (j = 0 ; j < k ; j++) {
            float *sigma_j=g->sigma+j*d;
            double var=fvec_sum(sigma_j,d)/d;
            fvec_set(sigma_j,d,var);
        }
    }

    /* NOTE(review): the floor is applied to the un-normalised sums, i.e. before
       the division by w[j] below -- confirm this ordering is intended. */
    long nz=0;
    for(i=0; i<k*d; i++)
        if(g->sigma[i]<min_sigma) {
            g->sigma[i]=min_sigma;
            nz++;
        }

    if(nz) printf("WARN %ld sigma diagonals are too small (set to %g)\n",nz,min_sigma);

    /* turn the weighted sums into weighted averages */
    for (j = 0 ; j < k ; j++) {
        fvec_div_by (g->mu + j * d, d, g->w[j]);
        fvec_div_by (g->sigma + j * d, d, g->w[j]);
    }

    assert(finite(fvec_sum(g->mu, k*d)));

    fvec_normalize (g->w, k, 1);

    printf ("w = ");
    fvec_print (g->w, k);
    double imfac = k * fvec_sum_sqr (g->w, k);
    printf (" imfac = %.3f\n", imfac);

    free (mu_old);
}
Example #20
0
/*
 * Obtain an n-element float vector from fvec_new() and pass it through
 * fvec_nan() (presumably setting every entry to NaN) before returning it.
 */
float *fvec_new_nan (long n)
{
  float *vec = fvec_new (n);

  fvec_nan (vec, n);

  return vec;
}
Example #21
0
/*
 * Fill p (n rows of k floats, p[i * k + j] for point i and gaussian j) with
 * the probabilities linking each of the n points of v to each gaussian of g.
 *
 * Each entry is first computed in the log domain as
 *     log N(v_i | mu_j, sigma_j)   (diagonal sigma),
 * then shifted and exponentiated:
 *   - GMM_FLAGS_W set:      log(w[j]) is added to every entry;
 *   - GMM_FLAGS_NO_NORM set: nothing is subtracted;
 *     otherwise the log of the row sum (weighted when GMM_FLAGS_W is set) is
 *     subtracted, turning p(x|c_j) into p(c_j|x).
 *
 * Returns immediately, leaving p untouched, when n == 0.
 */
void gmm_compute_p (int n, const float * v,
                    const gmm_t * g,
                    float * p,
                    int flags)
{
    if (n == 0)
        return; /* sgemm doesn't like empty matrices */

    const long d = g->d, k = g->k;
    const int use_w = flags & GMM_FLAGS_W;
    long i, j, l;

    /* per-gaussian additive constant of the log density:
       -d/2 * log(2*pi) - 1/2 * sum_l log(sigma_jl) */
    float * log_gauss_cst = fvec_new (k);

    for (j = 0 ; j < k ; j++) {
        log_gauss_cst[j] = -d / 2.0 * log (2 * M_PI);
        for (l = 0 ; l < d ; l++)
            log_gauss_cst[j] -= 0.5 * log (g->sigma[j * d + l]);
    }

    /* squared Mahalanobis distances, written into p */
    if (0) {
        /* straightforward reference version, disabled */
        for (i = 0 ; i < n ; i++)
            for (j = 0 ; j < k ; j++) {
                double maha = 0;
                for (l = 0 ; l < d ; l++)
                    maha += sqr (v[i * d + l] - g->mu[j * d + l]) / g->sigma[j * d + l];
                p[i * k + j] = maha;
            }
    } else {
        /* optimized version */
        compute_mahalanobis_sqr (n, k, d, g->mu, g->sigma, v, p);
    }

    /* distances -> probabilities; everything stays in the log domain until
       the final exp() */
    for (i = 0 ; i < n ; i++) {

        /* log p(x_i | c_j) */
        for (j = 0 ; j < k ; j++) {
            p[i * k + j] = log_gauss_cst[j] - 0.5 * p[i * k + j];
            CHECKFINITE(p[i * k + j]);
        }

        /* log of the row's normalization factor; stays 0 when the caller
           asked for un-normalized values */
        double log_denom = 0;

        if (!(flags & GMM_FLAGS_NO_NORM)) {
            log_denom = p[i * k + 0];
            if (use_w)
                log_denom += log (g->w[0]);

            for (j = 1 ; j < k ; j++) {
                double log_term = p[i * k + j];
                if (use_w)
                    log_term += log (g->w[j]);
                log_denom = log_sum (log_denom, log_term);
            }
        }

        /* apply weight and normalization, then leave the log domain */
        for (j = 0 ; j < k ; j++) {
            const double log_norm = use_w ? log (g->w[j]) - log_denom
                                          : -log_denom;

            p[i * k + j] = exp (p[i * k + j] + log_norm);
            CHECKFINITE(p[i * k + j]);
        }
    }

    free (log_gauss_cst);
}
Example #22
0
/*
 * Obtain an n-element float vector from fvec_new() and hand it, together with
 * `seed`, to fvec_randn_r() (presumably a seeded normal-distribution fill)
 * before returning it.
 */
float * fvec_new_randn_r (long n, unsigned int seed)
{
  float * vec = fvec_new (n);

  fvec_randn_r (vec, n, seed);

  return vec;
}
Example #23
0
/*
 * Merge k (label, value) tables into a single pair of arrays, emitting the
 * elements by increasing label.
 *
 * labels[t] / vals[t] hold sizes[t] entries each.  Assumes every labels[t] is
 * already in increasing order (implied by the heap-based merge; confirm with
 * callers).  Every input element is emitted exactly once, so equal labels
 * coming from different tables are all kept.
 *
 * On return *labels_out and *vals_out point to arrays obtained from
 * ivec_new() / fvec_new(); the return value is their length, i.e. the sum
 * of sizes[0..k-1].
 */
int merge_ordered_sets (const int **labels,const float **vals,
			const int *sizes,int k,
			int **labels_out,float **vals_out) {
  int n_out = ivec_sum (sizes, k);

  int *merged_labels = ivec_new (n_out);
  float *merged_vals = fvec_new (n_out);

  /* The heap is a maxheap keyed on floats:
   *   heap label = table index in 0..k-1
   *   heap value = -(current label of that table)
   * so its top entry designates the table holding the smallest pending label.
   *
   * Labels travel through a float; one that does not survive the round trip
   * (label > 2**24) trips the assertions below.
   */
  fbinheap_t *heap = fbinheap_new(k);

  /* read position inside each table; left unset for empty tables, which
     never enter the heap */
  int cursor[k];
  int t;

  /* seed the heap with the first label of every non-empty table */
  for (t = 0 ; t < k ; t++) {
    if (sizes[t] != 0) {
      cursor[t] = 0;
      int label = labels[t][0];
      float mh_val = -label;
      assert ((int)(-mh_val) == label || !"lost precision in int->float conversion");
      fbinheap_add (heap, t, mh_val);
    }
  }

  int all_i;
  for (all_i = 0 ; heap->k > 0 ; all_i++) {
    /* entry 1 is the heap top: table index and (negated) label */
    const int src = heap->label[1];

    merged_labels[all_i] = (int)(-heap->val[1]);
    merged_vals[all_i] = vals[src][cursor[src]];

    /* drop the entry just consumed */
    fbinheap_pop (heap);

    /* table exhausted: nothing to push back */
    if (++cursor[src] >= sizes[src])
      continue;

    /* otherwise queue the table's next label */
    int label = labels[src][cursor[src]];
    float mh_val = -label;
    assert ((int)(-mh_val) == label || !"lost precision in int->float conversion");
    fbinheap_add (heap, src, mh_val);
  }

  fbinheap_delete (heap);
  assert (all_i == n_out);

  *labels_out = merged_labels;
  *vals_out = merged_vals;
  return n_out;
}