Exemplo n.º 1
0
void fvec_normalize_2stage(float * v, long n, double scal) {
  double nr = fvec_normalize (v, n, 2);
  
  if(nr == 0) return; 

  int renorm = 0;
  int i;

  for(i=0; i<n; i++) 
    if(v[i] > scal) {
      v[i] = scal; 
      renorm = 1; 
    }
  
  if(renorm) 
    fvec_normalize (v, n, 2);
    
}
Exemplo n.º 2
0
int fmat_svd_partial_full(int n,int m,int nev,const float *a,int a_transposed,
                          float *s,float *vout,float *uout,int nt) {
  
  arpack_eigs_t *ae=arpack_eigs_begin(n,nev);
  if(!ae) return -100;
  int ret=0;
  
  int j,i;
  float *ax=NEWA(float,m);
  
  int it;

  for(it=0;;it++) {
    float *x,*y;
    ret=arpack_eigs_step(ae,&x,&y); 

    printf("arpack iteration %d ret=%d\r",it,ret);

    if(ret<0) break; /* error */

    if(ret==0) break; /* stop iteration */

    /* ret==1 */

    if(!a_transposed) {
      fmat_mul_v(m,n,a,n,x,ax,nt);
      fmat_mul_tv(n,m,a,n,ax,y,nt);
    } else {
      fmat_mul_tv(m,n,a,m,x,ax,nt);
      fmat_mul_v(n,m,a,m,ax,y,nt);
    }

    fflush(stdout);
  } 
  printf("\n");

  free(ax);

  float *v=vout ? vout : fmat_new(nev,n);

  ret=arpack_eigs_end(ae,s,v);

  if(ret>0) {
    int nconv=ret;
        
    if(s)
      for(j=0;j<nconv;j++) 
        s[j]=sqrt(s[j]);    

    if(uout) 
      for(i=0;i<nconv;i++) {
        float *u=uout+m*(long)i;
        if(!a_transposed)
          fmat_mul_v(m,n,a,n,v+n*(long)i,u,nt);
        else
          fmat_mul_tv(m,n,a,m,v+n*(long)i,u,nt);
        fvec_normalize(u,m,2);
      }               
    
  }

  if(!vout) free(v);
  
  return ret;
}
Exemplo n.º 3
0
/**
 * Allocate and extract a feature vector from a sequence.
 * There is a global table of delimiter symbols which is only 
 * initialized once the first sequence is processed. 
 * See fvec_reset_delim();
 * @param x Sequence of bytes 
 * @param l Length of sequence
 * @param s Source of features, e.g. file name
 * @return feature vector
 */
fvec_t *fvec_extract(char *x, int l, char *s)
{
    fvec_t *fv;
    int nlen;
    const char *dlm_str, *cfg_str;
    assert(x && l >= 0);

    /* Allocate feature vector */
    fv = calloc(1, sizeof(fvec_t));
    if (!fv) {
        error("Could not extract feature vector");
        return NULL;
    }

    /* Initialize feature vector */
    fv->len = 0;
    fv->total = 0;
    fv->dim = (feat_t *) malloc(l * sizeof(feat_t));
    fv->val = (float *) malloc(l * sizeof(float));
    fv->mem = sizeof(fvec_t);

    /* Set source */
    if (s) {
        fv->src = strdup(s);
        fv->mem += strlen(s);
    }

    /* Check for empty sequence */
    if (l == 0)
        return fv;

    if (!fv->dim || !fv->val) {
        error("Could not allocate feature vector");
        fvec_destroy(fv);
        return NULL;
    }

    /* Get n-gram length */
    config_lookup_int(&cfg, "features.ngram_len", (int *) &nlen);

    /* Construct delimiter lookup table */
    config_lookup_string(&cfg, "features.ngram_delim", &dlm_str);

    /* N-grams of bytes */
    if (!dlm_str || strlen(dlm_str) == 0) {
        /* Feature extraction */
        extract_ngrams(fv, x, l, nlen);
    } else {
        if (delim[0] == DELIM_NOT_INIT) {
            memset(delim, 0, 256);
            decode_delim(dlm_str);
        }

        /* Feature extraction */
        extract_wgrams(fv, x, l, nlen);
    }
    fv->total = fv->len;

    /* Sort extracted features */
    qsort(fv->dim, fv->len, sizeof(feat_t), cmp_feat);

    /* Compute embedding and condense */
    config_lookup_string(&cfg, "features.vect_embed", &cfg_str);
    if (!strcasecmp(cfg_str, "cnt")) {
        fvec_condense(fv, EMBED_CNT);
    } else if (!strcasecmp(cfg_str, "bin")) {
        fvec_condense(fv, EMBED_BIN);
    } else {
        warning("Unknown embedding '%s', using 'cnt'.", cfg_str);
        fvec_condense(fv, EMBED_CNT);
    }

    /* Compute l2 normalization */
    fvec_normalize(fv, NORM_L2);
    return fv;
}
Exemplo n.º 4
0
/* estimate the GMM parameters */
static void gmm_compute_params (int n, const float * v, const float * p,
                                gmm_t * g,
                                int flags,
                                int n_thread)
{
    long i, j;

    long d=g->d, k=g->k;
    float * vtmp = fvec_new (d);
    float * mu_old = fvec_new_cpy (g->mu, k * d);
    float * w_old = fvec_new_cpy (g->w, k);

    fvec_0 (g->w, k);
    fvec_0 (g->mu, k * d);
    fvec_0 (g->sigma, k * d);

    if(0) {
        /* slow and simple */
        for (j = 0 ; j < k ; j++) {
            double dtmp = 0;
            for (i = 0 ; i < n ; i++) {
                /* contribution to the gaussian weight */
                dtmp += p[i * k + j];
                /* contribution to mu */

                fvec_cpy (vtmp, v + i * d, d);
                fvec_mul_by (vtmp, d, p[i * k + j]);
                fvec_add (g->mu + j * d, vtmp, d);

                /* contribution to the variance */
                fvec_cpy (vtmp, v + i * d, d);
                fvec_sub (vtmp, mu_old + j * d, d);
                fvec_sqr (vtmp, d);
                fvec_mul_by (vtmp, d, p[i * k + j]);
                fvec_add (g->sigma + j * d, vtmp, d);

            }
            g->w[j] = dtmp;
        }

    } else {
        /* fast and complicated */

        if(n_thread<=1)
            compute_sum_dcov(n,k,d,v,mu_old,p,g->mu,g->sigma,g->w);
        else
            compute_sum_dcov_thread(n,k,d,v,mu_old,p,g->mu,g->sigma,g->w,n_thread);
    }

    if(flags & GMM_FLAGS_1SIGMA) {
        for (j = 0 ; j < k ; j++) {
            float *sigma_j=g->sigma+j*d;
            double var=fvec_sum(sigma_j,d)/d;
            fvec_set(sigma_j,d,var);
        }
    }

    long nz=0;
    for(i=0; i<k*d; i++)
        if(g->sigma[i]<min_sigma) {
            g->sigma[i]=min_sigma;
            nz++;
        }

    if(nz) printf("WARN %ld sigma diagonals are too small (set to %g)\n",nz,min_sigma);

    for (j = 0 ; j < k ; j++) {
        fvec_div_by (g->mu + j * d, d, g->w[j]);
        fvec_div_by (g->sigma + j * d, d, g->w[j]);
    }

    assert(finite(fvec_sum(g->mu, k*d)));

    fvec_normalize (g->w, k, 1);

    printf ("w = ");
    fvec_print (g->w, k);
    double imfac = k * fvec_sum_sqr (g->w, k);
    printf (" imfac = %.3f\n", imfac);

    free (vtmp);
    free (w_old);
    free (mu_old);
}
Exemplo n.º 5
0
void mexFunction (int nlhs, mxArray *plhs[],
                  int nrhs, const mxArray*prhs[])

{
  if (nrhs < 4) 
    mexErrMsgTxt("At least 4 arguments are required even nb of input arguments required.");
  else if (nlhs != 1) 
    mexErrMsgTxt("yael_fisher produces exactly 1 output argument.");

  int flags = GMM_FLAGS_MU;
  int verbose = 0;
  int fishernorm1 = 1;
  
  if(mxGetClassID(PARAM_V)!=mxSINGLE_CLASS)
    mexErrMsgTxt("need single precision array.");

  if(mxGetClassID(PARAM_W)!=mxSINGLE_CLASS)
    mexErrMsgTxt("need single precision array.");

  if(mxGetClassID(PARAM_MU)!=mxSINGLE_CLASS)
    mexErrMsgTxt("need single precision array.");

  if(mxGetClassID(PARAM_SIGMA)!=mxSINGLE_CLASS)
    mexErrMsgTxt("need single precision array.");

  float *v = (float*) mxGetPr (PARAM_V);
  float *w = (float*) mxGetPr (PARAM_W);
  float *mu = (float*) mxGetPr (PARAM_MU);
  float *sigma = (float*) mxGetPr (PARAM_SIGMA);

  {
    int i;
    for(i = 4 ; i < nrhs ; i += 1) {
      char varname[256];
      if (mxGetClassID(prhs[i]) != mxCHAR_CLASS) 
        mexErrMsgTxt ("variable name required");         

      if (mxGetString (prhs[i], varname, 256) != 0)
        mexErrMsgTxt ("Could not convert string data");

      if (!strcmp(varname, "sigma")) 
	flags |= GMM_FLAGS_SIGMA;
      
      else if (!strcmp(varname,"weights")) 
        flags |= GMM_FLAGS_W;

      else if (!strcmp(varname,"nomu")) 
        flags &= ~ GMM_FLAGS_MU;

      else if (!strcmp(varname,"verbose")) 
        verbose = 1;

      else if (!strcmp(varname,"nonorm")) 
        fishernorm1 = 0;

      else 
        mexErrMsgTxt("unknown variable name");  
    }
  }

  if (verbose) {
    fprintf (stdout, "v     -> %ld x %ld\n", mxGetM (PARAM_V), mxGetN (PARAM_V));
    fprintf (stdout, "w     -> %ld x %ld\n", mxGetM (PARAM_W), mxGetN (PARAM_W));
    fprintf (stdout, "mu    -> %ld x %ld\n", mxGetM (PARAM_MU), mxGetN (PARAM_MU));
    fprintf (stdout, "sigma -> %ld x %ld\n", mxGetM (PARAM_SIGMA), mxGetN (PARAM_SIGMA));
  }

  int d = mxGetM (PARAM_V);  /* vector dimensionality */
  int n = mxGetN (PARAM_V);  /* number of fisher vector to produce */
  int k = mxGetN (PARAM_W);  /* number of gaussian */

  if (verbose)
    fprintf (stdout, "d       = %d\nn       = %d\nk       = %d\n", d, n, k);

  if (mxGetM (PARAM_MU) != d || mxGetM (PARAM_SIGMA) != d || 
      mxGetN (PARAM_MU) !=k || mxGetN (PARAM_SIGMA) != k || 
      (mxGetM (PARAM_W) != 1 && mxGetN (PARAM_W) != 1) )
    mexErrMsgTxt("Invalid input dimensionalities.");

  

  /* ouptut: GMM, i.e., weights, mu and variances */
  gmm_t g = {d, k, w, mu, sigma};
  int dout = gmm_fisher_sizeof (&g, flags); 
  if (verbose)
    fprintf (stdout, "Size of the fisher vector = %d\n", dout);

  plhs[0] = mxCreateNumericMatrix (dout, 1, mxSINGLE_CLASS, mxREAL);
  float * vf = (float *) mxGetPr (plhs[0]);
  gmm_fisher (n, v, &g, flags, vf);

  if (fishernorm1) {
    int ret = fvec_normalize (vf, dout, 2.);
    if (ret == 1)
      fvec_set (vf, dout, 1);
  }
}
Exemplo n.º 6
0
Arquivo: vlad.c Projeto: atroudi/V3V_2
void vlad_compute(int k, int d, const float *centroids, int n, const float *v,int flags, float *desc) 
{

	int i,j,l,n_quantile,i0,i1,ai,a,ma,ni;
	int *perm ;
	float un , diff;
	float *tab,*u,*avg,*sum,*mom2,*dists;
	int *hist,*assign;


	if(flags<11 || flags>=13) 
	{
		assign=ivec_new(n);

		nn(n,k,d,centroids,v,assign,NULL,NULL);    

		if(flags==6 || flags==7) 
		{
			n_quantile = flags==6 ? 3 : 1;
			fvec_0(desc,k*d*n_quantile);
			perm      = ivec_new(n);
			tab       = fvec_new(n);
			ivec_sort_index(assign,n,perm);
			i0=0;
			for(i=0;i<k;i++) 
			{
				i1=i0;
				while(i1<n && assign[perm[i1]]==i) 
				{
					i1++;
				}

				if(i1==i0) continue;

				for(j=0;j<d;j++) 
				{        
					for(l=i0;l<i1;l++)
					{
						tab[l-i0]=v[perm[l]*d+j];
					}
					ni=i1-i0;
					fvec_sort(tab,ni);
					for(l=0;l<n_quantile;l++) 
					{
						desc[(i*d+j)*n_quantile+l]=(tab[(l*ni+ni/2)/n_quantile]-centroids[i*d+j])*ni;
					}
				}

				i0=i1;
			}
			free(perm);
			free(tab);
		} 
		else if(flags==5) 
		{
			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=v[i*d+j];
				}
			}

		} 
		else if(flags==8 || flags==9) 
		{
			fvec_0(desc,k*d);

			u   = fvec_new(d);

			for(i=0;i<n;i++) 
			{
				fvec_cpy(u,v+i*d,d);
				fvec_sub(u,centroids+assign[i]*d,d);
				un=(float)sqrt(fvec_norm2sqr(u,d));

				if(un==0) continue;
				if(flags==8) 
				{        
					fvec_div_by(u,d,un);
				} else if(flags==9) 
				{
					fvec_div_by(u,d,sqrt(un));
				}

				fvec_add(desc+assign[i]*d,u,d);

			}
			free(u);
		} 
		else if(flags==10) 
		{
			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=v[i*d+j];
				}
			}

			for(i=0;i<k;i++) 
			{
				fvec_normalize(desc+i*d,d,2.0);  
			}

		} 
		else if(flags==13) 
		{

			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=(float)sqr(v[i*d+j]-centroids[assign[i]*d+j]);
				}
			}     

		} 
		else if(flags==14) 
		{
			avg = fvec_new_0(k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					avg[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
				}
			}

			hist=ivec_new_histogram(k,assign,n);

			for(i=0;i<k;i++) 
			{
				if(hist[i]>0) 
				{
					for(j=0;j<d;j++) 
					{
						avg[i*d+j]/=hist[i];
					}
				}
			}

			free(hist);

			fvec_0(desc,k*d);
			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=(float)(sqr(v[i*d+j]-centroids[assign[i]*d+j]-avg[assign[i]*d+j]));
				}
			}

			fvec_sqrt(desc,k*d);

			free(avg);
		}  
		else if(flags==15) 
		{
			fvec_0(desc,k*d*2);
			sum = desc;

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					sum[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
				}
			}

			hist = ivec_new_histogram(k,assign,n);

			mom2 = desc+k*d;

			for(i=0;i<n;i++) 
			{
				ai=assign[i];
				for(j=0;j<d;j++) 
				{
					mom2[ai*d+j]+=(float)(sqr(v[i*d+j]-centroids[ai*d+j]-sum[ai*d+j]/hist[ai]));
				}
			}
			fvec_sqrt(mom2,k*d);
			free(hist);


		} 
		else if(flags==17) 
		{
			fvec_0(desc,k*d*2);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					diff=v[i*d+j]-centroids[assign[i]*d+j];
					if(diff>0)
					{
						desc[assign[i]*d+j]+=diff;
					}
					else 
					{
						desc[assign[i]*d+j+k*d]-=diff;
					}
				}
			}

		} 
		else 
		{
			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
				}
			}


			if(flags==1) 
			{
				hist=ivec_new_histogram(k,assign,n);
				/* printf("unbalance factor=%g\n",ivec_unbalanced_factor(hist,k)); */

				for(i=0;i<k;i++) 
				{
					for(j=0;j<d;j++) 
					{
						desc[i*d+j]/=hist[i];    
					}
				}
				free(hist);
			}

			if(flags==2) 
			{
				for(i=0;i<k;i++) 
				{
					fvec_normalize(desc+i*d,d,2.0);
				}
			}

			if(flags==3 || flags==4) 
			{
				assert(!"not implemented");
			}

			if(flags==16) 
			{
				hist=ivec_new_histogram(k,assign,n);
				for(i=0;i<k;i++) 
				{
					if(hist[i]>0) 
					{
						fvec_norm(desc+i*d,d,2);
						fvec_mul_by(desc+i*d,d,sqrt(hist[i]));
					}
				}
				free(hist);
			}


		}
		free(assign);
	} 
	else if(flags==11 || flags==12) 
	{
		ma=flags==11 ? 4 : 2;
		assign=ivec_new(n*ma);

		dists=knn(n,k,d,ma,centroids,v,assign,NULL,NULL);    

		fvec_0(desc,k*d);

		for(i=0;i<n;i++) 
		{
			for(j=0;j<d;j++) 
			{
				for(a=0;a<ma;a++) 
				{
					desc[assign[ma*i+a]*d+j]+=v[i*d+j]-centroids[assign[ma*i+a]*d+j];
				}
			}
		} 

		free(dists);

		free(assign);
	}

}