/*
 * Two-stage normalization of v[0..n-1]: normalize, clip, renormalize.
 *
 * The vector is first passed to fvec_normalize(v, n, 2) (presumably an
 * in-place L2 normalization whose return value is 0 when there is nothing
 * to normalize -- confirm against its definition). When that call returns 0
 * the function stops there. Otherwise every component strictly greater than
 * scal is replaced by scal, and if at least one component was clipped the
 * vector is normalized a second time.
 *
 * Only the upper side is clipped: components below -scal are left as is.
 *
 * v     vector, modified in place
 * n     number of components of v
 * scal  upper bound applied to the components after the first normalization
 */
void fvec_normalize_2stage(float * v, long n, double scal)
{
  double nr = fvec_normalize (v, n, 2);
  if (nr == 0)
    return;

  int renorm = 0;
  long i;   /* same width as n: an int counter would overflow past INT_MAX */

  for (i = 0; i < n; i++) {
    if (v[i] > scal) {
      v[i] = scal;
      renorm = 1;
    }
  }

  /* clipping changed the norm, so bring the vector back to unit norm */
  if (renorm)
    fvec_normalize (v, n, 2);
}
/*
 * Partial SVD driven by the arpack_eigs_* iterative solver.
 *
 * The solver is created for an n-dimensional problem with nev requested
 * values. Each time arpack_eigs_step() returns 1 it hands back an input
 * vector and an output vector; the output is filled with two successive
 * matrix-vector products through an m-element scratch buffer (presumably
 * y = A' * (A * x), i.e. the operator whose eigenvalues are the squared
 * singular values -- confirm against fmat_mul_v / fmat_mul_tv).
 *
 * n, m           matrix dimensions as passed to the solver / products
 * nev            number of values requested from the solver
 * a              the matrix
 * a_transposed   selects which of the two storage conventions of `a` the
 *                products use (leading dimension n when 0, m otherwise)
 * s              if non-NULL, receives the square roots of the values
 *                returned by arpack_eigs_end()
 * vout           if non-NULL, receives the vectors returned by
 *                arpack_eigs_end(); otherwise a temporary matrix is used
 * uout           if non-NULL, row i (m floats) receives the matrix applied
 *                to vector i, normalized with fvec_normalize(u, m, 2)
 * nt             forwarded to the matrix-vector products (presumably a
 *                thread count)
 *
 * Returns -100 when the solver cannot be created, otherwise the value
 * returned by arpack_eigs_end(); a positive value is used as the number of
 * converged values.
 */
int fmat_svd_partial_full(int n,int m,int nev,const float *a,int a_transposed,
                          float *s,float *vout,float *uout,int nt)
{
  arpack_eigs_t *solver = arpack_eigs_begin(n, nev);
  if (solver == NULL)
    return -100;

  float *scratch = NEWA(float, m);
  int status = 0;
  int iter = 0;

  for (;;) {
    float *in, *out;

    status = arpack_eigs_step(solver, &in, &out);
    printf("arpack iteration %d ret=%d\r", iter, status);

    /* negative: error, zero: iteration finished */
    if (status <= 0)
      break;

    /* status == 1: the solver asks for the operator applied to `in` */
    if (a_transposed) {
      fmat_mul_tv(m, n, a, m, in, scratch, nt);
      fmat_mul_v(n, m, a, m, scratch, out, nt);
    } else {
      fmat_mul_v(m, n, a, n, in, scratch, nt);
      fmat_mul_tv(n, m, a, n, scratch, out, nt);
    }

    fflush(stdout);
    iter++;
  }
  printf("\n");

  free(scratch);

  /* use the caller's buffer when there is one, a temporary otherwise */
  float *vecs = vout;
  if (vecs == NULL)
    vecs = fmat_new(nev, n);

  status = arpack_eigs_end(solver, s, vecs);

  if (status > 0) {
    const int nconv = status;

    if (s != NULL) {
      for (int c = 0; c < nconv; c++)
        s[c] = sqrt(s[c]);
    }

    if (uout != NULL) {
      for (int c = 0; c < nconv; c++) {
        float *u_c = uout + m * (long)c;
        float *v_c = vecs + n * (long)c;

        if (a_transposed)
          fmat_mul_tv(m, n, a, m, v_c, u_c, nt);
        else
          fmat_mul_v(m, n, a, n, v_c, u_c, nt);

        fvec_normalize(u_c, m, 2);
      }
    }
  }

  if (vout == NULL)
    free(vecs);

  return status;
}
/** * Allocate and extract a feature vector from a sequence. * There is a global table of delimiter symbols which is only * initialized once the first sequence is processed. * See fvec_reset_delim(); * @param x Sequence of bytes * @param l Length of sequence * @param s Source of features, e.g. file name * @return feature vector */ fvec_t *fvec_extract(char *x, int l, char *s) { fvec_t *fv; int nlen; const char *dlm_str, *cfg_str; assert(x && l >= 0); /* Allocate feature vector */ fv = calloc(1, sizeof(fvec_t)); if (!fv) { error("Could not extract feature vector"); return NULL; } /* Initialize feature vector */ fv->len = 0; fv->total = 0; fv->dim = (feat_t *) malloc(l * sizeof(feat_t)); fv->val = (float *) malloc(l * sizeof(float)); fv->mem = sizeof(fvec_t); /* Set source */ if (s) { fv->src = strdup(s); fv->mem += strlen(s); } /* Check for empty sequence */ if (l == 0) return fv; if (!fv->dim || !fv->val) { error("Could not allocate feature vector"); fvec_destroy(fv); return NULL; } /* Get n-gram length */ config_lookup_int(&cfg, "features.ngram_len", (int *) &nlen); /* Construct delimiter lookup table */ config_lookup_string(&cfg, "features.ngram_delim", &dlm_str); /* N-grams of bytes */ if (!dlm_str || strlen(dlm_str) == 0) { /* Feature extraction */ extract_ngrams(fv, x, l, nlen); } else { if (delim[0] == DELIM_NOT_INIT) { memset(delim, 0, 256); decode_delim(dlm_str); } /* Feature extraction */ extract_wgrams(fv, x, l, nlen); } fv->total = fv->len; /* Sort extracted features */ qsort(fv->dim, fv->len, sizeof(feat_t), cmp_feat); /* Compute embedding and condense */ config_lookup_string(&cfg, "features.vect_embed", &cfg_str); if (!strcasecmp(cfg_str, "cnt")) { fvec_condense(fv, EMBED_CNT); } else if (!strcasecmp(cfg_str, "bin")) { fvec_condense(fv, EMBED_BIN); } else { warning("Unknown embedding '%s', using 'cnt'.", cfg_str); fvec_condense(fv, EMBED_CNT); } /* Compute l2 normalization */ fvec_normalize(fv, NORM_L2); return fv; }
/*
 * Estimate the GMM parameters of g (weights, means, diagonal variances)
 * from n points and their per-Gaussian weights.
 *
 * n         number of points
 * v         points, point i at v + i * d
 * p         weight of point i for Gaussian j at p[i * k + j]
 * g         model updated in place; g->d and g->k give the dimensions
 * flags     GMM_FLAGS_1SIGMA forces one shared variance value per Gaussian
 * n_thread  <= 1 selects compute_sum_dcov(), otherwise the threaded variant
 *
 * compute_sum_dcov*() replaces a disabled straightforward reference loop
 * that used to live here. For each Gaussian j that loop accumulated
 *     w[j]     = sum_i p[i*k+j]
 *     mu_j     = sum_i p[i*k+j] * v_i
 *     sigma_j  = sum_i p[i*k+j] * (v_i - mu_old_j)^2   (component-wise)
 * where mu_old is the mean before this update. The fast routines are
 * presumed to compute the same sums -- confirm against their definition.
 *
 * Prints a warning when variances are clamped, and always prints the
 * normalized weights and the imbalance factor k * sum_j w[j]^2.
 */
static void gmm_compute_params (int n, const float * v, const float * p,
                                gmm_t * g, int flags, int n_thread)
{
  long i, j;
  long d = g->d, k = g->k;

  /* the variance sums are taken around the previous means */
  float * mu_old = fvec_new_cpy (g->mu, k * d);

  fvec_0 (g->w, k);
  fvec_0 (g->mu, k * d);
  fvec_0 (g->sigma, k * d);

  if (n_thread <= 1)
    compute_sum_dcov (n, k, d, v, mu_old, p, g->mu, g->sigma, g->w);
  else
    compute_sum_dcov_thread (n, k, d, v, mu_old, p, g->mu, g->sigma, g->w,
                             n_thread);

  if (flags & GMM_FLAGS_1SIGMA) {
    /* replace each Gaussian's variances by their average over dimensions */
    for (j = 0 ; j < k ; j++) {
      float *sigma_j = g->sigma + j * d;
      double var = fvec_sum (sigma_j, d) / d;
      fvec_set (sigma_j, d, var);
    }
  }

  /* clamp the (still un-normalized) variance sums from below */
  long nz = 0;
  for (i = 0 ; i < k * d ; i++) {
    if (g->sigma[i] < min_sigma) {
      g->sigma[i] = min_sigma;
      nz++;
    }
  }

  if (nz)
    printf ("WARN %ld sigma diagonals are too small (set to %g)\n",
            nz, min_sigma);

  /* turn the weighted sums into weighted averages */
  for (j = 0 ; j < k ; j++) {
    fvec_div_by (g->mu + j * d, d, g->w[j]);
    fvec_div_by (g->sigma + j * d, d, g->w[j]);
  }

  assert (finite (fvec_sum (g->mu, k * d)));

  fvec_normalize (g->w, k, 1);

  printf ("w = ");
  fvec_print (g->w, k);
  double imfac = k * fvec_sum_sqr (g->w, k);
  printf (" imfac = %.3f\n", imfac);

  free (mu_old);
}
/*
 * MEX gateway computing a Fisher vector from a set of vectors and a GMM.
 *
 * Inputs (all single precision): PARAM_V (d x n vectors), PARAM_W (GMM
 * weights, a row or column vector with k entries), PARAM_MU and PARAM_SIGMA
 * (d x k each). Any further argument is an option string:
 *   "sigma"    adds GMM_FLAGS_SIGMA
 *   "weights"  adds GMM_FLAGS_W
 *   "nomu"     removes GMM_FLAGS_MU (set by default)
 *   "verbose"  prints the input sizes and the output size
 *   "nonorm"   disables the final normalization of the result
 *
 * Output: plhs[0], a single-precision column of gmm_fisher_sizeof() entries.
 */
void mexFunction (int nlhs, mxArray *plhs[],
                  int nrhs, const mxArray*prhs[])
{
  if (nrhs < 4)
    mexErrMsgTxt("At least 4 input arguments are required.");
  else if (nlhs != 1)
    mexErrMsgTxt("yael_fisher produces exactly 1 output argument.");

  int flags = GMM_FLAGS_MU;
  int verbose = 0;
  int fishernorm1 = 1;

  if(mxGetClassID(PARAM_V)!=mxSINGLE_CLASS)
    mexErrMsgTxt("need single precision array.");

  if(mxGetClassID(PARAM_W)!=mxSINGLE_CLASS)
    mexErrMsgTxt("need single precision array.");

  if(mxGetClassID(PARAM_MU)!=mxSINGLE_CLASS)
    mexErrMsgTxt("need single precision array.");

  if(mxGetClassID(PARAM_SIGMA)!=mxSINGLE_CLASS)
    mexErrMsgTxt("need single precision array.");

  float *v = (float*) mxGetPr (PARAM_V);
  float *w = (float*) mxGetPr (PARAM_W);
  float *mu = (float*) mxGetPr (PARAM_MU);
  float *sigma = (float*) mxGetPr (PARAM_SIGMA);

  /* every argument after the fourth is a stand-alone option name */
  {
    int i;
    for(i = 4 ; i < nrhs ; i += 1) {
      char varname[256];
      if (mxGetClassID(prhs[i]) != mxCHAR_CLASS)
        mexErrMsgTxt ("variable name required");

      if (mxGetString (prhs[i], varname, 256) != 0)
        mexErrMsgTxt ("Could not convert string data");

      if (!strcmp(varname, "sigma"))
        flags |= GMM_FLAGS_SIGMA;
      else if (!strcmp(varname,"weights"))
        flags |= GMM_FLAGS_W;
      else if (!strcmp(varname,"nomu"))
        flags &= ~ GMM_FLAGS_MU;
      else if (!strcmp(varname,"verbose"))
        verbose = 1;
      else if (!strcmp(varname,"nonorm"))
        fishernorm1 = 0;
      else
        mexErrMsgTxt("unknown variable name");
    }
  }

  /* mxGetM/mxGetN return an unsigned size type; convert to match %ld */
  if (verbose) {
    fprintf (stdout, "v     -> %ld x %ld\n",
             (long) mxGetM (PARAM_V), (long) mxGetN (PARAM_V));
    fprintf (stdout, "w     -> %ld x %ld\n",
             (long) mxGetM (PARAM_W), (long) mxGetN (PARAM_W));
    fprintf (stdout, "mu    -> %ld x %ld\n",
             (long) mxGetM (PARAM_MU), (long) mxGetN (PARAM_MU));
    fprintf (stdout, "sigma -> %ld x %ld\n",
             (long) mxGetM (PARAM_SIGMA), (long) mxGetN (PARAM_SIGMA));
  }

  int d = mxGetM (PARAM_V);  /* vector dimensionality */
  int n = mxGetN (PARAM_V);  /* number of fisher vector to produce */

  /* number of gaussian: the length of w, which the check below accepts
     either as a row or as a column vector */
  int k = (int) (mxGetM (PARAM_W) * mxGetN (PARAM_W));

  if (verbose)
    fprintf (stdout, "d = %d\nn = %d\nk = %d\n", d, n, k);

  if (mxGetM (PARAM_MU) != d || mxGetM (PARAM_SIGMA) != d ||
      mxGetN (PARAM_MU) != k || mxGetN (PARAM_SIGMA) != k ||
      (mxGetM (PARAM_W) != 1 && mxGetN (PARAM_W) != 1) )
    mexErrMsgTxt("Invalid input dimensionalities.");

  /* GMM description built on the input arrays: weights, mu and variances */
  gmm_t g = {d, k, w, mu, sigma};

  int dout = gmm_fisher_sizeof (&g, flags);
  if (verbose)
    fprintf (stdout, "Size of the fisher vector = %d\n", dout);

  /* output: the fisher vector */
  plhs[0] = mxCreateNumericMatrix (dout, 1, mxSINGLE_CLASS, mxREAL);
  float * vf = (float *) mxGetPr (plhs[0]);
  gmm_fisher (n, v, &g, flags, vf);

  if (fishernorm1) {
    /* a return value of 1 is treated as "could not normalize" (presumably
       a degenerate norm -- confirm); the vector is then filled with ones */
    int ret = fvec_normalize (vf, dout, 2.);
    if (ret == 1)
      fvec_set (vf, dout, 1);
  }
}
/*
 * Compute a VLAD-style descriptor of n vectors with respect to k centroids.
 *
 * k, d       number of centroids and dimension
 * centroids  centroid i at centroids + i * d
 * n, v       number of vectors, vector i at v + i * d
 * flags      variant selector, see below
 * desc       output, written by this function; k*d floats unless noted
 *
 * For every variant except 11/12 each vector is assigned to one centroid
 * by nn() (presumably its nearest centroid -- confirm). "residual" below
 * means v_i - centroid(assign[i]); "count" is the number of vectors assigned
 * to a centroid.
 *
 *   1      per-centroid sum of residuals divided by count
 *          (centroids without vectors keep a zero block)
 *   2      sum of residuals, each centroid block passed to
 *          fvec_normalize(.., 2.0)
 *   3, 4   not implemented (assert)
 *   5      sum of the raw vectors
 *   6, 7   per centroid and dimension, 3 quantiles (6, output k*d*3) or the
 *          median (7) of the assigned vectors, minus the centroid
 *          coordinate, times count
 *   8, 9   sum of residuals divided by their norm (8) or by the square
 *          root of their norm (9); zero residuals are skipped
 *   10     sum of the raw vectors, each centroid block normalized
 *   11, 12 each vector contributes its residual to 4 (11) or 2 (12)
 *          centroids returned by knn()
 *   13     sum of squared residuals
 *   14     square root of the summed squared deviation of the residuals
 *          from their per-centroid mean
 *   15     output k*d*2: first half sum of residuals, second half as 14
 *   16     sum of residuals scaled by sqrt(count)
 *   17     output k*d*2: first half sum of the positive residual
 *          components, second half sum of the magnitudes of the others
 *   other  (including 0) plain sum of residuals
 */
void vlad_compute(int k, int d, const float *centroids, int n, const float *v,
                  int flags, float *desc)
{
  int i,j,l,n_quantile,i0,i1,ai,a,ma,ni;
  int *perm;
  float un, diff;
  float *tab,*u,*avg,*sum,*mom2,*dists;
  int *hist,*assign;

  if(flags<11 || flags>=13) {

    /* single assignment: one centroid index per vector */
    assign=ivec_new(n);
    nn(n,k,d,centroids,v,assign,NULL,NULL);

    if(flags==6 || flags==7) {

      n_quantile = flags==6 ? 3 : 1;
      fvec_0(desc,k*d*n_quantile);
      perm = ivec_new(n);
      tab = fvec_new(n);

      /* perm orders the vectors by centroid so that each centroid owns a
         contiguous range [i0, i1) of perm */
      ivec_sort_index(assign,n,perm);
      i0=0;
      for(i=0;i<k;i++) {
        i1=i0;
        while(i1<n && assign[perm[i1]]==i) {
          i1++;
        }
        if(i1==i0)
          continue;   /* no vector for this centroid */

        for(j=0;j<d;j++) {
          /* gather coordinate j of the vectors of centroid i and sort */
          for(l=i0;l<i1;l++) {
            tab[l-i0]=v[perm[l]*d+j];
          }
          ni=i1-i0;
          fvec_sort(tab,ni);
          for(l=0;l<n_quantile;l++) {
            desc[(i*d+j)*n_quantile+l]=
              (tab[(l*ni+ni/2)/n_quantile]-centroids[i*d+j])*ni;
          }
        }
        i0=i1;
      }
      free(perm);
      free(tab);

    } else if(flags==5) {

      fvec_0(desc,k*d);
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          desc[assign[i]*d+j]+=v[i*d+j];
        }
      }

    } else if(flags==8 || flags==9) {

      fvec_0(desc,k*d);
      u = fvec_new(d);
      for(i=0;i<n;i++) {
        /* u = residual of vector i */
        fvec_cpy(u,v+i*d,d);
        fvec_sub(u,centroids+assign[i]*d,d);
        un=(float)sqrt(fvec_norm2sqr(u,d));
        if(un==0)
          continue;   /* avoid dividing by a zero norm */
        if(flags==8) {
          fvec_div_by(u,d,un);
        } else if(flags==9) {
          fvec_div_by(u,d,sqrt(un));
        }
        fvec_add(desc+assign[i]*d,u,d);
      }
      free(u);

    } else if(flags==10) {

      fvec_0(desc,k*d);
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          desc[assign[i]*d+j]+=v[i*d+j];
        }
      }
      for(i=0;i<k;i++) {
        fvec_normalize(desc+i*d,d,2.0);
      }

    } else if(flags==13) {

      fvec_0(desc,k*d);
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          desc[assign[i]*d+j]+=(float)sqr(v[i*d+j]-centroids[assign[i]*d+j]);
        }
      }

    } else if(flags==14) {

      /* avg = per-centroid mean residual */
      avg = fvec_new_0(k*d);
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          avg[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
        }
      }
      hist=ivec_new_histogram(k,assign,n);
      for(i=0;i<k;i++) {
        if(hist[i]>0) {
          for(j=0;j<d;j++) {
            avg[i*d+j]/=hist[i];
          }
        }
      }
      free(hist);

      /* accumulate squared deviations from that mean */
      fvec_0(desc,k*d);
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          desc[assign[i]*d+j]+=
            (float)(sqr(v[i*d+j]-centroids[assign[i]*d+j]-avg[assign[i]*d+j]));
        }
      }
      fvec_sqrt(desc,k*d);
      free(avg);

    } else if(flags==15) {

      fvec_0(desc,k*d*2);

      /* first half of desc: sum of residuals */
      sum = desc;
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          sum[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
        }
      }
      hist = ivec_new_histogram(k,assign,n);

      /* second half: deviations from the mean residual sum/count; hist[ai]
         is at least 1 here because vector i itself is assigned to ai */
      mom2 = desc+k*d;
      for(i=0;i<n;i++) {
        ai=assign[i];
        for(j=0;j<d;j++) {
          mom2[ai*d+j]+=
            (float)(sqr(v[i*d+j]-centroids[ai*d+j]-sum[ai*d+j]/hist[ai]));
        }
      }
      fvec_sqrt(mom2,k*d);
      free(hist);

    } else if(flags==17) {

      fvec_0(desc,k*d*2);
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          diff=v[i*d+j]-centroids[assign[i]*d+j];
          if(diff>0) {
            desc[assign[i]*d+j]+=diff;
          } else {
            desc[assign[i]*d+j+k*d]-=diff;
          }
        }
      }

    } else {

      /* plain sum of residuals, optionally post-processed */
      fvec_0(desc,k*d);
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          desc[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
        }
      }

      if(flags==1) {
        hist=ivec_new_histogram(k,assign,n);
        /* printf("unbalance factor=%g\n",ivec_unbalanced_factor(hist,k)); */
        for(i=0;i<k;i++) {
          /* a centroid without vectors would give 0/0 = NaN; its block
             is left at zero instead, as the flags==14 variant does */
          if(hist[i]>0) {
            for(j=0;j<d;j++) {
              desc[i*d+j]/=hist[i];
            }
          }
        }
        free(hist);
      }

      if(flags==2) {
        for(i=0;i<k;i++) {
          fvec_normalize(desc+i*d,d,2.0);
        }
      }

      if(flags==3 || flags==4) {
        assert(!"not implemented");
      }

      if(flags==16) {
        hist=ivec_new_histogram(k,assign,n);
        for(i=0;i<k;i++) {
          if(hist[i]>0) {
            /* NOTE(review): the result of fvec_norm() is discarded; if it
               only computes the norm this call has no effect and
               fvec_normalize() may have been intended -- confirm before
               changing, since that would alter the descriptor */
            fvec_norm(desc+i*d,d,2);
            fvec_mul_by(desc+i*d,d,sqrt(hist[i]));
          }
        }
        free(hist);
      }
    }

    free(assign);

  } else if(flags==11 || flags==12) {

    /* multiple assignment: ma centroid indices per vector */
    ma=flags==11 ? 4 : 2;
    assign=ivec_new(n*ma);
    dists=knn(n,k,d,ma,centroids,v,assign,NULL,NULL);

    fvec_0(desc,k*d);
    for(i=0;i<n;i++) {
      for(j=0;j<d;j++) {
        for(a=0;a<ma;a++) {
          desc[assign[ma*i+a]*d+j]+=v[i*d+j]-centroids[assign[ma*i+a]*d+j];
        }
      }
    }

    /* the array returned by knn() is not used, only released */
    free(dists);
    free(assign);
  }
}