/* estimate the GMM parameters */ static void gmm_compute_params (int n, const float * v, const float * p, gmm_t * g, int flags, int n_thread) { long i, j; long d=g->d, k=g->k; float * vtmp = fvec_new (d); float * mu_old = fvec_new_cpy (g->mu, k * d); float * w_old = fvec_new_cpy (g->w, k); fvec_0 (g->w, k); fvec_0 (g->mu, k * d); fvec_0 (g->sigma, k * d); if(0) { /* slow and simple */ for (j = 0 ; j < k ; j++) { double dtmp = 0; for (i = 0 ; i < n ; i++) { /* contribution to the gaussian weight */ dtmp += p[i * k + j]; /* contribution to mu */ fvec_cpy (vtmp, v + i * d, d); fvec_mul_by (vtmp, d, p[i * k + j]); fvec_add (g->mu + j * d, vtmp, d); /* contribution to the variance */ fvec_cpy (vtmp, v + i * d, d); fvec_sub (vtmp, mu_old + j * d, d); fvec_sqr (vtmp, d); fvec_mul_by (vtmp, d, p[i * k + j]); fvec_add (g->sigma + j * d, vtmp, d); } g->w[j] = dtmp; } } else { /* fast and complicated */ if(n_thread<=1) compute_sum_dcov(n,k,d,v,mu_old,p,g->mu,g->sigma,g->w); else compute_sum_dcov_thread(n,k,d,v,mu_old,p,g->mu,g->sigma,g->w,n_thread); } if(flags & GMM_FLAGS_1SIGMA) { for (j = 0 ; j < k ; j++) { float *sigma_j=g->sigma+j*d; double var=fvec_sum(sigma_j,d)/d; fvec_set(sigma_j,d,var); } } long nz=0; for(i=0; i<k*d; i++) if(g->sigma[i]<min_sigma) { g->sigma[i]=min_sigma; nz++; } if(nz) printf("WARN %ld sigma diagonals are too small (set to %g)\n",nz,min_sigma); for (j = 0 ; j < k ; j++) { fvec_div_by (g->mu + j * d, d, g->w[j]); fvec_div_by (g->sigma + j * d, d, g->w[j]); } assert(finite(fvec_sum(g->mu, k*d))); fvec_normalize (g->w, k, 1); printf ("w = "); fvec_print (g->w, k); double imfac = k * fvec_sum_sqr (g->w, k); printf (" imfac = %.3f\n", imfac); free (vtmp); free (w_old); free (mu_old); }
gmm_t * gmm_learn (int di, int ni, int ki, int niter, const float * v, int nt, int seed, int nredo, int flags) { long d=di,k=ki,n=ni; int iter, iter_tot = 0; double old_key, key = 666; niter = (niter == 0 ? 10000 : niter); /* the GMM parameters */ float * p = fvec_new_0 (n * k); /* p(ci|x) for all i */ gmm_t * g = gmm_new (d, k); /* initialize the GMM: k-means + variance estimation */ int * nassign = ivec_new (n); /* not useful -> to be removed when debugged */ float * dis = fvec_new (n); kmeans (d, n, k, niter, v, nt, seed, nredo, g->mu, dis, NULL, nassign); fflush (stderr); fprintf (stderr, "assign = "); ivec_print (nassign, k); fprintf (stderr, "\n"); free (nassign); /* initialization of the GMM parameters assuming a diagonal matrix */ fvec_set (g->w, k, 1.0 / k); double sig = fvec_sum (dis, n) / n; printf ("sigma at initialization = %.3f\n", sig); fvec_set (g->sigma, k * d, sig); free (dis); /* start the EM algorithm */ fprintf (stdout, "<><><><> GMM <><><><><>\n"); if(flags & GMM_FLAGS_PURE_KMEANS) niter=0; for (iter = 1 ; iter <= niter ; iter++) { gmm_compute_p_thread (n, v, g, p, flags, nt); fflush(stdout); gmm_handle_empty(n, v, g, p); gmm_compute_params (n, v, p, g, flags, nt); fflush(stdout); iter_tot++; /* convergence reached -> leave */ old_key = key; key = fvec_sum (g->mu, k * d); printf ("keys %5d: %.6f -> %.6f\n", iter, old_key, key); fflush(stdout); if (key == old_key) break; } fprintf (stderr, "\n"); free(p); return g; }
void mexFunction (int nlhs, mxArray *plhs[], int nrhs, const mxArray*prhs[]) { if (nrhs < 4) mexErrMsgTxt("At least 4 arguments are required even nb of input arguments required."); else if (nlhs != 1) mexErrMsgTxt("yael_fisher produces exactly 1 output argument."); int flags = GMM_FLAGS_MU; int verbose = 0; int fishernorm1 = 1; if(mxGetClassID(PARAM_V)!=mxSINGLE_CLASS) mexErrMsgTxt("need single precision array."); if(mxGetClassID(PARAM_W)!=mxSINGLE_CLASS) mexErrMsgTxt("need single precision array."); if(mxGetClassID(PARAM_MU)!=mxSINGLE_CLASS) mexErrMsgTxt("need single precision array."); if(mxGetClassID(PARAM_SIGMA)!=mxSINGLE_CLASS) mexErrMsgTxt("need single precision array."); float *v = (float*) mxGetPr (PARAM_V); float *w = (float*) mxGetPr (PARAM_W); float *mu = (float*) mxGetPr (PARAM_MU); float *sigma = (float*) mxGetPr (PARAM_SIGMA); { int i; for(i = 4 ; i < nrhs ; i += 1) { char varname[256]; if (mxGetClassID(prhs[i]) != mxCHAR_CLASS) mexErrMsgTxt ("variable name required"); if (mxGetString (prhs[i], varname, 256) != 0) mexErrMsgTxt ("Could not convert string data"); if (!strcmp(varname, "sigma")) flags |= GMM_FLAGS_SIGMA; else if (!strcmp(varname,"weights")) flags |= GMM_FLAGS_W; else if (!strcmp(varname,"nomu")) flags &= ~ GMM_FLAGS_MU; else if (!strcmp(varname,"verbose")) verbose = 1; else if (!strcmp(varname,"nonorm")) fishernorm1 = 0; else mexErrMsgTxt("unknown variable name"); } } if (verbose) { fprintf (stdout, "v -> %ld x %ld\n", mxGetM (PARAM_V), mxGetN (PARAM_V)); fprintf (stdout, "w -> %ld x %ld\n", mxGetM (PARAM_W), mxGetN (PARAM_W)); fprintf (stdout, "mu -> %ld x %ld\n", mxGetM (PARAM_MU), mxGetN (PARAM_MU)); fprintf (stdout, "sigma -> %ld x %ld\n", mxGetM (PARAM_SIGMA), mxGetN (PARAM_SIGMA)); } int d = mxGetM (PARAM_V); /* vector dimensionality */ int n = mxGetN (PARAM_V); /* number of fisher vector to produce */ int k = mxGetN (PARAM_W); /* number of gaussian */ if (verbose) fprintf (stdout, "d = %d\nn = %d\nk = %d\n", d, n, k); if (mxGetM (PARAM_MU) != d || mxGetM (PARAM_SIGMA) != d || mxGetN (PARAM_MU) !=k || mxGetN (PARAM_SIGMA) != k || (mxGetM (PARAM_W) != 1 && mxGetN (PARAM_W) != 1) ) mexErrMsgTxt("Invalid input dimensionalities."); /* ouptut: GMM, i.e., weights, mu and variances */ gmm_t g = {d, k, w, mu, sigma}; int dout = gmm_fisher_sizeof (&g, flags); if (verbose) fprintf (stdout, "Size of the fisher vector = %d\n", dout); plhs[0] = mxCreateNumericMatrix (dout, 1, mxSINGLE_CLASS, mxREAL); float * vf = (float *) mxGetPr (plhs[0]); gmm_fisher (n, v, &g, flags, vf); if (fishernorm1) { int ret = fvec_normalize (vf, dout, 2.); if (ret == 1) fvec_set (vf, dout, 1); } }