/* Accumulate, for each of the ki mixture components, the probability-weighted
 * statistics of ni points of dimension di.
 *
 *   v       points, di values per point (point i starts at v + i*di)
 *   mu_old  reference means, di values per component
 *   p       weights, p[i*ki + j] is the weight of point i for component j
 *   mu      out: per component, sum_i p[i,j] * v[i]          (not normalized)
 *   sigma   out: per component, sum_i p[i,j] * (v[i] - mu_old[j])^2, per dimension
 *   w       out: per component, sum_i p[i,j]
 *
 * None of the outputs is divided by the component weight here.
 */
static void compute_sum_dcov(int ni, int ki, int di,
                             const float *v, const float *mu_old, const float *p,
                             float *mu, float *sigma, float *w)
{
  /* sgemm_ takes its dimensions by address, as FINTEGER */
  FINTEGER n = ni, k = ki, d = di;
  long pt, comp, dim;

  /* total weight received by each component, summed in double precision */
  for (comp = 0; comp < k; comp++) {
    double mass = 0;
    for (pt = 0; pt < n; pt++) {
      mass += p[pt * k + comp];
    }
    w[comp] = mass;
  }

  float zero = 0, one = 1;

  /* mu (d x k) = v (d x n) * p^T (n x k) */
  sgemm_("Not transposed", "Transposed", &d, &k, &n, &one, v, &d, p, &k, &zero, mu, &d);

  /* sigma (d x k) = v.^2 (d x n) * p^T (n x k), using a squared copy of v */
  const long total = n * (long)d;
  float *v_sq = fvec_new_cpy(v, total);
  fvec_sqr(v_sq, total);
  sgemm_("Not transposed", "Transposed", &d, &k, &n, &one, v_sq, &d, p, &k, &zero, sigma, &d);
  free(v_sq);

  /* Turn sum p*v^2 into sum p*(v - mu_old)^2 by adding
   * mu_old^2 * w - 2 * mu_old * (sum p*v). */
  for (comp = 0; comp < k; comp++) {
    float *sig_c = sigma + comp * d;
    const float *ref_c = mu_old + comp * d;
    const float *sum_c = mu + comp * d;
    for (dim = 0; dim < d; dim++) {
      const float m = ref_c[dim];
      sig_c[dim] += m * (m * w[comp] - 2 * sum_c[dim]);
    }
  }
}
/* Project n input vectors of dimension d with the PCA described by pca and
 * write the result to vo.
 *
 * A working copy of v is made so the input is left untouched; pca->mu is
 * passed to fmat_subtract_from_columns on that copy, and the copy is then
 * multiplied with pca->eigvec by fmat_mul_full, with dout and n as the
 * output dimensions. d must equal pca->d (checked with assert).
 */
void pca_online_project (const pca_online_t *pca, const float *v, float *vo,
                         int d, long n, int dout)
{
  /* transposition flags for fmat_mul_full: 'T' for eigvec, 'N' for the data */
  const char trmat[2] = {'T', 'N'};

  const long total = n * d;
  float *centered = fvec_new_cpy (v, total);

  assert (d == pca->d);

  fmat_subtract_from_columns (pca->d, n, centered, pca->mu);
  fmat_mul_full (pca->eigvec, centered, dout, n, pca->d, trmat, vo);

  free (centered);
}
/* Center the n columns (each of d floats, stored contiguously) of v in place.
 *
 * The mean column is built by summing all columns and dividing by n, then it
 * is subtracted from every column of v. Requires n > 0 (checked with assert).
 *
 * Returns the mean column, a buffer obtained from fvec_new_cpy; presumably
 * the caller is responsible for freeing it.
 */
float *fmat_center_columns(int d, int n, float *v)
{
  assert(n > 0);

  /* start the accumulator from column 0, then add the remaining columns */
  float *mean = fvec_new_cpy(v, d);
  long col = 1;
  while (col < n) {
    fvec_add(mean, v + col * d, d);
    col++;
  }
  fvec_div_by(mean, d, n);

  /* remove the mean from every column */
  for (col = 0; col < n; col++) {
    fvec_sub(v + col * d, mean, d);
  }

  return mean;
}
/* Estimate the GMM parameters (weights, means, diagonal variances) of g from
 * n points and their per-component weights.
 *
 *   n         number of points
 *   v         points, g->d values per point
 *   p         weights, p[i*k + j] is the weight of point i for component j
 *   g         model to update in place (g->w, g->mu, g->sigma are overwritten)
 *   flags     if GMM_FLAGS_1SIGMA is set, each component gets a single
 *             variance value shared by all its dimensions
 *   n_thread  <= 1 selects compute_sum_dcov, otherwise compute_sum_dcov_thread
 *
 * Prints a warning when variances are floored to min_sigma, then the
 * normalized weights and the "imfac" statistic.
 */
static void gmm_compute_params (int n, const float * v, const float * p,
                                gmm_t * g, int flags, int n_thread)
{
  long i, j;
  long d = g->d, k = g->k;

  /* the variances are accumulated around the means of the previous iteration */
  float * mu_old = fvec_new_cpy (g->mu, k * d);

  fvec_0 (g->w, k);
  fvec_0 (g->mu, k * d);
  fvec_0 (g->sigma, k * d);

  /* Accumulate the weighted sums. Written out naively, for component j:
   *   w[j]     = sum_i p[i*k+j]
   *   mu[j]    = sum_i p[i*k+j] * v[i]
   *   sigma[j] = sum_i p[i*k+j] * (v[i] - mu_old[j])^2   (per dimension)
   * The helpers below compute the same quantities with matrix products. */
  if (n_thread <= 1)
    compute_sum_dcov (n, k, d, v, mu_old, p, g->mu, g->sigma, g->w);
  else
    compute_sum_dcov_thread (n, k, d, v, mu_old, p, g->mu, g->sigma, g->w, n_thread);

  if (flags & GMM_FLAGS_1SIGMA) {
    /* replace each component's variances by their average over dimensions */
    for (j = 0 ; j < k ; j++) {
      float *sigma_j = g->sigma + j * d;
      double var = fvec_sum (sigma_j, d) / d;
      fvec_set (sigma_j, d, var);
    }
  }

  /* Floor the variance accumulators at min_sigma. Note that this is applied
   * to the weighted sums, i.e. before the division by w[j] below. */
  long nz = 0;
  for (i = 0 ; i < k * d ; i++) {
    if (g->sigma[i] < min_sigma) {
      g->sigma[i] = min_sigma;
      nz++;
    }
  }

  if (nz)
    printf ("WARN %ld sigma diagonals are too small (set to %g)\n", nz, min_sigma);

  /* turn the weighted sums into weighted averages */
  for (j = 0 ; j < k ; j++) {
    fvec_div_by (g->mu + j * d, d, g->w[j]);
    fvec_div_by (g->sigma + j * d, d, g->w[j]);
  }

  /* a component with zero total weight would have produced inf/NaN means */
  assert (finite (fvec_sum (g->mu, k * d)));

  fvec_normalize (g->w, k, 1);

  printf ("w = ");
  fvec_print (g->w, k);
  /* k * sum_j w[j]^2, computed on the normalized weights */
  double imfac = k * fvec_sum_sqr (g->w, k);
  printf (" imfac = %.3f\n", imfac);

  free (mu_old);
}