static void compute_sum_dcov_thread(int ni,int ki,int di, const float *v,const float *mu_old,const float *p, float *mu,float *sigma,float *w, int n_thread) { long n=ni,d=di,k=ki; compute_sum_dcov_t t= { n,k,d, v,mu_old,p, fvec_new(n_thread*d*k), /* mu */ fvec_new(n_thread*d*k), /* sigma */ fvec_new(n_thread*k), /* w */ n_thread }; compute_tasks(n_thread,n_thread,&compute_sum_dcov_task_fun,&t); /* accumulate over n's */ long i; fvec_cpy(mu,t.mu,k*d); fvec_cpy(sigma,t.sigma,k*d); fvec_cpy(w,t.w,k); for(i=1; i<n_thread; i++) { fvec_add(mu,t.mu+i*d*k,d*k); fvec_add(sigma,t.sigma+i*d*k,d*k); fvec_add(w,t.w+i*k,k); } free(t.mu); free(t.sigma); free(t.w); }
/* * A simple test for the binary embedding */ int test_embed_tfidf() { int i, j, n, err = 0; string_t strs[10]; config_set_string(&cfg, "features.vect_norm", "none"); config_set_string(&cfg, "features.tfidf_file", TEST_TFIDF); unlink(TEST_TFIDF); char *test_file = getenv("TEST_FILE"); idf_create(test_file); test_printf("Testing TFIDF embedding"); input_config("lines"); n = input_open(test_file); input_read(strs, n); /* Compute IDF manually */ config_set_string(&cfg, "features.vect_embed", "bin"); fvec_t *w = fvec_zero(); for (i = 0, err = 0; i < n; i++) { fvec_t *fv = fvec_extract(strs[i].str, strs[i].len); fvec_add(w, fv); fvec_destroy(fv); } fvec_invert(w); fvec_mul(w, n); fvec_log2(w); if (!idf_check(w)) { err++; test_error("(%d) internal idf values seem to be wrong", i); } /* Invert w for multiplying out IDFs */ fvec_invert(w); config_set_string(&cfg, "features.vect_embed", "tfidf"); for (i = 0, err = 0; i < n; i++) { fvec_t *fv = fvec_extract(strs[i].str, strs[i].len); fvec_times(fv, w); /* Check if rest tf */ double d = 0; for (j = 0; j < fv->len; j++) d += fv->val[j]; err += fabs(d - 1.0) > 1e-6; fvec_destroy(fv); } test_return(err, n); fvec_destroy(w); input_free(strs, n); input_close(); idf_destroy(); unlink(TEST_TFIDF); return err; }
/*
 * Spectral pre-processing and peak picking.
 *
 * newmag is copied into the scratch vector p->scratch, which is then
 * processed in place: minimum removal, alpha normalisation, adaptive
 * threshold (using p->scratch2 as work buffer) and subtraction of the fixed
 * threshold p->threshold. Peaks are picked on the processed copy, but their
 * magnitudes are read back from the untouched newmag. The result is stored
 * in p->peaks and p->count.
 */
void
aubio_pitchmcomb_spectral_pp (aubio_pitchmcomb_t * p, fvec_t * newmag)
{
  fvec_t *work = (fvec_t *) p->scratch;
  fvec_t *thres_buf = (fvec_t *) p->scratch2;
  uint_t n_bins = work->length;
  uint_t idx = 0;
  aubio_spectralpeak_t *found;
  uint_t n_found;

  /* copy newmag into the scratch vector */
  while (idx < n_bins) {
    work->data[idx] = newmag->data[idx];
    idx++;
  }

  fvec_min_removal (work);                /* min removal */
  fvec_alpha_normalise (work, p->alpha);  /* alpha normalisation */
  /* low pass filtering is skipped */
  /** \bug fvec_moving_thres may write out of bounds */
  fvec_adapt_thres (work, thres_buf, p->win_post, p->win_pre);  /* adaptive threshold */
  fvec_add (work, -p->threshold);         /* fixed threshold */

  found = (aubio_spectralpeak_t *) p->peaks;

  /* fills in bin and ebin of each peak, returns the number of peaks */
  n_found = aubio_pitchmcomb_quadpick (found, work);

  /* magnitudes of the peaks come from the unprocessed spectrum */
  for (idx = 0; idx < n_found; idx++) {
    found[idx].mag = newmag->data[found[idx].bin];
  }
  /* reset the remaining (non peak) entries */
  for (idx = n_found; idx < n_bins; idx++) {
    found[idx].mag = 0.;
  }

  p->peaks = found;
  p->count = n_found;
}
/*
 * Accumulate PCA statistics for a batch of n input vectors.
 *
 * Two temporaries are filled from the batch -- a d-sized vector through
 * fmat_sum_rows() and a d*d matrix through fmat_mul_tr(v, v, ...) -- and
 * added to the running pca->mu and pca->cov. pca->n grows by n.
 */
void pca_online_accu (struct pca_online_s * pca, const float * v, long n)
{
  int d = pca->d;
  long cov_size = d * (long) d;   /* widened before multiplying */

  float * batch_cov = fvec_new (cov_size);
  float * batch_mu = fvec_new (d);

  /* statistics of this batch only */
  fmat_sum_rows (v, d, n, batch_mu);
  fmat_mul_tr (v, v, d, d, n, batch_cov);

  /* fold them into the running totals */
  fvec_add (pca->mu, batch_mu, d);
  fvec_add (pca->cov, batch_cov, cov_size);
  pca->n += n;

  free (batch_cov);
  free (batch_mu);
}
/*
 * Center the n columns (each of d floats, stored contiguously) of v in place.
 *
 * The columns are summed, the sum is divided by n, and the resulting
 * average is subtracted from every column.
 *
 * Returns the newly allocated average column (d floats).
 * NOTE(review): the caller presumably has to free() it -- confirm against
 * fvec_new_cpy's allocator.
 */
float *fmat_center_columns(int d, int n, float *v)
{
  long col;
  long off;
  float *mean;

  assert(n > 0);

  /* start from column 0, then add columns 1 .. n-1 */
  mean = fvec_new_cpy(v, d);
  off = d;
  for (col = 1; col < n; col++) {
    fvec_add(mean, v + off, d);
    off += d;
  }
  fvec_div_by(mean, d, n);

  /* subtract the average from every column */
  off = 0;
  for (col = 0; col < n; col++) {
    fvec_sub(v + off, mean, d);
    off += d;
  }

  return mean;
}
/*
 * Internal: Allocates and extracts a feature vector from a string
 * without postprocessing, optionally blending in shorter n-grams.
 *
 * The n-gram length is read from "features.ngram_len". When
 * "features.ngram_blend" is enabled, the vectors for all lengths
 * 1 .. n-1 are extracted as well and added to the result.
 *
 * @param x String of bytes (with space delimiters)
 * @param l Length of sequence
 * @return feature vector
 */
fvec_t *fvec_extract_intern(char *x, int l)
{
    int blend;
    cfg_int ngram_len, len;

    /* Get config */
    config_lookup_bool(&cfg, "features.ngram_blend", &blend);
    config_lookup_int(&cfg, "features.ngram_len", &ngram_len);

    /* Extract n-grams of the configured length */
    fvec_t *fv = fvec_extract_intern2(x, l, ngram_len);

    /* Nothing more to do without blending */
    if (!blend)
        return fv;

    /* Blend in all shorter n-grams */
    for (len = 1; len < ngram_len; len++) {
        fvec_t *shorter = fvec_extract_intern2(x, l, len);
        fvec_add(fv, shorter);
        fvec_destroy(shorter);
    }

    return fv;
}
/*
 * A stress test for the addition of feature vectors.
 *
 * A random digit string is extracted in every round, added to a running
 * vector and subtracted again. After each addition the L2 norm of the
 * sum is expected to be sqrt(2).
 *
 * @return number of detected errors
 */
int test_stress_add()
{
    int round, pos, err = 0;
    fvec_t *rnd, *sum, *acc;
    char buf[STR_LENGTH + 1];

    test_printf("Stress test for addition of feature vectors");

    /* Initial accumulator */
    acc = fvec_extract("aa0bb0cc", 8, "zero");

    round = 0;
    while (round < NUM_VECTORS) {
        /* Create a random string of digits */
        for (pos = 0; pos < STR_LENGTH; pos++)
            buf[pos] = rand() % 10 + '0';
        buf[pos] = 0;

        /* Extract features */
        rnd = fvec_extract(buf, strlen(buf), "test");

        /* sum = acc + rnd; the old accumulator is no longer needed */
        sum = fvec_add(acc, rnd);
        fvec_destroy(acc);

        if (fabs(fvec_norm2(sum) - 1.4142135623) > 1e-7)
            err++;

        /* Subtract rnd again to get the next accumulator */
        acc = fvec_sub(sum, rnd);
        fvec_sparsify(acc);

        /* Clean up */
        fvec_destroy(sum);
        fvec_destroy(rnd);

        round++;
    }

    fvec_destroy(acc);
    test_return(err, round);
    return err;
}
/*
 * A simple static test for the addition of feature vectors.
 *
 * Walks the test_add table until an entry with a NULL x string is found.
 * For every entry both strings are extracted, the vectors are added, and
 * the L1 norm of the sum is compared with the expected result.
 *
 * @return number of detected errors
 */
int test_static_add()
{
    int idx = 0, err = 0;
    fvec_t *lhs, *rhs, *sum;

    test_printf("Addition of feature vectors");

    while (test_add[idx].x) {
        /* Extract features */
        lhs = fvec_extract(test_add[idx].x, strlen(test_add[idx].x), "test");
        rhs = fvec_extract(test_add[idx].y, strlen(test_add[idx].y), "test");

        /* Add test vectors and check the norm */
        sum = fvec_add(lhs, rhs);
        if (fabs(fvec_norm1(sum) - test_add[idx].res) > 1e-7)
            err++;

        fvec_destroy(sum);
        fvec_destroy(lhs);
        fvec_destroy(rhs);

        idx++;
    }

    test_return(err, idx);
    return err;
}
/*
 * Add the d-dimensional vector avg to each of the n columns of v.
 * Columns are stored contiguously, d floats each; v is modified in place.
 */
void fmat_add_to_columns(int d, int n, float *v, const float *avg)
{
  long col = 0;   /* long, so that col * d is computed in long arithmetic */

  while (col < n) {
    fvec_add(v + col * d, avg, d);
    col++;
  }
}
/*
 * Shift v by the negated result of fvec_min (v), i.e. call
 * fvec_add (v, -min).
 */
void
fvec_min_removal (fvec_t * v)
{
  smpl_t lowest = fvec_min (v);
  smpl_t shift = -lowest;

  fvec_add (v, shift);
}
/**
 * Print shared n-grams for each cluster.
 *
 * For every cluster the binarized feature vectors of its members are
 * summed and divided by the number of members, which gives for each
 * n-gram the ratio of members containing it. All n-grams whose ratio
 * reaches the configured minimum ("cluster.shared_ngrams") are appended
 * to the output file. Nothing is written if that minimum is <= 0.
 *
 * Bytes of an n-gram that are not printable are written as %xx.
 *
 * @param c Clustering structure
 * @param fa Array of feature vectors
 * @param file Output file (opened in append mode)
 */
void export_shared_ngrams(cluster_t *c, farray_t *fa, const char *file)
{
    assert(c && fa && file);
    int i, j, k;
    double shared;
    FILE *f;
    char *name = NULL;

    config_lookup_float(&cfg, "cluster.shared_ngrams", &shared);
    if (shared <= 0.0)
        return;

    if (verbose > 0)
        printf("Exporting shared n-grams with minimum ratio %4.2f.\n",
               shared);

    if (!(f = fopen(file, "a"))) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Print incremental header */
    fprintf(f, "# ---\n# Shared n-grams for %s\n", fa->src);
    fprintf(f, "# Minimum ratio of shared n-grams: %4.2f (%2.0f%%)\n",
            shared, shared * 100);
    fprintf(f, "# ---\n# <cluster> <ratio> <hash> <ngram>\n");

    /* Compute shared n-grams per cluster */
    for (i = 0; i < c->num; i++) {
        fvec_t *s = fvec_zero();

        for (j = 0, k = 0; j < c->len; j++) {
            if (c->cluster[j] != i)
                continue;

            /* Clone and binarize */
            fvec_t *x = fvec_clone(fa->x[j]);
            fvec_bin(x);

            /* The cluster is named after its first member */
            if (k == 0)
                name = cluster_get_name(c, j);

            /* Merge n-grams in cluster */
            fvec_t *y = fvec_add(s, x);
            fvec_destroy(s);
            fvec_destroy(x);
            s = y;
            k++;
        }

        /* Check for empty cluster; the accumulator must still be freed */
        if (k == 0) {
            fvec_destroy(s);
            continue;
        }

        /* Turn counts into ratios */
        fvec_div(s, k);

        /* Output shared n-grams */
        for (j = 0; j < s->len; j++) {
            if (s->val[j] < shared)
                continue;

            fprintf(f, "%s %6.4f %.16llx ", name, s->val[j],
                    (long long unsigned int) s->dim[j]);

            /* Lookup feature */
            fentry_t *fe = ftable_get(s->dim[j]);
            if (!fe) {
                /* error() returns, so fe must not be dereferenced; close
                 * the line with an empty n-gram to keep the format intact */
                error("Oops. Feature not in lookup table.");
                fprintf(f, "\"\"\n");
                continue;
            }

            /* Print feature */
            fprintf(f, "\"");
            for (k = 0; k < fe->len; k++) {
                /* <ctype.h> functions and %x need the byte as a
                 * non-negative value, whatever the signedness of data[] */
                unsigned char byte = (unsigned char) fe->data[k];

                if (isprint(byte) || byte == '%')
                    fprintf(f, "%c", byte);
                else
                    fprintf(f, "%%%.2x", byte);
            }
            fprintf(f, "\"\n");
        }
        fvec_destroy(s);
    }

    fclose(f);
}
/*
 * Estimate the GMM parameters (weights, means, diagonal variances) of g
 * from n input vectors v and the matrix p, which is passed on to the
 * accumulation helpers together with a copy of the previous means.
 *
 * Steps:
 *  1. keep a copy of the current means, zero g->w, g->mu and g->sigma;
 *  2. accumulate into them with compute_sum_dcov() (n_thread <= 1) or
 *     compute_sum_dcov_thread();
 *     NOTE(review): presumably, for every Gaussian j, these produce the
 *     unnormalized sums
 *         w[j]     = sum_i p[i*k+j]
 *         mu[j]    = sum_i p[i*k+j] * v[i]
 *         sigma[j] = sum_i p[i*k+j] * (v[i] - mu_old[j])^2
 *     -- confirm against the helpers;
 *  3. with GMM_FLAGS_1SIGMA, replace each sigma row by its mean value;
 *  4. raise sigma entries below min_sigma to min_sigma (with a warning);
 *  5. divide mu and sigma rows by w[j], then normalize w with
 *     fvec_normalize (g->w, k, 1).
 *
 * Prints the weights and k * sum(w^2) ("imfac") on stdout.
 */
static void gmm_compute_params (int n, const float * v, const float * p,
                                gmm_t * g, int flags, int n_thread)
{
  long i, j;
  long d = g->d, k = g->k;

  /* the helpers take the means of the previous iteration */
  float * mu_old = fvec_new_cpy (g->mu, k * d);

  fvec_0 (g->w, k);
  fvec_0 (g->mu, k * d);
  fvec_0 (g->sigma, k * d);

  if (n_thread <= 1)
    compute_sum_dcov (n, k, d, v, mu_old, p, g->mu, g->sigma, g->w);
  else
    compute_sum_dcov_thread (n, k, d, v, mu_old, p, g->mu, g->sigma, g->w, n_thread);

  if (flags & GMM_FLAGS_1SIGMA) {
    /* one variance per Gaussian: the mean over its d dimensions */
    for (j = 0 ; j < k ; j++) {
      float *sigma_j = g->sigma + j * d;
      double var = fvec_sum (sigma_j, d) / d;
      fvec_set (sigma_j, d, var);
    }
  }

  /* clamp too small entries (done on the still unnormalized sums) */
  long nz = 0;
  for (i = 0 ; i < k * d ; i++) {
    if (g->sigma[i] < min_sigma) {
      g->sigma[i] = min_sigma;
      nz++;
    }
  }

  if (nz)
    printf ("WARN %ld sigma diagonals are too small (set to %g)\n", nz, min_sigma);

  /* normalize the sums by the (unnormalized) weight of each Gaussian */
  for (j = 0 ; j < k ; j++) {
    fvec_div_by (g->mu + j * d, d, g->w[j]);
    fvec_div_by (g->sigma + j * d, d, g->w[j]);
  }

  assert (finite (fvec_sum (g->mu, k * d)));

  fvec_normalize (g->w, k, 1);

  printf ("w = ");
  fvec_print (g->w, k);
  double imfac = k * fvec_sum_sqr (g->w, k);
  printf (" imfac = %.3f\n", imfac);

  free (mu_old);
}
/*
 * Compute a VLAD-style descriptor of n d-dimensional vectors v with respect
 * to k centroids. The variant is selected by flags; every branch zeroes and
 * fills desc itself.
 *
 * Size written to desc (in floats): k*d*3 for flags==6, k*d*2 for flags==15
 * and flags==17, k*d otherwise.
 *
 * Variants, with "residual" = v[i] - centroids[assign[i]]:
 *   5      sum of the raw vectors per centroid
 *   6, 7   per centroid and dimension, quantiles of the assigned vectors
 *          (3 for flags==6, 1 for flags==7) minus the centroid, times the
 *          number of assigned vectors
 *   8, 9   sum of residuals divided by their L2 norm (8) or by the square
 *          root of that norm (9); zero residuals are skipped
 *   10     as 5, then fvec_normalize(.., 2.0) on each centroid's block
 *   11, 12 residual sum over several assignments per vector obtained from
 *          knn() (4 for flags==11, 2 for flags==12)
 *   13     sum of squared residuals
 *   14     square root of the sum of squared (residual - mean residual)
 *   15     first half: residual sum; second half: as 14
 *   17     positive residual parts in the first half, magnitudes of the
 *          non-positive parts in the second half
 *   1      residual sum divided by the number of assigned vectors
 *   2      residual sum, then fvec_normalize(.., 2.0) per centroid
 *   16     residual sum, scaled by sqrt(count) for non-empty centroids
 *   3, 4   not implemented (assertion failure)
 *   other  plain residual sum
 *
 * NOTE(review): nn()/knn() presumably store nearest-centroid indices in
 * assign -- confirm against their definitions.
 */
void vlad_compute(int k, int d, const float *centroids, int n, const float *v,int flags, float *desc)
{
  int i, j, l, n_quantile, i0, i1, ai, a, ma, ni;
  int *perm;
  float un, diff;
  float *tab, *u, *avg, *sum, *mom2, *dists;
  int *hist, *assign;

  if (flags < 11 || flags >= 13) {
    /* single assignment per input vector */
    assign = ivec_new(n);
    nn(n, k, d, centroids, v, assign, NULL, NULL);

    if (flags == 6 || flags == 7) {
      n_quantile = flags == 6 ? 3 : 1;
      fvec_0(desc, k * d * n_quantile);
      perm = ivec_new(n);
      tab = fvec_new(n);

      /* perm lists the vectors grouped by centroid; [i0,i1) is centroid i */
      ivec_sort_index(assign, n, perm);
      i0 = 0;
      for (i = 0; i < k; i++) {
        i1 = i0;
        while (i1 < n && assign[perm[i1]] == i) {
          i1++;
        }
        if (i1 == i0)
          continue;             /* empty centroid: its block stays 0 */

        for (j = 0; j < d; j++) {
          for (l = i0; l < i1; l++) {
            tab[l - i0] = v[perm[l] * d + j];
          }
          ni = i1 - i0;
          fvec_sort(tab, ni);
          for (l = 0; l < n_quantile; l++) {
            desc[(i * d + j) * n_quantile + l] =
                (tab[(l * ni + ni / 2) / n_quantile] - centroids[i * d + j]) * ni;
          }
        }
        i0 = i1;
      }
      free(perm);
      free(tab);

    } else if (flags == 5) {
      fvec_0(desc, k * d);
      for (i = 0; i < n; i++) {
        for (j = 0; j < d; j++) {
          desc[assign[i] * d + j] += v[i * d + j];
        }
      }

    } else if (flags == 8 || flags == 9) {
      fvec_0(desc, k * d);
      u = fvec_new(d);
      for (i = 0; i < n; i++) {
        /* u = residual of vector i */
        fvec_cpy(u, v + i * d, d);
        fvec_sub(u, centroids + assign[i] * d, d);
        un = (float)sqrt(fvec_norm2sqr(u, d));
        if (un == 0)
          continue;             /* avoid dividing by a zero norm */
        if (flags == 8) {
          fvec_div_by(u, d, un);
        } else if (flags == 9) {
          fvec_div_by(u, d, sqrt(un));
        }
        fvec_add(desc + assign[i] * d, u, d);
      }
      free(u);

    } else if (flags == 10) {
      fvec_0(desc, k * d);
      for (i = 0; i < n; i++) {
        for (j = 0; j < d; j++) {
          desc[assign[i] * d + j] += v[i * d + j];
        }
      }
      for (i = 0; i < k; i++) {
        fvec_normalize(desc + i * d, d, 2.0);
      }

    } else if (flags == 13) {
      fvec_0(desc, k * d);
      for (i = 0; i < n; i++) {
        for (j = 0; j < d; j++) {
          desc[assign[i] * d + j] +=
              (float)sqr(v[i * d + j] - centroids[assign[i] * d + j]);
        }
      }

    } else if (flags == 14) {
      /* first pass: mean residual per centroid */
      avg = fvec_new_0(k * d);
      for (i = 0; i < n; i++) {
        for (j = 0; j < d; j++) {
          avg[assign[i] * d + j] += v[i * d + j] - centroids[assign[i] * d + j];
        }
      }
      hist = ivec_new_histogram(k, assign, n);
      for (i = 0; i < k; i++) {
        if (hist[i] > 0) {
          for (j = 0; j < d; j++) {
            avg[i * d + j] /= hist[i];
          }
        }
      }
      free(hist);

      /* second pass: squared deviation from that mean */
      fvec_0(desc, k * d);
      for (i = 0; i < n; i++) {
        for (j = 0; j < d; j++) {
          desc[assign[i] * d + j] +=
              (float)(sqr(v[i * d + j] - centroids[assign[i] * d + j] - avg[assign[i] * d + j]));
        }
      }
      fvec_sqrt(desc, k * d);
      free(avg);

    } else if (flags == 15) {
      fvec_0(desc, k * d * 2);

      /* first half of desc: residual sum */
      sum = desc;
      for (i = 0; i < n; i++) {
        for (j = 0; j < d; j++) {
          sum[assign[i] * d + j] += v[i * d + j] - centroids[assign[i] * d + j];
        }
      }
      hist = ivec_new_histogram(k, assign, n);

      /* second half: deviation from the mean residual; hist[ai] counts
       * at least vector i itself, so the division is safe */
      mom2 = desc + k * d;
      for (i = 0; i < n; i++) {
        ai = assign[i];
        for (j = 0; j < d; j++) {
          mom2[ai * d + j] +=
              (float)(sqr(v[i * d + j] - centroids[ai * d + j] - sum[ai * d + j] / hist[ai]));
        }
      }
      fvec_sqrt(mom2, k * d);
      free(hist);

    } else if (flags == 17) {
      fvec_0(desc, k * d * 2);
      for (i = 0; i < n; i++) {
        for (j = 0; j < d; j++) {
          diff = v[i * d + j] - centroids[assign[i] * d + j];
          if (diff > 0) {
            desc[assign[i] * d + j] += diff;
          } else {
            desc[assign[i] * d + j + k * d] -= diff;
          }
        }
      }

    } else {
      /* plain residual sum, optionally post-processed below */
      fvec_0(desc, k * d);
      for (i = 0; i < n; i++) {
        for (j = 0; j < d; j++) {
          desc[assign[i] * d + j] += v[i * d + j] - centroids[assign[i] * d + j];
        }
      }

      if (flags == 1) {
        hist = ivec_new_histogram(k, assign, n);
        for (i = 0; i < k; i++) {
          /* an empty centroid has a zero block and a zero count: leave it
           * at 0 instead of producing 0/0 = NaN */
          if (hist[i] > 0) {
            for (j = 0; j < d; j++) {
              desc[i * d + j] /= hist[i];
            }
          }
        }
        free(hist);
      }

      if (flags == 2) {
        for (i = 0; i < k; i++) {
          fvec_normalize(desc + i * d, d, 2.0);
        }
      }

      if (flags == 3 || flags == 4) {
        assert(!"not implemented");
      }

      if (flags == 16) {
        hist = ivec_new_histogram(k, assign, n);
        for (i = 0; i < k; i++) {
          if (hist[i] > 0) {
            /* NOTE(review): the result of fvec_norm() is discarded here;
             * fvec_normalize() may have been intended -- confirm */
            fvec_norm(desc + i * d, d, 2);
            fvec_mul_by(desc + i * d, d, sqrt(hist[i]));
          }
        }
        free(hist);
      }
    }
    free(assign);

  } else if (flags == 11 || flags == 12) {
    /* multiple assignment: ma centroids per input vector */
    ma = flags == 11 ? 4 : 2;
    assign = ivec_new(n * ma);
    dists = knn(n, k, d, ma, centroids, v, assign, NULL, NULL);
    fvec_0(desc, k * d);
    for (i = 0; i < n; i++) {
      for (j = 0; j < d; j++) {
        for (a = 0; a < ma; a++) {
          desc[assign[ma * i + a] * d + j] +=
              v[i * d + j] - centroids[assign[ma * i + a] * d + j];
        }
      }
    }
    free(dists);
    free(assign);
  }
}