Exemplo n.º 1
0
/*
 * Threaded front-end for the per-Gaussian sums.
 *
 * Allocates n_thread private slices for mu (d*k floats each), sigma (d*k
 * floats each) and w (k floats each), passes them to
 * compute_sum_dcov_task_fun through compute_tasks(), then reduces the
 * slices into the caller-provided mu, sigma and w.
 *
 * ni, ki, di     sizes, widened to long and stored in the task descriptor
 * v, mu_old, p   read-only inputs, forwarded untouched to the task function
 * mu, sigma, w   outputs, overwritten with the sum over all slices
 * n_thread       number of slices; also passed twice to compute_tasks()
 */
static void compute_sum_dcov_thread(int ni,int ki,int di,
                                    const float *v,const float *mu_old,const float *p,
                                    float *mu,float *sigma,float *w,
                                    int n_thread) {
    long n=ni,d=di,k=ki;
    long block=d*k;     /* floats per slice of mu and of sigma */

    compute_sum_dcov_t t= {
        n,k,d,
        v,mu_old,p,
        fvec_new(n_thread*block), /* mu */
        fvec_new(n_thread*block), /* sigma */
        fvec_new(n_thread*k), /* w */
        n_thread
    };

    compute_tasks(n_thread,n_thread,&compute_sum_dcov_task_fun,&t);

    /* slice 0 initialises the outputs ... */
    fvec_cpy(mu,t.mu,block);
    fvec_cpy(sigma,t.sigma,block);
    fvec_cpy(w,t.w,k);

    /* ... and the remaining slices are added on top, in slice order */
    long slice;
    for(slice=1; slice<n_thread; slice++) {
        long off=slice*block;
        fvec_add(mu,t.mu+off,block);
        fvec_add(sigma,t.sigma+off,block);
        fvec_add(w,t.w+slice*k,k);
    }

    /* the per-slice scratch buffers are owned by this function */
    free(t.w);
    free(t.sigma);
    free(t.mu);
}
Exemplo n.º 2
0
/*
 * A simple test for the TFIDF embedding.
 *
 * The input file is named by the TEST_FILE environment variable. The test
 *   1. builds the IDF table with idf_create(),
 *   2. recomputes the weights by hand from binary embeddings
 *      (sum, invert, multiply by n, log2) and compares them via idf_check(),
 *   3. extracts TFIDF vectors, multiplies them by the inverted hand-made
 *      weights and expects the values of every vector to sum to 1.
 *
 * Returns the number of failed checks (IDF check plus per-string checks).
 *
 * NOTE(review): TEST_FILE is used without a NULL check, and strs has room
 * for 10 strings only -- assumes the test file has at most 10 lines.
 */
int test_embed_tfidf()
{
    int i, j, n, err = 0;
    string_t strs[10];

    config_set_string(&cfg, "features.vect_norm", "none");
    config_set_string(&cfg, "features.tfidf_file", TEST_TFIDF);

    /* Remove any stale TFIDF file before creating a new one */
    unlink(TEST_TFIDF);
    char *test_file = getenv("TEST_FILE");
    idf_create(test_file);
    test_printf("Testing TFIDF embedding");

    input_config("lines");
    n = input_open(test_file);
    input_read(strs, n);

    /* Compute IDF manually */
    config_set_string(&cfg, "features.vect_embed", "bin");
    fvec_t *w = fvec_zero();
    for (i = 0; i < n; i++) {
        fvec_t *fv = fvec_extract(strs[i].str, strs[i].len);
        fvec_add(w, fv);
        fvec_destroy(fv);
    }
    fvec_invert(w);
    fvec_mul(w, n);
    fvec_log2(w);

    if (!idf_check(w)) {
        err++;
        test_error("(%d) internal idf values seem to be wrong", i);
    }

    /* Invert w for multiplying out IDFs */
    fvec_invert(w);

    config_set_string(&cfg, "features.vect_embed", "tfidf");
    /* err must NOT be reset here, otherwise a failed IDF check is lost */
    for (i = 0; i < n; i++) {
        fvec_t *fv = fvec_extract(strs[i].str, strs[i].len);
        fvec_times(fv, w);

        /* With the IDF factor multiplied out, the values must sum to 1 */
        double d = 0;
        for (j = 0; j < fv->len; j++)
            d += fv->val[j];
        err += fabs(d - 1.0) > 1e-6;
        fvec_destroy(fv);
    }
    test_return(err, n);

    fvec_destroy(w);
    input_free(strs, n);
    input_close();

    idf_destroy();
    unlink(TEST_TFIDF);

    return err;
}
/*
 * Spectral pre-processing followed by peak picking.
 *
 * newmag is copied into the scratch vector p->scratch, which is then passed
 * through fvec_min_removal, fvec_alpha_normalise (with p->alpha),
 * fvec_adapt_thres (with p->win_post, p->win_pre and p->scratch2 as work
 * space) and a fixed offset of -p->threshold. aubio_pitchmcomb_quadpick
 * then fills p->peaks from the processed vector. The magnitude of each
 * found peak is read back from the unprocessed newmag, the remaining
 * entries of the peak array get magnitude 0, and the peak count is stored
 * in p->count.
 */
void
aubio_pitchmcomb_spectral_pp (aubio_pitchmcomb_t * p, fvec_t * newmag)
{
  fvec_t *work = (fvec_t *) p->scratch;
  fvec_t *aux = (fvec_t *) p->scratch2;
  aubio_spectralpeak_t *found;
  uint_t nbins = work->length;
  uint_t npeaks;
  uint_t k;

  /* work on a private copy of the incoming magnitudes */
  for (k = 0; k < nbins; k++) {
    work->data[k] = newmag->data[k];
  }

  /* conditioning chain; the low pass filtering step is skipped */
  fvec_min_removal (work);
  fvec_alpha_normalise (work, p->alpha);
  /** \bug fvec_moving_thres may write out of bounds */
  fvec_adapt_thres (work, aux, p->win_post, p->win_pre);
  fvec_add (work, -p->threshold);

  /* pick peaks on the conditioned spectrum (fills bin and ebin) */
  found = (aubio_spectralpeak_t *) p->peaks;
  npeaks = aubio_pitchmcomb_quadpick (found, work);

  /* peak magnitudes come from the original, unprocessed spectrum */
  k = 0;
  while (k < npeaks) {
    found[k].mag = newmag->data[found[k].bin];
    k++;
  }
  /* every entry past the last peak is reset */
  while (k < nbins) {
    found[k].mag = 0.;
    k++;
  }

  p->peaks = found;
  p->count = npeaks;
}
Exemplo n.º 4
0
/*
 * Accumulate PCA statistics for n more input vectors.
 *
 * With d = pca->d, the result of fmat_sum_rows (v, d, n, .) is added to
 * pca->mu (d floats), the result of fmat_mul_tr (v, v, d, d, n, .) is added
 * to pca->cov (d*d floats), and pca->n grows by n.
 * Presumably v holds n vectors of dimension d -- confirm against callers.
 */
void pca_online_accu (struct pca_online_s * pca, const float * v, long n)
{
  int d = pca->d;
  long dd = d*(long)d;

  /* first-order term of this batch */
  float * batch_mu = fvec_new (d);
  fmat_sum_rows (v, d, n, batch_mu);

  /* second-order term of this batch */
  float * batch_cov = fvec_new (dd);
  fmat_mul_tr (v, v, d, d, n, batch_cov);

  /* fold the batch into the running totals */
  fvec_add (pca->mu, batch_mu, d);
  fvec_add (pca->cov, batch_cov, dd);
  pca->n += n;

  free (batch_cov);
  free (batch_mu);
}
Exemplo n.º 5
0
/*
 * Center the n columns (d consecutive floats each) of v.
 *
 * The accumulator starts as a copy of the first column, every further
 * column is added to it, and fvec_div_by(., d, n) is applied -- presumably
 * a division by n, which makes it the mean column. The accumulator is then
 * subtracted from every column of v via fvec_sub.
 *
 * Requires n > 0 (asserted). Returns the accumulator obtained from
 * fvec_new_cpy; it is not freed here, so the caller takes it over.
 */
float *fmat_center_columns(int d,int n,float *v) 
{
  assert(n>0);

  float *mean=fvec_new_cpy(v,d);
  float *col;
  long left;

  /* sum columns 1 .. n-1 onto the copy of column 0 */
  col=v+d;
  for(left=(long)n-1;left>0;left--) {
    fvec_add(mean,col,d);
    col+=d;
  }

  fvec_div_by(mean,d,n);

  /* remove the result from each column */
  col=v;
  for(left=n;left>0;left--) {
    fvec_sub(col,mean,d);
    col+=d;
  }

  return mean;
}
Exemplo n.º 6
0
Arquivo: fvec.c Projeto: MLDroid/sally
/*
 * Internal: Allocates and extracts a feature vector from a string
 * without postprocessing but with optional blended n-grams.
 *
 * The n-gram length is read from "features.ngram_len". When
 * "features.ngram_blend" is set, the vectors for all shorter lengths
 * 1 .. n-1 are extracted as well and added to the result.
 *
 * @param x String of bytes (with space delimiters)
 * @param l Length of sequence
 * @return feature vector
 */
fvec_t *fvec_extract_intern(char *x, int l)
{
    int do_blend;
    cfg_int ngram_len, sub_len;

    /* Get config */
    config_lookup_bool(&cfg, "features.ngram_blend", &do_blend);
    config_lookup_int(&cfg, "features.ngram_len", &ngram_len);

    /* Vector for the configured n-gram length */
    fvec_t *result = fvec_extract_intern2(x, l, ngram_len);

    /* Nothing more to do without blending */
    if (!do_blend)
        return result;

    /* Merge in every shorter n-gram length */
    for (sub_len = 1; sub_len < ngram_len; sub_len++) {
        fvec_t *shorter = fvec_extract_intern2(x, l, sub_len);
        fvec_add(result, shorter);
        fvec_destroy(shorter);
    }

    return result;
}
Exemplo n.º 7
0
/*
 * A stress test for the addition of feature vectors.
 *
 * For NUM_VECTORS rounds a random digit string is extracted, added to an
 * accumulator and subtracted again. After each addition the 2-norm of the
 * sum is compared with 1.4142135623 (tolerance 1e-7).
 * Returns the number of rounds that failed the comparison.
 */
int test_stress_add()
{
    int round, pos, failures = 0;
    fvec_t *acc, *rnd, *sum;
    char digits[STR_LENGTH + 1];

    test_printf("Stress test for addition of feature vectors");

    /* Starting accumulator, extracted with the "zero" setting */
    acc = fvec_extract("aa0bb0cc", 8, "zero");

    for (round = 0; round < NUM_VECTORS; round++) {

        /* Random string of decimal digits, NUL-terminated */
        for (pos = 0; pos < STR_LENGTH; pos++)
            digits[pos] = rand() % 10 + '0';
        digits[pos] = 0;

        rnd = fvec_extract(digits, strlen(digits), "test");

        /* sum = acc + rnd; the old accumulator is no longer needed */
        sum = fvec_add(acc, rnd);
        fvec_destroy(acc);

        if (fabs(fvec_norm2(sum) - 1.4142135623) > 1e-7)
            failures++;

        /* Subtract rnd again to obtain the next accumulator */
        acc = fvec_sub(sum, rnd);
        fvec_sparsify(acc);

        fvec_destroy(sum);
        fvec_destroy(rnd);
    }

    fvec_destroy(acc);
    test_return(failures, round);
    return failures;
}
Exemplo n.º 8
0
/*
 * A simple static test for the addition of feature vectors.
 *
 * Walks the test_add table until an entry with a NULL x is reached. For
 * every entry both strings are extracted and added, and the 1-norm of the
 * sum is compared with the expected value res (tolerance 1e-7).
 * Returns the number of mismatching entries.
 */
int test_static_add()
{
    int idx = 0, failures = 0;

    test_printf("Addition of feature vectors");

    while (test_add[idx].x) {
        /* Extract both operands */
        fvec_t *lhs = fvec_extract(test_add[idx].x, strlen(test_add[idx].x), "test");
        fvec_t *rhs = fvec_extract(test_add[idx].y, strlen(test_add[idx].y), "test");

        /* Add them and check the norm of the result */
        fvec_t *sum = fvec_add(lhs, rhs);
        if (fabs(fvec_norm1(sum) - test_add[idx].res) > 1e-7)
            failures++;

        fvec_destroy(sum);
        fvec_destroy(lhs);
        fvec_destroy(rhs);
        idx++;
    }

    test_return(failures, idx);
    return failures;
}
Exemplo n.º 9
0
/*
 * Apply fvec_add(column, avg, d) to each of the n columns of v, where a
 * column is a run of d consecutive floats.
 */
void fmat_add_to_columns(int d,int n,float *v,const float *avg) {
  float *col=v;
  long left;
  for(left=n;left>0;left--) {
    fvec_add(col,avg,d);
    col+=d;
  }
}
Exemplo n.º 10
0
/*
 * Offset v by the negated result of fvec_min (v), using fvec_add.
 */
void
fvec_min_removal (fvec_t * v)
{
  fvec_add (v, -fvec_min (v));
}
Exemplo n.º 11
0
/**
 * Print shared n-grams for each cluster.
 *
 * For every cluster the binarized feature vectors of its members are
 * summed and divided by the member count, giving for each n-gram the ratio
 * of members that contain it. N-grams whose ratio reaches the configured
 * "cluster.shared_ngrams" threshold are appended to the output file, one
 * per line: <cluster> <ratio> <hash> "<ngram>". Non-printable bytes of the
 * n-gram are written as %xx.
 *
 * Nothing is written when the threshold is <= 0 or the file cannot be
 * opened. A feature missing from the lookup table is reported through
 * error() and written as an empty n-gram.
 *
 * @param c Clustering structure
 * @param fa Array of feature vectors
 * @param file Output file (opened in append mode)
 */
void export_shared_ngrams(cluster_t *c, farray_t *fa, const char *file)
{
    assert(c && fa && file);
    int i, j, k;
    double shared;
    FILE *f;
    char *name = NULL;

    config_lookup_float(&cfg, "cluster.shared_ngrams", &shared);
    if (shared <= 0.0)
        return;

    if (verbose > 0)
        printf("Exporting shared n-grams with minimum ratio %4.2f.\n",
               shared);

    if (!(f = fopen(file, "a"))) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Print incremental header */
    fprintf(f, "# ---\n# Shared n-grams for %s\n", fa->src);
    fprintf(f, "# Minimum ratio of shared n-grams: %4.2f (%2.0f%%)\n",
            shared, shared * 100);
    fprintf(f, "# ---\n# <cluster> <ratio> <hash> <ngram>\n");

    /* Compute shared n-grams per cluster */
    for (i = 0; i < c->num; i++) {
        fvec_t *s = fvec_zero();
        int members = 0;

        for (j = 0; j < c->len; j++) {
            if (c->cluster[j] != i)
                continue;

            /* Clone and binarize */
            fvec_t *x = fvec_clone(fa->x[j]);
            fvec_bin(x);

            /* The first member found provides the cluster name */
            if (members == 0)
                name = cluster_get_name(c, j);

            /* Merge n-grams in cluster */
            fvec_t *y = fvec_add(s, x);
            fvec_destroy(s);
            fvec_destroy(x);
            s = y;
            members++;
        }

        /* Empty cluster: release the accumulator before skipping it */
        if (members == 0) {
            fvec_destroy(s);
            continue;
        }

        fvec_div(s, members);

        /* Output shared n-grams */
        for (j = 0; j < s->len; j++) {
            if (s->val[j] < shared)
                continue;

            fprintf(f, "%s %6.4f %.16llx ", name, s->val[j],
                    (long long unsigned int) s->dim[j]);

            /* Lookup feature */
            fentry_t *fe = ftable_get(s->dim[j]);
            if (!fe) {
                /* error() returns, so fe must not be dereferenced;
                 * close the line with an empty n-gram instead */
                error("Oops. Feature not in lookup table.");
                fprintf(f, "\"\"\n");
                continue;
            }

            /* Print feature */
            fprintf(f, "\"");
            for (k = 0; k < fe->len; k++) {
                /* isprint() and %x need the byte as unsigned char:
                 * a negative char is undefined for isprint() and
                 * would be sign-extended by %x */
                unsigned char ch = (unsigned char) fe->data[k];

                /* NOTE(review): '%' is printable, so "|| ch == '%'" is
                 * redundant and '%' is written unescaped -- confirm
                 * whether it was meant to be escaped instead */
                if (isprint(ch) || ch == '%')
                    fprintf(f, "%c", ch);
                else
                    fprintf(f, "%%%.2x", ch);
            }
            fprintf(f, "\"\n");
        }
        fvec_destroy(s);
    }

    fclose(f);
}
Exemplo n.º 12
0
/*
 * Estimate the GMM parameters (weights, means, diagonal variances) of g.
 *
 * n         number of input vectors
 * v         input vectors, g->d floats each
 * p         per-vector, per-Gaussian weights, indexed p[i * k + j]
 * g         model updated in place: g->w (k), g->mu (k*d), g->sigma (k*d)
 * flags     GMM_FLAGS_1SIGMA replaces each Gaussian's variances by their
 *           average over the d dimensions
 * n_thread  <= 1 selects compute_sum_dcov, otherwise the threaded variant
 *
 * The sums are delegated to compute_sum_dcov / compute_sum_dcov_thread.
 * The reference loop that used to sit here in a dead "if(0)" branch
 * accumulated, for every Gaussian j over all vectors i:
 *     w[j]      = sum_i p[i*k+j]
 *     mu_j     += p[i*k+j] * v_i
 *     sigma_j  += p[i*k+j] * (v_i - mu_old_j)^2      (element-wise)
 * The fast routines are expected to produce the same sums -- TODO confirm.
 *
 * Afterwards variances below min_sigma are raised to min_sigma (with a
 * warning), mu and sigma are divided by the unnormalised w[j], and w is
 * passed to fvec_normalize (g->w, k, 1).
 */
static void gmm_compute_params (int n, const float * v, const float * p,
                                gmm_t * g,
                                int flags,
                                int n_thread)
{
    long i, j;

    long d=g->d, k=g->k;

    /* the variance sums use the means from before this update */
    float * mu_old = fvec_new_cpy (g->mu, k * d);

    fvec_0 (g->w, k);
    fvec_0 (g->mu, k * d);
    fvec_0 (g->sigma, k * d);

    if(n_thread<=1)
        compute_sum_dcov(n,k,d,v,mu_old,p,g->mu,g->sigma,g->w);
    else
        compute_sum_dcov_thread(n,k,d,v,mu_old,p,g->mu,g->sigma,g->w,n_thread);

    if(flags & GMM_FLAGS_1SIGMA) {
        /* one shared variance per Gaussian: the mean over its d entries */
        for (j = 0 ; j < k ; j++) {
            float *sigma_j=g->sigma+j*d;
            double var=fvec_sum(sigma_j,d)/d;
            fvec_set(sigma_j,d,var);
        }
    }

    /* clamp too small variances; this happens before the division by w[j] */
    long nz=0;
    for(i=0; i<k*d; i++)
        if(g->sigma[i]<min_sigma) {
            g->sigma[i]=min_sigma;
            nz++;
        }

    if(nz) printf("WARN %ld sigma diagonals are too small (set to %g)\n",nz,min_sigma);

    /* turn the weighted sums into weighted averages */
    for (j = 0 ; j < k ; j++) {
        fvec_div_by (g->mu + j * d, d, g->w[j]);
        fvec_div_by (g->sigma + j * d, d, g->w[j]);
    }

    assert(finite(fvec_sum(g->mu, k*d)));

    fvec_normalize (g->w, k, 1);

    printf ("w = ");
    fvec_print (g->w, k);
    double imfac = k * fvec_sum_sqr (g->w, k);
    printf (" imfac = %.3f\n", imfac);

    free (mu_old);
}
Exemplo n.º 13
0
Arquivo: vlad.c Projeto: atroudi/V3V_2
/*
 * Compute an aggregated descriptor of n vectors against k centroids.
 *
 * k, d        number of centroids and vector dimension
 * centroids   k*d floats
 * n, v        number of input vectors and the vectors (n*d floats)
 * flags       selects the variant, see below
 * desc        output, written by this function; k*d floats, except
 *             flags 6 (3*k*d) and flags 15 / 17 (2*k*d)
 *
 * Every variant except 11 and 12 first calls nn() to obtain one centroid
 * index per vector (presumably the nearest centroid -- confirm in nn()).
 * "Residual" below means v_i - centroid[assign[i]].
 *
 *   5      sum of the raw vectors per centroid
 *   6, 7   per centroid and dimension: 3 (flags 6) or 1 (flags 7) order
 *          statistics of the assigned values, minus the centroid value,
 *          times the number of assigned vectors
 *   8, 9   sum of residuals divided by their norm (8) or by the square
 *          root of their norm (9); zero residuals are skipped
 *   10     as 5, then fvec_normalize(., d, 2.0) per centroid
 *   11, 12 residuals summed over the 4 (flags 11) or 2 (flags 12)
 *          centroids returned by knn() for each vector
 *   13     sum of squared residuals
 *   14     square root of the summed squared deviation of each residual
 *          from the mean residual of its centroid
 *   15     first k*d floats: sum of residuals; next k*d floats: as 14
 *   17     first k*d floats: sum of the positive residual components;
 *          next k*d floats: sum of the magnitudes of the others
 *   other  sum of residuals, followed by
 *            1   division by the number of assigned vectors
 *            2   fvec_normalize(., d, 2.0) per centroid
 *            3,4 assert: not implemented
 *            16  multiplication by sqrt(number of assigned vectors)
 */
void vlad_compute(int k, int d, const float *centroids, int n, const float *v,int flags, float *desc) 
{

	int i,j,l,n_quantile,i0,i1,ai,a,ma,ni;
	int *perm ;
	float un , diff;
	float *tab,*u,*avg,*sum,*mom2,*dists;
	int *hist,*assign;


	if(flags<11 || flags>=13) 
	{
		/* single assignment: one centroid index per input vector */
		assign=ivec_new(n);

		nn(n,k,d,centroids,v,assign,NULL,NULL);    

		if(flags==6 || flags==7) 
		{
			n_quantile = flags==6 ? 3 : 1;
			fvec_0(desc,k*d*n_quantile);
			perm      = ivec_new(n);
			tab       = fvec_new(n);
			/* perm lists the vectors grouped by centroid index */
			ivec_sort_index(assign,n,perm);
			i0=0;
			for(i=0;i<k;i++) 
			{
				/* [i0,i1) is the range of perm assigned to centroid i */
				i1=i0;
				while(i1<n && assign[perm[i1]]==i) 
				{
					i1++;
				}

				if(i1==i0) continue;

				for(j=0;j<d;j++) 
				{        
					for(l=i0;l<i1;l++)
					{
						tab[l-i0]=v[perm[l]*d+j];
					}
					ni=i1-i0;
					fvec_sort(tab,ni);
					for(l=0;l<n_quantile;l++) 
					{
						desc[(i*d+j)*n_quantile+l]=(tab[(l*ni+ni/2)/n_quantile]-centroids[i*d+j])*ni;
					}
				}

				i0=i1;
			}
			free(perm);
			free(tab);
		} 
		else if(flags==5) 
		{
			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=v[i*d+j];
				}
			}

		} 
		else if(flags==8 || flags==9) 
		{
			fvec_0(desc,k*d);

			u   = fvec_new(d);

			for(i=0;i<n;i++) 
			{
				/* u = residual of vector i */
				fvec_cpy(u,v+i*d,d);
				fvec_sub(u,centroids+assign[i]*d,d);
				un=(float)sqrt(fvec_norm2sqr(u,d));

				/* a vector sitting on its centroid contributes nothing */
				if(un==0) continue;
				if(flags==8) 
				{        
					fvec_div_by(u,d,un);
				} else if(flags==9) 
				{
					fvec_div_by(u,d,sqrt(un));
				}

				fvec_add(desc+assign[i]*d,u,d);

			}
			free(u);
		} 
		else if(flags==10) 
		{
			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=v[i*d+j];
				}
			}

			for(i=0;i<k;i++) 
			{
				fvec_normalize(desc+i*d,d,2.0);  
			}

		} 
		else if(flags==13) 
		{

			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=(float)sqr(v[i*d+j]-centroids[assign[i]*d+j]);
				}
			}     

		} 
		else if(flags==14) 
		{
			/* avg = mean residual per centroid */
			avg = fvec_new_0(k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					avg[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
				}
			}

			hist=ivec_new_histogram(k,assign,n);

			for(i=0;i<k;i++) 
			{
				if(hist[i]>0) 
				{
					for(j=0;j<d;j++) 
					{
						avg[i*d+j]/=hist[i];
					}
				}
			}

			free(hist);

			fvec_0(desc,k*d);
			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=(float)(sqr(v[i*d+j]-centroids[assign[i]*d+j]-avg[assign[i]*d+j]));
				}
			}

			fvec_sqrt(desc,k*d);

			free(avg);
		}  
		else if(flags==15) 
		{
			fvec_0(desc,k*d*2);
			/* first half of desc: sum of residuals */
			sum = desc;

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					sum[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
				}
			}

			hist = ivec_new_histogram(k,assign,n);

			/* second half of desc: second moment around the mean residual */
			mom2 = desc+k*d;

			for(i=0;i<n;i++) 
			{
				/* hist[ai] >= 1 here because vector i itself is assigned to ai */
				ai=assign[i];
				for(j=0;j<d;j++) 
				{
					mom2[ai*d+j]+=(float)(sqr(v[i*d+j]-centroids[ai*d+j]-sum[ai*d+j]/hist[ai]));
				}
			}
			fvec_sqrt(mom2,k*d);
			free(hist);


		} 
		else if(flags==17) 
		{
			fvec_0(desc,k*d*2);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					diff=v[i*d+j]-centroids[assign[i]*d+j];
					if(diff>0)
					{
						desc[assign[i]*d+j]+=diff;
					}
					else 
					{
						desc[assign[i]*d+j+k*d]-=diff;
					}
				}
			}

		} 
		else 
		{
			/* default: plain sum of residuals, optionally post-processed */
			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
				}
			}


			if(flags==1) 
			{
				hist=ivec_new_histogram(k,assign,n);
				/* printf("unbalance factor=%g\n",ivec_unbalanced_factor(hist,k)); */

				for(i=0;i<k;i++) 
				{
					/* a centroid without vectors keeps its zero sum;
					 * dividing would yield 0/0 = NaN */
					if(hist[i]>0) 
					{
						for(j=0;j<d;j++) 
						{
							desc[i*d+j]/=hist[i];    
						}
					}
				}
				free(hist);
			}

			if(flags==2) 
			{
				for(i=0;i<k;i++) 
				{
					fvec_normalize(desc+i*d,d,2.0);
				}
			}

			if(flags==3 || flags==4) 
			{
				assert(!"not implemented");
			}

			if(flags==16) 
			{
				hist=ivec_new_histogram(k,assign,n);
				for(i=0;i<k;i++) 
				{
					if(hist[i]>0) 
					{
						/* NOTE(review): the result of fvec_norm is discarded;
						 * if it only computes the norm, fvec_normalize may
						 * have been intended -- confirm */
						fvec_norm(desc+i*d,d,2);
						fvec_mul_by(desc+i*d,d,sqrt(hist[i]));
					}
				}
				free(hist);
			}


		}
		free(assign);
	} 
	else if(flags==11 || flags==12) 
	{
		/* multiple assignment: ma centroid indices per input vector */
		ma=flags==11 ? 4 : 2;
		assign=ivec_new(n*ma);

		dists=knn(n,k,d,ma,centroids,v,assign,NULL,NULL);    

		fvec_0(desc,k*d);

		for(i=0;i<n;i++) 
		{
			for(j=0;j<d;j++) 
			{
				for(a=0;a<ma;a++) 
				{
					desc[assign[ma*i+a]*d+j]+=v[i*d+j]-centroids[assign[ma*i+a]*d+j];
				}
			}
		} 

		/* the distances returned by knn are not used */
		free(dists);

		free(assign);
	}

}