Example #1
/* accum[i]+=partial[i] for i in [0, count); neither partial nor accum needs to be 16-byte aligned */
static void inline sse_sum_unaligned1_F(int count, float *partial, float *accum)
{
#if MANUAL_SSE
int i;
__m128 v4in, v4acc;
/* 16-byte aligned scratch buffer used to stage unaligned input for _mm_load_ps() */
float *tmp=aligned_alloca(4*sizeof(*tmp));

/* scalar prologue: advance until accum[i] is 16-byte aligned */
for(i=0; ((long)&(accum[i]) & 0x0f) && (i<count);i++) {
	accum[i]+=partial[i];
	}

//fprintf(stderr, "unaligned %d %p %p\n", i, partial, accum);

PRAGMA_IVDEP
/* main loop: copy 4 floats from (possibly unaligned) partial into the aligned
   scratch buffer, then use aligned SSE loads/stores on both sides */
for(;i<(count-3);i+=4) {
	memcpy(tmp, &(partial[i]), 4*sizeof(*tmp));
	v4in=_mm_load_ps(tmp);
	v4acc=_mm_load_ps(&(accum[i]));
	v4acc=_mm_add_ps(v4acc, v4in);
	_mm_store_ps(&(accum[i]), v4acc);
	}
/* scalar epilogue for the remaining 0-3 elements */
for(;i<count;i++) {
	accum[i]+=partial[i];
	}
#else
fprintf(stderr, "**** MANUAL_SSE disabled in %s\n", __FUNCTION__);
exit(-2);
#endif
}
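
All of the examples on this page rely on an aligned_alloca() helper whose definition is not shown. As a rough, hypothetical sketch (an assumption, not the project's actual implementation), such a macro could over-allocate with alloca() and round the pointer up to a 16-byte boundary:

/* Hypothetical sketch of aligned_alloca(): over-allocate on the stack and round
 * the pointer up to a 16-byte boundary so the result is safe for _mm_load_ps(). */
#include <alloca.h>
#include <stdint.h>

#define SSE_ALIGN 16
#define aligned_alloca(size) \
	((void *)(((uintptr_t)alloca((size) + SSE_ALIGN - 1) + (SSE_ALIGN - 1)) \
	          & ~(uintptr_t)(SSE_ALIGN - 1)))

Any definition with the same contract (stack lifetime, 16-byte alignment) would leave the snippets on this page working unchanged.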
Example #2
/* copies every step-th element starting at firstbin, sorts the copy, and stores its median in *median */
void nonparametric(float *firstbin, int step, int count, float *median, float *ks_test)
{
    float *tmp;
    double a,b,lambda;
    int i;
    tmp=aligned_alloca(count*sizeof(float));
    for(i=0; i<count; i++)tmp[i]=firstbin[i*step];
    sort_floats(tmp, count);
    /* odd count: middle element; even count: average of the two middle elements */
    if(count & 1)*median=tmp[count>>1];
    else *median=(tmp[count>>1]+tmp[(count>>1)-1])/2.0;
Example #3
/* returns the median of count values taken every step-th element starting at firstbin */
static float compute_median(float *firstbin, int step, int count)
{
    float *tmp;
    int i;
    tmp=aligned_alloca(count*sizeof(float));
    for(i=0; i<count; i++)tmp[i]=firstbin[i*step];
    sort_floats(tmp, count);
    /* even count: average of the two middle elements; odd count: the middle element */
    if(!(count & 1))return (tmp[count>>1]+tmp[(count>>1)-1])/2.0;
    return tmp[count>>1];
}
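
As a quick illustration of the strided access pattern, here is a hypothetical caller (data and names invented for this page, assuming it sits in the same file as compute_median() and that sort_floats() and aligned_alloca() are available):

#include <stdio.h>

int main(void)
{
    /* 3 samples of 4 interleaved bins each; the median of bin 0
       is taken over the values 3.0, 1.0 and 2.0, giving 2.0 */
    float data[12]={3.0, 9.0, 9.0, 9.0,
                    1.0, 9.0, 9.0, 9.0,
                    2.0, 9.0, 9.0, 9.0};
    printf("median=%g\n", compute_median(&(data[0]), 4, 3));
    return 0;
}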
Example #4
/* accum[i]+=partial[i] for i in [0, count); neither partial nor accum needs to be vector-aligned */
static void inline sse_sum_unaligned_F(int count, float *partial, float *accum)
{
#if MANUAL_SSE
#if USE_AVX
int i;
__m256 v8in, v8acc;

//fprintf(stderr, "unaligned %d %p %p\n", i, partial, accum);
/* scalar prologue: advance until accum[i] is 32-byte aligned, as required by _mm256_load_ps() */
for(i=0; ((long)&(accum[i]) & 0x1f) && (i<count);i++) {
	accum[i]+=partial[i];
	}

PRAGMA_IVDEP
/* main AVX loop: unaligned load of partial, aligned load/store of accum, 8 floats per iteration */
for(;i<(count-7);i+=8) {
	v8in=_mm256_loadu_ps(&(partial[i]));
	v8acc=_mm256_load_ps(&(accum[i]));
	v8acc=_mm256_add_ps(v8acc, v8in);
	_mm256_store_ps(&(accum[i]), v8acc);
	}

PRAGMA_IVDEP
/* scalar epilogue for the remaining 0-7 elements */
for(;i<count;i++) {
	accum[i]+=partial[i];
	}
#else
int i;
__m128 v4in, v4acc;
/* 16-byte aligned scratch buffer used to stage unaligned input for _mm_load_ps() */
float *tmp=aligned_alloca(16*sizeof(*tmp));

/* scalar prologue: advance until accum[i] is 16-byte aligned */
for(i=0; ((long)&(accum[i]) & 0x0f) && (i<count);i++) {
	accum[i]+=partial[i];
	}

//fprintf(stderr, "unaligned %d %p %p\n", i, partial, accum);

PRAGMA_IVDEP
/* main SSE loop, unrolled 4x: stage 16 floats from (possibly unaligned) partial in the
   aligned scratch buffer, then add them to accum with aligned loads/stores */
for(;i<(count-15);i+=16) {
	memcpy(tmp, &(partial[i]), 16*sizeof(*tmp));

	v4in=_mm_load_ps(tmp);
	v4acc=_mm_load_ps(&(accum[i]));
	v4acc=_mm_add_ps(v4acc, v4in);
	_mm_store_ps(&(accum[i]), v4acc);

	v4in=_mm_load_ps(&(tmp[4]));
	v4acc=_mm_load_ps(&(accum[i+4]));
	v4acc=_mm_add_ps(v4acc, v4in);
	_mm_store_ps(&(accum[i+4]), v4acc);

	v4in=_mm_load_ps(&(tmp[8]));
	v4acc=_mm_load_ps(&(accum[i+8]));
	v4acc=_mm_add_ps(v4acc, v4in);
	_mm_store_ps(&(accum[i+8]), v4acc);

	v4in=_mm_load_ps(&(tmp[12]));
	v4acc=_mm_load_ps(&(accum[i+12]));
	v4acc=_mm_add_ps(v4acc, v4in);
	_mm_store_ps(&(accum[i+12]), v4acc);
	}

PRAGMA_IVDEP
/* scalar epilogue for the remaining 0-15 elements */
for(;i<count;i++) {
	accum[i]+=partial[i];
	}
#endif
#else
fprintf(stderr, "**** MANUAL_SSE disabled in %s\n", __FUNCTION__);
exit(-2);
#endif
}
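
For reference, both vector paths above are equivalent to the plain scalar loop below (a sketch written for this page, not taken from the original sources); the SIMD variants only change how many elements are processed per iteration, not the per-element result:

/* scalar reference: accum[i]+=partial[i] for every i in [0, count) */
static void sum_scalar_F(int count, float *partial, float *accum)
{
int i;

for(i=0;i<count;i++) {
	accum[i]+=partial[i];
	}
}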