int main(void) { //struct timespec st,end,st2,end2; INT4 ii, length = 100000; REAL4VectorAligned *floatvalues1 = NULL, *floatvalues2 = NULL, *floatvalues3 = NULL; alignedREAL8Vector *doublevalues1 = NULL, *doublevalues2 = NULL, *doublevalues3 = NULL; alignedREAL4VectorArray *floatvalues = NULL; XLAL_CHECK( (floatvalues1 = XLALCreateREAL4VectorAligned(length, 32)) != NULL, XLAL_EFUNC ); XLAL_CHECK( (floatvalues2 = XLALCreateREAL4VectorAligned(length, 32)) != NULL, XLAL_EFUNC ); XLAL_CHECK( (floatvalues3 = XLALCreateREAL4VectorAligned(length, 32)) != NULL, XLAL_EFUNC ); XLAL_CHECK( (doublevalues1 = createAlignedREAL8Vector(length, 32)) != NULL, XLAL_EFUNC ); XLAL_CHECK( (doublevalues2 = createAlignedREAL8Vector(length, 32)) != NULL, XLAL_EFUNC ); XLAL_CHECK( (doublevalues3 = createAlignedREAL8Vector(length, 32)) != NULL, XLAL_EFUNC ); XLAL_CHECK( (floatvalues = createAlignedREAL4VectorArray(2, length, 32)) != NULL, XLAL_EFUNC ); for (ii=0; ii<length; ii++) { floatvalues1->data[ii] = (REAL4)(ii-length/2)*2.0e-3; doublevalues1->data[ii] = (REAL8)(ii-length/2)*2.0e-3; floatvalues2->data[ii] = (REAL4)(ii)*1.0e-3; doublevalues2->data[ii] = (REAL8)(ii)*1.0e-3; floatvalues3->data[ii] = (REAL4)(ii-length/2)*2.0e-4; doublevalues3->data[ii] = (REAL8)(ii-length/2)*2.0e-4; } memcpy(floatvalues->data[0]->data, floatvalues1->data, sizeof(REAL4)*length); memcpy(floatvalues->data[1]->data, floatvalues2->data, sizeof(REAL4)*length); REAL4VectorAligned *floatresult_vecsum = NULL, *floatresult_vecmult = NULL, *floatresult_addscalar = NULL, *floatresult_scale = NULL; alignedREAL8Vector *doubleresult_exp = NULL, *doubleresult_addscalar = NULL, *doubleresult_scale = NULL; alignedREAL4VectorArray *arraysumresult = NULL; XLAL_CHECK( (doubleresult_exp = createAlignedREAL8Vector(doublevalues1->length, 32)) != NULL, XLAL_EFUNC ); XLAL_CHECK( (floatresult_vecsum = XLALCreateREAL4VectorAligned(floatvalues1->length, 32)) != NULL, XLAL_EFUNC ); XLAL_CHECK( (floatresult_vecmult = XLALCreateREAL4VectorAligned(floatvalues1->length, 32)) != NULL, XLAL_EFUNC ); XLAL_CHECK( (floatresult_addscalar = XLALCreateREAL4VectorAligned(floatvalues1->length, 32)) != NULL, XLAL_EFUNC ); XLAL_CHECK( (floatresult_scale = XLALCreateREAL4VectorAligned(floatvalues1->length, 32)) != NULL, XLAL_EFUNC ); XLAL_CHECK( (doubleresult_addscalar = createAlignedREAL8Vector(doublevalues1->length, 32)) != NULL, XLAL_EFUNC ); XLAL_CHECK( (doubleresult_scale = createAlignedREAL8Vector(doublevalues1->length, 32)) != NULL, XLAL_EFUNC ); XLAL_CHECK( (arraysumresult = createAlignedREAL4VectorArray(2, length, 32)) != NULL, XLAL_EFUNC ); memset(arraysumresult->data[0]->data, 0, sizeof(REAL4)*length); memset(arraysumresult->data[1]->data, 0, sizeof(REAL4)*length); //clock_gettime(CLOCK_REALTIME, &st); XLAL_CHECK( sse_exp_REAL8Vector(doubleresult_exp, doublevalues3) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK( sseSSVectorSum(floatresult_vecsum, floatvalues1, floatvalues2) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK( sseSSVectorMultiply(floatresult_vecmult, floatvalues1, floatvalues2) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK( sseAddScalarToREAL4Vector(floatresult_addscalar, floatvalues1, (REAL4)100.0) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK( sseScaleREAL4Vector(floatresult_scale, floatvalues1, (REAL4)100.0) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK( sseAddScalarToREAL8Vector(doubleresult_addscalar, doublevalues1, 100.0) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK( sseScaleREAL8Vector(doubleresult_scale, doublevalues1, 100.0) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK( sseSSVectorArraySum(arraysumresult, floatvalues, floatvalues, 0, 1, 0, 1) == XLAL_SUCCESS, XLAL_EFUNC ); //clock_gettime(CLOCK_REALTIME, &end); REAL4 maxfloaterr_vecsum = 0.0, maxfloatrelerr_vecsum = 0.0, maxfloaterr_vecmult = 0.0, maxfloatrelerr_vecmult = 0.0, maxfloaterr_addscalar = 0.0, maxfloatrelerr_addscalar = 0.0, maxfloaterr_scale = 0.0, maxfloatrelerr_scale = 0.0, maxfloaterr_seqsum = 0.0, maxfloatrelerr_seqsum = 0.0; REAL8 maxdoubleerr_exp = 0.0, maxdoublerelerr_exp = 0.0, maxdoubleerr_addscalar = 0.0, maxdoublerelerr_addscalar = 0.0, maxdoubleerr_scale = 0.0, maxdoublerelerr_scale = 0.0; for (ii=0; ii<length; ii++) { REAL8 exp_libm = exp(doublevalues3->data[ii]); REAL8 doubleerr = fabs(doubleresult_exp->data[ii] - exp_libm); REAL8 doublerelerr = Relerr( doubleerr, exp_libm ); maxdoubleerr_exp = fmax(doubleerr, maxdoubleerr_exp); maxdoublerelerr_exp = fmax(doublerelerr, maxdoublerelerr_exp); REAL4 sumval = (REAL4)(floatvalues1->data[ii] + floatvalues2->data[ii]); REAL4 floaterr = fabsf(floatresult_vecsum->data[ii] - sumval); REAL4 floatrelerr = Relfloaterr( floaterr, sumval ); maxfloaterr_vecsum = fmaxf(floaterr, maxfloaterr_vecsum); maxfloatrelerr_vecsum = fmaxf(floatrelerr, maxfloatrelerr_vecsum); REAL4 multval = (REAL4)(floatvalues1->data[ii]*floatvalues2->data[ii]); floaterr = fabsf(floatresult_vecmult->data[ii] - multval); floatrelerr = Relfloaterr(floaterr, multval); maxfloaterr_vecmult = fmaxf(floaterr, maxfloaterr_vecmult); maxfloatrelerr_vecmult = fmaxf(floatrelerr, maxfloatrelerr_vecmult); sumval = (REAL4)(floatvalues1->data[ii]+(REAL4)100.0); REAL8 sumvald = (doublevalues1->data[ii]+100.0); floaterr = fabsf(floatresult_addscalar->data[ii] - sumval); floatrelerr = Relfloaterr(floaterr, sumval); doubleerr = fabs(doubleresult_addscalar->data[ii] - sumvald); doublerelerr = Relerr( doubleerr, sumvald ); maxfloaterr_addscalar = fmaxf(floaterr, maxfloaterr_addscalar); maxfloatrelerr_addscalar = fmaxf(floatrelerr, maxfloatrelerr_addscalar); maxdoubleerr_addscalar = fmax(doubleerr, maxdoubleerr_addscalar); maxdoublerelerr_addscalar = fmax(doublerelerr, maxdoublerelerr_addscalar); multval = (REAL4)(floatvalues1->data[ii]*(REAL4)100.0); REAL8 multvald = (doublevalues1->data[ii]*100.0); floaterr = fabsf(floatresult_scale->data[ii] - multval); floatrelerr = Relfloaterr(floaterr, multval); doubleerr = fabs(doubleresult_scale->data[ii] - multvald); doublerelerr = Relerr( doubleerr, multvald ); maxfloaterr_scale = fmaxf(floaterr, maxfloaterr_scale); maxfloatrelerr_scale = fmaxf(floatrelerr, maxfloatrelerr_scale); maxdoubleerr_scale = fmax(doubleerr, maxdoubleerr_scale); maxdoublerelerr_scale = fmax(doublerelerr, maxdoublerelerr_scale); floaterr = fabsf(arraysumresult->data[0]->data[ii] - (REAL4)(floatvalues1->data[ii]+floatvalues2->data[ii])); floatrelerr = Relfloaterr(floaterr, (REAL4)(floatvalues1->data[ii]+floatvalues2->data[ii])); maxfloaterr_seqsum = fmaxf(floaterr, maxfloaterr_seqsum); maxfloatrelerr_seqsum = fmaxf(floatrelerr, maxfloatrelerr_seqsum); } fprintf(stderr, "Test results SSE:\n"); fprintf(stderr, "-----------------\n"); fprintf(stderr, "Add REAL4Vectors: max error = %g, max relative error = %g\n", maxfloaterr_vecsum, maxfloatrelerr_vecsum); fprintf(stderr, "Multiply REAL4Vectors: max error = %g, max relative error = %g\n", maxfloaterr_vecmult, maxfloatrelerr_vecmult); fprintf(stderr, "Add scalar to REAL4Vector: max error = %g, max relative error = %g\n", maxfloaterr_addscalar, maxfloatrelerr_addscalar); fprintf(stderr, "Add scalar to REAL8Vector: max error = %g, max relative error = %g\n", maxdoubleerr_addscalar, maxdoublerelerr_addscalar); fprintf(stderr, "Scale REAL4Vector: max error = %g, max relative error = %g\n", maxfloaterr_scale, maxfloatrelerr_scale); fprintf(stderr, "Scale REAL8Vector: max error = %g, max relative error = %g\n", maxdoubleerr_scale, maxdoublerelerr_scale); fprintf(stderr, "exp(REAL8Vector): max error = %g, max relative error = %g\n", maxdoubleerr_exp, maxdoublerelerr_exp); fprintf(stderr, "Sum vectors of vector array into vector array: max error = %g, max relative error = %g\n", maxfloaterr_seqsum, maxfloatrelerr_seqsum); //fprintf(stderr, "Time elapsed: %li\n", (end.tv_sec-st.tv_sec)*GIGA+(end.tv_nsec-st.tv_nsec)); #ifdef __AVX__ //clock_gettime(CLOCK_REALTIME, &st2); XLAL_CHECK( avxSSVectorSum(floatresult_vecsum, floatvalues1, floatvalues2) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK( avxSSVectorMultiply(floatresult_vecmult, floatvalues1, floatvalues2) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK( avxAddScalarToREAL4Vector(floatresult_addscalar, floatvalues1, (REAL4)100.0) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK( avxScaleREAL4Vector(floatresult_scale, floatvalues1, (REAL4)100.0) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK( avxAddScalarToREAL8Vector(doubleresult_addscalar, doublevalues1, 100.0) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK( avxScaleREAL8Vector(doubleresult_scale, doublevalues1, 100.0) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK( avxSSVectorArraySum(arraysumresult, floatvalues, floatvalues, 0, 1, 0, 1) == XLAL_SUCCESS, XLAL_EFUNC ); //clock_gettime(CLOCK_REALTIME, &end2); //REAL4 maxfloaterr_vecsum = 0.0, maxfloatrelerr_vecsum = 0.0, maxfloaterr_vecmult = 0.0, maxfloatrelerr_vecmult = 0.0, maxfloaterr_addscalar = 0.0, maxfloatrelerr_addscalar = 0.0, maxfloaterr_scale = 0.0, maxfloatrelerr_scale = 0.0, maxfloaterr_seqsum = 0.0, maxfloatrelerr_seqsum = 0.0, maxfloaterr_seqsub = 0.0, maxfloatrelerr_seqsub = 0.0; //REAL8 maxdoubleerr_addscalar = 0.0, maxdoublerelerr_addscalar = 0.0, maxdoubleerr_scale = 0.0, maxdoublerelerr_scale = 0.0; for (ii=0; ii<length; ii++) { REAL4 floaterr = fabsf(floatresult_vecsum->data[ii] - (REAL4)(floatvalues1->data[ii] + floatvalues2->data[ii])); REAL4 floatrelerr = fabsf((REAL4)(1.0 - floatresult_vecsum->data[ii]/(REAL4)(floatvalues1->data[ii] + floatvalues2->data[ii]))); if (floaterr>maxfloaterr_vecsum) maxfloaterr_vecsum = floaterr; if (floatrelerr>maxfloatrelerr_vecsum) maxfloatrelerr_vecsum = floatrelerr; floaterr = fabsf(floatresult_vecmult->data[ii] - (REAL4)(floatvalues1->data[ii]*floatvalues2->data[ii])); floatrelerr = fabsf((REAL4)(1.0 - floatresult_vecmult->data[ii]/(REAL4)(floatvalues1->data[ii]*floatvalues2->data[ii]))); if (floaterr>maxfloaterr_vecmult) maxfloaterr_vecmult = floaterr; if (floatrelerr>maxfloatrelerr_vecmult) maxfloatrelerr_vecmult = floatrelerr; floaterr = fabsf(floatresult_addscalar->data[ii] - (REAL4)(floatvalues1->data[ii]+(REAL4)100.0)); REAL8 doubleerr = fabs(doubleresult_addscalar->data[ii] - (doublevalues1->data[ii]+100.0)); floatrelerr = fabsf((REAL4)(1.0 - floatresult_addscalar->data[ii]/(REAL4)(floatvalues1->data[ii]+(REAL4)100.0))); REAL8 doublerelerr = fabs(1.0 - doubleresult_addscalar->data[ii]/(doublevalues1->data[ii]+100.0)); if (floaterr>maxfloaterr_addscalar) maxfloaterr_addscalar = floaterr; if (floatrelerr>maxfloatrelerr_addscalar) maxfloatrelerr_addscalar = floatrelerr; if (doubleerr>maxdoubleerr_addscalar) maxdoubleerr_addscalar = doubleerr; if (doublerelerr>maxdoublerelerr_addscalar) maxdoublerelerr_addscalar = doublerelerr; floaterr = fabsf(floatresult_scale->data[ii] - (REAL4)(floatvalues1->data[ii]*(REAL4)100.0)); doubleerr = fabs(doubleresult_scale->data[ii] - (doublevalues1->data[ii]*100.0)); floatrelerr = fabsf((REAL4)(1.0 - floatresult_scale->data[ii]/(REAL4)(floatvalues1->data[ii]*(REAL4)100.0))); doublerelerr = fabs(1.0 - doubleresult_scale->data[ii]/(doublevalues1->data[ii]*100.0)); if (floaterr>maxfloaterr_scale) maxfloaterr_scale = floaterr; if (floatrelerr>maxfloatrelerr_scale) maxfloatrelerr_scale = floatrelerr; if (doubleerr>maxdoubleerr_scale) maxdoubleerr_scale = doubleerr; if (doublerelerr>maxdoublerelerr_scale) maxdoublerelerr_scale = doublerelerr; floaterr = fabsf(arraysumresult->data[ii] - (REAL4)(floatvalues1->data[ii]+floatvalues2->data[ii])); floatrelerr = fabsf((REAL4)(1.0 - arraysumresult->data[ii]/(REAL4)(floatvalues1->data[ii]+floatvalues2->data[ii]))); if (floaterr>maxfloaterr_seqsum) maxfloaterr_seqsum = floaterr; if (floatrelerr>maxfloatrelerr_seqsum) maxfloatrelerr_seqsum = floatrelerr; } fprintf(stderr, "Test results AVX:\n"); fprintf(stderr, "-----------------\n"); fprintf(stderr, "Add REAL4Vectors: max error = %g, max relative error = %g\n", maxfloaterr_vecsum, maxfloatrelerr_vecsum); fprintf(stderr, "Multiply REAL4Vectors: max error = %g, max relative error = %g\n", maxfloaterr_vecmult, maxfloatrelerr_vecmult); fprintf(stderr, "Add scalar to REAL4Vector: max error = %g, max relative error = %g\n", maxfloaterr_addscalar, maxfloatrelerr_addscalar); fprintf(stderr, "Add scalar to REAL8Vector: max error = %g, max relative error = %g\n", maxdoubleerr_addscalar, maxdoublerelerr_addscalar); fprintf(stderr, "Scale REAL4Vector: max error = %g, max relative error = %g\n", maxfloaterr_scale, maxfloatrelerr_scale); fprintf(stderr, "Scale REAL8Vector: max error = %g, max relative error = %g\n", maxdoubleerr_scale, maxdoublerelerr_scale); fprintf(stderr, "Sum vectors of vector array into vector array: max error = %g, max relative error = %g\n", maxfloaterr_seqsum, maxfloatrelerr_seqsum); //fprintf(stderr, "Time elapsed: %li\n", (end2.tv_sec-st2.tv_sec)*GIGA+(end2.tv_nsec-st2.tv_nsec)); #endif XLALDestroyREAL4VectorAligned(floatvalues1); destroyAlignedREAL8Vector(doublevalues1); XLALDestroyREAL4VectorAligned(floatvalues2); destroyAlignedREAL8Vector(doublevalues2); destroyAlignedREAL8Vector(doubleresult_exp); XLALDestroyREAL4VectorAligned(floatresult_vecsum); XLALDestroyREAL4VectorAligned(floatresult_vecmult); XLALDestroyREAL4VectorAligned(floatresult_addscalar); XLALDestroyREAL4VectorAligned(floatresult_scale); destroyAlignedREAL8Vector(doubleresult_addscalar); destroyAlignedREAL8Vector(doubleresult_scale); destroyAlignedREAL4VectorArray(floatvalues); destroyAlignedREAL4VectorArray(arraysumresult); return 0; }
// ---------- main ---------- int main ( int argc, char *argv[] ) { UserInput_t XLAL_INIT_DECL(uvar_s); UserInput_t *uvar = &uvar_s; uvar->randSeed = 1; uvar->Nruns = 1; uvar->inAlign = uvar->outAlign = sizeof(void*); // ---------- register user-variable ---------- XLALRegisterUvarMember( randSeed, INT4, 's', OPTIONAL, "Random-number seed"); XLALRegisterUvarMember( Nruns, INT4, 'r', OPTIONAL, "Number of repeated timing 'runs' to average over (=improves variance)" ); XLALRegisterUvarMember( inAlign, INT4, 'a', OPTIONAL, "Alignment of input vectors; default is sizeof(void*), i.e. no particular alignment" ); XLALRegisterUvarMember( outAlign, INT4, 'b', OPTIONAL, "Alignment of output vectors; default is sizeof(void*), i.e. no particular alignment" ); BOOLEAN should_exit = 0; XLAL_CHECK( XLALUserVarReadAllInput( &should_exit, argc, argv, lalVCSInfoList ) == XLAL_SUCCESS, XLAL_EFUNC ); if ( should_exit ) { exit (1); } srand ( uvar->randSeed ); XLAL_CHECK ( uvar->Nruns >= 1, XLAL_EDOM ); UINT4 Nruns = (UINT4)uvar->Nruns; UINT4 Ntrials = 1000000 + 7; REAL4VectorAligned *xIn_a, *xIn2_a, *xOut_a, *xOut2_a; XLAL_CHECK ( ( xIn_a = XLALCreateREAL4VectorAligned ( Ntrials, uvar->inAlign )) != NULL, XLAL_EFUNC ); XLAL_CHECK ( ( xIn2_a = XLALCreateREAL4VectorAligned ( Ntrials, uvar->inAlign )) != NULL, XLAL_EFUNC ); XLAL_CHECK ( ( xOut_a = XLALCreateREAL4VectorAligned ( Ntrials, uvar->outAlign )) != NULL, XLAL_EFUNC ); XLAL_CHECK ( ( xOut2_a = XLALCreateREAL4VectorAligned ( Ntrials, uvar->outAlign )) != NULL, XLAL_EFUNC ); REAL4VectorAligned *xOutRef_a, *xOutRef2_a; XLAL_CHECK ( (xOutRef_a = XLALCreateREAL4VectorAligned ( Ntrials, uvar->outAlign )) != NULL, XLAL_EFUNC ); XLAL_CHECK ( (xOutRef2_a = XLALCreateREAL4VectorAligned ( Ntrials, uvar->outAlign )) != NULL, XLAL_EFUNC ); // extract aligned REAL4 vectors from these REAL4 *xIn = xIn_a->data; REAL4 *xIn2 = xIn2_a->data; REAL4 *xOut = xOut_a->data; REAL4 *xOut2 = xOut2_a->data; REAL4 *xOutRef = xOutRef_a->data; REAL4 *xOutRef2 = xOutRef2_a->data; UINT4Vector *xOutU4; UINT4Vector *xOutRefU4; XLAL_CHECK ( ( xOutU4 = XLALCreateUINT4Vector ( Ntrials )) != NULL, XLAL_EFUNC ); XLAL_CHECK ( ( xOutRefU4 = XLALCreateUINT4Vector ( Ntrials )) != NULL, XLAL_EFUNC ); REAL8VectorAligned *xInD_a, *xIn2D_a, *xOutD_a, *xOutRefD_a; XLAL_CHECK ( ( xInD_a = XLALCreateREAL8VectorAligned ( Ntrials, uvar->inAlign )) != NULL, XLAL_EFUNC ); XLAL_CHECK ( ( xIn2D_a = XLALCreateREAL8VectorAligned ( Ntrials, uvar->inAlign )) != NULL, XLAL_EFUNC ); XLAL_CHECK ( ( xOutD_a = XLALCreateREAL8VectorAligned ( Ntrials, uvar->outAlign )) != NULL, XLAL_EFUNC ); XLAL_CHECK ( (xOutRefD_a= XLALCreateREAL8VectorAligned ( Ntrials, uvar->outAlign )) != NULL, XLAL_EFUNC ); // extract aligned REAL8 vectors from these REAL8 *xInD = xInD_a->data; REAL8 *xIn2D = xIn2D_a->data; REAL8 *xOutD = xOutD_a->data; REAL8 *xOutRefD = xOutRefD_a->data; REAL8 tic, toc; REAL4 maxErr = 0, maxRelerr = 0; REAL4 abstol, reltol; XLALPrintInfo ("Testing sin(x), cos(x) for x in [-1000, 1000]\n"); for ( UINT4 i = 0; i < Ntrials; i ++ ) { xIn[i] = 2000 * ( frand() - 0.5 ); } abstol = 2e-7, reltol = 1e-5; // ==================== SIN() ==================== TESTBENCH_VECTORMATH_S2S(Sin,xIn); // ==================== COS() ==================== TESTBENCH_VECTORMATH_S2S(Cos,xIn); // ==================== SINCOS() ==================== TESTBENCH_VECTORMATH_S2SS(SinCos,xIn); // ==================== SINCOS(2PI*x) ==================== TESTBENCH_VECTORMATH_S2SS(SinCos2Pi,xIn); // ==================== EXP() ==================== XLALPrintInfo ("\nTesting exp(x) for x in [-10, 10]\n"); for ( UINT4 i = 0; i < Ntrials; i ++ ) { xIn[i] = 20 * ( frand() - 0.5 ); } abstol = 4e-3, reltol = 3e-7; TESTBENCH_VECTORMATH_S2S(Exp,xIn); // ==================== LOG() ==================== XLALPrintInfo ("\nTesting log(x) for x in (0, 10000]\n"); for ( UINT4 i = 0; i < Ntrials; i ++ ) { xIn[i] = 10000.0f * frand() + 1e-6; } // for i < Ntrials abstol = 2e-6, reltol = 2e-7; TESTBENCH_VECTORMATH_S2S(Log,xIn); // ==================== ADD,MUL ==================== for ( UINT4 i = 0; i < Ntrials; i ++ ) { xIn[i] = -10000.0f + 20000.0f * frand() + 1e-6; xIn2[i] = -10000.0f + 20000.0f * frand() + 1e-6; xInD[i] = -100000.0 + 200000.0 * frand() + 1e-6; xIn2D[i]= -100000.0 + 200000.0 * frand() + 1e-6; } // for i < Ntrials abstol = 2e-7, reltol = 2e-7; XLALPrintInfo ("\nTesting add,multiply,shift,scale(x,y) for x,y in (-10000, 10000]\n"); TESTBENCH_VECTORMATH_SS2S(Add,xIn,xIn2); TESTBENCH_VECTORMATH_SS2S(Multiply,xIn,xIn2); TESTBENCH_VECTORMATH_SS2S(Max,xIn,xIn2); TESTBENCH_VECTORMATH_SS2S(Shift,xIn[0],xIn2); TESTBENCH_VECTORMATH_SS2S(Scale,xIn[0],xIn2); TESTBENCH_VECTORMATH_DD2D(Scale,xInD[0],xIn2D); // ==================== FIND ==================== for ( UINT4 i = 0; i < Ntrials; i ++ ) { xIn[i] = -10000.0f + 20000.0f * frand() + 1e-6; xIn2[i] = -10000.0f + 20000.0f * frand() + 1e-6; } // for i < Ntrials XLALPrintInfo ("\nTesting find for x,y in (-10000, 10000]\n"); TESTBENCH_VECTORMATH_SS2uU(FindVectorLessEqual,xIn,xIn2); TESTBENCH_VECTORMATH_SS2uU(FindScalarLessEqual,xIn[0],xIn2); XLALPrintInfo ("\n"); // ---------- clean up memory ---------- XLALDestroyREAL4VectorAligned ( xIn_a ); XLALDestroyREAL4VectorAligned ( xIn2_a ); XLALDestroyREAL4VectorAligned ( xOut_a ); XLALDestroyREAL4VectorAligned ( xOut2_a ); XLALDestroyREAL4VectorAligned ( xOutRef_a ); XLALDestroyREAL4VectorAligned ( xOutRef2_a ); XLALDestroyUINT4Vector ( xOutU4 ); XLALDestroyUINT4Vector ( xOutRefU4 ); XLALDestroyREAL8VectorAligned ( xInD_a ); XLALDestroyREAL8VectorAligned ( xIn2D_a ); XLALDestroyREAL8VectorAligned ( xOutD_a ); XLALDestroyREAL8VectorAligned ( xOutRefD_a ); XLALDestroyUserVars(); LALCheckMemoryLeaks(); return XLAL_SUCCESS; } // main()