/* ------------------------------------------------------------------------- */
/*
 * Signed front end for sse2_set_32u().
 *
 * The bits of val are reinterpreted (not numerically converted) as a UINT32,
 * the destination is viewed as an array of UINT32, and both are handed to
 * sse2_set_32u() together with len. The status returned by that call is
 * passed straight back to the caller.
 */
static pstatus_t sse2_set_32s(
	INT32 val, INT32* pDst, UINT32 len)
{
	/* Same storage, looked at through the unsigned type of equal width. */
	const UINT32* bits = (const UINT32*) &val;
	UINT32* out = (UINT32*) pDst;

	return sse2_set_32u(*bits, out, len);
}
/* ------------------------------------------------------------------------- */ int test_set32u_func(void) { #if defined(WITH_SSE2) || defined(WITH_IPP) UINT32 ALIGN(dest[512]); int off; #endif int failed = 0; char testStr[256]; testStr[0] = '\0'; #ifdef WITH_SSE2 /* Test SSE under various alignments */ if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); for (off=0; off<16; ++off) { int len; for (len=1; len<512-off; ++len) { int i; memset(dest, 0, sizeof(dest)); sse2_set_32u(0xdeadbeefU, dest+off, len); for (i=0; i<len; ++i) { if (dest[off+i] != 0xdeadbeefU) { printf("set32u-SSE FAIL: off=%d len=%d dest[%d]=0x%08x\n", off, len, i+off, dest[i+off]); failed=1; } } } } } #endif /* i386 */ #ifdef WITH_IPP strcat(testStr, " IPP"); for (off=0; off<16; ++off) { int len; for (len=1; len<512-off; ++len) { int i; memset(dest, 0, sizeof(dest)); ipp_wrapper_set_32u(0xdeadbeefU, dest+off, len); for (i=0; i<len; ++i) { if (dest[off+i] != 0xdeadbeefU) { printf("set32u-IPP FAIL: off=%d len=%d dest[%d]=0x%08x\n", off, len, i+off, dest[i+off]); failed=1; } } } } #endif /* WITH_IPP */ if (!failed) printf("All set32u tests passed (%s).\n", testStr); return (failed > 0) ? FAILURE : SUCCESS; }
} /* NOTE(review): no matching opening brace is visible in the code preceding this line - confirm against the full file. */

/* ------------------------------------------------------------------------- */
/*
 * Plain C loop that stores val into count consecutive UINT32 elements
 * starting at dst, one element per iteration. It is the first
 * implementation expression handed to STD_SPEED_TEST below.
 */
static inline void memset32u_naive(
	UINT32 val, UINT32 *dst, size_t count)
{
	/* count is tested, then decremented; the loop body runs exactly count times. */
	while (count--) *dst++ = val;
}

/* ------------------------------------------------------------------------- */
/*
 * Instantiate the set32u timing routine through STD_SPEED_TEST (macro
 * defined elsewhere). test_set32u_speed() below calls set32u_speed_test as a
 * function, so the macro presumably defines a function with that name.
 *
 * Three implementation expressions are supplied: the naive loop, the SSE2
 * routine (replaced by PRIM_NOP when WITH_SSE2 is not defined) and the IPP
 * wrapper.
 *
 * NOTE(review): the meaning of the TRUE/FALSE flags and of the
 * PF_SSE2_INSTRUCTIONS_AVAILABLE argument is set by the macro definition,
 * which is not visible here - confirm there.
 * NOTE(review): the IPP expression is passed unconditionally rather than
 * under WITH_IPP - verify the macro copes with builds that lack IPP.
 */
STD_SPEED_TEST(set32u_speed_test, UINT32, UINT32, dst=dst,
	TRUE, memset32u_naive(constant, dst, size),
#ifdef WITH_SSE2
	TRUE, sse2_set_32u(constant, dst, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
#else
	FALSE, PRIM_NOP, 0, FALSE,
#endif
	TRUE, ipp_wrapper_set_32u(constant, dst, size));

int test_set32u_speed(void)
{
	UINT32 ALIGN(dst[MAX_TEST_SIZE+1]);
	set32u_speed_test("set32u", "aligned", NULL, NULL, 0xdeadbeef, dst,
		set_sizes, NUM_SET_SIZES, MEMSET32_PRETEST_ITERATIONS, TEST_TIME);
#if 0
	/* Not really necessary; should be almost as fast. */
	set32u_speed_test("set32u", "unaligned", NULL, NULL, dst+1,
		set_sizes, NUM_SET_SIZES, MEMSET32_PRETEST_ITERATIONS, TEST_TIME);
#endif