Пример #1
0
/* ------------------------------------------------------------------------- */
/*
 * Functional test for the sign16s primitive: runs general_sign_16s() and,
 * when WITH_SSE2 is defined and SSSE3 is reported as present,
 * ssse3_sign_16s() over the same random input, then compares every
 * produced element.
 *
 * Returns FAILURE if any element differs, SUCCESS otherwise.
 */
int test_sign16s_func(void)
{
	/* SIGN_TEST_LEN elements are processed starting up to two elements into
	 * each buffer, so every buffer carries SIGN_TEST_PAD spare elements to
	 * keep all reads and writes inside the arrays. */
	enum { SIGN_TEST_LEN = 65535, SIGN_TEST_PAD = 16 };
	INT16 ALIGN(src[SIGN_TEST_LEN + SIGN_TEST_PAD]), ALIGN(d1[SIGN_TEST_LEN + SIGN_TEST_PAD]);
#ifdef WITH_SSE2
	INT16 ALIGN(d2[SIGN_TEST_LEN + SIGN_TEST_PAD]);
	int i;
#endif
	int failed = 0;
	char testStr[256];

	/* Source and destination share the same offset, so both can reach
	 * 16-byte alignment together. */
	testStr[0] = '\0';
	get_random_data(src, sizeof(src));
	general_sign_16s(src+1, d1+1, SIGN_TEST_LEN);
#ifdef WITH_SSE2
	if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
	{
		strcat(testStr, " SSSE3");
		ssse3_sign_16s(src+1, d2+1, SIGN_TEST_LEN);
		/* Output occupies indices 1..SIGN_TEST_LEN inclusive. */
		for (i=1; i<=SIGN_TEST_LEN; ++i)
		{
			if (d1[i] != d2[i])
			{
				printf("SIGN16s-SSE-aligned FAIL[%d] of %d: want %d, got %d\n",
					i, src[i], d1[i], d2[i]);
				++failed;
			}
		}
	}
#endif /* WITH_SSE2 */

	/* Source and destination use different offsets, so they cannot both
	 * reach 16-byte alignment at the same time. */
	get_random_data(src, sizeof(src));
	general_sign_16s(src+1, d1+2, SIGN_TEST_LEN);
#ifdef WITH_SSE2
	if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
	{
		ssse3_sign_16s(src+1, d2+2, SIGN_TEST_LEN);
		/* Output occupies indices 2..SIGN_TEST_LEN+1 inclusive; the matching
		 * input element sits one index lower. */
		for (i=2; i<SIGN_TEST_LEN+2; ++i)
		{
			if (d1[i] != d2[i])
			{
				printf("SIGN16s-SSE-unaligned FAIL[%d] of %d: want %d, got %d\n",
					i, src[i-1], d1[i], d2[i]);
				++failed;
			}
		}
	}
#endif /* WITH_SSE2 */
	if (!failed)
	{
		printf("All sign16s tests passed (%s).\n", testStr);
	}
	return (failed > 0) ? FAILURE : SUCCESS;
}
Пример #2
0
/* ------------------------------------------------------------------------- */
/*
 * Functional test for the sign16s primitive: runs general_sign_16s() and,
 * when _M_IX86_AMD64 is defined and the primitives flags report SSSE3,
 * ssse3_sign_16s() over the same random input, then compares every
 * produced element.
 *
 * Returns FAILURE if any element differs, SUCCESS otherwise.
 */
int test_sign16s_func(void)
{
	/* SIGN_TEST_LEN elements are processed starting up to two elements into
	 * each buffer, so every buffer carries SIGN_TEST_PAD spare elements to
	 * keep all reads and writes inside the arrays. */
	enum { SIGN_TEST_LEN = 65535, SIGN_TEST_PAD = 16 };
	INT16 ALIGN(src[SIGN_TEST_LEN + SIGN_TEST_PAD]), ALIGN(d1[SIGN_TEST_LEN + SIGN_TEST_PAD]), ALIGN(d2[SIGN_TEST_LEN + SIGN_TEST_PAD]);
	int failed = 0;
	int i;
	UINT32 pflags = primitives_get_flags(primitives_get());
	char testStr[256];

	/* Source and destination share the same offset, so both can reach
	 * 16-byte alignment together. */
	testStr[0] = '\0';
	get_random_data(src, sizeof(src));
	general_sign_16s(src+1, d1+1, SIGN_TEST_LEN);
#ifdef _M_IX86_AMD64
	if (pflags & PRIM_X86_SSSE3_AVAILABLE)
	{
		strcat(testStr, " SSSE3");
		ssse3_sign_16s(src+1, d2+1, SIGN_TEST_LEN);
		/* Output occupies indices 1..SIGN_TEST_LEN inclusive. */
		for (i=1; i<=SIGN_TEST_LEN; ++i)
		{
			if (d1[i] != d2[i])
			{
				printf("SIGN16s-SSE-aligned FAIL[%d] of %d: want %d, got %d\n",
					i, src[i], d1[i], d2[i]);
				++failed;
			}
		}
	}
#endif /* _M_IX86_AMD64 */

	/* Source and destination use different offsets, so they cannot both
	 * reach 16-byte alignment at the same time. */
	get_random_data(src, sizeof(src));
	general_sign_16s(src+1, d1+2, SIGN_TEST_LEN);
#ifdef _M_IX86_AMD64
	if (pflags & PRIM_X86_SSSE3_AVAILABLE)
	{
		ssse3_sign_16s(src+1, d2+2, SIGN_TEST_LEN);
		/* Output occupies indices 2..SIGN_TEST_LEN+1 inclusive; the matching
		 * input element sits one index lower. */
		for (i=2; i<SIGN_TEST_LEN+2; ++i)
		{
			if (d1[i] != d2[i])
			{
				printf("SIGN16s-SSE-unaligned FAIL[%d] of %d: want %d, got %d\n",
					i, src[i-1], d1[i], d2[i]);
				++failed;
			}
		}
	}
#endif /* _M_IX86_AMD64 */
	if (!failed)
	{
		printf("All sign16s tests passed (%s).\n", testStr);
	}
	return (failed > 0) ? FAILURE : SUCCESS;
}
Пример #3
0
			if (d1[i] != d2[i])
			{ 
				printf("SIGN16s-SSE-unaligned FAIL[%d] of %d: want %d, got %d\n", 
					i, src[i-1], d1[i], d2[i]); 
				++failed;
			}
		}
	}
#endif /* i386 */
	if (!failed) printf("All sign16s tests passed (%s).\n", testStr);
	return (failed > 0) ? FAILURE : SUCCESS;
}

/* ------------------------------------------------------------------------- */
/* Instantiates sign16s_speed_test through the STD_SPEED_TEST macro, whose
 * definition is not part of this excerpt.
 * NOTE(review): the argument layout appears to be: generated function name,
 * source element type, destination element type, a setup statement, then one
 * group per implementation slot consisting of an enable flag, the call to
 * time and (for the optimized slot) the required feature flag -- confirm
 * against the macro definition.
 * As written, the general_sign_16s and ssse3_sign_16s slots are enabled
 * (TRUE, the latter tied to PRIM_X86_SSSE3_AVAILABLE) and the remaining two
 * slots are disabled (FALSE) with the placeholder statement `dst=dst`. */
STD_SPEED_TEST(sign16s_speed_test, INT16, INT16, dst=dst,
	TRUE, general_sign_16s(src1, dst, size),
	TRUE, ssse3_sign_16s(src1, dst, size), PRIM_X86_SSSE3_AVAILABLE,
	FALSE, dst=dst, 0,
	FALSE, dst=dst);

int test_sign16s_speed(void)
{
	INT16 ALIGN(src[MAX_TEST_SIZE+3]), ALIGN(dst[MAX_TEST_SIZE+3]);
	get_random_data(src, sizeof(src));
	sign16s_speed_test("sign16s", "aligned", src, NULL, 0, dst,
		test_sizes, NUM_TEST_SIZES, SIGN_PRETEST_ITERATIONS, TEST_TIME);
	sign16s_speed_test("sign16s", "unaligned", src+1, NULL, 0, dst,
		test_sizes, NUM_TEST_SIZES, SIGN_PRETEST_ITERATIONS, TEST_TIME);
	return SUCCESS;
}
Пример #4
0
/* ------------------------------------------------------------------------- */
/*
 * SSSE3 implementation of the 16-bit sign primitive.
 *
 * For each of the len elements of pSrc, stores -1, 0 or +1 into pDst
 * according to whether the element is negative, zero or positive.
 *
 * Inputs shorter than 16 elements, and destinations at an odd address, are
 * handed to general_sign_16s() and its status is returned.  Otherwise the
 * function returns PRIMITIVES_SUCCESS.
 */
pstatus_t ssse3_sign_16s(
	const INT16 *pSrc,
	INT16 *pDst,
	INT32 len)
{
	const INT16 *in = pSrc;
	INT16 *out = pDst;
	size_t blocks;

	/* Too short to be worth vectorizing, or a destination at an odd address
	 * (stepping by whole INT16 elements can never bring it to a 16-byte
	 * boundary): use the generic routine. */
	if ((len < 16) || ((ULONG_PTR) pDst & 0x01))
	{
		return general_sign_16s(pSrc, pDst, len);
	}

	/* Scalar steps until the destination sits on a 16-byte boundary. */
	while (((ULONG_PTR) out & 0x0f) != 0)
	{
		const INT16 value = *in++;

		if (value < 0)
		{
			*out = -1;
		}
		else if (value > 0)
		{
			*out = 1;
		}
		else
		{
			*out = 0;
		}
		out++;

		len--;
		if (len == 0)
		{
			return PRIMITIVES_SUCCESS;
		}
	}

	/* Main loop: 32 shorts (four 128-bit vectors) per iteration. */
	blocks = len >> 5;
	len -= blocks << 5;
	if (((ULONG_PTR) in & 0x0f) != 0)
	{
		/* Source is not 16-byte aligned: unaligned loads, aligned stores. */
		for (; blocks > 0; --blocks)
		{
			const __m128i ones = _mm_set1_epi16(1);
			const __m128i s0 = _mm_lddqu_si128((const __m128i *) (in));
			const __m128i s1 = _mm_lddqu_si128((const __m128i *) (in + 8));
			const __m128i s2 = _mm_lddqu_si128((const __m128i *) (in + 16));
			const __m128i s3 = _mm_lddqu_si128((const __m128i *) (in + 24));

			/* Applying each source lane's sign to the constant 1 yields
			 * -1, 0 or +1 in that lane. */
			_mm_store_si128((__m128i *) (out),      _mm_sign_epi16(ones, s0));
			_mm_store_si128((__m128i *) (out + 8),  _mm_sign_epi16(ones, s1));
			_mm_store_si128((__m128i *) (out + 16), _mm_sign_epi16(ones, s2));
			_mm_store_si128((__m128i *) (out + 24), _mm_sign_epi16(ones, s3));

			in += 32;
			out += 32;
		}
	}
	else
	{
		/* Source and destination are both 16-byte aligned. */
		for (; blocks > 0; --blocks)
		{
			const __m128i ones = _mm_set1_epi16(1);
			const __m128i s0 = _mm_load_si128((const __m128i *) (in));
			const __m128i s1 = _mm_load_si128((const __m128i *) (in + 8));
			const __m128i s2 = _mm_load_si128((const __m128i *) (in + 16));
			const __m128i s3 = _mm_load_si128((const __m128i *) (in + 24));

			_mm_store_si128((__m128i *) (out),      _mm_sign_epi16(ones, s0));
			_mm_store_si128((__m128i *) (out + 8),  _mm_sign_epi16(ones, s1));
			_mm_store_si128((__m128i *) (out + 16), _mm_sign_epi16(ones, s2));
			_mm_store_si128((__m128i *) (out + 24), _mm_sign_epi16(ones, s3));

			in += 32;
			out += 32;
		}
	}

	/* Remaining whole vectors: 8 shorts per iteration. */
	blocks = len >> 3;
	len -= blocks << 3;
	for (; blocks > 0; --blocks)
	{
		const __m128i ones = _mm_set1_epi16(1);
		const __m128i s = LOAD_SI128(in);

		_mm_store_si128((__m128i *) out, _mm_sign_epi16(ones, s));
		in += 8;
		out += 8;
	}

	/* Scalar tail: fewer than 8 shorts remain. */
	for (; len > 0; --len)
	{
		const INT16 value = *in++;

		if (value < 0)
		{
			*out++ = -1;
		}
		else if (value > 0)
		{
			*out++ = 1;
		}
		else
		{
			*out++ = 0;
		}
	}

	return PRIMITIVES_SUCCESS;
}