コード例 #1
0
ファイル: buckets.c プロジェクト: ColumPaget/Alaya
/*
 * Map a byte buffer to a bucket index using MMX arithmetic.
 *
 * key        buffer to hash
 * size       number of bytes in key; buffers shorter than 8 bytes are
 *            handed to fnv_data2bucket() unchanged
 * NoOfItems  number of buckets
 *
 * Returns the low 32 bits of the accumulator, folded to a non-negative
 * value and reduced modulo NoOfItems.  Returns 0 when NoOfItems is 0 on
 * the MMX path, instead of dividing by zero.
 */
unsigned int mmx_hash_bucket_data(unsigned char *key, int size, int NoOfItems)
{
	const char *p, *end;
	__m64 acc, lanes;
	long long folded;
	int low32;

	if (size < 8) return(fnv_data2bucket(key, size, NoOfItems));

	/* No bucket exists to index into; avoid the modulo-by-zero below. */
	if (NoOfItems == 0) return(0);

	/* Plain char is kept on purpose so the per-byte values fed into the
	   accumulator are the same as before. */
	p = (const char *) key;
	end = p + size;

	_mm_empty();                            // emms
	acc = _mm_set1_pi32(FNV_INIT_VAL);

	/*
	 * NOTE(review): each pass consumes 8 bytes but only mixes in the
	 * single bytes at offsets 0 and 4, and a tail of fewer than 8 bytes
	 * is never read.  32-bit loads may have been intended; left as-is
	 * because changing it would alter every bucket assignment.
	 */
	while ((end - p) > 7)
	{
		lanes = _mm_setr_pi32(*p, *(p + 4));
		acc = _mm_add_pi16(acc, lanes);
		acc = _mm_slli_pi32(acc, 3);
		p += 8;
	}

	low32 = _mm_cvtsi64_si32(acc);
	_mm_empty();                            // emms

	/*
	 * Fold negatives in a wider type: "1 - val" on an int overflows for
	 * INT_MIN and -INT_MAX, which used to leave the value negative and
	 * produce an out-of-range index once converted to unsigned.
	 */
	folded = low32;
	if (folded < 0) folded = 1 - folded;

	return((unsigned int) (folded % NoOfItems));
}
コード例 #2
0
ファイル: mmx-psubd-2.c プロジェクト: MaxKellermann/gcc
/* Runs test() (defined elsewhere) on two packed pairs of 32-bit ints and
   aborts unless the result equals the lane-by-lane difference computed
   with scalar arithmetic.  */
static void
TEST (void)
{
  __m64_union minuend, subtrahend;
  __m64_union actual, expected;
  int lane;

  minuend.as_m64 = _mm_setr_pi32 (30, 90);
  subtrahend.as_m64 = _mm_setr_pi32 (76, -100);

  actual.as_m64 = test (minuend.as_m64, subtrahend.as_m64);

  /* Scalar reference: one subtraction per 32-bit lane.  */
  lane = 0;
  while (lane < 2)
    {
      expected.as_int[lane] = minuend.as_int[lane] - subtrahend.as_int[lane];
      lane++;
    }

  if (actual.as_m64 != expected.as_m64)
    abort ();
}
コード例 #3
0
ファイル: sse-cvtpi32x2ps-1.c プロジェクト: MaxKellermann/gcc
/* Feeds two packed pairs of 32-bit ints to test() (defined elsewhere) and
   aborts if check_union128() reports that the 128-bit result differs from
   the four expected floats.  */
static void
TEST (void)
{
  /* The same four values as the integer inputs below, as floats.  */
  float expected[4] = {1000.0, -20000.0, 43.0, 546.0};
  __m64_union first_pair, second_pair;
  union128 result;

  first_pair.as_m64 = _mm_setr_pi32 (1000, -20000);
  second_pair.as_m64 = _mm_setr_pi32 (43, 546);

  result.x = test (first_pair.as_m64, second_pair.as_m64);

  if (check_union128 (result, expected))
    abort ();
}
コード例 #4
0
ファイル: r_drawt_mmx.cpp プロジェクト: JohnnyonFlame/odamex
void r_dimpatchD_MMX(const DCanvas *const cvs, argb_t color, int alpha, int x1, int y1, int w, int h)
{
	int x, y, i;
	argb_t *line;
	int invAlpha = 256 - alpha;

	int dpitch = cvs->pitch / sizeof(DWORD);
	line = (argb_t *)cvs->buffer + y1 * dpitch;

	int batches = w / 2;
	int remainder = w & 1;

	// MMX temporaries:
	const __m64 upper8mask = _mm_set_pi16(0, 0xff, 0xff, 0xff);
	const __m64 blendAlpha = _mm_set_pi16(0, alpha, alpha, alpha);
	const __m64 blendInvAlpha = _mm_set_pi16(0, invAlpha, invAlpha, invAlpha);
	const __m64 blendColor = _mm_set_pi16(0, RPART(color), GPART(color), BPART(color));
	const __m64 blendMult = _mm_mullo_pi16(blendColor, blendAlpha);

	for (y = y1; y < y1 + h; y++)
	{
		// MMX optimize the bulk in batches of 2 colors:
		for (i = 0, x = x1; i < batches; ++i, x += 2)
		{
#if 1
			const __m64 input = _mm_setr_pi32(line[x + 0], line[x + 1]);
#else
			// NOTE(jsd): No guarantee of 64-bit alignment; cannot use.
			const __m64 input = *((__m64 *)line[x]);
#endif
			const __m64 output = blend2vs1_mmx(input, blendMult, blendInvAlpha, upper8mask);
#if 1
			line[x+0] = _mm_cvtsi64_si32(_mm_srli_si64(output, 32*0));
			line[x+1] = _mm_cvtsi64_si32(_mm_srli_si64(output, 32*1));
#else
			// NOTE(jsd): No guarantee of 64-bit alignment; cannot use.
			*((__m64 *)line[x]) = output;
#endif
		}

		if (remainder)
		{
			// Pick up the remainder:
			for (; x < x1 + w; x++)
			{
				line[x] = alphablend1a(line[x], color, alpha);
			}
		}

		line += dpitch;
	}

	// Required to reset FP:
	_mm_empty();
}
コード例 #5
0
ファイル: r_drawt_mmx.cpp プロジェクト: JohnnyonFlame/odamex
void rtv_lucent4cols_MMX(byte *source, argb_t *dest, int bga, int fga)
{
	// SSE2 temporaries:
	const __m64 upper8mask = _mm_set_pi16(0, 0xff, 0xff, 0xff);
	const __m64 fgAlpha = _mm_set_pi16(0, fga, fga, fga);
	const __m64 bgAlpha = _mm_set_pi16(0, bga, bga, bga);

#if 1
	const __m64 bgColors01 = _mm_setr_pi32(dest[0], dest[1]);
#else
	const __m64 bgColors01 = *((__m64 *)&dest[0]);
#endif
	const __m64 fgColors01 = _mm_setr_pi32(
		rt_mapcolor<argb_t>(dcol.colormap, source[0]),
		rt_mapcolor<argb_t>(dcol.colormap, source[1])
	);

	const __m64 finalColors01 = _mm_packs_pu16(
		_mm_srli_pi16(
			_mm_adds_pi16(
				_mm_mullo_pi16(_mm_and_si64(_mm_unpacklo_pi8(bgColors01, bgColors01), upper8mask), bgAlpha),
				_mm_mullo_pi16(_mm_and_si64(_mm_unpacklo_pi8(fgColors01, fgColors01), upper8mask), fgAlpha)
			),
			8
		),
		_mm_srli_pi16(
			_mm_adds_pi16(
				_mm_mullo_pi16(_mm_and_si64(_mm_unpackhi_pi8(bgColors01, bgColors01), upper8mask), bgAlpha),
				_mm_mullo_pi16(_mm_and_si64(_mm_unpackhi_pi8(fgColors01, fgColors01), upper8mask), fgAlpha)
			),
			8
		)
	);

#if 1
	const __m64 bgColors23 = _mm_setr_pi32(dest[2], dest[3]);
#else
	// NOTE(jsd): No guarantee of 64-bit alignment; cannot use.
	const __m64 bgColors23 = *((__m64 *)&dest[2]);
#endif
	const __m64 fgColors23 = _mm_setr_pi32(
		rt_mapcolor<argb_t>(dcol.colormap, source[2]),
		rt_mapcolor<argb_t>(dcol.colormap, source[3])
	);

	const __m64 finalColors23 = _mm_packs_pu16(
		_mm_srli_pi16(
			_mm_adds_pi16(
				_mm_mullo_pi16(_mm_and_si64(_mm_unpacklo_pi8(bgColors23, bgColors23), upper8mask), bgAlpha),
				_mm_mullo_pi16(_mm_and_si64(_mm_unpacklo_pi8(fgColors23, fgColors23), upper8mask), fgAlpha)
			),
			8
		),
		_mm_srli_pi16(
			_mm_adds_pi16(
				_mm_mullo_pi16(_mm_and_si64(_mm_unpackhi_pi8(bgColors23, bgColors23), upper8mask), bgAlpha),
				_mm_mullo_pi16(_mm_and_si64(_mm_unpackhi_pi8(fgColors23, fgColors23), upper8mask), fgAlpha)
			),
			8
		)
	);
	
#if 1
	dest[0] = _mm_cvtsi64_si32(_mm_srli_si64(finalColors01, 32*0));
	dest[1] = _mm_cvtsi64_si32(_mm_srli_si64(finalColors01, 32*1));
	dest[2] = _mm_cvtsi64_si32(_mm_srli_si64(finalColors23, 32*0));
	dest[3] = _mm_cvtsi64_si32(_mm_srli_si64(finalColors23, 32*1));
#else
	// NOTE(jsd): No guarantee of 64-bit alignment; cannot use.
	*((__m64 *)&dest[0]) = finalColors01;
	*((__m64 *)&dest[2]) = finalColors23;
#endif

	// Required to reset FP:
	_mm_empty();
}
コード例 #6
0
// Code-generation test wrapper: returns _mm_setr_pi32(a, b) unchanged so the
// compiler output for that one intrinsic can be inspected.
// The directive comments inside the body expect this function's output to
// contain two insertelement operations on <2 x i32>. They appear to be
// pattern-matching directives for an output-checking tool (presumably LLVM
// FileCheck -- confirm), so they must be kept exactly as written.
__m64 test_mm_setr_pi32(int a, int b) {
  // CHECK-LABEL: test_mm_setr_pi32
  // CHECK: insertelement <2 x i32>
  // CHECK: insertelement <2 x i32>
  return _mm_setr_pi32(a, b);
}