示例#1
0
// Codegen test for _mm_set1_pi8. The FileCheck-style directives inside the
// body expect the emitted IR for this function to contain eight
// `insertelement <8 x i8>` instructions, one per byte lane of the result.
// The directive comments are part of the test and must stay as written.
__m64 test_mm_set1_pi8(char a) {
  // CHECK-LABEL: test_mm_set1_pi8
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  return _mm_set1_pi8(a);
}
示例#2
0
// MMX luma comparison of two images, written back into `image`.
//
// The buffers are walked as 64-bit words, from the last word down to the
// first. Within each word only the odd bytes take part in the comparison
// (the Y bytes, given the U Y V Y byte order noted in the original code).
// The decision is made per 32-bit lane, i.e. per U Y V Y group:
//   - m_direction set:   a lane keeps the left data if any of its Y bytes is
//                        greater in the left image, otherwise it takes the
//                        right data;
//   - m_direction clear: a lane keeps the left data if any of its Y bytes is
//                        greater in the right image, otherwise it takes the
//                        right data.
void pix_compare :: processYUV_MMX(imageStruct &image, imageStruct &right)
{
  // Size of the image in bytes, then in 64-bit words rounded up so that a
  // trailing partial word is processed as well.
  const long byteCount = image.xsize * image.ysize * image.csize;
  long wordCount = byteCount / sizeof(__m64)
                   + ((byteCount % sizeof(__m64)) != 0);

  __m64 *const leftPix  = (__m64*)image.data;
  __m64 *const rightPix = (__m64*)right.data;

  // 0x00 on even bytes, 0xFF on odd bytes: keeps only the Y bytes.
  const unsigned char drop = 0x00;
  const unsigned char keep = 0xFF;
  const __m64 lumaMask = _mm_setr_pi8(drop, keep, drop, keep,
                                      drop, keep, drop, keep);
  const __m64 zeros = _mm_setzero_si64();

  // Read the direction once; it selects the operand order of the saturating
  // subtraction and does not change while the loop runs.
  const bool leftMinusRight = m_direction ? true : false;

  for (long idx = wordCount; idx-- != 0; ) {
    const __m64 l = leftPix[idx];
    const __m64 r = rightPix[idx];

    // Unsigned saturating difference: a byte is non-zero only where the
    // minuend is strictly greater than the subtrahend.
    __m64 diff = leftMinusRight ? _mm_subs_pu8(l, r) : _mm_subs_pu8(r, l);
    diff = _mm_and_si64(diff, lumaMask);

    // All-ones for every 32-bit lane whose masked difference is zero.
    const __m64 takeRight = _mm_cmpeq_pi32(diff, zeros);

    // Blend: right where the selector is set, left elsewhere.
    leftPix[idx] = _mm_or_si64(_mm_andnot_si64(takeRight, l),
                               _mm_and_si64(r, takeRight));
  }

  _mm_empty();
}
示例#3
0
/*
 * Add one constant byte to every element of dest, in place, with unsigned
 * saturation at 255.
 *
 * dest   - buffer of n bytes, updated in place
 * src1_1 - pointer to the constant; only the first byte is read
 * n      - number of bytes to process; values <= 0 leave dest untouched
 *
 * Groups of eight bytes go through the MMX saturating add; whatever is left
 * (fewer than eight bytes) is handled one byte at a time.
 */
static void
composite_add_u8_const_src_mmx (uint8_t *dest, uint8_t *src1_1, int n)
{
  const uint8_t addend = *src1_1;
  const __m64 addend_x8 = _mm_set1_pi8(addend);
  int remaining = n;

  /* Vector part: eight bytes per step. */
  while (remaining >= 8) {
    __m64 *chunk = (__m64 *)dest;

    *chunk = _mm_adds_pu8(addend_x8, *chunk);
    dest += 8;
    remaining -= 8;
  }

  /* Scalar tail: clamp the widened sum to the uint8_t range. */
  while (remaining > 0) {
    int sum = *dest + addend;

    *dest = (sum > 255) ? 255 : sum;
    dest++;
    remaining--;
  }

  /* Leave the MMX state so later floating-point code is safe. */
  _mm_empty();
}
示例#4
0
// Add the grey offset (taken from m_offset[chRed]) to every pixel of a
// single-channel image, in place.
//
// With m_saturate set the addition clamps at 255 (unsigned saturation);
// otherwise it wraps modulo 256.
//
// Pixels are processed eight at a time with MMX. The up to seven pixels left
// over when xsize * ysize is not a multiple of eight are handled by a scalar
// loop with the same arithmetic, so they are no longer skipped.
void pix_offset :: processGrayMMX(imageStruct &image)
{
  const unsigned char grey = m_offset[chRed];

  const int pixelCount = image.ysize * image.xsize;
  int blocks = pixelCount >> 3;   // full groups of eight pixels
  int tail   = pixelCount & 7;    // pixels left after the last full group

  // `register` is not used: the specifier was removed in C++17.
  const __m64 offset_64 = _mm_set1_pi8(grey);
  __m64 *data_p = reinterpret_cast<__m64*>(image.data);

  if(m_saturate) {
    while(blocks--) {
      data_p[0]=_mm_adds_pu8(data_p[0], offset_64);
      data_p++;
    }
  } else {
    while(blocks--) {
      data_p[0]=_mm_add_pi8(data_p[0], offset_64);
      data_p++;
    }
  }
  // Leave the MMX state before any further non-MMX work.
  _mm_empty();

  // Scalar tail: data_p now points just past the last full group.
  unsigned char *rest = reinterpret_cast<unsigned char*>(data_p);
  if(m_saturate) {
    while(tail--) {
      const int sum = *rest + grey;
      *rest++ = static_cast<unsigned char>(sum > 255 ? 255 : sum);
    }
  } else {
    while(tail--) {
      *rest = static_cast<unsigned char>(*rest + grey);
      rest++;
    }
  }
}
示例#5
0
/* Thin wrapper: returns the __m64 produced by _mm_set1_pi8(x), i.e. a
 * vector with x replicated into each of its eight byte lanes. */
inline __m64 foo2 (char x) {
    return _mm_set1_pi8 (x);
}
示例#6
0
/*
 * Per-channel maximum of a 3-channel image, MMX version.
 *
 * res32 - receives three values: res32[0], res32[1], res32[2]
 * img   - source image; height, stride, width and data pointer are read
 *         through the mlib_ImageGet* accessors
 *
 * Each row is consumed in 24-byte steps (three __m64 loads). Because 24 bytes
 * hold exactly eight 3-byte pixels, every byte lane of max1, max2 and max3
 * always sees the same channel. The accumulators are folded together once,
 * after all rows, into the three low bytes of max0.
 *
 * NOTE(review): the MLIB_M_IMAGE_MAXIMUM_* macros are defined elsewhere.
 * The comments below assume (dst, src1, src2[, mask]) argument order and an
 * unsigned byte-wise maximum restricted to the masked lanes - confirm against
 * the macro definitions.
 */
void
mlib_m_ImageMaximum_U8_3(
    mlib_s32 *res32,
    const mlib_image *img)
{
/* src address */
	__m64 *sp, *sl;

/* src data */
	__m64 sd;

/* max values */
	__m64 max0, max1, max2, max3;

/* edge mask */
	mlib_s32 emask;

/* loop variables */
	mlib_s32 n1;

/* height of image */
	mlib_s32 height = mlib_ImageGetHeight(img);

/* elements to next row */
	mlib_s32 slb = mlib_ImageGetStride(img);
	mlib_s32 width = mlib_ImageGetWidth(img) * 3;

	mlib_u8 *dend;

/* rows are contiguous when stride == row bytes: treat the image as one row */
	if (slb == width) {
		width *= height;
		height = 1;
	}

	sp = sl = (__m64 *) mlib_ImageGetData(img);

/* start every accumulator at the smallest U8 value */
	max1 = _mm_set1_pi8(MLIB_U8_MIN);
	max2 = _mm_set1_pi8(MLIB_U8_MIN);
	max3 = _mm_set1_pi8(MLIB_U8_MIN);

	for (; height > 0; height--) {

		n1 = width;
/* one past the last byte of this row */
		dend = (mlib_u8 *)sp + width;

/* full 24-byte groups: one load per accumulator */
		for (; n1 > 23; n1 -= 24) {
			sd = (*sp++);
			MLIB_M_IMAGE_MAXIMUM_U8(max1, max1, sd);
			sd = (*sp++);
			MLIB_M_IMAGE_MAXIMUM_U8(max2, max2, sd);
			sd = (*sp++);
			MLIB_M_IMAGE_MAXIMUM_U8(max3, max3, sd);
		}

/*
 * Row tail: 1..23 bytes left, taken as up to three further loads. Each load
 * still reads a full __m64 even when fewer than 8 bytes remain in the row;
 * emask is built from the number of remaining bytes.
 * NOTE(review): presumably emask is what keeps bytes past the row end out of
 * the maximum - confirm how the _M32 macro interprets it.
 */
		if (n1 > 0) {
			emask = (n1 > 7) ? 0xFF : (0xFF << (8 - n1));
			sd = (*sp++);
			MLIB_M_IMAGE_MAXIMUM_U8_M32(max1, max1, sd, emask);

/* bytes still left in the row after the load above */
			n1 = ((mlib_u8 *)dend - (mlib_u8 *)sp);
			if (n1 > 0) {
				emask = (n1 > 7) ? 0xFF : (0xFF << (8 - n1));
				sd = (*sp++);
				MLIB_M_IMAGE_MAXIMUM_U8_M32(max2, max2, sd,
				    emask);

				n1 = ((mlib_u8 *)dend - (mlib_u8 *)sp);
/* at most 7 bytes can remain here, so no (n1 > 7) case is needed */
				if (n1 > 0) {
					emask = (0xFF << (8 - n1));
					sd = *sp;
					MLIB_M_IMAGE_MAXIMUM_U8_M32(max3, max3,
					    sd, emask);
				}
			}
		}

/* advance to the next row using the stride, not the row width */
		sp = sl = (__m64 *) ((mlib_u8 *)sl + slb);
	}

/*
 * Fold the three accumulators into max0. max2 and max3 are shifted by whole
 * bytes so that their lanes line up with the channel order of max1; the
 * 64-bit masks select the lanes that carry data after each shift.
 */
	MLIB_M_IMAGE_MAXIMUM_U8_M64(max0, max1, _mm_srli_si64(max2, 8),
	    mmx_write_64(0x00ffffffffffffffll));
	MLIB_M_IMAGE_MAXIMUM_U8_M64(max0, max0, _mm_slli_si64(max2, 16),
	    mmx_write_64(0x0000000000ff0000ll));
	MLIB_M_IMAGE_MAXIMUM_U8_M64(max0, max0, _mm_srli_si64(max3, 16),
	    mmx_write_64(0x0000ffffffffffffll));
	MLIB_M_IMAGE_MAXIMUM_U8_M64(max0, max0, _mm_slli_si64(max3, 8),
	    mmx_write_64(0x0000000000ffff00ll));
/* fold max0 onto itself in 3-byte (one pixel) steps down to bytes 0..2 */
	MLIB_M_IMAGE_MAXIMUM_U8_M64(max0, max0, _mm_srli_si64(max0, 24),
	    mmx_write_64(0x000000ffff000000ll));
	MLIB_M_IMAGE_MAXIMUM_U8_M64(max0, max0, _mm_srli_si64(max0, 24),
	    mmx_write_64(0x0000000000ffffffll));

/* bytes 0, 1 and 2 of max0 become res32[0], res32[1] and res32[2] */
	res32[0] = _mm_cvtsi64_si32(_mm_and_si64(max0,
				mmx_write_64(0x00000000000000ffll)));
	res32[1] =
	    _mm_cvtsi64_si32(_mm_and_si64(_mm_srli_si64(max0, 8),
	    mmx_write_64(0x00000000000000ffll)));
	res32[2] =
	    _mm_cvtsi64_si32(_mm_and_si64(_mm_srli_si64(max0, 16),
	    mmx_write_64(0x00000000000000ffll)));

/* leave the MMX state */
	_mm_empty();
}
示例#7
0
/*
 * Per-channel maximum of a 1-, 2- or 4-channel image, MMX version.
 *
 * res32 - receives one value per channel (1, 2 or 4 values)
 * img   - source image; height, stride, channel count, width and data
 *         pointer are read through the mlib_ImageGet* accessors
 *
 * A single 8-byte accumulator is enough here: 8 is a multiple of 1, 2 and 4,
 * so each byte lane always sees the same channel. After all rows the
 * accumulator is widened and folded according to the channel count.
 *
 * For any other channel count the switch has no matching case and res32 is
 * left unwritten.
 *
 * NOTE(review): the MLIB_M_* macros are defined elsewhere. The comments
 * below assume (dst..., src...) argument order and that the CONVERT macros
 * widen without reordering channels - confirm against the macro definitions.
 */
void
mlib_m_ImageMaximum_U8_124(
    mlib_s32 *res32,
    const mlib_image *img)
{
/* src address */
	__m64 *sp, *sl;

/* src data */
	__m64 sd;

/* max values */
	__m64 max;

/* intermediates for the widening reduction: 4 x s16 and 2 x s32 */
	__m64 _4s16_1, _4s16_2;
	__m64 _2s32_1, _2s32_2;

/* edge mask */
	mlib_s32 emask;

/* loop variables */
	mlib_s32 n1;

/* height of image */
	mlib_s32 height = mlib_ImageGetHeight(img);

/* elements to next row */
	mlib_s32 slb = mlib_ImageGetStride(img);

/* number of image channels */
	mlib_s32 channels = mlib_ImageGetChannels(img);
	mlib_s32 width = mlib_ImageGetWidth(img) * channels;

	mlib_s32 s1, s2;

/* rows are contiguous when stride == row bytes: treat the image as one row */
	if (slb == width) {
		width *= height;
		height = 1;
	}

	sp = sl = (__m64 *) mlib_ImageGetData(img);

/* max values: start at the smallest U8 value */
	max = _mm_set1_pi8(MLIB_U8_MIN);

	for (; height > 0; height--) {

		n1 = width;

/* full 8-byte groups */
		for (; n1 > 7; n1 -= 8) {
			sd = (*sp++);
			MLIB_M_IMAGE_MAXIMUM_U8(max, max, sd);
		}

/*
 * Row tail: 1..7 bytes left. A full __m64 is still loaded; emask is built
 * from the number of remaining bytes.
 * NOTE(review): presumably emask is what keeps bytes past the row end out of
 * the maximum - confirm how the _M32 macro interprets it.
 */
		if (n1 > 0) {
			emask = (0xFF << (8 - n1));
			sd = *sp;
			MLIB_M_IMAGE_MAXIMUM_U8_M32(max, max, sd, emask);
		}

/* advance to the next row using the stride, not the row width */
		sp = sl = (__m64 *) ((mlib_u8 *)sl + slb);
	}

	switch (channels) {
/* 1 channel: fold 8 -> 4 -> 2 -> 1 value, stored in res32[0] */
	case 1:
	    {
		    MLIB_M_CONVERT_8U8_4S16(_4s16_1, _4s16_2, max);
		    MLIB_M_IMAGE_MAXIMUM_S16(_4s16_1, _4s16_1, _4s16_2);
		    MLIB_M_CONVERT_4S16_2S32(_2s32_1, _2s32_2, _4s16_1);
		    MLIB_M_IMAGE_MAXIMUM_S32(_2s32_1, _2s32_1, _2s32_2);
		    MLIB_M_CONVERT_2S32_S32(s1, s2, _2s32_1);
		    MLIB_M_IMAGE_MAXIMUM(res32[0], s1, s2);
		    break;
	    }

/* 2 channels: fold 8 -> 4 -> 2 values, stored as one 64-bit write */
	case 2:
	    {
		    MLIB_M_CONVERT_8U8_4S16(_4s16_1, _4s16_2, max);
		    MLIB_M_IMAGE_MAXIMUM_S16(_4s16_1, _4s16_1, _4s16_2);
		    MLIB_M_CONVERT_4S16_2S32(_2s32_1, _2s32_2, _4s16_1);
		    MLIB_M_IMAGE_MAXIMUM_S32(_2s32_1, _2s32_1, _2s32_2);
		    ((__m64 *) res32)[0] = _2s32_1;
		    break;
	    }

/*
 * 4 channels: fold 8 -> 4 values, widen to 32 bits and store as two 64-bit
 * writes; _2s32_2 goes to res32[0..1] and _2s32_1 to res32[2..3].
 */
	case 4:
	    {
		    MLIB_M_CONVERT_8U8_4S16(_4s16_1, _4s16_2, max);
		    MLIB_M_IMAGE_MAXIMUM_S16(_4s16_1, _4s16_1, _4s16_2);
		    MLIB_M_CONVERT_4S16_2S32(_2s32_1, _2s32_2, _4s16_1);
		    ((__m64 *) res32)[0] = _2s32_2;
		    ((__m64 *) res32)[1] = _2s32_1;
		    break;
	    }
	}

/* leave the MMX state */
	_mm_empty();
}