/*
 * Codegen test for the _mm_set1_pi8 intrinsic: passes `a` straight to the
 * intrinsic and returns the resulting __m64.
 *
 * NOTE(review): the directive comments inside the body look like LLVM
 * FileCheck patterns (a label for this function followed by eight
 * `insertelement <8 x i8>` matches, presumably one per byte lane of the
 * result). They are matched textually by the test tool, so they must stay
 * byte-for-byte as written -- confirm against the RUN line of the file.
 */
__m64 test_mm_set1_pi8(char a) {
  // CHECK-LABEL: test_mm_set1_pi8
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  return _mm_set1_pi8(a);
}
// MMX implementation of the YUV compare: merges `right` into `image` in place.
//
// The data is walked in 8-byte MMX words (format is U Y V Y, so one 32-bit
// lane holds one U Y V Y group).  For every 32-bit lane the saturating
// difference of the two Y bytes is tested:
//   - m_direction set:   the lane is taken from `right` when (left - right)
//                        saturates to zero for both Y bytes,
//   - m_direction clear: the lane is taken from `right` when (right - left)
//                        saturates to zero for both Y bytes.
// In every other lane the existing `image` data is kept.
//
// The word count is rounded up, so when xsize*ysize*csize is not a multiple
// of 8 the last word reaches past that byte count in both buffers.
void pix_compare :: processYUV_MMX(imageStruct &image, imageStruct &right)
{
  // number of __m64 words covering the image (rounded up)
  long wordCount = image.xsize * image.ysize * image.csize;
  wordCount = wordCount / sizeof(__m64) + (wordCount % sizeof(__m64) != 0);

  __m64 *dst = (__m64*)image.data;
  __m64 *src = (__m64*)right.data;

  // keeps only the Y bytes (odd byte positions) of each word
  const __m64 lumaMask = _mm_setr_pi8((unsigned char)0x00, (unsigned char)0xFF,
                                      (unsigned char)0x00, (unsigned char)0xFF,
                                      (unsigned char)0x00, (unsigned char)0xFF,
                                      (unsigned char)0x00, (unsigned char)0xFF);
  const __m64 allZero = _mm_set1_pi8((unsigned char)0x00);

  // walk the buffers from the last word down to the first
  for (long idx = wordCount - 1; idx >= 0; --idx) {
    const __m64 lhs = dst[idx];
    const __m64 rhs = src[idx];

    // saturating luma difference; operand order depends on the direction
    __m64 diff = m_direction ? _mm_subs_pu8(lhs, rhs)
                             : _mm_subs_pu8(rhs, lhs);
    diff = _mm_and_si64(diff, lumaMask);

    // all-ones in every 32-bit lane whose masked difference is zero
    const __m64 takeRight = _mm_cmpeq_pi32(diff, allZero);

    const __m64 fromRight = _mm_and_si64(rhs, takeRight);
    const __m64 fromLeft  = _mm_andnot_si64(takeRight, lhs);
    dst[idx] = _mm_or_si64(fromLeft, fromRight);
  }

  // leave MMX state so that x87 code can run again
  _mm_empty();
}
/*
 * Adds one constant byte to every element of a byte buffer, in place, with
 * unsigned saturation (results above 255 are clamped to 255).
 *
 *   dest   - buffer of n bytes, updated in place
 *   src1_1 - pointer to the single byte that is added to each element
 *   n      - number of bytes in dest; nothing is written when n <= 0
 *
 * Whole groups of 8 bytes go through the MMX saturating add; the remaining
 * 0..7 bytes are handled by a scalar loop.
 */
static void
composite_add_u8_const_src_mmx (uint8_t *dest, uint8_t *src1_1, int n)
{
  const uint8_t addend = *src1_1;
  /* the constant replicated into all eight byte lanes */
  const __m64 splat = _mm_set1_pi8(addend);
  uint8_t *out = dest;
  int remaining = n;

  /* vector part: 8 bytes per step */
  while (remaining >= 8) {
    __m64 *word = (__m64 *)out;

    *word = _mm_adds_pu8(splat, *word);
    out += 8;
    remaining -= 8;
  }

  /* scalar tail: clamp by hand */
  while (remaining > 0) {
    int sum = *out + addend;

    *out++ = (sum > 255) ? 255 : sum;
    remaining--;
  }

  /* clear MMX state before returning to code that may use x87 */
  _mm_empty();
}
// MMX path for grey images: adds the offset stored in m_offset[chRed] to the
// image data in place, eight bytes per step.
//
// Only (xsize * ysize) >> 3 whole 8-byte words are processed; a trailing
// remainder of fewer than 8 bytes is left untouched.
// With m_saturate set the add clamps at 255 (unsigned saturation), otherwise
// it wraps around.
void pix_offset :: processGrayMMX(imageStruct &image)
{
  const unsigned char grey = m_offset[chRed];

  // number of complete 8-byte words in the image
  int wordsLeft = (image.ysize * image.xsize) >> 3;

  // the offset replicated into all eight byte lanes
  __m64 offsetVec = _mm_set1_pi8(grey);
  __m64 *cursor = reinterpret_cast<__m64*>(image.data);

  _mm_empty();

  if (m_saturate) {
    // saturating add
    for (; wordsLeft != 0; --wordsLeft, ++cursor) {
      *cursor = _mm_adds_pu8(*cursor, offsetVec);
    }
  } else {
    // wrapping add
    for (; wordsLeft != 0; --wordsLeft, ++cursor) {
      *cursor = _mm_add_pi8(*cursor, offsetVec);
    }
  }

  // leave MMX state so that x87 code can run again
  _mm_empty();
}
/* Returns an __m64 built by handing x to _mm_set1_pi8 (x in every byte). */
inline __m64
foo2 (char x)
{
  __m64 splat = _mm_set1_pi8 (x);

  return splat;
}
/*
 * Per-channel maximum of a 3-channel U8 image, MMX version.
 *
 *   res32 - output; res32[0], res32[1] and res32[2] are written
 *   img   - source image (height, stride, width and data pointer are read
 *           through the mlib_ImageGet* accessors)
 *
 * Rows are consumed in groups of three 8-byte words (24 bytes).  24 is a
 * multiple of both the word size and the 3-byte pixel size, so within a row
 * every byte lane of max1/max2/max3 always receives the same channel.
 *
 * NOTE(review): the MLIB_M_IMAGE_MAXIMUM_U8* macros and mmx_write_64 are
 * defined elsewhere; the comments below describe how they are used here,
 * their exact semantics should be confirmed against their definitions.
 */
void mlib_m_ImageMaximum_U8_3(
    mlib_s32 *res32,
    const mlib_image *img)
{
/* src address: sp walks the current row, sl is the start of that row */
	__m64 *sp, *sl;

/* src data: the 8-byte word most recently loaded */
	__m64 sd;

/* max values: max1..max3 are the running accumulators, max0 the combined result */
	__m64 max0, max1, max2, max3;

/* edge mask handed to the masked-maximum macro for a partial word */
	mlib_s32 emask;

/* loop variable: bytes still to process in the current row */
	mlib_s32 n1;

/* height of image */
	mlib_s32 height = mlib_ImageGetHeight(img);

/* elements to next row */
	mlib_s32 slb = mlib_ImageGetStride(img);

/* row length in bytes: three U8 channels per pixel */
	mlib_s32 width = mlib_ImageGetWidth(img) * 3;

/* one past the last valid byte of the current row */
	mlib_u8 *dend;

/* rows are contiguous: process the whole image as one long row */
	if (slb == width) {
		width *= height;
		height = 1;
	}

	sp = sl = (__m64 *) mlib_ImageGetData(img);

/* start every accumulator lane at the smallest U8 value */
	max1 = _mm_set1_pi8(MLIB_U8_MIN);
	max2 = _mm_set1_pi8(MLIB_U8_MIN);
	max3 = _mm_set1_pi8(MLIB_U8_MIN);

	for (; height > 0; height--) {

		n1 = width;
		dend = (mlib_u8 *)sp + width;

/* full 24-byte groups: word 0 -> max1, word 1 -> max2, word 2 -> max3 */
		for (; n1 > 23; n1 -= 24) {
			sd = (*sp++);
			MLIB_M_IMAGE_MAXIMUM_U8(max1, max1, sd);
			sd = (*sp++);
			MLIB_M_IMAGE_MAXIMUM_U8(max2, max2, sd);
			sd = (*sp++);
			MLIB_M_IMAGE_MAXIMUM_U8(max3, max3, sd);
		}

/*
 * Row tail (1..23 bytes): up to three more words, each merged with an
 * edge mask.  A full word uses 0xFF; a partial word of n1 bytes uses
 * 0xFF << (8 - n1), presumably so that only the valid bytes take part.
 */
		if (n1 > 0) {
			emask = (n1 > 7) ? 0xFF : (0xFF << (8 - n1));
			sd = (*sp++);
			MLIB_M_IMAGE_MAXIMUM_U8_M32(max1, max1, sd, emask);

/* bytes left after the first tail word */
			n1 = ((mlib_u8 *)dend - (mlib_u8 *)sp);
			if (n1 > 0) {
				emask = (n1 > 7) ? 0xFF : (0xFF << (8 - n1));
				sd = (*sp++);
				MLIB_M_IMAGE_MAXIMUM_U8_M32(max2, max2, sd, emask);

/* at most 7 bytes can remain here, so the third word is always partial */
				n1 = ((mlib_u8 *)dend - (mlib_u8 *)sp);
				if (n1 > 0) {
					emask = (0xFF << (8 - n1));
					sd = *sp;
					MLIB_M_IMAGE_MAXIMUM_U8_M32(max3, max3, sd, emask);
				}
			}
		}

/* advance to the next row using the stride */
		sp = sl = (__m64 *) ((mlib_u8 *)sl + slb);
	}

/*
 * Fold the three accumulators into max0: shifted copies of max2 and max3,
 * and then of max0 itself, are merged under 64-bit masks.  Afterwards the
 * three lowest bytes of max0 are read out as the per-channel results.
 */
	MLIB_M_IMAGE_MAXIMUM_U8_M64(max0, max1, _mm_srli_si64(max2, 8),
	    mmx_write_64(0x00ffffffffffffffll));
	MLIB_M_IMAGE_MAXIMUM_U8_M64(max0, max0, _mm_slli_si64(max2, 16),
	    mmx_write_64(0x0000000000ff0000ll));
	MLIB_M_IMAGE_MAXIMUM_U8_M64(max0, max0, _mm_srli_si64(max3, 16),
	    mmx_write_64(0x0000ffffffffffffll));
	MLIB_M_IMAGE_MAXIMUM_U8_M64(max0, max0, _mm_slli_si64(max3, 8),
	    mmx_write_64(0x0000000000ffff00ll));
	MLIB_M_IMAGE_MAXIMUM_U8_M64(max0, max0, _mm_srli_si64(max0, 24),
	    mmx_write_64(0x000000ffff000000ll));
	MLIB_M_IMAGE_MAXIMUM_U8_M64(max0, max0, _mm_srli_si64(max0, 24),
	    mmx_write_64(0x0000000000ffffffll));

/* byte 0, byte 1 and byte 2 of max0, each zero-extended to 32 bits */
	res32[0] =
	    _mm_cvtsi64_si32(_mm_and_si64(max0,
	    mmx_write_64(0x00000000000000ffll)));
	res32[1] =
	    _mm_cvtsi64_si32(_mm_and_si64(_mm_srli_si64(max0, 8),
	    mmx_write_64(0x00000000000000ffll)));
	res32[2] =
	    _mm_cvtsi64_si32(_mm_and_si64(_mm_srli_si64(max0, 16),
	    mmx_write_64(0x00000000000000ffll)));

/* clear MMX state before returning */
	_mm_empty();
}
/*
 * Per-channel maximum of a 1-, 2- or 4-channel U8 image, MMX version.
 *
 *   res32 - output; one, two or four values are written depending on the
 *           channel count (see the switch at the end)
 *   img   - source image (height, stride, channel count, width and data
 *           pointer are read through the mlib_ImageGet* accessors)
 *
 * A single 8-byte accumulator is enough here because 1, 2 and 4 all divide
 * the 8-byte word size, so every byte lane always receives the same channel.
 * For any other channel count the switch has no matching case and res32 is
 * left unwritten.
 *
 * NOTE(review): the MLIB_M_* macros are defined elsewhere; the comments below
 * describe how they are used here, their exact semantics should be confirmed
 * against their definitions.
 */
void mlib_m_ImageMaximum_U8_124(
    mlib_s32 *res32,
    const mlib_image *img)
{
/* src address: sp walks the current row, sl is the start of that row */
	__m64 *sp, *sl;

/* src data: the 8-byte word most recently loaded */
	__m64 sd;

/* max values: running per-lane maximum */
	__m64 max;

/* intermediates for the final reduction (named after 4 x s16 and 2 x s32) */
	__m64 _4s16_1, _4s16_2;
	__m64 _2s32_1, _2s32_2;

/* edge mask handed to the masked-maximum macro for a partial word */
	mlib_s32 emask;

/* loop variable: bytes still to process in the current row */
	mlib_s32 n1;

/* height of image */
	mlib_s32 height = mlib_ImageGetHeight(img);

/* elements to next row */
	mlib_s32 slb = mlib_ImageGetStride(img);

/* number of image channels */
	mlib_s32 channels = mlib_ImageGetChannels(img);

/* row length in bytes: one U8 per channel per pixel */
	mlib_s32 width = mlib_ImageGetWidth(img) * channels;

/* scalar halves used by the single-channel reduction */
	mlib_s32 s1, s2;

/* rows are contiguous: process the whole image as one long row */
	if (slb == width) {
		width *= height;
		height = 1;
	}

	sp = sl = (__m64 *) mlib_ImageGetData(img);

/* max values: start every lane at the smallest U8 value */
	max = _mm_set1_pi8(MLIB_U8_MIN);

	for (; height > 0; height--) {

		n1 = width;

/* full 8-byte words */
		for (; n1 > 7; n1 -= 8) {
			sd = (*sp++);
			MLIB_M_IMAGE_MAXIMUM_U8(max, max, sd);
		}

/*
 * Row tail (1..7 bytes): one more word merged with the edge mask
 * 0xFF << (8 - n1), presumably so that only the valid bytes take part.
 */
		if (n1 > 0) {
			emask = (0xFF << (8 - n1));
			sd = *sp;
			MLIB_M_IMAGE_MAXIMUM_U8_M32(max, max, sd, emask);
		}

/* advance to the next row using the stride */
		sp = sl = (__m64 *) ((mlib_u8 *)sl + slb);
	}

/*
 * Reduce the eight byte lanes of max to one value per channel.  Each
 * convert/maximum pair halves the number of candidates.
 */
	switch (channels) {
	case 1: {
/* 8 lanes -> 4 -> 2 -> 1 scalar, stored in res32[0] */
		    MLIB_M_CONVERT_8U8_4S16(_4s16_1, _4s16_2, max);
		    MLIB_M_IMAGE_MAXIMUM_S16(_4s16_1, _4s16_1, _4s16_2);
		    MLIB_M_CONVERT_4S16_2S32(_2s32_1, _2s32_2, _4s16_1);
		    MLIB_M_IMAGE_MAXIMUM_S32(_2s32_1, _2s32_1, _2s32_2);
		    MLIB_M_CONVERT_2S32_S32(s1, s2, _2s32_1);
		    MLIB_M_IMAGE_MAXIMUM(res32[0], s1, s2);
		    break;
	    }

	case 2: {
/* 8 lanes -> 4 -> 2; the pair is stored as one 64-bit write to res32[0..1] */
		    MLIB_M_CONVERT_8U8_4S16(_4s16_1, _4s16_2, max);
		    MLIB_M_IMAGE_MAXIMUM_S16(_4s16_1, _4s16_1, _4s16_2);
		    MLIB_M_CONVERT_4S16_2S32(_2s32_1, _2s32_2, _4s16_1);
		    MLIB_M_IMAGE_MAXIMUM_S32(_2s32_1, _2s32_1, _2s32_2);
		    ((__m64 *) res32)[0] = _2s32_1;
		    break;
	    }

	case 4: {
/*
 * 8 lanes -> 4; both converted pairs are stored with 64-bit writes:
 * _2s32_2 to res32[0..1] and _2s32_1 to res32[2..3].
 */
		    MLIB_M_CONVERT_8U8_4S16(_4s16_1, _4s16_2, max);
		    MLIB_M_IMAGE_MAXIMUM_S16(_4s16_1, _4s16_1, _4s16_2);
		    MLIB_M_CONVERT_4S16_2S32(_2s32_1, _2s32_2, _4s16_1);
		    ((__m64 *) res32)[0] = _2s32_2;
		    ((__m64 *) res32)[1] = _2s32_1;
		    break;
	    }
	}

/* clear MMX state before returning */
	_mm_empty();
}