Exemplo n.º 1
0
/*
 * MMX path: subtracts `right` from `image` in place, one 8-byte word at a time.
 *
 * For every byte the result is  clamp_to_u8( max(0, left + bias - right) ),
 * where bias is 0x80 for bytes at even offsets and 0 for bytes at odd offsets.
 * NOTE(review): the even-offset bytes are presumably the chroma samples of a
 * U-Y-V-Y layout, re-centred around 128 -- confirm against the pixel format.
 *
 * image : left operand; its data buffer is overwritten with the result.
 * right : right operand; only read.
 */
void pix_subtract :: processYUV_MMX (imageStruct &image, imageStruct &right){
  const int byteCount = image.xsize * image.ysize * image.csize;
  __m64 *dst = (__m64*)image.data;
  __m64 *src = (__m64*)right.data;

  // Number of 8-byte words, rounded up: a trailing partial word is processed
  // as a whole word.
  // NOTE(review): this assumes both buffers are padded to a multiple of
  // 8 bytes -- confirm with the allocator of imageStruct::data.
  int wordCount = byteCount/sizeof(__m64) + (byteCount%sizeof(__m64)!=0);

  const __m64 zero = _mm_setzero_si64();
  // 16-bit lanes 0 and 2 carry the bias, lanes 1 and 3 carry none.
  const __m64 bias = _mm_setr_pi16(0x80, 0x00, 0x80, 0x00);

  // Walk the buffers from the last word down to the first.
  for (int i = wordCount; i-- != 0; ) {
    const __m64 lhs = dst[i];
    const __m64 rhs = src[i];

    // Widen the 8 unsigned bytes of each operand into two vectors of
    // four 16-bit lanes so the biased sum cannot overflow.
    __m64 lhsLo = _mm_unpacklo_pi8 (lhs, zero);
    const __m64 rhsLo = _mm_unpacklo_pi8 (rhs, zero);
    __m64 lhsHi = _mm_unpackhi_pi8 (lhs, zero);
    const __m64 rhsHi = _mm_unpackhi_pi8 (rhs, zero);

    // (left + bias) - right, with the unsigned subtraction saturating at 0.
    lhsLo = _mm_subs_pu16(_mm_adds_pu16(lhsLo, bias), rhsLo);
    lhsHi = _mm_subs_pu16(_mm_adds_pu16(lhsHi, bias), rhsHi);

    // Narrow back to bytes; values above 255 saturate to 255.
    dst[i] = _mm_packs_pu16(lhsLo, lhsHi);
  }

  // Leave MMX state so that later x87 floating-point code works.
  _mm_empty();
}
Exemplo n.º 2
0
/*
 * Codegen test for the _mm_setr_pi16 intrinsic: forwards the four 16-bit
 * arguments unchanged and returns the resulting __m64.
 *
 * The comment lines inside the body are pattern-matching directives for the
 * test driver, not documentation: they expect the function's label followed
 * by four `insertelement <4 x i16>` instructions in the generated output.
 * Do not reword, reorder or remove them, and keep the body a single call so
 * that the emitted instructions stay the ones being matched.
 */
__m64 test_mm_setr_pi16(short a, short b, short c, short d) {
  // CHECK-LABEL: test_mm_setr_pi16
  // CHECK: insertelement <4 x i16>
  // CHECK: insertelement <4 x i16>
  // CHECK: insertelement <4 x i16>
  // CHECK: insertelement <4 x i16>
  return _mm_setr_pi16(a, b, c, d);
}
Exemplo n.º 3
0
     15-Dec-09               initial coding                           gpk
     16-Jan-11  compute rgba1 directly, not using delta_y; fix        gpk
                reference URL
     20-Feb-11  sum u_vec & v_vec before shifting to match order      gpk
                of operations in scalar.c code
		
 ************************************************************************* */

void  yuv422rgb_mmx1(const unsigned char *  __restrict__ sourcep,
		     int source_byte_count,
		     unsigned char * __restrict__ destp)
{
  const unsigned char *source_endp;
  const unsigned char *vector_endp;
  int remainder;
  const __m64 u_coeff = _mm_setr_pi16(0, -22, 113, 0);
  const __m64 v_coeff = _mm_setr_pi16(90, -46, 0,  0);
  __m64 y0_vec, y1_vec, u_vec, v_vec, uv_vec, rgba0, rgba1;
  short  y0, u, y1, v;
  const unsigned char alpha = 255;
  
  
  /* we're working with things in 4-byte macropixels  */
  remainder = source_byte_count % 4;

  source_endp = sourcep + source_byte_count;
  vector_endp = source_endp - remainder;

  while (sourcep < vector_endp)
    {
     /* pull YUYV from the four byte macropixel starting at sourcep. */