Ejemplo n.º 1
0
void pix_offset :: processRGBAMMX(imageStruct &image)
{
  char  R = m_offset[chRed];
  char  G = m_offset[chGreen];
  char  B = m_offset[chBlue];
  char  A = m_offset[chAlpha];

  register int pixsize = (image.ysize * image.xsize)>>1;

  register __m64 offset_64 = _mm_setr_pi8(R, G, B, A, R, G, B, A);
  register __m64*data_p= (__m64*)image.data;
  _mm_empty();

  if(m_saturate) {
    while(pixsize--) {
      data_p[0]=_mm_adds_pu8(data_p[0], offset_64);
      data_p++;
    }
  } else {
    while(pixsize--) {
      data_p[0]=_mm_add_pi8(data_p[0], offset_64);
      data_p++;
    }
  }
  _mm_empty();
}
Ejemplo n.º 2
0
void pix_diff :: processYUV_MMX (imageStruct &image, imageStruct &right){
  int datasize =   image.xsize * image.ysize * image.csize;
  __m64*leftPix =  (__m64*)image.data;
  __m64*rightPix = (__m64*)right.data;

  datasize=datasize/sizeof(__m64)+(datasize%sizeof(__m64)!=0);
  __m64 mask = _mm_setr_pi8(0x40, 0x00, 0x40, 0x00,
			    0x40, 0x00, 0x40, 0x00);
  __m64 l, r, b;
  while (datasize--) {
    l=leftPix[datasize];
    r=rightPix[datasize];

    l=_mm_adds_pu8(l, mask);
    r=_mm_subs_pu8(r, mask);

    b  = l;
    b  = _mm_subs_pu8     (b, r);
    r  = _mm_subs_pu8     (r, l);
    b  = _mm_or_si64      (b, r);

    leftPix[datasize]=b;
  }
  _mm_empty();
}
Ejemplo n.º 3
0
__m64 test_mm_setr_pi8(char a, char b, char c, char d, char e, char f, char g, char h) {
  // CHECK-LABEL: test_mm_setr_pi8
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  // CHECK: insertelement <8 x i8>
  return _mm_setr_pi8(a, b, c, d, e, f, g, h);
}
Ejemplo n.º 4
0
void pix_offset :: processYUVMMX(imageStruct &image)
{
  register int pixsize = (image.ysize * image.xsize)>>2;

  register __m64 offset_64 = _mm_setr_pi8(U, Y, V, Y, U, Y, V, Y);
  register __m64*data_p= (__m64*)image.data;
  _mm_empty();

  while(pixsize--) {
    data_p[0]=_mm_add_pi8(data_p[0], offset_64);
    data_p++;
  }
  _mm_empty();
}
Ejemplo n.º 5
0
void pix_compare :: processYUV_MMX(imageStruct &image, imageStruct &right)
{
  long datasize =   image.xsize * image.ysize * image.csize;
  datasize=datasize/sizeof(__m64)+(datasize%sizeof(__m64)!=0);
  __m64*leftPix =  (__m64*)image.data;
  __m64*rightPix = (__m64*)right.data;

  __m64 l, r, b;
  __m64 mask = _mm_setr_pi8((unsigned char)0x00,
			    (unsigned char)0xFF,
			    (unsigned char)0x00,
			    (unsigned char)0xFF,
			    (unsigned char)0x00,
			    (unsigned char)0xFF,
			    (unsigned char)0x00,
			    (unsigned char)0xFF);
  __m64 zeros = _mm_set1_pi8((unsigned char)0x00);
  //format is U Y V Y
  if (m_direction) {
    while(datasize--){
      l=leftPix[datasize];
      r=rightPix[datasize];
      b=_mm_subs_pu8(l, r);
      b=_mm_and_si64(b, mask);
      b=_mm_cmpeq_pi32(b, zeros);
      r=_mm_and_si64(r, b);
      l=_mm_andnot_si64(b, l);

      leftPix[datasize]=_mm_or_si64(l, r);
    }
  } else {
    while(datasize--){
      l=leftPix[datasize];
      r=rightPix[datasize];
      b=_mm_subs_pu8(r, l);
      b=_mm_and_si64(b, mask);
      b=_mm_cmpeq_pi32(b, zeros);
      r=_mm_and_si64(r, b);
      l=_mm_andnot_si64(b, l);

      leftPix[datasize]=_mm_or_si64(l, r);
    }
  }
  _mm_empty();
}
Ejemplo n.º 6
0
void pix_add :: processYUV_MMX (imageStruct &image, imageStruct &right){
  int datasize =   image.xsize * image.ysize * image.csize;
  __m64*leftPix =  reinterpret_cast<__m64*>(image.data);
  __m64*rightPix = reinterpret_cast<__m64*>(right.data);

  datasize=datasize/sizeof(__m64)+(datasize%sizeof(__m64)!=0);
  __m64 mask = _mm_setr_pi8(0x40, 0x00, 0x40, 0x00,
			    0x40, 0x00, 0x40, 0x00);
  __m64 l, r;
  while (datasize--) {
    l=leftPix[datasize];
    r=rightPix[datasize];
    l=_mm_subs_pu8(l, mask);
    r=_mm_subs_pu8(r, mask);

    l=_mm_adds_pu8(l,r);

    leftPix[datasize]=l;
  }
  _mm_empty();
}
Ejemplo n.º 7
0
void pix_multiply :: processYUV_MMX(imageStruct &image, imageStruct &right)
{
    int datasize =   image.xsize * image.ysize * image.csize;
    __m64*leftPix =  (__m64*)image.data;
    __m64*rightPix = (__m64*)right.data;

    datasize=datasize/sizeof(__m64)+(datasize%sizeof(__m64)!=0);

    __m64 l0, r0, l1, r1;
    __m64 mask= _mm_setr_pi8((unsigned char)0xFF,
                             (unsigned char)0x00,
                             (unsigned char)0xFF,
                             (unsigned char)0x00,
                             (unsigned char)0xFF,
                             (unsigned char)0x00,
                             (unsigned char)0xFF,
                             (unsigned char)0x00);
    __m64 yuvclamp0 = _mm_setr_pi8((unsigned char)0x00,
                                   (unsigned char)0x10,
                                   (unsigned char)0x00,
                                   (unsigned char)0x10,
                                   (unsigned char)0x00,
                                   (unsigned char)0x10,
                                   (unsigned char)0x00,
                                   (unsigned char)0x10);
    __m64 yuvclamp1 = _mm_setr_pi8((unsigned char)0x00,
                                   (unsigned char)0x24,
                                   (unsigned char)0x00,
                                   (unsigned char)0x24,
                                   (unsigned char)0x00,
                                   (unsigned char)0x24,
                                   (unsigned char)0x00,
                                   (unsigned char)0x24);
    __m64 yuvclamp2 = _mm_setr_pi8((unsigned char)0x00,
                                   (unsigned char)0x14,
                                   (unsigned char)0x00,
                                   (unsigned char)0x14,
                                   (unsigned char)0x00,
                                   (unsigned char)0x14,
                                   (unsigned char)0x00,
                                   (unsigned char)0x14);

    __m64 null64 = _mm_setzero_si64();
    while(datasize--)    {
        r1=rightPix[datasize];
        l1=leftPix [datasize];

        r1=_mm_or_si64(r1, mask);

        l0=_mm_unpacklo_pi8(l1, null64);
        r0=_mm_unpacklo_pi8(r1, null64);
        l1=_mm_unpackhi_pi8(l1, null64);
        r1=_mm_unpackhi_pi8(r1, null64);

        l0=_mm_mullo_pi16  (l0, r0);
        l1=_mm_mullo_pi16  (l1, r1);

        l0=_mm_srli_pi16(l0, 8);
        l1=_mm_srli_pi16(l1, 8);

        l0=_mm_packs_pu16(l0, l1);

        l0=_mm_subs_pu8(l0, yuvclamp0);
        l0=_mm_adds_pu8(l0, yuvclamp1);
        l0=_mm_subs_pu8(l0, yuvclamp2);


        leftPix[datasize]=l0;
    }
    _mm_empty();
}