Exemplo n.º 1
0
void
yuv411planar_to_rgb_mmx (const unsigned char *yuv, unsigned char *rgb,
			 unsigned int w, unsigned int h)
{
  unsigned int xx, yy;
  register const unsigned char *yp1, *up, *vp;
  unsigned char *dp1;

  /* plane pointers */
  yp1 = yuv;
  up = yuv + (w * h);
  vp = up + (w * (h / 4));
  /* destination pointers */
  dp1 = rgb;



  yp1 = yuv;
  up = yuv + (w * h);
  vp = up + ((w / 2) * (h / 2));
  dp1 = rgb;
  for (yy = 0; yy < h; yy++)
    {
      for (xx = 0; xx < w; xx += 8)
	{
	  movq_m2r(*yp1, mm0);
	  movq_r2r(mm0, mm1);
	  psrlw_i2r(8, mm0);
	  psllw_i2r(8, mm1);
	  psrlw_i2r(8, mm1);

	  pxor_r2r(mm7, mm7);
	  movd_m2r(*up, mm3);
	  movd_m2r(*vp, mm2);

	  punpcklbw_r2r(mm7, mm2);
	  punpcklbw_r2r(mm7, mm3);

	  movq_m2r(CONST_16, mm4);
	  psubsw_r2r(mm4, mm0);
	  psubsw_r2r(mm4, mm1);

	  movq_m2r(CONST_128, mm5);
	  psubsw_r2r(mm5, mm2);
	  psubsw_r2r(mm5, mm3);

	  movq_m2r(CONST_YMUL, mm4);
	  pmullw_r2r(mm4, mm0);
	  pmullw_r2r(mm4, mm1);

	  movq_m2r(CONST_CRVCRV, mm7);
	  pmullw_r2r(mm3, mm7);

	  movq_m2r(CONST_CBUCBU, mm6);
	  pmullw_r2r(mm2, mm6);

	  movq_m2r(CONST_CGUCGU, mm5);
	  pmullw_r2r(mm2, mm5);

	  movq_m2r(CONST_CGVCGV, mm4);
	  pmullw_r2r(mm3, mm4);

	  movq_r2r(mm0, mm2);
	  paddsw_r2r(mm7, mm2);
	  paddsw_r2r(mm1, mm7);

	  psraw_i2r(RES, mm2);
	  psraw_i2r(RES, mm7);
	  packuswb_r2r(mm7, mm2);

	  pxor_r2r(mm7, mm7);
	  movq_r2r(mm2, mm3);
	  punpckhbw_r2r(mm7, mm2);
	  punpcklbw_r2r(mm3, mm7);
	  por_r2r(mm7, mm2);

	  movq_r2r(mm0, mm3);
	  psubsw_r2r(mm5, mm3);
	  psubsw_r2r(mm4, mm3);
	  paddsw_m2r(CONST_32, mm3);

	  movq_r2r(mm1, mm7);
	  psubsw_r2r(mm5, mm7);
	  psubsw_r2r(mm4, mm7);
	  paddsw_m2r(CONST_32, mm7);

	  psraw_i2r(RES, mm3);
	  psraw_i2r(RES, mm7);
	  packuswb_r2r(mm7, mm3);

	  pxor_r2r(mm7, mm7);
	  movq_r2r(mm3, mm4);
	  punpckhbw_r2r(mm7, mm3);
	  punpcklbw_r2r(mm4, mm7);
	  por_r2r(mm7, mm3);

	  movq_m2r(CONST_32, mm4);
	  paddsw_r2r(mm6, mm0);
	  paddsw_r2r(mm6, mm1);
	  paddsw_r2r(mm4, mm0);
	  paddsw_r2r(mm4, mm1);
	  psraw_i2r(RES, mm0);
	  psraw_i2r(RES, mm1);
	  packuswb_r2r(mm1, mm0);

	  pxor_r2r(mm7, mm7);
	  movq_r2r(mm0, mm5);
	  punpckhbw_r2r(mm7, mm0);
	  punpcklbw_r2r(mm5, mm7);
	  por_r2r(mm7, mm0);

	  pxor_r2r(mm1, mm1);
	  movq_r2r(mm0, mm5);
	  movq_r2r(mm3, mm6);
	  movq_r2r(mm2, mm7);
	  punpckhbw_r2r(mm3, mm2);
	  punpcklbw_r2r(mm6, mm7);
	  punpckhbw_r2r(mm1, mm0);
	  punpcklbw_r2r(mm1, mm5);

	  movq_r2r(mm7, mm1);
	  punpckhwd_r2r(mm5, mm7);
	  punpcklwd_r2r(mm5, mm1);

	  movq_r2r(mm2, mm4);
	  punpckhwd_r2r(mm0, mm2);
	  punpcklwd_r2r(mm0, mm4);

	  movntq_r2m(mm1, *(dp1));
	  movntq_r2m(mm7, *(dp1 + 8));
	  movntq_r2m(mm4, *(dp1 + 16));
	  movntq_r2m(mm2, *(dp1 + 24));

	  yp1 += 8;
	  up += 4;
	  vp += 4;
	  dp1 += 8 * 4;
	}
      if (yy & 0x1)
	{
	  up -= w / 2;
	  vp -= w / 2;
	}
    }
  emms();
}
Exemplo n.º 2
0
static void
_evas_yv12torgb_sse(unsigned char **yuv, unsigned char *rgb, int w, int h)
{
#ifdef BUILD_MMX
   int xx, yy;
   register unsigned char *yp1, *up, *vp;
   unsigned char *dp1;

   /* destination pointers */
   dp1 = rgb;

   for (yy = 0; yy < h; yy++)
     {
	/* plane pointers */
	yp1 = yuv[yy];
	up = yuv[h + (yy / 2)];
	vp = yuv[h + (h / 2) + (yy / 2)];
	for (xx = 0; xx < (w - 7); xx += 8)
	  {
	     movd_m2r(*up, mm3);
	     movd_m2r(*vp, mm2);
	     movq_m2r(*yp1, mm0);

	     pxor_r2r(mm7, mm7);
	     punpcklbw_r2r(mm7, mm2);
	     punpcklbw_r2r(mm7, mm3);

	     movq_r2r(mm0, mm1);
	     psrlw_i2r(8, mm0);
	     psllw_i2r(8, mm1);
	     psrlw_i2r(8, mm1);

	     movq_m2r(CONST_16, mm4);
	     psubsw_r2r(mm4, mm0);
	     psubsw_r2r(mm4, mm1);

	     movq_m2r(CONST_128, mm5);
	     psubsw_r2r(mm5, mm2);
	     psubsw_r2r(mm5, mm3);

	     movq_m2r(CONST_YMUL, mm4);
	     pmullw_r2r(mm4, mm0);
	     pmullw_r2r(mm4, mm1);

	     movq_m2r(CONST_CRVCRV, mm7);
	     pmullw_r2r(mm3, mm7);
	     movq_m2r(CONST_CBUCBU, mm6);
	     pmullw_r2r(mm2, mm6);
	     movq_m2r(CONST_CGUCGU, mm5);
	     pmullw_r2r(mm2, mm5);
	     movq_m2r(CONST_CGVCGV, mm4);
	     pmullw_r2r(mm3, mm4);

	     movq_r2r(mm0, mm2);
	     paddsw_r2r(mm7, mm2);
	     paddsw_r2r(mm1, mm7);

	     psraw_i2r(RES, mm2);
	     psraw_i2r(RES, mm7);
	     packuswb_r2r(mm7, mm2);

	     pxor_r2r(mm7, mm7);
	     movq_r2r(mm2, mm3);
	     punpckhbw_r2r(mm7, mm2);
	     punpcklbw_r2r(mm3, mm7);
	     por_r2r(mm7, mm2);

	     movq_r2r(mm0, mm3);
	     psubsw_r2r(mm5, mm3);
	     psubsw_r2r(mm4, mm3);
	     paddsw_m2r(CONST_32, mm3);

	     movq_r2r(mm1, mm7);
	     psubsw_r2r(mm5, mm7);
	     psubsw_r2r(mm4, mm7);
	     paddsw_m2r(CONST_32, mm7);

	     psraw_i2r(RES, mm3);
	     psraw_i2r(RES, mm7);
	     packuswb_r2r(mm7, mm3);

	     pxor_r2r(mm7, mm7);
	     movq_r2r(mm3, mm4);
	     punpckhbw_r2r(mm7, mm3);
	     punpcklbw_r2r(mm4, mm7);
	     por_r2r(mm7, mm3);

	     movq_m2r(CONST_32, mm4);
	     paddsw_r2r(mm6, mm0);
	     paddsw_r2r(mm6, mm1);
	     paddsw_r2r(mm4, mm0);
	     paddsw_r2r(mm4, mm1);
	     psraw_i2r(RES, mm0);
	     psraw_i2r(RES, mm1);
	     packuswb_r2r(mm1, mm0);

	     pxor_r2r(mm7, mm7);
	     movq_r2r(mm0, mm5);
	     punpckhbw_r2r(mm7, mm0);
	     punpcklbw_r2r(mm5, mm7);
	     por_r2r(mm7, mm0);

	     movq_m2r(CONST_FF, mm1);
	     movq_r2r(mm0, mm5);
	     movq_r2r(mm3, mm6);
	     movq_r2r(mm2, mm7);
	     punpckhbw_r2r(mm3, mm2);
	     punpcklbw_r2r(mm6, mm7);
	     punpckhbw_r2r(mm1, mm0);
	     punpcklbw_r2r(mm1, mm5);

	     movq_r2r(mm7, mm1);
	     punpckhwd_r2r(mm5, mm7);
	     punpcklwd_r2r(mm5, mm1);

	     movq_r2r(mm2, mm4);
	     punpckhwd_r2r(mm0, mm2);
	     punpcklwd_r2r(mm0, mm4);

	     movntq_r2m(mm1, *(dp1));
	     movntq_r2m(mm7, *(dp1 + 8));
	     movntq_r2m(mm4, *(dp1 + 16));
	     movntq_r2m(mm2, *(dp1 + 24));

	     yp1 += 8;
	     up += 4;
	     vp += 4;
	     dp1 += 8 * 4;
	  }
	/* cleanup pixles that arent a multiple of 8 pixels wide */
	if (xx < w)
	  {
	     int y, u, v, r, g, b;

	     for (; xx < w; xx += 2)
	       {
		  u = (*up++) - 128;
		  v = (*vp++) - 128;

		  y = RZ(YMUL) * ((*yp1++) - 16);
		  r = LUT_CLIP((y + (_crv * v)) >> RES);
		  g = LUT_CLIP((y - (_cgu * u) - (_cgv * v) + RZ(OFF)) >> RES);
		  b = LUT_CLIP((y + (_cbu * u) + RZ(OFF)) >> RES);
		  *((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(r,g,b);

		  dp1 += 4;

		  y = RZ(YMUL) * ((*yp1++) - 16);
		  r = LUT_CLIP((y + (_crv * v)) >> RES);
		  g = LUT_CLIP((y - (_cgu * u) - (_cgv * v) + RZ(OFF)) >> RES);
		  b = LUT_CLIP((y + (_cbu * u) + RZ(OFF)) >> RES);
		  *((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(r,g,b);

		  dp1 += 4;
	       }
	  }
     }
Exemplo n.º 3
0
void deinterlace_bob_yuv_mmx(uint8_t *pdst, uint8_t *psrc,
			     int width, int height )
{

  int Line;
  long long* YVal1;
  long long* YVal2;
  long long* YVal3;
  long long* Dest;
  uint8_t* pEvenLines = psrc;
  uint8_t* pOddLines = psrc+width;
  int LineLength = width;
  int Pitch = width * 2;
  int IsOdd = 1;
  long EdgeDetect = 625;
  long JaggieThreshold = 73;

  int n;

  unsigned long long qwEdgeDetect;
  unsigned long long qwThreshold;
  const unsigned long long Mask = 0xfefefefefefefefeULL;
  const unsigned long long YMask = 0x00ff00ff00ff00ffULL;

  qwEdgeDetect = EdgeDetect;
  qwEdgeDetect += (qwEdgeDetect << 48) + (qwEdgeDetect << 32) + (qwEdgeDetect << 16);
  qwThreshold = JaggieThreshold;
  qwThreshold += (qwThreshold << 48) + (qwThreshold << 32) + (qwThreshold << 16);


  // copy first even line no matter what, and the first odd line if we're
  // processing an odd field.
  ac_memcpy(pdst, pEvenLines, LineLength);
  if (IsOdd)
    ac_memcpy(pdst + LineLength, pOddLines, LineLength);

  height = height / 2;
  for (Line = 0; Line < height - 1; ++Line)
  {
    if (IsOdd)
    {
      YVal1 = (long long *)(pOddLines + Line * Pitch);
      YVal2 = (long long *)(pEvenLines + (Line + 1) * Pitch);
      YVal3 = (long long *)(pOddLines + (Line + 1) * Pitch);
      Dest = (long long *)(pdst + (Line * 2 + 2) * LineLength);
    }
    else
    {
      YVal1 = (long long *)(pEvenLines + Line * Pitch);
      YVal2 = (long long *)(pOddLines + Line * Pitch);
      YVal3 = (long long *)(pEvenLines + (Line + 1) * Pitch);
      Dest = (long long *)(pdst + (Line * 2 + 1) * LineLength);
    }

    // For ease of reading, the comments below assume that we're operating on an odd
    // field (i.e., that bIsOdd is true).  The exact same processing is done when we
    // operate on an even field, but the roles of the odd and even fields are reversed.
    // It's just too cumbersome to explain the algorithm in terms of "the next odd
    // line if we're doing an odd field, or the next even line if we're doing an
    // even field" etc.  So wherever you see "odd" or "even" below, keep in mind that
    // half the time this function is called, those words' meanings will invert.

    // Copy the odd line to the overlay verbatim.
    ac_memcpy((char *)Dest + LineLength, YVal3, LineLength);

    n = LineLength >> 3;
    while( n-- )
    {
      movq_m2r (*YVal1++, mm0);
      movq_m2r (*YVal2++, mm1);
      movq_m2r (*YVal3++, mm2);

      // get intensities in mm3 - 4
      movq_r2r ( mm0, mm3 );
      movq_r2r ( mm1, mm4 );
      movq_r2r ( mm2, mm5 );

      pand_m2r ( *&YMask, mm3 );
      pand_m2r ( *&YMask, mm4 );
      pand_m2r ( *&YMask, mm5 );

      // get average in mm0
      pand_m2r ( *&Mask, mm0 );
      pand_m2r ( *&Mask, mm2 );
      psrlw_i2r ( 01, mm0 );
      psrlw_i2r ( 01, mm2 );
      paddw_r2r ( mm2, mm0 );

      // work out (O1 - E) * (O2 - E) / 2 - EdgeDetect * (O1 - O2) ^ 2 >> 12
      // result will be in mm6

      psrlw_i2r ( 01, mm3 );
      psrlw_i2r ( 01, mm4 );
      psrlw_i2r ( 01, mm5 );

      movq_r2r ( mm3, mm6 );
      psubw_r2r ( mm4, mm6 );	//mm6 = O1 - E

      movq_r2r ( mm5, mm7 );
      psubw_r2r ( mm4, mm7 );	//mm7 = O2 - E

      pmullw_r2r ( mm7, mm6 );		// mm6 = (O1 - E) * (O2 - E)

      movq_r2r ( mm3, mm7 );
      psubw_r2r ( mm5, mm7 );		// mm7 = (O1 - O2)
      pmullw_r2r ( mm7, mm7 );	// mm7 = (O1 - O2) ^ 2
      psrlw_i2r ( 12, mm7 );		// mm7 = (O1 - O2) ^ 2 >> 12
      pmullw_m2r ( *&qwEdgeDetect, mm7 );// mm7  = EdgeDetect * (O1 - O2) ^ 2 >> 12

      psubw_r2r ( mm7, mm6 );      // mm6 is what we want

      pcmpgtw_m2r ( *&qwThreshold, mm6 );

      movq_r2r ( mm6, mm7 );

      pand_r2r ( mm6, mm0 );

      pandn_r2r ( mm1, mm7 );

      por_r2r ( mm0, mm7 );

      movq_r2m ( mm7, *Dest++ );
    }
  }

  // Copy last odd line if we're processing an even field.
  if (! IsOdd)
  {
    ac_memcpy(pdst + (height * 2 - 1) * LineLength,
                      pOddLines + (height - 1) * Pitch,
                      LineLength);
  }

  // clear out the MMX registers ready for doing floating point
  // again
  emms();
}