Exemplo n.º 1
0
void pix_invert :: processYUVAltivec(imageStruct &image)
{
int h,w,width;
   width = image.xsize/8;

    union{
        unsigned char c[16];
        vector unsigned char v;
    }charBuffer;

    vector unsigned char offset;
    vector unsigned char *inData = (vector unsigned char*) image.data;

    charBuffer.c[0] = 255;
    offset = charBuffer.v;
    offset = (vector unsigned char) vec_splat(offset,0);
    #ifndef PPC970
    UInt32			prefetchSize = GetPrefetchConstant( 16, 1, 256 );
	vec_dst( inData, prefetchSize, 0 );
       #endif
    for ( h=0; h<image.ysize; h++){
        for (w=0; w<width; w++)
        {
        #ifndef PPC970
	vec_dst( inData, prefetchSize, 0 );
        #endif
        inData[0]=vec_subs(offset,inData[0]);
        inData++;

         }
         #ifndef PPC970
        vec_dss( 0 );
        #endif
    }  /*end of working altivec function */
}
Exemplo n.º 2
0
void pix_add :: processRGBA_Altivec(imageStruct &image, imageStruct &right)
{
 int h,w,width;
   width = image.xsize/4;


    vector unsigned char *inData = (vector unsigned char*) image.data;
    vector unsigned char *rightData = (vector unsigned char*) right.data;

        #ifndef PPC970
   	UInt32			prefetchSize = GetPrefetchConstant( 16, 1, 256 );
	vec_dst( inData, prefetchSize, 0 );
        vec_dst( rightData, prefetchSize, 1 );
        #endif
    for ( h=0; h<image.ysize; h++){
        for (w=0; w<width; w++)
        {
        #ifndef PPC970
	vec_dst( inData, prefetchSize, 0 );
        vec_dst( rightData, prefetchSize, 1 );
        #endif

            inData[0] = vec_adds(inData[0], rightData[0]);

            inData++;
            rightData++;
        }
        #ifndef PPC970
        vec_dss( 0 );
        vec_dss( 1 );
        #endif
    }  /*end of working altivec function */
}
Exemplo n.º 3
0
void pix_diff :: processRGBA_Altivec(imageStruct &image, imageStruct &right)
{

    int datasize = image.xsize * image.ysize / 4;
    vector signed short  hiImage, loImage, hiRight, loRight;
    vector unsigned char zero = vec_splat_u8(0);
    vector unsigned char *inData = (vector unsigned char *)image.data;
    vector unsigned char *rightData = (vector unsigned char *)right.data;

    #ifndef PPC970
   	UInt32			prefetchSize = GetPrefetchConstant( 16, 1, 256 );
	vec_dst( inData, prefetchSize, 0 );
        vec_dst( rightData, prefetchSize, 1 );
        vec_dst( inData+256, prefetchSize, 2 );
        vec_dst( rightData+256, prefetchSize, 3 );
    #endif

    do {

        #ifndef PPC970
	vec_dst( inData, prefetchSize, 0 );
        vec_dst( rightData, prefetchSize, 1 );
        vec_dst( inData+256, prefetchSize, 2 );
        vec_dst( rightData+256, prefetchSize, 3 );
        #endif

        hiImage = (vector signed short)vec_mergeh(zero,inData[0]);
        loImage = (vector signed short)vec_mergel(zero,inData[0]);
        hiRight = (vector signed short)vec_mergeh(zero,rightData[0]);
        loRight = (vector signed short)vec_mergel(zero,rightData[0]);

        hiImage = vec_subs(hiImage,hiRight);
        loImage = vec_subs(loImage,loRight);

        hiImage = vec_abs(hiImage);
        loImage = vec_abs(loImage);

        inData[0] = vec_packsu(hiImage,loImage);

        inData++;
        rightData++;
    }
    while (--datasize);
    #ifndef PPC970
        vec_dss( 0 );
        vec_dss( 1 );
        vec_dss( 2 );
        vec_dss( 3 );
    #endif
}
Exemplo n.º 4
0
void pix_background :: processYUVAltivec(imageStruct &image)
{
register int h,w,i,j,width;
int pixsize = image.xsize * image.ysize * image.csize;
    h = image.ysize;
    w = image.xsize/8;
    width = image.xsize/8;
    
    //check to see if the buffer isn't 16byte aligned (highly unlikely)
    if (image.ysize*image.xsize % 16 != 0){
        error("image not properly aligned for Altivec - try something SD or HD maybe?");
        return;
        }
    
    union{
        unsigned short		s[8];
        vector unsigned short	v;
    }shortBuffer;

    if(m_savedImage.xsize!=image.xsize ||
       m_savedImage.ysize!=image.ysize ||
       m_savedImage.format!=image.format)m_reset=1;

    m_savedImage.xsize=image.xsize;
    m_savedImage.ysize=image.ysize;
    m_savedImage.setCsizeByFormat(image.format);
    m_savedImage.reallocate();
    
    if (m_reset){
    memcpy(m_savedImage.data,image.data,pixsize);
    m_reset = 0; 
    }
    
    register vector unsigned short	UVres1, Yres1, UVres2, Yres2;//interleave;
    register vector unsigned short	hiImage, loImage;
    register vector unsigned short	Yrange, UVrange, Yblank,UVblank,blank;
    register vector bool short		Ymasklo,Ymaskhi,  UVmaskhi;
    register vector unsigned short	Yhi,Ylo,UVhi,UVlo; 
    register vector unsigned char	one = vec_splat_u8(1);
    register vector unsigned short	sone = vec_splat_u16(1);
    register vector unsigned int			Uhi, Ulo, Vhi, Vlo,Ures,Vres;
    register vector bool int 			Umasklo, Umaskhi, Vmaskhi, Vmasklo;

    vector unsigned char	*inData = (vector unsigned char*) image.data;
    vector unsigned char	*rightData = (vector unsigned char*) m_savedImage.data;
    
    shortBuffer.s[0] =  m_Yrange;
    Yrange = shortBuffer.v;
    Yrange = vec_splat(Yrange,0);
    
    shortBuffer.s[0] = 128;
    shortBuffer.s[1] = 0;
    shortBuffer.s[2] = 128;
    shortBuffer.s[3] = 0;
    shortBuffer.s[4] = 128;
    shortBuffer.s[5] = 0;
    shortBuffer.s[6] = 128;
    shortBuffer.s[7] = 0;
    blank = shortBuffer.v;
    
    shortBuffer.s[0] =  0;
    Yblank = shortBuffer.v;
    Yblank = vec_splat(Yblank,0);
    
    shortBuffer.s[0] =  128;
    UVblank = shortBuffer.v;
    UVblank = vec_splat(UVblank,0);
    
    shortBuffer.s[0] = m_Urange;
    shortBuffer.s[1] = m_Vrange;
    shortBuffer.s[2] = m_Urange;
    shortBuffer.s[3] = m_Vrange;
    shortBuffer.s[4] = m_Urange;
    shortBuffer.s[5] = m_Vrange;
    shortBuffer.s[6] = m_Urange;
    shortBuffer.s[7] = m_Vrange;
    UVrange = shortBuffer.v;
    
    
    //setup the cache prefetch -- A MUST!!!
    UInt32			prefetchSize = GetPrefetchConstant( 16, 1, 256 );
    #ifndef PPC970 
    vec_dst( inData, prefetchSize, 0 );
    vec_dst( rightData, prefetchSize, 1 );
    vec_dst( inData+32, prefetchSize, 2 );
    vec_dst( rightData+32, prefetchSize, 3 );
    #endif //PPC970
    
    for ( i=0; i<h; i++){
        for (j=0; j<w; j++)
        {
        #ifndef PPC970
        //this function is probably memory bound on most G4's -- what else is new?
            vec_dst( inData, prefetchSize, 0 );
            vec_dst( rightData, prefetchSize, 1 );
            vec_dst( inData+32, prefetchSize, 2 );
            vec_dst( rightData+32, prefetchSize, 3 );
        #endif
        //separate the U and V from Y
        UVres1 = (vector unsigned short)vec_mule(one,inData[0]);
        UVres2 = (vector unsigned short)vec_mule(one,rightData[0]);
            
        //vec_mulo Y * 1 to short vector Y Y Y Y shorts
        Yres1 = (vector unsigned short)vec_mulo(one,inData[0]);
        Yres2 = (vector unsigned short)vec_mulo(one,rightData[0]);
        
        Yhi = vec_adds(Yres2,Yrange);
        Ylo = vec_subs(Yres2,Yrange);
        
        //go to ints for comparison
        UVhi = vec_adds(UVres2,UVrange);
        UVlo = vec_subs(UVres2,UVrange);
        
        Uhi = vec_mule(sone,UVhi);
        Ulo = vec_mule(sone,UVlo);
        
        Vhi = vec_mulo(sone,UVhi);
        Vlo = vec_mulo(sone,UVlo);
        
        Ures = vec_mule(sone,UVres1);
         Vres = vec_mulo(sone,UVres1);
         
         Umasklo = vec_cmpgt(Ures,Ulo);
         Umaskhi = vec_cmplt(Ures,Uhi);
         
         Vmasklo = vec_cmpgt(Vres,Vlo);
         Vmaskhi = vec_cmplt(Vres,Vhi);
         
         Umaskhi = vec_and(Umaskhi,Umasklo);
         
         Vmaskhi = vec_and(Vmaskhi,Vmasklo);
         
         Umasklo = vec_and(Umaskhi,Vmaskhi);
         Vmasklo = vec_and(Umaskhi,Vmaskhi);
         
         hiImage = (vector unsigned short)vec_mergeh(Umasklo,Vmasklo);
         loImage = (vector unsigned short)vec_mergel(Umasklo,Vmasklo);
         
         //pack it back down to bool short
         UVmaskhi = (vector bool short)vec_packsu(hiImage,loImage);
         
         Ymasklo = vec_cmpgt(Yres1,Ylo);
         Ymaskhi = vec_cmplt(Yres1,Yhi);
         
         Ymaskhi = vec_and(Ymaskhi,Ymasklo);
         
         Ymaskhi = vec_and(Ymaskhi,UVmaskhi);
         UVmaskhi = vec_and(Ymaskhi,UVmaskhi);
         
         //bitwise comparison and move using the result of the comparison as a mask
         Yres1 = vec_sel(Yres1,Yblank,Ymaskhi);
         
         //UVres1 = vec_sel(UVres1,UVres2,UVmaskhi);
         UVres1 = vec_sel(UVres1,UVblank,UVmaskhi);
         
         //merge the Y and UV back together
         hiImage = vec_mergeh(UVres1,Yres1);
         loImage = vec_mergel(UVres1,Yres1);
         
         //pack it back down to unsigned char to store
         inData[0] = vec_packsu(hiImage,loImage);
         
         inData++;
         rightData++;
        
        }
        #ifndef PPC970
        vec_dss(0);
        vec_dss(1);
        vec_dss(2);
        vec_dss(3);
        #endif
    }
}
Exemplo n.º 5
0
void pix_add :: processYUV_Altivec(imageStruct &image, imageStruct &right)
{
 int h,w,width;
   width = image.xsize/8;
   //format is U Y V Y
    union
    {
        //unsigned int	i;
        short	elements[8];
        //vector signed char v;
        vector	signed short v;
    }shortBuffer;

        union
    {
        //unsigned int	i;
        unsigned char	elements[16];
        //vector signed char v;
        vector	unsigned char v;
    }charBuffer;

    //vector unsigned char c;
    register vector signed short d, hiImage, loImage, YRight, UVRight, YImage, UVImage, UVTemp, YTemp;
   // vector unsigned char zero = vec_splat_u8(0);
    register vector unsigned char c,one;
  //  vector signed short zshort = vec_splat_s16(0);
    vector unsigned char *inData = (vector unsigned char*) image.data;
    vector unsigned char *rightData = (vector unsigned char*) right.data;

    //Write the pixel (pair) to the transfer buffer
    charBuffer.elements[0] = 2;
    charBuffer.elements[1] = 1;
    charBuffer.elements[2] = 2;
    charBuffer.elements[3] = 1;
    charBuffer.elements[4] = 2;
    charBuffer.elements[5] = 1;
    charBuffer.elements[6] = 2;
    charBuffer.elements[7] = 1;
    charBuffer.elements[8] = 2;
    charBuffer.elements[9] = 1;
    charBuffer.elements[10] = 2;
    charBuffer.elements[11] = 1;
    charBuffer.elements[12] = 2;
    charBuffer.elements[13] = 1;
    charBuffer.elements[14] = 2;
    charBuffer.elements[15] = 1;


    //Load it into the vector unit
    c = charBuffer.v;

    one =  vec_splat_u8( 1 );

    shortBuffer.elements[0] = 255;

    //Load it into the vector unit
    d = shortBuffer.v;
    d = static_cast<vector signed short>(vec_splat(static_cast<vector signed short>(d),0));
#ifndef PPC970
    UInt32			prefetchSize = GetPrefetchConstant( 16, 1, 256 );
    vec_dst( inData, prefetchSize, 0 );
    vec_dst( rightData, prefetchSize, 1 );
#endif
    for ( h=0; h<image.ysize; h++){
      for (w=0; w<width; w++)
        {
#ifndef PPC970
	  vec_dst( inData, prefetchSize, 0 );
	  vec_dst( rightData, prefetchSize, 1 );
#endif
	  //interleaved U Y V Y chars

	  //vec_mule UV * 2 to short vector U V U V shorts
	  UVImage = static_cast<vector signed short>(vec_mule(one,inData[0]));
	  UVRight = static_cast<vector signed short>(vec_mule(c,rightData[0]));

	  //vec_mulo Y * 1 to short vector Y Y Y Y shorts
	  YImage = static_cast<vector signed short>(vec_mulo(c,inData[0]));
	  YRight = static_cast<vector signed short>(vec_mulo(c,rightData[0]));

	  //vel_subs UV - 255
	  UVRight = static_cast<vector signed short>(vec_subs(UVRight, d));

	  //vec_adds UV
	  UVTemp = vec_adds(UVImage,UVRight);

	  //vec_adds Y
	  YTemp = vec_adds(YImage,YRight);

	  hiImage = vec_mergeh(UVTemp,YTemp);
	  loImage = vec_mergel(UVTemp,YTemp);

	  //vec_mergel + vec_mergeh Y and UV
	  inData[0] = vec_packsu(hiImage, loImage);

	  inData++;
	  rightData++;
        }
#ifndef PPC970
        vec_dss( 0 );
        vec_dss( 1 );
#endif
    }  /*end of working altivec function */
}
Exemplo n.º 6
0
/* more optimized version - unrolled and load-hoisted */
void pix_offset :: processYUVAltivec(imageStruct &image)
{
  register int h,w,width,height;
  width = image.xsize/16; //for altivec
  height = image.ysize;
  //format is U Y V Y
  // start of working altivec function
  union {
    short       elements[8];
    vector      signed short v;
  } transferBuffer;

  register vector signed short c, hi, lo;
  register vector signed short hi1, lo1;
  register vector signed short loadhi, loadhi1, loadlo, loadlo1;
  register vector unsigned char zero = vec_splat_u8(0);
  register vector unsigned char *inData = (vector unsigned char*) image.data;

  //Write the pixel (pair) to the transfer buffer
  //transferBuffer.i = (U << 24) | (Y << 16) | (V << 8 ) | Y;
  transferBuffer.elements[0] = U;
  transferBuffer.elements[1] = Y;
  transferBuffer.elements[2] = V;
  transferBuffer.elements[3] = Y;
  transferBuffer.elements[4] = U;
  transferBuffer.elements[5] = Y;
  transferBuffer.elements[6] = V;
  transferBuffer.elements[7] = Y;

  //Load it into the vector unit
  c = transferBuffer.v;


#ifndef PPC970
  UInt32                        prefetchSize = GetPrefetchConstant( 16, 1,
      256 );
  vec_dst( inData, prefetchSize, 0 );
  vec_dst( inData+16, prefetchSize, 1 );
  vec_dst( inData+32, prefetchSize, 2 );
  vec_dst( inData+64, prefetchSize, 3 );
#endif

  //expand the UInt8's to short's
  loadhi = (vector signed short) vec_mergeh( zero, inData[0] );
  loadlo = (vector signed short) vec_mergel( zero, inData[0] );

  loadhi1 = (vector signed short) vec_mergeh( zero, inData[1] );
  loadlo1 = (vector signed short) vec_mergel( zero, inData[1] );
  \


  for ( h=0; h<height; h++) {
    for (w=0; w<width; w++) {

#ifndef PPC970
      vec_dst( inData, prefetchSize, 0 );
      vec_dst( inData+16, prefetchSize, 1 );
      vec_dst( inData+32, prefetchSize, 2 );
      vec_dst( inData+64, prefetchSize, 3 );
#endif

      //add the constant to it
      hi = vec_add( loadhi, c );
      lo = vec_add( loadlo, c );

      hi1 = vec_add( loadhi1, c );
      lo1 = vec_add( loadlo1, c );


      //expand the UInt8's to short's
      loadhi = (vector signed short) vec_mergeh( zero, inData[2] );
      loadlo = (vector signed short) vec_mergel( zero, inData[2] );


      loadhi1 = (vector signed short) vec_mergeh( zero, inData[3] );
      loadlo1 = (vector signed short) vec_mergel( zero, inData[3] );

      //pack the result back down, with saturation
      inData[0] = vec_packsu( hi, lo );
      inData++;


      inData[0] = vec_packsu( hi1, lo1 );
      inData++;
    }
  }

  //
  // finish the last iteration after the loop
  //
  hi = vec_add( loadhi, c );
  lo = vec_add( loadlo, c );

  hi1 = vec_add( loadhi1, c );
  lo1 = vec_add( loadlo1, c );

  //pack the result back down, with saturation
  inData[0] = vec_packsu( hi, lo );

  inData++;

  inData[0] = vec_packsu( hi1, lo1 );

  inData++;

#ifndef PPC970
  vec_dss( 0 );
  vec_dss( 1 );
  vec_dss( 2 );
  vec_dss( 3 );  //end of working altivec function
#endif
}
Exemplo n.º 7
0
void pix_movement :: processYUVAltivec(imageStruct &image)
{
    if (image.xsize*image.ysize != buffer.xsize*buffer.ysize){
        buffer.xsize = image.xsize;
        buffer.ysize = image.ysize;
        buffer.reallocate(buffer.xsize*buffer.ysize*2);
    }
    int pixsize = image.ysize * image.xsize/8;

    union{
        signed short  c[8];
        vector signed short  v;
    }shortBuffer;

    union{
        unsigned short  c[8];
        vector unsigned short  v;
    }ushortBuffer;

    int i;

    vector signed short thresh;
    shortBuffer.c[0] = threshold;
    thresh = shortBuffer.v;
    thresh = (vector signed short)vec_splat(thresh,0);

    vector unsigned char *rp = (vector unsigned char *) image.data; // read pointer
    vector unsigned char *wp = (vector unsigned char *) buffer.data; // write pointer to the copy
    vector unsigned char grey0,grey1;
    vector unsigned char one = vec_splat_u8(1);
    vector unsigned short Y0,Ywp0,hiImage0,loImage0;
    vector unsigned short Y1,Ywp1,hiImage1,loImage1;
    vector unsigned short UVwp0,UVwp1;
    vector signed short temp0,temp1;

    ushortBuffer.c[0]=127;
    vector unsigned short UV0= (vector unsigned short)vec_splat(ushortBuffer.v, 0);
    vector unsigned short UV1= (vector unsigned short)vec_splat(ushortBuffer.v, 0);

#ifndef PPC970
    //setup the cache prefetch -- A MUST!!!
    UInt32 prefetchSize = GetPrefetchConstant( 16, 0, 256 );
    vec_dst( rp, prefetchSize, 0 );
    vec_dst( wp, prefetchSize, 1 );
#endif

    int j = 16;

    pixsize/=2;
    for (i=0; i < pixsize; i++) {
# ifndef PPC970
        //setup the cache prefetch -- A MUST!!!
        UInt32 prefetchSize = GetPrefetchConstant( j, 0, j * 16 );
        vec_dst( rp, prefetchSize, 0 );
        vec_dst( wp, prefetchSize, 1 );
        vec_dst( rp+16, prefetchSize, 2 );
        vec_dst( wp+16, prefetchSize, 3 );
# endif

        grey0 = rp[0];
        grey1 = rp[1];

//      rp[Y0]=255*(abs(grey0-*wp)>thresh);

//      UV0= (vector unsigned short)vec_mule(grey0,one);
        Y0 = (vector unsigned short)vec_mulo(grey0,one);

//      UV1= (vector unsigned short)vec_mule(grey1,one);
        Y1 = (vector unsigned short)vec_mulo(grey1,one);

        //wp is actually 1/2 the size of the image because it is only Y??

        //here the full U Y V Y is stored
//      UVwp0= (vector unsigned short)vec_mule(wp[0],one);
        Ywp0 = (vector unsigned short)vec_mulo(wp[0],one);

//      UVwp1= (vector unsigned short)vec_mule(wp[1],one);
        Ywp1 = (vector unsigned short)vec_mulo(wp[1],one);

        //store the current pixels as the history for next time
        wp[0]=grey0;
        wp++;
        wp[0]=grey1;
        wp++;

        temp0 = vec_abs(vec_sub((vector signed short)Y0,(vector signed short)Ywp0));
        Y0 = (vector unsigned short)vec_cmpgt(temp0,thresh);

        temp1 = vec_abs(vec_sub((vector signed short)Y1,(vector signed short)Ywp1));
        Y1 = (vector unsigned short)vec_cmpgt(temp1,thresh);

        hiImage0 = vec_mergeh(UV0,Y0);
        loImage0 = vec_mergel(UV0,Y0);

        hiImage1 = vec_mergeh(UV1,Y1);
        loImage1 = vec_mergel(UV1,Y1);

        grey0 = vec_packsu(hiImage0,loImage0);
        grey1 = vec_packsu(hiImage1,loImage1);

        rp[0]=grey0;
        rp++;
        rp[0]=grey1;
        rp++;
       // grey = rp[0];
       // rp[Y1]=255*(abs(grey-*wp)>thresh);
       // *wp++=grey;

       // rp+=4;
       // rp++;
    }

# ifndef PPC970
    vec_dss(0);
    vec_dss(1);
    vec_dss(2);
    vec_dss(3);
# endif
}
Exemplo n.º 8
0
void pix_diff :: processYUV_Altivec(imageStruct &image, imageStruct &right)
{
  long h,w,width;

   width = image.xsize/8;
   //format is U Y V Y
    union
    {
        //unsigned int	i;
        short	elements[8];
        //vector signed char v;
        vector	short v;
    }shortBuffer;


    vector signed short d, hiImage, loImage,hiRight, loRight;//, YRight, UVRight, YImage, UVImage, UVTemp, YTemp;
    vector unsigned char zero = vec_splat_u8(0);
    vector unsigned char *inData = (vector unsigned char*) image.data;
    vector unsigned char *rightData = (vector unsigned char*) right.data;


    shortBuffer.elements[0] = 128;
    shortBuffer.elements[1] = 0;
    shortBuffer.elements[2] = 128;
    shortBuffer.elements[3] = 0;
    shortBuffer.elements[4] = 128;
    shortBuffer.elements[5] = 0;
    shortBuffer.elements[6] = 128;
    shortBuffer.elements[7] = 0;

    //Load it into the vector unit
    d = shortBuffer.v;



#ifndef PPC970
   	UInt32			prefetchSize = GetPrefetchConstant( 16, 1, 256 );
	vec_dst( inData, prefetchSize, 0 );
        vec_dst( rightData, prefetchSize, 1 );
    #endif
    for ( h=0; h<image.ysize; h++){
        for (w=0; w<width; w++)
        {
        #ifndef PPC970
	vec_dst( inData, prefetchSize, 0 );
        vec_dst( rightData, prefetchSize, 1 );
           #endif
            //interleaved U Y V Y chars

            //break out to unsigned shorts
            hiImage = (vector signed short) vec_mergeh( zero, inData[0] );
            loImage = (vector signed short) vec_mergel( zero, inData[0] );
            hiRight = (vector signed short) vec_mergeh( zero, rightData[0] );
            loRight = (vector signed short) vec_mergel( zero, rightData[0] );

            //subtract the 128 offset for UV
            hiImage = vec_subs(hiImage,d);
            loImage = vec_subs(loImage,d);
            hiRight = vec_subs(hiRight,d);
            loRight = vec_subs(loRight,d);

            hiImage = vec_subs(hiImage,hiRight);
            loImage = vec_subs(loImage,loRight);

            hiImage = vec_adds(hiImage,d);
            loImage = vec_adds(loImage,d);

            hiImage = vec_abs(hiImage);
            loImage = vec_abs(loImage);

            inData[0] = vec_packsu(hiImage, loImage);

            inData++;
            rightData++;

        }
        #ifndef PPC970
        vec_dss( 0 );
        vec_dss( 1 );
        #endif
    }  /*end of working altivec function */
}
Exemplo n.º 9
0
void pix_compare :: processYUV_Altivec(imageStruct &image, imageStruct &right)
{
register int h,w,i,j,width;

    h = image.ysize;
    w = image.xsize/8;
    width = image.xsize/8;

    //check to see if the buffer isn't 16byte aligned (highly unlikely)
    if (image.ysize*image.xsize % 16 != 0){
        error("image not properly aligned for Altivec");
        return;
        }

    register vector unsigned short	UVres1, Yres1, UVres2, Yres2;//interleave;
    register vector unsigned short	hiImage, loImage;
    register vector bool short		Ymask1;
    register vector unsigned char	one = vec_splat_u8(1);

    vector unsigned char	*inData = (vector unsigned char*) image.data;
    vector unsigned char	*rightData = (vector unsigned char*) right.data;

    #ifndef PPC970
    //setup the cache prefetch -- A MUST!!!
    UInt32			prefetchSize = GetPrefetchConstant( 16, 1, 256 );
    vec_dst( inData, prefetchSize, 0 );
    vec_dst( rightData, prefetchSize, 1 );
    #endif
    if (m_direction) {

    for ( i=0; i<h; i++){
        for (j=0; j<w; j++)
        {
        #ifndef PPC970
        //this function is probably memory bound on most G4's -- what else is new?
        vec_dst( inData, prefetchSize, 0 );
        vec_dst( rightData, prefetchSize, 1 );
        #endif

        //separate the U and V from Y
        UVres1 = (vector unsigned short)vec_mule(one,inData[0]);
        UVres2 = (vector unsigned short)vec_mule(one,rightData[0]);

        //vec_mulo Y * 1 to short vector Y Y Y Y shorts
        Yres1 = (vector unsigned short)vec_mulo(one,inData[0]);
        Yres2 = (vector unsigned short)vec_mulo(one,rightData[0]);

         //compare the Y values
         Ymask1 = vec_cmpgt(Yres1,Yres2);

         //bitwise comparison and move using the result of the comparison as a mask
         Yres1 = vec_sel(Yres2,Yres1,Ymask1);

         UVres1 = vec_sel(UVres2,UVres1,Ymask1);

         //merge the Y and UV back together
         hiImage = vec_mergeh(UVres1,Yres1);
         loImage = vec_mergel(UVres1,Yres1);

         //pack it back down to unsigned char to store
         inData[0] = vec_packsu(hiImage,loImage);

            inData++;
            rightData++;

        }
        #ifndef PPC970
        vec_dss(1);
        vec_dss(0);
        #endif

    }
    }else{

    for ( i=0; i<h; i++){
        for (j=0; j<w; j++)
        {
        #ifndef PPC970
        vec_dst( inData, prefetchSize, 0 );
        vec_dst( rightData, prefetchSize, 1 );
        #endif

        UVres1 = (vector unsigned short)vec_mule(one,inData[0]);
        UVres2 = (vector unsigned short)vec_mule(one,rightData[0]);

        //vec_mulo Y * 1 to short vector Y Y Y Y shorts
        Yres1 = (vector unsigned short)vec_mulo(one,inData[0]);
        Yres2 = (vector unsigned short)vec_mulo(one,rightData[0]);

         Ymask1 = vec_cmplt(Yres1,Yres2);

         Yres1 = vec_sel(Yres2,Yres1,Ymask1);

         UVres1 = vec_sel(UVres2,UVres1,Ymask1);

         hiImage = vec_mergeh(UVres1,Yres1);
         loImage = vec_mergel(UVres1,Yres1);

         inData[0] = vec_packsu(hiImage,loImage);

            inData++;
            rightData++;

        }
        #ifndef PPC970
        vec_dss(1);
        vec_dss(0);
        #endif
    }
    }
}
Exemplo n.º 10
0
/* start of optimized motionblur */
void pix_motionblur :: processYUVAltivec(imageStruct &image)
{
  int h,w,width;
  signed short rightGain,imageGain;
  unsigned char *saved = m_savedImage.data;

  m_savedImage.xsize=image.xsize;
  m_savedImage.ysize=image.ysize;
  m_savedImage.setCsizeByFormat(image.format);
  m_savedImage.reallocate();
  if(saved!=m_savedImage.data) {
    m_savedImage.setBlack();
  }
  saved=m_savedImage.data;

  width = image.xsize/8;
  /*
  // hmm: why does it read 235 ?
  rightGain = (signed short)(235. * m_motionblur);
  imageGain = (signed short) (255. - (235. * m_motionblur));
  */
  rightGain = m_blur1;
  imageGain = m_blur0;

  union {
    signed short        elements[8];
    vector      signed short v;
  } shortBuffer;

  union {
    unsigned int        elements[4];
    vector      unsigned int v;
  } bitBuffer;

  register vector signed short gainAdd, hiImage, loImage,hiRight,loRight,
           YImage, UVImage;
  // register vector signed short loadhiImage, loadloImage,loadhiRight,loadloRight;
  register vector unsigned char loadImage, loadRight;
  register vector unsigned char zero = vec_splat_u8(0);
  register vector signed int UVhi,UVlo,Yhi,Ylo;
  register vector signed int UVhiR,UVloR,YhiR,YloR;
  register vector signed short gainSub,gain,gainR;//,d;
  register vector unsigned int bitshift;
  vector unsigned char *inData = (vector unsigned char*) image.data;
  vector unsigned char *rightData = (vector unsigned char*) saved;


  shortBuffer.elements[0] = 128;
  shortBuffer.elements[1] = 0;
  shortBuffer.elements[2] = 128;
  shortBuffer.elements[3] = 0;
  shortBuffer.elements[4] = 128;
  shortBuffer.elements[5] = 0;
  shortBuffer.elements[6] = 128;
  shortBuffer.elements[7] = 0;

  gainSub = shortBuffer.v;

  shortBuffer.elements[0] = imageGain;
  gain = shortBuffer.v;
  gain =  vec_splat(gain, 0 );

  shortBuffer.elements[0] = rightGain;
  gainR = shortBuffer.v;
  gainR =  vec_splat(gainR, 0 );

  bitBuffer.elements[0] = 8;

  //Load it into the vector unit
  bitshift = bitBuffer.v;
  bitshift = vec_splat(bitshift,0);

  shortBuffer.elements[0] = 128;

  //Load it into the vector unit
  gainAdd = shortBuffer.v;
  gainAdd = (vector signed short)vec_splat((vector signed short)gainAdd,0);

# ifndef PPC970
  UInt32                        prefetchSize = GetPrefetchConstant( 16, 1,
      256 );
  vec_dst( inData, prefetchSize, 0 );
  vec_dst( rightData, prefetchSize, 1 );
  vec_dst( inData+32, prefetchSize, 2 );
  vec_dst( rightData+32, prefetchSize, 3 );
# endif

  loadImage = inData[0];
  loadRight = rightData[0];

  for ( h=0; h<image.ysize; h++) {
    for (w=0; w<width; w++) {
# ifndef PPC970
      vec_dst( inData, prefetchSize, 0 );
      vec_dst( rightData, prefetchSize, 1 );
      vec_dst( inData+32, prefetchSize, 2 );
      vec_dst( rightData+32, prefetchSize, 3 );
# endif
      //interleaved U Y V Y chars

      hiImage = (vector signed short) vec_mergeh( zero, loadImage );
      loImage = (vector signed short) vec_mergel( zero, loadImage );

      hiRight = (vector signed short) vec_mergeh( zero, loadRight );
      loRight = (vector signed short) vec_mergel( zero, loadRight );

      //hoist that load!!
      loadImage = inData[1];
      loadRight = rightData[1];

      //subtract 128 from UV

      hiImage = vec_subs(hiImage,gainSub);
      loImage = vec_subs(loImage,gainSub);

      hiRight = vec_subs(hiRight,gainSub);
      loRight = vec_subs(loRight,gainSub);

      //now vec_mule the UV into two vector ints
      //change sone to gain
      UVhi = vec_mule(gain,hiImage);
      UVlo = vec_mule(gain,loImage);

      UVhiR = vec_mule(gainR,hiRight);
      UVloR = vec_mule(gainR,loRight);

      //now vec_mulo the Y into two vector ints
      Yhi = vec_mulo(gain,hiImage);
      Ylo = vec_mulo(gain,loImage);

      YhiR = vec_mulo(gainR,hiRight);
      YloR = vec_mulo(gainR,loRight);


      //this is where to do the add and bitshift due to the resolution
      //add UV
      UVhi = vec_adds(UVhi,UVhiR);
      UVlo = vec_adds(UVlo,UVloR);

      Yhi = vec_adds(Yhi,YhiR);
      Ylo = vec_adds(Ylo,YloR);

      //bitshift UV
      UVhi = vec_sra(UVhi,bitshift);
      UVlo = vec_sra(UVlo,bitshift);

      Yhi = vec_sra(Yhi,bitshift);
      Ylo = vec_sra(Ylo,bitshift);

      //pack the UV into a single short vector
      UVImage =  vec_packs(UVhi,UVlo);

      //pack the Y into a single short vector
      YImage =  vec_packs(Yhi,Ylo);

      //vec_mergel + vec_mergeh Y and UV
      hiImage =  vec_mergeh(UVImage,YImage);
      loImage =  vec_mergel(UVImage,YImage);

      //add 128 offset back
      hiImage = vec_adds(hiImage,gainSub);
      loImage = vec_adds(loImage,gainSub);

      //vec_mergel + vec_mergeh Y and UV
      rightData[0] = (vector unsigned char)vec_packsu(hiImage, loImage);
      inData[0] = (vector unsigned char)vec_packsu(hiImage, loImage);

      inData++;
      rightData++;
    }
  }
# ifndef PPC970
  //stop the cache streams
  vec_dss( 0 );
  vec_dss( 1 );
  vec_dss( 2 );
  vec_dss( 3 );
# endif


}/* end of working altivec function */
Exemplo n.º 11
0
void YUV422_to_YV12_altivec(short*pY, short*pY2, short*pU, short*pV,
			    const unsigned char *gem_image, int xsize, int ysize)
{
  // UYVY UYVY UYVY UYVY
  const vector unsigned char *pixels1=reinterpret_cast<const vector unsigned char *>(gem_image);
  const vector unsigned char *pixels2=reinterpret_cast<const vector unsigned char *>(gem_image+(xsize*2));
  // PDP packet to be filled:
  // first Y plane
  vector signed short *py1 = reinterpret_cast<vector signed short *>(pY);
  // 2nd Y pixel
  vector signed short *py2 = reinterpret_cast<vector signed short *>(pY2);
  // U plane
  vector signed short *pCr = reinterpret_cast<vector signed short *>(pU);
  // V plane
  vector signed short *pCb = reinterpret_cast<vector signed short *>(pV);
  vector signed short uvSub = static_cast<vector signed short>( 128, 128, 128, 128,
													 128, 128, 128, 128 );
  vector unsigned short yShift = static_cast<vector unsigned short>( 7, 7, 7, 7, 7, 7, 7, 7 );
  vector unsigned short uvShift = static_cast<vector unsigned short>( 8, 8, 8, 8, 8, 8, 8, 8 );

  vector signed short tempY1, tempY2, tempY3, tempY4,
		tempUV1, tempUV2, tempUV3, tempUV4, tempUV5, tempUV6;

  vector unsigned char uvPerm = static_cast<vector unsigned char>( 16, 0, 17, 4, 18,  8, 19, 12,   // u0..u3
  														20, 2, 21, 6, 22, 10, 23, 14 ); // v0..v3

  vector unsigned char uPerm = static_cast<vector unsigned char>( 0, 1, 2, 3, 4, 5, 6, 7,
													   16,17,18,19,20,21,22,23);
  vector unsigned char vPerm = static_cast<vector unsigned char>( 8, 9, 10,11,12,13,14,15,
													   24,25,26,27,28,29,30,31);

  vector unsigned char yPerm = static_cast<vector unsigned char>( 16, 1, 17,  3, 18,  5, 19,  7, // y0..y3
													   20, 9, 21, 11, 23, 13, 25, 15);// y4..y7
  vector unsigned char zeroVec = static_cast<vector unsigned char>(0);

  int row=ysize>>1;
  int cols=xsize>>4;
#if 0
# ifndef PPC970
  UInt32	prefetchSize = GetPrefetchConstant( 16, 1, 256 );
  vec_dst( pu, prefetchSize, 0 );
  vec_dst( pv, prefetchSize, 0 );
  vec_dst( py1, prefetchSize, 0 );
  vec_dst( py2, prefetchSize, 0 );
# endif
#endif
  while(row--){
    int col=cols;
    while(col--){
#if 0
# ifndef PPC970
      vec_dst( );
# endif
#endif
      tempUV1 = static_cast<vector signed short>(vec_perm( *pixels1, zeroVec, uvPerm));
      tempY1  = static_cast<vector signed short>(vec_perm( *pixels1, zeroVec, yPerm));
      tempY2  = static_cast<vector signed short>(vec_perm( *pixels2, zeroVec, yPerm));
	  pixels1++;pixels2++;

      tempUV2 = static_cast<vector signed short>(vec_perm( *pixels1, zeroVec, uvPerm));
      tempY3  = static_cast<vector signed short>(vec_perm( *pixels1, zeroVec, yPerm));
      tempY4  = static_cast<vector signed short>(vec_perm( *pixels2, zeroVec, yPerm));
	  pixels1++;pixels2++;

	  tempUV3 = vec_sub( tempUV1, uvSub );
	  tempUV4 = vec_sub( tempUV2, uvSub );
	  tempUV5 = vec_sl( tempUV3, uvShift );
	  tempUV6 = vec_sl( tempUV4, uvShift );

	  *pCb = vec_perm( tempUV5, tempUV6, uPerm );
	  *pCr = vec_perm( tempUV5, tempUV6, vPerm );
	  pCr++; pCb++;

	  *py1++ = vec_sl( tempY1, yShift);
      *py2++ = vec_sl( tempY2, yShift);
      *py1++ = vec_sl( tempY3, yShift);
      *py2++ = vec_sl( tempY4, yShift);
	}

	py1+=(xsize>>3); py2+=(xsize>>3);
	pixels1+=(xsize*2)>>4; pixels2+=(xsize*2)>>4;
  }
}
Exemplo n.º 12
0
void YV12_to_YUV422_altivec(const short*Y, const short*U, const short*V,
			    unsigned char *data, int xsize, int ysize)
{
  // from [email protected], 3/15/2005
  // #1. Don't use the pointers. Use vec_ld with an index that you increment (by 16) instead.
  vector unsigned char *pixels1=reinterpret_cast<vector unsigned char *>(data);
  vector unsigned char *pixels2=reinterpret_cast<vector unsigned char *>(data+(xsize*2));
  const vector unsigned short *py1 = reinterpret_cast<const vector unsigned short *>(Y);
  const vector unsigned short *py2 = reinterpret_cast<const vector unsigned short *>(Y + xsize );
  const vector unsigned short *pu = reinterpret_cast<const vector unsigned short *>(U);
  const vector unsigned short *pv = reinterpret_cast<const vector unsigned short *>(V);
  vector unsigned short uvAdd = static_cast<vector unsigned short>( 128, 128, 128, 128,
                                                         128, 128, 128, 128 );
  vector unsigned short yShift = static_cast<vector unsigned short>( 7, 7, 7, 7, 7, 7, 7, 7 );
  vector unsigned short uvShift = static_cast<vector unsigned short>( 8, 8, 8, 8, 8, 8, 8, 8 );
  vector unsigned short tempU, tempV, doneU, doneV, tempY1, tempY2, tempY3, tempY4,
    uv1, uv2, out1, out2, out3, out4, out5, out6, out7, out8;
  vector unsigned char Perm1 =
    static_cast<vector unsigned char>( 0, 1, 16, 17, 2, 3, 18, 19,
                            4, 5, 20, 21, 6, 7, 22, 23 );
  vector unsigned char Perm2 =
    static_cast<vector unsigned char>(  8,  9, 24, 25, 10, 11, 26, 27,
                             12, 13, 28, 29, 14, 15, 30, 31 );
  int row=ysize>>1;
  int cols=xsize>>4;
#if 0
# ifndef PPC970
  UInt32	prefetchSize = GetPrefetchConstant( 16, 1, 256 );
  vec_dst( pu, prefetchSize, 0 );
  vec_dst( pv, prefetchSize, 0 );
  vec_dst( py1, prefetchSize, 0 );
  vec_dst( py2, prefetchSize, 0 );
# endif
#endif
  while(row--){
    int col=cols;
    while(col--){
#if 0
# ifndef PPC970
      vec_dst( );
# endif
#endif
      tempU = vec_sra( (*pu++), uvShift );
      tempV = vec_sra( (*pv++), uvShift );
      doneU = vec_add( tempU, uvAdd );
      doneV = vec_add( tempV, uvAdd );

      uv1 = vec_perm( doneU, doneV, Perm1 ); // uvuvuvuv uvuvuvuv
      uv2 = vec_perm( doneU, doneV, Perm2 );

      tempY1 = vec_sra( (*py1++), yShift );
      tempY2 = vec_sra( (*py2++), yShift );

      out1 = vec_perm( uv1, tempY1, Perm1 ); //fill Y's, U's & V's
      out2 = vec_perm( uv1, tempY1, Perm2 );
      out3 = vec_perm( uv1, tempY2, Perm1 ); //fill 2nd Y's, U's & V's
      out4 = vec_perm( uv1, tempY2, Perm2 );

      *pixels1 = vec_packsu( out1, out2 );
      *pixels2 = vec_packsu( out3, out4 );
      pixels1++; pixels2++;

      tempY3 = vec_sra( (*py1++), yShift ); // load second set of Y's
      tempY4 = vec_sra( (*py2++), yShift );

      out5 = vec_perm( uv2, tempY3, Perm1 );
      out6 = vec_perm( uv2, tempY3, Perm2 );
      out7 = vec_perm( uv2, tempY4, Perm1 );
      out8 = vec_perm( uv2, tempY4, Perm2 );

      *pixels1 = vec_packsu( out5, out6 );
      *pixels2 = vec_packsu( out7, out8 );
      pixels1++; pixels2++;
    }
    pixels1+=(xsize*2)>>4; pixels2+=(xsize*2)>>4;
    py1+=xsize>>3; py2+=xsize>>3;
  }
}