コード例 #1
0
void Msharpen::blur_plane(ADMImage *src, ADMImage *blur, int plane) 
{
/*
  uint64_t mask1 = 0x00001C711C711C71LL;
  uint64_t mask2 = 0x1C711C711C710000LL;
  uint64_t mask3 = 0x0000200000002000LL;
  uint64_t mask4 = 0x0000000000ff0000LL;

  const unsigned char *srcp = src->GetReadPtr(plane);
	const unsigned char *srcp_saved = srcp;
	unsigned char *blurp_saved = blurp;
  int src_pitch = src->GetPitch(plane);
  int blur_pitch = blur->GetPitch(plane);
  int h = src->GetHeight(plane);
  int w = src->GetRowSize(plane);
*/
const unsigned char *srcp,*srcpn,*srcpp; 
const unsigned char *srcp_saved ;
unsigned char *wk,*wk_saved;
unsigned char *blurp_saved,*blurp ;
int src_pitch;
int blur_pitch;
int work_pitch;
int h;
int w ;
int wh ,ww,hh;
 	blurp_saved=blurp=blur->GetWritePtr((ADM_PLANE)plane);
        srcp_saved=srcp=src->GetReadPtr((ADM_PLANE)plane);
        wk_saved=wk=work->GetWritePtr((ADM_PLANE)plane);
        
        ww=src->GetWidth((ADM_PLANE)plane);
        hh=src->GetHeight((ADM_PLANE)plane);
        
        src_pitch=src->GetPitch((ADM_PLANE)plane);
        blur_pitch=blur->GetPitch((ADM_PLANE)plane);
        work_pitch=work->GetPitch((ADM_PLANE)plane);
        
        w=ww;
        h=hh;


        wk+=work_pitch;
        srcpp=srcp;
        srcp+=src_pitch,
        srcpn=srcp+src_pitch;
        int val;

  // Vertical only for now      
#ifdef ADM_CPU_X86
  if(CpuCaps::hasMMX())
  {
  int off;
#ifdef GCC_2_95_X
        __asm__(
                        ADM_ASM_ALIGN16
                        "pxor  %mm7,%mm7\n"
                : : );
#else
        __asm__(
                        ADM_ASM_ALIGN16
                        "pxor  %%mm7,%%mm7\n"
                : : );
#endif
  int wmod8=w>>3;                      
  for (int y=1; y<h-1 ;y++) 
  {               
        off=0;
        for (int x =0;x< wmod8; x++)
                {
                                               
                        __asm__(
                        ADM_ASM_ALIGN16
                        "movq  (%0),%%mm0\n"
                        "movq  %%mm0,%%mm6\n"
                        "punpckhbw %%mm7,%%mm0\n" // High part extended to 16 bits
                        "punpcklbw %%mm7,%%mm6\n" // low part ditto
                        
                        "movq  (%1),%%mm1\n"
                        "movq  %%mm1,%%mm5\n"
                        "punpckhbw %%mm7,%%mm1\n"
                        "punpcklbw %%mm7,%%mm5\n"
                        
                        "movq  (%2),%%mm2\n"
                        "movq  %%mm2,%%mm4\n"
                        "punpckhbw %%mm7,%%mm2\n"
                        "punpcklbw %%mm7,%%mm4\n"
                        
                        "paddw %%mm1,%%mm0\n"
                        "paddw %%mm5,%%mm6\n"
                        
                        "paddw %%mm1,%%mm2\n"
                        "paddw %%mm5,%%mm4\n"
                        
                        "paddw %%mm0,%%mm2\n"
                        "paddw %%mm6,%%mm4\n"
                        "psrlw $2, %%mm4\n"
                        "psrlw $2, %%mm2\n"
                        "packuswb %%mm2,%%mm4\n"
                        "movq %%mm4,(%3)\n" //
                        
                        : : "r" (srcpn+off),
                           "r" (srcp+off), "r" (srcpp+off), "r" (wk+off)
                        );
                        off+=8; 
                }    
        // mod 8 fix
        for(int x=wmod8*8;x<w;x++)
        {
                val=2*srcp[x]+srcpn[x]+srcpp[x];
                wk[x]=(val)>>2;
        }
        srcp+=src_pitch;
        srcpp+=src_pitch;
        srcpn+=src_pitch;
        wk+=work_pitch;     
  }
  __asm__("emms\n");
 }
 else
#endif      
  {
  for (int y=1; y<h-1 ;y++)