// Constructs the block-grid description for one level.
//
// Parameters:
//   sizeX, sizeY            - block width and height, stored in nBlkSizeX / nBlkSizeY.
//   lv                      - level index, stored in nLogScale; nScale is iexp2(lv).
//   pel                     - stored in nPel; nLogPel is ilog2(pel).
//   _nOverlapX, _nOverlapY  - overlap between adjacent blocks along each axis.
//   _nBlkX, _nBlkY          - number of blocks along each axis.
//
// Allocates `blocks` with nBlkX * nBlkY entries and calls Init() on each one
// in row-major order, passing the block's column/row index multiplied by the
// per-axis stride (block size minus overlap).
FakePlaneOfBlocks::FakePlaneOfBlocks(int32_t sizeX, int32_t sizeY, int32_t lv, int32_t pel, int32_t _nOverlapX, int32_t _nOverlapY, int32_t _nBlkX, int32_t _nBlkY)
{
    // Grid geometry exactly as handed in by the caller.
    nBlkSizeX = sizeX;
    nBlkSizeY = sizeY;
    nOverlapX = _nOverlapX;
    nOverlapY = _nOverlapY;
    nBlkX = _nBlkX;
    nBlkY = _nBlkY;
    nBlkCount = nBlkX * nBlkY;

    // Stride between the origins of two neighbouring blocks on each axis.
    const int32_t strideX = nBlkSizeX - nOverlapX;
    const int32_t strideY = nBlkSizeY - nOverlapY;

    // Extent covered by the grid: one leading overlap plus one stride per block.
    nWidth_Bi = nOverlapX + nBlkX * strideX;
    nHeight_Bi = nOverlapY + nBlkY * strideY;

    // Precision and scale, together with their derived values.
    nPel = pel;
    nLogPel = ilog2(nPel);
    nLogScale = lv;
    nScale = iexp2(nLogScale);

    blocks = new FakeBlockData[nBlkCount];

    // Visit the array row by row; the cursor advances once per block so it
    // always points at entry (row * nBlkX + col).
    FakeBlockData *cursor = blocks;
    for (int32_t row = 0; row < nBlkY; ++row)
    {
        for (int32_t col = 0; col < nBlkX; ++col, ++cursor)
        {
            cursor->Init(col * strideX, row * strideY);
        }
    }
}
PlaneOfBlocks::PlaneOfBlocks(int _nBlkX, int _nBlkY, int _nBlkSizeX, int _nBlkSizeY, int _nPel, int _nLevel, int _nFlags, int _nOverlapX, int _nOverlapY, int _yRatioUV) { /* constant fields */ nPel = _nPel; nLogPel = ilog2(nPel); // nLogPel=0 for nPel=1, 1 for nPel=2, 2 for nPel=4, i.e. (x*nPel) = (x<<nLogPel) nLogScale = _nLevel; nScale = iexp2(nLogScale); nBlkSizeX = _nBlkSizeX; nBlkSizeY = _nBlkSizeY; nOverlapX = _nOverlapX; nOverlapY = _nOverlapY; nBlkX = _nBlkX; nBlkY = _nBlkY; nBlkCount = nBlkX * nBlkY; nFlags = _nFlags; yRatioUV = _yRatioUV; smallestPlane = (bool)(nFlags & MOTION_SMALLEST_PLANE); mmx = (bool)(nFlags & MOTION_USE_MMX); isse = (bool)(nFlags & MOTION_USE_ISSE); chroma = (bool)(nFlags & MOTION_USE_CHROMA_MOTION); bool mmxext = (bool)(nFlags & CPU_MMXEXT); bool cache32 = (bool)(nFlags & CPU_CACHELINE_32); bool cache64 = (bool)(nFlags & CPU_CACHELINE_64); bool sse2 = (bool)(nFlags & CPU_SSE2_IS_FAST); bool sse3 = (bool)(nFlags & CPU_SSE3); bool ssse3 = (bool)(nFlags & CPU_SSSE3); bool ssse3pha = (bool)(nFlags & CPU_PHADD_IS_FAST); bool ssd = (bool)(nFlags & MOTION_USE_SSD); bool satd = (bool)(nFlags & MOTION_USE_SATD); // ssd=false; // satd=false; globalMVPredictor.x = zeroMV.x; globalMVPredictor.y = zeroMV.y; globalMVPredictor.sadLuma = zeroMV.sadLuma; globalMVPredictor.sadChromaU = zeroMV.sadChromaU; globalMVPredictor.sadChromaV = zeroMV.sadChromaV; /* arrays memory allocation */ vectors = new VECTOR[nBlkCount]; /* function's pointers initialization */ #define SET_FUNCPTR(blksizex, blksizey, blksizex2, blksizey2) \ SAD = Sad##blksizex##x##blksizey##_iSSE; \ VAR = Var##blksizex##x##blksizey##_iSSE; \ LUMA = Luma##blksizex##x##blksizey##_iSSE; \ BLITLUMA = Copy_C<blksizex , blksizey>; \ if (yRatioUV==2) { \ BLITCHROMA = Copy_C<blksizex2 , blksizey2>; \ SADCHROMA = Sad##blksizex2##x##blksizey2##_iSSE; \ } \ else { \ BLITCHROMA = Copy_C<blksizex2 , blksizey>; \ SADCHROMA = Sad##blksizex2##x##blksizey##_iSSE; \ } #define SET_FUNCPTR_C(blksizex, 
blksizey, blksizex2, blksizey2) \ SAD = Sad_C<blksizex , blksizey>; \ VAR = Var_C<blksizex , blksizey>; \ LUMA = Luma_C<blksizex , blksizey>; \ BLITLUMA = Copy_C<blksizex , blksizey>; \ if (yRatioUV==2) { \ BLITCHROMA = Copy_C<blksizex2 , blksizey2>; \ SADCHROMA = Sad_C<blksizex2 , blksizey2>; \ } \ else { \ BLITCHROMA = Copy_C<blksizex2 , blksizey>; \ SADCHROMA = Sad_C<blksizex2 , blksizey>; \ } //#define NEWBLIT #ifdef NEWBLIT //I suppose it is faster to use MMX than use SSE on a cache line split affected platform if that occurs #define SETBLIT16(blksizex, blksizey, type) \ type = copy_mc_##blksizex##x##blksizey##_mmx; \ if (sse2&&(((!cache32)&&(!cache64))||(!ssse3))) type = copy_mc_##blksizex##x##blksizey##_sse2; \ if (sse3&&(((!cache32)&&(!cache64))||(!ssse3))) type = copy_mc_##blksizex##x##blksizey##_sse3 if (sse2&&(nOverlapX %16 == 0)) type = copy_mc_##blksizex##x##blksizey##_aligned_sse2; \ #define SETBLITX(blksizex, blksizey, type) \ type = copy_mc_##blksizex##x##blksizey##_mmx #else #define SETBLIT16(blksizex, blksizey, type) #define SETBLITX(blksizex, blksizey, type) #endif #define SET_FUNCPTR_x264(blksizex, blksizey, type) \ type = x264_pixel_sad_##blksizex##x##blksizey##_mmxext; \ if (cache32) type = x264_pixel_sad_##blksizex##x##blksizey##_cache32_mmxext; \ if (cache64) type = x264_pixel_sad_##blksizex##x##blksizey##_cache64_mmxext; \ if (sse2) type = x264_pixel_sad_##blksizex##x##blksizey##_sse2; \ if (sse3) type = x264_pixel_sad_##blksizex##x##blksizey##_sse3; \ if (cache32&&cache64) type = x264_pixel_sad_##blksizex##x##blksizey##_cache64_sse2; \ if (ssse3&&cache64) type = x264_pixel_sad_##blksizex##x##blksizey##_cache64_ssse3; \ if (ssd) type = x264_pixel_ssd_##blksizex##x##blksizey##_mmx; \ if (satd) type = x264_pixel_satd_##blksizex##x##blksizey##_mmxext; \ if (satd&&sse2) type = x264_pixel_satd_##blksizex##x##blksizey##_sse2; \ if (satd&&ssse3) type = x264_pixel_satd_##blksizex##x##blksizey##_ssse3; \ if (satd&&ssse3pha) type = 
x264_pixel_satd_##blksizex##x##blksizey##_ssse3_phadd #define SET_FUNCPTR_x264_mmx(blksizex, blksizey, type) \ type = x264_pixel_sad_##blksizex##x##blksizey##_mmxext; \ if (cache32) type = x264_pixel_sad_##blksizex##x##blksizey##_cache32_mmxext; \ if (cache64) type = x264_pixel_sad_##blksizex##x##blksizey##_cache64_mmxext; \ if (ssd) type = x264_pixel_ssd_##blksizex##x##blksizey##_mmx; \ if (satd) type = x264_pixel_satd_##blksizex##x##blksizey##_mmxext; \ if (satd&&sse2) type = x264_pixel_satd_##blksizex##x##blksizey##_sse2; \ if (satd&&ssse3) type = x264_pixel_satd_##blksizex##x##blksizey##_ssse3; \ if (satd&&ssse3pha) type = x264_pixel_satd_##blksizex##x##blksizey##_ssse3_phadd #define SET_FUNCPTR_x264_mmx_4x(blksizey, type) \ type = x264_pixel_sad_4x##blksizey##_mmxext; \ if (ssd) type = x264_pixel_ssd_4x##blksizey##_mmx; \ if (satd) type = x264_pixel_satd_4x##blksizey##_mmxext #define SET_FUNCPTR_x264_SATD(blksizex,blksizey) \ SATD = x264_pixel_satd_##blksizex##x##blksizey##_mmxext; \ if (sse2) SATD = x264_pixel_satd_##blksizex##x##blksizey##_sse2; \ if (ssse3) SATD = x264_pixel_satd_##blksizex##x##blksizey##_ssse3 SATD = Sad0_C; //for now disable SATD if default functions are used if ( isse ) { switch (nBlkSizeX) { case 16: if (nBlkSizeY==16) { SET_FUNCPTR(16,16,8,8) } else if (nBlkSizeY==8) { SET_FUNCPTR(16,8,8,4) } else if (nBlkSizeY==2) { SET_FUNCPTR(16,2,8,1) } else if (nBlkSizeY==1){ SAD = Sad16x1_iSSE; VAR = Var_C<16,1>;; LUMA = Luma_C<16,1>; BLITLUMA = Copy_C<16,1>; if (yRatioUV==2) { //error } else { //yRatioUV==1 BLITCHROMA = Copy_C<8, 1>; SADCHROMA = Sad8x1_iSSE; } } break; case 4: SET_FUNCPTR(4,4,2,2) if (yRatioUV==2) { //SADCHROMA = Sad2x2_iSSE_T; } else { //SADCHROMA = Sad2x4_iSSE_T; } break; case 32: if (nBlkSizeY==16) { SET_FUNCPTR(32,16,16,8) } break; case 8: default: if (nBlkSizeY == 8) { SET_FUNCPTR(8,8,4,4) } else if (nBlkSizeY == 4) { // 8x4 SET_FUNCPTR(8,4,4,2) } }//end switch }//end isse else { switch (nBlkSizeX)