// Sets up the block grid description for one level: stores the block size,
// overlap, grid dimensions and precision values, derives the covered
// width/height, then allocates one FakeBlockData per block and initialises
// each with its grid offset (column * horizontal step, row * vertical step).
//   sizeX, sizeY           - block dimensions
//   lv                     - stored as nLogScale; nScale = iexp2(lv)
//   pel                    - stored as nPel; nLogPel = ilog2(pel)
//   _nOverlapX, _nOverlapY - overlap between neighbouring blocks
//   _nBlkX, _nBlkY         - number of blocks per row / per column
FakePlaneOfBlocks::FakePlaneOfBlocks(int32_t sizeX, int32_t sizeY, int32_t lv, int32_t pel, int32_t _nOverlapX, int32_t _nOverlapY, int32_t _nBlkX, int32_t _nBlkY) {
	// Horizontal geometry.
	nBlkSizeX = sizeX;
	nOverlapX = _nOverlapX;
	nBlkX = _nBlkX;

	// Vertical geometry.
	nBlkSizeY = sizeY;
	nOverlapY = _nOverlapY;
	nBlkY = _nBlkY;

	// Distance between the origins of two neighbouring blocks.
	const int32_t strideX = nBlkSizeX - nOverlapX;
	const int32_t strideY = nBlkSizeY - nOverlapY;

	// Extent covered by the grid: every block adds one stride, plus the
	// overlap margin once.
	nWidth_Bi = nOverlapX + nBlkX * strideX;
	nHeight_Bi = nOverlapY + nBlkY * strideY;
	nBlkCount = nBlkX * nBlkY;

	// Precision and level, together with their derived forms.
	nPel = pel;
	nLogPel = ilog2(nPel);
	nLogScale = lv;
	nScale = iexp2(nLogScale);

	// Blocks are stored row by row.
	blocks = new FakeBlockData[nBlkCount];
	int32_t slot = 0;
	for (int32_t row = 0; row < nBlkY; ++row) {
		for (int32_t col = 0; col < nBlkX; ++col) {
			blocks[slot].Init(col * strideX, row * strideY);
			++slot;
		}
	}
}
PlaneOfBlocks::PlaneOfBlocks(int _nBlkX, int _nBlkY, int _nBlkSizeX, int _nBlkSizeY, int _nPel, int _nLevel, int _nFlags, int _nOverlapX, int _nOverlapY, int _yRatioUV)
{

/* constant fields */

    nPel = _nPel;
    nLogPel = ilog2(nPel);
    // nLogPel=0 for nPel=1, 1 for nPel=2, 2 for nPel=4, i.e. (x*nPel) = (x<<nLogPel)
    nLogScale = _nLevel;
    nScale = iexp2(nLogScale);

    nBlkSizeX = _nBlkSizeX;
    nBlkSizeY = _nBlkSizeY;
    nOverlapX = _nOverlapX;
    nOverlapY = _nOverlapY;

    nBlkX = _nBlkX;
    nBlkY = _nBlkY;
    nBlkCount = nBlkX * nBlkY;

    nFlags = _nFlags;
    yRatioUV = _yRatioUV;

    smallestPlane = (bool)(nFlags & MOTION_SMALLEST_PLANE);
    mmx = (bool)(nFlags & MOTION_USE_MMX);
    isse = (bool)(nFlags & MOTION_USE_ISSE);
    chroma = (bool)(nFlags & MOTION_USE_CHROMA_MOTION);

    bool mmxext = (bool)(nFlags & CPU_MMXEXT);
    bool cache32 = (bool)(nFlags & CPU_CACHELINE_32);
    bool cache64 = (bool)(nFlags & CPU_CACHELINE_64);
    bool sse2 = (bool)(nFlags & CPU_SSE2_IS_FAST);
    bool sse3 = (bool)(nFlags & CPU_SSE3);
    bool ssse3 = (bool)(nFlags & CPU_SSSE3);
    bool ssse3pha = (bool)(nFlags & CPU_PHADD_IS_FAST);
    bool ssd = (bool)(nFlags & MOTION_USE_SSD);
    bool satd = (bool)(nFlags & MOTION_USE_SATD);
   
//	ssd=false;
//	satd=false;

    globalMVPredictor.x = zeroMV.x;
    globalMVPredictor.y = zeroMV.y;
    globalMVPredictor.sadLuma    = zeroMV.sadLuma;
    globalMVPredictor.sadChromaU = zeroMV.sadChromaU;
    globalMVPredictor.sadChromaV = zeroMV.sadChromaV;

/* arrays memory allocation */

    vectors = new VECTOR[nBlkCount];

/* function's pointers initialization */

// Binds the block-function pointers for a blksizex x blksizey luma block to the
// "_iSSE"-suffixed routines whose names are built by token pasting
// (Sad<W>x<H>_iSSE, Var<W>x<H>_iSSE, Luma<W>x<H>_iSSE); BLITLUMA always uses the
// Copy_C template.
// Chroma uses width blksizex2 in both cases; its height is blksizey2 when
// yRatioUV==2 and the luma height blksizey otherwise.
// The expansion is a sequence of complete statements (including an if/else),
// so it is invoked without a trailing semicolon inside a braced scope.
#define SET_FUNCPTR(blksizex, blksizey, blksizex2, blksizey2) \
        SAD = Sad##blksizex##x##blksizey##_iSSE; \
        VAR = Var##blksizex##x##blksizey##_iSSE; \
        LUMA = Luma##blksizex##x##blksizey##_iSSE; \
        BLITLUMA = Copy_C<blksizex , blksizey>; \
        if (yRatioUV==2) { \
            BLITCHROMA = Copy_C<blksizex2 , blksizey2>;  \
            SADCHROMA = Sad##blksizex2##x##blksizey2##_iSSE; \
        } \
        else { \
            BLITCHROMA = Copy_C<blksizex2 , blksizey>; \
            SADCHROMA = Sad##blksizex2##x##blksizey##_iSSE; \
        }

// Same bindings as SET_FUNCPTR, but every pointer is set to a template
// instantiation (Sad_C, Var_C, Luma_C, Copy_C) instead of an "_iSSE" routine.
// Chroma uses width blksizex2 in both cases; its height is blksizey2 when
// yRatioUV==2 and the luma height blksizey otherwise.
// The expansion is a sequence of complete statements (including an if/else),
// so no trailing semicolon is needed at the invocation.
#define SET_FUNCPTR_C(blksizex, blksizey, blksizex2, blksizey2) \
        SAD = Sad_C<blksizex , blksizey>; \
        VAR = Var_C<blksizex , blksizey>; \
        LUMA = Luma_C<blksizex , blksizey>; \
        BLITLUMA = Copy_C<blksizex , blksizey>; \
        if (yRatioUV==2) { \
            BLITCHROMA = Copy_C<blksizex2 , blksizey2>; \
            SADCHROMA = Sad_C<blksizex2 , blksizey2>; \
        } \
        else { \
            BLITCHROMA = Copy_C<blksizex2 , blksizey>; \
            SADCHROMA = Sad_C<blksizex2  , blksizey>; \
        }

//#define NEWBLIT

#ifdef NEWBLIT
//Presumably MMX is faster than SSE on platforms affected by cache-line splits, whenever such a split occurs
// Chooses a copy_mc_<W>x<H>_* blit routine and stores it in `type`.
// Assignments run top to bottom, so the last matching condition wins:
//   1. the MMX routine is the baseline;
//   2. the SSE2 routine when sse2 is set and either no cache-line flag is set
//      or ssse3 is not set;
//   3. the SSE3 routine under the same condition with sse3;
//   4. the aligned SSE2 routine when sse2 is set and nOverlapX is a multiple of 16.
// Reads the local flags sse2, sse3, ssse3, cache32, cache64 and nOverlapX from
// the enclosing scope. The last statement carries no semicolon; the invocation
// is expected to supply it.
// Every line except the last must end in a continuation backslash, and the last
// must not: otherwise the macro is cut short or swallows the next directive.
#define SETBLIT16(blksizex, blksizey, type) \
	type = copy_mc_##blksizex##x##blksizey##_mmx; \
	if (sse2&&(((!cache32)&&(!cache64))||(!ssse3))) type = copy_mc_##blksizex##x##blksizey##_sse2; \
	if (sse3&&(((!cache32)&&(!cache64))||(!ssse3))) type = copy_mc_##blksizex##x##blksizey##_sse3; \
	if (sse2&&(nOverlapX %16 == 0)) type = copy_mc_##blksizex##x##blksizey##_aligned_sse2
// Variant for the remaining block sizes: always selects the MMX routine.
#define SETBLITX(blksizex, blksizey, type) \
	type = copy_mc_##blksizex##x##blksizey##_mmx
#else
#define SETBLIT16(blksizex, blksizey, type)
#define SETBLITX(blksizex, blksizey, type)
#endif


// Selects an x264 cost routine for a blksizex x blksizey block and stores it in
// `type`. Assignments run top to bottom, so each later match overrides the
// earlier ones:
//   SAD : mmxext baseline, then cache32_mmxext, cache64_mmxext, sse2, sse3,
//         cache64_sse2, cache64_ssse3;
//   SSD : the mmx SSD routine replaces any SAD choice when `ssd` is set;
//   SATD: replaces the SAD/SSD choice when `satd` is set (mmxext, then sse2,
//         ssse3, ssse3_phadd as the corresponding flags are set).
// Reads the local flags cache32, cache64, sse2, sse3, ssse3, ssse3pha, ssd and
// satd from the enclosing scope.
// NOTE(review): the cache64_sse2 variant is gated on (cache32 && cache64), not
// on sse2 -- confirm this is intended.
// The last statement carries no semicolon; the invocation is expected to supply it.
#define SET_FUNCPTR_x264(blksizex, blksizey, type) \
        type = x264_pixel_sad_##blksizex##x##blksizey##_mmxext; \
        if (cache32) type = x264_pixel_sad_##blksizex##x##blksizey##_cache32_mmxext; \
        if (cache64) type = x264_pixel_sad_##blksizex##x##blksizey##_cache64_mmxext; \
        if (sse2) type = x264_pixel_sad_##blksizex##x##blksizey##_sse2; \
        if (sse3) type = x264_pixel_sad_##blksizex##x##blksizey##_sse3; \
        if (cache32&&cache64) type = x264_pixel_sad_##blksizex##x##blksizey##_cache64_sse2; \
        if (ssse3&&cache64) type = x264_pixel_sad_##blksizex##x##blksizey##_cache64_ssse3; \
        if (ssd) type = x264_pixel_ssd_##blksizex##x##blksizey##_mmx; \
        if (satd) type = x264_pixel_satd_##blksizex##x##blksizey##_mmxext; \
        if (satd&&sse2) type = x264_pixel_satd_##blksizex##x##blksizey##_sse2; \
        if (satd&&ssse3) type = x264_pixel_satd_##blksizex##x##blksizey##_ssse3; \
        if (satd&&ssse3pha) type = x264_pixel_satd_##blksizex##x##blksizey##_ssse3_phadd
		
  
// Like SET_FUNCPTR_x264, but the SAD choice is limited to the mmxext routines
// (baseline, cache32, cache64); no sse2/sse3/ssse3 SAD variant is selected.
// The SSD and SATD overrides are unchanged: `ssd` switches to the mmx SSD
// routine, and `satd` switches to a SATD routine that may still be the sse2,
// ssse3 or ssse3_phadd variant. Later assignments override earlier ones.
// The last statement carries no semicolon; the invocation is expected to supply it.
#define SET_FUNCPTR_x264_mmx(blksizex, blksizey, type) \
        type = x264_pixel_sad_##blksizex##x##blksizey##_mmxext; \
        if (cache32) type = x264_pixel_sad_##blksizex##x##blksizey##_cache32_mmxext; \
        if (cache64) type = x264_pixel_sad_##blksizex##x##blksizey##_cache64_mmxext; \
        if (ssd) type = x264_pixel_ssd_##blksizex##x##blksizey##_mmx; \
        if (satd) type = x264_pixel_satd_##blksizex##x##blksizey##_mmxext; \
        if (satd&&sse2) type = x264_pixel_satd_##blksizex##x##blksizey##_sse2; \
        if (satd&&ssse3) type = x264_pixel_satd_##blksizex##x##blksizey##_ssse3; \
        if (satd&&ssse3pha) type = x264_pixel_satd_##blksizex##x##blksizey##_ssse3_phadd

// Variant for blocks of width 4: only the height is a parameter. Selects the
// mmxext SAD routine, overridden by the mmx SSD routine when `ssd` is set, and
// then by the mmxext SATD routine when `satd` is set. No cache-line or SSE
// variants are considered here.
// The last statement carries no semicolon; the invocation is expected to supply it.
#define SET_FUNCPTR_x264_mmx_4x(blksizey, type) \
        type = x264_pixel_sad_4x##blksizey##_mmxext; \
        if (ssd) type = x264_pixel_ssd_4x##blksizey##_mmx; \
        if (satd) type = x264_pixel_satd_4x##blksizey##_mmxext

// Sets SATD to the x264 SATD routine for a blksizex x blksizey block: the
// mmxext routine by default, replaced by the sse2 routine when `sse2` is set
// and then by the ssse3 routine when `ssse3` is set (the later match wins).
// Unlike the macros above, this does not depend on the `satd` flag.
// The last statement carries no semicolon; the invocation is expected to supply it.
#define SET_FUNCPTR_x264_SATD(blksizex,blksizey) \
        SATD = x264_pixel_satd_##blksizex##x##blksizey##_mmxext; \
        if (sse2) SATD = x264_pixel_satd_##blksizex##x##blksizey##_sse2; \
        if (ssse3) SATD = x264_pixel_satd_##blksizex##x##blksizey##_ssse3


	SATD = Sad0_C; //for now disable SATD if default functions are used
	if ( isse )
	{
		switch (nBlkSizeX)
		{
		case 16:
			if (nBlkSizeY==16) {
				SET_FUNCPTR(16,16,8,8)
			} else if (nBlkSizeY==8) {
			    SET_FUNCPTR(16,8,8,4)
			} else if (nBlkSizeY==2) {
				SET_FUNCPTR(16,2,8,1)
				} else if (nBlkSizeY==1){
					SAD = Sad16x1_iSSE;
					VAR = Var_C<16,1>;;
					LUMA = Luma_C<16,1>;
					BLITLUMA = Copy_C<16,1>;
					if (yRatioUV==2) {
						//error
					}
					else { //yRatioUV==1
						BLITCHROMA = Copy_C<8, 1>;
						SADCHROMA = Sad8x1_iSSE;
					}
				}
			break;
		case 4:
			SET_FUNCPTR(4,4,2,2)
				if (yRatioUV==2) {
					//SADCHROMA = Sad2x2_iSSE_T;
				}
				else {	
					//SADCHROMA = Sad2x4_iSSE_T;
				}
			break;
		case 32:
			if (nBlkSizeY==16) {
				SET_FUNCPTR(32,16,16,8)
			}
			break;
		case 8:
		default:
			if (nBlkSizeY == 8) {
				SET_FUNCPTR(8,8,4,4)
			} else if (nBlkSizeY == 4) { // 8x4
				SET_FUNCPTR(8,4,4,2)
			}
		}//end switch
	}//end isse
	else
	{
		switch (nBlkSizeX)