void FilteringEdgeLumaHV (DeblockingFunc* pfDeblocking, SMB* pCurMb, SDeblockingFilter* pFilter) { int32_t iLineSize = pFilter->iCsStride[0]; int32_t iMbStride = pFilter->iMbStride; uint8_t* pDestY; int8_t iCurQp; int32_t iIdexA, iAlpha, iBeta; int32_t iMbX = pCurMb->iMbX; int32_t iMbY = pCurMb->iMbY; bool bLeftBsValid[2] = { (iMbX > 0), ((iMbX > 0) && (pCurMb->uiSliceIdc == (pCurMb - 1)->uiSliceIdc))}; bool bTopBsValid[2] = { (iMbY > 0), ((iMbY > 0) && (pCurMb->uiSliceIdc == (pCurMb - iMbStride)->uiSliceIdc))}; int32_t iLeftFlag = bLeftBsValid[pFilter->uiFilterIdc]; int32_t iTopFlag = bTopBsValid[pFilter->uiFilterIdc]; ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16); ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4); pDestY = pFilter->pCsData[0]; iCurQp = pCurMb->uiLumaQp; * (uint32_t*)uiBSx4 = 0x03030303; // luma v if (iLeftFlag) { pFilter->uiLumaQP = (iCurQp + (pCurMb - 1)->uiLumaQp + 1) >> 1; FilteringEdgeLumaIntraV (pfDeblocking, pFilter, pDestY, iLineSize, NULL); } pFilter->uiLumaQP = iCurQp; GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha, iBeta); if (iAlpha | iBeta) { TC0_TBL_LOOKUP (iTc, iIdexA, uiBSx4, 0); pfDeblocking->pfLumaDeblockingLT4Hor (&pDestY[1 << 2], iLineSize, iAlpha, iBeta, iTc); pfDeblocking->pfLumaDeblockingLT4Hor (&pDestY[2 << 2], iLineSize, iAlpha, iBeta, iTc); pfDeblocking->pfLumaDeblockingLT4Hor (&pDestY[3 << 2], iLineSize, iAlpha, iBeta, iTc); } // luma h if (iTopFlag) { pFilter->uiLumaQP = (iCurQp + (pCurMb - iMbStride)->uiLumaQp + 1) >> 1; FilteringEdgeLumaIntraH (pfDeblocking, pFilter, pDestY, iLineSize, NULL); } pFilter->uiLumaQP = iCurQp; if (iAlpha | iBeta) { pfDeblocking->pfLumaDeblockingLT4Ver (&pDestY[ (1 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc); pfDeblocking->pfLumaDeblockingLT4Ver (&pDestY[ (2 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc); pfDeblocking->pfLumaDeblockingLT4Ver (&pDestY[ (3 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc); } }
/*vertical pLeft*/ void WelsI4x4LumaPredVL_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { /*get pTop*/ const uint8_t kuiT0 = pRef[-kiStride]; const uint8_t kuiT1 = pRef[1 - kiStride]; const uint8_t kuiT2 = pRef[2 - kiStride]; const uint8_t kuiT3 = pRef[3 - kiStride]; const uint8_t kuiT4 = pRef[4 - kiStride]; const uint8_t kuiT5 = pRef[5 - kiStride]; const uint8_t kuiT6 = pRef[6 - kiStride]; const uint8_t kuiVL0 = (1 + kuiT0 + kuiT1) >> 1; // uiVL0 const uint8_t kuiVL1 = (1 + kuiT1 + kuiT2) >> 1; // uiVL1 const uint8_t kuiVL2 = (1 + kuiT2 + kuiT3) >> 1; // uiVL2 const uint8_t kuiVL3 = (1 + kuiT3 + kuiT4) >> 1; // uiVL3 const uint8_t kuiVL4 = (1 + kuiT4 + kuiT5) >> 1; // uiVL4 const uint8_t kuiVL5 = (2 + kuiT0 + (kuiT1 << 1) + kuiT2) >> 2; // uiVL5 const uint8_t kuiVL6 = (2 + kuiT1 + (kuiT2 << 1) + kuiT3) >> 2; // uiVL6 const uint8_t kuiVL7 = (2 + kuiT2 + (kuiT3 << 1) + kuiT4) >> 2; // uiVL7 const uint8_t kuiVL8 = (2 + kuiT3 + (kuiT4 << 1) + kuiT5) >> 2; // uiVL8 const uint8_t kuiVL9 = (2 + kuiT4 + (kuiT5 << 1) + kuiT6) >> 2; // uiVL9 ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows uiSrc[0] = kuiVL0; uiSrc[1] = uiSrc[8] = kuiVL1; uiSrc[2] = uiSrc[9] = kuiVL2; uiSrc[3] = uiSrc[10] = kuiVL3; uiSrc[4] = kuiVL5; uiSrc[5] = uiSrc[12] = kuiVL6; uiSrc[6] = uiSrc[13] = kuiVL7; uiSrc[7] = uiSrc[14] = kuiVL8; uiSrc[11] = kuiVL4; uiSrc[15] = kuiVL9; WelsFillingPred8x2to16 (pPred, uiSrc); }
/*vertical pLeft*/ void WelsI4x4LumaPredVLTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { uint8_t* pTopLeft = &pRef[-kiStride - 1]; // pTop-pLeft /*get pTop*/ const uint8_t kuiT0 = * (pTopLeft + 1); const uint8_t kuiT1 = * (pTopLeft + 2); const uint8_t kuiT2 = * (pTopLeft + 3); const uint8_t kuiT3 = * (pTopLeft + 4); const uint8_t kuiVLT0 = (1 + kuiT0 + kuiT1) >> 1; // uiVLT0 const uint8_t kuiVLT1 = (1 + kuiT1 + kuiT2) >> 1; // uiVLT1 const uint8_t kuiVLT2 = (1 + kuiT2 + kuiT3) >> 1; // uiVLT2 const uint8_t kuiVLT3 = (1 + (kuiT3 << 1)) >> 1; // uiVLT3 const uint8_t kuiVLT4 = (2 + kuiT0 + (kuiT1 << 1) + kuiT2) >> 2; // uiVLT4 const uint8_t kuiVLT5 = (2 + kuiT1 + (kuiT2 << 1) + kuiT3) >> 2; // uiVLT5 const uint8_t kuiVLT6 = (2 + kuiT2 + (kuiT3 << 1) + kuiT3) >> 2; // uiVLT6 const uint8_t kuiVLT7 = (2 + (kuiT3 << 2)) >> 2; // uiVLT7 ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows uiSrc[0] = kuiVLT0; uiSrc[1] = uiSrc[8] = kuiVLT1; uiSrc[2] = uiSrc[9] = kuiVLT2; uiSrc[3] = uiSrc[10] = uiSrc[11] = kuiVLT3; uiSrc[4] = kuiVLT4; uiSrc[5] = uiSrc[12] = kuiVLT5; uiSrc[6] = uiSrc[13] = kuiVLT6; uiSrc[7] = uiSrc[14] = uiSrc[15] = kuiVLT7; WelsFillingPred8x2to16 (pPred, uiSrc); }
/*vertical right*/ void WelsI4x4LumaPredVR_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { const int32_t kiStridex2 = kiStride << 1; const uint8_t kuiLT = pRef[-kiStride - 1]; // pTop-pLeft /*get pLeft and pTop*/ const uint8_t kuiL0 = pRef[-1]; const uint8_t kuiL1 = pRef[kiStride - 1]; const uint8_t kuiL2 = pRef[kiStridex2 - 1]; const uint8_t kuiT0 = pRef[-kiStride]; const uint8_t kuiT1 = pRef[1 - kiStride]; const uint8_t kuiT2 = pRef[2 - kiStride]; const uint8_t kuiT3 = pRef[3 - kiStride]; const uint8_t kuiVR0 = (1 + kuiLT + kuiT0) >> 1; const uint8_t kuiVR1 = (1 + kuiT0 + kuiT1) >> 1; const uint8_t kuiVR2 = (1 + kuiT1 + kuiT2) >> 1; const uint8_t kuiVR3 = (1 + kuiT2 + kuiT3) >> 1; const uint8_t kuiVR4 = (2 + kuiL0 + (kuiLT << 1) + kuiT0) >> 2; const uint8_t kuiVR5 = (2 + kuiLT + (kuiT0 << 1) + kuiT1) >> 2; const uint8_t kuiVR6 = (2 + kuiT0 + (kuiT1 << 1) + kuiT2) >> 2; const uint8_t kuiVR7 = (2 + kuiT1 + (kuiT2 << 1) + kuiT3) >> 2; const uint8_t kuiVR8 = (2 + kuiLT + (kuiL0 << 1) + kuiL1) >> 2; const uint8_t kuiVR9 = (2 + kuiL0 + (kuiL1 << 1) + kuiL2) >> 2; ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows uiSrc[0] = uiSrc[9] = kuiVR0; uiSrc[1] = uiSrc[10] = kuiVR1; uiSrc[2] = uiSrc[11] = kuiVR2; uiSrc[3] = kuiVR3; uiSrc[4] = uiSrc[13] = kuiVR4; uiSrc[5] = uiSrc[14] = kuiVR5; uiSrc[6] = uiSrc[15] = kuiVR6; uiSrc[7] = kuiVR7; uiSrc[8] = kuiVR8; uiSrc[12] = kuiVR9; WelsFillingPred8x2to16 (pPred, uiSrc); }
/*
 * Fills the internal-edge strengths of uiBS from the 16-byte table pNnzTab, which is
 * read as four 32-bit words (one per row of four bytes).
 *
 * For row r, the word is run through DEBLOCK_BS_SHIFTED, shifted left by
 * iLShiftFactor, and its first three bytes go to uiBS[0][1..3][r].
 * For r = 1..3, uiBS[1][r][0..3] receives (word[r-1] | word[r]) << iLShiftFactor as
 * one 32-bit store.
 *
 * uiBS[0][0][*] and uiBS[1][0][*] are not written here.
 * NOTE(review): byte extraction from the 32-bit temporaries depends on host byte order.
 */
void inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t uiBS[2][4][4], int32_t iLShiftFactor) {
  uint32_t uiNnzRow[4];
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx3, 4, 4);

  for (int32_t iRow = 0; iRow < 4; iRow++) {
    uiNnzRow[iRow] = * (uint32_t*) (pNnzTab + (iRow << 2));
  }

  for (int32_t iRow = 0; iRow < 4; iRow++) {
    * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnzRow[iRow]) << iLShiftFactor;
    for (int32_t iEdge = 1; iEdge < 4; iEdge++) {
      uiBS[0][iEdge][iRow] = uiBsx3[iEdge - 1];
    }

    if (iRow > 0) {
      * (uint32_t*)uiBS[1][iRow] = (uiNnzRow[iRow - 1] | uiNnzRow[iRow]) << iLShiftFactor;
    }
  }
}
/*horizontal up*/ void WelsI4x4LumaPredHU_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { const int32_t kiStridex2 = kiStride << 1; const int32_t kiStridex3 = kiStride + kiStridex2; /*get pLeft*/ const uint8_t kuiL0 = pRef[-1]; const uint8_t kuiL1 = pRef[kiStride - 1]; const uint8_t kuiL2 = pRef[kiStridex2 - 1]; const uint8_t kuiL3 = pRef[kiStridex3 - 1]; const uint16_t kuiL01 = (1 + kuiL0 + kuiL1); const uint16_t kuiL12 = (1 + kuiL1 + kuiL2); const uint16_t kuiL23 = (1 + kuiL2 + kuiL3); const uint8_t kuiHU0 = kuiL01 >> 1; const uint8_t kuiHU1 = (kuiL01 + kuiL12) >> 2; const uint8_t kuiHU2 = kuiL12 >> 1; const uint8_t kuiHU3 = (kuiL12 + kuiL23) >> 2; const uint8_t kuiHU4 = kuiL23 >> 1; const uint8_t kuiHU5 = (1 + kuiL23 + (kuiL3 << 1)) >> 2; ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows uiSrc[0] = kuiHU0; uiSrc[1] = kuiHU1; uiSrc[2] = uiSrc[4] = kuiHU2; uiSrc[3] = uiSrc[5] = kuiHU3; uiSrc[6] = uiSrc[8] = kuiHU4; uiSrc[7] = uiSrc[9] = kuiHU5; memset (&uiSrc[10], kuiL3, 6 * sizeof (uint8_t)); WelsFillingPred8x2to16 (pPred, uiSrc); }
/*
 * 4x4 luma intra prediction, vertical direction.
 *
 * Loads the four samples directly above the block as one 32-bit word, duplicates
 * that word into an 8-byte buffer and hands it to WelsFillingPred8to16.
 *
 * pPred    destination passed on to WelsFillingPred8to16
 * pRef     top-left sample of the block inside the reference picture
 * kiStride distance in bytes between two picture rows
 */
void WelsI4x4LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const uint32_t kuiTopRow = LD32 (&pRef[-kiStride]);

  ENFORCE_STACK_ALIGN_1D (uint32_t, uiSrcx2, 2, 16)
  uiSrcx2[0] = kuiTopRow;
  uiSrcx2[1] = kuiTopRow;

  WelsFillingPred8to16 (pPred, (uint8_t*)&uiSrcx2[0]);
}
/*down pLeft*/ void WelsI4x4LumaPredDDL_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { /*get pTop*/ const uint8_t kuiT0 = pRef[-kiStride]; const uint8_t kuiT1 = pRef[1 - kiStride]; const uint8_t kuiT2 = pRef[2 - kiStride]; const uint8_t kuiT3 = pRef[3 - kiStride]; const uint8_t kuiT4 = pRef[4 - kiStride]; const uint8_t kuiT5 = pRef[5 - kiStride]; const uint8_t kuiT6 = pRef[6 - kiStride]; const uint8_t kuiT7 = pRef[7 - kiStride]; const uint8_t kuiDDL0 = (2 + kuiT0 + kuiT2 + (kuiT1 << 1)) >> 2; // uiDDL0 const uint8_t kuiDDL1 = (2 + kuiT1 + kuiT3 + (kuiT2 << 1)) >> 2; // uiDDL1 const uint8_t kuiDDL2 = (2 + kuiT2 + kuiT4 + (kuiT3 << 1)) >> 2; // uiDDL2 const uint8_t kuiDDL3 = (2 + kuiT3 + kuiT5 + (kuiT4 << 1)) >> 2; // uiDDL3 const uint8_t kuiDDL4 = (2 + kuiT4 + kuiT6 + (kuiT5 << 1)) >> 2; // uiDDL4 const uint8_t kuiDDL5 = (2 + kuiT5 + kuiT7 + (kuiT6 << 1)) >> 2; // uiDDL5 const uint8_t kuiDDL6 = (2 + kuiT6 + kuiT7 + (kuiT7 << 1)) >> 2; // uiDDL6 ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows uiSrc[0] = kuiDDL0; uiSrc[1] = uiSrc[4] = kuiDDL1; uiSrc[2] = uiSrc[5] = uiSrc[8] = kuiDDL2; uiSrc[3] = uiSrc[6] = uiSrc[9] = uiSrc[12] = kuiDDL3; uiSrc[7] = uiSrc[10] = uiSrc[13] = kuiDDL4; uiSrc[11] = uiSrc[14] = kuiDDL5; uiSrc[15] = kuiDDL6; WelsFillingPred8x2to16 (pPred, uiSrc); }
static inline void McHorVer33_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16); ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16); if (iWidth == 16) { McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); } else if (iWidth == 8) { McHorVer20WidthEq8_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); McHorVer02WidthEq8_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight); PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); } else { McHorVer20WidthEq4_mmx (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight); McHorVer02_c (pSrc + 1, iSrcStride, pVerTmp, 16, 4, iHeight); PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight); } }
void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab, int32_t iMbXy) { uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; int8_t* iRefIndex = pCurDqLayer->pRefIndex[LIST_0][iMbXy]; ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4); uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); uiNnz32b1 = * (uint32_t*) (pNnzTab + 4); uiNnz32b2 = * (uint32_t*) (pNnzTab + 8); uiNnz32b3 = * (uint32_t*) (pNnzTab + 12); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1]; nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0); nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1); nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1]; nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4); nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5); nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1]; nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8); nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9); nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1]; nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12); nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13); nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14); // horizontal * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1); nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 4, 0); nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex, 
pCurDqLayer->pMv[LIST_0][iMbXy], 5, 1); nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 2); nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 3); * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2); nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 8, 4); nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 5); nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 6); nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 7); * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3); nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 12, 8); nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 9); nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 10); nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 11); }
/*
 * Intra-only complexity estimate over 16x16 blocks of the luma plane of pSrc.
 *
 * Per block: m_pIntraFunc[0] is tried when the block is not in the first block row
 * and m_pIntraFunc[1] when it is not in the first block column. Each prediction is
 * scored with m_pSadFunc against the source block. The smaller score is added to
 * the running group total (the very first block contributes nothing).
 *
 * A group is closed at the end of every iMbRowInGom-th block row and at the last
 * block row: the total goes to pGomComplexity[] and is added to iFrameComplexity.
 * iGomNumInFrame receives the number of groups written.
 */
void CComplexityAnalysisScreen::GomComplexityAnalysisIntra (SPixMap* pSrc) {
  const int32_t kiMbCols     = pSrc->sRect.iRectWidth >> 4;
  const int32_t kiMbRows     = pSrc->sRect.iRectHeight >> 4;
  const int32_t kiStrideY    = pSrc->iStride[0];
  const int32_t kiMbRowBytes = kiStrideY << 4;

  uint8_t* pRowStart = (uint8_t*)pSrc->pPixel[0];
  int32_t iGomSad    = 0;
  int32_t iGomCount  = 0;

  ENFORCE_STACK_ALIGN_1D (uint8_t, iMemPredMb, 256, 16)

  m_ComplexityAnalysisParam.iFrameComplexity = 0;

  for (int32_t iMbY = 0; iMbY < kiMbRows; iMbY++, pRowStart += kiMbRowBytes) {
    uint8_t* pMb = pRowStart;

    for (int32_t iMbX = 0; iMbX < kiMbCols; iMbX++, pMb += 16) {
      int32_t iSadPred0 = 0x7fffffff; // INT_MAX: mode not available
      int32_t iSadPred1 = 0x7fffffff;

      if (iMbY > 0) {
        m_pIntraFunc[0] (iMemPredMb, pMb, kiStrideY);
        iSadPred0 = m_pSadFunc (pMb, kiStrideY, iMemPredMb, 16);
      }
      if (iMbX > 0) {
        m_pIntraFunc[1] (iMemPredMb, pMb, kiStrideY);
        iSadPred1 = m_pSadFunc (pMb, kiStrideY, iMemPredMb, 16);
      }
      if (iMbX || iMbY) {
        iGomSad += WELS_MIN (iSadPred0, iSadPred1);
      }
    }

    // Close the group once the last block of a qualifying row has been processed.
    if (kiMbCols > 0
        && ((iMbY + 1) % m_ComplexityAnalysisParam.iMbRowInGom == 0 || iMbY == kiMbRows - 1)) {
      m_ComplexityAnalysisParam.pGomComplexity[iGomCount] = iGomSad;
      m_ComplexityAnalysisParam.iFrameComplexity += iGomSad;
      iGomCount++;
      iGomSad = 0;
    }
  }

  m_ComplexityAnalysisParam.iGomNumInFrame = iGomCount;
}
void inline DeblockingBSInsideMBNormal (SMB* pCurMb, uint8_t uiBS[2][4][4], int8_t* pNnzTab) { uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4); uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); uiNnz32b1 = * (uint32_t*) (pNnzTab + 4); uiNnz32b2 = * (uint32_t*) (pNnzTab + 8); uiNnz32b3 = * (uint32_t*) (pNnzTab + 12); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1]; uiBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 1, 0); uiBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 2, 1); uiBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 3, 2); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1]; uiBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 5, 4); uiBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 6, 5); uiBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 7, 6); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1]; uiBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 9, 8); uiBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 10, 9); uiBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 11, 10); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1]; uiBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 13, 12); uiBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 14, 13); uiBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 15, 14); //horizontal * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1); uiBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 4, 0); uiBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 5, 1); uiBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 6, 2); uiBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 7, 3); * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2); uiBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 8, 4); uiBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 9, 5); uiBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIdx, 
pCurMb->sMv, 10, 6); uiBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 11, 7); * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3); uiBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 12, 8); uiBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 13, 9); uiBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 14, 10); uiBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 15, 11); }
static inline void McHorVer10_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight) { ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16); if (iWidth == 16) { McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); } else if (iWidth == 8) { McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight); PixelAvgWidthEq8_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); } else { McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pTmp, 16, iHeight); PixelAvgWidthEq4_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight); } }
/*
 * Filters one luma edge at pPix with the "LT4Hor" kernel of pFilter->pLoopf.
 *
 * Alpha / beta are derived from pFilter->iLumaQP and the slice offsets. When both
 * are zero nothing is filtered. Otherwise the clipping values are looked up from
 * the per-segment strengths in pBS (four entries) and the kernel is invoked.
 */
void FilteringEdgeLumaV (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
  int32_t iIndexA, iAlpha, iBeta;
  ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);

  GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
                          iBeta);

  if (! (iAlpha | iBeta)) {
    return;
  }

  TC0_TBL_LOOKUP (tc, iIndexA, pBS, 0);
  pFilter->pLoopf->pfLumaDeblockingLT4Hor (pPix, iStride, iAlpha, iBeta, tc);
}
/*
 * Filters one chroma edge of both planes (pPixCb, pPixCr) with the "LT4Hor" chroma
 * kernel of pfDeblocking.
 *
 * Alpha / beta are derived from pFilter->uiChromaQP and the slice offsets. When both
 * are zero nothing is filtered. Otherwise the clipping values are looked up from the
 * per-segment strengths in pBS with the chroma flag set to 1.
 */
void FilteringEdgeChromaV (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPixCb,
                           uint8_t* pPixCr, int32_t iStride, uint8_t* pBS) {
  int32_t iIdexA, iAlpha, iBeta;
  ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);

  GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA,
                          iAlpha, iBeta);

  if (! (iAlpha | iBeta)) {
    return;
  }

  TC0_TBL_LOOKUP (iTc, iIdexA, pBS, 1);
  pfDeblocking->pfChromaDeblockingLT4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta, iTc);
}
void WelsI4x4LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { const uint32_t kiStridex2Left = (kiStride << 1) - 1; const uint32_t kiStridex3Left = kiStride + kiStridex2Left; const uint8_t kuiHor1 = pRef[-1]; const uint8_t kuiHor2 = pRef[kiStride - 1]; const uint8_t kuiHor3 = pRef[kiStridex2Left]; const uint8_t kuiHor4 = pRef[kiStridex3Left]; const uint8_t kuiVec1[4] = {kuiHor1, kuiHor1, kuiHor1, kuiHor1}; const uint8_t kuiVec2[4] = {kuiHor2, kuiHor2, kuiHor2, kuiHor2}; const uint8_t kuiVec3[4] = {kuiHor3, kuiHor3, kuiHor3, kuiHor3}; const uint8_t kuiVec4[4] = {kuiHor4, kuiHor4, kuiHor4, kuiHor4}; ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows ST32 (&uiSrc[0], LD32 (kuiVec1)); ST32 (&uiSrc[4], LD32 (kuiVec2)); ST32 (&uiSrc[8], LD32 (kuiVec3)); ST32 (&uiSrc[12], LD32 (kuiVec4)); WelsFillingPred8x2to16 (pPred, uiSrc); }
/*down pLeft*/ void WelsI4x4LumaPredDDLTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { /*get pTop*/ const uint8_t kuiT0 = pRef[-kiStride]; const uint8_t kuiT1 = pRef[1 - kiStride]; const uint8_t kuiT2 = pRef[2 - kiStride]; const uint8_t kuiT3 = pRef[3 - kiStride]; const uint8_t kuiDLT0 = (2 + kuiT0 + kuiT2 + (kuiT1 << 1)) >> 2; // uiDLT0 const uint8_t kuiDLT1 = (2 + kuiT1 + kuiT3 + (kuiT2 << 1)) >> 2; // uiDLT1 const uint8_t kuiDLT2 = (2 + kuiT2 + kuiT3 + (kuiT3 << 1)) >> 2; // uiDLT2 const uint8_t kuiDLT3 = (2 + (kuiT3 << 2)) >> 2; // uiDLT3 ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows memset (&uiSrc[6], kuiDLT3, 10 * sizeof (uint8_t)); uiSrc[0] = kuiDLT0; uiSrc[1] = uiSrc[4] = kuiDLT1; uiSrc[2] = uiSrc[5] = uiSrc[8] = kuiDLT2; uiSrc[3] = kuiDLT3; WelsFillingPred8x2to16 (pPred, uiSrc); }
/*down right*/ void WelsI4x4LumaPredDDR_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { const int32_t kiStridex2 = kiStride << 1; const int32_t kiStridex3 = kiStride + kiStridex2; const uint8_t kuiLT = pRef[-kiStride - 1]; // pTop-pLeft /*get pLeft and pTop*/ const uint8_t kuiL0 = pRef[-1]; const uint8_t kuiL1 = pRef[kiStride - 1]; const uint8_t kuiL2 = pRef[kiStridex2 - 1]; const uint8_t kuiL3 = pRef[kiStridex3 - 1]; const uint8_t kuiT0 = pRef[-kiStride]; const uint8_t kuiT1 = pRef[1 - kiStride]; const uint8_t kuiT2 = pRef[2 - kiStride]; const uint8_t kuiT3 = pRef[3 - kiStride]; const uint16_t kuiTL0 = 1 + kuiLT + kuiL0; const uint16_t kuiLT0 = 1 + kuiLT + kuiT0; const uint16_t kuiT01 = 1 + kuiT0 + kuiT1; const uint16_t kuiT12 = 1 + kuiT1 + kuiT2; const uint16_t kuiT23 = 1 + kuiT2 + kuiT3; const uint16_t kuiL01 = 1 + kuiL0 + kuiL1; const uint16_t kuiL12 = 1 + kuiL1 + kuiL2; const uint16_t kuiL23 = 1 + kuiL2 + kuiL3; const uint8_t kuiDDR0 = (kuiTL0 + kuiLT0) >> 2; const uint8_t kuiDDR1 = (kuiLT0 + kuiT01) >> 2; const uint8_t kuiDDR2 = (kuiT01 + kuiT12) >> 2; const uint8_t kuiDDR3 = (kuiT12 + kuiT23) >> 2; const uint8_t kuiDDR4 = (kuiTL0 + kuiL01) >> 2; const uint8_t kuiDDR5 = (kuiL01 + kuiL12) >> 2; const uint8_t kuiDDR6 = (kuiL12 + kuiL23) >> 2; ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows uiSrc[0] = uiSrc[5] = uiSrc[10] = uiSrc[15] = kuiDDR0; uiSrc[1] = uiSrc[6] = uiSrc[11] = kuiDDR1; uiSrc[2] = uiSrc[7] = kuiDDR2; uiSrc[3] = kuiDDR3; uiSrc[4] = uiSrc[9] = uiSrc[14] = kuiDDR4; uiSrc[8] = uiSrc[13] = kuiDDR5; uiSrc[12] = kuiDDR6; WelsFillingPred8x2to16 (pPred, uiSrc); }
/*horizontal down*/ void WelsI4x4LumaPredHD_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) { const int32_t kiStridex2 = kiStride << 1; const int32_t kiStridex3 = kiStride + kiStridex2; const uint8_t kuiLT = pRef[-kiStride - 1]; // pTop-pLeft /*get pLeft and pTop*/ const uint8_t kuiL0 = pRef[-1]; const uint8_t kuiL1 = pRef[kiStride - 1]; const uint8_t kuiL2 = pRef[kiStridex2 - 1]; const uint8_t kuiL3 = pRef[kiStridex3 - 1]; const uint8_t kuiT0 = pRef[-kiStride]; const uint8_t kuiT1 = pRef[1 - kiStride]; const uint8_t kuiT2 = pRef[2 - kiStride]; const uint8_t kuiHD0 = (1 + kuiLT + kuiL0) >> 1; // uiHD0 const uint8_t kuiHD1 = (2 + kuiL0 + (kuiLT << 1) + kuiT0) >> 2; // uiHD1 const uint8_t kuiHD2 = (2 + kuiLT + (kuiT0 << 1) + kuiT1) >> 2; // uiHD2 const uint8_t kuiHD3 = (2 + kuiT0 + (kuiT1 << 1) + kuiT2) >> 2; // uiHD3 const uint8_t kuiHD4 = (1 + kuiL0 + kuiL1) >> 1; // uiHD4 const uint8_t kuiHD5 = (2 + kuiLT + (kuiL0 << 1) + kuiL1) >> 2; // uiHD5 const uint8_t kuiHD6 = (1 + kuiL1 + kuiL2) >> 1; // uiHD6 const uint8_t kuiHD7 = (2 + kuiL0 + (kuiL1 << 1) + kuiL2) >> 2; // uiHD7 const uint8_t kuiHD8 = (1 + kuiL2 + kuiL3) >> 1; // uiHD8 const uint8_t kuiHD9 = (2 + kuiL1 + (kuiL2 << 1) + kuiL3) >> 2; // uiHD9 ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16) // TobeCont'd about assign opt as follows uiSrc[0] = uiSrc[6] = kuiHD0; uiSrc[1] = uiSrc[7] = kuiHD1; uiSrc[2] = kuiHD2; uiSrc[3] = kuiHD3; uiSrc[4] = uiSrc[10] = kuiHD4; uiSrc[5] = uiSrc[11] = kuiHD5; uiSrc[8] = uiSrc[14] = kuiHD6; uiSrc[9] = uiSrc[15] = kuiHD7; uiSrc[12] = kuiHD8; uiSrc[13] = kuiHD9; WelsFillingPred8x2to16 (pPred, uiSrc); }
void FilteringEdgeChromaV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, uint8_t* pBS) { int32_t iIndexA; int32_t iAlpha; int32_t iBeta; ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) { GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, iBeta); if (iAlpha | iBeta) { TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1); pFilter->pLoopf->pfChromaDeblockingLT4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta, tc); } } else { for (int i = 0; i < 2; i++) { GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha, iBeta); if (iAlpha | iBeta) { uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr; TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1); pFilter->pLoopf->pfChromaDeblockingLT4Hor2 (pPixCbCr, iStride, iAlpha, iBeta, tc); } } } return; }
void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab, int32_t iMbXy) { uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; int8_t* iRefIndex = pCurDqLayer->pRefIndex[LIST_0][iMbXy]; ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4); int8_t i8x8NnzTab[4]; if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { for (int32_t i = 0; i < 4; i++) { int32_t iBlkIdx = i << 2; i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]); } //vertical nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], g_kuiMbCountScan4Idx[1 << 2], g_kuiMbCountScan4Idx[0]); nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[2 << 2]); //horizontal nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], g_kuiMbCountScan4Idx[2 << 2], g_kuiMbCountScan4Idx[0]); nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[1 << 2]); } else { uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); uiNnz32b1 = * (uint32_t*) (pNnzTab + 4); uiNnz32b2 = * (uint32_t*) (pNnzTab + 8); uiNnz32b3 = * (uint32_t*) (pNnzTab + 12); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1]; nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0); nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1); nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1]; nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4); 
nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5); nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1]; nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8); nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9); nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1]; nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12); nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13); nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14); // horizontal * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1); nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 4, 0); nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 1); nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 2); nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 3); * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2); nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 8, 4); nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 5); nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 6); nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 7); * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3); nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 12, 8); nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 9); nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 10); nBS[1][3][3] = BS_EDGE (uiBsx4[3], 
iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 11); } }
void CComplexityAnalysisScreen::GomComplexityAnalysisInter (SPixMap* pSrc, SPixMap* pRef, bool bScrollFlag) { int32_t iWidth = pSrc->sRect.iRectWidth; int32_t iHeight = pSrc->sRect.iRectHeight; int32_t iBlockWidth = iWidth >> 4; int32_t iBlockHeight = iHeight >> 4; int32_t iInterSad, iScrollSad, iBlockSadH, iBlockSadV, iGomSad = 0; int32_t iIdx = 0; int32_t iScrollMvX = m_ComplexityAnalysisParam.sScrollResult.iScrollMvX; int32_t iScrollMvY = m_ComplexityAnalysisParam.sScrollResult.iScrollMvY; uint8_t* pPtrX = NULL, *pPtrY = NULL; int32_t iStrideX = 0, iStrideY = 0; int32_t iRowStrideX = 0, iRowStrideY = 0; uint8_t* pTmpRef = NULL, *pTmpCur = NULL, *pTmpRefScroll = NULL; ENFORCE_STACK_ALIGN_1D (uint8_t, iMemPredMb, 256, 16) pPtrX = (uint8_t*)pRef->pPixel[0]; pPtrY = (uint8_t*)pSrc->pPixel[0]; iStrideX = pRef->iStride[0]; iStrideY = pSrc->iStride[0]; iRowStrideX = pRef->iStride[0] << 4; iRowStrideY = pSrc->iStride[0] << 4; m_ComplexityAnalysisParam.iFrameComplexity = 0; for (int32_t j = 0; j < iBlockHeight; j ++) { pTmpRef = pPtrX; pTmpCur = pPtrY; for (int32_t i = 0; i < iBlockWidth; i++) { int32_t iBlockPointX = i << 4; int32_t iBlockPointY = j << 4; iInterSad = m_pSadFunc (pTmpCur, iStrideY, pTmpRef, iStrideX); if (bScrollFlag) { if ((iInterSad != 0) && (iBlockPointX + iScrollMvX >= 0) && (iBlockPointX + iScrollMvX <= iWidth - 8) && (iBlockPointY + iScrollMvY >= 0) && (iBlockPointY + iScrollMvY <= iHeight - 8)) { pTmpRefScroll = pTmpRef - iScrollMvY * iStrideX + iScrollMvX; iScrollSad = m_pSadFunc (pTmpCur, iStrideY, pTmpRefScroll, iStrideX); if (iScrollSad < iInterSad) { iInterSad = iScrollSad; } } } iBlockSadH = iBlockSadV = 0x7fffffff; // INT_MAX if (j > 0) { m_pIntraFunc[0] (iMemPredMb, pTmpCur, iStrideY); iBlockSadH = m_pSadFunc (pTmpCur, iStrideY, iMemPredMb, 16); } if (i > 0) { m_pIntraFunc[1] (iMemPredMb, pTmpCur, iStrideY); iBlockSadV = m_pSadFunc (pTmpCur, iStrideY, iMemPredMb, 16); } iGomSad += WELS_MIN (WELS_MIN (iBlockSadH, iBlockSadV), iInterSad); if 
(i == iBlockWidth - 1 && ((j + 1) % m_ComplexityAnalysisParam.iMbRowInGom == 0 || j == iBlockHeight - 1)) { m_ComplexityAnalysisParam.pGomComplexity[iIdx] = iGomSad; m_ComplexityAnalysisParam.iFrameComplexity += iGomSad; iIdx++; iGomSad = 0; } pTmpRef += 16; pTmpCur += 16; } pPtrX += iRowStrideX; pPtrY += iRowStrideY; } m_ComplexityAnalysisParam.iGomNumInFrame = iIdx; }