void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab, int32_t iMbXy) { uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; int8_t* iRefIndex = pCurDqLayer->pRefIndex[LIST_0][iMbXy]; ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4); uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); uiNnz32b1 = * (uint32_t*) (pNnzTab + 4); uiNnz32b2 = * (uint32_t*) (pNnzTab + 8); uiNnz32b3 = * (uint32_t*) (pNnzTab + 12); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1]; nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0); nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1); nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1]; nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4); nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5); nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1]; nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8); nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9); nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1]; nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12); nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13); nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14); // horizontal * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1); nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 4, 0); nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 1); nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 2); nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 3); * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2); nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 8, 4); nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 5); nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 6); nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 7); * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3); nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 12, 8); nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 9); nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 10); nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 11); }
void inline DeblockingBSInsideMBNormal (SMB* pCurMb, uint8_t uiBS[2][4][4], int8_t* pNnzTab) { uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4); uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); uiNnz32b1 = * (uint32_t*) (pNnzTab + 4); uiNnz32b2 = * (uint32_t*) (pNnzTab + 8); uiNnz32b3 = * (uint32_t*) (pNnzTab + 12); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1]; uiBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 1, 0); uiBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 2, 1); uiBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 3, 2); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1]; uiBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 5, 4); uiBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 6, 5); uiBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 7, 6); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1]; uiBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 9, 8); uiBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 10, 9); uiBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 11, 10); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1]; uiBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 13, 12); uiBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 14, 13); uiBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 15, 14); //horizontal * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1); uiBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 4, 0); uiBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 5, 1); uiBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 6, 2); uiBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 7, 3); * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2); uiBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 8, 4); uiBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 9, 5); uiBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 10, 6); uiBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 11, 7); * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3); uiBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 12, 8); uiBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 13, 9); uiBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 14, 10); uiBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 15, 11); }
void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab, int32_t iMbXy) { uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; int8_t* iRefIndex = pCurDqLayer->pRefIndex[LIST_0][iMbXy]; ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4); int8_t i8x8NnzTab[4]; if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { for (int32_t i = 0; i < 4; i++) { int32_t iBlkIdx = i << 2; i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]); } //vertical nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], g_kuiMbCountScan4Idx[1 << 2], g_kuiMbCountScan4Idx[0]); nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[2 << 2]); //horizontal nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], g_kuiMbCountScan4Idx[2 << 2], g_kuiMbCountScan4Idx[0]); nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[1 << 2]); } else { uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); uiNnz32b1 = * (uint32_t*) (pNnzTab + 4); uiNnz32b2 = * (uint32_t*) (pNnzTab + 8); uiNnz32b3 = * (uint32_t*) (pNnzTab + 12); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1]; nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0); nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1); nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1]; nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4); nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5); nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1]; nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8); nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9); nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1]; nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12); nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13); nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14); // horizontal * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1); nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 4, 0); nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 1); nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 2); nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 3); * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2); nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 8, 4); nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 5); nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 6); nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 7); * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3); nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 12, 8); nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 9); nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 10); nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 11); } }