/*
 * Chroma DC intra prediction into a packed 8x8 block (8 bytes per row at pPred).
 *
 * pPred    - destination; 64 contiguous bytes are written.
 * pRef     - reference sample at the block origin. The 8 samples of the row
 *            above (pRef - kiStride) and the 8 samples of the column to the
 *            left (pRef - 1 + n * kiStride, n = 0..7) are read.
 * kiStride - distance in bytes between two rows of pRef.
 *
 * Each 4x4 quadrant receives one value:
 *   rows 0-3, bytes 0-3 : rounded mean of top[0..3] and left[0..3]
 *   rows 0-3, bytes 4-7 : rounded mean of top[4..7]
 *   rows 4-7, bytes 0-3 : rounded mean of left[4..7]
 *   rows 4-7, bytes 4-7 : rounded mean of top[4..7] and left[4..7]
 */
void WelsIChromaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const uint8_t* kpTop  = pRef - kiStride;  /* row directly above the block */
  const uint8_t* kpLeft = pRef - 1;         /* column directly left of the block */
  uint32_t uiTopLo  = 0;
  uint32_t uiTopHi  = 0;
  uint32_t uiLeftLo = 0;
  uint32_t uiLeftHi = 0;
  int32_t iIdx;

  /* Accumulate the four groups of four neighbouring samples. */
  for (iIdx = 0; iIdx < 4; iIdx++) {
    uiTopLo  += kpTop[iIdx];
    uiTopHi  += kpTop[iIdx + 4];
    uiLeftLo += kpLeft[iIdx * kiStride];
    uiLeftHi += kpLeft[(iIdx + 4) * kiStride];
  }

  /* Rounded means: +4 >> 3 over eight samples, +2 >> 2 over four samples. */
  const uint8_t kuiDcTopLeft     = (uint8_t) ((uiTopLo + uiLeftLo + 4) >> 3);
  const uint8_t kuiDcTopRight    = (uint8_t) ((uiTopHi + 2) >> 2);
  const uint8_t kuiDcBottomLeft  = (uint8_t) ((uiLeftHi + 2) >> 2);
  const uint8_t kuiDcBottomRight = (uint8_t) ((uiTopHi + uiLeftHi + 4) >> 3);

  const uint8_t kuiUpperRow[8] = {
    kuiDcTopLeft,  kuiDcTopLeft,  kuiDcTopLeft,  kuiDcTopLeft,
    kuiDcTopRight, kuiDcTopRight, kuiDcTopRight, kuiDcTopRight
  };
  const uint8_t kuiLowerRow[8] = {
    kuiDcBottomLeft,  kuiDcBottomLeft,  kuiDcBottomLeft,  kuiDcBottomLeft,
    kuiDcBottomRight, kuiDcBottomRight, kuiDcBottomRight, kuiDcBottomRight
  };
  const uint64_t kuiUpper64 = LD64 (kuiUpperRow);
  const uint64_t kuiLower64 = LD64 (kuiLowerRow);

  /* Rows 0..3 take the upper pattern, rows 4..7 the lower one. */
  for (iIdx = 0; iIdx < 8; iIdx++) {
    const uint64_t kuiRow64 = (iIdx < 4) ? kuiUpper64 : kuiLower64;
    ST64 (pPred + 8 * iIdx, kuiRow64);
  }
}
/*
 * In-place chroma DC intra prediction of an 8x8 block.
 *
 * pPred    - block origin inside a picture buffer. The row above
 *            (pPred - kiStride) and the column to the left
 *            (pPred - 1 + n * kiStride, n = 0..7) are read, then 8 rows of
 *            8 bytes starting at pPred are overwritten.
 * kiStride - distance in bytes between two rows of the buffer.
 *
 * Each 4x4 quadrant receives one value:
 *   rows 0-3, bytes 0-3 : rounded mean of top[0..3] and left[0..3]
 *   rows 0-3, bytes 4-7 : rounded mean of top[4..7]
 *   rows 4-7, bytes 0-3 : rounded mean of left[4..7]
 *   rows 4-7, bytes 4-7 : rounded mean of top[4..7] and left[4..7]
 */
void WelsIChromaPredDc_c (uint8_t* pPred, const int32_t kiStride) {
  const uint8_t* kpTop  = pPred - kiStride;  /* row directly above the block */
  const uint8_t* kpLeft = pPred - 1;         /* column directly left of the block */
  uint32_t uiTopLo  = 0;
  uint32_t uiTopHi  = 0;
  uint32_t uiLeftLo = 0;
  uint32_t uiLeftHi = 0;
  int32_t iRow;

  /* All neighbour samples are gathered before anything is written. */
  for (iRow = 0; iRow < 4; iRow++) {
    uiTopLo  += kpTop[iRow];
    uiTopHi  += kpTop[iRow + 4];
    uiLeftLo += kpLeft[iRow * kiStride];
    uiLeftHi += kpLeft[(iRow + 4) * kiStride];
  }

  /* Rounded means: +4 >> 3 over eight samples, +2 >> 2 over four samples. */
  const uint8_t kuiDcTopLeft     = (uint8_t) ((uiTopLo + uiLeftLo + 4) >> 3);
  const uint8_t kuiDcTopRight    = (uint8_t) ((uiTopHi + 2) >> 2);
  const uint8_t kuiDcBottomLeft  = (uint8_t) ((uiLeftHi + 2) >> 2);
  const uint8_t kuiDcBottomRight = (uint8_t) ((uiTopHi + uiLeftHi + 4) >> 3);

  const uint8_t kuiUpperRow[8] = {
    kuiDcTopLeft,  kuiDcTopLeft,  kuiDcTopLeft,  kuiDcTopLeft,
    kuiDcTopRight, kuiDcTopRight, kuiDcTopRight, kuiDcTopRight
  };
  const uint8_t kuiLowerRow[8] = {
    kuiDcBottomLeft,  kuiDcBottomLeft,  kuiDcBottomLeft,  kuiDcBottomLeft,
    kuiDcBottomRight, kuiDcBottomRight, kuiDcBottomRight, kuiDcBottomRight
  };
  const uint64_t kuiUpper64 = LD64 (kuiUpperRow);
  const uint64_t kuiLower64 = LD64 (kuiLowerRow);

  /* Rows are written top to bottom, one stride apart. */
  for (iRow = 0; iRow < 8; iRow++) {
    const uint64_t kuiRow64 = (iRow < 4) ? kuiUpper64 : kuiLower64;
    ST64 (pPred + iRow * kiStride, kuiRow64);
  }
}
/*
 * Copy iHeight rows of 16 bytes from pSrc to pDst.
 *
 * pSrc / iSrcStride - source pointer and the byte step between source rows.
 * pDst / iDstStride - destination pointer and the byte step between destination rows.
 * iHeight           - number of rows to copy; nothing is copied when it is <= 0.
 *
 * Each row is moved as two 8-byte halves; the first half is stored before the
 * second half is loaded.
 */
static inline void McCopyWidthEq16_c (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iHeight) {
  int32_t iRowsLeft = iHeight;

  while (iRowsLeft > 0) {
    ST64 (pDst, LD64 (pSrc));
    ST64 (pDst + 8, LD64 (pSrc + 8));

    pSrc += iSrcStride;
    pDst += iDstStride;
    --iRowsLeft;
  }
}
/*
 * Vertical 16x16 luma intra prediction into a packed block.
 *
 * pPred    - destination; 16 rows of 16 contiguous bytes (256 bytes) are written.
 * pRef     - reference sample at the block origin; the 16 bytes starting at
 *            pRef - kiStride (the row above) are read.
 * kiStride - distance in bytes between two rows of pRef.
 *
 * Every output row is a copy of the 16 bytes above the block.
 */
void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const uint8_t* kpAbove = pRef - kiStride;
  const uint64_t kuiAboveLo = LD64 (kpAbove);
  const uint64_t kuiAboveHi = LD64 (kpAbove + 8);
  int32_t iRow;

  for (iRow = 0; iRow < 16; iRow++) {
    uint8_t* pRow = pPred + (iRow << 4);  /* packed output: 16 bytes per row */
    ST64 (pRow, kuiAboveLo);
    ST64 (pRow + 8, kuiAboveHi);
  }
}
/*
 * In-place vertical 16x16 luma intra prediction.
 *
 * pPred    - block origin inside a picture buffer. The 16 bytes starting at
 *            pPred - kiStride (the row above) are read, then 16 rows of
 *            16 bytes starting at pPred are overwritten with that row.
 * kiStride - distance in bytes between two rows of the buffer.
 *
 * Rows are written from the bottom row (15) up to the top row (0).
 */
void WelsI16x16LumaPredV_c (uint8_t* pPred, const int32_t kiStride) {
  const uint8_t* kpAbove = pPred - kiStride;
  const uint64_t kuiAboveLo = LD64 (kpAbove);
  const uint64_t kuiAboveHi = LD64 (kpAbove + 8);
  int32_t iRow;

  for (iRow = 15; iRow >= 0; iRow--) {
    uint8_t* pRow = pPred + iRow * kiStride;
    ST64 (pRow, kuiAboveLo);
    ST64 (pRow + 8, kuiAboveHi);
  }
}
/*
 * Vertical chroma intra prediction into a packed 8x8 block.
 *
 * pPred    - destination; 8 rows of 8 contiguous bytes (64 bytes) are written.
 * pRef     - reference sample at the block origin; the 8 bytes starting at
 *            pRef - kiStride (the row above) are read.
 * kiStride - distance in bytes between two rows of pRef.
 *
 * Every output row is a copy of the 8 bytes above the block.
 */
void WelsIChromaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const uint64_t kuiAbove64 = LD64 (pRef - kiStride);
  int32_t iRow;

  for (iRow = 0; iRow < 8; iRow++) {
    ST64 (pPred + (iRow << 3), kuiAbove64);
  }
}
/*
 * Chroma DC intra prediction from the top neighbours only, into a packed
 * 8x8 block (8 bytes per row at pPred).
 *
 * pPred    - destination; 64 contiguous bytes are written.
 * pRef     - reference sample at the block origin; the 8 bytes starting at
 *            pRef - kiStride (the row above) are read.
 * kiStride - distance in bytes between two rows of pRef.
 *
 * Bytes 0-3 of every row get the rounded mean of top[0..3]; bytes 4-7 get the
 * rounded mean of top[4..7].
 */
void WelsIChromaPredDcTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const uint8_t* kpAbove = pRef - kiStride;
  uint32_t uiSumLo = 0;
  uint32_t uiSumHi = 0;
  int32_t iIdx;

  for (iIdx = 0; iIdx < 4; iIdx++) {
    uiSumLo += kpAbove[iIdx];
    uiSumHi += kpAbove[iIdx + 4];
  }

  /* Rounded mean over four samples: (sum + 2) >> 2. */
  const uint8_t kuiDcLo = (uint8_t) ((uiSumLo + 2) >> 2);
  const uint8_t kuiDcHi = (uint8_t) ((uiSumHi + 2) >> 2);
  const uint8_t kuiRowBytes[8] = {
    kuiDcLo, kuiDcLo, kuiDcLo, kuiDcLo,
    kuiDcHi, kuiDcHi, kuiDcHi, kuiDcHi
  };
  const uint64_t kuiRow64 = LD64 (kuiRowBytes);

  for (iIdx = 0; iIdx < 8; iIdx++) {
    ST64 (pPred + (iIdx << 3), kuiRow64);
  }
}
/*
 * In-place chroma DC intra prediction of an 8x8 block from the top neighbours only.
 *
 * pPred    - block origin inside a picture buffer. The 8 bytes starting at
 *            pPred - kiStride (the row above) are read, then 8 rows of 8 bytes
 *            starting at pPred are overwritten.
 * kiStride - distance in bytes between two rows of the buffer.
 *
 * Bytes 0-3 of every row get the rounded mean of top[0..3]; bytes 4-7 get the
 * rounded mean of top[4..7]. Rows are written from the bottom row (7) up to
 * the top row (0).
 */
void WelsIChromaPredDcTop_c (uint8_t* pPred, const int32_t kiStride) {
  const uint8_t* kpAbove = pPred - kiStride;
  uint32_t uiSumLo = 0;
  uint32_t uiSumHi = 0;
  int32_t iIdx;

  for (iIdx = 0; iIdx < 4; iIdx++) {
    uiSumLo += kpAbove[iIdx];
    uiSumHi += kpAbove[iIdx + 4];
  }

  /* Rounded mean over four samples: (sum + 2) >> 2. */
  const uint8_t kuiDcLo = (uint8_t) ((uiSumLo + 2) >> 2);
  const uint8_t kuiDcHi = (uint8_t) ((uiSumHi + 2) >> 2);
  const uint8_t kuiRowBytes[8] = {
    kuiDcLo, kuiDcLo, kuiDcLo, kuiDcLo,
    kuiDcHi, kuiDcHi, kuiDcHi, kuiDcHi
  };
  const uint64_t kuiRow64 = LD64 (kuiRowBytes);

  for (iIdx = 7; iIdx >= 0; iIdx--) {
    ST64 (pPred + iIdx * kiStride, kuiRow64);
  }
}
/*
 * Fill the list-0 motion-vector cache (iMvArray[0]) and reference-index cache
 * (iRefIdxArray[0]) of the current macroblock from its left, top-left, top and
 * top-right neighbours.
 *
 * pNeighAvail   - availability flags and types of the neighbouring macroblocks.
 * pNonZeroCount - forwarded unchanged to WelsFillCacheNonZeroCount().
 * iMvArray      - motion-vector cache; only list 0 is written here.
 * iRefIdxArray  - reference-index cache; only list 0 is written here.
 * pCurLayer     - current layer; supplies iMbXyIndex, iMbWidth, pMv and pRefIndex.
 *
 * For each neighbour:
 *   - available and inter : its motion vectors / reference indices are copied;
 *   - not available       : motion vectors are zeroed, reference index = REF_NOT_AVAIL;
 *   - available, not inter: motion vectors are zeroed, reference index = REF_NOT_IN_LIST.
 *
 * Cache slots used: left -> 6, 12, 18, 24; top-left -> 0; top -> 1..4;
 * top-right -> 5. Slots 9, 21, 11, 17 and 23 are always cleared and marked
 * REF_NOT_AVAIL.
 */
void WelsFillCacheInter (PNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
                         int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer) {
  const int32_t kiCurXy = pCurLayer->iMbXyIndex;
  int32_t iTopXy;
  int32_t iLeftXy;
  int32_t iLeftTopXy;
  int32_t iRightTopXy;
  int32_t iIdx;

  /* Non-zero coefficient counts of the neighbours are filled by the dedicated helper. */
  WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);

  /* Macroblock indices of the neighbours; 0 stands in when a neighbour is unavailable. */
  iTopXy      = pNeighAvail->iTopAvail      ? kiCurXy - pCurLayer->iMbWidth     : 0;
  iLeftXy     = pNeighAvail->iLeftAvail     ? kiCurXy - 1                       : 0;
  iLeftTopXy  = pNeighAvail->iLeftTopAvail  ? kiCurXy - 1 - pCurLayer->iMbWidth : 0;
  iRightTopXy = pNeighAvail->iRightTopAvail ? kiCurXy + 1 - pCurLayer->iMbWidth : 0;

  /* ---- left neighbour: blocks 3, 7, 11, 15 -> cache slots 6, 12, 18, 24 ---- */
  if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
    for (iIdx = 0; iIdx < 4; iIdx++) {
      ST32 (iMvArray[0][6 + 6 * iIdx], LD32 (pCurLayer->pMv[0][iLeftXy][3 + 4 * iIdx]));
    }
    for (iIdx = 0; iIdx < 4; iIdx++) {
      iRefIdxArray[0][6 + 6 * iIdx] = pCurLayer->pRefIndex[0][iLeftXy][3 + 4 * iIdx];
    }
  } else {
    /* Missing neighbour vs. present-but-not-inter neighbour get different markers. */
    const int8_t kiLeftRef = (0 == pNeighAvail->iLeftAvail) ? REF_NOT_AVAIL : REF_NOT_IN_LIST;
    for (iIdx = 0; iIdx < 4; iIdx++) {
      ST32 (iMvArray[0][6 + 6 * iIdx], 0);
    }
    iRefIdxArray[0][24] = kiLeftRef;
    iRefIdxArray[0][18] = kiLeftRef;
    iRefIdxArray[0][12] = kiLeftRef;
    iRefIdxArray[0][ 6] = kiLeftRef;
  }

  /* ---- top-left neighbour: block 15 -> cache slot 0 ---- */
  if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
    ST32 (iMvArray[0][0], LD32 (pCurLayer->pMv[0][iLeftTopXy][15]));
    iRefIdxArray[0][0] = pCurLayer->pRefIndex[0][iLeftTopXy][15];
  } else {
    ST32 (iMvArray[0][0], 0);
    iRefIdxArray[0][0] = (0 == pNeighAvail->iLeftTopAvail) ? REF_NOT_AVAIL : REF_NOT_IN_LIST;
  }

  /* ---- top neighbour: blocks 12..15 -> cache slots 1..4 ---- */
  if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
    /* Two 64-bit moves for the motion vectors, one 32-bit move for the four reference indices. */
    ST64 (iMvArray[0][1], LD64 (pCurLayer->pMv[0][iTopXy][12]));
    ST64 (iMvArray[0][3], LD64 (pCurLayer->pMv[0][iTopXy][14]));
    ST32 (&iRefIdxArray[0][1], LD32 (&pCurLayer->pRefIndex[0][iTopXy][12]));
  } else {
    const int8_t kiTopRef = (0 == pNeighAvail->iTopAvail) ? REF_NOT_AVAIL : REF_NOT_IN_LIST;
    ST64 (iMvArray[0][1], 0);
    ST64 (iMvArray[0][3], 0);
    iRefIdxArray[0][4] = kiTopRef;
    iRefIdxArray[0][3] = kiTopRef;
    iRefIdxArray[0][2] = kiTopRef;
    iRefIdxArray[0][1] = kiTopRef;
  }

  /* ---- top-right neighbour: block 12 -> cache slot 5 ---- */
  if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
    ST32 (iMvArray[0][5], LD32 (pCurLayer->pMv[0][iRightTopXy][12]));
    iRefIdxArray[0][5] = pCurLayer->pRefIndex[0][iRightTopXy][12];
  } else {
    ST32 (iMvArray[0][5], 0);
    iRefIdxArray[0][5] = (0 == pNeighAvail->iRightTopAvail) ? REF_NOT_AVAIL : REF_NOT_IN_LIST;
  }

  /* ---- slots that are unconditionally unavailable (right-top 4x4 blocks) ---- */
  ST32 (iMvArray[0][ 9], 0);
  ST32 (iMvArray[0][21], 0);
  ST32 (iMvArray[0][11], 0);
  ST32 (iMvArray[0][17], 0);
  ST32 (iMvArray[0][23], 0);
  iRefIdxArray[0][23] = REF_NOT_AVAIL;
  iRefIdxArray[0][17] = REF_NOT_AVAIL;
  iRefIdxArray[0][11] = REF_NOT_AVAIL;
  iRefIdxArray[0][21] = REF_NOT_AVAIL;
  iRefIdxArray[0][ 9] = REF_NOT_AVAIL;
}
/*
 * Fill 16 consecutive bytes at pPred with the single value kuiSrc.
 *
 * pPred  - destination; 16 bytes are written as two 8-byte stores.
 * kuiSrc - byte value to replicate.
 */
static inline void WelsFillingPred1to16_c (uint8_t* pPred, const uint8_t kuiSrc) {
  uint8_t uiFill[8];
  int32_t iIdx;

  for (iIdx = 0; iIdx < 8; iIdx++) {
    uiFill[iIdx] = kuiSrc;
  }

  const uint64_t kuiFill64 = LD64 (uiFill);
  ST64 (pPred, kuiFill64);
  ST64 (pPred + 8, kuiFill64);
}
/*
 * Copy 16 bytes from pSrc to pPred as two 8-byte halves.
 *
 * pPred - destination; bytes 0..15 are written.
 * pSrc  - source; bytes 0..15 are read.
 *
 * The first half is stored before the second half is loaded.
 */
static inline void WelsFillingPred8x2to16_c (uint8_t* pPred, uint8_t* pSrc) {
  int32_t iOffset;

  for (iOffset = 0; iOffset < 16; iOffset += 8) {
    ST64 (pPred + iOffset, LD64 (pSrc + iOffset));
  }
}