// MVP for a P8x16 partition: try the single dedicated neighbor first (H.264 8.4.1.3.2
// special case); if it does not share our reference picture, fall back to median PredMv.
void_t PredInter8x16Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30],
                        int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]) {
  if (0 == iPartIdx) {
    // left 8x16 partition: the left neighbor (cache index 6) wins when refs match
    if (iRef == iRefIndex[0][6]) {
      ST32 (iMVP, LD32 (&iMotionVector[0][6][0]));
      return;
    }
  } else { // 1 == iPartIdx, right 8x16 partition
    // prefer the top-right neighbor (cache index 5); when it is unavailable,
    // substitute the top-left neighbor of this half (cache index 2)
    int8_t iNbIdx = 5;
    int8_t iNbRef = iRefIndex[0][iNbIdx];
    if (REF_NOT_AVAIL == iNbRef) {
      iNbIdx = 2;
      iNbRef = iRefIndex[0][iNbIdx];
    }
    if (iRef == iNbRef) {
      ST32 (iMVP, LD32 (&iMotionVector[0][iNbIdx][0]));
      return;
    }
  }
  // no shortcut applied: regular median prediction, partition width 2 (in 4x4 units)
  PredMv (iMotionVector, iRefIndex, iPartIdx, 2, iRef, iMVP);
}
//basic iMVs prediction unit for iMVs partition width (4, 2, 1) void_t PredMv(int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30], int32_t iPartIdx, int32_t iPartWidth, int8_t iRef, int16_t iMVP[2]) { const uint8_t kuiLeftIdx = g_kuiCache30ScanIdx[iPartIdx] - 1; const uint8_t kuiTopIdx = g_kuiCache30ScanIdx[iPartIdx] - 6; const uint8_t kuiRightTopIdx= kuiTopIdx + iPartWidth; const uint8_t kuiLeftTopIdx = kuiTopIdx - 1; const int8_t kiLeftRef = iRefIndex[0][kuiLeftIdx]; const int8_t kiTopRef = iRefIndex[0][ kuiTopIdx]; const int8_t kiRightTopRef = iRefIndex[0][kuiRightTopIdx]; const int8_t kiLeftTopRef = iRefIndex[0][ kuiLeftTopIdx]; int8_t iDiagonalRef = kiRightTopRef; int8_t iMatchRef = 0; int16_t iAMV[2], iBMV[2], iCMV[2]; *(int32_t*)iAMV = INTD32(iMotionVector[0][ kuiLeftIdx]); *(int32_t*)iBMV = INTD32(iMotionVector[0][ kuiTopIdx]); *(int32_t*)iCMV = INTD32(iMotionVector[0][kuiRightTopIdx]); if (REF_NOT_AVAIL == iDiagonalRef) { iDiagonalRef = kiLeftTopRef; *(int32_t*)iCMV = INTD32(iMotionVector[0][kuiLeftTopIdx]); } iMatchRef = (iRef == kiLeftRef) + (iRef == kiTopRef) + (iRef == iDiagonalRef); if (REF_NOT_AVAIL == kiTopRef && REF_NOT_AVAIL == iDiagonalRef && kiLeftRef >= REF_NOT_IN_LIST) { ST32(iMVP, LD32(iAMV)); return; } if (1 == iMatchRef) { if (iRef == kiLeftRef) { ST32(iMVP, LD32(iAMV)); } else if (iRef == kiTopRef) { ST32(iMVP, LD32(iBMV)); } else { ST32(iMVP, LD32(iCMV)); } } else { iMVP[0] = WelsMedian(iAMV[0], iBMV[0], iCMV[0]); iMVP[1] = WelsMedian(iAMV[1], iBMV[1], iCMV[1]); } }
// Copy a 16x16 pixel block; each row is moved with four unaligned 32-bit transfers.
void WelsCopy16x16_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) {
  int32_t iRowsLeft = 16;
  while (iRowsLeft-- > 0) {
    ST32 (pDst,      LD32 (pSrc));
    ST32 (pDst + 4,  LD32 (pSrc + 4));
    ST32 (pDst + 8,  LD32 (pSrc + 8));
    ST32 (pDst + 12, LD32 (pSrc + 12));
    pDst += iStrideD;
    pSrc += iStrideS;
  }
}
/****************************************************************************
 * Copy functions
 ****************************************************************************/
// Copy a 4x4 pixel block: one unaligned 32-bit transfer per row.
void WelsCopy4x4_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) {
  int32_t iRow;
  for (iRow = 0; iRow < 4; ++iRow) {
    ST32 (pDst + iRow * iStrideD, LD32 (pSrc + iRow * iStrideS));
  }
}
// Copy an 8x16 pixel block (8 wide, 16 tall); each row is two 32-bit transfers.
void WelsCopy8x16_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) {
  int32_t iRow;
  for (iRow = 0; iRow < 16; ++iRow) {
    ST32 (pDst,     LD32 (pSrc));
    ST32 (pDst + 4, LD32 (pSrc + 4));
    pDst += iStrideD;
    pSrc += iStrideS;
  }
}
//update iRefIndex and iMVs of both Mb and Mb_cache, only for P8x16 void_t UpdateP8x16MotionInfo(PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30], int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]) { const int16_t kiRef2 = (iRef << 8) | iRef; const int32_t kiMV32 = LD32(iMVs); int32_t i; int32_t iMbXy = pCurDqLayer->iMbXyIndex; for (i = 0; i < 2; i++, iPartIdx+=8) { const uint8_t kuiScan4Idx = g_kuiScan4[iPartIdx]; const uint8_t kuiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; const uint8_t kuiScan4IdxPlus4= 4 + kuiScan4Idx; const uint8_t kuiCacheIdxPlus6= 6 + kuiCacheIdx; //mb ST16( &pCurDqLayer->pRefIndex[0][iMbXy][kuiScan4Idx ], kiRef2 ); ST16( &pCurDqLayer->pRefIndex[0][iMbXy][kuiScan4IdxPlus4], kiRef2 ); ST32( pCurDqLayer->pMv[0][iMbXy][ kuiScan4Idx ], kiMV32 ); ST32( pCurDqLayer->pMv[0][iMbXy][1+kuiScan4Idx ], kiMV32 ); ST32( pCurDqLayer->pMv[0][iMbXy][ kuiScan4IdxPlus4], kiMV32 ); ST32( pCurDqLayer->pMv[0][iMbXy][1+kuiScan4IdxPlus4], kiMV32 ); //cache ST16( &iRefIndex[0][kuiCacheIdx ], kiRef2 ); ST16( &iRefIndex[0][kuiCacheIdxPlus6], kiRef2 ); ST32( iMotionVector[0][ kuiCacheIdx ], kiMV32 ); ST32( iMotionVector[0][1+kuiCacheIdx ], kiMV32 ); ST32( iMotionVector[0][ kuiCacheIdxPlus6], kiMV32 ); ST32( iMotionVector[0][1+kuiCacheIdxPlus6], kiMV32 ); } }
//update uiRefIndex and pMv of both SMB and Mb_cache, only for P8x8 void UpdateP8x8MotionInfo(SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef, SMVUnitXY* pMv) { SMVComponentUnit *pMvComp = &pMbCache->sMvComponents; const uint32_t kuiMv32 = LD32(pMv); const uint64_t kuiMv64 = BUTTERFLY4x8(kuiMv32); const int16_t kiScan4Idx = g_kuiMbCountScan4Idx[kiPartIdx]; const int16_t kiCacheIdx = g_kuiCache30ScanIdx[kiPartIdx]; const int16_t kiCacheIdx1 = 1+kiCacheIdx; const int16_t kiCacheIdx6 = 6+kiCacheIdx; const int16_t kiCacheIdx7 = 7+kiCacheIdx; //mb ST64( &pCurMb->sMv[ kiScan4Idx], kuiMv64 ); ST64( &pCurMb->sMv[4+kiScan4Idx], kuiMv64 ); //cache pMvComp->iRefIndexCache[kiCacheIdx ] = pMvComp->iRefIndexCache[kiCacheIdx1] = pMvComp->iRefIndexCache[kiCacheIdx6] = pMvComp->iRefIndexCache[kiCacheIdx7] = kiRef; pMvComp->sMotionVectorCache[kiCacheIdx ] = pMvComp->sMotionVectorCache[kiCacheIdx1] = pMvComp->sMotionVectorCache[kiCacheIdx6] = pMvComp->sMotionVectorCache[kiCacheIdx7] = *pMv; }
// Vertical 4x4 luma intra prediction: replicate the 4 pixels directly above the block.
void WelsI4x4LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  ENFORCE_STACK_ALIGN_1D (uint32_t, uiSrcx2, 2, 16)
  const uint32_t kuiTopRow = LD32 (&pRef[-kiStride]); // row above the block
  uiSrcx2[0] = kuiTopRow;
  uiSrcx2[1] = kuiTopRow;
  WelsFillingPred8to16 (pPred, (uint8_t*)&uiSrcx2[0]);
}
/*!
 * \brief encode NAL with emulation forbidden three bytes checking
 * \param pDst pDst NAL pData
 * \param pDstLen length of pDst NAL output
 * \param annexeb annexeb flag
 * \param pRawNal pRawNal NAL pData
 * \return length of pDst NAL
 * \note NOTE(review): pDst is assumed large enough for the worst-case expansion
 *       (start code + header + payload with inserted emulation bytes) — no bounds check.
 */
int32_t WelsEncodeNal (SWelsNalRaw* pRawNal, void* pDst, int32_t* pDstLen)
{
  uint8_t* pDstStart = (uint8_t*)pDst;
  uint8_t* pDstPointer = pDstStart;
  uint8_t* pSrcPointer = pRawNal->pRawData;
  uint8_t* pSrcEnd = pRawNal->pRawData + pRawNal->iPayloadSize;
  int32_t iZeroCount = 0;
  int32_t iNalLength = 0;
  static const uint8_t kuiStartCodePrefix[4] = { 0, 0, 0, 1 };

  // 4-byte Annex B start code 00 00 00 01
  ST32 (pDstPointer, LD32 (&kuiStartCodePrefix[0]));
  pDstPointer += 4;

  /* NAL Unit Header: forbidden_zero_bit(1) | nal_ref_idc(2) | nal_unit_type(5) */
  *pDstPointer++ = (pRawNal->sNalExt.sNalHeader.uiNalRefIdc << 5) | (pRawNal->sNalExt.sNalHeader.eNalUnitType & 0x1f);

  while (pSrcPointer < pSrcEnd)
  {
    // emulation prevention: after two zero bytes, any payload byte <= 0x03 must be
    // preceded by an inserted 0x03 so the stream never mimics a start code prefix
    if (iZeroCount == 2 && *pSrcPointer <= 3)
    {
      *pDstPointer++ = 3;
      iZeroCount = 0;
    }
    // track the current run of zero bytes in the output
    if (*pSrcPointer == 0)
    {
      ++ iZeroCount;
    }
    else
    {
      iZeroCount = 0;
    }
    *pDstPointer++ = *pSrcPointer++;
  }

  /* count length of NAL Unit */
  iNalLength = pDstPointer - pDstStart;
  if (NULL != pDstLen)
    *pDstLen = iNalLength;

  return iNalLength;
}
// Fills the non-zero-coefficient-count cache for the current macroblock from the
// top and left neighbors; positions with no neighbor are marked 0xFF (unavailable).
// Cache layout: an 8-wide array where column 0 / row 0 hold neighbor values.
void_t WelsFillCacheNonZeroCount(PNeighAvail pNeighAvail, uint8_t* pNonZeroCount, PDqLayer pCurLayer) //no matter slice type, intra_pred_constrained_flag
{
  int32_t iCurXy = pCurLayer->iMbXyIndex;
  int32_t iTopXy = 0;
  int32_t iLeftXy = 0;

  GetNeighborAvailMbType( pNeighAvail, pCurLayer );
  if ( pNeighAvail->iTopAvail )
  {
    iTopXy = iCurXy - pCurLayer->iMbWidth; // MB directly above in raster order
  }
  if ( pNeighAvail->iLeftAvail )
  {
    iLeftXy = iCurXy - 1; // MB directly to the left
  }

  //stuff non_zero_coeff_count from pNeighAvail(left and top)
  if (pNeighAvail->iTopAvail)
  {
    // luma top row from the neighbor's bottom 4x4 row (nzc 12..15), chroma from 20..23
    ST32(&pNonZeroCount[1], LD32(&pCurLayer->pNzc[iTopXy][12]));
    pNonZeroCount[0] = pNonZeroCount[5] = pNonZeroCount[29] = 0;
    ST16(&pNonZeroCount[6], LD16(&pCurLayer->pNzc[iTopXy][20]));
    ST16(&pNonZeroCount[30], LD16(&pCurLayer->pNzc[iTopXy][22]));
  }
  else
  {
    // no top neighbor: mark the whole top cache row unavailable (0xFF)
    ST32(&pNonZeroCount[1], 0xFFFFFFFFU);
    pNonZeroCount[0] = pNonZeroCount[5] = pNonZeroCount[29] = 0xFF;
    ST16(&pNonZeroCount[6], 0xFFFF);
    ST16(&pNonZeroCount[30], 0xFFFF);
  }

  if (pNeighAvail->iLeftAvail)
  {
    // luma left column from the neighbor's right 4x4 column (nzc 3,7,11,15),
    // chroma left column from nzc 17,21 (Cb) and 19,23 (Cr)
    pNonZeroCount[8 * 1] = pCurLayer->pNzc[iLeftXy][3];
    pNonZeroCount[8 * 2] = pCurLayer->pNzc[iLeftXy][7];
    pNonZeroCount[8 * 3] = pCurLayer->pNzc[iLeftXy][11];
    pNonZeroCount[8 * 4] = pCurLayer->pNzc[iLeftXy][15];

    pNonZeroCount[5 + 8 * 1] = pCurLayer->pNzc[iLeftXy][17];
    pNonZeroCount[5 + 8 * 2] = pCurLayer->pNzc[iLeftXy][21];
    pNonZeroCount[5 + 8 * 4] = pCurLayer->pNzc[iLeftXy][19];
    pNonZeroCount[5 + 8 * 5] = pCurLayer->pNzc[iLeftXy][23];
  }
  else
  {
    // no left neighbor: -1 stored into uint8_t yields the same 0xFF sentinel
    pNonZeroCount[8 * 1] =
    pNonZeroCount[8 * 2] =
    pNonZeroCount[8 * 3] =
    pNonZeroCount[8 * 4] = -1;//unavailable

    pNonZeroCount[5 + 8 * 1] =
    pNonZeroCount[5 + 8 * 2] = -1;//unavailable

    pNonZeroCount[5 + 8 * 4] =
    pNonZeroCount[5 + 8 * 5] = -1;//unavailable
  }
}
// MVP for a P16x8 partition: the dedicated neighbor (top for the upper half, left for
// the lower half) is used directly when it shares our reference; otherwise median PredMv.
void PredInter16x8Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30],
                      int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]) {
  int8_t iNbIdx;
  if (0 == iPartIdx) {
    iNbIdx = 1;   // upper partition: top neighbor (cache index 1)
  } else {        // 8 == iPartIdx
    iNbIdx = 18;  // lower partition: left neighbor (cache index 18)
  }
  if (iRef == iRefIndex[0][iNbIdx]) {
    ST32 (iMVP, LD32 (&iMotionVector[0][iNbIdx][0]));
    return;
  }
  // fall back to median prediction, partition width 4 (in 4x4 units)
  PredMv (iMotionVector, iRefIndex, iPartIdx, 4, iRef, iMVP);
}
// Copy a 4-pixel-wide column of iHeight rows, one 32-bit transfer per row.
static inline void McCopyWidthEq4_c (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
  while (iHeight-- > 0) {
    ST32 (pDst, LD32 (pSrc));
    pSrc += iSrcStride;
    pDst += iDstStride;
  }
}
// Horizontal 4x4 luma intra prediction: each row is filled with the pixel to its left.
void WelsI4x4LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16)
  int32_t iRow;
  for (iRow = 0; iRow < 4; ++iRow) {
    // left-column pixel of this row (one byte before the row start)
    const uint8_t kuiLeft = pRef[iRow * kiStride - 1];
    uiSrc[iRow * 4]     = kuiLeft;
    uiSrc[iRow * 4 + 1] = kuiLeft;
    uiSrc[iRow * 4 + 2] = kuiLeft;
    uiSrc[iRow * 4 + 3] = kuiLeft;
  }
  WelsFillingPred8x2to16 (pPred, uiSrc);
}
/*!
 * \brief encode a nal into a pBuffer for any type of NAL, involved WelsEncodeNal introduced in AVC
 *
 * \param pDst pDst NAL pData
 * \param pDstLen length of pDst NAL output
 * \param annexeb annexeb flag
 * \param pRawNal pRawNal NAL pData
 * \param pNalHeaderExt pointer of SNalUnitHeaderExt
 *
 * \return length of pDst NAL
 */
int32_t WelsEncodeNalExt (SWelsNalRaw* pRawNal, void* pNalHeaderExt, void* pDst, int32_t* pDstLen)
{
  SNalUnitHeaderExt* sNalExt = (SNalUnitHeaderExt*)pNalHeaderExt;
  uint8_t* pDstStart = (uint8_t*)pDst;
  uint8_t* pDstPointer = pDstStart;
  uint8_t* pSrcPointer = pRawNal->pRawData;
  uint8_t* pSrcEnd = pRawNal->pRawData + pRawNal->iPayloadSize;
  int32_t iZeroCount = 0;
  int32_t iNalLength = 0;

  // only prefix NALs and coded-slice-extension NALs carry the 3-byte SVC extension
  // header; everything else goes through the plain AVC packer
  if (pRawNal->sNalExt.sNalHeader.eNalUnitType != NAL_UNIT_PREFIX && pRawNal->sNalExt.sNalHeader.eNalUnitType != NAL_UNIT_CODED_SLICE_EXT)
  {
    return WelsEncodeNal (pRawNal, pDst, pDstLen);
  }

  /* FIXME this code doesn't check overflow */
  static const uint8_t kuiStartCodePrefixExt[4] = { 0, 0, 0, 1 };

  // 4-byte Annex B start code 00 00 00 01
  ST32 (pDstPointer, LD32 (&kuiStartCodePrefixExt[0]));
  pDstPointer += 4;

  /* NAL Unit Header: forbidden_zero_bit(1) | nal_ref_idc(2) | nal_unit_type(5) */
  *pDstPointer++ = (pRawNal->sNalExt.sNalHeader.uiNalRefIdc << 5) | (pRawNal->sNalExt.sNalHeader.eNalUnitType & 0x1f);

  /* NAL UNIT Extension Header (3 bytes, H.264 Annex G nal_unit_header_svc_extension) */
  // byte 1: reserved(1)=1 | idr_flag(1) | priority_id(6)=0
  *pDstPointer++ = (0x80) | (sNalExt->bIdrFlag << 6);
  // byte 2: no_inter_layer_pred(1)=1 | dependency_id(3) | quality_id(4)=0
  *pDstPointer++ = (0x80) | (sNalExt->uiDependencyId << 4);
  // byte 3: temporal_id(3) | use_ref_base_pic(1)=0 | discardable(1) | output(1)+reserved(2)=0x07
  *pDstPointer++ = (sNalExt->uiTemporalId << 5) | (sNalExt->bDiscardableFlag << 3) | (0x07);

  while (pSrcPointer < pSrcEnd)
  {
    // emulation prevention: after two zero bytes, any payload byte <= 0x03 must be
    // preceded by an inserted 0x03 so the stream never mimics a start code prefix
    if (iZeroCount == 2 && *pSrcPointer <= 3)
    {
      *pDstPointer++ = 3;
      iZeroCount = 0;
    }
    if (*pSrcPointer == 0)
    {
      ++ iZeroCount;
    }
    else
    {
      iZeroCount = 0;
    }
    *pDstPointer++ = *pSrcPointer++;
  }

  /* count length of NAL Unit */
  iNalLength = pDstPointer - pDstStart;
  if (NULL != pDstLen)
    *pDstLen = iNalLength;

  return iNalLength;
}
// Fills the intra-4x4 prediction-mode cache (and the nzc cache via the shared helper)
// from the top and left neighbors, with constrained_intra_pred_flag == 0 semantics:
// an available non-intra4x4 neighbor contributes mode 2 (DC), a missing one -1.
void_t WelsFillCacheConstrain0Intra4x4(PNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode, PDqLayer pCurLayer) //no matter slice type
{
  int32_t iCurXy = pCurLayer->iMbXyIndex;
  int32_t iTopXy = 0;
  int32_t iLeftXy = 0;

  //stuff non_zero_coeff_count from pNeighAvail(left and top)
  WelsFillCacheNonZeroCount( pNeighAvail, pNonZeroCount, pCurLayer );

  if ( pNeighAvail->iTopAvail )
  {
    iTopXy = iCurXy - pCurLayer->iMbWidth;
  }
  if ( pNeighAvail->iLeftAvail )
  {
    iLeftXy = iCurXy - 1;
  }

  //intra4x4_pred_mode
  if (pNeighAvail->iTopAvail && IS_INTRA4x4(pNeighAvail->iTopType)) //top
  {
    // top cache row <- the top neighbor's stored bottom-row modes
    ST32(pIntraPredMode + 1, LD32(&pCurLayer->pIntraPredMode[iTopXy][0]));
  }
  else
  {
    int32_t iPred;
    if( pNeighAvail->iTopAvail )
      iPred= 0x02020202; // available but not intra4x4: four DC (mode 2) entries
    else
      iPred= 0xffffffff; // not available: four -1 entries
    ST32(pIntraPredMode + 1, iPred);
  }

  if (pNeighAvail->iLeftAvail && IS_INTRA4x4(pNeighAvail->iLeftType)) //left
  {
    // left cache column <- the left neighbor's stored right-column modes;
    // NOTE(review): source indices 4,5,6,3 reflect the neighbor's storage layout
    // (slots 4..6 plus slot 3 for the last row) — presumed per decoder convention.
    pIntraPredMode[ 0 + 8 * 1] = pCurLayer->pIntraPredMode[iLeftXy][4];
    pIntraPredMode[ 0 + 8 * 2] = pCurLayer->pIntraPredMode[iLeftXy][5];
    pIntraPredMode[ 0 + 8 * 3] = pCurLayer->pIntraPredMode[iLeftXy][6];
    pIntraPredMode[ 0 + 8 * 4] = pCurLayer->pIntraPredMode[iLeftXy][3];
  }
  else
  {
    int8_t iPred;
    if( pNeighAvail->iLeftAvail )
      iPred= 2;  // available but not intra4x4: DC mode
    else
      iPred= -1; // not available
    pIntraPredMode[ 0 + 8 * 1] =
    pIntraPredMode[ 0 + 8 * 2] =
    pIntraPredMode[ 0 + 8 * 3] =
    pIntraPredMode[ 0 + 8 * 4] = iPred;
  }
}
/*
 * compare pixel line between previous and current one
 * return: 0 for totally equal, otherwise non-zero
 * note: the first 12 bytes are always read, so callers must pass kiWidth >= 12.
 */
int32_t CompareLine (uint8_t* pYSrc, uint8_t* pYRef, const int32_t kiWidth) {
  // fast path: compare the first 12 bytes as three 32-bit words
  if (LD32 (pYSrc) != LD32 (pYRef))
    return 1;
  if (LD32 (pYSrc + 4) != LD32 (pYRef + 4))
    return 1;
  if (LD32 (pYSrc + 8) != LD32 (pYRef + 8))
    return 1;
  // compare the remainder byte-wise
  if (kiWidth > 12)
    return WelsMemcmp (pYSrc + 12, pYRef + 12, kiWidth - 12);
  // bug fix: the original initialized the result to 1 and only overwrote it in the
  // kiWidth > 12 branch, so a fully matching line of width <= 12 was reported unequal.
  return 0;
}
//update uiRefIndex and pMv of both SMB and Mb_cache, only for P8x16 void update_P8x16_motion_info(SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef, SMVUnitXY* pMv) { // optimized 11/25/2011 SMVComponentUnit *pMvComp = &pMbCache->sMvComponents; const uint32_t kuiMv32 = LD32(pMv); const uint64_t kuiMv64 = BUTTERFLY4x8(kuiMv32); const int16_t kiScan4Idx = g_kuiMbCountScan4Idx[kiPartIdx]; const int16_t kiCacheIdx = g_kuiCache30ScanIdx[kiPartIdx]; const int16_t kiCacheIdx1 = 1+kiCacheIdx; const int16_t kiCacheIdx3 = 3+kiCacheIdx; const int16_t kiCacheIdx12 = 12+kiCacheIdx; const int16_t kiCacheIdx13 = 13+kiCacheIdx; const int16_t kiCacheIdx15 = 15+kiCacheIdx; const int16_t kiBlkIdx = kiPartIdx>>2; const uint16_t kuiRef16 = BUTTERFLY1x2(kiRef); pCurMb->pRefIndex[kiBlkIdx] = kiRef; pCurMb->pRefIndex[2+kiBlkIdx]= kiRef; ST64( &pCurMb->sMv[kiScan4Idx], kuiMv64 ); ST64( &pCurMb->sMv[4+kiScan4Idx], kuiMv64 ); ST64( &pCurMb->sMv[8+kiScan4Idx], kuiMv64 ); ST64( &pCurMb->sMv[12+kiScan4Idx], kuiMv64 ); /* * blocks 0: g_kuiCache30ScanIdx[iPartIdx]~g_kuiCache30ScanIdx[iPartIdx]+3, 1: g_kuiCache30ScanIdx[iPartIdx]+6~g_kuiCache30ScanIdx[iPartIdx]+9 */ pMvComp->iRefIndexCache[kiCacheIdx] = kiRef; ST16(&pMvComp->iRefIndexCache[kiCacheIdx1], kuiRef16); pMvComp->iRefIndexCache[kiCacheIdx3] = kiRef; pMvComp->iRefIndexCache[kiCacheIdx12] = kiRef; ST16(&pMvComp->iRefIndexCache[kiCacheIdx13], kuiRef16); pMvComp->iRefIndexCache[kiCacheIdx15] = kiRef; /* * blocks 0: g_kuiCache30ScanIdx[iPartIdx]~g_kuiCache30ScanIdx[iPartIdx]+3, 1: g_kuiCache30ScanIdx[iPartIdx]+6~g_kuiCache30ScanIdx[iPartIdx]+9 */ pMvComp->sMotionVectorCache[kiCacheIdx] = *pMv; ST64( &pMvComp->sMotionVectorCache[kiCacheIdx1], kuiMv64 ); pMvComp->sMotionVectorCache[kiCacheIdx3] = *pMv; pMvComp->sMotionVectorCache[kiCacheIdx12] = *pMv; ST64( &pMvComp->sMotionVectorCache[kiCacheIdx13], kuiMv64 ); pMvComp->sMotionVectorCache[kiCacheIdx15] = *pMv; }
/* can be further optimized */
// Broadcasts one ref index + mv into all sixteen 4x4 positions of a P16x16 macroblock.
void UpdateP16x16MotionInfo (PDqLayer pCurDqLayer, int8_t iRef, int16_t iMVs[2]) {
  const int16_t kiRef2 = (iRef << 8) | iRef;  // ref duplicated into both bytes
  const int32_t kiMV32 = LD32 (iMVs);         // mv (x,y) packed as one word
  const int32_t kiMbXy = pCurDqLayer->iMbXyIndex;
  int32_t iBlk;
  // one pass per 8x8 quadrant (first 4x4 of each: 0, 4, 8, 12 in zig-zag part order)
  for (iBlk = 0; iBlk < 16; iBlk += 4) {
    const uint8_t kuiTopRow = g_kuiScan4[iBlk]; // raster index of the quadrant's top-left 4x4
    const uint8_t kuiBotRow = 4 + kuiTopRow;    // 4x4 row beneath it
    //mb
    ST16 (&pCurDqLayer->pRefIndex[0][kiMbXy][kuiTopRow], kiRef2);
    ST16 (&pCurDqLayer->pRefIndex[0][kiMbXy][kuiBotRow], kiRef2);
    ST32 (pCurDqLayer->pMv[0][kiMbXy][kuiTopRow],     kiMV32);
    ST32 (pCurDqLayer->pMv[0][kiMbXy][1 + kuiTopRow], kiMV32);
    ST32 (pCurDqLayer->pMv[0][kiMbXy][kuiBotRow],     kiMV32);
    ST32 (pCurDqLayer->pMv[0][kiMbXy][1 + kuiBotRow], kiMV32);
  }
}
//update uiRefIndex and pMv of both SMB and Mb_cache, only for P16x8 void UpdateP16x8MotionInfo(SMbCache* pMbCache, SMB* pCurMb, const int32_t kiPartIdx, const int8_t kiRef, SMVUnitXY* pMv) { // optimized 11/25/2011 SMVComponentUnit *pMvComp = &pMbCache->sMvComponents; const uint32_t kuiMv32 = LD32(pMv); const uint64_t kuiMv64 = BUTTERFLY4x8(kuiMv32); uint64_t uiMvBuf[4] = { kuiMv64, kuiMv64, kuiMv64, kuiMv64 }; const int16_t kiScan4Idx = g_kuiMbCountScan4Idx[kiPartIdx]; const int16_t kiCacheIdx = g_kuiCache30ScanIdx[kiPartIdx]; const int16_t kiCacheIdx1 = 1+kiCacheIdx; const int16_t kiCacheIdx3 = 3+kiCacheIdx; const int16_t kiCacheIdx6 = 6+kiCacheIdx; const int16_t kiCacheIdx7 = 7+kiCacheIdx; const int16_t kiCacheIdx9 = 9+kiCacheIdx; const uint16_t kuiRef16 = BUTTERFLY1x2(kiRef); ST16( &pCurMb->pRefIndex[(kiPartIdx>>2)], kuiRef16 ); memcpy( &pCurMb->sMv[kiScan4Idx], uiMvBuf, sizeof(uiMvBuf) ); // confirmed_safe_unsafe_usage /* * blocks 0: g_kuiCache30ScanIdx[iPartIdx]~g_kuiCache30ScanIdx[iPartIdx]+3, 1: g_kuiCache30ScanIdx[iPartIdx]+6~g_kuiCache30ScanIdx[iPartIdx]+9 */ pMvComp->iRefIndexCache[kiCacheIdx] = kiRef; ST16(&pMvComp->iRefIndexCache[kiCacheIdx1], kuiRef16); pMvComp->iRefIndexCache[kiCacheIdx3] = kiRef; pMvComp->iRefIndexCache[kiCacheIdx6] = kiRef; ST16(&pMvComp->iRefIndexCache[kiCacheIdx7], kuiRef16); pMvComp->iRefIndexCache[kiCacheIdx9] = kiRef; /* * blocks 0: g_kuiCache30ScanIdx[iPartIdx]~g_kuiCache30ScanIdx[iPartIdx]+3, 1: g_kuiCache30ScanIdx[iPartIdx]+6~g_kuiCache30ScanIdx[iPartIdx]+9 */ pMvComp->sMotionVectorCache[kiCacheIdx] = *pMv; ST64( &pMvComp->sMotionVectorCache[kiCacheIdx1], kuiMv64 ); pMvComp->sMotionVectorCache[kiCacheIdx3]= *pMv; pMvComp->sMotionVectorCache[kiCacheIdx6]= *pMv; ST64( &pMvComp->sMotionVectorCache[kiCacheIdx7], kuiMv64 ); pMvComp->sMotionVectorCache[kiCacheIdx9]= *pMv; }
// Fills the inter-prediction caches (mv + ref index, 30-entry / 6-wide layout) for the
// current macroblock from its left, top, left-top and right-top neighbors; entries with
// no usable neighbor get mv 0 and REF_NOT_AVAIL (missing) or REF_NOT_IN_LIST (intra).
void WelsFillCacheInter (PNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
                         int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30],
                         PDqLayer pCurLayer) {
  int32_t iCurXy = pCurLayer->iMbXyIndex;
  int32_t iTopXy = 0;
  int32_t iLeftXy = 0;
  int32_t iLeftTopXy = 0;
  int32_t iRightTopXy = 0;

  //stuff non_zero_coeff_count from pNeighAvail(left and top)
  WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);

  // raster indices of the neighbors (only meaningful when the matching avail flag is set)
  if (pNeighAvail->iTopAvail) {
    iTopXy = iCurXy - pCurLayer->iMbWidth;
  }
  if (pNeighAvail->iLeftAvail) {
    iLeftXy = iCurXy - 1;
  }
  if (pNeighAvail->iLeftTopAvail) {
    iLeftTopXy = iCurXy - 1 - pCurLayer->iMbWidth;
  }
  if (pNeighAvail->iRightTopAvail) {
    iRightTopXy = iCurXy + 1 - pCurLayer->iMbWidth;
  }

  //stuff mv_cache and iRefIdxArray from left and top (inter)
  // left cache column (6,12,18,24) <- left MB's right 4x4 column (blocks 3,7,11,15)
  if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
    ST32 (iMvArray[0][ 6], LD32 (pCurLayer->pMv[0][iLeftXy][ 3]));
    ST32 (iMvArray[0][12], LD32 (pCurLayer->pMv[0][iLeftXy][ 7]));
    ST32 (iMvArray[0][18], LD32 (pCurLayer->pMv[0][iLeftXy][11]));
    ST32 (iMvArray[0][24], LD32 (pCurLayer->pMv[0][iLeftXy][15]));
    iRefIdxArray[0][ 6] = pCurLayer->pRefIndex[0][iLeftXy][ 3];
    iRefIdxArray[0][12] = pCurLayer->pRefIndex[0][iLeftXy][ 7];
    iRefIdxArray[0][18] = pCurLayer->pRefIndex[0][iLeftXy][11];
    iRefIdxArray[0][24] = pCurLayer->pRefIndex[0][iLeftXy][15];
  } else {
    ST32 (iMvArray[0][ 6], 0);
    ST32 (iMvArray[0][12], 0);
    ST32 (iMvArray[0][18], 0);
    ST32 (iMvArray[0][24], 0);
    if (0 == pNeighAvail->iLeftAvail) { //not available
      iRefIdxArray[0][ 6] =
      iRefIdxArray[0][12] =
      iRefIdxArray[0][18] =
      iRefIdxArray[0][24] = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iRefIdxArray[0][ 6] =
      iRefIdxArray[0][12] =
      iRefIdxArray[0][18] =
      iRefIdxArray[0][24] = REF_NOT_IN_LIST;
    }
  }

  // left-top corner (cache 0) <- left-top MB's bottom-right 4x4 (block 15)
  if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
    ST32 (iMvArray[0][0], LD32 (pCurLayer->pMv[0][iLeftTopXy][15]));
    iRefIdxArray[0][0] = pCurLayer->pRefIndex[0][iLeftTopXy][15];
  } else {
    ST32 (iMvArray[0][0], 0);
    if (0 == pNeighAvail->iLeftTopAvail) { //not available
      iRefIdxArray[0][0] = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iRefIdxArray[0][0] = REF_NOT_IN_LIST;
    }
  }

  // top cache row (1..4) <- top MB's bottom 4x4 row (blocks 12..15)
  if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
    ST64 (iMvArray[0][1], LD64 (pCurLayer->pMv[0][iTopXy][12]));
    ST64 (iMvArray[0][3], LD64 (pCurLayer->pMv[0][iTopXy][14]));
    ST32 (&iRefIdxArray[0][1], LD32 (&pCurLayer->pRefIndex[0][iTopXy][12]));
  } else {
    ST64 (iMvArray[0][1], 0);
    ST64 (iMvArray[0][3], 0);
    if (0 == pNeighAvail->iTopAvail) { //not available
      iRefIdxArray[0][1] =
      iRefIdxArray[0][2] =
      iRefIdxArray[0][3] =
      iRefIdxArray[0][4] = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iRefIdxArray[0][1] =
      iRefIdxArray[0][2] =
      iRefIdxArray[0][3] =
      iRefIdxArray[0][4] = REF_NOT_IN_LIST;
    }
  }

  // right-top corner (cache 5) <- right-top MB's bottom-left 4x4 (block 12)
  if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
    ST32 (iMvArray[0][5], LD32 (pCurLayer->pMv[0][iRightTopXy][12]));
    iRefIdxArray[0][5] = pCurLayer->pRefIndex[0][iRightTopXy][12];
  } else {
    ST32 (iMvArray[0][5], 0);
    if (0 == pNeighAvail->iRightTopAvail) { //not available
      iRefIdxArray[0][5] = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iRefIdxArray[0][5] = REF_NOT_IN_LIST;
    }
  }

  //right-top 4*4 block unavailable
  // interior right-edge positions can never have a decoded top-right neighbor
  ST32 (iMvArray[0][ 9], 0);
  ST32 (iMvArray[0][21], 0);
  ST32 (iMvArray[0][11], 0);
  ST32 (iMvArray[0][17], 0);
  ST32 (iMvArray[0][23], 0);
  iRefIdxArray[0][ 9] =
  iRefIdxArray[0][21] =
  iRefIdxArray[0][11] =
  iRefIdxArray[0][17] =
  iRefIdxArray[0][23] = REF_NOT_AVAIL;
}
// Derives the motion vector for a P_Skip macroblock (H.264 8.4.1.1): the MV is zero
// when the left or top neighbor is missing or is a zero-mv/ref-0 block; otherwise it
// is the standard median prediction over the left (A), top (B) and diagonal (C)
// neighbors. Availability here also requires the neighbor to lie in the same slice.
void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
  bool bTopAvail, bLeftTopAvail, bRightTopAvail, bLeftAvail;
  int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc;
  int32_t iLeftTopType, iRightTopType, iTopType, iLeftType;
  // NOTE: the iXxxXy indices below are only assigned when the matching avail flag is
  // set; every later use is guarded by that flag, so no uninitialized read occurs.
  int32_t iCurX, iCurY, iCurXy, iLeftXy, iTopXy, iLeftTopXy, iRightTopXy;

  int8_t iLeftRef;
  int8_t iTopRef;
  int8_t iRightTopRef;
  int8_t iLeftTopRef;
  int8_t iDiagonalRef;
  int8_t iMatchRef;
  int16_t iMvA[2], iMvB[2], iMvC[2], iMvD[2];

  iCurXy = pCurLayer->iMbXyIndex;
  iCurX = pCurLayer->iMbX;
  iCurY = pCurLayer->iMbY;
  iCurSliceIdc = pCurLayer->pSliceIdc[iCurXy];

  // a neighbor counts as available only if it exists AND belongs to the same slice
  if (iCurX != 0) {
    iLeftXy = iCurXy - 1;
    iLeftSliceIdc = pCurLayer->pSliceIdc[iLeftXy];
    bLeftAvail = (iLeftSliceIdc == iCurSliceIdc);
  } else {
    bLeftAvail = 0;
    bLeftTopAvail = 0;
  }

  if (iCurY != 0) {
    iTopXy = iCurXy - pCurLayer->iMbWidth;
    iTopSliceIdc = pCurLayer->pSliceIdc[iTopXy];
    bTopAvail = (iTopSliceIdc == iCurSliceIdc);
    if (iCurX != 0) {
      iLeftTopXy = iTopXy - 1;
      iLeftTopSliceIdc = pCurLayer->pSliceIdc[iLeftTopXy];
      bLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc);
    } else {
      bLeftTopAvail = 0;
    }
    if (iCurX != (pCurLayer->iMbWidth - 1)) {
      iRightTopXy = iTopXy + 1;
      iRightTopSliceIdc = pCurLayer->pSliceIdc[iRightTopXy];
      bRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc);
    } else {
      bRightTopAvail = 0;
    }
  } else {
    bTopAvail = 0;
    bLeftTopAvail = 0;
    bRightTopAvail = 0;
  }

  // neighbor MB types (0 when unavailable)
  iLeftType = ((iCurX != 0 && bLeftAvail) ? pCurLayer->pMbType[iLeftXy] : 0);
  iTopType = ((iCurY != 0 && bTopAvail) ? pCurLayer->pMbType[iTopXy] : 0);
  iLeftTopType = ((iCurX != 0 && iCurY != 0 && bLeftTopAvail) ? pCurLayer->pMbType[iLeftTopXy] : 0);
  iRightTopType = ((iCurX != pCurLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail) ?
                   pCurLayer->pMbType[iRightTopXy] : 0);

  /*get neb mv&iRefIdxArray*/
  /*left*/
  // A = left MB's right-column bottom 4x4 (block 3)
  if (bLeftAvail && IS_INTER (iLeftType)) {
    ST32 (iMvA, LD32 (pCurLayer->pMv[0][iLeftXy][3]));
    iLeftRef = pCurLayer->pRefIndex[0][iLeftXy][3];
  } else {
    ST32 (iMvA, 0);
    if (0 == bLeftAvail) { //not available
      iLeftRef = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iLeftRef = REF_NOT_IN_LIST;
    }
  }
  // P_Skip special case: missing left neighbor, or left uses ref 0 with zero mv -> mv 0
  if (REF_NOT_AVAIL == iLeftRef || (0 == iLeftRef && 0 == * (int32_t*)iMvA)) {
    ST32 (iMvp, 0);
    return;
  }

  /*top*/
  // B = top MB's bottom-row left 4x4 (block 12)
  if (bTopAvail && IS_INTER (iTopType)) {
    ST32 (iMvB, LD32 (pCurLayer->pMv[0][iTopXy][12]));
    iTopRef = pCurLayer->pRefIndex[0][iTopXy][12];
  } else {
    ST32 (iMvB, 0);
    if (0 == bTopAvail) { //not available
      iTopRef = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iTopRef = REF_NOT_IN_LIST;
    }
  }
  // same special case for the top neighbor
  if (REF_NOT_AVAIL == iTopRef || (0 == iTopRef && 0 == * (int32_t*)iMvB)) {
    ST32 (iMvp, 0);
    return;
  }

  /*right_top*/
  // C = right-top MB's bottom-left 4x4 (block 12)
  if (bRightTopAvail && IS_INTER (iRightTopType)) {
    ST32 (iMvC, LD32 (pCurLayer->pMv[0][iRightTopXy][12]));
    iRightTopRef = pCurLayer->pRefIndex[0][iRightTopXy][12];
  } else {
    ST32 (iMvC, 0);
    if (0 == bRightTopAvail) { //not available
      iRightTopRef = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iRightTopRef = REF_NOT_IN_LIST;
    }
  }

  /*left_top*/
  // D = left-top MB's bottom-right 4x4 (block 15); used only as fallback for C
  if (bLeftTopAvail && IS_INTER (iLeftTopType)) {
    ST32 (iMvD, LD32 (pCurLayer->pMv[0][iLeftTopXy][15]));
    iLeftTopRef = pCurLayer->pRefIndex[0][iLeftTopXy][15];
  } else {
    ST32 (iMvD, 0);
    if (0 == bLeftTopAvail) { //not available
      iLeftTopRef = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iLeftTopRef = REF_NOT_IN_LIST;
    }
  }

  // right-top unavailable: substitute left-top for the diagonal neighbor
  iDiagonalRef = iRightTopRef;
  if (REF_NOT_AVAIL == iDiagonalRef) {
    iDiagonalRef = iLeftTopRef;
    * (int32_t*)iMvC = * (int32_t*)iMvD; // same int16_t[2]-as-int32_t idiom as PredMv
  }

  // only the left neighbor usable: take A directly
  if (REF_NOT_AVAIL == iTopRef && REF_NOT_AVAIL == iDiagonalRef && iLeftRef >= REF_NOT_IN_LIST) {
    ST32 (iMvp, LD32 (iMvA));
    return;
  }

  // count neighbors that reference picture 0 (the P_Skip reference)
  iMatchRef = (0 == iLeftRef) + (0 == iTopRef) + (0 == iDiagonalRef);
  if (1 == iMatchRef) {
    // exactly one match: copy that neighbor's mv
    if (0 == iLeftRef) {
      ST32 (iMvp, LD32 (iMvA));
    } else if (0 == iTopRef) {
      ST32 (iMvp, LD32 (iMvB));
    } else {
      ST32 (iMvp, LD32 (iMvC));
    }
  } else {
    // otherwise: component-wise median of A, B, C
    iMvp[0] = WelsMedian (iMvA[0], iMvB[0], iMvC[0]);
    iMvp[1] = WelsMedian (iMvA[1], iMvB[1], iMvC[1]);
  }
}