void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq, uint32_t* pSliceConsume) { SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; int32_t* pRatioList = (int32_t*)pRatio; int32_t iAvI[MAX_SLICES_NUM]; int32_t iSumAv = 0; uint32_t* pSliceTime = (uint32_t*)pSliceConsume; int32_t* pCountMbInSlice = (int32_t*)pSliceCtx->pCountMbNumInSlice; const int32_t kiSliceCount = pSliceCtx->iSliceNumInFrame; int32_t iSliceIdx = 0; WelsEmms(); while (iSliceIdx < kiSliceCount) { iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * pCountMbInSlice[iSliceIdx], pSliceTime[iSliceIdx]); MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), pSliceConsumeTime[%d]= %d us, slice_run= %d", iSliceIdx, pSliceTime[iSliceIdx], pCountMbInSlice[iSliceIdx]); iSumAv += iAvI[iSliceIdx]; ++ iSliceIdx; } while (-- iSliceIdx >= 0) { pRatioList[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * iAvI[iSliceIdx], iSumAv); } }
/*!
 * \brief   Decide whether the slice partition should be re-balanced.
 *
 * The share of the total consumed time taken by each slice is compared with the uniform share
 * (1 / iSliceNum); the root-mean-square of those deviations is tested against a threshold
 * selected by the slice count.
 *
 * \param   pSliceInLayer   array of slices; uiSliceConsumeTime of each entry is read
 * \param   iSliceNum       number of entries in pSliceInLayer
 *
 * \return  true (non-zero) when the RMSE exceeds the threshold; false when pSliceInLayer is
 *          NULL, when the total consumed time is zero, or when the RMSE is within the threshold
 */
int32_t NeedDynamicAdjust (SSlice* pSliceInLayer, const int32_t iSliceNum) {
  if (pSliceInLayer == NULL) {
    return false;
  }

  WelsEmms();

  // Total time consumed by all slices.
  uint32_t uiConsumeSum = 0;
  for (int32_t iIdx = 0; iIdx < iSliceNum; ++iIdx) {
    uiConsumeSum += pSliceInLayer[iIdx].uiSliceConsumeTime;
  }

  if (0 == uiConsumeSum) {
    MT_TRACE_LOG (NULL, WELS_LOG_DEBUG,
                  "[MT] NeedDynamicAdjust(), herein do no adjust due first picture, iCountSliceNum= %d",
                  iSliceNum);
    return false;
  }

  // A non-zero total means at least one slice was summed, so iSliceNum >= 1 from here on.
  // The first slice is always sampled; with several slices, indices 0 .. iSliceNum - 2 are sampled.
  // NOTE(review): the last slice is left out of the sum when iSliceNum > 1 -- confirm this is intended.
  const int32_t kiSampledSlices = (iSliceNum > 1) ? (iSliceNum - 1) : 1;
  const float kfMeanRatio       = 1.0f / iSliceNum;
  float fSquaredErrSum          = .0f;

  for (int32_t iIdx = 0; iIdx < kiSampledSlices; ++iIdx) {
    const float fRatio     = 1.0f * pSliceInLayer[iIdx].uiSliceConsumeTime / uiConsumeSum;
    const float fDiffRatio = fRatio - kfMeanRatio;
    fSquaredErrSum += (fDiffRatio * fDiffRatio);
  }

  // root mean square error of pSlice consume ratios
  const float fRmse = sqrtf (fSquaredErrSum / iSliceNum);

  // threshold for various cores cases
  float fThr = EPSN;
  if (iSliceNum >= 8) {
    fThr += THRESHOLD_RMSE_CORE8;
  } else if (iSliceNum >= 4) {
    fThr += THRESHOLD_RMSE_CORE4;
  } else if (iSliceNum >= 2) {
    fThr += THRESHOLD_RMSE_CORE2;
  } else {
    fThr = 1.0f;
  }

  const int32_t iNeedAdj = (fRmse > fThr) ? true : false;

  MT_TRACE_LOG (NULL, WELS_LOG_DEBUG,
                "[MT] NeedDynamicAdjust(), herein adjustment decision is made (iNeedAdj= %d) by: fRmse of pSlice complexity ratios %.6f, the corresponding threshold %.6f, iCountSliceNum %d",
                iNeedAdj, fRmse, fThr, iSliceNum);

  return iNeedAdj;
}
/*!
 * \brief   Compute a normalised complexity ratio for every slice of a layer, in place.
 *
 * Pass 1 derives, per slice, WELS_DIV_ROUND (INT_MULTIPLY * iCountMbNumInSlice, uiSliceConsumeTime)
 * and accumulates the sum of those figures. Pass 2 stores, per slice,
 * WELS_DIV_ROUND (INT_MULTIPLY * figure, sum) into iSliceComplexRatio.
 *
 * \param   pCurDq  layer providing the slice count (sSliceEncCtx.iSliceNumInFrame) and the slice
 *                  array (sLayerInfo.pSliceInLayer) that is read and updated
 *
 * NOTE(review): the local buffer holds MAX_SLICES_NUM entries; this assumes
 * iSliceNumInFrame <= MAX_SLICES_NUM -- confirm against the callers.
 * NOTE(review): a zero consumed time or a zero sum is passed straight to WELS_DIV_ROUND as the
 * divisor; presumably the macro copes with that -- confirm.
 */
void CalcSliceComplexRatio (SDqLayer* pCurDq) {
  SSlice* pSlices           = pCurDq->sLayerInfo.pSliceInLayer;
  const int32_t kiNumSlices = pCurDq->sSliceEncCtx.iSliceNumInFrame;
  int32_t iSpeed[MAX_SLICES_NUM];
  int32_t iSpeedSum         = 0;

  WelsEmms();

  // Pass 1: per-slice figure (scaled MB count over consumed time) and the running total.
  for (int32_t iIdx = 0; iIdx < kiNumSlices; ++iIdx) {
    SSlice* pSlice = pSlices + iIdx;

    iSpeed[iIdx] = WELS_DIV_ROUND (INT_MULTIPLY * pSlice->iCountMbNumInSlice, pSlice->uiSliceConsumeTime);
    MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), uiSliceConsumeTime[%d]= %d us, slice_run= %d",
                  iIdx, pSlice->uiSliceConsumeTime, pSlice->iCountMbNumInSlice);
    iSpeedSum += iSpeed[iIdx];
  }

  // Pass 2: normalise every figure against the total, scaled by INT_MULTIPLY.
  for (int32_t iIdx = kiNumSlices - 1; iIdx >= 0; --iIdx) {
    pSlices[iIdx].iSliceComplexRatio = WELS_DIV_ROUND (INT_MULTIPLY * iSpeed[iIdx], iSpeedSum);
  }
}
/*!
 * \brief   Re-distribute the macroblocks of a layer over its slices according to the given
 *          per-slice complexity ratios, then have the worker threads refresh their MB lists.
 *
 * Every slice but the last receives kiCountNumMb * ratio / INT_MULTIPLY macroblocks, rounded down
 * to a whole number of GOMs when rate control is active and clamped to [iMinimalMbNum,
 * iMaximalMbNum]; the last slice receives whatever is left. The resulting run lengths are handed
 * to DynamicAdjustSlicePEncCtxAll(); when that returns 0, each thread's pUpdateMbListEvent and
 * pThreadMasterEvent are signalled and the call blocks until all pFinUpdateMbListEvent are set.
 *
 * The function returns without changing anything when:
 *  - rate control is active and the GOM size from RC is not positive (an error is logged);
 *  - rate control is active and there are not more MBs than one GOM per slice;
 *  - the slice count is below 2, odd, or larger than MAX_THREADS_NUM;
 *  - an assignment would leave no MB for the remaining slices.
 *
 * \param   pCtx            encoder context (RC state, parameters, slice-threading events)
 * \param   pCurDqLayer     layer whose slice context is re-partitioned
 * \param   pComplexRatio   per-slice ratios, read as an int32_t array scaled by INT_MULTIPLY
 * \param   iCurDid         index used to select the RC state in pCtx->pWelsSvcRc
 */
void DynamicAdjustSlicing (sWelsEncCtx* pCtx, SDqLayer* pCurDqLayer, void* pComplexRatio, int32_t iCurDid) {
  SSliceCtx* pSliceCtx             = pCurDqLayer->pSliceEncCtx;
  const int32_t kiCountSliceNum    = pSliceCtx->iSliceNumInFrame;
  const int32_t kiCountNumMb       = pSliceCtx->iMbNumInFrame;
  int32_t iMinimalMbNum            = pSliceCtx->iMbWidth; // in theory we need only 1 SMB, here let it as one SMB row required
  int32_t iMaximalMbNum            = 0; // dynamically assign later
  int32_t* pSliceComplexRatio      = (int32_t*)pComplexRatio;
  int32_t iMbNumLeft               = kiCountNumMb;
  int32_t iRunLen[MAX_THREADS_NUM] = {0};
  int32_t iSliceIdx                = 0;
  int32_t iNumMbInEachGom          = 0;
  SWelsSvcRc* pWelsSvcRc           = &pCtx->pWelsSvcRc[iCurDid];

  if (pCtx->pSvcParam->iRCMode != RC_OFF_MODE) {
    iNumMbInEachGom = pWelsSvcRc->iNumberMbGom;

    if (iNumMbInEachGom <= 0) {
      WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
               "[MT] DynamicAdjustSlicing(), invalid iNumMbInEachGom= %d from RC, iDid= %d, iCountNumMb= %d",
               iNumMbInEachGom, iCurDid, kiCountNumMb);
      return;
    }

    // do not adjust in case no extra iNumMbInEachGom based left for slicing adjustment,
    // extra MB of non integrated GOM assigned at the last pSlice in default, keep up on early initial result.
    if (iNumMbInEachGom * kiCountSliceNum >= kiCountNumMb) {
      return;
    }
    iMinimalMbNum = iNumMbInEachGom;
  }

  if (kiCountSliceNum < 2 || (kiCountSliceNum & 0x01)) // we need suppose uiSliceNum is even for multiple threading
    return;

  // iRunLen[] holds MAX_THREADS_NUM entries but is indexed by slice index below; refuse to
  // adjust rather than write past the end of the array when there are more slices than that.
  if (kiCountSliceNum > MAX_THREADS_NUM)
    return;

  // Upper bound for the first slice: every other slice must still get iMinimalMbNum.
  iMaximalMbNum = kiCountNumMb - (kiCountSliceNum - 1) * iMinimalMbNum;

  WelsEmms();

  MT_TRACE_LOG (pCtx, WELS_LOG_DEBUG, "[MT] DynamicAdjustSlicing(), iDid= %d, iCountNumMb= %d", iCurDid, kiCountNumMb);

  iSliceIdx = 0;
  while (iSliceIdx + 1 < kiCountSliceNum) {
    int32_t iNumMbAssigning = WELS_DIV_ROUND (kiCountNumMb * pSliceComplexRatio[iSliceIdx], INT_MULTIPLY);

    // GOM boundary aligned
    if (pCtx->pSvcParam->iRCMode != RC_OFF_MODE) {
      iNumMbAssigning = iNumMbAssigning / iNumMbInEachGom * iNumMbInEachGom;
    }

    // make sure one GOM at least in each pSlice for safe
    if (iNumMbAssigning < iMinimalMbNum)
      iNumMbAssigning = iMinimalMbNum;
    else if (iNumMbAssigning > iMaximalMbNum)
      iNumMbAssigning = iMaximalMbNum;

    assert (iNumMbAssigning > 0);

    iMbNumLeft -= iNumMbAssigning;
    if (iMbNumLeft <= 0) { // error due to we can not support slice_skip now yet, do not adjust this time
      assert (0);
      return;
    }
    iRunLen[iSliceIdx] = iNumMbAssigning;
    MT_TRACE_LOG (pCtx, WELS_LOG_DEBUG,
                  "[MT] DynamicAdjustSlicing(), uiSliceIdx= %d, pSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d",
                  iSliceIdx, pSliceComplexRatio[iSliceIdx] * 1.0f / INT_MULTIPLY, pSliceCtx->pCountMbNumInSlice[iSliceIdx],
                  iNumMbAssigning);
    ++ iSliceIdx;
    iMaximalMbNum = iMbNumLeft - (kiCountSliceNum - iSliceIdx - 1) * iMinimalMbNum; // get maximal num_mb in left parts
  }

  // The last slice takes all remaining macroblocks.
  iRunLen[iSliceIdx] = iMbNumLeft;
  MT_TRACE_LOG (pCtx, WELS_LOG_DEBUG,
                "[MT] DynamicAdjustSlicing(), iSliceIdx= %d, pSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d",
                iSliceIdx, pSliceComplexRatio[iSliceIdx] * 1.0f / INT_MULTIPLY, pSliceCtx->pCountMbNumInSlice[iSliceIdx],
                iMbNumLeft);

  if (DynamicAdjustSlicePEncCtxAll (pSliceCtx, iRunLen) == 0) {
    const int32_t kiThreadNum = pCtx->pSvcParam->iCountThreadsNum;
    int32_t iThreadIdx = 0;

    // Signal both events of every thread, then block until all of them report completion.
    do {
      WelsEventSignal (&pCtx->pSliceThreading->pUpdateMbListEvent[iThreadIdx]);
      WelsEventSignal (&pCtx->pSliceThreading->pThreadMasterEvent[iThreadIdx]);
      ++ iThreadIdx;
    } while (iThreadIdx < kiThreadNum);

    WelsMultipleEventsWaitAllBlocking (kiThreadNum, &pCtx->pSliceThreading->pFinUpdateMbListEvent[0]);
  }
}