int32_t NeedDynamicAdjust (SSlice* pSliceInLayer, const int32_t iSliceNum) { if ( NULL == pSliceInLayer ) return false; uint32_t uiTotalConsume = 0; int32_t iSliceIdx = 0; int32_t iNeedAdj = false; WelsEmms(); while (iSliceIdx < iSliceNum) { uiTotalConsume += pSliceInLayer[iSliceIdx].uiSliceConsumeTime; iSliceIdx ++; } if (uiTotalConsume == 0) { MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] NeedDynamicAdjust(), herein do no adjust due first picture, iCountSliceNum= %d", iSliceNum); return false; } iSliceIdx = 0; float fThr = EPSN; // threshold for various cores cases float fRmse = .0f; // root mean square error of pSlice consume ratios const float kfMeanRatio = 1.0f / iSliceNum; do { const float fRatio = 1.0f * pSliceInLayer[iSliceIdx].uiSliceConsumeTime / uiTotalConsume; const float fDiffRatio = fRatio - kfMeanRatio; fRmse += (fDiffRatio * fDiffRatio); ++ iSliceIdx; } while (iSliceIdx + 1 < iSliceNum); fRmse = sqrtf (fRmse / iSliceNum); if (iSliceNum >= 8) { fThr += THRESHOLD_RMSE_CORE8; } else if (iSliceNum >= 4) { fThr += THRESHOLD_RMSE_CORE4; } else if (iSliceNum >= 2) { fThr += THRESHOLD_RMSE_CORE2; } else fThr = 1.0f; if (fRmse > fThr) iNeedAdj = true; MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] NeedDynamicAdjust(), herein adjustment decision is made (iNeedAdj= %d) by: fRmse of pSlice complexity ratios %.6f, the corresponding threshold %.6f, iCountSliceNum %d", iNeedAdj, fRmse, fThr, iSliceNum); return iNeedAdj; }
void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq, uint32_t* pSliceConsume) { SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; int32_t* pRatioList = (int32_t*)pRatio; int32_t iAvI[MAX_SLICES_NUM]; int32_t iSumAv = 0; uint32_t* pSliceTime = (uint32_t*)pSliceConsume; int32_t* pCountMbInSlice = (int32_t*)pSliceCtx->pCountMbNumInSlice; const int32_t kiSliceCount = pSliceCtx->iSliceNumInFrame; int32_t iSliceIdx = 0; WelsEmms(); while (iSliceIdx < kiSliceCount) { iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * pCountMbInSlice[iSliceIdx], pSliceTime[iSliceIdx]); MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), pSliceConsumeTime[%d]= %d us, slice_run= %d", iSliceIdx, pSliceTime[iSliceIdx], pCountMbInSlice[iSliceIdx]); iSumAv += iAvI[iSliceIdx]; ++ iSliceIdx; } while (-- iSliceIdx >= 0) { pRatioList[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * iAvI[iSliceIdx], iSumAv); } }
int32_t CreateSliceThreads (sWelsEncCtx* pCtx) { const int32_t kiThreadCount = pCtx->pSvcParam->iCountThreadsNum; int32_t iIdx = 0; while (iIdx < kiThreadCount) { if (WelsThreadCreate (&pCtx->pSliceThreading->pThreadHandles[iIdx], CodingSliceThreadProc, &pCtx->pSliceThreading->pThreadPEncCtx[iIdx], 0)) { return 1; } ++ iIdx; } MT_TRACE_LOG (pCtx, WELS_LOG_INFO, "CreateSliceThreads() exit.."); return 0; }
void CalcSliceComplexRatio (SDqLayer* pCurDq) { SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; SSlice* pSliceInLayer = pCurDq->sLayerInfo.pSliceInLayer; int32_t iSumAv = 0; const int32_t kiSliceCount = pSliceCtx->iSliceNumInFrame; int32_t iSliceIdx = 0; int32_t iAvI[MAX_SLICES_NUM]; WelsEmms(); while (iSliceIdx < kiSliceCount) { iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * pSliceInLayer[iSliceIdx].iCountMbNumInSlice, pSliceInLayer[iSliceIdx].uiSliceConsumeTime); MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), uiSliceConsumeTime[%d]= %d us, slice_run= %d", iSliceIdx, pSliceInLayer[iSliceIdx].uiSliceConsumeTime, pSliceInLayer[iSliceIdx].iCountMbNumInSlice); iSumAv += iAvI[iSliceIdx]; ++ iSliceIdx; } while (-- iSliceIdx >= 0) { pSliceInLayer[iSliceIdx].iSliceComplexRatio = WELS_DIV_ROUND (INT_MULTIPLY * iAvI[iSliceIdx], iSumAv); } }
void DynamicAdjustSlicing (sWelsEncCtx* pCtx, SDqLayer* pCurDqLayer, void* pComplexRatio, int32_t iCurDid) { SSliceCtx* pSliceCtx = pCurDqLayer->pSliceEncCtx; const int32_t kiCountSliceNum = pSliceCtx->iSliceNumInFrame; const int32_t kiCountNumMb = pSliceCtx->iMbNumInFrame; int32_t iMinimalMbNum = pSliceCtx->iMbWidth; // in theory we need only 1 SMB, here let it as one SMB row required int32_t iMaximalMbNum = 0; // dynamically assign later int32_t* pSliceComplexRatio = (int32_t*)pComplexRatio; int32_t iMbNumLeft = kiCountNumMb; int32_t iRunLen[MAX_THREADS_NUM] = {0}; int32_t iSliceIdx = 0; int32_t iNumMbInEachGom = 0; SWelsSvcRc* pWelsSvcRc = &pCtx->pWelsSvcRc[iCurDid]; if (pCtx->pSvcParam->iRCMode != RC_OFF_MODE) { iNumMbInEachGom = pWelsSvcRc->iNumberMbGom; if (iNumMbInEachGom <= 0) { WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "[MT] DynamicAdjustSlicing(), invalid iNumMbInEachGom= %d from RC, iDid= %d, iCountNumMb= %d", iNumMbInEachGom, iCurDid, kiCountNumMb); return; } // do not adjust in case no extra iNumMbInEachGom based left for slicing adjustment, // extra MB of non integrated GOM assigned at the last pSlice in default, keep up on early initial result. if (iNumMbInEachGom * kiCountSliceNum >= kiCountNumMb) { return; } iMinimalMbNum = iNumMbInEachGom; } if (kiCountSliceNum < 2 || (kiCountSliceNum & 0x01)) // we need suppose uiSliceNum is even for multiple threading return; iMaximalMbNum = kiCountNumMb - (kiCountSliceNum - 1) * iMinimalMbNum; WelsEmms(); MT_TRACE_LOG (pCtx, WELS_LOG_DEBUG, "[MT] DynamicAdjustSlicing(), iDid= %d, iCountNumMb= %d", iCurDid, kiCountNumMb); iSliceIdx = 0; while (iSliceIdx + 1 < kiCountSliceNum) { int32_t iNumMbAssigning = WELS_DIV_ROUND (kiCountNumMb * pSliceComplexRatio[iSliceIdx], INT_MULTIPLY); // GOM boundary aligned if (pCtx->pSvcParam->iRCMode != RC_OFF_MODE) { iNumMbAssigning = iNumMbAssigning / iNumMbInEachGom * iNumMbInEachGom; } // make sure one GOM at least in each pSlice for safe if (iNumMbAssigning < iMinimalMbNum) iNumMbAssigning = iMinimalMbNum; else if (iNumMbAssigning > iMaximalMbNum) iNumMbAssigning = iMaximalMbNum; assert (iNumMbAssigning > 0); iMbNumLeft -= iNumMbAssigning; if (iMbNumLeft <= 0) { // error due to we can not support slice_skip now yet, do not adjust this time assert (0); return; } iRunLen[iSliceIdx] = iNumMbAssigning; MT_TRACE_LOG (pCtx, WELS_LOG_DEBUG, "[MT] DynamicAdjustSlicing(), uiSliceIdx= %d, pSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d", iSliceIdx, pSliceComplexRatio[iSliceIdx] * 1.0f / INT_MULTIPLY, pSliceCtx->pCountMbNumInSlice[iSliceIdx], iNumMbAssigning); ++ iSliceIdx; iMaximalMbNum = iMbNumLeft - (kiCountSliceNum - iSliceIdx - 1) * iMinimalMbNum; // get maximal num_mb in left parts } iRunLen[iSliceIdx] = iMbNumLeft; MT_TRACE_LOG (pCtx, WELS_LOG_DEBUG, "[MT] DynamicAdjustSlicing(), iSliceIdx= %d, pSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d", iSliceIdx, pSliceComplexRatio[iSliceIdx] * 1.0f / INT_MULTIPLY, pSliceCtx->pCountMbNumInSlice[iSliceIdx], iMbNumLeft); if (DynamicAdjustSlicePEncCtxAll (pSliceCtx, iRunLen) == 0) { const int32_t kiThreadNum = pCtx->pSvcParam->iCountThreadsNum; int32_t iThreadIdx = 0; do { WelsEventSignal (&pCtx->pSliceThreading->pUpdateMbListEvent[iThreadIdx]); WelsEventSignal (&pCtx->pSliceThreading->pThreadMasterEvent[iThreadIdx]); ++ iThreadIdx; } while (iThreadIdx < kiThreadNum); WelsMultipleEventsWaitAllBlocking (kiThreadNum, &pCtx->pSliceThreading->pFinUpdateMbListEvent[0]); } }
// thread process for coding one pSlice WELS_THREAD_ROUTINE_TYPE CodingSliceThreadProc (void* arg) { SSliceThreadPrivateData* pPrivateData = (SSliceThreadPrivateData*)arg; sWelsEncCtx* pEncPEncCtx = NULL; SDqLayer* pCurDq = NULL; SSlice* pSlice = NULL; SWelsSliceBs* pSliceBs = NULL; WELS_EVENT pEventsList[3]; int32_t iEventCount = 0; WELS_THREAD_ERROR_CODE iWaitRet = WELS_THREAD_ERROR_GENERAL; uint32_t uiThrdRet = 0; int32_t iSliceSize = 0; int32_t iSliceIdx = -1; int32_t iThreadIdx = -1; int32_t iEventIdx = -1; bool bNeedPrefix = false; EWelsNalUnitType eNalType = NAL_UNIT_UNSPEC_0; EWelsNalRefIdc eNalRefIdc = NRI_PRI_LOWEST; int32_t iReturn = ENC_RETURN_SUCCESS; if (NULL == pPrivateData) WELS_THREAD_ROUTINE_RETURN (1); pEncPEncCtx = (sWelsEncCtx*)pPrivateData->pWelsPEncCtx; iThreadIdx = pPrivateData->iThreadIndex; iEventIdx = iThreadIdx; pEventsList[iEventCount++] = pEncPEncCtx->pSliceThreading->pReadySliceCodingEvent[iEventIdx]; pEventsList[iEventCount++] = pEncPEncCtx->pSliceThreading->pExitEncodeEvent[iEventIdx]; pEventsList[iEventCount++] = pEncPEncCtx->pSliceThreading->pUpdateMbListEvent[iEventIdx]; WelsThreadSetName ("OpenH264Enc_CodingSliceThreadProc"); do { MT_TRACE_LOG (pEncPEncCtx, WELS_LOG_INFO, "[MT] CodingSliceThreadProc(), try to call WelsMultipleEventsWaitSingleBlocking(pEventsList= %p %p %p), pEncPEncCtx= %p!", pEventsList[0], pEventsList[1], pEventsList[1], (void*)pEncPEncCtx); iWaitRet = WelsMultipleEventsWaitSingleBlocking (iEventCount, &pEventsList[0], &pEncPEncCtx->pSliceThreading->pThreadMasterEvent[iEventIdx]); // blocking until at least one event is signalled if (WELS_THREAD_ERROR_WAIT_OBJECT_0 == iWaitRet) { // start pSlice coding signal waited //int iLayerIndex = pEncPEncCtx->pOut->iLayerBsIndex; //SFrameBSInfo* pFrameBsInfo = pPrivateData->pFrameBsInfo; //SLayerBSInfo* pLbi = &pFrameBsInfo->sLayerInfo [iLayerIndex]; const int32_t kiCurDid = pEncPEncCtx->uiDependencyId; SWelsSvcCodingParam* pCodingParam = pEncPEncCtx->pSvcParam; SSpatialLayerConfig* pParamD = &pCodingParam->sSpatialLayers[kiCurDid]; pCurDq = pEncPEncCtx->pCurDqLayer; eNalType = pEncPEncCtx->eNalType; eNalRefIdc = pEncPEncCtx->eNalPriority; bNeedPrefix = pEncPEncCtx->bNeedPrefixNalFlag; if (pParamD->sSliceArgument.uiSliceMode != SM_SIZELIMITED_SLICE) { int64_t iSliceStart = 0; bool bDsaFlag = false; iSliceIdx = pPrivateData->iSliceIndex; pSlice = &pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx]; pSliceBs = &pEncPEncCtx->pSliceBs[iSliceIdx]; bDsaFlag = ((pParamD->sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE) && pCodingParam->iMultipleThreadIdc > 1 && pCodingParam->iMultipleThreadIdc >= pParamD->sSliceArgument.uiSliceNum); if (bDsaFlag) iSliceStart = WelsTime(); pSliceBs->uiBsPos = 0; pSliceBs->iNalIndex = 0; assert ((void*) (&pSliceBs->sBsWrite) == (void*)pSlice->pSliceBsa); InitBits (&pSliceBs->sBsWrite, pSliceBs->pBsBuffer, pSliceBs->uiSize); #if MT_DEBUG_BS_WR pSliceBs->bSliceCodedFlag = false; #endif//MT_DEBUG_BS_WR if (bNeedPrefix) { if (eNalRefIdc != NRI_PRI_LOWEST) { WelsLoadNalForSlice (pSliceBs, NAL_UNIT_PREFIX, eNalRefIdc); WelsWriteSVCPrefixNal (&pSliceBs->sBsWrite, eNalRefIdc, (NAL_UNIT_CODED_SLICE_IDR == eNalType)); WelsUnloadNalForSlice (pSliceBs); } else { // No Prefix NAL Unit RBSP syntax here, but need add NAL Unit Header extension WelsLoadNalForSlice (pSliceBs, NAL_UNIT_PREFIX, eNalRefIdc); // No need write any syntax of prefix NAL Unit RBSP here WelsUnloadNalForSlice (pSliceBs); } } WelsLoadNalForSlice (pSliceBs, eNalType, eNalRefIdc); iReturn = WelsCodeOneSlice (pEncPEncCtx, iSliceIdx, eNalType); if (ENC_RETURN_SUCCESS != iReturn) { uiThrdRet = iReturn; WELS_THREAD_SIGNAL_AND_BREAK (pEncPEncCtx->pSliceThreading->pSliceCodedEvent, pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent, iEventIdx); } WelsUnloadNalForSlice (pSliceBs); int32_t iLeftBufferSize = (iSliceIdx > 0) ? (pSliceBs->uiSize - pSliceBs->uiBsPos) : (pEncPEncCtx->iFrameBsSize - pEncPEncCtx->iPosBsBuffer); iReturn = WriteSliceBs (pEncPEncCtx, pSliceBs->pBs, &pSliceBs->iNalLen[0], iLeftBufferSize, iSliceIdx, iSliceSize); if (ENC_RETURN_SUCCESS != iReturn) { uiThrdRet = iReturn; WELS_THREAD_SIGNAL_AND_BREAK (pEncPEncCtx->pSliceThreading->pSliceCodedEvent, pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent, iEventIdx); } if (0 == iSliceIdx) { pEncPEncCtx->iPosBsBuffer += iSliceSize; } pEncPEncCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, pEncPEncCtx->pFuncList, iSliceIdx); if (bDsaFlag) { pEncPEncCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx].uiSliceConsumeTime = (uint32_t) ( WelsTime() - iSliceStart); MT_TRACE_LOG (& (pEncPEncCtx->sLogCtx), WELS_LOG_INFO, "[MT] CodingSliceThreadProc(), coding_idx %d, uiSliceIdx %d, uiSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d", pEncPEncCtx->iCodingIndex, iSliceIdx, pEncPEncCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx].uiSliceConsumeTime, iSliceSize, pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx].sSliceHeaderExt.sSliceHeader.iFirstMbInSlice, pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx].iCountMbNumInSlice); } #if defined(SLICE_INFO_OUTPUT) fprintf (stderr, "@pSlice=%-6d sliceType:%c idc:%d size:%-6d\n", iSliceIdx, (pEncPEncCtx->eSliceType == P_SLICE ? 'P' : 'I'), eNalRefIdc, iSliceSize ); #endif//SLICE_INFO_OUTPUT #if MT_DEBUG_BS_WR pSliceBs->bSliceCodedFlag = true; #endif//MT_DEBUG_BS_WR WelsEventSignal ( &pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx]); // mean finished coding current pSlice WelsEventSignal ( &pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent); } else { // for SM_SIZELIMITED_SLICE parallelization SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; const int32_t kiPartitionId = iThreadIdx; const int32_t kiSliceIdxStep = pEncPEncCtx->iActiveThreadsNum; const int32_t kiFirstMbInPartition = pPrivateData->iStartMbIndex; // inclusive const int32_t kiEndMbInPartition = pPrivateData->iEndMbIndex; // exclusive int32_t iAnyMbLeftInPartition = kiEndMbInPartition - kiFirstMbInPartition; iSliceIdx = pPrivateData->iSliceIndex; SSliceHeaderExt* pStartSliceHeaderExt = &pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx].sSliceHeaderExt; pStartSliceHeaderExt->sSliceHeader.iFirstMbInSlice = kiFirstMbInPartition; pCurDq->pNumSliceCodedOfPartition[kiPartitionId] = 1; // one pSlice per partition intialized, dynamic slicing inside pCurDq->pLastMbIdxOfPartition[kiPartitionId] = kiEndMbInPartition - 1; pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId] = 0; while (iAnyMbLeftInPartition > 0) { if (iSliceIdx >= pSliceCtx->iMaxSliceNumConstraint) { // TODO: need exception handler for not large enough of MAX_SLICES_NUM related memory usage // No idea about its solution due MAX_SLICES_NUM is fixed lenght in relevent pData structure uiThrdRet = 1; WELS_THREAD_SIGNAL_AND_BREAK (pEncPEncCtx->pSliceThreading->pSliceCodedEvent, pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent, iEventIdx); } SetOneSliceBsBufferUnderMultithread (pEncPEncCtx, kiPartitionId, iSliceIdx); pSlice = &pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx]; pSliceBs = &pEncPEncCtx->pSliceBs[iSliceIdx]; pSliceBs->uiBsPos = 0; pSliceBs->iNalIndex = 0; InitBits (&pSliceBs->sBsWrite, pSliceBs->pBsBuffer, pSliceBs->uiSize); if (bNeedPrefix) { if (eNalRefIdc != NRI_PRI_LOWEST) { WelsLoadNalForSlice (pSliceBs, NAL_UNIT_PREFIX, eNalRefIdc); WelsWriteSVCPrefixNal (&pSliceBs->sBsWrite, eNalRefIdc, (NAL_UNIT_CODED_SLICE_IDR == eNalType)); WelsUnloadNalForSlice (pSliceBs); } else { // No Prefix NAL Unit RBSP syntax here, but need add NAL Unit Header extension WelsLoadNalForSlice (pSliceBs, NAL_UNIT_PREFIX, eNalRefIdc); // No need write any syntax of prefix NAL Unit RBSP here WelsUnloadNalForSlice (pSliceBs); } } WelsLoadNalForSlice (pSliceBs, eNalType, eNalRefIdc); iReturn = WelsCodeOneSlice (pEncPEncCtx, iSliceIdx, eNalType); if (ENC_RETURN_SUCCESS != iReturn) { uiThrdRet = iReturn; WELS_THREAD_SIGNAL_AND_BREAK (pEncPEncCtx->pSliceThreading->pSliceCodedEvent, pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent, iEventIdx); } WelsUnloadNalForSlice (pSliceBs); int32_t iLeftBufferSize = (iSliceIdx > 0) ? (pSliceBs->uiSize - pSliceBs->uiBsPos) : (pEncPEncCtx->iFrameBsSize - pEncPEncCtx->iPosBsBuffer); iReturn = WriteSliceBs (pEncPEncCtx, pSliceBs->pBs, &pSliceBs->iNalLen[0], iLeftBufferSize, iSliceIdx, iSliceSize); if (ENC_RETURN_SUCCESS != iReturn) { uiThrdRet = iReturn; WELS_THREAD_SIGNAL_AND_BREAK (pEncPEncCtx->pSliceThreading->pSliceCodedEvent, pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent, iEventIdx); } if (0 == iSliceIdx) { pEncPEncCtx->iPosBsBuffer += iSliceSize; } pEncPEncCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, pEncPEncCtx->pFuncList, iSliceIdx); #if defined(SLICE_INFO_OUTPUT) fprintf (stderr, "@pSlice=%-6d sliceType:%c idc:%d size:%-6d\n", iSliceIdx, (pEncPEncCtx->eSliceType == P_SLICE ? 'P' : 'I'), eNalRefIdc, iSliceSize ); #endif//SLICE_INFO_OUTPUT MT_TRACE_LOG (pEncPEncCtx, WELS_LOG_INFO, "[MT] CodingSliceThreadProc(), coding_idx %d, iPartitionId %d, uiSliceIdx %d, iSliceSize %d, count_mb_slice %d, iEndMbInPartition %d, pCurDq->pLastCodedMbIdxOfPartition[%d] %d\n", pEncPEncCtx->iCodingIndex, kiPartitionId, iSliceIdx, iSliceSize, pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx].iCountMbNumInSlice, kiEndMbInPartition, kiPartitionId, pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]); iAnyMbLeftInPartition = kiEndMbInPartition - (1 + pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]); iSliceIdx += kiSliceIdxStep; } if (uiThrdRet) { // any exception?? WELS_THREAD_SIGNAL_AND_BREAK (pEncPEncCtx->pSliceThreading->pSliceCodedEvent, pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent, iEventIdx); } WelsEventSignal (&pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx]); // mean finished coding current pSlice WelsEventSignal (&pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent); } } else if (WELS_THREAD_ERROR_WAIT_OBJECT_0 + 1 == iWaitRet) { // exit thread signal uiThrdRet = 0; break; } else if (WELS_THREAD_ERROR_WAIT_OBJECT_0 + 2 == iWaitRet) { // update pMb list singal iSliceIdx = iEventIdx; // pPrivateData->iSliceIndex; old threads can not be terminated, pPrivateData is not correct for applicable pCurDq = pEncPEncCtx->pCurDqLayer; UpdateMbListNeighborParallel (pCurDq, pCurDq->sMbDataP, iSliceIdx); WelsEventSignal ( &pEncPEncCtx->pSliceThreading->pFinUpdateMbListEvent[iEventIdx]); // mean finished update pMb list for this pSlice } else { // WELS_THREAD_ERROR_WAIT_TIMEOUT, or WELS_THREAD_ERROR_WAIT_FAILED WelsLog (& (pEncPEncCtx->sLogCtx), WELS_LOG_WARNING, "[MT] CodingSliceThreadProc(), waiting pReadySliceCodingEvent[%d] failed(%d) and thread%d terminated!", iEventIdx, iWaitRet, iThreadIdx); uiThrdRet = 1; break; } } while (1); //sync multi-threading error WelsMutexLock (&pEncPEncCtx->mutexEncoderError); if (uiThrdRet) pEncPEncCtx->iEncoderError |= uiThrdRet; WelsMutexUnlock (&pEncPEncCtx->mutexEncoderError); WELS_THREAD_ROUTINE_RETURN (uiThrdRet); }
int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingParam, const int32_t iCountBsLen, const int32_t iMaxSliceBufferSize, bool bDynamicSlice) { CMemoryAlign* pMa = NULL; SWelsSvcCodingParam* pPara = NULL; SSliceThreading* pSmt = NULL; int32_t iNumSpatialLayers = 0; int32_t iThreadNum = 0; int32_t iIdx = 0; int16_t iMaxSliceNum = 1; int32_t iReturn = ENC_RETURN_SUCCESS; bool bWillUseTaskManage = false; if (NULL == ppCtx || NULL == pCodingParam || NULL == *ppCtx || iCountBsLen <= 0) return 1; pMa = (*ppCtx)->pMemAlign; pPara = pCodingParam; iNumSpatialLayers = pPara->iSpatialLayerNum; iThreadNum = pPara->iCountThreadsNum; iMaxSliceNum = (*ppCtx)->iMaxSliceCount; pSmt = (SSliceThreading*)pMa->WelsMalloc (sizeof (SSliceThreading), "SSliceThreading"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt), FreeMemorySvc (ppCtx)) (*ppCtx)->pSliceThreading = pSmt; pSmt->pThreadPEncCtx = (SSliceThreadPrivateData*)pMa->WelsMalloc (sizeof (SSliceThreadPrivateData) * iThreadNum, "pThreadPEncCtx"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pThreadPEncCtx), FreeMemorySvc (ppCtx)) #ifdef _WIN32 // Dummy event namespace, the windows events don't actually use this WelsSnprintf (pSmt->eventNamespace, sizeof (pSmt->eventNamespace), "%p", (void*) *ppCtx); #else WelsSnprintf (pSmt->eventNamespace, sizeof (pSmt->eventNamespace), "%p%x", (void*) *ppCtx, getpid()); #endif//!_WIN32 iIdx = 0; while (iIdx < iNumSpatialLayers) { SSliceArgument* pSliceArgument = &pPara->sSpatialLayers[iIdx].sSliceArgument; if (pSliceArgument->uiSliceMode == SM_FIXEDSLCNUM_SLICE || pSliceArgument->uiSliceMode == SM_RASTER_SLICE) { bWillUseTaskManage = true; } ++ iIdx; } #ifdef MT_DEBUG // file handle for MT debug pSmt->pFSliceDiff = NULL; if (pSmt->pFSliceDiff) { fclose (pSmt->pFSliceDiff); pSmt->pFSliceDiff = NULL; } pSmt->pFSliceDiff = fopen ("slice_time.txt", "wt+"); #endif//MT_DEBUG MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "encpEncCtx= 0x%p", (void*) *ppCtx); char name[SEM_NAME_MAX] = {0}; WELS_GCC_UNUSED WELS_THREAD_ERROR_CODE err = 0; iIdx = 0; while (iIdx < iThreadNum) { pSmt->pThreadPEncCtx[iIdx].pWelsPEncCtx = (void*) *ppCtx; pSmt->pThreadPEncCtx[iIdx].iSliceIndex = iIdx; pSmt->pThreadPEncCtx[iIdx].iThreadIndex = iIdx; pSmt->pThreadHandles[iIdx] = 0; WelsSnprintf (name, SEM_NAME_MAX, "ee%d%s", iIdx, pSmt->eventNamespace); err = WelsEventOpen (&pSmt->pExitEncodeEvent[iIdx], name); MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pExitEncodeEvent%d named(%s) ret%d err%d", iIdx, name, err, errno); WelsSnprintf (name, SEM_NAME_MAX, "tm%d%s", iIdx, pSmt->eventNamespace); err = WelsEventOpen (&pSmt->pThreadMasterEvent[iIdx], name); MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pThreadMasterEvent%d named(%s) ret%d err%d", iIdx, name, err, errno); // length of semaphore name should be system constrained at least on mac 10.7 WelsSnprintf (name, SEM_NAME_MAX, "ud%d%s", iIdx, pSmt->eventNamespace); err = WelsEventOpen (&pSmt->pUpdateMbListEvent[iIdx], name); MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pUpdateMbListEvent%d named(%s) ret%d err%d", iIdx, name, err, errno); WelsSnprintf (name, SEM_NAME_MAX, "fu%d%s", iIdx, pSmt->eventNamespace); err = WelsEventOpen (&pSmt->pFinUpdateMbListEvent[iIdx], name); MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pFinUpdateMbListEvent%d named(%s) ret%d err%d", iIdx, name, err, errno); WelsSnprintf (name, SEM_NAME_MAX, "sc%d%s", iIdx, pSmt->eventNamespace); err = WelsEventOpen (&pSmt->pSliceCodedEvent[iIdx], name); MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pSliceCodedEvent%d named(%s) ret%d err%d", iIdx, name, err, errno); WelsSnprintf (name, SEM_NAME_MAX, "rc%d%s", iIdx, pSmt->eventNamespace); err = WelsEventOpen (&pSmt->pReadySliceCodingEvent[iIdx], name); MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pReadySliceCodingEvent%d = 0x%p named(%s) ret%d err%d", iIdx, (void*)pSmt->pReadySliceCodingEvent[iIdx], name, err, errno); pSmt->pThreadBsBuffer[iIdx] = (uint8_t*)pMa->WelsMalloc (iCountBsLen, "pSmt->pThreadBsBuffer"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pThreadBsBuffer[iIdx]), FreeMemorySvc (ppCtx)) ++ iIdx; } for (; iIdx < MAX_THREADS_NUM; iIdx++) { pSmt->pThreadBsBuffer[iIdx] = NULL; } //previous conflict WelsSnprintf (name, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace); err = WelsEventOpen (&pSmt->pSliceCodedMasterEvent, name); MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pSliceCodedMasterEvent named(%s) ret%d err%d", name, err, errno); //previous conflict ends iReturn = SetMultiSliceBuffer (ppCtx, pMa, pSmt, iMaxSliceNum, iMaxSliceBufferSize, iCountBsLen, bDynamicSlice); WELS_VERIFY_RETURN_PROC_IF (iReturn, (ENC_RETURN_SUCCESS != iReturn), FreeMemorySvc (ppCtx)) iReturn = WelsMutexInit (&pSmt->mutexSliceNumUpdate); WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx)) if (bWillUseTaskManage) { (*ppCtx)->pTaskManage = IWelsTaskManage::CreateTaskManage (*ppCtx, iNumSpatialLayers, bDynamicSlice); WELS_VERIFY_RETURN_PROC_IF (iReturn, (NULL == (*ppCtx)->pTaskManage), FreeMemorySvc (ppCtx)) } memset (&pSmt->bThreadBsBufferUsage, 0, MAX_THREADS_NUM * sizeof (bool)); iReturn = WelsMutexInit (&pSmt->mutexThreadBsBufferUsage); WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx)) iReturn = WelsMutexInit (& (*ppCtx)->mutexEncoderError); WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx)) MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "RequestMtResource(), iThreadNum=%d, iCountSliceNum= %d", pPara->iCountThreadsNum, iMaxSliceNum); return 0; }