예제 #1
0
int32_t NeedDynamicAdjust (SSlice* pSliceInLayer, const int32_t iSliceNum) {
  if ( NULL == pSliceInLayer )
    return false;

  uint32_t uiTotalConsume       = 0;
  int32_t iSliceIdx             = 0;
  int32_t iNeedAdj              = false;

  WelsEmms();

  while (iSliceIdx < iSliceNum) {
    uiTotalConsume += pSliceInLayer[iSliceIdx].uiSliceConsumeTime;
    iSliceIdx ++;
  }
  if (uiTotalConsume == 0) {
    MT_TRACE_LOG (NULL, WELS_LOG_DEBUG,
                  "[MT] NeedDynamicAdjust(), herein do no adjust due first picture, iCountSliceNum= %d",
                  iSliceNum);
    return false;
  }

  iSliceIdx = 0;
  float fThr                    = EPSN; // threshold for various cores cases
  float fRmse                   = .0f;  // root mean square error of pSlice consume ratios
  const float kfMeanRatio       = 1.0f / iSliceNum;
  do {
    const float fRatio = 1.0f * pSliceInLayer[iSliceIdx].uiSliceConsumeTime / uiTotalConsume;
    const float fDiffRatio = fRatio - kfMeanRatio;
    fRmse += (fDiffRatio * fDiffRatio);
    ++ iSliceIdx;
  } while (iSliceIdx + 1 < iSliceNum);
  fRmse = sqrtf (fRmse / iSliceNum);
  if (iSliceNum >= 8) {
    fThr += THRESHOLD_RMSE_CORE8;
  } else if (iSliceNum >= 4) {
    fThr += THRESHOLD_RMSE_CORE4;
  } else if (iSliceNum >= 2) {
    fThr += THRESHOLD_RMSE_CORE2;
  } else
    fThr = 1.0f;
  if (fRmse > fThr)
    iNeedAdj = true;
  MT_TRACE_LOG (NULL, WELS_LOG_DEBUG,
                "[MT] NeedDynamicAdjust(), herein adjustment decision is made (iNeedAdj= %d) by: fRmse of pSlice complexity ratios %.6f, the corresponding threshold %.6f, iCountSliceNum %d",
                iNeedAdj, fRmse, fThr, iSliceNum);

  return iNeedAdj;
}
예제 #2
0
void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq, uint32_t* pSliceConsume) {
  SSliceCtx* pSliceCtx          = &pCurDq->sSliceEncCtx;
  int32_t* pRatioList           = (int32_t*)pRatio;
  int32_t iAvI[MAX_SLICES_NUM];
  int32_t iSumAv                = 0;
  uint32_t* pSliceTime          = (uint32_t*)pSliceConsume;
  int32_t* pCountMbInSlice      = (int32_t*)pSliceCtx->pCountMbNumInSlice;
  const int32_t kiSliceCount    = pSliceCtx->iSliceNumInFrame;
  int32_t iSliceIdx             = 0;

  WelsEmms();

  while (iSliceIdx < kiSliceCount) {
    iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * pCountMbInSlice[iSliceIdx], pSliceTime[iSliceIdx]);
    MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), pSliceConsumeTime[%d]= %d us, slice_run= %d",
                  iSliceIdx,
                  pSliceTime[iSliceIdx], pCountMbInSlice[iSliceIdx]);
    iSumAv += iAvI[iSliceIdx];

    ++ iSliceIdx;
  }
  while (-- iSliceIdx >= 0) {
    pRatioList[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * iAvI[iSliceIdx], iSumAv);
  }
}
예제 #3
0
int32_t CreateSliceThreads (sWelsEncCtx* pCtx) {
  const int32_t kiThreadCount = pCtx->pSvcParam->iCountThreadsNum;
  int32_t iIdx = 0;

  while (iIdx < kiThreadCount) {
    if (WelsThreadCreate (&pCtx->pSliceThreading->pThreadHandles[iIdx], CodingSliceThreadProc,
                          &pCtx->pSliceThreading->pThreadPEncCtx[iIdx], 0)) {
      return 1;
    }


    ++ iIdx;
  }
  MT_TRACE_LOG (pCtx, WELS_LOG_INFO, "CreateSliceThreads() exit..");
  return 0;
}
예제 #4
0
void CalcSliceComplexRatio (SDqLayer* pCurDq) {
  SSliceCtx* pSliceCtx          = &pCurDq->sSliceEncCtx;
  SSlice*    pSliceInLayer      = pCurDq->sLayerInfo.pSliceInLayer;
  int32_t iSumAv                = 0;
  const int32_t kiSliceCount    = pSliceCtx->iSliceNumInFrame;
  int32_t iSliceIdx             = 0;
  int32_t iAvI[MAX_SLICES_NUM];

  WelsEmms();

  while (iSliceIdx < kiSliceCount) {
    iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * pSliceInLayer[iSliceIdx].iCountMbNumInSlice,
                                      pSliceInLayer[iSliceIdx].uiSliceConsumeTime);
    MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), uiSliceConsumeTime[%d]= %d us, slice_run= %d",
                  iSliceIdx,
                  pSliceInLayer[iSliceIdx].uiSliceConsumeTime, pSliceInLayer[iSliceIdx].iCountMbNumInSlice);
    iSumAv += iAvI[iSliceIdx];

    ++ iSliceIdx;
  }
  while (-- iSliceIdx >= 0) {
    pSliceInLayer[iSliceIdx].iSliceComplexRatio = WELS_DIV_ROUND (INT_MULTIPLY * iAvI[iSliceIdx], iSumAv);
  }
}
void DynamicAdjustSlicing (sWelsEncCtx* pCtx,
                           SDqLayer* pCurDqLayer,
                           void* pComplexRatio,
                           int32_t iCurDid) {
  SSliceCtx* pSliceCtx	= pCurDqLayer->pSliceEncCtx;
  const int32_t kiCountSliceNum	= pSliceCtx->iSliceNumInFrame;
  const int32_t kiCountNumMb		= pSliceCtx->iMbNumInFrame;
  int32_t iMinimalMbNum			= pSliceCtx->iMbWidth;	// in theory we need only 1 SMB, here let it as one SMB row required
  int32_t iMaximalMbNum			= 0;	// dynamically assign later
  int32_t* pSliceComplexRatio	= (int32_t*)pComplexRatio;
  int32_t iMbNumLeft					= kiCountNumMb;
  int32_t iRunLen[MAX_THREADS_NUM]	= {0};
  int32_t iSliceIdx					= 0;

  int32_t iNumMbInEachGom = 0;
  SWelsSvcRc* pWelsSvcRc = &pCtx->pWelsSvcRc[iCurDid];
  if (pCtx->pSvcParam->iRCMode != RC_OFF_MODE) {
    iNumMbInEachGom = pWelsSvcRc->iNumberMbGom;

    if (iNumMbInEachGom <= 0) {
      WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
               "[MT] DynamicAdjustSlicing(), invalid iNumMbInEachGom= %d from RC, iDid= %d, iCountNumMb= %d", iNumMbInEachGom,
               iCurDid, kiCountNumMb);
      return;
    }

    // do not adjust in case no extra iNumMbInEachGom based left for slicing adjustment,
    // extra MB of non integrated GOM assigned at the last pSlice in default, keep up on early initial result.
    if (iNumMbInEachGom * kiCountSliceNum >= kiCountNumMb) {
      return;
    }
    iMinimalMbNum	= iNumMbInEachGom;
  }

  if (kiCountSliceNum < 2 || (kiCountSliceNum & 0x01))	// we need suppose uiSliceNum is even for multiple threading
    return;

  iMaximalMbNum	= kiCountNumMb - (kiCountSliceNum - 1) * iMinimalMbNum;

  WelsEmms();

  MT_TRACE_LOG (pCtx, WELS_LOG_DEBUG, "[MT] DynamicAdjustSlicing(), iDid= %d, iCountNumMb= %d", iCurDid, kiCountNumMb);

  iSliceIdx	= 0;
  while (iSliceIdx + 1 < kiCountSliceNum) {
    int32_t iNumMbAssigning = WELS_DIV_ROUND (kiCountNumMb * pSliceComplexRatio[iSliceIdx], INT_MULTIPLY);

    // GOM boundary aligned
    if (pCtx->pSvcParam->iRCMode != RC_OFF_MODE) {
      iNumMbAssigning = iNumMbAssigning / iNumMbInEachGom * iNumMbInEachGom;
    }

    // make sure one GOM at least in each pSlice for safe
    if (iNumMbAssigning < iMinimalMbNum)
      iNumMbAssigning	= iMinimalMbNum;
    else if (iNumMbAssigning > iMaximalMbNum)
      iNumMbAssigning	= iMaximalMbNum;

    assert (iNumMbAssigning > 0);

    iMbNumLeft -= iNumMbAssigning;
    if (iMbNumLeft <= 0) {	// error due to we can not support slice_skip now yet, do not adjust this time
      assert (0);
      return;
    }
    iRunLen[iSliceIdx]	= iNumMbAssigning;
    MT_TRACE_LOG (pCtx, WELS_LOG_DEBUG,
                  "[MT] DynamicAdjustSlicing(), uiSliceIdx= %d, pSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d",
                  iSliceIdx, pSliceComplexRatio[iSliceIdx] * 1.0f / INT_MULTIPLY, pSliceCtx->pCountMbNumInSlice[iSliceIdx],
                  iNumMbAssigning);
    ++ iSliceIdx;
    iMaximalMbNum	= iMbNumLeft - (kiCountSliceNum - iSliceIdx - 1) * iMinimalMbNum;	// get maximal num_mb in left parts
  }
  iRunLen[iSliceIdx] = iMbNumLeft;
  MT_TRACE_LOG (pCtx, WELS_LOG_DEBUG,
                "[MT] DynamicAdjustSlicing(), iSliceIdx= %d, pSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d",
                iSliceIdx, pSliceComplexRatio[iSliceIdx] * 1.0f / INT_MULTIPLY, pSliceCtx->pCountMbNumInSlice[iSliceIdx], iMbNumLeft);


  if (DynamicAdjustSlicePEncCtxAll (pSliceCtx, iRunLen) == 0) {
    const int32_t kiThreadNum	= pCtx->pSvcParam->iCountThreadsNum;
    int32_t iThreadIdx			= 0;
    do {
      WelsEventSignal (&pCtx->pSliceThreading->pUpdateMbListEvent[iThreadIdx]);
      WelsEventSignal (&pCtx->pSliceThreading->pThreadMasterEvent[iThreadIdx]);
      ++ iThreadIdx;
    } while (iThreadIdx < kiThreadNum);

    WelsMultipleEventsWaitAllBlocking (kiThreadNum, &pCtx->pSliceThreading->pFinUpdateMbListEvent[0]);
  }
}
예제 #6
0
// thread process for coding one pSlice
WELS_THREAD_ROUTINE_TYPE CodingSliceThreadProc (void* arg) {
  SSliceThreadPrivateData* pPrivateData = (SSliceThreadPrivateData*)arg;
  sWelsEncCtx* pEncPEncCtx      = NULL;
  SDqLayer* pCurDq              = NULL;
  SSlice* pSlice                = NULL;
  SWelsSliceBs* pSliceBs        = NULL;
  WELS_EVENT pEventsList[3];
  int32_t iEventCount           = 0;
  WELS_THREAD_ERROR_CODE iWaitRet = WELS_THREAD_ERROR_GENERAL;
  uint32_t uiThrdRet            = 0;
  int32_t iSliceSize            = 0;
  int32_t iSliceIdx             = -1;
  int32_t iThreadIdx            = -1;
  int32_t iEventIdx             = -1;
  bool bNeedPrefix              = false;
  EWelsNalUnitType eNalType     = NAL_UNIT_UNSPEC_0;
  EWelsNalRefIdc eNalRefIdc     = NRI_PRI_LOWEST;
  int32_t iReturn = ENC_RETURN_SUCCESS;

  if (NULL == pPrivateData)
    WELS_THREAD_ROUTINE_RETURN (1);

  pEncPEncCtx   = (sWelsEncCtx*)pPrivateData->pWelsPEncCtx;

  iThreadIdx    = pPrivateData->iThreadIndex;
  iEventIdx     = iThreadIdx;

  pEventsList[iEventCount++] = pEncPEncCtx->pSliceThreading->pReadySliceCodingEvent[iEventIdx];
  pEventsList[iEventCount++] = pEncPEncCtx->pSliceThreading->pExitEncodeEvent[iEventIdx];
  pEventsList[iEventCount++] = pEncPEncCtx->pSliceThreading->pUpdateMbListEvent[iEventIdx];

  WelsThreadSetName ("OpenH264Enc_CodingSliceThreadProc");

  do {
    MT_TRACE_LOG (pEncPEncCtx, WELS_LOG_INFO,
                  "[MT] CodingSliceThreadProc(), try to call WelsMultipleEventsWaitSingleBlocking(pEventsList= %p %p %p), pEncPEncCtx= %p!",
                  pEventsList[0], pEventsList[1], pEventsList[1], (void*)pEncPEncCtx);
    iWaitRet = WelsMultipleEventsWaitSingleBlocking (iEventCount,
               &pEventsList[0],
               &pEncPEncCtx->pSliceThreading->pThreadMasterEvent[iEventIdx]); // blocking until at least one event is signalled
    if (WELS_THREAD_ERROR_WAIT_OBJECT_0 == iWaitRet) { // start pSlice coding signal waited
      //int             iLayerIndex  = pEncPEncCtx->pOut->iLayerBsIndex;
      //SFrameBSInfo*   pFrameBsInfo = pPrivateData->pFrameBsInfo;
      //SLayerBSInfo*   pLbi = &pFrameBsInfo->sLayerInfo [iLayerIndex];

      const int32_t kiCurDid            = pEncPEncCtx->uiDependencyId;
      SWelsSvcCodingParam* pCodingParam = pEncPEncCtx->pSvcParam;
      SSpatialLayerConfig* pParamD      = &pCodingParam->sSpatialLayers[kiCurDid];

      pCurDq            = pEncPEncCtx->pCurDqLayer;
      eNalType          = pEncPEncCtx->eNalType;
      eNalRefIdc        = pEncPEncCtx->eNalPriority;
      bNeedPrefix       = pEncPEncCtx->bNeedPrefixNalFlag;

      if (pParamD->sSliceArgument.uiSliceMode != SM_SIZELIMITED_SLICE) {
        int64_t iSliceStart = 0;
        bool bDsaFlag = false;
        iSliceIdx               = pPrivateData->iSliceIndex;
        pSlice                  = &pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx];
        pSliceBs                = &pEncPEncCtx->pSliceBs[iSliceIdx];

        bDsaFlag = ((pParamD->sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE) &&
                    pCodingParam->iMultipleThreadIdc > 1 &&
                    pCodingParam->iMultipleThreadIdc >= pParamD->sSliceArgument.uiSliceNum);
        if (bDsaFlag)
          iSliceStart = WelsTime();

        pSliceBs->uiBsPos       = 0;
        pSliceBs->iNalIndex     = 0;
        assert ((void*) (&pSliceBs->sBsWrite) == (void*)pSlice->pSliceBsa);
        InitBits (&pSliceBs->sBsWrite, pSliceBs->pBsBuffer, pSliceBs->uiSize);

#if MT_DEBUG_BS_WR
        pSliceBs->bSliceCodedFlag = false;
#endif//MT_DEBUG_BS_WR

        if (bNeedPrefix) {
          if (eNalRefIdc != NRI_PRI_LOWEST) {
            WelsLoadNalForSlice (pSliceBs, NAL_UNIT_PREFIX, eNalRefIdc);
            WelsWriteSVCPrefixNal (&pSliceBs->sBsWrite, eNalRefIdc, (NAL_UNIT_CODED_SLICE_IDR == eNalType));
            WelsUnloadNalForSlice (pSliceBs);
          } else { // No Prefix NAL Unit RBSP syntax here, but need add NAL Unit Header extension
            WelsLoadNalForSlice (pSliceBs, NAL_UNIT_PREFIX, eNalRefIdc);
            // No need write any syntax of prefix NAL Unit RBSP here
            WelsUnloadNalForSlice (pSliceBs);
          }
        }

        WelsLoadNalForSlice (pSliceBs, eNalType, eNalRefIdc);

        iReturn = WelsCodeOneSlice (pEncPEncCtx, iSliceIdx, eNalType);
        if (ENC_RETURN_SUCCESS != iReturn) {
          uiThrdRet = iReturn;
          WELS_THREAD_SIGNAL_AND_BREAK (pEncPEncCtx->pSliceThreading->pSliceCodedEvent,
                                        pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent,
                                        iEventIdx);
        }

        WelsUnloadNalForSlice (pSliceBs);

        int32_t iLeftBufferSize = (iSliceIdx > 0) ?
                                  (pSliceBs->uiSize - pSliceBs->uiBsPos)
                                  : (pEncPEncCtx->iFrameBsSize - pEncPEncCtx->iPosBsBuffer);
        iReturn = WriteSliceBs (pEncPEncCtx, pSliceBs->pBs,
                                &pSliceBs->iNalLen[0],
                                iLeftBufferSize,
                                iSliceIdx, iSliceSize);
        if (ENC_RETURN_SUCCESS != iReturn) {
          uiThrdRet = iReturn;
          WELS_THREAD_SIGNAL_AND_BREAK (pEncPEncCtx->pSliceThreading->pSliceCodedEvent,
                                        pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent,
                                        iEventIdx);
        }
        if (0 == iSliceIdx) {
          pEncPEncCtx->iPosBsBuffer += iSliceSize;
        }

        pEncPEncCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, pEncPEncCtx->pFuncList, iSliceIdx);

        if (bDsaFlag) {
            pEncPEncCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx].uiSliceConsumeTime = (uint32_t) (
                WelsTime() - iSliceStart);
          MT_TRACE_LOG (& (pEncPEncCtx->sLogCtx), WELS_LOG_INFO,
                        "[MT] CodingSliceThreadProc(), coding_idx %d, uiSliceIdx %d, uiSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d",
                        pEncPEncCtx->iCodingIndex, iSliceIdx,
                        pEncPEncCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx].uiSliceConsumeTime, iSliceSize,
                        pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx].sSliceHeaderExt.sSliceHeader.iFirstMbInSlice,
                        pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx].iCountMbNumInSlice);
        }

#if defined(SLICE_INFO_OUTPUT)
        fprintf (stderr,
                 "@pSlice=%-6d sliceType:%c idc:%d size:%-6d\n",
                 iSliceIdx,
                 (pEncPEncCtx->eSliceType == P_SLICE ? 'P' : 'I'),
                 eNalRefIdc,
                 iSliceSize
                );
#endif//SLICE_INFO_OUTPUT

#if MT_DEBUG_BS_WR
        pSliceBs->bSliceCodedFlag = true;
#endif//MT_DEBUG_BS_WR

        WelsEventSignal (
          &pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx]); // mean finished coding current pSlice
        WelsEventSignal (
          &pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent);
      } else { // for SM_SIZELIMITED_SLICE parallelization
        SSliceCtx* pSliceCtx                    = &pCurDq->sSliceEncCtx;
        const int32_t kiPartitionId             = iThreadIdx;
        const int32_t kiSliceIdxStep            = pEncPEncCtx->iActiveThreadsNum;
        const int32_t kiFirstMbInPartition      = pPrivateData->iStartMbIndex;  // inclusive
        const int32_t kiEndMbInPartition        = pPrivateData->iEndMbIndex;            // exclusive
        int32_t iAnyMbLeftInPartition           = kiEndMbInPartition - kiFirstMbInPartition;

        iSliceIdx = pPrivateData->iSliceIndex;
        SSliceHeaderExt* pStartSliceHeaderExt                   = &pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx].sSliceHeaderExt;
        pStartSliceHeaderExt->sSliceHeader.iFirstMbInSlice      = kiFirstMbInPartition;
        pCurDq->pNumSliceCodedOfPartition[kiPartitionId]        =
          1;    // one pSlice per partition intialized, dynamic slicing inside
        pCurDq->pLastMbIdxOfPartition[kiPartitionId]            = kiEndMbInPartition - 1;

        pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]       = 0;

        while (iAnyMbLeftInPartition > 0) {
          if (iSliceIdx >= pSliceCtx->iMaxSliceNumConstraint) {
            // TODO: need exception handler for not large enough of MAX_SLICES_NUM related memory usage
            // No idea about its solution due MAX_SLICES_NUM is fixed lenght in relevent pData structure
            uiThrdRet = 1;
            WELS_THREAD_SIGNAL_AND_BREAK (pEncPEncCtx->pSliceThreading->pSliceCodedEvent,
                                          pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent,
                                          iEventIdx);
          }

          SetOneSliceBsBufferUnderMultithread (pEncPEncCtx, kiPartitionId, iSliceIdx);
          pSlice                = &pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx];
          pSliceBs              = &pEncPEncCtx->pSliceBs[iSliceIdx];

          pSliceBs->uiBsPos     = 0;
          pSliceBs->iNalIndex   = 0;
          InitBits (&pSliceBs->sBsWrite, pSliceBs->pBsBuffer, pSliceBs->uiSize);

          if (bNeedPrefix) {
            if (eNalRefIdc != NRI_PRI_LOWEST) {
              WelsLoadNalForSlice (pSliceBs, NAL_UNIT_PREFIX, eNalRefIdc);
              WelsWriteSVCPrefixNal (&pSliceBs->sBsWrite, eNalRefIdc, (NAL_UNIT_CODED_SLICE_IDR == eNalType));
              WelsUnloadNalForSlice (pSliceBs);
            } else { // No Prefix NAL Unit RBSP syntax here, but need add NAL Unit Header extension
              WelsLoadNalForSlice (pSliceBs, NAL_UNIT_PREFIX, eNalRefIdc);
              // No need write any syntax of prefix NAL Unit RBSP here
              WelsUnloadNalForSlice (pSliceBs);
            }
          }


          WelsLoadNalForSlice (pSliceBs, eNalType, eNalRefIdc);

          iReturn = WelsCodeOneSlice (pEncPEncCtx, iSliceIdx, eNalType);
          if (ENC_RETURN_SUCCESS != iReturn) {
            uiThrdRet = iReturn;
            WELS_THREAD_SIGNAL_AND_BREAK (pEncPEncCtx->pSliceThreading->pSliceCodedEvent,
                                          pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent,
                                          iEventIdx);
          }

          WelsUnloadNalForSlice (pSliceBs);

          int32_t iLeftBufferSize = (iSliceIdx > 0) ? (pSliceBs->uiSize - pSliceBs->uiBsPos) : (pEncPEncCtx->iFrameBsSize - pEncPEncCtx->iPosBsBuffer);
          iReturn = WriteSliceBs (pEncPEncCtx, pSliceBs->pBs, &pSliceBs->iNalLen[0],
                                    iLeftBufferSize,
                                    iSliceIdx, iSliceSize);
          if (ENC_RETURN_SUCCESS != iReturn) {
            uiThrdRet = iReturn;
            WELS_THREAD_SIGNAL_AND_BREAK (pEncPEncCtx->pSliceThreading->pSliceCodedEvent,
                                          pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent,
                                          iEventIdx);
          }
          if (0 == iSliceIdx) {
            pEncPEncCtx->iPosBsBuffer += iSliceSize;
          }

          pEncPEncCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, pEncPEncCtx->pFuncList, iSliceIdx);

#if defined(SLICE_INFO_OUTPUT)
          fprintf (stderr,
                   "@pSlice=%-6d sliceType:%c idc:%d size:%-6d\n",
                   iSliceIdx,
                   (pEncPEncCtx->eSliceType == P_SLICE ? 'P' : 'I'),
                   eNalRefIdc,
                   iSliceSize
                  );
#endif//SLICE_INFO_OUTPUT

          MT_TRACE_LOG (pEncPEncCtx, WELS_LOG_INFO,
                        "[MT] CodingSliceThreadProc(), coding_idx %d, iPartitionId %d, uiSliceIdx %d, iSliceSize %d, count_mb_slice %d, iEndMbInPartition %d, pCurDq->pLastCodedMbIdxOfPartition[%d] %d\n",
                        pEncPEncCtx->iCodingIndex, kiPartitionId, iSliceIdx, iSliceSize,
                        pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx].iCountMbNumInSlice,
                        kiEndMbInPartition, kiPartitionId, pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]);

          iAnyMbLeftInPartition = kiEndMbInPartition - (1 + pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]);
          iSliceIdx += kiSliceIdxStep;
        }

        if (uiThrdRet) { // any exception??
          WELS_THREAD_SIGNAL_AND_BREAK (pEncPEncCtx->pSliceThreading->pSliceCodedEvent,
                                        pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent,
                                        iEventIdx);
        }

        WelsEventSignal (&pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx]); // mean finished coding current pSlice
        WelsEventSignal (&pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent);
      }
    } else if (WELS_THREAD_ERROR_WAIT_OBJECT_0 + 1 == iWaitRet) { // exit thread signal
      uiThrdRet = 0;
      break;
    } else if (WELS_THREAD_ERROR_WAIT_OBJECT_0 + 2 == iWaitRet) { // update pMb list singal
      iSliceIdx =
        iEventIdx; // pPrivateData->iSliceIndex; old threads can not be terminated, pPrivateData is not correct for applicable
      pCurDq = pEncPEncCtx->pCurDqLayer;
      UpdateMbListNeighborParallel (pCurDq, pCurDq->sMbDataP, iSliceIdx);
      WelsEventSignal (
        &pEncPEncCtx->pSliceThreading->pFinUpdateMbListEvent[iEventIdx]); // mean finished update pMb list for this pSlice
    } else { // WELS_THREAD_ERROR_WAIT_TIMEOUT, or WELS_THREAD_ERROR_WAIT_FAILED
      WelsLog (& (pEncPEncCtx->sLogCtx), WELS_LOG_WARNING,
               "[MT] CodingSliceThreadProc(), waiting pReadySliceCodingEvent[%d] failed(%d) and thread%d terminated!", iEventIdx,
               iWaitRet, iThreadIdx);
      uiThrdRet = 1;
      break;
    }
  } while (1);

  //sync multi-threading error
  WelsMutexLock (&pEncPEncCtx->mutexEncoderError);
  if (uiThrdRet) pEncPEncCtx->iEncoderError |= uiThrdRet;
  WelsMutexUnlock (&pEncPEncCtx->mutexEncoderError);

  WELS_THREAD_ROUTINE_RETURN (uiThrdRet);
}
예제 #7
0
int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingParam, const int32_t iCountBsLen,
                           const int32_t iMaxSliceBufferSize, bool bDynamicSlice) {
  CMemoryAlign* pMa             = NULL;
  SWelsSvcCodingParam* pPara = NULL;
  SSliceThreading* pSmt         = NULL;
  int32_t iNumSpatialLayers     = 0;
  int32_t iThreadNum            = 0;
  int32_t iIdx                  = 0;
  int16_t iMaxSliceNum          = 1;
  int32_t iReturn = ENC_RETURN_SUCCESS;
  bool bWillUseTaskManage = false;

  if (NULL == ppCtx || NULL == pCodingParam || NULL == *ppCtx || iCountBsLen <= 0)
    return 1;

  pMa = (*ppCtx)->pMemAlign;
  pPara = pCodingParam;
  iNumSpatialLayers = pPara->iSpatialLayerNum;
  iThreadNum = pPara->iCountThreadsNum;
  iMaxSliceNum = (*ppCtx)->iMaxSliceCount;

  pSmt = (SSliceThreading*)pMa->WelsMalloc (sizeof (SSliceThreading), "SSliceThreading");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt), FreeMemorySvc (ppCtx))
  (*ppCtx)->pSliceThreading = pSmt;
  pSmt->pThreadPEncCtx = (SSliceThreadPrivateData*)pMa->WelsMalloc (sizeof (SSliceThreadPrivateData) * iThreadNum,
                         "pThreadPEncCtx");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pThreadPEncCtx), FreeMemorySvc (ppCtx))

#ifdef _WIN32
  // Dummy event namespace, the windows events don't actually use this
  WelsSnprintf (pSmt->eventNamespace, sizeof (pSmt->eventNamespace), "%p", (void*) *ppCtx);
#else
  WelsSnprintf (pSmt->eventNamespace, sizeof (pSmt->eventNamespace), "%p%x", (void*) *ppCtx, getpid());
#endif//!_WIN32

  iIdx = 0;
  while (iIdx < iNumSpatialLayers) {
    SSliceArgument* pSliceArgument = &pPara->sSpatialLayers[iIdx].sSliceArgument;
    if (pSliceArgument->uiSliceMode == SM_FIXEDSLCNUM_SLICE || pSliceArgument->uiSliceMode == SM_RASTER_SLICE) {
      bWillUseTaskManage = true;
    }
    ++ iIdx;
  }

#ifdef MT_DEBUG
  // file handle for MT debug
  pSmt->pFSliceDiff = NULL;

  if (pSmt->pFSliceDiff) {
    fclose (pSmt->pFSliceDiff);
    pSmt->pFSliceDiff = NULL;
  }
  pSmt->pFSliceDiff = fopen ("slice_time.txt", "wt+");
#endif//MT_DEBUG

  MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "encpEncCtx= 0x%p", (void*) *ppCtx);

  char name[SEM_NAME_MAX] = {0};
  WELS_GCC_UNUSED WELS_THREAD_ERROR_CODE err = 0;

  iIdx = 0;
  while (iIdx < iThreadNum) {
    pSmt->pThreadPEncCtx[iIdx].pWelsPEncCtx     = (void*) *ppCtx;
    pSmt->pThreadPEncCtx[iIdx].iSliceIndex      = iIdx;
    pSmt->pThreadPEncCtx[iIdx].iThreadIndex     = iIdx;
    pSmt->pThreadHandles[iIdx]                  = 0;

    WelsSnprintf (name, SEM_NAME_MAX, "ee%d%s", iIdx, pSmt->eventNamespace);
    err = WelsEventOpen (&pSmt->pExitEncodeEvent[iIdx], name);
    MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pExitEncodeEvent%d named(%s) ret%d err%d", iIdx, name, err, errno);
    WelsSnprintf (name, SEM_NAME_MAX, "tm%d%s", iIdx, pSmt->eventNamespace);
    err = WelsEventOpen (&pSmt->pThreadMasterEvent[iIdx], name);
    MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pThreadMasterEvent%d named(%s) ret%d err%d", iIdx, name, err, errno);
    // length of semaphore name should be system constrained at least on mac 10.7
    WelsSnprintf (name, SEM_NAME_MAX, "ud%d%s", iIdx, pSmt->eventNamespace);
    err = WelsEventOpen (&pSmt->pUpdateMbListEvent[iIdx], name);
    MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pUpdateMbListEvent%d named(%s) ret%d err%d", iIdx, name, err, errno);
    WelsSnprintf (name, SEM_NAME_MAX, "fu%d%s", iIdx, pSmt->eventNamespace);
    err = WelsEventOpen (&pSmt->pFinUpdateMbListEvent[iIdx], name);
    MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pFinUpdateMbListEvent%d named(%s) ret%d err%d", iIdx, name, err,
                  errno);
    WelsSnprintf (name, SEM_NAME_MAX, "sc%d%s", iIdx, pSmt->eventNamespace);
    err = WelsEventOpen (&pSmt->pSliceCodedEvent[iIdx], name);
    MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pSliceCodedEvent%d named(%s) ret%d err%d", iIdx, name, err, errno);
    WelsSnprintf (name, SEM_NAME_MAX, "rc%d%s", iIdx, pSmt->eventNamespace);
    err = WelsEventOpen (&pSmt->pReadySliceCodingEvent[iIdx], name);
    MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pReadySliceCodingEvent%d = 0x%p named(%s) ret%d err%d", iIdx,
                  (void*)pSmt->pReadySliceCodingEvent[iIdx], name, err, errno);

    pSmt->pThreadBsBuffer[iIdx] = (uint8_t*)pMa->WelsMalloc (iCountBsLen, "pSmt->pThreadBsBuffer");
    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pThreadBsBuffer[iIdx]), FreeMemorySvc (ppCtx))

    ++ iIdx;
  }
  for (; iIdx < MAX_THREADS_NUM; iIdx++) {
    pSmt->pThreadBsBuffer[iIdx] = NULL;
  }

  //previous conflict
  WelsSnprintf (name, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace);
  err = WelsEventOpen (&pSmt->pSliceCodedMasterEvent, name);
  MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pSliceCodedMasterEvent named(%s) ret%d err%d", name, err, errno);
  //previous conflict ends

  iReturn = SetMultiSliceBuffer (ppCtx, pMa, pSmt, iMaxSliceNum,
                                 iMaxSliceBufferSize,
                                 iCountBsLen,
                                 bDynamicSlice);
  WELS_VERIFY_RETURN_PROC_IF (iReturn, (ENC_RETURN_SUCCESS != iReturn), FreeMemorySvc (ppCtx))

  iReturn = WelsMutexInit (&pSmt->mutexSliceNumUpdate);
  WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx))

  if (bWillUseTaskManage) {
    (*ppCtx)->pTaskManage = IWelsTaskManage::CreateTaskManage (*ppCtx, iNumSpatialLayers, bDynamicSlice);
    WELS_VERIFY_RETURN_PROC_IF (iReturn, (NULL == (*ppCtx)->pTaskManage), FreeMemorySvc (ppCtx))
  }

  memset (&pSmt->bThreadBsBufferUsage, 0, MAX_THREADS_NUM * sizeof (bool));
  iReturn = WelsMutexInit (&pSmt->mutexThreadBsBufferUsage);
  WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx))

  iReturn = WelsMutexInit (& (*ppCtx)->mutexEncoderError);
  WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx))

  MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "RequestMtResource(), iThreadNum=%d, iCountSliceNum= %d",
                pPara->iCountThreadsNum,
                iMaxSliceNum);

  return 0;
}