/*
 * DC prediction for an 8x8 chroma block, written to a packed destination.
 *
 * pPred    - destination; 64 bytes are written as 8 rows of 8 bytes each.
 * pRef     - top-left sample of the block in the reference buffer; the row
 *            above it (pRef - kiStride) and the column left of it (pRef - 1)
 *            are read.
 * kiStride - distance in bytes between consecutive rows of pRef.
 *
 * Each 4x4 quadrant receives its own rounded mean:
 *   top-left     : the 4 samples above it and the 4 samples left of it
 *   top-right    : the 4 samples above it
 *   bottom-left  : the 4 samples left of it
 *   bottom-right : the 4 above-right samples and the 4 lower-left samples
 */
void WelsIChromaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const uint8_t* pAbove = pRef - kiStride;
  const uint8_t* pLeft  = pRef - 1;

  /* Partial sums over each group of four neighbouring samples. */
  const uint32_t kuiAboveL = pAbove[0] + pAbove[1] + pAbove[2] + pAbove[3];
  const uint32_t kuiAboveR = pAbove[4] + pAbove[5] + pAbove[6] + pAbove[7];
  const uint32_t kuiLeftU  = pLeft[0] + pLeft[kiStride] + pLeft[2 * kiStride] + pLeft[3 * kiStride];
  const uint32_t kuiLeftD  = pLeft[4 * kiStride] + pLeft[5 * kiStride] + pLeft[6 * kiStride] + pLeft[7 * kiStride];

  /* Rounded means, one per quadrant. */
  const uint8_t kuiDcTL = (kuiAboveL + kuiLeftU + 4) >> 3;
  const uint8_t kuiDcTR = (kuiAboveR + 2) >> 2;
  const uint8_t kuiDcBL = (kuiLeftD + 2) >> 2;
  const uint8_t kuiDcBR = (kuiAboveR + kuiLeftD + 4) >> 3;

  /* One 8-byte row pattern for the upper half, one for the lower half. */
  const uint8_t kuiUpperRow[8] = {kuiDcTL, kuiDcTL, kuiDcTL, kuiDcTL, kuiDcTR, kuiDcTR, kuiDcTR, kuiDcTR};
  const uint8_t kuiLowerRow[8] = {kuiDcBL, kuiDcBL, kuiDcBL, kuiDcBL, kuiDcBR, kuiDcBR, kuiDcBR, kuiDcBR};
  const uint64_t kuiUpper64 = LD64 (kuiUpperRow);
  const uint64_t kuiLower64 = LD64 (kuiLowerRow);
  int32_t iRow;

  for (iRow = 0; iRow < 8; iRow++) {
    const uint64_t kuiFill = (iRow < 4) ? kuiUpper64 : kuiLower64;
    ST64 (pPred + 8 * iRow, kuiFill);
  }
}
Example #2
0
/*
 * In-place DC prediction for an 8x8 chroma block.
 *
 * pPred    - top-left sample of the block; the row above it
 *            (pPred - kiStride) and the column left of it (pPred - 1) are
 *            read, then 8 rows of 8 bytes are written starting at pPred.
 * kiStride - distance in bytes between consecutive rows of pPred.
 *
 * Each 4x4 quadrant receives its own rounded mean:
 *   top-left     : the 4 samples above it and the 4 samples left of it
 *   top-right    : the 4 samples above it
 *   bottom-left  : the 4 samples left of it
 *   bottom-right : the 4 above-right samples and the 4 lower-left samples
 * All neighbour samples are read before the first store.
 */
void WelsIChromaPredDc_c (uint8_t* pPred, const int32_t kiStride) {
  const uint8_t* pAbove = pPred - kiStride;
  const uint8_t* pLeft  = pPred - 1;

  /* Partial sums over each group of four neighbouring samples. */
  const uint32_t kuiAboveL = pAbove[0] + pAbove[1] + pAbove[2] + pAbove[3];
  const uint32_t kuiAboveR = pAbove[4] + pAbove[5] + pAbove[6] + pAbove[7];
  const uint32_t kuiLeftU  = pLeft[0] + pLeft[kiStride] + pLeft[2 * kiStride] + pLeft[3 * kiStride];
  const uint32_t kuiLeftD  = pLeft[4 * kiStride] + pLeft[5 * kiStride] + pLeft[6 * kiStride] + pLeft[7 * kiStride];

  /* Rounded means, one per quadrant. */
  const uint8_t kuiDcTL = (kuiAboveL + kuiLeftU + 4) >> 3;
  const uint8_t kuiDcTR = (kuiAboveR + 2) >> 2;
  const uint8_t kuiDcBL = (kuiLeftD + 2) >> 2;
  const uint8_t kuiDcBR = (kuiAboveR + kuiLeftD + 4) >> 3;

  const uint8_t kuiUpperRow[8] = {kuiDcTL, kuiDcTL, kuiDcTL, kuiDcTL, kuiDcTR, kuiDcTR, kuiDcTR, kuiDcTR};
  const uint8_t kuiLowerRow[8] = {kuiDcBL, kuiDcBL, kuiDcBL, kuiDcBL, kuiDcBR, kuiDcBR, kuiDcBR, kuiDcBR};
  const uint64_t kuiUpper64 = LD64 (kuiUpperRow);
  const uint64_t kuiLower64 = LD64 (kuiLowerRow);
  int32_t iRow;

  /* Rows are written top to bottom, one stride apart. */
  for (iRow = 0; iRow < 8; iRow++) {
    const uint64_t kuiFill = (iRow < 4) ? kuiUpper64 : kuiLower64;
    ST64 (pPred + iRow * kiStride, kuiFill);
  }
}
Example #3
0
/*
 * Copies a block that is 16 bytes wide and iHeight rows tall.
 *
 * pSrc / iSrcStride - source pointer and its row pitch in bytes.
 * pDst / iDstStride - destination pointer and its row pitch in bytes.
 * iHeight           - number of rows to copy; nothing is copied when <= 0.
 *
 * Each row is moved as two 8-byte load/store pairs, left half first.
 */
static inline void McCopyWidthEq16_c (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                        int32_t iHeight) {
  int32_t iRowsLeft = iHeight;

  while (iRowsLeft > 0) {
    ST64 (pDst, LD64 (pSrc));
    ST64 (pDst + 8, LD64 (pSrc + 8));

    pSrc += iSrcStride;
    pDst += iDstStride;
    --iRowsLeft;
  }
}
/*
 * Vertical prediction for a 16x16 luma block, written to a packed destination.
 *
 * pPred    - destination; 256 bytes are written as 16 rows of 16 bytes each.
 * pRef     - top-left sample of the block in the reference buffer; only the
 *            16 samples of the row above it (pRef - kiStride) are read.
 * kiStride - distance in bytes between consecutive rows of pRef.
 *
 * Every output row is a copy of the row above the block.
 */
void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const uint8_t* pAbove = pRef - kiStride;
  const uint64_t kuiAboveLo = LD64 (pAbove);
  const uint64_t kuiAboveHi = LD64 (pAbove + 8);
  int32_t iRow;

  for (iRow = 0; iRow < 16; iRow++) {
    uint8_t* pLine = pPred + 16 * iRow;
    ST64 (pLine, kuiAboveLo);
    ST64 (pLine + 8, kuiAboveHi);
  }
}
Example #5
0
/*
 * In-place vertical prediction for a 16x16 luma block.
 *
 * pPred    - top-left sample of the block; the 16 samples of the row above
 *            it (pPred - kiStride) are read and replicated into 16 rows
 *            starting at pPred.
 * kiStride - distance in bytes between consecutive rows of pPred.
 *
 * The row above is loaded once up front; rows are then written from the
 * bottom row (15) up to the top row (0).
 */
void WelsI16x16LumaPredV_c (uint8_t* pPred, const int32_t kiStride) {
  const uint8_t* pAbove = pPred - kiStride;
  const uint64_t kuiAboveLo = LD64 (pAbove);
  const uint64_t kuiAboveHi = LD64 (pAbove + 8);
  int32_t iRow;

  for (iRow = 15; iRow >= 0; iRow--) {
    uint8_t* pLine = pPred + iRow * kiStride;
    ST64 (pLine, kuiAboveLo);
    ST64 (pLine + 8, kuiAboveHi);
  }
}
/*
 * Vertical prediction for an 8x8 chroma block, written to a packed destination.
 *
 * pPred    - destination; 64 bytes are written as 8 rows of 8 bytes each.
 * pRef     - top-left sample of the block in the reference buffer; only the
 *            8 samples of the row above it (pRef - kiStride) are read.
 * kiStride - distance in bytes between consecutive rows of pRef.
 *
 * Every output row is a copy of the row above the block.
 */
void WelsIChromaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const uint64_t kuiAbove64 = LD64 (pRef - kiStride);
  int32_t iRow;

  for (iRow = 0; iRow < 8; iRow++) {
    ST64 (pPred + 8 * iRow, kuiAbove64);
  }
}
/*
 * DC prediction for an 8x8 chroma block using only the row above it,
 * written to a packed destination.
 *
 * pPred    - destination; 64 bytes are written as 8 rows of 8 bytes each.
 * pRef     - top-left sample of the block in the reference buffer; only the
 *            8 samples of the row above it (pRef - kiStride) are read.
 * kiStride - distance in bytes between consecutive rows of pRef.
 *
 * The left four columns get the rounded mean of the four samples above them,
 * the right four columns the rounded mean of the four samples above those.
 */
void WelsIChromaPredDcTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const uint8_t* pAbove = pRef - kiStride;
  const uint8_t kuiDcLeft  = (pAbove[0] + pAbove[1] + pAbove[2] + pAbove[3] + 2) >> 2;
  const uint8_t kuiDcRight = (pAbove[4] + pAbove[5] + pAbove[6] + pAbove[7] + 2) >> 2;
  const uint8_t kuiRow[8] = {kuiDcLeft, kuiDcLeft, kuiDcLeft, kuiDcLeft, kuiDcRight, kuiDcRight, kuiDcRight, kuiDcRight};
  const uint64_t kuiRow64 = LD64 (kuiRow);
  int32_t iRow;

  for (iRow = 0; iRow < 8; iRow++) {
    ST64 (pPred + 8 * iRow, kuiRow64);
  }
}
Example #8
0
/*
 * In-place DC prediction for an 8x8 chroma block using only the row above it.
 *
 * pPred    - top-left sample of the block; the 8 samples of the row above it
 *            (pPred - kiStride) are read, then 8 rows of 8 bytes are written
 *            starting at pPred.
 * kiStride - distance in bytes between consecutive rows of pPred.
 *
 * The left four columns get the rounded mean of the four samples above them,
 * the right four columns the rounded mean of the four samples above those.
 * Rows are written from the bottom row (7) up to the top row (0).
 */
void WelsIChromaPredDcTop_c (uint8_t* pPred, const int32_t kiStride) {
  const uint8_t* pAbove = pPred - kiStride;
  const uint8_t kuiDcLeft  = (pAbove[0] + pAbove[1] + pAbove[2] + pAbove[3] + 2) >> 2;
  const uint8_t kuiDcRight = (pAbove[4] + pAbove[5] + pAbove[6] + pAbove[7] + 2) >> 2;
  const uint8_t kuiRow[8] = {kuiDcLeft, kuiDcLeft, kuiDcLeft, kuiDcLeft, kuiDcRight, kuiDcRight, kuiDcRight, kuiDcRight};
  /* The pattern lives in a local array, so its packed value is the same for every row. */
  const uint64_t kuiRow64 = LD64 (kuiRow);
  int32_t iRow;

  for (iRow = 7; iRow >= 0; iRow--) {
    ST64 (pPred + iRow * kiStride, kuiRow64);
  }
}
Example #9
0
/*
 * Fills the list-0 motion-vector cache (iMvArray[0]) and reference-index
 * cache (iRefIdxArray[0]) from the macroblocks neighbouring the current one.
 * WelsFillCacheNonZeroCount() is called first, with pNonZeroCount passed
 * straight through to it.
 *
 * For each of the left, left-top, top and right-top neighbours:
 *   - available and inter : its motion vectors / reference indices are copied
 *   - otherwise           : the motion vectors are zeroed and the reference
 *                           index is REF_NOT_AVAIL when the neighbour is not
 *                           available, or REF_NOT_IN_LIST when it is available
 *                           but not inter
 *
 * Cache slots used: 0 (left-top), 1..4 (top), 5 (right-top) and
 * 6/12/18/24 (left).  Slots 9, 21, 11, 17 and 23 are always zeroed and
 * marked REF_NOT_AVAIL at the end.
 */
void WelsFillCacheInter (PNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
                         int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer) {
  const int32_t kiFixedUnavail[5] = { 9, 21, 11, 17, 23 };
  const int32_t kiCurXy = pCurLayer->iMbXyIndex;
  int32_t iTopXy;
  int32_t iLeftXy;
  int32_t iLeftTopXy;
  int32_t iRightTopXy;
  int8_t  iNoRef;
  int32_t i;

  //stuff non_zero_coeff_count from pNeighAvail(left and top)
  WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);

  // macroblock index of each neighbour; stays 0 (and unused) when that neighbour is unavailable
  iTopXy      = pNeighAvail->iTopAvail      ? (kiCurXy - pCurLayer->iMbWidth)     : 0;
  iLeftXy     = pNeighAvail->iLeftAvail     ? (kiCurXy - 1)                       : 0;
  iLeftTopXy  = pNeighAvail->iLeftTopAvail  ? (kiCurXy - 1 - pCurLayer->iMbWidth) : 0;
  iRightTopXy = pNeighAvail->iRightTopAvail ? (kiCurXy + 1 - pCurLayer->iMbWidth) : 0;

  // left neighbour: its blocks 3, 7, 11, 15 feed cache slots 6, 12, 18, 24
  if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
    for (i = 0; i < 4; i++) {
      ST32 (iMvArray[0][6 + 6 * i], LD32 (pCurLayer->pMv[0][iLeftXy][3 + 4 * i]));
    }
    for (i = 0; i < 4; i++) {
      iRefIdxArray[0][6 + 6 * i] = pCurLayer->pRefIndex[0][iLeftXy][3 + 4 * i];
    }
  } else {
    for (i = 0; i < 4; i++) {
      ST32 (iMvArray[0][6 + 6 * i], 0);
    }
    // not available -> REF_NOT_AVAIL; available but not inter -> REF_NOT_IN_LIST
    iNoRef = pNeighAvail->iLeftAvail ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    for (i = 0; i < 4; i++) {
      iRefIdxArray[0][6 + 6 * i] = iNoRef;
    }
  }

  // left-top neighbour: its block 15 feeds cache slot 0
  if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
    ST32 (iMvArray[0][0], LD32 (pCurLayer->pMv[0][iLeftTopXy][15]));
    iRefIdxArray[0][0] = pCurLayer->pRefIndex[0][iLeftTopXy][15];
  } else {
    ST32 (iMvArray[0][0], 0);
    iNoRef = pNeighAvail->iLeftTopAvail ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    iRefIdxArray[0][0] = iNoRef;
  }

  // top neighbour: its blocks 12..15 feed cache slots 1..4 (two MVs per 64-bit copy,
  // four reference indices in one 32-bit copy)
  if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
    ST64 (iMvArray[0][1], LD64 (pCurLayer->pMv[0][iTopXy][12]));
    ST64 (iMvArray[0][3], LD64 (pCurLayer->pMv[0][iTopXy][14]));
    ST32 (&iRefIdxArray[0][1], LD32 (&pCurLayer->pRefIndex[0][iTopXy][12]));
  } else {
    ST64 (iMvArray[0][1], 0);
    ST64 (iMvArray[0][3], 0);
    iNoRef = pNeighAvail->iTopAvail ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    for (i = 1; i <= 4; i++) {
      iRefIdxArray[0][i] = iNoRef;
    }
  }

  // right-top neighbour: its block 12 feeds cache slot 5
  if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
    ST32 (iMvArray[0][5], LD32 (pCurLayer->pMv[0][iRightTopXy][12]));
    iRefIdxArray[0][5] = pCurLayer->pRefIndex[0][iRightTopXy][12];
  } else {
    ST32 (iMvArray[0][5], 0);
    iNoRef = pNeighAvail->iRightTopAvail ? REF_NOT_IN_LIST : REF_NOT_AVAIL;
    iRefIdxArray[0][5] = iNoRef;
  }

  //right-top 4*4 block unavailable
  for (i = 0; i < 5; i++) {
    ST32 (iMvArray[0][kiFixedUnavail[i]], 0);
  }
  for (i = 0; i < 5; i++) {
    iRefIdxArray[0][kiFixedUnavail[i]] = REF_NOT_AVAIL;
  }
}
/*
 * Fills the 16 bytes at pPred with the single byte value kuiSrc,
 * using two 8-byte stores.
 */
static inline void WelsFillingPred1to16_c (uint8_t* pPred, const uint8_t kuiSrc) {
  /* Multiplying by 0x01 in every byte lane copies kuiSrc into all eight bytes. */
  const uint64_t kuiSplat = (uint64_t) kuiSrc * 0x0101010101010101ULL;

  ST64 (pPred, kuiSplat);
  ST64 (pPred + 8, kuiSplat);
}
/*
 * Copies 16 bytes from pSrc to pPred as two 8-byte load/store pairs,
 * lower half first.
 */
static inline void WelsFillingPred8x2to16_c (uint8_t* pPred, uint8_t* pSrc) {
  int32_t iOffset;

  for (iOffset = 0; iOffset < 16; iOffset += 8) {
    ST64 (pPred + iOffset, LD64 (pSrc + iOffset));
  }
}