예제 #1
0
/*
 * Filters every luma edge of one macroblock.
 *
 * The left and top macroblock borders go through FilteringEdgeLumaIntraV /
 * FilteringEdgeLumaIntraH (only when the matching neighbour flag is set); the
 * three inner edges in each direction go through the LT4 kernels with all four
 * boundary-strength bytes set to 3.
 *
 * pfDeblocking  kernel table used for the inner edges and forwarded to the
 *               border-edge helpers.
 * pCurMb        current macroblock. The left neighbour is read as
 *               (pCurMb - 1), the top neighbour as (pCurMb - iMbStride).
 * pFilter       filter state. uiLumaQP is overwritten: set to the rounded
 *               average of the two QPs for a border edge and reset to the
 *               current macroblock QP afterwards (its value on return).
 */
void FilteringEdgeLumaHV (DeblockingFunc* pfDeblocking, SMB* pCurMb, SDeblockingFilter* pFilter) {
  int32_t iLineSize  = pFilter->iCsStride[0];
  int32_t iMbStride = pFilter->iMbStride;

  uint8_t*  pDestY;
  int8_t   iCurQp;
  int32_t  iIdexA, iAlpha, iBeta;

  int32_t iMbX = pCurMb->iMbX;
  int32_t iMbY = pCurMb->iMbY;

  // Entry [0]: a neighbour exists. Entry [1]: it exists and has the same uiSliceIdc.
  // The && keeps the neighbour from being dereferenced when it does not exist.
  bool bLeftBsValid[2] = { (iMbX > 0), ((iMbX > 0) && (pCurMb->uiSliceIdc == (pCurMb - 1)->uiSliceIdc))};
  bool bTopBsValid[2]  = { (iMbY > 0), ((iMbY > 0) && (pCurMb->uiSliceIdc == (pCurMb - iMbStride)->uiSliceIdc))};

  // NOTE(review): assumes pFilter->uiFilterIdc is 0 or 1 here; any other value indexes past the tables.
  int32_t iLeftFlag = bLeftBsValid[pFilter->uiFilterIdc];
  int32_t iTopFlag  = bTopBsValid[pFilter->uiFilterIdc];

  ENFORCE_STACK_ALIGN_1D (int8_t,  iTc,   4, 16);
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);

  pDestY  = pFilter->pCsData[0];
  iCurQp  = pCurMb->uiLumaQp;

  // Boundary strength 3 for each of the four segments. Written byte by byte
  // instead of through a uint32_t* cast so the uint8_t buffer is only ever
  // accessed with its own type.
  uiBSx4[0] = 3;
  uiBSx4[1] = 3;
  uiBSx4[2] = 3;
  uiBSx4[3] = 3;

  // luma v
  if (iLeftFlag) {
    pFilter->uiLumaQP = (iCurQp + (pCurMb - 1)->uiLumaQp + 1) >> 1;
    FilteringEdgeLumaIntraV (pfDeblocking, pFilter, pDestY, iLineSize, NULL);
  }

  // Inner edges use the current macroblock QP; iAlpha / iBeta / iTc computed
  // here are reused unchanged for the second direction below.
  pFilter->uiLumaQP   = iCurQp;
  GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
                          iBeta);
  if (iAlpha | iBeta) {
    TC0_TBL_LOOKUP (iTc, iIdexA, uiBSx4, 0);
    pfDeblocking->pfLumaDeblockingLT4Hor (&pDestY[1 << 2], iLineSize, iAlpha, iBeta, iTc);
    pfDeblocking->pfLumaDeblockingLT4Hor (&pDestY[2 << 2], iLineSize, iAlpha, iBeta, iTc);
    pfDeblocking->pfLumaDeblockingLT4Hor (&pDestY[3 << 2], iLineSize, iAlpha, iBeta, iTc);

  }

  // luma h
  if (iTopFlag) {
    pFilter->uiLumaQP   = (iCurQp   + (pCurMb - iMbStride)->uiLumaQp + 1) >> 1;
    FilteringEdgeLumaIntraH (pfDeblocking, pFilter, pDestY, iLineSize, NULL);
  }

  pFilter->uiLumaQP   = iCurQp;
  if (iAlpha | iBeta) {
    pfDeblocking->pfLumaDeblockingLT4Ver (&pDestY[ (1 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
    pfDeblocking->pfLumaDeblockingLT4Ver (&pDestY[ (2 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
    pfDeblocking->pfLumaDeblockingLT4Ver (&pDestY[ (3 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
  }
}
/*vertical pLeft*/
void WelsI4x4LumaPredVL_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  /*get pTop*/
  const uint8_t kuiT0		= pRef[-kiStride];
  const uint8_t kuiT1		= pRef[1 - kiStride];
  const uint8_t kuiT2		= pRef[2 - kiStride];
  const uint8_t kuiT3		= pRef[3 - kiStride];
  const uint8_t kuiT4		= pRef[4 - kiStride];
  const uint8_t kuiT5		= pRef[5 - kiStride];
  const uint8_t kuiT6		= pRef[6 - kiStride];
  const uint8_t kuiVL0	= (1 + kuiT0 + kuiT1) >> 1;				// uiVL0
  const uint8_t kuiVL1	= (1 + kuiT1 + kuiT2) >> 1;				// uiVL1
  const uint8_t kuiVL2	= (1 + kuiT2 + kuiT3) >> 1;				// uiVL2
  const uint8_t kuiVL3	= (1 + kuiT3 + kuiT4) >> 1;				// uiVL3
  const uint8_t kuiVL4	= (1 + kuiT4 + kuiT5) >> 1;				// uiVL4
  const uint8_t kuiVL5	= (2 + kuiT0 + (kuiT1 << 1) + kuiT2) >> 2;	// uiVL5
  const uint8_t kuiVL6	= (2 + kuiT1 + (kuiT2 << 1) + kuiT3) >> 2;	// uiVL6
  const uint8_t kuiVL7	= (2 + kuiT2 + (kuiT3 << 1) + kuiT4) >> 2;	// uiVL7
  const uint8_t kuiVL8	= (2 + kuiT3 + (kuiT4 << 1) + kuiT5) >> 2;	// uiVL8
  const uint8_t kuiVL9	= (2 + kuiT4 + (kuiT5 << 1) + kuiT6) >> 2;	// uiVL9
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16)	// TobeCont'd about assign opt as follows
  uiSrc[0] = kuiVL0;
  uiSrc[1] = uiSrc[8] = kuiVL1;
  uiSrc[2] = uiSrc[9] = kuiVL2;
  uiSrc[3] = uiSrc[10] = kuiVL3;
  uiSrc[4] = kuiVL5;
  uiSrc[5] = uiSrc[12] = kuiVL6;
  uiSrc[6] = uiSrc[13] = kuiVL7;
  uiSrc[7] = uiSrc[14] = kuiVL8;
  uiSrc[11] = kuiVL4;
  uiSrc[15] = kuiVL9;

  WelsFillingPred8x2to16 (pPred, uiSrc);
}
/*vertical pLeft*/
void WelsI4x4LumaPredVLTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  uint8_t* pTopLeft		= &pRef[-kiStride - 1];	// pTop-pLeft
  /*get pTop*/
  const uint8_t kuiT0		= * (pTopLeft + 1);
  const uint8_t kuiT1		= * (pTopLeft + 2);
  const uint8_t kuiT2		= * (pTopLeft + 3);
  const uint8_t kuiT3		= * (pTopLeft + 4);
  const uint8_t kuiVLT0	= (1 + kuiT0 + kuiT1) >> 1;				// uiVLT0
  const uint8_t kuiVLT1	= (1 + kuiT1 + kuiT2) >> 1;				// uiVLT1
  const uint8_t kuiVLT2	= (1 + kuiT2 + kuiT3) >> 1;				// uiVLT2
  const uint8_t kuiVLT3	= (1 + (kuiT3 << 1)) >> 1;				// uiVLT3
  const uint8_t kuiVLT4	= (2 + kuiT0 + (kuiT1 << 1) + kuiT2) >> 2;	// uiVLT4
  const uint8_t kuiVLT5	= (2 + kuiT1 + (kuiT2 << 1) + kuiT3) >> 2;	// uiVLT5
  const uint8_t kuiVLT6	= (2 + kuiT2 + (kuiT3 << 1) + kuiT3) >> 2;	// uiVLT6
  const uint8_t kuiVLT7	= (2 + (kuiT3 << 2)) >> 2;				// uiVLT7
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16)	// TobeCont'd about assign opt as follows
  uiSrc[0] = kuiVLT0;
  uiSrc[1] = uiSrc[8] = kuiVLT1;
  uiSrc[2] = uiSrc[9] = kuiVLT2;
  uiSrc[3] = uiSrc[10] = uiSrc[11] = kuiVLT3;
  uiSrc[4] = kuiVLT4;
  uiSrc[5] = uiSrc[12] = kuiVLT5;
  uiSrc[6] = uiSrc[13] = kuiVLT6;
  uiSrc[7] = uiSrc[14] = uiSrc[15] = kuiVLT7;

  WelsFillingPred8x2to16 (pPred, uiSrc);
}
/*vertical right*/
void WelsI4x4LumaPredVR_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const int32_t kiStridex2	= kiStride << 1;
  const uint8_t kuiLT			= pRef[-kiStride - 1];	// pTop-pLeft
  /*get pLeft and pTop*/
  const uint8_t kuiL0			= pRef[-1];
  const uint8_t kuiL1			= pRef[kiStride - 1];
  const uint8_t kuiL2			= pRef[kiStridex2 - 1];
  const uint8_t kuiT0			= pRef[-kiStride];
  const uint8_t kuiT1			= pRef[1 - kiStride];
  const uint8_t kuiT2			= pRef[2 - kiStride];
  const uint8_t kuiT3			= pRef[3 - kiStride];
  const uint8_t kuiVR0		= (1 + kuiLT + kuiT0) >> 1;
  const uint8_t kuiVR1		= (1 + kuiT0 + kuiT1) >> 1;
  const uint8_t kuiVR2		= (1 + kuiT1 + kuiT2) >> 1;
  const uint8_t kuiVR3		= (1 + kuiT2 + kuiT3) >> 1;
  const uint8_t kuiVR4		= (2 + kuiL0 + (kuiLT << 1) + kuiT0) >> 2;
  const uint8_t kuiVR5		= (2 + kuiLT + (kuiT0 << 1) + kuiT1) >> 2;
  const uint8_t kuiVR6		= (2 + kuiT0 + (kuiT1 << 1) + kuiT2) >> 2;
  const uint8_t kuiVR7		= (2 + kuiT1 + (kuiT2 << 1) + kuiT3) >> 2;
  const uint8_t kuiVR8		= (2 + kuiLT + (kuiL0 << 1) + kuiL1) >> 2;
  const uint8_t kuiVR9		= (2 + kuiL0 + (kuiL1 << 1) + kuiL2) >> 2;
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16)	// TobeCont'd about assign opt as follows
  uiSrc[0] = uiSrc[9] = kuiVR0;
  uiSrc[1] = uiSrc[10] = kuiVR1;
  uiSrc[2] = uiSrc[11] = kuiVR2;
  uiSrc[3] = kuiVR3;
  uiSrc[4] = uiSrc[13] = kuiVR4;
  uiSrc[5] = uiSrc[14] = kuiVR5;
  uiSrc[6] = uiSrc[15] = kuiVR6;
  uiSrc[7] = kuiVR7;
  uiSrc[8] = kuiVR8;
  uiSrc[12] = kuiVR9;

  WelsFillingPred8x2to16 (pPred, uiSrc);
}
예제 #5
0
/*
 * Fills the inner-edge boundary strengths of one macroblock from its 16-entry
 * pNnzTab table.
 *
 * pNnzTab        16 bytes, read here as four 32-bit words (bytes 0-3, 4-7,
 *                8-11, 12-15).
 * uiBS           output. Written entries: uiBS[0][1..3][0..3] and the four
 *                bytes of each of uiBS[1][1], uiBS[1][2], uiBS[1][3].
 *                uiBS[0][0][*] and uiBS[1][0][*] are not touched.
 * iLShiftFactor  left-shift applied to every packed 32-bit value before it is
 *                stored.
 *
 * NOTE(review): all packed values are moved through uint32_t casts, so which
 * byte lands in which array element depends on host byte order, and the
 * shift operates on the whole 32-bit word rather than per byte. Confirm the
 * intended platforms and the value range of pNnzTab / iLShiftFactor.
 */
void inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t uiBS[2][4][4], int32_t iLShiftFactor) {
  uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx3, 4, 4);

  // One 32-bit word per group of four consecutive pNnzTab bytes.
  uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
  uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
  uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
  uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);

  // DEBLOCK_BS_SHIFTED is defined outside this block; presumably it combines
  // neighbouring bytes of the word - TODO confirm. Only the first three bytes
  // of the result are consumed, one per inner edge uiBS[0][1..3].
  * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b0) << iLShiftFactor;
  uiBS[0][1][0] = uiBsx3[0];
  uiBS[0][2][0] = uiBsx3[1];
  uiBS[0][3][0] = uiBsx3[2];

  * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b1) << iLShiftFactor;
  uiBS[0][1][1] = uiBsx3[0];
  uiBS[0][2][1] = uiBsx3[1];
  uiBS[0][3][1] = uiBsx3[2];
  // The four bytes of uiBS[1][1] are written at once: OR of words 0 and 1.
  * (uint32_t*)uiBS[1][1] = (uiNnz32b0 | uiNnz32b1) << iLShiftFactor;

  * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b2) << iLShiftFactor;
  uiBS[0][1][2] = uiBsx3[0];
  uiBS[0][2][2] = uiBsx3[1];
  uiBS[0][3][2] = uiBsx3[2];
  // uiBS[1][2]: OR of words 1 and 2.
  * (uint32_t*)uiBS[1][2] = (uiNnz32b1 | uiNnz32b2) << iLShiftFactor;

  * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b3) << iLShiftFactor;
  uiBS[0][1][3] = uiBsx3[0];
  uiBS[0][2][3] = uiBsx3[1];
  uiBS[0][3][3] = uiBsx3[2];
  // uiBS[1][3]: OR of words 2 and 3.
  * (uint32_t*)uiBS[1][3] = (uiNnz32b2 | uiNnz32b3) << iLShiftFactor;

}
/*horizontal up*/
void WelsI4x4LumaPredHU_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const int32_t kiStridex2	= kiStride << 1;
  const int32_t kiStridex3	= kiStride + kiStridex2;
  /*get pLeft*/
  const uint8_t kuiL0			= pRef[-1];
  const uint8_t kuiL1			= pRef[kiStride - 1];
  const uint8_t kuiL2			= pRef[kiStridex2 - 1];
  const uint8_t kuiL3			= pRef[kiStridex3 - 1];
  const uint16_t kuiL01		= (1 + kuiL0 + kuiL1);
  const uint16_t kuiL12		= (1 + kuiL1 + kuiL2);
  const uint16_t kuiL23		= (1 + kuiL2 + kuiL3);
  const uint8_t kuiHU0		= kuiL01 >> 1;
  const uint8_t kuiHU1		= (kuiL01 + kuiL12) >> 2;
  const uint8_t kuiHU2		= kuiL12 >> 1;
  const uint8_t kuiHU3		= (kuiL12 + kuiL23) >> 2;
  const uint8_t kuiHU4		= kuiL23 >> 1;
  const uint8_t kuiHU5		= (1 + kuiL23 + (kuiL3 << 1)) >> 2;
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16)	// TobeCont'd about assign opt as follows
  uiSrc[0] = kuiHU0;
  uiSrc[1] = kuiHU1;
  uiSrc[2] = uiSrc[4] = kuiHU2;
  uiSrc[3] = uiSrc[5] = kuiHU3;
  uiSrc[6] = uiSrc[8] = kuiHU4;
  uiSrc[7] = uiSrc[9] = kuiHU5;
  memset (&uiSrc[10], kuiL3, 6 * sizeof (uint8_t));

  WelsFillingPred8x2to16 (pPred, uiSrc);
}
void WelsI4x4LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const uint32_t kuiSrc = LD32 (&pRef[-kiStride]);
  ENFORCE_STACK_ALIGN_1D (uint32_t, uiSrcx2, 2, 16)
  uiSrcx2[0] = uiSrcx2[1] = kuiSrc;

  WelsFillingPred8to16 (pPred, (uint8_t*)&uiSrcx2[0]);
}
/*down pLeft*/
void WelsI4x4LumaPredDDL_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  /*get pTop*/
  const uint8_t kuiT0		= pRef[-kiStride];
  const uint8_t kuiT1		= pRef[1 - kiStride];
  const uint8_t kuiT2		= pRef[2 - kiStride];
  const uint8_t kuiT3		= pRef[3 - kiStride];
  const uint8_t kuiT4		= pRef[4 - kiStride];
  const uint8_t kuiT5		= pRef[5 - kiStride];
  const uint8_t kuiT6		= pRef[6 - kiStride];
  const uint8_t kuiT7		= pRef[7 - kiStride];
  const uint8_t kuiDDL0	= (2 + kuiT0 + kuiT2 + (kuiT1 << 1)) >> 2;	// uiDDL0
  const uint8_t kuiDDL1	= (2 + kuiT1 + kuiT3 + (kuiT2 << 1)) >> 2;	// uiDDL1
  const uint8_t kuiDDL2	= (2 + kuiT2 + kuiT4 + (kuiT3 << 1)) >> 2;	// uiDDL2
  const uint8_t kuiDDL3	= (2 + kuiT3 + kuiT5 + (kuiT4 << 1)) >> 2;	// uiDDL3
  const uint8_t kuiDDL4	= (2 + kuiT4 + kuiT6 + (kuiT5 << 1)) >> 2;	// uiDDL4
  const uint8_t kuiDDL5	= (2 + kuiT5 + kuiT7 + (kuiT6 << 1)) >> 2;	// uiDDL5
  const uint8_t kuiDDL6	= (2 + kuiT6 + kuiT7 + (kuiT7 << 1)) >> 2;	// uiDDL6
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16)	// TobeCont'd about assign opt as follows
  uiSrc[0] = kuiDDL0;
  uiSrc[1] = uiSrc[4] = kuiDDL1;
  uiSrc[2] = uiSrc[5] = uiSrc[8] = kuiDDL2;
  uiSrc[3] = uiSrc[6] = uiSrc[9] = uiSrc[12] = kuiDDL3;
  uiSrc[7] = uiSrc[10] = uiSrc[13] = kuiDDL4;
  uiSrc[11] = uiSrc[14] = kuiDDL5;
  uiSrc[15] = kuiDDL6;

  WelsFillingPred8x2to16 (pPred, uiSrc);
}
예제 #9
0
파일: mc.cpp 프로젝트: Ehuos/openh264
/*
 * Writes to pDst the pixel average of two intermediate blocks:
 *   - the output of the McHorVer20* kernel run on the source shifted down one
 *     row (pSrc + iSrcStride), and
 *   - the output of the McHorVer02* kernel run on the source shifted right
 *     one column (pSrc + 1).
 * Both intermediates live in 256-byte scratch buffers with a row pitch of 16.
 * Width 16 and 8 have dedicated kernels; any other width takes the 4-wide path.
 */
static inline void McHorVer33_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iWidth, int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
  ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
  uint8_t* pRowBelow = pSrc + iSrcStride;
  uint8_t* pColRight = pSrc + 1;

  switch (iWidth) {
  case 16:
    McHorVer20WidthEq16_sse2 (pRowBelow, iSrcStride, pHorTmp, 16, iHeight);
    McHorVer02WidthEq16_sse2 (pColRight, iSrcStride, pVerTmp, 16, iHeight);
    PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
    break;
  case 8:
    McHorVer20WidthEq8_sse2 (pRowBelow, iSrcStride, pHorTmp, 16, iHeight);
    McHorVer02WidthEq8_sse2 (pColRight, iSrcStride, pVerTmp, 16, iHeight);
    PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
    break;
  default:
    // No SIMD vertical kernel is used for this width; the C version is called with width 4.
    McHorVer20WidthEq4_mmx (pRowBelow, iSrcStride, pHorTmp, 16, iHeight);
    McHorVer02_c (pColRight, iSrcStride, pVerTmp, 16, 4, iHeight);
    PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
    break;
  }
}
예제 #10
0
/*
 * Computes the boundary strengths of the inner 4x4 edges of macroblock iMbXy.
 *
 * pNnzTab holds one entry per 4x4 block in raster order (index 4 * row + col).
 * For every inner edge the entries of the two blocks on either side are OR-ed
 * and handed to BS_EDGE together with the macroblock's list-0 reference
 * indices and motion vectors and the two block indices.
 *
 *   nBS[0][c][r], c = 1..3 : edge between block (4r + c) and block (4r + c - 1)
 *   nBS[1][r][c], r = 1..3 : edge between block (4r + c) and block (4(r - 1) + c)
 *
 * nBS[0][0][*] and nBS[1][0][*] are not written here.
 */
void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab,
    int32_t iMbXy) {
  int8_t* iRefIndex = pCurDqLayer->pRefIndex[LIST_0][iMbXy];
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);

  // Edges between left/right neighbours, one block row at a time.
  for (int i = 0; i < 3; i++) {
    uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
  }
  nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0);
  nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1);
  nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2);

  for (int i = 0; i < 3; i++) {
    uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
  }
  nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4);
  nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5);
  nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6);

  for (int i = 0; i < 3; i++) {
    uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
  }
  nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8);
  nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9);
  nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10);

  for (int i = 0; i < 3; i++) {
    uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
  }
  nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12);
  nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13);
  nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14);

  // Edges between upper/lower neighbours. The per-column OR is done byte by
  // byte, like the loops above, rather than by loading pNnzTab through a
  // uint32_t* cast: the bytes produced are the same on any byte order and no
  // alignment of pNnzTab is required.
  for (int i = 0; i < 4; i++) {
    uiBsx4[i] = pNnzTab[i] | pNnzTab[4 + i];
  }
  nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 4, 0);
  nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 1);
  nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 2);
  nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 3);

  for (int i = 0; i < 4; i++) {
    uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[8 + i];
  }
  nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 8, 4);
  nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 5);
  nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 6);
  nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 7);

  for (int i = 0; i < 4; i++) {
    uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[12 + i];
  }
  nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 12, 8);
  nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 9);
  nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 10);
  nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 11);
}
예제 #11
0
/*
 * Accumulates an intra-prediction cost per group of macroblock rows.
 *
 * The luma plane of pSrc is walked in 16x16 blocks (width and height are
 * divided by 16, remainders are ignored). For each block up to two predictions
 * are tried: m_pIntraFunc[0] when a block row exists above, m_pIntraFunc[1]
 * when a block exists to the left. The smaller SAD of the available
 * predictions is added to the running sum; the very first block contributes
 * nothing. At the end of every iMbRowInGom-th block row, and at the end of the
 * last row, the running sum is stored in pGomComplexity[], added to
 * iFrameComplexity and reset. iGomNumInFrame receives the number of sums
 * stored.
 */
void CComplexityAnalysisScreen::GomComplexityAnalysisIntra (SPixMap* pSrc) {
    int32_t iWidth     = pSrc->sRect.iRectWidth;
    int32_t iHeight    = pSrc->sRect.iRectHeight;
    int32_t iMbCols    = iWidth  >> 4;
    int32_t iMbRows    = iHeight >> 4;

    int32_t iStrideY   = pSrc->iStride[0];
    int32_t iMbRowStep = iStrideY << 4;   // bytes between two rows of 16x16 blocks

    uint8_t* pRowStart = (uint8_t*)pSrc->pPixel[0];

    int32_t iGomSad    = 0;
    int32_t iGomCount  = 0;

    ENFORCE_STACK_ALIGN_1D (uint8_t, iMemPredMb, 256, 16)

    m_ComplexityAnalysisParam.iFrameComplexity = 0;

    for (int32_t iMbY = 0; iMbY < iMbRows; iMbY++, pRowStart += iMbRowStep) {
        uint8_t* pBlk = pRowStart;

        for (int32_t iMbX = 0; iMbX < iMbCols; iMbX++, pBlk += 16) {
            // 0x7fffffff marks "prediction not available" so it never wins the minimum.
            int32_t iSadPred0 = 0x7fffffff;
            int32_t iSadPred1 = 0x7fffffff;

            if (iMbY > 0) {
                m_pIntraFunc[0] (iMemPredMb, pBlk, iStrideY);
                iSadPred0 = m_pSadFunc (pBlk, iStrideY, iMemPredMb, 16);
            }
            if (iMbX > 0) {
                m_pIntraFunc[1] (iMemPredMb, pBlk, iStrideY);
                iSadPred1 = m_pSadFunc (pBlk, iStrideY, iMemPredMb, 16);
            }
            if (iMbX != 0 || iMbY != 0) {
                iGomSad += WELS_MIN (iSadPred0, iSadPred1);
            }

            // Flush at the last block of a row that closes a group (or the picture).
            const bool bLastInRow = (iMbX == iMbCols - 1);
            if (bLastInRow && ((iMbY + 1) % m_ComplexityAnalysisParam.iMbRowInGom == 0 || iMbY == iMbRows - 1)) {
                m_ComplexityAnalysisParam.pGomComplexity[iGomCount] = iGomSad;
                m_ComplexityAnalysisParam.iFrameComplexity += iGomSad;
                iGomCount++;
                iGomSad = 0;
            }
        }
    }
    m_ComplexityAnalysisParam.iGomNumInFrame = iGomCount;
}
예제 #12
0
/*
 * Computes the boundary strengths of the inner 4x4 edges of one macroblock.
 *
 * pNnzTab holds one entry per 4x4 block in raster order (index 4 * row + col).
 * For every inner edge the entries of the two blocks on either side are OR-ed
 * and handed to BS_EDGE together with pCurMb->sMv and the two block indices.
 *
 *   uiBS[0][c][r], c = 1..3 : edge between block (4r + c) and block (4r + c - 1)
 *   uiBS[1][r][c], r = 1..3 : edge between block (4r + c) and block (4(r - 1) + c)
 *
 * uiBS[0][0][*] and uiBS[1][0][*] are not written here.
 * NOTE(review): iRefIdx is not declared in this function; it is passed to
 * BS_EDGE exactly as before and must be resolved by that macro or by the
 * surrounding file.
 */
void inline DeblockingBSInsideMBNormal (SMB* pCurMb, uint8_t uiBS[2][4][4], int8_t* pNnzTab) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);

  // Edges between left/right neighbours, one block row at a time.
  for (int i = 0; i < 3; i++) {
    uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
  }
  uiBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 1, 0);
  uiBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 2, 1);
  uiBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 3, 2);

  for (int i = 0; i < 3; i++) {
    uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
  }
  uiBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 5, 4);
  uiBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 6, 5);
  uiBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 7, 6);

  for (int i = 0; i < 3; i++) {
    uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
  }
  uiBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 9, 8);
  uiBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 10, 9);
  uiBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 11, 10);

  for (int i = 0; i < 3; i++) {
    uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
  }
  uiBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 13, 12);
  uiBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 14, 13);
  uiBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 15, 14);

  // Edges between upper/lower neighbours. The per-column OR is done byte by
  // byte, like the loops above, rather than by loading pNnzTab through a
  // uint32_t* cast: the bytes produced are the same on any byte order and no
  // alignment of pNnzTab is required.
  for (int i = 0; i < 4; i++) {
    uiBsx4[i] = pNnzTab[i] | pNnzTab[4 + i];
  }
  uiBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 4, 0);
  uiBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 5, 1);
  uiBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 6, 2);
  uiBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 7, 3);

  for (int i = 0; i < 4; i++) {
    uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[8 + i];
  }
  uiBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 8, 4);
  uiBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 9, 5);
  uiBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 10, 6);
  uiBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 11, 7);

  for (int i = 0; i < 4; i++) {
    uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[12 + i];
  }
  uiBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 12, 8);
  uiBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 13, 9);
  uiBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 14, 10);
  uiBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 15, 11);
}
예제 #13
0
파일: mc.cpp 프로젝트: VincentYu/openh264
/*
 * Writes to pDst the pixel average of the unfiltered source block and the
 * output of the McHorVer20* kernel run on the same source position.
 * The filtered block lives in a 256-byte scratch buffer with a row pitch of
 * 16. Width 16 and 8 have dedicated kernels; any other width takes the 4-wide
 * path.
 */
static inline void McHorVer10_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                      int32_t iWidth, int32_t iHeight) {
  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);

  switch (iWidth) {
  case 16:
    McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
    PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
    break;
  case 8:
    McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
    PixelAvgWidthEq8_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
    break;
  default:
    McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pTmp, 16, iHeight);
    PixelAvgWidthEq4_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
    break;
  }
}
예제 #14
0
/*
 * Filters one luma edge with the LT4 kernel.
 *
 * Alpha, beta and the table index are derived from pFilter->iLumaQP and the
 * slice offsets. When both alpha and beta are zero nothing is filtered;
 * otherwise the clipping values are looked up from pBS and the
 * pfLumaDeblockingLT4Hor kernel is run on pPix.
 */
void FilteringEdgeLumaV (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
  int32_t  iIndexA;
  int32_t  iAlpha;
  int32_t  iBeta;
  ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);

  GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
                          iBeta);

  // Both thresholds zero: the edge is left untouched.
  if ((iAlpha | iBeta) == 0) {
    return;
  }

  TC0_TBL_LOOKUP (tc, iIndexA, pBS, 0);
  pFilter->pLoopf->pfLumaDeblockingLT4Hor (pPix, iStride, iAlpha, iBeta, tc);
}
예제 #15
0
/*
 * Filters one chroma edge (both planes in a single kernel call) with the LT4
 * kernel.
 *
 * Alpha, beta and the table index are derived from pFilter->uiChromaQP and the
 * slice offsets. When both alpha and beta are zero nothing is filtered;
 * otherwise the clipping values are looked up from pBS and
 * pfChromaDeblockingLT4Hor is run on pPixCb / pPixCr.
 */
void FilteringEdgeChromaV (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr,
                           int32_t iStride, uint8_t* pBS) {
  int32_t iIdexA;
  int32_t iAlpha;
  int32_t iBeta;
  ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);

  GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
                          iBeta);

  // Both thresholds zero: the edge is left untouched.
  if ((iAlpha | iBeta) == 0) {
    return;
  }

  TC0_TBL_LOOKUP (iTc, iIdexA, pBS, 1);
  pfDeblocking->pfChromaDeblockingLT4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta, iTc);
}
/*
 * 4x4 luma intra prediction from the column left of the block.
 *
 * Reads the four samples pRef[y * kiStride - 1], y = 0..3, replicates each one
 * four times to form the matching row, stores the four rows in raster order in
 * a scratch buffer and passes it with pPred to WelsFillingPred8x2to16 ().
 */
void WelsI4x4LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  // Offsets are kept signed, as in the sibling predictors: with an unsigned
  // 32-bit type a negative stride would wrap to a huge positive index when
  // widened for the pointer arithmetic on 64-bit targets.
  const int32_t kiStridex2Left = (kiStride * 2) - 1;
  const int32_t kiStridex3Left = kiStride + kiStridex2Left;
  const uint8_t kuiHor1 = pRef[-1];
  const uint8_t kuiHor2 = pRef[kiStride - 1];
  const uint8_t kuiHor3 = pRef[kiStridex2Left];
  const uint8_t kuiHor4 = pRef[kiStridex3Left];
  // One four-byte row per left sample.
  const uint8_t kuiVec1[4] = {kuiHor1, kuiHor1, kuiHor1, kuiHor1};
  const uint8_t kuiVec2[4] = {kuiHor2, kuiHor2, kuiHor2, kuiHor2};
  const uint8_t kuiVec3[4] = {kuiHor3, kuiHor3, kuiHor3, kuiHor3};
  const uint8_t kuiVec4[4] = {kuiHor4, kuiHor4, kuiHor4, kuiHor4};
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16)	// TobeCont'd about assign opt as follows
  ST32 (&uiSrc[0], LD32 (kuiVec1));
  ST32 (&uiSrc[4], LD32 (kuiVec2));
  ST32 (&uiSrc[8], LD32 (kuiVec3));
  ST32 (&uiSrc[12], LD32 (kuiVec4));

  WelsFillingPred8x2to16 (pPred, uiSrc);
}
/*down pLeft*/
void WelsI4x4LumaPredDDLTop_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  /*get pTop*/
  const uint8_t kuiT0	= pRef[-kiStride];
  const uint8_t kuiT1	= pRef[1 - kiStride];
  const uint8_t kuiT2	= pRef[2 - kiStride];
  const uint8_t kuiT3	= pRef[3 - kiStride];
  const uint8_t kuiDLT0	= (2 + kuiT0 + kuiT2 + (kuiT1 << 1)) >> 2;	// uiDLT0
  const uint8_t kuiDLT1	= (2 + kuiT1 + kuiT3 + (kuiT2 << 1)) >> 2;	// uiDLT1
  const uint8_t kuiDLT2	= (2 + kuiT2 + kuiT3 + (kuiT3 << 1)) >> 2;	// uiDLT2
  const uint8_t kuiDLT3	= (2 + (kuiT3 << 2)) >> 2;				// uiDLT3
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16)	// TobeCont'd about assign opt as follows
  memset (&uiSrc[6], kuiDLT3, 10 * sizeof (uint8_t));
  uiSrc[0] = kuiDLT0;
  uiSrc[1] = uiSrc[4] = kuiDLT1;
  uiSrc[2] = uiSrc[5] = uiSrc[8] = kuiDLT2;
  uiSrc[3] = kuiDLT3;

  WelsFillingPred8x2to16 (pPred, uiSrc);
}
/*down right*/
void WelsI4x4LumaPredDDR_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const int32_t kiStridex2	= kiStride << 1;
  const int32_t kiStridex3	= kiStride + kiStridex2;
  const uint8_t kuiLT			= pRef[-kiStride - 1];	// pTop-pLeft
  /*get pLeft and pTop*/
  const uint8_t kuiL0			= pRef[-1];
  const uint8_t kuiL1			= pRef[kiStride - 1];
  const uint8_t kuiL2			= pRef[kiStridex2 - 1];
  const uint8_t kuiL3			= pRef[kiStridex3 - 1];
  const uint8_t kuiT0			= pRef[-kiStride];
  const uint8_t kuiT1			= pRef[1 - kiStride];
  const uint8_t kuiT2			= pRef[2 - kiStride];
  const uint8_t kuiT3			= pRef[3 - kiStride];
  const uint16_t kuiTL0		= 1 + kuiLT + kuiL0;
  const uint16_t kuiLT0		= 1 + kuiLT + kuiT0;
  const uint16_t kuiT01		= 1 + kuiT0 + kuiT1;
  const uint16_t kuiT12		= 1 + kuiT1 + kuiT2;
  const uint16_t kuiT23		= 1 + kuiT2 + kuiT3;
  const uint16_t kuiL01		= 1 + kuiL0 + kuiL1;
  const uint16_t kuiL12		= 1 + kuiL1 + kuiL2;
  const uint16_t kuiL23		= 1 + kuiL2 + kuiL3;
  const uint8_t kuiDDR0		= (kuiTL0 + kuiLT0) >> 2;
  const uint8_t kuiDDR1		= (kuiLT0 + kuiT01) >> 2;
  const uint8_t kuiDDR2		= (kuiT01 + kuiT12) >> 2;
  const uint8_t kuiDDR3		= (kuiT12 + kuiT23) >> 2;
  const uint8_t kuiDDR4		= (kuiTL0 + kuiL01) >> 2;
  const uint8_t kuiDDR5		= (kuiL01 + kuiL12) >> 2;
  const uint8_t kuiDDR6		= (kuiL12 + kuiL23) >> 2;
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16)	// TobeCont'd about assign opt as follows
  uiSrc[0] = uiSrc[5] = uiSrc[10] = uiSrc[15] = kuiDDR0;
  uiSrc[1] = uiSrc[6] = uiSrc[11] = kuiDDR1;
  uiSrc[2] = uiSrc[7] = kuiDDR2;
  uiSrc[3] = kuiDDR3;
  uiSrc[4] = uiSrc[9] = uiSrc[14] = kuiDDR4;
  uiSrc[8] = uiSrc[13] = kuiDDR5;
  uiSrc[12] = kuiDDR6;

  WelsFillingPred8x2to16 (pPred, uiSrc);
}
/*horizontal down*/
void WelsI4x4LumaPredHD_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const int32_t kiStridex2	= kiStride << 1;
  const int32_t kiStridex3	= kiStride + kiStridex2;
  const uint8_t kuiLT		= pRef[-kiStride - 1];	// pTop-pLeft
  /*get pLeft and pTop*/
  const uint8_t kuiL0		= pRef[-1];
  const uint8_t kuiL1		= pRef[kiStride - 1];
  const uint8_t kuiL2		= pRef[kiStridex2 - 1];
  const uint8_t kuiL3		= pRef[kiStridex3 - 1];
  const uint8_t kuiT0		= pRef[-kiStride];
  const uint8_t kuiT1		= pRef[1 - kiStride];
  const uint8_t kuiT2		= pRef[2 - kiStride];
  const uint8_t kuiHD0		= (1 + kuiLT + kuiL0) >> 1;				// uiHD0
  const uint8_t kuiHD1		= (2 + kuiL0 + (kuiLT << 1) + kuiT0) >> 2;	// uiHD1
  const uint8_t kuiHD2		= (2 + kuiLT + (kuiT0 << 1) + kuiT1) >> 2;	// uiHD2
  const uint8_t kuiHD3		= (2 + kuiT0 + (kuiT1 << 1) + kuiT2) >> 2;	// uiHD3
  const uint8_t kuiHD4		= (1 + kuiL0 + kuiL1) >> 1;				// uiHD4
  const uint8_t kuiHD5		= (2 + kuiLT + (kuiL0 << 1) + kuiL1) >> 2;	// uiHD5
  const uint8_t kuiHD6		= (1 + kuiL1 + kuiL2) >> 1;				// uiHD6
  const uint8_t kuiHD7		= (2 + kuiL0 + (kuiL1 << 1) + kuiL2) >> 2;	// uiHD7
  const uint8_t kuiHD8		= (1 + kuiL2 + kuiL3) >> 1;				// uiHD8
  const uint8_t kuiHD9		= (2 + kuiL1 + (kuiL2 << 1) + kuiL3) >> 2;	// uiHD9
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiSrc, 16, 16)	// TobeCont'd about assign opt as follows
  uiSrc[0] = uiSrc[6] = kuiHD0;
  uiSrc[1] = uiSrc[7] = kuiHD1;
  uiSrc[2] = kuiHD2;
  uiSrc[3] = kuiHD3;
  uiSrc[4] = uiSrc[10] = kuiHD4;
  uiSrc[5] = uiSrc[11] = kuiHD5;
  uiSrc[8] = uiSrc[14] = kuiHD6;
  uiSrc[9] = uiSrc[15] = kuiHD7;
  uiSrc[12] = kuiHD8;
  uiSrc[13] = kuiHD9;

  WelsFillingPred8x2to16 (pPred, uiSrc);
}
예제 #20
0
/*
 * Filters one chroma edge with the LT4 kernels.
 *
 * When both entries of pFilter->iChromaQP are equal, alpha/beta are derived
 * once and both planes are filtered by a single pfChromaDeblockingLT4Hor
 * call. Otherwise each plane gets its own alpha/beta from iChromaQP[plane]
 * (index 0 with pPixCb, index 1 with pPixCr) and is filtered separately by
 * pfChromaDeblockingLT4Hor2. A plane whose alpha and beta are both zero is
 * skipped.
 */
void FilteringEdgeChromaV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
                           uint8_t* pBS) {
  int32_t iIndexA;
  int32_t iAlpha;
  int32_t iBeta;
  ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);

  if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
    // Same QP for both planes: one threshold set, one combined kernel call.
    GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
                            iBeta);

    if (iAlpha | iBeta) {
      TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
      pFilter->pLoopf->pfChromaDeblockingLT4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta, tc);
    }
    return;
  }

  uint8_t* pPlane[2] = { pPixCb, pPixCr };

  for (int iPlane = 0; iPlane < 2; iPlane++) {
    GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[iPlane], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA,
                            iAlpha, iBeta);

    if ((iAlpha | iBeta) == 0) {
      continue;
    }

    TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
    pFilter->pLoopf->pfChromaDeblockingLT4Hor2 (pPlane[iPlane], iStride, iAlpha, iBeta, tc);
  }
}
예제 #21
0
/*
 * Computes the boundary strengths of the inner edges of macroblock iMbXy.
 *
 * pNnzTab holds one entry per 4x4 block (index 4 * row + col in the 4x4
 * branch). Each strength comes from BS_EDGE, which receives the OR of the
 * entries on both sides of the edge, the macroblock's list-0 reference indices
 * and motion vectors, and the two block indices.
 *
 * When pTransformSize8x8Flag[iMbXy] is set, only the middle edge in each
 * direction is computed (nBS[0][2][*] and nBS[1][2][*]); the per-8x8 value is
 * the OR of the four pNnzTab entries selected through g_kuiMbCountScan4Idx.
 * Other nBS entries are not written in that branch.
 *
 * Otherwise:
 *   nBS[0][c][r], c = 1..3 : edge between block (4r + c) and block (4r + c - 1)
 *   nBS[1][r][c], r = 1..3 : edge between block (4r + c) and block (4(r - 1) + c)
 *
 * nBS[0][0][*] and nBS[1][0][*] are never written here.
 */
void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab,
    int32_t iMbXy) {
  int8_t* iRefIndex = pCurDqLayer->pRefIndex[LIST_0][iMbXy];
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);

  int8_t i8x8NnzTab[4];

  if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
    // Collapse the four 4x4 entries of each 8x8 block into one value.
    for (int32_t i = 0; i < 4; i++) {
      int32_t iBlkIdx = i << 2;
      i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] |
                       pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
    }
    //vertical
    nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
                                           g_kuiMbCountScan4Idx[1 << 2], g_kuiMbCountScan4Idx[0]);
    nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
                                           g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[2 << 2]);

    //horizontal
    nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
                                           g_kuiMbCountScan4Idx[2 << 2], g_kuiMbCountScan4Idx[0]);
    nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
                                           g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[1 << 2]);
  } else {
    // Edges between left/right neighbours, one block row at a time.
    for (int i = 0; i < 3; i++) {
      uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
    }
    nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0);
    nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1);
    nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2);

    for (int i = 0; i < 3; i++) {
      uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
    }
    nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4);
    nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5);
    nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6);

    for (int i = 0; i < 3; i++) {
      uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
    }
    nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8);
    nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9);
    nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10);

    for (int i = 0; i < 3; i++) {
      uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
    }
    nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12);
    nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13);
    nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14);

    // Edges between upper/lower neighbours. The per-column OR is done byte by
    // byte, like the loops above, rather than by loading pNnzTab through a
    // uint32_t* cast: the bytes produced are the same on any byte order and
    // no alignment of pNnzTab is required.
    for (int i = 0; i < 4; i++) {
      uiBsx4[i] = pNnzTab[i] | pNnzTab[4 + i];
    }
    nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 4, 0);
    nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 1);
    nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 2);
    nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 3);

    for (int i = 0; i < 4; i++) {
      uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[8 + i];
    }
    nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 8, 4);
    nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 5);
    nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 6);
    nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 7);

    for (int i = 0; i < 4; i++) {
      uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[12 + i];
    }
    nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 12, 8);
    nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 9);
    nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 10);
    nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 11);
  }
}
예제 #22
0
/*
 * Accumulates a per-block cost per group of macroblock rows, taking the best
 * of inter, scrolled-inter and intra candidates.
 *
 * The luma planes of pSrc (current) and pRef (reference) are walked in 16x16
 * blocks (width and height are divided by 16, remainders are ignored). For
 * each block:
 *   - the SAD against the co-located reference block is computed;
 *   - when bScrollFlag is set, that SAD is non-zero and the scroll-shifted
 *     position passes the bounds check, the SAD against the shifted reference
 *     block is computed and kept if smaller;
 *   - m_pIntraFunc[0] is tried when a block row exists above and
 *     m_pIntraFunc[1] when a block exists to the left.
 * The minimum of all candidates is added to the running sum. At the end of
 * every iMbRowInGom-th block row, and at the end of the last row, the running
 * sum is stored in pGomComplexity[], added to iFrameComplexity and reset.
 * iGomNumInFrame receives the number of sums stored.
 */
void CComplexityAnalysisScreen::GomComplexityAnalysisInter (SPixMap* pSrc, SPixMap* pRef, bool bScrollFlag) {
    int32_t iWidth      = pSrc->sRect.iRectWidth;
    int32_t iHeight     = pSrc->sRect.iRectHeight;
    int32_t iMbCols     = iWidth  >> 4;
    int32_t iMbRows     = iHeight >> 4;

    int32_t iScrollMvX  = m_ComplexityAnalysisParam.sScrollResult.iScrollMvX;
    int32_t iScrollMvY  = m_ComplexityAnalysisParam.sScrollResult.iScrollMvY;

    int32_t iRefStride  = pRef->iStride[0];
    int32_t iCurStride  = pSrc->iStride[0];
    int32_t iRefRowStep = pRef->iStride[0] << 4;   // bytes between two rows of 16x16 blocks
    int32_t iCurRowStep = pSrc->iStride[0] << 4;

    uint8_t* pRefRow    = (uint8_t*)pRef->pPixel[0];
    uint8_t* pCurRow    = (uint8_t*)pSrc->pPixel[0];

    int32_t iGomSad     = 0;
    int32_t iGomCount   = 0;

    ENFORCE_STACK_ALIGN_1D (uint8_t, iMemPredMb, 256, 16)

    m_ComplexityAnalysisParam.iFrameComplexity = 0;

    for (int32_t iMbY = 0; iMbY < iMbRows; iMbY++) {
        uint8_t* pRefBlk = pRefRow;
        uint8_t* pCurBlk = pCurRow;

        for (int32_t iMbX = 0; iMbX < iMbCols; iMbX++) {
            int32_t iBestInterSad = m_pSadFunc (pCurBlk, iCurStride, pRefBlk, iRefStride);

            if (bScrollFlag && (iBestInterSad != 0)) {
                const int32_t kiShiftedX = (iMbX << 4) + iScrollMvX;
                const int32_t kiShiftedY = (iMbY << 4) + iScrollMvY;

                // NOTE(review): the bounds use "- 8" while blocks are stepped in
                // units of 16, and the check adds iScrollMvY while the pointer
                // below subtracts it. Both are kept exactly as they were;
                // confirm the intended block size and sign convention.
                if ((kiShiftedX >= 0) && (kiShiftedX <= iWidth - 8) &&
                        (kiShiftedY >= 0) && (kiShiftedY <= iHeight - 8)) {
                    uint8_t* pRefScrolled = pRefBlk - iScrollMvY * iRefStride + iScrollMvX;
                    int32_t iScrollSad = m_pSadFunc (pCurBlk, iCurStride, pRefScrolled, iRefStride);

                    if (iScrollSad < iBestInterSad) {
                        iBestInterSad = iScrollSad;
                    }
                }
            }

            // 0x7fffffff marks "prediction not available" so it never wins the minimum.
            int32_t iSadPred0 = 0x7fffffff;
            int32_t iSadPred1 = 0x7fffffff;

            if (iMbY > 0) {
                m_pIntraFunc[0] (iMemPredMb, pCurBlk, iCurStride);
                iSadPred0 = m_pSadFunc (pCurBlk, iCurStride, iMemPredMb, 16);
            }
            if (iMbX > 0) {
                m_pIntraFunc[1] (iMemPredMb, pCurBlk, iCurStride);
                iSadPred1 = m_pSadFunc (pCurBlk, iCurStride, iMemPredMb, 16);
            }

            iGomSad += WELS_MIN (WELS_MIN (iSadPred0, iSadPred1), iBestInterSad);

            // Flush at the last block of a row that closes a group (or the picture).
            const bool bLastInRow = (iMbX == iMbCols - 1);
            if (bLastInRow && ((iMbY + 1) % m_ComplexityAnalysisParam.iMbRowInGom == 0 || iMbY == iMbRows - 1)) {
                m_ComplexityAnalysisParam.pGomComplexity[iGomCount] = iGomSad;
                m_ComplexityAnalysisParam.iFrameComplexity += iGomSad;
                iGomCount++;
                iGomSad = 0;
            }

            pRefBlk += 16;
            pCurBlk += 16;
        }
        pRefRow += iRefRowStep;
        pCurRow += iCurRowStep;
    }
    m_ComplexityAnalysisParam.iGomNumInFrame = iGomCount;
}