/**
 * Function: omxVCM4P2_DecodeBlockCoef_Inter
 *
 * Description:
 * Decodes the coefficients of one INTER block. Three stages run back to
 * back: VLC decoding with inverse zigzag positioning, inverse quantization,
 * and an 8x8 IDCT. The residuals produced by the IDCT are written to pDst as
 * a contiguous array of 64 elements, to be used for later reconstruction.
 *
 * Remarks:
 * This function does not inspect its arguments itself. The status of each
 * stage is tested with armRetDataErrIf before the next stage is started, and
 * a stage's non-OMX_Sts_NoErr status is the value handed back to the caller.
 *
 * Parameters:
 * [in]  ppBitStream       pointer to the pointer to the current byte in the
 *                         bit stream buffer. There is no boundary check for
 *                         the bit stream buffer.
 * [in]  pBitOffset        pointer to the bit position in the byte pointed to
 *                         by *ppBitStream. *pBitOffset is valid within [0-7].
 * [in]  QP                quantization parameter
 * [in]  shortVideoHeader  flag indicating presence of short_video_header;
 *                         shortVideoHeader==1 selects the quantization method
 *                         defined for short video header mode, and
 *                         shortVideoHeader==0 selects the normal quantization
 *                         method.
 * [out] ppBitStream       *ppBitStream is updated after the block is decoded
 *                         so that it points to the current byte in the bit
 *                         stream buffer.
 * [out] pBitOffset        *pBitOffset is updated so that it points to the
 *                         current bit position in the byte pointed to by
 *                         *ppBitStream.
 * [out] pDst              pointer to the decoded residual buffer (a
 *                         contiguous array of 64 elements of OMX_S16 data
 *                         type). Must be 16-byte aligned.
 *
 * Return Value:
 * OMX_Sts_NoErr - no error
 * OMX_Sts_BadArgErr - bad arguments
 *   - At least one of the following pointers is NULL:
 *     ppBitStream, *ppBitStream, pBitOffset, pDst
 *   - At least one of the following is true:
 *     - *pBitOffset lies outside [0,7], or QP <= 0
 *     - pDst is not 16-byte aligned
 * OMX_Sts_Err - status error
 *
 */
OMXResult omxVCM4P2_DecodeBlockCoef_Inter(
     const OMX_U8 ** ppBitStream,
     OMX_INT * pBitOffset,
     OMX_S16 * pDst,
     OMX_INT QP,
     OMX_INT shortVideoHeader
)
{
    /* Backing store for the 64 working coefficients; the extra elements
       leave room to move the start up to a 16-byte boundary. */
    OMX_S16 coefStorage[79];
    OMX_S16 *pCoef = armAlignTo16Bytes(coefStorage);
    OMXResult status;

    /* Stage 1: variable-length decode into zigzag-restored order */
    status = omxVCM4P2_DecodeVLCZigzag_Inter(ppBitStream,
                                             pBitOffset,
                                             pCoef,
                                             shortVideoHeader);
    armRetDataErrIf((status != OMX_Sts_NoErr), status);

    /* Stage 2: in-place inverse quantization of the working buffer */
    status = omxVCM4P2_QuantInvInter_I(pCoef, QP);
    armRetDataErrIf((status != OMX_Sts_NoErr), status);

    /* Stage 3: inverse DCT from the working buffer into the caller's buffer */
    status = omxVCM4P2_IDCT8x8blk(pCoef, pDst);
    armRetDataErrIf((status != OMX_Sts_NoErr), status);

    return OMX_Sts_NoErr;
}
/**
 * Function: omxVCM4P2_TransRecBlockCoef_inter
 *
 * Description:
 * Transforms and reconstructs one INTER block. The source residuals are run
 * through the forward 8x8 DCT and quantized in place in pDst. A private copy
 * of the quantized coefficients is then inverse quantized and passed through
 * the 8x8 IDCT to produce the reconstructed residuals in pRec, so pDst keeps
 * the quantized coefficients.
 *
 * Parameters:
 * [in]  pSrc              pointer to the 64 source residuals; must be
 *                         non-NULL and 16-byte aligned
 * [in]  QP                quantization parameter; must lie in [1, 31]
 * [in]  shortVideoHeader  passed through to omxVCM4P2_QuantInter_I
 * [out] pDst              receives the 64 quantized DCT coefficients; must be
 *                         non-NULL and 16-byte aligned
 * [out] pRec              receives the 64 reconstructed residuals; must be
 *                         non-NULL and 16-byte aligned
 *
 * Return Value:
 * OMX_Sts_NoErr     - no error
 * OMX_Sts_BadArgErr - a pointer is NULL or misaligned, or QP is out of range
 * Any other status  - reported by one of the four processing stages and
 *                     returned unchanged; later stages are not run
 */
OMXResult omxVCM4P2_TransRecBlockCoef_inter(
     const OMX_S16 *pSrc,
     OMX_S16 * pDst,
     OMX_S16 * pRec,
     OMX_U8 QP,
     OMX_INT shortVideoHeader
)
{
    /* 64 elements are needed but to align it to 16 bytes need
    8 more elements of padding */
    OMX_S16 tempBuffer[72];
    OMX_S16 *pTempBuffer;
    OMX_INT i;
    OMXResult errorCode;

    /* Aligning the local buffers */
    pTempBuffer = armAlignTo16Bytes(tempBuffer);

    /* Argument error checks */
    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pRec == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pSrc), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pRec), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
    armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr);

    /* Forward transform */
    errorCode = omxVCM4P2_DCT8x8blk (pSrc, pDst);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Quantization, in place in the caller's coefficient buffer */
    errorCode = omxVCM4P2_QuantInter_I(
     pDst,
     QP,
     shortVideoHeader);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Reconstruction works on a copy so that pDst keeps the quantized
       coefficients */
    for (i = 0; i < 64; i++)
    {
        pTempBuffer[i] = pDst[i];
    }

    /* Dequantization */
    errorCode = omxVCM4P2_QuantInvInter_I(
     pTempBuffer,
     QP);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Inverse transform */
    errorCode = omxVCM4P2_IDCT8x8blk (pTempBuffer, pRec);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    return OMX_Sts_NoErr;
}
/**
 * Function: omxVCM4P2_MotionEstimationMB
 *
 * Description:
 * Motion estimation and coding-mode decision for one 16x16 macroblock.
 * The steps, in order:
 *   1. The current macroblock is copied into a linear, 16-byte aligned
 *      16x16 buffer.
 *   2. Candidate motion vectors for MV prediction are gathered from
 *      pMBInfo[0], pMBInfo[1] and pMBInfo[3]; a NULL entry or an INTRA
 *      neighbour contributes no candidate.
 *   3. The SAD at MV (0,0) is computed. If it is zero the function returns
 *      immediately with mbType OMX_VC_INTER and pMV0[0][0] = (0,0).
 *   4. A 16x16 block match is run; its vector is stored in all four pMV0
 *      entries and its SAD in *pDstSAD.
 *   5. If pMEParams->searchEnable8x8 is set, four 8x8 block matches are run.
 *      *pDstSAD becomes the sum of the four block SADs, and mbType becomes
 *      OMX_VC_INTER4V when any of the four vectors differs from the 16x16
 *      vector.
 *   6. The sum of absolute deviations from the macroblock mean is computed;
 *      if it is <= *pDstSAD - 512 the macroblock is switched to
 *      OMX_VC_INTRA, pMV0[0][0] is zeroed, *pDstSAD receives that sum and
 *      the four block SADs are set to 0xFFFF.
 *   7. If the mode is still OMX_VC_INTER, the four block SADs are recomputed
 *      against the prediction obtained with the 16x16 vector.
 *
 * Parameters:
 * [in]  pSrcCurrBuf    current macroblock in the source plane; non-NULL,
 *                      16-byte aligned
 * [in]  srcCurrStep    source plane step; must be a multiple of 16
 * [in]  pSrcRefBuf     co-located macroblock in the reference plane;
 *                      non-NULL, 16-byte aligned
 * [in]  srcRefStep     reference plane step; must be a multiple of 16
 * [in]  pRefRect       reference region, forwarded to the block matchers;
 *                      non-NULL
 * [in]  pCurrPointPos  position of the current macroblock; non-NULL
 * [in]  pMESpec        motion estimation parameters, used as an
 *                      OMXVCM4P2MEParams; non-NULL
 * [in]  pMBInfo        array of neighbour macroblock pointers; the array
 *                      itself must be non-NULL, individual entries may be
 *                      NULL
 * [in,out] pSrcDstMBCurr  receives mbType, pMV0 and pMVPred; non-NULL
 * [out] pDstSAD        SAD of the selected mode; non-NULL
 * [out] pDstBlockSAD   array of four per-block SADs; non-NULL
 *
 * Return Value:
 * OMX_Sts_NoErr     - no error
 * OMX_Sts_BadArgErr - a required pointer is NULL, a plane pointer is not
 *                     16-byte aligned, or a step is not a multiple of 16
 */
OMXResult omxVCM4P2_MotionEstimationMB (
    const OMX_U8 *pSrcCurrBuf,
    OMX_S32 srcCurrStep,
    const OMX_U8 *pSrcRefBuf,
    OMX_S32 srcRefStep,
    const OMXRect*pRefRect,
    const OMXVCM4P2Coordinate *pCurrPointPos,
    void *pMESpec,
    const OMXVCM4P2MBInfoPtr *pMBInfo,
    OMXVCM4P2MBInfo *pSrcDstMBCurr,
    OMX_U16 *pDstSAD,
    OMX_U16 *pDstBlockSAD
)
{

    OMX_INT intraSAD, average, count, index, x, y;
    OMXVCMotionVector dstMV16x16;
    OMX_INT           dstSAD16x16;
    OMX_INT           dstSAD8x8;
    OMXVCM4P2MEParams  *pMEParams;
    OMXVCM4P2Coordinate TempCurrPointPos;
    OMXVCM4P2Coordinate *pTempCurrPointPos;
    /* 256 bytes for the macroblock plus 15 bytes of slack for 16-byte
       alignment */
    OMX_U8 aTempSrcCurrBuf[271];
    OMX_U8 *pTempSrcCurrBuf;
    OMX_U8 *pDst;
    /* 64 bytes for one 8x8 block plus 7 bytes of slack for 8-byte
       alignment */
    OMX_U8 aDst[71];
    OMX_S32 dstStep = 8;
    OMX_INT predictType;
    OMX_S32 Sad;
    const OMX_U8 *pTempSrcRefBuf;
    /* Walks over the four 8x8 sub-blocks of the reference macroblock so that
       pSrcRefBuf itself keeps addressing the macroblock origin */
    const OMX_U8 *pSrcRefBuf8x8;
    OMXVCMotionVector* pSrcCandMV1[4];
    OMXVCMotionVector* pSrcCandMV2[4];
    OMXVCMotionVector* pSrcCandMV3[4];

    /* Argument error checks */
    armRetArgErrIf(!armIs16ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pSrcRefBuf), OMX_Sts_BadArgErr);
    armRetArgErrIf(((srcCurrStep % 16) || (srcRefStep % 16)), OMX_Sts_BadArgErr);
    armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pSrcRefBuf == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pCurrPointPos == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pSrcDstMBCurr == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr);
    /* These three are dereferenced below and must be valid as well */
    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pMBInfo == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDstBlockSAD == NULL, OMX_Sts_BadArgErr);


    pTempCurrPointPos = &(TempCurrPointPos);
    pTempSrcCurrBuf = armAlignTo16Bytes(aTempSrcCurrBuf);
    pMEParams = (OMXVCM4P2MEParams *)pMESpec;
    /* Private copy of the position: it is moved from sub-block to sub-block
       during the 8x8 search */
    pTempCurrPointPos->x = pCurrPointPos->x;
    pTempCurrPointPos->y = pCurrPointPos->y;
    pSrcDstMBCurr->mbType = OMX_VC_INTER;

    /* Preparing a linear buffer for block match */
    for (y = 0, index = count = 0; y < 16; y++, index += srcCurrStep - 16)
    {
        for(x = 0; x < 16; x++, count++, index++)
        {
            pTempSrcCurrBuf[count] = pSrcCurrBuf[index];
        }
    }

    /* Collect, per 8x8 block position, the candidate MVs of the three
       neighbours; an absent or INTRA neighbour yields no candidate */
    for(y = 0, index = 0; y < 2; y++)
    {
        for(x = 0; x < 2; x++,index++)
        {
            if((pMBInfo[0] != NULL) && (pMBInfo[0]->mbType != OMX_VC_INTRA))
            {
               pSrcCandMV1[index] = &(pMBInfo[0]->pMV0[y][x]);
            }
            else
            {
               pSrcCandMV1[index] = NULL;
            }
            if((pMBInfo[1] != NULL) && (pMBInfo[1]->mbType != OMX_VC_INTRA))
            {
               pSrcCandMV2[index] = &(pMBInfo[1]->pMV0[y][x]);
            }
            else
            {
               pSrcCandMV2[index] = NULL;
            }
            if((pMBInfo[3] != NULL) && (pMBInfo[3]->mbType != OMX_VC_INTRA))
            {
               pSrcCandMV3[index] = &(pMBInfo[3]->pMV0[y][x]);
            }
            else
            {
               pSrcCandMV3[index] = NULL;
            }
        }
    }

    /* Calculating SAD at MV(0,0) */
    armVCCOMM_SAD(pTempSrcCurrBuf,
                  16,
                  pSrcRefBuf,
                  srcRefStep,
                  &Sad,
                  16,
                  16);
    *pDstSAD = Sad;

    /* Mode decision for NOT_CODED MB */
    if(*pDstSAD == 0)
    {
        pSrcDstMBCurr->pMV0[0][0].dx = 0;
        pSrcDstMBCurr->pMV0[0][0].dy = 0;
        *pDstSAD   = 0;
        return OMX_Sts_NoErr;
    }

    omxVCM4P2_FindMVpred(
                    &(pSrcDstMBCurr->pMV0[0][0]),
                    pSrcCandMV1[0],
                    pSrcCandMV2[0],
                    pSrcCandMV3[0],
                    &(pSrcDstMBCurr->pMVPred[0][0]),
                    NULL,
                    0);

    /* Inter 1 MV */
    armVCM4P2_BlockMatch_16x16(
        pSrcRefBuf,
        srcRefStep,
        pRefRect,
        pTempSrcCurrBuf,
        pCurrPointPos,
        &(pSrcDstMBCurr->pMVPred[0][0]),
        NULL,
        pMEParams,
        &dstMV16x16,
        &dstSAD16x16);

    /* Initialize all with 1 MV values */
    pSrcDstMBCurr->pMV0[0][0].dx = dstMV16x16.dx;
    pSrcDstMBCurr->pMV0[0][0].dy = dstMV16x16.dy;
    pSrcDstMBCurr->pMV0[0][1].dx = dstMV16x16.dx;
    pSrcDstMBCurr->pMV0[0][1].dy = dstMV16x16.dy;
    pSrcDstMBCurr->pMV0[1][0].dx = dstMV16x16.dx;
    pSrcDstMBCurr->pMV0[1][0].dy = dstMV16x16.dy;
    pSrcDstMBCurr->pMV0[1][1].dx = dstMV16x16.dx;
    pSrcDstMBCurr->pMV0[1][1].dy = dstMV16x16.dy;

    *pDstSAD   = dstSAD16x16;

    if (pMEParams->searchEnable8x8)
    {
        /* Inter 4MV */
        /* NOTE(review): every 8x8 match below is handed the start of the
           linear 16x16 copy rather than the sub-block's own offset; whether
           armVCM4P2_BlockMatch_8x8 locates the sub-block itself is not
           visible here - confirm. */

        /* The search walks a separate pointer; pSrcRefBuf must keep
           addressing the macroblock origin because the INTER block-SAD pass
           at the end of this function builds its addresses from it. */
        pSrcRefBuf8x8 = pSrcRefBuf;

        /* Block 0 (top-left) */
        armVCM4P2_BlockMatch_8x8 (pSrcRefBuf8x8,
                                      srcRefStep, pRefRect,
                                      pTempSrcCurrBuf, pTempCurrPointPos,
                                      &(pSrcDstMBCurr->pMVPred[0][0]), NULL,
                                      pMEParams, &(pSrcDstMBCurr->pMV0[0][0]),
                                      &dstSAD8x8
                                      );
        pDstBlockSAD[0] = dstSAD8x8;
        *pDstSAD = dstSAD8x8;

        /* Block 1 (top-right) */
        pTempCurrPointPos->x += 8;
        pSrcRefBuf8x8 += 8;
        omxVCM4P2_FindMVpred(
                    &(pSrcDstMBCurr->pMV0[0][1]),
                    pSrcCandMV1[1],
                    pSrcCandMV2[1],
                    pSrcCandMV3[1],
                    &(pSrcDstMBCurr->pMVPred[0][1]),
                    NULL,
                    1);

        armVCM4P2_BlockMatch_8x8 (pSrcRefBuf8x8,
                                      srcRefStep, pRefRect,
                                      pTempSrcCurrBuf, pTempCurrPointPos,
                                      &(pSrcDstMBCurr->pMVPred[0][1]), NULL,
                                      pMEParams, &(pSrcDstMBCurr->pMV0[0][1]),
                                      &dstSAD8x8
                                      );
        pDstBlockSAD[1] = dstSAD8x8;
        *pDstSAD += dstSAD8x8;

        /* Block 2 (bottom-left) */
        pTempCurrPointPos->x -= 8;
        pTempCurrPointPos->y += 8;
        pSrcRefBuf8x8 += (srcRefStep * 8) - 8;

        omxVCM4P2_FindMVpred(
                    &(pSrcDstMBCurr->pMV0[1][0]),
                    pSrcCandMV1[2],
                    pSrcCandMV2[2],
                    pSrcCandMV3[2],
                    &(pSrcDstMBCurr->pMVPred[1][0]),
                    NULL,
                    2);
        armVCM4P2_BlockMatch_8x8 (pSrcRefBuf8x8,
                                      srcRefStep, pRefRect,
                                      pTempSrcCurrBuf, pTempCurrPointPos,
                                      &(pSrcDstMBCurr->pMVPred[1][0]), NULL,
                                      pMEParams, &(pSrcDstMBCurr->pMV0[1][0]),
                                      &dstSAD8x8
                                      );
        pDstBlockSAD[2] = dstSAD8x8;
        *pDstSAD += dstSAD8x8;

        /* Block 3 (bottom-right) */
        pTempCurrPointPos->x += 8;
        pSrcRefBuf8x8 += 8;
        omxVCM4P2_FindMVpred(
                    &(pSrcDstMBCurr->pMV0[1][1]),
                    pSrcCandMV1[3],
                    pSrcCandMV2[3],
                    pSrcCandMV3[3],
                    &(pSrcDstMBCurr->pMVPred[1][1]),
                    NULL,
                    3);
        armVCM4P2_BlockMatch_8x8 (pSrcRefBuf8x8,
                                      srcRefStep, pRefRect,
                                      pTempSrcCurrBuf, pTempCurrPointPos,
                                      &(pSrcDstMBCurr->pMVPred[1][1]), NULL,
                                      pMEParams, &(pSrcDstMBCurr->pMV0[1][1]),
                                      &dstSAD8x8
                                      );
        pDstBlockSAD[3] = dstSAD8x8;
        *pDstSAD += dstSAD8x8;


        /* Checking if 4MV is equal to 1MV */
        if (
            (pSrcDstMBCurr->pMV0[0][0].dx != dstMV16x16.dx) ||
            (pSrcDstMBCurr->pMV0[0][0].dy != dstMV16x16.dy) ||
            (pSrcDstMBCurr->pMV0[0][1].dx != dstMV16x16.dx) ||
            (pSrcDstMBCurr->pMV0[0][1].dy != dstMV16x16.dy) ||
            (pSrcDstMBCurr->pMV0[1][0].dx != dstMV16x16.dx) ||
            (pSrcDstMBCurr->pMV0[1][0].dy != dstMV16x16.dy) ||
            (pSrcDstMBCurr->pMV0[1][1].dx != dstMV16x16.dx) ||
            (pSrcDstMBCurr->pMV0[1][1].dy != dstMV16x16.dy)
           )
        {
            /* select the 4 MV */
            pSrcDstMBCurr->mbType = OMX_VC_INTER4V;
        }
    }

    /* finding the error in intra mode */
    for (count = 0, average = 0; count < 256 ; count++)
    {
        average = average + pTempSrcCurrBuf[count];
    }
    average = average/256;

    intraSAD = 0;

    /* Intra SAD calculation */
    for (count = 0; count < 256 ; count++)
    {
        intraSAD += armAbs ((pTempSrcCurrBuf[count]) - (average));
    }

    /* Using the MPEG4 VM formula for intra/inter mode decision
       Var < (SAD - 2*NB) where NB = N^2 is the number of pixels
       of the macroblock.*/

    if (intraSAD <= (*pDstSAD - 512))
    {
        pSrcDstMBCurr->mbType = OMX_VC_INTRA;
        pSrcDstMBCurr->pMV0[0][0].dx = 0;
        pSrcDstMBCurr->pMV0[0][0].dy = 0;
        *pDstSAD   = intraSAD;
        pDstBlockSAD[0] = 0xFFFF;
        pDstBlockSAD[1] = 0xFFFF;
        pDstBlockSAD[2] = 0xFFFF;
        pDstBlockSAD[3] = 0xFFFF;
    }

    if(pSrcDstMBCurr->mbType == OMX_VC_INTER)
    {
      /* NOTE(review): dx/dy are applied as whole-pixel offsets here although
         their low bit selects half-pel interpolation just below; confirm the
         units produced by armVCM4P2_BlockMatch_16x16. */
      pTempSrcRefBuf = pSrcRefBuf + (srcRefStep * dstMV16x16.dy) + dstMV16x16.dx;

      /* The low bit of each vector component selects the interpolation */
      if((dstMV16x16.dx & 0x1) && (dstMV16x16.dy & 0x1))
      {
        predictType = OMX_VC_HALF_PIXEL_XY;
      }
      else if(dstMV16x16.dx & 0x1)
      {
        predictType = OMX_VC_HALF_PIXEL_X;
      }
      else if(dstMV16x16.dy & 0x1)
      {
        predictType = OMX_VC_HALF_PIXEL_Y;
      }
      else
      {
        predictType = OMX_VC_INTEGER_PIXEL;
      }

      pDst = armAlignTo8Bytes(&(aDst[0]));
      /* Calculating Block SAD at MV(dstMV16x16.dx,dstMV16x16.dy) */
      /* Block 0 */
      omxVCM4P2_MCReconBlock(pTempSrcRefBuf,
                                 srcRefStep,
                                 NULL,
                                 pDst,
                                 dstStep,
                                 predictType,
                                 pMEParams->rndVal);

      armVCCOMM_SAD(pTempSrcCurrBuf,
                        16,
                        pDst,
                        dstStep,
                        &Sad,
                        8,
                        8);
      pDstBlockSAD[0] = Sad;

      /* Block 1 */
      omxVCM4P2_MCReconBlock(pTempSrcRefBuf + 8,
                                 srcRefStep,
                                 NULL,
                                 pDst,
                                 dstStep,
                                 predictType,
                                 pMEParams->rndVal);

      armVCCOMM_SAD(pTempSrcCurrBuf + 8,
                        16,
                        pDst,
                        dstStep,
                        &Sad,
                        8,
                        8);
      pDstBlockSAD[1] = Sad;

      /* Block 2 */
      omxVCM4P2_MCReconBlock(pTempSrcRefBuf + (srcRefStep*8),
                                 srcRefStep,
                                 NULL,
                                 pDst,
                                 dstStep,
                                 predictType,
                                 pMEParams->rndVal);

      armVCCOMM_SAD(pTempSrcCurrBuf + (16*8),
                        16,
                        pDst,
                        dstStep,
                        &Sad,
                        8,
                        8);
      pDstBlockSAD[2] = Sad;

      /* Block 3 */
      omxVCM4P2_MCReconBlock(pTempSrcRefBuf + (srcRefStep*8) + 8,
                                 srcRefStep,
                                 NULL,
                                 pDst,
                                 dstStep,
                                 predictType,
                                 pMEParams->rndVal);

      armVCCOMM_SAD(pTempSrcCurrBuf + (16*8) + 8,
                        16,
                        pDst,
                        dstStep,
                        &Sad,
                        8,
                        8);
      pDstBlockSAD[3] = Sad;
    }
    return OMX_Sts_NoErr;
}
/**
 * Function: omxVCM4P2_TransRecBlockCoef_intra
 *
 * Description:
 * Transforms and reconstructs one INTRA block. The 8x8 source pixels are
 * gathered into a linear buffer, run through the forward DCT and quantized.
 * The quantized coefficients are written to pDst, AC/DC prediction is
 * applied to a working copy, and a second copy is inverse quantized and
 * inverse transformed to produce the reconstructed pixels in pRec.
 *
 * Parameters:
 * [in]  pSrc              source pixels; non-NULL, 8-byte aligned
 * [in]  pPredBufRow       row prediction buffer, forwarded to
 *                         armVCM4P2_SetPredDir and armVCM4P2_ACDCPredict;
 *                         non-NULL
 * [in]  pPredBufCol       column prediction buffer, forwarded likewise;
 *                         non-NULL
 * [in]  blockIndex        block number within the macroblock, 0..9; values
 *                         0..3 select luminance, anything above selects
 *                         chrominance
 * [in]  curQp             quantization parameter of the current block,
 *                         must lie in [1, 31]
 * [in]  pQpBuf            QP buffer forwarded to armVCM4P2_SetPredDir;
 *                         non-NULL
 * [in]  srcStep           row stride used to read pSrc; positive multiple
 *                         of 8
 * [in]  dstStep           row stride used to write pDst; positive multiple
 *                         of 8
 * [in]  shortVideoHeader  forwarded to the quantization routines
 * [in,out] pSumErr        a negative value on entry disables AC prediction;
 *                         also passed to armVCM4P2_ACDCPredict; non-NULL
 * [out] pDst              quantized coefficients, written row by row with
 *                         stride dstStep; non-NULL, 16-byte aligned
 * [out] pRec              64 reconstructed pixels, written contiguously;
 *                         non-NULL, 8-byte aligned
 * [out] pPreACPredict     forwarded to armVCM4P2_ACDCPredict; non-NULL
 *
 * Return Value:
 * OMX_Sts_NoErr     - no error
 * OMX_Sts_BadArgErr - an argument fails the checks above, or the predictor
 *                     QP chosen by armVCM4P2_SetPredDir is outside [1, 31]
 * Any other status  - reported by the DCT, quantization, inverse
 *                     quantization or IDCT stage and returned unchanged
 */
OMXResult omxVCM4P2_TransRecBlockCoef_intra(
     const OMX_U8 *pSrc,
     OMX_S16 * pDst,
     OMX_U8 * pRec,
     OMX_S16 *pPredBufRow,
     OMX_S16 *pPredBufCol,
     OMX_S16 * pPreACPredict,
     OMX_INT *pSumErr,
     OMX_INT blockIndex,
     OMX_U8 curQp,
     const OMX_U8 *pQpBuf,
     OMX_INT srcStep,
     OMX_INT dstStep,
     OMX_INT shortVideoHeader
)
{
    /* 64 elements are needed per buffer; the extra elements leave room to
       align each one to 16 bytes */
    OMX_S16 tempBuf1[79], tempBuf2[79];
    OMX_S16 tempBuf3[79];
    OMX_S16 *pTempBuf1, *pTempBuf2,*pTempBuf3;
    OMXVCM4P2VideoComponent videoComp;
    OMX_U8  flag;
    OMX_INT x, y, count, predDir;
    OMX_INT predQP, ACPredFlag;
    OMXResult errorCode;


    /* Aligning the local buffers */
    pTempBuf1 = armAlignTo16Bytes(tempBuf1);
    pTempBuf2 = armAlignTo16Bytes(tempBuf2);
    pTempBuf3 = armAlignTo16Bytes(tempBuf3);

    /* Argument error checks */
    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pRec == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs8ByteAligned(pRec), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
    armRetArgErrIf(pPredBufRow == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pPredBufCol == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pPreACPredict == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pSumErr == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pQpBuf == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf((srcStep <= 0) || (dstStep <= 0) ||
                (dstStep & 7) || (srcStep & 7)
                , OMX_Sts_BadArgErr);
    armRetArgErrIf((blockIndex < 0) || (blockIndex > 9), OMX_Sts_BadArgErr);

    armRetArgErrIf((curQp <= 0) || (curQp >=32), OMX_Sts_BadArgErr);


   /* Setting the videoComp */
    if (blockIndex <= 3)
    {
        videoComp = OMX_VC_LUMINANCE;
    }
    else
    {
        videoComp = OMX_VC_CHROMINANCE;
    }
    /* Converting from 2-d to 1-d buffer */
    for (y = 0, count = 0; y < 8; y++)
    {
        for(x= 0; x < 8; x++, count++)
        {
            pTempBuf1[count] = pSrc[(y*srcStep) + x];
        }
    }

    /* Forward transform */
    errorCode = omxVCM4P2_DCT8x8blk  (pTempBuf1, pTempBuf2);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Quantization, in place */
    errorCode = omxVCM4P2_QuantIntra_I(
        pTempBuf2,
        curQp,
        blockIndex,
        shortVideoHeader);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Converting from 1-D to 2-D buffer */
    /* NOTE(review): pDst is written with stride dstStep while pRec (below)
       is written contiguously; confirm this is the intended layout of the
       two outputs. */
    for (y = 0, count = 0; y < 8; y++)
    {
        for(x = 0; x < 8; x++, count++)
        {
            /* storing tempbuf2 to tempbuf1: the copy is reconstructed
               later, after tempbuf2 has been through AC/DC prediction */
            pTempBuf1[count] = pTempBuf2[count];
            pDst[(y*dstStep) + x] = pTempBuf2[count];
        }
    }

    /* AC and DC prediction */
    armVCM4P2_SetPredDir(
        blockIndex,
        pPredBufRow,
        pPredBufCol,
        &predDir,
        &predQP,
        pQpBuf);

    armRetDataErrIf(((predQP <= 0) || (predQP >= 32)), OMX_Sts_BadArgErr);

    flag = 1;
    /* A negative running error sum on entry switches AC prediction off */
    if (*pSumErr < 0)
    {
        ACPredFlag = 0;
    }
    else
    {
        ACPredFlag = 1;
    }

    armVCM4P2_ACDCPredict(
        pTempBuf2,
        pPreACPredict,
        pPredBufRow,
        pPredBufCol,
        curQp,
        predQP,
        predDir,
        ACPredFlag,
        videoComp,
        flag,
        pSumErr);

    /* Reconstructing the texture data */
    errorCode = omxVCM4P2_QuantInvIntra_I(
        pTempBuf1,
        curQp,
        videoComp,
        shortVideoHeader);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    errorCode = omxVCM4P2_IDCT8x8blk (pTempBuf1, pTempBuf3);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Only the lower bound is clamped here; assumes the IDCT output never
       exceeds 255 - TODO confirm */
    for(count = 0; count < 64; count++)
    {
        pRec[count] = armMax(0,pTempBuf3[count]);
    }

    return OMX_Sts_NoErr;
}
/**
 * Function: omxVCM4P2_DecodeBlockCoef_Intra
 *
 * Description:
 * Decodes one INTRA block. The prediction direction and predictor QP are
 * obtained from armVCM4P2_SetPredDir, the coefficients are VLC-decoded (DC
 * VLC or AC VLC variant, chosen by intraDCVLC), AC/DC prediction is undone,
 * and the block is inverse quantized and inverse transformed. The result is
 * stored into the destination plane through armVCM4P2_Clip8.
 *
 * Parameters:
 * [in,out] ppBitStream    bit stream position, forwarded to the VLC decoder
 * [in,out] pBitOffset     bit offset within **ppBitStream, forwarded to the
 *                         VLC decoder
 * [out] pDst              destination plane, written by armVCM4P2_Clip8
 * [in]  step              destination step, forwarded to armVCM4P2_Clip8
 * [in,out] pCoefBufRow    row coefficient buffer used for prediction
 * [in,out] pCoefBufCol    column coefficient buffer used for prediction
 * [in]  curQP             quantization parameter of the current block
 * [in]  pQPBuf            QP buffer forwarded to armVCM4P2_SetPredDir
 * [in]  blockIndex        block number; 0..3 select luminance, anything
 *                         above selects chrominance
 * [in]  intraDCVLC        1 selects the IntraDCVLC decoder, any other value
 *                         the IntraACVLC decoder
 * [in]  ACPredFlag        0 disables AC prediction during VLC decoding
 * [in]  shortVideoHeader  forwarded to the VLC decoder and the inverse
 *                         quantizer
 *
 * Return Value:
 * OMX_Sts_NoErr on success; otherwise the status of the first stage that
 * did not report OMX_Sts_NoErr.
 */
OMXResult omxVCM4P2_DecodeBlockCoef_Intra(
     const OMX_U8 ** ppBitStream,
     OMX_INT *pBitOffset,
     OMX_U8 *pDst,
     OMX_INT step,
     OMX_S16 *pCoefBufRow,
     OMX_S16 *pCoefBufCol,
     OMX_U8 curQP,
     const OMX_U8 *pQPBuf,
     OMX_INT blockIndex,
     OMX_INT intraDCVLC,
     OMX_INT ACPredFlag,
     OMX_INT shortVideoHeader
 )
{
    /* Backing stores for the coefficient and pixel work buffers, each with
       room to move its start up to a 16-byte boundary */
    OMX_S16 coefStorage[79], pixelStorage[79];
    OMX_S16 *pCoef = armAlignTo16Bytes(coefStorage);
    OMX_S16 *pPixels = armAlignTo16Bytes(pixelStorage);
    OMX_INT dcPredDir, acPredDir;
    OMX_INT neighbourQP;
    OMXVCM4P2VideoComponent component;
    OMXResult status;

    /* Prediction direction and predictor QP for this block */
    armVCM4P2_SetPredDir(blockIndex,
                         pCoefBufRow,
                         pCoefBufCol,
                         &dcPredDir,
                         &neighbourQP,
                         pQPBuf);

    /* AC prediction follows the DC direction unless it is switched off */
    acPredDir = (ACPredFlag == 0) ? OMX_VC_NONE : dcPredDir;

    /* The first four blocks are luminance, the rest chrominance */
    component = (blockIndex <= 3) ? OMX_VC_LUMINANCE : OMX_VC_CHROMINANCE;

    /* VLC decode and inverse zigzag, using the decoder variant requested */
    if (intraDCVLC == 1)
    {
        status = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC(ppBitStream,
                                                      pBitOffset,
                                                      pCoef,
                                                      acPredDir,
                                                      shortVideoHeader,
                                                      component);
    }
    else
    {
        status = omxVCM4P2_DecodeVLCZigzag_IntraACVLC(ppBitStream,
                                                      pBitOffset,
                                                      pCoef,
                                                      acPredDir,
                                                      shortVideoHeader);
    }
    armRetDataErrIf((status != OMX_Sts_NoErr), status);

    /* Undo AC/DC prediction */
    status = omxVCM4P2_PredictReconCoefIntra(pCoef,
                                             pCoefBufRow,
                                             pCoefBufCol,
                                             curQP,
                                             neighbourQP,
                                             dcPredDir,
                                             ACPredFlag,
                                             component);
    armRetDataErrIf((status != OMX_Sts_NoErr), status);

    /* Inverse quantization, in place */
    status = omxVCM4P2_QuantInvIntra_I(pCoef,
                                       curQP,
                                       component,
                                       shortVideoHeader);
    armRetDataErrIf((status != OMX_Sts_NoErr), status);

    /* Inverse DCT into the pixel work buffer */
    status = omxVCM4P2_IDCT8x8blk(pCoef, pPixels);
    armRetDataErrIf((status != OMX_Sts_NoErr), status);

    /* Store the linear result into the destination plane, clipped to
       0..255 */
    armVCM4P2_Clip8(pPixels, pDst, step);

    return OMX_Sts_NoErr;
}