/**
 * Function: omxVCM4P2_DecodeBlockCoef_Inter
 *
 * Description:
 * Decodes the INTER block coefficients. Inverse quantization, inversely zigzag
 * positioning and IDCT, with appropriate clipping on each step, are performed
 * on the coefficients. The results (residuals) are placed in a contiguous array
 * of 64 elements. For INTER block, the output buffer holds the residuals for
 * further reconstruction.
 *
 * Remarks:
 *
 * Parameters:
 * [in]	ppBitStream		pointer to the pointer to the current byte in
 *								the bit stream buffer. There is no boundary
 *								check for the bit stream buffer.
 * [in]	pBitOffset		pointer to the bit position in the byte pointed
 *								to by *ppBitStream. *pBitOffset is valid within
 *								[0-7]
 * [in]	QP				quantization parameter
 * [in] shortVideoHeader    a flag indicating presence of short_video_header;
 *                           shortVideoHeader==1 selects the quantization method defined in short
 *                           video header mode, and shortVideoHeader==0 selects the normal quantization method.
 * [out] ppBitStream 	*ppBitStream is updated after the block is decoded, so that it points to the
 *                      current byte in the bit stream buffer.
 * [out] pBitOffset		*pBitOffset is updated so that it points to the current bit position in the
 *                      byte pointed by *ppBitStream
 * [out] pDst			pointer to the decoded residual buffer (a contiguous array of 64 elements of
 *                      OMX_S16 data type). Must be 16-byte aligned.
 *
 * Return Value:
 * OMX_Sts_NoErr - no error
 * OMX_Sts_BadArgErr - bad arguments
 *   - At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst
 *   - At least one of the following conditions holds:
 *   - *pBitOffset is outside [0,7], or QP <= 0;
 *	 - pDst is not 16-byte aligned
 * OMX_Sts_Err - status error
 *
 */
OMXResult omxVCM4P2_DecodeBlockCoef_Inter(
     const OMX_U8 ** ppBitStream,
     OMX_INT * pBitOffset,
     OMX_S16 * pDst,
     OMX_INT QP,
     OMX_INT shortVideoHeader
)
{
    /* Scratch storage for the 64 decoded coefficients.  The 15 spare
       elements leave room for the 16-byte aligned pointer derived below to
       start past the beginning of the array. */
    OMX_S16 tempBuf[79];
    OMX_S16 *pTempBuf1;
    OMXResult errorCode;

    /* Argument error checks.
       The conditions documented for OMX_Sts_BadArgErr are tested here, before
       the VLC decoder is handed ppBitStream / pBitOffset to update, so that a
       bad pDst or QP is reported without first advancing the stream position. */
    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset > 7), OMX_Sts_BadArgErr);
    armRetArgErrIf(QP <= 0, OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);

    /* Aligning the local buffers */
    pTempBuf1 = armAlignTo16Bytes(tempBuf);

    /* VLD and zigzag: fills the scratch buffer from the bit stream and
       updates *ppBitStream / *pBitOffset */
    errorCode = omxVCM4P2_DecodeVLCZigzag_Inter(ppBitStream, pBitOffset,
                                        pTempBuf1,shortVideoHeader);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Dequantization, performed in place on the scratch buffer */
    errorCode = omxVCM4P2_QuantInvInter_I(
     pTempBuf1,
     QP);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Inverse transform from the scratch buffer into the caller's buffer */
    errorCode = omxVCM4P2_IDCT8x8blk(pTempBuf1, pDst);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    return OMX_Sts_NoErr;
}
/**
 * Function: omxVCM4P2_TransRecBlockCoef_inter
 *
 * Description:
 * Transforms and quantizes one INTER block, then reconstructs it from the
 * quantized coefficients.  pSrc is passed through omxVCM4P2_DCT8x8blk into
 * pDst, pDst is quantized in place by omxVCM4P2_QuantInter_I, and a copy of
 * the 64 quantized coefficients is run through omxVCM4P2_QuantInvInter_I and
 * omxVCM4P2_IDCT8x8blk to produce pRec.
 *
 * Parameters:
 * [in]  pSrc              source block; must be 16-byte aligned
 * [in]  QP                quantization parameter, valid range 1..31
 * [in]  shortVideoHeader  forwarded unchanged to omxVCM4P2_QuantInter_I
 * [out] pDst              quantized coefficients (64 elements are copied
 *                         from it); must be 16-byte aligned
 * [out] pRec              reconstructed block; must be 16-byte aligned
 *
 * Return Value:
 * OMX_Sts_NoErr     - no error
 * OMX_Sts_BadArgErr - pSrc, pDst or pRec is NULL or not 16-byte aligned,
 *                     or QP is outside 1..31
 *
 * Remarks:
 * The status codes returned by the four processing stages are not examined.
 */
OMXResult omxVCM4P2_TransRecBlockCoef_inter(
     const OMX_S16 *pSrc,
     OMX_S16 * pDst,
     OMX_S16 * pRec,
     OMX_U8 QP,
     OMX_INT shortVideoHeader
)
{
    /* 64 coefficients plus 8 spare elements so that a 16-byte aligned
       pointer can be carved out of the array */
    OMX_S16 scratch[72];
    OMX_S16 *pCoef;
    OMX_S16 *pFrom;
    OMX_S16 *pTo;
    OMX_INT remaining;

    /* Aligned view of the scratch storage */
    pCoef = armAlignTo16Bytes(scratch);

    /* Reject NULL pointers first, then misaligned ones, then a bad QP */
    armRetArgErrIf((pSrc == NULL) || (pRec == NULL) || (pDst == NULL),
                   OMX_Sts_BadArgErr);
    armRetArgErrIf(!(armIs16ByteAligned(pSrc) &&
                     armIs16ByteAligned(pRec) &&
                     armIs16ByteAligned(pDst)),
                   OMX_Sts_BadArgErr);
    armRetArgErrIf(!((QP > 0) && (QP < 32)), OMX_Sts_BadArgErr);

    /* Forward path: transform, then quantize in place in pDst */
    omxVCM4P2_DCT8x8blk (pSrc, pDst);
    omxVCM4P2_QuantInter_I(pDst, QP, shortVideoHeader);

    /* Keep pDst intact for the caller: reconstruct from a private copy */
    pFrom = pDst;
    pTo = pCoef;
    for (remaining = 64; remaining > 0; remaining--)
    {
        *pTo++ = *pFrom++;
    }

    /* Reconstruction path: dequantize the copy, inverse transform into pRec */
    omxVCM4P2_QuantInvInter_I(pCoef, QP);
    omxVCM4P2_IDCT8x8blk (pCoef, pRec);

    return OMX_Sts_NoErr;
}
/**
 * Function: omxVCM4P2_TransRecBlockCoef_intra
 *
 * Description:
 * Transforms and quantizes one INTRA 8x8 block, runs AC/DC prediction on the
 * quantized coefficients, and reconstructs the block from the quantized
 * coefficients.
 *
 * Parameters:
 * [in]  pSrc              top-left sample of the source block; rows are
 *                         srcStep elements apart; must be 8-byte aligned
 * [in]  blockIndex        block number, 0..9; values 0..3 are treated as
 *                         luminance, all others as chrominance
 * [in]  curQp             quantization parameter, valid range 1..31
 * [in]  pQpBuf            passed to armVCM4P2_SetPredDir, which supplies the
 *                         predictor QP
 * [in]  srcStep           row stride of pSrc; positive multiple of 8
 * [in]  dstStep           row stride used when writing pDst; positive
 *                         multiple of 8
 * [in]  shortVideoHeader  forwarded to the quantization routines
 * [in,out] pPredBufRow    row prediction buffer (passed to the prediction helpers)
 * [in,out] pPredBufCol    column prediction buffer (passed to the prediction helpers)
 * [in,out] pSumErr        on entry, a negative value turns AC prediction
 *                         off; also passed to armVCM4P2_ACDCPredict
 * [out] pDst              quantized coefficients, written as 8 rows of 8
 *                         with stride dstStep; must be 16-byte aligned
 * [out] pRec              reconstructed samples, written as 64 contiguous
 *                         bytes clipped to 0..255; must be 8-byte aligned
 * [out] pPreACPredict     passed to armVCM4P2_ACDCPredict
 *
 * Return Value:
 * OMX_Sts_NoErr     - no error
 * OMX_Sts_BadArgErr - a pointer argument is NULL, an alignment or step
 *                     constraint above is violated, blockIndex or curQp is
 *                     out of range, or the predictor QP obtained from
 *                     armVCM4P2_SetPredDir is outside 1..31 (in this last
 *                     case pDst has already been written)
 *
 * Remarks:
 * The status codes returned by the transform, quantization and prediction
 * stages are not examined.
 */
OMXResult omxVCM4P2_TransRecBlockCoef_intra(
     const OMX_U8 *pSrc,
     OMX_S16 * pDst,
     OMX_U8 * pRec,
     OMX_S16 *pPredBufRow,
     OMX_S16 *pPredBufCol,
     OMX_S16 * pPreACPredict,
     OMX_INT *pSumErr,
     OMX_INT blockIndex,
     OMX_U8 curQp,
     const OMX_U8 *pQpBuf,
     OMX_INT srcStep,
     OMX_INT dstStep,
	 OMX_INT shortVideoHeader
)
{
    /* 64 elements are needed per buffer; the 15 spare elements leave room
    for a 16-byte aligned pointer to be derived from each array */
    OMX_S16 tempBuf1[79], tempBuf2[79];
    OMX_S16 tempBuf3[79];
    OMX_S16 *pTempBuf1, *pTempBuf2,*pTempBuf3;
    OMXVCM4P2VideoComponent videoComp;
    OMX_U8  flag;
    OMX_INT x, y, count, predDir;
    OMX_INT predQP, ACPredFlag;
    OMX_INT sample;


    /* Aligning the local buffers */
    pTempBuf1 = armAlignTo16Bytes(tempBuf1);
    pTempBuf2 = armAlignTo16Bytes(tempBuf2);
    pTempBuf3 = armAlignTo16Bytes(tempBuf3);

    /* Argument error checks */
    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pRec == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs8ByteAligned(pRec), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
    armRetArgErrIf(pPredBufRow == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pPredBufCol == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pPreACPredict == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pSumErr == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pQpBuf == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf((srcStep <= 0) || (dstStep <= 0) ||
                (dstStep & 7) || (srcStep & 7)
                , OMX_Sts_BadArgErr);
    armRetArgErrIf((blockIndex < 0) || (blockIndex > 9), OMX_Sts_BadArgErr);

    armRetArgErrIf((curQp <= 0) || (curQp >=32), OMX_Sts_BadArgErr);


   /* Setting the videoComp: blocks 0..3 are luminance, the rest chrominance */
    if (blockIndex <= 3)
    {
        videoComp = OMX_VC_LUMINANCE;
    }
    else
    {
        videoComp = OMX_VC_CHROMINANCE;
    }
    /* Converting from 2-d to 1-d buffer: gather the 8x8 source block,
       widening each byte to OMX_S16 */
    for (y = 0, count = 0; y < 8; y++)
    {
        for(x= 0; x < 8; x++, count++)
        {
            pTempBuf1[count] = pSrc[(y*srcStep) + x];
        }
    }

    /* Forward transform into pTempBuf2, then quantize it in place */
    omxVCM4P2_DCT8x8blk  (pTempBuf1, pTempBuf2);
    omxVCM4P2_QuantIntra_I(
        pTempBuf2,
        curQp,
        blockIndex,
        shortVideoHeader);

    /* Converting from 1-D to 2-D buffer */
    for (y = 0, count = 0; y < 8; y++)
    {
        for(x = 0; x < 8; x++, count++)
        {
            /* pTempBuf1 keeps an unmodified copy of the quantized
               coefficients for the reconstruction step further down;
               pTempBuf2 itself is handed to the prediction helper */
            pTempBuf1[count] = pTempBuf2[count];
            pDst[(y*dstStep) + x] = pTempBuf2[count];
        }
    }

    /* AC and DC prediction: obtain the prediction direction and the
       predictor's QP */
    armVCM4P2_SetPredDir(
        blockIndex,
        pPredBufRow,
        pPredBufCol,
        &predDir,
        &predQP,
        pQpBuf);

    armRetDataErrIf(((predQP <= 0) || (predQP >= 32)), OMX_Sts_BadArgErr);

    flag = 1;
    /* A negative *pSumErr on entry disables AC prediction */
    if (*pSumErr < 0)
    {
        ACPredFlag = 0;
    }
    else
    {
        ACPredFlag = 1;
    }

    armVCM4P2_ACDCPredict(
        pTempBuf2,
        pPreACPredict,
        pPredBufRow,
        pPredBufCol,
        curQp,
        predQP,
        predDir,
        ACPredFlag,
        videoComp,
        flag,
        pSumErr);

    /* Reconstructing the texture data from the saved quantized coefficients */
    omxVCM4P2_QuantInvIntra_I(
        pTempBuf1,
        curQp,
        videoComp,
        shortVideoHeader);
    omxVCM4P2_IDCT8x8blk (pTempBuf1, pTempBuf3);

    /* Clip to the 0..255 range of OMX_U8 before narrowing.  Clamping only
       the lower bound would let any value above 255 wrap around when it is
       stored into the 8-bit destination. */
    for(count = 0; count < 64; count++)
    {
        sample = pTempBuf3[count];
        if (sample < 0)
        {
            sample = 0;
        }
        else if (sample > 255)
        {
            sample = 255;
        }
        pRec[count] = (OMX_U8)sample;
    }

    return OMX_Sts_NoErr;
}
/**
 * Function: omxVCM4P2_DecodeBlockCoef_Intra
 *
 * Description:
 * Decodes the coefficients of one INTRA block: VLC decoding with inverse
 * zigzag, AC/DC prediction, dequantization and inverse transform.  The
 * result is clipped to 0..255 and stored in the destination plane.
 *
 * Parameters:
 * [in,out] ppBitStream    pointer to the pointer to the current byte in the
 *                         bit stream; passed to the VLC decoder
 * [in,out] pBitOffset     pointer to the bit position within that byte;
 *                         passed to the VLC decoder
 * [out] pDst              destination plane for the reconstructed block
 * [in]  step              row stride of the destination plane
 * [in,out] pCoefBufRow    row coefficient buffer used for prediction
 * [in,out] pCoefBufCol    column coefficient buffer used for prediction
 * [in]  curQP             quantization parameter of the current block
 * [in]  pQPBuf            passed to armVCM4P2_SetPredDir, which supplies the
 *                         predictor QP
 * [in]  blockIndex        block number; values 0..3 are treated as
 *                         luminance, all others as chrominance
 * [in]  intraDCVLC        1 selects omxVCM4P2_DecodeVLCZigzag_IntraDCVLC,
 *                         any other value omxVCM4P2_DecodeVLCZigzag_IntraACVLC
 * [in]  ACPredFlag        0 disables AC prediction for the VLC/zigzag stage
 * [in]  shortVideoHeader  forwarded to the VLC and dequantization stages
 *
 * Return Value:
 * OMX_Sts_NoErr     - no error
 * OMX_Sts_BadArgErr - pDst, pCoefBufRow, pCoefBufCol or pQPBuf is NULL
 * Otherwise the first non-OMX_Sts_NoErr status returned by a processing
 * stage is passed back unchanged.
 */
OMXResult omxVCM4P2_DecodeBlockCoef_Intra(
     const OMX_U8 ** ppBitStream,
     OMX_INT *pBitOffset,
     OMX_U8 *pDst,
     OMX_INT step,
     OMX_S16 *pCoefBufRow,
     OMX_S16 *pCoefBufCol,
     OMX_U8 curQP,
     const OMX_U8 *pQPBuf,
     OMX_INT blockIndex,
     OMX_INT intraDCVLC,
     OMX_INT ACPredFlag,
	 OMX_INT shortVideoHeader
 )
{
    /* 64 elements are needed per buffer; the 15 spare elements leave room
       for a 16-byte aligned pointer to be derived from each array */
    OMX_S16 tempBuf1[79], tempBuf2[79];
    OMX_S16 *pTempBuf1, *pTempBuf2;
    OMX_INT predDir, predACDir;
    OMX_INT  predQP;
    OMXVCM4P2VideoComponent videoComp;
    OMXResult errorCode;

    /* Argument error checks.
       The prediction buffers and the QP buffer are handed to
       armVCM4P2_SetPredDir before any of the checked omxVCM4P2_* stages
       run, and pDst is handed to armVCM4P2_Clip8 at the very end, so NULL
       is rejected here rather than relying on those helpers. */
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pCoefBufRow == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pCoefBufCol == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pQPBuf == NULL, OMX_Sts_BadArgErr);

    /* Aligning the local buffers */
    pTempBuf1 = armAlignTo16Bytes(tempBuf1);
    pTempBuf2 = armAlignTo16Bytes(tempBuf2);

    /* Setting the AC prediction direction and prediction direction */
    armVCM4P2_SetPredDir(
        blockIndex,
        pCoefBufRow,
        pCoefBufCol,
        &predDir,
        &predQP,
        pQPBuf);

    /* The VLC/zigzag stage gets the prediction direction only when AC
       prediction is enabled; the prediction stage below always gets predDir */
    predACDir = predDir;

    if (ACPredFlag == 0)
    {
        predACDir = OMX_VC_NONE;
    }

    /* Setting the videoComp: blocks 0..3 are luminance, the rest chrominance */
    if (blockIndex <= 3)
    {
        videoComp = OMX_VC_LUMINANCE;
    }
    else
    {
        videoComp = OMX_VC_CHROMINANCE;
    }


    /* VLD and zigzag */
    if (intraDCVLC == 1)
    {
        errorCode = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC(
            ppBitStream,
            pBitOffset,
            pTempBuf1,
            predACDir,
            shortVideoHeader,
            videoComp);
        armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);
    }
    else
    {
        errorCode = omxVCM4P2_DecodeVLCZigzag_IntraACVLC(
            ppBitStream,
            pBitOffset,
            pTempBuf1,
            predACDir,
            shortVideoHeader);
        armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);
    }

    /* AC DC prediction */
    errorCode = omxVCM4P2_PredictReconCoefIntra(
        pTempBuf1,
        pCoefBufRow,
        pCoefBufCol,
        curQP,
        predQP,
        predDir,
        ACPredFlag,
        videoComp);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Dequantization, performed in place on the scratch buffer */
    errorCode = omxVCM4P2_QuantInvIntra_I(
     pTempBuf1,
     curQP,
     videoComp,
     shortVideoHeader);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Inverse transform */
    errorCode = omxVCM4P2_IDCT8x8blk (pTempBuf1, pTempBuf2);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Placing the linear array into the destination plane and clipping
       it to 0 to 255 */

	armVCM4P2_Clip8(pTempBuf2,pDst,step);


    return OMX_Sts_NoErr;
}