/**
 * Function: omxVCM4P2_DecodeBlockCoef_Inter
 *
 * Description:
 * Decodes the INTER block coefficients. Inverse quantization, inversely zigzag
 * positioning and IDCT, with appropriate clipping on each step, are performed
 * on the coefficients. The results (residuals) are placed in a contiguous array
 * of 64 elements. For INTER block, the output buffer holds the residuals for
 * further reconstruction.
 *
 * Remarks:
 * All arguments are validated before the first decoding stage is invoked, so
 * a call rejected with OMX_Sts_BadArgErr by these checks has not been handed
 * to the VLC decoder and leaves the bit stream position as the caller set it.
 *
 * Parameters:
 * [in]  ppBitStream       pointer to the pointer to the current byte in
 *                         the bit stream buffer. There is no boundary
 *                         check for the bit stream buffer.
 * [in]  pBitOffset        pointer to the bit position in the byte pointed
 *                         to by *ppBitStream. *pBitOffset is valid within
 *                         [0-7]
 * [in]  QP                quantization parameter
 * [in]  shortVideoHeader  a flag indicating presence of short_video_header;
 *                         shortVideoHeader==1 indicates using quantization
 *                         method defined in short video header mode, and
 *                         shortVideoHeader==0 indicates normal quantization
 *                         method.
 * [out] ppBitStream       *ppBitStream is updated after the block is decoded,
 *                         so that it points to the current byte in the bit
 *                         stream buffer.
 * [out] pBitOffset        *pBitOffset is updated so that it points to the
 *                         current bit position in the byte pointed by
 *                         *ppBitStream
 * [out] pDst              pointer to the decoded residual buffer (a contiguous
 *                         array of 64 elements of OMX_S16 data type). Must be
 *                         16-byte aligned.
 *
 * Return Value:
 * OMX_Sts_NoErr - no error
 * OMX_Sts_BadArgErr - bad arguments
 *   - At least one of the following pointers is Null: ppBitStream,
 *     *ppBitStream, pBitOffset, pDst
 *   - At least one of the below case:
 *   - *pBitOffset exceeds [0,7], QP <= 0;
 *   - pDst not 16-byte aligned
 * OMX_Sts_Err - status error
 * Any other non-OMX_Sts_NoErr code reported by the VLC/zigzag, inverse
 * quantization or IDCT stage is returned unchanged.
 *
 */
OMXResult omxVCM4P2_DecodeBlockCoef_Inter(
     const OMX_U8 ** ppBitStream,
     OMX_INT * pBitOffset,
     OMX_S16 * pDst,
     OMX_INT QP,
     OMX_INT shortVideoHeader
)
{
    /* 64 elements are needed but to align it to 16 bytes need
    15 more elements of padding */
    OMX_S16 tempBuf[79];
    OMX_S16 *pTempBuf1;
    OMXResult errorCode;

    /* Argument error checks: enforce the documented contract here instead of
       relying on whichever later stage happens to notice the problem */
    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset > 7), OMX_Sts_BadArgErr);
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
    armRetArgErrIf(QP <= 0, OMX_Sts_BadArgErr);

    /* Aligning the local buffers */
    pTempBuf1 = armAlignTo16Bytes(tempBuf);

    /* VLD and zigzag: fills the aligned scratch block and advances the
       bit stream position */
    errorCode = omxVCM4P2_DecodeVLCZigzag_Inter(ppBitStream, pBitOffset,
                                        pTempBuf1, shortVideoHeader);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Dequantization, in place on the scratch block */
    errorCode = omxVCM4P2_QuantInvInter_I(
     pTempBuf1,
     QP);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Inverse transform: the only stage that is given pDst */
    errorCode = omxVCM4P2_IDCT8x8blk(pTempBuf1, pDst);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    return OMX_Sts_NoErr;
}
OMXResult armVCM4P10_DecodeCoeffsToPair(
     const OMX_U8** ppBitStream,
     OMX_S32* pOffset,
     OMX_U8* pNumCoeff,
     OMX_U8  **ppPosCoefbuf,
     OMX_INT nTable,
     OMX_INT sMaxNumCoeff        
 )
{
    int CoeffToken, TotalCoeff, TrailingOnes;
    int Level, LevelCode, LevelPrefix, LevelSuffix, LevelSuffixSize;
    int SuffixLength, Run, ZerosLeft,CoeffNum;
    int i, Flags;
    OMX_U8 *pPosCoefbuf = *ppPosCoefbuf;
    OMX_S16 pLevel[16];
    OMX_U8  pRun[16];

    CoeffToken = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCCoeffTokenTables[nTable]);
    armRetDataErrIf(CoeffToken == ARM_NO_CODEBOOK_INDEX, OMX_Sts_Err);

    TrailingOnes = armVCM4P10_CAVLCTrailingOnes[CoeffToken];
    TotalCoeff   = armVCM4P10_CAVLCTotalCoeff[CoeffToken];
    *pNumCoeff   = (OMX_U8)TotalCoeff;

    DEBUG_PRINTF_2("TotalCoeff = %d, TrailingOnes = %d\n", TotalCoeff, TrailingOnes);

    if (TotalCoeff == 0)
    {
        /* Nothing to do */
        return OMX_Sts_NoErr;
    }

    /* Decode trailing ones */
    for (i=TotalCoeff-1; i>=TotalCoeff-TrailingOnes; i--)
    {
        if (armGetBits(ppBitStream, pOffset, 1))
        {
            Level = -1;
        }
        else
        {
            Level = +1;
        }
        pLevel[i] = (OMX_S16)Level;

        DEBUG_PRINTF_2("Level[%d] = %d\n", i, pLevel[i]);
    }

    /* Decode (non zero) level values */
    SuffixLength = 0;
    if (TotalCoeff>10 && TrailingOnes<3)
    {
        SuffixLength=1;
    }
    for ( ; i>=0; i--)
    {
        LevelPrefix = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCLevelPrefix);
        armRetDataErrIf(LevelPrefix == ARM_NO_CODEBOOK_INDEX, OMX_Sts_Err);

        LevelSuffixSize = SuffixLength;
        if (LevelPrefix==14 && SuffixLength==0)
        {
            LevelSuffixSize = 4;
        }
        if (LevelPrefix==15)
        {
            LevelSuffixSize = 12;
        }
        
        LevelSuffix = 0;
        if (LevelSuffixSize > 0)
        {
            LevelSuffix = armGetBits(ppBitStream, pOffset, LevelSuffixSize);
        }

        LevelCode = (LevelPrefix << SuffixLength) + LevelSuffix;


        if (LevelPrefix==15 && SuffixLength==0)
        {
            LevelCode += 15;
        }

        /* LevelCode = 2*(magnitude-1) + sign */

        if (i==TotalCoeff-1-TrailingOnes && TrailingOnes<3)
        {
            /* Level magnitude can't be 1 */
            LevelCode += 2;
        }
        if (LevelCode & 1)
        {
            /* 2a+1 maps to -a-1 */
            Level = (-LevelCode-1)>>1;
        }
        else
        {
/**
 * Function: omxVCM4P2_TransRecBlockCoef_intra
 *
 * Description:
 * Transforms, quantizes and reconstructs one 8x8 intra block. The source
 * samples are gathered into a linear block, passed through
 * omxVCM4P2_DCT8x8blk and omxVCM4P2_QuantIntra_I, and the quantized
 * coefficients are stored to pDst. The prediction direction is chosen with
 * armVCM4P2_SetPredDir and armVCM4P2_ACDCPredict is run on a copy of the
 * coefficients. Finally the quantized coefficients are dequantized
 * (omxVCM4P2_QuantInvIntra_I), inverse transformed (omxVCM4P2_IDCT8x8blk)
 * and stored to pRec saturated to [0,255].
 *
 * Parameters:
 * [in]     pSrc             source block, read as 8 rows of 8 samples with
 *                           rows srcStep entries apart; must be non-NULL and
 *                           8-byte aligned
 * [out]    pDst             receives the 64 quantized coefficients, written
 *                           as 8 rows of 8 with rows dstStep elements apart;
 *                           must be non-NULL and 16-byte aligned
 * [out]    pRec             receives the 64 reconstructed samples as
 *                           consecutive entries; must be non-NULL and 8-byte
 *                           aligned
 * [in,out] pPredBufRow      forwarded to armVCM4P2_SetPredDir and
 *                           armVCM4P2_ACDCPredict; must be non-NULL
 * [in,out] pPredBufCol      forwarded to armVCM4P2_SetPredDir and
 *                           armVCM4P2_ACDCPredict; must be non-NULL
 * [in,out] pPreACPredict    forwarded to armVCM4P2_ACDCPredict; must be
 *                           non-NULL
 * [in,out] pSumErr          a negative value on entry disables AC prediction
 *                           for this block; also forwarded to
 *                           armVCM4P2_ACDCPredict; must be non-NULL
 * [in]     blockIndex       block index in [0,9]; 0..3 are treated as
 *                           luminance, the rest as chrominance
 * [in]     curQp            quantization parameter, valid within [1,31]
 * [in]     pQpBuf           forwarded to armVCM4P2_SetPredDir; must be
 *                           non-NULL
 * [in]     srcStep          row stride for pSrc; positive multiple of 8
 * [in]     dstStep          row stride for pDst; positive multiple of 8
 * [in]     shortVideoHeader forwarded to the quantizer and dequantizer
 *
 * Return Value:
 * OMX_Sts_NoErr - no error
 * OMX_Sts_BadArgErr - one of the argument constraints above is violated, or
 *                     the predictor QP produced by armVCM4P2_SetPredDir is
 *                     outside [1,31]
 * Any non-OMX_Sts_NoErr code reported by the DCT, quantization, inverse
 * quantization or IDCT stage is returned unchanged.
 *
 */
OMXResult omxVCM4P2_TransRecBlockCoef_intra(
     const OMX_U8 *pSrc,
     OMX_S16 * pDst,
     OMX_U8 * pRec,
     OMX_S16 *pPredBufRow,
     OMX_S16 *pPredBufCol,
     OMX_S16 * pPreACPredict,
     OMX_INT *pSumErr,
     OMX_INT blockIndex,
     OMX_U8 curQp,
     const OMX_U8 *pQpBuf,
     OMX_INT srcStep,
     OMX_INT dstStep,
     OMX_INT shortVideoHeader
)
{
    /* 64 elements are needed; the 15 extra elements of padding leave room
       to move the working pointer up to a 16-byte boundary */
    OMX_S16 tempBuf1[79], tempBuf2[79];
    OMX_S16 tempBuf3[79];
    OMX_S16 *pTempBuf1, *pTempBuf2, *pTempBuf3;
    OMXVCM4P2VideoComponent videoComp;
    OMXResult errorCode;
    OMX_U8  flag;
    OMX_INT x, y, count, predDir;
    OMX_INT predQP, ACPredFlag;
    OMX_INT sample;

    /* Aligning the local buffers */
    pTempBuf1 = armAlignTo16Bytes(tempBuf1);
    pTempBuf2 = armAlignTo16Bytes(tempBuf2);
    pTempBuf3 = armAlignTo16Bytes(tempBuf3);

    /* Argument error checks */
    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pRec == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs8ByteAligned(pRec), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
    armRetArgErrIf(pPredBufRow == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pPredBufCol == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pPreACPredict == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pSumErr == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pQpBuf == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf((srcStep <= 0) || (dstStep <= 0) ||
                (dstStep & 7) || (srcStep & 7)
                , OMX_Sts_BadArgErr);
    armRetArgErrIf((blockIndex < 0) || (blockIndex > 9), OMX_Sts_BadArgErr);

    armRetArgErrIf((curQp <= 0) || (curQp >=32), OMX_Sts_BadArgErr);


    /* Setting the videoComp */
    if (blockIndex <= 3)
    {
        videoComp = OMX_VC_LUMINANCE;
    }
    else
    {
        videoComp = OMX_VC_CHROMINANCE;
    }

    /* Converting from 2-d to 1-d buffer */
    for (y = 0, count = 0; y < 8; y++)
    {
        for(x= 0; x < 8; x++, count++)
        {
            pTempBuf1[count] = pSrc[(y*srcStep) + x];
        }
    }

    /* Forward transform and quantization; a failing stage aborts the block
       instead of letting undefined coefficients flow into pDst and pRec */
    errorCode = omxVCM4P2_DCT8x8blk(pTempBuf1, pTempBuf2);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    errorCode = omxVCM4P2_QuantIntra_I(
        pTempBuf2,
        curQp,
        blockIndex,
        shortVideoHeader);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Converting from 1-D to 2-D buffer */
    for (y = 0, count = 0; y < 8; y++)
    {
        for(x = 0; x < 8; x++, count++)
        {
            /* Keep an untouched copy of the quantized block in tempbuf1 for
               the reconstruction path; tempbuf2 is handed to the predictor */
            pTempBuf1[count] = pTempBuf2[count];
            pDst[(y*dstStep) + x] = pTempBuf2[count];
        }
    }

    /* AC and DC prediction */
    armVCM4P2_SetPredDir(
        blockIndex,
        pPredBufRow,
        pPredBufCol,
        &predDir,
        &predQP,
        pQpBuf);

    armRetDataErrIf(((predQP <= 0) || (predQP >= 32)), OMX_Sts_BadArgErr);

    flag = 1;
    /* A negative running error on entry switches AC prediction off */
    if (*pSumErr < 0)
    {
        ACPredFlag = 0;
    }
    else
    {
        ACPredFlag = 1;
    }

    /* NOTE(review): the result of armVCM4P2_ACDCPredict is not inspected;
       its return type is not visible from this file - confirm */
    armVCM4P2_ACDCPredict(
        pTempBuf2,
        pPreACPredict,
        pPredBufRow,
        pPredBufCol,
        curQp,
        predQP,
        predDir,
        ACPredFlag,
        videoComp,
        flag,
        pSumErr);

    /* Reconstructing the texture data */
    errorCode = omxVCM4P2_QuantInvIntra_I(
        pTempBuf1,
        curQp,
        videoComp,
        shortVideoHeader);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    errorCode = omxVCM4P2_IDCT8x8blk(pTempBuf1, pTempBuf3);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Saturate at both ends before narrowing to OMX_U8, so an out-of-range
       value cannot wrap around */
    for(count = 0; count < 64; count++)
    {
        sample = pTempBuf3[count];
        if (sample < 0)
        {
            sample = 0;
        }
        else if (sample > 255)
        {
            sample = 255;
        }
        pRec[count] = (OMX_U8)sample;
    }

    return OMX_Sts_NoErr;
}
/**
 * Function: armVCM4P2_DecodeVLCZigzag_Intra
 *
 * Description:
 * Validates the arguments, picks the scan table that matches predDir and
 * delegates the variable length decoding of one intra block to
 * armVCM4P2_GetVLCBits using the intra VLC tables.
 *
 * Parameters:
 * [in,out] ppBitStream      pointer to the pointer to the current byte in
 *                           the bit stream; neither may be NULL
 * [in,out] pBitOffset       pointer to the bit position inside that byte;
 *                           must be non-NULL and *pBitOffset within [0,7]
 * [out]    pDst             coefficient buffer handed to the VLC decoder;
 *                           must be non-NULL and 4-byte aligned
 * [in]     predDir          prediction direction, at most 2. OMX_VC_HORIZONTAL
 *                           selects the vertical scan table, OMX_VC_VERTICAL
 *                           the horizontal one, anything else the classical
 *                           zigzag table
 * [in]     shortVideoHeader forwarded unchanged to armVCM4P2_GetVLCBits
 * [in]     start            forwarded unchanged to armVCM4P2_GetVLCBits
 *
 * Return Value:
 * OMX_Sts_NoErr     - block decoded and the decoder reported a "last" symbol
 * OMX_Sts_BadArgErr - an argument check failed
 * OMX_Sts_Err       - decoding finished without the "last" flag being set
 * Any other non-OMX_Sts_NoErr code from armVCM4P2_GetVLCBits is returned
 * unchanged.
 *
 */
OMXResult armVCM4P2_DecodeVLCZigzag_Intra(
     const OMX_U8 ** ppBitStream,
     OMX_INT * pBitOffset,
     OMX_S16 * pDst,
     OMX_U8 predDir,
     OMX_INT shortVideoHeader,
     OMX_U8  start
)
{
    const OMX_U8 *pScanOrder;
    OMX_U8 lastSeen = 0;
    OMXResult status;

    /* Reject unusable arguments; each pointer is tested before it is
       dereferenced */
    armRetArgErrIf(((ppBitStream == NULL) || (*ppBitStream == NULL)),
                   OMX_Sts_BadArgErr);
    armRetArgErrIf((pBitOffset == NULL), OMX_Sts_BadArgErr);
    armRetArgErrIf(((pDst == NULL) || !armIs4ByteAligned(pDst)),
                   OMX_Sts_BadArgErr);
    armRetArgErrIf(((*pBitOffset < 0) || (*pBitOffset > 7)),
                   OMX_Sts_BadArgErr);
    armRetArgErrIf((predDir > 2), OMX_Sts_BadArgErr);

    /* The scan runs across the prediction direction; every other value
       falls back to the classical zigzag order */
    if (predDir == OMX_VC_HORIZONTAL)
    {
        pScanOrder = armVCM4P2_aVerticalZigzagScan;
    }
    else if (predDir == OMX_VC_VERTICAL)
    {
        pScanOrder = armVCM4P2_aHorizontalZigzagScan;
    }
    else
    {
        pScanOrder = armVCM4P2_aClassicalZigzagScan;
    }

    /* The four literals describe the layout of the intra VLC tables; their
       exact meaning is defined by armVCM4P2_GetVLCBits */
    status = armVCM4P2_GetVLCBits(
                 ppBitStream,
                 pBitOffset,
                 pDst,
                 shortVideoHeader,
                 start,
                 &lastSeen,
                 10,
                 62,
                 7,
                 21,
                 armVCM4P2_IntraL0RunIdx,
                 armVCM4P2_IntraVlcL0,
                 armVCM4P2_IntraL1RunIdx,
                 armVCM4P2_IntraVlcL1,
                 armVCM4P2_IntraL0LMAX,
                 armVCM4P2_IntraL1LMAX,
                 armVCM4P2_IntraL0RMAX,
                 armVCM4P2_IntraL1RMAX,
                 pScanOrder);
    armRetDataErrIf((status != OMX_Sts_NoErr), status);

    /* A block that never signalled its last coefficient is an error */
    return (lastSeen != 0) ? OMX_Sts_NoErr : OMX_Sts_Err;
}
/**
 * Function: omxVCM4P2_DecodeBlockCoef_Intra
 *
 * Description:
 * Decodes one INTRA block. The prediction direction is chosen with
 * armVCM4P2_SetPredDir, the coefficients are VLC decoded (DC-VLC or AC-VLC
 * variant), reconstructed with omxVCM4P2_PredictReconCoefIntra, dequantized
 * with omxVCM4P2_QuantInvIntra_I and inverse transformed with
 * omxVCM4P2_IDCT8x8blk. The result is stored to the destination plane through
 * armVCM4P2_Clip8.
 *
 * Remarks:
 * Pointer and range checks are made before any helper is called, because
 * pCoefBufRow, pCoefBufCol and pQPBuf are handed straight to
 * armVCM4P2_SetPredDir. The blockIndex and curQP limits are the ones
 * omxVCM4P2_TransRecBlockCoef_intra enforces before the same call.
 *
 * Parameters:
 * [in,out] ppBitStream      pointer to the pointer to the current byte in
 *                           the bit stream; neither may be NULL
 * [in,out] pBitOffset       pointer to the bit position inside that byte;
 *                           must be non-NULL and *pBitOffset within [0,7]
 * [out]    pDst             destination plane passed to armVCM4P2_Clip8;
 *                           must be non-NULL
 * [in]     step             destination stride passed to armVCM4P2_Clip8
 * [in,out] pCoefBufRow      coefficient row buffer used for prediction;
 *                           must be non-NULL
 * [in,out] pCoefBufCol      coefficient column buffer used for prediction;
 *                           must be non-NULL
 * [in]     curQP            quantization parameter, valid within [1,31]
 * [in]     pQPBuf           QP buffer passed to armVCM4P2_SetPredDir; must
 *                           be non-NULL
 * [in]     blockIndex       block index in [0,9]; 0..3 are treated as
 *                           luminance, the rest as chrominance
 * [in]     intraDCVLC       1 selects omxVCM4P2_DecodeVLCZigzag_IntraDCVLC,
 *                           any other value the IntraACVLC variant
 * [in]     ACPredFlag       0 forces the scan direction to OMX_VC_NONE; the
 *                           flag is also forwarded to
 *                           omxVCM4P2_PredictReconCoefIntra
 * [in]     shortVideoHeader forwarded to the VLC decoder and dequantizer
 *
 * Return Value:
 * OMX_Sts_NoErr     - no error
 * OMX_Sts_BadArgErr - one of the argument constraints above is violated
 * Any non-OMX_Sts_NoErr code reported by a decoding stage is returned
 * unchanged.
 *
 */
OMXResult omxVCM4P2_DecodeBlockCoef_Intra(
     const OMX_U8 ** ppBitStream,
     OMX_INT *pBitOffset,
     OMX_U8 *pDst,
     OMX_INT step,
     OMX_S16 *pCoefBufRow,
     OMX_S16 *pCoefBufCol,
     OMX_U8 curQP,
     const OMX_U8 *pQPBuf,
     OMX_INT blockIndex,
     OMX_INT intraDCVLC,
     OMX_INT ACPredFlag,
     OMX_INT shortVideoHeader
 )
{
    /* 64 elements are needed; the 15 extra elements of padding leave room
       to move the working pointer up to a 16-byte boundary */
    OMX_S16 tempBuf1[79], tempBuf2[79];
    OMX_S16 *pTempBuf1, *pTempBuf2;
    OMX_INT predDir, predACDir;
    OMX_INT  predQP;
    OMXVCM4P2VideoComponent videoComp;
    OMXResult errorCode;

    /* Argument error checks */
    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset > 7), OMX_Sts_BadArgErr);
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pCoefBufRow == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pCoefBufCol == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pQPBuf == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf((blockIndex < 0) || (blockIndex > 9), OMX_Sts_BadArgErr);
    armRetArgErrIf((curQP <= 0) || (curQP >= 32), OMX_Sts_BadArgErr);

    /* Aligning the local buffers */
    pTempBuf1 = armAlignTo16Bytes(tempBuf1);
    pTempBuf2 = armAlignTo16Bytes(tempBuf2);

    /* Setting the AC prediction direction and prediction direction */
    armVCM4P2_SetPredDir(
        blockIndex,
        pCoefBufRow,
        pCoefBufCol,
        &predDir,
        &predQP,
        pQPBuf);

    /* The scan direction follows the prediction direction only when AC
       prediction is enabled; predDir itself is still used for prediction */
    predACDir = predDir;
    if (ACPredFlag == 0)
    {
        predACDir = OMX_VC_NONE;
    }

    /* Setting the videoComp */
    if (blockIndex <= 3)
    {
        videoComp = OMX_VC_LUMINANCE;
    }
    else
    {
        videoComp = OMX_VC_CHROMINANCE;
    }

    /* VLD and zigzag */
    if (intraDCVLC == 1)
    {
        errorCode = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC(
            ppBitStream,
            pBitOffset,
            pTempBuf1,
            predACDir,
            shortVideoHeader,
            videoComp);
        armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);
    }
    else
    {
        errorCode = omxVCM4P2_DecodeVLCZigzag_IntraACVLC(
            ppBitStream,
            pBitOffset,
            pTempBuf1,
            predACDir,
            shortVideoHeader);
        armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);
    }

    /* AC DC prediction */
    errorCode = omxVCM4P2_PredictReconCoefIntra(
        pTempBuf1,
        pCoefBufRow,
        pCoefBufCol,
        curQP,
        predQP,
        predDir,
        ACPredFlag,
        videoComp);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Dequantization */
    errorCode = omxVCM4P2_QuantInvIntra_I(
     pTempBuf1,
     curQP,
     videoComp,
     shortVideoHeader);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Inverse transform */
    errorCode = omxVCM4P2_IDCT8x8blk (pTempBuf1, pTempBuf2);
    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);

    /* Placing the linear array into the destination plane and clipping
       it to 0 to 255 */
    armVCM4P2_Clip8(pTempBuf2, pDst, step);

    return OMX_Sts_NoErr;
}