/**
 * Function:  omxVCCOMM_Copy16x16
 *
 * Description:
 * Copies a 16x16 block of bytes.  Source rows are read `step` bytes apart;
 * the destination is written as 16 rows packed back to back (256
 * consecutive bytes).
 *
 * Input Arguments:
 *
 *   pSrc - pointer to the top-left byte of the source block; must be
 *            non-NULL and 16-byte aligned
 *   step - distance, in bytes, between the starts of consecutive source
 *            rows; must be at least 16 and a multiple of 16
 *
 * Output Arguments:
 *
 *   pDst - pointer to the destination block; must be non-NULL and 16-byte
 *            aligned
 *
 * Return Value:
 *
 *    OMX_Sts_NoErr - no error
 *    OMX_Sts_BadArgErr - one of the argument conditions above is violated
 *
 */
OMXResult omxVCCOMM_Copy16x16(
    const OMX_U8 *pSrc,
    OMX_U8 *pDst,
    OMX_INT step)
{
    OMX_INT row, col;

    /* Argument error checks */
    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pSrc), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
    armRetArgErrIf(!((step >= 16) && ((step % 16) == 0)), OMX_Sts_BadArgErr);

    /* Source pitch is `step`, destination pitch is fixed at 16 */
    for (row = 0; row < 16; row++)
    {
        for (col = 0; col < 16; col++)
        {
            pDst[(row * 16) + col] = pSrc[(row * step) + col];
        }
    }

    return OMX_Sts_NoErr;
}
/**
 * Function:  omxVCM4P10_DeblockLuma_I
 *
 * Description:
 * In-place luma deblocking.  Runs the vertical-edge filter
 * (omxVCM4P10_FilterDeblockingLuma_VerEdge_I) over the buffer and, if that
 * succeeds, the horizontal-edge filter
 * (omxVCM4P10_FilterDeblockingLuma_HorEdge_I) over the same buffer.
 *
 * Input Arguments:
 *
 *   pSrcDst - pointer to the pixels to filter; must be non-NULL and
 *            8-byte aligned
 *   srcdstStep - buffer step; must be a multiple of 8
 *   pAlpha - alpha table; entries from index 0 go to the vertical-edge
 *            filter, entries from index 2 to the horizontal-edge filter
 *   pBeta - beta table; split between the two filters like pAlpha
 *   pThresholds - threshold table, 4-byte aligned; entries from index 0 go
 *            to the vertical-edge filter, entries from index 16 to the
 *            horizontal-edge filter
 *   pBS - boundary-strength table, 4-byte aligned; split between the two
 *            filters like pThresholds
 *
 * Output Arguments:
 *
 *   pSrcDst - filtered in place
 *
 * Return Value:
 *
 *    OMX_Sts_NoErr - no error
 *    OMX_Sts_BadArgErr - a pointer is NULL or an alignment/step condition
 *              above is violated
 *    Otherwise the status reported by the edge filter that failed.
 *
 */
OMXResult omxVCM4P10_DeblockLuma_I(
	OMX_U8* pSrcDst, 
	OMX_S32 srcdstStep, 
	const OMX_U8* pAlpha, 
	const OMX_U8* pBeta, 
	const OMX_U8* pThresholds, 
	const OMX_U8 *pBS
)
{
    OMXResult status;

    /* Argument error checks */
    armRetArgErrIf(pSrcDst == NULL,                 OMX_Sts_BadArgErr);
    armRetArgErrIf(armNot8ByteAligned(pSrcDst),     OMX_Sts_BadArgErr);
    armRetArgErrIf((srcdstStep & 7) != 0,           OMX_Sts_BadArgErr);
    armRetArgErrIf(pAlpha == NULL,                  OMX_Sts_BadArgErr);
    armRetArgErrIf(pBeta == NULL,                   OMX_Sts_BadArgErr);
    armRetArgErrIf(pThresholds == NULL,             OMX_Sts_BadArgErr);
    armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
    armRetArgErrIf(pBS == NULL,                     OMX_Sts_BadArgErr);
    armRetArgErrIf(armNot4ByteAligned(pBS),         OMX_Sts_BadArgErr);

    /* Vertical edges use the leading part of every table */
    status = omxVCM4P10_FilterDeblockingLuma_VerEdge_I(
        pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS);

    /* Stop here if the first pass reported a problem */
    armRetArgErrIf(status != OMX_Sts_NoErr, status);

    /* Horizontal edges use the trailing part of every table */
    return omxVCM4P10_FilterDeblockingLuma_HorEdge_I(
        pSrcDst, srcdstStep,
        &pAlpha[2], &pBeta[2], &pThresholds[16], &pBS[16]);
}
/**
 * Function:  omxSP_FFTGetBufSize_R_S16S32
 *
 * Description:
 * Computes the number of bytes needed for the FFT specification buffer of
 * a transform of length N = 2^order.
 *
 * For order == 0 the size is sizeof(ARMsFFTSpec_FC64).  Otherwise it is
 *   sizeof(ARMsFFTSpec_FC64)
 *     + (N/2) * sizeof(OMX_U16)
 *     + (N/2) * sizeof(OMX_FC64)
 *     + (N+2) * sizeof(OMX_F64)
 *
 * Input Arguments:
 *
 *   order - base-2 logarithm of the transform length; must be in [0,12]
 *
 * Output Arguments:
 *
 *   pSize - receives the required size in bytes; must be non-NULL
 *
 * Return Value:
 *
 *    OMX_Sts_NoErr - no error
 *    OMX_Sts_BadArgErr - pSize is NULL or order is outside [0,12]
 *
 */
OMXResult omxSP_FFTGetBufSize_R_S16S32(
     OMX_INT order,
     OMX_INT *pSize
 )
{
    OMX_INT     fullLen;
    OMX_INT     halfLen;

    /* Input parameter check */
    armRetArgErrIf(pSize == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf((order < 0) || (order > 12), OMX_Sts_BadArgErr);

    if (order != 0)
    {
        fullLen = 1 << order;
        halfLen = 1 << (order - 1);

        *pSize = sizeof(ARMsFFTSpec_FC64)
               + sizeof(OMX_U16) * halfLen
               + sizeof(OMX_FC64) * halfLen
               + sizeof(OMX_F64) * (2 + fullLen);
    }
    else
    {
        /* Length-1 transform: only the specification structure is needed */
        *pSize = sizeof(ARMsFFTSpec_FC64);
    }

    return OMX_Sts_NoErr;
}
/**
 * Function:  omxVCM4P2_QuantInvInter_I
 *
 * Description:
 * In-place inverse quantisation of the 64 coefficients of one block.
 * For each coefficient c the reconstructed magnitude is
 *     (2 * |c| + 1) * QP        when QP is odd
 *     (2 * |c| + 1) * QP - 1    when QP is even
 * The magnitude is multiplied by the value returned by armSignCheck(c)
 * (presumably -1, 0 or +1 - confirm against armSignCheck) and the result
 * is saturated to [-2048, 2047].
 *
 * Input Arguments:
 *
 *   pSrcDst - pointer to the 64 quantised coefficients; must be non-NULL
 *   QP - quantisation parameter; must be in [1,31]
 *
 * Output Arguments:
 *
 *   pSrcDst - the dequantised, saturated coefficients
 *
 * Return Value:
 *
 *    OMX_Sts_NoErr - no error
 *    OMX_Sts_BadArgErr - pSrcDst is NULL or QP is outside [1,31]
 *
 */
OMXResult omxVCM4P2_QuantInvInter_I(
     OMX_S16 * pSrcDst,
     OMX_INT QP
	 )
{

    OMX_INT coeffCount, Sign, Level;
    
    /* Argument error checks */
    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr);

    for (coeffCount = 0; coeffCount < 64; coeffCount++)
    {
        /* check sign */
        Sign = armSignCheck (pSrcDst[coeffCount]);

        /*
         * Reconstruct in a full-width integer.  The product can reach
         * (2 * 2047 + 1) * 31 = 126945, which does not fit in OMX_S16;
         * storing it into the 16-bit buffer before saturating would wrap
         * (possibly flipping the sign) and defeat the clip below.
         */
        Level = (2 * armAbs(pSrcDst[coeffCount]) + 1) * QP;
        if ((QP & 0x1) == 0)
        {
            Level -= 1;
        }
        Level *= Sign;

        /* Saturate, then narrow */
        pSrcDst[coeffCount] = (OMX_S16) armClip (-2048, 2047, Level);
    }
    return OMX_Sts_NoErr;
}
/**
 * Function:  omxVCM4P2_DCT8x8blk
 *
 * Description:
 * Computes an 8x8 two-dimensional transform by direct summation:
 *
 *   pDst[u*8 + v] = round( sum over x,y of
 *                          pSrc[x*8 + y]
 *                        * armVCM4P2_preCalcDCTCos[x][u]
 *                        * armVCM4P2_preCalcDCTCos[y][v] )
 *
 * The sum is accumulated in OMX_F64 and converted with
 * armRoundFloatToS16.  Any normalisation is presumably folded into the
 * armVCM4P2_preCalcDCTCos table - confirm against the table definition.
 *
 * Input Arguments:
 *
 *   pSrc - pointer to the 64 input samples; must be non-NULL and 16-byte
 *            aligned
 *
 * Output Arguments:
 *
 *   pDst - pointer to the 64 output coefficients; must be non-NULL and
 *            16-byte aligned
 *
 * Return Value:
 *
 *    OMX_Sts_NoErr - no error
 *    OMX_Sts_BadArgErr - a pointer is NULL or not 16-byte aligned
 *
 */
OMXResult omxVCM4P2_DCT8x8blk (const OMX_S16 *pSrc, OMX_S16 *pDst)
{
    OMX_INT inRow, inCol, outRow, outCol;

    /* Argument error checks */
    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pSrc), OMX_Sts_BadArgErr);
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);

    for (outRow = 0; outRow < 8; outRow++)
    {
        for (outCol = 0; outCol < 8; outCol++)
        {
            OMX_F64 acc = 0.0;

            for (inRow = 0; inRow < 8; inRow++)
            {
                const OMX_S16 *pSrcRow = &pSrc[inRow * 8];

                for (inCol = 0; inCol < 8; inCol++)
                {
                    /* Keep this multiplication order: it fixes the rounding */
                    acc += pSrcRow[inCol] *
                           armVCM4P2_preCalcDCTCos[inRow][outRow] *
                           armVCM4P2_preCalcDCTCos[inCol][outCol];
                }
            }

            pDst[(outRow * 8) + outCol] = armRoundFloatToS16 (acc);
        }
    }

    return OMX_Sts_NoErr;
}
/**
 * Function:  omxVCM4P10_MEInit
 *
 * Description:
 * Initialises a motion-estimation specification structure: validates the
 * arguments, then stores the search mode and a field-by-field copy of the
 * caller's parameters in the ARMVCM4P10_MESpec that pMESpec points to.
 *
 * Input Arguments:
 *
 *   MEMode - search mode; must be OMX_VC_M4P10_FAST_SEARCH or
 *            OMX_VC_M4P10_FULL_SEARCH
 *   pMEParams - parameters to copy; must be non-NULL, and
 *            searchRange16x16, searchRange8x8 and searchRange4x4 must all
 *            be greater than zero
 *
 * Output Arguments:
 *
 *   pMESpec - pointer to the structure to initialise (treated as an
 *            ARMVCM4P10_MESpec); must be non-NULL
 *
 * Return Value:
 *
 *    OMX_Sts_NoErr - no error
 *    OMX_Sts_BadArgErr - one of the conditions above is violated
 *
 */
OMXResult omxVCM4P10_MEInit(
        OMXVCM4P10MEMode MEMode,
        const OMXVCM4P10MEParams *pMEParams,
        void *pMESpec
       )
{
    ARMVCM4P10_MESpec *pSpec;

    armRetArgErrIf(pMEParams == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(!((MEMode == OMX_VC_M4P10_FAST_SEARCH) ||
                     (MEMode == OMX_VC_M4P10_FULL_SEARCH)), OMX_Sts_BadArgErr);
    armRetArgErrIf(!((pMEParams->searchRange16x16 > 0) &&
                     (pMEParams->searchRange8x8 > 0) &&
                     (pMEParams->searchRange4x4 > 0)), OMX_Sts_BadArgErr);

    pSpec = (ARMVCM4P10_MESpec *) pMESpec;

    /* Search mode */
    pSpec->MEMode = MEMode;

    /* Search ranges */
    pSpec->MEParams.searchRange16x16    = pMEParams->searchRange16x16;
    pSpec->MEParams.searchRange8x8      = pMEParams->searchRange8x8;
    pSpec->MEParams.searchRange4x4      = pMEParams->searchRange4x4;

    /* Feature switches */
    pSpec->MEParams.blockSplitEnable8x8 = pMEParams->blockSplitEnable8x8;
    pSpec->MEParams.blockSplitEnable4x4 = pMEParams->blockSplitEnable4x4;
    pSpec->MEParams.halfSearchEnable    = pMEParams->halfSearchEnable;
    pSpec->MEParams.quarterSearchEnable = pMEParams->quarterSearchEnable;
    pSpec->MEParams.intraEnable4x4      = pMEParams->intraEnable4x4;

    return OMX_Sts_NoErr;
}
/**
 * Function:  omxVCM4P2_QuantInter_I
 *
 * Description:
 * In-place quantisation of the 64 coefficients of one block.  Each
 * coefficient c becomes
 *     sign * ((|c| - QP/2) / (2 * QP))
 * using integer division, where sign is the value returned by
 * armSignCheck(c).  The result is then clipped:
 *
 *                          coefficient 0     coefficients 1..63
 *   shortVideoHeader == 1   [1, 254]          [-127, 127]
 *   otherwise               [-2047, 2047]     [-2047, 2047]
 *
 * NOTE(review): with shortVideoHeader == 1 a quantised value of zero at
 * index 0 is raised to 1 by the clip; confirm this range is intended for
 * inter blocks.
 *
 * Input Arguments:
 *
 *   pSrcDst - pointer to the 64 coefficients; must be non-NULL
 *   QP - quantisation parameter; must be in [1,31]
 *   shortVideoHeader - selects the clipping ranges (see table)
 *
 * Output Arguments:
 *
 *   pSrcDst - the quantised, clipped coefficients
 *
 * Return Value:
 *
 *    OMX_Sts_NoErr - no error
 *    OMX_Sts_BadArgErr - pSrcDst is NULL or QP is outside [1,31]
 *
 */
OMXResult omxVCM4P2_QuantInter_I(
     OMX_S16 * pSrcDst,
     OMX_U8 QP,
	 OMX_INT shortVideoHeader
)
{
    OMX_INT idx;
    OMX_INT sign;
    OMX_INT loAC, hiAC;
    OMX_INT loDC, hiDC;
    OMX_S16 level;

    /* Argument error checks */
    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr);

    /* Clip ranges depend on short-video-header mode */
    loDC = (shortVideoHeader == 1) ?    1 : -2047;
    hiDC = (shortVideoHeader == 1) ?  254 :  2047;
    loAC = (shortVideoHeader == 1) ? -127 : -2047;
    hiAC = (shortVideoHeader == 1) ?  127 :  2047;

    for (idx = 0; idx < 64; idx++)
    {
        sign = armSignCheck (pSrcDst[idx]);

        /* Quantise the magnitude, then restore the sign */
        level = (armAbs(pSrcDst[idx]) - (QP/2))/(2 * QP);
        level *= sign;

        /* Index 0 has its own clip range */
        if (idx != 0)
        {
            pSrcDst[idx] = (OMX_S16) armClip (loAC, hiAC, level);
        }
        else
        {
            pSrcDst[idx] = (OMX_S16) armClip (loDC, hiDC, level);
        }
    }

    return OMX_Sts_NoErr;
}
/**
 * Function:  omxVCM4P2_BlockMatch_Integer_16x16
 *
 * Description:
 * 16x16 entry point for integer block matching.  Checks the alignment of
 * the current-block pointer and forwards every argument unchanged to
 * armVCM4P2_BlockMatch_Integer with a block size of 16.
 *
 * Input Arguments:
 *
 *   pSrcRefBuf, refWidth, pRefRect, pCurrPointPos, pSrcPreMV, pSrcPreSAD,
 *   pMESpec - forwarded as-is; not validated here
 *   pSrcCurrBuf - pointer to the current block; must be 16-byte aligned
 *
 * Output Arguments:
 *
 *   pDstMV, pDstSAD - forwarded as-is; written by the callee
 *
 * Return Value:
 *
 *    OMX_Sts_BadArgErr - pSrcCurrBuf is not 16-byte aligned
 *    Otherwise the status returned by armVCM4P2_BlockMatch_Integer.
 *
 */
OMXResult omxVCM4P2_BlockMatch_Integer_16x16(
     const OMX_U8 *pSrcRefBuf,
     OMX_INT refWidth,
     const OMXRect *pRefRect,
     const OMX_U8 *pSrcCurrBuf,
     const OMXVCM4P2Coordinate *pCurrPointPos,
     const OMXVCMotionVector *pSrcPreMV,
     const OMX_INT *pSrcPreSAD,
     void *pMESpec,
     OMXVCMotionVector *pDstMV,
     OMX_INT *pDstSAD
)
{
    /* Edge length of the block handled by this entry point */
    const OMX_U8 edgeLen = 16;
    OMXResult status;

    /* Argument error checks */
    armRetArgErrIf(!armIs16ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr);

    status = armVCM4P2_BlockMatch_Integer(
                 pSrcRefBuf, refWidth, pRefRect,
                 pSrcCurrBuf, pCurrPointPos,
                 pSrcPreMV, pSrcPreSAD,
                 pMESpec,
                 pDstMV, pDstSAD,
                 edgeLen);

    return status;
}
/**
 * Function:  omxVCM4P2_EncodeVLCZigzag_Inter   (6.2.4.5.3)
 *
 * Description:
 * Performs classical zigzag scanning and VLC encoding for one inter block.
 * When pattern is zero nothing is written to the bit stream.
 *
 * Input Arguments:
 *
 *   ppBitStream - pointer to the pointer to the current byte in the bit
 *            stream
 *   pBitOffset - pointer to the bit position in the byte pointed to by
 *            *ppBitStream; valid within 0 to 7
 *   pQDctBlkCoef - pointer to the quantized DCT coefficients
 *   pattern - block pattern; the block is encoded only when it is non-zero
 *   shortVideoHeader - binary flag indicating presence of
 *            short_video_header; escape modes 0-3 are used if
 *            shortVideoHeader==0, and escape mode 4 is used when
 *            shortVideoHeader==1
 *
 * Output Arguments:
 *
 *   ppBitStream - *ppBitStream is updated after the block is encoded so
 *            that it points to the current byte in the bit stream buffer
 *   pBitOffset - *pBitOffset is updated so that it points to the current
 *            bit position in the byte pointed to by *ppBitStream
 *
 * Return Value:
 *
 *    OMX_Sts_NoErr - no error
 *    OMX_Sts_BadArgErr - bad arguments:
 *    -    at least one of the following pointers is NULL: ppBitStream,
 *              *ppBitStream, pBitOffset, pQDctBlkCoef
 *    -    *pBitOffset < 0, or *pBitOffset > 7
 *
 */
OMXResult omxVCM4P2_EncodeVLCZigzag_Inter(
     OMX_U8 **ppBitStream,
     OMX_INT * pBitOffset,
     const OMX_S16 *pQDctBlkCoef,
     OMX_U8 pattern,
	 OMX_INT shortVideoHeader
)
{
    /* Inter blocks are encoded from the first coefficient */
    const OMX_U8 firstCoef = 0;
    const OMX_U8 *pScanOrder = armVCM4P2_aClassicalZigzagScan;

    /* Argument error checks */
    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pQDctBlkCoef == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(!((*pBitOffset >= 0) && (*pBitOffset <= 7)),
                   OMX_Sts_BadArgErr);

    /* Nothing to encode for an empty pattern */
    if (!pattern)
    {
        return OMX_Sts_NoErr;
    }

    /*
     * The four numeric arguments (26, 40, 10, 1) are limits that go with
     * the inter tables passed below - TODO confirm their meaning against
     * armVCM4P2_PutVLCBits.
     */
    armVCM4P2_PutVLCBits (
          ppBitStream,
          pBitOffset,
          pQDctBlkCoef,
          shortVideoHeader,
          firstCoef,
          26,
          40,
          10,
          1,
          armVCM4P2_InterL0RunIdx,
          armVCM4P2_InterVlcL0,
          armVCM4P2_InterL1RunIdx,
          armVCM4P2_InterVlcL1,
          armVCM4P2_InterL0LMAX,
          armVCM4P2_InterL1LMAX,
          armVCM4P2_InterL0RMAX,
          armVCM4P2_InterL1RMAX,
          pScanOrder
    );

    return OMX_Sts_NoErr;
}
/**
 * Function:  omxVCCOMM_ExpandFrame_I   (6.1.3.2.1)
 *
 * Description:
 * This function expands a reconstructed frame in-place.  The unexpanded 
 * source frame should be stored in a plane buffer with sufficient space 
 * pre-allocated for edge expansion, and the input frame should be located in 
 * the plane buffer center.  This function executes the pixel expansion by 
 * replicating source frame edge pixel intensities in the empty pixel 
 * locations (expansion region) between the source frame edge and the plane 
 * buffer edge.  The width/height of the expansion regions on the 
 * horizontal/vertical edges is controlled by the parameter iExpandPels. 
 *
 * The top and bottom bands are filled first; the left and right bands are
 * then filled over every row of the expanded height, which also produces
 * the four corners.
 *
 * Input Arguments:
 *   
 *   pSrcDstPlane - pointer to the top-left corner of the frame to be 
 *            expanded; must be aligned on an 8-byte boundary. 
 *   iFrameWidth - frame width; must be a multiple of 8. 
 *   iFrameHeight -frame height; must be a multiple of 8. 
 *   iExpandPels - number of pixels to be expanded in the horizontal and 
 *            vertical directions; must be a multiple of 8. 
 *   iPlaneStep - distance, in bytes, between the start of consecutive lines 
 *            in the plane buffer; must be larger than or equal to 
 *            (iFrameWidth + 2 * iExpandPels). 
 *
 * Output Arguments:
 *   
 *   pSrcDstPlane -Pointer to the top-left corner of the frame (NOT the 
 *            top-left corner of the plane); must be aligned on an 8-byte 
 *            boundary. 
 *
 * Return Value:
 *    
 *    OMX_Sts_NoErr - no error 
 *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
 *              conditions: 
 *    -    pSrcDstPlane is NULL. 
 *    -    pSrcDstPlane is not aligned on an 8-byte boundary. 
 *    -    one of the following parameters is either equal to zero or is a 
 *              non-multiple of 8: iFrameHeight, iFrameWidth, iPlaneStep, or 
 *              iExpandPels. 
 *    -    iPlaneStep < (iFrameWidth + 2 * iExpandPels). 
 *
 */
OMXResult omxVCCOMM_ExpandFrame_I(
	OMX_U8*	pSrcDstPlane, 
	OMX_U32	iFrameWidth, 
	OMX_U32	iFrameHeight, 
	OMX_U32	iExpandPels, 
	OMX_U32	iPlaneStep
)
{
    OMX_INT     x, y;
    OMX_INT     expandedRows;
    OMX_U8*     pLeft;
    OMX_U8*     pRight;
    OMX_U8*     pTop;
    OMX_U8*     pBottom;
    OMX_U8*     pRow;
    OMX_U8*     pFirstRow;
    OMX_U8*     pLastRow;
    OMX_U8      leftPel, rightPel;

    /* check for argument error */
    armRetArgErrIf(pSrcDstPlane == NULL, OMX_Sts_BadArgErr)
    armRetArgErrIf(armNot8ByteAligned(pSrcDstPlane), OMX_Sts_BadArgErr)
    armRetArgErrIf((iFrameWidth == 0) || (iFrameWidth & 7), OMX_Sts_BadArgErr)
    armRetArgErrIf((iFrameHeight == 0) || (iFrameHeight & 7), OMX_Sts_BadArgErr)
    armRetArgErrIf((iExpandPels == 0) || (iExpandPels & 7), OMX_Sts_BadArgErr)
    armRetArgErrIf((iPlaneStep == 0) || (iPlaneStep & 7), OMX_Sts_BadArgErr)
    armRetArgErrIf(iPlaneStep < (iFrameWidth + 2 * iExpandPels), 
                   OMX_Sts_BadArgErr)

    /* Top and Bottom */
    pTop = pSrcDstPlane - (iExpandPels * iPlaneStep);
    pBottom = pSrcDstPlane + (iFrameHeight * iPlaneStep);
    pFirstRow = pSrcDstPlane;
    pLastRow = pSrcDstPlane + ((iFrameHeight - 1) * iPlaneStep);

    for (y = 0; y < (OMX_INT)iExpandPels; y++)
    {
        for (x = 0; x < (OMX_INT)iFrameWidth; x++)
        {
            pTop [y * iPlaneStep + x] = pFirstRow [x];
            pBottom [y * iPlaneStep + x] = pLastRow [x];
        }
    }

    /*
     * Left, Right and Corners.
     *
     * Rows are walked from the top expansion row downwards using only
     * non-negative offsets from pTop.  Multiplying a negative row index by
     * the unsigned iPlaneStep would be carried out in unsigned arithmetic
     * and wrap to a huge 32-bit value; added to a pointer wider than
     * 32 bits that lands far outside the plane buffer.
     */
    expandedRows = (OMX_INT)(iFrameHeight + 2 * iExpandPels);

    for (y = 0; y < expandedRows; y++)
    {
        pRow = pTop + (y * iPlaneStep);
        pLeft = pRow - iExpandPels;
        pRight = pRow + iFrameWidth;

        /* Edge pixels of this row; the fill below never overwrites them */
        leftPel = pRow [0];
        rightPel = pRow [iFrameWidth - 1];

        for (x = 0; x < (OMX_INT)iExpandPels; x++)
        {
            pLeft [x] = leftPel;
            pRight [x] = rightPel;
        }
    }

    return OMX_Sts_NoErr;
}
/**
 * Function:  omxVCM4P2_BlockMatch_Half_8x8
 *
 * Description:
 * 8x8 entry point for half-pel block matching.  Validates the pointers it
 * is responsible for and forwards every argument unchanged to
 * armVCM4P2_BlockMatch_Half with a block size of 8.
 *
 * Input Arguments:
 *
 *   pSrcRefBuf - forwarded as-is; must be non-NULL
 *   refWidth - forwarded as-is
 *   pRefRect - forwarded as-is; must be non-NULL
 *   pSrcCurrBuf - pointer to the current block; must be non-NULL and
 *            8-byte aligned
 *   pSearchPointRefPos - forwarded as-is; must be non-NULL
 *   rndVal - forwarded as-is
 *   pSrcDstMV - forwarded as-is; must be non-NULL
 *
 * Output Arguments:
 *
 *   pSrcDstMV, pDstSAD - forwarded as-is; pDstSAD is not checked here
 *
 * Return Value:
 *
 *    OMX_Sts_BadArgErr - one of the conditions above is violated
 *    Otherwise the status returned by armVCM4P2_BlockMatch_Half.
 *
 */
OMXResult omxVCM4P2_BlockMatch_Half_8x8(
     const OMX_U8 *pSrcRefBuf,
     OMX_INT refWidth,
     const OMXRect *pRefRect,
     const OMX_U8 *pSrcCurrBuf,
     const OMXVCM4P2Coordinate *pSearchPointRefPos,
     OMX_INT rndVal,
     OMXVCMotionVector *pSrcDstMV,
     OMX_INT *pDstSAD
)
{
    /* Edge length of the block handled by this entry point */
    const OMX_U8 edgeLen = 8;
    OMXResult status;

    /* Argument error checks */
    armRetArgErrIf((pSrcRefBuf == NULL) ||
                   (pRefRect == NULL) ||
                   (pSrcCurrBuf == NULL) ||
                   (pSearchPointRefPos == NULL) ||
                   (pSrcDstMV == NULL), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs8ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr);

    status = armVCM4P2_BlockMatch_Half(
                 pSrcRefBuf, refWidth, pRefRect,
                 pSrcCurrBuf, pSearchPointRefPos,
                 rndVal,
                 pSrcDstMV, pDstSAD,
                 edgeLen);

    return status;
}
/**
 * Function:  omxVCM4P2_MEInit
 *
 * Description:
 * Initialises a motion-estimation specification structure: validates the
 * arguments, then stores the search mode and a field-by-field copy of the
 * caller's parameters in the ARMVCM4P2_MESpec that pMESpec points to.
 *
 * Input Arguments:
 *
 *   MEMode - search mode; must be OMX_VC_M4P2_FAST_SEARCH or
 *            OMX_VC_M4P2_FULL_SEARCH
 *   pMEParams - parameters to copy; must be non-NULL and its searchRange
 *            must be greater than zero
 *
 * Output Arguments:
 *
 *   pMESpec - pointer to the structure to initialise (treated as an
 *            ARMVCM4P2_MESpec); must be non-NULL
 *
 * Return Value:
 *
 *    OMX_Sts_NoErr - no error
 *    OMX_Sts_BadArgErr - one of the conditions above is violated
 *
 */
OMXResult omxVCM4P2_MEInit(
    OMXVCM4P2MEMode MEMode, 
    const OMXVCM4P2MEParams *pMEParams, 
    void *pMESpec
   )
{
    ARMVCM4P2_MESpec *pSpec;

    armRetArgErrIf(pMEParams == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(!((MEMode == OMX_VC_M4P2_FAST_SEARCH) ||
                     (MEMode == OMX_VC_M4P2_FULL_SEARCH)), OMX_Sts_BadArgErr);
    armRetArgErrIf(!(pMEParams->searchRange > 0), OMX_Sts_BadArgErr);

    pSpec = (ARMVCM4P2_MESpec *) pMESpec;

    pSpec->MEMode                       = MEMode;
    pSpec->MEParams.searchRange         = pMEParams->searchRange;
    pSpec->MEParams.rndVal              = pMEParams->rndVal;
    pSpec->MEParams.searchEnable8x8     = pMEParams->searchEnable8x8;
    pSpec->MEParams.halfPelSearchEnable = pMEParams->halfPelSearchEnable;

    return OMX_Sts_NoErr;
}
/**
 * Function:  armVCM4P10_InterpolateHalfHor_Luma
 *
 * Description:
 * Horizontal 6-tap interpolation over an iWidth x iHeight block.  For each
 * output position the six source pixels at horizontal offsets -2..+3 are
 * combined with weights (1, -5, 20, 20, -5, 1); the sum is rounded with
 * (sum + 16) >> 5 and clipped to [0, 255].
 *
 * Input Arguments:
 *
 *   pSrc - pointer to the top-left source pixel of the block; must be
 *            non-NULL.  Pixels from 2 to the left up to 3 to the right of
 *            every processed position are read.
 *   iSrcStep - step of the source buffer
 *   iDstStep - step of the destination buffer
 *   iWidth - block width
 *   iHeight - block height
 *
 * Output Arguments:
 *
 *   pDst - pointer to the destination block; must be non-NULL
 *
 * Return Value:
 *
 *    OMX_Sts_NoErr - no error
 *    OMX_Sts_BadArgErr - pSrc or pDst is NULL
 *
 */
OMXResult armVCM4P10_InterpolateHalfHor_Luma(
        const OMX_U8*     pSrc, 
        OMX_U32     iSrcStep, 
        OMX_U8*     pDst, 
        OMX_U32     iDstStep, 
        OMX_U32     iWidth, 
        OMX_U32     iHeight
)
{
    OMX_INT     col, row;
    OMX_S32     acc, centre;

    /* check for argument error */
    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);

    for (row = 0; row < iHeight; row++)
    {
        for (col = 0; col < iWidth; col++)
        {
            centre = row * iSrcStep + col;

            /* Taps grouped by weight: 1, -5, 20 */
            acc  = pSrc [centre - 2] + pSrc [centre + 3];
            acc -= 5 * (pSrc [centre - 1] + pSrc [centre + 2]);
            acc += 20 * (pSrc [centre] + pSrc [centre + 1]);

            /* Round, scale down by 32 and saturate to a pixel value */
            acc = (acc + 16) >> 5;
            acc = armClip(0, 255, acc);

            pDst [row * iDstStep + col] = acc;
        }
    }

    return OMX_Sts_NoErr;
}
Beispiel #14
0
/**
 * Function: armSwapElem
 *
 * Description:
 * Swaps two elements located at the given addresses, one byte at a time.
 * The element size in bytes is given by elemSize; nothing is swapped when
 * elemSize is zero or negative.
 *
 * Input/Output Arguments:
 *   pBuf1 - pointer to the first element; must be non-NULL
 *   pBuf2 - pointer to the second element; must be non-NULL
 *
 * Input Arguments:
 *   elemSize - number of bytes to exchange
 *
 * Return Value:
 *   OMX_Sts_NoErr - no error
 *   OMX_Sts_NullPtrErr - pBuf1 or pBuf2 is NULL
 */
OMXResult armSwapElem(
        OMX_U8 *pBuf1,
        OMX_U8 *pBuf2,
        OMX_INT elemSize
       )
{
    OMX_U8 *pFirst = pBuf1;
    OMX_U8 *pSecond = pBuf2;
    OMX_INT remaining;
    OMX_U8 saved;

    armRetArgErrIf(!pBuf1 || !pBuf2, OMX_Sts_NullPtrErr);

    for (remaining = elemSize; remaining > 0; remaining--)
    {
        saved = *pFirst;
        *pFirst = *pSecond;
        *pSecond = saved;

        pFirst++;
        pSecond++;
    }

    return OMX_Sts_NoErr;
}
/**
 * Function:  omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC
 *
 * Description:
 * Validates the arguments and delegates the decoding to
 * armVCM4P10_DecodeCoeffsToPair, passing 4 for both of its trailing
 * arguments (presumably the table selector and the maximum coefficient
 * count for a chroma DC block - confirm against the callee).
 *
 * Input Arguments:
 *
 *   ppBitStream - pointer to the bit stream pointer; neither it nor
 *            *ppBitStream may be NULL
 *   pOffset - pointer to the bit offset; must be non-NULL and *pOffset
 *            must be in [0,7]
 *
 * Output Arguments:
 *
 *   pNumCoeff - forwarded to the callee; must be non-NULL
 *   ppPosCoefbuf - forwarded to the callee; neither it nor *ppPosCoefbuf
 *            may be NULL
 *
 * Return Value:
 *
 *    OMX_Sts_BadArgErr - one of the conditions above is violated
 *    Otherwise the status returned by armVCM4P10_DecodeCoeffsToPair.
 *
 */
OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (
     const OMX_U8** ppBitStream,
     OMX_S32* pOffset,
     OMX_U8* pNumCoeff,
     OMX_U8** ppPosCoefbuf        
 )

{
    OMXResult status;

    /* Bit stream position */
    armRetArgErrIf((ppBitStream == NULL) || (*ppBitStream == NULL),
                   OMX_Sts_BadArgErr);
    armRetArgErrIf(pOffset == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf((*pOffset < 0) || (*pOffset > 7), OMX_Sts_BadArgErr);

    /* Output locations */
    armRetArgErrIf(pNumCoeff == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf((ppPosCoefbuf == NULL) || (*ppPosCoefbuf == NULL),
                   OMX_Sts_BadArgErr);

    status = armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff,
                                           ppPosCoefbuf, 4, 4);
    return status;
}
/**
 * Function:  omxVCM4P2_TransRecBlockCoef_inter
 *
 * Description:
 * Runs the forward and reconstruction paths for one inter block:
 *   1. omxVCM4P2_DCT8x8blk       pSrc -> pDst
 *   2. omxVCM4P2_QuantInter_I    pDst quantised in place
 *   3. copy of the 64 quantised coefficients to a local aligned buffer
 *   4. omxVCM4P2_QuantInvInter_I on the local copy
 *   5. omxVCM4P2_IDCT8x8blk      local copy -> pRec
 * The status of every stage is checked; processing stops at the first
 * stage that reports an error and that status is returned.
 *
 * Input Arguments:
 *
 *   pSrc - pointer to the 64 input samples; must be non-NULL and 16-byte
 *            aligned
 *   QP - quantisation parameter; must be in [1,31]
 *   shortVideoHeader - forwarded to omxVCM4P2_QuantInter_I
 *
 * Output Arguments:
 *
 *   pDst - receives the quantised coefficients; must be non-NULL and
 *            16-byte aligned
 *   pRec - receives the reconstructed block; must be non-NULL and 16-byte
 *            aligned
 *
 * Return Value:
 *
 *    OMX_Sts_NoErr - no error
 *    OMX_Sts_BadArgErr - one of the conditions above is violated
 *    Otherwise the status of the first stage that failed.
 *
 */
OMXResult omxVCM4P2_TransRecBlockCoef_inter(
     const OMX_S16 *pSrc,
     OMX_S16 * pDst,
     OMX_S16 * pRec,
     OMX_U8 QP,
     OMX_INT shortVideoHeader
)
{
    /* 64 elements are needed but to align it to 16 bytes need 
    8 more elements of padding */
    OMX_S16 tempBuffer[72];
    OMX_S16 *pTempBuffer;
    OMX_INT i;
    OMXResult errorCode;
        
    /* Aligning the local buffers */
    pTempBuffer = armAlignTo16Bytes(tempBuffer);

    /* Argument error checks */
    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pRec == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pSrc), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pRec), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
    armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr);
    
    /* Forward path: transform, then quantise in place */
    errorCode = omxVCM4P2_DCT8x8blk (pSrc, pDst);
    if (errorCode != OMX_Sts_NoErr)
    {
        return errorCode;
    }

    errorCode = omxVCM4P2_QuantInter_I(
     pDst,
     QP,
     shortVideoHeader);
    if (errorCode != OMX_Sts_NoErr)
    {
        return errorCode;
    }

    /* Reconstruct from a copy so that pDst keeps the quantised values */
    for (i = 0; i < 64; i++)
    {
        pTempBuffer[i] = pDst[i];
    }

    errorCode = omxVCM4P2_QuantInvInter_I(
     pTempBuffer,
     QP);
    if (errorCode != OMX_Sts_NoErr)
    {
        return errorCode;
    }

    return omxVCM4P2_IDCT8x8blk (pTempBuffer, pRec);
}
/**
 * Function:  omxSP_FIROne_Direct_S16
 *
 * Description:
 * Filters a single sample through a direct-form FIR filter.  The new
 * sample is written into the delay line at *pDelayLineIndex and again
 * tapsLen entries later, so that the tapsLen entries starting at the
 * current index always form a contiguous window.  The window is
 * multiplied element-wise with the taps and accumulated in 32 bits; the
 * accumulator is passed to armSatRoundLeftShift_S32 with a shift of -15
 * (presumably a rounding, saturating right shift that removes the Q15
 * scaling - confirm against the helper).  Finally the index is
 * decremented, wrapping from 0 to tapsLen - 1.
 *
 * Input Arguments:
 *
 *   val - the input sample
 *   pTapsQ15 - pointer to tapsLen filter taps; must be non-NULL
 *   tapsLen - number of taps; must be greater than zero
 *   pDelayLine - pointer to the delay line; must be non-NULL.  Entries up
 *            to index 2 * tapsLen - 1 can be written.
 *   pDelayLineIndex - pointer to the current delay line position; must be
 *            non-NULL and *pDelayLineIndex must be in [0, tapsLen - 1]
 *
 * Output Arguments:
 *
 *   pResult - receives the filtered sample; must be non-NULL
 *   pDelayLine, pDelayLineIndex - updated as described above
 *
 * Return Value:
 *
 *    OMX_Sts_NoErr - no error
 *    OMX_Sts_BadArgErr - one of the conditions above is violated
 *
 */
OMXResult omxSP_FIROne_Direct_S16(
     OMX_S16 val,
     OMX_S16 * pResult,
     const OMX_S16 * pTapsQ15,
     OMX_INT tapsLen,
     OMX_S16 * pDelayLine,
     OMX_INT * pDelayLineIndex
 )
 {
    OMX_U32 tap;
    OMX_S32 sum;
    OMX_S16 *pWindow;

    /* Input parameter check */
    armRetArgErrIf((pResult == NULL), OMX_Sts_BadArgErr);
    armRetArgErrIf((pTapsQ15 == NULL), OMX_Sts_BadArgErr);
    armRetArgErrIf((tapsLen <= 0), OMX_Sts_BadArgErr);
    armRetArgErrIf((pDelayLine == NULL), OMX_Sts_BadArgErr);
    armRetArgErrIf((pDelayLineIndex == NULL), OMX_Sts_BadArgErr);
    armRetArgErrIf((*pDelayLineIndex < 0), OMX_Sts_BadArgErr);
    armRetArgErrIf((*pDelayLineIndex >= tapsLen), OMX_Sts_BadArgErr);

    /* Window of tapsLen samples starting at the current position */
    pWindow = pDelayLine + *pDelayLineIndex;

    /* Store the new sample at both of its mirrored positions */
    pWindow [tapsLen] = val;
    pWindow [0] = val;

    /* Multiply-accumulate taps against the window */
    sum = 0;
    for (tap = 0; tap < tapsLen; tap++)
    {
        sum += (OMX_S32)pTapsQ15 [tap] * (OMX_S32)pWindow [tap];
    }

    /* Step the index backwards, wrapping at zero */
    *pDelayLineIndex -= 1;
    if (*pDelayLineIndex < 0)
    {
        *pDelayLineIndex = tapsLen - 1;
    }

    /* Store the result */
    *pResult = armSatRoundLeftShift_S32(sum, -15);
    return OMX_Sts_NoErr;
 }
/**
 * Function:  omxSP_IIROne_BiQuadDirect_S16_I
 *
 * Description:
 * In-place variant of omxSP_IIROne_BiQuadDirect_S16: the sample stored at
 * *pValResult is used as the input and the same location receives the
 * output.
 *
 * Input Arguments:
 *
 *   pValResult - pointer to the input sample; must be non-NULL
 *   pTaps - forwarded to omxSP_IIROne_BiQuadDirect_S16
 *   numBiquad - forwarded to omxSP_IIROne_BiQuadDirect_S16
 *   pDelayLine - forwarded to omxSP_IIROne_BiQuadDirect_S16
 *
 * Output Arguments:
 *
 *   pValResult - receives the filtered sample
 *
 * Return Value:
 *
 *    OMX_Sts_BadArgErr - pValResult is NULL
 *    Otherwise the status returned by omxSP_IIROne_BiQuadDirect_S16.
 *
 */
OMXResult omxSP_IIROne_BiQuadDirect_S16_I(
     OMX_S16 * pValResult,
     const OMX_S16 * pTaps,
     OMX_INT numBiquad,
     OMX_S32 * pDelayLine
 )
{
    armRetArgErrIf(pValResult == NULL, OMX_Sts_BadArgErr);

    /* The input value is read before the callee writes its result back */
    return omxSP_IIROne_BiQuadDirect_S16(
               *pValResult,
               pValResult,
               pTaps,
               numBiquad,
               pDelayLine);
}
/**
 * Function:  omxVCM4P10_TransformDequantChromaDCFromPair
 *
 * Description:
 * Reconstructs a chroma DC block in three steps applied to pDst:
 * armVCM4P10_UnpackBlock2x2 fills it from the data at *ppSrc,
 * InvTransformDC2x2 transforms it in place, and DequantChromaDC2x2
 * dequantises it in place using QP.
 *
 * Input Arguments:
 *
 *   ppSrc - pointer to the source data pointer; neither it nor *ppSrc may
 *            be NULL
 *   QP - quantisation parameter; must be in [0,51]
 *
 * Output Arguments:
 *
 *   pDst - pointer to the output coefficients; must be non-NULL and
 *            4-byte aligned
 *
 * Return Value:
 *
 *    OMX_Sts_NoErr - no error
 *    OMX_Sts_BadArgErr - one of the conditions above is violated
 *
 */
OMXResult omxVCM4P10_TransformDequantChromaDCFromPair(
     const OMX_U8 **ppSrc,
     OMX_S16* pDst,
     OMX_INT QP        
 )
{
    /* Source */
    armRetArgErrIf((ppSrc == NULL) || (*ppSrc == NULL), OMX_Sts_BadArgErr);

    /* Destination */
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(armNot4ByteAligned(pDst), OMX_Sts_BadArgErr);

    /* Quantisation parameter range */
    armRetArgErrIf((QP < 0) || (QP > 51), OMX_Sts_BadArgErr);

    /* Unpack, inverse transform, dequantise - all in pDst */
    armVCM4P10_UnpackBlock2x2(ppSrc, pDst);
    InvTransformDC2x2(pDst);
    DequantChromaDC2x2(pDst, QP);

    return OMX_Sts_NoErr;
}
/**
 * Function:  armVCM4P2_DecodeVLCZigzag_Intra
 *
 * Description:
 * Selects the scan table that matches the prediction direction and calls
 * armVCM4P2_GetVLCBits with the intra VLC tables to decode one block.
 *
 *   predDir                scan table
 *   OMX_VC_HORIZONTAL      armVCM4P2_aVerticalZigzagScan
 *   OMX_VC_VERTICAL        armVCM4P2_aHorizontalZigzagScan
 *   anything else          armVCM4P2_aClassicalZigzagScan
 *
 * Input Arguments:
 *
 *   ppBitStream - pointer to the bit stream pointer; neither it nor
 *            *ppBitStream may be NULL
 *   pBitOffset - pointer to the bit offset; must be non-NULL and
 *            *pBitOffset must be in [0,7]
 *   predDir - prediction direction; must not exceed 2
 *   shortVideoHeader - forwarded to armVCM4P2_GetVLCBits
 *   start - forwarded to armVCM4P2_GetVLCBits
 *
 * Output Arguments:
 *
 *   pDst - pointer to the decoded coefficients; must be non-NULL and
 *            4-byte aligned
 *
 * Return Value:
 *
 *    OMX_Sts_NoErr - no error
 *    OMX_Sts_BadArgErr - one of the conditions above is violated
 *    OMX_Sts_Err - decoding finished with the "last" flag still zero
 *    Otherwise the error status reported by armVCM4P2_GetVLCBits.
 *
 */
OMXResult armVCM4P2_DecodeVLCZigzag_Intra(
     const OMX_U8 ** ppBitStream,
     OMX_INT * pBitOffset,
     OMX_S16 * pDst,
     OMX_U8 predDir,
     OMX_INT shortVideoHeader,
     OMX_U8  start
)
{
    OMX_U8  lastFlag = 0;
    const OMX_U8  *pScanOrder;
    OMXResult status;

    /* Argument error checks */
    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs4ByteAligned(pDst), OMX_Sts_BadArgErr);
    armRetArgErrIf(!((*pBitOffset >= 0) && (*pBitOffset <= 7)),
                   OMX_Sts_BadArgErr);
    armRetArgErrIf((predDir > 2), OMX_Sts_BadArgErr);

    /* Horizontal prediction pairs with the vertical scan and vice versa */
    if (predDir == OMX_VC_HORIZONTAL)
    {
        pScanOrder = armVCM4P2_aVerticalZigzagScan;
    }
    else if (predDir == OMX_VC_VERTICAL)
    {
        pScanOrder = armVCM4P2_aHorizontalZigzagScan;
    }
    else
    {
        pScanOrder = armVCM4P2_aClassicalZigzagScan;
    }

    /*
     * The four numeric arguments (10, 62, 7, 21) are limits that go with
     * the intra tables passed below - TODO confirm their meaning against
     * armVCM4P2_GetVLCBits.
     */
    status = armVCM4P2_GetVLCBits (
                 ppBitStream,
                 pBitOffset,
                 pDst,
                 shortVideoHeader,
                 start,
                 &lastFlag,
                 10,
                 62,
                 7,
                 21,
                 armVCM4P2_IntraL0RunIdx,
                 armVCM4P2_IntraVlcL0,
                 armVCM4P2_IntraL1RunIdx,
                 armVCM4P2_IntraVlcL1,
                 armVCM4P2_IntraL0LMAX,
                 armVCM4P2_IntraL1LMAX,
                 armVCM4P2_IntraL0RMAX,
                 armVCM4P2_IntraL1RMAX,
                 pScanOrder );
    armRetDataErrIf((status != OMX_Sts_NoErr), status);

    /* The block is incomplete if the last-coefficient flag was never set */
    return (lastFlag != 0) ? OMX_Sts_NoErr : OMX_Sts_Err;
}
/**
 * Function:  omxVCM4P10_DequantTransformResidualFromPairAndAdd
 *
 * Description:
 * Reconstructs one 4x4 block.  A zeroed 16-coefficient residual is built
 * in a local aligned buffer.  When AC is non-zero it is filled by
 * armVCM4P10_UnpackBlock4x4 from *ppSrc and dequantised with
 * DequantLumaAC4x4.  When pDC is non-NULL, pDC[0] then replaces
 * coefficient 0 (this also happens when AC is non-zero).  The residual is
 * transformed in place by armVCM4P10_TransformResidual4x4, added to the
 * prediction, clipped to [0, 255] and written to pDst.
 *
 * Input Arguments:
 *
 *   ppSrc - pointer to the source data pointer; neither it nor *ppSrc may
 *            be NULL when AC is non-zero
 *   pPred - pointer to the 4x4 prediction; must be non-NULL and 4-byte
 *            aligned
 *   pDC - optional pointer to a DC coefficient; must be non-NULL when AC
 *            is zero
 *   predStep - step of the prediction buffer; must be a multiple of 4
 *   dstStep - step of the destination buffer; must be a multiple of 4
 *   QP - quantisation parameter; must be in [0,51] when AC is non-zero
 *   AC - non-zero when packed coefficients are present at *ppSrc
 *
 * Output Arguments:
 *
 *   pDst - pointer to the 4x4 destination; must be non-NULL and 4-byte
 *            aligned
 *
 * Return Value:
 *
 *    OMX_Sts_NoErr - no error
 *    OMX_Sts_BadArgErr - one of the conditions above is violated
 *
 */
OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd(
     const OMX_U8 **ppSrc,
     const OMX_U8 *pPred,
     const OMX_S16 *pDC,
     OMX_U8 *pDst,
     OMX_INT predStep,
     OMX_INT dstStep,
     OMX_INT QP,
     OMX_INT AC        
)
{
    /* 16 coefficients plus slack for the 8-byte alignment below */
    OMX_S16 scratch[16+4];
    OMX_S16 *pResidual;
    int k, row, col;

    /* Checks that apply in every mode */
    armRetArgErrIf(pPred == NULL,             OMX_Sts_BadArgErr);
    armRetArgErrIf(armNot4ByteAligned(pPred), OMX_Sts_BadArgErr);
    armRetArgErrIf(pDst == NULL,              OMX_Sts_BadArgErr);
    armRetArgErrIf(armNot4ByteAligned(pDst),  OMX_Sts_BadArgErr);
    armRetArgErrIf((predStep & 3) != 0,       OMX_Sts_BadArgErr);
    armRetArgErrIf((dstStep & 3) != 0,        OMX_Sts_BadArgErr);

    /* Mode-dependent checks */
    if (AC != 0)
    {
        armRetArgErrIf((QP < 0) || (QP > 51), OMX_Sts_BadArgErr);
        armRetArgErrIf((ppSrc == NULL) || (*ppSrc == NULL),
                       OMX_Sts_BadArgErr);
    }
    else
    {
        armRetArgErrIf(pDC == NULL, OMX_Sts_BadArgErr);
    }

    pResidual = armAlignTo8Bytes(scratch);

    /* Start from an all-zero residual */
    for (k = 0; k < 16; k++)
    {
        pResidual[k] = 0;
    }

    if (AC)
    {
        armVCM4P10_UnpackBlock4x4(ppSrc, pResidual);
        DequantLumaAC4x4(pResidual, QP);
    }

    /* A supplied DC value takes precedence over the unpacked one */
    if (pDC)
    {
        pResidual[0] = pDC[0];
    }

    armVCM4P10_TransformResidual4x4(pResidual, pResidual);

    /* Add the residual to the prediction and saturate to pixel range */
    for (row = 0; row < 4; row++)
    {
        const OMX_U8 *pPredRow = pPred + (row * predStep);
        const OMX_S16 *pResRow = pResidual + (4 * row);
        OMX_U8 *pDstRow = pDst + (row * dstStep);

        for (col = 0; col < 4; col++)
        {
            pDstRow[col] =
                (OMX_U8)armClip(0, 255, pPredRow[col] + pResRow[col]);
        }
    }

    return OMX_Sts_NoErr;
}
/**
 * Function:  omxVCM4P10_InvTransformDequant_LumaDC   (6.3.5.6.3)
 *
 * Description:
 * This function performs inverse 4x4 Hadamard transform and then dequantizes 
 * the coefficients. 
 *
 * Input Arguments:
 *   
 *   pSrc - Pointer to the 4x4 array of the 4x4 Hadamard-transformed and 
 *            quantized coefficients.  16 byte alignment required. 
 *   iQP - Quantization parameter; must be in the range [0,51]. 
 *
 * Output Arguments:
 *   
 *   pDst - Pointer to inverse-transformed and dequantized coefficients.  
 *            16-byte alignment required. 
 *
 * Return Value:
 *    
 *    OMX_Sts_NoErr - no error 
 *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
 *              conditions are true: 
 *    -    at least one of the following pointers is NULL: pSrc 
 *    -    pSrc or pDst is not aligned on a 16-byte boundary 
 *
 */
OMXResult omxVCM4P10_InvTransformDequant_LumaDC(	
	const OMX_S16* 	pSrc,
	OMX_S16*	pDst,
	OMX_U32		iQP
)
{
    OMX_INT     i, j;
    OMX_S32     m1[4][4], m2[4][4], Value;
    OMX_S32     QPer, V;

    /* check for argument error */
    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)
    armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr)
    armRetArgErrIf(armNot16ByteAligned(pSrc), OMX_Sts_BadArgErr)
    armRetArgErrIf(armNot16ByteAligned(pDst), OMX_Sts_BadArgErr)

    /* Inv Hadamard Transform for DC Luma 4x4 block */
    /* Horizontal */
    for (i = 0; i < 4; i++)
    {
        j = i * 4;
        
        m1[i][0] = pSrc[j + 0] + pSrc[j + 2]; /* a+c */
        m1[i][1] = pSrc[j + 1] + pSrc[j + 3]; /* b+d */
        m1[i][2] = pSrc[j + 0] - pSrc[j + 2]; /* a-c */
        m1[i][3] = pSrc[j + 1] - pSrc[j + 3]; /* b-d */

        m2[i][0] = m1[i][0] + m1[i][1]; /* a+b+c+d */
        m2[i][1] = m1[i][2] + m1[i][3]; /* a+b-c-d */
        m2[i][2] = m1[i][2] - m1[i][3]; /* a-b-c+d */
        m2[i][3] = m1[i][0] - m1[i][1]; /* a-b+c-d */

    }

    /* Vertical */
    for (i = 0; i < 4; i++)
    {
        m1[0][i] = m2[0][i] + m2[2][i];
        m1[1][i] = m2[1][i] + m2[3][i];
        m1[2][i] = m2[0][i] - m2[2][i];
        m1[3][i] = m2[1][i] - m2[3][i];

        m2[0][i] = m1[0][i] + m1[1][i];
        m2[1][i] = m1[2][i] + m1[3][i];
        m2[2][i] = m1[2][i] - m1[3][i];
        m2[3][i] = m1[0][i] - m1[1][i];
    }

    
    /* Scaling */
    QPer = iQP / 6;
    V = armVCM4P10_VMatrix [iQP % 6][0];

    for (j = 0; j < 4; j++)
    {
        for (i = 0; i < 4; i++)
        {
            if (QPer < 2)
            {
                Value = (m2[j][i] * V + (1 << (1 - QPer))) >> (2 - QPer);
            }
            else
            {
                Value = m2[j][i] * V * (1 << (QPer - 2));
            }
                        
            pDst[j * 4 + i] = (OMX_S16) Value;
            
        }
/**
 * Function:  omxVCM4P10_GetVLCInfo   (6.3.5.9.1)
 *
 * Description:
 * This function extracts run-length encoding (RLE) information from the 
 * coefficient matrix.  The results are returned in an OMXVCM4P10VLCInfo 
 * structure. 
 *
 * Input Arguments:
 *   
 *   pSrcCoeff - pointer to the transform coefficient matrix.  8-byte 
 *            alignment required. 
 *   pScanMatrix - pointer to the scan order definition matrix.  For a luma 
 *            block the scan matrix should follow [ISO14496-10] section 8.5.4, 
 *            and should contain the values 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 
 *            10, 7, 11, 14, 15.  For a chroma block, the scan matrix should 
 *            contain the values 0, 1, 2, 3. 
 *   bAC - indicates presence of a DC coefficient; 0 = DC coefficient 
 *            present, 1= DC coefficient absent. 
 *   MaxNumCoef - specifies the number of coefficients contained in the 
 *            transform coefficient matrix, pSrcCoeff. The value should be 16 
 *            for blocks of type LUMADC, LUMAAC, LUMALEVEL, and CHROMAAC. The 
 *            value should be 4 for blocks of type CHROMADC. 
 *
 * Output Arguments:
 *   
 *   pDstVLCInfo - pointer to structure that stores information for 
 *            run-length coding. 
 *
 * Return Value:
 *    
 *    OMX_Sts_NoErr - no error 
 *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
 *              conditions are true: 
 *    -    at least one of the following pointers is NULL: 
 *            pSrcCoeff, pScanMatrix, pDstVLCInfo 
 *    -    pSrcCoeff is not aligned on an 8-byte boundary 
 *
 */
OMXResult omxVCM4P10_GetVLCInfo (
	const OMX_S16*		    pSrcCoeff,
	const OMX_U8*			    pScanMatrix,
	OMX_U8			    bAC,
	OMX_U32			    MaxNumCoef,
	OMXVCM4P10VLCInfo*	pDstVLCInfo
)
{
    OMX_INT     i, MinIndex;
    OMX_S32     Value;
    /* Mask starts at bit 2 and is shifted right after each trailing one recorded */
    OMX_U32     Mask = 4, RunBefore;
    OMX_S16     *pLevel;
    OMX_U8      *pRun;
    /* Coefficients gathered in scan order; only the first MaxNumCoef entries are written */
    OMX_S16     Buf [16];

    /* check for argument error */
    armRetArgErrIf(pSrcCoeff == NULL, OMX_Sts_BadArgErr)
    armRetArgErrIf(armNot8ByteAligned(pSrcCoeff), OMX_Sts_BadArgErr)
    armRetArgErrIf(pScanMatrix == NULL, OMX_Sts_BadArgErr)
    armRetArgErrIf(pDstVLCInfo == NULL, OMX_Sts_BadArgErr)
    armRetArgErrIf(bAC > 1, OMX_Sts_BadArgErr)
    armRetArgErrIf(MaxNumCoef > 16, OMX_Sts_BadArgErr)

    /* Initialize RLE Info structure */
    pDstVLCInfo->uTrailing_Ones = 0;
    pDstVLCInfo->uTrailing_One_Signs = 0;
    pDstVLCInfo->uNumCoeffs = 0;
    pDstVLCInfo->uTotalZeros = 0;

    /* Clear all 16 level/run slots regardless of MaxNumCoef */
    for (i = 0; i < 16; i++)
    {
        pDstVLCInfo->iLevels [i] = 0;
        pDstVLCInfo->uRuns [i] = 0;
    }
    
    /* Scan starts at position 1 only when bAC == 0 and MaxNumCoef == 15;
       every other combination starts at position 0 */
    MinIndex = (bAC == 0 && MaxNumCoef == 15) ? 1 : 0;
    /* NOTE(review): i is signed and (MaxNumCoef + MinIndex) is unsigned, so
       this comparison is carried out in unsigned arithmetic */
    for (i = MinIndex; i < (MaxNumCoef + MinIndex); i++)
    {        
        /* Scan */
        Buf [i - MinIndex] = pSrcCoeff [pScanMatrix [i]];
    }

    /* skip zeros at the end */
    i = MaxNumCoef - 1;
    /* NOTE(review): Buf [i] is evaluated before the i >= 0 test, so when every
       scanned coefficient is zero (or MaxNumCoef == 0) Buf [-1] is read once
       before the loop stops; testing i >= 0 first would avoid that read */
    while (!Buf [i] && i >= 0)
    {
        i--;
    }
    
    /* No non-zero coefficient found: the zero-initialised structure is the result */
    if (i < 0)
    {
        return OMX_Sts_NoErr;
    }

    /* Fill RLE Info structure */
    pLevel = pDstVLCInfo->iLevels;
    pRun = pDstVLCInfo->uRuns;
    RunBefore = 0;

    /* Handle first non zero separate */
    pDstVLCInfo->uNumCoeffs++;
    Value = Buf [i];
    if (Value == 1 || Value == -1)
    {
        pDstVLCInfo->uTrailing_Ones++;
        
        /* Record the sign of this +/-1: the current Mask bit is set for -1 */
        pDstVLCInfo->uTrailing_One_Signs |= 
            Value == -1 ? Mask : 0;
        Mask >>= 1;
    }
    /* NOTE(review): the text of this function stops here in this file; the
       remaining RLE fill and the closing brace are not present before the
       next definition begins */
/**
 * Function: omxVCM4P2_QuantIntra_I
 *
 * Description:
 * Quantises the 64 coefficients of one intra block in place.
 * The DC term (index 0) is divided by a DC scaler with rounding away
 * from zero; each AC term (indices 1..63) has its magnitude divided by
 * 2*QP, its sign restored, and is then clipped.
 *
 * Input Arguments:
 *   pSrcDst          - the 64 coefficients to quantise (overwritten)
 *   QP               - quantisation parameter, 1..31
 *   blockIndex       - block number, 0..9; values 4 and 5 select the
 *                      second (chroma) DC scaler table, all others the
 *                      first (luma) table
 *   shortVideoHeader - when exactly 1, the DC scaler is fixed at 8, the
 *                      DC result is clipped to [1, 254] and AC results to
 *                      [-127, 127]; otherwise AC results are clipped to
 *                      [-2047, 2047] and the DC result is not clipped
 *
 * Return Value:
 *   OMX_Sts_NoErr     - no error
 *   OMX_Sts_BadArgErr - pSrcDst is NULL, or blockIndex / QP out of range
 */
OMXResult omxVCM4P2_QuantIntra_I(
     OMX_S16 * pSrcDst,
     OMX_U8 QP,
     OMX_INT blockIndex,
	 OMX_INT shortVideoHeader
 )
{
    OMX_INT dcDivisor = 0;
    OMX_INT acLow, acHigh;
    OMX_INT pos, sign;
    OMX_INT svhMode;
    OMX_S16 level;

    /* Argument error checks */
    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(((blockIndex < 0) || (blockIndex >= 10)), OMX_Sts_BadArgErr);
    armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr);

    svhMode = (shortVideoHeader == 1);

    /* Select the AC clip range and the DC divisor */
    if (svhMode)
    {
        acHigh    = 127;
        acLow     = -127;
        dcDivisor = 8;
    }
    else
    {
        acHigh = 2047;
        acLow  = -2047;

        if ((blockIndex == 4) || (blockIndex == 5))
        {
            /* Second DC scaler table (blocks 4 and 5) */
            if (QP <= 4)
            {
                dcDivisor = 8;
            }
            else if (QP <= 24)
            {
                dcDivisor = (QP + 13) / 2;
            }
            else
            {
                dcDivisor = QP - 6;
            }
        }
        else
        {
            /* First DC scaler table (all other blocks) */
            if (QP <= 4)
            {
                dcDivisor = 8;
            }
            else if (QP <= 8)
            {
                dcDivisor = 2 * QP;
            }
            else if (QP <= 24)
            {
                dcDivisor = QP + 8;
            }
            else
            {
                dcDivisor = (2 * QP) - 16;
            }
        }
    }

    /* DC term: divide by the scaler, rounding away from zero */
    pSrcDst[0] = armIntDivAwayFromZero (pSrcDst[0], dcDivisor);
    if (svhMode)
    {
        /* Short video header keeps the DC level inside [1, 254] */
        pSrcDst[0] = (OMX_S16) armClip (1, 254, pSrcDst[0]);
    }

    /* AC terms: magnitude / (2 * QP), sign restored, then clipped */
    for (pos = 1; pos < 64; pos++)
    {
        level = pSrcDst[pos];
        sign  = armSignCheck (level);
        level = armAbs(level) / (2 * QP);
        level *= sign;
        pSrcDst[pos] = (OMX_S16) armClip (acLow, acHigh, level);
    }

    return OMX_Sts_NoErr;
}
/**
 * Function: omxSP_FFTFwd_CToC_SC16_Sfs
 *
 * Description:
 * Forward complex-to-complex FFT on OMX_SC16 data. The transform is
 * evaluated in the OMX_F64 work buffer held by the FFT specification:
 * the input is scattered through the bit-reversal table, butterfly
 * passes are applied with the span doubling on each pass, and the
 * result is divided by 2^scaleFactor and converted back to 16 bits with
 * saturation.
 *
 * Input Arguments:
 *   pSrc        - input samples, 32-byte aligned
 *   pFFTSpec    - FFT specification; read here as an ARMsFFTSpec_FC64
 *                 for its length N, work buffer, bit-reversal table and
 *                 twiddle table
 *   scaleFactor - output scale exponent, 0..16
 *
 * Output Arguments:
 *   pDst        - output samples, 32-byte aligned
 *
 * Return Value:
 *   OMX_Sts_NoErr     - no error
 *   OMX_Sts_BadArgErr - NULL or misaligned pointer, scaleFactor outside
 *                       0..16, or N outside 1..4096
 */
OMXResult omxSP_FFTFwd_CToC_SC16_Sfs(
     const OMX_SC16 *pSrc,
     OMX_SC16 *pDst,
     const OMXFFTSpec_C_SC16 *pFFTSpec,
     OMX_INT scaleFactor
)
{
    ARMsFFTSpec_FC64 *pSpec;
    OMX_FC64    *pWork;
    OMX_FC64    *pUpper, *pLower, *pTwiddle;
    OMX_FC64    product, *pProduct;
    OMX_U16     *pBitRev;
    OMX_INT     length, groups, span, half;
    OMX_INT     n, k, g;
    OMX_INT     divisor;

    /* Input parameter check */
    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
    armRetArgErrIf(armNot32ByteAligned(pSrc), OMX_Sts_BadArgErr)
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)
    armRetArgErrIf(armNot32ByteAligned(pDst), OMX_Sts_BadArgErr)
    armRetArgErrIf(pFFTSpec == NULL, OMX_Sts_BadArgErr)
    armRetArgErrIf(scaleFactor < 0, OMX_Sts_BadArgErr)
    armRetArgErrIf(scaleFactor > 16, OMX_Sts_BadArgErr)

    /* Transform length range check */
    pSpec = (ARMsFFTSpec_FC64 *) pFFTSpec;
    length = pSpec->N;
    armRetArgErrIf(length < 1, OMX_Sts_BadArgErr)
    armRetArgErrIf(length > (1 << 12), OMX_Sts_BadArgErr)

    /* A single point needs no butterflies: only the scaling is applied */
    if (length == 1)
    {
        pDst [0].Re = armSatRoundRightShift_S32_S16 (pSrc[0].Re, scaleFactor);
        pDst [0].Im = armSatRoundRightShift_S32_S16 (pSrc[0].Im, scaleFactor);
        return OMX_Sts_NoErr;
    }

    pWork   = pSpec->pBuf;
    pBitRev = pSpec->pBitRev;

    /* Scatter the input into the work buffer in bit-reversed order */
    for (n = 0; n < length; n++)
    {
        pWork [pBitRev [n]].Re = (OMX_F64) pSrc [n]. Re;
        pWork [pBitRev [n]].Im = (OMX_F64) pSrc [n]. Im;
    }

    /* Butterfly passes: span doubles while the group count halves */
    pProduct = &product;
    span     = 2;
    groups   = length >> 1;
    while (groups > 0)
    {
        half     = span / 2;
        pTwiddle = pSpec->pTwiddle;

        for (k = 0; k < half; k++)
        {
            pUpper = pWork + k;
            pLower = pUpper + half;

            /* The same twiddle factor serves position k of every group */
            for (g = 0; g < groups; g++)
            {
                armSP_CPLX_MUL (pProduct, pTwiddle, pLower);
                armSP_CPLX_SUB (pLower, pUpper, pProduct);
                armSP_CPLX_ADD (pUpper, pUpper, pProduct);
                pUpper += span;
                pLower += span;
            }
            pTwiddle += groups;
        }

        span   <<= 1;
        groups >>= 1;
    }

    /* Scale down and convert back to 16 bits with saturation */
    divisor = (1 << scaleFactor);
    for (n = 0; n < length; n++)
    {
        pWork [n].Re /= divisor;
        pWork [n].Im /= divisor;
        pDst [n]. Re = armSatRoundFloatToS16 (pWork [n].Re);
        pDst [n]. Im = armSatRoundFloatToS16 (pWork [n].Im);
    }

    return OMX_Sts_NoErr;
}
/**
 * Function: omxVCM4P2_MotionEstimationMB
 *
 * Description:
 * Selects the coding mode and motion vector(s) of one 16x16 macroblock.
 * The steps, in order:
 *   1. the current macroblock is copied into a contiguous 16x16 scratch
 *      buffer and candidate vectors are collected from pMBInfo[0],
 *      pMBInfo[1] and pMBInfo[3] (NULL for missing or intra neighbours);
 *   2. the SAD at MV (0,0) is computed; a SAD of zero ends the search at
 *      once with pMV0[0][0] = (0,0) and mbType = OMX_VC_INTER;
 *   3. a 16x16 search gives the one-vector result, copied to all four
 *      pMV0 entries;
 *   4. when pMEParams->searchEnable8x8 is set, four 8x8 searches are
 *      run; the macroblock becomes OMX_VC_INTER4V if any 8x8 vector
 *      differs from the 16x16 vector, and *pDstSAD becomes the sum of
 *      the four 8x8 SADs;
 *   5. the sum of absolute differences from the block mean is compared
 *      with (*pDstSAD - 512); if it is not larger the macroblock becomes
 *      OMX_VC_INTRA and the four block SADs are set to 0xFFFF;
 *   6. for a macroblock still marked OMX_VC_INTER the four 8x8 block
 *      SADs are recomputed at the 16x16 vector.
 *
 * Input Arguments:
 *   pSrcCurrBuf   - current macroblock, 16-byte aligned
 *   srcCurrStep   - row step of the current plane, multiple of 16
 *   pSrcRefBuf    - co-located macroblock in the reference plane,
 *                   16-byte aligned
 *   srcRefStep    - row step of the reference plane, multiple of 16
 *   pRefRect      - valid region of the reference plane
 *   pCurrPointPos - position of the current macroblock
 *   pMESpec       - motion estimation parameters (OMXVCM4P2MEParams)
 *   pMBInfo       - array of neighbour macroblock info pointers; entries
 *                   0, 1 and 3 are read and may individually be NULL
 *
 * Output Arguments:
 *   pSrcDstMBCurr - receives mbType, pMV0 and pMVPred
 *   pDstSAD       - receives the SAD of the selected mode
 *   pDstBlockSAD  - receives four per-block SADs (not written on the
 *                   zero-SAD early exit)
 *
 * Return Value:
 *   OMX_Sts_NoErr     - no error
 *   OMX_Sts_BadArgErr - a required pointer is NULL, a buffer is not
 *                       16-byte aligned, or a step is not a multiple
 *                       of 16
 */
OMXResult omxVCM4P2_MotionEstimationMB (
    const OMX_U8 *pSrcCurrBuf,
    OMX_S32 srcCurrStep,
    const OMX_U8 *pSrcRefBuf,
    OMX_S32 srcRefStep,
    const OMXRect*pRefRect,
    const OMXVCM4P2Coordinate *pCurrPointPos,
    void *pMESpec,
    const OMXVCM4P2MBInfoPtr *pMBInfo,
    OMXVCM4P2MBInfo *pSrcDstMBCurr,
    OMX_U16 *pDstSAD,
    OMX_U16 *pDstBlockSAD
)
{
    OMX_INT intraSAD, average, count, index, x, y;
    OMX_INT blk, blkX, blkY;
    OMXVCMotionVector dstMV16x16;
    OMX_INT           dstSAD16x16;
    OMX_INT           dstSAD8x8;
    OMXVCM4P2MEParams  *pMEParams;
    OMXVCM4P2Coordinate TempCurrPointPos;
    /* 256 bytes plus up to 15 bytes of slack for 16-byte alignment */
    OMX_U8 aTempSrcCurrBuf[271];
    OMX_U8 *pTempSrcCurrBuf;
    OMX_U8 *pDst;
    /* 64 bytes plus up to 7 bytes of slack for 8-byte alignment */
    OMX_U8 aDst[71];
    OMX_S32 dstStep = 8;
    OMX_INT predictType;
    OMX_S32 Sad;
    const OMX_U8 *pTempSrcRefBuf;
    const OMX_U8 *pRefBlock;
    OMXVCMotionVector* pSrcCandMV1[4];
    OMXVCMotionVector* pSrcCandMV2[4];
    OMXVCMotionVector* pSrcCandMV3[4];

    /* Argument error checks */
    armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pSrcRefBuf == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pCurrPointPos == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pSrcDstMBCurr == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr);
    /* These three are dereferenced below, so reject NULL up front */
    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pMBInfo == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDstBlockSAD == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pSrcRefBuf), OMX_Sts_BadArgErr);
    armRetArgErrIf(((srcCurrStep % 16) || (srcRefStep % 16)), OMX_Sts_BadArgErr);

    pTempSrcCurrBuf = armAlignTo16Bytes(aTempSrcCurrBuf);
    pMEParams = (OMXVCM4P2MEParams *)pMESpec;
    TempCurrPointPos.x = pCurrPointPos->x;
    TempCurrPointPos.y = pCurrPointPos->y;
    pSrcDstMBCurr->mbType = OMX_VC_INTER;

    /* Preparing a linear buffer for block match */
    for (y = 0, index = count = 0; y < 16; y++, index += srcCurrStep - 16)
    {
        for (x = 0; x < 16; x++, count++, index++)
        {
            pTempSrcCurrBuf[count] = pSrcCurrBuf[index];
        }
    }

    /* Collect candidate vectors from neighbours 0, 1 and 3; a missing or
       intra-coded neighbour contributes NULL */
    for (y = 0, index = 0; y < 2; y++)
    {
        for (x = 0; x < 2; x++, index++)
        {
            if ((pMBInfo[0] != NULL) && (pMBInfo[0]->mbType != OMX_VC_INTRA))
            {
                pSrcCandMV1[index] = &(pMBInfo[0]->pMV0[y][x]);
            }
            else
            {
                pSrcCandMV1[index] = NULL;
            }
            if ((pMBInfo[1] != NULL) && (pMBInfo[1]->mbType != OMX_VC_INTRA))
            {
                pSrcCandMV2[index] = &(pMBInfo[1]->pMV0[y][x]);
            }
            else
            {
                pSrcCandMV2[index] = NULL;
            }
            if ((pMBInfo[3] != NULL) && (pMBInfo[3]->mbType != OMX_VC_INTRA))
            {
                pSrcCandMV3[index] = &(pMBInfo[3]->pMV0[y][x]);
            }
            else
            {
                pSrcCandMV3[index] = NULL;
            }
        }
    }

    /* Calculating SAD at MV(0,0) */
    armVCCOMM_SAD(pTempSrcCurrBuf,
                  16,
                  pSrcRefBuf,
                  srcRefStep,
                  &Sad,
                  16,
                  16);
    *pDstSAD = Sad;

    /* Mode decision for NOT_CODED MB: a perfect match needs no search */
    if (*pDstSAD == 0)
    {
        pSrcDstMBCurr->pMV0[0][0].dx = 0;
        pSrcDstMBCurr->pMV0[0][0].dy = 0;
        return OMX_Sts_NoErr;
    }

    omxVCM4P2_FindMVpred(
                    &(pSrcDstMBCurr->pMV0[0][0]),
                    pSrcCandMV1[0],
                    pSrcCandMV2[0],
                    pSrcCandMV3[0],
                    &(pSrcDstMBCurr->pMVPred[0][0]),
                    NULL,
                    0);

    /* Inter 1 MV */
    armVCM4P2_BlockMatch_16x16(
        pSrcRefBuf,
        srcRefStep,
        pRefRect,
        pTempSrcCurrBuf,
        pCurrPointPos,
        &(pSrcDstMBCurr->pMVPred[0][0]),
        NULL,
        pMEParams,
        &dstMV16x16,
        &dstSAD16x16);

    /* Initialize all four vectors with the 1 MV result */
    for (blk = 0; blk < 4; blk++)
    {
        pSrcDstMBCurr->pMV0[blk >> 1][blk & 1].dx = dstMV16x16.dx;
        pSrcDstMBCurr->pMV0[blk >> 1][blk & 1].dy = dstMV16x16.dy;
    }

    *pDstSAD   = dstSAD16x16;

    if (pMEParams->searchEnable8x8)
    {
        /* Inter 4MV: blocks are visited in raster order
           (top-left, top-right, bottom-left, bottom-right) */
        *pDstSAD = 0;
        for (blk = 0; blk < 4; blk++)
        {
            blkY = blk >> 1;
            blkX = blk & 1;

            TempCurrPointPos.x = pCurrPointPos->x + (8 * blkX);
            TempCurrPointPos.y = pCurrPointPos->y + (8 * blkY);

            /* A local pointer addresses the block inside the reference
               macroblock; pSrcRefBuf itself must keep pointing at the
               macroblock origin because the OMX_VC_INTER pass below
               uses it as its base */
            pRefBlock = pSrcRefBuf + (srcRefStep * 8 * blkY) + (8 * blkX);

            /* The predictor of block 0 was already found before the
               16x16 search */
            if (blk != 0)
            {
                omxVCM4P2_FindMVpred(
                            &(pSrcDstMBCurr->pMV0[blkY][blkX]),
                            pSrcCandMV1[blk],
                            pSrcCandMV2[blk],
                            pSrcCandMV3[blk],
                            &(pSrcDstMBCurr->pMVPred[blkY][blkX]),
                            NULL,
                            blk);
            }

            /* NOTE(review): every block is matched against the start of
               the 16x16 scratch buffer, as in the original code; confirm
               that armVCM4P2_BlockMatch_8x8 expects this layout */
            armVCM4P2_BlockMatch_8x8 (pRefBlock,
                                      srcRefStep, pRefRect,
                                      pTempSrcCurrBuf, &TempCurrPointPos,
                                      &(pSrcDstMBCurr->pMVPred[blkY][blkX]), NULL,
                                      pMEParams, &(pSrcDstMBCurr->pMV0[blkY][blkX]),
                                      &dstSAD8x8
                                      );
            pDstBlockSAD[blk] = dstSAD8x8;
            *pDstSAD += dstSAD8x8;
        }

        /* Checking if 4MV is equal to 1MV */
        for (blk = 0; blk < 4; blk++)
        {
            if ((pSrcDstMBCurr->pMV0[blk >> 1][blk & 1].dx != dstMV16x16.dx) ||
                (pSrcDstMBCurr->pMV0[blk >> 1][blk & 1].dy != dstMV16x16.dy))
            {
                /* select the 4 MV */
                pSrcDstMBCurr->mbType = OMX_VC_INTER4V;
                break;
            }
        }
    }

    /* finding the error in intra mode: mean of the 256 pixels ... */
    for (count = 0, average = 0; count < 256 ; count++)
    {
        average = average + pTempSrcCurrBuf[count];
    }
    average = average/256;

    /* ... then the sum of absolute deviations from that mean */
    intraSAD = 0;
    for (count = 0; count < 256 ; count++)
    {
        intraSAD += armAbs ((pTempSrcCurrBuf[count]) - (average));
    }

    /* Using the MPEG4 VM formula for intra/inter mode decision
       Var < (SAD - 2*NB) where NB = N^2 is the number of pixels
       of the macroblock.*/
    if (intraSAD <= (*pDstSAD - 512))
    {
        pSrcDstMBCurr->mbType = OMX_VC_INTRA;
        pSrcDstMBCurr->pMV0[0][0].dx = 0;
        pSrcDstMBCurr->pMV0[0][0].dy = 0;
        *pDstSAD   = intraSAD;
        pDstBlockSAD[0] = 0xFFFF;
        pDstBlockSAD[1] = 0xFFFF;
        pDstBlockSAD[2] = 0xFFFF;
        pDstBlockSAD[3] = 0xFFFF;
    }

    if (pSrcDstMBCurr->mbType == OMX_VC_INTER)
    {
        /* NOTE(review): the vector is applied as a whole-pixel offset
           while its low bits select half-pixel prediction below; confirm
           the units of dstMV16x16 */
        pTempSrcRefBuf = pSrcRefBuf + (srcRefStep * dstMV16x16.dy) + dstMV16x16.dx;

        if ((dstMV16x16.dx & 0x1) && (dstMV16x16.dy & 0x1))
        {
            predictType = OMX_VC_HALF_PIXEL_XY;
        }
        else if (dstMV16x16.dx & 0x1)
        {
            predictType = OMX_VC_HALF_PIXEL_X;
        }
        else if (dstMV16x16.dy & 0x1)
        {
            predictType = OMX_VC_HALF_PIXEL_Y;
        }
        else
        {
            predictType = OMX_VC_INTEGER_PIXEL;
        }

        pDst = armAlignTo8Bytes(&(aDst[0]));

        /* Calculating Block SAD at MV(dstMV16x16.dx,dstMV16x16.dy):
           predict each 8x8 block, then compare it with the matching
           quadrant of the current macroblock */
        for (blk = 0; blk < 4; blk++)
        {
            blkY = blk >> 1;
            blkX = blk & 1;

            omxVCM4P2_MCReconBlock(pTempSrcRefBuf + (srcRefStep * 8 * blkY) + (8 * blkX),
                                   srcRefStep,
                                   NULL,
                                   pDst,
                                   dstStep,
                                   predictType,
                                   pMEParams->rndVal);

            armVCCOMM_SAD(pTempSrcCurrBuf + (16 * 8 * blkY) + (8 * blkX),
                          16,
                          pDst,
                          dstStep,
                          &Sad,
                          8,
                          8);
            pDstBlockSAD[blk] = Sad;
        }
    }
    return OMX_Sts_NoErr;
}
/**
 * Function: armVCM4P2_BlockMatch_Integer
 *
 * Description:
 * Exhaustive integer-pixel block match. Every displacement inside the
 * search window is tried, the SAD against the current block is
 * computed, and armVCM4P2_CompareMV decides whether the candidate
 * replaces the best one so far. The window is +/- searchRange around
 * the current position, reduced on each side where it would leave
 * pRefRect.
 *
 * Input Arguments:
 *   pSrcRefBuf    - reference plane at the position of the current block
 *   refWidth      - row step of the reference plane
 *   pRefRect      - valid region of the reference plane
 *   pSrcCurrBuf   - current block, stored contiguously
 *                   (BlockSize x BlockSize bytes)
 *   pCurrPointPos - position of the current block
 *   pSrcPreMV     - not used by this function
 *   pSrcPreSAD    - not used by this function
 *   pMESpec       - OMXVCM4P2MEParams; only searchRange is read
 *   BlockSize     - block edge length in pixels
 *
 * Output Arguments:
 *   pDstMV  - best vector, stored as twice the integer displacement
 *   pDstSAD - SAD of the best vector
 *
 * Return Value:
 *   OMX_Sts_NoErr     - no error
 *   OMX_Sts_BadArgErr - a required pointer is NULL
 */
OMXResult armVCM4P2_BlockMatch_Integer(
     const OMX_U8 *pSrcRefBuf,
     OMX_INT refWidth,
     const OMXRect *pRefRect,
     const OMX_U8 *pSrcCurrBuf,
     const OMXVCM4P2Coordinate *pCurrPointPos,
     const OMXVCMotionVector *pSrcPreMV,
     const OMX_INT *pSrcPreSAD,
     void *pMESpec,
     OMXVCMotionVector *pDstMV,
     OMX_INT *pDstSAD,
     OMX_U8 BlockSize
)
{

    /* Definitions and Initializations*/

    OMX_INT     outer, inner, count,index;
    OMX_INT     candSAD;
    /*(256*256 +1) this is to make the SAD max initially*/
    OMX_INT     minSAD = 0x10001, fromX, toX, fromY, toY;
    /* Offset to the reference at the begining of the bounding box */
    const OMX_U8      *pTempSrcRefBuf;
    OMX_S16     x, y;
    OMX_INT searchRange;
   
    /* Argument error checks */
    armRetArgErrIf(pSrcRefBuf == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pCurrPointPos == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDstMV == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr);
        
    searchRange = ((OMXVCM4P2MEParams *)pMESpec)->searchRange;

    /* Start from the full window, then shrink each side that would
       take the candidate block outside pRefRect */
    fromX = searchRange;
    toX   = searchRange;
    fromY = searchRange;
    toY   = searchRange;

    if ((pCurrPointPos->x - searchRange) < pRefRect->x)
    {
        fromX =  pCurrPointPos->x - pRefRect->x;
    }

    if ((pCurrPointPos->x + BlockSize + searchRange) > (pRefRect->x + pRefRect->width))
    {
        toX   = pRefRect->width - (pCurrPointPos->x - pRefRect->x) - BlockSize;
    }

    if ((pCurrPointPos->y - searchRange) < pRefRect->y)
    {
        fromY = pCurrPointPos->y - pRefRect->y;
    }

    if ((pCurrPointPos->y + BlockSize + searchRange) > (pRefRect->y + pRefRect->height))
    {
        /* The vertical limit is derived from the rectangle height,
           mirroring the horizontal case which uses the width */
        toY   = pRefRect->height - (pCurrPointPos->y - pRefRect->y) - BlockSize;
    }

    pDstMV->dx = -fromX;
    pDstMV->dy = -fromY;
    /* Looping on y- axis */
    for (y = -fromY; y <= toY; y++)
    {

        /* Looping on x- axis */
        for (x = -fromX; x <= toX; x++)
        {
            /* Positioning the pointer */
            pTempSrcRefBuf = pSrcRefBuf + (refWidth * y) + x;

            /* Calculate the SAD: index walks the strided reference,
               count walks the contiguous current block */
            for (outer = 0, count = 0, index = 0, candSAD = 0;
                 outer < BlockSize;
                 outer++, index += refWidth - BlockSize)
            {
                for (inner = 0; inner < BlockSize; inner++, count++, index++)
                {
                    candSAD += armAbs (pTempSrcRefBuf[index] - pSrcCurrBuf[count]);                    
                }
            }

            /* Result calculations: the stored vector is twice the
               integer displacement, hence the division by 2 here */
            if (armVCM4P2_CompareMV (x, y, candSAD, pDstMV->dx/2, pDstMV->dy/2, minSAD))
            {
                *pDstSAD = candSAD;
                minSAD   = candSAD;
                pDstMV->dx = x*2;
                pDstMV->dy = y*2;
            }

        } /* End of x- axis */
    } /* End of y-axis */

    return OMX_Sts_NoErr;

}
/**
 * Function: omxVCM4P2_TransRecBlockCoef_intra
 *
 * Description:
 * Codes and reconstructs one 8x8 intra block:
 *   1. the source pixels are gathered into a contiguous buffer and
 *      passed through omxVCM4P2_DCT8x8blk and omxVCM4P2_QuantIntra_I;
 *   2. the quantised coefficients are written to pDst;
 *   3. the prediction direction is chosen and AC/DC prediction is run
 *      on a private copy of the coefficients;
 *   4. the quantised coefficients are inverse quantised, inverse
 *      transformed and stored in pRec as 8-bit pixels saturated to
 *      [0, 255].
 *
 * Input Arguments:
 *   pSrc             - source block, 8-byte aligned
 *   pPredBufRow      - row prediction buffer
 *   pPredBufCol      - column prediction buffer
 *   pSumErr          - on entry a negative value disables AC
 *                      prediction; also passed to armVCM4P2_ACDCPredict
 *   blockIndex       - block number, 0..9 (0..3 luminance, others
 *                      chrominance)
 *   curQp            - quantisation parameter of this block, 1..31
 *   pQpBuf           - quantisation parameter buffer used when choosing
 *                      the prediction direction
 *   srcStep, dstStep - positive multiples of 8
 *   shortVideoHeader - forwarded to the quantiser and inverse quantiser
 *
 * Output Arguments:
 *   pDst          - quantised coefficients, 16-byte aligned
 *   pRec          - reconstructed pixels, 8-byte aligned
 *   pPreACPredict - passed to armVCM4P2_ACDCPredict
 *
 * Return Value:
 *   OMX_Sts_NoErr     - no error
 *   OMX_Sts_BadArgErr - NULL or misaligned pointer, invalid step,
 *                       blockIndex or curQp, or a predictor QP outside
 *                       1..31
 */
OMXResult omxVCM4P2_TransRecBlockCoef_intra(
     const OMX_U8 *pSrc,
     OMX_S16 * pDst,
     OMX_U8 * pRec,
     OMX_S16 *pPredBufRow,
     OMX_S16 *pPredBufCol,
     OMX_S16 * pPreACPredict,
     OMX_INT *pSumErr,
     OMX_INT blockIndex,
     OMX_U8 curQp,
     const OMX_U8 *pQpBuf,
     OMX_INT srcStep,
     OMX_INT dstStep,
	 OMX_INT shortVideoHeader
)
{
    /* 64 elements are needed but to align it to 16 bytes need
    8 more elements of padding */
    OMX_S16 tempBuf1[79], tempBuf2[79];
    OMX_S16 tempBuf3[79];
    OMX_S16 *pTempBuf1, *pTempBuf2,*pTempBuf3;
    OMXVCM4P2VideoComponent videoComp;
    OMX_U8  flag;
    OMX_INT x, y, count, predDir;
    OMX_INT predQP, ACPredFlag;
    

    /* Aligning the local buffers */
    pTempBuf1 = armAlignTo16Bytes(tempBuf1);
    pTempBuf2 = armAlignTo16Bytes(tempBuf2);
    pTempBuf3 = armAlignTo16Bytes(tempBuf3);

    /* Argument error checks */
    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pRec == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs8ByteAligned(pRec), OMX_Sts_BadArgErr);
    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
    armRetArgErrIf(pPredBufRow == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pPredBufCol == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pPreACPredict == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pSumErr == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pQpBuf == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf((srcStep <= 0) || (dstStep <= 0) ||
                (dstStep & 7) || (srcStep & 7)
                , OMX_Sts_BadArgErr);
    armRetArgErrIf((blockIndex < 0) || (blockIndex > 9), OMX_Sts_BadArgErr);

    armRetArgErrIf((curQp <= 0) || (curQp >=32), OMX_Sts_BadArgErr);


   /* Setting the videoComp */
    if (blockIndex <= 3)
    {
        videoComp = OMX_VC_LUMINANCE;
    }
    else
    {
        videoComp = OMX_VC_CHROMINANCE;
    }
    /* Converting from 2-d to 1-d buffer */
    for (y = 0, count = 0; y < 8; y++)
    {
        for(x= 0; x < 8; x++, count++)
        {
            pTempBuf1[count] = pSrc[(y*srcStep) + x];
        }
    }

    /* Forward transform, then in-place quantisation of the result */
    omxVCM4P2_DCT8x8blk  (pTempBuf1, pTempBuf2);
    omxVCM4P2_QuantIntra_I(
        pTempBuf2,
        curQp,
        blockIndex,
        shortVideoHeader);

    /* Converting from 1-D to 2-D buffer */
    /* NOTE(review): pDst is written with a dstStep row pitch here while
       pRec below is written contiguously; confirm which of the two
       buffers dstStep is meant to describe */
    for (y = 0, count = 0; y < 8; y++)
    {
        for(x = 0; x < 8; x++, count++)
        {
            /* pTempBuf1 keeps an unpredicted copy for reconstruction */
            pTempBuf1[count] = pTempBuf2[count];
            pDst[(y*dstStep) + x] = pTempBuf2[count];
        }
    }

    /* AC and DC prediction */
    armVCM4P2_SetPredDir(
        blockIndex,
        pPredBufRow,
        pPredBufCol,
        &predDir,
        &predQP,
        pQpBuf);

    armRetDataErrIf(((predQP <= 0) || (predQP >= 32)), OMX_Sts_BadArgErr);

    flag = 1;
    /* A negative *pSumErr on entry disables AC prediction */
    if (*pSumErr < 0)
    {
        ACPredFlag = 0;
    }
    else
    {
        ACPredFlag = 1;
    }

    armVCM4P2_ACDCPredict(
        pTempBuf2,
        pPreACPredict,
        pPredBufRow,
        pPredBufCol,
        curQp,
        predQP,
        predDir,
        ACPredFlag,
        videoComp,
        flag,
        pSumErr);

    /* Reconstructing the texture data */
    omxVCM4P2_QuantInvIntra_I(
        pTempBuf1,
        curQp,
        videoComp,
        shortVideoHeader);
    omxVCM4P2_IDCT8x8blk (pTempBuf1, pTempBuf3);
    for(count = 0; count < 64; count++)
    {
        /* Saturate at both ends: a value above 255 would otherwise wrap
           when narrowed to OMX_U8 */
        pRec[count] = (OMX_U8) armClip (0, 255, pTempBuf3[count]);
    }

    return OMX_Sts_NoErr;
}
/**
 * Function: omxVCM4P2_EncodeMV
 *
 * Description:
 * Writes the motion vector(s) of one macroblock to the bit stream.
 * For each coded vector the predictor is obtained from
 * omxVCM4P2_FindMVpred, the difference to the predictor is split into
 * a VLC-coded part (table armVCM4P2_aVlcMVD, indexed by the value
 * plus 32) and a fixed-length residual of (fcodeForward - 1) bits, and
 * both are packed: horizontal component first, then vertical.
 *
 * Input Arguments:
 *   ppBitStream, pBitOffset - current write position; *pBitOffset must
 *                             be 0..7
 *   pMVCurMB           - vectors of the current macroblock, indexed by
 *                        block number
 *   pSrcMVLeftMB, pSrcMVUpperMB, pSrcMVUpperRightMB
 *                      - neighbour vectors forwarded to
 *                        omxVCM4P2_FindMVpred
 *   fcodeForward       - 1..7; the scale factor is 2^(fcodeForward-1)
 *   MBType             - OMX_VC_INTRA / OMX_VC_INTRA_Q write nothing;
 *                        OMX_VC_INTER4V / OMX_VC_INTER4V_Q write four
 *                        vectors; every other type writes one
 *
 * Output Arguments:
 *   ppBitStream, pBitOffset - advanced by the packing helpers
 *
 * Return Value:
 *   OMX_Sts_NoErr     - no error
 *   OMX_Sts_BadArgErr - NULL pointer, *pBitOffset outside 0..7 or
 *                       fcodeForward outside 1..7
 */
OMXResult omxVCM4P2_EncodeMV(
     OMX_U8 **ppBitStream,
     OMX_INT *pBitOffset,
     const OMXVCMotionVector * pMVCurMB,
     const OMXVCMotionVector * pSrcMVLeftMB,
     const OMXVCMotionVector * pSrcMVUpperMB,
     const OMXVCMotionVector * pSrcMVUpperRightMB,
     OMX_INT fcodeForward,
     OMXVCM4P2MacroblockType MBType
)
{
    OMXVCMotionVector predMV, diffMV;
    OMXVCMotionVector predCandidates[12];
    OMX_INT blk, comp, numVectors;
    OMX_INT delta[2];
    OMX_S32 mvResidual, mvData;
    OMX_U8 scaleFactor, index;

    /* Argument error checks */
    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pMVCurMB == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(((*pBitOffset < 0) || (*pBitOffset > 7)), OMX_Sts_BadArgErr);
    armRetArgErrIf(((fcodeForward < 1) || (fcodeForward > 7)), \
                    OMX_Sts_BadArgErr);

    /* Intra macroblocks carry no motion vectors: nothing is written */
    if ((MBType == OMX_VC_INTRA) || (MBType == OMX_VC_INTRA_Q))
    {
        return OMX_Sts_NoErr;
    }

    /* Four vectors for the 4MV types, one for everything else */
    numVectors = 1;
    if ((MBType == OMX_VC_INTER4V) || (MBType == OMX_VC_INTER4V_Q))
    {
        numVectors = 4;
    }

    /* Calculating the scale factor */
    scaleFactor = 1 << (fcodeForward -1);

    for (blk = 0; blk < numVectors; blk++)
    {
        /* Find the predicted vector */
        omxVCM4P2_FindMVpred (
            pMVCurMB,
            pSrcMVLeftMB,
            pSrcMVUpperMB,
            pSrcMVUpperRightMB,
            &predMV,
            predCandidates,
            blk );

        /* Differential motion vector, narrowed to the vector field type */
        diffMV.dx = pMVCurMB[blk].dx - predMV.dx;
        diffMV.dy = pMVCurMB[blk].dy - predMV.dy;
        delta[0] = diffMV.dx;
        delta[1] = diffMV.dy;

        /* Component 0 is horizontal, component 1 is vertical */
        for (comp = 0; comp < 2; comp++)
        {
            /* Split the difference into mv_data and mv_residual */
            if (delta[comp] == 0)
            {
                mvResidual = 0;
                mvData = 0;
            }
            else
            {
                mvResidual = ( armAbs(delta[comp]) - 1) % scaleFactor;
                mvData = (armAbs(delta[comp]) - mvResidual + (scaleFactor - 1))
                         / scaleFactor;
                if (delta[comp] < 0)
                {
                    mvData = -mvData;
                }
            }

            /* Huffman encoding: the table entry for mv_data sits at
               offset mv_data + 32 */
            index = mvData + 32;
            armPackVLC32 (ppBitStream, pBitOffset, armVCM4P2_aVlcMVD[index]);

            /* The residual is sent only when it occupies at least one
               bit and the difference is non-zero */
            if ((fcodeForward > 1) && (delta[comp] != 0))
            {
                armPackBits (ppBitStream, pBitOffset, mvResidual, (fcodeForward -1));
            }
        }
    }

    return OMX_Sts_NoErr;
}
/**
 * Function: armVCM4P10_InterpolateHalfDiag_Luma
 *
 * Description:
 * Computes the luma samples at the diagonal half-pixel position in two
 * passes of the 6-tap filter (1, -5, 20, 20, -5, 1). The first pass
 * filters horizontally for rows -2 .. iHeight+2 and keeps the
 * unrounded sums; the second pass filters those sums vertically, adds
 * 512, shifts right by 10 and clips the result to [0, 255].
 *
 * Input Arguments:
 *   pSrc     - top-left sample of the block in the source plane;
 *              samples from 2 rows above to 3 rows below the block and
 *              from 2 columns left to 3 columns right are read
 *   iSrcStep - row step of the source plane
 *   iDstStep - row step of the destination
 *   iWidth   - block width, at most 16
 *   iHeight  - block height, at most 16
 *
 * Output Arguments:
 *   pDst     - iWidth x iHeight interpolated samples
 *
 * Return Value:
 *   OMX_Sts_NoErr     - no error
 *   OMX_Sts_BadArgErr - pSrc or pDst is NULL, or iWidth / iHeight
 *                       exceeds 16
 */
OMXResult armVCM4P10_InterpolateHalfDiag_Luma(  
        const OMX_U8*     pSrc, 
        OMX_U32     iSrcStep, 
        OMX_U8*     pDst, 
        OMX_U32     iDstStep,
        OMX_U32     iWidth, 
        OMX_U32     iHeight
)
{
    OMX_S32     HalfCoeff, pos;
    OMX_S16     Buf [21 * 16];  /* 21 rows by 16 pixels per row */
    OMX_U32     y, x;

    /* check for argument error */
    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
    /* Buf holds (iHeight + 5) rows of iWidth sums; anything larger than
       16x16 would write past its end */
    armRetArgErrIf((iWidth > 16) || (iHeight > 16), OMX_Sts_BadArgErr);

    /*
     * Intermediate values will be 1/2 pel at Horizontal direction
     * Starting at (0.5, -2) at top extending to (0.5, height + 3) at bottom
     * Buf contains a 2D array of size (iWidth)X(iHeight + 5)
     */
    for (y = 0; y < iHeight + 5; y++)
    {
        for (x = 0; x < iWidth; x++)
        {
            /* Rows above the block need a negative offset, so the
               offset is formed in signed arithmetic */
            pos = ((OMX_S32) y - 2) * (OMX_S32) iSrcStep + (OMX_S32) x;
            HalfCoeff = 
                pSrc [pos - 2] - 
                5 * pSrc [pos - 1] + 
                20 * pSrc [pos] + 
                20 * pSrc [pos + 1] - 
                5 * pSrc [pos + 2] + 
                pSrc [pos + 3];
            Buf [y * iWidth + x] = (OMX_S16)HalfCoeff;
        } /* x */
    } /* y */

    /* Vertical interpolate */
    for (y = 0; y < iHeight; y++)
    {
        for (x = 0; x < iWidth; x++)
        {
            pos = y * iWidth + x;
            HalfCoeff = 
                Buf [pos] - 
                5 * Buf [pos + 1 * iWidth] + 
                20 * Buf [pos + 2 * iWidth] + 
                20 * Buf [pos + 3 * iWidth] - 
                5 * Buf [pos + 4 * iWidth] + 
                Buf [pos + 5 * iWidth];

            /* Both passes carry a gain of 32, so round and divide by 1024 */
            HalfCoeff = (HalfCoeff + 512) >> 10;
            HalfCoeff = armClip(0, 255, HalfCoeff);

            pDst [y * iDstStep + x] = (OMX_U8) HalfCoeff;
        }
    }
        
    return OMX_Sts_NoErr;
}