int main () { int v; long long vll; v = _add2 (a, b); if (v != 0x1000f000) abort (); v = _sub2 (a, b); if (v != 0x9000b000) abort (); v = _sub2 (b, a); if (v != 0x70005000) abort (); v = _add4 (a4, b4); if (v != 0x10f02000) abort (); v = _sub4 (a4, b4); if (v != 0x90b04000) abort (); v = _saddu4 (a4, c4); if (v != 0xfff050ff) abort (); v = _sadd2 (a, b); if (v != 0x1000f000) abort (); v = _sadd2 (a, c); if (v != 0x7fff8000) abort (); v = _ssub2 (a, b); if (v != 0x7fffb000) abort (); v = _ssub2 (b, a); if (v != 0x80005000) abort (); vll = _smpy2ll (a, b); if (vll != 0xd8000000f4000000ll) abort (); vll = _smpy2ll (d, d); if (vll != 0x7fffffff00000002ll) abort (); v = _avg2 (b, e); if (v != 0x08002001) abort (); v = _avgu4 (d4, e4); if (v != 0x88102980) abort (); v = _abs2 (a); if (v != 0x50003000) abort (); v = _abs2 (f); if (v != 0x40007fff) abort (); return 0; }
/** This function allows to get the luminance prediction of a non IDR picture when xFracl = 0 and yFracl = 1. @param image Table of current frame. @param refPicLXl Table of the reference decoded picture buffer. @param PicWidthSamples Stride of the reference buffer. @param stride Stride of the current image. */ void luma_sample_interp_0_1_TI(unsigned char image [], unsigned char refPicLXl[], const short PicWidthSamples, const short stride){ /* No horizontal interpolation */ unsigned int uiLine1,uiLine2,uiLine3,uiLine4,uiLine5,uiLine6,uiLine7,uiLine8,uiLine9; unsigned int uiTmpLine12_h,uiTmpLine34_h,uiTmpLine12_l,uiTmpLine34_l,uiTmpLine1234_4,uiTmpLine1234_2,uiTmpLine1234_3,uiTmpLine1234_1; unsigned int uiTmpLine56_h,uiTmpLine78_h,uiTmpLine56_l,uiTmpLine78_l,uiTmpLine5678_4,uiTmpLine5678_2,uiTmpLine5678_3,uiTmpLine5678_1; unsigned int tmpc1_1,tmpc1_2,tmpc2_1,tmpc2_2,tmpc1,tmpc2; unsigned int tmpc3_1,tmpc3_2,tmpc4_1,tmpc4_2,tmpc3,tmpc4; unsigned int tmp12,tmp34,tmpend1,tmpend2,tmpend3,tmpend4; unsigned int tmpl9l,tmpl9h; unsigned int input1,input2,input3,input4; unsigned char* pRefImgPtr; unsigned char* pImgPtr; pRefImgPtr = refPicLXl-(PicWidthSamples<<1); pImgPtr = image; uiLine1 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine2 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine3 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine4 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine5 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine6 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine7 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine8 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine9 = _mem4(pRefImgPtr); input1 = uiLine3; input2 = uiLine4; input3 = uiLine5; input4 = uiLine6; uiTmpLine12_h = _packh4(uiLine1,uiLine2); uiTmpLine34_h = _packh4(uiLine3,uiLine4); uiTmpLine12_l = _packl4(uiLine1,uiLine2); uiTmpLine34_l = _packl4(uiLine3,uiLine4); uiTmpLine1234_4 = _packh4(uiTmpLine12_h,uiTmpLine34_h); uiTmpLine1234_2 = _packl4(uiTmpLine12_h,uiTmpLine34_h); uiTmpLine1234_3 = _packh4(uiTmpLine12_l,uiTmpLine34_l); uiTmpLine1234_1 = _packl4(uiTmpLine12_l,uiTmpLine34_l); uiTmpLine56_h = _packh4(uiLine5,uiLine6); uiTmpLine78_h = _packh4(uiLine7,uiLine8); uiTmpLine56_l = _packl4(uiLine5,uiLine6); uiTmpLine78_l = _packl4(uiLine7,uiLine8); uiTmpLine5678_4 = _packh4(uiTmpLine56_h,uiTmpLine78_h); uiTmpLine5678_2 = _packl4(uiTmpLine56_h,uiTmpLine78_h); uiTmpLine5678_3 = _packh4(uiTmpLine56_l,uiTmpLine78_l); uiTmpLine5678_1 = _packl4(uiTmpLine56_l,uiTmpLine78_l); tmpc1_1 = _dotpsu4(0x01FB1414,uiTmpLine1234_1); tmpc1_2 = _dotpsu4(0xFB010000,uiTmpLine5678_1); tmpc2_1 = _dotpsu4(0x01FB1414,uiTmpLine1234_2); tmpc2_2 = _dotpsu4(0xFB010000,uiTmpLine5678_2); tmpc1 = _spack2(tmpc1_1,tmpc2_1); tmpc2 = _spack2(tmpc1_2,tmpc2_2); tmp12 = _sadd2(tmpc1,tmpc2); tmp12 = _shr2(_sadd2(tmp12,0x00100010),5); tmpc3_1 = _dotpsu4(0x01FB1414,uiTmpLine1234_3); tmpc3_2 = _dotpsu4(0xFB010000,uiTmpLine5678_3); tmpc4_1 = _dotpsu4(0x01FB1414,uiTmpLine1234_4); tmpc4_2 = _dotpsu4(0xFB010000,uiTmpLine5678_4); tmpc3 = _spack2(tmpc3_1,tmpc4_1); tmpc4 = _spack2(tmpc3_2,tmpc4_2); tmp34 = _sadd2(tmpc3,tmpc4); tmp34 = _shr2(_sadd2(tmp34,0x00100010),5); tmpend1 = _spacku4(tmp34,tmp12); tmpend1 = _swap4(tmpend1); _amem4(pImgPtr) = _avgu4(tmpend1,input1); pImgPtr += stride; tmpc1_1 = _dotpsu4(0x0001FB14,uiTmpLine1234_1); tmpc1_2 = _dotpsu4(0x14FB0100,uiTmpLine5678_1); tmpc2_1 = _dotpsu4(0x0001FB14,uiTmpLine1234_2); tmpc2_2 = _dotpsu4(0x14FB0100,uiTmpLine5678_2); tmpc1 = _spack2(tmpc1_1,tmpc2_1); tmpc2 = _spack2(tmpc1_2,tmpc2_2); tmp12 = _sadd2(tmpc1,tmpc2); tmp12 = _shr2(_sadd2(tmp12,0x00100010),5); tmpc3_1 = _dotpsu4(0x0001FB14,uiTmpLine1234_3); tmpc3_2 = _dotpsu4(0x14FB0100,uiTmpLine5678_3); tmpc4_1 = _dotpsu4(0x0001FB14,uiTmpLine1234_4); tmpc4_2 = _dotpsu4(0x14FB0100,uiTmpLine5678_4); tmpc3 = _spack2(tmpc3_1,tmpc4_1); tmpc4 = _spack2(tmpc3_2,tmpc4_2); tmp34 = _sadd2(tmpc3,tmpc4); tmp34 = _shr2(_sadd2(tmp34,0x00100010),5); tmpend2 = _spacku4(tmp34,tmp12); tmpend2 = _swap4(tmpend2); _amem4(pImgPtr) = _avgu4(tmpend2,input2); pImgPtr += stride; tmpc1_1 = _dotpsu4(0x000001FB,uiTmpLine1234_1); tmpc1_2 = _dotpsu4(0x1414FB01,uiTmpLine5678_1); tmpc2_1 = _dotpsu4(0x000001FB,uiTmpLine1234_2); tmpc2_2 = _dotpsu4(0x1414FB01,uiTmpLine5678_2); tmpc1 = _spack2(tmpc1_1,tmpc2_1); tmpc2 = _spack2(tmpc1_2,tmpc2_2); tmp12 = _sadd2(tmpc1,tmpc2); tmp12 = _shr2(_sadd2(tmp12,0x00100010),5); tmpc3_1 = _dotpsu4(0x000001FB,uiTmpLine1234_3); tmpc3_2 = _dotpsu4(0x1414FB01,uiTmpLine5678_3); tmpc4_1 = _dotpsu4(0x000001FB,uiTmpLine1234_4); tmpc4_2 = _dotpsu4(0x1414FB01,uiTmpLine5678_4); tmpc3 = _spack2(tmpc3_1,tmpc4_1); tmpc4 = _spack2(tmpc3_2,tmpc4_2); tmp34 = _sadd2(tmpc3,tmpc4); tmp34 = _shr2(_sadd2(tmp34,0x00100010),5); tmpend3 = _spacku4(tmp34,tmp12); tmpend3 = _swap4(tmpend3); _amem4(pImgPtr) = _avgu4(tmpend3,input3); pImgPtr += stride; uiLine9 = _swap4(uiLine9); tmpl9h = _unpkhu4 (uiLine9); tmpl9l = _unpklu4 (uiLine9); tmpc1_1 = _extu(uiTmpLine1234_1,24,24);//_dotpsu4(0x00000001,uiTmpLine1234_1); tmpc1_2 = _dotpsu4(0xFB1414FB,uiTmpLine5678_1); tmpc2_1 = _extu(uiTmpLine1234_2,24,24);//_dotpsu4(0x00000001,uiTmpLine1234_2); tmpc2_2 = _dotpsu4(0xFB1414FB,uiTmpLine5678_2); tmpc1 = _spack2(tmpc1_1,tmpc2_1); tmpc2 = _spack2(tmpc1_2,tmpc2_2); tmp12 = _sadd2(tmpc1,tmpc2); tmp12 = _sadd2(tmp12,tmpl9l); tmp12 = _shr2(_sadd2(tmp12,0x00100010),5); tmpc3_1 = _extu(uiTmpLine1234_3,24,24);//_dotpsu4(0x00000001,uiTmpLine1234_3); tmpc3_2 = _dotpsu4(0xFB1414FB,uiTmpLine5678_3); tmpc4_1 = _extu(uiTmpLine1234_4,24,24);//_dotpsu4(0x00000001,uiTmpLine1234_4); tmpc4_2 = _dotpsu4(0xFB1414FB,uiTmpLine5678_4); tmpc3 = _spack2(tmpc3_1,tmpc4_1); tmpc4 = _spack2(tmpc3_2,tmpc4_2); tmp34 = _sadd2(tmpc3,tmpc4); tmp34 = _sadd2(tmp34,tmpl9h); tmp34 = _shr2(_sadd2(tmp34,0x00100010),5); tmpend4 = _spacku4(tmp34,tmp12); tmpend4 = _swap4(tmpend4); _amem4(pImgPtr) = _avgu4(tmpend4,input4); }
void chroma_sample_interpolation_TI(unsigned char image_Cb [RESTRICT], unsigned char image_Cr [RESTRICT] , unsigned char refPicLXCb[RESTRICT], unsigned char refPicLXCr[RESTRICT] , const short xFracl, const short yFracl, const short PicWidthSamples,const short stride) { unsigned char* pucCbPtrA = refPicLXCb; unsigned char* pucCbPtrB = refPicLXCb + 1; unsigned char* pucCbPtrC = refPicLXCb + PicWidthSamples; unsigned char* pucCbPtrD = refPicLXCb + PicWidthSamples + 1; unsigned char* pucCrPtrE = refPicLXCr; unsigned char* pucCrPtrF = refPicLXCr + 1; unsigned char* pucCrPtrG = refPicLXCr + PicWidthSamples; unsigned char* pucCrPtrH = refPicLXCr + PicWidthSamples + 1; unsigned char* pucOutputCbPtr = image_Cb; unsigned char* pucOutputCrPtr = image_Cr; unsigned int uiTmp1,uiTmp2; unsigned int ui1_1,ui1_2,ui2_1,ui2_2,res_1,res_2,res_3,res_4; unsigned int tmpend1_1,tmpend1_2,tmpend2_1,tmpend2_2; unsigned int uiA,uiB,uiC,uiD; unsigned int uiE,uiF,uiG,uiH; unsigned int uicst = xFracl * yFracl; uiTmp1 = _pack2(uicst,uicst); uiTmp2 = (_pack2(xFracl,yFracl)) << 3; uiTmp2 = _sub2(uiTmp2,uiTmp1); uiTmp1 = (uicst) + ((uicst - ((xFracl + yFracl) <<3) + 64) << 16); uicst = _packh2(uiTmp1,uiTmp2); // cst2 cst3 uiTmp1 = _pack2(uiTmp2,uiTmp1); // cst4 cst1 uicst = _spacku4(uicst,uiTmp1); uiA = _mem2(pucCbPtrA); uiB = _mem2(pucCbPtrB); uiC = _mem2(pucCbPtrC); uiD = _mem2(pucCbPtrD); uiE = _mem2(pucCrPtrE); uiF = _mem2(pucCrPtrF); uiG = _mem2(pucCrPtrG); uiH = _mem2(pucCrPtrH); pucCbPtrA += PicWidthSamples; pucCbPtrB += PicWidthSamples; pucCbPtrC += PicWidthSamples; pucCbPtrD += PicWidthSamples; pucCrPtrE += PicWidthSamples; pucCrPtrF += PicWidthSamples; pucCrPtrG += PicWidthSamples; pucCrPtrH += PicWidthSamples; uiA += (_mem2(pucCbPtrA) << 16); uiB += (_mem2(pucCbPtrB) << 16); uiC += (_mem2(pucCbPtrC) << 16); uiD += (_mem2(pucCbPtrD) << 16); uiE += (_mem2(pucCrPtrE) << 16); uiF += (_mem2(pucCrPtrF) << 16); uiG += (_mem2(pucCrPtrG) << 16); uiH += (_mem2(pucCrPtrH) << 16); uiTmp1 = _packh4(uiA,uiB); uiTmp2 = _packh4(uiC,uiD); ui1_1 = _packh4(uiTmp1,uiTmp2); ui2_1 = _packl4(uiTmp1,uiTmp2); uiTmp1 = _packl4(uiA,uiB); uiTmp2 = _packl4(uiC,uiD); ui1_2 = _packh4(uiTmp1,uiTmp2); ui2_2 = _packl4(uiTmp1,uiTmp2); tmpend1_1 = _dotpu4(uicst,ui1_1); tmpend1_2 = _dotpu4(uicst,ui1_2); tmpend2_1 = _dotpu4(uicst,ui2_1); tmpend2_2 = _dotpu4(uicst,ui2_2); res_1 = _pack2(tmpend1_1,tmpend1_2); res_2 = _pack2(tmpend2_1,tmpend2_2); res_1 = _shr2(_sadd2(res_1,0x00200020),6); res_2 = _shr2(_sadd2(res_2,0x00200020),6); res_1 = _spacku4(0x00000000,res_1); res_2 = _spacku4(0x00000000,res_2); _mem2(pucOutputCbPtr) = res_2; pucOutputCbPtr += stride; _mem2(pucOutputCbPtr) = res_1; uiTmp1 = _packh4(uiE,uiF); uiTmp2 = _packh4(uiG,uiH); ui1_1 = _packh4(uiTmp1,uiTmp2); ui2_1 = _packl4(uiTmp1,uiTmp2); uiTmp1 = _packl4(uiE,uiF); uiTmp2 = _packl4(uiG,uiH); ui1_2 = _packh4(uiTmp1,uiTmp2); ui2_2 = _packl4(uiTmp1,uiTmp2); tmpend1_1 = _dotpu4(uicst,ui1_1); tmpend1_2 = _dotpu4(uicst,ui1_2); tmpend2_1 = _dotpu4(uicst,ui2_1); tmpend2_2 = _dotpu4(uicst,ui2_2); res_3 = _pack2(tmpend1_1,tmpend1_2); res_4 = _pack2(tmpend2_1,tmpend2_2); res_3 = _shr2(_sadd2(res_3,0x00200020),6); res_4 = _shr2(_sadd2(res_4,0x00200020),6); res_3 = _spacku4(0x00000000,res_3); res_4 = _spacku4(0x00000000,res_4); _mem2(pucOutputCrPtr) = res_4; pucOutputCrPtr += stride; _mem2(pucOutputCrPtr) = res_3; }