/**
   Copies a block of 4 rows by 4 samples from the reference picture to the
   current picture without any filtering.

   All four source rows are loaded before the first destination row is stored.

   @param image Destination block in the current frame. It is written with
          _amem4 (aligned 32-bit stores), so both the block address and
          stride are expected to keep every row 4-byte aligned.
   @param refPicLXl Source block in the reference decoded picture buffer,
          read with _mem4 (unaligned 32-bit loads).
   @param PicWidthSamples Stride of the reference buffer.
   @param stride Stride of the current image.
 */
void luma_sample_interp_0_0_TI(unsigned char image [], unsigned char refPicLXl[], const short PicWidthSamples, const short stride){

    unsigned char *src = refPicLXl;
    unsigned char *dst = image;
    unsigned int row0, row1, row2, row3;

    /* Fetch the four 4-sample rows of the reference block. */
    row0 = _mem4(src);
    row1 = _mem4(src + PicWidthSamples);
    row2 = _mem4(src + 2 * PicWidthSamples);
    row3 = _mem4(src + 3 * PicWidthSamples);

    /* Write them back unchanged into the current picture. */
    _amem4(dst)              = row0;
    _amem4(dst + stride)     = row1;
    _amem4(dst + 2 * stride) = row2;
    _amem4(dst + 3 * stride) = row3;
}
void write_back_motion_TI( const int ai_iB_stride, const int ai_iB8_stride , short MvdL0[ ],short ai_tiMv_cache[ ][2], short ao_tiRef[ ], short ai_tiRef_cache[]) { //1 Iteration _mem8(MvdL0) =_mem8(ai_tiMv_cache+12); _mem8(MvdL0+4)=_mem8(ai_tiMv_cache+14); MvdL0 += ai_iB_stride; //2 iteration _mem8(MvdL0) =_mem8(ai_tiMv_cache+20); _mem8(MvdL0+4)=_mem8(ai_tiMv_cache+22); MvdL0 += ai_iB_stride; //3 iteration _mem8(MvdL0) =_mem8(ai_tiMv_cache+28); _mem8(MvdL0+4)=_mem8(ai_tiMv_cache+30); MvdL0 += ai_iB_stride; //4 iteration _mem8(MvdL0) =_mem8(ai_tiMv_cache+36); _mem8(MvdL0+4)=_mem8(ai_tiMv_cache+38); MvdL0 += ai_iB_stride; _mem4(ao_tiRef)=(ai_tiRef_cache[14]<<16|ai_tiRef_cache[12]); _mem4(ao_tiRef+ai_iB8_stride)=(ai_tiRef_cache[30]<<16|ai_tiRef_cache[28]); }
_CODE_ACCESS void *memset(void *dst, int fill, size_t len) { char *restrict dst1, *restrict dst2; int pre_bytes, post_bytes, wfill, i; double dfill1, dfill2; dst1 = (char *)dst; /*--------------------------------------------------------------------*/ /* Replicate the 8-bit value in fill into all 4 bytes of wfill */ /*--------------------------------------------------------------------*/ wfill = _pack2 (fill, fill); wfill = _packl4(wfill, wfill); dfill1 = _itod (wfill, wfill); dfill2 = _itod (wfill, wfill); /*--------------------------------------------------------------------*/ /* Calculate number of bytes to pre-copy to get to an alignment of 8 */ /*--------------------------------------------------------------------*/ pre_bytes = (8 - (int) dst) & 7; if (len > pre_bytes) { len -= pre_bytes; if (pre_bytes & 1) { *dst1 = fill; dst1 += 1; } if (pre_bytes & 2) { _amem2(dst1) = wfill; dst1 += 2; } if (pre_bytes & 4) { _amem4(dst1) = wfill; dst1 += 4; } } /*--------------------------------------------------------------------*/ /* Double word fills */ /*--------------------------------------------------------------------*/ post_bytes = len > 0 ? len : 0; dst2 = dst1 + 8; if (len > 15) for (i = 0; i < len >> 4; i++) { _amemd8(dst1) = dfill1; dst1 += 16; _amemd8(dst2) = dfill2; dst2 += 16; post_bytes -= 16; } /*--------------------------------------------------------------------*/ /* Finish transfer with 8, 4, 2 and/or 1-byte writes */ /*--------------------------------------------------------------------*/ if (post_bytes & 8) { _memd8(dst1) = dfill1; dst1 += 8; } if (post_bytes & 4) { _mem4 (dst1) = wfill; dst1 += 4; } if (post_bytes & 2) { dst1[0] = wfill; dst1[1] = wfill; dst1 += 2; } if (post_bytes & 1) { *dst1 = fill; dst1 += 1; } return dst; }
/** This function allows to get the luminance prediction of a non IDR picture when xFracl = 0 and yFracl = 1. @param image Table of current frame. @param refPicLXl Table of the reference decoded picture buffer. @param PicWidthSamples Stride of the reference buffer. @param stride Stride of the current image. */ void luma_sample_interp_0_1_TI(unsigned char image [], unsigned char refPicLXl[], const short PicWidthSamples, const short stride){ /* No horizontal interpolation */ unsigned int uiLine1,uiLine2,uiLine3,uiLine4,uiLine5,uiLine6,uiLine7,uiLine8,uiLine9; unsigned int uiTmpLine12_h,uiTmpLine34_h,uiTmpLine12_l,uiTmpLine34_l,uiTmpLine1234_4,uiTmpLine1234_2,uiTmpLine1234_3,uiTmpLine1234_1; unsigned int uiTmpLine56_h,uiTmpLine78_h,uiTmpLine56_l,uiTmpLine78_l,uiTmpLine5678_4,uiTmpLine5678_2,uiTmpLine5678_3,uiTmpLine5678_1; unsigned int tmpc1_1,tmpc1_2,tmpc2_1,tmpc2_2,tmpc1,tmpc2; unsigned int tmpc3_1,tmpc3_2,tmpc4_1,tmpc4_2,tmpc3,tmpc4; unsigned int tmp12,tmp34,tmpend1,tmpend2,tmpend3,tmpend4; unsigned int tmpl9l,tmpl9h; unsigned int input1,input2,input3,input4; unsigned char* pRefImgPtr; unsigned char* pImgPtr; pRefImgPtr = refPicLXl-(PicWidthSamples<<1); pImgPtr = image; uiLine1 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine2 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine3 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine4 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine5 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine6 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine7 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine8 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine9 = _mem4(pRefImgPtr); input1 = uiLine3; input2 = uiLine4; input3 = uiLine5; input4 = uiLine6; uiTmpLine12_h = _packh4(uiLine1,uiLine2); uiTmpLine34_h = _packh4(uiLine3,uiLine4); uiTmpLine12_l = _packl4(uiLine1,uiLine2); uiTmpLine34_l = _packl4(uiLine3,uiLine4); uiTmpLine1234_4 = _packh4(uiTmpLine12_h,uiTmpLine34_h); 
uiTmpLine1234_2 = _packl4(uiTmpLine12_h,uiTmpLine34_h); uiTmpLine1234_3 = _packh4(uiTmpLine12_l,uiTmpLine34_l); uiTmpLine1234_1 = _packl4(uiTmpLine12_l,uiTmpLine34_l); uiTmpLine56_h = _packh4(uiLine5,uiLine6); uiTmpLine78_h = _packh4(uiLine7,uiLine8); uiTmpLine56_l = _packl4(uiLine5,uiLine6); uiTmpLine78_l = _packl4(uiLine7,uiLine8); uiTmpLine5678_4 = _packh4(uiTmpLine56_h,uiTmpLine78_h); uiTmpLine5678_2 = _packl4(uiTmpLine56_h,uiTmpLine78_h); uiTmpLine5678_3 = _packh4(uiTmpLine56_l,uiTmpLine78_l); uiTmpLine5678_1 = _packl4(uiTmpLine56_l,uiTmpLine78_l); tmpc1_1 = _dotpsu4(0x01FB1414,uiTmpLine1234_1); tmpc1_2 = _dotpsu4(0xFB010000,uiTmpLine5678_1); tmpc2_1 = _dotpsu4(0x01FB1414,uiTmpLine1234_2); tmpc2_2 = _dotpsu4(0xFB010000,uiTmpLine5678_2); tmpc1 = _spack2(tmpc1_1,tmpc2_1); tmpc2 = _spack2(tmpc1_2,tmpc2_2); tmp12 = _sadd2(tmpc1,tmpc2); tmp12 = _shr2(_sadd2(tmp12,0x00100010),5); tmpc3_1 = _dotpsu4(0x01FB1414,uiTmpLine1234_3); tmpc3_2 = _dotpsu4(0xFB010000,uiTmpLine5678_3); tmpc4_1 = _dotpsu4(0x01FB1414,uiTmpLine1234_4); tmpc4_2 = _dotpsu4(0xFB010000,uiTmpLine5678_4); tmpc3 = _spack2(tmpc3_1,tmpc4_1); tmpc4 = _spack2(tmpc3_2,tmpc4_2); tmp34 = _sadd2(tmpc3,tmpc4); tmp34 = _shr2(_sadd2(tmp34,0x00100010),5); tmpend1 = _spacku4(tmp34,tmp12); tmpend1 = _swap4(tmpend1); _amem4(pImgPtr) = _avgu4(tmpend1,input1); pImgPtr += stride; tmpc1_1 = _dotpsu4(0x0001FB14,uiTmpLine1234_1); tmpc1_2 = _dotpsu4(0x14FB0100,uiTmpLine5678_1); tmpc2_1 = _dotpsu4(0x0001FB14,uiTmpLine1234_2); tmpc2_2 = _dotpsu4(0x14FB0100,uiTmpLine5678_2); tmpc1 = _spack2(tmpc1_1,tmpc2_1); tmpc2 = _spack2(tmpc1_2,tmpc2_2); tmp12 = _sadd2(tmpc1,tmpc2); tmp12 = _shr2(_sadd2(tmp12,0x00100010),5); tmpc3_1 = _dotpsu4(0x0001FB14,uiTmpLine1234_3); tmpc3_2 = _dotpsu4(0x14FB0100,uiTmpLine5678_3); tmpc4_1 = _dotpsu4(0x0001FB14,uiTmpLine1234_4); tmpc4_2 = _dotpsu4(0x14FB0100,uiTmpLine5678_4); tmpc3 = _spack2(tmpc3_1,tmpc4_1); tmpc4 = _spack2(tmpc3_2,tmpc4_2); tmp34 = _sadd2(tmpc3,tmpc4); tmp34 = 
_shr2(_sadd2(tmp34,0x00100010),5); tmpend2 = _spacku4(tmp34,tmp12); tmpend2 = _swap4(tmpend2); _amem4(pImgPtr) = _avgu4(tmpend2,input2); pImgPtr += stride; tmpc1_1 = _dotpsu4(0x000001FB,uiTmpLine1234_1); tmpc1_2 = _dotpsu4(0x1414FB01,uiTmpLine5678_1); tmpc2_1 = _dotpsu4(0x000001FB,uiTmpLine1234_2); tmpc2_2 = _dotpsu4(0x1414FB01,uiTmpLine5678_2); tmpc1 = _spack2(tmpc1_1,tmpc2_1); tmpc2 = _spack2(tmpc1_2,tmpc2_2); tmp12 = _sadd2(tmpc1,tmpc2); tmp12 = _shr2(_sadd2(tmp12,0x00100010),5); tmpc3_1 = _dotpsu4(0x000001FB,uiTmpLine1234_3); tmpc3_2 = _dotpsu4(0x1414FB01,uiTmpLine5678_3); tmpc4_1 = _dotpsu4(0x000001FB,uiTmpLine1234_4); tmpc4_2 = _dotpsu4(0x1414FB01,uiTmpLine5678_4); tmpc3 = _spack2(tmpc3_1,tmpc4_1); tmpc4 = _spack2(tmpc3_2,tmpc4_2); tmp34 = _sadd2(tmpc3,tmpc4); tmp34 = _shr2(_sadd2(tmp34,0x00100010),5); tmpend3 = _spacku4(tmp34,tmp12); tmpend3 = _swap4(tmpend3); _amem4(pImgPtr) = _avgu4(tmpend3,input3); pImgPtr += stride; uiLine9 = _swap4(uiLine9); tmpl9h = _unpkhu4 (uiLine9); tmpl9l = _unpklu4 (uiLine9); tmpc1_1 = _extu(uiTmpLine1234_1,24,24);//_dotpsu4(0x00000001,uiTmpLine1234_1); tmpc1_2 = _dotpsu4(0xFB1414FB,uiTmpLine5678_1); tmpc2_1 = _extu(uiTmpLine1234_2,24,24);//_dotpsu4(0x00000001,uiTmpLine1234_2); tmpc2_2 = _dotpsu4(0xFB1414FB,uiTmpLine5678_2); tmpc1 = _spack2(tmpc1_1,tmpc2_1); tmpc2 = _spack2(tmpc1_2,tmpc2_2); tmp12 = _sadd2(tmpc1,tmpc2); tmp12 = _sadd2(tmp12,tmpl9l); tmp12 = _shr2(_sadd2(tmp12,0x00100010),5); tmpc3_1 = _extu(uiTmpLine1234_3,24,24);//_dotpsu4(0x00000001,uiTmpLine1234_3); tmpc3_2 = _dotpsu4(0xFB1414FB,uiTmpLine5678_3); tmpc4_1 = _extu(uiTmpLine1234_4,24,24);//_dotpsu4(0x00000001,uiTmpLine1234_4); tmpc4_2 = _dotpsu4(0xFB1414FB,uiTmpLine5678_4); tmpc3 = _spack2(tmpc3_1,tmpc4_1); tmpc4 = _spack2(tmpc3_2,tmpc4_2); tmp34 = _sadd2(tmpc3,tmpc4); tmp34 = _sadd2(tmp34,tmpl9h); tmp34 = _shr2(_sadd2(tmp34,0x00100010),5); tmpend4 = _spacku4(tmp34,tmp12); tmpend4 = _swap4(tmpend4); _amem4(pImgPtr) = _avgu4(tmpend4,input4); }
static __inline void *optimized_mem_set(void *mem, int ch, size_t n) { char * restrict dst1, * restrict dst2; int pre_bytes, post_bytes, wfill, i; unsigned char *outbuf = mem; unsigned int count = n; dst1 = (char *)outbuf; #if defined(_TMS320C6400) || defined(_TMS320C6740) || defined(_TMS320C6600) || \ defined(_TI_C6X_TESLA) /*---------------------------------------------------------------------*/ /* We do not use 'dwfill' on other variations of the C6x architecture, */ /* so limit 'dwfill' references to the architectures that use it. */ /*---------------------------------------------------------------------*/ { long long dwfill; /*------------------------------------------------------------------*/ /* Set up 64-bit and 32-bit fill values. */ /*------------------------------------------------------------------*/ wfill = _pack2 (ch, ch); wfill = _packl4(wfill, wfill); dwfill = _itoll (wfill, wfill); /*------------------------------------------------------------------*/ /* Calculate # of bytes to pre-copy to get to an alignment of 8 */ /*------------------------------------------------------------------*/ pre_bytes = (8 - (int) dst1) & 7; if (count > pre_bytes) { count -= pre_bytes; if (pre_bytes & 1) { *dst1 = ch; dst1 += 1; } if (pre_bytes & 2) { _amem2(dst1) = wfill; dst1 += 2; } if (pre_bytes & 4) { _amem4(dst1) = wfill; dst1 += 4; } } /*------------------------------------------------------------------*/ /* Double word fills */ /*------------------------------------------------------------------*/ post_bytes = count > 0 ? 
count : 0; dst2 = dst1 + 8; if (count > 15) for (i = 0; i < count >> 4; i++) { _amem8(dst1) = dwfill; dst1 += 16; _amem8(dst2) = dwfill; dst2 += 16; post_bytes -= 16; } /*------------------------------------------------------------------*/ /* Finish transfer with 8, 4, 2 and/or 1-byte writes */ /*------------------------------------------------------------------*/ if (post_bytes & 8) { _mem8(dst1) = dwfill; dst1 += 8; } if (post_bytes & 4) { _mem4(dst1) = wfill; dst1 += 4; } if (post_bytes & 2) { dst1[0] = ch; dst1[1] = ch; dst1 += 2; } if (post_bytes & 1) { *dst1 = ch; dst1 += 1; } } #else /*--------------------------------------------------------------------*/ /* Set up 32-bit fill value. */ /*--------------------------------------------------------------------*/ wfill = _mpy(0x101, (int)ch); wfill += (wfill << 16); /*--------------------------------------------------------------------*/ /* Calculate number of bytes to pre-copy to get to an alignment of 4 */ /*--------------------------------------------------------------------*/ pre_bytes = (4 - (int) dst1) & 3; if (count > pre_bytes) { count -= pre_bytes; if (pre_bytes & 1) { *dst1 = ch; dst1 += 1; } if (pre_bytes & 2) { _amem2(dst1) = wfill; dst1 += 2; } } /*--------------------------------------------------------------------*/ /* Double word fills */ /*--------------------------------------------------------------------*/ post_bytes = count > 0 ? count : 0; dst2 = dst1 + 4; if (count > 7) for (i = 0; i < count >> 3; i++) { _amem4(dst1) = wfill; dst1 += 8; _amem4(dst2) = wfill; dst2 += 8; post_bytes -= 8; } /*--------------------------------------------------------------------*/ /* Finish transfer with up to 7 single-byte writes. 
*/ /*--------------------------------------------------------------------*/ if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } #endif return dst1; }