void write_back_motion_TI( const int ai_iB_stride, const int ai_iB8_stride , short MvdL0[ ],short ai_tiMv_cache[ ][2], short ao_tiRef[ ], short ai_tiRef_cache[]) { //1 Iteration _mem8(MvdL0) =_mem8(ai_tiMv_cache+12); _mem8(MvdL0+4)=_mem8(ai_tiMv_cache+14); MvdL0 += ai_iB_stride; //2 iteration _mem8(MvdL0) =_mem8(ai_tiMv_cache+20); _mem8(MvdL0+4)=_mem8(ai_tiMv_cache+22); MvdL0 += ai_iB_stride; //3 iteration _mem8(MvdL0) =_mem8(ai_tiMv_cache+28); _mem8(MvdL0+4)=_mem8(ai_tiMv_cache+30); MvdL0 += ai_iB_stride; //4 iteration _mem8(MvdL0) =_mem8(ai_tiMv_cache+36); _mem8(MvdL0+4)=_mem8(ai_tiMv_cache+38); MvdL0 += ai_iB_stride; _mem4(ao_tiRef)=(ai_tiRef_cache[14]<<16|ai_tiRef_cache[12]); _mem4(ao_tiRef+ai_iB8_stride)=(ai_tiRef_cache[30]<<16|ai_tiRef_cache[28]); }
static __inline void *optimized_mem_set(void *mem, int ch, size_t n) { char * restrict dst1, * restrict dst2; int pre_bytes, post_bytes, wfill, i; unsigned char *outbuf = mem; unsigned int count = n; dst1 = (char *)outbuf; #if defined(_TMS320C6400) || defined(_TMS320C6740) || defined(_TMS320C6600) || \ defined(_TI_C6X_TESLA) /*---------------------------------------------------------------------*/ /* We do not use 'dwfill' on other variations of the C6x architecture, */ /* so limit 'dwfill' references to the architectures that use it. */ /*---------------------------------------------------------------------*/ { long long dwfill; /*------------------------------------------------------------------*/ /* Set up 64-bit and 32-bit fill values. */ /*------------------------------------------------------------------*/ wfill = _pack2 (ch, ch); wfill = _packl4(wfill, wfill); dwfill = _itoll (wfill, wfill); /*------------------------------------------------------------------*/ /* Calculate # of bytes to pre-copy to get to an alignment of 8 */ /*------------------------------------------------------------------*/ pre_bytes = (8 - (int) dst1) & 7; if (count > pre_bytes) { count -= pre_bytes; if (pre_bytes & 1) { *dst1 = ch; dst1 += 1; } if (pre_bytes & 2) { _amem2(dst1) = wfill; dst1 += 2; } if (pre_bytes & 4) { _amem4(dst1) = wfill; dst1 += 4; } } /*------------------------------------------------------------------*/ /* Double word fills */ /*------------------------------------------------------------------*/ post_bytes = count > 0 ? count : 0; dst2 = dst1 + 8; if (count > 15) for (i = 0; i < count >> 4; i++) { _amem8(dst1) = dwfill; dst1 += 16; _amem8(dst2) = dwfill; dst2 += 16; post_bytes -= 16; } /*------------------------------------------------------------------*/ /* Finish transfer with 8, 4, 2 and/or 1-byte writes */ /*------------------------------------------------------------------*/ if (post_bytes & 8) { _mem8(dst1) = dwfill; dst1 += 8; } if (post_bytes & 4) { _mem4(dst1) = wfill; dst1 += 4; } if (post_bytes & 2) { dst1[0] = ch; dst1[1] = ch; dst1 += 2; } if (post_bytes & 1) { *dst1 = ch; dst1 += 1; } } #else /*--------------------------------------------------------------------*/ /* Set up 32-bit fill value. */ /*--------------------------------------------------------------------*/ wfill = _mpy(0x101, (int)ch); wfill += (wfill << 16); /*--------------------------------------------------------------------*/ /* Calculate number of bytes to pre-copy to get to an alignment of 4 */ /*--------------------------------------------------------------------*/ pre_bytes = (4 - (int) dst1) & 3; if (count > pre_bytes) { count -= pre_bytes; if (pre_bytes & 1) { *dst1 = ch; dst1 += 1; } if (pre_bytes & 2) { _amem2(dst1) = wfill; dst1 += 2; } } /*--------------------------------------------------------------------*/ /* Double word fills */ /*--------------------------------------------------------------------*/ post_bytes = count > 0 ? count : 0; dst2 = dst1 + 4; if (count > 7) for (i = 0; i < count >> 3; i++) { _amem4(dst1) = wfill; dst1 += 8; _amem4(dst2) = wfill; dst2 += 8; post_bytes -= 8; } /*--------------------------------------------------------------------*/ /* Finish transfer with up to 7 single-byte writes. */ /*--------------------------------------------------------------------*/ if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } #endif return dst1; }