/**
 * Copies a 4x4 block of samples (four rows of four bytes) from the reference
 * picture into the current image. No filtering is applied; judging by the
 * name this is presumably the xFracl = 0 / yFracl = 0 case -- confirm against
 * the caller.
 *
 * @param image            Destination block. Rows are written with the
 *                         _amem4 (aligned 32-bit access) intrinsic.
 * @param refPicLXl        Top-left sample of the source block. Rows are read
 *                         with the _mem4 (non-aligned 32-bit access) intrinsic.
 * @param PicWidthSamples  Byte distance between two consecutive source rows.
 * @param stride           Byte distance between two consecutive destination rows.
 */
void luma_sample_interp_0_0_TI(unsigned char image [], unsigned char refPicLXl[], const short PicWidthSamples, const short stride)
{
    unsigned int row0, row1, row2, row3;
    unsigned char *src = refPicLXl;
    unsigned char *dst = image;

    /* Fetch all four source rows before anything is written. */
    row0 = _mem4(src);
    row1 = _mem4(src + PicWidthSamples);
    row2 = _mem4(src + 2 * PicWidthSamples);
    row3 = _mem4(src + 3 * PicWidthSamples);

    /* Store them one destination row apart. */
    _amem4(dst)              = row0;
    _amem4(dst + stride)     = row1;
    _amem4(dst + 2 * stride) = row2;
    _amem4(dst + 3 * stride) = row3;
}
_CODE_ACCESS void *memset(void *dst, int fill, size_t len) { char *restrict dst1, *restrict dst2; int pre_bytes, post_bytes, wfill, i; dst1 = (char *)dst; /*--------------------------------------------------------------------*/ /* Replicate the 8-bit value in fill into all 4 bytes of wfill */ /*--------------------------------------------------------------------*/ wfill = _mpy(0x101, fill & 0xff); wfill += wfill << 16; /*--------------------------------------------------------------------*/ /* Calculate number of bytes to pre-copy to get to an alignment of 4 */ /*--------------------------------------------------------------------*/ pre_bytes = (4 - (int) dst) & 3; if (len > pre_bytes) { len -= pre_bytes; if (pre_bytes & 1) { *dst1 = fill; dst1 += 1; } if (pre_bytes & 2) { _amem2(dst1) = wfill; dst1 += 2; } } /*--------------------------------------------------------------------*/ /* Double word fills */ /*--------------------------------------------------------------------*/ post_bytes = len > 0 ? len : 0; dst2 = dst1 + 4; if (len > 7) for (i = 0; i < len >> 3; i++) { _amem4(dst1) = wfill; dst1 += 8; _amem4(dst2) = wfill; dst2 += 8; post_bytes -= 8; } /*--------------------------------------------------------------------*/ /* Finish transfer with 8, 4, 2 and/or 1-byte writes */ /*--------------------------------------------------------------------*/ if (post_bytes) { *dst1++ = fill; post_bytes--; } if (post_bytes) { *dst1++ = fill; post_bytes--; } if (post_bytes) { *dst1++ = fill; post_bytes--; } if (post_bytes) { *dst1++ = fill; post_bytes--; } if (post_bytes) { *dst1++ = fill; post_bytes--; } if (post_bytes) { *dst1++ = fill; post_bytes--; } if (post_bytes) { *dst1++ = fill; post_bytes--; } return dst; }
_CODE_ACCESS void *memset(void *dst, int fill, size_t len) { char *restrict dst1, *restrict dst2; int pre_bytes, post_bytes, wfill, i; double dfill1, dfill2; dst1 = (char *)dst; /*--------------------------------------------------------------------*/ /* Replicate the 8-bit value in fill into all 4 bytes of wfill */ /*--------------------------------------------------------------------*/ wfill = _pack2 (fill, fill); wfill = _packl4(wfill, wfill); dfill1 = _itod (wfill, wfill); dfill2 = _itod (wfill, wfill); /*--------------------------------------------------------------------*/ /* Calculate number of bytes to pre-copy to get to an alignment of 8 */ /*--------------------------------------------------------------------*/ pre_bytes = (8 - (int) dst) & 7; if (len > pre_bytes) { len -= pre_bytes; if (pre_bytes & 1) { *dst1 = fill; dst1 += 1; } if (pre_bytes & 2) { _amem2(dst1) = wfill; dst1 += 2; } if (pre_bytes & 4) { _amem4(dst1) = wfill; dst1 += 4; } } /*--------------------------------------------------------------------*/ /* Double word fills */ /*--------------------------------------------------------------------*/ post_bytes = len > 0 ? len : 0; dst2 = dst1 + 8; if (len > 15) for (i = 0; i < len >> 4; i++) { _amemd8(dst1) = dfill1; dst1 += 16; _amemd8(dst2) = dfill2; dst2 += 16; post_bytes -= 16; } /*--------------------------------------------------------------------*/ /* Finish transfer with 8, 4, 2 and/or 1-byte writes */ /*--------------------------------------------------------------------*/ if (post_bytes & 8) { _memd8(dst1) = dfill1; dst1 += 8; } if (post_bytes & 4) { _mem4 (dst1) = wfill; dst1 += 4; } if (post_bytes & 2) { dst1[0] = wfill; dst1[1] = wfill; dst1 += 2; } if (post_bytes & 1) { *dst1 = fill; dst1 += 1; } return dst; }
/** This function allows to get the luminance prediction of a non IDR picture when xFracl = 0 and yFracl = 1. @param image Table of current frame. @param refPicLXl Table of the reference decoded picture buffer. @param PicWidthSamples Stride of the reference buffer. @param stride Stride of the current image. */ void luma_sample_interp_0_1_TI(unsigned char image [], unsigned char refPicLXl[], const short PicWidthSamples, const short stride){ /* No horizontal interpolation */ unsigned int uiLine1,uiLine2,uiLine3,uiLine4,uiLine5,uiLine6,uiLine7,uiLine8,uiLine9; unsigned int uiTmpLine12_h,uiTmpLine34_h,uiTmpLine12_l,uiTmpLine34_l,uiTmpLine1234_4,uiTmpLine1234_2,uiTmpLine1234_3,uiTmpLine1234_1; unsigned int uiTmpLine56_h,uiTmpLine78_h,uiTmpLine56_l,uiTmpLine78_l,uiTmpLine5678_4,uiTmpLine5678_2,uiTmpLine5678_3,uiTmpLine5678_1; unsigned int tmpc1_1,tmpc1_2,tmpc2_1,tmpc2_2,tmpc1,tmpc2; unsigned int tmpc3_1,tmpc3_2,tmpc4_1,tmpc4_2,tmpc3,tmpc4; unsigned int tmp12,tmp34,tmpend1,tmpend2,tmpend3,tmpend4; unsigned int tmpl9l,tmpl9h; unsigned int input1,input2,input3,input4; unsigned char* pRefImgPtr; unsigned char* pImgPtr; pRefImgPtr = refPicLXl-(PicWidthSamples<<1); pImgPtr = image; uiLine1 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine2 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine3 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine4 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine5 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine6 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine7 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine8 = _mem4(pRefImgPtr); pRefImgPtr += PicWidthSamples; uiLine9 = _mem4(pRefImgPtr); input1 = uiLine3; input2 = uiLine4; input3 = uiLine5; input4 = uiLine6; uiTmpLine12_h = _packh4(uiLine1,uiLine2); uiTmpLine34_h = _packh4(uiLine3,uiLine4); uiTmpLine12_l = _packl4(uiLine1,uiLine2); uiTmpLine34_l = _packl4(uiLine3,uiLine4); uiTmpLine1234_4 = _packh4(uiTmpLine12_h,uiTmpLine34_h); 
uiTmpLine1234_2 = _packl4(uiTmpLine12_h,uiTmpLine34_h); uiTmpLine1234_3 = _packh4(uiTmpLine12_l,uiTmpLine34_l); uiTmpLine1234_1 = _packl4(uiTmpLine12_l,uiTmpLine34_l); uiTmpLine56_h = _packh4(uiLine5,uiLine6); uiTmpLine78_h = _packh4(uiLine7,uiLine8); uiTmpLine56_l = _packl4(uiLine5,uiLine6); uiTmpLine78_l = _packl4(uiLine7,uiLine8); uiTmpLine5678_4 = _packh4(uiTmpLine56_h,uiTmpLine78_h); uiTmpLine5678_2 = _packl4(uiTmpLine56_h,uiTmpLine78_h); uiTmpLine5678_3 = _packh4(uiTmpLine56_l,uiTmpLine78_l); uiTmpLine5678_1 = _packl4(uiTmpLine56_l,uiTmpLine78_l); tmpc1_1 = _dotpsu4(0x01FB1414,uiTmpLine1234_1); tmpc1_2 = _dotpsu4(0xFB010000,uiTmpLine5678_1); tmpc2_1 = _dotpsu4(0x01FB1414,uiTmpLine1234_2); tmpc2_2 = _dotpsu4(0xFB010000,uiTmpLine5678_2); tmpc1 = _spack2(tmpc1_1,tmpc2_1); tmpc2 = _spack2(tmpc1_2,tmpc2_2); tmp12 = _sadd2(tmpc1,tmpc2); tmp12 = _shr2(_sadd2(tmp12,0x00100010),5); tmpc3_1 = _dotpsu4(0x01FB1414,uiTmpLine1234_3); tmpc3_2 = _dotpsu4(0xFB010000,uiTmpLine5678_3); tmpc4_1 = _dotpsu4(0x01FB1414,uiTmpLine1234_4); tmpc4_2 = _dotpsu4(0xFB010000,uiTmpLine5678_4); tmpc3 = _spack2(tmpc3_1,tmpc4_1); tmpc4 = _spack2(tmpc3_2,tmpc4_2); tmp34 = _sadd2(tmpc3,tmpc4); tmp34 = _shr2(_sadd2(tmp34,0x00100010),5); tmpend1 = _spacku4(tmp34,tmp12); tmpend1 = _swap4(tmpend1); _amem4(pImgPtr) = _avgu4(tmpend1,input1); pImgPtr += stride; tmpc1_1 = _dotpsu4(0x0001FB14,uiTmpLine1234_1); tmpc1_2 = _dotpsu4(0x14FB0100,uiTmpLine5678_1); tmpc2_1 = _dotpsu4(0x0001FB14,uiTmpLine1234_2); tmpc2_2 = _dotpsu4(0x14FB0100,uiTmpLine5678_2); tmpc1 = _spack2(tmpc1_1,tmpc2_1); tmpc2 = _spack2(tmpc1_2,tmpc2_2); tmp12 = _sadd2(tmpc1,tmpc2); tmp12 = _shr2(_sadd2(tmp12,0x00100010),5); tmpc3_1 = _dotpsu4(0x0001FB14,uiTmpLine1234_3); tmpc3_2 = _dotpsu4(0x14FB0100,uiTmpLine5678_3); tmpc4_1 = _dotpsu4(0x0001FB14,uiTmpLine1234_4); tmpc4_2 = _dotpsu4(0x14FB0100,uiTmpLine5678_4); tmpc3 = _spack2(tmpc3_1,tmpc4_1); tmpc4 = _spack2(tmpc3_2,tmpc4_2); tmp34 = _sadd2(tmpc3,tmpc4); tmp34 = 
_shr2(_sadd2(tmp34,0x00100010),5); tmpend2 = _spacku4(tmp34,tmp12); tmpend2 = _swap4(tmpend2); _amem4(pImgPtr) = _avgu4(tmpend2,input2); pImgPtr += stride; tmpc1_1 = _dotpsu4(0x000001FB,uiTmpLine1234_1); tmpc1_2 = _dotpsu4(0x1414FB01,uiTmpLine5678_1); tmpc2_1 = _dotpsu4(0x000001FB,uiTmpLine1234_2); tmpc2_2 = _dotpsu4(0x1414FB01,uiTmpLine5678_2); tmpc1 = _spack2(tmpc1_1,tmpc2_1); tmpc2 = _spack2(tmpc1_2,tmpc2_2); tmp12 = _sadd2(tmpc1,tmpc2); tmp12 = _shr2(_sadd2(tmp12,0x00100010),5); tmpc3_1 = _dotpsu4(0x000001FB,uiTmpLine1234_3); tmpc3_2 = _dotpsu4(0x1414FB01,uiTmpLine5678_3); tmpc4_1 = _dotpsu4(0x000001FB,uiTmpLine1234_4); tmpc4_2 = _dotpsu4(0x1414FB01,uiTmpLine5678_4); tmpc3 = _spack2(tmpc3_1,tmpc4_1); tmpc4 = _spack2(tmpc3_2,tmpc4_2); tmp34 = _sadd2(tmpc3,tmpc4); tmp34 = _shr2(_sadd2(tmp34,0x00100010),5); tmpend3 = _spacku4(tmp34,tmp12); tmpend3 = _swap4(tmpend3); _amem4(pImgPtr) = _avgu4(tmpend3,input3); pImgPtr += stride; uiLine9 = _swap4(uiLine9); tmpl9h = _unpkhu4 (uiLine9); tmpl9l = _unpklu4 (uiLine9); tmpc1_1 = _extu(uiTmpLine1234_1,24,24);//_dotpsu4(0x00000001,uiTmpLine1234_1); tmpc1_2 = _dotpsu4(0xFB1414FB,uiTmpLine5678_1); tmpc2_1 = _extu(uiTmpLine1234_2,24,24);//_dotpsu4(0x00000001,uiTmpLine1234_2); tmpc2_2 = _dotpsu4(0xFB1414FB,uiTmpLine5678_2); tmpc1 = _spack2(tmpc1_1,tmpc2_1); tmpc2 = _spack2(tmpc1_2,tmpc2_2); tmp12 = _sadd2(tmpc1,tmpc2); tmp12 = _sadd2(tmp12,tmpl9l); tmp12 = _shr2(_sadd2(tmp12,0x00100010),5); tmpc3_1 = _extu(uiTmpLine1234_3,24,24);//_dotpsu4(0x00000001,uiTmpLine1234_3); tmpc3_2 = _dotpsu4(0xFB1414FB,uiTmpLine5678_3); tmpc4_1 = _extu(uiTmpLine1234_4,24,24);//_dotpsu4(0x00000001,uiTmpLine1234_4); tmpc4_2 = _dotpsu4(0xFB1414FB,uiTmpLine5678_4); tmpc3 = _spack2(tmpc3_1,tmpc4_1); tmpc4 = _spack2(tmpc3_2,tmpc4_2); tmp34 = _sadd2(tmpc3,tmpc4); tmp34 = _sadd2(tmp34,tmpl9h); tmp34 = _shr2(_sadd2(tmp34,0x00100010),5); tmpend4 = _spacku4(tmp34,tmp12); tmpend4 = _swap4(tmpend4); _amem4(pImgPtr) = _avgu4(tmpend4,input4); }
Cplx16 const * const restrict unaligned_raw_samples, ORILIB_t_AlignState * alignStateInpOut, Cplx16 * const restrict aligned_raw_samples ){ #ifdef DEBUG_MODE assert(alignStateInpOut->nAlignedSamplesAlreadyFilled <= SYNC_BUFFER_SIZE_ENERGY * 2); //implementing the same scheme without this condition will take more thought, which i //didn't want to put in at date(then). assert(SYNC_ALIGNED_SAMPLE_BUF_LEN_ACTUAL >= 4 * SYNC_BUFFER_SIZE_ENERGY); #endif //get indices Uint32 uaks1 = _amem4(&alignStateInpOut->uaks1); Uint32 uaks2 = _amem4(&alignStateInpOut->uaks2); Uint32 offset = _amem4(&alignStateInpOut->nAlignedSamplesAlreadyFilled); Uint32 uai1 = uaks1 + offset; //Uint32 uai2 = uaks2 + offset; //uar = uak1, so need need to create a separate variable Cplx16 *alignedSampleBuf; Uint32 i; Cplx16 * alignedSampleBufOut = alignStateInpOut->alignedSampleLookbackBuf; //careful with copying pointers -- honor the restrict flags and do not copy
CV_IMPL void cvAbsDiff( const void* srcarr1, const void* srcarr2, void* dstarr ) { CV_FUNCNAME( "cvAbsDiff" ); __BEGIN__; int coi1 = 0, coi2 = 0, coi3 = 0; CvMat srcstub1, *src1 = (CvMat*)srcarr1; CvMat srcstub2, *src2 = (CvMat*)srcarr2; CvMat dststub, *dst = (CvMat*)dstarr; CvSize size; int type, depth, pixel_size; CV_CALL( src1 = cvGetMat( src1, &srcstub1, &coi1 )); CV_CALL( src2 = cvGetMat( src2, &srcstub2, &coi2 )); CV_CALL( dst = cvGetMat( dst, &dststub, &coi3 )); if( coi1 != 0 || coi2 != 0 || coi3 != 0 ) CV_ERROR( CV_BadCOI, "" ); if( !CV_ARE_SIZES_EQ( src1, src2 ) ) CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes ); type = CV_MAT_TYPE(src1->type); depth = CV_MAT_DEPTH(type); if( !CV_ARE_SIZES_EQ( src1, dst )) CV_ERROR_FROM_CODE( CV_StsUnmatchedSizes ); if( !CV_ARE_TYPES_EQ( src1, src2 )) CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats ); if( !CV_ARE_TYPES_EQ( src1, dst )) CV_ERROR_FROM_CODE( CV_StsUnmatchedFormats ); size.width = src1->step * src1->height; size.height = 1; pixel_size = CV_DEPTH_BYTES[depth]; if(depth == CV_8U) { int idx; unsigned char * p1; unsigned char * p2; unsigned char * pdst; p1 = src1->data.ptr ; p2 = src2->data.ptr; pdst = dst->data.ptr; #ifdef _TMS320C6X for (idx = 0; idx < size.width/pixel_size; idx+=4) { _amem4(pdst) = _subabs4(_amem4_const(p1), _amem4_const(p2) ); p1 += 4; p2 += 4; pdst += 4; } #else for (idx = 0; idx < size.width/pixel_size; idx+=1) { (*pdst) = abs((*p1)-(*p2)); pdst++; p1++; p2++; } #endif } else if(depth == CV_32S) { int idx; int * p1; int * p2; int * pdst; p1 = src1->data.i; p2 = src2->data.i; pdst = dst->data.i; for (idx = 0; idx < size.width/pixel_size; idx++) { #ifdef _TMS320C6X *pdst = _abs(_ssub(*p1, *p2)); #else *pdst = abs((*p1)-(*p2)); #endif p1 += 1; p2 += 1; pdst += 1; } } else { CV_ERROR( CV_StsUnsupportedFormat, "unsupported matrix type." ); } __END__; }
static __inline void *optimized_mem_set(void *mem, int ch, size_t n) { char * restrict dst1, * restrict dst2; int pre_bytes, post_bytes, wfill, i; unsigned char *outbuf = mem; unsigned int count = n; dst1 = (char *)outbuf; #if defined(_TMS320C6400) || defined(_TMS320C6740) || defined(_TMS320C6600) || \ defined(_TI_C6X_TESLA) /*---------------------------------------------------------------------*/ /* We do not use 'dwfill' on other variations of the C6x architecture, */ /* so limit 'dwfill' references to the architectures that use it. */ /*---------------------------------------------------------------------*/ { long long dwfill; /*------------------------------------------------------------------*/ /* Set up 64-bit and 32-bit fill values. */ /*------------------------------------------------------------------*/ wfill = _pack2 (ch, ch); wfill = _packl4(wfill, wfill); dwfill = _itoll (wfill, wfill); /*------------------------------------------------------------------*/ /* Calculate # of bytes to pre-copy to get to an alignment of 8 */ /*------------------------------------------------------------------*/ pre_bytes = (8 - (int) dst1) & 7; if (count > pre_bytes) { count -= pre_bytes; if (pre_bytes & 1) { *dst1 = ch; dst1 += 1; } if (pre_bytes & 2) { _amem2(dst1) = wfill; dst1 += 2; } if (pre_bytes & 4) { _amem4(dst1) = wfill; dst1 += 4; } } /*------------------------------------------------------------------*/ /* Double word fills */ /*------------------------------------------------------------------*/ post_bytes = count > 0 ? 
count : 0; dst2 = dst1 + 8; if (count > 15) for (i = 0; i < count >> 4; i++) { _amem8(dst1) = dwfill; dst1 += 16; _amem8(dst2) = dwfill; dst2 += 16; post_bytes -= 16; } /*------------------------------------------------------------------*/ /* Finish transfer with 8, 4, 2 and/or 1-byte writes */ /*------------------------------------------------------------------*/ if (post_bytes & 8) { _mem8(dst1) = dwfill; dst1 += 8; } if (post_bytes & 4) { _mem4(dst1) = wfill; dst1 += 4; } if (post_bytes & 2) { dst1[0] = ch; dst1[1] = ch; dst1 += 2; } if (post_bytes & 1) { *dst1 = ch; dst1 += 1; } } #else /*--------------------------------------------------------------------*/ /* Set up 32-bit fill value. */ /*--------------------------------------------------------------------*/ wfill = _mpy(0x101, (int)ch); wfill += (wfill << 16); /*--------------------------------------------------------------------*/ /* Calculate number of bytes to pre-copy to get to an alignment of 4 */ /*--------------------------------------------------------------------*/ pre_bytes = (4 - (int) dst1) & 3; if (count > pre_bytes) { count -= pre_bytes; if (pre_bytes & 1) { *dst1 = ch; dst1 += 1; } if (pre_bytes & 2) { _amem2(dst1) = wfill; dst1 += 2; } } /*--------------------------------------------------------------------*/ /* Double word fills */ /*--------------------------------------------------------------------*/ post_bytes = count > 0 ? count : 0; dst2 = dst1 + 4; if (count > 7) for (i = 0; i < count >> 3; i++) { _amem4(dst1) = wfill; dst1 += 8; _amem4(dst2) = wfill; dst2 += 8; post_bytes -= 8; } /*--------------------------------------------------------------------*/ /* Finish transfer with up to 7 single-byte writes. 
*/ /*--------------------------------------------------------------------*/ if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } if (post_bytes) { *dst1++ = ch; post_bytes--; } #endif return dst1; }