Ejemplo n.º 1
0
/*
 * GetBoudaryStrenght_TI
 *
 * Decides the value returned for the edge between block b_idx and its
 * neighbour bn_idx from their reference pictures and motion vectors.
 *
 *   mv_cache_l0/l1  : per-block motion vectors for list 0 / list 1.
 *   ref_cache_l0/l1 : per-block reference indices (negative = unused).
 *   poc_l0/l1       : tables indexed by reference index; the values are
 *                     what is actually compared (presumably picture order
 *                     counts - confirm against the caller).
 *   slice_type      : list 1 is only consulted for SLICE_TYPE_B.
 *   mb_type,
 *   mbm_type        : types of the two macroblocks, tested with IS_P().
 *
 * Returns 1 when the two blocks do not use the same pair of reference
 * pictures. Otherwise returns a boolean combination of bSCheckMvShort()
 * results (presumably non-zero when two vectors differ by more than the
 * deblocking threshold - confirm in bSCheckMvShort).
 */
char GetBoudaryStrenght_TI( short mv_cache_l0[][2], short mv_cache_l1[][2], short *ref_cache_l0, short *ref_cache_l1, 
					   short poc_l0[], short poc_l1[], int b_idx, int bn_idx, int slice_type, int mb_type, int mbm_type)
{

	short	ref_p1;
	short	ref_q1;
	short ref_p0 = ref_cache_l0[b_idx]  < 0 ? -1 : poc_l0[ref_cache_l0[b_idx]];
	short ref_q0 = ref_cache_l0[bn_idx] < 0 ? -1 : poc_l0[ref_cache_l0[bn_idx]];
	unsigned int refs_p, refs_q;
	int same_order, swapped_order;

	char v = 1;

	if(slice_type == SLICE_TYPE_B) {
		ref_p1 = ref_cache_l1[b_idx] < 0 ? -1 : poc_l1[ref_cache_l1[b_idx]];
		ref_q1 = ref_cache_l1[bn_idx] < 0 ? -1 : poc_l1[ref_cache_l1[bn_idx]];
	}else{
		ref_p1 = -1;
		ref_q1 = -1;
	}

	/* Pack each block's two references as (list1 : list0) halfwords. */
	refs_q = _pack2(ref_q1, ref_q0);
	refs_p = _pack2(ref_p1, ref_p0);

	/*
	 * _cmpeq2 sets one result bit per halfword lane, so a value of 3 means
	 * both lanes matched. The straight and the swapped comparison must each
	 * be checked on their own: OR-ing the two masks before testing for 3
	 * would also accept one lane matching straight and the other lane
	 * matching swapped (e.g. p0 == q0 and p0 == q1 while p1 matches nothing).
	 */
	same_order    = (_cmpeq2(refs_q, refs_p) == 3);
	swapped_order = (_cmpeq2(refs_q, _rotl(refs_p, 16)) == 3);

	/* ((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0)) */
	if (same_order || swapped_order) {
		char l0b_l0bn=bSCheckMvShort (mv_cache_l0[b_idx], mv_cache_l0[bn_idx]);
		char l1b_l1bn=bSCheckMvShort (mv_cache_l1[b_idx], mv_cache_l1[bn_idx]);
		char l0b_l1bn=bSCheckMvShort (mv_cache_l0[b_idx], mv_cache_l1[bn_idx]);
		char l1b_l0bn=bSCheckMvShort (mv_cache_l1[b_idx], mv_cache_l0[bn_idx]);
		if (ref_p0 != ref_p1) 	{ 
			/* Two distinct references: compare the vectors that point at the same picture. */
			if (ref_p0 == ref_q0)  {
				/* The list 1 comparison is ignored when either macroblock is a P type. */
				v =  l0b_l0bn || (l1b_l1bn && (!(IS_P(mb_type) || IS_P(mbm_type))));
			} else   {
				v =   (l0b_l1bn) ||  (l1b_l0bn);	
			}
		} else { /* L0 and L1 references of p are the same picture; q's as well. */
			/* Either pairing may apply, so both pairings must report a difference. */
			v =  ( (l0b_l0bn) ||  (l1b_l1bn)) && ( (l0b_l1bn) ||  (l1b_l0bn));
		}
	}

	return v;

}
Ejemplo n.º 2
0
/*---------------------------------------------------------------------------*/
/* memset - store the low byte of 'fill' into 'len' bytes starting at 'dst'. */
/*                                                                           */
/* Strategy: byte/halfword/word stores up to the next 8-byte boundary, then  */
/* 16 bytes per loop iteration using two aligned double-word stores, then    */
/* the remaining 0..15 bytes with 8/4/2/1-byte stores.  Returns 'dst'.       */
/*---------------------------------------------------------------------------*/
_CODE_ACCESS void *memset(void *dst, int fill, size_t len)
{   
       char  *restrict lo_ptr, *restrict hi_ptr;
       int    head, tail, word, blocks;
       double dword_lo, dword_hi;

       lo_ptr = (char *)dst;

       /*--------------------------------------------------------------------*/
       /* Build the 32-bit pattern (fill byte in every lane) and two 64-bit  */
       /* copies of it, one per store stream of the main loop.               */
       /*--------------------------------------------------------------------*/
       word     = _pack2 (fill, fill);
       word     = _packl4(word, word); 
       dword_lo = _itod  (word, word);
       dword_hi = _itod  (word, word);

       /*--------------------------------------------------------------------*/
       /* Bytes needed to reach 8-byte alignment.  They are only written     */
       /* here when the request is longer than that; otherwise the tail code */
       /* below handles the whole (short) request.                           */
       /*--------------------------------------------------------------------*/
       head = (8 - (int) dst) & 7;

       if (len > head)
       {
           len -= head;
           if (head & 1) { *lo_ptr        = fill; lo_ptr += 1; }
           if (head & 2) { _amem2(lo_ptr) = word; lo_ptr += 2; }
           if (head & 4) { _amem4(lo_ptr) = word; lo_ptr += 4; }
       }

       /*--------------------------------------------------------------------*/
       /* Main loop: one iteration per whole 16-byte block.  lo_ptr writes   */
       /* the first 8 bytes of the block and hi_ptr the second 8.            */
       /*--------------------------------------------------------------------*/
       tail   = (int)(len & 15);
       hi_ptr = lo_ptr + 8;

       for (blocks = (int)(len >> 4); blocks > 0; blocks--)
       {
           _amemd8(lo_ptr) = dword_lo;
           _amemd8(hi_ptr) = dword_hi;
           lo_ptr += 16;
           hi_ptr += 16;
       }

       /*--------------------------------------------------------------------*/
       /* Tail: each set bit of the remaining count selects one store.       */
       /* Non-aligned accessors are used because a short request may have    */
       /* skipped the alignment step above.                                  */
       /*--------------------------------------------------------------------*/
       if (tail & 8) { _memd8(lo_ptr) = dword_lo; lo_ptr += 8; }
       if (tail & 4) { _mem4 (lo_ptr) = word;     lo_ptr += 4; }
       if (tail & 2)
       {
           lo_ptr[0] = word;
           lo_ptr[1] = word;
           lo_ptr   += 2;
       }
       if (tail & 1) { *lo_ptr = fill; lo_ptr += 1; }

       return dst;
}
                   _packlh2(_hi(mask_tmp),_hi(mask_tmp)));

  mask_tmp = _memd8_const((void *) &mask_ptr[1]);
  mask7_4  = _itod(_packlh2(_lo(mask_tmp),_lo(mask_tmp)),
                   _packlh2(_hi(mask_tmp),_hi(mask_tmp)));

  /* -------------------------------------------------------------------- */
  /* The last mask values of each row are loaded into an int              */
  /* -------------------------------------------------------------------- */
  mask8   = mask_ptr[0];
  mask43  = _packlh2(_lo(mask7_4),_hi(mask3_0));

  /* -------------------------------------------------------------------- */
  /* mask2_88 contains the last mask of row 3 in its half words           */
  /* -------------------------------------------------------------------- */
  mask88  = _pack2(mask8,mask8);
  
  /* -------------------------------------------------------------------- */
  /* Pack the last mask of row 1 & 2 into a single int variable           */
  /* -------------------------------------------------------------------- */
  mask52  = _packhl2(_lo(mask7_4),_hi(mask3_0));

  for (i=0; i<width; i+=2) {
    /* ------------------------------------------------------------------ */
    /*  Load 4 pixels at a time from each of the 3 rows using double      */
    /*  word memory accesses.                                             */
    /* ------------------------------------------------------------------ */
    pix0_3210 = _mem8_const((void *) &imgin_ptr[i]);
    pix1_3210 = _mem8_const((void *) &imgin_ptr[i + pitch]);
    pix2_3210 = _mem8_const((void *) &imgin_ptr[i + 2 * pitch]);
Ejemplo n.º 4
0
/*
 * chroma_sample_interpolation_TI
 *
 * Produces a 2x2 block of interpolated samples for each chroma plane.
 * Every output sample is a weighted sum of four reference samples
 * (A = same position, B = one byte to the right, C = one reference row
 * down, D = down and right) with the weights
 *
 *     A: (8 - xFracl) * (8 - yFracl)      B: xFracl * (8 - yFracl)
 *     C: (8 - xFracl) * yFracl            D: xFracl * yFracl
 *
 * rounded with +32 and shifted right by 6.
 *
 *   image_Cb, image_Cr     : destinations; two bytes are written at offset 0
 *                            and two more at offset 'stride'.
 *   refPicLXCb, refPicLXCr : reference planes; three rows of three bytes are
 *                            read, rows being 'PicWidthSamples' bytes apart.
 *   xFracl, yFracl         : fractional offsets. The packed 8-bit weights
 *                            only hold the products above if both are in
 *                            0..7 - presumably guaranteed by the caller.
 *
 * NOTE(review): which byte lane corresponds to which pixel assumes a
 * little-endian target - confirm.
 */
void chroma_sample_interpolation_TI(unsigned char image_Cb [RESTRICT],	 unsigned char image_Cr [RESTRICT]
					, unsigned char refPicLXCb[RESTRICT], unsigned char refPicLXCr[RESTRICT]
					, const short xFracl, const short yFracl,  const short PicWidthSamples,const short stride)
{
	/* The three reference rows touched in each plane. */
	unsigned char *cbRow0 = refPicLXCb;
	unsigned char *cbRow1 = refPicLXCb + PicWidthSamples;
	unsigned char *cbRow2 = cbRow1 + PicWidthSamples;

	unsigned char *crRow0 = refPicLXCr;
	unsigned char *crRow1 = refPicLXCr + PicWidthSamples;
	unsigned char *crRow2 = crRow1 + PicWidthSamples;

	unsigned int xy = xFracl * yFracl;
	unsigned int wAD, wBC, weights;

	unsigned int cbA, cbB, cbC, cbD;
	unsigned int crA, crB, crC, crD;

	unsigned int hiAB, hiCD, loAB, loCD;
	unsigned int quadRow1Hi, quadRow1Lo, quadRow0Hi, quadRow0Lo;
	unsigned int outRow0, outRow1;

	/*
	 * Weights.  wBC = (8x - xy : 8y - xy), wAD = ((8-x)(8-y) : xy) as
	 * halfword pairs; they are then narrowed to four bytes ordered
	 * A, B, C, D from the most significant byte down.
	 */
	wBC = _sub2(_pack2(xFracl, yFracl) << 3, _pack2(xy, xy));
	wAD = (xy) + ((xy - ((xFracl + yFracl) << 3) + 64) << 16);
	weights = _spacku4(_packh2(wAD, wBC), _pack2(wBC, wAD));

	/*
	 * Each word holds two samples of the upper output row in its low
	 * halfword and the matching two samples of the lower output row in its
	 * high halfword.
	 */
	cbA = _mem2(cbRow0);		cbA += (_mem2(cbRow1)     << 16);
	cbB = _mem2(cbRow0 + 1);	cbB += (_mem2(cbRow1 + 1) << 16);
	cbC = _mem2(cbRow1);		cbC += (_mem2(cbRow2)     << 16);
	cbD = _mem2(cbRow1 + 1);	cbD += (_mem2(cbRow2 + 1) << 16);

	crA = _mem2(crRow0);		crA += (_mem2(crRow1)     << 16);
	crB = _mem2(crRow0 + 1);	crB += (_mem2(crRow1 + 1) << 16);
	crC = _mem2(crRow1);		crC += (_mem2(crRow2)     << 16);
	crD = _mem2(crRow1 + 1);	crD += (_mem2(crRow2 + 1) << 16);

	/* ---- Cb: regroup bytes so each word is one (A,B,C,D) quad ---- */
	hiAB = _packh4(cbA, cbB);
	hiCD = _packh4(cbC, cbD);
	loAB = _packl4(cbA, cbB);
	loCD = _packl4(cbC, cbD);

	quadRow1Hi = _packh4(hiAB, hiCD);
	quadRow0Hi = _packl4(hiAB, hiCD);
	quadRow1Lo = _packh4(loAB, loCD);
	quadRow0Lo = _packl4(loAB, loCD);

	/* Weighted sums, two per output row, packed as halfwords. */
	outRow1 = _pack2(_dotpu4(weights, quadRow1Hi), _dotpu4(weights, quadRow1Lo));
	outRow0 = _pack2(_dotpu4(weights, quadRow0Hi), _dotpu4(weights, quadRow0Lo));

	/* Round, scale by 1/64 and saturate down to bytes. */
	outRow1 = _spacku4(0x00000000, _shr2(_sadd2(outRow1, 0x00200020), 6));
	outRow0 = _spacku4(0x00000000, _shr2(_sadd2(outRow0, 0x00200020), 6));

	_mem2(image_Cb)          = outRow0;
	_mem2(image_Cb + stride) = outRow1;

	/* ---- Cr: identical processing ---- */
	hiAB = _packh4(crA, crB);
	hiCD = _packh4(crC, crD);
	loAB = _packl4(crA, crB);
	loCD = _packl4(crC, crD);

	quadRow1Hi = _packh4(hiAB, hiCD);
	quadRow0Hi = _packl4(hiAB, hiCD);
	quadRow1Lo = _packh4(loAB, loCD);
	quadRow0Lo = _packl4(loAB, loCD);

	outRow1 = _pack2(_dotpu4(weights, quadRow1Hi), _dotpu4(weights, quadRow1Lo));
	outRow0 = _pack2(_dotpu4(weights, quadRow0Hi), _dotpu4(weights, quadRow0Lo));

	outRow1 = _spacku4(0x00000000, _shr2(_sadd2(outRow1, 0x00200020), 6));
	outRow0 = _spacku4(0x00000000, _shr2(_sadd2(outRow0, 0x00200020), 6));

	_mem2(image_Cr)          = outRow0;
	_mem2(image_Cr + stride) = outRow1;
}
Ejemplo n.º 5
0
/*
 * DSP_QMFA_process - run one block through the two-band analysis filter bank.
 *
 * Every 32-bit word carries two 16-bit samples that are processed in
 * parallel: the "l" accumulators collect the low-halfword products and the
 * "r" accumulators the high-halfword products (presumably left/right audio
 * channels - confirm).
 *
 * Reads  : flt_ptr, data_in_buffer, blk_len, iter_count[], history[0..1]
 * Writes : data_out_LO_ptr[0 .. blk_len/2 - 1]   (o0 + o1 per halfword)
 *          data_out_HI_ptr[0 .. blk_len/2 - 1]   (o0 - o1 per halfword)
 *          history[0..1][0 .. FILT_LEN/2 - 2]    (overlap for the next block)
 *
 * Each history array is accessed through a single restrict pointer. Using
 * separate restrict read/write pointers to the same array would promise the
 * compiler that the reads and writes never touch the same storage, which is
 * not true here.
 */
void DSP_QMFA_process(DSP_QMFA_bank_t *QMFA_bank_obj)
{
  sint32 i,j;  

  uint32 * restrict filter = (uint32 *)QMFA_bank_obj->flt_ptr;
  uint32 * restrict data_ptr = (uint32 *)(QMFA_bank_obj->data_in_buffer + 0); /*input */

  uint32 L = QMFA_bank_obj->blk_len/2;
  uint32 M = FILT_LEN/2;

  /* One pointer per history array, used for both the read and the write side. */
  uint32 * restrict hist0 = (uint32 *)QMFA_bank_obj->history[0];
  uint32 * restrict hist1 = (uint32 *)QMFA_bank_obj->history[1];

  uint32 * restrict data_out_lo_ptr = (uint32 *)QMFA_bank_obj->data_out_LO_ptr;
  uint32 * restrict data_out_hi_ptr = (uint32 *)QMFA_bank_obj->data_out_HI_ptr;

  for(i=0;i<L;++i)
  {
    /* Iterations 0 to (L-1) produce the current block of output. */

    register uint32 o0,o1,out0,out1;

    /* Accumulators for polyphase components 0 and 1, low/high halfword lanes. */
    register __int40_t accum0l,accum1l,accum0r,accum1r;
    register long long temp;

    accum0l = 0;accum0r = 0;
    accum1l = 0;accum1r = 0;

    /* Input is read backwards in pairs starting at word 2*i + 1. */
    uint32 *dptr = data_ptr + 2*i + 1;
    uint32 *fptr = filter;

    #pragma MUST_ITERATE( 1)
    for(j=0;j<QMFA_bank_obj->iter_count[i];++j)
    {
      /* j runs from the rightmost overlap position to the leftmost one. */
      register uint32 d0,d1;

      /* One filter word holds both components: high halfword feeds filter 0,
         low halfword feeds filter 1, each replicated into both lanes. */
      register uint32 filt_coef10 = *fptr++;
      register uint32 filt_coef0 = _packh2(filt_coef10,filt_coef10);
      register uint32 filt_coef1 = _pack2(filt_coef10,filt_coef10);

      d1 = *dptr--;  /* word at the odd position  -> component 1 */
      d0 = *dptr--;  /* word at the even position -> component 0 */

      /* _mpy2ll yields both halfword products; accumulate with saturation. */
      temp = _mpy2ll(d0, filt_coef0);
      accum0l = _lsadd(_loll(temp), accum0l); accum0r = _lsadd(_hill(temp), accum0r);

      temp = _mpy2ll(d1, filt_coef1);
      accum1l = _lsadd(_loll(temp), accum1l); accum1r = _lsadd(_hill(temp), accum1r);
    }

    /* Round, drop 15 fraction bits, saturate and repack as (r : l). */
    o0 = _pack2( _sat((accum0r + 16384)>>15), _sat((accum0l + 16384)>>15));
    o1 = _pack2( _sat((accum1r + 16384)>>15), _sat((accum1l + 16384)>>15));
    o0 = _add2(o0, hist0[i]);  /* overlap-add history of filter 0 */
    o1 = _add2(o1, hist1[i]);  /* overlap-add history of filter 1 */

    out0 = _add2(o0,o1) ;
    data_out_lo_ptr[i] = out0; /* LO band is o0 + o1 */
    out1 = _sub2(o0,o1) ;
    data_out_hi_ptr[i] = out1; /* HI band is o0 - o1 */
  }

  for(i=L;i<L+M-1;++i)
  {
    /* Iterations L to (L + (M-1) - 1) produce the
       overlap-add history for the next block. */

    register uint32 o0,o1;

    register __int40_t accum0l,accum1l,accum0r,accum1r;
    register long long temp;

    accum0l = 0;accum0r = 0;
    accum1l = 0;accum1r = 0;

    /* Always start from the last input pair; the filter start advances instead. */
    uint32 *dptr = data_ptr + 2*(L-1) + 1;
    uint32 *fptr = filter + (i-L+1);

    #pragma MUST_ITERATE( 1)
    for(j=0;j<QMFA_bank_obj->iter_count[i];++j)
    {
      /* Same scheme as above, from the rightmost overlap to the leftmost. */
      register uint32 d0,d1;

      register uint32 filt_coef10 = *fptr++;
      register uint32 filt_coef0 = _packh2(filt_coef10,filt_coef10);
      register uint32 filt_coef1 = _pack2(filt_coef10,filt_coef10);

      d1 = *dptr--;
      d0 = *dptr--;

      temp = _mpy2ll(d0, filt_coef0);
      accum0l = _lsadd(_loll(temp), accum0l); accum0r = _lsadd(_hill(temp), accum0r);

      temp = _mpy2ll(d1, filt_coef1);
      accum1l = _lsadd(_loll(temp), accum1l); accum1r = _lsadd(_hill(temp), accum1r);
    }

    o0 = _pack2( _sat((accum0r + 16384)>>15), _sat((accum0l + 16384)>>15));
    o1 = _pack2( _sat((accum1r + 16384)>>15), _sat((accum1l + 16384)>>15));

    /* Entry i is read before entry i-L is overwritten. */
    o0 = _add2(o0, hist0[i]);
    o1 = _add2(o1, hist1[i]);

    hist0[(i-L)] = o0; /* new overlap-add history, filter 0 */
    hist1[(i-L)] = o1; /* new overlap-add history, filter 1 */
  }

  return;
}
Ejemplo n.º 6
0
/*
 * DSP_QMFS_process - run one block through the two-band synthesis filter bank.
 *
 * Every 32-bit word carries two 16-bit samples that are processed in
 * parallel: the "l" accumulators collect the low-halfword products and the
 * "r" accumulators the high-halfword products (presumably left/right audio
 * channels - confirm).
 *
 * Reads  : flt_ptr, data_in_buffer_LO, data_in_buffer_HI, blk_len,
 *          iter_count[], history[0..1]
 * Writes : data_out_ptr[0 .. 2*blk_len - 1]     (filter 0 / filter 1 interleaved)
 *          history[0..1][0 .. FILT_LEN/2 - 2]   (overlap for the next block)
 *
 * The output words are packed as (r : l), matching DSP_QMFA_process. Packing
 * the "l" accumulator into both halfwords would discard the high-lane result
 * and duplicate the low lane.
 *
 * Each history array is accessed through a single restrict pointer, because
 * the same storage is both read and written.
 */
void DSP_QMFS_process(DSP_QMFS_bank_t *QMFS_bank_obj)
{
  sint32 i,j;  

  uint32 * restrict filter = (uint32 *)QMFS_bank_obj->flt_ptr;
  uint32 * restrict data_ptr0 = (uint32 *)QMFS_bank_obj->data_in_buffer_LO; /* LO band input */
  uint32 * restrict data_ptr1 = (uint32 *)QMFS_bank_obj->data_in_buffer_HI; /* HI band input */

  sint32 L = QMFS_bank_obj->blk_len;
  sint32 M = FILT_LEN/2;

  /* One pointer per history array, used for both the read and the write side. */
  uint32 * restrict hist0 = (uint32 *)QMFS_bank_obj->history[0];
  uint32 * restrict hist1 = (uint32 *)QMFS_bank_obj->history[1];

  uint32 * restrict dataout_ptr = (uint32 *)QMFS_bank_obj->data_out_ptr;
 
  for(i=0;i<L;++i)
  {
    /* Iterations 0 to (L-1) produce the outputs of the current block. */
    register uint32 t0,t1,out;
    register __int40_t accum0l, accum1l, accum0r, accum1r;
    register long long temp;

    accum0l = 0; accum0r = 0;
    accum1l = 0; accum1r = 0;

    /* Both bands are read backwards starting at word i. */
    uint32 *dptr0 = data_ptr0 + i ;
    uint32 *dptr1 = data_ptr1 + i ;

    uint32 *fptr = filter;

    #pragma MUST_ITERATE( 1)
    for(j=0; j<QMFS_bank_obj->iter_count[i];++j)
    {
      /* j runs from the rightmost overlap position to the leftmost one. */
      register uint32 d0,d1;

      /* One filter word holds both components: low halfword feeds filter 0,
         high halfword feeds filter 1, each replicated into both lanes. */
      register uint32 filt_coef10 = *fptr++;
      register uint32 filt_coef0 = _pack2(filt_coef10,filt_coef10);
      register uint32 filt_coef1 = _packh2(filt_coef10,filt_coef10);

      d1 = *dptr1--;
      d0 = *dptr0--;

      /* Sum and difference of the two bands feed filters 0 and 1. */
      t0 = _add2(d0,d1);
      t1 = _sub2(d0,d1);

      temp = _mpy2ll(t0, filt_coef0);
      accum0l = _lsadd(_loll(temp), accum0l); accum0r = _lsadd(_hill(temp), accum0r);

      temp = _mpy2ll(t1, filt_coef1);
      accum1l = _lsadd(_loll(temp), accum1l); accum1r = _lsadd(_hill(temp), accum1r);
    }

    /* Round, drop 15 fraction bits, saturate and repack as (r : l). */
    t0 = _pack2( _sat((accum0r + 16384)>>15), _sat((accum0l + 16384)>>15));
    t1 = _pack2( _sat((accum1r + 16384)>>15), _sat((accum1l + 16384)>>15));

    out = _add2(t0, hist0[i]);
    dataout_ptr[2*i + 0] = out;      /* filter 0 result goes to word 2*i     */

    out = _add2(t1, hist1[i]);
    dataout_ptr[2*i + 1] = out;      /* filter 1 result goes to word 2*i + 1 */
  }
 
  for(i=L;i<L+M-1;++i)
  {
    /* Iterations L to (L + (M-1) - 1) produce the
       overlap-add history for the next block. */
    register uint32 t0,t1,out;
    register __int40_t accum0l, accum1l, accum0r, accum1r;
    register long long temp;

    accum0l = 0; accum0r = 0;
    accum1l = 0; accum1r = 0;

    /* Always start from the last input word; the filter start advances instead. */
    uint32 *dptr0 = data_ptr0 + (L-1) ;
    uint32 *dptr1 = data_ptr1 + (L-1) ;

    uint32 *fptr = filter + (i-L+1);

    #pragma MUST_ITERATE( 1)
    for(j=0; j<QMFS_bank_obj->iter_count[i];++j)
    {
      /* Same scheme as above, from the rightmost overlap to the leftmost. */
      register uint32 d0,d1;

      register uint32 filt_coef10 = *fptr++;
      register uint32 filt_coef0 = _pack2(filt_coef10,filt_coef10);
      register uint32 filt_coef1 = _packh2(filt_coef10,filt_coef10);

      d1 = *dptr1--;
      d0 = *dptr0--;

      t0 = _add2(d0,d1);
      t1 = _sub2(d0,d1);

      temp = _mpy2ll(t0, filt_coef0);
      accum0l = _lsadd(_loll(temp), accum0l); accum0r = _lsadd(_hill(temp), accum0r);

      temp = _mpy2ll(t1, filt_coef1);
      accum1l = _lsadd(_loll(temp), accum1l); accum1r = _lsadd(_hill(temp), accum1r);
    }

    t0 = _pack2( _sat((accum0r + 16384)>>15), _sat((accum0l + 16384)>>15));
    t1 = _pack2( _sat((accum1r + 16384)>>15), _sat((accum1l + 16384)>>15));

    /* Entry i is read before entry i-L is overwritten. */
    out = _add2(t0, hist0[i]);
    hist0[(i-L)] = out;         /* new overlap-add history, filter 0 */
    out = _add2(t1, hist1[i]);
    hist1[(i-L)] = out;         /* new overlap-add history, filter 1 */
  }

  return;
}
  unsigned int            mask1_44,       mask2_44,       mask3_44;
  unsigned int            mask4_44,       mask5_44;

  const short *restrict   in0;
  const short *restrict   in1;
  const short *restrict   in2;
  const short *restrict   in3;
  const short *restrict   in4;

  /* -------------------------------------------------------------------- */
  /*  Load mask values (reverse order for mask rotation)                  */
  /* -------------------------------------------------------------------- */
  mask_temp  = _mem8_const ((void *) &mask_ptr[21]);
  mask1_3210 = _itoll(_packlh2(_loll(mask_temp),_loll(mask_temp)),
                      _packlh2(_hill(mask_temp),_hill(mask_temp)));
  mask1_44   = _pack2((int) mask_ptr[20],  (int) mask_ptr[20]);

  mask_temp  = _mem8_const ((void *) &mask_ptr[16]);
  mask2_3210 = _itoll(_packlh2(_loll(mask_temp),_loll(mask_temp)),
                      _packlh2(_hill(mask_temp),_hill(mask_temp)));
  mask2_44   = _pack2((int) mask_ptr[15],  (int) mask_ptr[15]);

  mask_temp  = _mem8_const ((void *) &mask_ptr[11]);
  mask3_3210 = _itoll(_packlh2(_loll(mask_temp),_loll(mask_temp)),
                      _packlh2(_hill(mask_temp),_hill(mask_temp)));
  mask3_44   = _pack2((int) mask_ptr[10], (int) mask_ptr[10]);

  mask_temp  = _mem8_const ((void *) &mask_ptr[6]);
  mask4_3210 = _itoll(_packlh2(_loll(mask_temp),_loll(mask_temp)),
                      _packlh2(_hill(mask_temp),_hill(mask_temp)));
  mask4_44   = _pack2((int) mask_ptr[5], (int) mask_ptr[5]);
Ejemplo n.º 8
0
/*---------------------------------------------------------------------------*/
/* optimized_mem_set - store the low byte of 'ch' into 'n' bytes at 'mem'.   */
/*                                                                           */
/* Two implementations are selected at compile time:                         */
/*   - C64x / C674x / C66x / Tesla: align to 8 bytes, then 16 bytes per loop */
/*     iteration using two 64-bit stores;                                    */
/*   - all other targets: align to 4 bytes, then 8 bytes per loop iteration  */
/*     using two 32-bit stores.                                              */
/*                                                                           */
/* NOTE(review): the value returned is the pointer just past the last byte   */
/* written, not 'mem' as memset() would return.  Kept as is because callers  */
/* are not visible here - confirm this is intended.                          */
/*---------------------------------------------------------------------------*/
static __inline void *optimized_mem_set(void *mem, int ch, size_t n)
{
   char  * restrict cur, * restrict cur_hi;
   int    lead, rest, word, k;

   unsigned int remaining = n;

   cur = (char *)mem;

#if defined(_TMS320C6400) || defined(_TMS320C6740) || defined(_TMS320C6600) || \
    defined(_TI_C6X_TESLA)

   /*---------------------------------------------------------------------*/
   /* The 64-bit pattern only exists on the architectures listed above,   */
   /* so it is scoped to this branch.                                     */
   /*---------------------------------------------------------------------*/
   {
      long long dword;

      /*------------------------------------------------------------------*/
      /* Replicate the fill byte into 32-bit and 64-bit patterns.         */
      /*------------------------------------------------------------------*/
      word  = _pack2 (ch, ch);
      word  = _packl4(word, word);
      dword = _itoll (word, word); 

      /*------------------------------------------------------------------*/
      /* Bytes needed to reach 8-byte alignment; written only when the    */
      /* request is longer than that.                                     */
      /*------------------------------------------------------------------*/
      lead = (8 - (int) cur) & 7;
       
      if (remaining > lead)
      {
         remaining -= lead;
         if (lead & 1) { *cur        = ch;   cur += 1; }
         if (lead & 2) { _amem2(cur) = word; cur += 2; }
         if (lead & 4) { _amem4(cur) = word; cur += 4; }
      } 
            
      /*------------------------------------------------------------------*/
      /* One iteration per whole 16-byte block: cur writes the first      */
      /* 8 bytes of the block, cur_hi the second 8.                       */
      /*------------------------------------------------------------------*/
      rest   = (int)(remaining & 15);
      cur_hi = cur + 8;
       
      for (k = (int)(remaining >> 4); k > 0; k--)
      {
         _amem8(cur)    = dword;
         _amem8(cur_hi) = dword;
         cur    += 16;
         cur_hi += 16;
      }
       
      /*------------------------------------------------------------------*/
      /* Tail: each set bit of the leftover count selects one store.      */
      /*------------------------------------------------------------------*/
      if (rest & 8) { _mem8(cur) = dword; cur += 8; }
      if (rest & 4) { _mem4(cur) = word;  cur += 4; }
      if (rest & 2)
      {
         cur[0] = ch;
         cur[1] = ch;
         cur   += 2;
      }
      if (rest & 1) { *cur = ch; cur += 1; }
   }

#else

   /*--------------------------------------------------------------------*/
   /* Replicate the fill byte into a 32-bit pattern.                     */
   /*--------------------------------------------------------------------*/
   word  = _mpy(0x101, (int)ch);
   word += (word << 16);

   /*--------------------------------------------------------------------*/
   /* Bytes needed to reach 4-byte alignment; written only when the      */
   /* request is longer than that.                                       */
   /*--------------------------------------------------------------------*/
   lead = (4 - (int) cur) & 3;

   if (remaining > lead)
   {
      remaining -= lead;
      if (lead & 1) { *cur        = ch;   cur += 1; }
      if (lead & 2) { _amem2(cur) = word; cur += 2; }
   }

   /*--------------------------------------------------------------------*/
   /* One iteration per whole 8-byte block: cur writes the first word of */
   /* the block, cur_hi the second.                                      */
   /*--------------------------------------------------------------------*/
   rest   = (int)(remaining & 7);
   cur_hi = cur + 4;

   for (k = (int)(remaining >> 3); k > 0; k--)
   {
      _amem4(cur)    = word;
      _amem4(cur_hi) = word;
      cur    += 8;
      cur_hi += 8;
   }

   /*--------------------------------------------------------------------*/
   /* Tail: at most 7 bytes remain; write them one at a time.            */
   /*--------------------------------------------------------------------*/
   for (k = rest; k > 0; k--)
   {
      *cur++ = ch;
   }

#endif

   return cur;
}