Esempio n. 1
0
/* Abort unless a 32-bit intrinsic result has the expected bit pattern.
   The comparison is done on the unsigned representation so that expected
   values with the top bit set compare the same way as in a direct
   int-versus-hex-constant comparison.  */
static void
expect_word (int got, unsigned int want)
{
  if ((unsigned int) got != want)
    abort ();
}

/* Same as expect_word, for 64-bit results.  */
static void
expect_dword (long long got, unsigned long long want)
{
  if ((unsigned long long) got != want)
    abort ();
}

/* Self-test for the packed-arithmetic intrinsics.  The operands (a, b, c,
   d, e, f, a4, b4, c4, d4, e4) are defined outside this function.  Any
   mismatch calls abort (); otherwise the program returns 0.  */
int main ()
{
  /* Packed 16-bit add and subtract.  */
  expect_word (_add2 (a, b), 0x1000f000);
  expect_word (_sub2 (a, b), 0x9000b000);
  expect_word (_sub2 (b, a), 0x70005000);

  /* Packed 8-bit add, subtract and saturating unsigned add.  */
  expect_word (_add4 (a4, b4), 0x10f02000);
  expect_word (_sub4 (a4, b4), 0x90b04000);
  expect_word (_saddu4 (a4, c4), 0xfff050ff);

  /* Saturating packed 16-bit add.  */
  expect_word (_sadd2 (a, b), 0x1000f000);
  expect_word (_sadd2 (a, c), 0x7fff8000);

  /* Saturating packed 16-bit subtract, both operand orders.  */
  expect_word (_ssub2 (a, b), 0x7fffb000);
  expect_word (_ssub2 (b, a), 0x80005000);

  /* Saturating packed 16-bit multiply with a 64-bit result.  */
  expect_dword (_smpy2ll (a, b), 0xd8000000f4000000ll);
  expect_dword (_smpy2ll (d, d), 0x7fffffff00000002ll);

  /* Packed averages.  */
  expect_word (_avg2 (b, e), 0x08002001);
  expect_word (_avgu4 (d4, e4), 0x88102980);

  /* Packed 16-bit absolute value.  */
  expect_word (_abs2 (a), 0x50003000);
  expect_word (_abs2 (f), 0x40007fff);

  return 0;
}
Esempio n. 2
0
/*
 * DSP_QMFA_process - run one input block through the QMF analysis bank.
 *
 * Every 32-bit word is processed as two 16-bit lanes.  For each output index
 * two polyphase filter components are evaluated: each tap loads one packed
 * coefficient word and two input words (walking the input right to left),
 * multiplies lane by lane with _mpy2ll() and accumulates each lane into its
 * own 40-bit accumulator with _lsadd().  The accumulators are rounded
 * (+16384, >>15), saturated with _sat() and re-packed with _pack2().
 *
 * With L = blk_len/2 and M = FILT_LEN/2:
 *   - indices 0 .. L-1 add the stored history and emit
 *       data_out_LO_ptr[i] = component0 + component1
 *       data_out_HI_ptr[i] = component0 - component1
 *   - indices L .. L+M-2 add the stored history and write the result back
 *     to history[0/1][i-L] for overlap-add with the next block.
 *
 * The number of taps per index comes from QMFA_bank_obj->iter_count[i].
 *
 * Fields read   : flt_ptr, data_in_buffer, blk_len, iter_count, history,
 *                 data_out_LO_ptr, data_out_HI_ptr.
 * Memory written: the two output buffers and history[0], history[1].
 *
 * NOTE(review): lane descriptions rely on the TI C6000 definitions of
 * _pack2/_packh2/_mpy2ll/_loll/_hill; history[] entries at index >= M-1 are
 * read but never written here, so the caller presumably keeps them zeroed -
 * confirm against the initialisation code.
 */
void DSP_QMFA_process(DSP_QMFA_bank_t *QMFA_bank_obj)
{
  sint32 i,j;

  uint32 * restrict filter = (uint32 *)QMFA_bank_obj->flt_ptr;
  uint32 * restrict data_ptr = (uint32 *)(QMFA_bank_obj->data_in_buffer + 0); /* input */

  uint32 L = QMFA_bank_obj->blk_len/2;
  uint32 M = FILT_LEN/2;

  /*
   * One restrict pointer per history line, used for both reading and
   * writing.  Two separate restrict pointers to the same array (one for
   * reads, one for writes) break the restrict contract because the same
   * elements are read through one and modified through the other.
   */
  uint32 * restrict hist0 = (uint32 *)QMFA_bank_obj->history[0];
  uint32 * restrict hist1 = (uint32 *)QMFA_bank_obj->history[1];

  uint32 * restrict data_out_lo_ptr = (uint32 *)QMFA_bank_obj->data_out_LO_ptr;
  uint32 * restrict data_out_hi_ptr = (uint32 *)QMFA_bank_obj->data_out_HI_ptr;

  for(i=0;i<L;++i)
  {
    /* Iterations 0 to (L-1) produce the current block of output */

    register uint32 o0,o1,out0,out1;

    /* per-lane accumulators for polyphase filter components 0 and 1 */
    register __int40_t accum0l,accum1l,accum0r,accum1r;
    register long long temp;

    uint32 *dptr = data_ptr + 2*i + 1;  /* newest input word for this output */
    uint32 *fptr = filter;

    accum0l = 0;accum0r = 0;
    accum1l = 0;accum1r = 0;

    #pragma MUST_ITERATE( 1)
    for(j=0;j<QMFA_bank_obj->iter_count[i];++j)
    {
      /* j runs from the rightmost overlap position to the leftmost one */
      register uint32 d0,d1;

      /* one packed word carries the coefficients of both components */
      register uint32 filt_coef10 = *fptr++;
      register uint32 filt_coef0 = _packh2(filt_coef10,filt_coef10);  /* upper half in both lanes */
      register uint32 filt_coef1 = _pack2(filt_coef10,filt_coef10);   /* lower half in both lanes */

      /* load the data right to left with a 1:2 split */
      d1 = *dptr--;
      d0 = *dptr--;

      temp = _mpy2ll(d0, filt_coef0);
      accum0l = _lsadd(_loll(temp), accum0l); accum0r = _lsadd(_hill(temp), accum0r);

      temp = _mpy2ll(d1, filt_coef1);
      accum1l = _lsadd(_loll(temp), accum1l); accum1r = _lsadd(_hill(temp), accum1r);
    }

    /* round, scale, saturate and re-pack: r -> upper lane, l -> lower lane */
    o0 = _pack2( _sat((accum0r + 16384)>>15), _sat((accum0l + 16384)>>15));
    o1 = _pack2( _sat((accum1r + 16384)>>15), _sat((accum1l + 16384)>>15));

    o0 = _add2(o0, hist0[i]);  /* overlap-add history, filter 0 */
    o1 = _add2(o1, hist1[i]);  /* overlap-add history, filter 1 */

    out0 = _add2(o0,o1);
    data_out_lo_ptr[i] = out0; /* LO bank is component0 + component1 */
    out1 = _sub2(o0,o1);
    data_out_hi_ptr[i] = out1; /* HI bank is component0 - component1 */
  }

  for(i=L;i<L+M-1;++i)
  {
    /* Iterations L to (L + (M-1) - 1) produce the
       history for overlap add for the next block */

    register uint32 o0,o1;

    /* per-lane accumulators for polyphase filter components 0 and 1 */
    register __int40_t accum0l,accum1l,accum0r,accum1r;
    register long long temp;

    uint32 *dptr = data_ptr + 2*(L-1) + 1;  /* always start at the last input pair */
    uint32 *fptr = filter + (i-L+1);        /* skip the taps that fall past the block */

    accum0l = 0;accum0r = 0;
    accum1l = 0;accum1r = 0;

    #pragma MUST_ITERATE( 1)
    for(j=0;j<QMFA_bank_obj->iter_count[i];++j)
    {
      /* same logic as above: rightmost point of overlap to leftmost */
      register uint32 d0,d1;

      register uint32 filt_coef10 = *fptr++;
      register uint32 filt_coef0 = _packh2(filt_coef10,filt_coef10);  /* upper half in both lanes */
      register uint32 filt_coef1 = _pack2(filt_coef10,filt_coef10);   /* lower half in both lanes */

      /* load the data right to left with a 1:2 split */
      d1 = *dptr--;
      d0 = *dptr--;

      temp = _mpy2ll(d0, filt_coef0);
      accum0l = _lsadd(_loll(temp), accum0l); accum0r = _lsadd(_hill(temp), accum0r);

      temp = _mpy2ll(d1, filt_coef1);
      accum1l = _lsadd(_loll(temp), accum1l); accum1r = _lsadd(_hill(temp), accum1r);
    }

    o0 = _pack2( _sat((accum0r + 16384)>>15), _sat((accum0l + 16384)>>15));
    o1 = _pack2( _sat((accum1r + 16384)>>15), _sat((accum1l + 16384)>>15));

    o0 = _add2(o0, hist0[i]);  /* overlap-add history, filter 0 */
    o1 = _add2(o1, hist1[i]);  /* overlap-add history, filter 1 */

    /* i-L < i, so this never overwrites an entry that is still to be read */
    hist0[(i-L)] = o0; /* new overlap-add history, filter 0 */
    hist1[(i-L)] = o1; /* new overlap-add history, filter 1 */
  }

  return;
}
/*
 * chroma_sample_interpolation_TI - bilinear interpolation of one 2x2 chroma
 * block for both the Cb and the Cr plane, using TI C6000 packed intrinsics.
 *
 * image_Cb, image_Cr      : outputs; two rows of two bytes are written to
 *                           each, the rows being `stride` bytes apart.
 * refPicLXCb, refPicLXCr  : reference planes; bytes 0..2 of three
 *                           consecutive rows (PicWidthSamples bytes apart)
 *                           are read from each.
 * xFracl, yFracl          : fractional sample offsets used as weights.
 * PicWidthSamples         : row pitch of the reference planes, in bytes.
 * stride                  : row pitch of the output planes, in bytes.
 *
 * With x = xFracl and y = yFracl the four weights built below are
 *     x*y - 8*(x+y) + 64,   8*x - x*y,   8*y - x*y,   x*y
 * i.e. (8-x)(8-y), x(8-y), (8-x)y and xy.  Each output sample is the
 * weighted sum of four neighbouring reference samples, plus 32, shifted
 * right by 6.
 *
 * NOTE(review): the lane/byte comments rely on TI's documented operand order
 * for _pack2/_packh2/_packl4/_packh4/_spacku4/_dotpu4, and the "<< 3" on the
 * packed fractions assumes each fraction is small enough to stay inside its
 * 16-bit lane (presumably 0..7) - confirm against the caller.
 */
void chroma_sample_interpolation_TI(unsigned char image_Cb [RESTRICT],	 unsigned char image_Cr [RESTRICT]
					, unsigned char refPicLXCb[RESTRICT], unsigned char refPicLXCr[RESTRICT]
					, const short xFracl, const short yFracl,  const short PicWidthSamples,const short stride)
{
	/* A/B/C/D (Cb) and E/F/G/H (Cr): the four neighbours of the top-left sample */
	unsigned char* pucCbPtrA = refPicLXCb;
	unsigned char* pucCbPtrB = refPicLXCb + 1;
	unsigned char* pucCbPtrC = refPicLXCb + PicWidthSamples;
	unsigned char* pucCbPtrD = refPicLXCb + PicWidthSamples + 1;

	unsigned char* pucCrPtrE = refPicLXCr;
	unsigned char* pucCrPtrF = refPicLXCr + 1;
	unsigned char* pucCrPtrG = refPicLXCr + PicWidthSamples;
	unsigned char* pucCrPtrH = refPicLXCr + PicWidthSamples + 1;

	unsigned char* pucOutputCbPtr = image_Cb;
	unsigned char* pucOutputCrPtr = image_Cr;

	unsigned int uiTmp1,uiTmp2;
	unsigned int ui1_1,ui1_2,ui2_1,ui2_2,res_1,res_2,res_3,res_4;
	unsigned int tmpend1_1,tmpend1_2,tmpend2_1,tmpend2_2;

	unsigned int uiA,uiB,uiC,uiD;
	unsigned int uiE,uiF,uiG,uiH;

	unsigned int uicst = xFracl * yFracl;	/* x*y */

	/* Build the four 8-bit weights in a single word. */
	uiTmp1 = _pack2(uicst,uicst);			/* (xy, xy) */
	uiTmp2 = (_pack2(xFracl,yFracl)) << 3;		/* (8x, 8y) */
	uiTmp2 = _sub2(uiTmp2,uiTmp1);			/* (8x - xy, 8y - xy) */
	uiTmp1 = (uicst) + ((uicst - ((xFracl + yFracl) <<3) + 64) << 16);	/* (xy - 8(x+y) + 64, xy) */
	uicst = _packh2(uiTmp1,uiTmp2);	/* upper halves: (xy - 8(x+y) + 64, 8x - xy) */
	uiTmp1 = _pack2(uiTmp2,uiTmp1);	/* lower halves: (8y - xy, xy) */
	uicst =  _spacku4(uicst,uiTmp1);	/* the four weights, saturated to bytes */

	/* First row of each two-byte window. */
	uiA = _mem2(pucCbPtrA);
	uiB = _mem2(pucCbPtrB);
	uiC = _mem2(pucCbPtrC);
	uiD = _mem2(pucCbPtrD);

	uiE = _mem2(pucCrPtrE);
	uiF = _mem2(pucCrPtrF);
	uiG = _mem2(pucCrPtrG);
	uiH = _mem2(pucCrPtrH);

	pucCbPtrA += PicWidthSamples;
	pucCbPtrB += PicWidthSamples;
	pucCbPtrC += PicWidthSamples;
	pucCbPtrD += PicWidthSamples;

	pucCrPtrE += PicWidthSamples;
	pucCrPtrF += PicWidthSamples;
	pucCrPtrG += PicWidthSamples;
	pucCrPtrH += PicWidthSamples;

	/*
	 * Next row goes into the upper halfword.  The 16-bit load is widened to
	 * unsigned int first: shifting the int-promoted value left by 16 is
	 * undefined when bit 15 of the load is set.
	 */
	uiA += ((unsigned int)_mem2(pucCbPtrA) << 16);
	uiB += ((unsigned int)_mem2(pucCbPtrB) << 16);
	uiC += ((unsigned int)_mem2(pucCbPtrC) << 16);
	uiD += ((unsigned int)_mem2(pucCbPtrD) << 16);

	uiE += ((unsigned int)_mem2(pucCrPtrE) << 16);
	uiF += ((unsigned int)_mem2(pucCrPtrF) << 16);
	uiG += ((unsigned int)_mem2(pucCrPtrG) << 16);
	uiH += ((unsigned int)_mem2(pucCrPtrH) << 16);

	/* ---- Cb: regroup so each word holds the A,B,C,D bytes of one output sample ---- */
	uiTmp1 = _packh4(uiA,uiB);
	uiTmp2 = _packh4(uiC,uiD);
	ui1_1 = _packh4(uiTmp1,uiTmp2);
	ui2_1 = _packl4(uiTmp1,uiTmp2);
	uiTmp1 = _packl4(uiA,uiB);
	uiTmp2 = _packl4(uiC,uiD);
	ui1_2 = _packh4(uiTmp1,uiTmp2);
	ui2_2 = _packl4(uiTmp1,uiTmp2);

	/* weighted sum of the four neighbours for each of the four samples */
	tmpend1_1 = _dotpu4(uicst,ui1_1);
	tmpend1_2 = _dotpu4(uicst,ui1_2);
	tmpend2_1 = _dotpu4(uicst,ui2_1);
	tmpend2_2 = _dotpu4(uicst,ui2_2);

	/* two sums per word, then round (+32) and scale (>>6) both lanes at once */
	res_1 = _pack2(tmpend1_1,tmpend1_2);
	res_2 = _pack2(tmpend2_1,tmpend2_2);
	res_1 = _shr2(_sadd2(res_1,0x00200020),6);
	res_2 = _shr2(_sadd2(res_2,0x00200020),6);

	/* saturate to bytes; the two results end up in the low 16 bits */
	res_1 = _spacku4(0x00000000,res_1);
	res_2 = _spacku4(0x00000000,res_2);

	/* res_2 is the first output row, res_1 the second */
	_mem2(pucOutputCbPtr) = res_2;
	pucOutputCbPtr += stride;
	_mem2(pucOutputCbPtr) = res_1;

	/* ---- Cr: same sequence on E,F,G,H ---- */
	uiTmp1 = _packh4(uiE,uiF);
	uiTmp2 = _packh4(uiG,uiH);
	ui1_1 = _packh4(uiTmp1,uiTmp2);
	ui2_1 = _packl4(uiTmp1,uiTmp2);
	uiTmp1 = _packl4(uiE,uiF);
	uiTmp2 = _packl4(uiG,uiH);
	ui1_2 = _packh4(uiTmp1,uiTmp2);
	ui2_2 = _packl4(uiTmp1,uiTmp2);

	tmpend1_1 = _dotpu4(uicst,ui1_1);
	tmpend1_2 = _dotpu4(uicst,ui1_2);
	tmpend2_1 = _dotpu4(uicst,ui2_1);
	tmpend2_2 = _dotpu4(uicst,ui2_2);

	res_3 = _pack2(tmpend1_1,tmpend1_2);
	res_4 = _pack2(tmpend2_1,tmpend2_2);
	res_3 = _shr2(_sadd2(res_3,0x00200020),6);
	res_4 = _shr2(_sadd2(res_4,0x00200020),6);

	res_3 = _spacku4(0x00000000,res_3);
	res_4 = _spacku4(0x00000000,res_4);

	/* res_4 is the first output row, res_3 the second */
	_mem2(pucOutputCrPtr) = res_4;
	pucOutputCrPtr += stride;
	_mem2(pucOutputCrPtr) = res_3;

	}
Esempio n. 4
0
/*
 * DSP_QMFS_process - run one block through the QMF synthesis bank.
 *
 * Every 32-bit word is processed as two 16-bit lanes.  For each index the
 * LO and HI band inputs are combined into a sum (t0 = d0 + d1) and a
 * difference (t1 = d0 - d1); each is multiplied lane by lane with one half
 * of the packed coefficient word (_mpy2ll) and accumulated per lane into
 * 40-bit accumulators (_lsadd).  The accumulators are rounded (+16384, >>15),
 * saturated with _sat() and re-packed with _pack2().
 *
 * With L = blk_len and M = FILT_LEN/2:
 *   - indices 0 .. L-1 add the stored history and emit two output words,
 *       data_out_ptr[2*i]   from filter 0,
 *       data_out_ptr[2*i+1] from filter 1;
 *   - indices L .. L+M-2 add the stored history and write the result back
 *     to history[0/1][i-L] for overlap-add with the next block.
 *
 * The number of taps per index comes from QMFS_bank_obj->iter_count[i].
 *
 * Fields read   : flt_ptr, data_in_buffer_LO, data_in_buffer_HI, blk_len,
 *                 iter_count, history, data_out_ptr.
 * Memory written: the output buffer and history[0], history[1].
 *
 * NOTE(review): lane descriptions rely on the TI C6000 definitions of
 * _pack2/_packh2/_mpy2ll/_loll/_hill; history[] entries at index >= M-1 are
 * read but never written here, so the caller presumably keeps them zeroed -
 * confirm against the initialisation code.
 */
void DSP_QMFS_process(DSP_QMFS_bank_t *QMFS_bank_obj)
{
  sint32 i,j;

  uint32 * restrict filter = (uint32 *)QMFS_bank_obj->flt_ptr;
  uint32 * restrict data_ptr0 = (uint32 *)QMFS_bank_obj->data_in_buffer_LO; /* LO band input buffer */
  uint32 * restrict data_ptr1 = (uint32 *)QMFS_bank_obj->data_in_buffer_HI; /* HI band input buffer */

  sint32 L = QMFS_bank_obj->blk_len;
  sint32 M = FILT_LEN/2;

  /*
   * One restrict pointer per history line, used for both reading and
   * writing.  Two separate restrict pointers to the same array (one for
   * reads, one for writes) break the restrict contract because the same
   * elements are read through one and modified through the other.
   */
  uint32 * restrict hist0 = (uint32 *)QMFS_bank_obj->history[0];
  uint32 * restrict hist1 = (uint32 *)QMFS_bank_obj->history[1];

  uint32 * restrict dataout_ptr = (uint32 *)QMFS_bank_obj->data_out_ptr;

  for(i=0;i<L;++i)
  {
    /* iterations 0 to (L-1) produce the outputs of the current block */
    register uint32 t0,t1,out;
    register __int40_t accum0l, accum1l, accum0r, accum1r;  /* per-lane accumulators */
    register long long temp;

    uint32 *dptr0 = data_ptr0 + i;  /* newest LO input word for this index */
    uint32 *dptr1 = data_ptr1 + i;  /* newest HI input word for this index */
    uint32 *fptr = filter;

    accum0l = 0; accum0r = 0;
    accum1l = 0; accum1r = 0;

    #pragma MUST_ITERATE( 1)
    for(j=0; j<QMFS_bank_obj->iter_count[i];++j)
    {
      /* j runs from the rightmost overlap position to the leftmost one */
      register uint32 d0,d1;

      /* one packed word carries the coefficients of both filters */
      register uint32 filt_coef10 = *fptr++;
      register uint32 filt_coef0 = _pack2(filt_coef10,filt_coef10);   /* lower half in both lanes */
      register uint32 filt_coef1 = _packh2(filt_coef10,filt_coef10);  /* upper half in both lanes */

      /* walk both band inputs right to left */
      d1 = *dptr1--;
      d0 = *dptr0--;

      t0 = _add2(d0,d1);
      t1 = _sub2(d0,d1);

      temp = _mpy2ll(t0, filt_coef0);
      accum0l = _lsadd(_loll(temp), accum0l); accum0r = _lsadd(_hill(temp), accum0r);

      temp = _mpy2ll(t1, filt_coef1);
      accum1l = _lsadd(_loll(temp), accum1l); accum1r = _lsadd(_hill(temp), accum1r);
    }

    /*
     * Round, scale, saturate and re-pack: r -> upper lane, l -> lower lane.
     * Packing the l accumulator into both halves would throw away the
     * upper-lane result accumulated in accum0r/accum1r.
     */
    t0 = _pack2( _sat((accum0r + 16384)>>15), _sat((accum0l + 16384)>>15));
    t1 = _pack2( _sat((accum1r + 16384)>>15), _sat((accum1l + 16384)>>15));

    out = _add2(t0, hist0[i]);
    dataout_ptr[2*i + 0] = out;      /* filter 0 output word */

    out = _add2(t1, hist1[i]);
    dataout_ptr[2*i + 1] = out;      /* filter 1 output word */
  }

  for(i=L;i<L+M-1;++i)
  {
    /* iterations L to (L + (M-1) - 1) produce the
       history for overlap add for the next block */
    register uint32 t0,t1,out;
    register __int40_t accum0l, accum1l, accum0r, accum1r;  /* per-lane accumulators */
    register long long temp;

    uint32 *dptr0 = data_ptr0 + (L-1);  /* always start at the last input word */
    uint32 *dptr1 = data_ptr1 + (L-1);
    uint32 *fptr = filter + (i-L+1);    /* skip the taps that fall past the block */

    accum0l = 0; accum0r = 0;
    accum1l = 0; accum1r = 0;

    #pragma MUST_ITERATE( 1)
    for(j=0; j<QMFS_bank_obj->iter_count[i];++j)
    {
      /* same logic as above: rightmost point of overlap to leftmost */
      register uint32 d0,d1;

      register uint32 filt_coef10 = *fptr++;
      register uint32 filt_coef0 = _pack2(filt_coef10,filt_coef10);   /* lower half in both lanes */
      register uint32 filt_coef1 = _packh2(filt_coef10,filt_coef10);  /* upper half in both lanes */

      d1 = *dptr1--;
      d0 = *dptr0--;

      t0 = _add2(d0,d1);
      t1 = _sub2(d0,d1);

      temp = _mpy2ll(t0, filt_coef0);
      accum0l = _lsadd(_loll(temp), accum0l); accum0r = _lsadd(_hill(temp), accum0r);

      temp = _mpy2ll(t1, filt_coef1);
      accum1l = _lsadd(_loll(temp), accum1l); accum1r = _lsadd(_hill(temp), accum1r);
    }

    /* r -> upper lane, l -> lower lane (see the note in the first loop) */
    t0 = _pack2( _sat((accum0r + 16384)>>15), _sat((accum0l + 16384)>>15));
    t1 = _pack2( _sat((accum1r + 16384)>>15), _sat((accum1l + 16384)>>15));

    /* i-L < i, so this never overwrites an entry that is still to be read */
    out = _add2(t0, hist0[i]);
    hist0[(i-L)] = out;         /* new overlap-add history, filter 0 */
    out = _add2(t1, hist1[i]);
    hist1[(i-L)] = out;         /* new overlap-add history, filter 1 */
  }

  return;
}