C++ (Cpp) MULSHIFT32示例

示例#1

0

显示文件

文件： mlt.c 项目： jprothwell/sc-fix

/**************************************************************************************
 * Function:    PostMultiplyRescale
 *
 * Description: post-twiddle stage of MDCT, with rescaling for extra guard bits
 *
 * Inputs:      tabidx - table index (selects the transform size from nmltTab and
 *                         the coefficient stride from postSkip)
 *              fft1   - buffer of nmlt samples, processed in place
 *              es     - number of guard bits to remove from output (handed to
 *                         CLIP_2N_SHIFT for every value that is stored)
 *
 * Outputs:     processed samples in same buffer
 *
 * Return:      none
 *
 * Notes:       clips output to [-2^30, 2^30 - 1], guaranteeing at least 1 guard bit
 *              see notes on PostMultiply()
 *              every loop pass reads and writes two samples at the front of the
 *                buffer and two at the back, so nmlt/4 passes cover all of it
 **************************************************************************************/
static void PostMultiplyRescale(int tabidx, int *fft1, int es)
{
	int i, nmlt, ar1, ai1, ar2, ai2, skipFactor, z, f0;
	int t, cs2, sin2;
	int *fft2;
	const int *csptr;

	nmlt = nmltTab[tabidx];		
	csptr = cos1sin1tab;
	skipFactor = postSkip[tabidx];
	fft2 = fft1 + nmlt - 1;		/* fft2 walks backwards from the last sample */

	/* load coeffs for first pass
	 * cs2 = (cos+sin)/2, sin2 = sin/2
	 * cs2 is turned into (cos-sin)/2 in place (cs2 -= 2*sin2) when that form is needed
	 */
	cs2 = *csptr++;
	sin2 = *csptr;
	csptr += skipFactor;

	for (i = nmlt >> 2; i != 0; i--) {
		/* ai2 must be captured now: its slot (*fft2) is overwritten by the first store below */
		ar1 = *(fft1 + 0);
		ai1 = *(fft1 + 1);
		ai2 = *(fft2 + 0);

		/* gain 1 int bit from MULSHIFT32, and one since coeffs are stored as 0.5 * (cos+sin), 0.5*sin */
		MULSHIFT32hx(sin2, ar1 + ai1,t);	/* t is shared by both outputs of this butterfly */
		z = t - MULSHIFT32(cs2, ai1);	
		CLIP_2N_SHIFT(z, es);	 
		*fft2-- = z;
		cs2 -= 2*sin2;				/* cs2 now holds the (cos-sin) form */
		MULSHIFT32hx(cs2, ar1, f0);
		z = t + f0;	
		CLIP_2N_SHIFT(z, es);	 
		*fft1++ = z;

		/* next coefficient pair: used for the second butterfly of this pass and,
		 * after being restored at the bottom of the loop, for the first butterfly
		 * of the next pass
		 */
		cs2 = *csptr++;
		sin2 = *csptr;
		csptr += skipFactor;

		/* fft2 was decremented above, so this reads the slot that was fft2[-1]
		 * at the top of the pass (not yet overwritten)
		 */
		ar2 = *fft2;
		ai2 = -ai2;
		MULSHIFT32hx(sin2, ar2 + ai2,t);
		z = t - MULSHIFT32(cs2, ai2);	
		CLIP_2N_SHIFT(z, es);	 
		*fft2-- = z;
		cs2 -= 2*sin2;				/* (cos-sin) form again */
		MULSHIFT32hx(cs2, ar2, f0);
		z = t + f0;	
		CLIP_2N_SHIFT(z, es);	 
		*fft1++ = z;
		cs2 += 2*sin2;				/* undo the subtraction: back to the (cos+sin) form for the next pass */

	}

	/* see comments in PostMultiply() for notes on scaling */
	return;	
}

示例#2

0

显示文件

文件： mlt.c 项目： jprothwell/sc-fix

/**************************************************************************************
 * Function:    PostMultiply
 *
 * Description: post-twiddle stage of MDCT
 *
 * Inputs:      tabidx - table index (for transform size)
 *              fft1   - buffer of nmlt samples, rewritten in place
 *
 * Outputs:     processed samples in same buffer
 *
 * Return:      none
 *
 * Notes:       minimum 1 GB in, 2 GB out - gains 1 int bit
 *              uses 3-mul, 3-add butterflies instead of 4-mul, 2-add
 *              scaling: assumes 1 guard bit in, gains 2 int bits; max gain of
 *                abs(cos) + abs(sin) = sqrt(2) = 1.414, so the current format
 *                guarantees 1 guard bit in output
 **************************************************************************************/
static void PostMultiply(int tabidx, int *fft1)
{
	int nmlt, stride, left;
	int frontRe, frontIm, backRe, backIm;
	int cps2, sin2, cms2, shared, prod;
	int *back;
	const int *coef;

	nmlt = nmltTab[tabidx];
	stride = postSkip[tabidx];
	coef = cos1sin1tab;
	back = fft1 + nmlt - 1;

	/* coefficients for the very first butterfly:
	 * cps2 = (cos+sin)/2, sin2 = sin/2, cms2 = (cos-sin)/2
	 */
	cps2 = coef[0];
	sin2 = coef[1];
	coef += 1 + stride;
	cms2 = cps2 - 2*sin2;

	left = nmlt >> 2;
	while (left != 0) {
		/* grab all four inputs before any of their slots is overwritten */
		frontRe = fft1[0];
		frontIm = fft1[1];
		backRe = back[-1];
		backIm = back[0];

		/* gain 1 int bit from MULSHIFT32, and one since coeffs are stored as 0.5 * (cos+sin), 0.5*sin */
		MULSHIFT32hx(sin2, frontRe + frontIm, shared);
		*back-- = shared - MULSHIFT32(cps2, frontIm);
		MULSHIFT32hx(cms2, frontRe, prod);
		*fft1++ = shared + prod;

		/* step to the next coefficient pair */
		cps2 = coef[0];
		sin2 = coef[1];
		coef += 1 + stride;

		backIm = -backIm;
		MULSHIFT32hx(sin2, backRe + backIm, shared);
		*back-- = shared - MULSHIFT32(cps2, backIm);
		cms2 = cps2 - 2*sin2;
		MULSHIFT32hx(cms2, backRe, prod);
		*fft1++ = shared + prod;

		left--;
	}
}

示例#3

0

显示文件

文件： stproc.c 项目： 003900107/realboard-lpc4088

/**************************************************************************************
 * Function:    IntensityProcMPEG2
 *
 * Description: intensity stereo processing for MPEG2
 *
 * Inputs:      vector x with dequantized samples from left and right channels
 *              number of non-zero samples in left channel
 *              valid FrameHeader struct
 *              two each of ScaleFactorInfoSub, CriticalBandInfo structs (both channels)
 *              ScaleFactorJS struct with joint stereo info from UnpackSFMPEG2()
 *              flags indicating midSide on/off, mixedBlock on/off
 *              guard bit mask (left and right channels)
 *
 * Outputs:     updated sample vector x
 *              updated guard bit mask
 *
 * Return:      none
 *
 * Notes:       assume at least 1 GB in input
 *
 * TODO:        combine MPEG1/2 into one function (maybe)
 *              make sure all the mixed-block and IIP logic is right
 *                probably redo IIP logic to be simpler
 **************************************************************************************/
void IntensityProcMPEG2(int x[MAX_NCHAN][MAX_NSAMP], int nSamps, FrameHeader *fh, ScaleFactorInfoSub *sfis, 
						CriticalBandInfo *cbi, ScaleFactorJS *sfjs, int midSideFlag, int mixFlag, int mOut[2])
{
	int i, j, k, n, r, cb, w;
	int fl, fr, mOutL, mOutR, xl, xr;
	int sampsLeft;
	int isf, sfIdx, tmp, il[23];
	int *isfTab;
	int cbStartL, cbStartS, cbEndL, cbEndS;
	
	isfTab = (int *)ISFMpeg2[sfjs->intensityScale][midSideFlag];
	mOutL = mOutR = 0;

	/* fill buffer with illegal intensity positions (depending on slen) */
	for (k = r = 0; r < 4; r++) {
		tmp = (1 << sfjs->slen[r]) - 1;
		for (j = 0; j < sfjs->nr[r]; j++, k++) 
			il[k] = tmp;
	}

	if (cbi[1].cbType == 0) {
		/* long blocks */
		il[21] = il[22] = 1;
		cbStartL = cbi[1].cbEndL + 1;	/* start at end of right */
		cbEndL =   cbi[0].cbEndL + 1;	/* process to end of left */
		i = fh->sfBand->l[cbStartL];
		sampsLeft = nSamps - i;

		for(cb = cbStartL; cb < cbEndL; cb++) {
			sfIdx = sfis->l[cb];
			if (sfIdx == il[cb]) {
				fl = ISFIIP[midSideFlag][0];
				fr = ISFIIP[midSideFlag][1];
			} else {
				isf = (sfis->l[cb] + 1) >> 1;
				fl = isfTab[(sfIdx & 0x01 ? isf : 0)];
				fr = isfTab[(sfIdx & 0x01 ? 0 : isf)];
			}
			n = MIN(fh->sfBand->l[cb + 1] - fh->sfBand->l[cb], sampsLeft);

			for(j = 0; j < n; j++, i++) {
				xr = MULSHIFT32(fr, x[0][i]) << 2;	x[1][i] = xr;	mOutR |= FASTABS(xr);
				xl = MULSHIFT32(fl, x[0][i]) << 2;	x[0][i] = xl;	mOutL |= FASTABS(xl);
			}

			/* early exit once we've used all the non-zero samples */
			sampsLeft -= n;
			if (sampsLeft == 0)		
				break;
		}
	} else {

示例#4

0

显示文件

文件： mlt.c 项目： jprothwell/sc-fix

/**************************************************************************************
 * Function:    PreMultiply
 *
 * Description: pre-twiddle stage of MDCT
 *
 * Inputs:      tabidx - table index (for transform size)
 *              zbuf1  - buffer of nmlt samples, rewritten in place
 *
 * Outputs:     processed samples in same buffer
 *
 * Return:      none
 *
 * Notes:       minimum 1 GB in, 2 GB out - loses (1+tabidx) int bits
 *              normalization by 2/sqrt(N) is rolled into tables here
 *              uses 3-mul, 3-add butterflies instead of 4-mul, 2-add
 *              scaling: assumes 1 guard bit in, gains (1 + tabidx) fraction bits,
 *                i.e. 1, 2, or 3 fraction bits for nSamps = 256, 512, 1024
 *                (left-shifting, since table scaled by 2 / sqrt(nSamps)); this
 *                offsets the fact that each extra FFT pass gains one more int bit
 **************************************************************************************/
static void PreMultiply(int tabidx, int *zbuf1)
{
	int nmlt, left;
	int reA, imA, reB, imB;
	int cpsA, sinA, cpsB, sinB, cms, shared;
	int *back;
	const int *coef;

	nmlt = nmltTab[tabidx];
	back = zbuf1 + nmlt - 1;
	coef = cos4sin4tab + cos4sin4tabOffset[tabidx];

	left = nmlt >> 2;
	while (left != 0) {
		/* four table words per pass: (cos+sin), sin for each of the two butterflies */
		cpsA = coef[0];
		sinA = coef[1];
		cpsB = coef[2];
		sinB = coef[3];
		coef += 4;

		/* butterfly A pairs front[0] with back[0]; butterfly B pairs back[-1] with front[1] */
		reA = zbuf1[0];
		imB = zbuf1[1];
		imA = back[0];
		reB = back[-1];

		/* gain 1 int bit from MULSHIFT32, but drop 2, 3, or 4 int bits from table scaling */
		MULSHIFT32hx(sinA, reA + imA, shared);
		cms = cpsA - 2*sinA;			/* (cos-sin) */
		zbuf1[0] = MULSHIFT32(cms, reA) + shared;
		zbuf1[1] = MULSHIFT32(cpsA, imA) - shared;

		MULSHIFT32hx(sinB, reB + imB, shared);
		cms = cpsB - 2*sinB;
		back[0] = MULSHIFT32(cpsB, imB) - shared;
		back[-1] = MULSHIFT32(cms, reB) + shared;

		zbuf1 += 2;
		back -= 2;
		left--;
	}
}

示例#5

0

显示文件

文件： mlt.c 项目： jprothwell/sc-fix

/**************************************************************************************
 * Function:    PreMultiplyRescale
 *
 * Description: pre-twiddle stage of MDCT, with rescaling for extra guard bits
 *
 * Inputs:      tabidx - table index (for transform size)
 *              zbuf1  - buffer of nmlt samples, rewritten in place
 *              es     - number of guard bits to add to input before processing
 *                         (every input is right-shifted by es as it is loaded)
 *
 * Outputs:     processed samples in same buffer
 *
 * Return:      none
 *
 * Notes:       see notes on PreMultiply() for scaling
 **************************************************************************************/
static void PreMultiplyRescale(int tabidx, int *zbuf1, int es)
{
	int nmlt, left;
	int reA, imA, reB, imB;
	int cpsA, sinA, cpsB, sinB, cms, shared;
	int *back;
	const int *coef;

	nmlt = nmltTab[tabidx];
	back = zbuf1 + nmlt - 1;
	coef = cos4sin4tab + cos4sin4tabOffset[tabidx];

	left = nmlt >> 2;
	while (left != 0) {
		/* four table words per pass: (cos+sin), sin for each of the two butterflies */
		cpsA = coef[0];
		sinA = coef[1];
		cpsB = coef[2];
		sinB = coef[3];
		coef += 4;

		reA = zbuf1[0] >> es;
		imA = back[0] >> es;
		imB = zbuf1[1] >> es;

		/* gain 1 int bit from MULSHIFT32, but drop 2, 3, or 4 int bits from table scaling */
		MULSHIFT32hx(sinA, reA + imA, shared);
		cms = cpsA - 2*sinA;			/* (cos-sin) */
		zbuf1[0] = MULSHIFT32(cms, reA) + shared;
		zbuf1[1] = MULSHIFT32(cpsA, imA) - shared;

		/* back[-1] has not been written yet, so it can still be loaded this late */
		reB = back[-1] >> es;

		MULSHIFT32hx(sinB, reB + imB, shared);
		cms = cpsB - 2*sinB;
		back[0] = MULSHIFT32(cpsB, imB) - shared;
		back[-1] = MULSHIFT32(cms, reB) + shared;

		zbuf1 += 2;
		back -= 2;
		left--;
	}
}

示例#6

0

显示文件

文件： ra_couple.c 项目： mdrjr/c2_aml_libs

/**************************************************************************************
 * Function:    JointDecodeMLT
 *
 * Description: decode the jointly-coded MLT
 *
 * Inputs:      pointer to initialized Gecko2Info struct
 *              mltleft[0, ... , cplStart-1]  has non-coupled coefficients for left
 *              mltrght[0, ... , cplStart-1]  has non-coupled coefficients for right
 *              mltleft[cplStart, ... , cRegions] has coupled coefficients
 *
 * Outputs:     mltleft[0, ... , nRegions-1]  has reconstructed coefficients for left
 *              mltrght[0, ... , nRegions-1]  has reconstructed coefficients for right
 *
 * Return:      none
 *
 * Notes:       the per-region gains come straight from the cplScale table, which
 *              stands in for the floating-point recipe
 *                expand    = (q - (cplquant>>1)) * (2.0f/cplquant);
 *                ratio     = sqrt(fabs(expand));  if (expand < 0.0f) ratio = -ratio;
 *                scaleleft = sqrt(0.5f - 0.5f * ratio);
 *                scalerght = sqrt(0.5f + 0.5f * ratio);
 **************************************************************************************/
void JointDecodeMLT(Gecko2Info *gi, int *mltleft, int *mltrght)
{
	int *cplindex = gi->db.cplindex;
	int levels, tableBase;
	int region, bin, idx, gainL, gainR;
	int *left, *right;

	levels = (1 << gi->cplQbits) - 1;		/* quant levels */
	tableBase = cplScaleOffset[gi->cplQbits];

	/* rebuild both channels from the coupled coefficients held in mltleft */
	region = gi->cplStart;
	while (region < gi->nRegions) {
		idx = cplindex[cplband[region]];
		gainL = cplScale[tableBase + idx];
		gainR = cplScale[tableBase + levels - 1 - idx];	/* mirrored table entry */

		left = mltleft + NBINS*region;
		right = mltrght + NBINS*region;

		/* drop extra sign bit (max gain = 0.998); right is written first because
		 * left[bin] is still needed as its source
		 */
		for (bin = 0; bin < NBINS; bin++) {
			right[bin] = MULSHIFT32(gainR, left[bin]) << 1;
			left[bin] = MULSHIFT32(gainL, left[bin]) << 1;
		}

		region++;
	}

	/* everything past the last coded region is silence */
	bin = gi->nRegions * NBINS;
	while (bin < gi->nSamples) {
		mltleft[bin] = 0;
		mltrght[bin] = 0;
		bin++;
	}
}

示例#7

0

显示文件

文件： dct4.c 项目： carlocaione/buildroot-ved-2014-05-27

/**************************************************************************************
 * Function:    PostMultiplyRescale
 *
 * Description: post-twiddle stage of DCT4, with rescaling for extra guard bits
 *
 * Inputs:      tabidx - table index (selects the transform size from nmdctTab and
 *                         the coefficient stride from postSkip)
 *              fft1   - buffer of nmdct samples, processed in place
 *              es     - number of guard bits to remove from output (handed to
 *                         CLIP_2N_SHIFT for every value that is stored)
 *
 * Outputs:     processed samples in same buffer
 *
 * Return:      none
 *
 * Notes:       clips output to [-2^30, 2^30 - 1], guaranteeing at least 1 guard bit
 *              see notes on PostMultiply()
 *              every loop pass reads and writes two samples at the front of the
 *                buffer and two at the back, so nmdct/4 passes cover all of it
 **************************************************************************************/
static void PostMultiplyRescale(int tabidx, int *fft1, int es)
{
	int i, nmdct, ar1, ai1, ar2, ai2, skipFactor, z;
	int t, cs2, sin2;
	int *fft2;
	const int *csptr;

	nmdct = nmdctTab[tabidx];		
	csptr = cos1sin1tab;
	skipFactor = postSkip[tabidx];
	fft2 = fft1 + nmdct - 1;		/* fft2 walks backwards from the last sample */

	/* load coeffs for first pass
	 * cs2 = (cos+sin), sin2 = sin
	 * cs2 is turned into (cos-sin) in place (cs2 -= 2*sin2) when that form is needed
	 */
	cs2 = *csptr++;
	sin2 = *csptr;
	csptr += skipFactor;

	for (i = nmdct >> 2; i != 0; i--) {
		/* ai2 must be captured now: its slot (*fft2) is overwritten by the first store below */
		ar1 = *(fft1 + 0);
		ai1 = *(fft1 + 1);
		ai2 = *(fft2 + 0);

		t = MULSHIFT32(sin2, ar1 + ai1);	/* shared by both outputs of this butterfly */
		z = t - MULSHIFT32(cs2, ai1);	
		CLIP_2N_SHIFT(z, es);	 
		*fft2-- = z;
		cs2 -= 2*sin2;				/* cs2 now holds the (cos-sin) form */
		z = t + MULSHIFT32(cs2, ar1);	
		CLIP_2N_SHIFT(z, es);	 
		*fft1++ = z;

		/* next coefficient pair: used for the second butterfly of this pass and,
		 * after being restored at the bottom of the loop, for the first butterfly
		 * of the next pass
		 */
		cs2 = *csptr++;
		sin2 = *csptr;
		csptr += skipFactor;

		/* fft2 was decremented above, so this reads the slot that was fft2[-1]
		 * at the top of the pass (not yet overwritten)
		 */
		ar2 = *fft2;
		ai2 = -ai2;
		t = MULSHIFT32(sin2, ar2 + ai2);
		z = t - MULSHIFT32(cs2, ai2);	
		CLIP_2N_SHIFT(z, es);	 
		*fft2-- = z;
		cs2 -= 2*sin2;				/* (cos-sin) form again */
		z = t + MULSHIFT32(cs2, ar2);	
		CLIP_2N_SHIFT(z, es);	 
		*fft1++ = z;
		cs2 += 2*sin2;				/* undo the subtraction: back to the (cos+sin) form for the next pass */
	}
}

示例#8

0

显示文件

文件： dct4.c 项目： carlocaione/buildroot-ved-2014-05-27

/**************************************************************************************
 * Function:    PostMultiply
 *
 * Description: post-twiddle stage of DCT4
 *
 * Inputs:      tabidx - table index (for transform size)
 *              fft1   - buffer of nmdct samples, rewritten in place
 *
 * Outputs:     processed samples in same buffer
 *
 * Return:      none
 *
 * Notes:       minimum 1 GB in, 2 GB out - gains 2 int bits
 *              uses 3-mul, 3-add butterflies instead of 4-mul, 2-add
 **************************************************************************************/
static void PostMultiply(int tabidx, int *fft1)
{
	int nmdct, stride, left;
	int frontRe, frontIm, backRe, backIm;
	int cps2, sin2, cms2, shared;
	int *back;
	const int *coef;

	nmdct = nmdctTab[tabidx];
	stride = postSkip[tabidx];
	coef = cos1sin1tab;
	back = fft1 + nmdct - 1;

	/* coefficients for the very first butterfly:
	 * cps2 = (cos+sin), sin2 = sin, cms2 = (cos-sin)
	 */
	cps2 = coef[0];
	sin2 = coef[1];
	coef += 1 + stride;
	cms2 = cps2 - 2*sin2;

	left = nmdct >> 2;
	while (left != 0) {
		/* grab all four inputs before any of their slots is overwritten */
		frontRe = fft1[0];
		frontIm = fft1[1];
		backRe = back[-1];
		backIm = back[0];

		/* gain 2 int bits from MULSHIFT32 by Q30
		 * max per-sample gain = MAX(sin(angle)+cos(angle)) = 1.414
		 * i.e. gain 1 GB since worst case is sin(angle) = cos(angle) = 0.707 (Q30),
		 *   gain 2 from extra sign bits, and eat one in adding
		 */
		shared = MULSHIFT32(sin2, frontRe + frontIm);
		back[0] = shared - MULSHIFT32(cps2, frontIm);	/* sin*ar1 - cos*ai1 */
		fft1[0] = shared + MULSHIFT32(cms2, frontRe);	/* cos*ar1 + sin*ai1 */

		/* step to the next coefficient pair */
		cps2 = coef[0];
		sin2 = coef[1];
		coef += 1 + stride;
		cms2 = cps2 - 2*sin2;

		backIm = -backIm;
		shared = MULSHIFT32(sin2, backRe + backIm);
		back[-1] = shared - MULSHIFT32(cps2, backIm);	/* sin*ar2 - cos*ai2 */
		fft1[1] = shared + MULSHIFT32(cms2, backRe);	/* cos*ar2 + sin*ai2 */

		fft1 += 2;
		back -= 2;
		left--;
	}
}

示例#9

0

显示文件

文件： dct4.c 项目： carlocaione/buildroot-ved-2014-05-27

/**************************************************************************************
 * Function:    PreMultiply
 *
 * Description: pre-twiddle stage of DCT4
 *
 * Inputs:      table index (for transform size)
 *              buffer of nmdct samples
 *
 * Outputs:     processed samples in same buffer
 *
 * Return:      none
 *
 * Notes:       minimum 1 GB in, 2 GB out, gains 5 (short) or 8 (long) frac bits
 *              i.e. gains 2-7= -5 int bits (short) or 2-10 = -8 int bits (long)
 *              normalization by -1/N is rolled into tables here (see trigtabs.c)
 *              uses 3-mul, 3-add butterflies instead of 4-mul, 2-add
 **************************************************************************************/
static void PreMultiply(int tabidx, int *zbuf1)
{
	int i, nmdct, ar1, ai1, ar2, ai2, z1, z2;
	int t, cms2, cps2a, sin2a, cps2b, sin2b;
	int *zbuf2;
	const int *csptr;

	nmdct = nmdctTab[tabidx];		
	zbuf2 = zbuf1 + nmdct - 1;
	csptr = cos4sin4tab + cos4sin4tabOffset[tabidx];

	/* whole thing should fit in registers - verify that compiler does this */
	for (i = nmdct >> 2; i != 0; i--) {
		/* cps2 = (cos+sin), sin2 = sin, cms2 = (cos-sin) */
		cps2a = *csptr++;
		sin2a = *csptr++;
		cps2b = *csptr++;
		sin2b = *csptr++;

		ar1 = *(zbuf1 + 0);
		ai2 = *(zbuf1 + 1);
		ai1 = *(zbuf2 + 0);
		ar2 = *(zbuf2 - 1);

		/* gain 2 ints bit from MULSHIFT32 by Q30, but drop 7 or 10 int bits from table scaling of 1/M
		 * max per-sample gain (ignoring implicit scaling) = MAX(sin(angle)+cos(angle)) = 1.414
		 * i.e. gain 1 GB since worst case is sin(angle) = cos(angle) = 0.707 (Q30), gain 2 from
		 *   extra sign bits, and eat one in adding
		 */
		t  = MULSHIFT32(sin2a, ar1 + ai1);
		z2 = MULSHIFT32(cps2a, ai1) - t;
		cms2 = cps2a - 2*sin2a;
		z1 = MULSHIFT32(cms2, ar1) + t;
		*zbuf1++ = z1;	/* cos*ar1 + sin*ai1 */
		*zbuf1++ = z2;	/* cos*ai1 - sin*ar1 */

		t  = MULSHIFT32(sin2b, ar2 + ai2);
		z2 = MULSHIFT32(cps2b, ai2) - t;
		cms2 = cps2b - 2*sin2b;
		z1 = MULSHIFT32(cms2, ar2) + t;
		*zbuf2-- = z2;	/* cos*ai2 - sin*ar2 */
		*zbuf2-- = z1;	/* cos*ar2 + sin*ai2 */
	}
}

示例#10

0

显示文件

文件： dct4.c 项目： carlocaione/buildroot-ved-2014-05-27

/**************************************************************************************
 * Function:    PreMultiplyRescale
 *
 * Description: pre-twiddle stage of DCT4, with rescaling for extra guard bits
 *
 * Inputs:      tabidx - table index (for transform size)
 *              zbuf1  - buffer of nmdct samples, rewritten in place
 *              es     - number of guard bits to add to input before processing
 *                         (every input is right-shifted by es as it is loaded)
 *
 * Outputs:     processed samples in same buffer
 *
 * Return:      none
 *
 * Notes:       see notes on PreMultiply()
 **************************************************************************************/
static void PreMultiplyRescale(int tabidx, int *zbuf1, int es)
{
	int nmdct, left;
	int reA, imA, reB, imB;
	int cpsA, sinA, cpsB, sinB, cms, shared;
	int *back;
	const int *coef;

	nmdct = nmdctTab[tabidx];
	back = zbuf1 + nmdct - 1;
	coef = cos4sin4tab + cos4sin4tabOffset[tabidx];

	left = nmdct >> 2;
	while (left != 0) {
		/* four table words per pass: (cos+sin), sin for each of the two butterflies */
		cpsA = coef[0];
		sinA = coef[1];
		cpsB = coef[2];
		sinB = coef[3];
		coef += 4;

		reA = zbuf1[0] >> es;
		imA = back[0] >> es;
		imB = zbuf1[1] >> es;

		shared = MULSHIFT32(sinA, reA + imA);
		cms = cpsA - 2*sinA;			/* (cos-sin) */
		zbuf1[0] = MULSHIFT32(cms, reA) + shared;
		zbuf1[1] = MULSHIFT32(cpsA, imA) - shared;

		/* back[-1] has not been written yet, so it can still be loaded this late */
		reB = back[-1] >> es;

		shared = MULSHIFT32(sinB, reB + imB);
		cms = cpsB - 2*sinB;
		back[0] = MULSHIFT32(cpsB, imB) - shared;
		back[-1] = MULSHIFT32(cms, reB) + shared;

		zbuf1 += 2;
		back -= 2;
		left--;
	}
}

示例#11

0

显示文件

文件： sbrqmf.c 项目： MattLeMay/Bluetune

/**************************************************************************************
 * Function:    PostMultiply64
 *
 * Description: post-twiddle stage of 64-point type-IV DCT
 *
 * Inputs:      fft1      - buffer of 64 samples, rewritten in place
 *              nSampsOut - number of output samples to calculate
 *
 * Outputs:     processed samples in same buffer
 *
 * Return:      none
 *
 * Notes:       minimum 1 GB in, 2 GB out, gains 2 int bits
 *              gbOut = gbIn + 1
 *              output is limited to sqrt(2)/2 plus GB in full GB
 *              nSampsOut is rounded up to next multiple of 4, since we calculate
 *                4 samples per loop
 **************************************************************************************/
static void PostMultiply64(int *fft1, int nSampsOut)
{
	int left;
	int frontRe, frontIm, backRe, backIm;
	int cps2, sin2, cms2, shared;
	int *back;
	const int *coef;

	coef = cos1sin1tab64;
	back = fft1 + 64 - 1;

	/* coefficients for the very first butterfly:
	 * cps2 = (cos+sin)/2, sin2 = sin/2, cms2 = (cos-sin)/2
	 */
	cps2 = coef[0];
	sin2 = coef[1];
	coef += 2;
	cms2 = cps2 - 2*sin2;

	left = (nSampsOut + 3) >> 2;		/* four outputs per pass */
	while (left != 0) {
		/* grab all four inputs before any of their slots is overwritten */
		frontRe = fft1[0];
		frontIm = fft1[1];
		backRe = back[-1];
		backIm = back[0];

		/* gain 2 int bits (multiplying by Q30), max gain = sqrt(2) */
		shared = MULSHIFT32(sin2, frontRe + frontIm);
		back[0] = shared - MULSHIFT32(cps2, frontIm);
		fft1[0] = shared + MULSHIFT32(cms2, frontRe);

		/* step to the next coefficient pair */
		cps2 = coef[0];
		sin2 = coef[1];
		coef += 2;
		cms2 = cps2 - 2*sin2;

		backIm = -backIm;
		shared = MULSHIFT32(sin2, backRe + backIm);
		back[-1] = shared - MULSHIFT32(cps2, backIm);
		fft1[1] = shared + MULSHIFT32(cms2, backRe);

		fft1 += 2;
		back -= 2;
		left--;
	}
}

示例#12

0

显示文件

文件： stproc.c 项目： 003900107/realboard-lpc4088

/**************************************************************************************
 * Function:    IntensityProcMPEG1
 *
 * Description: intensity stereo processing for MPEG1
 *
 * Inputs:      vector x with dequantized samples from left and right channels
 *              number of non-zero samples in left channel
 *              valid FrameHeader struct
 *              two each of ScaleFactorInfoSub, CriticalBandInfo structs (both channels)
 *                (this routine reads cbi[0] and cbi[1], and reads scale factors
 *                 through sfis->l / sfis->s only)
 *              flags indicating midSide on/off, mixedBlock on/off
 *                (mixFlag is not referenced in this function body)
 *              guard bit mask (left and right channels)
 *
 * Outputs:     updated sample vector x: for every processed index i, x[1][i] and
 *                x[0][i] are both recomputed from the old x[0][i]
 *              updated guard bit mask: mOut[0] / mOut[1] are overwritten with the
 *                OR of FASTABS() of every left / right value written
 *
 * Return:      none
 *
 * Notes:       assume at least 1 GB in input
 *
 * TODO:        combine MPEG1/2 into one function (maybe)
 *              make sure all the mixed-block and IIP logic is right
 **************************************************************************************/
void IntensityProcMPEG1(int x[MAX_NCHAN][MAX_NSAMP], int nSamps, FrameHeader *fh, ScaleFactorInfoSub *sfis, 
						CriticalBandInfo *cbi, int midSideFlag, int mixFlag, int mOut[2])
{
	int i=0, j=0, n=0, cb=0, w=0;
	int sampsLeft, isf, mOutL, mOutR, xl, xr;
	int fl, fr, fls[3], frs[3];
	int cbStartL=0, cbStartS=0, cbEndL=0, cbEndS=0;
	int *isfTab;
	
	/* NOTE - this works fine for mixed blocks, as long as the switch point starts in the
	 *  short block section (i.e. on or after sample 36 = sfBand->l[8] = 3*sfBand->s[3]
	 * is this a safe assumption?
	 * TODO - intensity + mixed not quite right (diff = 11 on he_mode)
	 *  figure out correct implementation (spec ambiguous about when to do short block reorder)
	 */
	/* pick the band range to process: from one past the right channel's last band
	 * up to and including the left channel's last band. Only one of the two ranges
	 * (long / short) is non-empty; for any other cbType both stay empty and i stays 0.
	 */
	if (cbi[1].cbType == 0) {
		/* long block */
		cbStartL = cbi[1].cbEndL + 1;
		cbEndL =   cbi[0].cbEndL + 1;
		cbStartS = cbEndS = 0;
		i = fh->sfBand->l[cbStartL];
	} else if (cbi[1].cbType == 1 || cbi[1].cbType == 2) {
		/* short or mixed block */
		cbStartS = cbi[1].cbEndSMax + 1;
		cbEndS =   cbi[0].cbEndSMax + 1;
		cbStartL = cbEndL = 0;
		i = 3 * fh->sfBand->s[cbStartS];	/* short bands hold 3 windows per line */
	}

	sampsLeft = nSamps - i;		/* process to length of left */
	isfTab = (int *)ISFMpeg1[midSideFlag];
	mOutL = mOutR = 0;

	/* long blocks */
	for (cb = cbStartL; cb < cbEndL && sampsLeft > 0; cb++) {
		isf = sfis->l[cb];
		if (isf == 7) {
			/* position 7 takes its gains from the ISFIIP table instead of isfTab */
			fl = ISFIIP[midSideFlag][0];
			fr = ISFIIP[midSideFlag][1];
		} else {
			/* right gain is the complement of the left gain relative to isfTab[6] */
			fl = isfTab[isf];	
			fr = isfTab[6] - isfTab[isf];
		}

		n = fh->sfBand->l[cb + 1] - fh->sfBand->l[cb];	/* width of this band */
		for (j = 0; j < n && sampsLeft > 0; j++, i++) {
			/* right must be computed first: both outputs derive from the old x[0][i] */
			xr = MULSHIFT32(fr, x[0][i]) << 2;	x[1][i] = xr; mOutR |= FASTABS(xr);
			xl = MULSHIFT32(fl, x[0][i]) << 2;	x[0][i] = xl; mOutL |= FASTABS(xl);
			sampsLeft--;
		}
	}

	/* short blocks */
	for (cb = cbStartS; cb < cbEndS && sampsLeft >= 3; cb++) {
		/* one gain pair per window, chosen the same way as for long blocks */
		for (w = 0; w < 3; w++) {
			isf = sfis->s[cb][w];
			if (isf == 7) {
				fls[w] = ISFIIP[midSideFlag][0];
				frs[w] = ISFIIP[midSideFlag][1];
			} else {
				fls[w] = isfTab[isf];
				frs[w] = isfTab[6] - isfTab[isf];
			}
		}

		n = fh->sfBand->s[cb + 1] - fh->sfBand->s[cb];
		/* samples are consumed in groups of three (i+0, i+1, i+2 use window 0, 1, 2) */
		for (j = 0; j < n && sampsLeft >= 3; j++, i+=3) {
			xr = MULSHIFT32(frs[0], x[0][i+0]) << 2;	x[1][i+0] = xr;	mOutR |= FASTABS(xr);
			xl = MULSHIFT32(fls[0], x[0][i+0]) << 2;	x[0][i+0] = xl;	mOutL |= FASTABS(xl);
			xr = MULSHIFT32(frs[1], x[0][i+1]) << 2;	x[1][i+1] = xr;	mOutR |= FASTABS(xr);
			xl = MULSHIFT32(fls[1], x[0][i+1]) << 2;	x[0][i+1] = xl;	mOutL |= FASTABS(xl);
			xr = MULSHIFT32(frs[2], x[0][i+2]) << 2;	x[1][i+2] = xr;	mOutR |= FASTABS(xr);
			xl = MULSHIFT32(fls[2], x[0][i+2]) << 2;	x[0][i+2] = xl;	mOutL |= FASTABS(xl);
			sampsLeft -= 3;
		}
	}
	mOut[0] = mOutL;
	mOut[1] = mOutR;
	
	return;
}

示例#13

0

显示文件

文件： mpi_aenc_lsrc.c 项目： MindShow/hisi-driverlibs

/*
 * LinearSRC_16bitProcessFrame
 *
 * Linear-interpolation sample-rate conversion of one block of interleaved
 * 16-bit PCM.
 *
 * pInst      - converter state. Reads Channels, AdjustInRate, OutRate and
 *              (fixed-point build) IvtSFFactor; updates DisContinuity,
 *              Remainder and PreSample[].
 * pInPcmBuf  - InSamps input frames of Channels interleaved samples each.
 * pOutpcmBuf - receives the generated frames, interleaved. Its capacity is
 *              not checked here; the caller must size it.
 * InSamps    - number of input frames available in pInPcmBuf.
 *
 * Returns the number of output frames written.
 *
 * When AdjustInRate == OutRate the input is copied through unchanged.
 * Otherwise every output sample is a weighted mix of two neighbouring input
 * samples, with Remainder / OutRate as the weight of the newer one. The last
 * consumed input frame is kept in PreSample[] so the next call can
 * interpolate across the block boundary.
 *
 * PreSample[] is refreshed at the end of the resampling path only if that
 * path consumed at least one input frame. Refreshing it unconditionally read
 * pcmbuf[Channels * in] with in == InSamps (one frame past the input) when
 * InSamps was 0 or when the fade-in step had already consumed the only
 * frame, and replaced valid history with that out-of-range data.
 */
static HI_S32 LinearSRC_16bitProcessFrame(SRC_Linear* pInst,
                                          HI_S16 *    pInPcmBuf,
                                          HI_S16 *    pOutpcmBuf,
                                          HI_S32      InSamps)
{
    HI_S32 insamps, outsamps;
    HI_S32 sum, out = 0, in = 0, ch;
    HI_S16 *pcmbuf  = (HI_S16*)pInPcmBuf;
    HI_S32 Channels = pInst->Channels;

    insamps  = InSamps;
    outsamps = 0;

    if (pInst->DisContinuity && (insamps > 0))
    {
        if (pInst->AdjustInRate == pInst->OutRate) /* fade out */
        {
            /* flush the sample saved by the previous call as one extra output frame */
            for (ch = 0; ch < Channels; ch++)
            {
                pOutpcmBuf[Channels * out + ch] = (pInst->PreSample[ch]);
            }

            outsamps++;
            out++;
            pInst->DisContinuity = 0;
        }
        else  /* fade in */
        {
            /* prime the history with the first input frame and consume it */
            for (ch = 0; ch < Channels; ch++)
            {
                (pInst->PreSample[ch]) = (HI_S32)(pcmbuf[Channels * in + ch]);
            }

            in++;
            insamps--;
            pInst->DisContinuity = 0;
        }
    }

    if (pInst->AdjustInRate == pInst->OutRate)
    {
        /* equal rates: straight copy */
        while (insamps > 0)
        {
            for (ch = 0; ch < Channels; ch++)
            {
                pOutpcmBuf[Channels * out + ch] = pcmbuf[Channels * in + ch];
            }

            outsamps++;
            out++;
            in++;
            insamps--;
        }
    }
    else if (insamps > 0)
    {
        /* first segment: between the saved PreSample and pcmbuf[in];
         * `in` is deliberately not advanced, the next segment starts at pcmbuf[in] */
        while (pInst->Remainder < pInst->OutRate)
        {
            for (ch = 0; ch < Channels; ch++)
            {
#ifdef LINEARSRC_FIXPOINT
                sum  = MULSHIFT32(pcmbuf[Channels * (in + 0) + ch], pInst->IvtSFFactor * pInst->Remainder);
                sum += MULSHIFT32(pInst->PreSample[ch], pInst->IvtSFFactor * (pInst->OutRate - pInst->Remainder));
                pOutpcmBuf[Channels * out + ch] = CLIPTOSHORT(sum << 2);/* IvtSFFactor is 2.30 format */
#else
                pOutpcmBuf[Channels * out + ch] = ((pcmbuf[Channels * in
                                                           + ch] * pInst->Remainder + pInst->PreSample[ch]
                                                    * (pInst->OutRate - pInst->Remainder)) / pInst->OutRate);
#endif
            }

            outsamps++;
            out++;
            pInst->Remainder += pInst->AdjustInRate;
        }

        pInst->Remainder -= pInst->OutRate;
        insamps--;

        /* remaining segments: between pcmbuf[in] and pcmbuf[in + 1] */
        while (insamps > 0)
        {
            while (pInst->Remainder < pInst->OutRate)
            {
                for (ch = 0; ch < Channels; ch++)
                {
#ifdef LINEARSRC_FIXPOINT
                    sum  = MULSHIFT32(pcmbuf[Channels * (in + 1) + ch], pInst->IvtSFFactor * pInst->Remainder);
                    sum += MULSHIFT32(pcmbuf[Channels
                                             * (in + 0) + ch], pInst->IvtSFFactor * (pInst->OutRate - pInst->Remainder));
                    pOutpcmBuf[Channels * out + ch] = CLIPTOSHORT(sum << 2);/* IvtSFFactor is 2.30 format */

#else
                    pOutpcmBuf[Channels * out
                               + ch] = ((pcmbuf[Channels
                                                * (in + 1) + ch] * pInst->Remainder + pcmbuf[Channels * in + ch]
                                         * (pInst->OutRate - pInst->Remainder)) / pInst->OutRate);
#endif
                }

                outsamps++;
                out++;
                pInst->Remainder += pInst->AdjustInRate;
            }

            pInst->Remainder -= pInst->OutRate;
            in++;
            insamps--;
        }

        /* `in` now indexes the last frame of the input: keep it for the next call */
        for (ch = 0; ch < Channels; ch++)
        {
            (pInst->PreSample[ch]) = (HI_S32)(pcmbuf[Channels * in + ch]);
        }
    }
    return outsamps;
}

示例#14

0

显示文件

文件： mpi_aenc_lsrc.c 项目： MindShow/hisi-driverlibs

/*
 * LinearSRC_32bitProcessFrame
 *
 * Linear-interpolation sample-rate conversion of one block of interleaved
 * 32-bit PCM.
 *
 * pInst      - converter state. Reads Channels, AdjustInRate, OutRate and
 *              (fixed-point build) IvtSFFactor; updates DisContinuity,
 *              Remainder and PreSample[].
 * pInPcmBuf  - InSamps input frames of Channels interleaved samples each.
 * pOutpcmBuf - receives the generated frames, interleaved. Its capacity is
 *              not checked here; the caller must size it.
 * InSamps    - number of input frames available in pInPcmBuf.
 *
 * Returns the number of output frames written.
 *
 * When AdjustInRate == OutRate the input is copied through unchanged.
 * Otherwise every output sample is a weighted mix of two neighbouring input
 * samples, with Remainder / OutRate as the weight of the newer one. In the
 * fixed-point build the mix is clipped with CLIPTOSHORT after a >> 14 and
 * then shifted left by 16 before it is stored; the other build uses 64-bit
 * arithmetic for the mix. The last consumed input frame is kept in
 * PreSample[] so the next call can interpolate across the block boundary.
 *
 * PreSample[] is refreshed at the end of the resampling path only if that
 * path consumed at least one input frame. Refreshing it unconditionally read
 * pcmbuf[Channels * in] with in == InSamps (one frame past the input) when
 * InSamps was 0 or when the fade-in step had already consumed the only
 * frame, and replaced valid history with that out-of-range data.
 */
static HI_S32 LinearSRC_32bitProcessFrame(SRC_Linear* pInst,
                                          HI_S32 *    pInPcmBuf,
                                          HI_S32 *    pOutpcmBuf,
                                          HI_S32      InSamps)
{
    HI_S32 insamps, outsamps;
    HI_S32 sum, out = 0, in = 0, ch;
    HI_S32 *pcmbuf  = (HI_S32*)pInPcmBuf;
    HI_S32 Channels = pInst->Channels;

    insamps  = InSamps;
    outsamps = 0;

    if (pInst->DisContinuity && (insamps > 0))
    {
        if (pInst->AdjustInRate == pInst->OutRate) /* fade out */
        {
            /* flush the sample saved by the previous call as one extra output frame */
            for (ch = 0; ch < Channels; ch++)
            {
                pOutpcmBuf[Channels * out + ch] = (pInst->PreSample[ch]);
            }

            outsamps++;
            out++;
            pInst->DisContinuity = 0;
        }
        else  /* fade in */
        {
            /* prime the history with the first input frame and consume it */
            for (ch = 0; ch < Channels; ch++)
            {
                (pInst->PreSample[ch]) = (HI_S32)(pcmbuf[Channels * in + ch]);
            }

            in++;
            insamps--;
            pInst->DisContinuity = 0;
        }
    }

    if (pInst->AdjustInRate == pInst->OutRate)
    {
        /* equal rates: straight copy */
        while (insamps > 0)
        {
            for (ch = 0; ch < Channels; ch++)
            {
                pOutpcmBuf[Channels * out + ch] = pcmbuf[Channels * in + ch];
            }

            outsamps++;
            out++;
            in++;
            insamps--;
        }
    }
    else if (insamps > 0)
    {
        /* first segment: between the saved PreSample and pcmbuf[in];
         * `in` is deliberately not advanced, the next segment starts at pcmbuf[in] */
        while (pInst->Remainder < pInst->OutRate)
        {
            for (ch = 0; ch < Channels; ch++)
            {
#ifdef LINEARSRC_FIXPOINT
                sum  = MULSHIFT32(pcmbuf[Channels * (in + 0) + ch], pInst->IvtSFFactor * pInst->Remainder);
                sum += MULSHIFT32(pInst->PreSample[ch], pInst->IvtSFFactor * (pInst->OutRate - pInst->Remainder));
                sum = CLIPTOSHORT(sum >>14);/* IvtSFFactor is 2.30 format */
                pOutpcmBuf[Channels * out + ch] =sum << 16;
#else
                HI_S64 s64sample0, s64sample1;
                s64sample0 = (HI_S64)(pcmbuf[Channels * in + ch]);
                s64sample1 = (HI_S64)(pInst->PreSample[ch]);
                pOutpcmBuf[Channels * out
                           + ch] = (HI_S32)((s64sample0 * pInst->Remainder + s64sample1
                                             * (pInst->OutRate - pInst->Remainder)) / pInst->OutRate);
#endif
            }

            outsamps++;
            out++;
            pInst->Remainder += pInst->AdjustInRate;
        }

        pInst->Remainder -= pInst->OutRate;
        insamps--;

        /* remaining segments: between pcmbuf[in] and pcmbuf[in + 1] */
        while (insamps > 0)
        {
            while (pInst->Remainder < pInst->OutRate)
            {
                for (ch = 0; ch < Channels; ch++)
                {
#ifdef LINEARSRC_FIXPOINT
/* TODO: use high precision MUL32_30 to replace MULSHIFT32(x,y)<<2 */
                    sum  = MULSHIFT32(pcmbuf[Channels * (in + 1) + ch], pInst->IvtSFFactor * pInst->Remainder);
                    sum += MULSHIFT32(pcmbuf[Channels
                                             * (in + 0) + ch], pInst->IvtSFFactor * (pInst->OutRate - pInst->Remainder));
                    sum = CLIPTOSHORT(sum >>14);/* IvtSFFactor is 2.30 format */
                    pOutpcmBuf[Channels * out + ch] =sum << 16;

#else
                    HI_S64 s64sample0, s64sample1;
                    s64sample0 = (HI_S64)(pcmbuf[Channels * (in + 1) + ch]);
                    s64sample1 = (HI_S64)(pcmbuf[Channels * (in + 0) + ch]);
                    pOutpcmBuf[Channels * out
                               + ch] = (HI_S32)((s64sample0 * pInst->Remainder + s64sample1
                                                 * (pInst->OutRate - pInst->Remainder)) / pInst->OutRate);
#endif
                }

                outsamps++;
                out++;
                pInst->Remainder += pInst->AdjustInRate;
            }

            pInst->Remainder -= pInst->OutRate;
            in++;
            insamps--;
        }

        /* `in` now indexes the last frame of the input: keep it for the next call */
        for (ch = 0; ch < Channels; ch++)
        {
            (pInst->PreSample[ch]) = (HI_S32)(pcmbuf[Channels * in + ch]);
        }
    }

    return outsamps;
}