uint32_t quant_h263_inter(int16_t * coeff, int16_t * data, const uint32_t quant, uint16_t * mpeg_quant_matrices, const int16_t mult) { //const uint16_t mult = (uint16_t)multipliers[quant]; const uint16_t quant_m_2 = quant << 1; const uint16_t quant_d_2 = quant >> 1; uint32_t sum = 0; uint32_t i; _nassert((int)(coeff)%8 == 0); _nassert((int)(data)%8 == 0); for (i = 0; i < 64; i++) { int16_t abs_acLevel = abs(data[i])-quant_d_2; abs_acLevel = (abs_acLevel)< quant_m_2 ? 0 : (int16_t)((abs_acLevel * mult) >> SCALEBITS); sum += abs_acLevel; coeff[i] = data[i]>=0 ? abs_acLevel : -abs_acLevel; } return(sum); }
/*
 * DSPF_sp_dotprod - single-precision dot product.
 *
 *   x, y  input vectors of nx floats each
 *   nx    number of elements
 *
 * Returns x[0]*y[0] + x[1]*y[1] + ... + x[nx-1]*y[nx-1], accumulated
 * in index order starting from 0.
 *
 * The _nassert calls are optimizer hints, not runtime checks: they
 * state that nx is positive and a multiple of 8, and that both
 * vectors are 8-byte aligned.
 */
float DSPF_sp_dotprod(const float * x, const float * y, const int nx)
{
    float acc = 0.0f;
    int k;

    _nassert(nx > 0);
    _nassert((nx % 8) == 0);
    _nassert(((int)x % 8) == 0);
    _nassert(((int)y % 8) == 0);

    for (k = 0; k < nx; k++) {
        acc += x[k] * y[k];
    }

    return acc;
}
/*
 * twiddles - apply twiddle factors to a buffer of complex samples.
 *
 *   size, n  the loop handles n*size/2 64-bit words
 *   ix       *ix selects the twiddle offset: the factors are read
 *            starting at twi64k + (*ix)*n*size
 *   in       input samples, read as 64-bit words
 *   out      output samples, written as 64-bit words
 *
 * Each 64-bit input word is combined with the matching 64-bit twiddle
 * word through the _dcmpyr1 intrinsic and stored to the output.
 * NOTE(review): each word presumably packs two Cplx16 values, so one
 * iteration handles two complex multiplies -- confirm against the
 * Cplx16 definition.
 *
 * The _nassert calls and the MUST_ITERATE pragma are optimizer hints:
 * all three buffers are 8-byte aligned, and the loop runs at least 8
 * times with a trip count that is a multiple of 8.
 */
void twiddles(Param size, Param n, int* ix, Cplx16* in, Cplx16* out){
#if VERBOSE
    printf("Execute twiddles size %d n %d *ix %d\n", size, n, *ix);
#endif
    /* One 64-bit word as consumed and produced by _dcmpyr1. */
    typedef unsigned long long cplx16_pair;

    int row = (*ix)*n;

    cplx16_pair const* restrict src = (cplx16_pair*) in;
    cplx16_pair const* restrict tw = (cplx16_pair*) (twi64k+row*size);
    cplx16_pair * restrict dst = (cplx16_pair*) out;

    /* Alignment hints, in the order input / output / twiddles. */
    _nassert((int) src % 8 == 0);
    _nassert((int) dst % 8 == 0);
    _nassert((int) tw % 8 == 0);

    #pragma MUST_ITERATE(8,,8)
    for(int k=0; k<n*size/2; k++) {
        dst[k] = _dcmpyr1(src[k], tw[k]);
    }
}
/* quantize intra-block */ uint32_t quant_h263_intra(int16_t * coeff, int16_t * data, const uint32_t quant, const uint32_t scaler_lum, const uint32_t scaler_chr, const int16_t mult ) { int i; _nassert((int)(coeff)%8 == 0); _nassert((int)(data)%8 == 0); // coeff[0] = DIV_DIV(data[0], (int32_t) dcscalar); for (i = 0; i < 64*6; i++) { coeff[i] = (int16_t)((data[i] * mult) >> SCALEBITS)+((data[i]>=0) ? 0 : 1); } return(0); }
/*
 * DSP_maxval - return the largest value in a vector of shorts.
 *
 * The vector is read eight shorts per loop pass using two aligned
 * 64-bit loads.  Four 32-bit accumulators each hold two running
 * 16-bit maxima, updated with _max2, giving eight independent partial
 * results.  After the loop these are folded down to a single value.
 *
 * The loop advances by 8 elements per pass and is declared to run at
 * least once, so it reads whole groups of 8 shorts starting at x.
 * The _nassert is an optimizer hint that x is 8-byte aligned.
 */
short DSP_maxval
(
    const short *x,     /* x[nx] = input vector         */
    int nx              /* nx = number of elements      */
)
{
    /* View the input as 64-bit words: four shorts per word. */
    const long long *src = (const long long *)x;
    double quad_lo, quad_hi;
    int k;
    int folded;

    /* Each accumulator packs two shorts, both starting at the most */
    /* negative 16-bit value.                                        */
    int best01 = 0x80008000;
    int best23 = 0x80008000;
    int best45 = 0x80008000;
    int best67 = 0x80008000;

    _nassert((int)(src) % 8 == 0);
    #pragma MUST_ITERATE(1,,1);
    for (k = 0; k < nx; k += 8, src += 2) {
        quad_lo = _amemd8((void *)&src[0]);   /* shorts 0..3 of this group */
        quad_hi = _amemd8((void *)&src[1]);   /* shorts 4..7 of this group */

        best01 = _max2(best01, _lo(quad_lo));
        best23 = _max2(best23, _hi(quad_lo));
        best45 = _max2(best45, _lo(quad_hi));
        best67 = _max2(best67, _hi(quad_hi));
    }

    /* Fold the eight partial maxima down to two (one per 16-bit half). */
    best01 = _max2(_max2(best01, best23), _max2(best45, best67));

    /* Swap the two halves and take the max again, so both halves end */
    /* up holding the overall maximum.                                 */
    folded = _rotl(best01, 16);
    best01 = _max2(best01, folded);

    /* Only the low 16 bits are handed back to the caller. */
    return (best01 & 0xFFFF);
}
/*
 * dequant_h263_inter - reconstruct 64 inter coefficients from levels.
 *
 *   data                 output, 64 reconstructed coefficients
 *   coeff                input, 64 quantized levels
 *   quant                quantizer
 *   mpeg_quant_matrices  not referenced by this routine
 *
 * A zero level stays zero.  A non-zero level becomes
 * level * 2*quant, moved away from zero by an offset of quant when
 * quant is odd or quant - 1 when it is even.  Every result is passed
 * through CLIP with the bounds -2048 and 2047 before being stored.
 *
 * Always returns 0.
 */
uint32_t dequant_h263_inter(int16_t * data, int16_t * coeff, const uint32_t quant, uint16_t * mpeg_quant_matrices)
{
    const uint16_t step = quant << 1;
    const uint16_t offset = ((quant & 1) ? quant : quant - 1);
    int k;

    /* Optimizer hints: both buffers are 8-byte aligned. */
    _nassert(((int)(coeff) % 8) == 0);
    _nassert(((int)(data) % 8) == 0);

    for (k = 0; k < 64; k++) {
        int32_t level = coeff[k];

        if (level < 0) {
            level = level * step - offset;
        } else if (level > 0) {
            level = level * step + offset;
        }
        /* level == 0 is left untouched. */

        data[k] = CLIP(level, -2048, 2047);
    }

    return 0;
}