uint32_t dequant_mpeg_intra_mxu(int16_t * data, // const int16_t * coeff, const uint32_t quant, const uint32_t dcscalar, const uint16_t * mpeg_quant_matrices) { const uint16_t *intra_matrix = mpeg_quant_matrices; int32_t i = 0; /* deal with data[0] then save to xr6 */ S32I2M(xr3,-2048); S32I2M(xr4,2047); S32I2M(xr5,quant); S32MUL(xr0,xr6,(int32_t)data[0],dcscalar); S32LUI(xr9,1,0); D16MUL_WW(xr0,xr6,xr9,xr6); S32MIN(xr6,xr6,xr4); S32MAX(xr6,xr6,xr3); data-=2; intra_matrix-=2; for (i = 0; i < 32; i++) { S32LDI(xr1,data,4); S32LDI(xr2,intra_matrix,4); D16MUL_LW(xr13,xr9,xr1,xr14); // resave values of data[i] and data[i+1] D16CPS(xr1,xr1,xr1); /* abs(level) *( intra_matrix[i]*quant) >> 3 */ D16MUL_LW(xr7,xr5,xr2,xr8); S32SFL(xr15,xr7,xr8,xr2,3); D16MUL_WW(xr7,xr1,xr2,xr8); D32SLR(xr7,xr7,xr8,xr8,3); /* -2048 < data[i+1] < 2047 */ S32CPS(xr7,xr7,xr13); S32MAX(xr10,xr7,xr3); S32MIN(xr10,xr10,xr4); /* -2048 < data[i] < 2047 */ S32CPS(xr8,xr8,xr14); S32MAX(xr11,xr8,xr3); S32MIN(xr11,xr11,xr4); S32SFL(xr0,xr10,xr11,xr12,3); S32STD(xr12,data,0); } S16STD(xr6,data,-62*2,0);//xr6 to data[0] return(0); }
/*
 * imdct_half_fix_c
 *
 * Fixed-point half inverse MDCT: pre-rotation into bit-reversed order,
 * complex FFT through s->fft.fft_calc, then post-rotation and reordering.
 *
 *   s       MDCT context; supplies nbits, the tcos/tsin twiddle tables and
 *           the embedded FFT context (revtab, fft_calc).
 *   output  result buffer, reinterpreted as an array of FFTComplex_fix.
 *   input   input coefficients, read from both ends towards the middle.
 *
 * The active pre-rotation path uses Ingenic MXU intrinsic macros and
 * produces two complex outputs per iteration (indices k and n4-k-1), which
 * is why its loop runs to n8 only.  The `#if 0` branch is the older
 * single-output C form kept for reference; it is not a drop-in replacement
 * under the same loop bound.
 */
void imdct_half_fix_c(MDCTContext_fix *s, FFTSample_fix *output, const FFTSample_fix *input)
{
    /* PMON_ON(qmf); */
    int k, n8, n4, n2, n, j,j1;
    const FFTSample_fix *in1, *in2;
    const unsigned short *revtab = s->fft.revtab;
    const FFTSample_fix *tcos = s->tcos;
    const FFTSample_fix *tsin = s->tsin;
    FFTComplex_fix *z = (FFTComplex_fix *)output;

    /* trailing numbers are the values noted by the original author */
    n = 1 << s->nbits;  /* 64 */
    n2 = n >> 1;        /* 32 */
    n4 = n >> 2;        /* 16 */
    n8 = n >> 3;        /* 8 */

    /* pre rotation */
    in1 = input;            /* head */
    in2 = input + n2 - 1;   /* tail */
    for(k = 0; k < n8; k++) {
#if 0
        j=revtab[k];
        FFT_CMUL_fix(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);
        in1 += 2;
        in2 -= 2;
#else
        FFTSample_fix _are,_bre,_aim,_bim,are,aim;

        _are = *in2;
        _bre = tcos[k];
        _aim = *in1;
        _bim = tsin[k];

        j=revtab[k];
        n=n4-k-1;           /* `n` is reused as scratch: mirrored index */
        j1=revtab[n];

        /* first output: (_are + i*_aim) * (_bre + i*_bim) */
        S32MUL(xr1,xr2, _are, _bre);
        S32MUL(xr3, xr4, _are, _bim);

        in2--;
        are = *in2;

        /* second output starts accumulating with the next tail sample */
        S32MUL(xr7,xr8, are, _bre);
        S32MUL(xr9, xr10, are, _bim);

        S32MSUB(xr1, xr2, _aim, _bim);
        S32MADD(xr3, xr4, _aim, _bre);

        in1++;
        aim = *in1;

        D32SLL(xr5,xr1,xr3,xr6,1);

        S32MSUB(xr7, xr8, aim, _bim);
        S32MADD(xr9, xr10, aim, _bre);

        z[j].re=S32M2I(xr5);
        D32SLL(xr11,xr7,xr9,xr12,1);
        z[j].im=S32M2I(xr6);

        in1++;
        in2--;

        z[j1].re=S32M2I(xr11);
        z[j1].im=S32M2I(xr12);
#endif
    }

    s->fft.fft_calc(&s->fft, z);

    /* post rotation + reordering */
    /* XXX: optimize */
    for(k = 0; k < n8; k++) {
        FFTSample_fix r0, i0, r1, i1;

        FFT_CMUL_fix(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
        FFT_CMUL_fix(r1, i0, z[n8+k  ].im, z[n8+k  ].re, tsin[n8+k  ], tcos[n8+k  ]);

        /* the imaginary parts are swapped between the two mirrored slots */
        z[n8-k-1].re = r0;
        z[n8-k-1].im = i0;
        z[n8+k  ].re = r1;
        z[n8+k  ].im = i1;
    }
    /* PMON_OFF(qmf); */
}
/*
 * fft_calc_fix_inverse
 *
 * In-place fixed-point inverse FFT over z[0 .. (1 << s->nbits) - 1].
 * The input is expected to be in the order the butterflies assume
 * (presumably bit-reversed by the caller -- TODO confirm).
 *
 *   s  FFT context; nbits gives log2 of the transform size and exptab
 *      the twiddle factors.
 *   z  complex buffer, transformed in place.
 *
 * The `#if 1` branches are the Ingenic MXU versions.  They address z with
 * byte offsets 0/4/8/..., i.e. they assume FFTComplex_fix is a pair of
 * 32-bit fields {re, im}.  The `#else` branches are the portable C
 * reference for the same butterflies.
 *
 * NOTE(review): the pass-1 loop is a do/while on `np >> 2`, so transform
 * sizes below 4 are not supported.
 */
void fft_calc_fix_inverse(FFTContext_fix *s, FFTComplex_fix *z)
{
    int ln = s->nbits;
    int j, np, np2;
    int nblocks, nloops;
    register FFTComplex_fix *p, *q;
    FFTComplex_fix *exptab = s->exptab;
    int l;

    np = 1 << ln;

    /* function is :butterfly all 4 step ,N=16 */

    /* pass 0 -- disabled: its butterflies are folded into pass 1 below */
#if 0
    p=&z[0];
    j=(np >> 1);
    do {
        /* X(k) = G(k)+H(k)*W (= e j*0) */
        FFT_BF_fix(p[0].re, p[0].im, p[1].re, p[1].im,
                   p[0].re, p[0].im, p[1].re, p[1].im);
        p+=2;
    } while (--j);
#endif

    /* pass 1: radix-2 butterflies on (0,1) and (2,3), then on (0,2), (1,3) */
    p=&z[0];
    j=np >> 2;
    do {
#if 1
        /* xr1..xr8 = p[0].re, p[0].im, p[1].re, ... , p[3].im */
        S32LDD(xr1,p,0);
        S32LDD(xr2,p,4);
        S32LDD(xr3,p,8);
        S32LDD(xr4,p,12);
        S32LDD(xr5,p,16);
        S32LDD(xr6,p,20);
        S32LDD(xr7,p,24);
        S32LDD(xr8,p,28);

        D32ADD_AS(xr1,xr1,xr3,xr3);
        D32ADD_AS(xr2,xr2,xr4,xr4);
        D32ADD_AS(xr5,xr5,xr7,xr7);
        D32ADD_AS(xr6,xr6,xr8,xr8);

        D32ADD_AS(xr1,xr1,xr5,xr5);
        D32ADD_AS(xr2,xr2,xr6,xr6);
        /* (1,3) butterfly uses (-p[3].im, p[3].re), see reference branch */
        D32ADD_SA(xr3,xr3,xr8,xr9);
        D32ADD_AS(xr4,xr4,xr7,xr8);

        S32STD(xr1,p,0);
        S32STD(xr2,p,4);
        S32STD(xr3,p,8);
        S32STD(xr4,p,12);
        S32STD(xr5,p,16);
        S32STD(xr6,p,20);
        S32STD(xr9,p,24);
        S32STD(xr8,p,28);
#else
        FFT_BF_fix(p[0].re, p[0].im, p[1].re, p[1].im,
                   p[0].re, p[0].im, p[1].re, p[1].im);
        FFT_BF_fix(p[2].re, p[2].im, p[3].re, p[3].im,
                   p[2].re, p[2].im, p[3].re, p[3].im);
        FFT_BF_fix(p[0].re, p[0].im, p[2].re, p[2].im,
                   p[0].re, p[0].im, p[2].re, p[2].im);
        FFT_BF_fix(p[1].re, p[1].im, p[3].re, p[3].im,
                   p[1].re, p[1].im, -p[3].im, p[3].re);
#endif
        p+=4;
    } while (--j);

    /* pass 2 .. ln-1 */
    nblocks = np >> 3;
    nloops = 1 << 2;
    np2 = np >> 1;
    do {
        p = z;
        q = z + nloops;
        for (j = 0; j < nblocks; ++j) {
            /* first butterfly of each block has a trivial twiddle */
#if 1
            S32LDD(xr1,p,0);
            S32LDD(xr2,p,4);
            S32LDD(xr3,q,0);
            S32LDD(xr4,q,4);
            D32ADD_AS(xr1,xr1,xr3,xr3);
            D32ADD_AS(xr2,xr2,xr4,xr4);
            S32STD(xr1,p,0);
            S32STD(xr2,p,4);
            S32STD(xr3,q,0);
            S32STD(xr4,q,4);
#else
            FFT_BF_fix(p->re, p->im, q->re, q->im,
                       p->re, p->im, q->re, q->im);
#endif
            p++;
            q++;
            for(l = nblocks; l < np2; l += nblocks) {
                /* FFT_CMUL_fix( ) function is : (-j 2*PI/N *km) H(i) * E */
#if 1
                FFTSample_fix _are = exptab[l].re;
                FFTSample_fix _bre = q->re;
                FFTSample_fix _aim = exptab[l].im;
                FFTSample_fix _bim = q->im;

                /* complex multiply exptab[l] * (*q), interleaved with loads of *p */
                S32MUL(xr1, xr2, _are, _bre);
                S32MUL(xr5, xr6, _are, _bim);
                S32LDD(xr7,p,0);
                S32MSUB(xr1, xr2, _aim, _bim);
                S32MADD(xr5, xr6, _aim, _bre);
                S32LDD(xr8,p,4);
                D32SLL(xr1, xr1, xr5, xr5, 1);

                /* butterfly of *p with the rotated value */
                D32ADD_AS(xr7,xr7,xr1,xr1);
                D32ADD_AS(xr8,xr8,xr5,xr5);
                S32STD(xr7,p,0);
                S32STD(xr8,p,4);
                S32STD(xr1,q,0);
                S32STD(xr5,q,4);
#else
                FFTSample_fix tmp_re, tmp_im;

                FFT_CMUL_fix(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
                FFT_BF_fix(p->re, p->im, q->re, q->im,
                           p->re, p->im, tmp_re, tmp_im);
#endif
                p++;
                q++;
            }
            p += nloops;
            q += nloops;
        }
        nblocks = nblocks >> 1;
        nloops = nloops << 1;
    } while (nblocks);
}
uint32_t dequant_h263_intra_mxu(int16_t * data, uint8_t yuv_len, const uint32_t quant, const uint32_t dcscalar, const uint16_t * mpeg_quant_matrices) { uint32_t i = 0; S32LUI(xr9,1,0); S32I2M(xr1,quant); D32SLL(xr5,xr1,xr0,xr0,1);// quant_m_2 /* quant_add */ S32AND(xr15,xr1,xr9); S32MOVN(xr2,xr15,xr1); D32ADD_SS(xr1,xr1,xr9,xr3); S32MOVZ(xr2,xr15,xr1); S32I2M(xr3,-2048); S32I2M(xr4,2047); /* part1 */ //S32MUL(xr4,xr6,*data,dcscalar); S32MUL(xr0,xr6,(int32_t)data[0],dcscalar); D16MUL_WW(xr0,xr6,xr9,xr6); S32MIN(xr6,xr6,xr4); S32MAX(xr6,xr6,xr3); /* part2 */ yuv_len = ((yuv_len&~1)+3)>>1; data-=2; for (i = 0; i < yuv_len; i++) { S32LDI(xr1,data,4); D16MUL_LW(xr13,xr9,xr1,xr14);// resave sign of data[i] and data[i+1] D16CPS(xr1,xr1,xr1); /* quant_m_2 * acLevel + quant_add */ D16MUL_LW(xr7,xr5,xr1,xr8); D32ADD_AA(xr7,xr7,xr2,xr0); D32ADD_AA(xr8,xr8,xr2,xr0); #if 0 /* -2048 < data[i+1] <2047 */ S32CPS(xr7,xr7,xr13); S32MAX(xr10,xr7,xr3); S32MIN(xr10,xr10,xr4); S32MOVZ(xr10,xr13,xr13); /* -2048 < data[i] <2047 */ S32CPS(xr8,xr8,xr14); S32MAX(xr11,xr8,xr3); S32MIN(xr11,xr11,xr4); S32MOVZ(xr11,xr14,xr14); #else /* -2048 < data[i+1] <2047 */ S32AND(xr7,xr7,xr4); S32CPS(xr10,xr7,xr13); S32MOVZ(xr10,xr13,xr13); /* -2048 < data[i] <2047 */ S32AND(xr8,xr8,xr4); S32CPS(xr11,xr8,xr14); S32MOVZ(xr11,xr14,xr14); #endif S32SFL(xr0,xr10,xr11,xr12,3); S32STD(xr12,data,0); } S16STD(xr6,data-(yuv_len*2-2),0,0);// data[0] return(0); }
/* * NAME: layer->II() * DESCRIPTION: decode a single Layer II frame */ int mad_layer_II(struct mad_stream *stream, struct mad_frame *frame) { struct mad_header *header = &frame->header; struct mad_bitptr start; unsigned int index, sblimit, nbal, nch, bound, gr, ch, s, sb; unsigned char const *offsets; unsigned char allocation[2][32], scfsi[2][32], scalefactor[2][32][3]; mad_fixed_t samples[3]; nch = MAD_NCHANNELS(header); if (header->flags & MAD_FLAG_LSF_EXT) index = 4; else if (header->flags & MAD_FLAG_FREEFORMAT) goto freeformat; else { unsigned long bitrate_per_channel; bitrate_per_channel = header->bitrate; if (nch == 2) { bitrate_per_channel /= 2; # if defined(OPT_STRICT) /* * ISO/IEC 11172-3 allows only single channel mode for 32, 48, 56, and * 80 kbps bitrates in Layer II, but some encoders ignore this * restriction. We enforce it if OPT_STRICT is defined. */ if (bitrate_per_channel <= 28000 || bitrate_per_channel == 40000) { stream->error = MAD_ERROR_BADMODE; return -1; } # endif } else { /* nch == 1 */ if (bitrate_per_channel > 192000 && bitrate_per_channel != 320000) { /* * ISO/IEC 11172-3 does not allow single channel mode for 224, 256, * 320, or 384 kbps bitrates in Layer II. */ stream->error = MAD_ERROR_BADMODE; return -1; } } if (bitrate_per_channel <= 48000) index = (header->samplerate == 32000) ? 3 : 2; else if (bitrate_per_channel <= 80000) index = 0; else { freeformat: index = (header->samplerate == 48000) ? 
0 : 1; } } sblimit = sbquant_table[index].sblimit; offsets = sbquant_table[index].offsets; bound = 32; if (header->mode == MAD_MODE_JOINT_STEREO) { header->flags |= MAD_FLAG_I_STEREO; bound = 4 + header->mode_extension * 4; } if (bound > sblimit) bound = sblimit; start = stream->ptr; /* decode bit allocations */ for (sb = 0; sb < bound; ++sb) { nbal = bitalloc_table[offsets[sb]].nbal; for (ch = 0; ch < nch; ++ch) allocation[ch][sb] = mad_bit_read(&stream->ptr, nbal); } for (sb = bound; sb < sblimit; ++sb) { nbal = bitalloc_table[offsets[sb]].nbal; allocation[0][sb] = allocation[1][sb] = mad_bit_read(&stream->ptr, nbal); } /* decode scalefactor selection info */ for (sb = 0; sb < sblimit; ++sb) { for (ch = 0; ch < nch; ++ch) { if (allocation[ch][sb]) scfsi[ch][sb] = mad_bit_read(&stream->ptr, 2); } } /* check CRC word */ if (header->flags & MAD_FLAG_PROTECTION) { header->crc_check = mad_bit_crc(start, mad_bit_length(&start, &stream->ptr), header->crc_check); if (header->crc_check != header->crc_target && !(frame->options & MAD_OPTION_IGNORECRC)) { stream->error = MAD_ERROR_BADCRC; return -1; } } /* decode scalefactors */ for (sb = 0; sb < sblimit; ++sb) { for (ch = 0; ch < nch; ++ch) { if (allocation[ch][sb]) { scalefactor[ch][sb][0] = mad_bit_read(&stream->ptr, 6); switch (scfsi[ch][sb]) { case 2: scalefactor[ch][sb][2] = scalefactor[ch][sb][1] = scalefactor[ch][sb][0]; break; case 0: scalefactor[ch][sb][1] = mad_bit_read(&stream->ptr, 6); /* fall through */ case 1: case 3: scalefactor[ch][sb][2] = mad_bit_read(&stream->ptr, 6); } if (scfsi[ch][sb] & 1) scalefactor[ch][sb][1] = scalefactor[ch][sb][scfsi[ch][sb] - 1]; # if defined(OPT_STRICT) /* * Scalefactor index 63 does not appear in Table B.1 of * ISO/IEC 11172-3. Nonetheless, other implementations accept it, * so we only reject it if OPT_STRICT is defined. 
*/ if (scalefactor[ch][sb][0] == 63 || scalefactor[ch][sb][1] == 63 || scalefactor[ch][sb][2] == 63) { stream->error = MAD_ERROR_BADSCALEFACTOR; return -1; } # endif } } } /* decode samples */ for (gr = 0; gr < 12; ++gr) { for (sb = 0; sb < bound; ++sb) { for (ch = 0; ch < nch; ++ch) { if ((index = allocation[ch][sb])) { #ifdef JZ4750_OPT mad_fixed_t sf_val; mad_fixed_t *sb_ptr; sb_ptr = &(frame->sbsample[ch][3*gr-1][sb]); sf_val = sf_table[scalefactor[ch][sb][gr/4]]; index = offset_table[bitalloc_table[offsets[sb]].offset][index - 1]; II_samples(&stream->ptr, &qc_table[index], samples); S32MUL(xr1,xr2, samples[0], sf_val); S32MUL(xr3,xr4, samples[1], sf_val); S32MUL(xr5,xr6, samples[2], sf_val); S32EXTR(xr1,xr2,(32 - MAD_F_SCALEBITS), 31); S32EXTR(xr3,xr4,(32 - MAD_F_SCALEBITS), 31); S32EXTR(xr5,xr6,(32 - MAD_F_SCALEBITS), 31); D32SLL(xr1,xr1,xr3,xr3,1); D32SLL(xr5,xr5,xr0,xr0,1); S32SDIV(xr1, sb_ptr, 32, 2); S32SDIV(xr3, sb_ptr, 32, 2); S32SDIV(xr5, sb_ptr, 32, 2); #else index = offset_table[bitalloc_table[offsets[sb]].offset][index - 1]; II_samples(&stream->ptr, &qc_table[index], samples); for (s = 0; s < 3; ++s) { frame->sbsample[ch][3 * gr + s][sb] = mad_f_mul(samples[s], sf_table[scalefactor[ch][sb][gr / 4]]); } #endif } else { for (s = 0; s < 3; ++s) frame->sbsample[ch][3 * gr + s][sb] = 0; } } } for (sb = bound; sb < sblimit; ++sb) { if ((index = allocation[0][sb])) { index = offset_table[bitalloc_table[offsets[sb]].offset][index - 1]; II_samples(&stream->ptr, &qc_table[index], samples); for (ch = 0; ch < nch; ++ch) { #ifdef JZ4750_OPT mad_fixed_t sf_val; mad_fixed_t *sb_ptr; sb_ptr = &(frame->sbsample[ch][3*gr-1][sb]); sf_val = sf_table[scalefactor[ch][sb][gr/4]]; S32MUL(xr1,xr2, samples[0], sf_val); S32MUL(xr3,xr4, samples[1], sf_val); S32MUL(xr5,xr6, samples[2], sf_val); S32EXTR(xr1,xr2,(32 - MAD_F_SCALEBITS), 31); S32EXTR(xr3,xr4,(32 - MAD_F_SCALEBITS), 31); S32EXTR(xr5,xr6,(32 - MAD_F_SCALEBITS), 31); D32SLL(xr1,xr1,xr3,xr3,1); 
D32SLL(xr5,xr5,xr0,xr0,1); S32SDIV(xr1, sb_ptr, 32, 2); S32SDIV(xr3, sb_ptr, 32, 2); S32SDIV(xr5, sb_ptr, 32, 2); #else for (s = 0; s < 3; ++s) { frame->sbsample[ch][3 * gr + s][sb] = mad_f_mul(samples[s], sf_table[scalefactor[ch][sb][gr / 4]]); } #endif } } else { for (ch = 0; ch < nch; ++ch) { for (s = 0; s < 3; ++s) frame->sbsample[ch][3 * gr + s][sb] = 0; } } } for (ch = 0; ch < nch; ++ch) { for (s = 0; s < 3; ++s) { for (sb = sblimit; sb < 32; ++sb) frame->sbsample[ch][3 * gr + s][sb] = 0; } } } return 0; }