static void cook_imlt(COOKContext *q, float* inbuffer, float* outbuffer, float* mlt_tmp){ int i; /* prerotation */ for(i=0 ; i<q->mlt_size ; i+=2){ outbuffer[i] = (q->mlt_presin[i/2] * inbuffer[q->mlt_size-1-i]) + (q->mlt_precos[i/2] * inbuffer[i]); outbuffer[i+1] = (q->mlt_precos[i/2] * inbuffer[q->mlt_size-1-i]) - (q->mlt_presin[i/2] * inbuffer[i]); } /* FFT */ ff_fft_permute(&q->fft_ctx, (FFTComplex *) outbuffer); ff_fft_calc (&q->fft_ctx, (FFTComplex *) outbuffer); /* postrotation */ for(i=0 ; i<q->mlt_size ; i+=2){ mlt_tmp[i] = (q->mlt_postcos[(q->mlt_size-1-i)/2] * outbuffer[i+1]) + (q->mlt_postcos[i/2] * outbuffer[i]); mlt_tmp[q->mlt_size-1-i] = (q->mlt_postcos[(q->mlt_size-1-i)/2] * outbuffer[i]) - (q->mlt_postcos[i/2] * outbuffer[i+1]); } /* window and reorder */ for(i=0 ; i<q->mlt_size/2 ; i++){ outbuffer[i] = mlt_tmp[q->mlt_size/2-1-i] * q->mlt_window[i]; outbuffer[q->mlt_size-1-i]= mlt_tmp[q->mlt_size/2-1-i] * q->mlt_window[q->mlt_size-1-i]; outbuffer[q->mlt_size+i]= mlt_tmp[q->mlt_size/2+i] * q->mlt_window[q->mlt_size-1-i]; outbuffer[2*q->mlt_size-1-i]= -(mlt_tmp[q->mlt_size/2+i] * q->mlt_window[i]); } }
static void imdct_c(MDCTContext *s, const FFTSample *input, FFTSample *tmp) { int k, n4, n2, n, j; const uint16_t *revtab = s->fft.revtab; const FFTSample *tcos = s->tcos; const FFTSample *tsin = s->tsin; const FFTSample *in1, *in2; FFTComplex *z = (FFTComplex *)tmp; n = 1 << s->nbits; n2 = n >> 1; n4 = n >> 2; /* pre rotation */ in1 = input; in2 = input + n2 - 1; for(k = 0; k < n4; k++) { j=revtab[k]; CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]); in1 += 2; in2 -= 2; } ff_fft_calc(&s->fft, z); /* post rotation + reordering */ /* XXX: optimize */ for(k = 0; k < n4; k++) { CMUL(z[k].re, z[k].im, z[k].re, z[k].im, tcos[k], tsin[k]); } }
static void imc_imdct256(IMCContext *q) { int i; float re, im; /* prerotation */ for(i=0; i < COEFFS/2; i++) { q->samples[i].re = -(q->pre_coef1[i] * q->CWdecoded[COEFFS-1-i*2]) - (q->pre_coef2[i] * q->CWdecoded[i*2]); q->samples[i].im = (q->pre_coef2[i] * q->CWdecoded[COEFFS-1-i*2]) - (q->pre_coef1[i] * q->CWdecoded[i*2]); } /* FFT */ ff_fft_permute(&q->fft, q->samples); ff_fft_calc (&q->fft, q->samples); /* postrotation, window and reorder */ for(i = 0; i < COEFFS/2; i++) { re = (q->samples[i].re * q->post_cos[i]) + (-q->samples[i].im * q->post_sin[i]); im = (-q->samples[i].im * q->post_cos[i]) - (q->samples[i].re * q->post_sin[i]); q->out_samples[i*2] = (q->mdct_sine_window[COEFFS-1-i*2] * q->last_fft_im[i]) + (q->mdct_sine_window[i*2] * re); q->out_samples[COEFFS-1-i*2] = (q->mdct_sine_window[i*2] * q->last_fft_im[i]) - (q->mdct_sine_window[COEFFS-1-i*2] * re); q->last_fft_im[i] = im; } }
/** * Compute inverse MDCT of size N = 2^nbits * @param output N samples * @param input N/2 samples * @param tmp N/2 samples */ void ff_imdct_calc(MDCTContext *s, FFTSample *output, const FFTSample *input, FFTSample *tmp) { int k, n8, n4, n2, n, j; const uint16_t *revtab = s->fft.revtab; const FFTSample *tcos = s->tcos; const FFTSample *tsin = s->tsin; const FFTSample *in1, *in2; FFTComplex *z = (FFTComplex *)tmp; n = 1 << s->nbits; n2 = n >> 1; n4 = n >> 2; n8 = n >> 3; /* pre rotation */ in1 = input; in2 = input + n2 - 1; for(k = 0; k < n4; k++) { j=revtab[k]; CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]); in1 += 2; in2 -= 2; } ff_fft_calc(&s->fft, z); /* post rotation + reordering */ /* XXX: optimize */ for(k = 0; k < n4; k++) { CMUL(z[k].re, z[k].im, z[k].re, z[k].im, tcos[k], tsin[k]); } for(k = 0; k < n8; k++) { output[2*k] = -z[n8 + k].im; output[n2-1-2*k] = z[n8 + k].im; output[2*k+1] = z[n8-1-k].re; output[n2-1-2*k-1] = -z[n8-1-k].re; output[n2 + 2*k]=-z[k+n8].re; output[n-1- 2*k]=-z[k+n8].re; output[n2 + 2*k+1]=z[n8-k-1].im; output[n-2 - 2 * k] = z[n8-k-1].im; } }
/** * Compute MDCT of size N = 2^nbits * @param input N samples * @param out N/2 samples */ void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input) { int i, j, n, n8, n4, n2, n3; FFTSample re, im; const uint16_t *revtab = s->revtab; const FFTSample *tcos = s->tcos; const FFTSample *tsin = s->tsin; FFTComplex *x = (FFTComplex *)out; n = 1 << s->mdct_bits; n2 = n >> 1; n4 = n >> 2; n8 = n >> 3; n3 = 3 * n4; /* pre rotation */ for(i=0;i<n8;i++) { re = -input[2*i+3*n4] - input[n3-1-2*i]; im = -input[n4+2*i] + input[n4-1-2*i]; j = revtab[i]; CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]); re = input[2*i] - input[n2-1-2*i]; im = -(input[n2+2*i] + input[n-1-2*i]); j = revtab[n8 + i]; CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]); } ff_fft_calc(s, x); /* post rotation */ for(i=0;i<n8;i++) { FFTSample r0, i0, r1, i1; CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]); CMUL(i0, r1, x[n8+i ].re, x[n8+i ].im, -tsin[n8+i ], -tcos[n8+i ]); x[n8-i-1].re = r0; x[n8-i-1].im = i0; x[n8+i ].re = r1; x[n8+i ].im = i1; } }
/** * Compute MDCT of size N = 2^nbits * @param input N samples * @param out N/2 samples * @param tmp temporary storage of N/2 samples */ void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input, FFTSample *tmp) { int i, j, n, n8, n4, n2, n3; FFTSample re, im, re1, im1; const uint16_t *revtab = s->fft.revtab; const FFTSample *tcos = s->tcos; const FFTSample *tsin = s->tsin; FFTComplex *x = (FFTComplex *)tmp; n = 1 << s->nbits; n2 = n >> 1; n4 = n >> 2; n8 = n >> 3; n3 = 3 * n4; /* pre rotation */ for(i=0;i<n8;i++) { re = -input[2*i+3*n4] - input[n3-1-2*i]; im = -input[n4+2*i] + input[n4-1-2*i]; j = revtab[i]; CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]); re = input[2*i] - input[n2-1-2*i]; im = -(input[n2+2*i] + input[n-1-2*i]); j = revtab[n8 + i]; CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]); } ff_fft_calc(&s->fft, x); /* post rotation */ for(i=0;i<n4;i++) { re = x[i].re; im = x[i].im; CMUL(re1, im1, re, im, -tsin[i], -tcos[i]); out[2*i] = im1; out[n2-1-2*i] = re1; } }
static void ff_dct_calc_c(DCTContext *s, FFTSample *data) { int n = 1<<s->nbits; int i; #define ROTATE(i,n) (-M_PI*((n)-0.5f)*(i)/(n)) if (s->inverse) { for(i=0; i < n; i++) { s->data[i].re = (float) (2 * data[i] * cos(ROTATE(i,n))); s->data[i].im = (float) (2 * data[i] * sin(ROTATE(i,n))); } s->data[n].re = 0; s->data[n].im = 0; for(i=0; i<n-1; i++) { s->data[n+i+1].re = (float) (-2 * data[n - (i+1)] * cos(ROTATE(n+i+1,n))); s->data[n+i+1].im = (float) (-2 * data[n - (i+1)] * sin(ROTATE(n+i+1,n))); } }else{ for(i=0; i < n; i++) { s->data[i].re = data[n - (i+1)]; s->data[i].im = 0; s->data[n+i].re = data[i]; s->data[n+i].im = 0; } } ff_fft_permute(&s->fft, s->data); ff_fft_calc(&s->fft, s->data); if (s->inverse) { for(i=0; i < n; i++) data[i] = s->data[n-(i+1)].re / (2 * n); }else { for(i=0; i < n; i++) data[i] = (float) (s->data[i].re / (2 * cos(ROTATE(i,n)))); } #undef ROTATE }
/** * Compute the middle half of the inverse MDCT of size N = 2^nbits, * thus excluding the parts that can be derived by symmetry * @param output N/2 samples * @param input N/2 samples */ void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input) { int k, n8, n4, n2, n, j; const uint16_t *revtab = s->fft.revtab; const FFTSample *tcos = s->tcos; const FFTSample *tsin = s->tsin; const FFTSample *in1, *in2; FFTComplex *z = (FFTComplex *)output; n = 1 << s->nbits; n2 = n >> 1; n4 = n >> 2; n8 = n >> 3; /* pre rotation */ in1 = input; in2 = input + n2 - 1; for(k = 0; k < n4; k++) { j=revtab[k]; CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]); in1 += 2; in2 -= 2; } ff_fft_calc(&s->fft, z); /* post rotation + reordering */ output += n4; for(k = 0; k < n8; k++) { FFTSample r0, i0, r1, i1; CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]); CMUL(r1, i0, z[n8+k ].im, z[n8+k ].re, tsin[n8+k ], tcos[n8+k ]); z[n8-k-1].re = r0; z[n8-k-1].im = i0; z[n8+k ].re = r1; z[n8+k ].im = i1; } }
int main(int argc, char **argv) { FFTComplex *tab, *tab1, *tab_ref; FFTSample *tab2; int it, i, c; int do_speed = 0; int do_mdct = 0; int do_inverse = 0; FFTContext s1, *s = &s1; MDCTContext m1, *m = &m1; int fft_nbits, fft_size; fft_nbits = 9; for(;;) { c = getopt(argc, argv, "hsimn:"); if (c == -1) break; switch(c) { case 'h': help(); break; case 's': do_speed = 1; break; case 'i': do_inverse = 1; break; case 'm': do_mdct = 1; break; case 'n': fft_nbits = atoi(optarg); break; } } fft_size = 1 << fft_nbits; tab = av_malloc(fft_size * sizeof(FFTComplex)); tab1 = av_malloc(fft_size * sizeof(FFTComplex)); tab_ref = av_malloc(fft_size * sizeof(FFTComplex)); tab2 = av_malloc(fft_size * sizeof(FFTSample)); if (do_mdct) { if (do_inverse) av_log(NULL, AV_LOG_INFO,"IMDCT"); else av_log(NULL, AV_LOG_INFO,"MDCT"); ff_mdct_init(m, fft_nbits, do_inverse); } else { if (do_inverse) av_log(NULL, AV_LOG_INFO,"IFFT"); else av_log(NULL, AV_LOG_INFO,"FFT"); ff_fft_init(s, fft_nbits, do_inverse); fft_ref_init(fft_nbits, do_inverse); } av_log(NULL, AV_LOG_INFO," %d test\n", fft_size); /* generate random data */ for(i=0;i<fft_size;i++) { tab1[i].re = frandom(); tab1[i].im = frandom(); } /* checking result */ av_log(NULL, AV_LOG_INFO,"Checking...\n"); if (do_mdct) { if (do_inverse) { imdct_ref((float *)tab_ref, (float *)tab1, fft_nbits); ff_imdct_calc(m, tab2, (float *)tab1); check_diff((float *)tab_ref, tab2, fft_size); } else { mdct_ref((float *)tab_ref, (float *)tab1, fft_nbits); ff_mdct_calc(m, tab2, (float *)tab1); check_diff((float *)tab_ref, tab2, fft_size / 2); } } else { memcpy(tab, tab1, fft_size * sizeof(FFTComplex)); ff_fft_permute(s, tab); ff_fft_calc(s, tab); fft_ref(tab_ref, tab1, fft_nbits); check_diff((float *)tab_ref, (float *)tab, fft_size * 2); } /* do a speed test */ if (do_speed) { int64_t time_start, duration; int nb_its; av_log(NULL, AV_LOG_INFO,"Speed test...\n"); /* we measure during about 1 seconds */ nb_its = 1; for(;;) { time_start = gettime(); for(it=0;it<nb_its;it++) { if (do_mdct) { if (do_inverse) { ff_imdct_calc(m, (float *)tab, (float *)tab1); } else { ff_mdct_calc(m, (float *)tab, (float *)tab1); } } else { memcpy(tab, tab1, fft_size * sizeof(FFTComplex)); ff_fft_calc(s, tab); } } duration = gettime() - time_start; if (duration >= 1000000) break; nb_its *= 2; } av_log(NULL, AV_LOG_INFO,"time: %0.1f us/transform [total time=%0.2f s its=%d]\n", (double)duration / nb_its, (double)duration / 1000000.0, nb_its); } if (do_mdct) { ff_mdct_end(m); } else { ff_fft_end(s); } return 0; }
int main(int argc, char **argv) { FFTComplex *tab, *tab1, *tab_ref; FFTSample *tab2; int it, i, c; int do_speed = 0; int err = 1; enum tf_transform transform = TRANSFORM_FFT; int do_inverse = 0; FFTContext s1, *s = &s1; FFTContext m1, *m = &m1; RDFTContext r1, *r = &r1; DCTContext d1, *d = &d1; int fft_nbits, fft_size, fft_size_2; double scale = 1.0; AVLFG prng; av_lfg_init(&prng, 1); fft_nbits = 9; for(;;) { c = getopt(argc, argv, "hsimrdn:f:"); if (c == -1) break; switch(c) { case 'h': help(); break; case 's': do_speed = 1; break; case 'i': do_inverse = 1; break; case 'm': transform = TRANSFORM_MDCT; break; case 'r': transform = TRANSFORM_RDFT; break; case 'd': transform = TRANSFORM_DCT; break; case 'n': fft_nbits = atoi(optarg); break; case 'f': scale = atof(optarg); break; } } fft_size = 1 << fft_nbits; fft_size_2 = fft_size >> 1; tab = av_malloc(fft_size * sizeof(FFTComplex)); tab1 = av_malloc(fft_size * sizeof(FFTComplex)); tab_ref = av_malloc(fft_size * sizeof(FFTComplex)); tab2 = av_malloc(fft_size * sizeof(FFTSample)); switch (transform) { case TRANSFORM_MDCT: av_log(NULL, AV_LOG_INFO,"Scale factor is set to %f\n", scale); if (do_inverse) av_log(NULL, AV_LOG_INFO,"IMDCT"); else av_log(NULL, AV_LOG_INFO,"MDCT"); ff_mdct_init(m, fft_nbits, do_inverse, scale); break; case TRANSFORM_FFT: if (do_inverse) av_log(NULL, AV_LOG_INFO,"IFFT"); else av_log(NULL, AV_LOG_INFO,"FFT"); ff_fft_init(s, fft_nbits, do_inverse); fft_ref_init(fft_nbits, do_inverse); break; case TRANSFORM_RDFT: if (do_inverse) av_log(NULL, AV_LOG_INFO,"IDFT_C2R"); else av_log(NULL, AV_LOG_INFO,"DFT_R2C"); ff_rdft_init(r, fft_nbits, do_inverse ? IDFT_C2R : DFT_R2C); fft_ref_init(fft_nbits, do_inverse); break; case TRANSFORM_DCT: if (do_inverse) av_log(NULL, AV_LOG_INFO,"DCT_III"); else av_log(NULL, AV_LOG_INFO,"DCT_II"); ff_dct_init(d, fft_nbits, do_inverse ? DCT_III : DCT_II); break; } av_log(NULL, AV_LOG_INFO," %d test\n", fft_size); /* generate random data */ for (i = 0; i < fft_size; i++) { tab1[i].re = frandom(&prng); tab1[i].im = frandom(&prng); } /* checking result */ av_log(NULL, AV_LOG_INFO,"Checking...\n"); switch (transform) { case TRANSFORM_MDCT: if (do_inverse) { imdct_ref((float *)tab_ref, (float *)tab1, fft_nbits); ff_imdct_calc(m, tab2, (float *)tab1); err = check_diff((float *)tab_ref, tab2, fft_size, scale); } else { mdct_ref((float *)tab_ref, (float *)tab1, fft_nbits); ff_mdct_calc(m, tab2, (float *)tab1); err = check_diff((float *)tab_ref, tab2, fft_size / 2, scale); } break; case TRANSFORM_FFT: memcpy(tab, tab1, fft_size * sizeof(FFTComplex)); ff_fft_permute(s, tab); ff_fft_calc(s, tab); fft_ref(tab_ref, tab1, fft_nbits); err = check_diff((float *)tab_ref, (float *)tab, fft_size * 2, 1.0); break; case TRANSFORM_RDFT: if (do_inverse) { tab1[ 0].im = 0; tab1[fft_size_2].im = 0; for (i = 1; i < fft_size_2; i++) { tab1[fft_size_2+i].re = tab1[fft_size_2-i].re; tab1[fft_size_2+i].im = -tab1[fft_size_2-i].im; } memcpy(tab2, tab1, fft_size * sizeof(FFTSample)); tab2[1] = tab1[fft_size_2].re; ff_rdft_calc(r, tab2); fft_ref(tab_ref, tab1, fft_nbits); for (i = 0; i < fft_size; i++) { tab[i].re = tab2[i]; tab[i].im = 0; } err = check_diff((float *)tab_ref, (float *)tab, fft_size * 2, 0.5); } else { for (i = 0; i < fft_size; i++) { tab2[i] = tab1[i].re; tab1[i].im = 0; } ff_rdft_calc(r, tab2); fft_ref(tab_ref, tab1, fft_nbits); tab_ref[0].im = tab_ref[fft_size_2].re; err = check_diff((float *)tab_ref, (float *)tab2, fft_size, 1.0); } break; case TRANSFORM_DCT: memcpy(tab, tab1, fft_size * sizeof(FFTComplex)); ff_dct_calc(d, tab); if (do_inverse) { idct_ref(tab_ref, tab1, fft_nbits); } else { dct_ref(tab_ref, tab1, fft_nbits); } err = check_diff((float *)tab_ref, (float *)tab, fft_size, 1.0); break; } /* do a speed test */ if (do_speed) { int64_t time_start, duration; int nb_its; av_log(NULL, AV_LOG_INFO,"Speed test...\n"); /* we measure during about 1 seconds */ nb_its = 1; for(;;) { time_start = gettime(); for (it = 0; it < nb_its; it++) { switch (transform) { case TRANSFORM_MDCT: if (do_inverse) { ff_imdct_calc(m, (float *)tab, (float *)tab1); } else { ff_mdct_calc(m, (float *)tab, (float *)tab1); } break; case TRANSFORM_FFT: memcpy(tab, tab1, fft_size * sizeof(FFTComplex)); ff_fft_calc(s, tab); break; case TRANSFORM_RDFT: memcpy(tab2, tab1, fft_size * sizeof(FFTSample)); ff_rdft_calc(r, tab2); break; case TRANSFORM_DCT: memcpy(tab2, tab1, fft_size * sizeof(FFTSample)); ff_dct_calc(d, tab2); break; } } duration = gettime() - time_start; if (duration >= 1000000) break; nb_its *= 2; } av_log(NULL, AV_LOG_INFO,"time: %0.1f us/transform [total time=%0.2f s its=%d]\n", (double)duration / nb_its, (double)duration / 1000000.0, nb_its); } switch (transform) { case TRANSFORM_MDCT: ff_mdct_end(m); break; case TRANSFORM_FFT: ff_fft_end(s); break; case TRANSFORM_RDFT: ff_rdft_end(r); break; case TRANSFORM_DCT: ff_dct_end(d); break; } av_free(tab); av_free(tab1); av_free(tab2); av_free(tab_ref); av_free(exptab); return err; }