/*
 * Filter every complete block currently buffered in p->input_fifo and
 * place the result in p->output_fifo.
 *
 * Each pass copies f->dft_length input samples into freshly reserved output
 * space, but advances the input FIFO by only dft_length - overlap samples
 * (overlap = num_taps - 1), so consecutive blocks share `overlap` samples.
 * The same count is handed to fifo_trim_by() on the output FIFO right after
 * the reservation.  The copied block is transformed with lsx_rdft(..., 1, ...),
 * multiplied element-wise by f->coefs, and transformed back with
 * lsx_rdft(..., -1, ...).
 *
 * The loop stops as soon as fewer than dft_length samples remain buffered.
 */
static void filter(priv_t * p)
{
  filter_t const * f = p->filter_ptr;
  int const dft_len = f->dft_length;
  int const overlap = f->num_taps - 1;
  int const advance = dft_len - overlap;  /* samples consumed per block */
  int avail = max(0, fifo_occupancy(&p->input_fifo));
  int k;

  while (avail >= dft_len) {
    /* Take the read pointer before advancing the FIFO: the copy below
     * still reads dft_len samples starting from this position. */
    double const * src = fifo_read_ptr(&p->input_fifo);
    double * spec;

    fifo_read(&p->input_fifo, advance, NULL);
    avail -= advance;

    spec = fifo_reserve(&p->output_fifo, dft_len);
    fifo_trim_by(&p->output_fifo, overlap);
    memcpy(spec, src, dft_len * sizeof(*spec));

    lsx_rdft(dft_len, 1, spec, lsx_fft_br, lsx_fft_sc);

    /* Slots 0 and 1 are scaled on their own (presumably the purely real
     * DC and Nyquist terms of the packed spectrum -- confirm against
     * lsx_rdft's output layout). */
    spec[0] *= f->coefs[0];
    spec[1] *= f->coefs[1];

    /* Remaining slots are (re, im) pairs: complex-multiply by the coefs. */
    for (k = 2; k < dft_len; k += 2) {
      double const re = spec[k], im = spec[k + 1];

      spec[k]     = f->coefs[k]     * re - f->coefs[k + 1] * im;
      spec[k + 1] = f->coefs[k + 1] * re + f->coefs[k]     * im;
    }

    lsx_rdft(dft_len, -1, spec, lsx_fft_br, lsx_fft_sc);
  }
}
/*
 * Interpolating FFT filter stage: consume samples from p->fifo, spread them
 * out with zeros in between, filter the result in the frequency domain with
 * s->half_band[p->which], and append it to output_fifo.
 *
 * For every block:
 *   - dft_length output slots are reserved and zeroed;
 *   - input samples are written at indices p->rem, p->rem + tuple, ... while
 *     the index stays below dft_length (tuple = p->tuple);
 *   - the input FIFO is advanced by
 *     ceil((dft_length - overlap - p->rem) / tuple) samples, computed with
 *     div() so that the remainder can be used to update p->rem, the index
 *     of the first populated slot in the following block;
 *   - `overlap` (= num_taps - 1) is handed to fifo_trim_by() on the output
 *     FIFO right after the reservation.
 *
 * With SSE_ defined the spectral multiply handles four floats per iteration
 * through ZMUL2 (presumably two complex products at once -- confirm against
 * the macro); otherwise it is done one (re, im) pair at a time.
 */
static void double_sample(stage_t * p, fifo_t * output_fifo)
{
  rate_shared_t const * shared = p->shared;
  dft_filter_t const * f = &shared->half_band[p->which];
  int const dft_len = f->dft_length;
  int const overlap = f->num_taps - 1;
  int const tuple = p->tuple;
  int avail = max(0, fifo_occupancy(&p->fifo));
  sox_sample_t * spec;
  int k;
#ifdef SSE_
  const float * const coeff = f->coefs;
  __m128 coef, outp, sign;

  sign = SIGN;
#endif

  while (p->rem + tuple * avail >= dft_len) {
    /* quot: input samples to consume; rem: feeds the next block's offset. */
    div_t const step = div(dft_len - overlap - p->rem + tuple - 1, tuple);
    /* Take the read pointer before advancing the FIFO; the spreading loop
     * below reads from this position. */
    sox_sample_t const * src = fifo_read_ptr(&p->fifo);

    fifo_read(&p->fifo, step.quot, NULL);
    avail -= step.quot;

    spec = fifo_reserve_aligned(output_fifo, dft_len);
    fifo_trim_by(output_fifo, overlap);

    /* Zero the block, then drop one input sample every `tuple` slots. */
    memset(spec, 0, dft_len * sizeof(*spec));
    for (k = p->rem; k < dft_len; k += tuple)
      spec[k] = *src++;
    p->rem = tuple - 1 - step.rem;

    ff_rdft_x(dft_len, 1, spec, f->tmp_buf);

#ifdef SSE_
    /* Slots 0..3 are handled in scalar code; the vector loop starts at 4. */
    spec[0] *= coeff[0];
    spec[1] *= coeff[1];
    {
      sox_sample_t const re = spec[2], im = spec[3];

      spec[2] = coeff[2] * re - coeff[3] * im;
      spec[3] = coeff[3] * re + coeff[2] * im;
    }
    for (k = 4; k < dft_len; k += 4) {
      outp = _mm_load_ps(spec + k);
      coef = _mm_load_ps(coeff + k);
      _mm_store_ps(spec + k, ZMUL2(outp, coef, sign));
    }
#else
    /* Slots 0 and 1 are scaled on their own; the rest are (re, im) pairs
     * that get complex-multiplied by the filter coefficients. */
    spec[0] *= f->coefs[0];
    spec[1] *= f->coefs[1];
    for (k = 2; k < dft_len; k += 2) {
      sox_sample_t const re = spec[k], im = spec[k + 1];

      spec[k]     = f->coefs[k]     * re - f->coefs[k + 1] * im;
      spec[k + 1] = f->coefs[k + 1] * re + f->coefs[k]     * im;
    }
#endif

    ff_rdft_x(dft_len, -1, spec, f->tmp_buf);
  }
}