static inline void calc_output_quad (SINC_FILTER *filter, increment_t increment, increment_t start_filter_index, double scale, float * output) { double fraction, left [4], right [4], icoeff ; increment_t filter_index, max_filter_index ; int data_index, coeff_count, indx ; /* Convert input parameters into fixed point. */ max_filter_index = int_to_fp (filter->coeff_half_len) ; /* First apply the left half of the filter. */ filter_index = start_filter_index ; coeff_count = (max_filter_index - filter_index) / increment ; filter_index = filter_index + coeff_count * increment ; data_index = filter->b_current - filter->channels * coeff_count ; left [0] = left [1] = left [2] = left [3] = 0.0 ; do { fraction = fp_to_double (filter_index) ; indx = fp_to_int (filter_index) ; icoeff = filter->coeffs [indx] + fraction * (filter->coeffs [indx + 1] - filter->coeffs [indx]) ; left [0] += icoeff * filter->buffer [data_index] ; left [1] += icoeff * filter->buffer [data_index + 1] ; left [2] += icoeff * filter->buffer [data_index + 2] ; left [3] += icoeff * filter->buffer [data_index + 3] ; filter_index -= increment ; data_index = data_index + 4 ; } while (filter_index >= MAKE_INCREMENT_T (0)) ; /* Now apply the right half of the filter. */ filter_index = increment - start_filter_index ; coeff_count = (max_filter_index - filter_index) / increment ; filter_index = filter_index + coeff_count * increment ; data_index = filter->b_current + filter->channels * (1 + coeff_count) ; right [0] = right [1] = right [2] = right [3] = 0.0 ; do { fraction = fp_to_double (filter_index) ; indx = fp_to_int (filter_index) ; icoeff = filter->coeffs [indx] + fraction * (filter->coeffs [indx + 1] - filter->coeffs [indx]) ; right [0] += icoeff * filter->buffer [data_index] ; right [1] += icoeff * filter->buffer [data_index + 1] ; right [2] += icoeff * filter->buffer [data_index + 2] ; right [3] += icoeff * filter->buffer [data_index + 3] ; filter_index -= increment ; data_index = data_index - 4 ; } while (filter_index > MAKE_INCREMENT_T (0)) ; output [0] = scale * (left [0] + right [0]) ; output [1] = scale * (left [1] + right [1]) ; output [2] = scale * (left [2] + right [2]) ; output [3] = scale * (left [3] + right [3]) ; } /* calc_output_quad */
static inline double calc_output_single (SINC_FILTER *filter, increment_t increment, increment_t start_filter_index) { double fraction, left, right, icoeff ; increment_t filter_index, max_filter_index ; int data_index, coeff_count, indx ; /* Convert input parameters into fixed point. */ max_filter_index = int_to_fp (filter->coeff_half_len) ; /* First apply the left half of the filter. */ filter_index = start_filter_index ; coeff_count = (max_filter_index - filter_index) / increment ; filter_index = filter_index + coeff_count * increment ; data_index = filter->b_current - coeff_count ; left = 0.0 ; do { fraction = fp_to_double (filter_index) ; indx = fp_to_int (filter_index) ; icoeff = filter->coeffs [indx] + fraction * (filter->coeffs [indx + 1] - filter->coeffs [indx]) ; left += icoeff * filter->buffer [data_index] ; filter_index -= increment ; data_index = data_index + 1 ; } while (filter_index >= MAKE_INCREMENT_T (0)) ; /* Now apply the right half of the filter. */ filter_index = increment - start_filter_index ; coeff_count = (max_filter_index - filter_index) / increment ; filter_index = filter_index + coeff_count * increment ; data_index = filter->b_current + 1 + coeff_count ; right = 0.0 ; do { fraction = fp_to_double (filter_index) ; indx = fp_to_int (filter_index) ; icoeff = filter->coeffs [indx] + fraction * (filter->coeffs [indx + 1] - filter->coeffs [indx]) ; right += icoeff * filter->buffer [data_index] ; filter_index -= increment ; data_index = data_index - 1 ; } while (filter_index > MAKE_INCREMENT_T (0)) ; return (left + right) ; } /* calc_output_single */
static inline void calc_output_multi (SINC_FILTER *filter, increment_t increment, increment_t start_filter_index, int channels, double scale, float * output) { double fraction, icoeff ; /* The following line is 1999 ISO Standard C. If your compiler complains, get a better compiler. */ double *left, *right ; increment_t filter_index, max_filter_index ; int data_index, coeff_count, indx, ch ; left = filter->left_calc ; right = filter->right_calc ; /* Convert input parameters into fixed point. */ max_filter_index = int_to_fp (filter->coeff_half_len) ; /* First apply the left half of the filter. */ filter_index = start_filter_index ; coeff_count = (max_filter_index - filter_index) / increment ; filter_index = filter_index + coeff_count * increment ; data_index = filter->b_current - channels * coeff_count ; memset (left, 0, sizeof (left [0]) * channels) ; do { fraction = fp_to_double (filter_index) ; indx = fp_to_int (filter_index) ; icoeff = filter->coeffs [indx] + fraction * (filter->coeffs [indx + 1] - filter->coeffs [indx]) ; /* ** Duff's Device. ** See : http://en.wikipedia.org/wiki/Duff's_device */ ch = channels ; do { switch (ch % 8) { default : ch -- ; left [ch] += icoeff * filter->buffer [data_index + ch] ; case 7 : ch -- ; left [ch] += icoeff * filter->buffer [data_index + ch] ; case 6 : ch -- ; left [ch] += icoeff * filter->buffer [data_index + ch] ; case 5 : ch -- ; left [ch] += icoeff * filter->buffer [data_index + ch] ; case 4 : ch -- ; left [ch] += icoeff * filter->buffer [data_index + ch] ; case 3 : ch -- ; left [ch] += icoeff * filter->buffer [data_index + ch] ; case 2 : ch -- ; left [ch] += icoeff * filter->buffer [data_index + ch] ; case 1 : ch -- ; left [ch] += icoeff * filter->buffer [data_index + ch] ; } ; } while (ch > 0) ; filter_index -= increment ; data_index = data_index + channels ; } while (filter_index >= MAKE_INCREMENT_T (0)) ; /* Now apply the right half of the filter. */ filter_index = increment - start_filter_index ; coeff_count = (max_filter_index - filter_index) / increment ; filter_index = filter_index + coeff_count * increment ; data_index = filter->b_current + channels * (1 + coeff_count) ; memset (right, 0, sizeof (right [0]) * channels) ; do { fraction = fp_to_double (filter_index) ; indx = fp_to_int (filter_index) ; icoeff = filter->coeffs [indx] + fraction * (filter->coeffs [indx + 1] - filter->coeffs [indx]) ; ch = channels ; do { switch (ch % 8) { default : ch -- ; right [ch] += icoeff * filter->buffer [data_index + ch] ; case 7 : ch -- ; right [ch] += icoeff * filter->buffer [data_index + ch] ; case 6 : ch -- ; right [ch] += icoeff * filter->buffer [data_index + ch] ; case 5 : ch -- ; right [ch] += icoeff * filter->buffer [data_index + ch] ; case 4 : ch -- ; right [ch] += icoeff * filter->buffer [data_index + ch] ; case 3 : ch -- ; right [ch] += icoeff * filter->buffer [data_index + ch] ; case 2 : ch -- ; right [ch] += icoeff * filter->buffer [data_index + ch] ; case 1 : ch -- ; right [ch] += icoeff * filter->buffer [data_index + ch] ; } ; } while (ch > 0) ; filter_index -= increment ; data_index = data_index - channels ; } while (filter_index > MAKE_INCREMENT_T (0)) ; ch = channels ; do { switch (ch % 8) { default : ch -- ; output [ch] = scale * (left [ch] + right [ch]) ; case 7 : ch -- ; output [ch] = scale * (left [ch] + right [ch]) ; case 6 : ch -- ; output [ch] = scale * (left [ch] + right [ch]) ; case 5 : ch -- ; output [ch] = scale * (left [ch] + right [ch]) ; case 4 : ch -- ; output [ch] = scale * (left [ch] + right [ch]) ; case 3 : ch -- ; output [ch] = scale * (left [ch] + right [ch]) ; case 2 : ch -- ; output [ch] = scale * (left [ch] + right [ch]) ; case 1 : ch -- ; output [ch] = scale * (left [ch] + right [ch]) ; } ; } while (ch > 0) ; return ; } /* calc_output_multi */
static inline double calc_output_single (SINC_FILTER *filter, const increment_t increment, const increment_t start_filter_index) { #ifdef RESAMPLER_SSE_OPT __m128i increment4; __m128 left128,right128; float left,right; #else double left,right; #endif const coeff_t * const __restrict coeffs = filter->coeffs; const float * const __restrict buffer = filter->buffer; increment_t filter_index, max_filter_index ; int data_index, coeff_count; /* Convert input parameters into fixed point. */ max_filter_index = int_to_fp (filter->coeff_half_len) ; /* First apply the left half of the filter. */ filter_index = start_filter_index ; coeff_count = (max_filter_index - filter_index) / increment ; filter_index = filter_index + coeff_count * increment ; data_index = filter->b_current - coeff_count ; #ifdef RESAMPLER_SSE_OPT increment4 = _mm_set_epi32(increment * 3, increment * 2, increment, 0); left128 = _mm_setzero_ps(); while(filter_index >= increment * 3) { #ifdef USE_WINDOWS_CODE __m128i indx = _mm_sub_epi32(_mm_set1_epi32(filter_index), increment4); __m128i fractioni = _mm_and_si128(indx,_mm_set1_epi32(((((increment_t)1) << SHIFT_BITS) - 1))); #else Windows__m128i indx; indx.m128i = _mm_sub_epi32(_mm_set1_epi32(filter_index), increment4); __m128i fractioni = _mm_and_si128(indx.m128i,_mm_set1_epi32(((((increment_t)1) << SHIFT_BITS) - 1))); #endif __m128 icoeff0, icoeff2; // warning that these are uninitialized is okay and its intended, as both high and low 64bit-parts are set below __m128 icoeff,icoeffp1,icoeffd,fraction; #ifdef _DEBUG icoeff0 = icoeff2 = _mm_setzero_ps(); #endif #ifdef USE_WINDOWS_CODE indx = _mm_srai_epi32(indx, SHIFT_BITS); #else indx.m128i = _mm_srai_epi32(indx.m128i, SHIFT_BITS); #endif icoeff0 = _mm_loadh_pi(_mm_loadl_pi(icoeff0, (__m64*)(coeffs + indx.m128i_i32[0])), (__m64*)(coeffs + indx.m128i_i32[1])); icoeff2 = _mm_loadh_pi(_mm_loadl_pi(icoeff2, (__m64*)(coeffs + indx.m128i_i32[2])), (__m64*)(coeffs + indx.m128i_i32[3])); icoeff = _mm_shuffle_ps(icoeff0, icoeff2, _MM_SHUFFLE(2, 0, 2, 0)); icoeffp1 = _mm_shuffle_ps(icoeff0, icoeff2, _MM_SHUFFLE(3, 1, 3, 1)); icoeffd = _mm_sub_ps(icoeffp1, icoeff); fraction = _mm_mul_ps(_mm_cvtepi32_ps(fractioni), _mm_set1_ps((float)INV_FP_ONE)); icoeff = _mm_add_ps(icoeff,_mm_mul_ps(icoeffd, fraction)); left128 = _mm_add_ps(left128,_mm_mul_ps(icoeff, _mm_loadu_ps(buffer + data_index))); data_index += 4; filter_index -= increment * 4; } #endif left = 0.; while (filter_index >= MAKE_INCREMENT_T(0)) { coeff_t fraction = fp_to_float(filter_index); int indx = fp_to_int(filter_index); coeff_t icoeff = coeffs[indx] + fraction * (coeffs[indx + 1] - coeffs[indx]); left += icoeff * buffer[data_index]; filter_index -= increment; data_index++; } /* Now apply the right half of the filter. */ filter_index = increment - start_filter_index ; coeff_count = (max_filter_index - filter_index) / increment ; filter_index = filter_index + coeff_count * increment ; data_index = filter->b_current + 1 + coeff_count ; #ifdef RESAMPLER_SSE_OPT right128 = _mm_setzero_ps(); while (filter_index > increment * 3) { #ifdef USE_WINDOWS_CODE __m128i indx = _mm_sub_epi32(_mm_set1_epi32(filter_index), increment4); __m128i fractioni = _mm_and_si128(indx, _mm_set1_epi32(((((increment_t)1) << SHIFT_BITS) - 1))); #else Windows__m128i indx; indx.m128i = _mm_sub_epi32(_mm_set1_epi32(filter_index), increment4); __m128i fractioni = _mm_and_si128(indx.m128i, _mm_set1_epi32(((((increment_t)1) << SHIFT_BITS) - 1))); #endif __m128 icoeff0, icoeff2; // warning that these are uninitialized is okay and its intended, as both high and low 64bit-parts are set below __m128 icoeff,icoeffp1,icoeffd,fraction,data; #ifdef _DEBUG icoeff0 = icoeff2 = _mm_setzero_ps(); #endif #ifdef USE_WINDOWS_CODE indx = _mm_srai_epi32(indx, SHIFT_BITS); #else indx.m128i = _mm_srai_epi32(indx.m128i, SHIFT_BITS); #endif icoeff0 = _mm_loadh_pi(_mm_loadl_pi(icoeff0, (__m64*)(coeffs + indx.m128i_i32[0])), (__m64*)(coeffs + indx.m128i_i32[1])); icoeff2 = _mm_loadh_pi(_mm_loadl_pi(icoeff2, (__m64*)(coeffs + indx.m128i_i32[2])), (__m64*)(coeffs + indx.m128i_i32[3])); icoeff = _mm_shuffle_ps(icoeff0, icoeff2, _MM_SHUFFLE(2, 0, 2, 0)); icoeffp1 = _mm_shuffle_ps(icoeff0, icoeff2, _MM_SHUFFLE(3, 1, 3, 1)); icoeffd = _mm_sub_ps(icoeffp1, icoeff); fraction = _mm_mul_ps(_mm_cvtepi32_ps(fractioni), _mm_set1_ps((float)INV_FP_ONE)); icoeff = _mm_add_ps(icoeff, _mm_mul_ps(icoeffd, fraction)); data = _mm_loadu_ps(buffer + (data_index - 3)); right128 = _mm_add_ps(right128,_mm_mul_ps(icoeff, _mm_shuffle_ps(data,data,_MM_SHUFFLE(0,1,2,3)))); data_index -= 4; filter_index -= increment * 4; } #endif right = 0.; while (filter_index > MAKE_INCREMENT_T(0)) { coeff_t fraction = fp_to_float(filter_index); int indx = fp_to_int(filter_index); coeff_t icoeff = coeffs[indx] + fraction * (coeffs[indx + 1] - coeffs[indx]); right += icoeff * buffer[data_index]; filter_index -= increment; data_index--; } return ( #ifdef RESAMPLER_SSE_OPT _mm_cvtss_f32(horizontal_add(left128)) + _mm_cvtss_f32(horizontal_add(right128)) + #endif left + right) ; } /* calc_output_single */