__SIMDd _SIMD_min_pd(__SIMDd a, __SIMDd b) { #ifdef USE_SSE return _mm_min_pd(a,b); #elif defined USE_AVX return _mm256_min_pd(a,b); #elif defined USE_IBM return vec_min(a,b); #endif }
/*!
 * \brief Lane-wise minimum of two AVX packed-double vectors.
 * \param lhs The first operand.
 * \param rhs The second operand.
 * \return A vector where each lane holds the smaller of the two
 *         corresponding input lanes.
 */
ETL_STATIC_INLINE(avx_simd_double) min(avx_simd_double lhs, avx_simd_double rhs) {
    const __m256d smallest = _mm256_min_pd(lhs.value, rhs.value);
    return smallest;
}
// Process audio effects for 8 channels simultaneously:
// Runs the fixed effect chain (input metering -> f0_gain -> f1_compressor ->
// output metering -> hard limiter) over one vec8_i32 of interleaved samples.
// The 8 int32 inputs are split into two 4-lane double vectors, processed in
// parallel, then converted back and streamed to outSamples.
//
// Parameters:
//   inpSamples - 8 packed int32 input samples, normalized here to [-1, 1]
//                by dividing by INT_MAX.
//   outSamples - destination for the 8 processed int32 samples (written with
//                a non-temporal store; presumably 32-byte aligned — TODO confirm).
//   n          - base index into the per-channel-group state arrays inside the
//                global `fx`; slots n+0 and n+1 hold the two 4-lane halves.
//
// Side effects: reads and updates global effect state in `fx` (level monitors,
// compressor envelope fx.f1_compressor.state.env, gain-reduction monitors).
// NOTE(review): not thread-safe with respect to `fx` — confirm single-threaded use.
void processEffects(const vec8_i32 &inpSamples, vec8_i32 &outSamples, const long n) {
    // Extract int samples and convert to doubles:
    // (low/high 128-bit halves -> 4 x double each, scaled to [-1, 1])
    const vec4_d64 ds0 = _mm256_div_pd(
        _mm256_cvtepi32_pd(_mm256_extractf128_si256(inpSamples, 0)),
        _mm256_set1_pd((double)INT_MAX)
    );
    const vec4_d64 ds1 = _mm256_div_pd(
        _mm256_cvtepi32_pd(_mm256_extractf128_si256(inpSamples, 1)),
        _mm256_set1_pd((double)INT_MAX)
    );

    // Monitor input levels:
    fx.fi_monitor.levels[n + 0] = scalar_to_dBFS(ds0);
    fx.fi_monitor.levels[n + 1] = scalar_to_dBFS(ds1);

    vec4_d64 s0, s1;

    // f0_gain: apply the precomputed per-group linear gain.
    {
        s0 = _mm256_mul_pd(ds0, fx.f0_gain.calc.gain[n + 0]);
        s1 = _mm256_mul_pd(ds1, fx.f0_gain.calc.gain[n + 1]);
    }

    // Monitor levels:
    fx.f0_output.levels[n + 0] = scalar_to_dBFS(s0);
    fx.f0_output.levels[n + 1] = scalar_to_dBFS(s1);

    // f1_compressor: classic feed-forward compressor with attack/release
    // envelope smoothing, computed in the dB domain.
    {
        const vec4_dBFS l0 = scalar_to_dBFS_offs(s0);
        const vec4_dBFS l1 = scalar_to_dBFS_offs(s1);

        // over = s - thresh
        vec4_dB over0 = _mm256_sub_pd(l0, fx.f1_compressor.input.threshold[n + 0]);
        vec4_dB over1 = _mm256_sub_pd(l1, fx.f1_compressor.input.threshold[n + 1]);

        // over = if over < 0.0 then 0.0 else over;
        // (clamp: only levels above the threshold are compressed)
        over0 = mm256_if_then_else(_mm256_cmp_pd(over0, _mm256_set1_pd(0.0), _CMP_LT_OQ), _mm256_set1_pd(0.0), over0);
        over1 = mm256_if_then_else(_mm256_cmp_pd(over1, _mm256_set1_pd(0.0), _CMP_LT_OQ), _mm256_set1_pd(0.0), over1);

        // over += DC_OFFSET
        // (presumably a tiny bias to keep the envelope away from exact zero — TODO confirm)
        over0 = _mm256_add_pd(over0, DC_OFFSET);
        over1 = _mm256_add_pd(over1, DC_OFFSET);

        // env = over + coef * ( env - over )
        // One-pole smoothing toward `over`; both candidate envelopes are
        // computed, then the attack one is selected while the signal rises.
        const vec4_dB attack_env0 = _mm256_add_pd(over0,
            _mm256_mul_pd(fx.f1_compressor.calc.attack_coef[n + 0],
                _mm256_sub_pd(fx.f1_compressor.state.env[n + 0], over0)));
        const vec4_dB attack_env1 = _mm256_add_pd(over1,
            _mm256_mul_pd(fx.f1_compressor.calc.attack_coef[n + 1],
                _mm256_sub_pd(fx.f1_compressor.state.env[n + 1], over1)));
        const vec4_dB release_env0 = _mm256_add_pd(over0,
            _mm256_mul_pd(fx.f1_compressor.calc.release_coef[n + 0],
                _mm256_sub_pd(fx.f1_compressor.state.env[n + 0], over0)));
        const vec4_dB release_env1 = _mm256_add_pd(over1,
            _mm256_mul_pd(fx.f1_compressor.calc.release_coef[n + 1],
                _mm256_sub_pd(fx.f1_compressor.state.env[n + 1], over1)));

        // env = if over > env then attack_env else release_env
        fx.f1_compressor.state.env[n + 0] = mm256_if_then_else(
            _mm256_cmp_pd(over0, fx.f1_compressor.state.env[n + 0], _CMP_GT_OQ),
            attack_env0, release_env0);
        fx.f1_compressor.state.env[n + 1] = mm256_if_then_else(
            _mm256_cmp_pd(over1, fx.f1_compressor.state.env[n + 1], _CMP_GT_OQ),
            attack_env1, release_env1);

        // over = env - DC_OFFSET  (remove the bias added above)
        over0 = _mm256_sub_pd(fx.f1_compressor.state.env[n + 0], DC_OFFSET);
        over1 = _mm256_sub_pd(fx.f1_compressor.state.env[n + 1], DC_OFFSET);

        // grdB = ( over * ( ratio - 1.0 ) )
        vec4_dB gr0dB = _mm256_mul_pd(over0, fx.f1_compressor.calc.ratio_min_1[n + 0]);
        vec4_dB gr1dB = _mm256_mul_pd(over1, fx.f1_compressor.calc.ratio_min_1[n + 1]);

        // gr = dB_to_scalar(grdB)
        fx.f1_compressor.monitor.gain_reduction[n + 0] = dB_to_scalar(gr0dB);
        fx.f1_compressor.monitor.gain_reduction[n + 1] = dB_to_scalar(gr1dB);

        // Apply gain reduction to inputs:
        s0 = _mm256_mul_pd(s0, fx.f1_compressor.monitor.gain_reduction[n + 0]);
        s1 = _mm256_mul_pd(s1, fx.f1_compressor.monitor.gain_reduction[n + 1]);

        // Apply make-up gain:
        s0 = _mm256_mul_pd(s0, fx.f1_compressor.calc.gain[n + 0]);
        s1 = _mm256_mul_pd(s1, fx.f1_compressor.calc.gain[n + 1]);
    }

    // Monitor output levels:
    fx.fo_monitor.levels[n + 0] = scalar_to_dBFS(s0);
    fx.fo_monitor.levels[n + 1] = scalar_to_dBFS(s1);

    // TODO(jsd): Better limiter implementation!
    // Limit final samples: hard clamp to [-1.0, 1.0] before int conversion
    // to avoid integer overflow in _mm256_cvtpd_epi32 below.
    s0 = _mm256_max_pd(_mm256_min_pd(s0, _mm256_set1_pd((double)1.0)), _mm256_set1_pd((double)-1.0));
    s1 = _mm256_max_pd(_mm256_min_pd(s1, _mm256_set1_pd((double)1.0)), _mm256_set1_pd((double)-1.0));

    // Convert doubles back to 32-bit ints:
    s0 = _mm256_mul_pd(s0, _mm256_set1_pd((double)INT_MAX));
    s1 = _mm256_mul_pd(s1, _mm256_set1_pd((double)INT_MAX));
    const vec8_i32 os = _mm256_setr_m128i(_mm256_cvtpd_epi32(s0), _mm256_cvtpd_epi32(s1));

    // Write outputs:
    // Non-temporal (streaming) store — bypasses the cache; requires a
    // 32-byte-aligned destination (presumably guaranteed by vec8_i32 — TODO confirm).
    _mm256_stream_si256(&outSamples, os);
}
// Lane-wise minimum: each lane of the result is the smaller of the
// corresponding lanes of lhs and rhs.
inline vector4d min(const vector4d& lhs, const vector4d& rhs)
{
    const __m256d lanes = _mm256_min_pd(lhs, rhs);
    return lanes;
}
/* Lane-wise minimum of two F64vec4 operands (4 packed doubles). */
inline F64vec4 min(const F64vec4 &l, const F64vec4 &r)
{
    const __m256d smaller = _mm256_min_pd(l, r);
    return smaller;
}
/**
 * Lane-wise minimum of two AVX double vectors.
 *
 * @param x First operand.
 * @param y Second operand.
 * @return Vector holding, per lane, the smaller of the two inputs.
 */
BI_FORCE_INLINE inline avx_double min(const avx_double x, const avx_double y) {
    const __m256d lanes = _mm256_min_pd(x.packed, y.packed);
    avx_double result;
    result.packed = lanes;
    return result;
}