void BiquadBase::setCoefficients (double a0, double a1, double a2, double b0, double b1, double b2) { assert (!Dsp::is_nan (a0) && !Dsp::is_nan (a1) && !Dsp::is_nan (a2) && !Dsp::is_nan (b0) && !Dsp::is_nan (b1) && !Dsp::is_nan (b2)); m_a0 = a0; m_a1 = a1/a0; m_a2 = a2/a0; m_b0 = b0/a0; m_b1 = b1/a0; m_b2 = b2/a0; #ifdef __SSE__ m_va0 = _mm_set1_ps(m_a0); m_vb0 = _mm_set1_ps(m_b0); m_vab12 = _mm_set_ps(m_b2, m_b1, m_a2, m_a1); #elif defined(__ARM_NEON__) m_va0 = vdup_n_f32(m_a0); m_vb0 = vdup_n_f32(m_b0); vsetq_lane_f32(m_a1, m_vab12, 0); vsetq_lane_f32(m_b1, m_vab12, 1); vsetq_lane_f32(m_a2, m_vab12, 2); vsetq_lane_f32(m_b2, m_vab12, 3); #endif }
static inline void neon_make_rgb(float32x4_t macropixel, float32x4_t *rgba0p, float32x4_t *rgba1p) { const float32x4_t u_coeff = {0.0, -0.34455, 1.7790, 0.0 }; const float32x4_t v_coeff = {1.4075, -0.7169, 0.0, 0.0 }; float32x4_t y0_vec, y1_vec, u_vec, v_vec, uv_vec; float32x2_t y0_u, y1_v; const float32_t alpha = 255.0; /* macropixel is [Y0, U, Y1, V]. */ /* since vdupq_lane_f32 will only take two element vectors we */ /* need to pick macropixel apart to build vectors of the components. */ /* so make y0_u be the first half of macropixel [Y0, U] and */ /* y1_v be the second half [Y1, V]. */ y0_u = vget_low_f32(macropixel); y1_v = vget_high_f32(macropixel); /* now copy Y0 to all elements of y0_vec, then overwrite element 3 */ /* with alpha. */ y0_vec = vdupq_lane_f32(y0_u, 0); y0_vec = vsetq_lane_f32(alpha, y0_vec, 3); /* make u_vec be [U, U, U, U]. we'll do that using */ /* vdupq_lane_f32 and selecting U (element 1) from y0_u */ u_vec = vdupq_lane_f32(y0_u, 1); /* now copy Y1 to all elements of y1_vec, then overwrite element 3 */ /* with alpha. */ y1_vec = vdupq_lane_f32(y1_v, 0); y1_vec = vsetq_lane_f32(alpha, y1_vec, 3); /* make v_vec be [V, V, V, V]. we'll do that using */ /* vdupq_lane_f32 and selecting V (element 1) from y1_v */ v_vec = vdupq_lane_f32(y1_v, 1); /* now multiply u_vec * u_coeff and v_vec by v_coeff. */ u_vec = vmulq_f32(u_vec, u_coeff); v_vec = vmulq_f32(v_vec, v_coeff); /* add u_vec and v_vec to form uv_vec. use that to build */ /* rgba0 and rgba1 by adding y0_vec, y1_vec*/ uv_vec = vaddq_f32(u_vec, v_vec); *rgba0p = vaddq_f32(y0_vec, uv_vec); *rgba1p = vaddq_f32(y1_vec, uv_vec); }
// Codegen test: inserts scalar `a` into lane 3 of vector `b` with
// vsetq_lane_f32 and returns the result. The FileCheck directives inside the
// body expect exactly one lane insert into element 3, a register move and a
// return. Presumably driven by a RUN line elsewhere in the file -- the
// directives must stay one per line and unchanged.
float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
  // CHECK-LABEL: test_vsetq_lane_f32:
  // CHECK-NEXT: ins.s v1[3], v0[0]
  // CHECK-NEXT: mov.16b v0, v1
  // CHECK-NEXT: ret
  return vsetq_lane_f32(a, b, 3);
}
// Codegen test: inserts scalar `v1` into lane 1 of vector `v2` with
// vsetq_lane_f32 and returns the result. The FileCheck directives expect an
// `ins` from element 0 of one register into element 1 of another; register
// numbers are left unconstrained by the patterns.
float32x4_t test_vsetq_lane_f32(float32_t v1, float32x4_t v2) {
  // CHECK: test_vsetq_lane_f32
  return vsetq_lane_f32(v1, v2, 1);
  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
}
/**
 * NEON stereo-to-stereo pan of one audio block.
 *
 * For every sample index i in [0, WEBAUDIO_BLOCK_SIZE):
 *
 *   if aIsOnTheLeft[i]:
 *     aOutputL[i] = aInputL[i] + aInputR[i] * aGainL[i]
 *     aOutputR[i] = aInputR[i] * aGainR[i]
 *   else:
 *     aOutputL[i] = aInputL[i] * aGainL[i]
 *     aOutputR[i] = aInputR[i] + aInputL[i] * aGainR[i]
 *
 * Samples are processed eight at a time as two 4-lane vectors (suffix 0 for
 * samples i..i+3, suffix 1 for samples i+4..i+7). Both branches are computed
 * for every lane and the result is picked with a bitwise select.
 *
 * Every array argument is checked with ASSERT_ALIGNED. The loop steps by 8
 * with no tail handling, so WEBAUDIO_BLOCK_SIZE is assumed to be a multiple
 * of 8 -- TODO confirm against its definition.
 */
void AudioBlockPanStereoToStereo_NEON(
    const float aInputL[WEBAUDIO_BLOCK_SIZE],
    const float aInputR[WEBAUDIO_BLOCK_SIZE],
    float aGainL[WEBAUDIO_BLOCK_SIZE],
    float aGainR[WEBAUDIO_BLOCK_SIZE],
    const bool aIsOnTheLeft[WEBAUDIO_BLOCK_SIZE],
    float aOutputL[WEBAUDIO_BLOCK_SIZE],
    float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
  ASSERT_ALIGNED(aInputL);
  ASSERT_ALIGNED(aInputR);
  ASSERT_ALIGNED(aGainL);
  ASSERT_ALIGNED(aGainR);
  ASSERT_ALIGNED(aIsOnTheLeft);
  ASSERT_ALIGNED(aOutputL);
  ASSERT_ALIGNED(aOutputR);

  float32x4_t vinL0, vinL1;
  float32x4_t vinR0, vinR1;
  float32x4_t voutL0, voutL1;
  float32x4_t voutR0, voutR1;
  float32x4_t vscaleL0, vscaleL1;
  float32x4_t vscaleR0, vscaleR1;
  float32x4_t onleft0, onleft1, notonleft0, notonleft1;

  float32x4_t zero = vmovq_n_f32(0);
  uint8x8_t isOnTheLeft;

  // voutL0 / voutL1 have every lane overwritten by vsetq_lane_f32 before
  // they are read, so initialisation is not strictly required. MSVC still
  // reports an uninitialised-value warning, so zero them to keep it quiet.
  voutL0 = zero;
  voutL1 = zero;

  for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i += 8) {
    vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i));
    vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i + 4));

    vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i));
    vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i + 4));

    vscaleL0 = vld1q_f32(ADDRESS_OF(aGainL, i));
    vscaleL1 = vld1q_f32(ADDRESS_OF(aGainL, i + 4));

    vscaleR0 = vld1q_f32(ADDRESS_OF(aGainR, i));
    vscaleR1 = vld1q_f32(ADDRESS_OF(aGainR, i + 4));

    // Load eight "on the left" flags as raw bytes and widen each one into a
    // float lane. This assumes that bools are stored as a single byte.
    isOnTheLeft = vld1_u8(reinterpret_cast<const uint8_t*>(&aIsOnTheLeft[i]));
    voutL0 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 0), voutL0, 0);
    voutL0 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 1), voutL0, 1);
    voutL0 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 2), voutL0, 2);
    voutL0 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 3), voutL0, 3);
    voutL1 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 4), voutL1, 0);
    voutL1 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 5), voutL1, 1);
    voutL1 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 6), voutL1, 2);
    voutL1 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 7), voutL1, 3);

    // Turn the flags into lane masks: all bits set where the flag is true.
    voutL0 = (float32x4_t)vcgtq_f32(voutL0, zero);
    voutL1 = (float32x4_t)vcgtq_f32(voutL1, zero);

    // The right channel selects with the same masks as the left channel.
    voutR0 = voutL0;
    voutR1 = voutL1;

    // Left channel when isOnTheLeft: inL + inR * gainL.
    // The second half must use vscaleL1 (gains for samples i+4..i+7); using
    // vscaleL0 here would apply the wrong samples' gains.
    onleft0 = vmlaq_f32(vinL0, vinR0, vscaleL0);
    onleft1 = vmlaq_f32(vinL1, vinR1, vscaleL1);

    // Left channel when not isOnTheLeft: inL * gainL.
    notonleft0 = vmulq_f32(vinL0, vscaleL0);
    notonleft1 = vmulq_f32(vinL1, vscaleL1);

    // Pick per lane using the masks stored in voutL0 / voutL1.
    voutL0 = vbslq_f32((uint32x4_t)voutL0, onleft0, notonleft0);
    voutL1 = vbslq_f32((uint32x4_t)voutL1, onleft1, notonleft1);

    // Right channel when isOnTheLeft: inR * gainR.
    onleft0 = vmulq_f32(vinR0, vscaleR0);
    onleft1 = vmulq_f32(vinR1, vscaleR1);

    // Right channel when not isOnTheLeft: inR + inL * gainR.
    notonleft0 = vmlaq_f32(vinR0, vinL0, vscaleR0);
    notonleft1 = vmlaq_f32(vinR1, vinL1, vscaleR1);

    // Pick per lane using the masks stored in voutR0 / voutR1.
    voutR0 = vbslq_f32((uint32x4_t)voutR0, onleft0, notonleft0);
    voutR1 = vbslq_f32((uint32x4_t)voutR1, onleft1, notonleft1);

    vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0);
    vst1q_f32(ADDRESS_OF(aOutputL, i + 4), voutL1);
    vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0);
    vst1q_f32(ADDRESS_OF(aOutputR, i + 4), voutR1);
  }
}
// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #0 { // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> // CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3 // CHECK: ret <4 x float> [[VSET_LANE]] float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) { return vsetq_lane_f32(a, b, 3); }