/**
 * Translate a block of complex<int8_t> samples by -fs/4 and decimate it by
 * two with a non-recursive third-order CIC filter (taps 1, 3, 3, 1).
 *
 * Every loop iteration consumes four complex input samples (two 32-bit words)
 * and emits two complex 16-bit output samples (two 32-bit words). The last two
 * input samples of each iteration are kept in _q1_i0 / _q0_i1, which are read
 * on entry and written back on exit, so the filter runs continuously across
 * successive calls.
 *
 * @param src  Input block; read through 32-bit words holding two I/Q byte pairs each.
 * @param dst  Output block; written through 32-bit words holding one 16-bit I/Q pair each.
 * @return     {dst.p, src.count / 2, src.sampling_rate / 2}.
 *
 * NOTE(review): the loop advances two words per pass and only tests
 * `src_p < src_end`, so src.count presumably has to be a multiple of 4;
 * otherwise the final pass reads beyond src and writes beyond dst. Confirm
 * against the callers.
 * NOTE(review): src.p and dst.p are accessed through uint32_t pointers, so
 * both presumably need 32-bit alignment. Confirm.
 */
buffer_c16_t TranslateByFSOver4AndDecimateBy2CIC3::execute(const buffer_c8_t& src, const buffer_c16_t& dst) {
    /* Translates incoming complex<int8_t> samples by -fs/4,
     * decimates by two using a non-recursive third-order CIC filter.
     */

    /* Derivation of algorithm:
     * Original CIC filter (decimating by two):
     * 	D_I0 = i3 * 1 + i2 * 3 + i1 * 3 + i0 * 1
     * 	D_Q0 = q3 * 1 + q2 * 3 + q1 * 3 + q0 * 1
     *
     * 	D_I1 = i5 * 1 + i4 * 3 + i3 * 3 + i2 * 1
     * 	D_Q1 = q5 * 1 + q4 * 3 + q3 * 3 + q2 * 1
     *
     * Translate -fs/4, phased 180 degrees, accomplished by complex multiplication
     * of complex length-4 sequence:
     * 	[ -1, +j, +1, -j ]   (this is what the substitution below applies)
     *
     * Substitute:
     * 	i0 = -i0, q0 = -q0
     * 	i1 = -q1, q1 =  i1
     * 	i2 =  i2, q2 =  q2
     * 	i3 =  q3, q3 = -i3
     * 	i4 = -i4, q4 = -q4
     * 	i5 = -q5, q5 =  i5
     *
     * Resulting taps (with decimation by 2, four samples in, two samples out):
     * 	D_I0 =  q3 * 1 +  i2 * 3 + -q1 * 3 + -i0 * 1
     * 	D_Q0 = -i3 * 1 +  q2 * 3 +  i1 * 3 + -q0 * 1
     *
     * 	D_I1 = -q5 * 1 + -i4 * 3 +  q3 * 3 +  i2 * 1
     * 	D_Q1 =  i5 * 1 + -q4 * 3 + -i3 * 3 +  q2 * 1
     */

    /* Naming convention used below: a local called `a_b` is a 32-bit word with
     * `a` in bits [31:16] and `b` in bits [15:0]; `q3_i3_q2_i2` lists the four
     * bytes of a raw input word from bits [31:24] down to bits [7:0].
     */

    // 6 cycles per complex input sample, not including loop overhead.
    // Filter history: the two most recent input samples of the previous pass.
    uint32_t q1_i0 = _q1_i0;
    uint32_t q0_i1 = _q0_i1;

    /* 3:1 Scaled by 32 to normalize output to +/-32768-ish.
     * k_3_1 = 0x00600020: tap 3 (x32) in the high half-word, tap 1 (x32) in the
     * low half-word. With sign-extended 8-bit inputs the largest magnitude of
     * any tap sum is 1022, and 1022 * 32 = 32704 still fits in 16 bits.
     */
    constexpr uint32_t scale_factor = 32;
    constexpr uint32_t k_3_1 = 0x00030001 * scale_factor;

    // The input is only read, so it is walked through pointers-to-const.
    const uint32_t* src_p = reinterpret_cast<const uint32_t*>(&src.p[0]);
    const uint32_t* const src_end = reinterpret_cast<const uint32_t*>(&src.p[src.count]);
    uint32_t* dst_p = reinterpret_cast<uint32_t*>(&dst.p[0]);
    while (src_p < src_end) {
        const uint32_t q3_i3_q2_i2 = *(src_p++);  // 3
        const uint32_t q5_i5_q4_i4 = *(src_p++);

        // Sign-extend the bytes of samples 2 and 3 and pair them up as needed by the taps.
        const uint32_t i2_i3 = __SXTB16(q3_i3_q2_i2, 16);  // 1: (q3_i3_q2_i2 ror 16)[23:16]:(q3_i3_q2_i2 ror 16)[7:0]
        const uint32_t q3_q2 = __SXTB16(q3_i3_q2_i2, 8);   // 1: (q3_i3_q2_i2 ror  8)[23:16]:(q3_i3_q2_i2 ror  8)[7:0]
        const uint32_t i2_q3 = __PKHTB(i2_i3, q3_q2, 16);  // 1: Rn[31:16]:(Rm>>16)[15:0]
        const uint32_t i3_q2 = __PKHBT(q3_q2, i2_i3, 16);  // 1: (Rm<<16)[31:16]:Rn[15:0]

        // D_I0 = 3 * (i2 - q1) + (q3 - i0)
        const uint32_t i2_m_q1_q3_m_i0 = __QSUB16(i2_q3, q1_i0);  // 1: Rn[31:16]-Rm[31:16]:Rn[15:0]-Rm[15:0]
        const uint32_t d_i0 = __SMUAD(k_3_1, i2_m_q1_q3_m_i0);    // 1: Rm[15:0]*Rs[15:0]+Rm[31:16]*Rs[31:16]

        // D_Q0 = 3 * (q2 + i1) - (i3 + q0)
        const uint32_t i3_p_q0_q2_p_i1 = __QADD16(i3_q2, q0_i1);  // 1: Rn[31:16]+Rm[31:16]:Rn[15:0]+Rm[15:0]
        const uint32_t d_q0 = __SMUSDX(i3_p_q0_q2_p_i1, k_3_1);   // 1: Rm[15:0]*Rs[31:16]-Rm[31:16]*RsX[15:0]

        // First output sample: Q in the high half-word, I in the low half-word.
        const uint32_t d_q0_i0 = __PKHBT(d_i0, d_q0, 16);  // 1: (Rm<<16)[31:16]:Rn[15:0]

        // Same unpacking for samples 4 and 5.
        const uint32_t i5_i4 = __SXTB16(q5_i5_q4_i4, 0);   // 1: (q5_i5_q4_i4 ror  0)[23:16]:(q5_i5_q4_i4 ror  0)[7:0]
        const uint32_t q4_q5 = __SXTB16(q5_i5_q4_i4, 24);  // 1: (q5_i5_q4_i4 ror 24)[23:16]:(q5_i5_q4_i4 ror 24)[7:0]
        const uint32_t q4_i5 = __PKHTB(q4_q5, i5_i4, 16);  // 1: Rn[31:16]:(Rm>>16)[15:0]
        const uint32_t q5_i4 = __PKHBT(i5_i4, q4_q5, 16);  // 1: (Rm<<16)[31:16]:Rn[15:0]

        // D_I1 = (i2 - q5) + 3 * (q3 - i4)
        const uint32_t i2_m_q5_q3_m_i4 = __QSUB16(i2_q3, q5_i4);  // 1: Rn[31:16]-Rm[31:16]:Rn[15:0]-Rm[15:0]
        const uint32_t d_i1 = __SMUADX(i2_m_q5_q3_m_i4, k_3_1);   // 1: Rm[15:0]*Rs[31:16]+Rm[31:16]*Rs[15:0]

        // D_Q1 = (i5 + q2) - 3 * (q4 + i3)
        const uint32_t q4_p_i3_i5_p_q2 = __QADD16(q4_i5, i3_q2);  // 1: Rn[31:16]+Rm[31:16]:Rn[15:0]+Rm[15:0]
        const uint32_t d_q1 = __SMUSD(k_3_1, q4_p_i3_i5_p_q2);    // 1: Rm[15:0]*Rs[15:0]-Rm[31:16]*Rs[31:16]

        // Second output sample, packed like the first.
        const uint32_t d_q1_i1 = __PKHBT(d_i1, d_q1, 16);  // 1: (Rm<<16)[31:16]:Rn[15:0]

        *(dst_p++) = d_q0_i0;  // 3
        *(dst_p++) = d_q1_i1;

        // Samples 4 and 5 of this pass become samples 0 and 1 of the next one.
        q1_i0 = q5_i4;
        q0_i1 = q4_i5;
    }

    // Persist the filter history for the next call.
    _q1_i0 = q1_i0;
    _q0_i1 = q0_i1;

    return {dst.p, src.count / 2, src.sampling_rate / 2};
}
/**
\brief Test case: TC_CoreSimd_ParMul16
\details
- Check Parallel 16-bit multiplication. Every intrinsic is called with fixed
  operands and its result is compared against a precomputed constant:
    __SMLAD
    __SMLADX
    __SMLALD
    __SMLALDX
    __SMLSD
    __SMLSDX
    __SMLSLD
    __SMLSLDX
    __SMUAD
    __SMUADX
    __SMUSD
    __SMUSDX
- The body is compiled only when __ARM_ARCH_7EM__ or __ARM_FEATURE_DSP is
  defined as 1; otherwise the function is empty.
*/
void TC_CoreSimd_ParMul16 (void)
{
#if ((defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__  == 1)) || \
     (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))     )

  /* Operands and results are volatile, presumably so the compiler cannot
     evaluate the intrinsic calls at build time. */
  volatile int32_t res_s32;
  volatile int64_t res_s64;

  /* --- __SMLAD: 2*4 + 3*5 = 0x17, added to the accumulator -------------- */
  {
    volatile int32_t op1_s32 = 0x00030002;
    volatile int32_t op2_s32 = 0x00050004;
    volatile int32_t acc_s32 = 0x20000000;

    res_s32 = __SMLAD(op1_s32, op2_s32, acc_s32);
    ASSERT_TRUE(res_s32 == 0x20000017);
  }

  /* --- __SMLADX: 2*5 + 3*4 = 0x16, added to the accumulator ------------- */
  {
    volatile int32_t op1_s32 = 0x00030002;
    volatile int32_t op2_s32 = 0x00050004;
    volatile int32_t acc_s32 = 0x00000800;

    res_s32 = __SMLADX(op1_s32, op2_s32, acc_s32);
    ASSERT_TRUE(res_s32 == 0x00000816);
  }

  /* --- __SMLALD: as __SMLAD, with a 64-bit accumulator ------------------ */
  {
    volatile int32_t op1_s32 = 0x00030002;
    volatile int32_t op2_s32 = 0x00050004;
    volatile int64_t acc_s64 = 0x0000000200000000LL;

    res_s64 = __SMLALD(op1_s32, op2_s32, acc_s64);
    ASSERT_TRUE(res_s64 == 0x0000000200000017LL);
  }

  /* --- __SMLALDX: as __SMLADX, with a 64-bit accumulator ---------------- */
  {
    volatile int32_t op1_s32 = 0x00030002;
    volatile int32_t op2_s32 = 0x00050004;
    volatile int64_t acc_s64 = 0x0000000200000000LL;

    res_s64 = __SMLALDX(op1_s32, op2_s32, acc_s64);
    ASSERT_TRUE(res_s64 == 0x0000000200000016LL);
  }

  /* --- __SMLSD: 6*4 - 3*5 = 9, added to the accumulator ----------------- */
  {
    volatile int32_t op1_s32 = 0x00030006;
    volatile int32_t op2_s32 = 0x00050004;
    volatile int32_t acc_s32 = 0x00000800;

    res_s32 = __SMLSD(op1_s32, op2_s32, acc_s32);
    ASSERT_TRUE(res_s32 == 0x00000809);
  }

  /* --- __SMLSDX: 2*5 - 3*4 = -2, added to the accumulator --------------- */
  {
    volatile int32_t op1_s32 = 0x00030002;
    volatile int32_t op2_s32 = 0x00050004;
    volatile int32_t acc_s32 = 0x00000800;

    res_s32 = __SMLSDX(op1_s32, op2_s32, acc_s32);
    ASSERT_TRUE(res_s32 == 0x000007FE);
  }

  /* --- __SMLSLD: as __SMLSD, with a 64-bit accumulator ------------------ */
  {
    volatile int32_t op1_s32 = 0x00030006;
    volatile int32_t op2_s32 = 0x00050004;
    volatile int64_t acc_s64 = 0x0000000200000000LL;

    res_s64 = __SMLSLD(op1_s32, op2_s32, acc_s64);
    ASSERT_TRUE(res_s64 == 0x0000000200000009LL);
  }

  /* --- __SMLSLDX: 6*5 - 3*4 = 0x12, with a 64-bit accumulator ----------- */
  {
    volatile int32_t op1_s32 = 0x00030006;
    volatile int32_t op2_s32 = 0x00050004;
    volatile int64_t acc_s64 = 0x0000000200000000LL;

    res_s64 = __SMLSLDX(op1_s32, op2_s32, acc_s64);
    ASSERT_TRUE(res_s64 == 0x0000000200000012LL);
  }

  /* --- __SMUAD: 1*2 + 3*4 = 14, positive operands ----------------------- */
  {
    volatile int32_t op1_s32 = 0x00030001;
    volatile int32_t op2_s32 = 0x00040002;

    res_s32 = __SMUAD(op1_s32, op2_s32);
    ASSERT_TRUE(res_s32 == 0x0000000E);
  }

  /* --- __SMUAD: (-1)*2 + (-3)*4 = -14, negative first operand ----------- */
  {
    volatile int32_t op1_s32 = (int32_t)0xFFFDFFFF;
    volatile int32_t op2_s32 = (int32_t)0x00040002;

    res_s32 = __SMUAD(op1_s32, op2_s32);
    ASSERT_TRUE(res_s32 == (int32_t)0xFFFFFFF2);
  }

  /* --- __SMUADX: 1*4 + 3*2 = 10, positive operands ---------------------- */
  {
    volatile int32_t op1_s32 = 0x00030001;
    volatile int32_t op2_s32 = 0x00040002;

    res_s32 = __SMUADX(op1_s32, op2_s32);
    ASSERT_TRUE(res_s32 == 0x0000000A);
  }

  /* --- __SMUADX: (-1)*4 + (-3)*2 = -10, negative first operand ---------- */
  {
    volatile int32_t op1_s32 = (int32_t)0xFFFDFFFF;
    volatile int32_t op2_s32 = (int32_t)0x00040002;

    res_s32 = __SMUADX(op1_s32, op2_s32);
    ASSERT_TRUE(res_s32 == (int32_t)0xFFFFFFF6);
  }

  /* --- __SMUSD: 1*2 - 3*4 = -10, positive operands ---------------------- */
  {
    volatile int32_t op1_s32 = 0x00030001;
    volatile int32_t op2_s32 = 0x00040002;

    res_s32 = __SMUSD(op1_s32, op2_s32);
    ASSERT_TRUE(res_s32 == (int32_t)0xFFFFFFF6);
  }

  /* --- __SMUSD: (-1)*2 - (-3)*4 = 10, negative first operand ------------ */
  {
    volatile int32_t op1_s32 = (int32_t)0xFFFDFFFF;
    volatile int32_t op2_s32 = (int32_t)0x00040002;

    res_s32 = __SMUSD(op1_s32, op2_s32);
    ASSERT_TRUE(res_s32 == 0x0000000A);
  }

  /* --- __SMUSDX: 1*4 - 3*2 = -2, positive operands ---------------------- */
  {
    volatile int32_t op1_s32 = 0x00030001;
    volatile int32_t op2_s32 = 0x00040002;

    res_s32 = __SMUSDX(op1_s32, op2_s32);
    ASSERT_TRUE(res_s32 == (int32_t)0xFFFFFFFE);
  }

  /* --- __SMUSDX: (-1)*4 - (-3)*2 = 2, negative first operand ------------ */
  {
    volatile int32_t op1_s32 = (int32_t)0xFFFDFFFF;
    volatile int32_t op2_s32 = (int32_t)0x00040002;

    res_s32 = __SMUSDX(op1_s32, op2_s32);
    ASSERT_TRUE(res_s32 == (int32_t)0x00000002);
  }

#endif
}