/* Run the complex FIR filter over src, producing one output sample for every
 * decimation_factor_ input samples.
 *
 * src: input buffer; src.p is read, src.count / decimation_factor_ outputs
 *      are produced, and src.sampling_rate is divided by decimation_factor_
 *      to obtain the output rate.
 * dst: output buffer; only dst.p is used (as the write destination).
 *
 * Returns a buffer_c16_t built from dst.p, the number of output samples and
 * the decimated sampling rate.
 *
 * The member delay line samples_ is updated on every output sample, so its
 * contents carry over from one call to the next.
 */
buffer_c16_t FIRAndDecimateComplex::execute(
    const buffer_c16_t& src,
    const buffer_c16_t& dst
) {
    /* int16_t input (sample count "n" must be multiple of decimation_factor)
     * -> int16_t output, decimated by decimation_factor.
     * taps are normalized to 1 << 16 == 1.0.
     *
     * Both divisions below are integer divisions, so any input samples beyond
     * a whole multiple of decimation_factor_ are not consumed.
     */
    const auto output_sampling_rate = src.sampling_rate / decimation_factor_;
    const size_t output_samples = src.count / decimation_factor_;

    sample_t* dst_p = dst.p;
    const buffer_c16_t result { dst.p, output_samples, output_sampling_rate };

    const sample_t* src_p = src.p;
    size_t outer_count = output_samples;
    /* One pass of this loop produces exactly one output sample. */
    while(outer_count > 0) {
        /* Put new samples into delay buffer */
        /* The decimation_factor_ newest samples go into the last
         * decimation_factor_ slots of samples_. Each __SIMD32 access moves
         * one 32-bit word and advances the pointer by one word.
         * NOTE(review): assumes sample_t is one 32-bit word (a packed pair of
         * int16_t) -- TODO confirm against the type's declaration.
         */
        auto z_new_p = &samples_[taps_count_ - decimation_factor_];
        for(size_t i=0; i<decimation_factor_; i++) {
            *__SIMD32(z_new_p)++ = *__SIMD32(src_p)++;
        }

        /* The multiply-accumulate loop handles 8 taps per pass, so it covers
         * taps_count_ / 8 * 8 taps; taps beyond a multiple of 8 would not be
         * processed.
         */
        size_t loop_count = taps_count_ / 8;
        auto t_p = &taps_reversed_[0];
        auto z_p = &samples_[0];

        /* 64-bit accumulators for the two components of the output. */
        int64_t t_real = 0;
        int64_t t_imag = 0;

        while(loop_count > 0) {
            /* Each step loads one packed tap word and one packed sample word.
             * __SMLSLD adds (low * low - high * high) to its accumulator and
             * __SMLALDX adds (low * high + high * low), i.e. the two parts of
             * a complex product.
             * NOTE(review): presumably the real part sits in the low halfword
             * and the imaginary part in the high halfword, as the accumulator
             * names suggest -- TODO confirm.
             */
            const auto tap0 = *__SIMD32(t_p)++;
            const auto sample0 = *__SIMD32(z_p)++;
            const auto tap1 = *__SIMD32(t_p)++;
            const auto sample1 = *__SIMD32(z_p)++;
            t_real = __SMLSLD(sample0, tap0, t_real);
            t_imag = __SMLALDX(sample0, tap0, t_imag);
            t_real = __SMLSLD(sample1, tap1, t_real);
            t_imag = __SMLALDX(sample1, tap1, t_imag);

            const auto tap2 = *__SIMD32(t_p)++;
            const auto sample2 = *__SIMD32(z_p)++;
            const auto tap3 = *__SIMD32(t_p)++;
            const auto sample3 = *__SIMD32(z_p)++;
            t_real = __SMLSLD(sample2, tap2, t_real);
            t_imag = __SMLALDX(sample2, tap2, t_imag);
            t_real = __SMLSLD(sample3, tap3, t_real);
            t_imag = __SMLALDX(sample3, tap3, t_imag);

            const auto tap4 = *__SIMD32(t_p)++;
            const auto sample4 = *__SIMD32(z_p)++;
            const auto tap5 = *__SIMD32(t_p)++;
            const auto sample5 = *__SIMD32(z_p)++;
            t_real = __SMLSLD(sample4, tap4, t_real);
            t_imag = __SMLALDX(sample4, tap4, t_imag);
            t_real = __SMLSLD(sample5, tap5, t_real);
            t_imag = __SMLALDX(sample5, tap5, t_imag);

            const auto tap6 = *__SIMD32(t_p)++;
            const auto sample6 = *__SIMD32(z_p)++;
            const auto tap7 = *__SIMD32(t_p)++;
            const auto sample7 = *__SIMD32(z_p)++;
            t_real = __SMLSLD(sample6, tap6, t_real);
            t_imag = __SMLALDX(sample6, tap6, t_imag);
            t_real = __SMLSLD(sample7, tap7, t_real);
            t_imag = __SMLALDX(sample7, tap7, t_imag);

            loop_count--;
        }

        /* TODO: Re-evaluate whether saturation is performed, normalization,
         * all that jazz.
         */
        /* Drop the 16 fractional bits of the accumulators (matching the
         * 1 << 16 tap normalization noted above), narrow to 32 bits, then
         * saturate each component to a signed 16-bit value.
         */
        const int32_t r = t_real >> 16;
        const int32_t i = t_imag >> 16;
        const int32_t r_sat = __SSAT(r, 16);
        const int32_t i_sat = __SSAT(i, 16);
        /* Pack r_sat into the low halfword and i_sat into the high halfword
         * of a single output word, then advance the output pointer.
         */
        *__SIMD32(dst_p)++ = __PKHBT(
            r_sat,
            i_sat,
            16
        );

        /* Shift sample buffer left/down by decimation factor. */
        /* (taps_count_ - decimation_factor_) entries move from
         * samples_[decimation_factor_] to samples_[0]: first in groups of
         * unroll_factor words, then the remainder one element at a time.
         */
        const size_t unroll_factor = 4;
        size_t shift_count = (taps_count_ - decimation_factor_) / unroll_factor;

        sample_t* t = &samples_[0];
        const sample_t* s = &samples_[decimation_factor_];

        while(shift_count > 0) {
            *__SIMD32(t)++ = *__SIMD32(s)++;
            *__SIMD32(t)++ = *__SIMD32(s)++;
            *__SIMD32(t)++ = *__SIMD32(s)++;
            *__SIMD32(t)++ = *__SIMD32(s)++;
            shift_count--;
        }

        /* Leftover entries that did not fill a whole unrolled group. */
        shift_count = (taps_count_ - decimation_factor_) % unroll_factor;
        while(shift_count > 0) {
            *(t++) = *(s++);
            shift_count--;
        }

        outer_count--;
    }

    return result;
}
/**
\brief Test case: TC_CoreSimd_ParMul16
\details
- Exercise the dual signed 16-bit multiply intrinsics and compare each result
  against a hand-computed value:
    __SMLAD,  __SMLADX   (32-bit accumulate, sum of products)
    __SMLALD, __SMLALDX  (64-bit accumulate, sum of products)
    __SMLSD,  __SMLSDX   (32-bit accumulate, difference of products)
    __SMLSLD, __SMLSLDX  (64-bit accumulate, difference of products)
    __SMUAD,  __SMUADX   (sum of products, no accumulate)
    __SMUSD,  __SMUSDX   (difference of products, no accumulate)
- The "X" variants swap the halfwords of the second operand before multiplying.
- The body is compiled only when the target provides the DSP extension.
*/
void TC_CoreSimd_ParMul16 (void)
{
#if ((defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__  == 1)) || \
     (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))     )
  /* volatile keeps the compiler from folding the intrinsic calls away. */
  volatile int32_t lhs;
  volatile int32_t rhs;
  volatile int32_t acc32;
  volatile int32_t out32;
  volatile int64_t acc64;
  volatile int64_t out64;

  /* __SMLAD: acc + (2*4 + 3*5) = acc + 0x17 */
  lhs   = 0x00030002;
  rhs   = 0x00050004;
  acc32 = 0x20000000;
  out32 = __SMLAD(lhs, rhs, acc32);
  ASSERT_TRUE(out32 == 0x20000017);

  /* __SMLADX: acc + (2*5 + 3*4) = acc + 0x16 */
  lhs   = 0x00030002;
  rhs   = 0x00050004;
  acc32 = 0x00000800;
  out32 = __SMLADX(lhs, rhs, acc32);
  ASSERT_TRUE(out32 == 0x00000816);

  /* __SMLALD: 64-bit acc + (2*4 + 3*5) = acc + 0x17 */
  lhs   = 0x00030002;
  rhs   = 0x00050004;
  acc64 = 0x00000000200000000LL;
  out64 = __SMLALD(lhs, rhs, acc64);
  ASSERT_TRUE(out64 == 0x0000000200000017LL);

  /* __SMLALDX: 64-bit acc + (2*5 + 3*4) = acc + 0x16 */
  lhs   = 0x00030002;
  rhs   = 0x00050004;
  acc64 = 0x00000000200000000LL;
  out64 = __SMLALDX(lhs, rhs, acc64);
  ASSERT_TRUE(out64 == 0x0000000200000016LL);

  /* __SMLSD: acc + (6*4 - 3*5) = acc + 9 */
  lhs   = 0x00030006;
  rhs   = 0x00050004;
  acc32 = 0x00000800;
  out32 = __SMLSD(lhs, rhs, acc32);
  ASSERT_TRUE(out32 == 0x00000809);

  /* __SMLSDX: acc + (2*5 - 3*4) = acc - 2 */
  lhs   = 0x00030002;
  rhs   = 0x00050004;
  acc32 = 0x00000800;
  out32 = __SMLSDX(lhs, rhs, acc32);
  ASSERT_TRUE(out32 == 0x000007FE);

  /* __SMLSLD: 64-bit acc + (6*4 - 3*5) = acc + 9 */
  lhs   = 0x00030006;
  rhs   = 0x00050004;
  acc64 = 0x00000000200000000LL;
  out64 = __SMLSLD(lhs, rhs, acc64);
  ASSERT_TRUE(out64 == 0x0000000200000009LL);

  /* __SMLSLDX: 64-bit acc + (6*5 - 3*4) = acc + 0x12 */
  lhs   = 0x00030006;
  rhs   = 0x00050004;
  acc64 = 0x00000000200000000LL;
  out64 = __SMLSLDX(lhs, rhs, acc64);
  ASSERT_TRUE(out64 == 0x0000000200000012LL);

  /* __SMUAD, positive operands: 1*2 + 3*4 = 14 */
  lhs   = 0x00030001;
  rhs   = 0x00040002;
  out32 = __SMUAD(lhs,rhs);
  ASSERT_TRUE(out32 == 0x0000000E);

  /* __SMUAD, negative halfwords: (-1)*2 + (-3)*4 = -14 */
  lhs   = (int32_t)0xFFFDFFFF;
  rhs   = (int32_t)0x00040002;
  out32 = __SMUAD(lhs,rhs);
  ASSERT_TRUE(out32 == (int32_t)0xFFFFFFF2);

  /* __SMUADX, positive operands: 1*4 + 3*2 = 10 */
  lhs   = 0x00030001;
  rhs   = 0x00040002;
  out32 = __SMUADX(lhs,rhs);
  ASSERT_TRUE(out32 == 0x0000000A);

  /* __SMUADX, negative halfwords: (-1)*4 + (-3)*2 = -10 */
  lhs   = (int32_t)0xFFFDFFFF;
  rhs   = (int32_t)0x00040002;
  out32 = __SMUADX(lhs,rhs);
  ASSERT_TRUE(out32 == (int32_t)0xFFFFFFF6);

  /* __SMUSD, positive operands: 1*2 - 3*4 = -10 */
  lhs   = (int32_t)0x00030001;
  rhs   = (int32_t)0x00040002;
  out32 = __SMUSD(lhs,rhs);
  ASSERT_TRUE(out32 == (int32_t)0xFFFFFFF6);

  /* __SMUSD, negative halfwords: (-1)*2 - (-3)*4 = 10 */
  lhs   = (int32_t)0xFFFDFFFF;
  rhs   = (int32_t)0x00040002;
  out32 = __SMUSD(lhs,rhs);
  ASSERT_TRUE(out32 == 0x0000000A);

  /* __SMUSDX, positive operands: 1*4 - 3*2 = -2 */
  lhs   = 0x00030001;
  rhs   = 0x00040002;
  out32 = __SMUSDX(lhs,rhs);
  ASSERT_TRUE(out32 == (int32_t)0xFFFFFFFE);

  /* __SMUSDX, negative halfwords: (-1)*4 - (-3)*2 = 2 */
  lhs   = (int32_t)0xFFFDFFFF;
  rhs   = (int32_t)0x00040002;
  out32 = __SMUSDX(lhs,rhs);
  ASSERT_TRUE(out32 == (int32_t)0x00000002);
#endif
}