buffer_c16_t FIRAndDecimateComplex::execute(
	const buffer_c16_t& src,
	const buffer_c16_t& dst
) {
	/* Complex int16 FIR filter combined with decimation.
	 *
	 * Consumes src.count samples from src.p and writes
	 * src.count / decimation_factor_ samples to dst.p. The returned buffer
	 * describes the written range, with the sampling rate divided by
	 * decimation_factor_.
	 *
	 * Requirements that are relied upon but not checked here:
	 *  - src.count is a multiple of decimation_factor_ (a remainder is ignored),
	 *  - taps_count_ is a multiple of 8 (the MAC loop handles 8 taps per pass),
	 *  - taps_count_ >= decimation_factor_.
	 *
	 * Each accumulated sum is shifted right by 16 bits and saturated to
	 * 16 bits before being stored.
	 */
	const size_t output_samples = src.count / decimation_factor_;
	const auto output_sampling_rate = src.sampling_rate / decimation_factor_;
	const buffer_c16_t decimated { dst.p, output_samples, output_sampling_rate };

	const sample_t* in_p = src.p;
	sample_t* out_p = dst.p;

	for(size_t remaining = output_samples; remaining > 0; remaining--) {
		/* Append decimation_factor_ fresh input samples at the tail of the
		 * delay line.
		 */
		auto tail_p = &samples_[taps_count_ - decimation_factor_];
		for(size_t k=0; k<decimation_factor_; k++) {
			*__SIMD32(tail_p)++ = *__SIMD32(in_p)++;
		}

		auto tap_p = &taps_reversed_[0];
		auto delay_p = &samples_[0];

		int64_t acc_real = 0;
		int64_t acc_imag = 0;

		/* Multiply-accumulate, eight taps per pass (four pairs, unrolled by
		 * hand). Each 32-bit word packs two 16-bit halves:
		 *   __SMLSLD  adds (lo*lo - hi*hi) to the 64-bit accumulator,
		 *   __SMLALDX adds (lo*hi + hi*lo) to the 64-bit accumulator.
		 */
		for(size_t pass = taps_count_ / 8; pass > 0; pass--) {
			{
				const auto tap_a = *__SIMD32(tap_p)++;
				const auto smp_a = *__SIMD32(delay_p)++;
				const auto tap_b = *__SIMD32(tap_p)++;
				const auto smp_b = *__SIMD32(delay_p)++;
				acc_real = __SMLSLD(smp_a, tap_a, acc_real);
				acc_imag = __SMLALDX(smp_a, tap_a, acc_imag);
				acc_real = __SMLSLD(smp_b, tap_b, acc_real);
				acc_imag = __SMLALDX(smp_b, tap_b, acc_imag);
			}
			{
				const auto tap_a = *__SIMD32(tap_p)++;
				const auto smp_a = *__SIMD32(delay_p)++;
				const auto tap_b = *__SIMD32(tap_p)++;
				const auto smp_b = *__SIMD32(delay_p)++;
				acc_real = __SMLSLD(smp_a, tap_a, acc_real);
				acc_imag = __SMLALDX(smp_a, tap_a, acc_imag);
				acc_real = __SMLSLD(smp_b, tap_b, acc_real);
				acc_imag = __SMLALDX(smp_b, tap_b, acc_imag);
			}
			{
				const auto tap_a = *__SIMD32(tap_p)++;
				const auto smp_a = *__SIMD32(delay_p)++;
				const auto tap_b = *__SIMD32(tap_p)++;
				const auto smp_b = *__SIMD32(delay_p)++;
				acc_real = __SMLSLD(smp_a, tap_a, acc_real);
				acc_imag = __SMLALDX(smp_a, tap_a, acc_imag);
				acc_real = __SMLSLD(smp_b, tap_b, acc_real);
				acc_imag = __SMLALDX(smp_b, tap_b, acc_imag);
			}
			{
				const auto tap_a = *__SIMD32(tap_p)++;
				const auto smp_a = *__SIMD32(delay_p)++;
				const auto tap_b = *__SIMD32(tap_p)++;
				const auto smp_b = *__SIMD32(delay_p)++;
				acc_real = __SMLSLD(smp_a, tap_a, acc_real);
				acc_imag = __SMLALDX(smp_a, tap_a, acc_imag);
				acc_real = __SMLSLD(smp_b, tap_b, acc_real);
				acc_imag = __SMLALDX(smp_b, tap_b, acc_imag);
			}
		}

		/* TODO: Re-evaluate whether saturation is performed, normalization,
		 * all that jazz.
		 */
		const int32_t real_scaled = acc_real >> 16;
		const int32_t imag_scaled = acc_imag >> 16;
		const int32_t real_sat = __SSAT(real_scaled, 16);
		const int32_t imag_sat = __SSAT(imag_scaled, 16);
		/* Pack: real part in the low halfword, imaginary part in the high. */
		*__SIMD32(out_p)++ = __PKHBT(
			real_sat,
			imag_sat,
			16
		);

		/* Slide the delay line down by decimation_factor_ samples so the
		 * next batch of input can be appended at the tail: four samples per
		 * pass first, then whatever is left one at a time.
		 */
		const size_t unroll = 4;
		const size_t keep_count = taps_count_ - decimation_factor_;

		sample_t* move_dst = &samples_[0];
		const sample_t* move_src = &samples_[decimation_factor_];

		for(size_t quad = keep_count / unroll; quad > 0; quad--) {
			*__SIMD32(move_dst)++ = *__SIMD32(move_src)++;
			*__SIMD32(move_dst)++ = *__SIMD32(move_src)++;
			*__SIMD32(move_dst)++ = *__SIMD32(move_src)++;
			*__SIMD32(move_dst)++ = *__SIMD32(move_src)++;
		}

		for(size_t rest = keep_count % unroll; rest > 0; rest--) {
			*(move_dst++) = *(move_src++);
		}
	}

	return decimated;
}
/* Example #2 (0) */
/**
\brief Test case: TC_CoreSimd_ParMul16
\details
- Exercises the parallel (dual) signed 16-bit multiply intrinsics. Every
  32-bit operand packs two signed halfwords, and each result is compared
  against a hand-computed expected value:
  __SMLAD
  __SMLADX
  __SMLALD
  __SMLALDX
  __SMLSD
  __SMLSDX
  __SMLSLD
  __SMLSLDX
  __SMUAD
  __SMUADX
  __SMUSD
  __SMUSDX
- The body is compiled only when the target advertises the DSP extension
  (__ARM_ARCH_7EM__ or __ARM_FEATURE_DSP equal to 1); otherwise the test is
  empty.
*/
void TC_CoreSimd_ParMul16 (void) {
#if ((defined(__ARM_ARCH_7EM__)  && (__ARM_ARCH_7EM__  == 1)) || \
     (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1)))
  /* Operands and results are volatile -- presumably so the compiler cannot
   * evaluate the intrinsics at compile time (TODO confirm intent). */
  volatile int32_t x, y, acc32;
  volatile int32_t got32;

  volatile int64_t acc64;
  volatile int64_t got64;

  /* __SMLAD: (2*4) + (3*5) = 0x17, added to the 32-bit accumulator. */
  x     = 0x00030002;
  y     = 0x00050004;
  acc32 = 0x20000000;
  got32 = __SMLAD(x, y, acc32);
  ASSERT_TRUE(got32 == 0x20000017);

  /* __SMLADX: halves of y exchanged, (2*5) + (3*4) = 0x16, plus accumulator. */
  x     = 0x00030002;
  y     = 0x00050004;
  acc32 = 0x00000800;
  got32 = __SMLADX(x, y, acc32);
  ASSERT_TRUE(got32 == 0x00000816);

  /* __SMLALD: same sum as __SMLAD, added to a 64-bit accumulator. */
  x     = 0x00030002;
  y     = 0x00050004;
  acc64 = 0x0000000200000000LL;
  got64 = __SMLALD(x, y, acc64);
  ASSERT_TRUE(got64 == 0x0000000200000017LL);

  /* __SMLALDX: same sum as __SMLADX, added to a 64-bit accumulator. */
  x     = 0x00030002;
  y     = 0x00050004;
  acc64 = 0x0000000200000000LL;
  got64 = __SMLALDX(x, y, acc64);
  ASSERT_TRUE(got64 == 0x0000000200000016LL);

  /* __SMLSD: (6*4) - (3*5) = 9, added to the 32-bit accumulator. */
  x     = 0x00030006;
  y     = 0x00050004;
  acc32 = 0x00000800;
  got32 = __SMLSD(x, y, acc32);
  ASSERT_TRUE(got32 == 0x00000809);

  /* __SMLSDX: halves of y exchanged, (2*5) - (3*4) = -2, plus accumulator. */
  x     = 0x00030002;
  y     = 0x00050004;
  acc32 = 0x00000800;
  got32 = __SMLSDX(x, y, acc32);
  ASSERT_TRUE(got32 == 0x000007FE);

  /* __SMLSLD: same difference as __SMLSD, added to a 64-bit accumulator. */
  x     = 0x00030006;
  y     = 0x00050004;
  acc64 = 0x0000000200000000LL;
  got64 = __SMLSLD(x, y, acc64);
  ASSERT_TRUE(got64 == 0x0000000200000009LL);

  /* __SMLSLDX: halves of y exchanged, (6*5) - (3*4) = 0x12, 64-bit accumulator. */
  x     = 0x00030006;
  y     = 0x00050004;
  acc64 = 0x0000000200000000LL;
  got64 = __SMLSLDX(x, y, acc64);
  ASSERT_TRUE(got64 == 0x0000000200000012LL);

  /* __SMUAD: (1*2) + (3*4) = 14, no accumulator. */
  x = 0x00030001;
  y = 0x00040002;
  got32 = __SMUAD(x, y);
  ASSERT_TRUE(got32 == 0x0000000E);

  /* Negative halfwords: (-1*2) + (-3*4) = -14. */
  x = (int32_t)0xFFFDFFFF;
  y = (int32_t)0x00040002;
  got32 = __SMUAD(x, y);
  ASSERT_TRUE(got32 == (int32_t)0xFFFFFFF2);

  /* __SMUADX: halves of y exchanged, (1*4) + (3*2) = 10. */
  x = 0x00030001;
  y = 0x00040002;
  got32 = __SMUADX(x, y);
  ASSERT_TRUE(got32 == 0x0000000A);

  /* Negative halfwords: (-1*4) + (-3*2) = -10. */
  x = (int32_t)0xFFFDFFFF;
  y = (int32_t)0x00040002;
  got32 = __SMUADX(x, y);
  ASSERT_TRUE(got32 == (int32_t)0xFFFFFFF6);

  /* __SMUSD: (1*2) - (3*4) = -10. */
  x = 0x00030001;
  y = 0x00040002;
  got32 = __SMUSD(x, y);
  ASSERT_TRUE(got32 == (int32_t)0xFFFFFFF6);

  /* Negative halfwords: (-1*2) - (-3*4) = 10. */
  x = (int32_t)0xFFFDFFFF;
  y = (int32_t)0x00040002;
  got32 = __SMUSD(x, y);
  ASSERT_TRUE(got32 == 0x0000000A);

  /* __SMUSDX: halves of y exchanged, (1*4) - (3*2) = -2. */
  x = 0x00030001;
  y = 0x00040002;
  got32 = __SMUSDX(x, y);
  ASSERT_TRUE(got32 == (int32_t)0xFFFFFFFE);

  /* Negative halfwords: (-1*4) - (-3*2) = 2. */
  x = (int32_t)0xFFFDFFFF;
  y = (int32_t)0x00040002;
  got32 = __SMUSDX(x, y);
  ASSERT_TRUE(got32 == (int32_t)0x00000002);
#endif
}