/*
 * fft_2: for each of the n complex samples k, compute
 *
 *     o0[k] = i0[k] + i1[k] * twi
 *     o1[k] = i0[k] - i1[k] * twi
 *
 * where twi is read from the twi64k table at index (m + k) * incr
 * (truncated to unsigned short), with
 *
 *     m    = ((*ix) * n) % (N2 * 2^p)
 *     incr = N1 / 2^(p+1)
 *
 * All samples are complex single-precision values handled as __float2_t
 * pairs ([even]: real part, [odd]: imaginary part).  The aligned 8-byte
 * access intrinsics are used for every load and store.
 */
void fft_2(Param n, Param p, Param N2, Param N1, char* ix, CplxSp* i0, CplxSp* i1, CplxSp* o0, CplxSp* o1){
#if VERBOSE
    printf("Execute fft_2\n");
#endif

    const int base = (*ix)*n;
    const int offset = (base % (N2*(1<<(p))));
    const unsigned short stride = N1 / (1 << (p+1));

    CplxSp const* restrict src_a = i0;
    CplxSp const* restrict src_b = i1;
    CplxSp const* restrict twiddles = twi64k;
    CplxSp * restrict dst_sum = o0;
    CplxSp * restrict dst_diff = o1;

    /* Trip-count hint: at least 8 iterations, and always a multiple of 8. */
    #pragma MUST_ITERATE(8, ,8)
    for(int k=0; k<n; ++k){
        /* Index of the twiddle factor that belongs to sample k. */
        unsigned short twi_idx = (offset+k)*stride;

        __float2_t a = _amem8_f2_const(src_a + k);
        __float2_t b = _amem8_f2_const(src_b + k);
        __float2_t w = _amem8_f2_const(twiddles + twi_idx);

        /* Complex product i1[k] * twi, shared by both outputs. */
        __float2_t bw = _complex_mpysp(b, w);

        _amem8_f2(dst_sum + k)  = _daddsp(a, bw);
        _amem8_f2(dst_diff + k) = _dsubsp(a, bw);
    }
}
void intrinsicC_filters(float *inputComplex, int N_E, float *output ) { int filter_size, i, j; __float2_t x[MAX_FILTER_SIZE]; __float2_t y,z; __float2_t *p_in; __float2_t sum; float total_sum; /* * First filter, coefficients are (1/2 -1/2) (-1/2 1/2) */ p_in = (__float2_t *) inputComplex; filter_size = 2; x[0] = _ftof2(0.5, -0.5); x[1] = _ftof2(-0.5, 0.5); //#pragma MUST_ITERATE (128) for (i=0; i < N_E; i = i + 2*filter_size) { sum =_ftof2(0.0, 0.0); for (j=0; j < filter_size; j++) { y = _mem8_f2(p_in++); z = complexMultiply(y,x[j]); sum = _daddsp(sum, z); } } total_sum = _hif2(sum) * _hif2(sum) + _lof2(sum) * _lof2(sum); *output++ = total_sum; /* * End of filter 1 */ /* * Second filter, coefficients are (1/4 -1/4) (1/2 -1/2) (-1/4 1/4) (-1/2 1/2) */ p_in = (__float2_t *)inputComplex; filter_size = 4; x[0] = _ftof2(0.25, -0.25); x[1] = _ftof2(0.5, -0.5); x[2] = _ftof2(-0.25, 0.25); x[3] = _ftof2(-0.5, 0.5); //#pragma MUST_ITERATE (64) for (i=0; i < N_E; i = i + 2*filter_size) { sum =_ftof2(0.0, 0.0); for (j=0; j < filter_size; j++) { y = _mem8_f2(p_in++); z = complexMultiply(y,x[j]); sum = _daddsp(sum, z); } } total_sum = _hif2(sum) * _hif2(sum) + _lof2(sum) * _lof2(sum); *output++ = total_sum; /* * End of filter 2 */ /* * Third filter, coefficients are (1/4 -1/4) (1/2 -1/2) (-1/4 1/4) (-1/2 1/2) */ p_in = (__float2_t *) inputComplex; filter_size = 8; x[0] = _ftof2(0.5, -0.5); x[1] = _ftof2(1.0, -1.0); x[2] = _ftof2(0.5, -0.5); x[3] = _ftof2(0.0, 0.0); x[4] = _ftof2(-0.5, 0.5); x[5] = _ftof2(-1.0, 1.0); x[6] = _ftof2(-0.5, 0.5); x[7] = _ftof2(0.0, 0.0); //#pragma MUST_ITERATE (32) for (i=0; i < N_E; i = i + 2*filter_size) { sum =_ftof2(0.0, 0.0); for (j=0; j < filter_size; j++) { y = _mem8_f2(p_in++); z = complexMultiply(y,x[j]); sum = _daddsp(sum, z); } } total_sum = _hif2(sum) * _hif2(sum) + _lof2(sum) * _lof2(sum); *output++ = total_sum; p_in = (__float2_t *) inputComplex ; filter_size = 16 ; x[0] = _ftof2(0.25, -0.25) ; x[1] = _ftof2(0.5, -0.5) ; x[2] = 
_ftof2(0.75, -0.75) ; x[3] = _ftof2(1.0, -1.0) ; x[4] = _ftof2(0.75, -0.75) ; x[5] = _ftof2(0.5, -0.5) ; x[6] = _ftof2(0.25, -0.25) ; x[7] = _ftof2(0.0, 0.0) ; x[8] = _ftof2(-0.25, 0.25) ; x[9] = _ftof2(-0.5, 0.5) ; x[10] = _ftof2(-0.75, 0.75) ; x[11] = _ftof2(-1.0, 1.0) ; x[12] = _ftof2(-0.75, 0.75) ; x[13] = _ftof2(-0.5, 0.5) ; x[14] = _ftof2(-0.25, 0.25) ; x[15] = _ftof2(0.0, 0.0) ; //#pragma MUST_ITERATE (16) for (i=0; i < N_E; i = i + 2*filter_size) { sum =_ftof2(0.0, 0.0) ; for (j=0; j < filter_size; j++) { y = _mem8_f2(p_in++); z = complexMultiply(y,x[j]); sum = _daddsp(sum, z); } } total_sum = _hif2(sum) * _hif2(sum) + _lof2(sum) * _lof2(sum); *output++ = total_sum; /* * End of filter 3 */ }