Esempio n. 1
0
/**
 * @brief Complex-to-complex 1-D FFT/IFFT of int16 data using the NEON butterflies.
 * @param[out]  *fout            output buffer; may be the same pointer as fin
 * @param[in]   *fin             input buffer
 * @param[in]   *twiddles        twiddle buffer handed to the butterfly routine
 * @param[in]   *factors         factor buffer; &factors[2] is handed to the bit-reversal step
 * @param[in]   nfft             number of complex points (sizes the temp buffer and the copy-back)
 * @param[in]   inverse_fft      non-zero selects the inverse butterfly, zero the forward one
 * @return none.
 * The input is bit-reversed into a working buffer and the butterflies are then
 * run on that buffer. When fin == fout the working buffer is a temporary
 * allocation that is copied back into fout afterwards. If that allocation
 * fails the function returns without touching fout.
 */
void ne10_fft_c2c_1d_int16_neon (ne10_fft_cpx_int16_t *fout,
                                 ne10_fft_cpx_int16_t *fin,
                                 ne10_fft_cpx_int16_t *twiddles,
                                 ne10_int32_t *factors,
                                 ne10_int32_t nfft,
                                 ne10_int32_t inverse_fft)
{
    ne10_fft_cpx_int16_t *tmpbuf_ = NULL;
    ne10_fft_cpx_int16_t *work = fout;

    if (fin == fout)
    {
        /* NOTE: an in-place request is served by an out-of-place FFT into a temp buffer */
        tmpbuf_ = (ne10_fft_cpx_int16_t*) NE10_MALLOC (sizeof (ne10_fft_cpx_int16_t) * nfft);
        if (tmpbuf_ == NULL)
        {
            // Out of memory: nothing sensible can be computed, leave fout as it was.
            return;
        }
        work = tmpbuf_;
    }

    // copy the data from input to the working buffer with bit reversal
    ne10_data_bitreversal_int16 (work, fin, 1, &factors[2]);

    if (inverse_fft)
        ne10_mixed_radix_butterfly_inverse_int16_neon (work, factors, twiddles);
    else
        ne10_mixed_radix_butterfly_int16_neon (work, factors, twiddles);

    if (tmpbuf_ != NULL)
    {
        // in-place case: publish the result and release the temporary
        memcpy (fout, tmpbuf_, sizeof (ne10_fft_cpx_int16_t) * nfft);
        NE10_FREE (tmpbuf_);
    }
}
Esempio n. 2
0
 * @return none.
 * The function implements a mixed radix-2/4 FFT (real to complex). Lengths of 2^N (N = 2, 3, 4, 5, 6, ...) are supported.
 * Otherwise, we allocate a temp buffer (the same size as the input buffer) for storing intermediate results.
 * For the usage of this function, please check test/test_suite_fft_int16.c
 */
void ne10_fft_r2c_1d_int16_scaled_neon (ne10_fft_cpx_int16_t *fout,
                                 ne10_int16_t *fin,
                                 ne10_fft_cpx_int16_t *twiddles,
                                 ne10_fft_cpx_int16_t *super_twiddles,
                                 ne10_int32_t *factors,
                                 ne10_int32_t nfft)
{
    // The real input of length nfft is processed as nfft/2 complex samples.
    ne10_int32_t ncfft = nfft >> 1;

    /* malloc a temp buffer for the half-length complex FFT */
    ne10_fft_cpx_int16_t *tmpbuf_ = (ne10_fft_cpx_int16_t*) NE10_MALLOC (sizeof (ne10_fft_cpx_int16_t) * ncfft);
    if (tmpbuf_ == NULL)
    {
        // Out of memory: return without writing fout instead of dereferencing NULL.
        return;
    }

    // reinterpret the real input as complex pairs and bit-reverse it into the temp buffer
    ne10_data_bitreversal_int16 (tmpbuf_, (ne10_fft_cpx_int16_t*) fin, 1, &factors[2]);
    ne10_mixed_radix_butterfly_int16_neon (tmpbuf_, factors, twiddles);

    // split step: turns the half-length complex result into fout using super_twiddles
    ne10_fft_split_r2c_1d_int16 (fout, tmpbuf_, super_twiddles, ncfft);

    NE10_FREE (tmpbuf_);
}

/**
 * @brief Mixed radix-2/4 IFFT (complex to real) of int16 data.
 * @param[out]  *fout            point to the output buffer
 * @param[in]   *fin             point to the input buffer
 * @param[in]   *twiddles        point to the twiddle buffer
Esempio n. 3
0
int main(int argc, char **argv)
{
	int input_fd;
	int output_fd;
	int fft_dev_fd;
	int result;
	IN_SAMPLE_TYPE *in_buf;
	OUT_SAMPLE_TYPE *out_buf;
	
	initialize_everything(argc, argv);
	
	/* allocate storage for input, output and config buffers */
	in_buf = (IN_SAMPLE_TYPE*) NE10_MALLOC (FFT_POINTS * sizeof(IN_SAMPLE_TYPE));
	if(in_buf == NULL)
		error(1, errno, "in_buf allocation");
		
	out_buf = (OUT_SAMPLE_TYPE*) NE10_MALLOC (FFT_POINTS * sizeof(OUT_SAMPLE_TYPE));
	if(out_buf == NULL)
		error(1, errno, "out_buf allocation");
		
	/* open the input and output files and fft dev */
	input_fd = open(g_input_filename, O_RDONLY);
	if(input_fd < 0)
		error(1, errno, "opening input file '%s'", g_input_filename);
	
	output_fd = open(g_output_filename, O_WRONLY | O_CREAT);
	if(output_fd < 0)
		error(1, errno, "opening output file '%s'", g_output_filename);

	fft_dev_fd = open("/dev/fft", O_RDWR);
	if(fft_dev_fd < 0)
		error(1, errno, "opening fft_dev_fd");

	/* capture the start value of the GT */
	g_start_time = get_gt_value();
	
	/* read the input data */
	result = read(input_fd, in_buf, FFT_POINTS * sizeof(IN_SAMPLE_TYPE));
	if(result < 0)
		error(1, errno, "read input file");
	if(result != (FFT_POINTS * sizeof(IN_SAMPLE_TYPE)))
		error(1, 0, "input data size, expected %d but got %d", FFT_POINTS * sizeof(IN_SAMPLE_TYPE), result);
	
	/* perform FFT with FPGA hardware, 16 bit input 24/32 bit output */
	result = write(fft_dev_fd, in_buf, FFT_POINTS * sizeof(IN_SAMPLE_TYPE));
	if(result < 0)
		error(1, errno, "write to fft_dev_fd");
	if (result != (int)(FFT_POINTS * sizeof(IN_SAMPLE_TYPE)))
		error(1, 0, "fft_dev_fd input data size, expected %d but got %d", FFT_POINTS * sizeof(IN_SAMPLE_TYPE), result);
	
	result = read(fft_dev_fd, out_buf, FFT_POINTS * sizeof(OUT_SAMPLE_TYPE));
	if(result < 0)
		error(1, errno, "read from fft_dev_fd");
	if (result != (int)(FFT_POINTS * sizeof(OUT_SAMPLE_TYPE)))
		error(1, 0, "fft_dev_fd output data size, expected %d but got %d", FFT_POINTS * sizeof(OUT_SAMPLE_TYPE), result);
	
	/* write the output data */
	result = write(output_fd, out_buf, FFT_POINTS * sizeof(OUT_SAMPLE_TYPE));
	if(result < 0)
		error(1, errno, "write output file");
	if(result != (FFT_POINTS * sizeof(OUT_SAMPLE_TYPE)))
		error(1, 0, "output data size, expected %d but got %d", FFT_POINTS * sizeof(OUT_SAMPLE_TYPE), result);

	/* capture the end value of the GT */
	g_end_time = get_gt_value();

	/* close the input and output files and fft dev */
	close(fft_dev_fd);
	close(output_fd);
	close(input_fd);

	/* free storage for input, output and config buffers */
	NE10_FREE (out_buf);
	NE10_FREE (in_buf);
	
	print_results();
	release_everything();
	return 0;
}
Esempio n. 4
0
/**
 * @brief User-callable function to allocate all necessary storage space for the fft.
 * @param[in]   nfft             length of FFT
 * @return      st               point to the FFT config memory, or NULL when the
 *                               allocation or the factorisation fails. The memory
 *                               comes from a single NE10_MALLOC call.
 * The function allocates all necessary storage space for the fft in one block:
 * the state struct, followed (after alignment) by the factor table, the twiddle
 * table and a work buffer of nfft elements. It also factors the length of the
 * FFT and generates the twiddle coefficients.
 * Lengths below 16 are delegated to ne10_fft_alloc_c2c_int32_c().
 */
ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_neon (ne10_int32_t nfft)
{
    // For input shorter than 16, fall back to c version.
    // We would not get much improvement from NEON for these cases.
    if (nfft < 16)
    {
        return ne10_fft_alloc_c2c_int32_c (nfft);
    }

    ne10_fft_cfg_int32_t st = NULL;
    ne10_uint32_t memneeded = sizeof (ne10_fft_state_int32_t)
                              + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors*/
                              + sizeof (ne10_fft_cpx_int32_t) * nfft        /* twiddle*/
                              + sizeof (ne10_fft_cpx_int32_t) * nfft        /* buffer*/
                              + NE10_FFT_BYTE_ALIGNMENT;     /* slack for alignment*/

    st = (ne10_fft_cfg_int32_t) NE10_MALLOC (memneeded);

    // Bad allocation.
    if (st == NULL)
    {
        return NULL;
    }

    // Carve the sub-buffers out of the single allocation, right behind the state struct.
    // NOTE(review): NE10_BYTE_ALIGNMENT presumably rounds `address` up to the alignment - confirm.
    uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_state_int32_t);
    NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT);
    st->factors = (ne10_int32_t*) address;
    st->twiddles = (ne10_fft_cpx_int32_t*) (st->factors + (NE10_MAXFACTORS * 2));
    st->buffer = st->twiddles + nfft;

    // st->last_twiddles is default NULL.
    // Calling fft_c or fft_neon is decided by this pointer.
    st->last_twiddles = NULL;

    st->nfft = nfft;
    if (nfft % NE10_FFT_PARA_LEVEL == 0)
    {
        // Size of FFT satisfies requirement of NEON optimization:
        // factor the reduced length and reserve the tail of the twiddle
        // table for the last-stage twiddles.
        st->nfft /= NE10_FFT_PARA_LEVEL;
        st->last_twiddles = st->twiddles + nfft / NE10_FFT_PARA_LEVEL;
    }

    ne10_int32_t result = ne10_factor (st->nfft, st->factors, NE10_FACTOR_DEFAULT);

    // Can not factor.
    if (result == NE10_ERR)
    {
        NE10_FREE (st);
        return NULL;
    }

    // Check if radix-8 can be enabled
    ne10_int32_t stage_count    = st->factors[0];
    ne10_int32_t algorithm_flag = st->factors[2 * (stage_count + 1)];

    if (algorithm_flag != NE10_FFT_ALG_ANY)
    {
        // Fall back: undo the NEON length reduction and set up the
        // state for the full length with the default factorisation.
        st->last_twiddles = NULL;
        st->nfft = nfft;
        result = ne10_factor (st->nfft, st->factors, NE10_FACTOR_DEFAULT);
        if (result == NE10_ERR)
        {
            // Previously ignored; treat like every other factorisation failure.
            NE10_FREE (st);
            return NULL;
        }
        ne10_fft_generate_twiddles_int32 (st->twiddles, st->factors, st->nfft);
        return st;
    }

    // Enable radix-8.
    result = ne10_factor (st->nfft, st->factors, NE10_FACTOR_EIGHT);
    if (result == NE10_ERR)
    {
        NE10_FREE (st);
        return NULL;
    }
    ne10_fft_generate_twiddles_int32 (st->twiddles, st->factors, st->nfft);

    // Generate super twiddles for the last stage.
    if (nfft % NE10_FFT_PARA_LEVEL == 0)
    {
        // Size of FFT satisfies requirement of NEON optimization.
        ne10_fft_generate_twiddles_line_int32 (st->last_twiddles,
                st->nfft,
                1,
                NE10_FFT_PARA_LEVEL,
                nfft);
    }
    return st;
}
int main(int argc, char **argv)
{
	int input_fd;
	int output_fd;
	int result;
	IN_SAMPLE_TYPE *in_buf;
	OUT_SAMPLE_TYPE *out_buf;
	CFG_TYPE cfg;
	int i;
	
	initialize_everything(argc, argv);
	
	/* allocate storage for input, output and config buffers */
	in_buf = (IN_SAMPLE_TYPE*) NE10_MALLOC (FFT_POINTS * sizeof(IN_SAMPLE_TYPE));
	if(in_buf == NULL)
		error(1, errno, "in_buf allocation");
		
	out_buf = (OUT_SAMPLE_TYPE*) NE10_MALLOC (FFT_POINTS * sizeof(OUT_SAMPLE_TYPE));
	if(out_buf == NULL)
		error(1, errno, "out_buf allocation");
		
	cfg = CFG_ALLOC_FUNC(FFT_CALC_POINTS);
	if(cfg == NULL)
		error(1, errno, "cfg allocation");

	/* open the input and output files */
	input_fd = open(g_input_filename, O_RDONLY);
	if(input_fd < 0)
		error(1, errno, "opening input file '%s'", g_input_filename);
	
	output_fd = open(g_output_filename, O_WRONLY | O_CREAT);
	if(output_fd < 0)
		error(1, errno, "opening output file '%s'", g_output_filename);

	/* capture the start value of the GT */
	g_start_time = get_gt_value();
	
	/* read the input data */
	result = read(input_fd, in_buf, FFT_POINTS * sizeof(IN_SAMPLE_TYPE));
	if(result < 0)
		error(1, errno, "read input file");
	if(result != (FFT_POINTS * sizeof(IN_SAMPLE_TYPE)))
		error(1, 0, "input data size, expected %d but got %d", FFT_POINTS * sizeof(IN_SAMPLE_TYPE), result);

	/* compute FFT */
	for (i = 0; i < FFT_CALC_ROUNDS ; i++) {
		FFT_FUNC(out_buf + (i * FFT_CALC_POINTS), in_buf + (i * FFT_CALC_POINTS), cfg, 0, 1);
	}
	
	/* write the output data */
	result = write(output_fd, out_buf, FFT_POINTS * sizeof(OUT_SAMPLE_TYPE));
	if(result < 0)
		error(1, errno, "write output file");
	if(result != (FFT_POINTS * sizeof(OUT_SAMPLE_TYPE)))
		error(1, 0, "output data size, expected %d but got %d", FFT_POINTS * sizeof(OUT_SAMPLE_TYPE), result);

	/* capture the end value of the GT */
	g_end_time = get_gt_value();

	/* close the input and output files */
	close(output_fd);
	close(input_fd);

	/* free storage for input, output and config buffers */
	NE10_FREE (cfg);
	NE10_FREE (out_buf);
	NE10_FREE (in_buf);
	
	print_results();
	release_everything();
	return 0;
}
int main(int argc, char **argv)
{
	int input_fd;
	int output_fd;
	int raw256stream_dev_fd;
	int result;
	IN_SAMPLE_TYPE *in_buf;
	int j;
	
	initialize_everything(argc, argv);
	
	/* allocate storage for input */
	in_buf = (IN_SAMPLE_TYPE*) NE10_MALLOC (FFT_POINTS * sizeof(IN_SAMPLE_TYPE));
	if(in_buf == NULL)
		error(1, errno, "in_buf allocation");
		
	/* open the input and output files and raw256stream device */
	input_fd = open(g_input_filename, O_RDONLY);
	if(input_fd < 0)
		error(1, errno, "opening input file '%s'", g_input_filename);
	
	output_fd = open(g_output_filename, O_WRONLY | O_CREAT);
	if(output_fd < 0)
		error(1, errno, "opening output file '%s'", g_output_filename);

	raw256stream_dev_fd = open("/dev/raw256stream", O_RDWR);
	if(raw256stream_dev_fd < 0)
		error(1, errno, "opening raw256stream_dev_fd");

	/* read the input data */
	result = read(input_fd, in_buf, FFT_CALC_POINTS * sizeof(IN_SAMPLE_TYPE));
	if(result < 0)
		error(1, errno, "read input file");
	if(result != (FFT_CALC_POINTS * sizeof(IN_SAMPLE_TYPE)))
		error(1, 0, "input data size, expected %d but got %d", FFT_CALC_POINTS * sizeof(IN_SAMPLE_TYPE), result);
	
	/* write the waveform buffer */
	result = write(raw256stream_dev_fd, in_buf, FFT_CALC_POINTS * sizeof(IN_SAMPLE_TYPE));
	if(result < 0)
		error(1, errno, "write waveform buffer");
	if(result != (FFT_CALC_POINTS * sizeof(IN_SAMPLE_TYPE)))
		error(1, 0, "output data size, expected %d but got %d", FFT_CALC_POINTS * sizeof(IN_SAMPLE_TYPE), result);
	
	/* capture the start value of the GT */
	g_start_time = get_gt_value();
	
	for(j = 0 ; j < FFT_IN_ROUNDS ; j++) {
		/* read the input stream */
		result = read(raw256stream_dev_fd, in_buf, FFT_POINTS * sizeof(IN_SAMPLE_TYPE));
		if(result < 0)
			error(1, errno, "read input stream");
		if(result != (FFT_POINTS * sizeof(IN_SAMPLE_TYPE)))
			error(1, 0, "input data size, expected %d but got %d", FFT_POINTS * sizeof(IN_SAMPLE_TYPE), result);

		/* write the output data */
		result = write(output_fd, in_buf, FFT_POINTS * sizeof(IN_SAMPLE_TYPE));
		if(result < 0)
			error(1, errno, "write output file");
		if(result != (FFT_POINTS * sizeof(IN_SAMPLE_TYPE)))
			error(1, 0, "output data size, expected %d but got %d", FFT_POINTS * sizeof(IN_SAMPLE_TYPE), result);
	}
	
	/* capture the end value of the GT */
	g_end_time = get_gt_value();

	/* close the input and output files and stream device */
	close(raw256stream_dev_fd);
	close(output_fd);
	close(input_fd);
	
	/* free storage for input, output and config buffers */
	NE10_FREE (in_buf);
	
	print_results();
	release_everything();
	return 0;
}
/**
 * @brief Generic-radix butterfly stage for float32 data, computed directly from the DFT definition.
 * @param[out]  *Fout            output buffer; each iteration writes radix elements spaced out_step apart
 * @param[in]   *Fin             input buffer; each iteration reads radix elements spaced in_step apart
 * @param[in]   *twiddles        table of radix twiddle factors, indexed by (q * q1) mod radix
 * @param[in]   radix            number of points per butterfly
 * @param[in]   in_step          input stride and also the number of butterflies computed
 * @param[in]   out_step         output stride
 * @param[in]   is_inverse       non-zero: conjugate the input before and the output after the butterfly
 * @param[in]   is_scaled        non-zero: scale inverse input by 1/(radix*in_step); only honoured
 *                               when NE10_DSP_CFFT_SCALING is defined
 * @return none.
 * A scratch buffer of radix elements is allocated for the duration of the
 * call. If that allocation fails the function returns without writing Fout.
 */
static inline void ne10_radix_generic_butterfly_float32_c (ne10_fft_cpx_float32_t *Fout,
        const ne10_fft_cpx_float32_t *Fin,
        const ne10_fft_cpx_float32_t *twiddles,
        const ne10_int32_t radix,
        const ne10_int32_t in_step,
        const ne10_int32_t out_step,
        const ne10_int32_t is_inverse,
        const ne10_int32_t is_scaled)
{
    ne10_int32_t q, q1;
    ne10_int32_t f_count = in_step;

    ne10_fft_cpx_float32_t tmp;
    ne10_fft_cpx_float32_t *scratch;
    scratch = (ne10_fft_cpx_float32_t *) NE10_MALLOC (radix *
            sizeof (ne10_fft_cpx_float32_t));
    if (scratch == NULL)
    {
        // Out of memory: bail out instead of dereferencing NULL below.
        return;
    }

    for (; f_count > 0; f_count--)
    {
        // load one butterfly's worth of input into scratch
        for (q1 = 0; q1 < radix; q1++)
        {
            scratch[q1] = Fin[in_step * q1];
            if (is_inverse)
            {
                // inverse transform = conjugate, forward transform, conjugate
                scratch[q1].i = -scratch[q1].i;
#ifdef NE10_DSP_CFFT_SCALING
                if (is_scaled)
                {
                    const ne10_float32_t one_by_nfft = 1.0 / (radix * in_step);
                    scratch[q1].r *= one_by_nfft;
                    scratch[q1].i *= one_by_nfft;
                }
#endif
            }
        } // q1

        // compute Fout[q1 * out_step] from definition
        for (q1 = 0; q1 < radix; q1++)
        {
            // twidx tracks (q * q1) mod radix incrementally; since q1 < radix
            // a single subtraction is enough to wrap it.
            ne10_int32_t twidx = 0;
            Fout[q1 * out_step] = scratch[0];
            for (q = 1; q < radix; q++)
            {
                twidx += 1 * q1;
                if (twidx >= radix)
                {
                    twidx -= radix;
                }
                NE10_CPX_MUL_F32 (tmp, scratch[q], twiddles[twidx]);
                NE10_CPX_ADDTO (Fout[q1 * out_step], tmp);
            } // q
            if (is_inverse)
            {
                // undo the input conjugation
                Fout[q1 * out_step].i = -Fout[q1 * out_step].i;
            }
        } // q1

        // next butterfly: outputs are packed radix apart, inputs are adjacent
        Fout += radix;
        Fin++;
    }

    NE10_FREE (scratch);
}