void TimeOneNE10RFFT(int count, int fft_log_size, float signal_value, int signal_type) { OMX_F32* x; /* Source */ OMX_FC32* y; /* Transform */ OMX_F32* z; /* Inverse transform */ OMX_F32* temp; OMX_F32* y_true; /* True FFT */ struct AlignedPtr* x_aligned; struct AlignedPtr* y_aligned; struct AlignedPtr* z_aligned; int n; ne10_result_t status; ne10_fft_r2c_cfg_float32_t fft_fwd_spec; int fft_size; struct timeval start_time; struct timeval end_time; double elapsed_time; struct SnrResult snr_forward; struct SnrResult snr_inverse; fft_size = 1 << fft_log_size; x_aligned = AllocAlignedPointer(32, sizeof(*x) * 4 * fft_size); /* The transformed value is in CCS format and is has fft_size + 2 values */ y_aligned = AllocAlignedPointer(32, sizeof(*y) * (4 * fft_size + 2)); z_aligned = AllocAlignedPointer(32, sizeof(*z) * 4 * fft_size); x = x_aligned->aligned_pointer_; y = y_aligned->aligned_pointer_; z = z_aligned->aligned_pointer_; y_true = (OMX_F32*) malloc(sizeof(*y_true) * (fft_size + 2)); GenerateRealFloatSignal(x, (struct ComplexFloat*) y_true, fft_size, signal_type, signal_value); fft_fwd_spec = ne10_fft_alloc_r2c_float32(fft_size); if (!fft_fwd_spec) { fprintf(stderr, "NE10 RFFT: Cannot initialize FFT structure for order %d\n", fft_log_size); return; } if (do_forward_test) { GetUserTime(&start_time); for (n = 0; n < count; ++n) { ne10_fft_r2c_1d_float32_neon((ne10_fft_cpx_float32_t *) y, x, fft_fwd_spec); } GetUserTime(&end_time); elapsed_time = TimeDifference(&start_time, &end_time); CompareComplexFloat(&snr_forward, (OMX_FC32*) y, (OMX_FC32*) y_true, fft_size / 2 + 1); PrintResult("Forward NE10 RFFT", fft_log_size, elapsed_time, count, snr_forward.complex_snr_); if (verbose >= 255) { printf("FFT Actual:\n"); DumpArrayComplexFloat("y", fft_size / 2 + 1, y); printf("FFT Expected:\n"); DumpArrayComplexFloat("true", fft_size / 2 + 1, (OMX_FC32*) y_true); } } if (do_inverse_test) { // Ne10 FFTs destroy the input. 
GetUserTime(&start_time); for (n = 0; n < count; ++n) { //memcpy(y, y_true, (fft_size >> 1) * sizeof(*y)); // The inverse appears not to be working. ne10_fft_c2r_1d_float32_neon(z, (ne10_fft_cpx_float32_t *) y_true, fft_fwd_spec); } GetUserTime(&end_time); elapsed_time = TimeDifference(&start_time, &end_time); CompareFloat(&snr_inverse, (OMX_F32*) z, (OMX_F32*) x, fft_size); PrintResult("Inverse NE10 RFFT", fft_log_size, elapsed_time, count, snr_inverse.complex_snr_); if (verbose >= 255) { printf("IFFT Actual:\n"); DumpArrayFloat("z", fft_size, z); printf("IFFT Expected:\n"); DumpArrayFloat("x", fft_size, x); } } ne10_fft_destroy_r2c_float32(fft_fwd_spec); FreeAlignedPointer(x_aligned); FreeAlignedPointer(y_aligned); FreeAlignedPointer(z_aligned); }
void TimeOneFloatRFFT(int count, int fft_log_size, float signal_value, int signal_type) { OMX_F32* x; /* Source */ OMX_F32* y; /* Transform */ OMX_F32* z; /* Inverse transform */ OMX_F32* y_true; /* True FFT */ struct AlignedPtr* x_aligned; struct AlignedPtr* y_aligned; struct AlignedPtr* z_aligned; struct AlignedPtr* y_true_aligned; OMX_INT n, fft_spec_buffer_size; OMXResult status; OMXFFTSpec_R_F32 * fft_fwd_spec = NULL; OMXFFTSpec_R_F32 * fft_inv_spec = NULL; int fft_size; struct timeval start_time; struct timeval end_time; double elapsed_time; struct SnrResult snr_forward; struct SnrResult snr_inverse; fft_size = 1 << fft_log_size; x_aligned = AllocAlignedPointer(32, sizeof(*x) * fft_size); /* The transformed value is in CCS format and is has fft_size + 2 values */ y_aligned = AllocAlignedPointer(32, sizeof(*y) * (fft_size + 2)); z_aligned = AllocAlignedPointer(32, sizeof(*z) * fft_size); y_true_aligned = AllocAlignedPointer(32, sizeof(*z) * (fft_size + 2)); x = x_aligned->aligned_pointer_; y = y_aligned->aligned_pointer_; z = z_aligned->aligned_pointer_; y_true = y_true_aligned->aligned_pointer_; GenerateRealFloatSignal(x, (OMX_FC32*) y_true, fft_size, signal_type, signal_value); status = omxSP_FFTGetBufSize_R_F32(fft_log_size, &fft_spec_buffer_size); fft_fwd_spec = (OMXFFTSpec_R_F32*) malloc(fft_spec_buffer_size); fft_inv_spec = (OMXFFTSpec_R_F32*) malloc(fft_spec_buffer_size); status = omxSP_FFTInit_R_F32(fft_fwd_spec, fft_log_size); status = omxSP_FFTInit_R_F32(fft_inv_spec, fft_log_size); if (do_forward_test) { GetUserTime(&start_time); for (n = 0; n < count; ++n) { FORWARD_FLOAT_RFFT(x, y, fft_fwd_spec); } GetUserTime(&end_time); elapsed_time = TimeDifference(&start_time, &end_time); CompareComplexFloat(&snr_forward, (OMX_FC32*) y, (OMX_FC32*) y_true, fft_size / 2 + 1); PrintResult("Forward Float RFFT", fft_log_size, elapsed_time, count, snr_forward.complex_snr_); } if (do_inverse_test) { GetUserTime(&start_time); for (n = 0; n < count; ++n) { 
INVERSE_FLOAT_RFFT(y_true, z, fft_inv_spec); } GetUserTime(&end_time); elapsed_time = TimeDifference(&start_time, &end_time); CompareFloat(&snr_inverse, (OMX_F32*) z, (OMX_F32*) x, fft_size); PrintResult("Inverse Float RFFT", fft_log_size, elapsed_time, count, snr_inverse.complex_snr_); } FreeAlignedPointer(x_aligned); FreeAlignedPointer(y_aligned); FreeAlignedPointer(z_aligned); free(fft_fwd_spec); free(fft_inv_spec); }
/*
 * Time the OpenMAX DL 16-bit real FFT (and its inverse) of size
 * 1 << fft_log_size.
 *
 *   count        - number of times each transform is run inside the timed
 *                  region.
 *   fft_log_size - log2 of the FFT size; also used as the forward scale
 *                  factor.
 *   signal_value - passed through to the signal generators.
 *   signal_type  - passed through to the signal generators.
 *   s16s32       - S32: Calculate RFFT16 with 32 bit complex FFT;
 *                  otherwise: Calculate RFFT16 with 16 bit complex FFT.
 *
 * When the global include_conversion is set, each timed iteration also does
 * float -> fixed-point scaling before the transform and fixed-point -> float
 * conversion after it, so that the conversion cost is part of the measurement.
 * The converted data (temp16 / temp32) is not what is fed to the transform;
 * the conversion loops exist only to account for their cost.
 *
 * Only timing is reported (PrintResultNoSNR); no accuracy check is made.
 */
void TimeOneRFFT16(int count, int fft_log_size, float signal_value,
                   int signal_type, s16_s32 s16s32) {
  OMX_S16* x;
  OMX_S32* y;
  OMX_S16* z;
  OMX_S32* y_true;
  OMX_F32* xr;
  OMX_F32* yrTrue;

  struct AlignedPtr* x_aligned;
  struct AlignedPtr* y_aligned;
  struct AlignedPtr* z_aligned;
  struct AlignedPtr* y_trueAligned;
  struct AlignedPtr* xr_aligned;
  struct AlignedPtr* yr_true_aligned;

  OMX_S16* temp16;
  OMX_S32* temp32;

  OMX_INT n, fft_spec_buffer_size;
  OMXResult status;
  OMXFFTSpec_R_S16 * fft_fwd_spec = NULL;
  OMXFFTSpec_R_S16 * fft_inv_spec = NULL;
  int fft_size;
  struct timeval start_time;
  struct timeval end_time;
  double elapsed_time;
  int scaleFactor;
  const int use_s32 = (s16s32 == S32);

  fft_size = 1 << fft_log_size;
  scaleFactor = fft_log_size;

  x_aligned = AllocAlignedPointer(32, sizeof(*x) * fft_size);
  y_aligned = AllocAlignedPointer(32, sizeof(*y) * (fft_size + 2));
  z_aligned = AllocAlignedPointer(32, sizeof(*z) * fft_size);
  y_trueAligned = AllocAlignedPointer(32, sizeof(*y_true) * (fft_size + 2));
  /*
   * xr receives fft_size + 2 converted output values in the forward
   * conversion loop below, so it needs room for all of them (the original
   * allocated only fft_size elements and wrote past the end).
   */
  xr_aligned = AllocAlignedPointer(32, sizeof(*xr) * (fft_size + 2));
  yr_true_aligned = AllocAlignedPointer(32, sizeof(*yrTrue) * (fft_size + 2));

  x = x_aligned->aligned_pointer_;
  y = y_aligned->aligned_pointer_;
  z = z_aligned->aligned_pointer_;
  y_true = y_trueAligned->aligned_pointer_;
  xr = xr_aligned->aligned_pointer_;
  yrTrue = yr_true_aligned->aligned_pointer_;

  temp16 = (OMX_S16*) malloc(sizeof(*temp16) * fft_size);
  temp32 = (OMX_S32*) malloc(sizeof(*temp32) * fft_size);
  if (!temp16 || !temp32) {
    fprintf(stderr, "RFFT16: Cannot allocate scratch buffers for order %d\n",
            fft_log_size);
    goto cleanup;
  }

  GenerateRFFT16Signal(x, (OMX_SC32*) y_true, fft_size, signal_type,
                       signal_value);
  /*
   * Generate a real version so we can measure scaling costs
   */
  GenerateRealFloatSignal(xr, (OMX_FC32*) yrTrue, fft_size, signal_type,
                          signal_value);

  /*
   * NOTE(review): the OMXResult values returned by the OpenMAX calls are
   * stored but not inspected, exactly as in the original code.
   */
  if (use_s32) {
    status = omxSP_FFTGetBufSize_R_S16S32(fft_log_size, &fft_spec_buffer_size);
  } else {
    status = omxSP_FFTGetBufSize_R_S16(fft_log_size, &fft_spec_buffer_size);
  }

  fft_fwd_spec = malloc(fft_spec_buffer_size);
  fft_inv_spec = malloc(fft_spec_buffer_size);
  if (!fft_fwd_spec || !fft_inv_spec) {
    fprintf(stderr, "RFFT16: Cannot allocate FFT structure for order %d\n",
            fft_log_size);
    goto cleanup;
  }

  if (use_s32) {
    status = omxSP_FFTInit_R_S16S32(fft_fwd_spec, fft_log_size);
    status = omxSP_FFTInit_R_S16S32(fft_inv_spec, fft_log_size);
  } else {
    status = omxSP_FFTInit_R_S16(fft_fwd_spec, fft_log_size);
    status = omxSP_FFTInit_R_S16(fft_inv_spec, fft_log_size);
  }

  if (do_forward_test) {
    if (include_conversion) {
      int k;

      GetUserTime(&start_time);
      for (k = 0; k < count; ++k) {
        /* Restart the max search on every pass. */
        float factor = -1;

        /*
         * Spend some time computing the max of the signal, and then scaling it.
         * The magnitude is computed in floating point: integer abs() would
         * truncate every sample below 1 to 0.
         */
        for (n = 0; n < fft_size; ++n) {
          float magnitude = xr[n] < 0 ? -xr[n] : xr[n];
          if (magnitude > factor) {
            factor = magnitude;
          }
        }

        factor = 32767 / factor;
        for (n = 0; n < fft_size; ++n) {
          temp16[n] = (OMX_S16) (factor * xr[n]);
        }

        if (use_s32) {
          status = omxSP_FFTFwd_RToCCS_S16S32_Sfs(
              x, y, (OMXFFTSpec_R_S16S32*)fft_fwd_spec, (OMX_INT) scaleFactor);
        } else {
          status = omxSP_FFTFwd_RToCCS_S16_Sfs(
              x, (OMX_S16*)y, (OMXFFTSpec_R_S16*)fft_fwd_spec,
              (OMX_INT) scaleFactor);
        }

        /*
         * Now spend some time converting the fixed-point FFT back to float.
         */
        factor = 1 / factor;
        for (n = 0; n < fft_size + 2; ++n) {
          xr[n] = y[n] * factor;
        }
      }
      GetUserTime(&end_time);
    } else {
      GetUserTime(&start_time);
      for (n = 0; n < count; ++n) {
        if (use_s32) {
          status = omxSP_FFTFwd_RToCCS_S16S32_Sfs(
              x, y, (OMXFFTSpec_R_S16S32*)fft_fwd_spec, (OMX_INT) scaleFactor);
        } else {
          status = omxSP_FFTFwd_RToCCS_S16_Sfs(
              x, (OMX_S16*)y, (OMXFFTSpec_R_S16*)fft_fwd_spec,
              (OMX_INT) scaleFactor);
        }
      }
      GetUserTime(&end_time);
    }

    elapsed_time = TimeDifference(&start_time, &end_time);

    if (use_s32) {
      PrintResultNoSNR("Forward RFFT16 (with S32)", fft_log_size, elapsed_time,
                       count);
    } else {
      PrintResultNoSNR("Forward RFFT16 (with S16)", fft_log_size, elapsed_time,
                       count);
    }
  }

  if (do_inverse_test) {
    if (include_conversion) {
      int k;

      GetUserTime(&start_time);
      for (k = 0; k < count; ++k) {
        /* Restart the max search on every pass. */
        float factor = -1;

        /*
         * Spend some time scaling the FFT signal to fixed point.
         */
        for (n = 0; n < fft_size; ++n) {
          float magnitude = yrTrue[n] < 0 ? -yrTrue[n] : yrTrue[n];
          if (magnitude > factor) {
            factor = magnitude;
          }
        }
        for (n = 0; n < fft_size; ++n) {
          temp32[n] = (OMX_S32) (factor * yrTrue[n]);
        }

        if (use_s32) {
          status = omxSP_FFTInv_CCSToR_S32S16_Sfs(
              y, z, (OMXFFTSpec_R_S16S32*)fft_inv_spec, 0);
        } else {
          status = omxSP_FFTInv_CCSToR_S16_Sfs(
              (OMX_S16*)y, z, (OMXFFTSpec_R_S16*)fft_inv_spec, 0);
        }

        /*
         * Spend some time converting the result back to float
         */
        factor = 1 / factor;
        for (n = 0; n < fft_size; ++n) {
          xr[n] = factor * z[n];
        }
      }
      GetUserTime(&end_time);
    } else {
      GetUserTime(&start_time);
      for (n = 0; n < count; ++n) {
        if (use_s32) {
          status = omxSP_FFTInv_CCSToR_S32S16_Sfs(
              y, z, (OMXFFTSpec_R_S16S32*)fft_inv_spec, 0);
        } else {
          status = omxSP_FFTInv_CCSToR_S16_Sfs(
              (OMX_S16*)y, z, (OMXFFTSpec_R_S16*)fft_inv_spec, 0);
        }
      }
      GetUserTime(&end_time);
    }

    elapsed_time = TimeDifference(&start_time, &end_time);

    if (use_s32) {
      PrintResultNoSNR("Inverse RFFT16 (with S32)", fft_log_size, elapsed_time,
                       count);
    } else {
      PrintResultNoSNR("Inverse RFFT16 (with S16)", fft_log_size, elapsed_time,
                       count);
    }
  }

  (void) status;

cleanup:
  FreeAlignedPointer(x_aligned);
  FreeAlignedPointer(y_aligned);
  FreeAlignedPointer(z_aligned);
  FreeAlignedPointer(y_trueAligned);
  FreeAlignedPointer(xr_aligned);
  FreeAlignedPointer(yr_true_aligned);
  /* The original never released the two conversion scratch buffers. */
  free(temp16);
  free(temp32);
  free(fft_fwd_spec);
  free(fft_inv_spec);
}
void TimeOnePfRFFT(int count, int fft_log_size, float signal_value, int signal_type) { struct AlignedPtr* x_aligned; struct AlignedPtr* y_aligned; struct AlignedPtr* z_aligned; struct AlignedPtr* y_tmp_aligned; float* x; struct ComplexFloat* y; OMX_F32* z; float* y_true; float* y_tmp; int n; int fft_size; struct timeval start_time; struct timeval end_time; double elapsed_time; PFFFT_Setup *s; struct SnrResult snr_forward; struct SnrResult snr_inverse; fft_size = 1 << fft_log_size; x_aligned = AllocAlignedPointer(32, sizeof(*x) * fft_size); y_aligned = AllocAlignedPointer(32, sizeof(*y) * (fft_size + 2)); z_aligned = AllocAlignedPointer(32, sizeof(*z) * fft_size); y_tmp_aligned = AllocAlignedPointer(32, sizeof(*y_tmp) * (fft_size + 2)); y_true = (float*) malloc(sizeof(*y_true) * 2 * fft_size); x = x_aligned->aligned_pointer_; y = y_aligned->aligned_pointer_; z = z_aligned->aligned_pointer_; y_tmp = y_tmp_aligned->aligned_pointer_; s = pffft_new_setup(fft_size, PFFFT_REAL); if (!s) { fprintf(stderr, "TimeOnePfRFFT: Could not initialize structure for order %d\n", fft_log_size); } GenerateRealFloatSignal(x, (struct ComplexFloat*) y_true, fft_size, signal_type, signal_value); if (do_forward_test) { GetUserTime(&start_time); for (n = 0; n < count; ++n) { pffft_transform_ordered(s, (float*)x, (float*)y, NULL, PFFFT_FORWARD); } GetUserTime(&end_time); elapsed_time = TimeDifference(&start_time, &end_time); /* * Arrange the output of the FFT to match the expected output. 
*/ y[fft_size / 2].Re = y[0].Im; y[fft_size / 2].Im = 0; y[0].Im = 0; CompareComplexFloat(&snr_forward, (OMX_FC32*) y, (OMX_FC32*) y_true, fft_size / 2 + 1); PrintResult("Forward PFFFT RFFT", fft_log_size, elapsed_time, count, snr_forward.complex_snr_); if (verbose >= 255) { printf("FFT Actual:\n"); DumpArrayComplexFloat("y", fft_size / 2 + 1, (OMX_FC32*) y); printf("FFT Expected:\n"); DumpArrayComplexFloat("true", fft_size / 2 + 1, (OMX_FC32*) y_true); } } if (do_inverse_test) { float scale = 1.0 / fft_size; /* Copy y_true to true, but arrange the values according to what rdft wants. */ memcpy(y_tmp, y_true, sizeof(y_tmp[0]) * fft_size); y_tmp[1] = y_true[fft_size / 2]; GetUserTime(&start_time); for (n = 0; n < count; ++n) { int m; pffft_transform_ordered(s, (float*)y_tmp, (float*)z, NULL, PFFFT_BACKWARD); /* * Need to include cost of scaling the inverse */ ScaleVector(z, fft_size, fft_size); } GetUserTime(&end_time); elapsed_time = TimeDifference(&start_time, &end_time); CompareFloat(&snr_inverse, (OMX_F32*) z, (OMX_F32*) x, fft_size); PrintResult("Inverse PFFFT RFFT", fft_log_size, elapsed_time, count, snr_inverse.complex_snr_); if (verbose >= 255) { printf("IFFT Actual:\n"); DumpArrayFloat("z", fft_size, z); printf("IFFT Expected:\n"); DumpArrayFloat("x", fft_size, x); } } FreeAlignedPointer(x_aligned); FreeAlignedPointer(y_aligned); FreeAlignedPointer(z_aligned); FreeAlignedPointer(y_tmp_aligned); pffft_destroy_setup(s); free(y_true); }