/**
 * Forward FFT of a real F32 sequence; the result is written to pDst.
 *
 * The transform length N, the twiddle table and the two scratch buffers are
 * read from the spec structure (interpreted as X86FFTSpec_R_FC32).
 *
 * @param pSrc      Real input samples; must be non-NULL and 32-byte aligned.
 * @param pDst      Output buffer; must be non-NULL and 32-byte aligned.
 *                  For N == 2 exactly four floats are written
 *                  (sum, 0, difference, 0). For larger N the layout is
 *                  produced by the RevbinPermuteFwd* helpers
 *                  (presumably N + 2 floats of CCS data -- confirm against
 *                  the helper implementation).
 * @param pFFTSpec  Initialised FFT spec; must be non-NULL.
 *
 * @return OMX_Sts_BadArgErr if any pointer is NULL or pSrc/pDst is not
 *         32-byte aligned, otherwise OMX_Sts_NoErr.
 */
OMXResult omxSP_FFTFwd_RToCCS_F32_Sfs(const OMX_F32 *pSrc, OMX_F32 *pDst,
                                      const OMXFFTSpec_R_F32 *pFFTSpec) {
  // Reject missing arguments before anything is dereferenced. The alignment
  // test goes through uintptr_t so the pointer-to-integer conversion never
  // narrows, matching omxSP_FFTInv_CCSToR_F32_Sfs.
  if (!pSrc || !pDst || !pFFTSpec ||
      ((uintptr_t)pSrc & 31) || ((uintptr_t)pDst & 31)) {
    return OMX_Sts_BadArgErr;
  }

  const X86FFTSpec_R_FC32 *pFFTStruct = (const X86FFTSpec_R_FC32 *)pFFTSpec;
  const OMX_INT n = pFFTStruct->N;

  // This is to handle the case of order == 1: a two-point transform is just
  // the sum and the difference, each with a zero imaginary part.
  if (n == 2) {
    pDst[0] = (pSrc[0] + pSrc[1]);
    pDst[1] = 0.0f;
    pDst[2] = (pSrc[0] - pSrc[1]);
    pDst[3] = 0.0f;
    return OMX_Sts_NoErr;
  }

  const OMX_INT n_by_2 = n >> 1;
  const OMX_F32 *twiddle = pFFTStruct->pTwiddle;
  OMX_F32 *buf = pFFTStruct->pBuf1;

  // Run the kernel over n/2 points, using pBuf2 and pBuf1 as working storage.
  // The kernel's return value is the buffer that is post-processed below.
  // NOTE(review): the trailing 1 is presumably the "forward" flag (the
  // inverse routine passes 0) -- confirm against the kernel prototype.
  if (n_by_2 >= 16) {
    buf = x86SP_F32_radix4_kernel_OutOfPlace_sse(
        pSrc, pFFTStruct->pBuf2, buf, twiddle, n_by_2, 1);
  } else {
    buf = x86SP_F32_radix2_kernel_OutOfPlace(
        pSrc, pFFTStruct->pBuf2, buf, twiddle, n_by_2, 1);
  }

  // Final pass from the kernel output into pDst; the SSE variant is only
  // used for n >= 8.
  if (n >= 8) {
    RevbinPermuteFwdSse(buf, pDst, twiddle, n);
  } else {
    RevbinPermuteFwd(buf, pDst, twiddle, n);
  }

  return OMX_Sts_NoErr;
}
/**
 * Inverse FFT from the format written by omxSP_FFTFwd_RToCCS_F32_Sfs back to
 * a real F32 sequence, scaled by 1/N.
 *
 * The transform length N, the twiddle table and the two scratch buffers are
 * read from the spec structure (interpreted as X86FFTSpec_R_FC32).
 *
 * @param pSrc      Frequency-domain input; must be non-NULL and 32-byte
 *                  aligned. For N == 2 only pSrc[0] and pSrc[2] are read.
 * @param pDst      Real output samples; must be non-NULL and 32-byte aligned.
 *                  N floats are written.
 * @param pFFTSpec  Initialised FFT spec; must be non-NULL.
 *
 * @return OMX_Sts_BadArgErr if any pointer is NULL or pSrc/pDst is not
 *         32-byte aligned, otherwise OMX_Sts_NoErr.
 */
OMXResult omxSP_FFTInv_CCSToR_F32_Sfs(const OMX_F32 *pSrc, OMX_F32 *pDst,
                                      const OMXFFTSpec_R_F32 *pFFTSpec) {
  // Reject missing arguments before anything is dereferenced.
  // Input must be 32 byte aligned.
  if (!pSrc || !pDst || !pFFTSpec ||
      ((uintptr_t)pSrc & 31) || ((uintptr_t)pDst & 31)) {
    return OMX_Sts_BadArgErr;
  }

  const X86FFTSpec_R_FC32 *pFFTStruct = (const X86FFTSpec_R_FC32 *)pFFTSpec;
  const OMX_INT n = pFFTStruct->N;

  // This is to handle the case of order == 1: undo the two-point
  // sum/difference and apply the 1/2 normalisation directly.
  if (n == 2) {
    pDst[0] = (pSrc[0] + pSrc[2]) / 2;
    pDst[1] = (pSrc[0] - pSrc[2]) / 2;
    return OMX_Sts_NoErr;
  }

  const OMX_INT n_by_2 = n >> 1;
  const OMX_F32 *twiddle = pFFTStruct->pTwiddle;
  OMX_F32 *buf = pFFTStruct->pBuf1;
  // The permute helpers are called with a non-const pointer, as in the
  // original code. NOTE(review): presumably they do not modify the input --
  // confirm against their prototypes.
  OMX_F32 *in = (OMX_F32 *)pSrc;

  // Pre-process the input into pBuf1; the SSE variant is only used for
  // n >= 8.
  if (n < 8) {
    RevbinPermuteInv(in, buf, twiddle, n);
  } else {
    RevbinPermuteInvSse(in, buf, twiddle, n);
  }

  // Run the kernel over n/2 points. pBuf1 is passed both as the input and as
  // one of the two working buffers; the return value is the buffer that
  // holds the data read below.
  // NOTE(review): the trailing 0 is presumably the "inverse" flag (the
  // forward routine passes 1) -- confirm against the kernel prototype.
  if (n_by_2 < 16) {
    buf = x86SP_F32_radix2_kernel_OutOfPlace(
        buf, pFFTStruct->pBuf2, buf, twiddle, n_by_2, 0);
  } else {
    buf = x86SP_F32_radix4_kernel_OutOfPlace_sse(
        buf, pFFTStruct->pBuf2, buf, twiddle, n_by_2, 0);
  }

  // Scale the result by 1/n.
  // It contains a scaling factor of 1/2 in
  // RevbinPermuteInv/RevbinPermuteInvSse.
  OMX_F32 factor = 1.0f / n;

  if (n < 8) {
    // Scalar path: element i of the first half of buf goes to the even
    // output slot, element i of the second half to the odd slot.
    for (OMX_INT i = 0; i < n_by_2; i++) {
      pDst[i << 1] = buf[i] * factor;
      pDst[(i << 1) + 1] = buf[i + n_by_2] * factor;
    }
  } else {
    __m128 mFactor = _mm_load1_ps(&factor);

    // Two things are done in this loop:
    // 1 Get the result scaled; 2 Change the format from split to interleaved.
    // Each iteration advances the split index by four.
    for (OMX_INT i = 0; i < n_by_2; i += 4) {
      OMX_F32 *base = buf + i;
      OMX_F32 *dst = pDst + (i << 1);
      VC temp0;
      VC temp1;

      VC_LOAD_SPLIT(&temp0, base, n_by_2);
      VC_MUL_F(&temp1, &temp0, mFactor);
      VC_STORE_INTERLEAVE(dst, &temp1);
    }
  }

  return OMX_Sts_NoErr;
}