/*
** FAST(b,n)
** Replaces, in place, the real vector b of length n with its finite
** discrete Fourier transform.
**
** Layout of b on return:
**   b[0]                   DC term
**   b[1]                   real part of the n/2-th harmonic
**   b[2*j] + i b[2*j + 1]  j-th harmonic (i.e. the remaining coefficients
**                          are laid out as a DPCOMPLEX vector)
**
** Returns 0 (b untouched) when fastlog2(n) <= 0, and 1 otherwise.
*/
int FAST(real *b, int n)
{
    int log2n, radix4Passes, groups, span, pass, k;

    log2n = fastlog2(n);
    if (log2n <= 0)
        return 0;

    radix4Passes = log2n / 2;

    /* An odd power of two needs a single radix-2 pass before the radix-4 ones */
    if (log2n % 2) {
        groups = 2;
        span = n / groups;
        FR2TR(span, b, b + span);
    } else {
        groups = 1;
    }

    /* Radix-4 passes: each one quadruples the group count */
    for (pass = 0; pass < radix4Passes; pass++) {
        groups *= 4;
        span = n / groups;
        FR4TR(span, groups, b, b + span, b + 2 * span, b + 3 * span);
    }

    /* In-place reordering */
    FORD1(log2n, b);
    FORD2(log2n, b);

    /* Conjugate: negate every odd-indexed entry from b[3] on */
    for (k = 3; k < n; k += 2)
        b[k] = -b[k];

    return 1;
}
// Compute Entropy of distribution (assuming \sum_i dist[i] = 1). float ComputeEntropy(const std::vector<float> &dist) { float entropy = 0.0; for (auto iter = dist.begin(); iter != dist.end(); ++iter) { if (*iter) { entropy -= (*iter) * fastlog2(*iter); } } if (entropy < 1e-5) { entropy = 0.0; } return entropy; }
/*! \brief Initializes slab subsystem (it is called automatically). */ void __attribute__ ((constructor)) slab_init() { long slab_size = sysconf(_SC_PAGESIZE); if (slab_size < 0) { slab_size = SLAB_MINSIZE; } // Fetch page size SLAB_SZ = (size_t)slab_size; unsigned slab_logsz = fastlog2(SLAB_SZ); // Compute slab page mask SLAB_MASK = 0; for (unsigned i = 0; i < slab_logsz; ++i) { SLAB_MASK |= 1 << i; } SLAB_MASK = ~SLAB_MASK; // Initialize depot slab_depot_init(); }
void Bilinear::finalize() { // Calculate longest sides of the patch longest_u = std::max((verts[0][0] - verts[0][1]).length(), (verts[0][2] - verts[0][3]).length()); longest_v = std::max((verts[0][0] - verts[0][3]).length(), (verts[0][1] - verts[0][2]).length()); log_widest = fastlog2(std::max(longest_u, longest_v)); // Calculate bounds bbox.resize(verts.size()); for (size_t time = 0; time < verts.size(); time++) { bbox[time].min.x = verts[time][0].x; bbox[time].max.x = verts[time][0].x; bbox[time].min.y = verts[time][0].y; bbox[time].max.y = verts[time][0].y; bbox[time].min.z = verts[time][0].z; bbox[time].max.z = verts[time][0].z; for (int i = 1; i < 4; i++) { bbox[time].min.x = verts[time][i].x < bbox[time].min.x ? verts[time][i].x : bbox[time].min.x; bbox[time].max.x = verts[time][i].x > bbox[time].max.x ? verts[time][i].x : bbox[time].max.x; bbox[time].min.y = verts[time][i].y < bbox[time].min.y ? verts[time][i].y : bbox[time].min.y; bbox[time].max.y = verts[time][i].y > bbox[time].max.y ? verts[time][i].y : bbox[time].max.y; bbox[time].min.z = verts[time][i].z < bbox[time].min.z ? verts[time][i].z : bbox[time].min.z; bbox[time].max.z = verts[time][i].z > bbox[time].max.z ? verts[time][i].z : bbox[time].max.z; } // Extend bounds for displacements for (int i = 1; i < 4; i++) { bbox[time].min.x -= Config::displace_distance; bbox[time].max.x += Config::displace_distance; bbox[time].min.y -= Config::displace_distance; bbox[time].max.y += Config::displace_distance; bbox[time].min.z -= Config::displace_distance; bbox[time].max.z += Config::displace_distance; } } }
/*
 * Forward complex-to-complex FFT on OMX_FC32 data, computed out of place.
 *
 * pSrc     - input sequence
 * pDst     - output sequence
 * pFFTSpec - specification structure; supplies the transform length (N),
 *            the twiddle table (pTwiddle) and a scratch buffer (pBuf)
 *
 * Returns OMX_Sts_BadArgErr when validateParametersFC32() rejects the
 * arguments, OMX_Sts_NoErr otherwise.
 */
OMXResult omxSP_FFTFwd_CToC_FC32_Sfs(const OMX_FC32* pSrc,
                                     OMX_FC32* pDst,
                                     const OMXFFTSpec_C_FC32* pFFTSpec) {
  ARMsFFTSpec_FC32* spec = (ARMsFFTSpec_FC32*)pFFTSpec;
  int order;
  long stageSize;
  long stageCount;
  OMX_FC32* twiddles;
  OMX_FC32* scratch;
  OMX_FC32* stageIn;
  OMX_FC32* stageOut;

  /*
   * Reject NULL arguments and misaligned source/destination pointers.
   */
  if (!validateParametersFC32(pSrc, pDst, spec))
    return OMX_Sts_BadArgErr;

  order = fastlog2(spec->N);
  stageSize = 1;
  stageCount = spec->N;
  twiddles = spec->pTwiddle;
  scratch = spec->pBuf;

  /* Order 1 (and anything below): one radix-2 stage straight into pDst. */
  if (order <= 1) {
    armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace(
        pSrc, pDst, twiddles, &stageCount, &stageSize);
    return OMX_Sts_NoErr;
  }

  /* Order 2: two radix-2 stages, bouncing through the scratch buffer. */
  if (order == 2) {
    armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace(
        pSrc, scratch, twiddles, &stageCount, &stageSize);
    armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace(
        scratch, pDst, twiddles, &stageCount, &stageSize);
    return OMX_Sts_NoErr;
  }

  /* Order 3: three radix-2 stages, pDst -> scratch -> pDst. */
  if (order == 3) {
    armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace(
        pSrc, pDst, twiddles, &stageCount, &stageSize);
    armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace(
        pDst, scratch, twiddles, &stageCount, &stageSize);
    armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace(
        scratch, pDst, twiddles, &stageCount, &stageSize);
    return OMX_Sts_NoErr;
  }

  /*
   * Order > 3.  Choose which buffer receives the first stage so that the
   * very last stage ends up writing into pDst.
   */
  if (order & 2) {
    stageIn = pDst;
    stageOut = scratch;
  } else {
    stageIn = scratch;
    stageOut = pDst;
  }

  /*
   * First stage: radix 8 for odd orders, radix 4 for even orders.
   */
  if (order & 1) {
    armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace(
        pSrc, stageIn, twiddles, &stageCount, &stageSize);
  } else {
    armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace(
        pSrc, stageIn, twiddles, &stageCount, &stageSize);
  }

  if (stageCount < 4)
    return OMX_Sts_NoErr;

  /*
   * Remaining stages are radix 4, ping-ponging between the two buffers.
   */
  while (stageCount > 4) {
    OMX_FC32* previousIn;

    armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace(
        stageIn, stageOut, twiddles, &stageCount, &stageSize);

    /* What was just written becomes the next stage's input. */
    previousIn = stageIn;
    stageIn = stageOut;
    stageOut = previousIn;
  }

  armSP_FFTFwd_CToC_FC32_Radix4_ls_OutOfPlace(
      stageIn, stageOut, twiddles, &stageCount, &stageSize);

  return OMX_Sts_NoErr;
}
/** * Function: omxSP_FFTInv_CCSToR_F32_Sfs * * Description: * These functions compute the inverse FFT for a conjugate-symmetric input * sequence. Transform length is determined by the specification structure, * which must be initialized prior to calling the FFT function using * <FFTInit_R_F32>. For a transform of length M, the input sequence is * represented using a packed CCS vector of length M+2, and is organized * as follows: * * Index: 0 1 2 3 4 5 . . . M-2 M-1 M M+1 * Comp: R[0] 0 R[1] I[1] R[2] I[2] . . . R[M/2-1] I[M/2-1] R[M/2] 0 * * where R[n] and I[n], respectively, denote the real and imaginary * components for FFT bin n. Bins are numbered from 0 to M/2, where M * is the FFT length. Bin index 0 corresponds to the DC component, * and bin index M/2 corresponds to the foldover frequency. * * Input Arguments: * pSrc - pointer to the complex-valued input sequence represented * using CCS format, of length (2^order) + 2; must be aligned on a * 32-byte boundary. * pFFTSpec - pointer to the preallocated and initialized * specification structure * * Output Arguments: * pDst - pointer to the real-valued output sequence, of length * 2^order ; must be aligned on a 32-byte boundary. * * Return Value: * * OMX_Sts_NoErr - no error * OMX_Sts_BadArgErr - bad arguments if one or more of the * following is true: * - pSrc, pDst, or pFFTSpec is NULL * - pSrc or pDst is not aligned on a 32-byte boundary * */ OMXResult omxSP_FFTInv_CCSToR_F32_Sfs( const OMX_F32* pSrc, OMX_F32* pDst, const OMXFFTSpec_R_F32* pFFTSpec) { ARMsFFTSpec_R_FC32* spec = (ARMsFFTSpec_R_FC32*)pFFTSpec; int order; long subFFTSize; long subFFTNum; OMX_FC32* pTwiddle; OMX_FC32* pOut; OMX_FC32* pComplexSrc; OMX_FC32* pComplexDst = (OMX_FC32*) pDst; /* * Check args are not NULL and the source and destination pointers * are properly aligned. */ if (!validateParametersF32(pSrc, pDst, spec)) return OMX_Sts_BadArgErr; /* * Preprocess the input before calling the complex inverse FFT. 
The * result is actually stored in the second half of the temp buffer * in pFFTSpec. */ if (spec->N > 1) armSP_FFTInv_CCSToR_F32_preTwiddleRadix2( pSrc, spec->pTwiddle, spec->pBuf, spec->N); /* * Do a complex inverse FFT of half size. */ order = fastlog2(spec->N) - 1; subFFTSize = 1; subFFTNum = spec->N >> 1; pTwiddle = spec->pTwiddle; /* * The pBuf is split in half. The first half is the temp buffer. The * second half holds the source data that was placed there by * armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe. */ pOut = (OMX_FC32*) spec->pBuf; pComplexSrc = pOut + (1 << order); if (order > 3) { OMX_FC32* argDst; /* * Set up argDst and pOut appropriately so that pOut = pDst for * the very last FFT stage. */ if ((order & 2) == 0) { argDst = pOut; pOut = pComplexDst; } else { argDst = pComplexDst; } /* * Odd order uses a radix 8 first stage; even order, a radix 4 * first stage. */ if (order & 1) { armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace( pComplexSrc, argDst, pTwiddle, &subFFTNum, &subFFTSize); } else { armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace( pComplexSrc, argDst, pTwiddle, &subFFTNum, &subFFTSize); } /* * Now use radix 4 stages to finish rest of the FFT */ if (subFFTNum >= 4) { while (subFFTNum > 4) { OMX_FC32* tmp; armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace( argDst, pOut, pTwiddle, &subFFTNum, &subFFTSize); /* * Swap argDst and pOut */ tmp = pOut; pOut = argDst; argDst = tmp; } armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace( argDst, pOut, pTwiddle, &subFFTNum, &subFFTSize); } } else if (order == 3) { armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace( pComplexSrc, pComplexDst, pTwiddle, &subFFTNum, &subFFTSize); armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace( pComplexDst, pOut, pTwiddle, &subFFTNum, &subFFTSize); armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace( pOut, pComplexDst, pTwiddle, &subFFTNum, &subFFTSize); } else if (order == 2) { armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace( pComplexSrc, pOut, pTwiddle, &subFFTNum, &subFFTSize); 
armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace( pOut, pComplexDst, pTwiddle, &subFFTNum, &subFFTSize); } else if (order == 1) { armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace( pComplexSrc, pComplexDst, pTwiddle, &subFFTNum, &subFFTSize); } else { /* Order = 0 */ *pComplexDst = *pComplexSrc; } ScaleRFFTData(pDst, spec->N); return OMX_Sts_NoErr; }
/* Natural logarithm built on fastlog2: ln(x) = ln(2) * log2(x). */
static inline float
fastlog (float x)
{
  const float ln2 = 0.69314718f;

  return ln2 * fastlog2 (x);
}
/* x raised to the power p, evaluated as 2^(p * log2(x)) via fastpow2/fastlog2. */
static inline float
fastpow (float x, float p)
{
  return fastpow2 (fastlog2 (x) * p);
}
/*
 * Forward FFT of a real-valued OMX_F32 sequence.
 *
 * The real FFT is computed as a complex FFT of one order less (the input is
 * reinterpreted as N/2 complex values); ComplexToRealFixup() then turns that
 * intermediate result into the final output in pDst.
 *
 * pSrc     - real input sequence
 * pDst     - output sequence
 * pFFTSpec - specification structure; supplies the transform length (N),
 *            the twiddle table (pTwiddle) and a scratch buffer (pBuf)
 *
 * Returns OMX_Sts_BadArgErr when validateParametersF32() rejects the
 * arguments, OMX_Sts_NoErr otherwise.
 */
OMXResult omxSP_FFTFwd_RToCCS_F32_Sfs(const OMX_F32* pSrc,
                                      OMX_F32* pDst,
                                      const OMXFFTSpec_R_F32* pFFTSpec) {
  ARMsFFTSpec_R_FC32* spec = (ARMsFFTSpec_R_FC32*)pFFTSpec;
  OMX_FC32* const srcAsComplex = (OMX_FC32*)pSrc;
  OMX_FC32* const dstAsComplex = (OMX_FC32*)pDst;
  int order;
  long stageSize;
  long stageCount;
  OMX_FC32* twiddles;
  OMX_FC32* fftOut;

  /*
   * Reject NULL arguments and misaligned source/destination pointers.
   */
  if (!validateParametersF32(pSrc, pDst, spec))
    return OMX_Sts_BadArgErr;

  /*
   * "order" is the order of the half-size complex FFT, not of the real one.
   */
  order = fastlog2(spec->N) - 1;
  stageSize = 1;
  stageCount = spec->N >> 1;
  twiddles = spec->pTwiddle;

  /* fftOut always names the buffer that ComplexToRealFixup will read. */
  fftOut = (OMX_FC32*)spec->pBuf;

  if (order <= 0) {
    /* Complex order 0: the two real inputs form the single complex value. */
    fftOut->Re = pSrc[0];
    fftOut->Im = pSrc[1];
  } else if (order == 1) {
    armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace(
        srcAsComplex, fftOut, twiddles, &stageCount, &stageSize);
  } else if (order == 2) {
    armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace(
        srcAsComplex, dstAsComplex, twiddles, &stageCount, &stageSize);
    armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace(
        dstAsComplex, fftOut, twiddles, &stageCount, &stageSize);
  } else if (order == 3) {
    armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace(
        srcAsComplex, fftOut, twiddles, &stageCount, &stageSize);
    armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace(
        fftOut, dstAsComplex, twiddles, &stageCount, &stageSize);
    armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace(
        dstAsComplex, fftOut, twiddles, &stageCount, &stageSize);
  } else {
    OMX_FC32* stageIn;

    /*
     * Order > 3.  Pick the starting buffers for the ping-pong below; the
     * output pointer left in fftOut after the last stage is the one passed
     * to ComplexToRealFixup.
     */
    if (order & 2) {
      stageIn = fftOut;
      fftOut = dstAsComplex;
    } else {
      stageIn = dstAsComplex;
    }

    /*
     * First stage: radix 8 for odd orders, radix 4 for even orders.
     */
    if (order & 1) {
      armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace(
          srcAsComplex, stageIn, twiddles, &stageCount, &stageSize);
    } else {
      armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace(
          srcAsComplex, stageIn, twiddles, &stageCount, &stageSize);
    }

    /*
     * Remaining stages are radix 4, ping-ponging between the two buffers.
     */
    if (stageCount >= 4) {
      while (stageCount > 4) {
        OMX_FC32* previousOut;

        armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace(
            stageIn, fftOut, twiddles, &stageCount, &stageSize);

        /* What was just written becomes the next stage's input. */
        previousOut = fftOut;
        fftOut = stageIn;
        stageIn = previousOut;
      }

      armSP_FFTFwd_CToC_FC32_Radix4_ls_OutOfPlace(
          stageIn, fftOut, twiddles, &stageCount, &stageSize);
    }
  }

  /*
   * Complex FFT done.  Fix up the complex result to give the correct RFFT.
   */
  ComplexToRealFixup(fftOut, pDst, twiddles, spec->pBuf, spec->N);

  return OMX_Sts_NoErr;
}
/*
 * FFT842 (Name kept from the original Fortran version)
 * This routine replaces the input DCOMPLEX vector by its
 * finite discrete complex fourier transform if direction==FFT_FORWARD.
 * It replaces the input DCOMPLEX vector by its finite discrete
 * complex inverse fourier transform if direction==FFT_INVERSE.
 *
 * The implementation is a radix-2 FFT, but with faster shortcuts for
 * radix-4 and radix-8. It performs as many radix-8 iterations as
 * possible, and then finishes with a radix-2 or -4 iteration if needed.
 *
 * The transform is done in place. The same butterfly passes run for both
 * directions; the direction only selects the pre-/post-processing:
 *   FFT_FORWARD: conjugate the input, transform, then conjugate the output
 *                while swapping the two halves of the vector.
 *   FFT_INVERSE: swap the two halves of the input, transform, then scale
 *                every output by 1/n.
 * For any other value of direction neither pre- nor post-processing runs.
 *
 * NOTE(review): the reordering pass below has 15 nested loop levels, so
 * this appears to assume n <= 2^15 - confirm against callers.
 */
void FFT842(int direction, int n, DCOMPLEX *b)
/* direction: FFT_FORWARD or FFT_INVERSE
 * n:         length of vector
 * *b:        input vector (overwritten with the result)
 */
{
  double fn, r, fi;

  /* L[1..15] are the loop bounds/strides of the reordering pass
   * (L[0] is never used); L1..L15 hold them in reversed order. */
  int L[16],L1,L2,L3,L4,L5,L6,L7,L8,L9,L10,L11,L12,L13,L14,L15;
  /* int j0,j1,j2,j3,j4,j5,j6,j7,j8,j9,j10,j11,j12,j13,j14;*/
  int j1,j2,j3,j4,j5,j6,j7,j8,j9,j10,j11,j12,j13,j14;
  int i, j, ij, ji, ij1, ji1;
  /* int nn, n2pow, n8pow, nthpo, ipass, nxtlt, length;*/
  int n2pow, n8pow, nthpo, ipass, nxtlt, length;

  n2pow = fastlog2(n);
  nthpo = n;
  fn = 1.0 / (double)nthpo; /* Scaling factor for inverse transform */

  if(direction==FFT_FORWARD)
    /* Conjugate the input */
    for(i=0;i<n;i++) {
      b[i].im = -b[i].im;
    }

  if(direction==FFT_INVERSE)
    /* Scramble the inputs: swap element i with element i + n/2 */
    for(i=0,j=n/2;j<n;i++,j++) {
      r = b[j].re;
      fi = b[j].im;
      b[j].re = b[i].re;
      b[j].im = b[i].im;
      b[i].re = r;
      b[i].im = fi;
    }

  /* Number of radix-8 passes that fit into n2pow */
  n8pow = n2pow/3;

  if(n8pow) {
    /* Radix 8 iterations */
    for(ipass=1;ipass<=n8pow;ipass++) {
      nxtlt = 0x1 << (n2pow - 3*ipass);
      length = 8*nxtlt;
      R8TX(nxtlt, nthpo, length,
           b, b+nxtlt, b+2*nxtlt, b+3*nxtlt,
           b+4*nxtlt, b+5*nxtlt, b+6*nxtlt, b+7*nxtlt);
    }
  }

  if(n2pow%3 == 1) {
    /* A final radix 2 iteration is needed */
    R2TX(nthpo, b, b+1);
  }

  if(n2pow%3 == 2) {
    /* A final radix 4 iteration is needed */
    R4TX(nthpo, b, b+1, b+2, b+3);
  }

  /* Build the reordering table: L[j] = 2^(n2pow+1-j) for j <= n2pow,
   * and 1 for the remaining (unused) levels. */
  for(j=1;j<=15;j++) {
    L[j] = 1;
    if(j-n2pow <= 0) L[j] = 0x1 << (n2pow + 1 - j);
  }

  L15=L[1];L14=L[2];L13=L[3];L12=L[4];L11=L[5];L10=L[6];L9=L[7];
  L8=L[8];L7=L[9];L6=L[10];L5=L[11];L4=L[12];L3=L[13];L2=L[14];L1=L[15];

  /* Reordering pass. ij counts 1..n in natural order while the nested
   * loops generate the matching partner index ji; each pair is swapped
   * once (only when ij < ji). Indices are 1-based here, hence the -1.
   * Presumably this is the digit-reversal permutation of the FFT -
   * TODO confirm. */
  ij = 1;

  for(j1=1;j1<=L1;j1++)
  for(j2=j1;j2<=L2;j2+=L1)
  for(j3=j2;j3<=L3;j3+=L2)
  for(j4=j3;j4<=L4;j4+=L3)
  for(j5=j4;j5<=L5;j5+=L4)
  for(j6=j5;j6<=L6;j6+=L5)
  for(j7=j6;j7<=L7;j7+=L6)
  for(j8=j7;j8<=L8;j8+=L7)
  for(j9=j8;j9<=L9;j9+=L8)
  for(j10=j9;j10<=L10;j10+=L9)
  for(j11=j10;j11<=L11;j11+=L10)
  for(j12=j11;j12<=L12;j12+=L11)
  for(j13=j12;j13<=L13;j13+=L12)
  for(j14=j13;j14<=L14;j14+=L13)
  for(ji=j14;ji<=L15;ji+=L14) {
    ij1 = ij-1;
    ji1 = ji-1;
    if(ij-ji<0) {
      r = b[ij1].re;
      b[ij1].re = b[ji1].re;
      b[ji1].re = r;
      fi = b[ij1].im;
      b[ij1].im = b[ji1].im;
      b[ji1].im = fi;
    }
    ij++;
  }

  if(direction==FFT_FORWARD)
    /* Take conjugates & unscramble outputs: swap the two halves and
     * negate the imaginary parts in the same step */
    for(i=0,j=n/2; j<n; i++,j++) {
      r = b[j].re;
      fi = b[j].im;
      b[j].re = b[i].re;
      b[j].im = -b[i].im;
      b[i].re = r;
      b[i].im = -fi;
    }

  if(direction==FFT_INVERSE)
    /* Scale outputs by 1/n */
    for(i=0; i<nthpo; i++) {
      b[i].re *= fn;
      b[i].im *= fn;
    }
}
/*
 * ifft842
 * Runs the radix-8/4/2 inverse butterfly passes on the vector b, in place.
 *
 *   b    : input vector of doubleComplex values, overwritten with the result
 *   size : length of the vector
 *   in   : direction flag; when it equals INVERSE the two halves of the
 *          input are swapped before the passes and the outputs are scaled
 *          afterwards. For any other value only the butterfly passes and
 *          the reordering run.
 *
 * NOTE(review): the reordering pass below has 15 nested loop levels, so
 * this appears to assume size <= 2^15 - confirm against callers.
 */
void ifft842 (doubleComplex* b, int size , int in)
{
  double fn;
  doubleComplex temp ;

  /* L[1..15] are the loop bounds/strides of the reordering pass
   * (L[0] is never used); L1..L15 hold them in reversed order. */
  int L[16],L1,L2,L3,L4,L5,L6,L7,L8,L9,L10,L11,L12,L13,L14,L15;
  int j1,j2,j3,j4,j5,j6,j7,j8,j9,j10,j11,j12,j13,j14;
  int i = 0, j, ij, ji, ij1, ji1;
  int n2pow, n8pow, nthpo, ipass, nxtlt, lengt;

  n2pow = fastlog2( size );
  nthpo = size ;
  fn = nthpo;   /* divisor used when scaling the outputs */

  if(in==INVERSE)
    /* scramble inputs: swap element i with element i + size/2 */
    for(i=0,j=size/2;j<size;i++,j++)
    {
      temp = DoubleComplex ( zreals ( b[j] ) , zimags( b[j] ));
      b[j] = DoubleComplex ( zreals ( b[i] ) , zimags( b[i] ));
      b[i] = DoubleComplex ( zreals ( temp ) , zimags( temp ));
      /* (replaces the field-by-field .re/.im swap of the older version) */
    }

  /* Number of radix-8 passes that fit into n2pow */
  n8pow = n2pow/3;

  if(n8pow)
  {
    /* radix 8 iterations */
    for(ipass=1;ipass<=n8pow;ipass++)
    {
      nxtlt = 0x1 << (n2pow - 3*ipass);
      lengt = 8*nxtlt;
      ir8tx(nxtlt,nthpo,lengt,
            b,b+nxtlt,b+2*nxtlt,
            b+3*nxtlt,b+4*nxtlt,b+5*nxtlt,
            b+6*nxtlt,b+7*nxtlt);
    }
  }

  if(n2pow%3 == 1)
  {
    /* radix 2 iteration needed */
    ir2tx(nthpo,b,b+1);
  }

  if(n2pow%3 == 2)
  {
    /* radix 4 iteration needed */
    ir4tx(nthpo,b,b+1,b+2,b+3);
  }

  /* Build the reordering table: L[j] = 2^(n2pow+1-j) for j <= n2pow,
   * and 1 for the remaining (unused) levels. */
  for(j=1;j<=15;j++)
  {
    L[j] = 1;
    if(j-n2pow <= 0) L[j] = 0x1 << (n2pow + 1 - j);
  }

  L15=L[1];L14=L[2];L13=L[3];L12=L[4];L11=L[5];L10=L[6];L9=L[7];
  L8=L[8];L7=L[9];L6=L[10];L5=L[11];L4=L[12];L3=L[13];L2=L[14];L1=L[15];

  /* Reordering pass. ij counts 1..size in natural order while the nested
   * loops generate the matching partner index ji; each pair is swapped
   * once (only when ij < ji). Indices are 1-based here, hence the -1.
   * Presumably this is the digit-reversal permutation of the FFT -
   * TODO confirm. */
  ij = 1;

  for(j1=1;j1<=L1;j1++)
  for(j2=j1;j2<=L2;j2+=L1)
  for(j3=j2;j3<=L3;j3+=L2)
  for(j4=j3;j4<=L4;j4+=L3)
  for(j5=j4;j5<=L5;j5+=L4)
  for(j6=j5;j6<=L6;j6+=L5)
  for(j7=j6;j7<=L7;j7+=L6)
  for(j8=j7;j8<=L8;j8+=L7)
  for(j9=j8;j9<=L9;j9+=L8)
  for(j10=j9;j10<=L10;j10+=L9)
  for(j11=j10;j11<=L11;j11+=L10)
  for(j12=j11;j12<=L12;j12+=L11)
  for(j13=j12;j13<=L13;j13+=L12)
  for(j14=j13;j14<=L14;j14+=L13)
  for(ji=j14;ji<=L15;ji+=L14)
  {
    ij1 = ij-1;
    ji1 = ji-1;
    if(ij-ji<0)
    {
      temp = b[ij1];
      b[ij1] = b[ji1];
      b[ji1] = temp;
      /* (replaces the field-by-field .re/.im swap of the older version) */
    }
    ij++;
  }

  if(in==INVERSE) /* scale outputs */
  {
    for(i=0;i<nthpo;i++)
    {
      b[i] = DoubleComplex ( zreals( b[i] )/fn , zimags(b[i])/fn);
      /* fn changes sign after every element: even-indexed outputs are
       * divided by +size, odd-indexed outputs by -size.
       * NOTE(review): presumably this undoes the effect of the half-swap
       * applied to the inputs above - confirm. */
      fn *= -1 ;
    }
  }
}