/**
 * Computes the magnitude spectrum of a real signal, in place.
 *
 * On entry pfSamples is read as muiFFTSize real samples (muiFFTSize / 2
 * interleaved pairs). On return entries 0 .. muiFFTSize / 2 hold the scaled
 * magnitudes of the corresponding frequency bins, so the buffer must have
 * room for muiFFTSize / 2 + 1 values.
 *
 * @param pfSamples  Buffer that supplies the input and receives the magnitudes.
 * @throws IException* when sample is not 4 bytes wide.
 */
void CFFTOSX::GetMagnitude(sample* pfSamples) throw (IException*)
{
	// Only 32-bit sample is supported
	if (sizeof(sample) != 4) {
		throw IException::Create(IException::TypeCode, IException::ReasonGeneric, EXCEPTION_INFO, (tchar*)"Unknown or unsupported sample size");
	}

	// Convert the interleaved input into the split-complex layout held in mA
	ctoz((COMPLEX*)pfSamples, 2, &mA, 1, muiFFTSize / 2);

	// In-place forward real FFT
	fft_zrip(mFFT, &mA, 1, muiOrder, FFT_FORWARD);

	// Scale both halves of the split-complex result
	float fScale = 1.0f / (2 * muiFFTSize);
	vsmul(mA.realp, 1, &fScale, mA.realp, 1, muiFFTSize / 2);
	vsmul(mA.imagp, 1, &fScale, mA.imagp, 1, muiFFTSize / 2);

	// Magnitude of the ordinary complex bins 1 .. N/2 - 1
	tint32 iIndex;
	for (iIndex = 1; iIndex < muiFFTSize / 2; iIndex++) {
		pfSamples[iIndex] = (float)sqrt((mA.realp[iIndex] * mA.realp[iIndex]) + (mA.imagp[iIndex] * mA.imagp[iIndex]));
	}

	// Special cases: the packed real FFT output keeps the purely real DC term
	// in realp[0] and the purely real Nyquist term in imagp[0]. realp[1] is
	// the real part of bin 1 and must not be used for the Nyquist magnitude.
	pfSamples[0] = (float)sqrt(mA.realp[0] * mA.realp[0]);
	pfSamples[muiFFTSize / 2] = (float)sqrt(mA.imagp[0] * mA.imagp[0]);
}
/*
 * Recomputes the camera basis stored in *cameraPtr from its orig and target
 * fields: dir becomes the normalised (target - orig) vector, x and y are
 * derived from dir and a fixed (0, 1, 0) up vector and are scaled by the
 * field-of-view factor (x additionally by width / height).
 */
void UpdateCamera() {
	/* 45 degrees expressed in radians */
	const float fieldOfView = (M_PI / 180.f) * 45.f;
	const Vec worldUp = {0.f, 1.f, 0.f};

	/* Viewing direction */
	vsub(&cameraPtr->dir, &cameraPtr->target, &cameraPtr->orig);
	vnorm(&cameraPtr->dir);

	/* Horizontal axis: dir x up, stretched by the aspect ratio */
	vxcross(&cameraPtr->x, &cameraPtr->dir, &worldUp);
	vnorm(&cameraPtr->x);
	vsmul(&cameraPtr->x, width * fieldOfView / height, &cameraPtr->x);

	/* Vertical axis: x cross dir */
	vxcross(&cameraPtr->y, &cameraPtr->x, &cameraPtr->dir);
	vnorm(&cameraPtr->y);
	vsmul(&cameraPtr->y, fieldOfView, &cameraPtr->y);
}
void UpdateRenderingCPU(void) { double startTime = WallClockTime(); const float invWidth = 1.f / width; const float invHeight = 1.f / height; int x, y; for (y = 0; y < height; y++) { /* Loop over image rows */ for (x = 0; x < width; x++) { /* Loop cols */ const int i = (height - y - 1) * width + x; const int i2 = 2 * i; const float r1 = GetRandom(&seeds[i2], &seeds[i2 + 1]) - .5f; const float r2 = GetRandom(&seeds[i2], &seeds[i2 + 1]) - .5f; const float kcx = (x + r1) * invWidth - .5f; const float kcy = (y + r2) * invHeight - .5f; Vec rdir; vinit(rdir, camera.x.x * kcx + camera.y.x * kcy + camera.dir.x, camera.x.y * kcx + camera.y.y * kcy + camera.dir.y, camera.x.z * kcx + camera.y.z * kcy + camera.dir.z); Vec rorig; vsmul(rorig, 0.1f, rdir); vadd(rorig, rorig, camera.orig) vnorm(rdir); const Ray ray = {rorig, rdir}; Vec r; RadiancePathTracing(spheres, sphereCount, &ray, &seeds[i2], &seeds[i2 + 1], &r); if (currentSample == 0) colors[i] = r; else { const float k1 = currentSample; const float k2 = 1.f / (k1 + 1.f); colors[i].x = (colors[i].x * k1 + r.x) * k2; colors[i].y = (colors[i].y * k1 + r.y) * k2; colors[i].z = (colors[i].z * k1 + r.z) * k2; } pixels[y * width + x] = toInt(colors[i].x) | (toInt(colors[i].y) << 8) | (toInt(colors[i].z) << 16); } } const float elapsedTime = WallClockTime() - startTime; const float sampleSec = height * width / elapsedTime; sprintf(captionBuffer, "Rendering time %.3f sec (pass %d) Sample/sec %.1fK\n", elapsedTime, currentSample, sampleSec / 1000.f); currentSample++; }
void siglab_cbPhaseDeg(float *src, float *dst, long nfft) { // requires dst to be nel/2+1 element or more // src is in separated Re and Im arrays, A(Im) = A(Re) + NFFT/2 static float phsf = 45.0f/atan2f(1.0f,1.0f); siglab_cbPhase(src,dst,nfft); vsmul(dst,1,&phsf,dst,1,nfft/2); }
// ------------------------------------------------- // FFT post-processing // void siglab_sbDB(float *src, float *dst, long nfft) { // requires src to be power instead of amplitude // already processed for pwr from complex format. static float dbsf = 10.0f/logf(10.0f); long nfft2 = nfft/2; float *pdst = dst; for(long ix = nfft2; --ix >= 0;) *pdst++ = logf(*src++); vsmul(dst,1,&dbsf,dst,1,nfft2); }
void __TFWT2D_TransformerRep::init_kernel(float *pkrnl, long nkrows, long nkcols) { // should never happen... but disallow kernel sizes larger // than the expected image size. Even though we could theoretically // accommodate to the ceiling power of two in size that would destroy // the zero fill needed for user image padding... check_valid_transformer(); if(nkcols > m_nucols || nkrows > m_nurows) throw("TFWT2D_Transformer: kernel size too large."); VDSP_CRITICAL_SECTION { // prep output buffer for this run setup_split_complex_outbuf(); // just move the kernel into the input buffer without regard // for its phase center. We will correct afterward. implant_kernel(pkrnl, nkcols, nkrows); // perform the forward FFT of the kernel fwd_fft(); // Correct the phase center. We only nead real-valued amplitudes. // Prep for use as an element-by-element real-real vector multipy // this uses a bit more memory but it is faster in the face of the // 2-D packed output format of the real-to-complex FFT // first get back to packed format from this wierd output format provided ztoc(&m_outdata, 1, m_pkrnlmask, 2, m_tsize2); // then, for each row and every complex column except the first... for(long iy = m_nirows; --iy >= 0; ) { long off = iy * m_nicols2; for(long ix = m_nicols2; --ix > 0; ) { long k = off + ix; double re = (double)m_pkrnlmask[k].real; double im = (double)m_pkrnlmask[k].imag; float v = (float)hypot(re,im); (*m_poutbuf)[k].real = v; (*m_poutbuf)[k].imag = v; } } // Then in the first complex column, for all rows except the first two... 
float *pdst = *(float**)m_poutbuf; float *psrc = (float*)m_pkrnlmask; for(long iy = 2; iy < m_nirows; iy += 2) { long kre = iy*m_nicols; long kim = kre + m_nicols; double re = (double)psrc[kre]; double im = (double)psrc[kim]; float v = (float)hypot(re,im); pdst[kre] = v; pdst[kim] = v; ++kre; ++kim; re = psrc[kre]; im = psrc[kim]; v = (float)hypot(re,im); pdst[kre] = v; pdst[kim] = v; } // finally, copy over the remaining untouched 4 cells for(long iy = 2; --iy >= 0;) { long off = iy * m_nicols; for(long ix = 2; --ix >= 0;) { long k = ix + off; pdst[k] = psrc[k]; } } // now refold the kernel so that we can do straight multiplies in the filter DSPSplitComplex kdata = {(float*)m_pkrnlmask, (float*)m_pkrnlmask + m_tsize2}; ctoz(*m_poutbuf, 2, &kdata, 1, m_tsize2); } END_VDSP_CRITICAL_SECTION; // apply descaling for round-trip amplification // = *2 for forward FFT on image // = *tsize for inverse FFT on image // = *2 for forward FFT on kernel long tsize = m_nicols * m_nirows; float sf = 0.25/tsize; vsmul((const float*)m_pkrnlmask, 1, &sf, (float*)m_pkrnlmask, 1, tsize); }
// Multiplies nel contiguous floats from src by the scalar kval and stores the
// products in dst (unit stride on both sides).
void siglab_sbMpy1(float kval, float *src, float *dst, long nel)
{
  float *scalar = &kval;   // vsmul takes the multiplier by address
  vsmul(src, 1, scalar, dst, 1, nel);
}
/* Runs ntp replicas, one per (beta[k], bp[k]) pair, for
 * m->nequil + m->nsteps steps and returns the energy/volume histogram
 * collected after the first m->nequil steps.
 *
 * When m->re is set, one exchange between two randomly chosen replicas is
 * attempted every step. The histogram is also written to m->fnhis2.
 *
 * NOTE(review): the array obtained from xnew() is not released before
 * returning - confirm whether that is intentional. */
static hist2_t *lj_domd(model_t *m, int ntp, double *beta, double *bp)
{
  double elo, ehi, vlo, vhi;
  double nattempts = DBL_MIN; /* starts at DBL_MIN so the ratio below never divides by zero */
  double naccepted = 0.0;
  int step, k;
  lj_t **rep;
  hist2_t *hist;

  /* histogram bounds, truncated to multiples of the bin widths */
  elo = (int) (m->nn * m->emin / m->de) * m->de;
  ehi = (int) (m->nn * m->emax / m->de) * m->de;
  vlo = (int) (m->nn * m->vmin / m->dv) * m->dv;
  vhi = (int) (m->nn * m->vmax / m->dv) * m->dv;

  hist = hist2_open(ntp, elo, ehi, m->de, vlo, vhi, m->dv);

  /* seed the generator from the clock */
  mtscramble( time(NULL) );

  /* one system per replica, at density bp/beta */
  xnew(rep, ntp);
  for ( k = 0; k < ntp; k++ ) {
    double rho = bp[k] / beta[k];
    rep[k] = lj_open(m->nn, rho, m->rcdef);
  }

  for ( step = 1; step <= m->nequil + m->nsteps; step++ ) {
    /* advance every replica by one step */
    for ( k = 0; k < ntp; k++ ) {
      lj_vv(rep[k], m->mddt);
      rep[k]->ekin = lj_vrescale(rep[k], 1/beta[k], m->thdt);
      if ( step % m->nstvmov == 0 ) {
        lj_langp0(rep[k], m->pdt, 1/beta[k], bp[k]/beta[k], 0);
      }
    }

    if ( m->re ) {
      /* replica exchange between a random pair (a, b) */
      int a, b, accepted;
      double dbdE;

      a = (int) (rand01() * ntp);
      b = (a + 1 + (int) (rand01() * (ntp - 1))) % ntp;
      dbdE = (beta[a] - beta[b]) * (rep[a]->epot - rep[b]->epot)
           + (bp[a] - bp[b]) * (rep[a]->vol - rep[b]->vol);

      /* accept with probability min(1, exp(dbdE)); the random number is
       * drawn only when dbdE is not >= 0 */
      accepted = ( dbdE >= 0 || rand01() < exp(dbdE) );

      if ( accepted ) {
        double scl = sqrt( beta[a]/beta[b] );
        double invscl = 1/scl;
        lj_t *held;
        int i;

        /* rescale the velocities to match the new temperatures */
        for ( i = 0; i < m->nn; i++ ) {
          vsmul(rep[a]->v[i], scl);
          vsmul(rep[b]->v[i], invscl);
        }

        /* exchange the two systems */
        held = rep[a];
        rep[a] = rep[b];
        rep[b] = held;
      }

      /* acceptance statistics are gathered after equilibration only */
      if ( step > m->nequil ) {
        naccepted += accepted;
        nattempts += 1.0;
      }
    }

    /* accumulate the histogram once equilibration is over */
    if ( step > m->nequil ) {
      for ( k = 0; k < ntp; k++ ) {
        hist2_add1(hist, k, rep[k]->epot, rep[k]->vol, 1.0, HIST2_VERBOSE);
      }
    }
  }

  hist2_save(hist, m->fnhis2, HIST2_ADDAHALF | HIST2_NOZEROES | HIST2_VERBOSE);
  fprintf(stderr, "simulation ended, doing WHAM, reacc = %g%%\n",
      100*naccepted/nattempts);

  for ( k = 0; k < ntp; k++ ) {
    lj_close( rep[k] );
  }
  return hist;
}
/* Runs m->nT replicas, one per inverse temperature beta[k], for
 * m->nequil + m->nsteps steps and returns the energy histogram collected
 * after the first m->nequil steps.
 *
 * When m->re is set and there are at least two temperatures, one exchange
 * between a random pair of neighboring temperatures is attempted every step.
 * The histogram is also written to m->fnhis.
 *
 * NOTE(review): the array obtained from xnew() is not released before
 * returning - confirm whether that is intentional. */
static hist_t *lj_domd(model_t *m, const double *beta)
{
  double emin, emax;
  int istep, iT;
  lj_t **lj, *ljtmp;
  hist_t *hs;

  /* round emin and emax to multiples of m->de */
  emin = (int) (m->nn * m->emin / m->de) * m->de;
  emax = (int) (m->nn * m->emax / m->de) * m->de;

  hs = hist_open(m->nT, emin, emax, m->de);

  /* randomize the initial state */
  mtscramble( time(NULL) );

  xnew(lj, m->nT);
  for ( iT = 0; iT < m->nT; iT++ ) {
    lj[iT] = lj_open(m->nn, m->rho, m->rcdef);
  }

  /* do simulations */
  for ( istep = 1; istep <= m->nequil + m->nsteps; istep++ ) {
    for ( iT = 0; iT < m->nT; iT++ ) {
      lj_vv(lj[iT], m->mddt);
      lj[iT]->ekin = lj_vrescale(lj[iT], 1/beta[iT], m->thdt);
    }

    /* A neighbor pair needs at least two temperatures: with m->nT == 1 the
     * partner index would be 1, outside both beta[] and lj[]. */
    if ( m->re && m->nT > 1 ) {
      /* replica exchange: randomly swap configurations of
       * two neighboring temperatures */
      int jT, acc;
      double dbdE, r;

      iT = (int) (rand01() * (m->nT - 1));
      jT = iT + 1;
      dbdE = (beta[iT] - beta[jT]) * (lj[iT]->epot - lj[jT]->epot);

      /* accept with probability min(1, exp(dbdE)) */
      acc = 0;
      if ( dbdE >= 0 ) {
        acc = 1;
      } else {
        r = rand01();
        if ( r < exp(dbdE) ) {
          acc = 1;
        }
      }

      if ( acc ) {
        double scl = sqrt( beta[iT]/beta[jT] ); /* sqrt(Tj/Ti) */
        int i;

        /* scale the velocities */
        for ( i = 0; i < m->nn; i++ ) {
          vsmul(lj[iT]->v[i], scl);
          vsmul(lj[jT]->v[i], 1/scl);
        }

        /* swap the models */
        ljtmp = lj[iT], lj[iT] = lj[jT], lj[jT] = ljtmp;
      }
    }

    /* nothing is recorded during equilibration */
    if ( istep <= m->nequil ) continue;

    for ( iT = 0; iT < m->nT; iT++ ) {
      hist_add1(hs, iT, lj[iT]->epot, 1.0, HIST_VERBOSE);
    }
  }

  hist_save(hs, m->fnhis, HIST_ADDAHALF | HIST_NOZEROES | HIST_VERBOSE);
  fprintf(stderr, "simulation ended, doing WHAM\n");

  for ( iT = 0; iT < m->nT; iT++ ) {
    lj_close( lj[iT] );
  }
  return hs;
}