int shrinkWrap ( float * const & rIntensity, const std::vector<unsigned> & rSize, unsigned rnCycles, float rTargetError, float rHioBeta, float rIntensityCutOffAutoCorel, float rIntensityCutOff, float rSigma0, float rSigmaChange, unsigned rnHioCycles ) { if ( rSize.size() != 2 ) return 1; const unsigned & Ny = rSize[1]; const unsigned & Nx = rSize[0]; /* Evaluate input parameters and fill with default values if necessary */ if ( rIntensity == NULL ) return 1; if ( rTargetError <= 0 ) rTargetError = 1e-5; if ( rnHioCycles == 0 ) rnHioCycles = 20; if ( rHioBeta <= 0 ) rHioBeta = 0.9; if ( rIntensityCutOffAutoCorel <= 0 ) rIntensityCutOffAutoCorel = 0.04; if ( rIntensityCutOff <= 0 ) rIntensityCutOff = 0.2; if ( rSigma0 <= 0 ) rSigma0 = 3.0; if ( rSigmaChange <= 0 ) rSigmaChange = 0.01; float sigma = rSigma0; /* calculate this (length of array) often needed value */ unsigned nElements = 1; for ( unsigned i = 0; i < rSize.size(); ++i ) { assert( rSize[i] > 0 ); nElements *= rSize[i]; } /* allocate needed memory so that HIO doesn't need to allocate and * deallocate on each call */ fftwf_complex * const curData = fftwf_alloc_complex( nElements ); fftwf_complex * const gPrevious = fftwf_alloc_complex( nElements ); auto const isMasked = new float[nElements]; /* create fft plans G' to g' and g to G */ auto toRealSpace = fftwf_plan_dft( rSize.size(), (int*) &rSize[0], curData, curData, FFTW_BACKWARD, FFTW_ESTIMATE ); auto toFreqSpace = fftwf_plan_dft( rSize.size(), (int*) &rSize[0], gPrevious, curData, FFTW_FORWARD, FFTW_ESTIMATE ); /* create first guess for mask from autocorrelation (fourier transform * of the intensity @see * https://en.wikipedia.org/wiki/Wiener%E2%80%93Khinchin_theorem */ #pragma omp parallel for for ( unsigned i = 0; i < nElements; ++i ) { curData[i][0] = rIntensity[i]; /* Re */ curData[i][1] = 0; } fftwf_execute( toRealSpace ); complexNormElementwise( isMasked, curData, nElements ); /* fftShift is not necessary, but I introduced this, because for the * 
example it shifted the result to a better looking position ... */ //fftShift( isMasked, Nx,Ny ); libs::gaussianBlur( isMasked, Nx, Ny, sigma ); #if DEBUG_SHRINKWRAPP_CPP == 1 std::ofstream file; std::string fname = std::string("shrinkWrap-init-mask-blurred"); file.open( ( fname + std::string(".dat") ).c_str() ); for ( unsigned ix = 0; ix < rSize[0]; ++ix ) { for ( unsigned iy = 0; iy < rSize[1]; ++iy ) file << std::setw(10) << isMasked[ iy*rSize[0] + ix ] << " "; file << "\n"; } file.close(); std::cout << "Written out " << fname << ".png\n"; #endif /* apply threshold to make binary mask */ { const auto absMax = vectorMax( isMasked, nElements ); const float threshold = rIntensityCutOffAutoCorel * absMax; #pragma omp parallel for for ( unsigned i = 0; i < nElements; ++i ) isMasked[i] = isMasked[i] < threshold ? 1 : 0; } #if DEBUG_SHRINKWRAPP_CPP == 1 fname = std::string("shrinkWrap-init-mask"); file.open( ( fname + std::string(".dat") ).c_str() ); for ( unsigned ix = 0; ix < rSize[0]; ++ix ) { for ( unsigned iy = 0; iy < rSize[1]; ++iy ) file << std::setw(10) << isMasked[ iy*rSize[0] + ix ] << " "; file << "\n"; } file.close(); std::cout << "Written out " << fname << ".png\n"; #endif /* copy original image into fftw_complex array and add random phase */ #pragma omp parallel for for ( unsigned i = 0; i < nElements; ++i ) { curData[i][0] = rIntensity[i]; /* Re */ curData[i][1] = 0; } /* in the first step the last value for g is to be approximated * by g'. The last value for g, called g_k is needed, because * g_{k+1} = g_k - hioBeta * g' ! 
This is inside the loop * because the fft is needed */ #pragma omp parallel for for ( unsigned i = 0; i < nElements; ++i ) { gPrevious[i][0] = curData[i][0]; gPrevious[i][1] = curData[i][1]; } /* repeatedly call HIO algorithm and change mask */ for ( unsigned iCycleShrinkWrap = 0; iCycleShrinkWrap < rnCycles; ++iCycleShrinkWrap ) { /************************** Update Mask ***************************/ std::cout << "Update Mask with sigma=" << sigma << "\n"; /* blur |g'| (normally g' should be real!, so |.| not necessary) */ complexNormElementwise( isMasked, curData, nElements ); libs::gaussianBlur( isMasked, Nx, Ny, sigma ); const auto absMax = vectorMax( isMasked, nElements ); /* apply threshold to make binary mask */ const float threshold = rIntensityCutOff * absMax; #pragma omp parallel for for ( unsigned i = 0; i < nElements; ++i ) isMasked[i] = isMasked[i] < threshold ? 1 : 0; /* update the blurring sigma */ sigma = fmax( 1.5, ( 1 - rSigmaChange ) * sigma ); for ( unsigned iHioCycle = 0; iHioCycle < rnHioCycles; ++iHioCycle ) { /* apply domain constraints to g' to get g */ #pragma omp parallel for for ( unsigned i = 0; i < nElements; ++i ) { if ( isMasked[i] == 1 or /* g' */ curData[i][0] < 0 ) { gPrevious[i][0] -= rHioBeta * curData[i][0]; gPrevious[i][1] -= rHioBeta * curData[i][1]; } else { gPrevious[i][0] = curData[i][0]; gPrevious[i][1] = curData[i][1]; } } /* Transform new guess g for f back into frequency space G' */ fftwf_execute( toFreqSpace ); /* Replace absolute of G' with measured absolute |F| */ applyComplexModulus( curData, curData, rIntensity, nElements ); fftwf_execute( toRealSpace ); } // HIO loop /* check if we are done */ const float currentError = imresh::libs::calculateHioError( curData /*g'*/, isMasked, nElements ); std::cout << "[Error " << currentError << "/" << rTargetError << "] " << "[Cycle " << iCycleShrinkWrap << "/" << rnCycles-1 << "]" << "\n"; if ( rTargetError > 0 && currentError < rTargetError ) break; if ( iCycleShrinkWrap >= 
rnCycles ) break; } // shrink wrap loop for ( unsigned i = 0; i < nElements; ++i ) rIntensity[i] = curData[i][0]; /* free buffers and plans */ fftwf_destroy_plan( toFreqSpace ); fftwf_destroy_plan( toRealSpace ); fftwf_free( curData ); fftwf_free( gPrevious); delete[] isMasked; return 0; }
int bi_entry(void * mdpv, int iproblemsize, double * dresults) { /* dstart, dend: the start and end time of the measurement */ /* dtime: the time for a single measurement in seconds */ double dstart = 0.0, dend = 0.0, dtime = 0.0, dinit = 0.0; /* flops stores the calculated FLOPS */ double flops = 0.0; /* ii is used for loop iterations */ myinttype ii, jj, imyproblemsize, numberOfRuns; /* cast void* pointer */ mydata_t* pmydata = (mydata_t*)mdpv; int invalid = 0; /* calculate real problemsize */ imyproblemsize = (int)pow(2, (log2(pmydata->min) + (myinttype)iproblemsize - 1)); /* store the value for the x axis in results[0] */ dresults[0] = (double)imyproblemsize; /*** in place run ***/ /* malloc */ pmydata->inout = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * imyproblemsize * imyproblemsize); /* create FFT plan */ pmydata->p = fftwf_plan_dft_2d(imyproblemsize, imyproblemsize, pmydata->inout, pmydata->inout, FFTW_FORWARD, FFTW_ESTIMATE); /* init stuff */ initData_ip(pmydata, imyproblemsize); numberOfRuns = 1; dstart = bi_gettime(); /* fft calculation */ fftwf_execute(pmydata->p); dend = bi_gettime(); /* calculate the used time*/ dtime = dend - dstart; dtime -= dTimerOverhead; /* loop calculation if accuracy is insufficient */ while (dtime < 100 * dTimerGranularity) { numberOfRuns = numberOfRuns * 2; dstart = bi_gettime(); for (jj = 0; jj < numberOfRuns; jj++) { /* fft calculation */ fftwf_execute(pmydata->p); } dend = bi_gettime(); dtime = dend - dstart; dtime -= dTimerOverhead; } /* check for overflows */ for (ii = 0; ii < imyproblemsize * imyproblemsize; ii++) { if (isnan(pmydata->inout[ii][0]) || isnan(pmydata->inout[ii][1])) invalid = 1; if (isinf(pmydata->inout[ii][0]) || isinf(pmydata->inout[ii][1])) invalid = 1; } /* if loop was necessary */ if (numberOfRuns > 1) dtime = dtime / numberOfRuns; /* calculate the used FLOPS */ flops = (double)(5.0 * imyproblemsize * imyproblemsize * (log2(1.0 * imyproblemsize * imyproblemsize)) / dtime); /* store the 
FLOPS in results[1] */ if (invalid == 1) dresults[1] = INVALID_MEASUREMENT; else dresults[1] = flops; fftwf_destroy_plan(pmydata->p); /* free data */ fftwf_free(pmydata->inout); /*** out of place run ***/ /* malloc */ pmydata->in = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * imyproblemsize * imyproblemsize); pmydata->out = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * imyproblemsize * imyproblemsize); /* create FFT plan */ pmydata->p = fftwf_plan_dft_2d(imyproblemsize, imyproblemsize, pmydata->in, pmydata->out, FFTW_FORWARD, FFTW_ESTIMATE); /* init stuff */ initData_oop(pmydata, imyproblemsize); numberOfRuns = 1; dstart = bi_gettime(); /* fft calculation */ fftwf_execute(pmydata->p); dend = bi_gettime(); /* calculate the used time*/ dtime = dend - dstart; dtime -= dTimerOverhead; /* loop calculation if accuracy is insufficient */ while (dtime < 100 * dTimerGranularity) { numberOfRuns = numberOfRuns * 2; dstart = bi_gettime(); for (ii = 0; ii < numberOfRuns; ii++) { /* fft calculation */ fftwf_execute(pmydata->p); } dend = bi_gettime(); /* calculate the used time*/ dtime = dend - dstart; dtime -= dTimerOverhead; } /* if loop was necessary */ if (numberOfRuns > 1) dtime = dtime / numberOfRuns; /* check for overflows */ for (ii = 0; ii < imyproblemsize * imyproblemsize; ii++) { if (isnan(pmydata->out[ii][0]) || isnan(pmydata->out[ii][1])) invalid = 1; if (isinf(pmydata->out[ii][0]) || isinf(pmydata->out[ii][1])) invalid = 1; } /* calculate the used FLOPS */ flops = (double)(5.0 * imyproblemsize * imyproblemsize * (log2(1.0 * imyproblemsize * imyproblemsize)) / dtime); /* store the FLOPS in results[2] */ if (invalid == 1) dresults[2] = INVALID_MEASUREMENT; else dresults[2] = flops; fftwf_destroy_plan(pmydata->p); /* free data */ fftwf_free(pmydata->in); fftwf_free(pmydata->out); return 0; }
void* conv_tilde_parallel_thread(void* arg) { int chan, n, i,H_ptr; t_sample *in, *ir, *out; /* The pointer to the object struct */ t_conv_tilde *x = (t_conv_tilde*) arg; const unsigned int offset = 2; /* Loop the latter half of the total channels. */ for ( chan = x->channels_halved; chan < x->channels; chan++ ) { /* Check whether the IR has changed for either channel */ if (x->ircount[chan] != x->ircount_prev[chan]) { post("conv~: IR%d changed", chan); /* Start filling the arrays from the beginning */ x->ir_end_ptr[chan] = 0; x->input_rw_ptr[chan] = 0; /* Truncate the IR if it is longer than STORAGELENGTH. */ if ( x->irlength[chan] > STORAGELENGTH ) x->irlength[chan] = STORAGELENGTH; /* Calculate the number of frames to store */ x->ir_frames[chan] = ceil((float)x->irlength[chan] / x->framesize); x->frames_stored[chan] = 0; x->ircount_prev[chan] = x->ircount[chan]; } /* ----- BYPASS ----------------------------------------------------- */ /* If no IR is loaded, bypass the convolution to save cpu */ if ( x->irlength[chan] <= 0 || x->bypass[chan] ) { in = (t_sample *)(x->x_myvec[offset + chan]); out = (t_sample *)(x->x_myvec[offset - chan + x->all_channels - 1]); memcpy( out, in, sizeof(t_sample) * x->framesize ); /* Skip the processing */ continue; } /* ------------------------------------------------------------------ */ /* Get the sample data from x->x_myvec this time. */ in = (t_sample *)(x->x_myvec[offset + chan]); ir = (t_sample *)(x->x_myvec[offset + chan + x->channels]); out = (t_sample *)(x->x_myvec[offset - chan + x->all_channels - 1]); /* This bit decides whether we append the frame to the storage or overwrite the oldest one */ if (x->frames_stored[chan] < x->ir_frames[chan]) { /* This means that there are still IR buffers coming. */ /* 1) Copy the input buffer to the REAL part of the input_complex array. */ #ifdef INPLACE /* Zero pad. */ memset(x->input_complex_2, 0, sizeof(fftwf_complex) * x->framesize); /* Copy samples. 
*/ for (i = x->framesize - 1; i >= 0; i--) { x->input_complex_2[x->framesize + i][0] = in[i]; x->input_complex_2[x->framesize + i][1] = 0; } #else memcpy(x->input_to_fft_2 + x->framesize, in, sizeof(float) * x->framesize); #endif /* 2) Transform and store the input buffer. */ fftwf_execute(x->fftplan_in_2); memcpy(x->stored_input[chan] + x->ir_end_ptr[chan], x->input_complex_2, sizeof(fftwf_complex) * x->fft_size); /* 3) Do the same to the IR buffer. */ #ifdef INPLACE /* Zero pad. */ memset(x->ir_complex_2 + x->framesize, 0, sizeof(fftwf_complex) * x->framesize); /* Copy samples. */ for (i = x->framesize - 1; i >= 0; i--) { x->ir_complex_2[i][0] = ir[i]; x->ir_complex_2[i][1] = 0; } #else memcpy(x->ir_to_fft_2, ir, sizeof(float) * x->framesize); #endif fftwf_execute(x->fftplan_ir_2); memcpy(x->stored_ir[chan] + x->ir_end_ptr[chan], x->ir_complex_2, sizeof(fftwf_complex) * x->fft_size); /* 4) Increment storage pointers. */ x->ir_end_ptr[chan] += x->fft_size; x->frames_stored[chan]++; /* 5) Set the input read/write pointer forwards */ x->input_rw_ptr[chan] += x->fft_size; if (x->input_rw_ptr[chan] >= x->ir_end_ptr[chan]) x->input_rw_ptr[chan] -= x->ir_end_ptr[chan]; } else { /* IR is fully loaded. Overwrite stored audio frames (FIFO). */ /* 1) Set the input read/write pointer forwards. */ x->input_rw_ptr[chan] += x->fft_size; if (x->input_rw_ptr[chan] >= x->ir_end_ptr[chan]) x->input_rw_ptr[chan] -= x->ir_end_ptr[chan]; /* 2) Copy the input buffer to the fft_input_array. */ #ifdef INPLACE /* Zero pad. */ memset(x->input_complex_2, 0, sizeof(fftwf_complex) * x->framesize); /* Copy samples. */ for (i = x->framesize - 1; i >= 0; i--) { x->input_complex_2[x->framesize + i][0] = in[i]; x->input_complex_2[x->framesize + i][1] = 0; } #else memcpy(x->input_to_fft_2 + x->framesize, in, sizeof(float) * x->framesize); #endif /* 3) Transform and store the input buffer. 
*/ fftwf_execute(x->fftplan_in_2); memcpy(x->stored_input[chan] + x->input_rw_ptr[chan], x->input_complex_2, sizeof(fftwf_complex) * x->fft_size); } /* - Convolve block by block -----------------------------------------*/ H_ptr = 0; /* If the process is here then there is bound to be at least one frame stored. The first frame in the sum overwrites the output array. The iteration goes down as ARM CPU compares against zero the fastest. */ for (n = x->fft_size - 1; n >= 0; n--) { /* Complex multiplication (frequency domain convolution): Re(Y) = Re(X)Re(H) - Im(X)Im(H) Im(Y) = Im(X)Re(H) + Re(X)Im(H) Nice 4 divisible iteration length for compiler SIMD optimization. Note that the first iteration has to overwrite the output array. | chan | sample | Re/Im | */ x->out_complex_2[n][0] = x->stored_input [chan] [x->input_rw_ptr[chan] + n] [0] * x->stored_ir [chan] [H_ptr + n] [0] - x->stored_input [chan] [x->input_rw_ptr[chan] + n] [1] * x->stored_ir [chan] [H_ptr + n] [1]; x->out_complex_2[n][1] = x->stored_input [chan] [x->input_rw_ptr[chan] + n] [1] * x->stored_ir [chan] [H_ptr + n] [0] + x->stored_input [chan] [x->input_rw_ptr[chan] + n] [0] * x->stored_ir [chan] [H_ptr + n] [1]; } /* Move the IR pointer forwards */ H_ptr += x->fft_size; /* Move the input read/write pointer backwards. */ x->input_rw_ptr[chan] -= x->fft_size; if (x->input_rw_ptr[chan] < 0 ) x->input_rw_ptr[chan] += x->ir_end_ptr[chan]; /* If many frames in storage, repeat and sum the results. 
*/ for (i = x->frames_stored[chan] - 1; i > 0; i--) { for (n = x->fft_size - 1; n >= 0; n--) { x->out_complex_2[n][0] += x->stored_input [chan] [x->input_rw_ptr[chan] + n] [0] * x->stored_ir [chan] [H_ptr + n] [0] - x->stored_input [chan] [x->input_rw_ptr[chan] + n] [1] * x->stored_ir [chan] [H_ptr + n] [1]; x->out_complex_2[n][1] += x->stored_input [chan] [x->input_rw_ptr[chan] + n] [1] * x->stored_ir [chan] [H_ptr + n] [0] + x->stored_input [chan] [x->input_rw_ptr[chan] + n] [0] * x->stored_ir [chan] [H_ptr + n] [1]; } /* Move the IR pointer forwards */ H_ptr += x->fft_size; /* Move the input read/write pointer backwards. */ x->input_rw_ptr[chan] -= x->fft_size; if (x->input_rw_ptr[chan] < 0 ) x->input_rw_ptr[chan] += x->ir_end_ptr[chan]; } /* Insert the previous overlap save portion before overwriting the OS. */ memcpy(out, x->overlap_save[chan], sizeof(float) * x->framesize); /* Inverse fft. Result is stored in x->outTemp_2. */ fftwf_execute(x->fftplan_inverse_2); /* Store the overlap save portion. */ memcpy(x->overlap_save[chan], x->outTemp_2, sizeof(float) * x->framesize); /* Sum the output with the previous overlap and scale the amplitude. */ for (i = x->framesize - 1; i >= 0; i--) { out[i] += x->outTemp_2[i + x->framesize]; out[i] *= x->out_gain; } } /* Exit thread. */ pthread_exit(NULL); }
int Resampler::process(Buffer* const dataIn, Buffer* dataOut) { PDEBUG("Resampler::process(dataIn: %p, dataOut: %p)\n", dataIn, dataOut); dataOut->setLength(dataIn->getLength() * L / M); FFT_TYPE* in = reinterpret_cast<FFT_TYPE*>(dataIn->getData()); FFT_TYPE* out = reinterpret_cast<FFT_TYPE*>(dataOut->getData()); size_t sizeIn = dataIn->getLength() / sizeof(complexf); for (size_t i = 0, j = 0; i < sizeIn; i += myFftSizeIn / 2, j += myFftSizeOut / 2) { memcpy(myFftIn, myBufferIn, myFftSizeIn / 2 * sizeof(FFT_TYPE)); memcpy(myFftIn + (myFftSizeIn / 2), in + i, myFftSizeIn / 2 * sizeof(FFT_TYPE)); memcpy(myBufferIn, in + i, myFftSizeIn / 2 * sizeof(FFT_TYPE)); for (size_t k = 0; k < myFftSizeIn; ++k) { FFT_REAL(myFftIn[k]) *= myWindow[k]; FFT_IMAG(myFftIn[k]) *= myWindow[k]; } fftwf_execute(myFftPlan1); if (myFftSizeOut > myFftSizeIn) { memset(myBack, 0, myFftSizeOut * sizeof(FFT_TYPE)); memcpy(myBack, myFront, myFftSizeIn / 2 * sizeof(FFT_TYPE)); memcpy(&myBack[myFftSizeOut - (myFftSizeIn / 2)], &myFront[myFftSizeIn / 2], myFftSizeIn / 2 * sizeof(FFT_TYPE)); // Copy input Fs FFT_REAL(myBack[myFftSizeIn / 2]) = FFT_REAL(myFront[myFftSizeIn / 2]); FFT_IMAG(myBack[myFftSizeIn / 2]) = FFT_IMAG(myFront[myFftSizeIn / 2]); } else { memcpy(myBack, myFront, myFftSizeOut / 2 * sizeof(FFT_TYPE)); memcpy(&myBack[myFftSizeOut / 2], &myFront[myFftSizeIn - (myFftSizeOut / 2)], myFftSizeOut / 2 * sizeof(FFT_TYPE)); // Average output Fs from input FFT_REAL(myBack[myFftSizeOut / 2]) += FFT_REAL(myFront[myFftSizeOut / 2]); FFT_IMAG(myBack[myFftSizeOut / 2]) += FFT_IMAG(myFront[myFftSizeOut / 2]); FFT_REAL(myBack[myFftSizeOut / 2]) *= 0.5f; FFT_IMAG(myBack[myFftSizeOut / 2]) *= 0.5f; } for (size_t k = 0; k < myFftSizeOut; ++k) { FFT_REAL(myBack[k]) *= myFactor; FFT_IMAG(myBack[k]) *= myFactor; } fftwf_execute(myFftPlan2); for (size_t k = 0; k < myFftSizeOut / 2; ++k) { FFT_REAL(out[j + k]) = myBufferOut[k].real() + FFT_REAL(myFftOut[k]); FFT_IMAG(out[j + k]) = myBufferOut[k].imag() + 
FFT_IMAG(myFftOut[k]); } memcpy(myBufferOut, myFftOut + (myFftSizeOut / 2), (myFftSizeOut / 2) * sizeof(FFT_TYPE)); } return 1; }
int main (int argc, char* argv[]) { int n1, n2, n, nk, i, j, k, nlags; float *data, wt; sf_complex **fft, *dataf; sf_file inp, out; #ifdef SF_HAS_FFTW fftwf_plan cfg=NULL, icfg=NULL; #else kiss_fftr_cfg cfg=NULL, icfg=NULL; #endif sf_init(argc, argv); inp = sf_input("in"); out = sf_output("out"); if (SF_FLOAT != sf_gettype(inp)) sf_error("Need float input"); if (!sf_histint(inp,"n1",&n1)) sf_error("No n1= in input"); if (!sf_histint(inp,"n2",&n2)) sf_error("No n2= in input"); if (!sf_getint("nlags",&nlags)) nlags=100; /* number of lags */ sf_putint(out,"n1",nlags); sf_putint(out,"n3",n2); nk = kiss_fft_next_fast_size((n1+1)/2)+1; n = 2*(nk-1); wt = 1.0/n; data = sf_floatalloc(n); dataf = sf_complexalloc(nk); fft = sf_complexalloc2(nk,n2); #ifdef SF_HAS_FFTW cfg = fftwf_plan_dft_r2c_1d(n, data, (fftwf_complex *) dataf, FFTW_MEASURE); icfg = fftwf_plan_dft_c2r_1d(n, (fftwf_complex *) dataf, data, FFTW_MEASURE); if (NULL == cfg || NULL == icfg) sf_error("FFT allocation failure"); #else cfg = kiss_fftr_alloc(n,0,NULL,NULL); icfg = kiss_fftr_alloc(n,1,NULL,NULL); #endif for (i=0; i < n2; i++) { sf_floatread(data,n1,inp); for (k=n1; k < n; k++) { data[k] = 0.0f; } #ifdef SF_HAS_FFTW fftwf_execute(cfg); #else kiss_fftr (cfg,data,(kiss_fft_cpx *) dataf); #endif for (k=0; k < nk; k++) { fft[i][k] = dataf[k]; } } /************************************************* * * * cross-correlate every trace with every other * * * *************************************************/ for (i=0; i < n2; i++) { for (j=0; j < n2; j++) { for (k=0; k < nk; k++) { #ifdef SF_HAS_COMPLEX_H dataf[k] = fft[i][k] * conjf(fft[j][k]); #else dataf[k] = sf_cmul(fft[i][k],conjf(fft[j][k])); #endif } #ifdef SF_HAS_FFTW fftwf_execute(icfg); #else kiss_fftri(icfg,(kiss_fft_cpx *) dataf,data); #endif for (k=0; k < nlags; k++) { data[k] *= wt; } sf_floatwrite(data,nlags,out); } } exit(0); }
/* DSP perform routine of conv~: partitioned frequency-domain convolution of
   each input channel with its impulse response (IR).

   w[1] is the object pointer and the signal vectors start at w[3]. With
   THREADS defined, a child thread (conv_tilde_parallel_thread) is started for
   the upper half of the channels while this routine handles the lower half.
   If the child thread cannot be started, all channels are processed here and
   no join is attempted.

   Returns the pointer just past the last signal vector, as read from
   w + offset + x->all_channels. */
t_int *conv_tilde_perform(t_int *w)
{
    /* The pointer to the object struct */
    t_conv_tilde *x = (t_conv_tilde*) (w[1]);

    unsigned int H_ptr;
    int chan, i, n /*, vector_size */;
    /* One past the last channel processed by this thread. */
    int last_chan;
#ifdef THREADS
    int rc;
    int thread_started;
#endif

    /* t_sample is a PD datatype for sample data.
       It is basically just float. */
    t_sample* __restrict in;
    t_sample* __restrict ir;
    t_sample* __restrict out;

    /* Audio starts from w[3] */
    const unsigned int offset = 3;

    /* We do not really need vector size for anything if we keep the frame
       size constant. Changing framesize on the fly causes undefined
       behaviour as no good logic has been implemented for updating the
       storage ring-buffering in the event of a framesize change. */
    /*
    vector_size = (int)(w[2]);
    if ( vector_size != DEFAULTFRAMESIZE )
        post("conv~: Problem! Framesize %d != %d",
             vector_size, DEFAULTFRAMESIZE);
    */

    /* - PROCESS THE SIGNAL ------------------------------------------------- */

#ifdef THREADS
    rc = pthread_create(&x->childthread, NULL, conv_tilde_parallel_thread,
                        (void*) x );
    thread_started = (rc == 0);
    if ( !thread_started )
        post("conv~: Error in pthread_create(). Error code %d", rc);

    /* Without a child thread nobody else processes the upper channels, so
       take them over here instead of leaving their outputs untouched. */
    last_chan = thread_started ? x->channels_halved : x->channels;
#else
    last_chan = x->channels;
#endif

    for ( chan = 0; chan < last_chan; chan++ )
    {
        /* Check whether the IR has changed for either channel */
        if (x->ircount[chan] != x->ircount_prev[chan])
        {
            post("conv~: IR%d changed", chan);

            /* Start filling the arrays from the beginning */
            x->ir_end_ptr[chan] = 0;
            x->input_rw_ptr[chan] = 0;

            /* Truncate the IR if it is longer than STORAGELENGTH. */
            if ( x->irlength[chan] > STORAGELENGTH )
                x->irlength[chan] = STORAGELENGTH;

            /* Calculate the number of frames to store */
            x->ir_frames[chan] = ceil((float)x->irlength[chan] / x->framesize);

            x->frames_stored[chan] = 0;
            x->ircount_prev[chan] = x->ircount[chan];
        }

        /* ----- BYPASS ----------------------------------------------------- */
        /* If no IR is loaded, bypass the convolution to save cpu */
        if ( x->irlength[chan] <= 0 || x->bypass[chan] )
        {
            in  = (t_sample *)(w[offset + chan]);
            out = (t_sample *)(w[offset - chan + x->all_channels - 1]);

            memcpy( out, in, sizeof(t_sample) * x->framesize );

            /* Skip the processing */
            continue;
        }
        /* ------------------------------------------------------------------ */

        /* Assign the signal arrays to temporary pointers for simplicity.
           Note the clockwise arrangement of channels in Pd
           (in in2 ir ir2 out2 out). */
        in  = (t_sample *)(w[offset + chan]);
        ir  = (t_sample *)(w[offset + chan + x->channels]);
        out = (t_sample *)(w[offset - chan + x->all_channels - 1]);

        /* This bit decides whether we append the frame to the storage
           or overwrite the oldest one */
        if (x->frames_stored[chan] < x->ir_frames[chan])
        {
            /* This means that there are still IR buffers coming. */

            /* 1) Copy the input buffer to the second half of the FFT input. */
#ifdef INPLACE
            /* Zero pad. */
            memset(x->input_complex, 0, sizeof(fftwf_complex) * x->framesize);
            /* Copy samples. */
            for (i = x->framesize - 1; i >= 0; i--)
            {
                x->input_complex[x->framesize + i][0] = in[i];
                x->input_complex[x->framesize + i][1] = 0;
            }
#else
            memcpy(x->input_to_fft + x->framesize, in,
                   sizeof(float) * x->framesize);
#endif

            /* 2) Transform and store the input buffer. */
            fftwf_execute(x->fftplan_in);
            memcpy(x->stored_input[chan] + x->ir_end_ptr[chan],
                   x->input_complex, sizeof(fftwf_complex) * x->fft_size);

            /* 3) Do the same to the IR buffer (first half of the FFT input). */
#ifdef INPLACE
            /* Zero pad. */
            memset(x->ir_complex + x->framesize, 0,
                   sizeof(fftwf_complex) * x->framesize);
            /* Copy samples. */
            for (i = x->framesize - 1; i >= 0; i--)
            {
                x->ir_complex[i][0] = ir[i];
                x->ir_complex[i][1] = 0;
            }
#else
            memcpy(x->ir_to_fft, ir, sizeof(float) * x->framesize);
#endif
            fftwf_execute(x->fftplan_ir);
            memcpy(x->stored_ir[chan] + x->ir_end_ptr[chan],
                   x->ir_complex, sizeof(fftwf_complex) * x->fft_size);

            /* 4) Increment storage pointers. */
            x->ir_end_ptr[chan] += x->fft_size;
            x->frames_stored[chan]++;

            /* 5) Set the input read/write pointer forwards */
            x->input_rw_ptr[chan] += x->fft_size;
            if (x->input_rw_ptr[chan] >= x->ir_end_ptr[chan])
                x->input_rw_ptr[chan] -= x->ir_end_ptr[chan];
        }
        else
        {
            /* IR is fully loaded. Overwrite stored audio frames (FIFO). */

            /* 1) Set the input read/write pointer forwards. */
            x->input_rw_ptr[chan] += x->fft_size;
            if (x->input_rw_ptr[chan] >= x->ir_end_ptr[chan])
                x->input_rw_ptr[chan] -= x->ir_end_ptr[chan];

            /* 2) Copy the input buffer to the fft_input_array. */
#ifdef INPLACE
            /* Zero pad. */
            memset(x->input_complex, 0, sizeof(fftwf_complex) * x->framesize);
            /* Copy samples. */
            for (i = x->framesize - 1; i >= 0; i--)
            {
                x->input_complex[x->framesize + i][0] = in[i];
                x->input_complex[x->framesize + i][1] = 0;
            }
#else
            memcpy(x->input_to_fft + x->framesize, in,
                   sizeof(float) * x->framesize);
#endif

            /* 3) Transform and store the input buffer. */
            fftwf_execute(x->fftplan_in);
            memcpy(x->stored_input[chan] + x->input_rw_ptr[chan],
                   x->input_complex, sizeof(fftwf_complex) * x->fft_size);
        }

        /* - Convolve block by block -----------------------------------------*/

        H_ptr = 0;

        /* If the process is here then there is bound to be at least one frame
           stored. The first frame in the sum overwrites the output array.
           The iteration goes down as ARM CPU compares against zero the
           fastest. */
        for (n = x->fft_size - 1; n >= 0; n--)
        {
            /* Complex multiplication (frequency domain convolution):
               Re(Y) = Re(X)Re(H) - Im(X)Im(H)
               Im(Y) = Im(X)Re(H) + Re(X)Im(H)
               Nice 4 divisible iteration length for compiler SIMD
               optimization.
                                         | chan | sample | Re/Im | */
            x->out_complex[n][0] =
                x->stored_input [chan] [x->input_rw_ptr[chan] + n] [0]
              * x->stored_ir    [chan] [H_ptr + n]                 [0]
              - x->stored_input [chan] [x->input_rw_ptr[chan] + n] [1]
              * x->stored_ir    [chan] [H_ptr + n]                 [1];
            x->out_complex[n][1] =
                x->stored_input [chan] [x->input_rw_ptr[chan] + n] [1]
              * x->stored_ir    [chan] [H_ptr + n]                 [0]
              + x->stored_input [chan] [x->input_rw_ptr[chan] + n] [0]
              * x->stored_ir    [chan] [H_ptr + n]                 [1];
        }

        /* Move the IR pointer forwards */
        H_ptr += x->fft_size;

        /* Move the input read/write pointer backwards. */
        x->input_rw_ptr[chan] -= x->fft_size;
        if (x->input_rw_ptr[chan] < 0 )
            x->input_rw_ptr[chan] += x->ir_end_ptr[chan];

        /* If many frames in storage, repeat and sum the results. */
        for (i = x->frames_stored[chan] - 1; i > 0; i--)
        {
            for (n = x->fft_size - 1; n >= 0; n--)
            {
                x->out_complex[n][0] +=
                    x->stored_input [chan] [x->input_rw_ptr[chan] + n] [0]
                  * x->stored_ir    [chan] [H_ptr + n]                 [0]
                  - x->stored_input [chan] [x->input_rw_ptr[chan] + n] [1]
                  * x->stored_ir    [chan] [H_ptr + n]                 [1];
                x->out_complex[n][1] +=
                    x->stored_input [chan] [x->input_rw_ptr[chan] + n] [1]
                  * x->stored_ir    [chan] [H_ptr + n]                 [0]
                  + x->stored_input [chan] [x->input_rw_ptr[chan] + n] [0]
                  * x->stored_ir    [chan] [H_ptr + n]                 [1];
            }

            /* Move the IR pointer forwards */
            H_ptr += x->fft_size;

            /* Move the input read/write pointer backwards. */
            x->input_rw_ptr[chan] -= x->fft_size;
            if (x->input_rw_ptr[chan] < 0 )
                x->input_rw_ptr[chan] += x->ir_end_ptr[chan];
        }

        /* Insert the previous overlap save portion before overwriting
           the OS. */
        memcpy(out, x->overlap_save[chan], sizeof(float) * x->framesize);

        /* Inverse fft. Result is stored in x->outTemp. */
        fftwf_execute(x->fftplan_inverse);

        /* Store the overlap save portion. */
        memcpy(x->overlap_save[chan], x->outTemp,
               sizeof(float) * x->framesize);

        /* Sum the output with the previous overlap and scale the amplitude. */
        for (i = x->framesize - 1; i >= 0; i--)
        {
            out[i] += x->outTemp[i + x->framesize];
            out[i] *= x->out_gain;
        }
    }

    /* - END ---------------------------------------------------------------- */

#ifdef THREADS
    /* Only wait for a thread that was actually started. */
    if ( thread_started )
    {
        rc = pthread_join(x->childthread, NULL);
        if ( rc != 0 )
            post("conv~: pthread_join() error code %d", rc);
    }
#endif

    /* Return a pointer after the last output channel */
    return (w + offset + x->all_channels);
}
/* Advances the wavefield by one time step with a five-term pseudo-spectral
 * operator, either in forward or in adjoint mode.
 *
 * u0 holds the previous wavefield on entry and the new one on exit; u1 is the
 * current wavefield (both nx rows of nz samples). rwave/rwavem are the real
 * work arrays (row stride nzpad) and cwave/cwavem the spectral work arrays
 * (row stride nkz). NOTE(review): presumably forward_plan transforms rwave to
 * cwave and inverse_plan transforms cwavem to rwavem - confirm against the
 * plan creation.
 *
 * vp, vn, eta, vh, eps and lin_eta are per-sample coefficient fields, kz and
 * kx the per-wavenumber weights, wt the factor applied to every
 * inverse-transformed contribution. Whenever 1/kx or 1/kz is infinite the
 * corresponding ratio weight is taken as zero.
 *
 * Forward mode (adj == false): the spectrum of u1 is weighted per term,
 * transformed back and scaled by the coefficient fields in the space domain.
 * Adjoint mode (adj == true): u1 is scaled by the coefficient fields first,
 * all weighted spectra are summed in cwavem, and a single inverse transform
 * is applied. */
static void fft_stepforward(
        float **u0, float **u1,
        float *rwave, float *rwavem,
        fftwf_complex *cwave, fftwf_complex *cwavem,
        float **vp, float **vn, float **eta, float **vh,
        float **eps, float **lin_eta,
        float *kz, float *kx,
        fftwf_plan forward_plan, fftwf_plan inverse_plan,
        int nz, int nx, int nzpad, int nxpad, int nkz, int nkx,
        float wt, bool adj)
{
    /* clear all work arrays, one padded row at a time */
    for (int row=0; row<nxpad; row++) {
        memset(rwave  + row*nzpad, 0, sizeof(float)*nzpad);
        memset(rwavem + row*nzpad, 0, sizeof(float)*nzpad);
        memset(cwave  + row*nkz,   0, sizeof(fftwf_complex)*nkz);
        memset(cwavem + row*nkz,   0, sizeof(fftwf_complex)*nkz);
    }

    if (!adj) { /* ------------------- forward modeling ------------------- */

        /* leap-frog part of the update and load of the current wavefield */
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
        for (int ix=0; ix<nx; ix++) {
            const int base = ix*nzpad;
            for (int iz=0; iz<nz; iz++) {
                u0[ix][iz] = 2.0f *u1[ix][iz] - u0[ix][iz];
                rwave[base + iz] = u1[ix][iz];
            }
        }

        /* --- 2D forward Fourier transform ---*/
        fftwf_execute(forward_plan);

        /* term 1: weight kx, coefficient vh */
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
        for (int ikx=0; ikx<nkx; ++ikx) {
            const int krow = ikx * nkz;
            for (int ikz=0; ikz<nkz; ++ikz) {
                cwavem[krow + ikz] = cwave[krow + ikz] * kx[ikx];
            }
        }

        fftwf_execute(inverse_plan);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
        for (int ix=0; ix<nx; ix++) {
            const int base = ix*nzpad;
            for (int iz=0; iz<nz; iz++) {
                u0[ix][iz] -= wt * rwavem[base + iz] * vh[ix][iz];
            }
        }

        /* term 2: weight kz, coefficient vp */
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
        for (int ikx=0; ikx<nkx; ++ikx) {
            const int krow = ikx * nkz;
            for (int ikz=0; ikz<nkz; ++ikz) {
                cwavem[krow + ikz] = cwave[krow + ikz] * kz[ikz];
            }
        }

        fftwf_execute(inverse_plan);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
        for (int ix=0; ix<nx; ix++) {
            const int base = ix*nzpad;
            for (int iz=0; iz<nz; iz++) {
                u0[ix][iz] -= wt * rwavem[base + iz] * vp[ix][iz];
            }
        }

        /* term 3: weight 1/(1/kx + 1/kz) */
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
        for (int ikx=0; ikx<nkx; ++ikx) {
            const int krow = ikx * nkz;
            float inv_kx = 1. / kx[ikx];
            for (int ikz=0; ikz<nkz; ++ikz) {
                float inv_kz = 1. / kz[ikz];
                float ratio;
                if (!isinf(inv_kx) && !isinf(inv_kz))
                    ratio = 1./ (inv_kx + inv_kz);
                else
                    ratio = 0.f;
                cwavem[krow + ikz] = cwave[krow + ikz] * ratio;
            }
        }

        fftwf_execute(inverse_plan);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
        for (int ix=0; ix<nx; ix++) {
            const int base = ix*nzpad;
            for (int iz=0; iz<nz; iz++) {
                u0[ix][iz] += wt * rwavem[base + iz] * 2.f * vn[ix][iz] * eta[ix][iz];
            }
        }

        /* term 4: weight (1/kz) / (1/kx + 1/kz)^2 */
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
        for (int ikx=0; ikx<nkx; ++ikx) {
            const int krow = ikx * nkz;
            float inv_kx = 1. / kx[ikx];
            for (int ikz=0; ikz<nkz; ++ikz) {
                float inv_kz = 1. / kz[ikz];
                float ratio;
                if (!isinf(inv_kx) && !isinf(inv_kz))
                    ratio = inv_kz / ((inv_kx + inv_kz) * (inv_kx + inv_kz));
                else
                    ratio = 0.f;
                cwavem[krow + ikz] = cwave[krow + ikz] * ratio;
            }
        }

        fftwf_execute(inverse_plan);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
        for (int ix=0; ix<nx; ix++) {
            const int base = ix*nzpad;
            for (int iz=0; iz<nz; iz++) {
                u0[ix][iz] -= wt * rwavem[base + iz] * 8.f * Q1 * vn[ix][iz] * eps[ix][iz] * eta[ix][iz];
            }
        }

        /* term 5: weight 1 / ((1/kx + 1/kz)^2 * (kx + kz)) */
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
        for (int ikx=0; ikx<nkx; ++ikx) {
            const int krow = ikx * nkz;
            float inv_kx = 1. / kx[ikx];
            for (int ikz=0; ikz<nkz; ++ikz) {
                float inv_kz = 1. / kz[ikz];
                float ratio;
                if (!isinf(inv_kx) && !isinf(inv_kz))
                    ratio = 1./ ((inv_kx + inv_kz) * (inv_kx + inv_kz) * (kx[ikx]+kz[ikz]));
                else
                    ratio = 0.f;
                cwavem[krow + ikz] = cwave[krow + ikz] * ratio;
            }
        }

        fftwf_execute(inverse_plan);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
        for (int ix=0; ix<nx; ix++) {
            const int base = ix*nzpad;
            for (int iz=0; iz<nz; iz++) {
                u0[ix][iz] += wt * rwavem[base + iz] * 32.f * Q1 * Q2 * vn[ix][iz] * eta[ix][iz] * lin_eta[ix][iz];
            }
        }

        return;
    }

    /* --------------------------- adjoint modeling ------------------------- */

    /* adj term 1: leap-frog part of the update; load u1 * vh, weight kx */
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
    for (int ix=0; ix<nx; ix++) {
        const int base = ix*nzpad;
        for (int iz=0; iz<nz; iz++) {
            u0[ix][iz] = 2.0f * u1[ix][iz] - u0[ix][iz];
            rwave[base + iz] = u1[ix][iz] * vh[ix][iz];
        }
    }

    /* --- 2D forward Fourier transform ---*/
    fftwf_execute(forward_plan);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
    for (int ikx=0; ikx<nkx; ++ikx) {
        const int krow = ikx * nkz;
        for (int ikz=0; ikz<nkz; ++ikz) {
            cwavem[krow + ikz] += cwave[krow + ikz] * kx[ikx];
        }
    }

    /* adj term 2: load u1 * vp, weight kz */
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
    for (int ix=0; ix<nx; ix++) {
        const int base = ix*nzpad;
        for (int iz=0; iz<nz; iz++) {
            rwave[base + iz] = u1[ix][iz] * vp[ix][iz];
        }
    }

    /* --- 2D forward Fourier transform ---*/
    fftwf_execute(forward_plan);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
    for (int ikx=0; ikx<nkx; ++ikx) {
        const int krow = ikx * nkz;
        for (int ikz=0; ikz<nkz; ++ikz) {
            cwavem[krow + ikz] += cwave[krow + ikz] * kz[ikz];
        }
    }

    /* adj term 3: weight 1/(1/kx + 1/kz), subtracted */
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
    for (int ix=0; ix<nx; ix++) {
        const int base = ix*nzpad;
        for (int iz=0; iz<nz; iz++) {
            rwave[base + iz] = u1[ix][iz] * vn[ix][iz] * eta[ix][iz] * 2.f;
        }
    }

    /* --- 2D forward Fourier transform ---*/
    fftwf_execute(forward_plan);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
    for (int ikx=0; ikx<nkx; ++ikx) {
        const int krow = ikx * nkz;
        float inv_kx = 1. / kx[ikx];
        for (int ikz=0; ikz<nkz; ++ikz) {
            float inv_kz = 1. / kz[ikz];
            float ratio;
            if (!isinf(inv_kx) && !isinf(inv_kz))
                ratio = 1./ (inv_kx + inv_kz);
            else
                ratio = 0.f;
            cwavem[krow + ikz] -= cwave[krow + ikz] * ratio;
        }
    }

    /* adj term 4: weight (1/kz) / (1/kx + 1/kz)^2, added */
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
    for (int ix=0; ix<nx; ix++) {
        const int base = ix*nzpad;
        for (int iz=0; iz<nz; iz++) {
            rwave[base + iz] = u1[ix][iz] * vn[ix][iz] * eps[ix][iz] * eta[ix][iz] * 8.f * Q1;
        }
    }

    /* --- 2D forward Fourier transform ---*/
    fftwf_execute(forward_plan);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
    for (int ikx=0; ikx<nkx; ++ikx) {
        const int krow = ikx * nkz;
        float inv_kx = 1. / kx[ikx];
        for (int ikz=0; ikz<nkz; ++ikz) {
            float inv_kz = 1. / kz[ikz];
            float ratio;
            if (!isinf(inv_kx) && !isinf(inv_kz))
                ratio = inv_kz / ((inv_kx + inv_kz)*(inv_kx + inv_kz));
            else
                ratio = 0.f;
            cwavem[krow + ikz] += cwave[krow + ikz] * ratio;
        }
    }

    /* adj term 5: weight 1 / ((1/kx + 1/kz)^2 * (kx + kz)), subtracted */
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
    for (int ix=0; ix<nx; ix++) {
        const int base = ix*nzpad;
        for (int iz=0; iz<nz; iz++) {
            rwave[base + iz] = u1[ix][iz] * vn[ix][iz] * eta[ix][iz] * 32.f * Q1 * Q2 * lin_eta[ix][iz];
        }
    }

    /* --- 2D forward Fourier transform ---*/
    fftwf_execute(forward_plan);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
    for (int ikx=0; ikx<nkx; ++ikx) {
        const int krow = ikx * nkz;
        float inv_kx = 1. / kx[ikx];
        for (int ikz=0; ikz<nkz; ++ikz) {
            float inv_kz = 1. / kz[ikz];
            float ratio;
            if (!isinf(inv_kx) && !isinf(inv_kz))
                ratio = 1./ ((inv_kx + inv_kz)*(inv_kx + inv_kz) * (kx[ikx]+kz[ikz]));
            else
                ratio = 0.f;
            cwavem[krow + ikz] -= cwave[krow + ikz] * ratio;
        }
    }

    /* one inverse transform for the accumulated spectrum */
    fftwf_execute(inverse_plan);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,1)
#endif
    for (int ix=0; ix<nx; ix++) {
        const int base = ix*nzpad;
        for (int iz=0; iz<nz; iz++) {
            u0[ix][iz] -= wt * rwavem[base + iz];
        }
    }

    return;
}
void imcfft3(float *out /* [n1*n2*n3] */,
             sf_complex *inp /* [nk*n2*n3] */)
/*< 3-D inverse FFT >*/
{
    int k1, k2, k3, tid = 0;

    /* Pass 1: inverse transform along the first axis.  Each (k3,k2) pair
     * selects one contiguous trace of nk samples, read from inp and
     * written to the same offset in tmp. */
#ifdef _OPENMP
#pragma omp parallel for private(k3,k2,tid) default(shared)
#endif
    for (k3 = 0; k3 < local_n0; k3++) {
#ifdef _OPENMP
        tid = omp_get_thread_num();
#endif
        for (k2 = 0; k2 < n2; k2++) {
            const int row = (k3*n2 + k2)*nk;
            kiss_fft_stride(icfg1[tid], (kiss_fft_cpx *) inp + row, tmp + row, 1);
        }
    }

    /* Pass 2: inverse transform along the second axis.  The strided trace
     * goes through the per-thread scratch buffer ctrace2[tid] and is then
     * scattered back into tmp. */
#ifdef _OPENMP
#pragma omp parallel for private(k3,k2,k1,tid) default(shared)
#endif
    for (k3 = 0; k3 < local_n0; k3++) {
#ifdef _OPENMP
        tid = omp_get_thread_num();
#endif
        for (k1 = 0; k1 < nk; k1++) {
            kiss_fft_stride(icfg2[tid], tmp + k3*n2*nk + k1, ctrace2[tid], nk);
            for (k2 = 0; k2 < n2; k2++) {
                tmp[(k3*n2 + k2)*nk + k1] = ctrace2[tid][k2];
            }
        }
    }

    /* parallel transpose from n1*n2 * n3 to n3 * n1*n2 */
    fftwf_execute(cfg);

    /* Pass 3: inverse transform along the third axis, again via a
     * per-thread scratch trace. */
#ifdef _OPENMP
#pragma omp parallel for private(k3,k1,tid) default(shared)
#endif
    for (k1 = 0; k1 < local_n1; k1++) {
#ifdef _OPENMP
        tid = omp_get_thread_num();
#endif
        kiss_fft_stride(icfg3[tid], tmp + k1*n3, ctrace3[tid], 1);
        for (k3 = 0; k3 < n3; k3++) {
            tmp[k1*n3 + k3] = ctrace3[tid][k3];
        }
    }

    /* NOTE(review): presumably the plan that undoes the transposition above
     * and leaves the result in tmp2 -- confirm against the plan setup. */
    fftwf_execute(icfg);

    /* FFT centering and normalization: keep the real part, scaled by wt,
     * with the sign chosen from the parity of the three (global) indices. */
#pragma omp parallel for private(k3,k2,k1) default(shared)
    for (k3 = 0; k3 < local_n0; k3++) {
        for (k2 = 0; k2 < n2; k2++) {
            for (k1 = 0; k1 < n1; k1++) {
                const int idx = (k3*n2 + k2)*n1 + k1;
                const int positive =
                    ((((k3 + local_0_start)%2 == 0) == (k2%2 == 0)) == (k1%2 == 0));

                out[idx] = (positive ? wt : -wt)*crealf(tmp2[idx]);
            }
        }
    }
}
/* Execute the FFTW plan held in inversePlan on the arrays it was created with. */
void executeInverse() { fftwf_execute(inversePlan); }
int main(int argc, char **argv) { fftwf_plan ptmp; FILE *wisdom_file; char *wisdom_string; fftwf_complex *in, *out; double *inf,*outf; in = (fftwf_complex *)fftwf_malloc(65536*sizeof(fftwf_complex)); out = (fftwf_complex *)fftwf_malloc(65536*sizeof(fftwf_complex)); inf = (double *)in; outf = (double *)out; if ((wisdom_file = fopen("wisdom","w")) != NULL) { fprintf(stderr,"Wisdom file successfully opened\n"); fprintf(stderr,"Checking forward fft's up to 8192\n"); fprintf(stderr,"64 Forward\n"); ptmp = fftwf_plan_dft_1d(64,in,out,FFTW_FORWARD, FFTW_PATIENT); { int i; for(i=0;i<64;i++) { inf[2*i] = cos(i*M_PI/129.0); inf[2*i+1] = sin(i*M_PI/129.0); } fftwf_execute(ptmp); /*for(i=0;i<64;i++) { fprintf(stderr,"bin[%3d] = (%15.10f %15.10f)\n",i,outf[2*i],outf[2*i+1]); }*/ } fftwf_destroy_plan(ptmp); fprintf(stderr,"128 Forward\n"); ptmp = fftwf_plan_dft_1d(128,in,out,FFTW_FORWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); fprintf(stderr,"256 Forward\n"); ptmp = fftwf_plan_dft_1d(256,in,out,FFTW_FORWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); fprintf(stderr,"512 Forward\n"); ptmp = fftwf_plan_dft_1d(512,in,out,FFTW_FORWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); fprintf(stderr,"1024 Forward\n"); ptmp = fftwf_plan_dft_1d(1024,in,out,FFTW_FORWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); fprintf(stderr,"2048 Forward\n"); ptmp = fftwf_plan_dft_1d(2048,in,out,FFTW_FORWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); fprintf(stderr,"4096 Forward\n"); ptmp = fftwf_plan_dft_1d(4096,in,out,FFTW_FORWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); fprintf(stderr,"8192 Forward\n"); ptmp = fftwf_plan_dft_1d(8192,in,out,FFTW_FORWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); /* fprintf(stderr,"16384 Forward\n"); ptmp = fftwf_plan_dft_1d(16384,in,out,FFTW_FORWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); fprintf(stderr,"32768 Forward\n"); ptmp = fftwf_plan_dft_1d(32768,in,out,FFTW_FORWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); */ fprintf(stderr,"Checking inverse fft's up to 8192\n"); 
fprintf(stderr,"64 Backward\n"); ptmp = fftwf_plan_dft_1d(64,in,out,FFTW_BACKWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); fprintf(stderr,"128 Backward\n"); ptmp = fftwf_plan_dft_1d(128,in,out,FFTW_BACKWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); fprintf(stderr,"256 Backward\n"); ptmp = fftwf_plan_dft_1d(256,in,out,FFTW_BACKWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); fprintf(stderr,"512 Backward\n"); ptmp = fftwf_plan_dft_1d(512,in,out,FFTW_BACKWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); fprintf(stderr,"1024 Backward\n"); ptmp = fftwf_plan_dft_1d(1024,in,out,FFTW_BACKWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); fprintf(stderr,"2048 Backward\n"); ptmp = fftwf_plan_dft_1d(2048,in,out,FFTW_BACKWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); fprintf(stderr,"4096 Backward\n"); ptmp = fftwf_plan_dft_1d(4096,in,out,FFTW_BACKWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); fprintf(stderr,"8192 Backward\n"); ptmp = fftwf_plan_dft_1d(8192,in,out,FFTW_BACKWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); /* fprintf(stderr,"16384 Backward\n"); ptmp = fftwf_plan_dft_1d(16384,in,out,FFTW_BACKWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); fprintf(stderr,"32768 Backward\n"); ptmp = fftwf_plan_dft_1d(32768,in,out,FFTW_BACKWARD, FFTW_PATIENT); fftwf_destroy_plan(ptmp); */ fftwf_free(in); fftwf_free(out); fprintf(stderr,"Finished computing, exporting wisdom\n"); fflush(stderr); wisdom_string = fftwf_export_wisdom_to_string(); fprintf(wisdom_file,"%s",wisdom_string); fclose(wisdom_file); fprintf(stderr,"%s\n",wisdom_string); fprintf(stderr,"Done!\n"); } else fprintf(stderr,"Could not create the wisdom file.\n"); //fprintf(stderr,"Press the Enter key to close."),gets(dummy); }
/* Execute the FFTW plan held in forwardPlan on the arrays it was created with. */
void executeForward() { fftwf_execute(forwardPlan); }
void mcfft3(sf_complex *inp /* [n1*n2*n3] */,
            sf_complex *out /* [nk*n2*n3] */)
/*< 3-D FFT >*/
{
    int k1, k2, k3, tid = 0;

    /* FFT centering: copy inp into cc, negating the samples whose three
     * (global) indices have the "odd" combined parity. */
#pragma omp parallel for private(k3,k2,k1) default(shared)
    for (k3 = 0; k3 < local_n0; k3++) {
        for (k2 = 0; k2 < n2; k2++) {
            for (k1 = 0; k1 < n1; k1++) {
                const int idx = (k3*n2 + k2)*n1 + k1;
                const int keep =
                    ((((k3 + local_0_start)%2 == 0) == (k2%2 == 0)) == (k1%2 == 0));
#ifdef SF_HAS_COMPLEX_H
                cc[idx] = keep ? inp[idx] : -inp[idx];
#else
                cc[idx] = keep ? inp[idx] : sf_cneg(inp[idx]);
#endif
            }
        }
    }

    /* Pass 1: transform along the first axis, one contiguous trace of nk
     * samples per (k3,k2) pair, from cc into tmp. */
#ifdef _OPENMP
#pragma omp parallel for private(k3,k2,tid) default(shared)
#endif
    for (k3 = 0; k3 < local_n0; k3++) {
#ifdef _OPENMP
        tid = omp_get_thread_num();
#endif
        for (k2 = 0; k2 < n2; k2++) {
            const int row = (k3*n2 + k2)*nk;
            kiss_fft_stride(cfg1[tid], (kiss_fft_cpx *) cc + row, tmp + row, 1);
        }
    }

    /* Pass 2: transform along the second axis through the per-thread
     * scratch trace ctrace2[tid], then scatter back into tmp. */
#ifdef _OPENMP
#pragma omp parallel for private(k3,k2,k1,tid) default(shared)
#endif
    for (k3 = 0; k3 < local_n0; k3++) {
#ifdef _OPENMP
        tid = omp_get_thread_num();
#endif
        for (k1 = 0; k1 < nk; k1++) {
            kiss_fft_stride(cfg2[tid], tmp + k3*n2*nk + k1, ctrace2[tid], nk);
            for (k2 = 0; k2 < n2; k2++) {
                tmp[(k3*n2 + k2)*nk + k1] = ctrace2[tid][k2];
            }
        }
    }

    /* parallel transpose from n1*n2 * n3 to n3 * n1*n2 */
    fftwf_execute(cfg);

    /* Pass 3: transform along the third axis via ctrace3[tid]. */
#ifdef _OPENMP
#pragma omp parallel for private(k3,k1,tid) default(shared)
#endif
    for (k1 = 0; k1 < local_n1; k1++) {
#ifdef _OPENMP
        tid = omp_get_thread_num();
#endif
        kiss_fft_stride(cfg3[tid], tmp + k1*n3, ctrace3[tid], 1);
        for (k3 = 0; k3 < n3; k3++) {
            tmp[k1*n3 + k3] = ctrace3[tid][k3];
        }
    }

    /* NOTE(review): presumably the plan that undoes the transposition above
     * and leaves the result in tmp2 -- confirm against the plan setup. */
    fftwf_execute(icfg);

    /* Copy the result to the caller's array (nk samples per row, rows
     * addressed with stride n1, exactly as in the original code). */
#pragma omp parallel for private(k3,k2,k1) default(shared)
    for (k3 = 0; k3 < local_n0; k3++) {
        for (k2 = 0; k2 < n2; k2++) {
            for (k1 = 0; k1 < nk; k1++) {
                const int idx = (k3*n2 + k2)*n1 + k1;
                out[idx] = tmp2[idx];
            }
        }
    }
}
/*
 * Refresh the visual state of an object: the volume and position arcs and,
 * unless OBJ_FL_HIDE_BARGRAPH is set, the spectrum bar graph computed from
 * the object's current audio input block.
 *
 * The bar graph uses the file-level scratch buffer bargraph_data (resized on
 * demand) and the FFTW plan cached in obj->fftpl.  The function returns
 * without touching the bars when there is no input data, when the scratch
 * buffer cannot be (re)allocated or when no plan can be created.
 */
void lib_object_update(obj_t *obj)
{
	float	v, *tmp;
	int		n, i;

	assert( obj != NULL );

	clutter_circle_set_angle_stop(
		CLUTTER_CIRCLE(obj->circle_volume),
		(uint) (obj->audio->volume * 360.0f)
	);
	clutter_circle_set_angle_stop(
		CLUTTER_CIRCLE(obj->circle_position),
		(uint) (obj->audio->position * 360.0f)
	);

	if ( obj->flags & OBJ_FL_HIDE_BARGRAPH )
		return;

	/* bar graph */
	if ( obj->audio->input == NULL || obj->audio->input->data == NULL )
		return;

	n = obj->audio->input->size;
	if ( n <= 0 )
		return;

	/* allocate memory
	 * readjust if the sound block size changed.
	 */
	if ( bargraph_datasize != n )
	{
		tmp = realloc(bargraph_data, sizeof(float) * n);
		if ( tmp == NULL )
			return;
		bargraph_data = tmp;
		bargraph_datasize = n;

		/* a plan cached for the previous size must not be reused */
		if ( obj->fftpl != NULL )
		{
			fftwf_destroy_plan(obj->fftpl);
			obj->fftpl = NULL;
		}
	}

	/* create the fast fourier plan on first use.
	 * The last argument is a set of planner flags; a transform direction
	 * such as FFTW_FORWARD (-1) must not be OR-ed in, as it sets every flag.
	 * FFTW_ESTIMATE keeps planning from overwriting the live audio data and
	 * FFTW_UNALIGNED lets the plan run on buffers that have moved since.
	 */
	if ( obj->fftpl == NULL )
	{
		obj->fftpl = fftwf_plan_r2r_1d(n,
			obj->audio->input->data, bargraph_data,
			FFTW_R2HC,
			FFTW_ESTIMATE | FFTW_PRESERVE_INPUT | FFTW_UNALIGNED
		);
		if ( obj->fftpl == NULL )
			return;
	}

	/* execute on the CURRENT buffers: realloc() above (possibly triggered by
	 * another object) or a new input block may have moved them since the
	 * plan was created.
	 * NOTE(review): if this object's block size changes while
	 * bargraph_datasize already equals the new size, the cached plan still
	 * has the old length -- confirm that block sizes are fixed per object.
	 */
	fftwf_execute_r2r(obj->fftpl, obj->audio->input->data, bargraph_data);

	/* and adjust bargraph ! */
	for ( i = 0; i < n && i < BAR_COUNT; i++ )
	{
		v = fabsf(bargraph_data[i]) * 30;
		if ( v > 50 )
			v = 50;
		clutter_circle_set_width(CLUTTER_CIRCLE(obj->bars[i]), v);
	}
}
/*
 * Build an overlap/save filter object from ncoef complex filter coefficients.
 *
 * The block length is nblock2(ncoef - 1) and the transform length is twice
 * that.  The frequency response (zfvec) is obtained by transforming the
 * coefficients once; forward and inverse plans for the signal path are
 * created with the planner flags given in pbits.  The returned object owns
 * the four work vectors and both plans.
 */
FiltOvSv
newFiltOvSv (COMPLEX * coefs, int ncoef, int pbits)
{
  FiltOvSv ovsv;
  COMPLEX *rawsig, *response, *sigin, *sigout;
  int blocklen, translen;

  ovsv = (FiltOvSv) safealloc (1, sizeof (filt_ov_sv), "new overlap/save filter");

  blocklen = nblock2 (ncoef - 1);
  translen = 2 * blocklen;

  rawsig = newvec_COMPLEX (translen, "raw signal vec in newFiltOvSv");
  response = newvec_COMPLEX (translen, "filter z vec in newFiltOvSv");
  sigin = newvec_COMPLEX (translen, "signal in z vec in newFiltOvSv");
  sigout = newvec_COMPLEX (translen, "signal out z vec in newFiltOvSv");

  /* prepare frequency response from filter coefs */
  {
    COMPLEX *scratch =
      newvec_COMPLEX (translen, "temp filter z vec in newFiltOvSv");
    fftwf_plan once =
      fftwf_plan_dft_1d (translen,
			 (fftwf_complex *) scratch,
			 (fftwf_complex *) response, FFTW_FORWARD, pbits);
    COMPLEX *dest;
    int k;

    /* With LHS the coefficients sit at the start of the scratch vector,
       otherwise in its last ncoef slots. */
#ifdef LHS
    dest = scratch;
#else
    dest = scratch + (translen - ncoef);
#endif
    for (k = 0; k < ncoef; k++)
      dest[k] = coefs[k];

    fftwf_execute (once);
    fftwf_destroy_plan (once);
    delvec_COMPLEX (scratch);
  }

  /* stuff values */
  ovsv->buflen = blocklen;
  ovsv->fftlen = translen;
  ovsv->zfvec = response;
  ovsv->zivec = sigin;
  ovsv->zovec = sigout;
  ovsv->zrvec = rawsig;

  /* prepare transforms for signal: raw -> zivec, zivec -> zovec */
  ovsv->pfwd = fftwf_plan_dft_1d (translen,
				  (fftwf_complex *) rawsig,
				  (fftwf_complex *) sigin, FFTW_FORWARD, pbits);
  ovsv->pinv = fftwf_plan_dft_1d (translen,
				  (fftwf_complex *) sigin,
				  (fftwf_complex *) sigout, FFTW_BACKWARD, pbits);

  ovsv->scale = 1.0f / (REAL) translen;

  return ovsv;
}
int main(int argc,char *argv[]) { //this is C, so I have to declare all sorts of variables in advance //update, I guess it's C++ now so this is a luxury int i; int j; eggname[0]='\0'; //handle the command line options int onindex; if(((onindex=handle_options(argc,argv))==-1)||(argc-onindex<0)) {print_usage(); return -1;}; // char *eggname=argv[onindex]; if(eggname[0]=='\0') { fprintf(stderr,"no input file given, use -i option\n"); return -1; } //open the egg /* struct egg current; mBreakEgg(eggname,¤t); mParseEggHeader(¤t); sampling_rate_mhz=current.data->sample_rate; */ // Monarch *egg=Monarch::OpenForReading(eggname); // Monarch *egg=Monarch::Open(std::string(eggname),ReadMode); const Monarch *egg=Monarch::OpenForReading(std::string(eggname)); egg->ReadHeader(); const MonarchHeader *eggheader=egg->GetHeader(); const MonarchRecord *event; sampling_rate_mhz=eggheader->GetAcquisitionRate(); // printf("record size: %d\n",eggheader->GetRecordSize()); //decide the optimal size for ffts and allocate memory if(eggheader->GetRecordSize()<(unsigned int)fft_size) { // if(current.data->record_size<fft_size) { fprintf(stderr,"fft size larger than record size. make fft size smaller. 
aborting"); return -1; } //nffts_per_event=current.data->record_size/fft_size; nffts_per_event=eggheader->GetRecordSize()/fft_size; fft_output_size=fft_size/2+1; //fft_input=fftwf_alloc_real(fft_size*nffts_per_event); fft_input=(float*)fftwf_malloc(sizeof(float)*nffts_per_event*fft_size); //fft_output=fftwf_alloc_complex(fft_size*nffts_per_event); fft_output=(fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*fft_output_size*nffts_per_event); output_powerspectrum=(double*)malloc(sizeof(double)*fft_output_size); for(i=0;i<fft_output_size;i++) output_powerspectrum[i]=0; //create the fft plan fft_plan=fftwf_plan_many_dft_r2c(1,&fft_size,nffts_per_event,fft_input,NULL,1,fft_size,fft_output,NULL,1,fft_output_size,FFTW_ESTIMATE); //perform ffts //int on_event=0; int nffts_so_far=0; //while((mHatchNextEvent(¤t)!=1)&&(on_event<=max_number_of_events)) { //while((event=egg->GetNextEvent())!=NULL&&(on_event<=max_number_of_events)) { while(egg->ReadRecord()) { if(on_channel==1) event=egg->GetRecordSeparateOne(); else event=egg->GetRecordSeparateTwo(); if(event==NULL) { fprintf(stderr,"ERROR: event was null. 
aborting\n"); return -1; } //convert data to floats //for(i=0;i<current.data->record_size;i++) for(i=0;i<eggheader->GetRecordSize();i++) //fft_input[i]=(float)(current.data->record[i])-128.0; fft_input[i]=(float)(event->fData[i])-128.0; //perform the ffts fftwf_execute(fft_plan); //pack in to power spectrum int on_pt=0; for(i=0;i<nffts_per_event;i++) for(j=0;j<fft_output_size;j++) { output_powerspectrum[j]+=fft_output[on_pt][0]*fft_output[on_pt][0]+fft_output[on_pt][1]*fft_output[on_pt][1]; on_pt++; } nffts_so_far+=nffts_per_event; } //normalize to power in milliwatts // each sample * 0.5 (volts fullscale)/255 (levels) /(sqrt(fftlength) // power *2 (positive and negative freqs) *1000 mW/W / naverages //1000 (milliwatts/watt) * 0.5 (volts fullscale)/256 (levels) / (sqrt(fft_length)*(number of averages) / 50 ohms double normalization=2.0*(1000.0*0.5*0.5/(256.0*256.0))*(1.0/(((double)fft_size*fft_size)*((double)nffts_so_far)))/50.0; for(i=0;i<fft_output_size;i++) output_powerspectrum[i]*=normalization; //print out result if(format=='j') { //ASCII output, JSON printf("{ \"sampling_rate\": %d , ",sampling_rate_mhz); printf("\"data\": ["); for(i=0;i<fft_output_size;i++) { if(i!=0) printf(","); printf("%g",output_powerspectrum[i]); } printf("] }"); } else if(format=='a') { for(i=0;i<fft_output_size;i++) printf("%g %g\n",sampling_rate_mhz*((double)i)/((double)(2*fft_output_size)),output_powerspectrum[i]); } else { //binary fwrite(output_powerspectrum,sizeof(double),fft_output_size,stdout); } //clean up egg->Close(); fftwf_destroy_plan(fft_plan); fftwf_free(fft_input); fftwf_free(fft_output); free(output_powerspectrum); //mCleanUp(¤t); return 0; }
/*
 * Apply one term of a pseudo-differential operator to the nx-by-nz complex
 * field U (row-major, index m*nz+n) and store the result in QU:
 *
 *   1. FU = forward 2-D FFT of U
 *   2. FR = FU multiplied, per wavenumber (xi, eta), by
 *             (xi^2 + eta^2)^(m_ord/2) * exp(i*l*angle)
 *      where angle is the polar angle of the wavenumber.  For m_ord == 0 the
 *      radial factor is 1 and the zero wavenumber is kept; otherwise the
 *      zero wavenumber is set to 0.
 *   3. R  = backward 2-D FFT of FR
 *   4. QU = FQ * R / (nx*nz)      (element-wise)
 *
 * Only wavenumber indices 1..(nx-1)/2 and 1..(nz-1)/2 and their mirror
 * images are filled in, so for an even nx (nz) the Nyquist row (column) of
 * FR is left at zero.
 *
 * QU is zeroed first; if the sizes are not positive or a work array or plan
 * cannot be created the function returns with QU all zero.
 */
void PsiDOf_l(fftwf_complex const *U ,fftwf_complex const *FQ,fftwf_complex *QU ,int nx, int nz, double dx,double dz,double m_ord,int l){
    const double pi = 3.14159265358979323846;
    fftwf_complex *FU = NULL,*FR = NULL,*R = NULL;
    fftwf_plan plan_forward = NULL,plan_backward = NULL;
    size_t ncell;
    int m,n,p,r;
    int zero_order,pmax,rmax;
    double X,Z,dxi,deta;
    double xi,eta,theta,alpha;

    if (nx <= 0 || nz <= 0) return;
    ncell = (size_t)nx*(size_t)nz;

    /* Initialize QU to zero */
    memset(QU,0,ncell*sizeof(fftwf_complex));

    FU = (fftwf_complex *) fftwf_malloc(ncell*sizeof(fftwf_complex));
    FR = (fftwf_complex *) fftwf_malloc(ncell*sizeof(fftwf_complex));
    R  = (fftwf_complex *) fftwf_malloc(ncell*sizeof(fftwf_complex));
    if (FU == NULL || FR == NULL || R == NULL) goto cleanup;

    /* The planner interface is not const-correct.  The forward transform is
     * out of place and planned with FFTW_ESTIMATE, so U is only read. */
    plan_forward  = fftwf_plan_dft_2d(nx,nz,(fftwf_complex *)U,FU,FFTW_FORWARD,FFTW_ESTIMATE);
    plan_backward = fftwf_plan_dft_2d(nx,nz,FR,R,FFTW_BACKWARD,FFTW_ESTIMATE);
    if (plan_forward == NULL || plan_backward == NULL) goto cleanup;

    /* fftwf_malloc does not clear memory and the loops below do not visit
     * every wavenumber, so start from an all-zero spectrum. */
    memset(FR,0,ncell*sizeof(fftwf_complex));

    fftwf_execute(plan_forward);

    X = (nx-1)*dx;
    Z = (nz-1)*dz;
    dxi  = 1.0/X;
    deta = 1.0/Z;

    zero_order = (m_ord == 0);
    pmax = (nx-1)/2;
    rmax = (nz-1)/2;

    /* interior wavenumbers: all four sign combinations of (xi, eta) */
    for (p = 1; p <= pmax; p++) {
        xi = 2*pi*dxi*p;
        for (r = 1; r <= rmax; r++) {
            eta = 2*pi*deta*r;
            alpha = zero_order ? 1.0 : pow((xi*xi+eta*eta),(0.5*m_ord));
            theta = atan(eta/xi);
            FR[p*nz+r]         = alpha*(cos(l*theta)+I*sin(l*theta))*FU[p*nz+r];
            FR[p*nz-r+nz]      = alpha*(cos(l*theta)-I*sin(l*theta))*FU[p*nz-r+nz];
            FR[(-p+nx)*nz+r]   = alpha*(cos(l*(pi-theta))+I*sin(l*(pi-theta)))*FU[(-p+nx)*nz+r];
            FR[(-p+nx)*nz-r+nz]= alpha*(cos(l*(pi+theta))+I*sin(l*(pi+theta)))*FU[(-p+nx)*nz-r+nz];
        }
    }

    /* xi == 0 row: the angle is +pi/2 or -pi/2 */
    for (r = 1; r <= rmax; r++) {
        eta = 2*pi*deta*r;
        alpha = zero_order ? 1.0 : pow(eta,(m_ord));
        FR[r]     = alpha*(cos(l*pi/2)+I*sin(l*pi/2))*FU[r];
        FR[-r+nz] = alpha*(cos(l*pi/2)-I*sin(l*pi/2))*FU[-r+nz];
    }

    /* eta == 0 column: the angle is 0 or pi */
    for (p = 1; p <= pmax; p++) {
        xi = 2*pi*dxi*p;
        alpha = zero_order ? 1.0 : pow(xi,m_ord);
        FR[p*nz]       = alpha*FU[p*nz];
        FR[(-p+nx)*nz] = alpha*(cos(l*pi))*FU[(-p+nx)*nz];
    }

    /* zero wavenumber */
    if (zero_order) FR[0]=FU[0];
    else FR[0]=0;

    fftwf_execute(plan_backward);

    /* FFTW transforms are unnormalised: divide by the number of samples */
    for (m = 0; m < nx; m++) {
        for (n = 0; n < nz; n++) {
            QU[m*nz+n]=QU[m*nz+n]+ FQ[(m*nz+n)]*R[m*nz+n]/(nx*nz);
        }
    }

cleanup:
    /* free memory */
    if (plan_forward  != NULL) fftwf_destroy_plan(plan_forward);
    if (plan_backward != NULL) fftwf_destroy_plan(plan_backward);
    if (FU != NULL) fftwf_free(FU);
    if (FR != NULL) fftwf_free(FR);
    if (R  != NULL) fftwf_free(R);
}
//set program_options int UHD_SAFE_MAIN (int argc, char *argv[]) { uhd::set_thread_priority_safe(); std::string args; size_t total_samples,number_bins,num_acc_samps; double bw,rate, freq, gain; po::options_description desc("allowed options"); desc.add_options() ("args",po::value<std::string>(&args)->default_value(""),"multi uhd device address args") ("help","help message") ("nsamps",po::value<size_t> (&total_samples)->default_value(0),"Total number of samples to receive, zero for continous mode") ("rate", po::value<double>(&rate)->default_value(2e6), "rate of incoming samples") ("freq",po::value<double>(&freq)->default_value(400e6),"rf center frequency in Hz") ("gain",po::value<double>(&gain)->default_value(0),"gain for the RF chain") ("number_bins",po::value<size_t>(&number_bins)->default_value(1024),"number of FFT points") ("bw", po::value<double>(&bw), "daughterboard IF filter bandwidth in Hz") ; po::variables_map vm; po::store(po::parse_command_line(argc,argv,desc),vm); po::notify(vm); if (vm.count("help")) {//if std::cout<< boost::format("UHD RX timed Samples %s") % desc <<std::endl; return ~0; }//if //create usrp device std::cout<<std::endl; std::cout<<boost::format("setting RX Rate: %f Msps...") % args <<std::endl; uhd::usrp::multi_usrp::sptr usrp =uhd::usrp::multi_usrp::make(args); std::cout<<boost::format("Using Device: %s ") % usrp->get_pp_string()<<std::endl; //set bandwidth if (vm.count("bw")){ std::cout << boost::format("Setting RX Bandwidth: %f MHz...") % bw << std::endl; usrp->set_rx_bandwidth(bw); std::cout << boost::format("Actual RX Bandwidth: %f MHz...") % usrp->get_rx_bandwidth() << std::endl << std::endl; } //set the sample rate std::cout << boost::format("setting RX Rate: %f Msps...") % (rate/1e6) <<std::endl<<std::endl; usrp->set_rx_rate(rate); std::cout<<boost::format("actual RX rate: %f Msps...") % (usrp->get_rx_rate()/1e6) <<std::endl<<std::endl; //set the rx center frequency std::cout << boost::format("Setting RX Freq: %f Mhz...") % 
(freq/1e6) << std::endl; usrp->set_rx_freq(freq); std::cout << boost::format("Actual RX Freq: %f Mhz...") % (usrp->get_rx_freq()/1e6) << std::endl << std::endl; //create a receiver streamer uhd::stream_args_t stream_args("fc32"); uhd::rx_streamer::sptr rx_streamer =usrp-> get_rx_stream(stream_args); //rm// set up streaming ...0 means continues uhd::stream_cmd_t stream_cmd((total_samples==0)? uhd::stream_cmd_t::STREAM_MODE_START_CONTINUOUS: uhd::stream_cmd_t::STREAM_MODE_NUM_SAMPS_AND_DONE); stream_cmd.num_samps =total_samples; stream_cmd.stream_now = true; stream_cmd.time_spec =uhd::time_spec_t(); usrp->issue_stream_cmd(stream_cmd); size_t num_rx_samps =0; //initialize number of received samples uhd::rx_metadata_t md; std::vector<std::complex<float> > buff(number_bins); std::vector<std::complex<float> > out_buff(number_bins); //initialize fft plan fftwf_complex *in = (fftwf_complex*)&buff.front(); //allocate array in fftwf_complex *out = (fftwf_complex*)&out_buff.front(); //allocate array out fftwf_plan f; f =fftwf_plan_dft_1d(number_bins,in, out, FFTW_FORWARD,FFTW_ESTIMATE); while(not stop_signal_called and (num_acc_samps < total_samples or total_samples == 0)) { size_t num_rx_samps = rx_streamer->recv( &buff.front(), buff.size(), md, 3.0); std::cout <<" current buffer size: "<< buff.size()<<std::endl<<std::endl; //handle the error codes switch(md.error_code){ case uhd::rx_metadata_t::ERROR_CODE_NONE: break; case uhd::rx_metadata_t::ERROR_CODE_TIMEOUT: if (num_acc_samps == 0) continue; std::cout << boost::format( "Got timeout before all samples received, possible packet loss, exiting loop..." ) << std::endl; goto done_loop; default: std::cout << boost::format( "Got error code 0x%x, exiting loop..." 
) % md.error_code << std::endl; goto done_loop; } std::cout<<"performing fft to samples at frequency"<<usrp->get_rx_freq()<<std::endl; fftwf_execute(f); num_acc_samps = num_rx_samps +1; std::cout<<"number of accumulated samples"<<num_acc_samps<<std::endl<<std::endl; std::cout <<"nubmer of rx samples: "<<num_rx_samps <<std::endl<<std::endl; float energy = find_energy(out_buff); std::cout<<"the energy for incoming samples: " <<energy; // print_data(out_buff); } done_loop: fftwf_destroy_plan(f); std::cout<<std::endl<<"done"; return 0; }
// Compute the filter, similar to Octave's fir2(n, f, m, grid_n, ramp_n, window); // Window and result must be of size n + 1. // grid_n: length of ideal frequency response function // ramp_n: transition width for jumps in filter response // defaults to grid_n/20; a wider ramp gives wider transitions // but has better stopband characteristics. void generateFirFilter(unsigned n, double w, const double* window, double* result) { // make sure grid is big enough for the window // the grid must be at least (n+1)/2 // for all filters where the order is a power of two minus 1, grid_n = n+1; unsigned grid_n = nextPowerOf2(n + 1); unsigned ramp_n = 2; // grid_n / 20; // Apply ramps to discontinuities // this is a low pass filter // maybe we can omit the "w, 0" point? // I did observe a small difference double f[] = {0.0, w-ramp_n/grid_n/2.0, w, w+ramp_n/grid_n/2.0, 1.0}; double m[] = {1.0, 1.0, 0.0, 0.0, 0.0}; // grid is a 1-D array with grid_n+1 points. Values are 1 in filter passband, 0 otherwise double* grid = (double*) malloc((grid_n + 1) * sizeof(double)); // interpolate between grid points interpolate(f, m, 5 /* length of f and m arrays */ , grid_n+1, grid); // the grid we do an ifft on is: // grid appended with grid_n*2 zeros // appended with original grid values from indices grid_n..2, i.e., the values in reverse order // (note, arrays start at 1 in octave!) // the input for the ifft is of size 4*grid_n // input = [grid ; zeros(grid_n*2,1) ;grid(grid_n:-1:2)]; fftwf_complex* cinput = (fftwf_complex*) fftwf_malloc(grid_n*4*sizeof(fftwf_complex)); fftwf_complex* coutput = (fftwf_complex*) fftwf_malloc(grid_n*4*sizeof(fftwf_complex)); if(cinput == NULL || coutput == NULL) { fprintf(stderr, "Cannot allocate buffer to generate FIR filter. 
Exiting\n"); exit(-1); } // wipe imaginary part for(unsigned i=0; i<grid_n*4; i++) { cinput[i][1] = 0.0; } // copy first part of grid for(unsigned i=0; i<grid_n+1; i++) { cinput[i][0] = float(grid[i]); } // append zeros for(unsigned i=grid_n+1; i<=grid_n*3; i++) { cinput[i][0] = 0.0; } // now append the grid in reverse order for(unsigned i=grid_n-1, index=0; i >=1; i--, index++) { cinput[grid_n*3+1 + index][0] = float(grid[i]); } fftwf_plan plan = fftwf_plan_dft_1d(grid_n*4, cinput, coutput, FFTW_BACKWARD, FFTW_ESTIMATE); fftwf_execute(plan); unsigned index = 0; for(unsigned i=4*grid_n-n; i<4*grid_n; i+=2) { result[index] = coutput[i][0]; index++; } for(unsigned i=1; i<=n; i+=2) { result[index] = coutput[i][0]; index++; } fftwf_destroy_plan(plan); fftwf_free(cinput); fftwf_free(coutput); // multiply with window for(unsigned i=0; i<=n; i++) { result[i] *= window[i]; } // normalize double factor = result[n/2]; for(unsigned i=0; i<=n; i++) { result[i] /= factor; } free(grid); }
int main(int argc, char* argv[]) { /*define variables*/ int nx,nx1,nt; int n1,n2; float d1,o1,d2,o2; int padt,padx; int ntfft,*n,nw,nk; float **d,*wavelet,**shot,**ds,**vel,**vmig,**M,v_ave; float *kx,*omega,dkx,dw; sf_complex **m,**ms,**mr,*in2a,*in2b,*cs,*cr,*c,czero; sf_complex Ls; float fmin,fmax,f_low,f_high; int if_low,if_high; int ix,iw,ik; float dt,dx,ox,dz,zmax; fftwf_plan p2a,p2b; sf_file in,out,velfile,source_wavelet; int iz,nz; int ishot,max_num_shot,ig,ng,it,index; int iswavelet; /*define sf input output*/ sf_init (argc,argv); in = sf_input("in"); out = sf_output("out"); velfile = sf_input("velfile"); if (!sf_histint(in,"n1",&n1)) sf_error("No n1= in input"); if (!sf_histfloat(in,"d1",&d1)) sf_error("No d1= in input"); if (!sf_histfloat(in,"o1",&o1)) o1=0.; if (!sf_histint(in,"n2",&n2)) sf_error("No n2= in vel"); if (!sf_histfloat(in,"d2",&d2)) sf_error("No d2= in input"); if (!sf_histfloat(in,"o2",&o2)) o2=0.; dt = d1; dx = d2; ox = o2; nx1 = n2; nt = n1; if (!sf_histint(velfile,"n1",&nz)) sf_error("No n1= in vel"); if (!sf_histfloat(velfile,"d1",&dz)) sf_error("No n1= in vel"); if (!sf_histint(velfile,"n2",&n2)) sf_error("No n2= in vel"); if (!sf_getint("iswavelet",&iswavelet)) iswavelet = 0; source_wavelet=sf_input("source_wavelet"); max_num_shot=100; ng=700; nx=n2; padt = 2; padx = 2; ntfft = padt*nt; nw=ntfft/2+1; nk = padx*nx; dw = 2*PI/ntfft/dt; dkx = 2*PI/nk/dx; sf_putint(out,"n1",nz); sf_putint(out,"n2",nx); sf_putfloat(out,"d1",dz); sf_putstring(out,"label1","z"); sf_putstring(out,"unit1","m"); sf_putstring(out,"title","migrated"); if (!sf_getfloat("fmax",&fmax)) fmax = 0.5/d1; /* max frequency to process */ if (fmax > 0.5/d1) fmax = 0.5/d1; if (!sf_getfloat("fmin",&fmin)) fmin = 0.1; /* min frequency to process */ if (!sf_getfloat("Zmax",&zmax)) zmax = (nz-1)*dz; /* max Depth to migrate */ /*define axis variables*/ dkx=(float) 2*PI/nk/dx; dw=(float) 2*PI/ntfft/dt; /*allocate memory to dynamic arrays*/ d = sf_floatalloc2(nt,nx1); 
shot=sf_floatalloc2(nt,ng); ds=sf_floatalloc2(nt,nx); vel = sf_floatalloc2(nz,nx); wavelet=sf_floatalloc(nt); vmig = sf_floatalloc2(nz,nx); m = sf_complexalloc2(nw,nx); ms = sf_complexalloc2(nw,nx); mr = sf_complexalloc2(nw,nx); kx= sf_floatalloc (nk); omega= sf_floatalloc (nw); in2a = sf_complexalloc(nk); in2b = sf_complexalloc(nk); n = sf_intalloc(1); M= sf_floatalloc2(nz,nx); c = sf_complexalloc(nx); cs = sf_complexalloc(nx); cr = sf_complexalloc(nx); /*read input files*/ sf_floatread(d[0],nx1*nt,in); sf_floatread(vel[0],nx*nz,velfile); /* If there is no wavelet use delta as default If there is a wavelet use it*/ if (iswavelet==0) { for (it=0; it<nt; it++) wavelet[it] = 0.0; wavelet[0]=1; } if (iswavelet==1) sf_floatread(wavelet,nt,source_wavelet); /* This part is important: we need to define the horizontal wavenumber and frequency axes right.*/ dw = 2*PI/ntfft/dt; dkx = 2*PI/nk/dx; for (iw=0;iw<nw;iw++){ omega[iw] = dw*iw; } for (ik=0;ik<nk;ik++){ if (ik<nk/2) kx[ik] = dkx*ik; else kx[ik] = -(dkx*nk - dkx*ik); } /* Define minimum and maximum frequency index to process*/ f_low = fmin; /* min frequency to process */ f_high = fmax; /* max frequency to process */ if(f_low>0){ if_low = trunc(f_low*dt*ntfft); } else{ if_low = 0; } if(f_high*dt*ntfft+1<nw){ if_high = trunc(f_high*dt*ntfft)+1; } else{ if_high = nw; } __real__ czero = 0; __imag__ czero = 0; n[0] = nk; p2a = fftwf_plan_dft(1, n, (fftwf_complex*)in2a, (fftwf_complex*)in2a, FFTW_FORWARD, FFTW_ESTIMATE); p2b = fftwf_plan_dft(1, n, (fftwf_complex*)in2b, (fftwf_complex*)in2b, FFTW_BACKWARD, FFTW_ESTIMATE); fftwf_execute(p2a); /* FFT x to k */ fftwf_execute(p2b); /* FFT x to k */ /* Define initial migrated model and source field as zeros*/ for (iz=0; iz<nz; iz++) { for (ix=0; ix<nx; ix++) M[ix][iz] = 0.0; } for (it=0; it<nt; it++) { for (ix=0; ix<nx; ix++) ds[ix][it] = 0.0; } for (iz=0; iz<nz;iz++){ for (ix=0;ix<nx;ix++) vmig[ix][iz]=vel[ix][iz]; } /* loop over shots*/ for (ishot=0;ishot<max_num_shot;ishot++){ 
for (ig=0;ig<ng;ig++){ for (it=0; it<nt; it++) shot[ig][it]=d[ishot*ng+ig][it]; } for (it=0; it<nt; it++) { for (ix=0; ix<nx; ix++) ds[ix][it] = 0.0; } index=ishot*nx/max_num_shot; for (it=0; it<nt; it++) ds[index][it]=wavelet[it]; /* apply fourier transform in time direction t-x ---> w-x*/ my_forward_fft(ms,mr,shot,ds,nt,dt,nx,padt); for (iw=if_low;iw<if_high;iw++){ for (iz=0; iz<nz;iz++){ v_ave=vmig[0][iz]; my_v_ave (v_ave,vmig,iz,nx); /*Apply phase shift to source side*/ my_phase_shift(ms,czero,iw,iz,omega,kx,nk,nx,v_ave,in2a,in2b,p2a,p2b,dz,0); for (ix=0;ix<nx;ix++) { cs[ix]= in2b[ix]; } /*Apply phase shift to receiver side*/ my_phase_shift(mr,czero,iw,iz,omega,kx,nk,nx,v_ave,in2a,in2b,p2a,p2b,dz,1); for (ix=0;ix<nx;ix++) { cr[ix]= in2b[ix]; } /*Apply split step correction to source and receiver side wavefields*/ my_split_step_correction (ms,cs,vmig,v_ave,iz,dz,iw,dw,nx,0); my_split_step_correction (mr,cr,vmig,v_ave,iz,dz,iw,dw,nx,1); /* Apply cross corrolation as an imaging condition*/ for (ix=0;ix<nx;ix++){ __real__ Ls=crealf(ms[ix][iw]); __imag__ Ls=- cimagf(ms[ix][iw]); m[ix][iw]=mr[ix][iw]*Ls; } /* Update migrated model by stacking*/ for (ix=0;ix<nx;ix++) M[ix][iz]=M[ix][iz]+2*crealf(m[ix][iw]); } } fprintf(stderr,"\r progress = %6.2f%%",(float) 100*(ishot)/(max_num_shot)); } sf_floatwrite(M[0],nz*nx,out); fftwf_destroy_plan(p2a); fftwf_free(in2a); fftwf_destroy_plan(p2b); fftwf_free(in2b); exit (0); }