/**
 * Low-pass filters every row of a complex image in the Fourier domain.
 *
 * Each row y is copied into an FFTW buffer, forward transformed, has the Fourier
 * components with index in [filterIndexSize, width - filterIndexSize) set to zero,
 * is transformed back and divided by the row length before being written back
 * into the images. Rows whose sinc scale is not larger than 1.0 are left untouched.
 *
 * @param artifacts Provides the band meta data (one channel frequency per row) and
 *                  the mutex used to serialize calls into the FFTW planner.
 * @param real      Real part of the image; modified in place.
 * @param imag      Imaginary part of the image; modified in place.
 */
void TimeConvolutionAction::PerformFFTSincOperation(ArtifactSet &artifacts, Image2DPtr real, Image2DPtr imag) const
{
	const size_t width = real->Width();

	fftw_complex
		*fftIn = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * width),
		*fftOut = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * width);

	// FFTW plan routines are not thread safe, so lock.
	boost::mutex::scoped_lock lock(artifacts.IOMutex());
	fftw_plan
		fftPlanForward = fftw_plan_dft_1d(width, fftIn, fftOut, FFTW_FORWARD, FFTW_MEASURE),
		fftPlanBackward = fftw_plan_dft_1d(width, fftIn, fftOut, FFTW_BACKWARD, FFTW_MEASURE);
	lock.unlock();

	const BandInfo band = artifacts.MetaData()->Band();
	for(unsigned y=0;y<real->Height();++y)
	{
		const numl_t sincScale = ActualSincScaleInSamples(artifacts, band.channels[y].frequencyHz);
		const numl_t limitFrequency = (numl_t) width / sincScale;
		if(y == real->Height()/2)
		{
			AOLogger::Debug << "Horizontal sinc scale: " << sincScale << " (filter scale: " << Angle::ToString(ActualSincScaleAsRaDecDist(artifacts, band.channels[y].frequencyHz)) << ")\n";
		}
		if(sincScale > 1.0)
		{
			for(unsigned x=0;x<width;++x)
			{
				fftIn[x][0] = real->Value(x, y);
				fftIn[x][1] = imag->Value(x, y);
			}

			fftw_execute_dft(fftPlanForward, fftIn, fftOut);

			// sincScale > 1 implies limitFrequency < width, so filterIndexSize
			// never exceeds width and the subtraction below cannot wrap around.
			size_t filterIndexSize = (limitFrequency > 1.0) ? (size_t) ceil(limitFrequency/2.0) : 1;

			// Remove the high frequencies [filterIndexSize : n-filterIndexSize]
			for(size_t f=filterIndexSize;f<width - filterIndexSize;++f)
			{
				fftOut[f][0] = 0.0;
				fftOut[f][1] = 0.0;
			}

			// The backward plan is executed with the buffers swapped (new-array interface).
			fftw_execute_dft(fftPlanBackward, fftOut, fftIn);

			// FFTW does not normalize: forward followed by backward scales by n.
			const double n = width;
			for(unsigned x=0;x<width;++x)
			{
				real->SetValue(x, y, fftIn[x][0] / n);
				imag->SetValue(x, y, fftIn[x][1] / n);
			}
		}
	}

	// The plans were never released before, leaking them on every call.
	// Destroying a plan is a planner routine as well, so take the lock again.
	lock.lock();
	fftw_destroy_plan(fftPlanForward);
	fftw_destroy_plan(fftPlanBackward);
	lock.unlock();

	fftw_free(fftIn);
	fftw_free(fftOut);
}
/*
 * Extract a dense Gabor magnitude feature vector from an image.
 *
 * The image is transformed once with the bank's forward plan. For every filter
 * of the bank the image spectrum is multiplied element-wise with the filter
 * spectrum (bank_dfts[i]), transformed back, and the magnitude of the result is
 * sampled on a (step_x, step_y) grid.
 *
 * setting    : kernel size, sampling steps and the filter bank to use.
 * image_data : filter_len pixel values, read as the real part of the input.
 * feat_vec   : output; for each of the filter_num filters one value is written
 *              per visited (_x, _y) grid position, filters stored one after the
 *              other.
 */
void gabor_extract(const struct GaborSetting *setting,
                   const unsigned char *image_data,
                   gabor_real *feat_vec)
{
    struct GaborBank *pBank = setting->bank;
    int filter_num = pBank->filter_num;
    int filter_len = pBank->filter_len;
    int kernel_w = setting->kernel_w;
    int kernel_h = setting->kernel_h;
    int step_x = setting->step_x;
    int step_y = setting->step_y;

    fftw_complex *in_buffer = pBank->in_buffer,
                 *dft_buffer = pBank->dft_buffer,
                 *idft_buffer = pBank->idft_buffer;

    /* fill in image data (was "in_buffer[i],imag", which never set the field) */
    for (int i = 0; i < filter_len; ++i) {
        in_buffer[i].real = image_data[i];
        in_buffer[i].imag = .0;
    }

    /* DFT on image data and store image's dft data in dft_buffer */
    fftw_execute_dft(pBank->plan_foreward, in_buffer, dft_buffer);

    gabor_real *pFeat = feat_vec;
    for (int i = 0; i < filter_num; ++i) {
        fftw_complex *pCurrKernel = pBank->bank_dfts[i];

        /*
         * Multiply the image spectrum with the filter spectrum. The product
         * goes into in_buffer (free again after the forward DFT) so that
         * dft_buffer keeps the untouched image spectrum for the next filter;
         * writing it back into dft_buffer made every filter after the first
         * operate on an already filtered spectrum.
         * NOTE(review): assumes in_buffer has the same alignment as dft_buffer
         * (e.g. both from fftw_malloc), as fftw_execute_dft requires -- confirm.
         */
        for (int j = 0; j < filter_len; ++j) {
            fftw_complex img = dft_buffer[j], ker = pCurrKernel[j];
            in_buffer[j].real = (img.real * ker.real - img.imag * ker.imag);
            in_buffer[j].imag = (img.real * ker.imag + img.imag * ker.real);
        }

        /* inverse dft */
        fftw_execute_dft(pBank->plan_backward, in_buffer, idft_buffer);

        /* extract MAG features (identifiers were misspelled "kenerl_*") */
        for (int _y = 0; _y < kernel_h; _y += step_y) {
            for (int _x = 0; _x < kernel_w; _x += step_x) {
                fftw_complex resp = idft_buffer[ _y * kernel_w + _x ];
                *pFeat++ = sqrt(resp.real * resp.real + resp.imag * resp.imag);
            }
        }
    }
}
void fft_perform_forw(double *data) { int i; /* int m,n,o; */ /* ===== first direction ===== */ FFT_TRACE(fprintf(stderr,"%d: fft_perform_forw: dir 1:\n",this_node)); fftw_complex *c_data = (fftw_complex *) data; fftw_complex *c_data_buf = (fftw_complex *) fft.data_buf; /* communication to current dir row format (in is data) */ fft_forw_grid_comm(fft.plan[1], data, fft.data_buf); /* fprintf(stderr,"%d: start grid \n",this_node); i=0; for(m=0;m<8;m++) { for(n=0;n<8;n++) { for(o=0;o<8;o++) { fprintf(stderr,"%.3f ",fft.data_buf[i++]); } fprintf(stderr,"\n"); } fprintf(stderr,"\n"); } */ /* complexify the real data array (in is fft.data_buf) */ for(i=0;i<fft.plan[1].new_size;i++) { data[2*i] = fft.data_buf[i]; /* real value */ data[(2*i)+1] = 0; /* complex value */ } /* perform FFT (in/out is data)*/ fftw_execute_dft(fft.plan[1].our_fftw_plan,c_data,c_data); /* ===== second direction ===== */ FFT_TRACE(fprintf(stderr,"%d: fft_perform_forw: dir 2:\n",this_node)); /* communication to current dir row format (in is data) */ fft_forw_grid_comm(fft.plan[2], data, fft.data_buf); /* perform FFT (in/out is fft.data_buf)*/ fftw_execute_dft(fft.plan[2].our_fftw_plan,c_data_buf,c_data_buf); /* ===== third direction ===== */ FFT_TRACE(fprintf(stderr,"%d: fft_perform_forw: dir 3:\n",this_node)); /* communication to current dir row format (in is fft.data_buf) */ fft_forw_grid_comm(fft.plan[3], fft.data_buf, data); /* perform FFT (in/out is data)*/ fftw_execute_dft(fft.plan[3].our_fftw_plan,c_data,c_data); //fft_print_global_fft_mesh(fft.plan[3],data,1,0); /* REMARK: Result has to be in data. */ }
// This is an inplace real 2 complex transform // Assumes wrin has the logical dimensions [NY/PROC, NX, NZ] of real positions // physical dimensions [NY/NPROC, NX, NZ+2]; void gfft_r2c_t(double *wrin) { int i; double complex *win = (double complex *) wrin; fft_timer = fft_timer - get_c_time(); //start transforming in 2D wrin fftw_execute_dft_r2c(r2c_2d, wrin, win); // The logical dimensions of win are [NY_COMPLEX/NPROC, NX_COMPLEX, NZ_COMPLEX] // transpose it transpose_complex_YX(win, win); // We now have an array with logical dimensions[NX_COMPLEX/NPROC, NY_COMPLEX, NZ_COMPLEX] #ifdef _OPENMP #pragma omp parallel for private(i) schedule(static) #endif for(i=0 ; i < NX_COMPLEX/NPROC ; i++) fftw_execute_dft(r2c_1d, &win[i*NY_COMPLEX*NZ_COMPLEX],&win[i*NY_COMPLEX*NZ_COMPLEX]); fft_timer = fft_timer + get_c_time(); // done... return; }
void purify_measurement_cftfwd(void *out, void *in, void **data){ int i, j, nx2, ny2; int st1, st2; double scale; purify_measurement_cparam *param; double *deconv; purify_sparsemat_row *mat; fftw_plan *plan; complex double *temp; complex double *xin; complex double *yout; complex double alpha; //Cast input pointers param = (purify_measurement_cparam*)data[0]; deconv = (double*)data[1]; mat = (purify_sparsemat_row*)data[2]; plan = (fftw_plan*)data[3]; temp = (complex double*)data[4]; xin = (complex double*)in; yout = (complex double*)out; nx2 = param->ofx*param->nx1; ny2 = param->ofy*param->ny1; alpha = 0.0 + 0.0*I; //Zero padding and decovoluntion. //Original image in the center. for (i=0; i < nx2*ny2; i++){ *(temp + i) = alpha; } //Scaling scale = 1/sqrt((double)(nx2*ny2)); int npadx = nx2 / 4; int npady = ny2 / 4; for (j=0; j < param->ny1; j++){ st1 = j * param->nx1; st2 = (j + npady) * nx2; for (i=0; i < param->nx1; i++){ // *(temp + st2 + i) = *(xin + st1 + i)**(deconv + st1 + i)*scale; *(temp + st2 + i + npadx) = *(xin + st1 + i) * scale; *(temp + st2 + i + npadx) *= *(deconv + st1 + i); } } purify_utils_fftshift_2d_c(temp, nx2, ny2); //FFT fftw_execute_dft(*plan, temp, temp); //Multiplication by the sparse matrix storing the interpolation kernel purify_sparsemat_fwd_complexr(yout, temp, mat); }
/*
 * MatApply_USFFT_Private - placeholder for applying a USFFT matrix to a vector.
 *
 * As written, every statement except PetscFunctionBegin / PetscFunctionReturn(0)
 * is excluded by "#if 0", so the function does nothing with A, plan, direction,
 * x or y and always returns 0.
 *
 * The disabled code sketches the intended behaviour: resample x, create an FFTW
 * plan on first use (1d/2d/3d/n-d for dof == 1, fftw_plan_many_dft otherwise)
 * and execute it, or re-run an existing plan through fftw_execute_dft.
 * NOTE(review): the disabled code cannot be enabled as is -- the Mat_USFFT
 * declaration has no identifier (the body refers to "usfft") and x_array is
 * used but never declared (only r_array and y_array are).
 */
PetscErrorCode MatApply_USFFT_Private(Mat A, fftw_plan *plan, int direction, Vec x,Vec y)
{
#if 0
  PetscErrorCode ierr;
  PetscScalar    *r_array, *y_array;
  Mat_USFFT* = (Mat_USFFT*)(A->data);
#endif

  PetscFunctionBegin;
#if 0
  /* resample x to usfft->resample */
  ierr = MatResample_USFFT_Private(A, x);CHKERRQ(ierr);

  /* NB: for now we use outdim for both x and y; this will change once a full USFFT is implemented */
  ierr = VecGetArray(usfft->resample,&r_array);CHKERRQ(ierr);
  ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
  if (!*plan) { /* create a plan then execute it*/
    if (usfft->dof == 1) {
#if defined(PETSC_DEBUG_USFFT)
      ierr = PetscPrintf(PetscObjectComm((PetscObject)A), "direction = %d, usfft->ndim = %d\n", direction, usfft->ndim);CHKERRQ(ierr);
      for (int ii = 0; ii < usfft->ndim; ++ii) {
        ierr = PetscPrintf(PetscObjectComm((PetscObject)A), "usfft->outdim[%d] = %d\n", ii, usfft->outdim[ii]);CHKERRQ(ierr);
      }
#endif
      /* pick the planner matching the dimensionality */
      switch (usfft->dim) {
      case 1:
        *plan = fftw_plan_dft_1d(usfft->outdim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,direction,usfft->p_flag);
        break;
      case 2:
        *plan = fftw_plan_dft_2d(usfft->outdim[0],usfft->outdim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,direction,usfft->p_flag);
        break;
      case 3:
        *plan = fftw_plan_dft_3d(usfft->outdim[0],usfft->outdim[1],usfft->outdim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,direction,usfft->p_flag);
        break;
      default:
        *plan = fftw_plan_dft(usfft->ndim,usfft->outdim,(fftw_complex*)x_array,(fftw_complex*)y_array,direction,usfft->p_flag);
        break;
      }
      fftw_execute(*plan);
    } /* if (dof == 1) */
    else { /* if (dof > 1) */
      *plan = fftw_plan_many_dft(/*rank*/usfft->ndim, /*n*/usfft->outdim, /*howmany*/usfft->dof,
                                 (fftw_complex*)x_array, /*nembed*/usfft->outdim, /*stride*/usfft->dof, /*dist*/1,
                                 (fftw_complex*)y_array, /*nembed*/usfft->outdim, /*stride*/usfft->dof, /*dist*/1,
                                 /*sign*/direction, /*flags*/usfft->p_flag);
      fftw_execute(*plan);
    } /* if (dof > 1) */
  } /* if (!*plan) */
  else { /* if (*plan) */
    /* use existing plan */
    fftw_execute_dft(*plan,(fftw_complex*)x_array,(fftw_complex*)y_array);
  }
  ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
  ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
} /* MatApply_USFFT_Private() */
/*! \memberof splitop
 * Run the split-step algorithm \a times times on w->psi.
 *
 * A straightforward implementation would, for every step, multiply psi by ehV,
 * transform forward, multiply by eT, transform back and multiply by ehV / bins.
 * Here the two multiplications that meet between consecutive steps are merged
 * into a single multiplication by eVn, and the final one uses ehVn (per the
 * original comments these already carry the 1/bins factor that FFTW leaves out).
 * Nothing happens when \a times is not positive.
 */
void splitop_run(splitop_t * w, int times)
{
	int nbins = w->prefs->bins;
	fftw_complex * half_pot      = w->ehV;
	fftw_complex * full_pot_norm = w->eVn;
	fftw_complex * half_pot_norm = w->ehVn;
	fftw_complex * kinetic       = w->eT;
	fftw_complex * pos_space     = w->psi;
	fftw_complex * mom_space     = w->psik;
	int inner_steps;

	if (times <= 0)
		return;

	// opening half step: U_{V/2}, then to momentum space and U_T
	cvect_mult_asign(nbins, pos_space, half_pot);
	fftw_execute_dft(w->fwd, pos_space, mom_space);
	cvect_mult_asign(nbins, mom_space, kinetic);

	// merged interior steps: back to position space, U_V/bins,
	// to momentum space again, U_T
	inner_steps = times - 1;
	while (inner_steps-- > 0) {
		fftw_execute_dft(w->bwd, mom_space, pos_space);
		cvect_mult_asign(nbins, pos_space, full_pot_norm);
		fftw_execute_dft(w->fwd, pos_space, mom_space);
		cvect_mult_asign(nbins, mom_space, kinetic);
	}

	// closing half step: back to position space, U_{V/2}/bins
	fftw_execute_dft(w->bwd, mom_space, pos_space);
	cvect_mult_asign(nbins, pos_space, half_pot_norm);
}
void dfft_perform_back(double *data) { int i; fftw_complex *c_data = (fftw_complex *) data; fftw_complex *c_data_buf = (fftw_complex *) dfft.data_buf; /* ===== third direction ===== */ FFT_TRACE(fprintf(stderr,"%d: dipolar fft_perform_back: dir 3:\n",this_node)); /* perform FFT (in is data) */ fftw_execute_dft(dfft.back[3].our_fftw_plan,c_data,c_data); /* communicate (in is data)*/ dfft_back_grid_comm(dfft.plan[3],dfft.back[3],data,dfft.data_buf); /* ===== second direction ===== */ FFT_TRACE(fprintf(stderr,"%d: dipolar fft_perform_back: dir 2:\n",this_node)); /* perform FFT (in is data_buf) */ fftw_execute_dft(dfft.back[2].our_fftw_plan,c_data_buf,c_data_buf); /* communicate (in is data_buf) */ dfft_back_grid_comm(dfft.plan[2],dfft.back[2],dfft.data_buf,data); /* ===== first direction ===== */ FFT_TRACE(fprintf(stderr,"%d: fft_perform_back: dir 1:\n",this_node)); /* perform FFT (in is data) */ fftw_execute_dft(dfft.back[1].our_fftw_plan,c_data,c_data); /* throw away the (hopefully) empty complex component (in is data)*/ for(i=0;i<dfft.plan[1].new_size;i++) { dfft.data_buf[i] = data[2*i]; /* real value */ //Vincent: if (data[2*i+1]>1e-5) { printf("dipoar fft - Complex value is not zero (i=%d,data=%g)!!!\n",i,data[2*i+1]); if (i>100) exit(-1); } } /* communicate (in is data_buf) */ dfft_back_grid_comm(dfft.plan[1],dfft.back[1],dfft.data_buf,data); /* REMARK: Result has to be in data. */ }
//Fourier transform "in" along every direction mu for which ext_dirs[mu] is non-zero,
//leaving the result in "out" (in is first copied to out unless they coincide).
// -ncpp is the number of complex per site, used as stride of the 1d transforms
// -sign is forwarded to fftw_plan_many_dft as the sign of the exponent
// -if normalize is non-zero the result is divided by the product of the
//  global sizes of the transformed directions
THREADABLE_FUNCTION_6ARG(fft4d, complex*,out, complex*,in, int*,ext_dirs, int,ncpp, double,sign, int,normalize)
{
  GET_THREAD_ID();
  
  //first of all put in to out
  if(out!=in) vector_copy(out,in);
  
  //list all dirs
  int dirs[NDIM],ndirs=0;
  for(int mu=0;mu<NDIM;mu++) if(ext_dirs[mu]) dirs[ndirs++]=mu;
  verbosity_lv2_master_printf("Going to FFT: %d dimensions in total\n",ndirs);
  
  if(ndirs)
    {
      //allocate buffer
      complex *buf=nissa_malloc("buf",max_locd_size*ncpp,complex);
      
      //allocate plans: planning is done by the master thread only
      fftw_plan *plans=nissa_malloc("plans",ndirs,fftw_plan);
      if(IS_MASTER_THREAD)
	for(int idir=0;idir<ndirs;idir++)
	  plans[idir]=fftw_plan_many_dft(1,glb_size+dirs[idir],ncpp,buf,NULL,ncpp,1,buf,NULL,ncpp,1,sign,FFTW_ESTIMATE);
      THREAD_BARRIER();
      
      //transpose each dir in turn and take fft
      for(int idir=0;idir<ndirs;idir++)
	{
	  int mu=dirs[idir];
	  verbosity_lv2_master_printf("FFT-ing dimension %d/%d=%d\n",idir+1,ndirs,mu);
	  remap_lx_vector_to_locd(buf,out,ncpp*sizeof(complex),mu);
	  
	  //makes all the fourier transform, one in-place execution per perpendicular offset
	  NISSA_PARALLEL_LOOP(ioff,0,locd_perp_size_per_dir[mu])
	    fftw_execute_dft(plans[idir],buf+ioff*glb_size[mu]*ncpp,buf+ioff*glb_size[mu]*ncpp);
	  THREAD_BARRIER();
	  
	  remap_locd_vector_to_lx(out,buf,ncpp*sizeof(complex),mu);
	}
      
      //destroy plans
      if(IS_MASTER_THREAD) for(int idir=0;idir<ndirs;idir++) fftw_destroy_plan(plans[idir]);
      
      //put normalisation: product of the sizes of the directions actually transformed.
      //The size must be looked up through dirs[]: indexing glb_size with the loop
      //counter picked the wrong direction whenever dirs[idir]!=idir
      if(normalize)
	{
	  double norm=1;
	  for(int idir=0;idir<ndirs;idir++) norm*=glb_size[dirs[idir]];
	  double_vector_prod_double((double*)out,(double*)out,1/norm,2*ncpp*loc_vol);
	}
      
      nissa_free(buf);
      nissa_free(plans);
    }
}
/*! * Compute forward Fouier transform of complex signal. * * \param[out] out (complex double*) Forward Fourier transform of input signal. * \param[in] in (complex double*) Complex input signal. * \param[in] data * - data[0] (fftw_plan*): The real-to-complex FFTW plan to use when * computing the Fourier transform (passed as an input so that the * FFTW can be FFTW_MEASUREd beforehand). * * \authors <a href="http://www.jasonmcewen.org">Jason McEwen</a> */ void purify_measurement_fft_complex(void *out, void *in, void **data) { fftw_plan *plan; plan = (fftw_plan*)data[0]; fftw_execute_dft(*plan, (complex double*)in, (complex double*)out); }
void purify_measurement_cftadj(void *out, void *in, void **data){ int i, j, nx2, ny2; int st1, st2; double scale; purify_measurement_cparam *param; double *deconv; purify_sparsemat_row *mat; fftw_plan *plan; complex double *temp; complex double *yin; complex double *xout; //Cast input pointers param = (purify_measurement_cparam*)data[0]; deconv = (double*)data[1]; mat = (purify_sparsemat_row*)data[2]; plan = (fftw_plan*)data[3]; temp = (complex double*)data[4]; yin = (complex double*)in; xout = (complex double*)out; nx2 = param->ofx*param->nx1; ny2 = param->ofy*param->ny1; //Multiplication by the adjoint of the //sparse matrix storing the interpolation kernel purify_sparsemat_adj_complexr(temp, yin, mat); //Inverse FFT fftw_execute_dft(*plan, temp, temp); //Scaling scale = 1/sqrt((double)(nx2*ny2)); purify_utils_fftshift_2d_c(temp, nx2, ny2); //Cropping and decovoluntion. //Top left corner of the image corresponf to the original image. int npadx = nx2 / 4; int npady = ny2 / 4; for (j=0; j < param->ny1; j++){ st1 = j * param->nx1; st2 = (j + npady) * nx2; for (i=0; i < param->nx1; i++){ // *(xout + st1 + i) = *(temp + st2 + i)**(deconv + st1 + i)*scale; *(xout + st1 + i) = *(temp + st2 + i + npadx) * scale; *(xout + st1 + i) *= *(deconv + st1 + i); } } }
/* Run the 2D FFT described by ath_plan on data, overwriting data with the
 * result. Which backend is called is chosen at compile time. */
void ath_2d_fft(struct ath_2d_fft_plan *ath_plan, fftw_complex *data)
{
#ifndef FFT_BLOCK_DECOMP
  /* FFTW plan; the forward/backward direction is already part of the plan */
  fftw_execute_dft(ath_plan->plan, data, data);
#else /* FFT_BLOCK_DECOMP */
  /* block-decomposition backend; the direction is passed explicitly */
  fft_2d(data, data, ath_plan->dir, ath_plan->plan);
#endif /* FFT_BLOCK_DECOMP */
}
void CFFT::decode(int16_t *buffer, int size, double *xval, double *yval) { if (update == true) return; // fill complex for (int i=0, j=0; i < size; i+=2,j++) { in1[j+ bufferBlock * channelSize][0] = buffer[i]*1.0/32768.0; // set values in double between -1.0 and 1.0; channel 1 in2[j+ bufferBlock * channelSize][0] = buffer[i+1]*1.0/32768.0; // set values in double between -1.0 and 1.0; channel 2 in1[j][1] = 0.0; in2[j][1] = 0.0; } // if we reach the FFT Size if (bufferBlock * channelSize + channelSize == fftsize) { for (int i=0; i< fftsize; i++) { in1[i][0] = window[i] * in1[i][0]; in2[i][0] = window[i] * in2[i][0]; } fftw_execute_dft(ch1,in1,out1); fftw_execute_dft(ch2,in2,out2); // fill back spectrum buffer for (int i=0; i < fftsize/2; i++) { yval[i] = sqrt(pow(out1[i][0],2) + pow(out1[i][1],2)); // Channel antenna 1 yval[i+fftsize/2] = sqrt(pow(out2[i][0],2) + pow(out2[i][1],2)); // Channel antenna 2 xval[i] = i; xval[i+fftsize/2] = i+fftsize/2; } // Reinit buffer block bufferBlock = -1; } bufferBlock++; }
void gfft_c2r_t(double complex *win) { int i; double *wrin = (double *) win; fft_timer = fft_timer - get_c_time(); // We now have an array with logical dimensions[NX_COMPLEX/NPROC, NY_COMPLEX, NZ_COMPLEX] #ifdef _OPENMP #pragma omp parallel for private(i) schedule(static) #endif for(i=0 ; i < NX_COMPLEX/NPROC ; i++) fftw_execute_dft(c2r_1d, &win[i*NY_COMPLEX*NZ_COMPLEX],&win[i*NY_COMPLEX*NZ_COMPLEX]); // The logical dimensions of win are [NX_COMPLEX/NPROC, NY_COMPLEX, NZ_COMPLEX] // transpose it transpose_complex_XY(win, win); // The final 2D transform fftw_execute_dft_c2r(c2r_2d, win, wrin); // and we're done ! fft_timer = fft_timer + get_c_time(); return; }
/* Complex to complex forward transform.
 *
 * in is converted to a double-precision complex image, transformed in place
 * with a 2D forward FFTW plan, divided by the number of pixels and written to
 * out line by line as IM_BANDFMT_DPCOMPLEX.
 *
 * dummy: image used as the owner of all local allocations
 * in: input image, must be uncoded and have exactly one band
 * out: output image
 *
 * Returns 0 on success, -1 on error.
 */
static int 
cfwfft1( IMAGE *dummy, IMAGE *in, IMAGE *out )
{
	fftw_plan plan;
	double *buf, *q, *p;
	double size;
	int x, y;

	IMAGE *cmplx = im_open_local( dummy, "fwfft1:1", "t" );

	/* We have to have a separate buffer for the planner to work on.
	 */
	double *planner_scratch = IM_ARRAY( dummy, 
		in->Xsize * in->Ysize * 2, double );

	/* Make dp complex image. The scratch buffer is checked here too: the
	 * planner is handed this pointer and must not be given NULL when the
	 * allocation failed.
	 */
	if( !cmplx || !planner_scratch || 
		im_pincheck( in ) || im_outcheck( out ) )
		return( -1 );
	if( in->Coding != IM_CODING_NONE || in->Bands != 1 ) {
		im_error( "im_fwfft", _( "one band uncoded only" ) );
		return( -1 );
	}
	if( im_clip2dcm( in, cmplx ) )
		return( -1 );

	/* Make the plan for the transform. It is planned on the scratch buffer
	 * and then executed on the real data through the new-array interface.
	 */
	if( !(plan = fftw_plan_dft_2d( in->Ysize, in->Xsize,
		(fftw_complex *) planner_scratch, 
		(fftw_complex *) planner_scratch,
		FFTW_FORWARD, 
		0 )) ) {
		im_error( "im_fwfft", _( "unable to create transform plan" ) );
		return( -1 );
	}

	fftw_execute_dft( plan, 
		(fftw_complex *) cmplx->data, (fftw_complex *) cmplx->data );

	fftw_destroy_plan( plan );

	/* WIO to out.
	 */
	if( im_cp_desc( out, in ) )
		return( -1 );
	out->Bbits = IM_BBITS_DPCOMPLEX;
	out->BandFmt = IM_BANDFMT_DPCOMPLEX;
	if( im_setupout( out ) )
		return( -1 );
	if( !(buf = (double *) IM_ARRAY( dummy, 
		IM_IMAGE_SIZEOF_LINE( out ), PEL )) )
		return( -1 );

	/* Copy to out, normalise. The divisor is the same for every line, so
	 * work it out once.
	 */
	size = (double) out->Xsize * out->Ysize;
	for( p = (double *) cmplx->data, y = 0; y < out->Ysize; y++ ) {
		q = buf;

		for( x = 0; x < out->Xsize; x++ ) {
			q[0] = p[0] / size;
			q[1] = p[1] / size;
			p += 2;
			q += 2;
		}

		if( im_writeline( y, out, (PEL *) buf ) )
			return( -1 );
	}

	return( 0 );
}
int job_core(int pm, // Hemisphere int mm, // Grid 'sky position' int nn, // Second grid 'sky position' Search_settings *sett, // Search settings Command_line_opts *opts, // Search options Search_range *s_range, // Range for searching FFTW_plans *plans, // Plans for fftw FFTW_arrays *fftw_arr, // Arrays for fftw Aux_arrays *aux, // Auxiliary arrays int *sgnlc, // Candidate trigger parameters FLOAT_TYPE *sgnlv, // Candidate array int *FNum) { // Candidate signal number int i, j, n; int smin = s_range->sst, smax = s_range->spndr[1]; double al1, al2, sinalt, cosalt, sindelt, cosdelt, sgnlt[NPAR], nSource[3], het0, sgnl0, ft; double _tmp1[sett->nifo][sett->N]; #undef NORMTOMAX #ifdef NORMTOMAX double blkavg, threshold = 6.; int imax, imax0, iblk, blkstart, ihi; int blksize = 1024; int nfft = sett->nmax - sett->nmin; static int *Fmax; if (!Fmax) Fmax = (int *) malloc(nfft*sizeof(int)); #endif struct timespec tstart, tend; double spindown_timer = 0; int spindown_counter = 0; //tstart = get_current_time(CLOCK_REALTIME); /* Matrix M(.,.) (defined on page 22 of PolGrawCWAllSkyReview1.pdf file) defines the transformation form integers (bin, ss, nn, mm) determining a grid point to linear coordinates omega, omegadot, alpha_1, alpha_2), where bin is the frequency bin number and alpha_1 and alpha_2 are defined on p. 22 of PolGrawCWAllSkyReview1.pdf file. [omega] [bin] [omegadot] = M(.,.) \times [ss] [alpha_1/omega] [nn] [alpha_2/omega] [mm] Array M[.] is related to matrix M(.,.) in the following way; [ M[0] M[4] M[8] M[12] ] M(.,.) = [ M[1] M[5] M[9] M[13] ] [ M[2] M[6] M[10] M[14] ] [ M[3] M[7] M[11] M[15] ] and M[1] = M[2] = M[3] = M[6] = M[7] = 0 */ // Grid positions al1 = nn*sett->M[10] + mm*sett->M[14]; al2 = nn*sett->M[11] + mm*sett->M[15]; // check if the search is in an appropriate region of the grid // if not, returns NULL if ((sqr(al1)+sqr(al2))/sqr(sett->oms) > 1.) 
return 0; int ss; double shft1, phase, cp, sp; complex double exph; // Change linear (grid) coordinates to real coordinates lin2ast(al1/sett->oms, al2/sett->oms, pm, sett->sepsm, sett->cepsm, &sinalt, &cosalt, &sindelt, &cosdelt); // calculate declination and right ascention // written in file as candidate signal sky positions sgnlt[2] = asin(sindelt); sgnlt[3] = fmod(atan2(sinalt, cosalt) + 2.*M_PI, 2.*M_PI); het0 = fmod(nn*sett->M[8] + mm*sett->M[12], sett->M[0]); // Nyquist frequency int nyqst = (sett->nfft)/2 + 1; // Loop for each detector /* Amplitude modulation functions aa and bb * for each detector (in signal sub-struct * of _detector, ifo[n].sig.aa, ifo[n].sig.bb) */ for(n=0; n<sett->nifo; ++n) { modvir(sinalt, cosalt, sindelt, cosdelt, sett->N, &ifo[n], aux); // Calculate detector positions with respect to baricenter nSource[0] = cosalt*cosdelt; nSource[1] = sinalt*cosdelt; nSource[2] = sindelt; shft1 = nSource[0]*ifo[n].sig.DetSSB[0] + nSource[1]*ifo[n].sig.DetSSB[1] + nSource[2]*ifo[n].sig.DetSSB[2]; #define CHUNK 4 #pragma omp parallel default(shared) private(phase,cp,sp,exph) { #pragma omp for schedule(static,CHUNK) for(i=0; i<sett->N; ++i) { ifo[n].sig.shft[i] = nSource[0]*ifo[n].sig.DetSSB[i*3] + nSource[1]*ifo[n].sig.DetSSB[i*3+1] + nSource[2]*ifo[n].sig.DetSSB[i*3+2]; ifo[n].sig.shftf[i] = ifo[n].sig.shft[i] - shft1; _tmp1[n][i] = aux->t2[i] + (double)(2*i)*ifo[n].sig.shft[i]; } #pragma omp for schedule(static,CHUNK) for(i=0; i<sett->N; ++i) { // Phase modulation phase = het0*i + sett->oms*ifo[n].sig.shft[i]; #ifdef NOSINCOS cp = cos(phase); sp = sin(phase); #else sincos(phase, &sp, &cp); #endif exph = cp - I*sp; // Matched filter ifo[n].sig.xDatma[i] = ifo[n].sig.xDat[i]*ifo[n].sig.aa[i]*exph; ifo[n].sig.xDatmb[i] = ifo[n].sig.xDat[i]*ifo[n].sig.bb[i]*exph; } /* Resampling using spline interpolation: * This will double the sampling rate */ #pragma omp for schedule(static,CHUNK) for(i=0; i < sett->N; ++i) { fftw_arr->xa[i] = ifo[n].sig.xDatma[i]; 
fftw_arr->xb[i] = ifo[n].sig.xDatmb[i]; } // Zero-padding (filling with 0s up to sett->nfft, // the nearest power of 2) #pragma omp for schedule(static,CHUNK) for (i=sett->N; i<sett->nfft; ++i) { fftw_arr->xa[i] = 0.; fftw_arr->xb[i] = 0.; } } //omp parallel fftw_execute_dft(plans->pl_int,fftw_arr->xa,fftw_arr->xa); //forward fft (len nfft) fftw_execute_dft(plans->pl_int,fftw_arr->xb,fftw_arr->xb); //forward fft (len nfft) // move frequencies from second half of spectrum; // and zero frequencies higher than nyquist // loop length: nfft - nyqst = nfft - nfft/2 - 1 = nfft/2 - 1 for(i=nyqst + sett->Ninterp - sett->nfft, j=nyqst; i<sett->Ninterp; ++i, ++j) { fftw_arr->xa[i] = fftw_arr->xa[j]; fftw_arr->xa[j] = 0.; } for(i=nyqst + sett->Ninterp - sett->nfft, j=nyqst; i<sett->Ninterp; ++i, ++j) { fftw_arr->xb[i] = fftw_arr->xb[j]; fftw_arr->xb[j] = 0.; } // Backward fft (len Ninterp = nfft*interpftpad) fftw_execute_dft(plans->pl_inv,fftw_arr->xa,fftw_arr->xa); fftw_execute_dft(plans->pl_inv,fftw_arr->xb,fftw_arr->xb); ft = (double)sett->interpftpad / sett->Ninterp; //scale FFT for (i=0; i < sett->Ninterp; ++i) { fftw_arr->xa[i] *= ft; fftw_arr->xb[i] *= ft; } // struct timeval tstart = get_current_time(), tend; // Spline interpolation to xDatma, xDatmb arrays splintpad(fftw_arr->xa, ifo[n].sig.shftf, sett->N, sett->interpftpad, ifo[n].sig.xDatma); splintpad(fftw_arr->xb, ifo[n].sig.shftf, sett->N, sett->interpftpad, ifo[n].sig.xDatmb); } // end of detector loop // square sums of modulation factors double aa = 0., bb = 0.; for(n=0; n<sett->nifo; ++n) { double aatemp = 0., bbtemp = 0.; for(i=0; i<sett->N; ++i) { aatemp += sqr(ifo[n].sig.aa[i]); bbtemp += sqr(ifo[n].sig.bb[i]); } for(i=0; i<sett->N; ++i) { ifo[n].sig.xDatma[i] /= ifo[n].sig.sig2; ifo[n].sig.xDatmb[i] /= ifo[n].sig.sig2; } aa += aatemp/ifo[n].sig.sig2; bb += bbtemp/ifo[n].sig.sig2; } #ifdef YEPPP #define VLEN 1024 int bnd = (sett->N/VLEN)*VLEN; #endif // Check if the signal is added to the data or the range 
file is given: // if not, proceed with the wide range of spindowns // if yes, use smin = s_range->sst, smax = s_range->spndr[1] if(!strcmp(opts->addsig, "") && !strcmp(opts->range, "")) { // Spindown range defined using Smin and Smax (settings.c) smin = trunc((sett->Smin - nn*sett->M[9] - mm*sett->M[13])/sett->M[5]); smax = trunc(-(nn*sett->M[9] + mm*sett->M[13] + sett->Smax)/sett->M[5]); } if(opts->s0_flag) smin = smax; // if spindown parameter is taken into account, smin != smax printf ("\n>>%d\t%d\t%d\t[%d..%d]\n", *FNum, mm, nn, smin, smax); static fftw_complex *fxa, *fxb; static double *F; #pragma omp threadprivate(fxa,fxb, F) #pragma omp threadprivate(F) //private loop counter: ss //private (declared inside): ii,Fc,het1,k,veto_status,a,v,_p,_c,_s,status //shared default: nn,mm,sett,_tmp1,ifo,het0,bnd,plans,opts,aa,bb, // fftw_arr (zostawiamy i robimy nowe), FNum (atomic!) //we use shared plans and fftw_execute with 'new-array' interface #pragma omp parallel default(shared) \ private(i, j, n, sgnl0, exph, phase, cp, sp, tstart, tend) \ firstprivate(sgnlt) \ reduction(+ : spindown_timer, spindown_counter) { #ifdef YEPPP Yep64f _p[VLEN], _s[VLEN], _c[VLEN]; enum YepStatus status; #endif #ifdef SLEEF double _p[VECTLENDP], _c[VECTLENDP]; vdouble2 v; vdouble a; #endif if (!fxa) fxa = (fftw_complex *)fftw_malloc(fftw_arr->arr_len*sizeof(fftw_complex)); if (!fxb) fxb = (fftw_complex *)fftw_malloc(fftw_arr->arr_len*sizeof(fftw_complex)); if (!F) F = (double *)calloc(2*sett->nfft, sizeof(double)); /* Spindown loop */ #pragma omp for schedule(static,4) for(ss=smin; ss<=smax; ++ss) { #if TIMERS>2 tstart = get_current_time(CLOCK_PROCESS_CPUTIME_ID); #endif // Spindown parameter sgnlt[1] = ss*sett->M[5] + nn*sett->M[9] + mm*sett->M[13]; int ii; double Fc, het1; #ifdef VERBOSE //print a 'dot' every new spindown printf ("."); fflush (stdout); #endif het1 = fmod(ss*sett->M[4], sett->M[0]); if(het1<0) het1 += sett->M[0]; sgnl0 = het0 + het1; // phase modulation before fft #if 
defined(SLEEF) // use simd sincos from the SLEEF library; // VECTLENDP is a simd vector length defined in the SLEEF library // and it depends on selected instruction set e.g. -DENABLE_AVX for(i=0; i<sett->N; i+=VECTLENDP) { for(j=0; j<VECTLENDP; j++) _p[j] = het1*(i+j) + sgnlt[1]*_tmp1[0][i+j]; a = vloadu(_p); v = xsincos(a); vstoreu(_p, v.x); // reuse _p for sin vstoreu(_c, v.y); for(j=0; j<VECTLENDP; ++j){ exph = _c[j] - I*_p[j]; fxa[i+j] = ifo[0].sig.xDatma[i+j]*exph; //ifo[0].sig.sig2; fxb[i+j] = ifo[0].sig.xDatmb[i+j]*exph; //ifo[0].sig.sig2; } } #elif defined(YEPPP) // use yeppp! library; // VLEN is length of vector to be processed // for caches L1/L2 64/256kb optimal value is ~2048 for (j=0; j<bnd; j+=VLEN) { //double *_tmp2 = &_tmp1[0][j]; for (i=0; i<VLEN; ++i) //_p[i] = het1*(i+j) + sgnlt[1]*_tmp2[i]; _p[i] = het1*(i+j) + sgnlt[1]*_tmp1[0][i+j]; status = yepMath_Sin_V64f_V64f(_p, _s, VLEN); assert(status == YepStatusOk); status = yepMath_Cos_V64f_V64f(_p, _c, VLEN); assert(status == YepStatusOk); for (i=0; i<VLEN; ++i) { // exph = _c[i] - I*_s[i]; fxa[i+j] = ifo[0].sig.xDatma[i+j]*_c[i]-I*ifo[0].sig.xDatma[i+j]*_s[i]; fxb[i+j] = ifo[0].sig.xDatmb[i+j]*_c[i]-I*ifo[0].sig.xDatmb[i+j]*_s[i]; } } // remaining part is shorter than VLEN - no need to vectorize for (i=0; i<sett->N-bnd; ++i){ j = bnd + i; _p[i] = het1*j + sgnlt[1]*_tmp1[0][j]; } status = yepMath_Sin_V64f_V64f(_p, _s, sett->N-bnd); assert(status == YepStatusOk); status = yepMath_Cos_V64f_V64f(_p, _c, sett->N-bnd); assert(status == YepStatusOk); for (i=0; i<sett->N-bnd; ++i) { j = bnd + i; //exph = _c[i] - I*_s[i]; //fxa[j] = ifo[0].sig.xDatma[j]*exph; //fxb[j] = ifo[0].sig.xDatmb[j]*exph; fxa[j] = ifo[0].sig.xDatma[j]*_c[i]-I*ifo[0].sig.xDatma[j]*_s[i]; fxb[j] = ifo[0].sig.xDatmb[j]*_c[i]-I*ifo[0].sig.xDatmb[j]*_s[i]; } #elif defined(GNUSINCOS) for(i=sett->N-1; i!=-1; --i) { phase = het1*i + sgnlt[1]*_tmp1[0][i]; sincos(phase, &sp, &cp); exph = cp - I*sp; fxa[i] = ifo[0].sig.xDatma[i]*exph; 
//ifo[0].sig.sig2; fxb[i] = ifo[0].sig.xDatmb[i]*exph; //ifo[0].sig.sig2; } #else for(i=sett->N-1; i!=-1; --i) { phase = het1*i + sgnlt[1]*_tmp1[0][i]; cp = cos(phase); sp = sin(phase); exph = cp - I*sp; fxa[i] = ifo[0].sig.xDatma[i]*exph; //ifo[0].sig.sig2; fxb[i] = ifo[0].sig.xDatmb[i]*exph; //ifo[0].sig.sig2; } #endif for(n=1; n<sett->nifo; ++n) { #if defined(SLEEF) // use simd sincos from the SLEEF library; // VECTLENDP is a simd vector length defined in the SLEEF library // and it depends on selected instruction set e.g. -DENABLE_AVX for (i=0; i<sett->N; i+=VECTLENDP) { for(j=0; j<VECTLENDP; j++) _p[j] = het1*(i+j) + sgnlt[1]*_tmp1[n][i+j]; a = vloadu(_p); v = xsincos(a); vstoreu(_p, v.x); // reuse _p for sin vstoreu(_c, v.y); for(j=0; j<VECTLENDP; ++j){ exph = _c[j] - I*_p[j]; fxa[i+j] = ifo[n].sig.xDatma[i+j]*exph; fxb[i+j] = ifo[n].sig.xDatmb[i+j]*exph; } } #elif defined(YEPPP) // use yeppp! library; // VLEN is length of vector to be processed // for caches L1/L2 64/256kb optimal value is ~2048 for (j=0; j<bnd; j+=VLEN) { //double *_tmp2 = &_tmp1[n][j]; for (i=0; i<VLEN; ++i) // _p[i] = het1*(i+j) + sgnlt[1]*_tmp2[i]; _p[i] = het1*(j+i) + sgnlt[1]*_tmp1[n][j+i]; status = yepMath_Sin_V64f_V64f(_p, _s, VLEN); assert(status == YepStatusOk); status = yepMath_Cos_V64f_V64f(_p, _c, VLEN); assert(status == YepStatusOk); for (i=0; i<VLEN; ++i) { //exph = _c[i] - I*_s[i]; //fxa[j+i] += ifo[n].sig.xDatma[j+i]*exph; //fxb[j+i] += ifo[n].sig.xDatmb[j+i]*exph; fxa[i+j] += ifo[n].sig.xDatma[i+j]*_c[i]-I*ifo[n].sig.xDatma[i+j]*_s[i]; fxb[i+j] += ifo[n].sig.xDatmb[i+j]*_c[i]-I*ifo[n].sig.xDatmb[i+j]*_s[i]; } } // remaining part is shorter than VLEN - no need to vectorize for (i=0; i<sett->N-bnd; ++i){ j = bnd + i; _p[i] = het1*j + sgnlt[1]*_tmp1[n][j]; } status = yepMath_Sin_V64f_V64f(_p, _s, sett->N-bnd); assert(status == YepStatusOk); status = yepMath_Cos_V64f_V64f(_p, _c, sett->N-bnd); assert(status == YepStatusOk); for (i=0; i<sett->N-bnd; ++i) { j = bnd + i; //exph = 
_c[i] - I*_s[i]; //fxa[j] += ifo[n].sig.xDatma[j]*exph; //fxb[j] += ifo[n].sig.xDatmb[j]*exph; fxa[j] += ifo[n].sig.xDatma[j]*_c[i]-I*ifo[n].sig.xDatma[j]*_s[i]; fxb[j] += ifo[n].sig.xDatmb[j]*_c[i]-I*ifo[n].sig.xDatmb[j]*_s[i]; } #elif defined(GNUSINCOS) for(i=sett->N-1; i!=-1; --i) { phase = het1*i + sgnlt[1]*_tmp1[n][i]; sincos(phase, &sp, &cp); exph = cp - I*sp; fxa[i] += ifo[n].sig.xDatma[i]*exph; fxb[i] += ifo[n].sig.xDatmb[i]*exph; } #else for(i=sett->N-1; i!=-1; --i) { phase = het1*i + sgnlt[1]*_tmp1[n][i]; cp = cos(phase); sp = sin(phase); exph = cp - I*sp; fxa[i] += ifo[n].sig.xDatma[i]*exph; fxb[i] += ifo[n].sig.xDatmb[i]*exph; } #endif } // Zero-padding for(i = sett->fftpad*sett->nfft-1; i != sett->N-1; --i) fxa[i] = fxb[i] = 0.; fftw_execute_dft(plans->plan, fxa, fxa); fftw_execute_dft(plans->plan, fxb, fxb); // Computing F-statistic for (i=sett->nmin; i<sett->nmax; i++) { F[i] = (sqr(creal(fxa[i])) + sqr(cimag(fxa[i])))/aa + (sqr(creal(fxb[i])) + sqr(cimag(fxb[i])))/bb; } // for (i=sett->nmin; i<sett->nmax; i++) // F[i] += (sqr(creal(fxb[i])) + sqr(cimag(fxb[i])))/bb; #pragma omp atomic (*FNum)++; #if 0 FILE *f1 = fopen("fraw-1.dat", "w"); for(i=sett->nmin; i<sett->nmax; i++) fprintf(f1, "%d %lf %lf\n", i, F[i], 2.*M_PI*i/((double) sett->fftpad*sett->nfft) + sgnl0); fclose(f1); #endif #ifndef NORMTOMAX //#define NAVFSTAT 4096 // Normalize F-statistics if(!(opts->white_flag)) // if the noise is not white noise FStat(F + sett->nmin, sett->nmax - sett->nmin, NAVFSTAT, 0); // f1 = fopen("fnorm-4096-1.dat", "w"); //for(i=sett->nmin; i<sett->nmax; i++) //fprintf(f1, "%d %lf %lf\n", i, F[i], 2.*M_PI*i/((double) sett->fftpad*sett->nfft) + sgnl0); //fclose(f1); // exit(EXIT_SUCCESS); for(i=sett->nmin; i<sett->nmax; i++) { if ((Fc = F[i]) > opts->trl) { // if F-stat exceeds trl (critical value) // Find local maximum for neighboring signals ii = i; while (++i < sett->nmax && F[i] > opts->trl) { if(F[i] >= Fc) { ii = i; Fc = F[i]; } // if F[i] } // while i // 
Candidate signal frequency sgnlt[0] = 2.*M_PI*ii/((FLOAT_TYPE)sett->fftpad*sett->nfft) + sgnl0; // Signal-to-noise ratio sgnlt[4] = sqrt(2.*(Fc-sett->nd)); // Checking if signal is within a known instrumental line int k, veto_status = 0; for(k=0; k<sett->numlines_band; k++) if(sgnlt[0]>=sett->lines[k][0] && sgnlt[0]<=sett->lines[k][1]) { veto_status=1; break; } int _sgnlc; if(!veto_status) { /* #pragma omp critical { (*sgnlc)++; // increase found number // Add new parameters to output array for (j=0; j<NPAR; ++j) // save new parameters sgnlv[NPAR*(*sgnlc-1)+j] = (FLOAT_TYPE)sgnlt[j]; } */ #pragma omp atomic capture { (*sgnlc)++; // increase found number _sgnlc = *sgnlc; } // Add new parameters to output array for (j=0; j<NPAR; ++j) // save new parameters sgnlv[NPAR*(_sgnlc-1)+j] = (FLOAT_TYPE)sgnlt[j]; #ifdef VERBOSE printf ("\nSignal %d: %d %d %d %d %d snr=%.2f\n", *sgnlc, pm, mm, nn, ss, ii, sgnlt[4]); #endif } } // if Fc > trl } // for i #else // new version imax = -1; // find local maxima first //printf("nmin=%d nmax=%d nfft=%d nblocks=%d\n", sett->nmin, sett->nmax, nfft, nfft/blksize); for(iblk=0; iblk < nfft/blksize; ++iblk) { blkavg = 0.; blkstart = sett->nmin + iblk*blksize; // block start index in F // in case the last block is shorter than blksize, include its elements in the previous block if(iblk==(nfft/blksize-1)) {blksize = sett->nmax - blkstart;} imax0 = imax+1; // index of first maximum in current block //printf("\niblk=%d blkstart=%d blksize=%d imax0=%d\n", iblk, blkstart, blksize, imax0); for(i=1; i <= blksize; ++i) { // include first element of the next block ii = blkstart + i; if(ii < sett->nmax) {ihi=ii+1;} else {ihi = sett->nmax; /*printf("ihi=%d ii=%d\n", ihi, ii);*/}; if(F[ii] > F[ii-1] && F[ii] > F[ihi]) { blkavg += F[ii]; Fmax[++imax] = ii; ++i; // next element can't be maximum - skip it } } // i // now imax points to the last element of Fmax // normalize in blocks blkavg /= (double)(imax - imax0 + 1); for(i=imax0; i <= imax; ++i) 
F[Fmax[i]] /= blkavg; } // iblk //f1 = fopen("fmax.dat", "w"); //for(i=1; i < imax; i++) //fprintf(f1, "%d %lf \n", Fmax[i], F[Fmax[i]]); //fclose(f1); //exit(EXIT_SUCCESS); // apply threshold limit for(i=0; i <= imax; ++i){ //if(F[Fmax[i]] > opts->trl) { if(F[Fmax[i]] > threshold) { sgnlt[0] = 2.*M_PI*i/((FLOAT_TYPE)sett->fftpad*sett->nfft) + sgnl0; // Signal-to-noise ratio sgnlt[4] = sqrt(2.*(F[Fmax[i]] - sett->nd)); // Checking if signal is within a known instrumental line int k, veto_status=0; for(k=0; k<sett->numlines_band; k++) if(sgnlt[0]>=sett->lines[k][0] && sgnlt[0]<=sett->lines[k][1]) { veto_status=1; break; } if(!veto_status) { (*sgnlc)++; // increase number of found candidates // Add new parameters to buffer array for (j=0; j<NPAR; ++j) sgnlv[NPAR*(*sgnlc-1)+j] = (FLOAT_TYPE)sgnlt[j]; #ifdef VERBOSE printf ("\nSignal %d: %d %d %d %d %d snr=%.2f\n", *sgnlc, pm, mm, nn, ss, Fmax[i], sgnlt[4]); #endif } } } // i #endif // old/new version #if TIMERS>2 tend = get_current_time(CLOCK_PROCESS_CPUTIME_ID); spindown_timer += get_time_difference(tstart, tend); spindown_counter++; #endif } // for ss } // omp parallel #ifndef VERBOSE printf("Number of signals found: %d\n", *sgnlc); #endif // tend = get_current_time(CLOCK_REALTIME); //time_elapsed = get_time_difference(tstart, tend); //printf("Parallel part: %e ( per thread %e ) s\n", time_elapsed, time_elapsed/omp_get_max_threads()); #if TIMERS>2 printf("\nTotal spindown loop time: %e s, mean spindown cpu-time: %e s (%d runs)\n", spindown_timer, spindown_timer/spindown_counter, spindown_counter); #endif return 0; } // jobcore
int main(void) { FILE *fpw; int i1,i2; int half_nx; double kx,delta_kx; double kx2; char name[50]; int INDEX; double A = 1.0; double alpha = 1.0; double delta_x = 1.0; double delta_t = 0.2; int n_x=128;//number of nodes in the system int T,T_write;//(T)number of time steps //T_write is after how many time steps we write profile in file //Taking the input parameters from a input file fpw=fopen("input.dat","r"); fscanf(fpw,"%d",&n_x); fscanf(fpw,"%le",&delta_x); fscanf(fpw,"%d",&T); fscanf(fpw,"%le",&delta_t); fscanf(fpw,"%d",&T_write); fclose(fpw); fftw_complex *comp,*g; fftw_plan planF,planFg,planB; comp = fftw_malloc(n_x* sizeof(fftw_complex)); g = fftw_malloc(n_x* sizeof(fftw_complex)); planF = fftw_plan_dft_1d(n_x,comp,comp,FFTW_FORWARD,FFTW_ESTIMATE); planFg = fftw_plan_dft_1d(n_x,g,g,FFTW_FORWARD,FFTW_ESTIMATE); planB = fftw_plan_dft_1d(n_x,comp,comp,FFTW_BACKWARD,FFTW_ESTIMATE); /**Creating Random Numbers uisng gsl library**/ const gsl_rng_type * T1; gsl_rng * r; gsl_rng_env_setup(); T1 = gsl_rng_default; r = gsl_rng_alloc(T1); //Making Initial Profile for(i1=0; i1 < n_x; ++i1) { double u = gsl_rng_uniform(r); __real__ comp[i1] = 0.5+ (0.5-u)*1e-4; __imag__ comp[i1] = 0.0; } half_nx = (int) n_x/2; delta_kx = (2.0*M_PI)/(n_x*delta_x); /** Opening a file to write a gnuplot script **/ FILE *gnu; gnu=fopen("plotAnimation.gp","w"); fprintf(gnu,"set yrange[0:1]\nset xrange[0:%d]\n",n_x); /** Printing the initial Profile **/ sprintf(name,"./output/c_%d.dat",0); fpw=fopen(name,"w"); for(i1=0; i1<n_x; ++i1) { fprintf(fpw,"%le\n",__real__ comp[i1]); } fclose(fpw); fprintf(gnu,"plot \"%s\" with lines\npause 0.5\n",name); /** Starting the time loop **/ for(INDEX=1; INDEX<=T; ++INDEX){ //initialize g for(i1=0; i1 < n_x; ++i1){ g[i1] = 2*A*comp[i1]*(1-comp[i1])*(1-2*comp[i1]); } /** Let us take comp to the Fourier space **/ fftw_execute_dft(planF,comp,comp); fftw_execute_dft(planFg,g,g); /** Evolve composition **/ for(i1=0; i1 < n_x; ++i1){ if(i1 < half_nx) kx = 
i1*delta_kx; else kx = (i1-n_x)*delta_kx; kx2 = kx*kx; comp[i1] = (comp[i1]-g[i1]*delta_t*alpha*kx2)/(1+2*kx2*kx2*delta_t); } /** Take composition back to real space **/ fftw_execute_dft(planB,comp,comp); for(i1=0; i1<n_x; ++i1){ comp[i1] = comp[i1]/(n_x); } /**Print after a few Time steps**/ if(INDEX%T_write==0) { sprintf(name,"./output/c_%d.dat",INDEX); fpw=fopen(name,"w"); for(i1=0; i1<n_x; ++i1) { fprintf(fpw,"%le\n",__real__ comp[i1]); __imag__ comp[i1] = 0.0; } fclose(fpw); fprintf(gnu,"plot \"%s\" with lines\npause 0.5\n",name); } } //Printing the final profile fprintf(gnu,"set term png\nset output \"finalProfile.png\"\nreplot\nset term x11"); /**Free the memory allocated dynamically**/ gsl_rng_free (r); fclose(gnu); fftw_free(comp); fftw_free(g); fftw_destroy_plan(planF); fftw_destroy_plan(planFg); fftw_destroy_plan(planB); }
int main(int argc, char *argv[]) { if (argc<2) { printf("No file names given.\n"); printf("Use:\n\t%s audio_file output_prefix\n\n", argv[0]); exit(1); } av_register_all(); audio_data_t snd_data; read_audio(argv[1], &snd_data); int per_frame = snd_data.sample_rate/30; int32_t maxVal = 0; switch (snd_data.sample_size) { case 1: maxVal = 1<<6; break; case 2: maxVal = 1<<12; break; default: maxVal = 1<<28; } const int N = 120; fftw_complex *fft_in __attribute__ ((aligned (16))); fftw_complex **fft_out __attribute__ ((aligned (16))); fftw_plan fft_plan __attribute__ ((aligned (16))); fft_in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N); fft_out = (fftw_complex**) fftw_malloc(sizeof(fftw_complex*) * N); for (int i=0; i<N; ++i) { fft_out[i] = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N); } fft_plan = fftw_plan_dft_1d(N, fft_in, fft_out[0], FFTW_FORWARD, FFTW_ESTIMATE); size_t fnum, cur_out; for (size_t i=0; i<N; ++i) { fft_in[i] = 0.0; for (size_t j=0; j<N; ++j) { fft_out[i][j] = 0.0; } } show_audio_info(&snd_data); RiBegin(RI_NULL); size_t num_frames = (snd_data.num_samples-per_frame)/per_frame; for (size_t i = 0, cur_out = 0, fnum = 1; i<(snd_data.num_samples-per_frame); i+= per_frame, ++fnum) { size_t j_size = N/2; size_t stp = per_frame/(j_size); size_t ci = per_frame*15+i; for (size_t j=0; j< j_size;++j) { fft_in[j] = (double)get_sample(&snd_data, ci, 0)/(double)maxVal; ci += stp; } fftw_execute_dft(fft_plan, fft_in, fft_out[cur_out]); printf("Calling doFrame %lu of %lu\n", fnum, num_frames); doFrame(fnum, cur_out, N, fft_out, argv[2]); cur_out += 1; if (cur_out == N) { cur_out = 0; } } RiEnd(); for (size_t i=0; i<N; ++i) { fftw_free(fft_out[i]); } fftw_destroy_plan(fft_plan); fftw_free(fft_out); fftw_free(fft_in); fftw_cleanup(); free(snd_data.samples); return 0; }
// Runs `plan` on the `in` and `out` buffers through FFTW's new-array execute
// call. (NOTE(review): plan/in/out are presumably members of FFT_NUC_VECTOR;
// their declarations are outside this chunk.)
void FFT_NUC_VECTOR::execute()
{
    fftw_execute_dft(plan, in, out);
}
int main(void) { FILE *fpw,*gnu; int i1; int half_nx; double kx, delta_kx, delta_ky; double kx2,k2, k4; char name[50]; int INDEX; double A = 1.0; double delta_x = 1.0; double delta_t = 0.2; double alpha=1,beta=1; int n_x; int T=1000; int T_write=20; n_x= 128; /**Input the parameters**/ fpw=fopen("input.dat","r"); fscanf(fpw,"%d",&n_x); fscanf(fpw,"%le",&delta_x); fscanf(fpw,"%d",&T); fscanf(fpw,"%le",&delta_t); fscanf(fpw,"%d",&T_write); fscanf(fpw,"%le",&A); fscanf(fpw,"%le",&alpha); fscanf(fpw,"%le",&beta); fclose(fpw); fftw_complex *comp,*g; fftw_plan planF,planFg, planB; comp = fftw_malloc(n_x* sizeof(fftw_complex)); g = fftw_malloc(n_x* sizeof(fftw_complex)); planF = fftw_plan_dft_1d(n_x,comp,comp,FFTW_FORWARD,FFTW_ESTIMATE); planFg = fftw_plan_dft_1d(n_x,g,g,FFTW_FORWARD,FFTW_ESTIMATE); planB = fftw_plan_dft_1d(n_x,comp,comp,FFTW_BACKWARD,FFTW_ESTIMATE); /**Initial Profile**/ for(i1=0; i1 < n_x; ++i1){ if(i1<(n_x/4) || i1>(3*n_x/4)) { __real__ comp[i1] = 0.1; } else { __real__ comp[i1] = 1; } __imag__ comp[i1] = 0.0; } half_nx = (int) n_x/2; delta_kx = (2.0*M_PI)/(n_x*delta_x); /**Make a gnu script**/ gnu=fopen("plotAnimation.gp","w"); fprintf(gnu,"set xrange[0:%d]",n_x); fprintf(gnu,"\nset yrange[-0.1:1.1]"); /**printing the initial profile**/ sprintf(name,"./output/c_%d.dat",0); fpw=fopen(name,"w"); for(i1=0; i1<n_x; ++i1){ fprintf(fpw,"%le\n",__real__ comp[i1]); __imag__ comp[i1]=0; } fclose(fpw); fprintf(gnu,"\nplot \"%s\" with lines",name); fprintf(gnu,"\npause 1"); /**Starting the time loop**/ for(INDEX=1; INDEX<=T; ++INDEX){ /** calculating g **/ for(i1=0; i1 < n_x; ++i1){ g[i1] = 2*A*(comp[i1])*(1-comp[i1])*(1-2*comp[i1]); } /** Let us take comp to the Fourier space **/ fftw_execute_dft(planF,comp,comp); fftw_execute_dft(planFg,g,g); /** Evolve composition **/ for(i1=0; i1 < n_x; ++i1){ if(i1 < half_nx) kx = i1*delta_kx; else kx = (i1-n_x)*delta_kx; kx2 = kx*kx; k2 = kx2; k4= k2*k2; comp[i1] = (comp[i1]-alpha*k2*delta_t*g[i1])/(1+2*beta*k4*delta_t); } 
/** Take composition back to real space **/ fftw_execute_dft(planB,comp,comp); for(i1=0; i1<n_x; ++i1){ comp[i1] = comp[i1]/(n_x); } /**Printing the Results**/ if(INDEX%T_write==0) { sprintf(name,"./output/c_%d.dat",INDEX); fpw=fopen(name,"w"); for(i1=0; i1<n_x; ++i1){ fprintf(fpw,"%le\n",__real__ comp[i1]); __imag__ comp[i1]=0; } fclose(fpw); fprintf(gnu,"\nplot \"%s\" with lines",name); fprintf(gnu,"\npause 0.4"); } } //Freeing the dynamically allocated memory fftw_free(comp); fftw_free(g); fftw_destroy_plan(planFg); fftw_destroy_plan(planF); fftw_destroy_plan(planB); fclose(gnu); }
//Perform an FFT on two arrays of data void FFTComplex::transform(int size, Complex *in, Complex *out) { if (plan == 0) throw FFTException("Can not perform transform on NULL plan."); fftw_execute_dft((fftw_plan_s *)plan, reinterpret_cast<fftw_complex *>(in), reinterpret_cast<fftw_complex *>(out)); }
/** * create gabor filter bank, and their DFT represent. */ void gabor_create(struct GaborSetting *setting) { struct GaborBan *pBank; fftw_complex *tmp; if (!setting) return ; setting->bank = (struct GaborBank*)malloc(sizeof(struct GaborBank)); pBank = setting->bank; pBank->filter_num = setting->scale_num * setting->orientation_num; pBank->filter_len = setting->kenerl_w * setting->kenerl_h; /* allocate for filters and plans */ int sz = 0; sz = sizeof(fftw_complex*) * pBank->filter_num; sz += sizeof(fftw_complex) * (pBank->filter_num + 3) * pBank->filter_len; pBank->pMem = fftw_malloc(sz); if (!pBank->pMem) return ; memset(pBank->pMem, 0, sz); pBank->bank_dfts = (fftw_complex**)pMem; tmp = (fftw_complex*)(pBank->bank_dfts + pBank->filter_num); for (int i = 0; i < pBank->filter_num; ++i) { *(pBank->bank_dfts+i) = tmp; tmp += pBank->filter_sz; } pBank->in_buffer = tmp; tmp += pBank->filter_sz; pBank->dft_buffer = tmp; tmp += pBank->filter_sz; pBank->idft_buffer = tmp; pBank->plan_foreward = fftw_create_dft_plan_2d(setting->kenerl_w, setting->kenerl_h, in_buffer, dft_buffer, FFTW_FOREWARD, FFTW_ESTIMATE); pBank->plan_backward = fftw_create_dft_plan_2d(setting->kenerl_w, setting->kenerl_h, dft_buffer, idft_buffer, FFTW_BACKWARD, FFTW_ESTIMATE); for (int i = 0; i < setting->scale_num; ++i) for (int j = 0; j < setting->orientation_num; ++j) { _gabor_mk_kernel(setting->orientations[i], setting->scales[j], pBank->in_buffer, setting->kernel_w, setting->kernel_h); /* get the dft of the kernel */ fftw_execute_dft(plan_foreward, pBank->in_buffer, pBank->bank_dfts[ i*setting->orientation_num + j ]); } return ; }
void Transformer_CPU::transform_forward_z(double *inout) { fftw_execute_dft(plan_z_forw, (fftw_complex*)inout, (fftw_complex*)inout); }
int main(void) { FILE *fpw,*gnu; int i1; char name[50]; int INDEX; int half_nx; double kx, delta_kx, delta_ky; double kx2,k2, k4; double A = 1.0; double delta_x = 1.0; double delta_t = 0.2; double alpha=1,beta=1; int n_x; int T=1000; int T_write=20; n_x= 128; int flag=0; /**Input the parameters**/ fpw=fopen("input.dat","r"); fscanf(fpw,"%d",&flag);//to check for the case fscanf(fpw,"%d",&n_x); fscanf(fpw,"%le",&delta_x); fscanf(fpw,"%d",&T); fscanf(fpw,"%d",&T_write); fscanf(fpw,"%le",&delta_t); fclose(fpw); //checking which case is used and assigning the value to A and beta if(flag==1) { A=1; beta=1; } else if(flag==2) { A=1; beta=4; } else if(flag==3) { A=4; beta=1; } else { printf("\nInvalid input\nExiting program......"); exit(0); } fftw_complex *comp,*compDel,*g; fftw_plan planF,planFg, planB; fftw_plan planF_Del,planB_Del; comp = fftw_malloc(n_x* sizeof(fftw_complex)); compDel = fftw_malloc(n_x* sizeof(fftw_complex)); g = fftw_malloc(n_x* sizeof(fftw_complex)); planF = fftw_plan_dft_1d(n_x,comp,comp,FFTW_FORWARD,FFTW_ESTIMATE); planF_Del = fftw_plan_dft_1d(n_x,compDel,compDel,FFTW_FORWARD,FFTW_ESTIMATE); planFg = fftw_plan_dft_1d(n_x,g,g,FFTW_FORWARD,FFTW_ESTIMATE); planB = fftw_plan_dft_1d(n_x,comp,comp,FFTW_BACKWARD,FFTW_ESTIMATE); planB_Del = fftw_plan_dft_1d(n_x,compDel,compDel,FFTW_BACKWARD,FFTW_ESTIMATE); /**Initial Profile**/ for(i1=0; i1 < n_x; ++i1){ if(i1<(n_x/4) || i1>(3*n_x/4)) { __real__ comp[i1] = 0.1; } else { __real__ comp[i1] = 1; } __imag__ comp[i1] = 0.0; } half_nx = (int) n_x/2; delta_kx = (2.0*M_PI)/(n_x*delta_x); /**Make a gnu script**/ gnu=fopen("plotAnimation.gp","w"); fprintf(gnu,"set xrange[0:%d]",n_x); fprintf(gnu,"\nset yrange[-0.1:1.1]"); /**printing the initial profile**/ sprintf(name,"./output/c_%d.dat",0); fpw=fopen(name,"w"); for(i1=0; i1<n_x; ++i1){ fprintf(fpw,"%le\n",__real__ comp[i1]); __imag__ comp[i1]=0; } fclose(fpw); fprintf(gnu,"\nplot \"%s\" with lines",name); fprintf(gnu,"\npause 1"); /**Starting the time loop**/ 
for(INDEX=1; INDEX<=T; ++INDEX){ /** calculating g **/ for(i1=0; i1 < n_x; ++i1){ g[i1] = 2*A*(comp[i1])*(1-comp[i1])*(1-2*comp[i1]); } /** Let us take comp to the Fourier space **/ fftw_execute_dft(planF,comp,comp); fftw_execute_dft(planFg,g,g); /** Evolve composition **/ for(i1=0; i1 < n_x; ++i1){ if(i1 < half_nx) kx = i1*delta_kx; else kx = (i1-n_x)*delta_kx; kx2 = kx*kx; k2 = kx2; k4= k2*k2; //main equation implementation take place here comp[i1] = (comp[i1]-alpha*k2*delta_t*g[i1])/(1+2*beta*k4*delta_t); } /** Take composition back to real space **/ fftw_execute_dft(planB,comp,comp); for(i1=0; i1<n_x; ++i1){ comp[i1] = comp[i1]/(n_x); } /**Printing the Results**/ if(INDEX%T_write==0) { sprintf(name,"./output/c_%d.dat",INDEX); fpw=fopen(name,"w"); for(i1=0; i1<n_x; ++i1){ fprintf(fpw,"%le\n",__real__ comp[i1]); __imag__ comp[i1]=0; } fclose(fpw); fprintf(gnu,"\nplot \"%s\" with lines",name); fprintf(gnu,"\npause 0.4"); } } sprintf(name,"output%d.dat",flag); fpw=fopen(name,"w"); /**calculating the interface width for the final profile**/ //first find the point where the composition value just increases 0.5 int pos=1; for(i1=0; i1<n_x/2; ++i1){ if(__real__ comp[i1]>=0.5){ pos=i1; break; } } //now find the slope by taking a point 10 points ahead of this and 10 points before this double m=(__real__ comp[pos]-__real__ comp[pos-1]); //the interface width will be 1/slope double interfaceWidth=1/m; fprintf(fpw,"Interface Width = %le\n",interfaceWidth); /**calculating the interfacial energy**/ //fisrt find dc/dx for(i1=0; i1 < n_x; ++i1){ compDel[i1] = comp[i1]; } /** Let us take comp to the Fourier space **/ fftw_execute_dft(planF_Del,compDel,compDel); /** Evolve composition **/ for(i1=0; i1 < n_x; ++i1){ if(i1 < half_nx) kx = i1*delta_kx; else kx = (i1-n_x)*delta_kx; kx2 = kx*kx; //main equation implementation take place here __real__ compDel[i1] = -kx * __imag__ compDel[i1]; __imag__ compDel[i1] = __real__ compDel[i1] * kx; } /** Take composition back to real space 
**/ fftw_execute_dft(planB_Del,compDel,compDel); for(i1=0; i1<n_x; ++i1){ compDel[i1] = compDel[i1]/(n_x); } double energy=0; for(i1=0; i1<n_x; ++i1){ energy+=(A*comp[i1]*comp[i1]*(1-comp[i1])*(1-comp[i1]) + beta*compDel[i1]*compDel[i1])*delta_x; } fprintf(fpw,"Interfacial Energy = %le\n",energy); fclose(fpw); /**Freeing the dynamically allocated memory**/ fftw_destroy_plan(planFg); fftw_free(comp); fftw_free(g); fftw_destroy_plan(planF); fftw_destroy_plan(planB); fclose(gnu); }
/*--------------------------------------------------------------------------*/
/* Execute: run plan p with `in` as input array and `out` as output array. */
void _fftwE (fftw_plan p, fftw_complex *in, fftw_complex *out)
{
    fftw_execute_dft(p, in, out);
}
/*! \memberof splitop
 draw some nice representation of the current state

 Inside `rect` (used as clip region) this paints, on a white background:
  - a black horizontal axis at half height,
  - the potential w->prefs->potential->data in black, scaled to its largest
    absolute value,
  - |w->apsi| in blue and |psi| in red, both scaled by the peak of |apsi|,
  - |FFT(psi)| in orange, measured upwards from the bottom edge, with the
    upper half of the spectrum (indices bins/2..bins-1) drawn to the left of
    the centre and the lower half to the right.

 A curve whose data are all zero is drawn flat on its baseline instead of
 producing non-finite coordinates. Nothing but the axis is drawn when
 w->prefs->bins is not positive.
 */
void splitop_draw(splitop_t * w, cairo_t * cr, cairo_rectangle_t rect, fftw_complex * psi) {
	const double y0 = rect.height/2;
	const double base_y = rect.y + y0;
	const int bins = w->prefs->bins;

	cairo_save(cr);
	cairo_rectangle(cr, rect.x, rect.y, rect.width, rect.height);
	cairo_clip(cr);

	/* white background */
	cairo_set_source_rgb(cr, 1, 1, 1);
	cairo_set_line_width(cr, 2);
	cairo_paint(cr);

	/* horizontal axis */
	cairo_set_source_rgb(cr, 0, 0, 0);
	cairo_move_to(cr, rect.x, base_y);
	cairo_line_to(cr, rect.x + rect.width, base_y);
	cairo_stroke(cr);

	if (bins <= 0) {
		/* nothing to plot; also avoids dividing by bins below */
		cairo_restore(cr);
		return;
	}

	const double xscale = rect.width/bins;
	double yscale, max;

	/* potential */
	const double * V = w->prefs->potential->data;
	max = 0;
	for (int n = 0; n < bins; n++) {
		double a = fabs(V[n]);
		if (max < a) { max = a; }
	}
	/* an all-zero curve would give y0/0; scale it to 0 instead */
	yscale = (max > 0) ? y0/max : 0;
	cairo_set_source_rgb(cr, 0, 0, 0);
	cairo_move_to(cr, rect.x, base_y);
	for (int n = 0; n < bins; n++) {
		cairo_line_to(cr, rect.x + n * xscale, base_y - V[n] * yscale);
	}
	cairo_stroke(cr);

	/* |apsi|; its peak is placed at 4/5 of the half height */
	fftw_complex * apsi = w->apsi;
	max = 0;
	for (int n = 0; n < bins; n++) {
		double a = cabs(apsi[n]);
		if (max < a) { max = a; }
	}
	yscale = (max > 0) ? y0/(5*max/4) : 0;
	cairo_set_line_width(cr, 1);
	cairo_set_source_rgb(cr, 0, 0, 1);
	cairo_move_to(cr, rect.x, base_y);
	for (int n = 0; n < bins; n++) {
		cairo_line_to(cr, rect.x + n * xscale, base_y - cabs(apsi[n]) * yscale);
	}
	cairo_stroke(cr);

	/* |psi|, deliberately on the same vertical scale as |apsi| */
	cairo_set_line_width(cr, 2);
	cairo_set_source_rgb(cr, 1, 0, 0);
	cairo_move_to(cr, rect.x, base_y);
	for (int n = 0; n < bins; n++) {
		cairo_line_to(cr, rect.x + n * xscale, base_y - cabs(psi[n]) * yscale);
	}
	cairo_stroke(cr);

	/* spectrum of psi */
	fftw_complex * psik = fftw_alloc_complex(bins);
	assert(psik);
	if (!psik) {
		/* assert is compiled out with NDEBUG: skip the spectrum rather
		   than hand a NULL buffer to FFTW */
		cairo_restore(cr);
		return;
	}
	fftw_execute_dft(w->fwd, psi, psik);
	max = 0;
	for (int n = 0; n < bins; n++) {
		double a = cabs(psik[n]);
		if (max < a) { max = a; }
	}
	yscale = (max > 0) ? y0/(5*max/4) : 0;
	cairo_set_line_width(cr, 1);
	cairo_set_source_rgb(cr, 1, .5, 0);

	const double reg = 1.0 / bins;
	const double mid_x = rect.x + rect.width/2;
	const double floor_y = rect.y + 2*y0;
	/* upper half of the array goes left of centre (offset k-bins is
	   negative); the first point starts the path */
	for (int k = bins/2; k < bins; k++) {
		double x = mid_x + (k-bins) * reg * rect.width;
		double y = floor_y - cabs(psik[k]) * yscale;
		if (k == bins/2) {
			cairo_move_to(cr, x, y);
		} else {
			cairo_line_to(cr, x, y);
		}
	}
	/* lower half continues to the right of centre */
	for (int k = 0; k < bins/2; k++) {
		double x = mid_x + k * reg * rect.width;
		double y = floor_y - cabs(psik[k]) * yscale;
		cairo_line_to(cr, x, y);
	}
	cairo_stroke(cr);
	fftw_free(psik);

	cairo_restore(cr);
}
void Transformer_CPU::transform_inverse_y(double *inout) { fftw_execute_dft(plan_y_inv, (fftw_complex*)inout, (fftw_complex*)inout); }
void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) { int i,offset,num; double norm; FFT_DATA *data,*copy; /* pre-remap to prepare for 1st FFTs if needed copy = loc for remap result */ if (plan->pre_plan) { if (plan->pre_target == 0) copy = out; else copy = plan->copy; remap_3d((double *) in, (double *) copy, (double *) plan->scratch, plan->pre_plan); data = copy; } else data = in; /* 1d FFTs along fast axis */ if (flag == -1) fftw_execute_dft(plan->plan_fast_forward,data,data); else fftw_execute_dft(plan->plan_fast_backward,data,data); /* 1st mid-remap to prepare for 2nd FFTs copy = loc for remap result */ if (plan->mid1_target == 0) copy = out; else copy = plan->copy; remap_3d((double *) data, (double *) copy, (double *) plan->scratch, plan->mid1_plan); data = copy; /* 1d FFTs along mid axis */ if (flag == -1) fftw_execute_dft(plan->plan_mid_forward,data,data); else fftw_execute_dft(plan->plan_mid_backward,data,data); /* 2nd mid-remap to prepare for 3rd FFTs copy = loc for remap result */ if (plan->mid2_target == 0) copy = out; else copy = plan->copy; remap_3d((double *) data, (double *) copy, (double *) plan->scratch, plan->mid2_plan); data = copy; /* 1d FFTs along slow axis */ if (flag == -1) fftw_execute_dft(plan->plan_slow_forward,data,data); else fftw_execute_dft(plan->plan_slow_backward,data,data); /* post-remap to put data in output format if needed destination is always out */ if (plan->post_plan) remap_3d((double *) data, (double *) out, (double *) plan->scratch, plan->post_plan); /* scaling if required */ if (flag == 1 && plan->scaled) { norm = plan->norm; num = plan->normnum; for (i = 0; i < num; i++) { out[i][0] *= norm; out[i][1] *= norm; } } }
// discrete Fourier transform // fftw_execute_dft for comp* arrays (why or why can't FFTW use std::complex<double>?) inline void fftw_execute_dft(const fftw_plan p, comp* in, comp* out) { fftw_execute_dft(p, reinterpret_cast<fftw_complex*>(in), reinterpret_cast<fftw_complex*>(out)); }