void test_correctness(int n) { int howmany; fftw_plan validated_plan_forward, validated_plan_backward; WHEN_VERBOSE(1, my_printf("Testing correctness for n = %d...", n); my_fflush(stdout)); /* produce a good plan */ validated_plan_forward = fftw_create_plan(n, FFTW_FORWARD, measure_flag | wisdom_flag); validated_plan_backward = fftw_create_plan(n, FFTW_BACKWARD, measure_flag | wisdom_flag); for (howmany = 1; howmany <= MAX_HOWMANY; ++howmany) test_in_place_both(n, howmany, howmany, validated_plan_forward, validated_plan_backward); fftw_destroy_plan(validated_plan_forward); fftw_destroy_plan(validated_plan_backward); if (!(wisdom_flag & FFTW_USE_WISDOM) && chk_mem_leak) fftw_check_memory_leaks(); WHEN_VERBOSE(1, my_printf("OK\n")); }
/*
 * Set up a 1D complex-to-complex FFT of length nx on top of FFTW2.
 *
 * pfft   Receives the newly allocated handle; set to NULL on entry and
 *        only filled in on success.
 * nx     Transform length.
 * flags  Accepted for interface compatibility; not consulted here because
 *        the planner always runs in FFTW_ESTIMATE mode.
 *
 * Returns 0 on success, EINVAL for a NULL pfft, ENOMEM when the handle
 * cannot be allocated and -1 when any of the four plans cannot be created.
 *
 * single[a][b] holds the plan for a = 0 out-of-place / 1 in-place and
 * b = 0 backward / 1 forward.
 */
int gmx_fft_init_1d(gmx_fft_t * pfft, int nx, enum gmx_fft_flag flags)
{
    int       i, j;
    gmx_fft_t fft;
    int       fftw_flags;

    /* FFTW2 is slow to measure, so we do not use it */
    /* If you change this, add an #ifndef for GMX_DISABLE_FFTW_MEASURE around it! */
    fftw_flags = FFTW_ESTIMATE;

    if (pfft == NULL)
    {
        gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
        return EINVAL;
    }
    *pfft = NULL;

    if ((fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
    {
        return ENOMEM;
    }

    /*
     * Fill in every scalar/pointer member before anything can fail, so the
     * handle passed to gmx_fft_destroy() on the error path below never
     * carries indeterminate values.
     * No workspace is needed for complex-to-complex FFTs.
     */
    fft->work = NULL;
    fft->ndim = 1;
    fft->nx   = nx;

    fft->multi[0][0] = NULL;
    fft->multi[0][1] = NULL;
    fft->multi[1][0] = NULL;
    fft->multi[1][1] = NULL;

    fft->single[0][0] = fftw_create_plan(nx,FFTW_BACKWARD,FFTW_OUT_OF_PLACE|fftw_flags);
    fft->single[0][1] = fftw_create_plan(nx,FFTW_FORWARD,FFTW_OUT_OF_PLACE|fftw_flags);
    fft->single[1][0] = fftw_create_plan(nx,FFTW_BACKWARD,FFTW_IN_PLACE|fftw_flags);
    fft->single[1][1] = fftw_create_plan(nx,FFTW_FORWARD,FFTW_IN_PLACE|fftw_flags);

    for (i = 0; i < 2; i++)
    {
        for (j = 0; j < 2; j++)
        {
            if (fft->single[i][j] == NULL)
            {
                gmx_fatal(FARGS,"Error initializing FFTW2 plan.");
                gmx_fft_destroy(fft);
                return -1;
            }
        }
    }

    *pfft = fft;
    return 0;
}
/*
 * Record the three proxies, register a fresh PencilArrayInfo (copy of
 * `info`, counter at zero) at the end of infoVec, and create the in-place
 * forward and backward 1D plans.
 *
 * NOTE(review): the forward plan is sized by info.sizeX while the backward
 * plan is sized by info.sizeY -- confirm that the asymmetry is intended.
 */
void NormalLineArray::setup (LineFFTinfo &info,
                             CProxy_NormalLineArray _xProxy,
                             CProxy_NormalLineArray _yProxy,
                             CProxy_NormalLineArray _zProxy)
{
    xProxy = _xProxy;
    yProxy = _yProxy;
    zProxy = _zProxy;

    PencilArrayInfo *entry = new PencilArrayInfo();
    entry->info = info;
    entry->count = 0;
    infoVec.insert(infoVec.size(), entry);

    line = NULL;

    fwdplan = fftw_create_plan(info.sizeX, FFTW_FORWARD, FFTW_IN_PLACE);
    bwdplan = fftw_create_plan(info.sizeY, FFTW_BACKWARD, FFTW_IN_PLACE);

    id = -1;
}
/*
 * Fortran-callable wrapper around fftw_create_plan().
 *
 * All arguments arrive by reference.  A negative *idir selects
 * FFTW_FORWARD, anything else FFTW_BACKWARD.  The resulting plan (whatever
 * fftw_create_plan() returns) is stored through p.
 */
void F77_FUNC_(fftw_f77_create_plan,FFTW_F77_CREATE_PLAN)
(fftw_plan *p, int *n, int *idir, int *flags)
{
     fftw_direction dir;

     if (*idir < 0) {
          dir = FFTW_FORWARD;
     } else {
          dir = FFTW_BACKWARD;
     }

     *p = fftw_create_plan(*n, dir, *flags);
}
/*
 * Toggle-button callback that switches the active data source.
 *
 * Nothing happens unless the button is in its pressed state.  Otherwise
 * drawing and the input thread are stopped, the current source is closed,
 * the source encoded in `data` is opened and -- if that succeeds -- a new
 * FFT plan is created and input/drawing are restarted.
 *
 * Always returns TRUE.
 *
 * NOTE(review): the previous `plan` is overwritten without being destroyed
 * here -- confirm whether it is released elsewhere.
 */
gint set_data_source(GtkWidget *widget, gpointer data)
{
    if (!GTK_TOGGLE_BUTTON(widget)->active)
    {
        /* button released: nothing to do */
        return TRUE;
    }

    /* its pressed */
    draw_stop();
    input_thread_stopper(data_handle);
    close_datasource(data_handle);

    data_source = (DataSource) GPOINTER_TO_INT(data);

    /* start even if none previously opened (in case previous sound
     * source was bad) */
    data_handle = open_datasource(data_source);
    if (data_handle < 0)
    {
        return TRUE;
    }

#ifdef USING_FFTW2
    plan = fftw_create_plan(nsamp, FFTW_FORWARD, FFTW_ESTIMATE);
#elif USING_FFTW3
    plan = fftw_plan_r2r_1d(nsamp, raw_fft_in, raw_fft_out, FFTW_R2HC, FFTW_ESTIMATE);
#endif

    /* Fix all gui controls that depend on ring_rate (adjustments and such) */
    ring_rate_changed();
    input_thread_starter(data_handle);
    draw_start();

    return TRUE;
}
void polyphase_seg(float* data) { int i,n; static fftw_plan planfwd; float *p; if (!planfwd) { planfwd=fftw_create_plan(P_FFT_LEN, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE | FFTW_USE_WISDOM ); } for (i=0;i<P_FFT_LEN;i++) { f_data[2*i] = 0; f_data[2*i+1] = 0; for (n=0;n<N_WINDOWS;n++) { f_data[2*i] += data[2*i+2*n*P_FFT_LEN]*filter_r[P_FFT_LEN*n + i]; f_data[2*i+1] += data[2*i+2*n*P_FFT_LEN+1]*filter_i[P_FFT_LEN*n + i]; } } fftw_one(planfwd, (fftw_complex *)f_data, (fftw_complex *)NULL); /*for (i=0;i<P_FFT_LEN;i++) { fprintf( stderr, "%f %f\n", f_data[2*i], f_data[2*i+1]); }*/ //fprintf( stderr, "%f %f ", f_data[2*3], f_data[2*3+1] ); p = f_data; for (i=0; i<P_FFT_LEN; i++) { output_samples(p, i, obuf_pos); p += IFFT_LEN*2; } obuf_pos+=IFFT_LEN*2/CHAR_BIT; }
void dft(double *jr, double *ji, int n, int iflag) { fftw_plan plan; int i; double ninv; FFTW_COMPLEX *cbuf; static int wisdom_inited=0; char *ram_cache_wisdom; int plan_flags; if(!wisdom_inited) { wisdom_inited=1; wisdom_file=getenv("GRACE_FFTW_WISDOM_FILE"); ram_cache_wisdom=getenv("GRACE_FFTW_RAM_WISDOM"); if(ram_cache_wisdom) sscanf(ram_cache_wisdom, "%d", &using_wisdom); /* turn on wisdom if it is requested even without persistent storage */ if(wisdom_file && wisdom_file[0] ) { /* if a file was specified in GRACE_FFTW_WISDOM_FILE, try to read it */ FILE *wf; fftw_status fstat; wf=fopen(wisdom_file,"r"); if(wf) { fstat=fftw_import_wisdom_from_file(wf); fclose(wf); initial_wisdom=fftw_export_wisdom_to_string(); } else initial_wisdom=0; atexit(save_wisdom); using_wisdom=1; /* if a file is specified, always use wisdom */ } } plan_flags=using_wisdom? (FFTW_USE_WISDOM | FFTW_MEASURE) : FFTW_ESTIMATE; plan=fftw_create_plan(n, iflag?FFTW_BACKWARD:FFTW_FORWARD, plan_flags | FFTW_IN_PLACE); cbuf=xcalloc(n, sizeof(*cbuf)); if(!cbuf) return; for(i=0; i<n; i++) { cbuf[i].re=jr[i]; cbuf[i].im=ji[i]; } fftw(plan, 1, cbuf, 1, 1, 0, 1, 1); fftw_destroy_plan(plan); if(!iflag) { ninv=1.0/n; for(i=0; i<n; i++) { jr[i]=cbuf[i].re*ninv; ji[i]=cbuf[i].im*ninv; } } else { for(i=0; i<n; i++) { jr[i]=cbuf[i].re; ji[i]=cbuf[i].im; } } XCFREE(cbuf); }
/*
 * Fortran-callable helper that creates an in-place, FFTW_ESTIMATE 1D plan
 * of length *n and stores it through p.  A negative *idir selects
 * FFTW_FORWARD, anything else FFTW_BACKWARD.
 *
 * A NULL plan only produces a warning on stderr; the function returns 0 in
 * every case.
 */
int F77_FUNC_ (create_plan_1d, CREATE_PLAN_1D)(fftw_plan *p, int *n, int *idir)
{
   fftw_direction dir = FFTW_BACKWARD;

   if (*idir < 0) {
      dir = FFTW_FORWARD;
   }

   *p = fftw_create_plan(*n, dir, FFTW_ESTIMATE | FFTW_IN_PLACE);

   if (!*p) {
      fprintf(stderr," *** CREATE_PLAN: warning empty plan ***\n");
   }

   return 0;
}
/*
 * Time one transform configuration.
 *
 * n         transform length
 * dir       transform direction
 * flags     extra planner flags, OR-ed with the global speed, wisdom and
 *           no-vector flags
 * specific  non-zero: plan with fftw_create_plan_specific() against the
 *           actual buffers; zero: plan with fftw_create_plan()
 *
 * Reports planner time, optionally validates the plan with test_ergun()
 * (only when `paranoid` is set and the plan is out-of-place), then times
 * the transform over howmany_fields interleaved fields and prints the
 * figures according to the verbosity level.
 */
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     const int plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;
     fftw_complex *in, *out;
     fftw_plan plan;
     fftw_time begin, end;
     double t;

     in = (fftw_complex *) fftw_malloc(n * howmany_fields * sizeof(fftw_complex));
     out = (fftw_complex *) fftw_malloc(n * howmany_fields * sizeof(fftw_complex));

     /* time the planner itself */
     begin = fftw_get_time();
     plan = specific
          ? fftw_create_plan_specific(n, dir, plan_flags,
                                      in, howmany_fields,
                                      out, howmany_fields)
          : fftw_create_plan(n, dir, plan_flags);
     end = fftw_get_time();

     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));
     WHEN_VERBOSE(2, fftw_print_plan(plan));

     if (paranoid && !(flags & FFTW_IN_PLACE)) {
          begin = fftw_get_time();
          test_ergun(n, dir, plan);
          end = fftw_get_time();

          t = fftw_time_to_sec(fftw_time_diff(end, begin));
          WHEN_VERBOSE(2, printf("time for validation: %f s\n", t));
     }

     FFTW_TIME_FFT(fftw(plan, howmany_fields, in, howmany_fields, 1, out, howmany_fields, 1), in, n * howmany_fields, t);

     fftw_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)" " = %f\n", howmany_fields * mflops(t, n)));

     fftw_free(in);
     fftw_free(out);

     WHEN_VERBOSE(1, printf("\n"));
}
void test_correctness(int n) { int istride, ostride, howmany; fftw_plan validated_plan_forward, validated_plan_backward; WHEN_VERBOSE(1, printf("Testing correctness for n = %d...", n); fflush(stdout)); /* produce a *good* plan (validated by Ergun's test procedure) */ validated_plan_forward = fftw_create_plan(n, FFTW_FORWARD, measure_flag | wisdom_flag); test_ergun(n, FFTW_FORWARD, validated_plan_forward); validated_plan_backward = fftw_create_plan(n, FFTW_BACKWARD, measure_flag | wisdom_flag); test_ergun(n, FFTW_BACKWARD, validated_plan_backward); for (istride = 1; istride <= MAX_STRIDE; ++istride) for (ostride = 1; ostride <= MAX_STRIDE; ++ostride) for (howmany = 1; howmany <= MAX_HOWMANY; ++howmany) test_out_of_place_both(n, istride, ostride, howmany, validated_plan_forward, validated_plan_backward); for (istride = 1; istride <= MAX_STRIDE; ++istride) for (howmany = 1; howmany <= MAX_HOWMANY; ++howmany) test_in_place_both(n, istride, howmany, validated_plan_forward, validated_plan_backward); fftw_destroy_plan(validated_plan_forward); fftw_destroy_plan(validated_plan_backward); if (!(wisdom_flag & FFTW_USE_WISDOM) && chk_mem_leak) fftw_check_memory_leaks(); WHEN_VERBOSE(1, printf("OK\n")); }
void reinit_extace(int new_nsamp) { /* Stop drawing the display */ draw_stop(); if(data_handle != -1) /* stop if previously opened */ { input_thread_stopper(data_handle); close_datasource(data_handle); } /* Free all buffers */ mem_dealloc(); scope_begin_l = 0; scope_begin_l = 0; /* auto shift lag slightly to maintain good sync * The idea is the shift the lag slighly so that the "on-time" data * is in the MIDDLE of the window function for better eye/ear matchup */ nsamp = new_nsamp; convolve_factor = floor(nsamp/width) < 3 ? floor(nsamp/width) : 3 ; if (convolve_factor == 0) convolve_factor = 1; recalc_markers = TRUE; recalc_scale = TRUE; mem_alloc(); setup_datawindow(NULL,(WindowFunction)window_func); ring_rate_changed(); ring_pos=0; /* only start if it has been stopped above */ if(data_handle != -1 && (data_handle=open_datasource(data_source)) >= 0) { #ifdef USING_FFTW2 fftw_destroy_plan(plan); plan = fftw_create_plan(nsamp, FFTW_FORWARD, FFTW_ESTIMATE); #elif USING_FFTW3 fftw_cleanup(); plan = fftw_plan_r2r_1d(nsamp, raw_fft_in, raw_fft_out,FFTW_R2HC, FFTW_ESTIMATE); #endif input_thread_starter(data_handle); ring_rate_changed(); /* Fix all gui controls that depend on * ring_rate (adjustments and such */ draw_start(); } }
void process_seg(float* data) { int i; float* p = data; static float dbuff[FFT_LEN*2]; static fftw_plan planfwd,planinverse; if (!planfwd) { planfwd=fftw_create_plan(FFT_LEN, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE | FFTW_USE_WISDOM ); planinverse=fftw_create_plan(IFFT_LEN, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE | FFTW_USE_WISDOM ); } fftw_one(planfwd, (fftw_complex *)data, (fftw_complex *)NULL); data[0]=0; data[1]=0; fftw(planinverse, NSTRIPS, (fftw_complex *)data, 1, IFFT_LEN, (fftw_complex *)NULL, 1, IFFT_LEN); for (i=0; i<NSTRIPS; i++) { output_samples(p, i, obuf_pos); p += IFFT_LEN*2; } obuf_pos+=IFFT_LEN*2/CHAR_BIT; }
/*
 * Fill `plans` with one 1D plan per dimension, built with the ordinary
 * fftw_create_plan() (which allocates its own array and optimizes for
 * contiguous data).
 *
 * Every dimension except the last is planned in-place; the last one is
 * in-place only when the caller asked for FFTW_IN_PLACE.  An in-place
 * dimension reuses the plan of the nearest earlier dimension of the same
 * size; an out-of-place last dimension always gets its own plan.
 *
 * Returns `plans` (which may be NULL if NULL was passed in), or 0 when
 * rank <= 0 or a plan cannot be created -- in the latter case the array is
 * handed to destroy_plan_array() first.
 */
fftw_plan *fftwnd_create_plans_generic(fftw_plan *plans, int rank, const int *n, fftw_direction dir, int flags)
{
     int dim;

     if (rank <= 0)
          return 0;

     if (!plans)
          return plans;

     for (dim = 0; dim < rank; ++dim) {
          int dim_flags = flags;
          int twin = -1;      /* index of an earlier plan to share, or -1 */

          if (dim < rank - 1 || (flags & FFTW_IN_PLACE)) {
               /* fft's except the last dimension are always in-place */
               dim_flags |= FFTW_IN_PLACE;

               /* look backwards for a dimension of identical size */
               twin = dim - 1;
               while (twin >= 0 && n[dim] != n[twin])
                    --twin;
          }

          if (twin < 0) {
               /* nothing to share: generate a new plan */
               plans[dim] = fftw_create_plan(n[dim], dir, dim_flags);
               if (!plans[dim]) {
                    destroy_plan_array(rank, plans);
                    return 0;
               }
          } else {
               /* a plan already exists for this size: reuse it */
               plans[dim] = plans[twin];
          }
     }

     return plans;
}
gint set_data_source() { draw_stop(); input_thread_stopper(data_handle); close_datasource(data_handle); if ((data_handle=open_datasource(data_source)) >= 0) { #ifdef USING_FFTW2 plan = fftw_create_plan(nsamp, FFTW_FORWARD, FFTW_ESTIMATE); #elif USING_FFTW3 plan = fftw_plan_r2r_1d(nsamp, raw_fft_in, raw_fft_out, FFTW_R2HC, FFTW_ESTIMATE); #endif ring_rate_changed(); /* Fix all gui controls that depend on * ring_rate (adjustments and such */ input_thread_starter(data_handle); draw_start(); } return TRUE; }
/*
 * Create a plan for a distributed multi-dimensional real transform.
 *
 * comm   communicator handed to the transpose planner
 * rank   number of dimensions; must be at least 2
 * n      dimension sizes (rank entries)
 * dir    transform direction
 * flags  planner flags; FFTW_IN_PLACE is always added
 *
 * The plan consists of a 1D complex plan for the first dimension, an
 * (rank-1)-dimensional real plan for the remaining ones, two transpose
 * plans (forward and inverse) and, when n[0] exceeds the sub-plan's own
 * work size, an additional work array of n[0] complex values.
 *
 * Returns the new plan, or 0 if rank < 2 or any component could not be
 * created.  On failure everything built so far is released through
 * rfftwnd_mpi_destroy_plan() and construction stops immediately, so no
 * later step can dereference a missing sub-plan or the destroyed plan.
 */
rfftwnd_mpi_plan rfftwnd_mpi_create_plan(MPI_Comm comm, int rank, const int *n, fftw_direction dir, int flags)
{
     rfftwnd_mpi_plan p;

     if (rank < 2)
          return 0;

     p = (rfftwnd_mpi_plan) fftw_malloc(sizeof(rfftwnd_mpi_plan_data));

     /* start from an all-empty plan so a partial build can be destroyed */
     p->p_fft_x = 0;
     p->p_fft = 0;
     p->p_transpose = 0;
     p->p_transpose_inv = 0;
     p->work = 0;

     p->p_fft_x = fftw_create_plan(n[0], dir, flags | FFTW_IN_PLACE);
     if (!p->p_fft_x)
          goto fail;

     p->p_fft = rfftwnd_create_plan(rank-1, n+1, dir, flags | FFTW_IN_PLACE);
     if (!p->p_fft)
          goto fail;

     p->p_transpose = transpose_mpi_create_plan(n[0], p->p_fft->n[0], comm);
     if (!p->p_transpose)
          goto fail;

     p->p_transpose_inv = transpose_mpi_create_plan(p->p_fft->n[0], n[0], comm);
     if (!p->p_transpose_inv)
          goto fail;

     if (n[0] > p->p_fft->nwork)
          p->work = (fftw_complex *) fftw_malloc(n[0] * sizeof(fftw_complex));

     return p;

fail:
     rfftwnd_mpi_destroy_plan(p);
     return 0;
}
int fourier(double *jr, double *ji, int n, int iflag) { int i; int plan_flags; fftw_plan plan; FFTW_COMPLEX *cbuf; init_wisdom(); plan_flags = using_wisdom ? (FFTW_USE_WISDOM | FFTW_MEASURE):FFTW_ESTIMATE; plan_flags |= FFTW_IN_PLACE; plan = fftw_create_plan(n, iflag ? FFTW_BACKWARD:FFTW_FORWARD, plan_flags); cbuf = xcalloc(n, sizeof(FFTW_COMPLEX)); if (!cbuf) { return RETURN_FAILURE; } for (i = 0; i < n; i++) { cbuf[i].re = jr[i]; cbuf[i].im = ji[i]; } fftw_one(plan, cbuf, NULL); fftw_destroy_plan(plan); for (i = 0; i < n; i++) { jr[i] = cbuf[i].re; ji[i] = cbuf[i].im; } xfree(cbuf); return RETURN_SUCCESS; }
/*
 * 1D complex FFT of n_in points from `data` into `dataout`, built against
 * either FFTW2 (when `fftw2` is defined) or FFTW3.
 *
 * isign0 > 0 maps to FFTW sign -1 and the output is multiplied by 1/n_in;
 * any other isign0 maps to sign +1 and the output is copied unscaled.
 */
void zfft1(dcomplex *data,     /* size n_in */
           int n_in,
           dcomplex *dataout,  /* output */
           int isign0)
{
  fftw_complex *in, *out;
  fftw_plan p;
  int k;
  int isign;
  double scale;

  isign = (isign0 > 0) ? -1 : 1;

#ifdef fftw2
  in  = (fftw_complex*)malloc(sizeof(fftw_complex)*n_in);
  out = (fftw_complex*)malloc(sizeof(fftw_complex)*n_in);
#else
  in  = fftw_malloc(sizeof(fftw_complex)*n_in);
  out = fftw_malloc(sizeof(fftw_complex)*n_in);
#endif

  /* load the input */
  for (k = 0; k < n_in; k++) {
#ifdef fftw2
    c_re(in[k]) = data[k].r;
    c_im(in[k]) = data[k].i;
#else
    in[k][0] = data[k].r;
    in[k][1] = data[k].i;
#endif
  }

#ifdef fftw2
  p = fftw_create_plan(n_in, isign, FFTW_ESTIMATE);
  fftw_one(p,in,out);
#else
  p = fftw_plan_dft_1d(n_in,in,out,isign,FFTW_ESTIMATE);
  fftw_execute(p);
#endif

  /* Only the sign -1 transform is normalized.  A factor of exactly 1.0
     leaves the other direction's values unchanged, so one copy loop
     serves both cases. */
  if (isign == -1) {
    scale = 1.0/( (double)n_in);
  }
  else {
    scale = 1.0;
  }

  for (k = 0; k < n_in; k++) {
#ifdef fftw2
    dataout[k].r = c_re(out[k])*scale;
    dataout[k].i = c_im(out[k])*scale;
#else
    dataout[k].r = out[k][0]*scale;
    dataout[k].i = out[k][1]*scale;
#endif
  }

  fftw_destroy_plan(p);

#ifdef fftw2
  free(out);
  free(in);
#else
  fftw_free(out);
  fftw_free(in);
#endif
}
int start_line_model_monitor(int len) { char buf[132 + 1]; float x; float y; int i; w = new Fl_Double_Window(850, 400, "Telephone line model monitor"); c_spec = new Fl_Group(0, 0, 380, 400); c_spec->box(FL_DOWN_BOX); c_spec->align(FL_ALIGN_TOP | FL_ALIGN_INSIDE); canvas_spec = new Ca_Canvas(60, 30, 300, 300, "Spectrum"); canvas_spec->box(FL_PLASTIC_DOWN_BOX); canvas_spec->color(7); canvas_spec->align(FL_ALIGN_TOP); canvas_spec->border(15); spec_freq = new Ca_X_Axis(65, 330, 290, 30, "Freq (Hz)"); spec_freq->align(FL_ALIGN_BOTTOM); spec_freq->minimum(0); spec_freq->maximum(4000); spec_freq->label_format("%g"); spec_freq->minor_grid_color(fl_gray_ramp(20)); spec_freq->major_grid_color(fl_gray_ramp(15)); spec_freq->label_grid_color(fl_gray_ramp(10)); spec_freq->grid_visible(CA_LABEL_GRID | CA_ALWAYS_VISIBLE); spec_freq->minor_grid_style(FL_DOT); spec_freq->major_step(5); spec_freq->label_step(1); spec_freq->axis_color(FL_BLACK); spec_freq->axis_align(CA_BOTTOM | CA_LINE); spec_amp = new Ca_Y_Axis(20, 35, 40, 290, "Amp (dBmO)"); spec_amp->align(FL_ALIGN_LEFT); spec_amp->minimum(-80.0); spec_amp->maximum(10.0); spec_amp->minor_grid_color(fl_gray_ramp(20)); spec_amp->major_grid_color(fl_gray_ramp(15)); spec_amp->label_grid_color(fl_gray_ramp(10)); //spec_amp->grid_visible(CA_MINOR_TICK | CA_MAJOR_TICK | CA_LABEL_GRID | CA_ALWAYS_VISIBLE); spec_amp->grid_visible(CA_LABEL_GRID | CA_ALWAYS_VISIBLE); spec_amp->minor_grid_style(FL_DOT); spec_amp->major_step(5); spec_amp->label_step(1); spec_amp->axis_color(FL_BLACK); spec_amp->current(); c_spec->end(); c_right = new Fl_Group(440, 0, 465, 405); c_can = new Fl_Group(380, 0, 415, 200); c_can->box(FL_DOWN_BOX); c_can->align(FL_ALIGN_TOP | FL_ALIGN_INSIDE); c_can->current(); canvas_can = new Ca_Canvas(460, 35, 300, 100, "??? 
coefficients"); canvas_can->box(FL_PLASTIC_DOWN_BOX); canvas_can->color(7); canvas_can->align(FL_ALIGN_TOP); Fl_Group::current()->resizable(canvas_can); canvas_can->border(15); can_x = new Ca_X_Axis(465, 135, 290, 30, "Tap"); can_x->align(FL_ALIGN_BOTTOM); can_x->minimum(0.0); can_x->maximum((float) len); can_x->label_format("%g"); can_x->minor_grid_color(fl_gray_ramp(20)); can_x->major_grid_color(fl_gray_ramp(15)); can_x->label_grid_color(fl_gray_ramp(10)); can_x->grid_visible(CA_LABEL_GRID | CA_ALWAYS_VISIBLE); can_x->minor_grid_style(FL_DOT); can_x->major_step(5); can_x->label_step(1); can_x->axis_align(CA_BOTTOM | CA_LINE); can_x->axis_color(FL_BLACK); can_x->current(); can_y = new Ca_Y_Axis(420, 40, 40, 90, "Amp"); can_y->align(FL_ALIGN_LEFT); can_y->minimum(-0.1); can_y->maximum(0.1); can_y->minor_grid_color(fl_gray_ramp(20)); can_y->major_grid_color(fl_gray_ramp(15)); can_y->label_grid_color(fl_gray_ramp(10)); can_y->grid_visible(CA_LABEL_GRID | CA_ALWAYS_VISIBLE); can_y->minor_grid_style(FL_DOT); can_y->major_step(5); can_y->label_step(1); can_y->axis_color(FL_BLACK); can_y->current(); c_can->end(); c_line_model = new Fl_Group(380, 200, 415, 200); c_line_model->box(FL_DOWN_BOX); c_line_model->align(FL_ALIGN_TOP | FL_ALIGN_INSIDE); c_line_model->current(); canvas_line_model = new Ca_Canvas(460, 235, 300, 100, "Line impulse response model"); canvas_line_model->box(FL_PLASTIC_DOWN_BOX); canvas_line_model->color(7); canvas_line_model->align(FL_ALIGN_TOP); Fl_Group::current()->resizable(canvas_line_model); canvas_line_model->border(15); line_model_x = new Ca_X_Axis(465, 335, 290, 30, "Tap"); line_model_x->align(FL_ALIGN_BOTTOM); line_model_x->minimum(0.0); line_model_x->maximum((float) len); line_model_x->label_format("%g"); line_model_x->minor_grid_color(fl_gray_ramp(20)); line_model_x->major_grid_color(fl_gray_ramp(15)); line_model_x->label_grid_color(fl_gray_ramp(10)); line_model_x->grid_visible(CA_LABEL_GRID | CA_ALWAYS_VISIBLE); 
line_model_x->minor_grid_style(FL_DOT); line_model_x->major_step(5); line_model_x->label_step(1); line_model_x->axis_align(CA_BOTTOM | CA_LINE); line_model_x->axis_color(FL_BLACK); line_model_x->current(); line_model_y = new Ca_Y_Axis(420, 240, 40, 90, "Amp"); line_model_y->align(FL_ALIGN_LEFT); line_model_y->minimum(-0.1); line_model_y->maximum(0.1); line_model_y->minor_grid_color(fl_gray_ramp(20)); line_model_y->major_grid_color(fl_gray_ramp(15)); line_model_y->label_grid_color(fl_gray_ramp(10)); line_model_y->grid_visible(CA_LABEL_GRID | CA_ALWAYS_VISIBLE); line_model_y->minor_grid_style(FL_DOT); line_model_y->major_step(5); line_model_y->label_step(1); line_model_y->axis_color(FL_BLACK); line_model_y->current(); c_line_model->end(); audio_meter = new Fl_Audio_Meter(810, 40, 10, 250, ""); audio_meter->box(FL_PLASTIC_UP_BOX); audio_meter->type(FL_VERT_AUDIO_METER); c_right->end(); Fl_Group::current()->resizable(c_right); w->end(); w->show(); #if defined(HAVE_FFTW3_H) p = fftw_plan_dft_1d(1024, in, out, FFTW_BACKWARD, FFTW_ESTIMATE); for (i = 0; i < 1024; i++) { in[i][0] = 0.0; in[i][1] = 0.0; } #else p = fftw_create_plan(1024, FFTW_BACKWARD, FFTW_ESTIMATE); for (i = 0; i < 1024; i++) { in[i].re = 0.0; in[i].im = 0.0; } #endif in_ptr = 0; Fl::check(); return 0; }
int main(int argc, char **argv) { int c, mu, status; int filename_set = 0; int mode = 0; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix, iiy, gid; int Thp1, nclass; int *oh_count=(int*)NULL, *oh_id=(int*)NULL, oh_nc; int *picount; double *conn = (double*)NULL; double *conn2 = (double*)NULL; double **oh_val=(double**)NULL; double q[4], qsqr; int verbose = 0; char filename[800]; double ratime, retime; FILE *ofs; fftw_complex *corrt=NULL; fftw_complex *pi00=(fftw_complex*)NULL, *pijj=(fftw_complex*)NULL, *piavg=(fftw_complex*)NULL; fftw_plan plan_m; while ((c = getopt(argc, argv, "h?vf:m:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'm': mode = atoi(optarg); break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); /* initialize fftw, create plan with FFTW_FORWARD --- in contrast to * FFTW_BACKWARD in e.g. 
avc_exact */ plan_m = fftw_create_plan(T_global, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); if(plan_m==NULL) { fprintf(stderr, "Error, could not create fftw plan\n"); return(1); } T = T_global; Thp1 = T/2 + 1; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); /**************************************** * allocate memory for the contractions * ****************************************/ conn = (double*)calloc(32*VOLUME, sizeof(double)); if( (conn==(double*)NULL) ) { fprintf(stderr, "could not allocate memory for contr. fields\n"); exit(3); } /* conn2 = (double*)calloc(32*VOLUME, sizeof(double)); if( (conn2==(double*)NULL) ) { fprintf(stderr, "could not allocate memory for contr. 
fields\n"); exit(4); } pi00 = (fftw_complex*)malloc(VOLUME*sizeof(fftw_complex)); if( (pi00==(fftw_complex*)NULL) ) { fprintf(stderr, "could not allocate memory for pi00\n"); exit(2); } pijj = (fftw_complex*)fftw_malloc(VOLUME*sizeof(fftw_complex)); if( (pijj==(fftw_complex*)NULL) ) { fprintf(stderr, "could not allocate memory for pijj\n"); exit(2); } */ corrt = fftw_malloc(T*sizeof(fftw_complex)); for(gid=g_gaugeid; gid<=g_gaugeid2; gid++) { // for(ix=0; ix<VOLUME; ix++) {pi00[ix].re = 0.; pi00[ix].im = 0.;} // for(ix=0; ix<VOLUME; ix++) {pijj[ix].re = 0.; pijj[ix].im = 0.;} /*********************** * read contractions * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; sprintf(filename, "%s", filename_prefix); fprintf(stdout, "# Reading data from file %s\n", filename); status = read_lime_contraction(conn, filename, 16, 0); if(status == 106) { fprintf(stderr, "Error: could not read from file %s; status was %d\n", filename, status); continue; } /* sprintf(filename, "%s.%.4d.%.4d", filename_prefix2, gid); fprintf(stdout, "# Reading data from file %s\n", filename); status = read_lime_contraction(conn2, filename, 16, 0); if(status == 106) { fprintf(stderr, "Error: could not read from file %s; status was %d\n", filename, status); continue; } */ retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time to read contractions %e seconds\n", retime-ratime); /*********************** * fill the correlator * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; /* for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { for(x0=0; x0<T; x0++) { iix = g_ipt[0][x1][x2][x3]*T+x0; for(mu=1; mu<4; mu++) { ix = _GWI(5*mu,g_ipt[x0][x1][x2][x3],VOLUME); pijj[iix].re += ( conn[ix ] - conn2[ix ] ) * (double)Nsave / (double)(Nsave-1); pijj[iix].im += ( conn[ix+1] - conn2[ix+1] ) * (double)Nsave / (double)(Nsave-1); } ix = 2*g_ipt[x0][x1][x2][x3]; pi00[iix].re += ( conn[ix ] - conn2[ix ] ) * (double)Nsave / (double)(Nsave-1); 
pi00[iix].im += ( conn[ix+1] - conn2[ix+1] ) * (double)Nsave / (double)(Nsave-1); } }}} */ for(x0=0; x0<T; x0++) { ix = g_ipt[x0][0][0][0]; corrt[x0].re = conn[_GWI(5,ix,VOLUME) ] + conn[_GWI(10,ix,VOLUME) ] + conn[_GWI(15,ix,VOLUME) ]; corrt[x0].im = conn[_GWI(5,ix,VOLUME)+1] + conn[_GWI(10,ix,VOLUME)+1] + conn[_GWI(15,ix,VOLUME)+1]; corrt[x0].re /= (double)T; corrt[x0].im /= (double)T; } /* fftw(plan_m, 1, corrt, 1, T, (fftw_complex*)NULL, 0, 0); */ fftw_one(plan_m, corrt, NULL); sprintf(filename, "rho.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing VKVK data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, 0, corrt[0].re, 0., gid); for(x0=1; x0<(T/2); x0++) { fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, x0, corrt[x0].re, corrt[T-x0].re, gid); } fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, (T/2), corrt[T/2].re, 0., gid); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time to fill correlator %e seconds\n", retime-ratime); #ifdef _UNDEF free(conn); /* free(conn2); */ /******************************** * test: print correl to stdout * ********************************/ /* fprintf(stdout, "\n\n# ***************** pijj *****************\n"); for(ix=0; ix<LX*LY*LZ; ix++) { iix = ix*T; for(x0=0; x0<T; x0++) { fprintf(stdout, "%6d%3d%25.16e%25.16e\n", ix, x0, pijj[iix+x0].re, pijj[iix+x0].im); } } fprintf(stdout, "\n\n# ***************** pi00 *****************\n"); for(ix=0; ix<LX*LY*LZ; ix++) { iix = ix*T; for(x0=0; x0<T; x0++) { fprintf(stdout, "%6d%3d%25.16e%25.16e\n", ix, x0, pi00[iix+x0].re, pi00[iix+x0].im); } } */ /***************************************** * do the reverse Fourier transformation * *****************************************/ ratime = (double)clock() / CLOCKS_PER_SEC; fftw(plan_m, 
LX*LY*LZ, pi00, 1, T, (fftw_complex*)NULL, 0, 0); fftw(plan_m, LX*LY*LZ, pijj, 1, T, (fftw_complex*)NULL, 0, 0); for(ix=0; ix<VOLUME; ix++) { pi00[ix].re /= (double)T; pi00[ix].im /= (double)T; pijj[ix].re /= 3.*(double)T; pijj[ix].im /= 3.*(double)T; } retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time for Fourier transform %e seconds\n", retime-ratime); /***************************************** * write to file *****************************************/ ratime = (double)clock() / CLOCKS_PER_SEC; sprintf(filename, "pi00.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing pi00-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[0][x1][x2][x3]*T; /* fprintf(ofs, "# px=%3d, py=%3d, pz=%3d\n", x1, x2, x3); */ for(x0=0; x0<T; x0++) { /* fprintf(ofs, "%3d%25.16e%25.16e\n", x0, pi00[ix+x0].re, pi00[ix+x0].im); */ fprintf(ofs, "%3d%3d%3d%3d%25.16e%25.16e\n", x1, x2, x3, x0, pi00[ix+x0].re, pi00[ix+x0].im); } }}} fclose(ofs); sprintf(filename, "pijj.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing pijj-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[0][x1][x2][x3]*T; /* fprintf(ofs, "# px=%3d, py=%3d, pz=%3d\n", x1, x2, x3); */ for(x0=0; x0<T; x0++) { /* fprintf(ofs, "%3d%25.16e%25.16e\n", x0, pijj[ix+x0].re, pijj[ix+x0].im); */ fprintf(ofs, "%3d%3d%3d%3d%25.16e%25.16e\n", x1, x2, x3, x0, pijj[ix+x0].re, pijj[ix+x0].im); } }}} fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# 
time to write correlator %e seconds\n", retime-ratime); /* if(mode==0) { ratime = (double)clock() / CLOCKS_PER_SEC; if( (picount = (int*)malloc(VOLUME*sizeof(int))) == (int*)NULL) exit(110); sprintf(filename, "corr.00.mom"); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } for(ix=0; ix<VOLUME; ix++) picount[ix] = 0; for(x1=0; x1<LX; x1++) { q[1] = 2. * sin(M_PI * (double)x1 / (double)LX); for(x2=0; x2<LY; x2++) { q[2] = 2. * sin(M_PI * (double)x2 / (double)LY); for(x3=0; x3<LZ; x3++) { q[3] = 2. * sin(M_PI * (double)x3 / (double)LZ); qsqr = q[1]*q[1] + q[2]*q[2] + q[3]*q[3]; if( qsqr>=g_qhatsqr_min-_Q2EPS && qsqr<= g_qhatsqr_max+_Q2EPS ) { ix = g_ipt[0][x1][x2][x3]; picount[ix] = 1; fprintf(ofs, "%3d%3d%3d%6d%25.16e\n", x1, x2, x3, ix, qsqr); } }}} fclose(ofs); sprintf(filename, "corr_00.00.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr_00-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(ix=0; ix<VOLUME; ix++) { if(picount[ix]>0) { for(x0=0; x0<T; x0++) { fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pi00[ix*T+x0].re, pi00[ix*T+x0].im); } } } fclose(ofs); sprintf(filename, "corr_jj.00.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr_jj-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(ix=0; ix<VOLUME; ix++) { if(picount[ix]>0) { for(x0=0; x0<T; x0++) { fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pijj[ix*T+x0].re, pijj[ix*T+x0].im); } } } fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time for O_h averaging %e seconds\n", retime-ratime); 
free(picount); } else if(mode==1) { ratime = (double)clock() / CLOCKS_PER_SEC; if( (picount = (int*)malloc(VOLUME*sizeof(int))) == (int*)NULL) exit(110); sprintf(filename, "corr.01.mom"); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } if( (picount = (int*)malloc(VOLUME*sizeof(int))) == (int*)NULL) exit(110); for(ix=0; ix<VOLUME; ix++) picount[ix] = 0; for(x1=0; x1<LX; x1++) { q[1] = 2. * M_PI * (double)x1 / (double)LX; for(x2=0; x2<LY; x2++) { q[2] = 2. * M_PI * (double)x2 / (double)LY; for(x3=0; x3<LZ; x3++) { q[3] = 2. * M_PI * (double)x3 / (double)LZ; qsqr = q[1]*q[1] + q[2]*q[2] + q[3]*q[3]; if( qsqr>=g_qhatsqr_min-_Q2EPS && qsqr<= g_qhatsqr_max+_Q2EPS ) { ix = g_ipt[0][x1][x2][x3]; picount[ix] = 1; fprintf(ofs, "%3d%3d%3d%6d%25.16e\n", x1, x2, x3, ix, qsqr); } }}} fclose(ofs); sprintf(filename, "corr_00.01.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr_01-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(ix=0; ix<VOLUME; ix++) { if(picount[ix]>0) { for(x0=0; x0<T; x0++) { fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pi00[ix*T+x0].re, pi00[ix*T+x0].im); } } } fclose(ofs); sprintf(filename, "corr_jj.01.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr_jj-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(ix=0; ix<VOLUME; ix++) { if(picount[ix]>0) { for(x0=0; x0<T; x0++) { fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pijj[ix*T+x0].re, pijj[ix*T+x0].im); } } } fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time for writing: %e seconds\n", 
retime-ratime); free(picount); } else if(mode==2) { if(make_H3orbits(&oh_id, &oh_count, &oh_val, &oh_nc) != 0) return(123); ratime = (double)clock() / CLOCKS_PER_SEC; nclass = oh_nc / Thp1; if( (piavg = (fftw_complex*)malloc(oh_nc*sizeof(fftw_complex))) == (fftw_complex*)NULL) exit(110); if( (picount = (int*)malloc(oh_nc*sizeof(int))) == (int*)NULL) exit(110); for(ix=0; ix<oh_nc; ix++) { piavg[ix].re = 0.; piavg[ix].im = 0.; picount[ix] = 0; } for(ix=0; ix<LX*LY*LZ; ix++) { for(x0=0; x0<Thp1; x0++) { iix = ix*T+x0; iiy = oh_id[ix]*Thp1+x0; piavg[iiy].re += pi00[iix].re; piavg[iiy].im += pi00[iix].im; if(x0>0 && x0<T/2) { iix = ix*T+(T-x0); piavg[iiy].re += pi00[iix].re; piavg[iiy].im += pi00[iix].im; } } picount[oh_id[ix]]++; } for(ix=0; ix<nclass; ix++) { for(x0=0; x0<Thp1; x0++) { iix = ix*Thp1+x0; if(picount[ix]>0) { piavg[iix].re /= (double)picount[ix]; piavg[iix].im /= (double)picount[ix]; if(x0>0 && x0<T/2) { piavg[iix].re /= 2.; piavg[iix].im /= 2.; } } } } sprintf(filename, "corr02_00.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr-00-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(x1=0; x1<nclass; x1++) { if(oh_val[0][x1]>=g_qhatsqr_min-_Q2EPS && oh_val[0][x1]<=g_qhatsqr_max+_Q2EPS) { ix = x1*Thp1; for(x0=0; x0<Thp1; x0++) { fprintf(ofs, "%25.16e%3d%25.16e%25.16e%5d\n", oh_val[0][x1], x0, piavg[ix+x0].re, piavg[ix+x0].im, picount[x1]); } } } fclose(ofs); for(ix=0; ix<oh_nc; ix++) { piavg[ix].re = 0.; piavg[ix].im = 0.; picount[ix] = 0; } for(ix=0; ix<LX*LY*LZ; ix++) { for(x0=0; x0<Thp1; x0++) { iix = ix*T+x0; iiy = oh_id[ix]*Thp1+x0; piavg[iiy].re += pijj[iix].re; piavg[iiy].im += pijj[iix].im; if(x0>0 && x0<T/2) { iix = ix*T+(T-x0); piavg[iiy].re += pijj[iix].re; piavg[iiy].im += pijj[iix].im; } } picount[oh_id[ix]]++; } for(ix=0; ix<nclass; ix++) { 
for(x0=0; x0<Thp1; x0++) { iix = ix*Thp1+x0; if(picount[ix]>0) { piavg[iix].re /= (double)picount[ix]; piavg[iix].im /= (double)picount[ix]; if(x0>0 && x0<T/2) { piavg[iix].re /= 2.; piavg[iix].im /= 2.; } } }} sprintf(filename, "corr02_jj.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr-jj-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(x1=0; x1<nclass; x1++) { ix = x1*Thp1; for(x0=0; x0<Thp1; x0++) { fprintf(ofs, "%25.16e%3d%25.16e%25.16e%5d\n", oh_val[0][x1], x0, piavg[ix+x0].re, piavg[ix+x0].im, picount[x1]); } } fclose(ofs); sprintf(filename, "corr.02.mom"); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } for(ix=0; ix<VOLUME; ix++) fprintf(ofs, "%5d%25.16e%5d", ix, oh_val[0][ix], picount[ix]); fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time for O_h averaging %e seconds\n", retime-ratime); free(piavg); free(picount); } */ #endif } /*************************************** * free the allocated memory, finalize * ***************************************/ free(corrt); free_geometry(); /* free(pi00); free(pijj); */ fftw_destroy_plan(plan_m); return(0); }
int main(int argc, char *argv[]) { float *data1, *data2; fcomplex *ptr1, *ptr2; long n, npts, tmp = 0, ct, plimit, prn = 0; long i, isign = -1; double err = 0.0; #if defined USERAWFFTW FILE *wisdomfile; fftw_plan plan_forward, plan_inverse; static char wisdomfilenm[120]; #endif struct tms runtimes; double ttim, stim, utim, tott; if (argc <= 1 || argc > 4) { printf("\nUsage: testffts [sign (1/-1)] [print (0/1)] [frac err tol]\n\n"); exit(0); } else if (argc == 2) { isign = atoi(argv[1]); prn = 0; err = 0.02; } else if (argc == 3) { isign = atoi(argv[1]); prn = atoi(argv[2]); err = 0.02; } if (argc == 4) { isign = atoi(argv[1]); prn = atoi(argv[2]); err = atof(argv[3]); } /* import the wisdom for FFTW */ #if defined USERAWFFTW sprintf(wisdomfilenm, "%s/fftw_wisdom.txt", DATABASE); wisdomfile = fopen(wisdomfilenm, "r"); if (wisdomfile == NULL) { printf("Error opening '%s'. Run makewisdom again.\n", \ wisdomfilenm); printf("Exiting.\n"); exit(1); } if (FFTW_FAILURE == fftw_import_wisdom_from_file(wisdomfile)) { printf("Error importing FFTW wisdom.\n"); printf("Exiting.\n"); exit(1); } fclose(wisdomfile); #endif for (i = 0; i <= 8; i++) { /* npts = 1 << (i + 14); # of points in FFT */ /* npts = 1 << 16; # of points in FFT */ /* npts = 4096; # of points in FFT */ /* npts = 524288; # of points in FFT */ npts = 300000 * (i + 1); n = npts << 1; /* # of float vals */ data1 = gen_fvect(n); data2 = gen_fvect(n); ptr1 = (fcomplex *)data1; ptr2 = (fcomplex *)data2; /* make the data = {1,1,1,1,-1,-1,-1,-1} (all real) */ /* for (ct = 0; ct < npts/2; ct++) { tmp = 2 * ct; data1[tmp] = 1.0; data1[tmp + 1] = 0.0; data1[tmp + npts] = -1.0; data1[tmp + npts + 1] = 0.0; data2[tmp] = 1.0; data2[tmp + 1] = 0.0; data2[tmp + npts] = -1.0; data2[tmp + npts + 1] = 0.0; } */ /* make the data a sin wave of fourier freq 12.12345... 
*/ /* for (ct = 0; ct < npts; ct++) { tmp = 2 * ct; data1[tmp] = sin(2.0*3.14159265358979*ct*12.12345/npts)+1.0; data2[tmp] = data1[tmp]; data1[tmp+1] = 0.0; data2[tmp+1] = data1[tmp+1]; } */ /* make the data a sin wave of fourier freq 12.12345... with noise */ for (ct = 0; ct < npts; ct++) { tmp = 2 * ct; data1[tmp] = 10.0 * sin(TWOPI * ct * 12.12345 / npts) + 100.0; data1[tmp] = gennor(data1[tmp], 10.0); data2[tmp] = data1[tmp]; data1[tmp + 1] = gennor(100.0, 10.0); data2[tmp + 1] = data1[tmp + 1]; } printf("\nCalculating...\n"); /* The challenger... */ tott = times(&runtimes) / (double) CLK_TCK; utim = runtimes.tms_utime / (double) CLK_TCK; stim = runtimes.tms_stime / (double) CLK_TCK; tablesixstepfft(ptr1, npts, isign); /* tablesixstepfft(plan1, plan2, ptr1, npts, isign); */ /* sixstepfft(ptr1, npts, isign); */ /* four1(ptr1 - 1, npts, isign); */ /* tablefft(ptr1, npts, isign); */ /* tablesplitfft(ptr1, npts, isign); */ /* realfft(ptr1, n, isign); */ /* fftw(plan, 1, in, 1, 0, out, 1, 0); */ tott = times(&runtimes) / (double) CLK_TCK - tott; printf("Timing summary (Ransom) npts = %ld:\n", npts); utim = runtimes.tms_utime / (double) CLK_TCK - utim; stim = runtimes.tms_stime / (double) CLK_TCK - stim; ttim = utim + stim; printf("CPU usage: %.3f sec total (%.3f sec user, %.3f sec system)\n", \ ttim, utim, stim); printf("Total time elapsed: %.3f sec.\n\n", tott); /* The "Standard" FFT... 
*/ /* The following is for the fftw FFT */ /* Create new plans */ #if defined USERAWFFTW plan_forward = fftw_create_plan(npts, -1, FFTW_MEASURE | \ FFTW_USE_WISDOM | \ FFTW_IN_PLACE); plan_inverse = fftw_create_plan(npts, +1, FFTW_MEASURE | \ FFTW_USE_WISDOM | \ FFTW_IN_PLACE); #endif tott = times(&runtimes) / (double) CLK_TCK; utim = runtimes.tms_utime / (double) CLK_TCK; stim = runtimes.tms_stime / (double) CLK_TCK; /* four1(ptr2 - 1, npts, isign); */ /* tablefft(ptr2, npts, isign); */ /* tablesplitfft(ptr1, npts, isign); */ /* tablesixstepfft(ptr2, npts, isign); */ /* realft(ptr2 - 1, n, isign); */ fftwcall(ptr2, npts, -1); #if defined USERAWFFTW if (isign == -1) { fftw(plan_forward, 1, (FFTW_COMPLEX *) ptr2, 1, 1, NULL, 1, 1); } else { fftw(plan_inverse, 1, (FFTW_COMPLEX *) ptr2, 1, 1, NULL, 1, 1); } #endif tott = times(&runtimes) / (double) CLK_TCK - tott; printf("Timing summary (FFTW) npts = %ld:\n", npts); utim = runtimes.tms_utime / (double) CLK_TCK - utim; stim = runtimes.tms_stime / (double) CLK_TCK - stim; ttim = utim + stim; printf("CPU usage: %.3f sec total (%.3f sec user, %.3f sec system)\n", \ ttim, utim, stim); printf("Total time elapsed: %.3f sec.\n\n", tott); /* The following is for the fftw FFT */ #if defined USERAWFFTW fftw_destroy_plan(plan_forward); fftw_destroy_plan(plan_inverse); #endif /* Check if correct with fractional errors... */ for (ct = 0; ct < n; ct++) { if (data2[ct] != 0.0) { if (fabs((1.0 - (data1[ct] / data2[ct]))) > err) { if ((ct % 2) == 1) { printf("Values at freq %ld do not match to %4.2f%% fractional error:\n", (ct - 1) / 2, err * 100); printf(" rl1 = %f im1 = %f rl2 = %f im2 = %f\n", data1[ct - 1], data1[ct], data2[ct - 1], data2[ct]); } else { printf("Values at freq %ld do not match to %4.2f%% fractional error:\n", ct / 2, err * 100); printf(" rl1 = %f im1 = %f rl2 = %f im2 = %f\n", data1[ct], data1[ct + 1], data2[ct], data2[ct + 1]); } } } } if (npts >= 64) plimit = 64; else plimit = npts; /* Print the output... 
*/ if (prn) { printf("\n #1: Challenger FFT... "); printf("#2: Standard...\n"); for (ct = 0; ct < plimit; ct++) { printf(" %3ld rl = %12.3f ", ct, data1[2 * ct]); printf("im = %12.3f rl = %12.3f im = %12.3f\n", \ data1[2 * ct + 1], data2[2 * ct], data2[2 * ct + 1]); } } free(data1); free(data2); } return 0; }
static void generate_proakis(void) { float f; float f1; float offset; float amp; float phase; float delay; float pw; int index; int i; int l; #if defined(HAVE_FFTW3_H) double in[FFT_SIZE][2]; double out[FFT_SIZE][2]; #else fftw_complex in[FFT_SIZE]; fftw_complex out[FFT_SIZE]; #endif fftw_plan p; #if defined(HAVE_FFTW3_H) p = fftw_plan_dft_1d(FFT_SIZE, in, out, FFTW_BACKWARD, FFTW_ESTIMATE); #else p = fftw_create_plan(FFT_SIZE, FFTW_BACKWARD, FFTW_ESTIMATE); #endif for (i = 0; i < FFT_SIZE; i++) { #if defined(HAVE_FFTW3_H) in[i][0] = in[i][1] = 0.0f; #else in[i].re = in[i].im = 0.0f; #endif } for (i = 1; i < FFT_SIZE/2; i++) { f = (float) i*SAMPLE_RATE/FFT_SIZE; f1 = f/200.0f; offset = f1 - floor(f1); index = (int) floor(f1); /* Linear interpolation */ amp = ((1.0f - offset)*proakis[index].amp + offset*proakis[index + 1].amp)/2.3f; delay = (1.0f - offset)*proakis[index].delay + offset*proakis[index + 1].delay; phase = 2.0f*M_PI*f*delay*0.001f; #if defined(HAVE_FFTW3_H) in[i][0] = amp*cosf(phase); in[i][1] = amp*sinf(phase); in[FFT_SIZE - i][0] = in[i][0]; in[FFT_SIZE - i][1] = -in[i][1]; #else in[i].re = amp*cosf(phase); in[i].im = amp*sinf(phase); in[FFT_SIZE - i].re = in[i].re; in[FFT_SIZE - i].im = -in[i].im; #endif } #if defined(HAVE_FFTW3_H) fftw_execute(p); #else fftw_one(p, in, out); #endif fprintf(outfile, "/* Medium range telephone line response\n"); fprintf(outfile, " (from p 537, Digital Communication, John G. 
Proakis */\n"); fprintf(outfile, "float proakis_line_model[] =\n"); fprintf(outfile, "{\n"); /* Normalise the filter's gain */ pw = 0.0f; l = FFT_SIZE - (LINE_FILTER_SIZE - 1)/2; for (i = 0; i < LINE_FILTER_SIZE; i++) { #if defined(HAVE_FFTW3_H) pw += out[l][0]*out[l][0]; #else pw += out[l].re*out[l].re; #endif if (++l == FFT_SIZE) l = 0; } pw = sqrt(pw); l = FFT_SIZE - (LINE_FILTER_SIZE - 1)/2; for (i = 0; i < LINE_FILTER_SIZE; i++) { #if defined(HAVE_FFTW3_H) impulse_responses[filter_sets][i] = out[l][0]/pw; #else impulse_responses[filter_sets][i] = out[l].re/pw; #endif fprintf(outfile, "%15.5f,\n", impulse_responses[filter_sets][i]); if (++l == FFT_SIZE) l = 0; } fprintf(outfile, "};\n\n"); filter_sets++; }
int main(int argc, char **argv) { int c, mu; int filename_set = 0; int l_LX_at, l_LXstart_at; int source_location, have_source_flag = 0; int x0, ix; int sx0, sx1, sx2, sx3; int check_WI=0; double *conn = (double*)NULL; double *conn2 = (double*)NULL; int verbose = 0; char filename[800]; double ratime, retime; FILE *ofs; /************************** * variables for WI check */ int x1, x2, x3, nu; double wre, wim, q[4]; /**************************/ fftw_complex *in=(fftw_complex*)NULL, *out=(fftw_complex*)NULL; fftw_plan plan_m; while ((c = getopt(argc, argv, "wh?vf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'w': check_WI = 1; fprintf(stdout, "# [get_rho_corr] check WI in momentum space\n"); break; case 'h': case '?': default: usage(); break; } } /* set the default values */ set_default_input_values(); if(filename_set==0) strcpy(filename, "cvc.input"); // set the default values set_default_input_values(); if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# [get_rho_corr] reading input parameters from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) { if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); /* initialize fftw, create plan with FFTW_FORWARD --- in contrast to * FFTW_BACKWARD in e.g. 
avc_exact */ plan_m = fftw_create_plan(T_global, FFTW_FORWARD, FFTW_MEASURE); T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); /**************************************** * allocate memory for the contractions * ****************************************/ conn = (double*)calloc(2 * 16 * VOLUME, sizeof(double)); if( (conn==(double*)NULL) ) { fprintf(stderr, "could not allocate memory for contr. fields\n"); exit(3); } for(ix=0; ix<32*VOLUME; ix++) conn[ix] = 0.; conn2= (double*)calloc(2 * T, sizeof(double)); if( (conn2==(double*)NULL) ) { fprintf(stderr, "could not allocate memory for corr.\n"); exit(2); } for(ix=0; ix<2*T; ix++) conn2[ix] = 0.; /***************************************** * prepare Fourier transformation arrays * *****************************************/ in = (fftw_complex*)malloc(T*sizeof(fftw_complex)); out = (fftw_complex*)malloc(T*sizeof(fftw_complex)); if( (in==(fftw_complex*)NULL) || (out==(fftw_complex*)NULL) ) exit(4); /******************************** * determine source coordinates * ********************************/ have_source_flag = (int)(g_source_location/(LX*LY*LZ)>=Tstart && g_source_location/(LX*LY*LZ)<(Tstart+T)); if(have_source_flag==1) fprintf(stdout, "process %2d has source location\n", g_cart_id); sx0 = g_source_location/(LX*LY*LZ)-Tstart; sx1 = (g_source_location%(LX*LY*LZ)) / (LY*LZ); sx2 = (g_source_location%(LY*LZ)) / LZ; sx3 = (g_source_location%LZ); if(have_source_flag==1) { fprintf(stdout, "local source coordinates: (%3d,%3d,%3d,%3d)\n", sx0, sx1, sx2, sx3); source_location = 
g_ipt[sx0][sx1][sx2][sx3]; } have_source_flag = 0; /*********************** * read contractions * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; // read_contraction(conn, (int*)NULL, filename_prefix, 16); read_lime_contraction(conn, filename_prefix, 16, 0); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "time to read contractions %e seconds\n", retime-ratime); // TEST Ward Identity if(check_WI) { fprintf(stdout, "# [get_corr_v5] Ward identity\n"); sprintf(filename, "WI.%.4d", Nconf); ofs = fopen(filename, "w"); if(ofs == NULL) exit(32); for(x0=0; x0<T; x0++) { q[0] = 2. * sin(M_PI * (double)x0 / (double)T); for(x1=0; x1<LX; x1++) { q[1] = 2. * sin(M_PI * (double)x1 / (double)LX); for(x2=0; x2<LY; x2++) { q[2] = 2. * sin(M_PI * (double)x2 / (double)LY); for(x3=0; x3<LZ; x3++) { q[3] = 2. * sin(M_PI * (double)x3 / (double)LZ); ix = g_ipt[x0][x1][x2][x3]; for(nu=0;nu<4;nu++) { wre = q[0] * conn[_GWI(4*0+nu,ix,VOLUME)] + q[1] * conn[_GWI(4*1+nu,ix,VOLUME)] \ + q[2] * conn[_GWI(4*2+nu,ix,VOLUME)] + q[3] * conn[_GWI(4*3+nu,ix,VOLUME)]; wim = q[0] * conn[_GWI(4*0+nu,ix,VOLUME)+1] + q[1] * conn[_GWI(4*1+nu,ix,VOLUME)+1] \ + q[2] * conn[_GWI(4*2+nu,ix,VOLUME)+1] + q[3] * conn[_GWI(4*3+nu,ix,VOLUME)+1]; fprintf(ofs, "\t%3d%3d%3d%3d%3d%16.7e%16.7e\n", nu, x0, x1, x2, x3, wre, wim); } }}}} fclose(ofs); } /*********************** * fill the correlator * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; for(x0=0; x0<T; x0++) { for(mu=1; mu<4; mu++) { ix = get_indexf(x0,0,0,0,mu,mu); fprintf(stdout, "x0=%3d, mu=%3d\tix=%8d\n", x0, mu, ix); conn2[2*x0 ] += conn[ix ]; conn2[2*x0+1] += conn[ix+1]; } } retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "time to fill correlator %e seconds\n", retime-ratime); /******************************** * test: print correl to stdout * ********************************/ for(x0=0; x0<T; x0++) { fprintf(stdout, "%3d%25.16e%25.16e\n", x0, conn2[2*x0], conn[2*x0+1]); } 
/***************************************** * do the reverse Fourier transformation * *****************************************/ ratime = (double)clock() / CLOCKS_PER_SEC; memcpy((void*)in, (void*)conn2, 2*T*sizeof(double)); fftw_one(plan_m, in, out); for(ix=0; ix<T; ix++) { conn2[2*ix ] = out[ix].re / (double)T; conn2[2*ix+1] = out[ix].im / (double)T; } retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "time for Fourier transform %e seconds\n", retime-ratime); ratime = (double)clock() / CLOCKS_PER_SEC; sprintf(filename, "rho_corr.%.4d", Nconf); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "could not open file %s for writing\n", filename); exit(5); } //for(x0=0; x0<T; x0++) { // fprintf(ofs, "%3d%25.16e%25.16e\n", x0, conn2[2*x0], conn2[2*x0+1]); //} x0 = 0; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], 0., Nconf); for(x0=1; x0<T/2; x0++) { fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], conn2[2*(T-x0)], Nconf); } x0 = T/2; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], 0., Nconf); fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "time to write correlator %e seconds\n", retime-ratime); /*************************************** * free the allocated memory, finalize * ***************************************/ free_geometry(); fftw_free(in); fftw_free(out); free(conn); free(conn2); fftw_destroy_plan(plan_m); return(0); }
static fftw_rader_data *create_rader_aux(int p, int flags) { fftw_complex *omega, *work; int g, ginv, gpower; int i; FFTW_TRIG_REAL twoPiOverN; fftw_real scale = 1.0 / (p - 1); /* for convolution */ fftw_plan plan; fftw_rader_data *d; if (p < 2) fftw_die("non-prime order in Rader\n"); flags &= ~FFTW_IN_PLACE; d = (fftw_rader_data *) fftw_malloc(sizeof(fftw_rader_data)); g = find_generator(p); ginv = power_mod(g, p - 2, p); omega = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex)); plan = fftw_create_plan(p - 1, FFTW_FORWARD, flags & ~FFTW_NO_VECTOR_RECURSE); work = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex)); twoPiOverN = FFTW_K2PI / (FFTW_TRIG_REAL) p; gpower = 1; for (i = 0; i < p - 1; ++i) { c_re(work[i]) = scale * FFTW_TRIG_COS(twoPiOverN * gpower); c_im(work[i]) = FFTW_FORWARD * scale * FFTW_TRIG_SIN(twoPiOverN * gpower); gpower = MULMOD(gpower, ginv, p); } /* fft permuted roots of unity */ fftw_executor_simple(p - 1, work, omega, plan->root, 1, 1, plan->recurse_kind); fftw_free(work); d->plan = plan; d->omega = omega; d->g = g; d->ginv = ginv; d->p = p; d->flags = flags; d->refcount = 1; d->next = NULL; d->cdesc = (fftw_codelet_desc *) fftw_malloc(sizeof(fftw_codelet_desc)); d->cdesc->name = NULL; d->cdesc->codelet = NULL; d->cdesc->size = p; d->cdesc->dir = FFTW_FORWARD; d->cdesc->type = FFTW_RADER; d->cdesc->signature = g; d->cdesc->ntwiddle = 0; d->cdesc->twiddle_order = NULL; return d; }
/*
 * Check an in-place transform of howmany length-n sequences stored with
 * element stride istride against a previously validated plan.
 *
 * n              transform length
 * istride        distance between consecutive elements of the strided input
 * howmany        number of transforms performed by one call
 * dir            transform direction
 * validated_plan trusted plan used to produce the reference output
 * specific       non-zero: create the plan with fftw_create_plan_specific
 *
 * The gaps between the strided elements are filled with known values and
 * verified afterwards, so that a transform writing outside its own elements
 * is detected.  FFTW_THREADSAFE is added to the plan flags at random.
 */
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
		   fftw_plan validated_plan, int specific)
{
     fftw_complex *strided, *dense, *reference;
     fftw_plan plan;
     int idx, gap, pos;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;
     const int total = n * howmany;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     strided = (fftw_complex *) fftw_malloc(istride * n * sizeof(fftw_complex) * howmany);
     dense = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);
     reference = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);

     if (specific)
	  plan = fftw_create_plan_specific(n, dir, flags,
					   strided, istride,
					   (fftw_complex *) NULL, 0);
     else
	  plan = fftw_create_plan(n, dir, flags);

     /* generate random inputs: same values in the dense and strided copy */
     for (idx = 0; idx < total; ++idx) {
	  c_re(dense[idx]) = DRAND();
	  c_re(strided[idx * istride]) = c_re(dense[idx]);
	  c_im(dense[idx]) = DRAND();
	  c_im(strided[idx * istride]) = c_im(dense[idx]);
     }

     /*
      * fill in other positions of the array, to make sure that
      * fftw doesn't overwrite them
      */
     for (gap = 1; gap < istride; ++gap) {
	  for (idx = 0; idx < total; ++idx) {
	       pos = idx * istride + gap;
	       c_re(strided[pos]) = pos;
	       c_im(strided[pos]) = idx * istride - gap;
	  }
     }

     CHECK(plan != NULL, "can't create plan");
     WHEN_VERBOSE(2, fftw_print_plan(plan));

     /* fft-ize: the single-transform entry point is only eligible for one */
     /* contiguous transform, and even then is picked at random            */
     if (howmany == 1 && istride == 1 && !coinflip())
	  fftw_one(plan, strided, NULL);
     else
	  fftw(plan, howmany, strided, istride, n * istride,
	       (fftw_complex *) NULL, 0, 0);

     fftw_destroy_plan(plan);

     /* check for overwriting */
     for (gap = 1; gap < istride; ++gap) {
	  for (idx = 0; idx < total; ++idx) {
	       pos = idx * istride + gap;
	       CHECK(c_re(strided[pos]) == pos &&
		     c_im(strided[pos]) == idx * istride - gap,
		     "input has been overwritten");
	  }
     }

     /* reference result: one out-of-place transform per sequence */
     for (idx = 0; idx < howmany; ++idx) {
	  fftw(validated_plan, 1, dense + n * idx, 1, n,
	       reference + n * idx, 1, n);
     }

     CHECK(compute_error_complex(strided, istride, reference, 1, total) < TOLERANCE,
	   "test_in_place: wrong answer");
     WHEN_VERBOSE(2, printf("OK\n"));

     fftw_free(strided);
     fftw_free(dense);
     fftw_free(reference);
}
/*
 * Time the MPI transform of size n and, where enabled, the uniprocessor
 * transform of the same size, printing the results at verbosity 1.
 *
 * n        transform length
 * dir      transform direction
 * flags    plan flags; only in-place transforms are timed
 * specific non-zero requests are ignored (the function returns at once)
 *
 * The parallel transform is timed twice: first without and then with a work
 * array.  The uniprocessor timing runs only when io_okay is set and
 * only_parallel is not; its result is used for the "parallel speedup" lines.
 */
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     int local_n, local_start;
     int local_n_after_transform, local_start_after_transform;
     int total_local_size, nalloc;
     int run_serial, plan_flags;
     fftw_complex *data, *scratch;
     fftw_plan serial_plan = 0;
     fftw_mpi_plan par_plan;
     double t_par, t_serial = 0.0;

     if (specific)
	  return;
     if (!(flags & FFTW_IN_PLACE))
	  return;

     run_serial = io_okay && !only_parallel;
     plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

     if (run_serial)
	  serial_plan = fftw_create_plan(n, dir, plan_flags);

     par_plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir, plan_flags);

     CHECK(par_plan, "failed to create plan!");

     fftw_mpi_local_sizes(par_plan, &local_n, &local_start,
			  &local_n_after_transform,
			  &local_start_after_transform,
			  &total_local_size);

     /* the uniprocessor run needs room for the complete array */
     nalloc = run_serial ? n : total_local_size;

     data = (fftw_complex *)
	  fftw_malloc(nalloc * howmany_fields * sizeof(fftw_complex));
     scratch = (fftw_complex *)
	  fftw_malloc(nalloc * howmany_fields * sizeof(fftw_complex));

     if (io_okay) {
	  WHEN_VERBOSE(2, fftw_mpi_print_plan(par_plan));
     }

     if (run_serial) {
	  FFTW_TIME_FFT(fftw(serial_plan, howmany_fields,
			     data, howmany_fields, 1, scratch, 1, 0),
			data, n * howmany_fields, t_serial);

	  fftw_destroy_plan(serial_plan);

	  WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n",
				 smart_sprint_time(t_serial)));
     }

     /* first pass: no work array */
     MPI_TIME_FFT(fftw_mpi(par_plan, howmany_fields, data, NULL),
		  data, total_local_size * howmany_fields, t_par);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("time for one fft (%d cpus): %s", ncpus,
				 smart_sprint_time(t_par)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n",
				 smart_sprint_time(t_par / n)));
	  WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t_par, n)));
	  if (!only_parallel) {
	       WHEN_VERBOSE(1, printf("parallel speedup: %f\n",
				      t_serial / t_par));
	  }
     }

     /* second pass: same transform, this time with the work array */
     MPI_TIME_FFT(fftw_mpi(par_plan, howmany_fields, data, scratch),
		  data, total_local_size * howmany_fields, t_par);

     if (io_okay) {
	  WHEN_VERBOSE(1, printf("w/WORK: time for one fft (%d cpus): %s", ncpus,
				 smart_sprint_time(t_par)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n",
				 smart_sprint_time(t_par / n)));
	  WHEN_VERBOSE(1, printf("w/WORK: \"mflops\" = 5 (n log2 n) / (t in microseconds)"
				 " = %f\n", howmany_fields * mflops(t_par, n)));
	  if (!only_parallel) {
	       WHEN_VERBOSE(1, printf("w/WORK: parallel speedup: %f\n",
				      t_serial / t_par));
	  }
     }

     fftw_free(data);
     fftw_free(scratch);
     fftw_mpi_destroy_plan(par_plan);

     WHEN_VERBOSE(1, my_printf("\n"));
}
struct fft_plan_3d *fft_3d_create_plan( MPI_Comm comm, int nfast, int nmid, int nslow, int in_ilo, int in_ihi, int in_jlo, int in_jhi, int in_klo, int in_khi, int out_ilo, int out_ihi, int out_jlo, int out_jhi, int out_klo, int out_khi, int scaled, int permute, int *nbuf) { struct fft_plan_3d *plan; int me,nprocs; int i,num,flag,remapflag,fftflag; int first_ilo,first_ihi,first_jlo,first_jhi,first_klo,first_khi; int second_ilo,second_ihi,second_jlo,second_jhi,second_klo,second_khi; int third_ilo,third_ihi,third_jlo,third_jhi,third_klo,third_khi; int out_size,first_size,second_size,third_size,copy_size,scratch_size; int np1,np2,ip1,ip2; int list[50]; // system specific variables #ifdef FFT_SCSL FFT_DATA dummy_d[5]; FFT_PREC dummy_p[5]; int isign,isys; FFT_PREC scalef; #endif #ifdef FFT_INTEL FFT_DATA dummy; #endif #ifdef FFT_T3E FFT_DATA dummy[5]; int isign,isys; double scalef; #endif // query MPI info MPI_Comm_rank(comm,&me); MPI_Comm_size(comm,&nprocs); // compute division of procs in 2 dimensions not on-processor bifactor(nprocs,&np1,&np2); ip1 = me % np1; ip2 = me/np1; // allocate memory for plan data struct plan = (struct fft_plan_3d *) malloc(sizeof(struct fft_plan_3d)); if (plan == NULL) return NULL; // remap from initial distribution to layout needed for 1st set of 1d FFTs // not needed if all procs own entire fast axis initially // first indices = distribution after 1st set of FFTs if (in_ilo == 0 && in_ihi == nfast-1) flag = 0; else flag = 1; MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm); if (remapflag == 0) { first_ilo = in_ilo; first_ihi = in_ihi; first_jlo = in_jlo; first_jhi = in_jhi; first_klo = in_klo; first_khi = in_khi; plan->pre_plan = NULL; } else { first_ilo = 0; first_ihi = nfast - 1; first_jlo = ip1*nmid/np1; first_jhi = (ip1+1)*nmid/np1 - 1; first_klo = ip2*nslow/np2; first_khi = (ip2+1)*nslow/np2 - 1; plan->pre_plan = remap_3d_create_plan(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi, first_ilo,first_ihi,first_jlo,first_jhi, 
first_klo,first_khi,2,0,0,FFT_PRECISION); if (plan->pre_plan == NULL) return NULL; } // 1d FFTs along fast axis plan->length1 = nfast; plan->total1 = nfast * (first_jhi-first_jlo+1) * (first_khi-first_klo+1); // remap from 1st to 2nd FFT // choose which axis is split over np1 vs np2 to minimize communication // second indices = distribution after 2nd set of FFTs second_ilo = ip1*nfast/np1; second_ihi = (ip1+1)*nfast/np1 - 1; second_jlo = 0; second_jhi = nmid - 1; second_klo = ip2*nslow/np2; second_khi = (ip2+1)*nslow/np2 - 1; plan->mid1_plan = remap_3d_create_plan(comm, first_ilo,first_ihi,first_jlo,first_jhi, first_klo,first_khi, second_ilo,second_ihi,second_jlo,second_jhi, second_klo,second_khi,2,1,0,FFT_PRECISION); if (plan->mid1_plan == NULL) return NULL; // 1d FFTs along mid axis plan->length2 = nmid; plan->total2 = (second_ihi-second_ilo+1) * nmid * (second_khi-second_klo+1); // remap from 2nd to 3rd FFT // if final distribution is permute=2 with all procs owning entire slow axis // then this remapping goes directly to final distribution // third indices = distribution after 3rd set of FFTs if (permute == 2 && out_klo == 0 && out_khi == nslow-1) flag = 0; else flag = 1; MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm); if (remapflag == 0) { third_ilo = out_ilo; third_ihi = out_ihi; third_jlo = out_jlo; third_jhi = out_jhi; third_klo = out_klo; third_khi = out_khi; } else { third_ilo = ip1*nfast/np1; third_ihi = (ip1+1)*nfast/np1 - 1; third_jlo = ip2*nmid/np2; third_jhi = (ip2+1)*nmid/np2 - 1; third_klo = 0; third_khi = nslow - 1; } plan->mid2_plan = remap_3d_create_plan(comm, second_jlo,second_jhi,second_klo,second_khi, second_ilo,second_ihi, third_jlo,third_jhi,third_klo,third_khi, third_ilo,third_ihi,2,1,0,FFT_PRECISION); if (plan->mid2_plan == NULL) return NULL; // 1d FFTs along slow axis plan->length3 = nslow; plan->total3 = (third_ihi-third_ilo+1) * (third_jhi-third_jlo+1) * nslow; // remap from 3rd FFT to final distribution // not needed if 
permute = 2 and third indices = out indices on all procs if (permute == 2 && out_ilo == third_ilo && out_ihi == third_ihi && out_jlo == third_jlo && out_jhi == third_jhi && out_klo == third_klo && out_khi == third_khi) flag = 0; else flag = 1; MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm); if (remapflag == 0) plan->post_plan = NULL; else { plan->post_plan = remap_3d_create_plan(comm, third_klo,third_khi,third_ilo,third_ihi, third_jlo,third_jhi, out_klo,out_khi,out_ilo,out_ihi, out_jlo,out_jhi,2,(permute+1)%3,0,FFT_PRECISION); if (plan->post_plan == NULL) return NULL; } // configure plan memory pointers and allocate work space // out_size = amount of memory given to FFT by user // first/second/third_size = amount of memory needed after pre,mid1,mid2 remaps // copy_size = amount needed internally for extra copy of data // scratch_size = amount needed internally for remap scratch space // for each remap: // out space used for result if big enough, else require copy buffer // accumulate largest required remap scratch space out_size = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1); first_size = (first_ihi-first_ilo+1) * (first_jhi-first_jlo+1) * (first_khi-first_klo+1); second_size = (second_ihi-second_ilo+1) * (second_jhi-second_jlo+1) * (second_khi-second_klo+1); third_size = (third_ihi-third_ilo+1) * (third_jhi-third_jlo+1) * (third_khi-third_klo+1); copy_size = 0; scratch_size = 0; if (plan->pre_plan) { if (first_size <= out_size) plan->pre_target = 0; else { plan->pre_target = 1; copy_size = MAX(copy_size,first_size); } scratch_size = MAX(scratch_size,first_size); } if (plan->mid1_plan) { if (second_size <= out_size) plan->mid1_target = 0; else { plan->mid1_target = 1; copy_size = MAX(copy_size,second_size); } scratch_size = MAX(scratch_size,second_size); } if (plan->mid2_plan) { if (third_size <= out_size) plan->mid2_target = 0; else { plan->mid2_target = 1; copy_size = MAX(copy_size,third_size); } scratch_size = 
MAX(scratch_size,third_size); } if (plan->post_plan) scratch_size = MAX(scratch_size,out_size); *nbuf = copy_size + scratch_size; if (copy_size) { plan->copy = (FFT_DATA *) malloc(copy_size*sizeof(FFT_DATA)); if (plan->copy == NULL) return NULL; } else plan->copy = NULL; if (scratch_size) { plan->scratch = (FFT_DATA *) malloc(scratch_size*sizeof(FFT_DATA)); if (plan->scratch == NULL) return NULL; } else plan->scratch = NULL; // system specific pre-computation of 1d FFT coeffs // and scaling normalization #if defined(FFT_SGI) plan->coeff1 = (FFT_DATA *) malloc((nfast+15)*sizeof(FFT_DATA)); plan->coeff2 = (FFT_DATA *) malloc((nmid+15)*sizeof(FFT_DATA)); plan->coeff3 = (FFT_DATA *) malloc((nslow+15)*sizeof(FFT_DATA)); if (plan->coeff1 == NULL || plan->coeff2 == NULL || plan->coeff3 == NULL) return NULL; FFT_1D_INIT(nfast,plan->coeff1); FFT_1D_INIT(nmid,plan->coeff2); FFT_1D_INIT(nslow,plan->coeff3); if (scaled == 0) plan->scaled = 0; else { plan->scaled = 1; plan->norm = 1.0/(nfast*nmid*nslow); plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1); } #elif defined(FFT_SCSL) plan->coeff1 = (FFT_PREC *) malloc((2*nfast+30)*sizeof(FFT_PREC)); plan->coeff2 = (FFT_PREC *) malloc((2*nmid+30)*sizeof(FFT_PREC)); plan->coeff3 = (FFT_PREC *) malloc((2*nslow+30)*sizeof(FFT_PREC)); if (plan->coeff1 == NULL || plan->coeff2 == NULL || plan->coeff3 == NULL) return NULL; plan->work1 = (FFT_PREC *) malloc((2*nfast)*sizeof(FFT_PREC)); plan->work2 = (FFT_PREC *) malloc((2*nmid)*sizeof(FFT_PREC)); plan->work3 = (FFT_PREC *) malloc((2*nslow)*sizeof(FFT_PREC)); if (plan->work1 == NULL || plan->work2 == NULL || plan->work3 == NULL) return NULL; isign = 0; scalef = 1.0; isys = 0; FFT_1D_INIT(isign,nfast,scalef,dummy_d,dummy_d,plan->coeff1,dummy_p,&isys); FFT_1D_INIT(isign,nmid,scalef,dummy_d,dummy_d,plan->coeff2,dummy_p,&isys); FFT_1D_INIT(isign,nslow,scalef,dummy_d,dummy_d,plan->coeff3,dummy_p,&isys); if (scaled == 0) plan->scaled = 0; else { plan->scaled = 1; 
plan->norm = 1.0/(nfast*nmid*nslow); plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1); } #elif defined(FFT_ACML) plan->coeff1 = (FFT_DATA *) malloc((3*nfast+100)*sizeof(FFT_DATA)); plan->coeff2 = (FFT_DATA *) malloc((3*nmid+100)*sizeof(FFT_DATA)); plan->coeff3 = (FFT_DATA *) malloc((3*nslow+100)*sizeof(FFT_DATA)); if (plan->coeff1 == NULL || plan->coeff2 == NULL || plan->coeff3 == NULL) return NULL; int isign = 100; int isys = 1; int info = 0; FFT_DATA *dummy = NULL; FFT_1D(&isign,&isys,&nfast,dummy,plan->coeff1,&info); FFT_1D(&isign,&isys,&nmid,dummy,plan->coeff2,&info); FFT_1D(&isign,&isys,&nslow,dummy,plan->coeff3,&info); if (scaled == 0) { plan->scaled = 0; plan->norm = sqrt(nfast*nmid*nslow); plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1); } else { plan->scaled = 1; plan->norm = sqrt(nfast*nmid*nslow); plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1); } #elif defined(FFT_INTEL) flag = 0; num = 0; factor(nfast,&num,list); for (i = 0; i < num; i++) if (list[i] != 2 && list[i] != 3 && list[i] != 5) flag = 1; num = 0; factor(nmid,&num,list); for (i = 0; i < num; i++) if (list[i] != 2 && list[i] != 3 && list[i] != 5) flag = 1; num = 0; factor(nslow,&num,list); for (i = 0; i < num; i++) if (list[i] != 2 && list[i] != 3 && list[i] != 5) flag = 1; MPI_Allreduce(&flag,&fftflag,1,MPI_INT,MPI_MAX,comm); if (fftflag) { if (me == 0) printf("ERROR: FFTs are not power of 2,3,5\n"); return NULL; } plan->coeff1 = (FFT_DATA *) malloc((3*nfast/2+1)*sizeof(FFT_DATA)); plan->coeff2 = (FFT_DATA *) malloc((3*nmid/2+1)*sizeof(FFT_DATA)); plan->coeff3 = (FFT_DATA *) malloc((3*nslow/2+1)*sizeof(FFT_DATA)); if (plan->coeff1 == NULL || plan->coeff2 == NULL || plan->coeff3 == NULL) return NULL; flag = 0; FFT_1D_INIT(&dummy,&nfast,&flag,plan->coeff1); FFT_1D_INIT(&dummy,&nmid,&flag,plan->coeff2); FFT_1D_INIT(&dummy,&nslow,&flag,plan->coeff3); if (scaled == 0) { plan->scaled = 1; plan->norm = 
nfast*nmid*nslow; plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1); } else plan->scaled = 0; #elif defined(FFT_MKL) DftiCreateDescriptor( &(plan->handle_fast), FFT_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nfast); DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)plan->total1/nfast); DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast); DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast); DftiCommitDescriptor(plan->handle_fast); DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nmid); DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)plan->total2/nmid); DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid); DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid); DftiCommitDescriptor(plan->handle_mid); DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nslow); DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)plan->total3/nslow); DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow); DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow); DftiCommitDescriptor(plan->handle_slow); if (scaled == 0) plan->scaled = 0; else { plan->scaled = 1; plan->norm = 1.0/(nfast*nmid*nslow); plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1); } #elif defined(FFT_DEC) if (scaled == 0) { plan->scaled = 1; plan->norm = nfast*nmid*nslow; plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1); } else plan->scaled = 0; #elif defined(FFT_T3E) plan->coeff1 = (double *) malloc((12*nfast)*sizeof(double)); plan->coeff2 = (double *) malloc((12*nmid)*sizeof(double)); plan->coeff3 = (double *) 
malloc((12*nslow)*sizeof(double)); if (plan->coeff1 == NULL || plan->coeff2 == NULL || plan->coeff3 == NULL) return NULL; plan->work1 = (double *) malloc((8*nfast)*sizeof(double)); plan->work2 = (double *) malloc((8*nmid)*sizeof(double)); plan->work3 = (double *) malloc((8*nslow)*sizeof(double)); if (plan->work1 == NULL || plan->work2 == NULL || plan->work3 == NULL) return NULL; isign = 0; scalef = 1.0; isys = 0; FFT_1D_INIT(&isign,&nfast,&scalef,dummy,dummy,plan->coeff1,dummy,&isys); FFT_1D_INIT(&isign,&nmid,&scalef,dummy,dummy,plan->coeff2,dummy,&isys); FFT_1D_INIT(&isign,&nslow,&scalef,dummy,dummy,plan->coeff3,dummy,&isys); if (scaled == 0) plan->scaled = 0; else { plan->scaled = 1; plan->norm = 1.0/(nfast*nmid*nslow); plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1); } #elif defined(FFT_FFTW2) plan->plan_fast_forward = fftw_create_plan(nfast,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); plan->plan_fast_backward = fftw_create_plan(nfast,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); if (nmid == nfast) { plan->plan_mid_forward = plan->plan_fast_forward; plan->plan_mid_backward = plan->plan_fast_backward; } else { plan->plan_mid_forward = fftw_create_plan(nmid,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); plan->plan_mid_backward = fftw_create_plan(nmid,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); } if (nslow == nfast) { plan->plan_slow_forward = plan->plan_fast_forward; plan->plan_slow_backward = plan->plan_fast_backward; } else if (nslow == nmid) { plan->plan_slow_forward = plan->plan_mid_forward; plan->plan_slow_backward = plan->plan_mid_backward; } else { plan->plan_slow_forward = fftw_create_plan(nslow,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); plan->plan_slow_backward = fftw_create_plan(nslow,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); } if (scaled == 0) plan->scaled = 0; else { plan->scaled = 1; plan->norm = 1.0/(nfast*nmid*nslow); plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1); } #elif 
defined(FFT_FFTW3) plan->plan_fast_forward = FFTW_API(plan_many_dft)(1, &nfast,plan->total1/plan->length1, NULL,&nfast,1,plan->length1, NULL,&nfast,1,plan->length1, FFTW_FORWARD,FFTW_ESTIMATE); plan->plan_fast_backward = FFTW_API(plan_many_dft)(1, &nfast,plan->total1/plan->length1, NULL,&nfast,1,plan->length1, NULL,&nfast,1,plan->length1, FFTW_BACKWARD,FFTW_ESTIMATE); plan->plan_mid_forward = FFTW_API(plan_many_dft)(1, &nmid,plan->total2/plan->length2, NULL,&nmid,1,plan->length2, NULL,&nmid,1,plan->length2, FFTW_FORWARD,FFTW_ESTIMATE); plan->plan_mid_backward = FFTW_API(plan_many_dft)(1, &nmid,plan->total2/plan->length2, NULL,&nmid,1,plan->length2, NULL,&nmid,1,plan->length2, FFTW_BACKWARD,FFTW_ESTIMATE); plan->plan_slow_forward = FFTW_API(plan_many_dft)(1, &nslow,plan->total3/plan->length3, NULL,&nslow,1,plan->length3, NULL,&nslow,1,plan->length3, FFTW_FORWARD,FFTW_ESTIMATE); plan->plan_slow_backward = FFTW_API(plan_many_dft)(1, &nslow,plan->total3/plan->length3, NULL,&nslow,1,plan->length3, NULL,&nslow,1,plan->length3, FFTW_BACKWARD,FFTW_ESTIMATE); if (scaled == 0) plan->scaled = 0; else { plan->scaled = 1; plan->norm = 1.0/(nfast*nmid*nslow); plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1); } #else plan->cfg_fast_forward = kiss_fft_alloc(nfast,0,NULL,NULL); plan->cfg_fast_backward = kiss_fft_alloc(nfast,1,NULL,NULL); if (nmid == nfast) { plan->cfg_mid_forward = plan->cfg_fast_forward; plan->cfg_mid_backward = plan->cfg_fast_backward; } else { plan->cfg_mid_forward = kiss_fft_alloc(nmid,0,NULL,NULL); plan->cfg_mid_backward = kiss_fft_alloc(nmid,1,NULL,NULL); } if (nslow == nfast) { plan->cfg_slow_forward = plan->cfg_fast_forward; plan->cfg_slow_backward = plan->cfg_fast_backward; } else if (nslow == nmid) { plan->cfg_slow_forward = plan->cfg_mid_forward; plan->cfg_slow_backward = plan->cfg_mid_backward; } else { plan->cfg_slow_forward = kiss_fft_alloc(nslow,0,NULL,NULL); plan->cfg_slow_backward = kiss_fft_alloc(nslow,1,NULL,NULL); 
} if (scaled == 0) plan->scaled = 0; else { plan->scaled = 1; plan->norm = 1.0/(nfast*nmid*nslow); plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1); } #endif return plan; }
void test_planner(int rank) { /* * create and destroy many plans, at random. Check the * garbage-collecting allocator of twiddle factors */ int i, dim; int r, s; fftw_plan p[PLANNER_TEST_SIZE]; fftwnd_plan pnd[PLANNER_TEST_SIZE]; int *narr, maxdim; chk_mem_leak = 0; verbose--; please_wait(); if (rank < 1) rank = 1; narr = (int *) fftw_malloc(rank * sizeof(int)); maxdim = (int) pow(8192.0, 1.0/rank); for (i = 0; i < PLANNER_TEST_SIZE; ++i) { p[i] = (fftw_plan) 0; pnd[i] = (fftwnd_plan) 0; } for (i = 0; i < PLANNER_TEST_SIZE * PLANNER_TEST_SIZE; ++i) { r = rand(); if (r < 0) r = -r; r = r % PLANNER_TEST_SIZE; for (dim = 0; dim < rank; ++dim) { do { s = rand(); if (s < 0) s = -s; s = s % maxdim + 1; } while (s == 0); narr[dim] = s; } if (rank == 1) { if (p[r]) fftw_destroy_plan(p[r]); p[r] = fftw_create_plan(narr[0], random_dir(), measure_flag | wisdom_flag); if (paranoid && narr[0] < 200) test_correctness(narr[0]); } if (pnd[r]) fftwnd_destroy_plan(pnd[r]); pnd[r] = fftwnd_create_plan(rank, narr, random_dir(), measure_flag | wisdom_flag); if (i % (PLANNER_TEST_SIZE * PLANNER_TEST_SIZE / 20) == 0) { WHEN_VERBOSE(0, printf("test planner: so far so good\n")); WHEN_VERBOSE(0, printf("test planner: iteration %d out of %d\n", i, PLANNER_TEST_SIZE * PLANNER_TEST_SIZE)); } } for (i = 0; i < PLANNER_TEST_SIZE; ++i) { if (p[i]) fftw_destroy_plan(p[i]); if (pnd[i]) fftwnd_destroy_plan(pnd[i]); } fftw_free(narr); verbose++; chk_mem_leak = 1; }
int main(int argc, char **argv) { int sample, samples, spectrum, spectra, bin, bins, dummy; float re, im, binPower; fftw_plan sigPlan; FILE *iFp, *oFp; if (getArgs(argc, argv)) exit (1); if (!(iFp = fopen(infile, "r"))) { cout << " opening input file" << endl; exit(2); } if (!(oFp = fopen(outfile, "w"))) { cout << " opening output file" << endl; exit(3); } samples = subbands * halfFrames * 512; bins = 2 * subbands * 512; spectra = samples / bins; sigPlan = fftw_create_plan(bins, FFTW_FORWARD, FFTW_ESTIMATE); double power[bins]; float_complex td[samples]; float_complex fd[samples]; // extract the samples from the channel file for (sample = 0; sample < samples; sample++) { fscanf(iFp, "%d (%f, %f)\n", &dummy, &re, &im); td[sample] = float_complex(re, im); } // now perform a full-width fft to create the signal bins fftw(sigPlan, spectra, (fftw_complex *) td, 1, bins, (fftw_complex *) fd, 1, bins); // rearrange the data so that DC is in the middle of the spectrum float_complex temp[bins/2]; for (spectrum = 0; spectrum < spectra; spectrum++) { memcpy(temp, &fd[spectrum*bins], sizeof(float_complex) * bins / 2); memcpy(&fd[spectrum*bins], &fd[spectrum*bins+bins/2], sizeof(float_complex) * bins / 2); memcpy(&fd[spectrum*bins+bins/2], &temp, sizeof(float_complex) * bins / 2); } for (bin = 0; bin < bins; bin++) power[bin] = 0; // now compute the total power in each frequency bin for (spectrum = 0; spectrum < spectra; spectrum++) { for (bin = 0; bin < bins; bin++) { binPower = norm(fd[spectrum*bins+bin]); #ifdef notdef fprintf(oFp, "%03d:%05d (%.3f, %.3f) (%.3f)\n", spectrum, bin, fd[spectrum*bins+bin].real(), fd[spectrum*bins+bin].imag(), binPower); #endif power[bin] += binPower; } } // print the powers in the bins for (bin = 0; bin < bins; bin++) fprintf(oFp, "%05d: %.3le\n", bin, power[bin]); fclose(iFp); fclose(oFp); }
int main(int argc, char **argv) { int c, mu, nu, status, gid; int filename_set = 0; int l_LX_at, l_LXstart_at; int source_location, have_source_flag = 0; int x0, x1, x2, x3, ix; int sx0, sx1, sx2, sx3; int tsize = 0; double *conn = NULL; double *conn2 = (double*)NULL; int verbose = 0; char filename[800]; double ratime, retime; FILE *ofs; int ivec[4], idx[4], imu; double q[4], wre, wim; fftw_complex *inT=NULL, *outT=NULL, *inL=NULL, *outL=NULL; fftw_plan plan_m_T, plan_m_L; while ((c = getopt(argc, argv, "h?vf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } // set the default values set_default_input_values(); if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# [get_corr_v2] reading input parameters from file %s\n", filename); read_input_parser(filename); // some checks on the input data if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { fprintf(stdout, "# [get_corr_v2] T=%d, LX=%d, LY=%d, LZ=%d\n", T_global, LX, LY, LZ); if(g_proc_id==0) fprintf(stderr, "[get_corr_v2] Error, T and L's must be set\n"); usage(); } // initialize MPI parameters mpi_init(argc, argv); /* initialize fftw, create plan with FFTW_FORWARD --- in contrast to * FFTW_BACKWARD in e.g. 
avc_exact */ plan_m_T = fftw_create_plan(T_global, FFTW_FORWARD, FFTW_MEASURE); plan_m_L = fftw_create_plan(LX, FFTW_FORWARD, FFTW_MEASURE); T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "[get_corr_v2] Error from init_geometry\n"); EXIT(1); } geometry(); /**************************************** * allocate memory for the contractions * ****************************************/ conn = (double*)calloc(32 * VOLUME, sizeof(double)); if( (conn==NULL) ) { fprintf(stderr, "[get_corr_v2] Error, could not allocate memory for contr. fields\n"); EXIT(2); } conn2= (double*)calloc(8 * T, sizeof(double)); if( (conn2==NULL) ) { fprintf(stderr, "[get_corr_v2] Error, could not allocate memory for corr.\n"); EXIT(3); } /***************************************** * prepare Fourier transformation arrays * *****************************************/ inT = (fftw_complex*)malloc(T * sizeof(fftw_complex)); inL = (fftw_complex*)malloc(LX * sizeof(fftw_complex)); outT = (fftw_complex*)malloc(T * sizeof(fftw_complex)); outL = (fftw_complex*)malloc(LX * sizeof(fftw_complex)); if( inT==NULL || inL==NULL || outT==NULL || outL==NULL ) { fprintf(stderr, "[get_corr_v2] Error, could not allocate fftw fields\n"); EXIT(4); } /******************************** * determine source coordinates * ********************************/ /* have_source_flag = (int)(g_source_location/(LX*LY*LZ)>=Tstart && g_source_location/(LX*LY*LZ)<(Tstart+T)); if(have_source_flag==1) fprintf(stdout, "# [get_corr_v2] process %2d has source location\n", g_cart_id); sx0 = g_source_location/(LX*LY*LZ)-Tstart; sx1 = (g_source_location%(LX*LY*LZ)) / 
(LY*LZ); sx2 = (g_source_location%(LY*LZ)) / LZ; sx3 = (g_source_location%LZ); if(have_source_flag==1) { fprintf(stdout, "# [get_corr_v2] local source coordinates: (%3d,%3d,%3d,%3d)\n", sx0, sx1, sx2, sx3); source_location = g_ipt[sx0][sx1][sx2][sx3]; } have_source_flag = 0; */ for(gid=g_gaugeid; gid<=g_gaugeid2; gid+=g_gauge_step) { memset(conn, 0, 32*VOLUME*sizeof(double)); memset(conn2, 0, 8*T*sizeof(double)); /*********************** * read contractions * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; sprintf(filename, "%s.%.4d", filename_prefix, gid); if(format==2 || format==3) { status = read_contraction(conn, NULL, filename, 16); } else if( format==0) { status = read_lime_contraction(conn, filename, 16, 0); } if(status != 0) { // fprintf(stderr, "[get_corr_v2] Error from read_contractions, status was %d\n", status); // EXIT(5); fprintf(stderr, "[get_corr_v2] Warning, could not read contractions for gid %d, status was %d\n", gid, status); continue; } retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# [get_corr_v2] time to read contractions %e seconds\n", retime-ratime); // TEST Pi_mm /* fprintf(stdout, "# [get_corr_v2] Pi_mm\n"); for(x0=0; x0<T; x0++) { for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[x0][x1][x2][x3]; for(nu=0;nu<4;nu++) { wre = conn[_GWI(5*nu,ix,VOLUME)]; wim = conn[_GWI(5*nu,ix,VOLUME)+1]; fprintf(stdout, "\t%3d%3d%3d%3d%3d%16.7e%16.7e\n", nu, x0, x1, x2, x3, wre, wim); } }}}} */ // TEST Ward Identity /* fprintf(stdout, "# [get_corr_v2] Ward identity\n"); for(x0=0; x0<T; x0++) { q[0] = 2. * sin(M_PI * (double)x0 / (double)T); for(x1=0; x1<LX; x1++) { q[1] = 2. * sin(M_PI * (double)x1 / (double)LX); for(x2=0; x2<LY; x2++) { q[2] = 2. * sin(M_PI * (double)x2 / (double)LY); for(x3=0; x3<LZ; x3++) { q[3] = 2. 
* sin(M_PI * (double)x3 / (double)LZ); ix = g_ipt[x0][x1][x2][x3]; for(nu=0;nu<4;nu++) { wre = q[0] * conn[_GWI(4*0+nu,ix,VOLUME)] + q[1] * conn[_GWI(4*1+nu,ix,VOLUME)] \ + q[2] * conn[_GWI(4*2+nu,ix,VOLUME)] + q[3] * conn[_GWI(4*3+nu,ix,VOLUME)]; wim = q[0] * conn[_GWI(4*0+nu,ix,VOLUME)+1] + q[1] * conn[_GWI(4*1+nu,ix,VOLUME)+1] \ + q[2] * conn[_GWI(4*2+nu,ix,VOLUME)+1] + q[3] * conn[_GWI(4*3+nu,ix,VOLUME)+1]; fprintf(stdout, "\t%3d%3d%3d%3d%3d%16.7e%16.7e\n", nu, x0, x1, x2, x3, wre, wim); } }}}} */ /*********************** * fill the correlator * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; for(mu=0; mu<4; mu++) { ivec[0] = (0 + mu)%4; ivec[1] = (1 + mu)%4; ivec[2] = (2 + mu)%4; ivec[3] = (3 + mu)%4; idx[ivec[1]] = 0; idx[ivec[2]] = 0; idx[ivec[3]] = 0; tsize = (mu==0) ? T : LX; for(x0=0; x0<tsize; x0++) { idx[ivec[0]] = x0; for(nu=1; nu<4; nu++) { imu = (mu+nu) % 4; // ix = get_indexf(idx[0],idx[1],idx[2],idx[3],imu,imu); ix = _GWI(5*imu, g_ipt[idx[0]][idx[1]][idx[2]][idx[3]], VOLUME); // TEST //fprintf(stdout, "\tPi_%d_%d x0=%3d mu=%3d\tix=%8d\n", mu, mu, x0, imu, ix); conn2[2*(mu*T+x0) ] += conn[ix ]; conn2[2*(mu*T+x0)+1] += conn[ix+1]; } } } retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# [get_corr_v2] time to fill correlator %e seconds\n", retime-ratime); // TEST /* fprintf(stdout, "# [get_corr_v2] correlators\n"); for(mu=0;mu<4;mu++) { for(x0=0; x0<T; x0++) { fprintf(stdout, "\t%3d%3d%25.16e%25.16e\n", mu, x0, conn2[2*(mu*T+x0)], conn2[2*(mu*T+x0)+1]); }} */ /***************************************** * reverse Fourier transformation *****************************************/ ratime = (double)clock() / CLOCKS_PER_SEC; memcpy((void*)inT, (void*)conn2, 2*T*sizeof(double)); fftw_one(plan_m_T, inT, outT); for(ix=0; ix<T; ix++) { conn2[2*ix ] = outT[ix].re / (double)T; conn2[2*ix+1] = outT[ix].im / (double)T; } for(mu=1; mu<4; mu++) { memcpy((void*)inL, (void*)(conn2+2*mu*T), 2*LX*sizeof(double)); fftw_one(plan_m_L, inL, 
outL); for(ix=0; ix<LX; ix++) { conn2[2*(mu*T+ix) ] = outL[ix].re / (double)LX; conn2[2*(mu*T+ix)+1] = outL[ix].im / (double)LX; } } retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# [get_corr_v2] time for Fourier transform %e seconds\n", retime-ratime); ratime = (double)clock() / CLOCKS_PER_SEC; sprintf(filename, "v0v0_corr.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "[get_corr_v2] Error, could not open file %s for writing\n", filename); EXIT(6); } x0 = 0; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], 0., gid); for(x0=1; x0<T/2; x0++) { fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], conn2[2*(T-x0)], gid); } x0 = T / 2; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], 0., gid); fclose(ofs); for(mu=1; mu<4; mu++) { sprintf(filename, "v%dv%d_corr.%.4d", mu, mu, gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "[get_corr_v2] Error, could not open file %s for writing\n", filename); EXIT(7); } x0 = 0; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*(mu*T+x0)], 0., gid); for(x0=1; x0<LX/2; x0++) { fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*(mu*T+x0)], conn2[2*(mu*T+ LX-x0)], gid); } x0 = LX / 2; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*(mu*T+x0)], 0., gid); fclose(ofs); } retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# [get_corr_v2] time to write correlator %e seconds\n", retime-ratime); } // of loop on gid /*************************************** * free the allocated memory, finalize * ***************************************/ free_geometry(); fftw_free(inT); fftw_free(outT); fftw_free(inL); fftw_free(outL); free(conn); free(conn2); fftw_destroy_plan(plan_m_T); fftw_destroy_plan(plan_m_L); fprintf(stdout, "# [get_corr_v2] %s# [get_corr_v2] end of run\n", ctime(&g_the_time)); fflush(stdout); fprintf(stderr, "[get_corr_v2] %s[get_corr_v2] end of run\n", ctime(&g_the_time)); 
fflush(stderr); return(0); }
/* void fft_2k_test( fftw_complex *out ) { memset(fftw_in, 0, sizeof(fftw_complex)*M2KS); int m = (M2KS/2)+32;//1704; fftw_in[m].re = 0.7; fftw_one( m_fftw_2k_plan, fftw_in, out ); return; } */ void init_dvb_t_fft( void ) { // // Plans // #ifdef USE_AVFFT m_avfft_2k_context = av_fft_init (11, 1); m_avfft_4k_context = av_fft_init (12, 1); m_avfft_8k_context = av_fft_init (13, 1); m_avfft_16k_context = av_fft_init (14, 1); m_fft_in = (fft_complex*)av_malloc(sizeof(fft_complex)*M16KS); m_fft_out = (fft_complex*)av_malloc(sizeof(fft_complex)*M16KS); #else FILE *fp; if((fp=fopen(dvb_config_get_path("fftw_wisdom"),"r"))!=NULL) { fftw_import_wisdom_from_file(fp); m_fftw_2k_plan = fftw_create_plan(M2KS, FFTW_BACKWARD, FFTW_USE_WISDOM); m_fftw_4k_plan = fftw_create_plan(M4KS, FFTW_BACKWARD, FFTW_USE_WISDOM); m_fftw_8k_plan = fftw_create_plan(M8KS, FFTW_BACKWARD, FFTW_USE_WISDOM); m_fftw_16k_plan = fftw_create_plan(M16KS, FFTW_BACKWARD, FFTW_USE_WISDOM); fftw_import_wisdom_from_file(fp); } else { if((fp=fopen(dvb_config_get_path("fftw_wisdom"),"w"))!=NULL) { m_fftw_2k_plan = fftw_create_plan(M2KS, FFTW_BACKWARD, FFTW_MEASURE | FFTW_USE_WISDOM); m_fftw_4k_plan = fftw_create_plan(M4KS, FFTW_BACKWARD, FFTW_MEASURE | FFTW_USE_WISDOM); m_fftw_8k_plan = fftw_create_plan(M8KS, FFTW_BACKWARD, FFTW_MEASURE | FFTW_USE_WISDOM); m_fftw_16k_plan = fftw_create_plan(M16KS, FFTW_BACKWARD, FFTW_MEASURE | FFTW_USE_WISDOM); if(fp!=NULL) fftw_export_wisdom_to_file(fp); } } m_fft_in = (fft_complex*)fftw_malloc(sizeof(fft_complex)*M16KS); m_fft_out = (fft_complex*)fftw_malloc(sizeof(fft_complex)*M16KS); #endif if( m_format.tm == TM_2K) { m_N = M2KS; switch( m_format.chan ) { case CH_8M: case CH_7M: case CH_6M: m_IR = 1; break; case CH_4M: case CH_3M: case CH_2M: case CH_1M: m_IR = 2; break; case CH_500K: m_IR = 4; break; } } if( m_format.tm == TM_8K) { m_N = M8KS; switch( m_format.chan ) { case CH_8M: case CH_7M: case CH_6M: m_IR = 1; break; case CH_4M: case CH_3M: case CH_2M: case CH_1M: m_IR = 
2; break; } } create_correction_table( m_N, m_IR ); }