/*
 * Execute `howmany' multi-dimensional transforms described by plan p.
 *
 * If the plan needs scratch space (p->nwork != 0) but carries none
 * (p->work == NULL), a temporary array is allocated here and released
 * before returning; otherwise the plan's own array is used.
 *
 * rank 0: nothing is computed.
 * rank 1: a single call to fftw(); for in-place plans the scratch array
 *         is passed in the output slot.
 * other:  for in-place plans the output description is aliased to the
 *         input; the work is then delegated to fftwnd_aux_howmany()
 *         (when howmany > 1 and odist < ostride) or to one call of
 *         fftwnd_aux() per transform.
 */
void fftwnd(fftwnd_plan p, int howmany,
            fftw_complex *in, int istride, int idist,
            fftw_complex *out, int ostride, int odist)
{
     fftw_complex *scratch;
     int scratch_is_local;
     int t;

#ifdef FFTW_DEBUG
     if (p->rank > 0 && (p->plans[0]->flags & FFTW_THREADSAFE)
         && p->nwork && p->work)
          fftw_die("bug with FFTW_THREADSAFE flag\n");
#endif

     /* do we have to provide (and later free) the scratch array? */
     scratch_is_local = (p->nwork && !p->work);
     if (scratch_is_local)
          scratch = (fftw_complex *) fftw_malloc(p->nwork * sizeof(fftw_complex));
     else
          scratch = p->work;

     if (p->rank == 1) {
          if (p->is_in_place)
               fftw(p->plans[0], howmany, in, istride, idist,
                    scratch, 1, 0);
          else
               fftw(p->plans[0], howmany, in, istride, idist,
                    out, ostride, odist);
     } else if (p->rank != 0) {
          /* rank >= 2 */
          if (p->is_in_place) {
               out = in;
               ostride = istride;
               odist = idist;
          }

          if (howmany > 1 && odist < ostride) {
               fftwnd_aux_howmany(p, 0, howmany,
                                  in, istride, idist,
                                  out, ostride, odist,
                                  scratch);
          } else {
               for (t = 0; t < howmany; ++t)
                    fftwnd_aux(p, 0,
                               in + t * idist, istride,
                               out + t * odist, ostride,
                               scratch);
          }
     }

     if (scratch_is_local)
          fftw_free(scratch);
}
/*
 * Transform the n elements of `in' into `out', whatever kind of plan
 * was supplied.  An out-of-place plan is executed directly; for an
 * in-place plan (FFTW_IN_PLACE set in plan->flags) the input is first
 * copied to `out' and the transform is then run on `out'.
 */
static void fftw_out_of_place(fftw_plan plan, int n,
                              fftw_complex *in, fftw_complex *out)
{
     if (!(plan->flags & FFTW_IN_PLACE)) {
          fftw(plan, 1, in, 1, n, out, 1, n);
          return;
     }

     /* in-place plan: work on a copy so that `in' is left alone */
     array_copy(out, in, n);
     fftw(plan, 1, out, 1, n, (fftw_complex *) 0, 1, n);
}
/*
 * Fortran-callable wrapper: run the plan *p once on the array `a'
 * with unit stride.  No output array is supplied (null pointer, zero
 * stride and distance).  `ldz' is accepted but not used here.
 * Always returns 0.
 */
int F77_FUNC_ (fft_z_stick_single, FFT_Z_STICK_SINGLE)
     (fftw_plan *p, FFTW_COMPLEX *a, int *ldz)
{
   const int howmany = 1;
   const int istride = 1;
   const int idist = 0;

   fftw(*p, howmany, a, istride, idist, (FFTW_COMPLEX *) 0, 0, 0);
   return 0;
}
/*
 * Variant of fftwnd_aux that carries the `howmany' loop all the way
 * down to the individual 1d transforms, for better locality when
 * dist < stride.
 *
 * The dimensions after cur_dim are handled first (directly when only
 * the last dimension remains, recursively otherwise); dimension
 * cur_dim is then transformed in place inside `out', either straight
 * from the strided data or, when p->nbuffers != 0, through
 * fftw_buffered() using the storage that starts at work + n.
 */
void fftwnd_aux_howmany(fftwnd_plan p, int cur_dim,
                        int howmany,
                        fftw_complex *in, int istride, int idist,
                        fftw_complex *out, int ostride, int odist,
                        fftw_complex *work)
{
     const int len = p->n[cur_dim];
     const int inner = p->n_after[cur_dim];
     int slab, col;

     if (cur_dim != p->rank - 2) {
          /*
           * at least two dimensions remain: recurse over hyperslabs
           * to get maximum locality
           */
          for (slab = 0; slab < len; ++slab)
               fftwnd_aux_howmany(p, cur_dim + 1, howmany,
                                  in + slab * inner * istride,
                                  istride, idist,
                                  out + slab * inner * ostride,
                                  ostride, odist,
                                  work);
     } else {
          /* only the last dimension remains: transform it directly */
          fftw_plan last = p->plans[p->rank - 1];

          for (slab = 0; slab < len; ++slab) {
               fftw_complex *src = in + slab * inner * istride;

               if (p->is_in_place)
                    fftw(last, howmany, src, istride, idist,
                         work, 1, 0);
               else
                    fftw(last, howmany, src, istride, idist,
                         out + slab * inner * ostride,
                         ostride, odist);
          }
     }

     /* now the current dimension, in place within `out' */
     if (p->nbuffers == 0) {
          for (col = 0; col < inner; ++col)
               fftw(p->plans[cur_dim], howmany,
                    out + col * ostride, inner * ostride, odist,
                    work, 1, 0);
     } else {
          /* go through contiguous copy buffers */
          for (col = 0; col < inner; ++col)
               fftw_buffered(p->plans[cur_dim], howmany,
                             out + col * ostride, inner * ostride, odist,
                             work, p->nbuffers, work + len);
     }
}
void dft(double *jr, double *ji, int n, int iflag) { fftw_plan plan; int i; double ninv; FFTW_COMPLEX *cbuf; static int wisdom_inited=0; char *ram_cache_wisdom; int plan_flags; if(!wisdom_inited) { wisdom_inited=1; wisdom_file=getenv("GRACE_FFTW_WISDOM_FILE"); ram_cache_wisdom=getenv("GRACE_FFTW_RAM_WISDOM"); if(ram_cache_wisdom) sscanf(ram_cache_wisdom, "%d", &using_wisdom); /* turn on wisdom if it is requested even without persistent storage */ if(wisdom_file && wisdom_file[0] ) { /* if a file was specified in GRACE_FFTW_WISDOM_FILE, try to read it */ FILE *wf; fftw_status fstat; wf=fopen(wisdom_file,"r"); if(wf) { fstat=fftw_import_wisdom_from_file(wf); fclose(wf); initial_wisdom=fftw_export_wisdom_to_string(); } else initial_wisdom=0; atexit(save_wisdom); using_wisdom=1; /* if a file is specified, always use wisdom */ } } plan_flags=using_wisdom? (FFTW_USE_WISDOM | FFTW_MEASURE) : FFTW_ESTIMATE; plan=fftw_create_plan(n, iflag?FFTW_BACKWARD:FFTW_FORWARD, plan_flags | FFTW_IN_PLACE); cbuf=xcalloc(n, sizeof(*cbuf)); if(!cbuf) return; for(i=0; i<n; i++) { cbuf[i].re=jr[i]; cbuf[i].im=ji[i]; } fftw(plan, 1, cbuf, 1, 1, 0, 1, 1); fftw_destroy_plan(plan); if(!iflag) { ninv=1.0/n; for(i=0; i<n; i++) { jr[i]=cbuf[i].re*ninv; ji[i]=cbuf[i].im*ninv; } } else { for(i=0; i<n; i++) { jr[i]=cbuf[i].re; ji[i]=cbuf[i].im; } } XCFREE(cbuf); }
/*
 * Fortran-callable wrapper: run the plan *p on *nstick_l unit-stride
 * sequences stored in `zstick', consecutive sequences being *ldz
 * elements apart.  No output array is supplied.  Always returns 0.
 */
int F77_FUNC_ (fft_z_stick, FFT_Z_STICK)
     (fftw_plan *p, FFTW_COMPLEX *zstick, int *ldz, int *nstick_l)
{
   const int n_sticks = *nstick_l;
   const int stick_dist = *ldz;

   fftw(*p, n_sticks, zstick, 1, stick_dist, 0, 0, 0);
   return 0;
}
/*
 * Time a 1d transform of size n in direction dir.
 *
 * A plan is created (a "specific" plan bound to the test arrays when
 * `specific' is nonzero) and the planning time is reported at verbosity
 * 2.  In paranoid mode, out-of-place plans are additionally validated
 * with test_ergun().  The transform itself is timed with FFTW_TIME_FFT
 * over howmany_fields interleaved fields, and the results are printed
 * at verbosity 1.
 */
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     const int plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;
     fftw_complex *in, *out;
     fftw_plan plan;
     fftw_time begin, end;
     double t;

     in = (fftw_complex *) fftw_malloc(n * howmany_fields
                                       * sizeof(fftw_complex));
     out = (fftw_complex *) fftw_malloc(n * howmany_fields
                                        * sizeof(fftw_complex));

     /* time the planner */
     begin = fftw_get_time();
     if (specific)
          plan = fftw_create_plan_specific(n, dir, plan_flags,
                                           in, howmany_fields,
                                           out, howmany_fields);
     else
          plan = fftw_create_plan(n, dir, plan_flags);
     end = fftw_get_time();

     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, fftw_print_plan(plan));

     if (paranoid && !(flags & FFTW_IN_PLACE)) {
          begin = fftw_get_time();
          test_ergun(n, dir, plan);
          end = fftw_get_time();
          t = fftw_time_to_sec(fftw_time_diff(end, begin));
          WHEN_VERBOSE(2, printf("time for validation: %f s\n", t));
     }

     /* time the transform itself */
     FFTW_TIME_FFT(fftw(plan, howmany_fields,
                        in, howmany_fields, 1, out, howmany_fields, 1),
                   in, n * howmany_fields, t);

     fftw_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
                            " = %f\n", howmany_fields * mflops(t, n)));

     fftw_free(in);
     fftw_free(out);

     WHEN_VERBOSE(1, printf("\n"));
}
/*
 * Complex-to-real counterpart of fftwnd_aux_howmany: the `howmany'
 * loop is carried down to the individual 1d transforms.
 *
 * Dimension cur_dim is transformed first, in place inside `in'.  The
 * remaining dimensions are then handled either directly (when only the
 * last, real, dimension is left) or recursively, one hyperslab at a
 * time.
 */
void rfftwnd_c2real_aux_howmany(fftwnd_plan p, int cur_dim,
                                int howmany,
                                fftw_complex *in, int istride, int idist,
                                fftw_real *out, int ostride, int odist,
                                fftw_complex *work)
{
     const int len = p->n[cur_dim];
     const int inner = p->n_after[cur_dim];
     fftw_plan last = p->plans[p->rank - 1];
     int slab, col;

     /* the current dimension, in place */
     for (col = 0; col < inner; ++col)
          fftw(p->plans[cur_dim], howmany,
               in + col * istride, inner * istride, idist,
               work, 1, 0);

     if (cur_dim != p->rank - 2) {
          /*
           * at least two dimensions remain: recurse over hyperslabs.
           * The real array's per-slab extent differs from the complex
           * one, hence the separate inner_r.
           */
          const int nr = last->n;
          const int inner_r = p->is_in_place
               ? inner * 2
               : nr * (inner / (nr / 2 + 1));

          for (slab = 0; slab < len; ++slab)
               rfftwnd_c2real_aux_howmany(p, cur_dim + 1, howmany,
                                          in + slab * inner * istride,
                                          istride, idist,
                                          out + slab * inner_r * ostride,
                                          ostride, odist,
                                          work);
          return;
     }

     /* only the last dimension remains: do it directly */
     if (p->is_in_place) {
          for (slab = 0; slab < len; ++slab)
               rfftw_c2real_overlap_aux(last, howmany,
                                        in + (slab * inner * istride),
                                        istride, idist,
                                        out + (slab * inner * ostride) * 2,
                                        ostride, odist,
                                        (fftw_real *) work);
     } else {
          const int nlast = last->n;

          for (slab = 0; slab < len; ++slab)
               rfftw_c2real_aux(last, howmany,
                                in + slab * inner * istride,
                                istride, idist,
                                out + slab * nlast * ostride,
                                ostride, odist,
                                (fftw_real *) work);
     }
}
/*
 * Run the plan p->p_fft_x over the locally held data, in place (the
 * scratch array is passed in the output slot).
 *
 * n_fields is first multiplied by p->p_fft->n_after[0], so that the
 * dimensions after y are folded into the field count.  With more than
 * one field, each of the local_ny blocks of nx * n_fields elements is
 * transformed separately with stride n_fields; with a single field,
 * all local_ny contiguous length-nx rows are transformed in one call.
 *
 * The scratch array is p->work when set, otherwise p->p_fft->work.
 */
static void first_dim_aux(rfftwnd_mpi_plan p,
                          int n_fields, fftw_real *local_data)
{
     fftw_complex *data = (fftw_complex *) local_data;
     fftw_plan plan_x = p->p_fft_x;
     const int local_ny = p->p_transpose->local_ny;
     const int nx = plan_x->n;
     fftw_complex *scratch;
     int row;

     scratch = p->work ? p->work : p->p_fft->work;

     /* dimensions after y no longer need be considered separately
        from n_fields */
     n_fields *= p->p_fft->n_after[0];

     if (n_fields <= 1) {
          fftw(plan_x, local_ny, data, 1, nx, scratch, 1, 0);
          return;
     }

     for (row = 0; row < local_ny; ++row)
          fftw(plan_x, n_fields,
               data + (nx * n_fields) * row, n_fields, 1,
               scratch, 1, 0);
}
/*
 * Loop body for one thread: for every index in [ldata->min, ldata->max)
 * run the plan on the block starting at io_data + index * iodist0,
 * in place, using this thread's own slice of the work array
 * (d->work + d->wdist * thread_num).  Always returns a null pointer.
 */
static void *fftw_howmany_thread(fftw_loop_data *ldata)
{
     fftw_howmany_data *d = (fftw_howmany_data *) ldata->data;
     fftw_complex *scratch = d->work + d->wdist * ldata->thread_num;
     fftw_complex *base = d->io_data;
     fftw_plan plan = d->p;
     const int count = d->howmany;
     const int stride = d->iostride;
     const int dist = d->iodist;
     const int step = d->iodist0;
     const int stop = ldata->max;
     int cur;

     for (cur = ldata->min; cur < stop; ++cur)
          fftw(plan, count, base + cur * step, stride, dist,
               scratch, 1, 0);

     return 0;
}
/*
 * Measure the average time of one execution of `plan'.
 *
 * The plan is run `iter' times on freshly allocated arrays; if the
 * total elapsed time is still below FFTW_TIME_MIN the iteration count
 * is doubled and the measurement repeated.  The value returned is the
 * elapsed time of the final round divided by its iteration count.
 *
 * The input is refilled with the same fixed value (1.0, 32.432) before
 * every execution, and that refill is part of the timed region.
 */
double fftw_measure_runtime(fftw_plan plan)
{
     const int n = plan->n;
     int iter;

     for (iter = 1; ; iter *= 2) {
          FFTW_COMPLEX *in, *out;
          fftw_time begin, end;
          double t;
          int rep, j;

          in = (FFTW_COMPLEX *) fftw_malloc(n * sizeof(FFTW_COMPLEX));
          out = (FFTW_COMPLEX *) fftw_malloc(n * sizeof(FFTW_COMPLEX));

          begin = fftw_get_time();
          for (rep = 0; rep < iter; ++rep) {
               /* (re)fill the input */
               for (j = 0; j < n; ++j) {
                    c_re(in[j]) = 1.0;
                    c_im(in[j]) = 32.432;
               }
               fftw(plan, 1, in, 1, 0, out, 1, 0);
          }
          end = fftw_get_time();

          t = fftw_time_to_sec(fftw_time_diff(end, begin));

          fftw_free(in);
          fftw_free(out);

          /* long enough?  (written as a negation so that a NaN time
             also ends the loop) */
          if (!(t < FFTW_TIME_MIN))
               return t / (double) iter;
     }
}
/*
 * Fortran-callable wrapper: transform one plane of `a' along x.
 *
 * The plan *p is run on *ny unit-stride rows, consecutive rows being
 * *ldx elements apart.  No output array is supplied.  `nx', `nz' and
 * `ldy' are part of the Fortran interface but are not needed for a
 * single plane.  Always returns 0.
 */
int F77_FUNC_ (fft_x_stick_single, FFT_X_STICK_SINGLE)
     (fftw_plan *p, FFTW_COMPLEX *a, int *nx, int *ny, int *nz,
      int *ldx, int *ldy)
{
   const int xhowmany = *ny;    /* number of rows */
   const int xstride = 1;       /* rows are contiguous */
   const int xidist = *ldx;     /* distance between rows */

   /* transform along x */
   fftw(*p, xhowmany, a, xstride, xidist, 0, 0, 0);

   return 0;
}
/*
 * Real-to-complex step of the multi-dimensional transform for
 * dimension cur_dim and everything after it.
 *
 * The trailing dimensions are processed first: directly through
 * rfftw_real2c_aux() when only the last dimension remains, otherwise
 * recursively, one hyperslab at a time.  Dimension cur_dim is then
 * transformed in place inside `out' by a complex fftw() call, with
 * `work' reused as complex scratch.
 */
void rfftwnd_real2c_aux(fftwnd_plan p, int cur_dim,
                        fftw_real *in, int istride,
                        fftw_complex *out, int ostride,
                        fftw_real *work)
{
     const int len = p->n[cur_dim];
     const int inner = p->n_after[cur_dim];
     fftw_plan last = p->plans[p->rank - 1];

     if (cur_dim != p->rank - 2) {
          /*
           * at least two dimensions remain: recurse over hyperslabs
           * to get maximum locality
           */
          const int nr = last->n;
          const int inner_r = p->is_in_place
               ? inner * 2
               : nr * (inner / (nr / 2 + 1));
          int slab;

          for (slab = 0; slab < len; ++slab)
               rfftwnd_real2c_aux(p, cur_dim + 1,
                                  in + slab * inner_r * istride, istride,
                                  out + slab * inner * ostride, ostride,
                                  work);
     } else if (p->is_in_place) {
          /* last dimension, in place: output uses the input stride */
          rfftw_real2c_aux(last, len,
                           in, istride, (inner * istride) * 2,
                           out, istride, inner * istride,
                           work);
     } else {
          /* last dimension, out of place */
          rfftw_real2c_aux(last, len,
                           in, istride, last->n * istride,
                           out, ostride, inner * ostride,
                           work);
     }

     /* the current dimension, in place; the scratch array is real-typed
        in this routine, hence the cast */
     fftw(p->plans[cur_dim], inner,
          out, inner * ostride, ostride,
          (fftw_complex *) work, 1, 0);
}
void process_seg(float* data) { int i; float* p = data; static float dbuff[FFT_LEN*2]; static fftw_plan planfwd,planinverse; if (!planfwd) { planfwd=fftw_create_plan(FFT_LEN, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE | FFTW_USE_WISDOM ); planinverse=fftw_create_plan(IFFT_LEN, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE | FFTW_USE_WISDOM ); } fftw_one(planfwd, (fftw_complex *)data, (fftw_complex *)NULL); data[0]=0; data[1]=0; fftw(planinverse, NSTRIPS, (fftw_complex *)data, 1, IFFT_LEN, (fftw_complex *)NULL, 1, IFFT_LEN); for (i=0; i<NSTRIPS; i++) { output_samples(p, i, obuf_pos); p += IFFT_LEN*2; } obuf_pos+=IFFT_LEN*2/CHAR_BIT; }
/*
 * Fortran-callable wrapper: transform every z-plane of `a' along x.
 *
 * For each of the *nz planes (consecutive planes are (*ldx) * (*ldy)
 * elements apart) the plan *p is run on *ny unit-stride rows that are
 * *ldx elements apart.  No output array is supplied.  `nx' is part of
 * the Fortran interface but is not needed here.  Always returns 0.
 */
int F77_FUNC_ (fft_x_stick, FFT_X_STICK)
     (fftw_plan *p, FFTW_COMPLEX *a, int *nx, int *ny, int *nz,
      int *ldx, int *ldy)
{
   const int xhowmany = *ny;                /* rows per plane */
   const int xstride = 1;                   /* rows are contiguous */
   const int xidist = *ldx;                 /* distance between rows */
   const int bigstride = (*ldx) * (*ldy);   /* distance between planes */
   FFTW_COMPLEX *plane = a;
   int i;

   for (i = 0; i < *nz; i++) {
      /*
       * Step the pointer from plane to plane instead of forming
       * i * bigstride in `int', which overflows once the array holds
       * more than INT_MAX elements.  The step is taken at the top of
       * the loop so that no pointer past the last plane is ever formed.
       */
      if (i > 0)
         plane += bigstride;

      /* transform along x */
      fftw(*p, xhowmany, plane, xstride, xidist, 0, 0, 0);
   }

   return 0;
}
void NormalLineArray::doFirstFFT(int fftid, int direction) { LineFFTinfo &fftinfo = (infoVec[fftid]->info); int ptype = fftinfo.ptype; int pblock = fftinfo.pblock; complex *line = fftinfo.dataPtr; int sizeX = fftinfo.sizeX; int sizeZ = fftinfo.sizeZ; int *xsquare = fftinfo.xsquare; int *ysquare = fftinfo.ysquare; int *zsquare = fftinfo.zsquare; #ifdef HEAVYVERBOSE { char fname[80]; if(direction) snprintf(fname,80,"xline_%d.y%d.z%d.out", fftid,thisIndex.x, thisIndex.y); else snprintf(fname,80,"zline_%d.x%d.y%d.out", fftid,thisIndex.x, thisIndex.y); FILE *fp=fopen(fname,"w"); for(int x = 0; x < sizeX*xsquare[0]*xsquare[1]; x++) fprintf(fp, "%d %g %g\n", x, line[x].re, line[x].im); fclose(fp); } #endif if(direction && ptype==PencilType::XLINE) fftw(fwdplan, xsquare[0]*xsquare[1], (fftw_complex*)line, 1, sizeX, NULL, 0, 0); // xPencilsPerSlab many 1-D fft's else if(!direction && ptype==PencilType::ZLINE) fftw(bwdplan, zsquare[0]*zsquare[1], (fftw_complex*)line, 1, sizeZ, NULL, 0, 0); else CkAbort("Can't do this FFT\n"); int x, y, z=0; #ifdef VERBOSE CkPrintf("First FFT done at [%d %d] [%d %d]\n", thisIndex.x, thisIndex.y,sizeX,sizeZ); #endif int baseX, ix, iy, iz; if(true) {//else if(pblock == PencilBlock::SQUAREBLOCK){ if(direction) { int sendSquarethick = ysquare[1] <= xsquare[1] ? 
ysquare[1]:xsquare[1]; int sendDataSize = ysquare[0]*xsquare[0] * sendSquarethick; int zpos = thisIndex.y; int index=0; complex *sendData = NULL; for(z = 0; z < xsquare[1]; z+=sendSquarethick){ for(x = 0; x < sizeX; x+=ysquare[0]) { SendFFTMsg *msg = new(sendDataSize, sizeof(int)*8) SendFFTMsg; sendData = msg->data; msg->ypos = thisIndex.x; msg->size = sendDataSize; msg->id = fftid; msg->direction = direction; msg->data = sendData; CkSetQueueing(msg, CK_QUEUEING_IFIFO); #ifdef _PRIOMSG_ int prioNum = (zpos+z) + x*sizeX; *(int*)CkPriorityPtr(msg) = prioNum; #endif index = 0; for(iz = z; iz < z+sendSquarethick; iz++) for(ix = x; ix < x+ysquare[0]; ix++) for(y = 0; y < xsquare[0]; y++) sendData[index++] = line[iz*sizeX*xsquare[0]+y*sizeX+ix]; #ifdef VERBOSE CkPrintf(" [%d %d] sending to YLINES [ %d %d] \n", thisIndex.x, thisIndex.y, thisIndex.y, x); #endif yProxy(zpos+z, x).doSecondFFT(msg); } //memset(sendData, 0, sizeof(complex)*yPencilsPerSlab*xPencilsPerSlab); } } else { int sendSquarewidth = ysquare[0]<=zsquare[0] ? ysquare[0]:zsquare[0]; int sendDataSize = ysquare[1] * sendSquarewidth * zsquare[1]; int xpos = thisIndex.x; int ypos = thisIndex.y; int index=0; complex *sendData = NULL; for(x = 0; x < zsquare[0]; x+=sendSquarewidth) for(z = 0; z < sizeZ; z+=ysquare[1]){ SendFFTMsg *msg = new(sendDataSize, sizeof(int)*8) SendFFTMsg; sendData = msg->data; msg->ypos = thisIndex.y; msg->size = sendDataSize; msg->id = fftid; msg->direction = direction; msg->data = sendData; CkSetQueueing(msg, CK_QUEUEING_IFIFO); #ifdef _PRIOMSG_ int prioNum = (z) + (x+xpos)*sizeX; *(int*)CkPriorityPtr(msg) = prioNum; #endif index = 0; for(iz = z; iz < z+ysquare[1]; iz++) for (ix = x; ix < x+sendSquarewidth; ix++) for(iy = 0; iy < zsquare[1]; iy++) sendData[index++] = line[iz+ix*sizeZ+iy*sizeZ*zsquare[0]]; #ifdef VERBOSE CkPrintf(" [%d %d] sending to YLINES [%d %d] \n", thisIndex.x, thisIndex.y, z, thisIndex.x); #endif yProxy(z, xpos+x).doSecondFFT(msg); } } } }
int main(int argc, char **argv) { int sample, samples, spectrum, spectra, bin, bins, dummy; float re, im, binPower; fftw_plan sigPlan; FILE *iFp, *oFp; if (getArgs(argc, argv)) exit (1); if (!(iFp = fopen(infile, "r"))) { cout << " opening input file" << endl; exit(2); } if (!(oFp = fopen(outfile, "w"))) { cout << " opening output file" << endl; exit(3); } samples = subbands * halfFrames * 512; bins = 2 * subbands * 512; spectra = samples / bins; sigPlan = fftw_create_plan(bins, FFTW_FORWARD, FFTW_ESTIMATE); double power[bins]; float_complex td[samples]; float_complex fd[samples]; // extract the samples from the channel file for (sample = 0; sample < samples; sample++) { fscanf(iFp, "%d (%f, %f)\n", &dummy, &re, &im); td[sample] = float_complex(re, im); } // now perform a full-width fft to create the signal bins fftw(sigPlan, spectra, (fftw_complex *) td, 1, bins, (fftw_complex *) fd, 1, bins); // rearrange the data so that DC is in the middle of the spectrum float_complex temp[bins/2]; for (spectrum = 0; spectrum < spectra; spectrum++) { memcpy(temp, &fd[spectrum*bins], sizeof(float_complex) * bins / 2); memcpy(&fd[spectrum*bins], &fd[spectrum*bins+bins/2], sizeof(float_complex) * bins / 2); memcpy(&fd[spectrum*bins+bins/2], &temp, sizeof(float_complex) * bins / 2); } for (bin = 0; bin < bins; bin++) power[bin] = 0; // now compute the total power in each frequency bin for (spectrum = 0; spectrum < spectra; spectrum++) { for (bin = 0; bin < bins; bin++) { binPower = norm(fd[spectrum*bins+bin]); #ifdef notdef fprintf(oFp, "%03d:%05d (%.3f, %.3f) (%.3f)\n", spectrum, bin, fd[spectrum*bins+bin].real(), fd[spectrum*bins+bin].imag(), binPower); #endif power[bin] += binPower; } } // print the powers in the bins for (bin = 0; bin < bins; bin++) fprintf(oFp, "%05d: %.3le\n", bin, power[bin]); fclose(iFp); fclose(oFp); }
/*
 * Run `howmany' strided in-place transforms through contiguous
 * buffers.
 *
 * The transforms are handled in batches of `nbuffers': each batch is
 * gathered from the strided array into `buffers' (one transform per
 * row, rows n + FFTWND_BUFFER_PADDING elements apart), transformed
 * there with `work' in the output slot, and scattered back.  Whatever
 * is left after the last full batch (howmany % nbuffers transforms) is
 * done in one final, smaller batch.
 */
void fftw_buffered(fftw_plan p, int howmany,
                   fftw_complex *in, int istride, int idist,
                   fftw_complex *work,
                   int nbuffers, fftw_complex *buffers)
{
     const int n = p->n;
     const int pitch = n + FFTWND_BUFFER_PADDING;   /* buffer row length */
     int done = 0;                                  /* transforms finished */

     do {
          while (done <= howmany - nbuffers) {
               fftw_complex *batch = in + done * idist;
               int j, b;

               /*
                * Gather: copy nbuffers strided arrays into the
                * contiguous buffers (reading consecutive locations,
                * assuming that idist is 1).  Four at a time, then the
                * stragglers.
                */
               for (j = 0; j < n; ++j) {
                    fftw_complex *src = batch + j * istride;
                    fftw_complex *dst = buffers + j;

                    for (b = 0; b <= nbuffers - 4; b += 4) {
                         dst[0] = src[0];
                         dst[pitch] = src[idist];
                         dst[2 * pitch] = src[2 * idist];
                         dst[3 * pitch] = src[3 * idist];
                         dst += 4 * pitch;
                         src += 4 * idist;
                    }
                    for (; b < nbuffers; ++b) {
                         *dst = *src;
                         dst += pitch;
                         src += idist;
                    }
               }

               /* transform the buffers (in-place, using work) */
               fftw(p, nbuffers, buffers, 1, pitch, work, 1, 0);

               /*
                * Scatter: copy the results back from the contiguous
                * buffers to the strided arrays (writing consecutive
                * locations).
                */
               for (j = 0; j < n; ++j) {
                    fftw_complex *dst = batch + j * istride;
                    fftw_complex *src = buffers + j;

                    for (b = 0; b <= nbuffers - 4; b += 4) {
                         dst[0] = src[0];
                         dst[idist] = src[pitch];
                         dst[2 * idist] = src[2 * pitch];
                         dst[3 * idist] = src[3 * pitch];
                         dst += 4 * idist;
                         src += 4 * pitch;
                    }
                    for (; b < nbuffers; ++b) {
                         *dst = *src;
                         dst += idist;
                         src += pitch;
                    }
               }

               done += nbuffers;
          }

          /*
           * howmany % nbuffers transforms were skipped by the loop
           * above; go round once more with a batch of exactly that size
           */
          nbuffers = howmany - done;
     } while (done < howmany);
}
int increBoundary(void) { /* External Variables */ extern int Nx, Nz; extern fftw_complex ***CT; /* 6-by-(3Nz/2)-by-(3*Nx/4+1) */ extern mcomplex **Uxb, **Uzb; extern fftw_plan pf1, pf2; extern rfftwnd_plan pr1, pr2; extern double *Kx, *Kz; int x, i, z, idx; double norm, tmp1, tmp2, tmp3; fftw_real *RT; /* real to complex transform */ fftw_complex *fout = NULL; fftw_real *rout = NULL; idx = (3 * Nz / 2) * (3 * Nx / 2 + 2); RT = (fftw_real *) CT[0][0]; norm = 1.0 / ((3. * Nx / 2.) * (3. * Nz / 2.)); memset(CT[0][0], 0, MAXT * (3 * Nz / 2) * (3 * Nx / 4 + 1) * sizeof(fftw_complex)); /* store Uxb hat and Uzb hat and w hat on CT for inverse FFT */ for (z = 0; z < Nz / 2; ++z) { /* CT[0] store the data of Uxb, CT[1] storedata for Uzb */ memcpy(CT[0][z], Uxb[z], (Nx / 2) * sizeof(fftw_complex)); memcpy(CT[1][z], Uzb[z], (Nx / 2) * sizeof(fftw_complex)); /* for(x=0; x<Nx/2; ++x) { Re(CT[2][z][x])=1.0; Im(CT[2][z][x])=0.; } */ } for (z = Nz / 2 + 1; z < Nz; ++z) { memcpy(CT[0][z + Nz / 2], Uxb[z], (Nx / 2) * sizeof(fftw_complex)); memcpy(CT[1][z + Nz / 2], Uzb[z], (Nx / 2) * sizeof(fftw_complex)); /*for(x=0; x<Nx/2; ++x) { Re(CT[2][z+Nz/2][x])=1.0; Im(CT[2][z+Nz/2][x])=0.; } */ } Re(CT[2][1][1]) = 1.; //Re(CT[2][3*Nz/2-1][0])=1.; //Re(CT[2][0][0])=1.; // Re(CT[2][0][0])=1.; /* inverse Fourier transform */ for (i = 0; i < 3; ++i) { /* Each column of CT[i] */ fftw(pf1, Nx / 2, CT[i][0], 3 * Nx / 4 + 1, 1, fout, -1, -1); /* Each row of CT[i] */ rfftwnd_complex_to_real(pr1, 3 * Nz / 2, CT[i][0], 1, 3 * Nx / 4 + 1, rout, -1, -1); } /* compute (dux)*(w.n) and (duz)*(w.n) */ for (z = 0; z < (3 * Nz / 2); ++z) { for (x = 0; x < 3 * Nx / 2; ++x) { RT[(z * (3 * Nx / 2 + 2) + x)] = RT[(z * (3 * Nx / 2 + 2) + x)] * RT[2 * idx + (z * (3 * Nx / 2 + 2) + x)]; RT[idx + (z * (3 * Nx / 2 + 2) + x)] = RT[idx + (z * (3 * Nx / 2 + 2) + x)] * RT[2 * idx + (z * (3 * Nx / 2 + 2) + x)]; } } /* Fourier transform to get Uxb hats and Uzb hats. 
*/ for (i = 0; i < 3; ++i) { /* Each row of RT[i] */ rfftwnd_real_to_complex(pr2, 3 * Nz / 2, RT + (i * idx), 1, 3 * Nx / 2 + 2, fout, -1, -1); /* Each column of CT[i] */ fftw(pf2, Nx / 2, CT[i][0], 3 * Nx / 4 + 1, 1, fout, -1, -1); /* constant of FFT */ for (z = 0; z < Nz / 2; ++z) { for (x = 0; x < Nx / 2; ++x) { Re(CT[i][z][x]) = norm * Re(CT[i][z][x]); Im(CT[i][z][x]) = norm * Im(CT[i][z][x]); } } for (z = Nz + 1; z < 3 * Nz / 2; ++z) { for (x = 0; x < Nx / 2; ++x) { Re(CT[i][z][x]) = norm * Re(CT[i][z][x]); Im(CT[i][z][x]) = norm * Im(CT[i][z][x]); } } } /*put date back in array Uxb and Uzb */ memset(Uxb[0], 0, Nz * (Nx / 2) * sizeof(mcomplex)); memset(Uzb[0], 0, Nz * (Nx / 2) * sizeof(mcomplex)); for (z = 0; z < Nz / 2; ++z) { memcpy(Uxb[z], CT[0][z], Nx / 2 * sizeof(fftw_complex)); memcpy(Uzb[z], CT[1][z], Nx / 2 * sizeof(fftw_complex)); } for (z = Nz + 1; z < 3 * Nz / 2; ++z) { memcpy(Uxb[z - Nz / 2], CT[0][z], Nx / 2 * sizeof(fftw_complex)); memcpy(Uzb[z - Nz / 2], CT[1][z], Nx / 2 * sizeof(fftw_complex)); } /* further computation to get c1, c2, c3, c4 as in the note and results are rewritten in Uxb and Uzb: c1=g hat=iKz*Uxb-iKx*Uzb;-------rewrittin in Uxb c2=du_y=-iKx*Uxb-iKz*Uzb;-------rewrittin in Uzb c3=U=Uxb(0,0); -------rewritten in Uxb[0][0] c4=Uzb(0,0) -------rewritten in Uzb[0][0] */ for (z = 0; z < Nz; ++z) { for (x = 0; x < Nx / 2; ++x) { if (z * z + x * x > 0) { tmp1 = -Kz[z] * Im(Uxb[z][x]) + Kx[x] * Im(Uzb[z][x]); tmp2 = Kz[z] * Re(Uxb[z][x]) - Kx[x] * Re(Uzb[z][x]); tmp3 = Kx[x] * Im(Uxb[z][x]) + Kz[z] * Im(Uzb[z][x]); Im(Uzb[z][x]) = -Kx[x] * Re(Uxb[z][x]) - Kz[z] * Re(Uzb[z][x]); Re(Uzb[z][x]) = tmp3; Re(Uxb[z][x]) = tmp1; Im(Uxb[z][x]) = tmp2; } } } return (NO_ERR); }
/*
 * Fortran-callable wrapper around fftw(): every scalar argument
 * arrives by reference and is dereferenced before being forwarded
 * unchanged.
 */
void F77_FUNC_(fftw_f77,FFTW_F77)
(fftw_plan *p, int *howmany, fftw_complex *in, int *istride, int *idist,
 fftw_complex *out, int *ostride, int *odist)
{
     const int count = *howmany;
     const int in_stride = *istride;
     const int in_dist = *idist;
     const int out_stride = *ostride;
     const int out_dist = *odist;

     fftw(*p, count, in, in_stride, in_dist, out, out_stride, out_dist);
}
/*
 * Time the distributed 1d transform of size n.
 *
 * Only generic, in-place plans are exercised: the function returns at
 * once when `specific' is set or FFTW_IN_PLACE is missing from `flags'.
 * On the I/O process (unless only_parallel is set) the uniprocessor
 * transform is timed as well, so that a speedup can be reported.  The
 * MPI transform is timed twice: without and with a caller-supplied
 * work array.
 */
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     int local_n, local_start, local_n_after_transform,
         local_start_after_transform, total_local_size, nalloc;
     fftw_complex *in, *work;
     fftw_plan plan = 0;
     fftw_mpi_plan mpi_plan;
     double t, t0 = 0.0;
     int plan_flags;

     if (specific || !(flags & FFTW_IN_PLACE))
          return;

     plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

     if (io_okay && !only_parallel)
          plan = fftw_create_plan(n, dir, plan_flags);

     mpi_plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir, plan_flags);

     CHECK(mpi_plan, "failed to create plan!");

     fftw_mpi_local_sizes(mpi_plan, &local_n, &local_start,
                          &local_n_after_transform,
                          &local_start_after_transform,
                          &total_local_size);

     /* the process that also runs the serial transform allocates n
        elements per field */
     nalloc = (io_okay && !only_parallel) ? n : total_local_size;

     in = (fftw_complex *) fftw_malloc(nalloc * howmany_fields
                                       * sizeof(fftw_complex));
     work = (fftw_complex *) fftw_malloc(nalloc * howmany_fields
                                         * sizeof(fftw_complex));

     if (io_okay) {
          WHEN_VERBOSE(2, fftw_mpi_print_plan(mpi_plan));
     }

     /* uniprocessor reference timing */
     if (io_okay && !only_parallel) {
          FFTW_TIME_FFT(fftw(plan, howmany_fields,
                             in, howmany_fields, 1, work, 1, 0),
                        in, n * howmany_fields, t0);

          fftw_destroy_plan(plan);

          WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n",
                                 smart_sprint_time(t0)));
     }

     /* parallel transform, no work array */
     MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, NULL),
                  in, total_local_size * howmany_fields, t);

     if (io_okay) {
          WHEN_VERBOSE(1, printf("time for one fft (%d cpus): %s", ncpus,
                                 smart_sprint_time(t)));
          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
          WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
                                 " = %f\n", howmany_fields * mflops(t, n)));
          if (!only_parallel)
               WHEN_VERBOSE(1, printf("parallel speedup: %f\n", t0 / t));
     }

     /* parallel transform, with work array */
     MPI_TIME_FFT(fftw_mpi(mpi_plan, howmany_fields, in, work),
                  in, total_local_size * howmany_fields, t);

     if (io_okay) {
          WHEN_VERBOSE(1, printf("w/WORK: time for one fft (%d cpus): %s",
                                 ncpus, smart_sprint_time(t)));
          WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
          WHEN_VERBOSE(1, printf("w/WORK: \"mflops\" = 5 (n log2 n) / (t in microseconds)"
                                 " = %f\n", howmany_fields * mflops(t, n)));
          if (!only_parallel)
               WHEN_VERBOSE(1, printf("w/WORK: parallel speedup: %f\n",
                                      t0 / t));
     }

     fftw_free(in);
     fftw_free(work);
     fftw_mpi_destroy_plan(mpi_plan);

     WHEN_VERBOSE(1, my_printf("\n"));
}
/*
 * Check the distributed in-place transform against a validated
 * uniprocessor plan.
 *
 * Random data for the whole problem is generated in in2; this
 * process's slice of it is copied to in1 and transformed with
 * fftw_mpi(), with or without an explicit work array (coin flip).  The
 * full data set is transformed with validated_plan and the local
 * portion of that result is compared with in1.
 *
 * Nothing is tested for "specific" plans.
 */
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
                   fftw_plan validated_plan, int specific)
{
     int local_n, local_start, local_n_after_transform,
         local_start_after_transform, total_local_size;
     fftw_complex *in1, *work = NULL, *in2, *out2;
     fftw_complex *mine;
     fftw_mpi_plan plan;
     int k;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (specific) {
          WHEN_VERBOSE(2, my_printf("N/A\n"));
          return;
     }

     if (coinflip())
          flags |= FFTW_THREADSAFE;

     plan = fftw_mpi_create_plan(MPI_COMM_WORLD, n, dir, flags);

     fftw_mpi_local_sizes(plan, &local_n, &local_start,
                          &local_n_after_transform,
                          &local_start_after_transform,
                          &total_local_size);

     in1 = (fftw_complex *) fftw_malloc(total_local_size
                                        * sizeof(fftw_complex) * howmany);
     if (coinflip()) {
          WHEN_VERBOSE(2, my_printf("w/work..."));
          work = (fftw_complex *) fftw_malloc(total_local_size
                                              * sizeof(fftw_complex)
                                              * howmany);
     }
     in2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);
     out2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);

     /* generate random inputs for the whole problem */
     for (k = 0; k < n * howmany; ++k) {
          c_re(in2[k]) = DRAND();
          c_im(in2[k]) = DRAND();
     }

     /* this process's slice of the input */
     mine = in2 + local_start * howmany;
     for (k = 0; k < local_n * howmany; ++k) {
          c_re(in1[k]) = c_re(mine[k]);
          c_im(in1[k]) = c_im(mine[k]);
     }

     /* fft-ize */
     fftw_mpi(plan, howmany, in1, work);

     fftw_mpi_destroy_plan(plan);

     /* reference result from the validated plan */
     fftw(validated_plan, howmany, in2, howmany, 1, out2, howmany, 1);

     CHECK(compute_error_complex(in1, 1,
                                 out2 + local_start_after_transform * howmany,
                                 1,
                                 howmany * local_n_after_transform)
           < TOLERANCE,
           "test_in_place: wrong answer");

     WHEN_VERBOSE(2, my_printf("OK\n"));

     fftw_free(in1);
     fftw_free(work);
     fftw_free(in2);
     fftw_free(out2);
}
/*
 * Check an in-place 1d transform against a validated plan.
 *
 * `howmany' random sequences of length n are stored in in1 with stride
 * istride (and, contiguously, in in2).  The array positions that lie
 * between the strided elements are filled with known values, so that
 * any stray write by the transform is detected afterwards.  in1 is
 * transformed in place, in2 is transformed out of place with
 * validated_plan, and the two results are compared.
 */
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
                   fftw_plan validated_plan, int specific)
{
     fftw_complex *in1, *in2, *out2;
     fftw_plan plan;
     int k, gap;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
          flags |= FFTW_THREADSAFE;

     in1 = (fftw_complex *) fftw_malloc(istride * n * sizeof(fftw_complex)
                                        * howmany);
     in2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);
     out2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);

     if (specific)
          plan = fftw_create_plan_specific(n, dir, flags,
                                           in1, istride,
                                           (fftw_complex *) NULL, 0);
     else
          plan = fftw_create_plan(n, dir, flags);

     /* generate random inputs */
     for (k = 0; k < n * howmany; ++k) {
          c_re(in1[k * istride]) = c_re(in2[k]) = DRAND();
          c_im(in1[k * istride]) = c_im(in2[k]) = DRAND();
     }

     /*
      * fill in the other positions of the array with recognisable
      * values, to make sure that fftw doesn't overwrite them
      */
     for (gap = 1; gap < istride; ++gap)
          for (k = 0; k < n * howmany; ++k) {
               c_re(in1[k * istride + gap]) = k * istride + gap;
               c_im(in1[k * istride + gap]) = k * istride - gap;
          }

     CHECK(plan != NULL, "can't create plan");
     WHEN_VERBOSE(2, fftw_print_plan(plan));

     /* fft-ize: the single contiguous case goes through fftw_one()
        half of the time */
     if (howmany == 1 && istride == 1 && !coinflip())
          fftw_one(plan, in1, NULL);
     else
          fftw(plan, howmany, in1, istride, n * istride,
               (fftw_complex *) NULL, 0, 0);

     fftw_destroy_plan(plan);

     /* check for overwriting */
     for (gap = 1; gap < istride; ++gap)
          for (k = 0; k < n * howmany; ++k)
               CHECK(c_re(in1[k * istride + gap]) == k * istride + gap &&
                     c_im(in1[k * istride + gap]) == k * istride - gap,
                     "input has been overwritten");

     /* reference result, one sequence at a time */
     for (k = 0; k < howmany; ++k) {
          fftw(validated_plan, 1, in2 + n * k, 1, n, out2 + n * k, 1, n);
     }

     CHECK(compute_error_complex(in1, istride, out2, 1, n * howmany)
           < TOLERANCE,
           "test_in_place: wrong answer");
     WHEN_VERBOSE(2, printf("OK\n"));

     fftw_free(in1);
     fftw_free(in2);
     fftw_free(out2);
}
// Perform a 3d FFT as three passes of 1d FFTs separated by data remaps.
//
//   in    = input data
//   out   = output data (the post-remap and the scaling always act on out)
//   flag  = -1 selects the forward transforms, anything else the backward ones
//   plan  = plan holding the remap plans, the per-axis FFT setup, scratch
//           space and the normalization data
//
// Sequence: optional pre-remap, FFTs along the fast axis, remap, FFTs along
// the mid axis, remap, FFTs along the slow axis, optional post-remap,
// then scaling.  The 1d FFT backend is chosen at compile time via FFT_*.

void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
{
  int i,total,length,offset,num;
  FFT_SCALAR norm, *out_ptr;
  FFT_DATA *data,*copy;

  // constants required by individual backends

#if defined(FFT_SCSL)
  int isys = 0;
  FFT_PREC scalef = 1.0;
#elif defined(FFT_DEC)
  char c = 'C';
  char f = 'F';
  char b = 'B';
  int one = 1;
#elif defined(FFT_T3E)
  int isys = 0;
  double scalef = 1.0;
#elif defined(FFT_ACML)
  int info;
#elif defined(FFT_FFTW3)
  FFTW_API(plan) theplan;
#else
  // the remaining backends need no extra state
#endif

  // optional pre-remap into the layout needed by the 1st set of FFTs
  // result goes to out if pre_target == 0, else to the plan's copy buffer

  if (plan->pre_plan) {
    copy = (plan->pre_target == 0) ? out : plan->copy;
    remap_3d((FFT_SCALAR *) in, (FFT_SCALAR *) copy,
             (FFT_SCALAR *) plan->scratch, plan->pre_plan);
    data = copy;
  } else {
    data = in;
  }

  // pass 1: 1d FFTs along the fast axis, total/length transforms

  total = plan->total1;
  length = plan->length1;

#if defined(FFT_SGI)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,&data[offset],1,plan->coeff1);
#elif defined(FFT_SCSL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff1,
           plan->work1,&isys);
#elif defined(FFT_ACML)
  num = total/length;
  FFT_1D(&flag,&num,&length,data,plan->coeff1,&info);
#elif defined(FFT_INTEL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&data[offset],&length,&flag,plan->coeff1);
#elif defined(FFT_MKL)
  if (flag == -1)
    DftiComputeForward(plan->handle_fast,data);
  else
    DftiComputeBackward(plan->handle_fast,data);
#elif defined(FFT_DEC)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&c,&c,(flag == -1) ? &f : &b,
           &data[offset],&data[offset],&length,&one);
#elif defined(FFT_T3E)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff1,
           plan->work1,&isys);
#elif defined(FFT_FFTW2)
  fftw((flag == -1) ? plan->plan_fast_forward : plan->plan_fast_backward,
       total/length,data,1,length,NULL,0,0);
#elif defined(FFT_FFTW3)
  theplan = (flag == -1) ? plan->plan_fast_forward : plan->plan_fast_backward;
  FFTW_API(execute_dft)(theplan,data,data);
#else
  for (offset = 0; offset < total; offset += length)
    kiss_fft((flag == -1) ? plan->cfg_fast_forward : plan->cfg_fast_backward,
             &data[offset],&data[offset]);
#endif

  // 1st mid-remap into the layout needed by the 2nd set of FFTs
  // result goes to out if mid1_target == 0, else to the plan's copy buffer

  copy = (plan->mid1_target == 0) ? out : plan->copy;
  remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) copy,
           (FFT_SCALAR *) plan->scratch, plan->mid1_plan);
  data = copy;

  // pass 2: 1d FFTs along the mid axis

  total = plan->total2;
  length = plan->length2;

#if defined(FFT_SGI)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,&data[offset],1,plan->coeff2);
#elif defined(FFT_SCSL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff2,
           plan->work2,&isys);
#elif defined(FFT_ACML)
  num = total/length;
  FFT_1D(&flag,&num,&length,data,plan->coeff2,&info);
#elif defined(FFT_INTEL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&data[offset],&length,&flag,plan->coeff2);
#elif defined(FFT_MKL)
  if (flag == -1)
    DftiComputeForward(plan->handle_mid,data);
  else
    DftiComputeBackward(plan->handle_mid,data);
#elif defined(FFT_DEC)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&c,&c,(flag == -1) ? &f : &b,
           &data[offset],&data[offset],&length,&one);
#elif defined(FFT_T3E)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff2,
           plan->work2,&isys);
#elif defined(FFT_FFTW2)
  fftw((flag == -1) ? plan->plan_mid_forward : plan->plan_mid_backward,
       total/length,data,1,length,NULL,0,0);
#elif defined(FFT_FFTW3)
  theplan = (flag == -1) ? plan->plan_mid_forward : plan->plan_mid_backward;
  FFTW_API(execute_dft)(theplan,data,data);
#else
  for (offset = 0; offset < total; offset += length)
    kiss_fft((flag == -1) ? plan->cfg_mid_forward : plan->cfg_mid_backward,
             &data[offset],&data[offset]);
#endif

  // 2nd mid-remap into the layout needed by the 3rd set of FFTs
  // result goes to out if mid2_target == 0, else to the plan's copy buffer

  copy = (plan->mid2_target == 0) ? out : plan->copy;
  remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) copy,
           (FFT_SCALAR *) plan->scratch, plan->mid2_plan);
  data = copy;

  // pass 3: 1d FFTs along the slow axis

  total = plan->total3;
  length = plan->length3;

#if defined(FFT_SGI)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,&data[offset],1,plan->coeff3);
#elif defined(FFT_SCSL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff3,
           plan->work3,&isys);
#elif defined(FFT_ACML)
  num = total/length;
  FFT_1D(&flag,&num,&length,data,plan->coeff3,&info);
#elif defined(FFT_INTEL)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&data[offset],&length,&flag,plan->coeff3);
#elif defined(FFT_MKL)
  if (flag == -1)
    DftiComputeForward(plan->handle_slow,data);
  else
    DftiComputeBackward(plan->handle_slow,data);
#elif defined(FFT_DEC)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&c,&c,(flag == -1) ? &f : &b,
           &data[offset],&data[offset],&length,&one);
#elif defined(FFT_T3E)
  for (offset = 0; offset < total; offset += length)
    FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff3,
           plan->work3,&isys);
#elif defined(FFT_FFTW2)
  fftw((flag == -1) ? plan->plan_slow_forward : plan->plan_slow_backward,
       total/length,data,1,length,NULL,0,0);
#elif defined(FFT_FFTW3)
  theplan = (flag == -1) ? plan->plan_slow_forward : plan->plan_slow_backward;
  FFTW_API(execute_dft)(theplan,data,data);
#else
  for (offset = 0; offset < total; offset += length)
    kiss_fft((flag == -1) ? plan->cfg_slow_forward : plan->cfg_slow_backward,
             &data[offset],&data[offset]);
#endif

  // optional post-remap into the requested output layout
  // the destination is always out

  if (plan->post_plan)
    remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) out,
             (FFT_SCALAR *) plan->scratch, plan->post_plan);

  // scaling of the first normnum elements of out
  // generic backends: only when flag == 1 and the plan asks for it

#if !defined(FFT_T3E) && !defined(FFT_ACML)
  if (flag == 1 && plan->scaled) {
    norm = plan->norm;
    num = plan->normnum;
    out_ptr = (FFT_SCALAR *)out;
    for (i = 0; i < num; i++) {
#if defined(FFT_FFTW3)
      // real and imaginary part are adjacent scalars
      out_ptr[0] *= norm;
      out_ptr[1] *= norm;
      out_ptr += 2;
#elif defined(FFT_MKL)
      out[i] *= norm;
#else
      out[i].re *= norm;
      out[i].im *= norm;
#endif
    }
  }
#endif

#if defined(FFT_T3E)
  if (flag == 1 && plan->scaled) {
    norm = plan->norm;
    num = plan->normnum;
    for (i = 0; i < num; i++) out[i] *= (norm,norm);
  }
#endif

  // ACML: the scaling is applied on every call, independent of flag

#if defined(FFT_ACML)
  norm = plan->norm;
  num = plan->normnum;
  for (i = 0; i < num; i++) {
    out[i].re *= norm;
    out[i].im *= norm;
  }
#endif
}
int main(int argc, char **argv) { int c, mu, status; int filename_set = 0; int mode = 0; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix, iiy, gid; int Thp1, nclass; int *oh_count=(int*)NULL, *oh_id=(int*)NULL, oh_nc; int *picount; double *conn = (double*)NULL; double *conn2 = (double*)NULL; double **oh_val=(double**)NULL; double q[4], qsqr; int verbose = 0; char filename[800]; double ratime, retime; FILE *ofs; fftw_complex *corrt=NULL; fftw_complex *pi00=(fftw_complex*)NULL, *pijj=(fftw_complex*)NULL, *piavg=(fftw_complex*)NULL; fftw_plan plan_m; while ((c = getopt(argc, argv, "h?vf:m:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'm': mode = atoi(optarg); break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); /* initialize fftw, create plan with FFTW_FORWARD --- in contrast to * FFTW_BACKWARD in e.g. 
avc_exact */ plan_m = fftw_create_plan(T_global, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); if(plan_m==NULL) { fprintf(stderr, "Error, could not create fftw plan\n"); return(1); } T = T_global; Thp1 = T/2 + 1; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); /**************************************** * allocate memory for the contractions * ****************************************/ conn = (double*)calloc(32*VOLUME, sizeof(double)); if( (conn==(double*)NULL) ) { fprintf(stderr, "could not allocate memory for contr. fields\n"); exit(3); } /* conn2 = (double*)calloc(32*VOLUME, sizeof(double)); if( (conn2==(double*)NULL) ) { fprintf(stderr, "could not allocate memory for contr. 
fields\n"); exit(4); } pi00 = (fftw_complex*)malloc(VOLUME*sizeof(fftw_complex)); if( (pi00==(fftw_complex*)NULL) ) { fprintf(stderr, "could not allocate memory for pi00\n"); exit(2); } pijj = (fftw_complex*)fftw_malloc(VOLUME*sizeof(fftw_complex)); if( (pijj==(fftw_complex*)NULL) ) { fprintf(stderr, "could not allocate memory for pijj\n"); exit(2); } */ corrt = fftw_malloc(T*sizeof(fftw_complex)); for(gid=g_gaugeid; gid<=g_gaugeid2; gid++) { // for(ix=0; ix<VOLUME; ix++) {pi00[ix].re = 0.; pi00[ix].im = 0.;} // for(ix=0; ix<VOLUME; ix++) {pijj[ix].re = 0.; pijj[ix].im = 0.;} /*********************** * read contractions * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; sprintf(filename, "%s", filename_prefix); fprintf(stdout, "# Reading data from file %s\n", filename); status = read_lime_contraction(conn, filename, 16, 0); if(status == 106) { fprintf(stderr, "Error: could not read from file %s; status was %d\n", filename, status); continue; } /* sprintf(filename, "%s.%.4d.%.4d", filename_prefix2, gid); fprintf(stdout, "# Reading data from file %s\n", filename); status = read_lime_contraction(conn2, filename, 16, 0); if(status == 106) { fprintf(stderr, "Error: could not read from file %s; status was %d\n", filename, status); continue; } */ retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time to read contractions %e seconds\n", retime-ratime); /*********************** * fill the correlator * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; /* for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { for(x0=0; x0<T; x0++) { iix = g_ipt[0][x1][x2][x3]*T+x0; for(mu=1; mu<4; mu++) { ix = _GWI(5*mu,g_ipt[x0][x1][x2][x3],VOLUME); pijj[iix].re += ( conn[ix ] - conn2[ix ] ) * (double)Nsave / (double)(Nsave-1); pijj[iix].im += ( conn[ix+1] - conn2[ix+1] ) * (double)Nsave / (double)(Nsave-1); } ix = 2*g_ipt[x0][x1][x2][x3]; pi00[iix].re += ( conn[ix ] - conn2[ix ] ) * (double)Nsave / (double)(Nsave-1); 
pi00[iix].im += ( conn[ix+1] - conn2[ix+1] ) * (double)Nsave / (double)(Nsave-1); } }}} */ for(x0=0; x0<T; x0++) { ix = g_ipt[x0][0][0][0]; corrt[x0].re = conn[_GWI(5,ix,VOLUME) ] + conn[_GWI(10,ix,VOLUME) ] + conn[_GWI(15,ix,VOLUME) ]; corrt[x0].im = conn[_GWI(5,ix,VOLUME)+1] + conn[_GWI(10,ix,VOLUME)+1] + conn[_GWI(15,ix,VOLUME)+1]; corrt[x0].re /= (double)T; corrt[x0].im /= (double)T; } /* fftw(plan_m, 1, corrt, 1, T, (fftw_complex*)NULL, 0, 0); */ fftw_one(plan_m, corrt, NULL); sprintf(filename, "rho.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing VKVK data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, 0, corrt[0].re, 0., gid); for(x0=1; x0<(T/2); x0++) { fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, x0, corrt[x0].re, corrt[T-x0].re, gid); } fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, (T/2), corrt[T/2].re, 0., gid); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time to fill correlator %e seconds\n", retime-ratime); #ifdef _UNDEF free(conn); /* free(conn2); */ /******************************** * test: print correl to stdout * ********************************/ /* fprintf(stdout, "\n\n# ***************** pijj *****************\n"); for(ix=0; ix<LX*LY*LZ; ix++) { iix = ix*T; for(x0=0; x0<T; x0++) { fprintf(stdout, "%6d%3d%25.16e%25.16e\n", ix, x0, pijj[iix+x0].re, pijj[iix+x0].im); } } fprintf(stdout, "\n\n# ***************** pi00 *****************\n"); for(ix=0; ix<LX*LY*LZ; ix++) { iix = ix*T; for(x0=0; x0<T; x0++) { fprintf(stdout, "%6d%3d%25.16e%25.16e\n", ix, x0, pi00[iix+x0].re, pi00[iix+x0].im); } } */ /***************************************** * do the reverse Fourier transformation * *****************************************/ ratime = (double)clock() / CLOCKS_PER_SEC; fftw(plan_m, 
LX*LY*LZ, pi00, 1, T, (fftw_complex*)NULL, 0, 0); fftw(plan_m, LX*LY*LZ, pijj, 1, T, (fftw_complex*)NULL, 0, 0); for(ix=0; ix<VOLUME; ix++) { pi00[ix].re /= (double)T; pi00[ix].im /= (double)T; pijj[ix].re /= 3.*(double)T; pijj[ix].im /= 3.*(double)T; } retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time for Fourier transform %e seconds\n", retime-ratime); /***************************************** * write to file *****************************************/ ratime = (double)clock() / CLOCKS_PER_SEC; sprintf(filename, "pi00.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing pi00-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[0][x1][x2][x3]*T; /* fprintf(ofs, "# px=%3d, py=%3d, pz=%3d\n", x1, x2, x3); */ for(x0=0; x0<T; x0++) { /* fprintf(ofs, "%3d%25.16e%25.16e\n", x0, pi00[ix+x0].re, pi00[ix+x0].im); */ fprintf(ofs, "%3d%3d%3d%3d%25.16e%25.16e\n", x1, x2, x3, x0, pi00[ix+x0].re, pi00[ix+x0].im); } }}} fclose(ofs); sprintf(filename, "pijj.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing pijj-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[0][x1][x2][x3]*T; /* fprintf(ofs, "# px=%3d, py=%3d, pz=%3d\n", x1, x2, x3); */ for(x0=0; x0<T; x0++) { /* fprintf(ofs, "%3d%25.16e%25.16e\n", x0, pijj[ix+x0].re, pijj[ix+x0].im); */ fprintf(ofs, "%3d%3d%3d%3d%25.16e%25.16e\n", x1, x2, x3, x0, pijj[ix+x0].re, pijj[ix+x0].im); } }}} fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# 
time to write correlator %e seconds\n", retime-ratime); /* if(mode==0) { ratime = (double)clock() / CLOCKS_PER_SEC; if( (picount = (int*)malloc(VOLUME*sizeof(int))) == (int*)NULL) exit(110); sprintf(filename, "corr.00.mom"); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } for(ix=0; ix<VOLUME; ix++) picount[ix] = 0; for(x1=0; x1<LX; x1++) { q[1] = 2. * sin(M_PI * (double)x1 / (double)LX); for(x2=0; x2<LY; x2++) { q[2] = 2. * sin(M_PI * (double)x2 / (double)LY); for(x3=0; x3<LZ; x3++) { q[3] = 2. * sin(M_PI * (double)x3 / (double)LZ); qsqr = q[1]*q[1] + q[2]*q[2] + q[3]*q[3]; if( qsqr>=g_qhatsqr_min-_Q2EPS && qsqr<= g_qhatsqr_max+_Q2EPS ) { ix = g_ipt[0][x1][x2][x3]; picount[ix] = 1; fprintf(ofs, "%3d%3d%3d%6d%25.16e\n", x1, x2, x3, ix, qsqr); } }}} fclose(ofs); sprintf(filename, "corr_00.00.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr_00-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(ix=0; ix<VOLUME; ix++) { if(picount[ix]>0) { for(x0=0; x0<T; x0++) { fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pi00[ix*T+x0].re, pi00[ix*T+x0].im); } } } fclose(ofs); sprintf(filename, "corr_jj.00.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr_jj-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(ix=0; ix<VOLUME; ix++) { if(picount[ix]>0) { for(x0=0; x0<T; x0++) { fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pijj[ix*T+x0].re, pijj[ix*T+x0].im); } } } fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time for O_h averaging %e seconds\n", retime-ratime); 
free(picount); } else if(mode==1) { ratime = (double)clock() / CLOCKS_PER_SEC; if( (picount = (int*)malloc(VOLUME*sizeof(int))) == (int*)NULL) exit(110); sprintf(filename, "corr.01.mom"); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } if( (picount = (int*)malloc(VOLUME*sizeof(int))) == (int*)NULL) exit(110); for(ix=0; ix<VOLUME; ix++) picount[ix] = 0; for(x1=0; x1<LX; x1++) { q[1] = 2. * M_PI * (double)x1 / (double)LX; for(x2=0; x2<LY; x2++) { q[2] = 2. * M_PI * (double)x2 / (double)LY; for(x3=0; x3<LZ; x3++) { q[3] = 2. * M_PI * (double)x3 / (double)LZ; qsqr = q[1]*q[1] + q[2]*q[2] + q[3]*q[3]; if( qsqr>=g_qhatsqr_min-_Q2EPS && qsqr<= g_qhatsqr_max+_Q2EPS ) { ix = g_ipt[0][x1][x2][x3]; picount[ix] = 1; fprintf(ofs, "%3d%3d%3d%6d%25.16e\n", x1, x2, x3, ix, qsqr); } }}} fclose(ofs); sprintf(filename, "corr_00.01.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr_01-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(ix=0; ix<VOLUME; ix++) { if(picount[ix]>0) { for(x0=0; x0<T; x0++) { fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pi00[ix*T+x0].re, pi00[ix*T+x0].im); } } } fclose(ofs); sprintf(filename, "corr_jj.01.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr_jj-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(ix=0; ix<VOLUME; ix++) { if(picount[ix]>0) { for(x0=0; x0<T; x0++) { fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pijj[ix*T+x0].re, pijj[ix*T+x0].im); } } } fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time for writing: %e seconds\n", 
retime-ratime); free(picount); } else if(mode==2) { if(make_H3orbits(&oh_id, &oh_count, &oh_val, &oh_nc) != 0) return(123); ratime = (double)clock() / CLOCKS_PER_SEC; nclass = oh_nc / Thp1; if( (piavg = (fftw_complex*)malloc(oh_nc*sizeof(fftw_complex))) == (fftw_complex*)NULL) exit(110); if( (picount = (int*)malloc(oh_nc*sizeof(int))) == (int*)NULL) exit(110); for(ix=0; ix<oh_nc; ix++) { piavg[ix].re = 0.; piavg[ix].im = 0.; picount[ix] = 0; } for(ix=0; ix<LX*LY*LZ; ix++) { for(x0=0; x0<Thp1; x0++) { iix = ix*T+x0; iiy = oh_id[ix]*Thp1+x0; piavg[iiy].re += pi00[iix].re; piavg[iiy].im += pi00[iix].im; if(x0>0 && x0<T/2) { iix = ix*T+(T-x0); piavg[iiy].re += pi00[iix].re; piavg[iiy].im += pi00[iix].im; } } picount[oh_id[ix]]++; } for(ix=0; ix<nclass; ix++) { for(x0=0; x0<Thp1; x0++) { iix = ix*Thp1+x0; if(picount[ix]>0) { piavg[iix].re /= (double)picount[ix]; piavg[iix].im /= (double)picount[ix]; if(x0>0 && x0<T/2) { piavg[iix].re /= 2.; piavg[iix].im /= 2.; } } } } sprintf(filename, "corr02_00.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr-00-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(x1=0; x1<nclass; x1++) { if(oh_val[0][x1]>=g_qhatsqr_min-_Q2EPS && oh_val[0][x1]<=g_qhatsqr_max+_Q2EPS) { ix = x1*Thp1; for(x0=0; x0<Thp1; x0++) { fprintf(ofs, "%25.16e%3d%25.16e%25.16e%5d\n", oh_val[0][x1], x0, piavg[ix+x0].re, piavg[ix+x0].im, picount[x1]); } } } fclose(ofs); for(ix=0; ix<oh_nc; ix++) { piavg[ix].re = 0.; piavg[ix].im = 0.; picount[ix] = 0; } for(ix=0; ix<LX*LY*LZ; ix++) { for(x0=0; x0<Thp1; x0++) { iix = ix*T+x0; iiy = oh_id[ix]*Thp1+x0; piavg[iiy].re += pijj[iix].re; piavg[iiy].im += pijj[iix].im; if(x0>0 && x0<T/2) { iix = ix*T+(T-x0); piavg[iiy].re += pijj[iix].re; piavg[iiy].im += pijj[iix].im; } } picount[oh_id[ix]]++; } for(ix=0; ix<nclass; ix++) { 
for(x0=0; x0<Thp1; x0++) { iix = ix*Thp1+x0; if(picount[ix]>0) { piavg[iix].re /= (double)picount[ix]; piavg[iix].im /= (double)picount[ix]; if(x0>0 && x0<T/2) { piavg[iix].re /= 2.; piavg[iix].im /= 2.; } } }} sprintf(filename, "corr02_jj.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr-jj-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(x1=0; x1<nclass; x1++) { ix = x1*Thp1; for(x0=0; x0<Thp1; x0++) { fprintf(ofs, "%25.16e%3d%25.16e%25.16e%5d\n", oh_val[0][x1], x0, piavg[ix+x0].re, piavg[ix+x0].im, picount[x1]); } } fclose(ofs); sprintf(filename, "corr.02.mom"); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } for(ix=0; ix<VOLUME; ix++) fprintf(ofs, "%5d%25.16e%5d", ix, oh_val[0][ix], picount[ix]); fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time for O_h averaging %e seconds\n", retime-ratime); free(piavg); free(picount); } */ #endif } /*************************************** * free the allocated memory, finalize * ***************************************/ free(corrt); free_geometry(); /* free(pi00); free(pijj); */ fftw_destroy_plan(plan_m); return(0); }
/*
 * Fortran-callable wrapper: runs a single in-place transform with plan
 * *p on the array a, using *ldx as the element stride.  The output
 * array argument of fftw() is passed as 0.  ny is accepted but not
 * used.  Always returns 0.
 */
int F77_FUNC_ (fft_y_stick, FFT_Y_STICK)
   (fftw_plan *p, FFTW_COMPLEX *a, int *ny, int *ldx)
{
   const int stride = *ldx;

   (void) ny;

   fftw(*p, 1, a, stride, 1, 0, 0, 0);

   return 0;
}
// Perform only the 1d FFT passes of a 3d FFT plan, without any remaps.
//
//   data  = array the transforms operate on, in place
//   nsize = number of elements available in data
//   flag  = -1 selects the forward transforms, anything else the backward ones
//   plan  = 3d FFT plan providing lengths, counts and backend setup
//
// For each of the three dimensions total/length transforms of size length
// are applied to consecutive segments of data.  If a dimension needs more
// than nsize elements, the number of transforms is reduced to what fits.
// With FFT_MKL or FFT_FFTW3 the function returns without doing anything
// in that case.

void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan)
{
  int i,offset,num;
  FFT_SCALAR norm, *data_ptr;

  // system specific constants

#ifdef FFT_SCSL
  int isys = 0;
  FFT_PREC scalef = 1.0;
#endif
#ifdef FFT_DEC
  char c = 'C';
  char f = 'F';
  char b = 'B';
  int one = 1;
#endif
#ifdef FFT_T3E
  int isys = 0;
  double scalef = 1.0;
#endif

  // total = size of data needed in each dim
  // length = length of 1d FFT in each dim
  // total/length = # of 1d FFTs in each dim
  // if total > nsize, limit # of 1d FFTs to available size of data

  int total1 = plan->total1;
  int length1 = plan->length1;
  int total2 = plan->total2;
  int length2 = plan->length2;
  int total3 = plan->total3;
  int length3 = plan->length3;

  // fftw3 and Dfti in MKL encode the number of transforms
  // into the plan, so we cannot operate on a smaller data set.

#if defined(FFT_MKL) || defined(FFT_FFTW3)
  if ((total1 > nsize) || (total2 > nsize) || (total3 > nsize))
    return;
#endif
  if (total1 > nsize) total1 = (nsize/length1) * length1;
  if (total2 > nsize) total2 = (nsize/length2) * length2;
  if (total3 > nsize) total3 = (nsize/length3) * length3;

  // perform 1d FFTs in each of 3 dimensions
  // data is just an array of 0.0

#ifdef FFT_SGI
  for (offset = 0; offset < total1; offset += length1)
    FFT_1D(flag,length1,&data[offset],1,plan->coeff1);
  for (offset = 0; offset < total2; offset += length2)
    FFT_1D(flag,length2,&data[offset],1,plan->coeff2);
  for (offset = 0; offset < total3; offset += length3)
    FFT_1D(flag,length3,&data[offset],1,plan->coeff3);
#elif defined(FFT_SCSL)
  for (offset = 0; offset < total1; offset += length1)
    FFT_1D(flag,length1,scalef,&data[offset],&data[offset],plan->coeff1,
           plan->work1,&isys);
  for (offset = 0; offset < total2; offset += length2)
    FFT_1D(flag,length2,scalef,&data[offset],&data[offset],plan->coeff2,
           plan->work2,&isys);
  for (offset = 0; offset < total3; offset += length3)
    FFT_1D(flag,length3,scalef,&data[offset],&data[offset],plan->coeff3,
           plan->work3,&isys);
#elif defined(FFT_ACML)
  int info=0;
  num=total1/length1;
  FFT_1D(&flag,&num,&length1,data,plan->coeff1,&info);
  num=total2/length2;
  FFT_1D(&flag,&num,&length2,data,plan->coeff2,&info);
  num=total3/length3;
  FFT_1D(&flag,&num,&length3,data,plan->coeff3,&info);
#elif defined(FFT_INTEL)
  for (offset = 0; offset < total1; offset += length1)
    FFT_1D(&data[offset],&length1,&flag,plan->coeff1);
  for (offset = 0; offset < total2; offset += length2)
    FFT_1D(&data[offset],&length2,&flag,plan->coeff2);
  for (offset = 0; offset < total3; offset += length3)
    FFT_1D(&data[offset],&length3,&flag,plan->coeff3);
#elif defined(FFT_MKL)
  if (flag == -1) {
    DftiComputeForward(plan->handle_fast,data);
    DftiComputeForward(plan->handle_mid,data);
    DftiComputeForward(plan->handle_slow,data);
  } else {
    DftiComputeBackward(plan->handle_fast,data);
    DftiComputeBackward(plan->handle_mid,data);
    DftiComputeBackward(plan->handle_slow,data);
  }
#elif defined(FFT_DEC)
  if (flag == -1) {
    for (offset = 0; offset < total1; offset += length1)
      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length1,&one);
    for (offset = 0; offset < total2; offset += length2)
      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length2,&one);
    for (offset = 0; offset < total3; offset += length3)
      FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length3,&one);
  } else {
    for (offset = 0; offset < total1; offset += length1)
      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length1,&one);
    for (offset = 0; offset < total2; offset += length2)
      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length2,&one);
    for (offset = 0; offset < total3; offset += length3)
      FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length3,&one);
  }
#elif defined(FFT_T3E)
  for (offset = 0; offset < total1; offset += length1)
    FFT_1D(&flag,&length1,&scalef,&data[offset],&data[offset],plan->coeff1,
           plan->work1,&isys);
  for (offset = 0; offset < total2; offset += length2)
    FFT_1D(&flag,&length2,&scalef,&data[offset],&data[offset],plan->coeff2,
           plan->work2,&isys);
  for (offset = 0; offset < total3; offset += length3)
    FFT_1D(&flag,&length3,&scalef,&data[offset],&data[offset],plan->coeff3,
           plan->work3,&isys);
#elif defined(FFT_FFTW2)
  // the distance between consecutive transforms is the transform length,
  // so that the howmany transforms cover consecutive segments of data
  // (same layout as the loops of the other backends and as fft_3d);
  // a distance of 0 would transform the first segment over and over
  if (flag == -1) {
    fftw(plan->plan_fast_forward,total1/length1,data,1,length1,NULL,0,0);
    fftw(plan->plan_mid_forward,total2/length2,data,1,length2,NULL,0,0);
    fftw(plan->plan_slow_forward,total3/length3,data,1,length3,NULL,0,0);
  } else {
    fftw(plan->plan_fast_backward,total1/length1,data,1,length1,NULL,0,0);
    fftw(plan->plan_mid_backward,total2/length2,data,1,length2,NULL,0,0);
    fftw(plan->plan_slow_backward,total3/length3,data,1,length3,NULL,0,0);
  }
#elif defined(FFT_FFTW3)
  FFTW_API(plan) theplan;
  if (flag == -1)
    theplan=plan->plan_fast_forward;
  else
    theplan=plan->plan_fast_backward;
  FFTW_API(execute_dft)(theplan,data,data);
  if (flag == -1)
    theplan=plan->plan_mid_forward;
  else
    theplan=plan->plan_mid_backward;
  FFTW_API(execute_dft)(theplan,data,data);
  if (flag == -1)
    theplan=plan->plan_slow_forward;
  else
    theplan=plan->plan_slow_backward;
  FFTW_API(execute_dft)(theplan,data,data);
#else
  if (flag == -1) {
    for (offset = 0; offset < total1; offset += length1)
      kiss_fft(plan->cfg_fast_forward,&data[offset],&data[offset]);
    for (offset = 0; offset < total2; offset += length2)
      kiss_fft(plan->cfg_mid_forward,&data[offset],&data[offset]);
    for (offset = 0; offset < total3; offset += length3)
      kiss_fft(plan->cfg_slow_forward,&data[offset],&data[offset]);
  } else {
    for (offset = 0; offset < total1; offset += length1)
      kiss_fft(plan->cfg_fast_backward,&data[offset],&data[offset]);
    for (offset = 0; offset < total2; offset += length2)
      kiss_fft(plan->cfg_mid_backward,&data[offset],&data[offset]);
    for (offset = 0; offset < total3; offset += length3)
      kiss_fft(plan->cfg_slow_backward,&data[offset],&data[offset]);
  }
#endif

  // scaling if required
  // limit num to size of data

#ifndef FFT_T3E
  if (flag == 1 && plan->scaled) {
    norm = plan->norm;
    num = MIN(plan->normnum,nsize);
    data_ptr = (FFT_SCALAR *)data;
    for (i = 0; i < num; i++) {
#if defined(FFT_FFTW3)
      *(data_ptr++) *= norm;
      *(data_ptr++) *= norm;
#elif defined(FFT_MKL)
      data[i] *= norm;
#else
      data[i].re *= norm;
      data[i].im *= norm;
#endif
    }
  }
#endif

#ifdef FFT_T3E
  if (flag == 1 && plan->scaled) {
    norm = plan->norm;
    num = MIN(plan->normnum,nsize);
    for (i = 0; i < num; i++) data[i] *= (norm,norm);
  }
#endif
}
/*
 * Fortran-callable wrapper: runs *nfft in-place transforms with plan *p
 * on the array a.  *inca is passed as the element stride and *idist as
 * the distance between consecutive transforms.  The output array
 * argument of fftw() is passed as 0.  Always returns 0.
 */
int F77_FUNC_ ( fftw_inplace_drv_1d, FFTW_INPLACE_DRV_1D )
   (fftw_plan *p, int *nfft, FFTW_COMPLEX *a, int *inca, int *idist)
{
   const int howmany = *nfft;
   const int stride = *inca;
   const int dist = *idist;

   fftw(*p, howmany, a, stride, dist, 0, 0, 0);

   return 0;
}
void NormalLineArray::doThirdFFT(int zpos, int xpos, complex *val, int datasize, int fftid, int direction) { LineFFTinfo &fftinfo = (infoVec[fftid]->info); int ptype = fftinfo.ptype; complex *line = fftinfo.dataPtr; int sizeX = fftinfo.sizeX; int sizeZ = fftinfo.sizeZ; int *xsquare = fftinfo.xsquare; int *ysquare = fftinfo.ysquare; int *zsquare = fftinfo.zsquare; int expectSize=0, expectMsg=0, offset=0, i; int x,y,z,idx; if(direction){ int sendSquarewidth = ysquare[0]<=zsquare[0] ? ysquare[0]:zsquare[0]; expectSize = sendSquarewidth * ysquare[1] * zsquare[1]; expectMsg = sizeZ/ysquare[1] * (zsquare[0]/sendSquarewidth); CkAssert(datasize == expectSize); idx=0; for(y=0; y<zsquare[1]; y++) for(x=0; x<sendSquarewidth; x++) for(z=0; z<ysquare[1]; z++) line[z+zpos+(x+xpos)*sizeZ+y*sizeZ*zsquare[0]] = val[idx++]; } else{ int sendSquarethick = ysquare[1]<=xsquare[1] ? ysquare[1]:xsquare[1]; expectSize = ysquare[0]*xsquare[0] * sendSquarethick; expectMsg = sizeX/ysquare[0] * (xsquare[1]/sendSquarethick); CkAssert(datasize == expectSize); int idx=0; for(z=0; z<sendSquarethick; z++) for(y=0; y<xsquare[0]; y++) for(x=0; x<ysquare[0]; x++) line[(z+zpos)*sizeX*xsquare[0]+y*sizeX+xpos+x] = val[idx++]; } infoVec[fftid]->count ++; if (infoVec[fftid]->count == expectMsg) { infoVec[fftid]->count = 0; #ifdef HEAVYVERBOSE { char fname[80]; if(direction) snprintf(fname,80,"zline_%d.x%d.y%d.out", fftid, thisIndex.x, thisIndex.y); else snprintf(fname,80,"xline_%d.y%d.z%d.out", fftid, thisIndex.x, thisIndex.y); FILE *fp=fopen(fname,"w"); for(int x = 0; x < sizeX*xsquare[0]*xsquare[1]; x++) fprintf(fp, "%g %g\n", line[x].re, line[x].im); fclose(fp); } #endif if(direction && ptype==PencilType::ZLINE) fftw(fwdplan, zsquare[0]*zsquare[1], (fftw_complex*)line, 1, sizeX, NULL, 0, 0); else if(!direction && ptype==PencilType::XLINE) fftw(bwdplan, xsquare[0]*xsquare[1], (fftw_complex*)line, 1, sizeX, NULL, 0, 0); // sPencilsPerSlab many 1-D fft's else CkAbort("Can't do this FFT\n"); #ifdef VERBOSE 
CkPrintf("Third FFT done at [%d %d]\n", thisIndex.x, thisIndex.y); #endif doneFFT(fftid, direction); // contribute(sizeof(int), &count, CkReduction::sum_int); } }
int main(int argc, char *argv[]) { float *data1, *data2; fcomplex *ptr1, *ptr2; long n, npts, tmp = 0, ct, plimit, prn = 0; long i, isign = -1; double err = 0.0; #if defined USERAWFFTW FILE *wisdomfile; fftw_plan plan_forward, plan_inverse; static char wisdomfilenm[120]; #endif struct tms runtimes; double ttim, stim, utim, tott; if (argc <= 1 || argc > 4) { printf("\nUsage: testffts [sign (1/-1)] [print (0/1)] [frac err tol]\n\n"); exit(0); } else if (argc == 2) { isign = atoi(argv[1]); prn = 0; err = 0.02; } else if (argc == 3) { isign = atoi(argv[1]); prn = atoi(argv[2]); err = 0.02; } if (argc == 4) { isign = atoi(argv[1]); prn = atoi(argv[2]); err = atof(argv[3]); } /* import the wisdom for FFTW */ #if defined USERAWFFTW sprintf(wisdomfilenm, "%s/fftw_wisdom.txt", DATABASE); wisdomfile = fopen(wisdomfilenm, "r"); if (wisdomfile == NULL) { printf("Error opening '%s'. Run makewisdom again.\n", \ wisdomfilenm); printf("Exiting.\n"); exit(1); } if (FFTW_FAILURE == fftw_import_wisdom_from_file(wisdomfile)) { printf("Error importing FFTW wisdom.\n"); printf("Exiting.\n"); exit(1); } fclose(wisdomfile); #endif for (i = 0; i <= 8; i++) { /* npts = 1 << (i + 14); # of points in FFT */ /* npts = 1 << 16; # of points in FFT */ /* npts = 4096; # of points in FFT */ /* npts = 524288; # of points in FFT */ npts = 300000 * (i + 1); n = npts << 1; /* # of float vals */ data1 = gen_fvect(n); data2 = gen_fvect(n); ptr1 = (fcomplex *)data1; ptr2 = (fcomplex *)data2; /* make the data = {1,1,1,1,-1,-1,-1,-1} (all real) */ /* for (ct = 0; ct < npts/2; ct++) { tmp = 2 * ct; data1[tmp] = 1.0; data1[tmp + 1] = 0.0; data1[tmp + npts] = -1.0; data1[tmp + npts + 1] = 0.0; data2[tmp] = 1.0; data2[tmp + 1] = 0.0; data2[tmp + npts] = -1.0; data2[tmp + npts + 1] = 0.0; } */ /* make the data a sin wave of fourier freq 12.12345... 
*/ /* for (ct = 0; ct < npts; ct++) { tmp = 2 * ct; data1[tmp] = sin(2.0*3.14159265358979*ct*12.12345/npts)+1.0; data2[tmp] = data1[tmp]; data1[tmp+1] = 0.0; data2[tmp+1] = data1[tmp+1]; } */ /* make the data a sin wave of fourier freq 12.12345... with noise */ for (ct = 0; ct < npts; ct++) { tmp = 2 * ct; data1[tmp] = 10.0 * sin(TWOPI * ct * 12.12345 / npts) + 100.0; data1[tmp] = gennor(data1[tmp], 10.0); data2[tmp] = data1[tmp]; data1[tmp + 1] = gennor(100.0, 10.0); data2[tmp + 1] = data1[tmp + 1]; } printf("\nCalculating...\n"); /* The challenger... */ tott = times(&runtimes) / (double) CLK_TCK; utim = runtimes.tms_utime / (double) CLK_TCK; stim = runtimes.tms_stime / (double) CLK_TCK; tablesixstepfft(ptr1, npts, isign); /* tablesixstepfft(plan1, plan2, ptr1, npts, isign); */ /* sixstepfft(ptr1, npts, isign); */ /* four1(ptr1 - 1, npts, isign); */ /* tablefft(ptr1, npts, isign); */ /* tablesplitfft(ptr1, npts, isign); */ /* realfft(ptr1, n, isign); */ /* fftw(plan, 1, in, 1, 0, out, 1, 0); */ tott = times(&runtimes) / (double) CLK_TCK - tott; printf("Timing summary (Ransom) npts = %ld:\n", npts); utim = runtimes.tms_utime / (double) CLK_TCK - utim; stim = runtimes.tms_stime / (double) CLK_TCK - stim; ttim = utim + stim; printf("CPU usage: %.3f sec total (%.3f sec user, %.3f sec system)\n", \ ttim, utim, stim); printf("Total time elapsed: %.3f sec.\n\n", tott); /* The "Standard" FFT... 
*/ /* The following is for the fftw FFT */ /* Create new plans */ #if defined USERAWFFTW plan_forward = fftw_create_plan(npts, -1, FFTW_MEASURE | \ FFTW_USE_WISDOM | \ FFTW_IN_PLACE); plan_inverse = fftw_create_plan(npts, +1, FFTW_MEASURE | \ FFTW_USE_WISDOM | \ FFTW_IN_PLACE); #endif tott = times(&runtimes) / (double) CLK_TCK; utim = runtimes.tms_utime / (double) CLK_TCK; stim = runtimes.tms_stime / (double) CLK_TCK; /* four1(ptr2 - 1, npts, isign); */ /* tablefft(ptr2, npts, isign); */ /* tablesplitfft(ptr1, npts, isign); */ /* tablesixstepfft(ptr2, npts, isign); */ /* realft(ptr2 - 1, n, isign); */ fftwcall(ptr2, npts, -1); #if defined USERAWFFTW if (isign == -1) { fftw(plan_forward, 1, (FFTW_COMPLEX *) ptr2, 1, 1, NULL, 1, 1); } else { fftw(plan_inverse, 1, (FFTW_COMPLEX *) ptr2, 1, 1, NULL, 1, 1); } #endif tott = times(&runtimes) / (double) CLK_TCK - tott; printf("Timing summary (FFTW) npts = %ld:\n", npts); utim = runtimes.tms_utime / (double) CLK_TCK - utim; stim = runtimes.tms_stime / (double) CLK_TCK - stim; ttim = utim + stim; printf("CPU usage: %.3f sec total (%.3f sec user, %.3f sec system)\n", \ ttim, utim, stim); printf("Total time elapsed: %.3f sec.\n\n", tott); /* The following is for the fftw FFT */ #if defined USERAWFFTW fftw_destroy_plan(plan_forward); fftw_destroy_plan(plan_inverse); #endif /* Check if correct with fractional errors... */ for (ct = 0; ct < n; ct++) { if (data2[ct] != 0.0) { if (fabs((1.0 - (data1[ct] / data2[ct]))) > err) { if ((ct % 2) == 1) { printf("Values at freq %ld do not match to %4.2f%% fractional error:\n", (ct - 1) / 2, err * 100); printf(" rl1 = %f im1 = %f rl2 = %f im2 = %f\n", data1[ct - 1], data1[ct], data2[ct - 1], data2[ct]); } else { printf("Values at freq %ld do not match to %4.2f%% fractional error:\n", ct / 2, err * 100); printf(" rl1 = %f im1 = %f rl2 = %f im2 = %f\n", data1[ct], data1[ct + 1], data2[ct], data2[ct + 1]); } } } } if (npts >= 64) plimit = 64; else plimit = npts; /* Print the output... 
*/ if (prn) { printf("\n #1: Challenger FFT... "); printf("#2: Standard...\n"); for (ct = 0; ct < plimit; ct++) { printf(" %3ld rl = %12.3f ", ct, data1[2 * ct]); printf("im = %12.3f rl = %12.3f im = %12.3f\n", \ data1[2 * ct + 1], data2[2 * ct], data2[2 * ct + 1]); } } free(data1); free(data2); } return 0; }