/* * create plans using the fftw_create_plan_specific planner, which * allows us to create plans for each dimension that are specialized * for the strides that we are going to use. */ fftw_plan *fftwnd_create_plans_specific(fftw_plan *plans, int rank, const int *n, const int *n_after, fftw_direction dir, int flags, fftw_complex *in, int istride, fftw_complex *out, int ostride) { if (rank <= 0) return 0; if (plans) { int i, stride, cur_flags; fftw_complex *work = 0; int nwork; nwork = fftwnd_work_size(rank, n, flags, 1); if (nwork) work = (fftw_complex*)fftw_malloc(nwork * sizeof(fftw_complex)); for (i = 0; i < rank; ++i) { /* fft's except the last dimension are always in-place */ if (i < rank - 1) cur_flags = flags | FFTW_IN_PLACE; else cur_flags = flags; /* stride for transforming ith dimension */ stride = n_after[i]; if (cur_flags & FFTW_IN_PLACE) plans[i] = fftw_create_plan_specific(n[i], dir, cur_flags, in, istride * stride, work, 1); else plans[i] = fftw_create_plan_specific(n[i], dir, cur_flags, in, istride * stride, out, ostride * stride); if (!plans[i]) { destroy_plan_array(rank, plans); fftw_free(work); return 0; } } if (work) fftw_free(work); } return plans; }
void test_speed_aux(int n, fftw_direction dir, int flags, int specific) { fftw_complex *in, *out; fftw_plan plan; double t; fftw_time begin, end; in = (fftw_complex *) fftw_malloc(n * howmany_fields * sizeof(fftw_complex)); out = (fftw_complex *) fftw_malloc(n * howmany_fields * sizeof(fftw_complex)); if (specific) { begin = fftw_get_time(); plan = fftw_create_plan_specific(n, dir, speed_flag | flags | wisdom_flag | no_vector_flag, in, howmany_fields, out, howmany_fields); end = fftw_get_time(); } else { begin = fftw_get_time(); plan = fftw_create_plan(n, dir, speed_flag | flags | wisdom_flag | no_vector_flag); end = fftw_get_time(); } CHECK(plan != NULL, "can't create plan"); t = fftw_time_to_sec(fftw_time_diff(end, begin)); WHEN_VERBOSE(2, printf("time for planner: %f s\n", t)); WHEN_VERBOSE(2, fftw_print_plan(plan)); if (paranoid && !(flags & FFTW_IN_PLACE)) { begin = fftw_get_time(); test_ergun(n, dir, plan); end = fftw_get_time(); t = fftw_time_to_sec(fftw_time_diff(end, begin)); WHEN_VERBOSE(2, printf("time for validation: %f s\n", t)); } FFTW_TIME_FFT(fftw(plan, howmany_fields, in, howmany_fields, 1, out, howmany_fields, 1), in, n * howmany_fields, t); fftw_destroy_plan(plan); WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t))); WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n))); WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)" " = %f\n", howmany_fields * mflops(t, n))); fftw_free(in); fftw_free(out); WHEN_VERBOSE(1, printf("\n")); }
void test_in_place(int n, int istride, int howmany, fftw_direction dir, fftw_plan validated_plan, int specific) { fftw_complex *in1, *in2, *out2; fftw_plan plan; int i, j; int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE; if (coinflip()) flags |= FFTW_THREADSAFE; in1 = (fftw_complex *) fftw_malloc(istride * n * sizeof(fftw_complex) * howmany); in2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany); out2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany); if (!specific) plan = fftw_create_plan(n, dir, flags); else plan = fftw_create_plan_specific(n, dir, flags, in1, istride, (fftw_complex *) NULL, 0); /* generate random inputs */ for (i = 0; i < n * howmany; ++i) { c_re(in1[i * istride]) = c_re(in2[i]) = DRAND(); c_im(in1[i * istride]) = c_im(in2[i]) = DRAND(); } /* * fill in other positions of the array, to make sure that * fftw doesn't overwrite them */ for (j = 1; j < istride; ++j) for (i = 0; i < n * howmany; ++i) { c_re(in1[i * istride + j]) = i * istride + j; c_im(in1[i * istride + j]) = i * istride - j; } CHECK(plan != NULL, "can't create plan"); WHEN_VERBOSE(2, fftw_print_plan(plan)); /* fft-ize */ if (howmany != 1 || istride != 1 || coinflip()) fftw(plan, howmany, in1, istride, n * istride, (fftw_complex *) NULL, 0, 0); else fftw_one(plan, in1, NULL); fftw_destroy_plan(plan); /* check for overwriting */ for (j = 1; j < istride; ++j) for (i = 0; i < n * howmany; ++i) CHECK(c_re(in1[i * istride + j]) == i * istride + j && c_im(in1[i * istride + j]) == i * istride - j, "input has been overwritten"); for (i = 0; i < howmany; ++i) { fftw(validated_plan, 1, in2 + n * i, 1, n, out2 + n * i, 1, n); } CHECK(compute_error_complex(in1, istride, out2, 1, n * howmany) < TOLERANCE, "test_in_place: wrong answer"); WHEN_VERBOSE(2, printf("OK\n")); fftw_free(in1); fftw_free(in2); fftw_free(out2); }
int init(int _Nx, int _Ny, int _Nz, double _Lx, double _Lz, double _Re, double _flux, double _dt, int _nsteps) { /******************** Definition of all variables ********************/ /* External Variables. All external variables are defined in main.h */ extern int qpts, dimR, dimQ, Nx, Nz; extern double dt, re, flux; extern double *Kx, *Kz, **K2, *cfl2; extern double **Q, **Qp, **Qpp, **R, **Rp, **Qw, **Qpw, **Rw, **Qs, **Qps, **Qpps, **Rs, **Rps, *Rp0, **Rpw, **Qppw, *Rpp0; extern double *Uadd, *Vadd, *Vpadd; extern double *Qy; extern double *W; extern mcomplex ****U, ****C; /* state variables */ extern mcomplex **Fa, **Fb, **TM; extern mcomplex *fa, *fb, *tm; extern double **MZ; extern double ***M; extern mcomplex ****IU, ****IC; /* incremental state variables */ extern mcomplex **IFa, **IFb, **ITM; extern mcomplex *Ifa, *Ifb, *Itm; extern mcomplex ****AU, ****AC; /* adjoint variables and will use the same other variables used in state equations */ extern mcomplex ****IAU, ****IAC; /* incremental adjoint variables */ extern mcomplex **Uxbt, **Uzb; /* variables used to store dux duz evaluated at y=-1 used for computing boundary conditions for incremental state equations */ extern mcomplex **Uxb, **Uzb; /* variables used to store dux duz evaluated at y=-1 from previous state used for boundary conditions for incremental state equations */ extern mcomplex **IUxb, **IUzb; extern mcomplex **IAUxb, **IAUzb; extern mcomplex **AUxb, **AUzb; /* variables used to store dux duz evaluated at y=-1 used for computing boundary conditions for incremental state equations */ extern fftw_complex ***CT, ***ICT; /* variables used in fft */ extern fftw_plan pf1, pf2; extern fftw_plan Ipf1, Ipf2; extern rfftwnd_plan pr1, pr2; extern mcomplex *****MC, *****MIC; /* variables used to store state and incremental state solutions between two check points. */ extern mcomplex ****MU, ****MIU; /* variables used to store manufacture solutions */ extern mcomplex ****LU, ****LIU; /* Local Variables */ int Ny, sizeRealTransform; double Lx, Lz; fftw_complex *fout = NULL; /************************ end of variable definitions ****************/ Nx = _Nx; Ny = _Ny; Nz = _Nz; Lx = _Lx; Lz = _Lz; dt = _dt; nsteps = _nsteps; flux = _flux; re = _Re; printf("Nx,Ny,Nz,Lx | Lz,dt,nsteps | flux,Re\n" "%d %d %d %f | %f %f %d | %f %f\n", Nx, Ny, Nz, Lx, Lz, dt, nsteps, flux, re); re = 1. / re; /* time step routines assume I pass 1/Re */ qpts = 3 * Ny / 2; /* number of quadrature points (see page 9 of Moser's notes) */ dimR = Ny - 2; /* dimR and dimQ denote the number of terms */ dimQ = Ny - 4; /* in the truncated expansions for the */ /* functions v_hat, g_hat, U, W */ /* (see page 5 of Moser's notes). */ sizeRealTransform = 3 * Nx / 2; /* for the FFTs */ /**************** check input parameters, Nx/4, Nz/2 ***************/ if (Nx % 4 != 0) { printf("Required arguments Ny/4==0\n"); return (EXIT_FAILURE); } if (Nz % 2 != 0) { printf("Required arguments Nz/2==0\n"); return (EXIT_FAILURE); } if (Ny - 4 < 0) { printf("Required arguments Nzy>4\n"); return (EXIT_FAILURE); } /* Create matrices using Legendre polynomials */ if (LegendreSetup() != NO_ERR) { return (EXIT_FAILURE); } /*********************end of parameter checking ****************/ /****************Initialize and allocate all variables ***************/ /* Compute wave numbers */ if (waveNums(Nx / 2, Nz, Lx, Lz) != NO_ERR) { destroy(DESTROY_STATUS_LEGENDRE); return (EXIT_FAILURE); } /* get memory for 4D arrays and other matrices */ if (getMem() != NO_ERR) { destroy(DESTROY_STATUS_LEGENDRE | DESTROY_STATUS_WAVENUMS); return (EXIT_FAILURE); } /* Create plans for FFTs */ pf1 = fftw_create_plan_specific(3 * Nz / 2, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE, CT[0][0], 3 * Nx / 4 + 1, fout, -1); pf2 = fftw_create_plan_specific(3 * Nz / 2, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE, CT[0][0], 3 * Nx / 4 + 1, fout, -1); pr1 = rfftwnd_create_plan(1, &sizeRealTransform, FFTW_COMPLEX_TO_REAL, FFTW_MEASURE | FFTW_IN_PLACE); pr2 = rfftwnd_create_plan(1, &sizeRealTransform, FFTW_REAL_TO_COMPLEX, FFTW_MEASURE | FFTW_IN_PLACE); /* Create plans for FFTs */ Ipf1 = fftw_create_plan_specific(3 * Nz / 2, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE, ICT[0][0], 3 * Nx / 4 + 1, fout, -1); Ipf2 = fftw_create_plan_specific(3 * Nz / 2, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE, ICT[0][0], 3 * Nx / 4 + 1, fout, -1); /* set variables for checking CFL condition */ if (cflVars(Lx, Lz) != 0) { printf("Error creating CF variables\n"); destroy(DESTROY_STATUS_LEGENDRE | DESTROY_STATUS_WAVENUMS | DESTROY_STATUS_FFTW); return (EXIT_FAILURE); } /* initalize part */ memset(C[0][0][0], 0, (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex)); memset(IC[0][0][0], 0, (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex)); memset(AC[0][0][0], 0, (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex)); memset(IAC[0][0][0], 0, (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex)); memset(MC[0][0][0][0], 0, (nsteps * 3 + 1) * (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex)); memset(MIC[0][0][0][0], 0, (nsteps * 3 + 1) * (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex)); memset(U[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex)); memset(AU[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex)); memset(IU[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex)); memset(IAU[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex)); memset(LU[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex)); memset(LIU[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex)); memset(Uxb[0], 0, Nz * (Nx / 2) * sizeof(mcomplex)); memset(Uzb[0], 0, Nz * (Nx / 2) * sizeof(mcomplex)); /******************************end of initialization part ***************/ return (EXIT_SUCCESS); }