/*
 * Class:     jfftw_real_nd_Plan
 * Method:    createPlan
 * Signature: ([III)V
 *
 * Creates an rfftwnd plan for the dimensions in `dim` and stores the raw
 * plan handle, as bytes, in a fresh byte[] assigned to the `plan` field of
 * `obj`.  Plan creation is serialised on the jfftw/Plan class monitor.
 *
 * Every JNI call that can fail is checked.  On failure the function
 * releases whatever it has pinned and returns, leaving the exception that
 * the JNI call raised pending for the Java caller.
 */
JNIEXPORT void JNICALL Java_jfftw_real_nd_Plan_createPlan( JNIEnv *env, jobject obj, jintArray dim, jint dir, jint flags )
{
	jclass clazz;
	jclass lock;
	jfieldID id;
	jbyteArray arr;
	unsigned char* carr;
	int rank;
	int *cdim;

	if( sizeof( jdouble ) != sizeof( fftw_real ) )
	{
		(*env)->ThrowNew( env, (*env)->FindClass( env, "java/lang/RuntimeException" ), "jdouble and fftw_real are incompatible" );
		return;
	}

	clazz = (*env)->GetObjectClass( env, obj );
	id = (*env)->GetFieldID( env, clazz, "plan", "[B" );
	if( id == NULL )
		return;

	arr = (*env)->NewByteArray( env, sizeof( rfftwnd_plan ) );
	if( arr == NULL )
		return;

	carr = (*env)->GetByteArrayElements( env, arr, 0 );
	if( carr == NULL )
		return;

	rank = (*env)->GetArrayLength( env, dim );
	cdim = (*env)->GetIntArrayElements( env, dim, 0 );
	if( cdim == NULL )
	{
		(*env)->ReleaseByteArrayElements( env, arr, carr, 0 );
		return;
	}

	/* Look the lock class up once so enter and exit use the same reference. */
	lock = (*env)->FindClass( env, "jfftw/Plan" );
	if( lock == NULL )
	{
		(*env)->ReleaseIntArrayElements( env, dim, cdim, 0 );
		(*env)->ReleaseByteArrayElements( env, arr, carr, 0 );
		return;
	}

	(*env)->MonitorEnter( env, lock );
	*(rfftwnd_plan*)carr = rfftwnd_create_plan( rank, cdim, dir, flags );
	(*env)->MonitorExit( env, lock );

	(*env)->ReleaseIntArrayElements( env, dim, cdim, 0 );
	(*env)->ReleaseByteArrayElements( env, arr, carr, 0 );
	(*env)->SetObjectField( env, obj, id, arr );
}
/*
 * Fortran-callable wrapper around rfftwnd_create_plan.
 *
 * All arguments arrive by reference.  A negative *idir selects
 * FFTW_FORWARD, anything else FFTW_BACKWARD.  The dimension array is
 * reversed in place for the duration of the call (column-major to
 * row-major) and restored before returning; the new plan is written to *p.
 */
void F77_FUNC_(rfftwnd_f77_create_plan,RFFTWND_F77_CREATE_PLAN)
(fftwnd_plan *p, int *rank, int *n, int *idir, int *flags)
{
     fftw_direction direction;

     if (*idir < 0)
          direction = FFTW_FORWARD;
     else
          direction = FFTW_BACKWARD;

     /* column-major -> row-major */
     fftw_reverse_int_array(n, *rank);

     *p = rfftwnd_create_plan(*rank, n, direction, *flags);

     /* put the caller's array back the way it was */
     fftw_reverse_int_array(n, *rank);
}
void Wavelet::fft1DInPlace() { // use the operator version of the fourier transform if(isReal_) { int flag; rfftwnd_plan plan; flag = FFTW_ESTIMATE | FFTW_IN_PLACE; plan = rfftwnd_create_plan(1, &nzp_ ,FFTW_REAL_TO_COMPLEX,flag); // // NBNB-PAL: The call rfftwnd_on_real_to_complex is causing UMRs in Purify. // rfftwnd_one_real_to_complex(plan,rAmp_,cAmp_); fftwnd_destroy_plan(plan); isReal_ = false; } }
void Wavelet::invFFT1DInPlace() { // use the operator version of the fourier transform if(!isReal_) { int flag; rfftwnd_plan plan; flag = FFTW_ESTIMATE | FFTW_IN_PLACE; plan= rfftwnd_create_plan(1,&nzp_,FFTW_COMPLEX_TO_REAL,flag); rfftwnd_one_complex_to_real(plan,cAmp_,rAmp_); fftwnd_destroy_plan(plan); isReal_=true; double scale= static_cast<double>(1.0/static_cast<double>(nzp_)); for(int i=0; i < nzp_; i++) rAmp_[i] = static_cast<fftw_real>(rAmp_[i]*scale); } }
/*
 * Create a distributed real n-dimensional transform plan.
 *
 * comm   communicator handed to the transpose planner
 * rank   number of dimensions; must be at least 2
 * n      array of `rank` dimension sizes
 * dir    transform direction
 * flags  planner flags; FFTW_IN_PLACE is always added for the sub-plans
 *
 * Returns the new plan, or 0 if rank < 2 or any sub-plan could not be
 * created.  On failure everything built so far is released through
 * rfftwnd_mpi_destroy_plan and the function returns immediately, so the
 * freed plan is never touched again and never handed back to the caller.
 */
rfftwnd_mpi_plan rfftwnd_mpi_create_plan(MPI_Comm comm,
                                         int rank, const int *n,
                                         fftw_direction dir, int flags)
{
     rfftwnd_mpi_plan p;

     if (rank < 2)
          return 0;

     p = (rfftwnd_mpi_plan) fftw_malloc(sizeof(rfftwnd_mpi_plan_data));
     if (!p)
          return 0;

     /* Null every member first so a partial plan can be destroyed safely. */
     p->p_fft_x = 0;
     p->p_fft = 0;
     p->p_transpose = 0;
     p->p_transpose_inv = 0;
     p->work = 0;

     /* 1D transform along the first dimension. */
     p->p_fft_x = fftw_create_plan(n[0], dir, flags | FFTW_IN_PLACE);
     if (!p->p_fft_x) {
          rfftwnd_mpi_destroy_plan(p);
          return 0;
     }

     /* Real transform over the remaining rank-1 dimensions. */
     p->p_fft = rfftwnd_create_plan(rank-1, n+1, dir, flags | FFTW_IN_PLACE);
     if (!p->p_fft) {
          rfftwnd_mpi_destroy_plan(p);
          return 0;
     }

     p->p_transpose = transpose_mpi_create_plan(n[0], p->p_fft->n[0], comm);
     if (!p->p_transpose) {
          rfftwnd_mpi_destroy_plan(p);
          return 0;
     }

     p->p_transpose_inv = transpose_mpi_create_plan(p->p_fft->n[0], n[0],
                                                    comm);
     if (!p->p_transpose_inv) {
          rfftwnd_mpi_destroy_plan(p);
          return 0;
     }

     /* Extra scratch is only needed when n[0] exceeds the sub-plan's nwork. */
     if (n[0] > p->p_fft->nwork)
          p->work = (fftw_complex *) fftw_malloc(n[0] * sizeof(fftw_complex));

     return p;
}
// Initialize an FFT field descriptor.
//
//   n_d  number of dimensions
//   n    grid size in each dimension (n_d entries)
//   L    domain length in each dimension (n_d entries); used for the
//        real-space spacing L/n and the k-space spacing 2*pi/L
//   FFT  descriptor to fill in; all of its arrays are allocated here
//
// The routine records the sizes, creates the forward and inverse FFTW
// plans (FFTW2 or FFTW3, serial or MPI, chosen by the FFTW_V2 / USE_MPI /
// USE_DOUBLE preprocessor switches), works out the local slab, allocates
// and clears the field, builds the real- and k-space coordinate arrays and
// finally determines the neighbouring ranks for buffer exchanges.
void init_field(int n_d, int *n, double *L, field_info *FFT) {
    ptrdiff_t n_x_local;
    ptrdiff_t i_x_start_local;
    ptrdiff_t n_y_transpose_local;
    ptrdiff_t i_y_start_transpose_local;
    int *     n_x_rank;
    int       flag_active;
    int       n_active;
    int       min_size, max_size;

    SID_log("Initializing ", SID_LOG_OPEN);
    for(ptrdiff_t i_d = 0; i_d < n_d; i_d++) {
        if(i_d < (n_d - 1))
            SID_log("%dx", SID_LOG_CONTINUE, n[i_d]);
        else
            SID_log("%d element %d-d FFT ", SID_LOG_CONTINUE, n[i_d], n_d);
    }
    SID_log("(%d byte precision)...", SID_LOG_CONTINUE, (int)sizeof(GBPREAL));

    // Initialize FFT sizes
    FFT->n_d             = n_d;
    FFT->n               = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->L               = (double *)SID_calloc(sizeof(double) * FFT->n_d);
    FFT->n_k_local       = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->n_R_local       = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_R_start_local = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_k_start_local = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_R_stop_local  = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_k_stop_local  = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->n[i_d]               = n[i_d];
        FFT->L[i_d]               = L[i_d];
        FFT->i_R_start_local[i_d] = 0;
        FFT->i_k_start_local[i_d] = 0;
        FFT->n_R_local[i_d]       = FFT->n[i_d];
        FFT->n_k_local[i_d]       = FFT->n[i_d];
    }
    FFT->n_k_local[FFT->n_d - 1] = FFT->n[FFT->n_d - 1] / 2 + 1;

    // Default the slab description to the whole (undecomposed) domain.
    // The MPI branches below overwrite these with FFTW's decomposition;
    // the serial branches never assign them, so without these defaults
    // the code further down would read uninitialized values.
    n_x_local                 = FFT->n_R_local[0];
    i_x_start_local           = FFT->i_R_start_local[0];
    n_y_transpose_local       = (FFT->n_d > 1) ? FFT->n_k_local[1] : 0;
    i_y_start_transpose_local = 0;

    // Initialize FFTW

    // Create an integer version of FFT->n[] to pass to ..._create_plan
    int *n_int = (int *)SID_malloc(sizeof(int) * FFT->n_d);
    for(int i_d = 0; i_d < FFT->n_d; i_d++)
        n_int[i_d] = (int)FFT->n[i_d];
#if FFTW_V2
#if USE_MPI
    int total_local_size_int;
    int n_x_local_int;
    int i_x_start_local_int;
    int n_y_transpose_local_int;
    int i_y_start_transpose_local_int;
    FFT->plan  = rfftwnd_mpi_create_plan(SID.COMM_WORLD->comm, FFT->n_d, n_int, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE);
    FFT->iplan = rfftwnd_mpi_create_plan(SID.COMM_WORLD->comm, FFT->n_d, n_int, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE);
    rfftwnd_mpi_local_sizes(FFT->plan,
                            &(n_x_local_int),
                            &(i_x_start_local_int),
                            &(n_y_transpose_local_int),
                            &(i_y_start_transpose_local_int),
                            &total_local_size_int);
    n_x_local                 = (ptrdiff_t)n_x_local_int;
    i_x_start_local           = (ptrdiff_t)i_x_start_local_int;
    n_y_transpose_local       = (ptrdiff_t)n_y_transpose_local_int;
    i_y_start_transpose_local = (ptrdiff_t)i_y_start_transpose_local_int;
    FFT->total_local_size     = (size_t)total_local_size_int;
#else
    // Serial: the last dimension is padded to 2*(n/2+1) reals.
    FFT->total_local_size = 1;
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        if(i_d < FFT->n_d - 1)
            FFT->total_local_size *= FFT->n[i_d];
        else
            FFT->total_local_size *= 2 * (FFT->n[i_d] / 2 + 1);
    }
#if USE_DOUBLE
    // NOTE(review): this branch calls fftwnd_create_plan (not rfftwnd_...)
    // with the real-transform direction constants -- confirm this is intended.
    FFT->plan  = fftwnd_create_plan(FFT->n_d, n_int, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
    FFT->iplan = fftwnd_create_plan(FFT->n_d, n_int, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
#else
    FFT->plan  = rfftwnd_create_plan(FFT->n_d, n_int, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
    FFT->iplan = rfftwnd_create_plan(FFT->n_d, n_int, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
#endif
#endif
#else
    // NOTE(review): the FFTW3 planners below are handed FFT->field_local /
    // FFT->cfield_local, which this routine only assigns further down
    // ("Allocate field") -- confirm the planners tolerate that ordering.
#if USE_MPI
#if USE_DOUBLE
    fftw_mpi_init();
    FFT->total_local_size = fftw_mpi_local_size_many_transposed(FFT->n_d,
                                                                FFT->n,
                                                                1,
                                                                FFTW_MPI_DEFAULT_BLOCK,
                                                                FFTW_MPI_DEFAULT_BLOCK,
                                                                SID_COMM_WORLD->comm,
                                                                &(n_x_local),
                                                                &(i_x_start_local),
                                                                &(n_y_transpose_local),
                                                                &(i_y_start_transpose_local));
    FFT->plan  = fftw_mpi_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
    FFT->iplan = fftw_mpi_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
#else
    fftwf_mpi_init();
    FFT->total_local_size = fftwf_mpi_local_size_many_transposed(FFT->n_d,
                                                                 FFT->n,
                                                                 1,
                                                                 FFTW_MPI_DEFAULT_BLOCK,
                                                                 FFTW_MPI_DEFAULT_BLOCK,
                                                                 SID_COMM_WORLD->comm,
                                                                 &(n_x_local),
                                                                 &(i_x_start_local),
                                                                 &(n_y_transpose_local),
                                                                 &(i_y_start_transpose_local));
    FFT->plan  = fftwf_mpi_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
    FFT->iplan = fftwf_mpi_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
#endif
#else
    // Serial: the last dimension is padded to 2*(n/2+1) reals.
    FFT->total_local_size = 1;
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        if(i_d < FFT->n_d - 1)
            FFT->total_local_size *= FFT->n[i_d];
        else
            FFT->total_local_size *= 2 * (FFT->n[i_d] / 2 + 1);
    }
    // The serial FFTW3 planners take an int dimension array, so pass the
    // int copy rather than the ptrdiff_t array FFT->n.
#if USE_DOUBLE
    FFT->plan  = fftw_plan_dft_r2c(FFT->n_d, n_int, FFT->field_local, FFT->cfield_local, FFTW_ESTIMATE);
    FFT->iplan = fftw_plan_dft_c2r(FFT->n_d, n_int, FFT->cfield_local, FFT->field_local, FFTW_ESTIMATE);
#else
    FFT->plan  = fftwf_plan_dft_r2c(FFT->n_d, n_int, FFT->field_local, FFT->cfield_local, FFTW_ESTIMATE);
    FFT->iplan = fftwf_plan_dft_c2r(FFT->n_d, n_int, FFT->cfield_local, FFT->field_local, FFTW_ESTIMATE);
#endif
#endif
#endif
    SID_free(SID_FARG n_int);

    // Set empty slabs to start at 0 to make ignoring them simple.
    if(n_x_local == 0)
        i_x_start_local = 0;
    if(n_y_transpose_local == 0)
        i_y_start_transpose_local = 0;

    // Modify the local slab dimensions according to what FFTW chose.
    FFT->i_R_start_local[0] = i_x_start_local;
    FFT->n_R_local[0]       = n_x_local;
    if(FFT->n_d > 1) {
        FFT->i_k_start_local[1] = i_y_start_transpose_local;
        FFT->n_k_local[1]       = n_y_transpose_local;
    }

    // Allocate field
#if USE_FFTW3
    FFT->field_local = (gbpFFT_real *)fftwf_alloc_real(FFT->total_local_size);
#else
    FFT->field_local = (gbpFFT_real *)SID_malloc(sizeof(gbpFFT_real)*FFT->total_local_size);
#endif
    // The complex view aliases the real buffer.
    FFT->cfield_local = (gbpFFT_complex *)FFT->field_local;

    // Upper limits of slab decomposition
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->i_R_stop_local[i_d] = FFT->i_R_start_local[i_d] + FFT->n_R_local[i_d] - 1;
        FFT->i_k_stop_local[i_d] = FFT->i_k_start_local[i_d] + FFT->n_k_local[i_d] - 1;
    }

    // FFTW padding sizes
    if(FFT->n_d > 1) {
        FFT->pad_size_R = 2 * (FFT->n_R_local[FFT->n_d - 1] / 2 + 1) - FFT->n_R_local[FFT->n_d - 1];
        FFT->pad_size_k = 0;
    } else {
        FFT->pad_size_R = 0;
        FFT->pad_size_k = 0;
    }

    // Number of elements (global and local) in the FFT
    ptrdiff_t i_d = 0;
    for(FFT->n_field = 1, FFT->n_field_R_local = 1, FFT->n_field_k_local = 1; i_d < FFT->n_d; i_d++) {
        FFT->n_field *= (size_t)FFT->n[i_d];
        FFT->n_field_R_local *= (size_t)FFT->n_R_local[i_d];
        FFT->n_field_k_local *= (size_t)FFT->n_k_local[i_d];
    }

    // Clear the field
    clear_field(FFT);

    // Initialize the FFT's real-space grid
    // (each R_field[i_d] holds n+1 entries: the n grid points plus the far edge L)
    FFT->R_field = (double **)SID_malloc(sizeof(double *) * FFT->n_d);
    FFT->dR      = (double *)SID_malloc(sizeof(double) * FFT->n_d);
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->R_field[i_d] = (double *)SID_malloc(sizeof(double) * (FFT->n[i_d] + 1));
        FFT->dR[i_d]      = FFT->L[i_d] / (double)(FFT->n[i_d]);
        for(ptrdiff_t i_i = 0; i_i < FFT->n[i_d]; i_i++)
            FFT->R_field[i_d][i_i] = FFT->L[i_d] * ((double)i_i / (double)(FFT->n[i_d]));
        FFT->R_field[i_d][FFT->n[i_d]] = FFT->L[i_d];
    }

    // Initialize the FFT's k-space grid
    // (indices at or above n/2 are stored as negative wavenumbers)
    FFT->k_field   = (double **)SID_malloc(sizeof(double *) * FFT->n_d);
    FFT->dk        = (double *)SID_malloc(sizeof(double) * FFT->n_d);
    FFT->k_Nyquist = (double *)SID_malloc(sizeof(double) * FFT->n_d);
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->k_field[i_d]   = (double *)SID_malloc(sizeof(double) * FFT->n[i_d]);
        FFT->dk[i_d]        = TWO_PI / FFT->L[i_d];
        FFT->k_Nyquist[i_d] = TWO_PI * (double)(FFT->n[i_d]) / FFT->L[i_d] / 2.;
        for(ptrdiff_t i_i = 0; i_i < FFT->n[i_d]; i_i++) {
            if(i_i >= FFT->n[i_d] / 2)
                FFT->k_field[i_d][i_i] = TWO_PI * (double)(i_i - FFT->n[i_d]) / FFT->L[i_d];
            else
                FFT->k_field[i_d][i_i] = TWO_PI * (double)(i_i) / FFT->L[i_d];
        }
    }

    // Flags
    FFT->flag_padded = GBP_FALSE;

    // Slab info
    FFT->slab.n_x_local       = FFT->n_R_local[0];
    FFT->slab.i_x_start_local = FFT->i_R_start_local[0];
    FFT->slab.i_x_stop_local  = FFT->i_R_stop_local[0];
    FFT->slab.x_min_local     = FFT->R_field[0][FFT->i_R_start_local[0]];
    if(FFT->slab.n_x_local > 0)
        FFT->slab.x_max_local = FFT->R_field[0][FFT->i_R_stop_local[0] + 1];
    else
        FFT->slab.x_max_local = FFT->slab.x_min_local;
    SID_Allreduce(&(FFT->slab.x_max_local), &(FFT->slab.x_max), 1, SID_DOUBLE, SID_MAX, SID_COMM_WORLD);

#if USE_MPI
    // All ranks are not necessarily assigned any slices, so
    //   we need to figure out what ranks are to the right and the left for
    //   buffer exchanges
    // The per-rank slab sizes are exchanged as SID_INT into int results
    // (min_size/max_size), so they are held in an int buffer to match.
    n_x_rank              = (int *)SID_malloc(sizeof(int) * SID.n_proc);
    n_x_rank[SID.My_rank] = (int)FFT->slab.n_x_local;
    if(n_x_rank[SID.My_rank] > 0)
        flag_active = GBP_TRUE;
    else
        flag_active = GBP_FALSE;
    SID_Allreduce(&flag_active, &n_active, 1, SID_INT, SID_SUM, SID_COMM_WORLD);
    SID_Allreduce(&n_x_rank[SID.My_rank], &min_size, 1, SID_INT, SID_MIN, SID_COMM_WORLD);
    SID_Allreduce(&n_x_rank[SID.My_rank], &max_size, 1, SID_INT, SID_MAX, SID_COMM_WORLD);
    for(int i_rank = 0; i_rank < SID.n_proc; i_rank++)
        SID_Bcast(&(n_x_rank[i_rank]), 1, SID_INT, i_rank, SID_COMM_WORLD);

    // Nearest rank above this one (wrapping around) that owns a slab
    FFT->slab.rank_to_right = -1;
    for(int i_rank = SID.My_rank + 1; i_rank < SID.My_rank + SID.n_proc && FFT->slab.rank_to_right < 0; i_rank++) {
        int j_rank = i_rank % SID.n_proc;
        if(n_x_rank[j_rank] > 0)
            FFT->slab.rank_to_right = j_rank;
    }
    if(FFT->slab.rank_to_right < 0)
        FFT->slab.rank_to_right = SID.My_rank;

    // Nearest rank below this one (wrapping around) that owns a slab
    FFT->slab.rank_to_left = -1;
    for(int i_rank = SID.My_rank - 1; i_rank > SID.My_rank - SID.n_proc && FFT->slab.rank_to_left < 0; i_rank--) {
        int j_rank = i_rank;
        if(i_rank < 0)
            j_rank = i_rank + SID.n_proc;
        if(n_x_rank[j_rank] > 0)
            FFT->slab.rank_to_left = j_rank;
    }
    if(FFT->slab.rank_to_left < 0)
        FFT->slab.rank_to_left = SID.My_rank;

    // Allocated with SID_malloc, so release it with SID_free.
    SID_free(SID_FARG n_x_rank);
    SID_log("(%d cores unused, min/max slab size=%d/%d)...", SID_LOG_CONTINUE, SID.n_proc - n_active, min_size, max_size);
#else
    FFT->slab.rank_to_right = SID.My_rank;
    FFT->slab.rank_to_left  = SID.My_rank;
    if(FFT->slab.n_x_local > 0) {
        flag_active = GBP_TRUE;
        n_active    = 1;
        min_size    = FFT->slab.n_x_local;
        max_size    = FFT->slab.n_x_local;
    } else {
        flag_active = GBP_FALSE;
        n_active    = 0;
        min_size    = 0;
        max_size    = 0;
    }
#endif

    SID_log("Done.", SID_LOG_CLOSE);
}
/*
 * Time one in-place multi-dimensional real transform of shape `sz`.
 *
 * The plan is created either through the "specific" planner (when
 * `specific` is non-zero) or the generic one, the planning time is
 * reported at verbosity 2, and the transform selected by `dir` is then
 * timed with FFTW_TIME_FFT and reported at verbosity 1.
 */
void test_speed_nd_aux(struct size sz,
                       fftw_direction dir, int flags, int specific)
{
     fftw_real *in;
     fftwnd_plan plan;
     double t;
     fftw_time begin, end;
     int i, N;
     int plan_flags;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;

     /* element count: every dimension but the last as-is, last one + 2 */
     N = 1;
     i = 0;
     while (i < sz.rank - 1) {
          N *= sz.narray[i];
          ++i;
     }
     N *= (sz.narray[i] + 2);

     in = (fftw_real *) fftw_malloc(N * howmany_fields * sizeof(fftw_real));

     plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

     begin = fftw_get_time();
     if (specific)
          plan = rfftwnd_create_plan_specific(sz.rank, sz.narray, dir,
                                              plan_flags,
                                              in, howmany_fields, 0, 1);
     else
          plan = rfftwnd_create_plan(sz.rank, sz.narray, dir, plan_flags);
     end = fftw_get_time();
     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, printf("\n"));
     WHEN_VERBOSE(2, (rfftwnd_print_plan(plan)));
     WHEN_VERBOSE(2, printf("\n"));

     if (dir != FFTW_REAL_TO_COMPLEX) {
          FFTW_TIME_FFT(rfftwnd_complex_to_real(plan, howmany_fields,
                                                (fftw_complex *) in,
                                                howmany_fields, 1, 0, 0, 0),
                        in, N * howmany_fields, t);
     } else {
          FFTW_TIME_FFT(rfftwnd_real_to_complex(plan, howmany_fields,
                                                in, howmany_fields, 1,
                                                0, 0, 0),
                        in, N * howmany_fields, t);
     }

     rfftwnd_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5/2 (N log2 N) / (t in microseconds)"
                            " = %f\n", 0.5 * howmany_fields * mflops(t, N)));

     fftw_free(in);

     WHEN_VERBOSE(1, printf("\n"));
}
void test_planner(int rank) { /* * create and destroy many plans, at random. Check the * garbage-collecting allocator of twiddle factors */ int i, dim; int r, s; fftw_plan p[PLANNER_TEST_SIZE]; fftwnd_plan pnd[PLANNER_TEST_SIZE]; int *narr, maxdim; chk_mem_leak = 0; verbose--; please_wait(); if (rank < 1) rank = 1; narr = (int *) fftw_malloc(rank * sizeof(int)); maxdim = (int) pow(8192.0, 1.0/rank); for (i = 0; i < PLANNER_TEST_SIZE; ++i) { p[i] = (fftw_plan) 0; pnd[i] = (fftwnd_plan) 0; } for (i = 0; i < PLANNER_TEST_SIZE * PLANNER_TEST_SIZE; ++i) { r = rand(); if (r < 0) r = -r; r = r % PLANNER_TEST_SIZE; for (dim = 0; dim < rank; ++dim) { do { s = rand(); if (s < 0) s = -s; s = s % maxdim + 1; } while (s == 0); narr[dim] = s; } if (rank == 1) { if (p[r]) rfftw_destroy_plan(p[r]); p[r] = rfftw_create_plan(narr[0], random_dir(), measure_flag | wisdom_flag); if (paranoid && narr[0] < 200) test_correctness(narr[0]); } if (pnd[r]) rfftwnd_destroy_plan(pnd[r]); pnd[r] = rfftwnd_create_plan(rank, narr, random_dir(), measure_flag | wisdom_flag); if (i % (PLANNER_TEST_SIZE * PLANNER_TEST_SIZE / 20) == 0) { WHEN_VERBOSE(0, printf("test planner: so far so good\n")); WHEN_VERBOSE(0, printf("test planner: iteration %d out of %d\n", i, PLANNER_TEST_SIZE * PLANNER_TEST_SIZE)); } } for (i = 0; i < PLANNER_TEST_SIZE; ++i) { if (p[i]) rfftw_destroy_plan(p[i]); if (pnd[i]) rfftwnd_destroy_plan(pnd[i]); } fftw_free(narr); verbose++; chk_mem_leak = 1; }
/*
 * Check in-place real n-d transforms against a validated complex plan.
 *
 * rank, n          transform shape
 * validated_plan   complex n-d plan used to produce the reference output
 * alternate_api    non-zero: use the rfftw2d/rfftw3d entry points when the
 *                  rank is 2 or 3
 * specific         non-zero: use the "..._create_plan_specific" planners
 *
 * For every stride from 1 to MAX_STRIDE random real input is transformed
 * forward (compared against the reference) and back again (compared
 * against the original input after scaling by 1/N).
 */
void testnd_in_place(int rank, int *n, fftwnd_plan validated_plan,
                     int alternate_api, int specific)
{
     int istride, ostride, howmany;
     int N, dim, i, j, k;
     int nc, nhc, nr;
     int use_alt;
     fftw_real *rdata, *rback;
     fftw_complex *ref_in, *cdata, *ref_out;
     fftwnd_plan fwd, inv;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
          flags |= FFTW_THREADSAFE;

     /* N = total points, nr = last dimension, nc = product of the others,
        nhc = number of complex outputs along the last dimension */
     N = nc = nr = nhc = 1;
     for (dim = 0; dim < rank; ++dim)
          N *= n[dim];
     if (rank > 0) {
          nr = n[rank - 1];
          nc = N / nr;
          nhc = nr / 2 + 1;
     }

     /* one padded buffer serves as real input, complex output and real
        round-trip output */
     rdata = (fftw_real *) fftw_malloc(2 * nhc * nc * MAX_STRIDE
                                       * sizeof(fftw_real));
     rback = rdata;
     cdata = (fftw_complex *) rdata;
     ref_in = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     ref_out = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     use_alt = alternate_api && (rank == 2 || rank == 3);

     if (specific) {
          if (use_alt && rank == 2) {
               fwd = rfftw2d_create_plan_specific(n[0], n[1],
                                                  FFTW_REAL_TO_COMPLEX, flags,
                                                  rdata, MAX_STRIDE, 0, 0);
               inv = rfftw2d_create_plan_specific(n[0], n[1],
                                                  FFTW_COMPLEX_TO_REAL, flags,
                                                  rdata, MAX_STRIDE, 0, 0);
          } else if (use_alt) {
               fwd = rfftw3d_create_plan_specific(n[0], n[1], n[2],
                                                  FFTW_REAL_TO_COMPLEX, flags,
                                                  rdata, MAX_STRIDE, 0, 0);
               inv = rfftw3d_create_plan_specific(n[0], n[1], n[2],
                                                  FFTW_COMPLEX_TO_REAL, flags,
                                                  rdata, MAX_STRIDE, 0, 0);
          } else {
               fwd = rfftwnd_create_plan_specific(rank, n,
                                                  FFTW_REAL_TO_COMPLEX, flags,
                                                  rdata, MAX_STRIDE,
                                                  rdata, MAX_STRIDE);
               inv = rfftwnd_create_plan_specific(rank, n,
                                                  FFTW_COMPLEX_TO_REAL, flags,
                                                  rdata, MAX_STRIDE,
                                                  rdata, MAX_STRIDE);
          }
     } else {
          if (use_alt && rank == 2) {
               fwd = rfftw2d_create_plan(n[0], n[1],
                                         FFTW_REAL_TO_COMPLEX, flags);
               inv = rfftw2d_create_plan(n[0], n[1],
                                         FFTW_COMPLEX_TO_REAL, flags);
          } else if (use_alt) {
               fwd = rfftw3d_create_plan(n[0], n[1], n[2],
                                         FFTW_REAL_TO_COMPLEX, flags);
               inv = rfftw3d_create_plan(n[0], n[1], n[2],
                                         FFTW_COMPLEX_TO_REAL, flags);
          } else {
               fwd = rfftwnd_create_plan(rank, n,
                                         FFTW_REAL_TO_COMPLEX, flags);
               inv = rfftwnd_create_plan(rank, n,
                                         FFTW_COMPLEX_TO_REAL, flags);
          }
     }

     CHECK(fwd != NULL && inv != NULL, "can't create plan");

     for (i = 0; i < nc * nhc * 2 * MAX_STRIDE; ++i)
          rback[i] = 0;

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
          /* generate random inputs */
          for (i = 0; i < nc; ++i) {
               for (j = 0; j < nr; ++j) {
                    c_re(ref_in[i * nr + j]) = DRAND();
                    c_im(ref_in[i * nr + j]) = 0.0;
                    for (k = 0; k < istride; ++k)
                         rdata[(i * nhc * 2 + j) * istride + k]
                             = c_re(ref_in[i * nr + j]);
               }
          }

          /* reference answer from the validated complex plan */
          fftwnd(validated_plan, 1, ref_in, 1, 1, ref_out, 1, 1);

          howmany = ostride = istride;

          WHEN_VERBOSE(2, printf("\n    testing in-place stride %d...",
                                 istride));

          /* the "one" API is only usable (and only sometimes chosen) for
             a single unit-stride transform */
          if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
               rfftwnd_real_to_complex(fwd, howmany, rdata, istride, 1,
                                       cdata, ostride, 1);
          else
               rfftwnd_one_real_to_complex(fwd, rdata, NULL);

          for (i = 0; i < nc; ++i) {
               for (k = 0; k < howmany; ++k) {
                    CHECK(compute_error_complex(cdata + i * nhc * ostride + k,
                                                ostride,
                                                ref_out + i * nr, 1,
                                                nhc) < TOLERANCE,
                          "in-place (r2c): wrong answer");
               }
          }

          if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
               rfftwnd_complex_to_real(inv, howmany, cdata, ostride, 1,
                                       rback, istride, 1);
          else
               rfftwnd_one_complex_to_real(inv, cdata, NULL);

          /* undo the unnormalised round trip */
          for (i = 0; i < nc * nhc * 2 * istride; ++i)
               rback[i] *= 1.0 / N;

          for (i = 0; i < nc; ++i) {
               for (k = 0; k < howmany; ++k) {
                    CHECK(compute_error(rback + i * nhc * 2 * istride + k,
                                        istride,
                                        (fftw_real *) (ref_in + i * nr), 2,
                                        nr) < TOLERANCE,
                          "in-place (c2r): wrong answer (check 2)");
               }
          }
     }

     rfftwnd_destroy_plan(fwd);
     rfftwnd_destroy_plan(inv);

     fftw_free(ref_out);
     fftw_free(ref_in);
     fftw_free(rdata);
}
/*
 * Check out-of-place real n-d transforms against a validated complex plan.
 *
 * rank, n          transform shape
 * validated_plan   complex n-d plan used to produce the reference output
 *
 * Every combination of input and output stride from 1 to MAX_STRIDE is
 * exercised: random real input is transformed forward (compared against
 * the reference) and back again (compared against the original input
 * after scaling by 1/N).
 */
void testnd_out_of_place(int rank, int *n, fftwnd_plan validated_plan)
{
     int istride, ostride;
     int N, dim, i, j, k;
     int nc, nhc, nr;
     fftw_real *rdata, *rback;
     fftw_complex *ref_in, *cdata, *ref_out;
     fftwnd_plan fwd, inv;
     int flags = measure_flag | wisdom_flag;

     if (coinflip())
          flags |= FFTW_THREADSAFE;

     /* N = total points, nr = last dimension, nc = product of the others,
        nhc = number of complex outputs along the last dimension */
     N = nc = nr = nhc = 1;
     for (dim = 0; dim < rank; ++dim)
          N *= n[dim];
     if (rank > 0) {
          nr = n[rank - 1];
          nc = N / nr;
          nhc = nr / 2 + 1;
     }

     rdata = (fftw_real *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_real));
     rback = (fftw_real *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_real));
     cdata = (fftw_complex *) fftw_malloc(nhc * nc * MAX_STRIDE
                                          * sizeof(fftw_complex));
     ref_in = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     ref_out = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     fwd = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
     inv = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
     CHECK(fwd != NULL && inv != NULL, "can't create plan");

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
          /* generate random inputs */
          for (i = 0; i < nc; ++i) {
               for (j = 0; j < nr; ++j) {
                    c_re(ref_in[i * nr + j]) = DRAND();
                    c_im(ref_in[i * nr + j]) = 0.0;
                    for (k = 0; k < istride; ++k)
                         rdata[(i * nr + j) * istride + k]
                             = c_re(ref_in[i * nr + j]);
               }
          }

          for (i = 0; i < N * istride; ++i)
               rback[i] = 0.0;

          /* reference answer from the validated complex plan */
          fftwnd(validated_plan, 1, ref_in, 1, 1, ref_out, 1, 1);

          for (ostride = 1; ostride <= MAX_STRIDE; ++ostride) {
               int howmany;

               if (istride < ostride)
                    howmany = istride;
               else
                    howmany = ostride;

               WHEN_VERBOSE(2, printf("\n    testing stride %d/%d...",
                                      istride, ostride));

               /* the "one" API is only usable (and only sometimes chosen)
                  for a single unit-stride transform */
               if (howmany != 1 || istride != 1 || ostride != 1
                   || coinflip())
                    rfftwnd_real_to_complex(fwd, howmany, rdata, istride, 1,
                                            cdata, ostride, 1);
               else
                    rfftwnd_one_real_to_complex(fwd, rdata, cdata);

               for (i = 0; i < nc; ++i) {
                    for (k = 0; k < howmany; ++k) {
                         CHECK(compute_error_complex(cdata
                                                     + i * nhc * ostride + k,
                                                     ostride,
                                                     ref_out + i * nr, 1,
                                                     nhc) < TOLERANCE,
                               "out-of-place (r2c): wrong answer");
                    }
               }

               if (howmany != 1 || istride != 1 || ostride != 1
                   || coinflip())
                    rfftwnd_complex_to_real(inv, howmany, cdata, ostride, 1,
                                            rback, istride, 1);
               else
                    rfftwnd_one_complex_to_real(inv, cdata, rback);

               /* undo the unnormalised round trip */
               for (i = 0; i < N * istride; ++i)
                    rback[i] *= 1.0 / N;

               /* the whole buffer can only be compared when every
                  interleaved transform was actually computed */
               if (istride == howmany)
                    CHECK(compute_error(rback, 1, rdata, 1, N * istride)
                          < TOLERANCE,
                          "out-of-place (c2r): wrong answer");

               for (i = 0; i < nc; ++i) {
                    for (k = 0; k < howmany; ++k) {
                         CHECK(compute_error(rback + i * nr * istride + k,
                                             istride,
                                             (fftw_real *) (ref_in + i * nr),
                                             2,
                                             nr) < TOLERANCE,
                               "out-of-place (c2r): wrong answer (check 2)");
                    }
               }
          }
     }

     rfftwnd_destroy_plan(fwd);
     rfftwnd_destroy_plan(inv);

     fftw_free(rback);
     fftw_free(ref_out);
     fftw_free(ref_in);
     fftw_free(cdata);
     fftw_free(rdata);
}
int init(int _Nx, int _Ny, int _Nz, double _Lx, double _Lz, double _Re, double _flux, double _dt, int _nsteps) { /******************** Definition of all variables ********************/ /* External Variables. All external variables are defined in main.h */ extern int qpts, dimR, dimQ, Nx, Nz; extern double dt, re, flux; extern double *Kx, *Kz, **K2, *cfl2; extern double **Q, **Qp, **Qpp, **R, **Rp, **Qw, **Qpw, **Rw, **Qs, **Qps, **Qpps, **Rs, **Rps, *Rp0, **Rpw, **Qppw, *Rpp0; extern double *Uadd, *Vadd, *Vpadd; extern double *Qy; extern double *W; extern mcomplex ****U, ****C; /* state variables */ extern mcomplex **Fa, **Fb, **TM; extern mcomplex *fa, *fb, *tm; extern double **MZ; extern double ***M; extern mcomplex ****IU, ****IC; /* incremental state variables */ extern mcomplex **IFa, **IFb, **ITM; extern mcomplex *Ifa, *Ifb, *Itm; extern mcomplex ****AU, ****AC; /* adjoint variables and will use the same other variables used in state equations */ extern mcomplex ****IAU, ****IAC; /* incremental adjoint variables */ extern mcomplex **Uxbt, **Uzb; /* variables used to store dux duz evaluated at y=-1 used for computing boundary conditions for incremental state equations */ extern mcomplex **Uxb, **Uzb; /* variables used to store dux duz evaluated at y=-1 from previous state used for boundary conditions for incremental state equations */ extern mcomplex **IUxb, **IUzb; extern mcomplex **IAUxb, **IAUzb; extern mcomplex **AUxb, **AUzb; /* variables used to store dux duz evaluated at y=-1 used for computing boundary conditions for incremental state equations */ extern fftw_complex ***CT, ***ICT; /* variables used in fft */ extern fftw_plan pf1, pf2; extern fftw_plan Ipf1, Ipf2; extern rfftwnd_plan pr1, pr2; extern mcomplex *****MC, *****MIC; /* variables used to store state and incremental state solutions between two check points. 
*/ extern mcomplex ****MU, ****MIU; /* variables used to store manufacture solutions */ extern mcomplex ****LU, ****LIU; /* Local Variables */ int Ny, sizeRealTransform; double Lx, Lz; fftw_complex *fout = NULL; /************************ end of variable definitions ****************/ Nx = _Nx; Ny = _Ny; Nz = _Nz; Lx = _Lx; Lz = _Lz; dt = _dt; nsteps = _nsteps; flux = _flux; re = _Re; printf("Nx,Ny,Nz,Lx | Lz,dt,nsteps | flux,Re\n" "%d %d %d %f | %f %f %d | %f %f\n", Nx, Ny, Nz, Lx, Lz, dt, nsteps, flux, re); re = 1. / re; /* time step routines assume I pass 1/Re */ qpts = 3 * Ny / 2; /* number of quadrature points (see page 9 of Moser's notes) */ dimR = Ny - 2; /* dimR and dimQ denote the number of terms */ dimQ = Ny - 4; /* in the truncated expansions for the */ /* functions v_hat, g_hat, U, W */ /* (see page 5 of Moser's notes). */ sizeRealTransform = 3 * Nx / 2; /* for the FFTs */ /**************** check input parameters, Nx/4, Nz/2 ***************/ if (Nx % 4 != 0) { printf("Required arguments Ny/4==0\n"); return (EXIT_FAILURE); } if (Nz % 2 != 0) { printf("Required arguments Nz/2==0\n"); return (EXIT_FAILURE); } if (Ny - 4 < 0) { printf("Required arguments Nzy>4\n"); return (EXIT_FAILURE); } /* Create matrices using Legendre polynomials */ if (LegendreSetup() != NO_ERR) { return (EXIT_FAILURE); } /*********************end of parameter checking ****************/ /****************Initialize and allocate all variables ***************/ /* Compute wave numbers */ if (waveNums(Nx / 2, Nz, Lx, Lz) != NO_ERR) { destroy(DESTROY_STATUS_LEGENDRE); return (EXIT_FAILURE); } /* get memory for 4D arrays and other matrices */ if (getMem() != NO_ERR) { destroy(DESTROY_STATUS_LEGENDRE | DESTROY_STATUS_WAVENUMS); return (EXIT_FAILURE); } /* Create plans for FFTs */ pf1 = fftw_create_plan_specific(3 * Nz / 2, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE, CT[0][0], 3 * Nx / 4 + 1, fout, -1); pf2 = fftw_create_plan_specific(3 * Nz / 2, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE, 
CT[0][0], 3 * Nx / 4 + 1, fout, -1); pr1 = rfftwnd_create_plan(1, &sizeRealTransform, FFTW_COMPLEX_TO_REAL, FFTW_MEASURE | FFTW_IN_PLACE); pr2 = rfftwnd_create_plan(1, &sizeRealTransform, FFTW_REAL_TO_COMPLEX, FFTW_MEASURE | FFTW_IN_PLACE); /* Create plans for FFTs */ Ipf1 = fftw_create_plan_specific(3 * Nz / 2, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE, ICT[0][0], 3 * Nx / 4 + 1, fout, -1); Ipf2 = fftw_create_plan_specific(3 * Nz / 2, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE, ICT[0][0], 3 * Nx / 4 + 1, fout, -1); /* set variables for checking CFL condition */ if (cflVars(Lx, Lz) != 0) { printf("Error creating CF variables\n"); destroy(DESTROY_STATUS_LEGENDRE | DESTROY_STATUS_WAVENUMS | DESTROY_STATUS_FFTW); return (EXIT_FAILURE); } /* initalize part */ memset(C[0][0][0], 0, (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex)); memset(IC[0][0][0], 0, (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex)); memset(AC[0][0][0], 0, (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex)); memset(IAC[0][0][0], 0, (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex)); memset(MC[0][0][0][0], 0, (nsteps * 3 + 1) * (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex)); memset(MIC[0][0][0][0], 0, (nsteps * 3 + 1) * (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex)); memset(U[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex)); memset(AU[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex)); memset(IU[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex)); memset(IAU[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex)); memset(LU[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex)); memset(LIU[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex)); memset(Uxb[0], 0, Nz * (Nx / 2) * sizeof(mcomplex)); memset(Uzb[0], 0, Nz * (Nx / 2) * sizeof(mcomplex)); /******************************end of initialization part ***************/ return (EXIT_SUCCESS); }
// Builds the 4D rock-physics inversion tables.
//
//   priorMean, priorCov   prior mean and covariance
//   posteriorCov          posterior covariance
//   mSamp                 samples, indexed as mSamp[variable 0..5][sample]
//
// Steps, in order: create the two in-place 1D FFT plans of length nfp_,
// obtain the projection v_ from SolveGEVProblem, project the mean and both
// covariances with v_, find the range of the projected samples in each of
// the four factors (widened by three posterior standard deviations), build
// the per-factor prior kernels and smoothing filters, and finally fill and
// smooth prediction table 0.
RockPhysicsInversion4D::RockPhysicsInversion4D(NRLib::Vector                      priorMean,
                                               NRLib::Matrix                      priorCov,
                                               NRLib::Matrix                      posteriorCov,
                                               std::vector<std::vector<double> >  mSamp)
{
  // Grid resolution per factor and padded FFT length
  nf_.resize(4);
  for (int d = 0; d < 4; d++)
    nf_[d] = 60;
  nfp_ = 135;

  fftplan1_ = rfftwnd_create_plan(1, &nfp_, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
  fftplan2_ = rfftwnd_create_plan(1, &nfp_, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);

  // Projection onto the four factors
  v_.resize(4,6);
  SolveGEVProblem(priorCov, posteriorCov, v_);

  NRLib::Matrix tmp;
  NRLib::Matrix priorCovF;
  NRLib::Matrix posteriorCovF;

  meanf_        = priorMean*v_;
  tmp           = priorCov*v_;
  priorCovF     = transpose(v_)*tmp;
  tmp           = posteriorCov*v_;
  posteriorCovF = transpose(v_)*tmp;

  int nSamp = mSamp[0].size();

  // Seed the factor range with the first sample ...
  NRLib::Vector m(6);
  for (int j = 0; j < 6; j++)
    m(j) = mSamp[j][0];

  maxf_ = m*v_;
  minf_ = m*v_;

  // ... then grow it over all samples
  for (int s = 0; s < nSamp; s++) {
    NRLib::Vector f;
    for (int j = 0; j < 6; j++)
      m(j) = mSamp[j][s];
    f = m*v_;
    for (int d = 0; d < 4; d++) {
      if (minf_(d) > f(d))
        minf_(d) = f(d);
      if (maxf_(d) < f(d))
        maxf_(d) = f(d);
    }
  }

  // Widen the range by three posterior standard deviations on each side
  for (int d = 0; d < 4; d++) {
    minf_(d) -= 3*sqrt(posteriorCovF(d,d));
    maxf_(d) += 3*sqrt(posteriorCovF(d,d));
  }

  smoothingFilter_.resize(4);
  priorDistribution_.resize(4);
  for (int d = 0; d < 4; d++) {
    double df = (maxf_(d) - minf_(d)) / double(nf_[d]);
    priorDistribution_[d] = MakeGaussKernel(meanf_(d), priorCovF(d,d), minf_(d), df, nfp_);
    smoothingFilter_[d]   = MakeSmoothingFilter(posteriorCovF(d,d), df);
  }

  allocatePredictionTables( );

  // Table 0 is filled with a constant value of 1.0 for every sample
  std::vector<double> dummy(nSamp, 1.0);

  fillInTable( mSamp, dummy, 0);
  DivideAndSmoothTable(0, priorDistribution_, smoothingFilter_);
}