Example #1
0
/*
 * Class:     jfftw_real_nd_Plan
 * Method:    createPlan
 * Signature: ([III)V
 */
JNIEXPORT void JNICALL Java_jfftw_real_nd_Plan_createPlan( JNIEnv *env, jobject obj, jintArray dim, jint dir, jint flags )
{
	jclass clazz;
	jfieldID id;
	jbyteArray arr;
	unsigned char* carr;
	int rank;
	int *cdim;

	if( sizeof( jdouble ) != sizeof( fftw_real ) )
	{
		(*env)->ThrowNew( env, (*env)->FindClass( env, "java/lang/RuntimeException" ), "jdouble and fftw_real are incompatible" );
		return;
	}

	clazz = (*env)->GetObjectClass( env, obj );
	id    = (*env)->GetFieldID( env, clazz, "plan", "[B" );
	arr   = (*env)->NewByteArray( env, sizeof( rfftwnd_plan ) );
	carr  = (*env)->GetByteArrayElements( env, arr, 0 );
	rank  = (*env)->GetArrayLength( env, dim );
	cdim  = (*env)->GetIntArrayElements( env, dim, 0 );

	(*env)->MonitorEnter( env, (*env)->FindClass( env, "jfftw/Plan" ) );

	*(rfftwnd_plan*)carr = rfftwnd_create_plan( rank, cdim, dir, flags );

	(*env)->MonitorExit( env, (*env)->FindClass( env, "jfftw/Plan" ) );

	(*env)->ReleaseIntArrayElements( env, dim, cdim, 0 );
	(*env)->ReleaseByteArrayElements( env, arr, carr, 0 );
	(*env)->SetObjectField( env, obj, id, arr );
}
Example #2
0
void F77_FUNC_(rfftwnd_f77_create_plan,RFFTWND_F77_CREATE_PLAN)
(fftwnd_plan *p, int *rank, int *n, int *idir, int *flags)
{
     fftw_direction dir = *idir < 0 ? FFTW_FORWARD : FFTW_BACKWARD;

     fftw_reverse_int_array(n,*rank);  /* column-major -> row-major */
     *p = rfftwnd_create_plan(*rank,n,dir,*flags);
     fftw_reverse_int_array(n,*rank);  /* reverse back */
}
Example #3
0
void Wavelet::fft1DInPlace()
{
  // use the operator version of the fourier transform
  if(isReal_) {
    int flag;
    rfftwnd_plan plan;
    flag    = FFTW_ESTIMATE | FFTW_IN_PLACE;
    plan    = rfftwnd_create_plan(1, &nzp_ ,FFTW_REAL_TO_COMPLEX,flag);
    //
    // NBNB-PAL: The call rfftwnd_on_real_to_complex is causing UMRs in Purify.
    //
    rfftwnd_one_real_to_complex(plan,rAmp_,cAmp_);
    fftwnd_destroy_plan(plan);
    isReal_ = false;
  }
}
Example #4
0
void Wavelet::invFFT1DInPlace()
{
  // use the operator version of the fourier transform
  if(!isReal_) {
    int flag;
    rfftwnd_plan plan;

    flag = FFTW_ESTIMATE | FFTW_IN_PLACE;
    plan= rfftwnd_create_plan(1,&nzp_,FFTW_COMPLEX_TO_REAL,flag);
    rfftwnd_one_complex_to_real(plan,cAmp_,rAmp_);
    fftwnd_destroy_plan(plan);
    isReal_=true;
    double scale= static_cast<double>(1.0/static_cast<double>(nzp_));
    for(int i=0; i < nzp_; i++)
      rAmp_[i] = static_cast<fftw_real>(rAmp_[i]*scale);
  }
}
Example #5
0
rfftwnd_mpi_plan rfftwnd_mpi_create_plan(MPI_Comm comm,
				       int rank, const int *n,
				       fftw_direction dir,
				       int flags)
{
    rfftwnd_mpi_plan p;

    if (rank < 2)
	return 0;

    p = (rfftwnd_mpi_plan) fftw_malloc(sizeof(rfftwnd_mpi_plan_data));
    p->p_fft_x = 0;
    p->p_fft = 0;
    p->p_transpose = 0;
    p->p_transpose_inv = 0;
    p->work = 0;

    p->p_fft_x = fftw_create_plan(n[0], dir, flags | FFTW_IN_PLACE);

    p->p_fft = rfftwnd_create_plan(rank-1, n+1, dir, flags | FFTW_IN_PLACE);
    if (!p->p_fft)
	rfftwnd_mpi_destroy_plan(p);

    p->p_transpose = transpose_mpi_create_plan(n[0], p->p_fft->n[0], comm);
    if (!p->p_transpose)
	rfftwnd_mpi_destroy_plan(p);

    p->p_transpose_inv = transpose_mpi_create_plan(p->p_fft->n[0], n[0], comm);
    if (!p->p_transpose_inv)
	rfftwnd_mpi_destroy_plan(p);

    if (n[0] > p->p_fft->nwork)
	 p->work = (fftw_complex *) fftw_malloc(n[0] * sizeof(fftw_complex));

    return p;
}
Example #6
0
void init_field(int n_d, int *n, double *L, field_info *FFT) {
    ptrdiff_t  n_x_local;
    ptrdiff_t  i_x_start_local;
    ptrdiff_t  n_y_transpose_local;
    ptrdiff_t  i_y_start_transpose_local;
    ptrdiff_t *n_x_rank;

    int  flag_active;
    int  n_active;
    int  min_size, max_size;

    SID_log("Initializing ", SID_LOG_OPEN);
    for(ptrdiff_t i_d = 0; i_d < n_d; i_d++) {
        if(i_d < (n_d - 1))
            SID_log("%dx", SID_LOG_CONTINUE, n[i_d]);
        else
            SID_log("%d element %d-d FFT ", SID_LOG_CONTINUE, n[i_d], n_d);
    }
    SID_log("(%d byte precision)...", SID_LOG_CONTINUE, (int)sizeof(GBPREAL));

    // Initialize FFT sizes
    FFT->n_d             = n_d;
    FFT->n               = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->L               = (double *)SID_calloc(sizeof(double) * FFT->n_d);
    FFT->n_k_local       = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->n_R_local       = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_R_start_local = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_k_start_local = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_R_stop_local  = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_k_stop_local  = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->n[i_d]               = n[i_d];
        FFT->L[i_d]               = L[i_d];
        FFT->i_R_start_local[i_d] = 0;
        FFT->i_k_start_local[i_d] = 0;
        FFT->n_R_local[i_d]       = FFT->n[i_d];
        FFT->n_k_local[i_d]       = FFT->n[i_d];
    }
    FFT->n_k_local[FFT->n_d - 1] = FFT->n[FFT->n_d - 1] / 2 + 1;

    // Initialize FFTW

    // Create an integer version of FFT->n[] to pass to ..._create_plan
    int *n_int=(int *)SID_malloc(sizeof(int)*FFT->n_d);
    for(int i_d=0;i_d<FFT->n_d;i_d++)
        n_int[i_d]=(int)FFT->n[i_d];
#if FFTW_V2
#if USE_MPI
    int total_local_size_int;
    int n_x_local_int;
    int i_x_start_local_int;
    int n_y_transpose_local_int;
    int i_y_start_transpose_local_int;
    FFT->plan  = rfftwnd_mpi_create_plan(SID.COMM_WORLD->comm, FFT->n_d, n_int, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE);
    FFT->iplan = rfftwnd_mpi_create_plan(SID.COMM_WORLD->comm, FFT->n_d, n_int, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE);
    rfftwnd_mpi_local_sizes(FFT->plan,
                            &(n_x_local_int),
                            &(i_x_start_local_int),
                            &(n_y_transpose_local_int),
                            &(i_y_start_transpose_local_int),
                            &total_local_size_int);
    n_x_local =  (ptrdiff_t)n_x_local_int;
    i_x_start_local = (ptrdiff_t)i_x_start_local_int;
    n_y_transpose_local = (ptrdiff_t)n_y_transpose_local_int;
    i_y_start_transpose_local = (ptrdiff_t)i_y_start_transpose_local_int;
    FFT->total_local_size = (size_t)total_local_size_int;
#else
    FFT->total_local_size = 1;
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        if(i_d < FFT->n_d - 1)
            FFT->total_local_size *= FFT->n[i_d];
        else
            FFT->total_local_size *= 2 * (FFT->n[i_d] / 2 + 1);
    }
#if USE_DOUBLE
    FFT->plan  = fftwnd_create_plan(FFT->n_d, n_int, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
    FFT->iplan = fftwnd_create_plan(FFT->n_d, n_int, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
#else
    FFT->plan  = rfftwnd_create_plan(FFT->n_d, n_int, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
    FFT->iplan = rfftwnd_create_plan(FFT->n_d, n_int, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
#endif
#endif
#else
#if USE_MPI
#if USE_DOUBLE
    fftw_mpi_init();
    FFT->total_local_size = fftw_mpi_local_size_many_transposed(FFT->n_d,
                                                                FFT->n,
                                                                1,
                                                                FFTW_MPI_DEFAULT_BLOCK,
                                                                FFTW_MPI_DEFAULT_BLOCK,
                                                                SID_COMM_WORLD->comm,
                                                                &(n_x_local),
                                                                &(i_x_start_local),
                                                                &(n_y_transpose_local),
                                                                &(i_y_start_transpose_local));
    FFT->plan  = fftw_mpi_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
    FFT->iplan = fftw_mpi_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
#else
    fftwf_mpi_init();
    FFT->total_local_size   = fftwf_mpi_local_size_many_transposed(FFT->n_d,
                                                                 FFT->n,
                                                                 1,
                                                                 FFTW_MPI_DEFAULT_BLOCK,
                                                                 FFTW_MPI_DEFAULT_BLOCK,
                                                                 SID_COMM_WORLD->comm,
                                                                 &(n_x_local),
                                                                 &(i_x_start_local),
                                                                 &(n_y_transpose_local),
                                                                 &(i_y_start_transpose_local));
    FFT->plan  = fftwf_mpi_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
    FFT->iplan = fftwf_mpi_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
#endif
#else
    FFT->total_local_size = 1;
    for(ptrdiff_t i_d=0; i_d < FFT->n_d; i_d++) {
        if(i_d < FFT->n_d - 1)
            FFT->total_local_size *= FFT->n[i_d];
        else
            FFT->total_local_size *= 2 * (FFT->n[i_d] / 2 + 1);
    }
#if USE_DOUBLE
    FFT->plan  = fftw_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, FFTW_ESTIMATE);
    FFT->iplan = fftw_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, FFTW_ESTIMATE);
#else
    FFT->plan  = fftwf_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, FFTW_ESTIMATE);
    FFT->iplan = fftwf_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, FFTW_ESTIMATE);
#endif
#endif
#endif

    SID_free(SID_FARG n_int);


    // Set empty slabs to start at 0 to make ignoring them simple.
    if(n_x_local == 0)
        i_x_start_local = 0;
    if(n_y_transpose_local == 0)
        i_y_start_transpose_local = 0;

    // Modify the local slab dimensions according to what FFTW chose.
    FFT->i_R_start_local[0] = i_x_start_local;
    FFT->n_R_local[0]       = n_x_local;
    if(FFT->n_d > 1) {
        FFT->i_k_start_local[1] = i_y_start_transpose_local;
        FFT->n_k_local[1]       = n_y_transpose_local;
    }

    // Allocate field
#if USE_FFTW3
    FFT->field_local  = (gbpFFT_real    *)fftwf_alloc_real(FFT->total_local_size);
#else
    FFT->field_local  = (gbpFFT_real    *)SID_malloc(sizeof(gbpFFT_real)*FFT->total_local_size);
#endif
    FFT->cfield_local = (gbpFFT_complex *)FFT->field_local;

    // Upper limits of slab decomposition
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->i_R_stop_local[i_d] = FFT->i_R_start_local[i_d] + FFT->n_R_local[i_d] - 1;
        FFT->i_k_stop_local[i_d] = FFT->i_k_start_local[i_d] + FFT->n_k_local[i_d] - 1;
    }

    // FFTW padding sizes
    if(FFT->n_d > 1) {
        FFT->pad_size_R = 2 * (FFT->n_R_local[FFT->n_d - 1] / 2 + 1) - FFT->n_R_local[FFT->n_d - 1];
        FFT->pad_size_k = 0;
    } else {
        FFT->pad_size_R = 0;
        FFT->pad_size_k = 0;
    }

    // Number of elements (global and local) in the FFT
    ptrdiff_t i_d = 0;
    for(FFT->n_field = 1, FFT->n_field_R_local = 1, FFT->n_field_k_local = 1; i_d < FFT->n_d; i_d++) {
        FFT->n_field *= (size_t)FFT->n[i_d];
        FFT->n_field_R_local *= (size_t)FFT->n_R_local[i_d];
        FFT->n_field_k_local *= (size_t)FFT->n_k_local[i_d];
    }

    // Clear the field
    clear_field(FFT);

    // Initialize the FFT's real-space grid
    FFT->R_field = (double **)SID_malloc(sizeof(double *) * FFT->n_d);
    FFT->dR      = (double *)SID_malloc(sizeof(double *) * FFT->n_d);
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->R_field[i_d] = (double *)SID_malloc(sizeof(double) * (FFT->n[i_d] + 1));
        FFT->dR[i_d]      = FFT->L[i_d] / (double)(FFT->n[i_d]);
        for(ptrdiff_t i_i = 0; i_i < FFT->n[i_d]; i_i++)
            FFT->R_field[i_d][i_i] = FFT->L[i_d] * ((double)i_i / (double)(FFT->n[i_d]));
        FFT->R_field[i_d][FFT->n[i_d]] = FFT->L[i_d];
    }

    // Initialize the FFT's k-space grid
    FFT->k_field   = (double **)SID_malloc(sizeof(double *) * FFT->n_d);
    FFT->dk        = (double *)SID_malloc(sizeof(double *) * FFT->n_d);
    FFT->k_Nyquist = (double *)SID_malloc(sizeof(double *) * FFT->n_d);
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->k_field[i_d]   = (double *)SID_malloc(sizeof(double) * FFT->n[i_d]);
        FFT->dk[i_d]        = TWO_PI / FFT->L[i_d];
        FFT->k_Nyquist[i_d] = TWO_PI * (double)(FFT->n[i_d]) / FFT->L[i_d] / 2.;
        for(ptrdiff_t i_i = 0; i_i < FFT->n[i_d]; i_i++) {
            if(i_i >= FFT->n[i_d] / 2)
                FFT->k_field[i_d][i_i] = TWO_PI * (double)(i_i - FFT->n[i_d]) / FFT->L[i_d];
            else
                FFT->k_field[i_d][i_i] = TWO_PI * (double)(i_i) / FFT->L[i_d];
        }
    }

    // Flags
    FFT->flag_padded = GBP_FALSE;

    // Slab info
    FFT->slab.n_x_local       = FFT->n_R_local[0];
    FFT->slab.i_x_start_local = FFT->i_R_start_local[0];
    FFT->slab.i_x_stop_local  = FFT->i_R_stop_local[0];
    FFT->slab.x_min_local     = FFT->R_field[0][FFT->i_R_start_local[0]];
    if(FFT->slab.n_x_local > 0)
        FFT->slab.x_max_local = FFT->R_field[0][FFT->i_R_stop_local[0] + 1];
    else
        FFT->slab.x_max_local = FFT->slab.x_min_local;
    SID_Allreduce(&(FFT->slab.x_max_local), &(FFT->slab.x_max), 1, SID_DOUBLE, SID_MAX, SID_COMM_WORLD);

#if USE_MPI
    // All ranks are not necessarily assigned any slices, so
    //   we need to figure out what ranks are to the right and the left for
    //   buffer exchanges
    n_x_rank              = (ptrdiff_t *)SID_malloc(sizeof(ptrdiff_t) * SID.n_proc);
    n_x_rank[SID.My_rank] = (ptrdiff_t)FFT->slab.n_x_local;
    if(n_x_rank[SID.My_rank] > 0)
        flag_active = GBP_TRUE;
    else
        flag_active = GBP_FALSE;
    SID_Allreduce(&flag_active, &n_active, 1, SID_INT, SID_SUM, SID_COMM_WORLD);
    SID_Allreduce(&n_x_rank[SID.My_rank], &min_size, 1, SID_INT, SID_MIN, SID_COMM_WORLD);
    SID_Allreduce(&n_x_rank[SID.My_rank], &max_size, 1, SID_INT, SID_MAX, SID_COMM_WORLD);
    for(int i_rank = 0; i_rank < SID.n_proc; i_rank++)
        SID_Bcast(&(n_x_rank[i_rank]), 1, SID_INT, i_rank, SID_COMM_WORLD);
    FFT->slab.rank_to_right = -1;
    for(int i_rank = SID.My_rank + 1; i_rank < SID.My_rank + SID.n_proc && FFT->slab.rank_to_right < 0; i_rank++) {
        int j_rank = i_rank % SID.n_proc;
        if(n_x_rank[j_rank] > 0)
            FFT->slab.rank_to_right = j_rank;
    }
    if(FFT->slab.rank_to_right < 0)
        FFT->slab.rank_to_right = SID.My_rank;
    FFT->slab.rank_to_left = -1;
    for(int i_rank = SID.My_rank - 1; i_rank > SID.My_rank - SID.n_proc && FFT->slab.rank_to_left < 0; i_rank--) {
        int j_rank = i_rank;
        if(i_rank < 0)
            j_rank = i_rank + SID.n_proc;
        if(n_x_rank[j_rank] > 0)
            FFT->slab.rank_to_left = j_rank;
    }
    if(FFT->slab.rank_to_left < 0)
        FFT->slab.rank_to_left = SID.My_rank;
    free(n_x_rank);
    SID_log("(%d cores unused, min/max slab size=%d/%d)...", SID_LOG_CONTINUE, SID.n_proc - n_active, min_size, max_size);
#else
    FFT->slab.rank_to_right = SID.My_rank;
    FFT->slab.rank_to_left  = SID.My_rank;
    if(FFT->slab.n_x_local > 0) {
        flag_active = GBP_TRUE;
        n_active    = 1;
        min_size    = FFT->slab.n_x_local;
        max_size    = FFT->slab.n_x_local;
    } else {
        flag_active = GBP_FALSE;
        n_active    = 0;
        min_size    = 0;
        max_size    = 0;
    }
#endif

    SID_log("Done.", SID_LOG_CLOSE);
}
void test_speed_nd_aux(struct size sz,
		       fftw_direction dir, int flags, int specific)
{
     fftw_real *in;
     fftwnd_plan plan;
     double t;
     fftw_time begin, end;
     int i, N;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;	

     N = 1;
     for (i = 0; i < sz.rank - 1; ++i)
	  N *= sz.narray[i];

     N *= (sz.narray[i] + 2);

     in = (fftw_real *) fftw_malloc(N * howmany_fields * sizeof(fftw_real));

     if (specific) {
	  begin = fftw_get_time();
	  plan = rfftwnd_create_plan_specific(sz.rank, sz.narray, dir,
					      speed_flag | flags
					      | wisdom_flag | no_vector_flag,
					      in, howmany_fields, 0, 1);
     } else {
	  begin = fftw_get_time();
	  plan = rfftwnd_create_plan(sz.rank, sz.narray,
				     dir, speed_flag | flags
				     | wisdom_flag | no_vector_flag);
     }
     end = fftw_get_time();
     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, printf("\n"));
     WHEN_VERBOSE(2, (rfftwnd_print_plan(plan)));
     WHEN_VERBOSE(2, printf("\n"));

     if (dir == FFTW_REAL_TO_COMPLEX) {
	  FFTW_TIME_FFT(rfftwnd_real_to_complex(plan, howmany_fields,
						in, howmany_fields, 1,
						0, 0, 0),
			in, N * howmany_fields, t);
     } else {
	  FFTW_TIME_FFT(rfftwnd_complex_to_real(plan, howmany_fields,
						(fftw_complex *) in,
						howmany_fields, 1,
						0, 0, 0),
			in, N * howmany_fields, t);
     }

     rfftwnd_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5/2 (N log2 N) / (t in microseconds)"
			" = %f\n", 0.5 * howmany_fields * mflops(t, N)));

     fftw_free(in);

     WHEN_VERBOSE(1, printf("\n"));
}
void test_planner(int rank)
{
     /* 
      * create and destroy many plans, at random.  Check the
      * garbage-collecting allocator of twiddle factors 
      */
     int i, dim;
     int r, s;
     fftw_plan p[PLANNER_TEST_SIZE];
     fftwnd_plan pnd[PLANNER_TEST_SIZE];
     int *narr, maxdim;

     chk_mem_leak = 0;
     verbose--;

     please_wait();
     if (rank < 1)
	  rank = 1;

     narr = (int *) fftw_malloc(rank * sizeof(int));

     maxdim = (int) pow(8192.0, 1.0/rank);

     for (i = 0; i < PLANNER_TEST_SIZE; ++i) {
	  p[i] = (fftw_plan) 0;
	  pnd[i] = (fftwnd_plan) 0;
     }

     for (i = 0; i < PLANNER_TEST_SIZE * PLANNER_TEST_SIZE; ++i) {
	  r = rand();
	  if (r < 0)
	       r = -r;
	  r = r % PLANNER_TEST_SIZE;

	  for (dim = 0; dim < rank; ++dim) {
	       do {
		    s = rand();
		    if (s < 0)
			 s = -s;
		    s = s % maxdim + 1;
	       } while (s == 0);
	       narr[dim] = s;
	  }

	  if (rank == 1) {
	       if (p[r])
		    rfftw_destroy_plan(p[r]);

	       p[r] = rfftw_create_plan(narr[0], random_dir(), measure_flag |
					wisdom_flag);
	       if (paranoid && narr[0] < 200)
		    test_correctness(narr[0]);
	  }
	  if (pnd[r])
	       rfftwnd_destroy_plan(pnd[r]);

	  pnd[r] = rfftwnd_create_plan(rank, narr,
				       random_dir(), measure_flag |
				       wisdom_flag);

	  if (i % (PLANNER_TEST_SIZE * PLANNER_TEST_SIZE / 20) == 0) {
	       WHEN_VERBOSE(0, printf("test planner: so far so good\n"));
	       WHEN_VERBOSE(0, printf("test planner: iteration %d out of %d\n",
			      i, PLANNER_TEST_SIZE * PLANNER_TEST_SIZE));
	  }
     }

     for (i = 0; i < PLANNER_TEST_SIZE; ++i) {
	  if (p[i])
	       rfftw_destroy_plan(p[i]);
	  if (pnd[i])
	       rfftwnd_destroy_plan(pnd[i]);
     }

     fftw_free(narr);
     verbose++;
     chk_mem_leak = 1;
}
void testnd_in_place(int rank, int *n, fftwnd_plan validated_plan,
		     int alternate_api, int specific)
{
     int istride, ostride, howmany;
     int N, dim, i, j, k;
     int nc, nhc, nr;
     fftw_real *in1, *out3;
     fftw_complex *in2, *out1, *out2;
     fftwnd_plan p, ip;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     N = nc = nr = nhc = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];
     if (rank > 0) {
	  nr = n[rank - 1];
	  nc = N / nr;
	  nhc = nr / 2 + 1;
     }
     in1 = (fftw_real *) fftw_malloc(2 * nhc * nc * MAX_STRIDE * sizeof(fftw_real));
     out3 = in1;
     out1 = (fftw_complex *) in1;
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     if (alternate_api && specific && (rank == 2 || rank == 3)) {
	  if (rank == 2) {
	       p = rfftw2d_create_plan_specific(n[0], n[1],
					     FFTW_REAL_TO_COMPLEX, flags,
						in1, MAX_STRIDE, 0, 0);
	       ip = rfftw2d_create_plan_specific(n[0], n[1],
					     FFTW_COMPLEX_TO_REAL, flags,
						 in1, MAX_STRIDE, 0, 0);
	  } else {
	       p = rfftw3d_create_plan_specific(n[0], n[1], n[2],
					     FFTW_REAL_TO_COMPLEX, flags,
						in1, MAX_STRIDE, 0, 0);
	       ip = rfftw3d_create_plan_specific(n[0], n[1], n[2],
					     FFTW_COMPLEX_TO_REAL, flags,
						 in1, MAX_STRIDE, 0, 0);
	  }
     } else if (specific) {
	  p = rfftwnd_create_plan_specific(rank, n, FFTW_REAL_TO_COMPLEX,
					   flags,
				       in1, MAX_STRIDE, in1, MAX_STRIDE);
	  ip = rfftwnd_create_plan_specific(rank, n, FFTW_COMPLEX_TO_REAL,
					    flags,
				       in1, MAX_STRIDE, in1, MAX_STRIDE);
     } else if (alternate_api && (rank == 2 || rank == 3)) {
	  if (rank == 2) {
	       p = rfftw2d_create_plan(n[0], n[1], FFTW_REAL_TO_COMPLEX,
				       flags);
	       ip = rfftw2d_create_plan(n[0], n[1], FFTW_COMPLEX_TO_REAL,
					flags);
	  } else {
	       p = rfftw3d_create_plan(n[0], n[1], n[2], FFTW_REAL_TO_COMPLEX,
				       flags);
	       ip = rfftw3d_create_plan(n[0], n[1], n[2], FFTW_COMPLEX_TO_REAL,
					flags);
	  }
     } else {
	  p = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
	  ip = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
     }

     CHECK(p != NULL && ip != NULL, "can't create plan");

     for (i = 0; i < nc * nhc * 2 * MAX_STRIDE; ++i)
	  out3[i] = 0;

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
	  /* generate random inputs */
	  for (i = 0; i < nc; ++i)
	       for (j = 0; j < nr; ++j) {
		    c_re(in2[i * nr + j]) = DRAND();
		    c_im(in2[i * nr + j]) = 0.0;
		    for (k = 0; k < istride; ++k)
			 in1[(i * nhc * 2 + j) * istride + k]
			     = c_re(in2[i * nr + j]);
	       }

	  fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

	  howmany = ostride = istride;

	  WHEN_VERBOSE(2, printf("\n    testing in-place stride %d...",
				 istride));

	  if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
	       rfftwnd_real_to_complex(p, howmany, in1, istride, 1,
				       out1, ostride, 1);
	  else
	       rfftwnd_one_real_to_complex(p, in1, NULL);

	  for (i = 0; i < nc; ++i)
	       for (k = 0; k < howmany; ++k)
		    CHECK(compute_error_complex(out1 + i * nhc * ostride + k,
						ostride,
						out2 + i * nr, 1,
						nhc) < TOLERANCE,
			  "in-place (r2c): wrong answer");

	  if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
	       rfftwnd_complex_to_real(ip, howmany, out1, ostride, 1,
				       out3, istride, 1);
	  else
	       rfftwnd_one_complex_to_real(ip, out1, NULL);

	  for (i = 0; i < nc * nhc * 2 * istride; ++i)
	       out3[i] *= 1.0 / N;

	  for (i = 0; i < nc; ++i)
	       for (k = 0; k < howmany; ++k)
		    CHECK(compute_error(out3 + i * nhc * 2 * istride + k,
					istride,
					(fftw_real *) (in2 + i * nr), 2,
					nr) < TOLERANCE,
			  "in-place (c2r): wrong answer (check 2)");
     }

     rfftwnd_destroy_plan(p);
     rfftwnd_destroy_plan(ip);

     fftw_free(out2);
     fftw_free(in2);
     fftw_free(in1);
}
void testnd_out_of_place(int rank, int *n, fftwnd_plan validated_plan)
{
     int istride, ostride;
     int N, dim, i, j, k;
     int nc, nhc, nr;
     fftw_real *in1, *out3;
     fftw_complex *in2, *out1, *out2;
     fftwnd_plan p, ip;
     int flags = measure_flag | wisdom_flag;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;

     N = nc = nr = nhc = 1;
     for (dim = 0; dim < rank; ++dim)
	  N *= n[dim];
     if (rank > 0) {
	  nr = n[rank - 1];
	  nc = N / nr;
	  nhc = nr / 2 + 1;
     }
     in1 = (fftw_real *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_real));
     out3 = (fftw_real *) fftw_malloc(N * MAX_STRIDE * sizeof(fftw_real));
     out1 = (fftw_complex *) fftw_malloc(nhc * nc * MAX_STRIDE
					 * sizeof(fftw_complex));
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     p = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
     ip = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
     CHECK(p != NULL && ip != NULL, "can't create plan");

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
	  /* generate random inputs */
	  for (i = 0; i < nc; ++i)
	       for (j = 0; j < nr; ++j) {
		    c_re(in2[i * nr + j]) = DRAND();
		    c_im(in2[i * nr + j]) = 0.0;
		    for (k = 0; k < istride; ++k)
			 in1[(i * nr + j) * istride + k]
			     = c_re(in2[i * nr + j]);
	       }
	  for (i = 0; i < N * istride; ++i)
	       out3[i] = 0.0;

	  fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

	  for (ostride = 1; ostride <= MAX_STRIDE; ++ostride) {
	       int howmany = (istride < ostride) ? istride : ostride;

	       WHEN_VERBOSE(2, printf("\n    testing stride %d/%d...",
				      istride, ostride));

	       if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
		    rfftwnd_real_to_complex(p, howmany, in1, istride, 1,
					    out1, ostride, 1);
	       else
		    rfftwnd_one_real_to_complex(p, in1, out1);

	       for (i = 0; i < nc; ++i)
		    for (k = 0; k < howmany; ++k)
			 CHECK(compute_error_complex(out1 + i * nhc * ostride + k,
						     ostride,
						     out2 + i * nr, 1,
						     nhc) < TOLERANCE,
			       "out-of-place (r2c): wrong answer");

	       if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
		    rfftwnd_complex_to_real(ip, howmany, out1, ostride, 1,
					    out3, istride, 1);
	       else
		    rfftwnd_one_complex_to_real(ip, out1, out3);

	       for (i = 0; i < N * istride; ++i)
		    out3[i] *= 1.0 / N;

	       if (istride == howmany)
		    CHECK(compute_error(out3, 1, in1, 1, N * istride)
			< TOLERANCE, "out-of-place (c2r): wrong answer");
	       for (i = 0; i < nc; ++i)
		    for (k = 0; k < howmany; ++k)
			 CHECK(compute_error(out3 + i * nr * istride + k,
					     istride,
					 (fftw_real *) (in2 + i * nr), 2,
					     nr) < TOLERANCE,
			   "out-of-place (c2r): wrong answer (check 2)");
	  }
     }

     rfftwnd_destroy_plan(p);
     rfftwnd_destroy_plan(ip);

     fftw_free(out3);
     fftw_free(out2);
     fftw_free(in2);
     fftw_free(out1);
     fftw_free(in1);
}
Example #11
0
int
init(int _Nx, int _Ny, int _Nz, double _Lx, double _Lz, double _Re,
     double _flux, double _dt, int _nsteps)
{
    /******************** Definition of all variables ********************/
    /* External Variables.  All external variables are defined in main.h */
    extern int qpts, dimR, dimQ, Nx, Nz;
    extern double dt, re, flux;

    extern double *Kx, *Kz, **K2, *cfl2;
    extern double **Q, **Qp, **Qpp, **R, **Rp, **Qw, **Qpw, **Rw, **Qs,
        **Qps, **Qpps, **Rs, **Rps, *Rp0, **Rpw, **Qppw, *Rpp0;

    extern double *Uadd, *Vadd, *Vpadd;
    extern double *Qy;
    extern double *W;

    extern mcomplex ****U, ****C;    /* state variables */
    extern mcomplex **Fa, **Fb, **TM;
    extern mcomplex *fa, *fb, *tm;
    extern double **MZ;
    extern double ***M;

    extern mcomplex ****IU, ****IC;    /* incremental state variables */
    extern mcomplex **IFa, **IFb, **ITM;
    extern mcomplex *Ifa, *Ifb, *Itm;

    extern mcomplex ****AU, ****AC;    /* adjoint variables and will use
                       the same other variables
                       used in state equations */

    extern mcomplex ****IAU, ****IAC;    /* incremental adjoint variables */

    extern mcomplex **Uxbt, **Uzb;    /* variables used to store dux duz
                       evaluated at y=-1 used for
                       computing boundary conditions for
                       incremental state equations */
    extern mcomplex **Uxb, **Uzb;    /* variables used to store dux duz
                       evaluated at y=-1 from previous 
                       state used for boundary conditions
                       for incremental state equations */
    extern mcomplex **IUxb, **IUzb;
    extern mcomplex **IAUxb, **IAUzb;
    extern mcomplex **AUxb, **AUzb;    /* variables used to store dux duz
                       evaluated at y=-1 used for
                       computing boundary conditions
                       for incremental state equations */
    extern fftw_complex ***CT, ***ICT;    /* variables used in fft */
    extern fftw_plan pf1, pf2;
    extern fftw_plan Ipf1, Ipf2;
    extern rfftwnd_plan pr1, pr2;

    extern mcomplex *****MC, *****MIC;    /* variables used to store state and
                           incremental state solutions
                           between two check points. */

    extern mcomplex ****MU, ****MIU;    /* variables used to store
                           manufacture solutions */
    extern mcomplex ****LU, ****LIU;

    /* Local Variables */
    int Ny, sizeRealTransform;
    double Lx, Lz;
    fftw_complex *fout = NULL;

    /************************ end of variable definitions ****************/

    Nx = _Nx;
    Ny = _Ny;
    Nz = _Nz;
    Lx = _Lx;
    Lz = _Lz;

    dt = _dt;

    nsteps = _nsteps;
    flux = _flux;
    re = _Re;

    printf("Nx,Ny,Nz,Lx  |  Lz,dt,nsteps  |  flux,Re\n"
           "%d %d %d %f  |  %f %f %d |  %f %f\n",
           Nx, Ny, Nz, Lx, Lz, dt, nsteps, flux, re);

    re = 1. / re;         /* time step routines assume I pass 1/Re */
    qpts = 3 * Ny / 2;    /* number of quadrature points 
                   (see page 9 of Moser's notes) */
    dimR = Ny - 2;        /* dimR and dimQ denote the number of terms */
    dimQ = Ny - 4;        /* in the truncated expansions for the */

    /* functions v_hat, g_hat, U, W */
    /* (see page 5 of Moser's notes). */
    sizeRealTransform = 3 * Nx / 2;    /* for the FFTs */

    /**************** check input parameters, Nx/4, Nz/2 ***************/
    if (Nx % 4 != 0) {
        printf("Required arguments Ny/4==0\n");
        return (EXIT_FAILURE);
    }
    if (Nz % 2 != 0) {
        printf("Required arguments Nz/2==0\n");
        return (EXIT_FAILURE);
    }
    if (Ny - 4 < 0) {
        printf("Required arguments Nzy>4\n");
        return (EXIT_FAILURE);
    }
    /* Create matrices using Legendre polynomials */
    if (LegendreSetup() != NO_ERR) {
        return (EXIT_FAILURE);
    }

    /*********************end of parameter checking ****************/

    /****************Initialize and allocate all variables ***************/
    /* Compute wave numbers */
    if (waveNums(Nx / 2, Nz, Lx, Lz) != NO_ERR) {
        destroy(DESTROY_STATUS_LEGENDRE);
        return (EXIT_FAILURE);
    }

    /* get memory for 4D arrays and other matrices */
    if (getMem() != NO_ERR) {
        destroy(DESTROY_STATUS_LEGENDRE | DESTROY_STATUS_WAVENUMS);
        return (EXIT_FAILURE);
    }

    /* Create plans for FFTs */
    pf1 = fftw_create_plan_specific(3 * Nz / 2, FFTW_BACKWARD,
                    FFTW_MEASURE | FFTW_IN_PLACE, CT[0][0],
                    3 * Nx / 4 + 1, fout, -1);
    pf2 = fftw_create_plan_specific(3 * Nz / 2, FFTW_FORWARD,
                      FFTW_MEASURE | FFTW_IN_PLACE, CT[0][0],
                      3 * Nx / 4 + 1, fout, -1);
    pr1 = rfftwnd_create_plan(1, &sizeRealTransform, FFTW_COMPLEX_TO_REAL,
                FFTW_MEASURE | FFTW_IN_PLACE);
    pr2 = rfftwnd_create_plan(1, &sizeRealTransform, FFTW_REAL_TO_COMPLEX,
                FFTW_MEASURE | FFTW_IN_PLACE);

    /* Create plans for FFTs */
    Ipf1 = fftw_create_plan_specific(3 * Nz / 2, FFTW_BACKWARD,
                     FFTW_MEASURE | FFTW_IN_PLACE,
                     ICT[0][0], 3 * Nx / 4 + 1, fout, -1);
    Ipf2 = fftw_create_plan_specific(3 * Nz / 2, FFTW_FORWARD,
                      FFTW_MEASURE | FFTW_IN_PLACE, ICT[0][0],
                      3 * Nx / 4 + 1, fout, -1);

    /* set variables for checking CFL condition */
    if (cflVars(Lx, Lz) != 0) {
        printf("Error creating CF variables\n");
        destroy(DESTROY_STATUS_LEGENDRE | DESTROY_STATUS_WAVENUMS
            | DESTROY_STATUS_FFTW);
        return (EXIT_FAILURE);
    }

    /* initalize part */
    memset(C[0][0][0], 0,
           (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex));
    memset(IC[0][0][0], 0,
           (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex));
    memset(AC[0][0][0], 0,
           (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex));
    memset(IAC[0][0][0], 0,
           (Nz) * 2 * dimR * (Nx / 2) * sizeof(mcomplex));

    memset(MC[0][0][0][0], 0,
           (nsteps * 3 + 1) * (Nz) * 2 * dimR * (Nx / 2)
           * sizeof(mcomplex));
    memset(MIC[0][0][0][0], 0,
           (nsteps * 3 + 1) * (Nz) * 2 * dimR * (Nx / 2)
           * sizeof(mcomplex));

    memset(U[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex));
    memset(AU[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex));
    memset(IU[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex));
    memset(IAU[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex));

    memset(LU[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex));
    memset(LIU[0][0][0], 0, (Nz) * 5 * qpts * (Nx / 2) * sizeof(mcomplex));

    memset(Uxb[0], 0, Nz * (Nx / 2) * sizeof(mcomplex));
    memset(Uzb[0], 0, Nz * (Nx / 2) * sizeof(mcomplex));

    /******************************end of initialization part ***************/
    return (EXIT_SUCCESS);
}
RockPhysicsInversion4D::RockPhysicsInversion4D(NRLib::Vector                      priorMean,
                                               NRLib::Matrix                      priorCov,
                                               NRLib::Matrix                      posteriorCov,
                                               std::vector<std::vector<double> >  mSamp)
{
  nf_.resize(4);
  nf_[0] = 60;
  nf_[1] = 60;
  nf_[2] = 60;
  nf_[3] = 60;
  nfp_= 135;

  fftplan1_ = rfftwnd_create_plan(1, &nfp_, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
  fftplan2_ = rfftwnd_create_plan(1, &nfp_, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);

  v_.resize(4,6);
  SolveGEVProblem(priorCov,posteriorCov, v_);
  NRLib::Matrix tmp;
  NRLib::Matrix priorCovF;
  NRLib::Matrix posteriorCovF;

  meanf_        = priorMean*v_;
  tmp           = priorCov*v_;
  priorCovF     = transpose(v_)*tmp;
  tmp           = posteriorCov*v_;
  posteriorCovF = transpose(v_)*tmp;

  int nSamp = mSamp[0].size();

  NRLib::Vector m(6);
  for(int k=0;k<6;k++)
      m(k)=mSamp[k][0];

  maxf_=m*v_;
  minf_=m*v_;

  for(int i=0;i<nSamp;i++)
  {
    NRLib::Vector f;
    for(int k=0;k<6;k++)
      m(k)=mSamp[k][i];
    f=m*v_;
    for(int k=0;k<4;k++)
    {
      if(minf_(k) > f(k))
        minf_(k)=f(k);
      if(maxf_(k) < f(k))
        maxf_(k)=f(k);
    }
  }

  for(int k=0;k<4;k++)
  {
    minf_(k)-=3*sqrt(posteriorCovF(k,k));
    maxf_(k)+=3*sqrt(posteriorCovF(k,k));
  }

  smoothingFilter_.resize(4);
  priorDistribution_.resize(4);

  for(int i=0;i<4;i++)
  {
    double df=(maxf_(i)-minf_(i))/double(nf_[i]);
    priorDistribution_[i]=MakeGaussKernel(meanf_(i),priorCovF(i,i),minf_(i),df,nfp_);
    smoothingFilter_[i]=MakeSmoothingFilter(posteriorCovF(i,i),df);
  }

  allocatePredictionTables( );

  std::vector<double> dummy(nSamp);
  for(int i=0;i<nSamp;i++)
    dummy[i]=1.0;

  fillInTable( mSamp,dummy,0);
  DivideAndSmoothTable(0,priorDistribution_,smoothingFilter_);
}