/**
 * Bind a real-space array to this transformer and (re)build the FFTW plans
 * when required.
 *
 * The Fourier-side buffer is resized to ZSIZE x YSIZE x (XSIZE/2+1), and the
 * transformer keeps a pointer to @p input (no copy is made).  New plans are
 * created when no array was bound before, when the data buffer address
 * differs from the one the current plans were built for, or when the shape
 * changed.
 *
 * @param input  Real-space array to transform; it is referenced, not copied.
 *
 * Reports ERR_PLANS_NOCREATE through REPORT_ERROR when FFTW fails to create
 * either plan.
 */
void FourierTransformer::setReal(MultidimArray<double> &input)
{
    // Decide whether the existing plans can be reused.  The shape comparison
    // must happen before fReal is re-pointed to the new input.
    bool recomputePlan = false;
    if (fReal == NULL)
        recomputePlan = true;
    else if (dataPtr != MULTIDIM_ARRAY(input))
        recomputePlan = true;
    else
        recomputePlan = !(fReal->sameShape(input));

    fFourier.resizeNoCopy(ZSIZE(input), YSIZE(input), XSIZE(input) / 2 + 1);
    fReal = &input;

    if (recomputePlan)
    {
        // Singleton leading dimensions are dropped so that FFTW plans a
        // transform of the effective rank.
        int ndim = 3;
        if (ZSIZE(input) == 1)
        {
            ndim = 2;
            if (YSIZE(input) == 1)
                ndim = 1;
        }

        // Extents, slowest-varying dimension first.
        int N[3] = {0, 0, 0};
        switch (ndim)
        {
        case 1:
            N[0] = XSIZE(input);
            break;
        case 2:
            N[0] = YSIZE(input);
            N[1] = XSIZE(input);
            break;
        case 3:
            N[0] = ZSIZE(input);
            N[1] = YSIZE(input);
            N[2] = XSIZE(input);
            break;
        }

        // Plan creation/destruction is serialized through the shared mutex.
        pthread_mutex_lock(&fftw_plan_mutex);
        if (fPlanForward != NULL)
            fftw_destroy_plan(fPlanForward);
        fPlanForward = NULL;
        fPlanForward = fftw_plan_dft_r2c(ndim, N, MULTIDIM_ARRAY(*fReal),
                                         (fftw_complex*) MULTIDIM_ARRAY(fFourier),
                                         FFTW_ESTIMATE);
        if (fPlanBackward != NULL)
            fftw_destroy_plan(fPlanBackward);
        fPlanBackward = NULL;
        fPlanBackward = fftw_plan_dft_c2r(ndim, N,
                                          (fftw_complex*) MULTIDIM_ARRAY(fFourier),
                                          MULTIDIM_ARRAY(*fReal),
                                          FFTW_ESTIMATE);
        // Release the lock BEFORE reporting a failure: if REPORT_ERROR leaves
        // this function (e.g. by throwing), the mutex must not stay locked.
        pthread_mutex_unlock(&fftw_plan_mutex);

        if (fPlanForward == NULL || fPlanBackward == NULL)
            REPORT_ERROR(ERR_PLANS_NOCREATE, "FFTW plans cannot be created");

        // Remember which buffer the plans were built for.
        dataPtr = MULTIDIM_ARRAY(*fReal);
    }
}
/* Set up the FFT-related fields of `h` from the input array `F`.
 *
 * Creates a forward (r2c) and a backward (c2r) FFTW plan for an array with
 * extents N[0..rank-1], runs the forward transform on a copy of F and stores
 * the resulting spectrum, both plans, and copies of the window (L), length (N)
 * and per-dimension factor arrays in `h`.
 *
 *   h        - structure to fill
 *   F        - input data, prod(rank, N) doubles
 *   rank     - number of dimensions
 *   N        - extent of the data in each dimension
 *   L        - window extent in each dimension
 *   circular - per-dimension flag selecting the factor formula
 */
static void initialize_circulant(hbhankel_matrix *h,
                                 const double *F,
                                 R_len_t rank,
                                 const R_len_t *N,
                                 const R_len_t *L,
                                 const int *circular) {
  fftw_plan forward, backward;
  fftw_complex *spectrum;
  double *signal;
  R_len_t *dims_reversed;
  R_len_t d;

  /* Work buffers: real input and its half-complex spectrum */
  signal = (double*) fftw_malloc(prod(rank, N) * sizeof(double));
  spectrum = (fftw_complex*) fftw_malloc(hprod(rank, N) * sizeof(fftw_complex));

  /* The data is stored column-major, so the planner receives the extents in
     reverse order */
  dims_reversed = Calloc(rank, R_len_t);
  for (d = rank; d > 0; --d)
    dims_reversed[rank - d] = N[d - 1];

  forward = fftw_plan_dft_r2c(rank, dims_reversed, signal, spectrum, FFTW_ESTIMATE);
  backward = fftw_plan_dft_c2r(rank, dims_reversed, spectrum, signal, FFTW_ESTIMATE);

  Free(dims_reversed);

  /* Load the input and transform it */
  memcpy(signal, F, prod(rank, N) * sizeof(double));
  fftw_execute(forward);

  /* The real buffer is no longer needed; the spectrum is handed over to h */
  fftw_free(signal);

  h->circ_freq = spectrum;
  h->r2c_plan = forward;
  h->c2r_plan = backward;
  h->rank = rank;

  h->window = Calloc(rank, R_len_t);
  memcpy(h->window, L, rank * sizeof(R_len_t));

  h->length = Calloc(rank, R_len_t);
  memcpy(h->length, N, rank * sizeof(R_len_t));

  h->factor = Calloc(rank, R_len_t);
  for (d = 0; d < rank; ++d) {
    if (circular[d])
      h->factor[d] = N[d];
    else
      h->factor[d] = N[d] - L[d] + 1;
  }
}
/// Build a real-to-complex FFTW plan for an NDims-dimensional array.
///
/// @param _shape      extent of the real array in each dimension
/// @param _in         real input buffer handed to the planner
/// @param _out        complex output buffer handed to the planner
/// @param _dir        accepted for interface compatibility; not read here
/// @param plan_flags  FFTW planner flags (defaults to FFTW_MEASURE)
/// @return the plan returned by fftw_plan_dft_r2c
static PlanType create(const std::array<std::size_t,NDims>& _shape,
                       RealType* _in,
                       ComplexType* _out,
                       fftw_direction _dir = fftw_direction::forward,
                       unsigned plan_flags = FFTW_MEASURE)
{
    // The planner takes the extents as plain ints, so narrow each entry.
    std::array<int,NDims> extents;
    int axis = 0;
    for (const std::size_t extent : _shape) {
        extents[axis] = static_cast<int>(extent);
        ++axis;
    }

    return fftw_plan_dft_r2c(NDims, extents.data(), _in, _out, plan_flags);
}
PetscInt main(PetscInt argc,char **args) { typedef enum {RANDOM, CONSTANT, TANH, NUM_FUNCS} FuncType; const char *funcNames[NUM_FUNCS] = {"random", "constant", "tanh"}; PetscMPIInt size; PetscInt n = 10,N,Ny,ndim=4,dim[4],DIM,i; Vec x,y,z; PetscScalar s; PetscRandom rdm; PetscReal enorm; PetscInt func=RANDOM; FuncType function = RANDOM; PetscBool view = PETSC_FALSE; PetscErrorCode ierr; PetscScalar *x_array,*y_array,*z_array; fftw_plan fplan,bplan; const ptrdiff_t N0 = 20, N1 = 20; ptrdiff_t alloc_local, local_n0, local_0_start; ierr = PetscInitialize(&argc,&args,(char *)0,help);CHKERRQ(ierr); #if defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires real numbers"); #endif ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); alloc_local=fftw_mpi_local_size_2d(N0, N1, PETSC_COMM_WORLD, &local_n0, &local_0_start); if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This is a uniprocessor example only!"); ierr = PetscOptionsBegin(PETSC_COMM_WORLD, PETSC_NULL, "FFTW Options", "ex142");CHKERRQ(ierr); ierr = PetscOptionsEList("-function", "Function type", "ex142", funcNames, NUM_FUNCS, funcNames[function], &func, PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsBool("-vec_view draw", "View the functions", "ex112", view, &view, PETSC_NULL);CHKERRQ(ierr); function = (FuncType) func; ierr = PetscOptionsEnd();CHKERRQ(ierr); for (DIM = 0; DIM < ndim; DIM++){ dim[DIM] = n; /* size of real space vector in DIM-dimension */ } ierr = PetscRandomCreate(PETSC_COMM_SELF, &rdm);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr); for (DIM = 1; DIM < 5; DIM++){ /* create vectors of length N=dim[0]*dim[1]* ...*dim[DIM-1] */ /*----------------------------------------------------------*/ N = Ny = 1; for (i = 0; i < DIM-1; i++) { N *= dim[i]; } Ny = N; Ny *= 2*(dim[DIM-1]/2 + 1); /* add padding elements to output vector y */ N *= dim[DIM-1]; ierr = PetscPrintf(PETSC_COMM_SELF, "\n %d-D: FFTW on vector of size %d 
\n",DIM,N);CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) x, "Real space vector");CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,Ny,&y);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr); ierr = VecDuplicate(x,&z);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr); /* Set fftw plan */ /*----------------------------------*/ ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); ierr = VecGetArray(z,&z_array);CHKERRQ(ierr); unsigned int flags = FFTW_ESTIMATE; //or FFTW_MEASURE /* The data in the in/out arrays is overwritten during FFTW_MEASURE planning, so such planning should be done before the input is initialized by the user. */ printf("DIM: %d, N %d, Ny %d\n",DIM,N,Ny); switch (DIM){ case 1: fplan = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex*)y_array, flags); bplan = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex*)y_array, (double *)z_array, flags); break; case 2: fplan = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array, (fftw_complex*)y_array,flags); bplan = fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)y_array,(double *)z_array,flags); break; case 3: fplan = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array, (fftw_complex*)y_array,flags); bplan = fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)y_array,(double *)z_array,flags); break; default: fplan = fftw_plan_dft_r2c(DIM,dim,(double *)x_array, (fftw_complex*)y_array,flags); bplan = fftw_plan_dft_c2r(DIM,dim,(fftw_complex*)y_array,(double *)z_array,flags); break; } ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); ierr = VecRestoreArray(z,&z_array);CHKERRQ(ierr); /* Initialize Real space vector x: The data in the in/out arrays is overwritten during FFTW_MEASURE planning, so planning should be done before the input is 
initialized by the user. --------------------------------------------------------*/ if (function == RANDOM) { ierr = VecSetRandom(x, rdm);CHKERRQ(ierr); } else if (function == CONSTANT) { ierr = VecSet(x, 1.0);CHKERRQ(ierr); } else if (function == TANH) { ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); for (i = 0; i < N; ++i) { x_array[i] = tanh((i - N/2.0)*(10.0/N)); } ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); } if (view) { ierr = VecView(x, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); } /* FFT - also test repeated transformation */ /*-------------------------------------------*/ ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); ierr = VecGetArray(z,&z_array);CHKERRQ(ierr); for (i=0; i<3; i++){ /* FFTW_FORWARD */ fftw_execute(fplan); //printf("\n fout:\n"); //fftw_complex* fout = (fftw_complex*)y_array; //for (i=0; i<N/2+1; i++) printf("%d (%g %g)\n",i,fout[i][0],fout[i][1]); /* FFTW_BACKWARD: destroys its input array 'y_array' even for out-of-place transforms! */ fftw_execute(bplan); } ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); ierr = VecRestoreArray(z,&z_array);CHKERRQ(ierr); /* Compare x and z. 
FFTW computes an unnormalized DFT, thus z = N*x */ /*------------------------------------------------------------------*/ s = 1.0/(PetscReal)N; ierr = VecScale(z,s);CHKERRQ(ierr); if (view) {ierr = VecView(x, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);} if (view) {ierr = VecView(z, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);} ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr); ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr); if (enorm > 1.e-11){ ierr = PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %G\n",enorm);CHKERRQ(ierr); } /* free spaces */ fftw_destroy_plan(fplan); fftw_destroy_plan(bplan); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = VecDestroy(&z);CHKERRQ(ierr); } ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
// Initialize a field_info structure for an n_d-dimensional real FFT.
//
//   n_d - number of dimensions
//   n   - grid size in each dimension (n_d entries)
//   L   - box length in each dimension (n_d entries)
//   FFT - structure to initialize; all arrays it points to are allocated here
//
// The routine records the grid geometry, creates the forward/backward FFTW
// plans for the configured FFTW flavour (FFTW_V2 / USE_MPI / USE_DOUBLE),
// allocates the (in-place) field buffer, builds the real-space and k-space
// coordinate tables and works out the slab decomposition, including which
// ranks hold the neighbouring non-empty slabs.
void init_field(int n_d, int *n, double *L, field_info *FFT) {
    ptrdiff_t n_x_local;
    ptrdiff_t i_x_start_local;
    ptrdiff_t n_y_transpose_local;
    ptrdiff_t i_y_start_transpose_local;
    int      *n_x_rank;

    int flag_active;
    int n_active;
    int min_size, max_size;

    SID_log("Initializing ", SID_LOG_OPEN);
    for(ptrdiff_t i_d = 0; i_d < n_d; i_d++) {
        if(i_d < (n_d - 1))
            SID_log("%dx", SID_LOG_CONTINUE, n[i_d]);
        else
            SID_log("%d element %d-d FFT ", SID_LOG_CONTINUE, n[i_d], n_d);
    }
    SID_log("(%d byte precision)...", SID_LOG_CONTINUE, (int)sizeof(GBPREAL));

    // Initialize FFT sizes
    FFT->n_d             = n_d;
    FFT->n               = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->L               = (double *)SID_calloc(sizeof(double) * FFT->n_d);
    FFT->n_k_local       = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->n_R_local       = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_R_start_local = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_k_start_local = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_R_stop_local  = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_k_stop_local  = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->n[i_d]               = n[i_d];
        FFT->L[i_d]               = L[i_d];
        FFT->i_R_start_local[i_d] = 0;
        FFT->i_k_start_local[i_d] = 0;
        FFT->n_R_local[i_d]       = FFT->n[i_d];
        FFT->n_k_local[i_d]       = FFT->n[i_d];
    }
    // The last dimension of the complex (k-space) array is halved
    FFT->n_k_local[FFT->n_d - 1] = FFT->n[FFT->n_d - 1] / 2 + 1;

    // Default the slab description to the full-grid values set above.  The
    // MPI branches below overwrite these with FFTW's decomposition; the
    // serial branches do not set them, so without these defaults they would
    // be read uninitialized further down.
    n_x_local                 = FFT->n_R_local[0];
    i_x_start_local           = FFT->i_R_start_local[0];
    n_y_transpose_local       = (FFT->n_d > 1) ? FFT->n_k_local[1] : 0;
    i_y_start_transpose_local = (FFT->n_d > 1) ? FFT->i_k_start_local[1] : 0;

    // Initialize FFTW
    // NOTE(review): the FFTW3 branches hand FFT->field_local/cfield_local to
    // the planner before they are allocated further down -- confirm this is
    // intended.

    // Create an integer version of FFT->n[] to pass to ..._create_plan
    int *n_int = (int *)SID_malloc(sizeof(int) * FFT->n_d);
    for(int i_d = 0; i_d < FFT->n_d; i_d++)
        n_int[i_d] = (int)FFT->n[i_d];
#if FFTW_V2
#if USE_MPI
    int total_local_size_int;
    int n_x_local_int;
    int i_x_start_local_int;
    int n_y_transpose_local_int;
    int i_y_start_transpose_local_int;
    FFT->plan  = rfftwnd_mpi_create_plan(SID.COMM_WORLD->comm, FFT->n_d, n_int, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE);
    FFT->iplan = rfftwnd_mpi_create_plan(SID.COMM_WORLD->comm, FFT->n_d, n_int, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE);
    rfftwnd_mpi_local_sizes(FFT->plan,
                            &(n_x_local_int),
                            &(i_x_start_local_int),
                            &(n_y_transpose_local_int),
                            &(i_y_start_transpose_local_int),
                            &total_local_size_int);
    n_x_local                 = (ptrdiff_t)n_x_local_int;
    i_x_start_local           = (ptrdiff_t)i_x_start_local_int;
    n_y_transpose_local       = (ptrdiff_t)n_y_transpose_local_int;
    i_y_start_transpose_local = (ptrdiff_t)i_y_start_transpose_local_int;
    FFT->total_local_size     = (size_t)total_local_size_int;
#else
    // Serial: the whole padded grid is local
    FFT->total_local_size = 1;
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        if(i_d < FFT->n_d - 1)
            FFT->total_local_size *= FFT->n[i_d];
        else
            FFT->total_local_size *= 2 * (FFT->n[i_d] / 2 + 1);
    }
#if USE_DOUBLE
    FFT->plan  = fftwnd_create_plan(FFT->n_d, n_int, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
    FFT->iplan = fftwnd_create_plan(FFT->n_d, n_int, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
#else
    FFT->plan  = rfftwnd_create_plan(FFT->n_d, n_int, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
    FFT->iplan = rfftwnd_create_plan(FFT->n_d, n_int, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
#endif
#endif
#else
#if USE_MPI
#if USE_DOUBLE
    fftw_mpi_init();
    FFT->total_local_size = fftw_mpi_local_size_many_transposed(FFT->n_d,
                                                                FFT->n,
                                                                1,
                                                                FFTW_MPI_DEFAULT_BLOCK,
                                                                FFTW_MPI_DEFAULT_BLOCK,
                                                                SID_COMM_WORLD->comm,
                                                                &(n_x_local),
                                                                &(i_x_start_local),
                                                                &(n_y_transpose_local),
                                                                &(i_y_start_transpose_local));
    FFT->plan  = fftw_mpi_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
    FFT->iplan = fftw_mpi_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
#else
    fftwf_mpi_init();
    FFT->total_local_size = fftwf_mpi_local_size_many_transposed(FFT->n_d,
                                                                 FFT->n,
                                                                 1,
                                                                 FFTW_MPI_DEFAULT_BLOCK,
                                                                 FFTW_MPI_DEFAULT_BLOCK,
                                                                 SID_COMM_WORLD->comm,
                                                                 &(n_x_local),
                                                                 &(i_x_start_local),
                                                                 &(n_y_transpose_local),
                                                                 &(i_y_start_transpose_local));
    FFT->plan  = fftwf_mpi_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
    FFT->iplan = fftwf_mpi_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
#endif
#else
    // Serial: the whole padded grid is local
    FFT->total_local_size = 1;
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        if(i_d < FFT->n_d - 1)
            FFT->total_local_size *= FFT->n[i_d];
        else
            FFT->total_local_size *= 2 * (FFT->n[i_d] / 2 + 1);
    }
    // The serial FFTW3 planner takes the extents as `const int *`, so pass
    // the int copy rather than the ptrdiff_t array.
#if USE_DOUBLE
    FFT->plan  = fftw_plan_dft_r2c(FFT->n_d, n_int, FFT->field_local, FFT->cfield_local, FFTW_ESTIMATE);
    FFT->iplan = fftw_plan_dft_c2r(FFT->n_d, n_int, FFT->cfield_local, FFT->field_local, FFTW_ESTIMATE);
#else
    FFT->plan  = fftwf_plan_dft_r2c(FFT->n_d, n_int, FFT->field_local, FFT->cfield_local, FFTW_ESTIMATE);
    FFT->iplan = fftwf_plan_dft_c2r(FFT->n_d, n_int, FFT->cfield_local, FFT->field_local, FFTW_ESTIMATE);
#endif
#endif
#endif
    SID_free(SID_FARG n_int);

    // Set empty slabs to start at 0 to make ignoring them simple.
    if(n_x_local == 0)
        i_x_start_local = 0;
    if(n_y_transpose_local == 0)
        i_y_start_transpose_local = 0;

    // Modify the local slab dimensions according to what FFTW chose.
    FFT->i_R_start_local[0] = i_x_start_local;
    FFT->n_R_local[0]       = n_x_local;
    if(FFT->n_d > 1) {
        FFT->i_k_start_local[1] = i_y_start_transpose_local;
        FFT->n_k_local[1]       = n_y_transpose_local;
    }

    // Allocate field (the complex view aliases the same buffer)
#if USE_FFTW3
    FFT->field_local = (gbpFFT_real *)fftwf_alloc_real(FFT->total_local_size);
#else
    FFT->field_local = (gbpFFT_real *)SID_malloc(sizeof(gbpFFT_real) * FFT->total_local_size);
#endif
    FFT->cfield_local = (gbpFFT_complex *)FFT->field_local;

    // Upper limits of slab decomposition
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->i_R_stop_local[i_d] = FFT->i_R_start_local[i_d] + FFT->n_R_local[i_d] - 1;
        FFT->i_k_stop_local[i_d] = FFT->i_k_start_local[i_d] + FFT->n_k_local[i_d] - 1;
    }

    // FFTW padding sizes
    if(FFT->n_d > 1) {
        FFT->pad_size_R = 2 * (FFT->n_R_local[FFT->n_d - 1] / 2 + 1) - FFT->n_R_local[FFT->n_d - 1];
        FFT->pad_size_k = 0;
    } else {
        FFT->pad_size_R = 0;
        FFT->pad_size_k = 0;
    }

    // Number of elements (global and local) in the FFT
    FFT->n_field         = 1;
    FFT->n_field_R_local = 1;
    FFT->n_field_k_local = 1;
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->n_field *= (size_t)FFT->n[i_d];
        FFT->n_field_R_local *= (size_t)FFT->n_R_local[i_d];
        FFT->n_field_k_local *= (size_t)FFT->n_k_local[i_d];
    }

    // Clear the field
    clear_field(FFT);

    // Initialize the FFT's real-space grid.  R_field[d] has n[d]+1 entries so
    // that the upper edge of the last cell (== L[d]) is available too.
    FFT->R_field = (double **)SID_malloc(sizeof(double *) * FFT->n_d);
    FFT->dR      = (double *)SID_malloc(sizeof(double) * FFT->n_d);
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->R_field[i_d] = (double *)SID_malloc(sizeof(double) * (FFT->n[i_d] + 1));
        FFT->dR[i_d]      = FFT->L[i_d] / (double)(FFT->n[i_d]);
        for(ptrdiff_t i_i = 0; i_i < FFT->n[i_d]; i_i++)
            FFT->R_field[i_d][i_i] = FFT->L[i_d] * ((double)i_i / (double)(FFT->n[i_d]));
        FFT->R_field[i_d][FFT->n[i_d]] = FFT->L[i_d];
    }

    // Initialize the FFT's k-space grid.  Indices at or above n/2 map to
    // negative wavenumbers.
    FFT->k_field   = (double **)SID_malloc(sizeof(double *) * FFT->n_d);
    FFT->dk        = (double *)SID_malloc(sizeof(double) * FFT->n_d);
    FFT->k_Nyquist = (double *)SID_malloc(sizeof(double) * FFT->n_d);
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->k_field[i_d]   = (double *)SID_malloc(sizeof(double) * FFT->n[i_d]);
        FFT->dk[i_d]        = TWO_PI / FFT->L[i_d];
        FFT->k_Nyquist[i_d] = TWO_PI * (double)(FFT->n[i_d]) / FFT->L[i_d] / 2.;
        for(ptrdiff_t i_i = 0; i_i < FFT->n[i_d]; i_i++) {
            if(i_i >= FFT->n[i_d] / 2)
                FFT->k_field[i_d][i_i] = TWO_PI * (double)(i_i - FFT->n[i_d]) / FFT->L[i_d];
            else
                FFT->k_field[i_d][i_i] = TWO_PI * (double)(i_i) / FFT->L[i_d];
        }
    }

    // Flags
    FFT->flag_padded = GBP_FALSE;

    // Slab info
    FFT->slab.n_x_local       = FFT->n_R_local[0];
    FFT->slab.i_x_start_local = FFT->i_R_start_local[0];
    FFT->slab.i_x_stop_local  = FFT->i_R_stop_local[0];
    FFT->slab.x_min_local     = FFT->R_field[0][FFT->i_R_start_local[0]];
    if(FFT->slab.n_x_local > 0)
        FFT->slab.x_max_local = FFT->R_field[0][FFT->i_R_stop_local[0] + 1];
    else
        FFT->slab.x_max_local = FFT->slab.x_min_local;
    SID_Allreduce(&(FFT->slab.x_max_local), &(FFT->slab.x_max), 1, SID_DOUBLE, SID_MAX, SID_COMM_WORLD);

#if USE_MPI
    // All ranks are not necessarily assigned any slices, so
    //   we need to figure out what ranks are to the right and the left for
    //   buffer exchanges.  The per-rank slab sizes are kept as int because
    //   they are exchanged below as SID_INT.
    n_x_rank              = (int *)SID_malloc(sizeof(int) * SID.n_proc);
    n_x_rank[SID.My_rank] = (int)FFT->slab.n_x_local;
    if(n_x_rank[SID.My_rank] > 0)
        flag_active = GBP_TRUE;
    else
        flag_active = GBP_FALSE;
    SID_Allreduce(&flag_active, &n_active, 1, SID_INT, SID_SUM, SID_COMM_WORLD);
    SID_Allreduce(&n_x_rank[SID.My_rank], &min_size, 1, SID_INT, SID_MIN, SID_COMM_WORLD);
    SID_Allreduce(&n_x_rank[SID.My_rank], &max_size, 1, SID_INT, SID_MAX, SID_COMM_WORLD);
    for(int i_rank = 0; i_rank < SID.n_proc; i_rank++)
        SID_Bcast(&(n_x_rank[i_rank]), 1, SID_INT, i_rank, SID_COMM_WORLD);

    // Nearest rank with a non-empty slab, searching upwards with wrap-around
    FFT->slab.rank_to_right = -1;
    for(int i_rank = SID.My_rank + 1; i_rank < SID.My_rank + SID.n_proc && FFT->slab.rank_to_right < 0; i_rank++) {
        int j_rank = i_rank % SID.n_proc;
        if(n_x_rank[j_rank] > 0)
            FFT->slab.rank_to_right = j_rank;
    }
    if(FFT->slab.rank_to_right < 0)
        FFT->slab.rank_to_right = SID.My_rank;

    // Nearest rank with a non-empty slab, searching downwards with wrap-around
    FFT->slab.rank_to_left = -1;
    for(int i_rank = SID.My_rank - 1; i_rank > SID.My_rank - SID.n_proc && FFT->slab.rank_to_left < 0; i_rank--) {
        int j_rank = i_rank;
        if(i_rank < 0)
            j_rank = i_rank + SID.n_proc;
        if(n_x_rank[j_rank] > 0)
            FFT->slab.rank_to_left = j_rank;
    }
    if(FFT->slab.rank_to_left < 0)
        FFT->slab.rank_to_left = SID.My_rank;
    SID_free(SID_FARG n_x_rank);
    SID_log("(%d cores unused, min/max slab size=%d/%d)...", SID_LOG_CONTINUE, SID.n_proc - n_active, min_size, max_size);
#else
    FFT->slab.rank_to_right = SID.My_rank;
    FFT->slab.rank_to_left  = SID.My_rank;
    if(FFT->slab.n_x_local > 0) {
        flag_active = GBP_TRUE;
        n_active    = 1;
        min_size    = FFT->slab.n_x_local;
        max_size    = FFT->slab.n_x_local;
    } else {
        flag_active = GBP_FALSE;
        n_active    = 0;
        min_size    = 0;
        max_size    = 0;
    }
#endif

    SID_log("Done.", SID_LOG_CLOSE);
}
/**
 * Bind a real-space array to this transformer and (re)build the FFTW plans
 * when required.
 *
 * The Fourier-side buffer is resized to ZSIZE x YSIZE x (XSIZE/2+1), and the
 * transformer keeps a pointer to @p input (no copy is made).  New plans are
 * created when no array was bound before, when the data buffer address
 * differs from the one the current plans were built for, or when the shape
 * changed.
 *
 * @param input  Real-space array to transform; it is referenced, not copied.
 *
 * Calls REPORT_ERROR when FFTW fails to create either plan.
 */
void FourierTransformer::setReal(MultidimArray<double>& input)
{
    // Decide whether the existing plans can be reused.  The shape comparison
    // must happen before fReal is re-pointed to the new input.
    bool recomputePlan = false;
    if (fReal == NULL)
    {
        recomputePlan = true;
    }
    else if (dataPtr != MULTIDIM_ARRAY(input))
    {
        recomputePlan = true;
    }
    else
    {
        recomputePlan = !(fReal->sameShape(input));
    }

    fFourier.resize(ZSIZE(input), YSIZE(input), XSIZE(input) / 2 + 1);
    fReal = &input;

    if (recomputePlan)
    {
        // Singleton leading dimensions are dropped so that FFTW plans a
        // transform of the effective rank.
        int ndim = 3;
        if (ZSIZE(input) == 1)
        {
            ndim = 2;
            if (YSIZE(input) == 1)
            {
                ndim = 1;
            }
        }

        // Extents, slowest-varying dimension first.  A fixed-size stack array
        // is used so nothing has to be released if the error report below
        // leaves this function.
        int N[3] = {0, 0, 0};
        switch (ndim)
        {
        case 1:
            N[0] = XSIZE(input);
            break;
        case 2:
            N[0] = YSIZE(input);
            N[1] = XSIZE(input);
            break;
        case 3:
            N[0] = ZSIZE(input);
            N[1] = YSIZE(input);
            N[2] = XSIZE(input);
            break;
        }

        // Destroy both forward and backward plans if they already exist
        destroyPlans();

        // Make new plans; plan creation is serialized through the shared mutex
        pthread_mutex_lock(&fftw_plan_mutex);
        fPlanForward = fftw_plan_dft_r2c(ndim, N, MULTIDIM_ARRAY(*fReal),
                                         (fftw_complex*) MULTIDIM_ARRAY(fFourier),
                                         FFTW_ESTIMATE);
        fPlanBackward = fftw_plan_dft_c2r(ndim, N,
                                          (fftw_complex*) MULTIDIM_ARRAY(fFourier),
                                          MULTIDIM_ARRAY(*fReal),
                                          FFTW_ESTIMATE);
        pthread_mutex_unlock(&fftw_plan_mutex);

        if (fPlanForward == NULL || fPlanBackward == NULL)
        {
            REPORT_ERROR("FFTW plans cannot be created");
        }

#ifdef DEBUG_PLANS
        std::cerr << " SETREAL fPlanForward= " << fPlanForward << " fPlanBackward= " << fPlanBackward << " this= " << this << std::endl;
#endif

        // Remember which buffer the plans were built for.
        dataPtr = MULTIDIM_ARRAY(*fReal);
    }
}
/*******************************************************
 *
 * global functions
 *
 *******************************************************/

/* Copy the array `src` (extents n_src) into the larger grid `dst` (extents
   n_grid, size_grid entries in total), writing 0 at every grid position that
   lies outside the extents of `src`. */
static void
fft_faltung_zero_pad(const fepc_real_t* src, vec_p n_src, vec_p n_grid, int dim, int size_grid, double* dst) {
	int    k, i, inside;
	vec_p  position;

	for (k = 0; k < size_grid; k++) {
		position = entry_one2d(k, n_grid);
		inside = 1;
		for (i = 0; i < dim; i++) {
			if ((position->array[i] < 0) || (position->array[i] >= n_src->array[i])) {
				inside = 0;
				break;
			}
		}
		if (inside) {
			dst[k] = src[entry_d2one(position, n_src)];
		}
		else {
			dst[k] = 0;
		}
		vec_del(position);
	}
}

/* Convolution of the arrays `a` (extents n_a) and `b` (extents n_b) via the
   Fourier transform.

   Both inputs are zero-padded to the extents n_a + n_b - 1 (per dimension),
   transformed with a real-to-complex FFT, multiplied coefficient by
   coefficient and transformed back.  The result is normalized by the number
   of grid points because FFTW transforms are unnormalized.

   Returns a newly malloc'ed array with prod(n_a + n_b - 1) entries; the caller
   owns it.  The inputs are not checked for consistency here. */
fepc_real_t*
fft_faltung(fepc_real_t* a, vec_p n_a, fepc_real_t* b, vec_p n_b) {
	int  size_c, size_half, dim;
	int  k;
	int  *n;
	vec_p  n_c;
	fepc_real_t  *c;
	fftw_complex  *in, *out_a, *out_b;
	double  *out, *in_a, *in_b;
	fftw_plan  p;

	/* Extents of the result: n_a + n_b - 1 in every dimension */
	dim = n_a->dim;
	n_c = vec_new(dim);
	for (k = 0; k < dim; k++) {
		n_c->array[k] = n_a->array[k] + n_b->array[k] - 1;
	}
	n = n_c->array;

	size_c = vec_size( n_c );

	/* Number of complex coefficients a real-to-complex transform produces:
	   the last dimension is cut to n/2+1 */
	size_half = 1;
	for (k = 0; k < dim - 1; k++) {
		size_half *= n[k];
	}
	if (dim > 0) {
		size_half *= n[dim - 1] / 2 + 1;
	}

	/* Result array */
	c = (fepc_real_t*) malloc(sizeof(fepc_real_t) * size_c);

	/* Fourier transform of the zero-padded a */
	in_a = (double*) fftw_malloc(sizeof(double) * size_c);
	out_a = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * size_c);
	fft_faltung_zero_pad(a, n_a, n_c, dim, size_c, in_a);

	p = fftw_plan_dft_r2c(dim, n, in_a, out_a, FFTW_ESTIMATE);
	fftw_execute(p);
	fftw_destroy_plan(p);

	/* Fourier transform of the zero-padded b */
	in_b = (double*) fftw_malloc(sizeof(double) * size_c);
	out_b = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * size_c);
	fft_faltung_zero_pad(b, n_b, n_c, dim, size_c, in_b);

	p = fftw_plan_dft_r2c(dim, n, in_b, out_b, FFTW_ESTIMATE);
	fftw_execute(p);
	fftw_destroy_plan(p);

	/* Complex product of the two spectra.  Only the first size_half entries
	   were written by the forward transforms and only those are read by the
	   inverse transform. */
	in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * size_c);
	out = (double*) fftw_malloc(sizeof(double) * size_c);

	for (k = 0; k < size_half; k++) {
		in[k][0] = out_a[k][0]*out_b[k][0] - out_a[k][1]*out_b[k][1];
		in[k][1] = out_a[k][1]*out_b[k][0] + out_a[k][0]*out_b[k][1];
	}

	/* Inverse Fourier transform of the product */
	p = fftw_plan_dft_c2r(dim, n, in, out, FFTW_ESTIMATE);
	fftw_execute(p);
	fftw_destroy_plan(p);

	for (k = 0; k < size_c; k++) {
		c[k] = (fepc_real_t) out[k]/size_c;
	}

	/* Everything obtained from fftw_malloc must be released with fftw_free */
	vec_del(n_c);
	fftw_free(in);
	fftw_free(in_a);
	fftw_free(in_b);
	fftw_free(out);
	fftw_free(out_a);
	fftw_free(out_b);
	return c;
}
/* N-dimensional convolution of two real arrays through the FFT.
 *
 *   x, y       - real arrays carrying a "dim" attribute
 *   input_dim  - integer vector: extents of the work grid both inputs are
 *                embedded into (zero-filled elsewhere)
 *   output_dim - integer vector: extents of the sub-array extracted from the
 *                work grid as the result
 *   Conj       - logical: when true the spectrum of y is conjugated before
 *                the multiplication
 *
 * Returns a freshly allocated real array with "dim" attribute output_dim.
 */
SEXP convolveN(SEXP x, SEXP y,
               SEXP input_dim, SEXP output_dim,
               SEXP Conj) {
  SEXP x_dim, y_dim, res;
  R_len_t rank = length(input_dim);
  R_len_t *N = INTEGER(input_dim);
  R_len_t pN = prod(rank, N), phN = hprod(rank, N);
  int conjugate = LOGICAL(Conj)[0];

  fftw_complex *ox, *oy;
  fftw_plan r2c_plan, c2r_plan;
  double *circ;
  R_len_t *revN, r, i;

  /* Allocate needed memory */
  circ = (double*) fftw_malloc(pN * sizeof(double));
  ox = (fftw_complex*) fftw_malloc(phN * sizeof(fftw_complex));
  oy = (fftw_complex*) fftw_malloc(phN * sizeof(fftw_complex));

  /* Estimate the best plans for given input length, note, that input data is
     stored in column-major mode, that's why we're passing dimensions in
     *reverse* order */
  revN = Calloc(rank, R_len_t);
  for (r = 0; r < rank; ++r) revN[r] = N[rank - 1 - r];
  r2c_plan = fftw_plan_dft_r2c(rank, revN, circ, ox, FFTW_ESTIMATE);
  c2r_plan = fftw_plan_dft_c2r(rank, revN, ox, circ, FFTW_ESTIMATE);
  Free(revN);

  PROTECT(x_dim = getAttrib(x, R_DimSymbol));
  PROTECT(y_dim = getAttrib(y, R_DimSymbol));

  /* Fill input buffer by X values*/
  memset(circ, 0, pN * sizeof(double));
  fill_subarray(circ, REAL(x), rank, N, INTEGER(x_dim), 1);

  /* Run the plan on X-input data */
  fftw_execute_dft_r2c(r2c_plan, circ, ox);

  /* Fill input buffer by Y values*/
  memset(circ, 0, pN * sizeof(double));
  fill_subarray(circ, REAL(y), rank, N, INTEGER(y_dim), 1);

  /* Run the plan on Y-input data */
  fftw_execute_dft_r2c(r2c_plan, circ, oy);

  /* Compute conjugation if needed */
  if (conjugate)
    for (i = 0; i < phN; ++i)
      oy[i] = conj(oy[i]);

  /* Dot-multiply ox and oy, and divide by Nx*...*Nz*/
  for (i = 0; i < phN; ++i)
    oy[i] *= ox[i] / pN;

  /* Compute the reverse transform to obtain result */
  fftw_execute_dft_c2r(c2r_plan, oy, circ);

  PROTECT(res = allocVector(REALSXP, prod(rank, INTEGER(output_dim))));
  fill_subarray(circ, REAL(res), rank, N, INTEGER(output_dim), 0);
  /* setAttrib(output_dim, R_NamesSymbol, R_NilValue); */
  setAttrib(res, R_DimSymbol, output_dim);
  /* setAttrib(res, R_DimNamesSymbol, R_NilValue); */

  /* Cleanup: the plans are local to this call, so they must be destroyed
     here together with the work buffers */
  fftw_destroy_plan(r2c_plan);
  fftw_destroy_plan(c2r_plan);
  fftw_free(ox);
  fftw_free(oy);
  fftw_free(circ);

  /* Return */
  UNPROTECT(3);
  return res;
}