/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  makeNewGrid
 *    Arguments:  int gridDim - Dimensions dimXdim of new grid.
 *      Returns:  Pointer to new grid object.
 *  Description:  Makes new grid object and sets all data points to 0.
 * =====================================================================================
 */
Grid * makeNewGrid(int gridDimX, int gridDimY) {
	fftw_mpi_init() ;
	Grid * newGrid = malloc(sizeof(Grid)) ;
	ptrdiff_t localDimY, globalOffset, localDimX ;
	newGrid->allocScheme = fftw_mpi_local_size_2d(gridDimY, gridDimX, MPI_COMM_WORLD, &localDimY, &globalOffset);
	newGrid->localGridDimY = localDimY ;
	newGrid->localGridDimX = gridDimX ;
	newGrid->globalGridDimY = gridDimY ;
	newGrid->globalGridDimX = gridDimX ;
	newGrid->globalOffset = globalOffset ;
	newGrid->gridPoints = fftw_alloc_real(newGrid->allocScheme) ;
	return newGrid ;
}		/* -----  end of function makeNewGrid  ----- */
Beispiel #2
0
int main(int argc, char* argv[])
{

	MPI_Init(&argc, &argv);
	fftw_mpi_init();

	example_2d();

	example_3d();

	hypre_example_2d();

	hypre_example_3d();

	MPI_Finalize();

}
Beispiel #3
0
int main(int argc,char **args)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       N0=50,N1=20,N=N0*N1,DIM;
  PetscRandom    rdm;
  PetscScalar    a;
  PetscReal      enorm;
  Vec            x,y,z;
  PetscBool      view=PETSC_FALSE,use_interface=PETSC_TRUE;

  ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires complex numbers");
#endif

  ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex143");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-vec_view draw", "View the vectors", "ex143", view, &view, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex143",use_interface, &use_interface, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = PetscOptionsGetBool(NULL,"-use_FFTW_interface",&use_interface,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);

  ierr = PetscRandomCreate(PETSC_COMM_WORLD, &rdm);CHKERRQ(ierr);
  ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr);

  if (!use_interface) {
    /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */
    /*---------------------------------------------------------*/
    fftw_plan    fplan,bplan;
    fftw_complex *data_in,*data_out,*data_out2;
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    
    DIM = 2;
    if (!rank) {
      ierr = PetscPrintf(PETSC_COMM_SELF,"Use FFTW without PETSc-FFTW interface, DIM %D\n",DIM);CHKERRQ(ierr);
    }
    fftw_mpi_init();
    N           = N0*N1;
    alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_n0,&local_0_start);

    data_in   = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
    data_out  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
    data_out2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);

    ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_in,&x);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject) x, "Real Space vector");CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out,&y);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out2,&z);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr);

    fplan = fftw_mpi_plan_dft_2d(N0,N1,data_in,data_out,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE);
    bplan = fftw_mpi_plan_dft_2d(N0,N1,data_out,data_out2,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE);

    ierr = VecSetRandom(x, rdm);CHKERRQ(ierr);
    if (view) {ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}

    fftw_execute(fplan);
    if (view) {ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}

    fftw_execute(bplan);

    /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
    a    = 1.0/(PetscReal)N;
    ierr = VecScale(z,a);CHKERRQ(ierr);
    if (view) {ierr = VecView(z, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
    ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr);
    ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr);
    if (enorm > 1.e-11 && !rank) {
      ierr = PetscPrintf(PETSC_COMM_SELF,"  Error norm of |x - z| %g\n",(double)enorm);CHKERRQ(ierr);
    }

    /* Free spaces */
    fftw_destroy_plan(fplan);
    fftw_destroy_plan(bplan);
    fftw_free(data_in);  ierr = VecDestroy(&x);CHKERRQ(ierr);
    fftw_free(data_out); ierr = VecDestroy(&y);CHKERRQ(ierr);
    fftw_free(data_out2);ierr = VecDestroy(&z);CHKERRQ(ierr);

  } else {
    /* Use PETSc-FFTW interface                  */
    /*-------------------------------------------*/
    PetscInt i,*dim,k;
    Mat      A;

    N=1;
    for (i=1; i<5; i++) {
      DIM  = i;
      ierr = PetscMalloc1(i,&dim);CHKERRQ(ierr);
      for (k=0; k<i; k++) {
        dim[k]=30;
      }
      N *= dim[i-1];


      /* Create FFTW object */
      if (!rank) printf("Use PETSc-FFTW interface...%d-DIM: %d\n",(int)DIM,(int)N);

      ierr = MatCreateFFT(PETSC_COMM_WORLD,DIM,dim,MATFFTW,&A);CHKERRQ(ierr);

      /* Create vectors that are compatible with parallel layout of A - must call MatCreateVecs()! */

      ierr = MatCreateVecsFFTW(A,&x,&y,&z);CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject) x, "Real space vector");CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr);

      /* Set values of space vector x */
      ierr = VecSetRandom(x,rdm);CHKERRQ(ierr);

      if (view) {ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}

      /* Apply FFTW_FORWARD and FFTW_BACKWARD */
      ierr = MatMult(A,x,y);CHKERRQ(ierr);
      if (view) {ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}

      ierr = MatMultTranspose(A,y,z);CHKERRQ(ierr);

      /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
      a    = 1.0/(PetscReal)N;
      ierr = VecScale(z,a);CHKERRQ(ierr);
      if (view) {ierr = VecView(z,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
      ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr);
      ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr);
      if (enorm > 1.e-9 && !rank) {
        ierr = PetscPrintf(PETSC_COMM_SELF,"  Error norm of |x - z| %e\n",enorm);CHKERRQ(ierr);
      }

      ierr = VecDestroy(&x);CHKERRQ(ierr);
      ierr = VecDestroy(&y);CHKERRQ(ierr);
      ierr = VecDestroy(&z);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);

      ierr = PetscFree(dim);CHKERRQ(ierr);
    }
  }

  ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}
Beispiel #4
0
Datei: fft2d.c Projekt: LeeJH/pde
void init_fft2d_(void )
{
  int i,j,k;
  double vm2,vm1,v,vp1,vp2,vb;
  
  commx = MPI_Comm_f2c(topo_.commxc);
  MPI_Comm_rank(commx, &irankx);
  MPI_Comm_size(commx, &isizex);

  commyz = MPI_Comm_f2c(topo_.commyzc);
  MPI_Comm_rank(commyz, &irankyz);
  MPI_Comm_size(commyz, &isizeyz);

  fftw_mpi_init();

  howmany = topo_.mxlc;
//***********  
  alloc_ly = fftw_mpi_local_size_2d(my, mz, commx, &ly, &lys);
/*  alloc_ly=fftw_mpi_local_size_many(rnk, myz, howmany,
           FFTW_MPI_DEFAULT_BLOCK, commx, &ly, &lys);
*/
//***********
  
  if(((ly-topo_.mylc)!=0) || topo_.npzc>1) {
  printf("Error,npz should equal to 1, or %d\t%d\n",irankx,ly-topo_.mylc);
  MPI_Abort(commx,1);
  }

  minp = fftw_alloc_complex(alloc_ly);
  mout = fftw_alloc_complex(alloc_ly);
  
  if( !(freq = r3tensor(topo_.mxlc, topo_.mylc*topo_.mzlc, 2)) ) 
  	printf("Malloc error!\n");  
  if( !(data = r3tensor(topo_.mxlc, topo_.mylc*topo_.mzlc, 2)) ) 
  	printf("Malloc error!\n");
/*  if( !(dar = r3tensor(topo_.mxlc, topo_.mylc, topo_.mzlc)) ) 
  	printf("Malloc error!\n");
  if( !(dai = r3tensor(topo_.mxlc, topo_.mylc, topo_.mzlc)) ) 
  	printf("Malloc error!\n");
*/
//***********
  mplanF = fftw_mpi_plan_dft_2d(my, mz, minp, mout, commx, FFTW_FORWARD, FFTW_MEASURE);
  mplanR = fftw_mpi_plan_dft_2d(my, mz, minp, mout, commx, FFTW_BACKWARD, FFTW_MEASURE);
/*  mplanF = fftw_mpi_plan_many_dft(rnk, myz, howmany,
        FFTW_MPI_DEFAULT_BLOCK,FFTW_MPI_DEFAULT_BLOCK,
        minp, mout, commx, FFTW_FORWARD, FFTW_MEASURE);
  mplanR = fftw_mpi_plan_many_dft(rnk, myz, howmany,
        FFTW_MPI_DEFAULT_BLOCK,FFTW_MPI_DEFAULT_BLOCK,
        minp, mout, commx, FFTW_BACKWARD, FFTW_MEASURE);
*/
//***********
//***** Solver part ******

  dxs = topo_.dx0*topo_.dx0;
  dys = topo_.dy0*topo_.dy0;
  dzs = topo_.dz0*topo_.dz0;

  vm2=-1.0/12.0;
  vm1=16.0/12.0;
  v  =-30.0/12.0;
  vp1=16.0/12.0;
  vp2=-1.0/12.0;

  MatCreateMPIAIJ(commyz, PETSC_DECIDE, PETSC_DECIDE, mx, mx, 5, PETSC_NULL, 5, PETSC_NULL, &A);

  ierr = MatGetOwnershipRange(A,&Istart,&Iend);

  
  for (Ii=Istart; Ii<Iend; Ii++) {
    i = Ii; j = Ii;
    if ((i>1)&&(i<mx-2))   {

	J = Ii - 2; MatSetValues(A,1,&Ii,1,&J,&vm2,INSERT_VALUES);
	J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&vm1,INSERT_VALUES);CHKERRQ(ierr);
	J = Ii; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);
	J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&vp1,INSERT_VALUES);CHKERRQ(ierr);
	J = Ii + 2; ierr = MatSetValues(A,1,&Ii,1,&J,&vp2,INSERT_VALUES);CHKERRQ(ierr);
    }

    if (i==0) {
	J = Ii; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
    if (i==1)   {
	J = Ii - 1; vb = 11.0/12.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES);CHKERRQ(ierr);
	J = Ii ; vb = -5.0/3.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES);
	J = Ii + 1; vb = 0.5; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES);
	J = Ii + 2; vb = 1.0/3.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES);
	J = Ii + 3; vb = -1.0/12.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES);
}

    if (i==mx-2) {
        J = Ii + 1; vb = 11.0/12.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES);CHKERRQ(ierr);
        J = Ii ; vb = -5.0/3.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES);
        J = Ii - 1; vb = 0.5; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES);
        J = Ii - 2; vb = 1.0/3.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES);
        J = Ii - 3; vb = -1.0/12.0; ierr = MatSetValues(A,1,&Ii,1,&J,&vb,INSERT_VALUES);
}
    if (i==mx-1) {J = Ii; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 

    }

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = VecCreate(commyz,&br);CHKERRQ(ierr);
  ierr = VecSetSizes(br,PETSC_DECIDE,mx);CHKERRQ(ierr);
  ierr = VecSetFromOptions(br);CHKERRQ(ierr);
  ierr = VecDuplicate(br,&xr);CHKERRQ(ierr);
  ierr = VecDuplicate(br,&bi);CHKERRQ(ierr);
  ierr = VecDuplicate(br,&xi);CHKERRQ(ierr);


  ierr = KSPCreate(commyz,&ksp);CHKERRQ(ierr);

  ierr = KSPSetOperators(ksp,A,A,SAME_PRECONDITIONER);CHKERRQ(ierr);
  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
  ierr = KSPGetPC(ksp,&pc);
  PCSetType(pc,PCJACOBI);
  ierr = KSPSetTolerances(ksp,1.e-7,1.e-50,PETSC_DEFAULT,
                          PETSC_DEFAULT);CHKERRQ(ierr);

//******* End  *******
}
Beispiel #5
0
void init_field(int n_d, int *n, double *L, field_info *FFT) {
    ptrdiff_t  n_x_local;
    ptrdiff_t  i_x_start_local;
    ptrdiff_t  n_y_transpose_local;
    ptrdiff_t  i_y_start_transpose_local;
    ptrdiff_t *n_x_rank;

    int  flag_active;
    int  n_active;
    int  min_size, max_size;

    SID_log("Initializing ", SID_LOG_OPEN);
    for(ptrdiff_t i_d = 0; i_d < n_d; i_d++) {
        if(i_d < (n_d - 1))
            SID_log("%dx", SID_LOG_CONTINUE, n[i_d]);
        else
            SID_log("%d element %d-d FFT ", SID_LOG_CONTINUE, n[i_d], n_d);
    }
    SID_log("(%d byte precision)...", SID_LOG_CONTINUE, (int)sizeof(GBPREAL));

    // Initialize FFT sizes
    FFT->n_d             = n_d;
    FFT->n               = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->L               = (double *)SID_calloc(sizeof(double) * FFT->n_d);
    FFT->n_k_local       = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->n_R_local       = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_R_start_local = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_k_start_local = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_R_stop_local  = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    FFT->i_k_stop_local  = (ptrdiff_t *)SID_calloc(sizeof(ptrdiff_t) * FFT->n_d);
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->n[i_d]               = n[i_d];
        FFT->L[i_d]               = L[i_d];
        FFT->i_R_start_local[i_d] = 0;
        FFT->i_k_start_local[i_d] = 0;
        FFT->n_R_local[i_d]       = FFT->n[i_d];
        FFT->n_k_local[i_d]       = FFT->n[i_d];
    }
    FFT->n_k_local[FFT->n_d - 1] = FFT->n[FFT->n_d - 1] / 2 + 1;

    // Initialize FFTW

    // Create an integer version of FFT->n[] to pass to ..._create_plan
    int *n_int=(int *)SID_malloc(sizeof(int)*FFT->n_d);
    for(int i_d=0;i_d<FFT->n_d;i_d++)
        n_int[i_d]=(int)FFT->n[i_d];
#if FFTW_V2
#if USE_MPI
    int total_local_size_int;
    int n_x_local_int;
    int i_x_start_local_int;
    int n_y_transpose_local_int;
    int i_y_start_transpose_local_int;
    FFT->plan  = rfftwnd_mpi_create_plan(SID.COMM_WORLD->comm, FFT->n_d, n_int, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE);
    FFT->iplan = rfftwnd_mpi_create_plan(SID.COMM_WORLD->comm, FFT->n_d, n_int, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE);
    rfftwnd_mpi_local_sizes(FFT->plan,
                            &(n_x_local_int),
                            &(i_x_start_local_int),
                            &(n_y_transpose_local_int),
                            &(i_y_start_transpose_local_int),
                            &total_local_size_int);
    n_x_local =  (ptrdiff_t)n_x_local_int;
    i_x_start_local = (ptrdiff_t)i_x_start_local_int;
    n_y_transpose_local = (ptrdiff_t)n_y_transpose_local_int;
    i_y_start_transpose_local = (ptrdiff_t)i_y_start_transpose_local_int;
    FFT->total_local_size = (size_t)total_local_size_int;
#else
    FFT->total_local_size = 1;
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        if(i_d < FFT->n_d - 1)
            FFT->total_local_size *= FFT->n[i_d];
        else
            FFT->total_local_size *= 2 * (FFT->n[i_d] / 2 + 1);
    }
#if USE_DOUBLE
    FFT->plan  = fftwnd_create_plan(FFT->n_d, n_int, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
    FFT->iplan = fftwnd_create_plan(FFT->n_d, n_int, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
#else
    FFT->plan  = rfftwnd_create_plan(FFT->n_d, n_int, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
    FFT->iplan = rfftwnd_create_plan(FFT->n_d, n_int, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
#endif
#endif
#else
#if USE_MPI
#if USE_DOUBLE
    fftw_mpi_init();
    FFT->total_local_size = fftw_mpi_local_size_many_transposed(FFT->n_d,
                                                                FFT->n,
                                                                1,
                                                                FFTW_MPI_DEFAULT_BLOCK,
                                                                FFTW_MPI_DEFAULT_BLOCK,
                                                                SID_COMM_WORLD->comm,
                                                                &(n_x_local),
                                                                &(i_x_start_local),
                                                                &(n_y_transpose_local),
                                                                &(i_y_start_transpose_local));
    FFT->plan  = fftw_mpi_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
    FFT->iplan = fftw_mpi_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
#else
    fftwf_mpi_init();
    FFT->total_local_size   = fftwf_mpi_local_size_many_transposed(FFT->n_d,
                                                                 FFT->n,
                                                                 1,
                                                                 FFTW_MPI_DEFAULT_BLOCK,
                                                                 FFTW_MPI_DEFAULT_BLOCK,
                                                                 SID_COMM_WORLD->comm,
                                                                 &(n_x_local),
                                                                 &(i_x_start_local),
                                                                 &(n_y_transpose_local),
                                                                 &(i_y_start_transpose_local));
    FFT->plan  = fftwf_mpi_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
    FFT->iplan = fftwf_mpi_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, SID_COMM_WORLD->comm, FFTW_ESTIMATE);
#endif
#else
    FFT->total_local_size = 1;
    for(ptrdiff_t i_d=0; i_d < FFT->n_d; i_d++) {
        if(i_d < FFT->n_d - 1)
            FFT->total_local_size *= FFT->n[i_d];
        else
            FFT->total_local_size *= 2 * (FFT->n[i_d] / 2 + 1);
    }
#if USE_DOUBLE
    FFT->plan  = fftw_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, FFTW_ESTIMATE);
    FFT->iplan = fftw_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, FFTW_ESTIMATE);
#else
    FFT->plan  = fftwf_plan_dft_r2c(FFT->n_d, FFT->n, FFT->field_local, FFT->cfield_local, FFTW_ESTIMATE);
    FFT->iplan = fftwf_plan_dft_c2r(FFT->n_d, FFT->n, FFT->cfield_local, FFT->field_local, FFTW_ESTIMATE);
#endif
#endif
#endif

    SID_free(SID_FARG n_int);


    // Set empty slabs to start at 0 to make ignoring them simple.
    if(n_x_local == 0)
        i_x_start_local = 0;
    if(n_y_transpose_local == 0)
        i_y_start_transpose_local = 0;

    // Modify the local slab dimensions according to what FFTW chose.
    FFT->i_R_start_local[0] = i_x_start_local;
    FFT->n_R_local[0]       = n_x_local;
    if(FFT->n_d > 1) {
        FFT->i_k_start_local[1] = i_y_start_transpose_local;
        FFT->n_k_local[1]       = n_y_transpose_local;
    }

    // Allocate field
#if USE_FFTW3
    FFT->field_local  = (gbpFFT_real    *)fftwf_alloc_real(FFT->total_local_size);
#else
    FFT->field_local  = (gbpFFT_real    *)SID_malloc(sizeof(gbpFFT_real)*FFT->total_local_size);
#endif
    FFT->cfield_local = (gbpFFT_complex *)FFT->field_local;

    // Upper limits of slab decomposition
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->i_R_stop_local[i_d] = FFT->i_R_start_local[i_d] + FFT->n_R_local[i_d] - 1;
        FFT->i_k_stop_local[i_d] = FFT->i_k_start_local[i_d] + FFT->n_k_local[i_d] - 1;
    }

    // FFTW padding sizes
    if(FFT->n_d > 1) {
        FFT->pad_size_R = 2 * (FFT->n_R_local[FFT->n_d - 1] / 2 + 1) - FFT->n_R_local[FFT->n_d - 1];
        FFT->pad_size_k = 0;
    } else {
        FFT->pad_size_R = 0;
        FFT->pad_size_k = 0;
    }

    // Number of elements (global and local) in the FFT
    ptrdiff_t i_d = 0;
    for(FFT->n_field = 1, FFT->n_field_R_local = 1, FFT->n_field_k_local = 1; i_d < FFT->n_d; i_d++) {
        FFT->n_field *= (size_t)FFT->n[i_d];
        FFT->n_field_R_local *= (size_t)FFT->n_R_local[i_d];
        FFT->n_field_k_local *= (size_t)FFT->n_k_local[i_d];
    }

    // Clear the field
    clear_field(FFT);

    // Initialize the FFT's real-space grid
    FFT->R_field = (double **)SID_malloc(sizeof(double *) * FFT->n_d);
    FFT->dR      = (double *)SID_malloc(sizeof(double *) * FFT->n_d);
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->R_field[i_d] = (double *)SID_malloc(sizeof(double) * (FFT->n[i_d] + 1));
        FFT->dR[i_d]      = FFT->L[i_d] / (double)(FFT->n[i_d]);
        for(ptrdiff_t i_i = 0; i_i < FFT->n[i_d]; i_i++)
            FFT->R_field[i_d][i_i] = FFT->L[i_d] * ((double)i_i / (double)(FFT->n[i_d]));
        FFT->R_field[i_d][FFT->n[i_d]] = FFT->L[i_d];
    }

    // Initialize the FFT's k-space grid
    FFT->k_field   = (double **)SID_malloc(sizeof(double *) * FFT->n_d);
    FFT->dk        = (double *)SID_malloc(sizeof(double *) * FFT->n_d);
    FFT->k_Nyquist = (double *)SID_malloc(sizeof(double *) * FFT->n_d);
    for(ptrdiff_t i_d = 0; i_d < FFT->n_d; i_d++) {
        FFT->k_field[i_d]   = (double *)SID_malloc(sizeof(double) * FFT->n[i_d]);
        FFT->dk[i_d]        = TWO_PI / FFT->L[i_d];
        FFT->k_Nyquist[i_d] = TWO_PI * (double)(FFT->n[i_d]) / FFT->L[i_d] / 2.;
        for(ptrdiff_t i_i = 0; i_i < FFT->n[i_d]; i_i++) {
            if(i_i >= FFT->n[i_d] / 2)
                FFT->k_field[i_d][i_i] = TWO_PI * (double)(i_i - FFT->n[i_d]) / FFT->L[i_d];
            else
                FFT->k_field[i_d][i_i] = TWO_PI * (double)(i_i) / FFT->L[i_d];
        }
    }

    // Flags
    FFT->flag_padded = GBP_FALSE;

    // Slab info
    FFT->slab.n_x_local       = FFT->n_R_local[0];
    FFT->slab.i_x_start_local = FFT->i_R_start_local[0];
    FFT->slab.i_x_stop_local  = FFT->i_R_stop_local[0];
    FFT->slab.x_min_local     = FFT->R_field[0][FFT->i_R_start_local[0]];
    if(FFT->slab.n_x_local > 0)
        FFT->slab.x_max_local = FFT->R_field[0][FFT->i_R_stop_local[0] + 1];
    else
        FFT->slab.x_max_local = FFT->slab.x_min_local;
    SID_Allreduce(&(FFT->slab.x_max_local), &(FFT->slab.x_max), 1, SID_DOUBLE, SID_MAX, SID_COMM_WORLD);

#if USE_MPI
    // All ranks are not necessarily assigned any slices, so
    //   we need to figure out what ranks are to the right and the left for
    //   buffer exchanges
    n_x_rank              = (ptrdiff_t *)SID_malloc(sizeof(ptrdiff_t) * SID.n_proc);
    n_x_rank[SID.My_rank] = (ptrdiff_t)FFT->slab.n_x_local;
    if(n_x_rank[SID.My_rank] > 0)
        flag_active = GBP_TRUE;
    else
        flag_active = GBP_FALSE;
    SID_Allreduce(&flag_active, &n_active, 1, SID_INT, SID_SUM, SID_COMM_WORLD);
    SID_Allreduce(&n_x_rank[SID.My_rank], &min_size, 1, SID_INT, SID_MIN, SID_COMM_WORLD);
    SID_Allreduce(&n_x_rank[SID.My_rank], &max_size, 1, SID_INT, SID_MAX, SID_COMM_WORLD);
    for(int i_rank = 0; i_rank < SID.n_proc; i_rank++)
        SID_Bcast(&(n_x_rank[i_rank]), 1, SID_INT, i_rank, SID_COMM_WORLD);
    FFT->slab.rank_to_right = -1;
    for(int i_rank = SID.My_rank + 1; i_rank < SID.My_rank + SID.n_proc && FFT->slab.rank_to_right < 0; i_rank++) {
        int j_rank = i_rank % SID.n_proc;
        if(n_x_rank[j_rank] > 0)
            FFT->slab.rank_to_right = j_rank;
    }
    if(FFT->slab.rank_to_right < 0)
        FFT->slab.rank_to_right = SID.My_rank;
    FFT->slab.rank_to_left = -1;
    for(int i_rank = SID.My_rank - 1; i_rank > SID.My_rank - SID.n_proc && FFT->slab.rank_to_left < 0; i_rank--) {
        int j_rank = i_rank;
        if(i_rank < 0)
            j_rank = i_rank + SID.n_proc;
        if(n_x_rank[j_rank] > 0)
            FFT->slab.rank_to_left = j_rank;
    }
    if(FFT->slab.rank_to_left < 0)
        FFT->slab.rank_to_left = SID.My_rank;
    free(n_x_rank);
    SID_log("(%d cores unused, min/max slab size=%d/%d)...", SID_LOG_CONTINUE, SID.n_proc - n_active, min_size, max_size);
#else
    FFT->slab.rank_to_right = SID.My_rank;
    FFT->slab.rank_to_left  = SID.My_rank;
    if(FFT->slab.n_x_local > 0) {
        flag_active = GBP_TRUE;
        n_active    = 1;
        min_size    = FFT->slab.n_x_local;
        max_size    = FFT->slab.n_x_local;
    } else {
        flag_active = GBP_FALSE;
        n_active    = 0;
        min_size    = 0;
        max_size    = 0;
    }
#endif

    SID_log("Done.", SID_LOG_CLOSE);
}
Beispiel #6
0
void init_common(void) {
	/* This routine will initialize everything */
	int i,j,k;
	
	DEBUG_START_FUNC;
	
#ifdef MPI_SUPPORT
#ifdef FFTW3_MPI_SUPPORT	
	fftw_mpi_init();
#endif
#endif
#ifdef _OPENMP
	if( !(fftw_init_threads()) ) ERROR_HANDLER( ERROR_CRITICAL, "Threads initialisation failed");
#endif
	
	/* We start with the coordinate system */
	kx = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (kx == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kx allocation");
	
	ky = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (ky == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for ky allocation");
	
	kz = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (kz == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kz allocation");
	
	kxt = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (kxt == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kxt allocation");
	kyt = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (kyt == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kyt allocation");
	kzt = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (kzt == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kzt allocation");
	
	k2t = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (k2t == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for k2t allocation");
	
	ik2t = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (ik2t == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for ik2t allocation");
	

	for( i = 0; i < NX_COMPLEX / NPROC; i++) {
		for( j = 0; j < NY_COMPLEX; j++) {
			for( k = 0; k < NZ_COMPLEX; k++) {
				kx[ IDX3D ] = (2.0 * M_PI) / param.lx *
						(fmod( NX_COMPLEX * rank / NPROC  + i + (NX_COMPLEX / 2) ,  NX_COMPLEX ) - NX_COMPLEX / 2 );
					 
#ifdef WITH_2D
				ky[ IDX3D ] = (2.0 * M_PI) / param.ly * j;
					 
				kz[ IDX3D ] = 0.0;
#else
				ky[ IDX3D ] = (2.0 * M_PI) / param.ly *
						(fmod( j + (NY_COMPLEX / 2) ,  NY_COMPLEX ) - NY_COMPLEX / 2 );
					 
				kz[ IDX3D ] = (2.0 * M_PI) / param.lz * k;
#endif

				kxt[ IDX3D ]= kx[IDX3D];
				kyt[ IDX3D ]= ky[IDX3D]; 
				kzt[ IDX3D ]= kz[IDX3D]; 
			
				k2t[ IDX3D ] = kxt[IDX3D] * kxt[IDX3D] +
								kyt[IDX3D] * kyt[IDX3D] +
								kzt[IDX3D] * kzt[IDX3D];
							  
				if ( k2t[IDX3D] == 0.0 ) ik2t[IDX3D] = 1.0;
				else	ik2t[IDX3D] = 1.0 / k2t[IDX3D];
			}
		}
	}
	kxmax = 2.0 * M_PI/ param.lx * ( (NX / 2) - 1);
	kymax = 2.0 * M_PI/ param.ly * ( (NY / 2) - 1);
	kzmax = 2.0 * M_PI/ param.lz * ( (NZ / 2) - 1);
#ifdef WITH_2D
	kzmax = 0.0;
#endif
	kmax=pow(kxmax*kxmax+kymax*kymax+kzmax*kzmax,0.5);
	
	/* Initialize the dealiazing mask Or the nyquist frequency mask (in case dealiasing is not required) */
	
	mask = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (mask == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for mask allocation");
	
	for( i = 0; i < NX_COMPLEX/NPROC; i++) {
		for( j = 0; j < NY_COMPLEX; j++) {
			for( k = 0; k < NZ_COMPLEX; k++) {

				mask[ IDX3D ] = 1.0;
				if(param.antialiasing) {
					if( fabs( kx[ IDX3D ] ) > 2.0/3.0 * kxmax)
						mask[ IDX3D ] = 0.0;
				
					if( fabs( ky[ IDX3D ] ) > 2.0/3.0 * kymax)
						mask[ IDX3D ] = 0.0;
#ifndef WITH_2D
					if( fabs( kz[ IDX3D ] ) > 2.0/3.0 * kzmax)
						mask[ IDX3D ] = 0.0;
#endif
				}
				else {			
				        if (  NX_COMPLEX / NPROC * rank + i == NX_COMPLEX / 2 ) 
						mask[ IDX3D ] = 0.0;
					if ( j == NY_COMPLEX / 2 )  
						mask[ IDX3D ] = 0.0;
#ifndef WITH_2D
					if ( k == NZ_COMPLEX ) 
						mask[ IDX3D ] = 0.0;
#endif
				}
			}
		}
	}

	if(param.antialiasing) {
		kxmax = kxmax * 2.0 / 3.0;
		kymax = kymax * 2.0 / 3.0;
		kzmax = kzmax * 2.0 / 3.0;
		kmax = kmax * 2.0 / 3.0;
	}
	

// Allocate fields
// Complex fields

	w1 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w1 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w1 allocation");
	
	w2 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w2 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w2 allocation");
	
	w3 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w3 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w3 allocation");
	
	w4 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w4 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w4 allocation");
	
	w5 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w5 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w5 allocation");
	
	w6 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w6 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w6 allocation");
	
	w7 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w7 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w7 allocation");
	
	w8 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w8 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w8 allocation");
	
	w9 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w9 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w9 allocation");
	
	w10 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w10 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w10 allocation");
	
	w11 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w11 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w11 allocation");
	
	w12 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w12 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w12 allocation");
	
	w13 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w13 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w13 allocation");
	
	w14 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w14 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w14 allocation");
	
	w15 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w15 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w15 allocation");
	w16 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w16 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w15 allocation");
	w17 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w17 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w15 allocation");
	w18 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w18 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w15 allocation");
	
	wh1 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (wh1 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh1 allocation");	
	wh2 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (wh2 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh2 allocation");
	wh3 = (double complex *) fftw_malloc( sizeof(double complex) * NX*(NY/2+1));
	if (wh3 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh3 allocation");
	wh4 = (double complex *) fftw_malloc( sizeof(double complex) * NX*(NY/2+1)*NZ);
	if (wh4 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh4 allocation");
	wh5 = (double complex *) fftw_malloc( sizeof(double complex) * NX*(NY/2+1)*NZ);
	if (wh5 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh5 allocation");
	
// Initialize wh1,wh2,wh3;
	for(i=0;i<NX*(NY/2+1);i++) {wh1[i]=0; wh2[i]=0; wh3[i]=0;}	
		
	/* Will use the same memory space for real and complex fields */
	
	wr1 = (double *) w1;
	wr2 = (double *) w2;
	wr3 = (double *) w3;
	wr4 = (double *) w4;
	wr5 = (double *) w5;
	wr6 = (double *) w6;
	wr7 = (double *) w7;
	wr8 = (double *) w8;
	wr9 = (double *) w9;
	wr10 = (double *) w10;
	wr11 = (double *) w11;
	wr12 = (double *) w12;
	wr13 = (double *) w13;
	wr14 = (double *) w14;
	wr15 = (double *) w15;
	wr16 = (double *) w16;
	wr17 = (double *) w17;
	wr18 = (double *) w18;
	
	wrh1 = (double *) wh1;
	wrh2 = (double *) wh2;
	wrh3 = (double *) wh3;
	wrh4 = (double *) wh4;
	wrh5 = (double *) wh5;
// Physic initialisation
//	init_real_mask();

	//set Reynolds numbers using input powers AJB 08/03/12
	param.reynolds = pow(10.0,param.reynolds);
	nu = 1.0 / param.reynolds;
#ifdef BOUSSINESQ	
	param.reynolds_th = pow(10.0,param.reynolds_th);
	nu_th = 1.0 / param.reynolds_th;
#endif
#ifdef MHD
	param.reynolds_m = pow(10.0,param.reynolds_m);
	eta = 1.0 / param.reynolds_m;
#endif
	DEBUG_END_FUNC;
	return;
}
int main(int narg, char **args)
{
  MPI_Init(&narg, &args);
#ifdef _USE_FFTW_FILTER
  fftw_mpi_init();
#endif
  Json::Value root;
  jsonParser::parseJsonInputFile(root, narg, args);
  int dim = jsonParser::getDimensionality(root, DEFAULT_DIMENSIONALITY);

  GRID grid(dim);
  EM_FIELD myfield;
  CURRENT current;
  std::vector<SPECIE*> species;
  std::vector<SPECIE*>::const_iterator spec_iterator;
  my_rng_generator rng;

  //*******************************************BEGIN GRID DEFINITION*******************************************************
  jsonParser::setXrange(root, &grid);
  jsonParser::setYrange(root, &grid);
  jsonParser::setZrange(root, &grid);
  jsonParser::setNCells(root, &grid);
  jsonParser::setNprocs(root, &grid);
  jsonParser::setStretchedGrid(root, &grid);
  jsonParser::setBoundaryConditions(root, &grid);

  jsonParser::setRadiationFriction(root, &grid);
  jsonParser::setMasterProc(root, &grid);

  grid.mpi_grid_initialize(&narg, args);

  jsonParser::setCourantFactor(root, &grid);
  jsonParser::setSimulationTime(root, &grid);
  jsonParser::setMovingWindow(root, &grid);

  srand(time(NULL));
  grid.initRNG(rng, RANDOM_NUMBER_GENERATOR_SEED);

  grid.finalize();

  jsonParser::setDumpControl(root, &grid);
  grid.visualDiag();

  //********************************************END GRID DEFINITION********************************************************
  //*******************************************BEGIN FIELD DEFINITION*********************************************************
  myfield.allocate(&grid);
  myfield.setAllValuesToZero();


  jsonParser::setLaserPulses(root, &myfield);
  myfield.boundary_conditions();

  current.allocate(&grid);
  current.setAllValuesToZero();
  //*******************************************END FIELD DEFINITION***********************************************************
  //******************** BEGIN TO READ OF user defined INPUT - PARAMETERS ****************************************
  bool isThereSpecial = false;
  bool areThereSpheres = false;

  std::string fileSpheresName;
  Json::Value special;
  SPHERES myspheres;
  if (isThereSpecial = jsonParser::setValue(special, root, "special")) {
    if (areThereSpheres = jsonParser::setString(&fileSpheresName, special, "spheresFile")) {
      readAndAllocateSpheres(myspheres, fileSpheresName, grid);
      selectSpheres(myspheres, grid);
    }
  }
  std::map<std::string, PLASMA*>::iterator pIterator;

  //********************  END READ OF "SPECIAL" (user defined) INPUT - PARAMETERS  ****************************************

  //*******************************************BEGIN SPECIES DEFINITION*********************************************************

  std::map<std::string, PLASMA*> plasmas;
  jsonParser::setPlasmas(root, plasmas);

  if (areThereSpheres) {
    for (pIterator = plasmas.begin(); pIterator != plasmas.end(); pIterator++) {
      (pIterator)->second->params.spheres = &myspheres;
    }
  }
  jsonParser::setSpecies(root, species, plasmas, &grid, rng);

  uint64_t totPartNum = 0;
  for (spec_iterator = species.begin(); spec_iterator != species.end(); spec_iterator++) {
    totPartNum += (*spec_iterator)->printParticleNumber();
  }
  if (grid.myid == grid.master_proc) {
    std::cout << "Total particle number: " << totPartNum << std::endl;
  }


  if (areThereSpheres) {
    delete[] myspheres.coords;
  }
  //*******************************************END SPECIES DEFINITION***********************************************************

  //*******************************************BEGIN DIAG DEFINITION**************************************************
  std::map<std::string, outDomain*> outDomains;
  OUTPUT_MANAGER manager(&grid, &myfield, &current, species);
  jsonParser::setDomains(root, outDomains);
  jsonParser::setOutputRequests(root, manager, outDomains, species);
  jsonParser::setOutputDirPath(root, manager);

  manager.initialize();
  //*******************************************END DIAG DEFINITION**************************************************
  grid.setDumpPath(DIRECTORY_DUMP);
  //@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ MAIN CYCLE (DO NOT MODIFY) @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

  if (grid.myid == grid.master_proc) {
    printf("----- START temporal cicle -----\n");
    fflush(stdout);
  }

  int dumpID = 1;
  grid.istep = 0;
  if (grid.dumpControl.doRestart) {
    dumpID = grid.dumpControl.restartFromDump;
    restartFromDump(&dumpID, &grid, &myfield, species);
  }

  while (grid.istep <= grid.getTotalNumberOfTimesteps())
  {
#ifdef NO_ALLOCATION
    manager.close();
    MPI_Finalize();
    exit(0);
#endif

    grid.printTStepEvery(FREQUENCY_STDOUT_STATUS);

    manager.callDiags(grid.istep);

    myfield.openBoundariesE_1();
    myfield.new_halfadvance_B();
    myfield.boundary_conditions();

    current.setAllValuesToZero();
    for (spec_iterator = species.begin(); spec_iterator != species.end(); spec_iterator++) {
      (*spec_iterator)->current_deposition_standard(&current);
    }
    current.pbc();

    for (spec_iterator = species.begin(); spec_iterator != species.end(); spec_iterator++) {
      (*spec_iterator)->position_parallel_pbc();
    }

    myfield.openBoundariesB();
    myfield.new_advance_E(&current);

    myfield.boundary_conditions();

#ifdef _USE_FFTW_FILTER
    myfield.fftw_filter_Efield();
    myfield.boundary_conditions();
#endif

    myfield.openBoundariesE_2();

    if (!(grid.istep % 20)) {
      //myfield.applyFilter(fltr_Ex|fltr_Ey, dir_x|dir_y);
    }

    myfield.new_halfadvance_B();
    myfield.boundary_conditions();

    for (spec_iterator = species.begin(); spec_iterator != species.end(); spec_iterator++) {
      if (grid.isRadiationFrictionEnabled()) {
        (*spec_iterator)->momenta_advance_with_friction(&myfield, grid.getLambda0());
      }
      else {
        (*spec_iterator)->momenta_advance(&myfield);
      }
    }

    grid.time += grid.dt;

    moveWindow(&grid, &myfield, species);

    grid.istep++;
    if (grid.dumpControl.doDump) {
      if (grid.istep != 0 && !(grid.istep % ((int)(grid.dumpControl.dumpEvery / grid.dt)))) {
        dumpFilesForRestart(&dumpID, &grid, &myfield, species);
      }
    }
  }

  manager.close();
  MPI_Finalize();
  exit(0);

}
Beispiel #8
0
int main(int argc, char **argv) {
   
  // Set up MPI
  // ==========
  ierr = MPI_Init(&argc, &argv);
  ierr = MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
  ierr = MPI_Comm_size(MPI_COMM_WORLD, &NTask);
#ifdef SINGLE_PRECISION
  fftwf_mpi_init();
#else
  fftw_mpi_init();
#endif

  if(argc < 2) {
    if(ThisTask == 0) {
      fprintf(stdout, "Input parameters not found\n");
      fprintf(stdout, "Call with <ParameterFile>\n");
    }
    ierr = MPI_Finalize();
    exit(0);
  }
   
  // Read the run parameters and setup code
  // ======================================
  int stepDistr;   
  int subtractLPT;
  double da=0;

  read_parameterfile(argv[1]);
  
  if (UseCOLA == 1){
    subtractLPT = 1; 
    stepDistr   = 0;
    StdDA       = 0;
  } else{
    subtractLPT = 0; 
    stepDistr   = 1;
    StdDA       = 2;
  }
  if (StdDA == 0){
    fullT = 1;
    nLPT  = -2.5;
  }
  filter = 0;              // Whether or not to smooth the forces
  Scale  = 2.*M_PI/Box;    // The force smoothing scale 

  if(ThisTask == 0) {
    printf("Run Parameters\n");
    printf("==============\n");
    printf("Cosmology:\n");
    printf("  Omega Matter(z=0) = %lf\n",Omega);
    printf("  Omega Baryon(z=0) = %lf\n",OmegaBaryon);
    printf("  Hubble Parameter(z=0) = %lf\n",HubbleParam);
    printf("  Sigma8(z=0) = %lf\n",Sigma8);
#ifndef GAUSSIAN
    printf("  F_nl = %lf\n",Fnl);
#endif
    printf("  Primordial Index = %lf\n",PrimordialIndex);
    printf("  Initial Redshift  = %lf\n",Init_Redshift);
    printf("  Final Redshift    = %lf\n",Final_Redshift);
#ifndef GAUSSIAN
    printf("  F_nl Redshift  = %lf\n",Fnl_Redshift);
#endif
    printf("Simulation:\n");
    printf("  Nmesh = %d\n", Nmesh);
    printf("  Nsample = %d\n", Nsample);
    printf("  Boxsize = %lf\n", Box);
    printf("  Buffer Size = %lf\n", Buffer);
    switch(WhichSpectrum) {
      case 0:
        switch (WhichTransfer) {
          case 1:
            printf("  Using Eisenstein & Hu Transfer Function\n");
            break;
          case 2:
            printf("  Using Tabulated Transfer Function\n");
            break;
          default:
            printf("  Using Efstathiou Transfer Function\n");
            break;
        }
        break;
      case 1:
        printf("  Using Eisenstein & Hu Power Spectrum\n");
        break;
      case 2:
        printf("  Using Tabulated Power Spectrum\n");
        break;   
      default:
        printf("  Using Efstathiou Power Spectrum\n");
        break;
    }      
    printf("  Number of Timesteps = %d\n",nsteps);
    if (UseCOLA) {
      printf("  Using COLA method\n\n");
    } else {
      printf("  Using Standard PM method\n\n");
    }
    fflush(stdout);
  }   
  
  // Initial and final scale factors:
  double ai=1.0/(1.0+Init_Redshift);
  double af=1.0/(1.0+Final_Redshift);
    
  if (stepDistr == 0) da=(af-ai)/((double)nsteps);
  if (stepDistr == 1) da=(log(af)-log(ai))/((double)nsteps);
  if (stepDistr == 2) da=(CosmoTime(af)-CosmoTime(ai))/((double)nsteps);

  set_units();

  if (ThisTask == 0) {
    printf("Initialising Transfer Function/Power Spectrum\n");
    printf("=============================================\n");
  }
  initialize_transferfunction();
  initialize_powerspectrum();
  initialize_ffts();
  initialize_parts();

  if(ThisTask == 0) {
    printf("Creating initial conditions\n");
    printf("===========================\n");
    fflush(stdout);
  }

  // Create the calculate the Zeldovich and 2LPT displacements and create the initial conditions
  // ===========================================================================================
  int i, j, k, m;
  unsigned int n, coord;
  double A=ai;                // This is the scale factor which we'll be advancing below.
  double Di=growthD(1.0, A);  // initial growth factor
  double Di2=growthD2(A);     // initial 2nd order growth factor  
  double Dv=DprimeQ(A,1.0);   // T[D_{za}]=dD_{za}/dy
  double Dv2=growthD2v(A);    // T[D_{2lpt}]=dD_{2lpt}/dy

  displacement_fields();
    
  P = (struct part_data *) malloc((int)(ceil(NumPart*Buffer))*sizeof(struct part_data));

  // Generate the initial particle positions and velocities
  // If subtractLPT = 0 (non-COLA), then velocity is ds/dy, which is simply the 2LPT IC.
  // Else set vel = 0 if we subtract LPT. This is the same as the action of the operator L_- from TZE, as initial velocities are in 2LPT.
  for(i=0; i<Local_np; i++) {
    for (j=0; j<Nsample; j++) {
      for (k=0; k<Nsample; k++) {
        coord = (i * Nsample + j) * Nsample + k;
           
        P[coord].ID = ((i + Local_p_start) * Nsample + j) * Nsample + k;
        for (m=0; m<3; m++) {
          P[coord].Dz[m] = ZA[m][coord];
          P[coord].D2[m] = LPT[m][coord];
          if (subtractLPT == 0) {
            P[coord].Vel[m]=P[coord].Dz[m]*Dv+P[coord].D2[m]*Dv2;
          } else {
            P[coord].Vel[m] = 0.0;
          }
        }

        P[coord].Pos[0] = periodic_wrap((i+Local_p_start)*(Box/Nsample)+P[coord].Dz[0]*Di+P[coord].D2[0]*Di2);
        P[coord].Pos[1] = periodic_wrap(j*(Box/Nsample)+P[coord].Dz[1]*Di+P[coord].D2[1]*Di2);
        P[coord].Pos[2] = periodic_wrap(k*(Box/Nsample)+P[coord].Dz[2]*Di+P[coord].D2[2]*Di2);
      }
    }
  }

  for (i=0; i<3; i++) {
    free(ZA[i]);
    free(LPT[i]);
  }

  // Now, we get to the N-Body part where we evolve with time via the Kick-Drift-Kick Method
  // =======================================================================================
  int timeStep;
  double AF=0,AI,AC,AFF=0;
  double growth1   = Di;
  double growth1L2 = Di2;

  // The density grid and force grids  and associated fftw plans
#ifndef MEMORY_MODE
  density = (float_kind *)calloc(2*Total_size,sizeof(float_kind));
  N11  = (float_kind *)calloc(2*Total_size,sizeof(float_kind));
  N12  = (float_kind *)calloc(2*Total_size,sizeof(float_kind));
  N13  = (float_kind *)calloc(2*Total_size,sizeof(float_kind));
  P3D  = (complex_kind*)density;
  FN11 = (complex_kind*)N11;
  FN12 = (complex_kind*)N12;
  FN13 = (complex_kind*)N13;
#ifdef SINGLE_PRECISION
  plan = fftwf_mpi_plan_dft_r2c_3d(Nmesh,Nmesh,Nmesh,density,P3D,MPI_COMM_WORLD,FFTW_ESTIMATE);
  p11  = fftwf_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN11,N11,MPI_COMM_WORLD,FFTW_ESTIMATE);
  p12  = fftwf_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN12,N12,MPI_COMM_WORLD,FFTW_ESTIMATE);
  p13  = fftwf_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN13,N13,MPI_COMM_WORLD,FFTW_ESTIMATE);
#else
  plan = fftw_mpi_plan_dft_r2c_3d(Nmesh,Nmesh,Nmesh,density,P3D,MPI_COMM_WORLD,FFTW_ESTIMATE);
  p11  = fftw_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN11,N11,MPI_COMM_WORLD,FFTW_ESTIMATE);
  p12  = fftw_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN12,N12,MPI_COMM_WORLD,FFTW_ESTIMATE);
  p13  = fftw_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN13,N13,MPI_COMM_WORLD,FFTW_ESTIMATE);
#endif
#endif

 if(ThisTask == 0) {
    printf("Beginning timestepping\n");
    printf("======================\n");
    fflush(stdout);
  }
  
  // AI stores the scale factor to which the velocities have been kicked to. Initially it's just A.
  AI=A;
  for (timeStep=0;timeStep<=nsteps;timeStep++){
    
    // AFF is the scale factor to which we should drift the particle positions.
    // AF is the scale factor to which we should kick the particle velocities.
    if (stepDistr == 0) AFF=A+da;
    if (stepDistr == 1) AFF=A*exp(da);
    if (stepDistr == 2) AFF=AofTime(CosmoTime(A)+da);

    // half time-step for final kick
    if (timeStep == nsteps) {
      AF=A; 
    } else { 
      // Set to mid-point of interval. In the infinitesimal timestep limit, these choices are identical. 
      // How one chooses the mid-point when not in that limit is really an extra degree of freedom in the code 
      // but Tassev et al. report negligible effects from the different choices below. 
      // Hence, this is not exported as an extra switch at this point.
      if (stepDistr == 0) AF=A+da*0.5;
      if (stepDistr == 1) AF=A*exp(da*0.5);
      if (stepDistr == 2) AF=AofTime((CosmoTime(AFF)+CosmoTime(A))*0.5); 
    }
    
    if (ThisTask == 0) {
      printf("Iteration = %d\n------------------\n",timeStep+1);
      printf("a = %lf\n",A);
      printf("z = %lf\n",1.0/A-1.0);
      fflush(stdout);
    }

    // First we check whether all the particles are on the correct processor after the last time step/
    // original 2LPT displacement and move them if not
    if (ThisTask == 0) printf("Moving particles across task boundaries...\n");
    MoveParticles();

#ifdef MEMORY_MODE
    density = (float_kind *)calloc(2*Total_size,sizeof(float_kind));
    P3D  = (complex_kind*)density;
#ifdef SINGLE_PRECISION
    plan = fftwf_mpi_plan_dft_r2c_3d(Nmesh,Nmesh,Nmesh,density,P3D,MPI_COMM_WORLD,FFTW_ESTIMATE);
#else
    plan = fftw_mpi_plan_dft_r2c_3d(Nmesh,Nmesh,Nmesh,density,P3D,MPI_COMM_WORLD,FFTW_ESTIMATE);
#endif
#endif

    // Then we do the Cloud-in-Cell assignment to get the density grid and FFT it.  
    if (ThisTask == 0) printf("Calculating density using Cloud-in-Cell...\n");
    PtoMesh();

#ifdef MEMORY_MODE
    N11  = (float_kind *)calloc(2*Total_size,sizeof(float_kind));
    N12  = (float_kind *)calloc(2*Total_size,sizeof(float_kind));
    N13  = (float_kind *)calloc(2*Total_size,sizeof(float_kind));
    FN11 = (complex_kind*)N11;
    FN12 = (complex_kind*)N12;
    FN13 = (complex_kind*)N13;
#ifdef SINGLE_PRECISION
    p11  = fftwf_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN11,N11,MPI_COMM_WORLD,FFTW_ESTIMATE);
    p12  = fftwf_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN12,N12,MPI_COMM_WORLD,FFTW_ESTIMATE);
    p13  = fftwf_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN13,N13,MPI_COMM_WORLD,FFTW_ESTIMATE);
#else
    p11  = fftw_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN11,N11,MPI_COMM_WORLD,FFTW_ESTIMATE);
    p12  = fftw_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN12,N12,MPI_COMM_WORLD,FFTW_ESTIMATE);
    p13  = fftw_mpi_plan_dft_c2r_3d(Nmesh,Nmesh,Nmesh,FN13,N13,MPI_COMM_WORLD,FFTW_ESTIMATE);
#endif
#endif
    
    // This returns N11,N12,N13 which hold the components of
    // the vector (grad grad^{-2} density) on a grid.
    if (ThisTask == 0) printf("Calculating forces...\n");
    Forces(); 

#ifdef MEMORY_MODE
    free(density);
    for (i=0; i<3; i++) Disp[i] = (float *)malloc(NumPart*sizeof(float));
#ifdef SINGLE_PRECISION
    fftwf_destroy_plan(plan);
#else 
    fftw_destroy_plan(plan);
#endif
#else
    for (i=0; i<3; i++) Disp[i] = (float_kind *)malloc(NumPart*sizeof(float_kind));
#endif
    
    // Now find the accelerations at the particle positions using 3-linear interpolation. 
    if (ThisTask == 0) printf("Calculating accelerations...\n");
    MtoParticles();

#ifdef MEMORY_MODE
  free(N11);
  free(N12);
  free(N13);  
#ifdef SINGLE_PRECISION
  fftwf_destroy_plan(p11);
  fftwf_destroy_plan(p12);
  fftwf_destroy_plan(p13);
#else
  fftw_destroy_plan(p11);
  fftw_destroy_plan(p12);
  fftw_destroy_plan(p13);
#endif
#endif
    
    // Calculate the mean displacement and subtract later.
    if (ThisTask == 0) printf("Calculating mean of displacements...\n");
    double sumDx=0,sumDy=0,sumDz=0;
    for(n=0; n<NumPart; n++) {
      sumDx += Disp[0][n];
      sumDy += Disp[1][n];
      sumDz += Disp[2][n];
    }

    // Make sumDx, sumDy and sumDz global averages
    ierr = MPI_Allreduce(MPI_IN_PLACE,&sumDx,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
    ierr = MPI_Allreduce(MPI_IN_PLACE,&sumDy,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
    ierr = MPI_Allreduce(MPI_IN_PLACE,&sumDz,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);  
    
    sumDx /= (double)TotNumPart; // We will subtract these below to conserve momentum. 
    sumDy /= (double)TotNumPart;
    sumDz /= (double)TotNumPart; 

    if (ThisTask == 0) {
      printf("Kicking the particles...\n");
      fflush(stdout);
    }

    // Kick
    // ===============
    double dda;
    double q1,q2;
    double ax,ay,az;
    double sumx=0,sumy=0,sumz=0; 
    double Om143=pow(Omega/(Omega+(1-Omega)*A*A*A),1./143.);
    
    if (StdDA == 0) {
      dda=Sphi(AI,AF,A);
    } else if (StdDA == 1) {
      dda=(AF-AI)*A/Qfactor(A);
    } else {
      dda=SphiStd(AI,AF);
    }  
    
    q2=1.5*Omega*growth1*growth1*(1.0+7./3.*Om143)*A; // T^2[D_{2lpt}]=d^2 D_{2lpt}/dy^2
    q1=1.5*Omega*growth1*A;                           // T^2[D_{ZA}]=d^2 D_{ZA}/dy^2
    
    for(n=0; n<NumPart; n++) {

      Disp[0][n] -= sumDx;
      Disp[1][n] -= sumDy;
      Disp[2][n] -= sumDz;

      ax=-1.5*Omega*Disp[0][n]-subtractLPT*(P[n].Dz[0]*q1+P[n].D2[0]*q2)/A;
      ay=-1.5*Omega*Disp[1][n]-subtractLPT*(P[n].Dz[1]*q1+P[n].D2[1]*q2)/A;
      az=-1.5*Omega*Disp[2][n]-subtractLPT*(P[n].Dz[2]*q1+P[n].D2[2]*q2)/A;

      P[n].Vel[0] += ax*dda;
      P[n].Vel[1] += ay*dda;
      P[n].Vel[2] += az*dda;

      sumx += P[n].Vel[0];
      sumy += P[n].Vel[1];
      sumz += P[n].Vel[2];
    }

    for (i=0; i<3; i++) free(Disp[i]);

    // Make sumx, sumy and sumz global averages
    ierr = MPI_Allreduce(MPI_IN_PLACE,&sumx,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
    ierr = MPI_Allreduce(MPI_IN_PLACE,&sumy,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
    ierr = MPI_Allreduce(MPI_IN_PLACE,&sumz,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);  
    
    sumx /= (double)TotNumPart;  // We will subtract these below to conserve momentum. 
    sumy /= (double)TotNumPart;  // Should be conserved, but just in case 3-linear interpolation makes a problem.
    sumz /= (double)TotNumPart;  // Never checked whether this makes a difference.

    if (timeStep == nsteps) {

      if (ThisTask == 0) {
        printf("Iteration %d finished\n------------------\n\n", timeStep+1);
        printf("Timestepping finished\n\n");
        fflush(stdout);
      }
    
      // At final timestep, add back LPT velocities if we had subtracted them. 
      // This corresponds to L_+ operator in TZE.
      Dv  = DprimeQ(A,1.0);  // dD_{za}/dy
      Dv2 = growthD2v(A);    // dD_{2lpt}/dy

      for(n=0; n<NumPart; n++) {
        P[n].Vel[0] += -sumx+(P[n].Dz[0]*Dv+P[n].D2[0]*Dv2)*subtractLPT;
        P[n].Vel[1] += -sumy+(P[n].Dz[1]*Dv+P[n].D2[1]*Dv2)*subtractLPT;
        P[n].Vel[2] += -sumz+(P[n].Dz[2]*Dv+P[n].D2[2]*Dv2)*subtractLPT;
      }

      goto finalize; // Sorry for "goto" :)
    }
    
    if (ThisTask == 0) {
      printf("Drifting the particles...\n");
      fflush(stdout);
    }

    // Drift
    // =============
    double dyyy;
    double da1,da2;

    AC = AF;
    AF = AFF;
    
    if (StdDA == 0) {
      dyyy=Sq(A,AF,AC);
    } else if (StdDA == 1) {
      dyyy=(AF-A)/Qfactor(AC);
    } else {
      dyyy=SqStd(A,AF);
    }

    da1=growthD(1.0, AF)-growth1;    // change in D
    da2=growthD2(AF)-growth1L2; // change in D_{2lpt}
    
    for(n=0; n<NumPart; n++) {
        P[n].Pos[0] += (P[n].Vel[0]-sumx)*dyyy;
        P[n].Pos[1] += (P[n].Vel[1]-sumy)*dyyy;
        P[n].Pos[2] += (P[n].Vel[2]-sumz)*dyyy;

        P[n].Pos[0] = periodic_wrap(P[n].Pos[0]+subtractLPT*(P[n].Dz[0]*da1+P[n].D2[0]*da2));
        P[n].Pos[1] = periodic_wrap(P[n].Pos[1]+subtractLPT*(P[n].Dz[1]*da1+P[n].D2[1]*da2));
        P[n].Pos[2] = periodic_wrap(P[n].Pos[2]+subtractLPT*(P[n].Dz[2]*da1+P[n].D2[2]*da2));
    }

    // Step in time
    // ================
    A  = AF;   // WRT to the above name change, A  = AFF
    AI = AC;   // WRT to the above name change, AI = AF

    growth1   = growthD(1.0, A);
    growth1L2 = growthD2(A);

    if (ThisTask == 0) {
      printf("Iteration %d finished\n------------------\n\n", timeStep+1);
      fflush(stdout);
    }
     
    ierr = MPI_Barrier(MPI_COMM_WORLD);

  }

  // Here is the last little bit
  // ===========================
  finalize:

  if (ThisTask == 0) {
    printf("Finishing up\n");
    printf("============\n");
    fflush(stdout);
  }
    
  // Now convert velocities to v_{rsd}\equiv (ds/d\eta)/(a H(a))
  velRSD(A);
    
  // Output a slice just for the sake of doing something with P.
  if (ThisTask == 0) {
    printf("Converting to RSD velocities...\n");
    printf("Outputting particles...\n"); 
  }
  slice();
  print_spec();
  fflush(stdout);

  free_powertable();
  free_transfertable();
#ifdef GENERIC_FNL
  free(KernelTable);
#endif

  free(P);
  free(Slab_to_task);
  free(Part_to_task);
  free(Local_nx_table);
  free(Local_np_table);
#ifndef MEMORY_MODE
  free(density);
  free(N11);
  free(N12);
  free(N13);  
#ifdef SINGLE_PRECISION
  fftwf_destroy_plan(plan);
  fftwf_destroy_plan(p11);
  fftwf_destroy_plan(p12);
  fftwf_destroy_plan(p13);
#else
  fftw_destroy_plan(plan);
  fftw_destroy_plan(p11);
  fftw_destroy_plan(p12);
  fftw_destroy_plan(p13);
#endif
#endif

#ifdef SINGLE_PRECISION
  fftwf_mpi_cleanup();
#else
  fftw_mpi_cleanup();
#endif

  if (ThisTask == 0) printf("Done :)\n");

  MPI_Finalize();   

  return 0;
}
Beispiel #9
0
/*! This routines generates the FFTW-plans to carry out the parallel FFTs
 *  later on. Some auxiliary variables are also initialized.
 */
void pm_init_periodic(void)
{
  int i;
  int slab_to_task_local[PMGRID];

  All.Asmth[0] = ASMTH * All.BoxSize / PMGRID;
  All.Rcut[0] = RCUT * All.Asmth[0];

  /* Initialize FFTW MPI */
  #ifdef FFTW3
  fftw_mpi_init();
  #endif

  #ifdef FFTW3
  /* If using FFTW3, don't create plans yet, just figure out the local array sizes */
  fftsize_complex = fftw_mpi_local_size_3d_transposed(PMGRID, PMGRID, 0.5*PMGRID2, MPI_COMM_WORLD, &nslab_x, &slabstart_x, &nslab_y, &slabstart_y);
  fftsize_real = 2.*fftsize_complex;
  fftw_plan_exists = false;
  #else
  /* Set up the FFTW plan files. */

  fft_forward_plan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, PMGRID, PMGRID, PMGRID,
					     FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
  fft_inverse_plan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, PMGRID, PMGRID, PMGRID,
					     FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);

  /* Workspace out the ranges on each processor. */

  rfftwnd_mpi_local_sizes(fft_forward_plan, &nslab_x, &slabstart_x, &nslab_y, &slabstart_y, &fftsize);
  #endif

  for(i = 0; i < PMGRID; i++)
    slab_to_task_local[i] = 0;

  for(i = 0; i < nslab_x; i++)
    slab_to_task_local[slabstart_x + i] = ThisTask;

  MPI_Allreduce(slab_to_task_local, slab_to_task, PMGRID, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  MPI_Allreduce(&nslab_x, &smallest_slab, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);

  slabs_per_task = malloc(NTask * sizeof(int));
  MPI_Allgather(&nslab_x, 1, MPI_INT, slabs_per_task, 1, MPI_INT, MPI_COMM_WORLD);

  if(ThisTask == 0)
    {
      for(i = 0; i < NTask; i++)
	printf("Task=%d  FFT-Slabs=%d\n", i, slabs_per_task[i]);
    }

  first_slab_of_task = malloc(NTask * sizeof(int));
  MPI_Allgather(&slabstart_x, 1, MPI_INT, first_slab_of_task, 1, MPI_INT, MPI_COMM_WORLD);

  meshmin_list = malloc(3 * NTask * sizeof(int));
  meshmax_list = malloc(3 * NTask * sizeof(int));


  to_slab_fac = PMGRID / All.BoxSize;

  MPI_Allreduce(&fftsize, &maxfftsize, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
}