Exemplo n.º 1
0
void init_fcs(void) {

  FCSResult res;
  fcs_int srf = 1;
  char *method;

  fcs_int   pbc [3] = { pbc_dirs.x, pbc_dirs.y, pbc_dirs.z };
  fcs_float BoxX[3] = { box_x.x, box_x.y, box_x.z };
  fcs_float BoxY[3] = { box_y.x, box_y.y, box_y.z };
  fcs_float BoxZ[3] = { box_z.x, box_z.y, box_z.z };
  fcs_float off [3] = { 0.0, 0.0, 0.0 };

  /* subtract CM momentum */
  if (0 == imdrestart) {
    int i, k; real ptot[4], ptot_2[4], px, py, pz;
    ptot[0] = 0.0; ptot[1] = 0.0; ptot[2] = 0.0, ptot[3] = 0.0; 
    for (k=0; k<NCELLS; ++k) { /* loop over all cells */
      cell *p = CELLPTR(k);
      for (i=0; i<p->n; i++) {
        ptot[0] += IMPULS(p,i,X);
        ptot[1] += IMPULS(p,i,Y);
        ptot[2] += IMPULS(p,i,Z);
        ptot[3] += MASSE(p,i);
      }
    }
#ifdef MPI
    MPI_Allreduce( ptot, ptot_2, 4, REAL, MPI_SUM, cpugrid);
    ptot[0] = ptot_2[0];
    ptot[1] = ptot_2[1]; 
    ptot[2] = ptot_2[2]; 
    ptot[3] = ptot_2[3]; 
#endif
    px = ptot[0]/ptot[3];
    py = ptot[1]/ptot[3];
    pz = ptot[2]/ptot[3];
    for (k=0; k<NCELLS; ++k) { /* loop over all cells */
      cell *p = CELLPTR(k);
      for (i=0; i<p->n; i++) {
        IMPULS(p,i,X) -= px * MASSE(p,i);
        IMPULS(p,i,Y) -= py * MASSE(p,i);
        IMPULS(p,i,Z) -= pz * MASSE(p,i);
      }
    }
  }

  switch (fcs_method) {
    case FCS_METH_DIRECT: method = "direct"; break;
    case FCS_METH_PEPC:   method = "pepc";   break;
    case FCS_METH_FMM:    method = "fmm";    break;
    case FCS_METH_P3M:    method = "p3m";    srf = fcs_near_field_flag; break;
    case FCS_METH_P2NFFT: method = "p2nfft"; srf = fcs_near_field_flag; break;
    case FCS_METH_VMG:    method = "vmg";    break;
    case FCS_METH_PP3MG:  method = "pp3mg";  break;
  }

  /* initialize handle and set common parameters */
  res = fcs_init(&handle, method, cpugrid); 
  ASSERT_FCS(res);
  res = fcs_set_common(handle, srf, BoxX, BoxY, BoxZ, off, pbc, natoms);
  ASSERT_FCS(res);
  res = fcs_require_virial(handle, 1);
  ASSERT_FCS(res);

  /* set method specific parameters */
  switch (fcs_method) {
#ifdef FCS_ENABLE_DIRECT
    case FCS_METH_DIRECT:
      /* nothing to do */
      break;
#endif
#ifdef FCS_ENABLE_PEPC
    case FCS_METH_PEPC:
      res = fcs_pepc_setup(handle, (fcs_float)fcs_pepc_eps, 
            (fcs_float)fcs_pepc_theta );
      ASSERT_FCS(res);
      res = fcs_pepc_set_num_walk_threads( handle, (fcs_int)fcs_pepc_nthreads );
      ASSERT_FCS(res);
      break;
#endif
#ifdef FCS_ENABLE_FMM
    case FCS_METH_FMM:
      res = fcs_fmm_set_absrel(handle, (fcs_int)fcs_fmm_absrel);
      ASSERT_FCS(res);
      res = fcs_fmm_set_tolerance_energy(handle, (fcs_float)fcs_tolerance);
      ASSERT_FCS(res);
      break;
#endif
#ifdef FCS_ENABLE_P3M
    case FCS_METH_P3M:
      if (0==srf) {
        res = fcs_p3m_set_r_cut(handle, (fcs_float)fcs_rcut);
        ASSERT_FCS(res);
      }
      res = fcs_set_tolerance(handle, FCS_TOLERANCE_TYPE_FIELD,
                              (fcs_float)fcs_tolerance);
      ASSERT_FCS(res);
      if (fcs_grid_dim.x) 
        res = fcs_p3m_set_grid(handle, (fcs_int)fcs_grid_dim.x);
      ASSERT_FCS(res);
      break;
#endif
#ifdef FCS_ENABLE_P2NFFT
    case FCS_METH_P2NFFT:
      if (0==srf) {
        res = fcs_p2nfft_set_r_cut(handle, (fcs_float)fcs_rcut);
        ASSERT_FCS(res);
      }
      res = fcs_set_tolerance(handle, FCS_TOLERANCE_TYPE_FIELD,
                              (fcs_float)fcs_tolerance);
      ASSERT_FCS(res);
      if (fcs_grid_dim.x) { 
        res = fcs_p2nfft_set_grid(handle, (fcs_int)fcs_grid_dim.x,
              (fcs_int)fcs_grid_dim.y, (fcs_int)fcs_grid_dim.z);
        ASSERT_FCS(res);
      }
      if (fcs_p2nfft_intpol_order) {
        res = fcs_p2nfft_set_pnfft_interpolation_order(handle, 
              (fcs_int)fcs_p2nfft_intpol_order);
        ASSERT_FCS(res);
      }
      if (fcs_p2nfft_epsI) {
        res = fcs_p2nfft_set_epsI(handle, (fcs_float)fcs_p2nfft_epsI);
        ASSERT_FCS(res);
      }
      //res = fcs_p2nfft_set_pnfft_window_by_name(handle, "bspline");
      //ASSERT_FCS(res);
      break;
#endif
#ifdef FCS_ENABLE_VMG
    case FCS_METH_VMG:
      if (fcs_vmg_near_field_cells) {
        res = fcs_vmg_set_near_field_cells(handle, (fcs_int)fcs_vmg_near_field_cells);
        ASSERT_FCS(res);
      }
      if (fcs_vmg_interpol_order) {
        res = fcs_vmg_set_interpolation_order(handle, (fcs_int)fcs_vmg_interpol_order);
        ASSERT_FCS(res);
      }
      if (fcs_vmg_discr_order) {
        res = fcs_vmg_set_discretization_order(handle, (fcs_int)fcs_vmg_discr_order);
        ASSERT_FCS(res);
      }
      if (fcs_iter_tolerance > 0) {
        res = fcs_vmg_set_precision(handle, (fcs_float)fcs_iter_tolerance);
        ASSERT_FCS(res);
      }
      break;
#endif
#ifdef FCS_ENABLE_PP3MG
    case FCS_METH_PP3MG:
      /* use default values, if not specified otherwise */
      if (fcs_grid_dim.x) {
        res = fcs_pp3mg_set_cells_x(handle, (fcs_int)fcs_grid_dim.x);
        ASSERT_FCS(res);
        res = fcs_pp3mg_set_cells_y(handle, (fcs_int)fcs_grid_dim.y);
        ASSERT_FCS(res);
        res = fcs_pp3mg_set_cells_z(handle, (fcs_int)fcs_grid_dim.z);
        ASSERT_FCS(res);
      }
      if (fcs_pp3mg_ghosts) {
        res = fcs_pp3mg_set_ghosts(handle, (fcs_int)fcs_pp3mg_ghosts);
        ASSERT_FCS(res);
      }
      if (fcs_pp3mg_degree) {
        res = fcs_pp3mg_set_degree(handle, (fcs_int)fcs_pp3mg_degree);
        ASSERT_FCS(res);
      }
      if (fcs_pp3mg_max_part) { 
        res = fcs_pp3mg_set_max_particles(handle, (fcs_int)fcs_pp3mg_max_part);
        ASSERT_FCS(res);
      }
      if (fcs_max_iter) { 
        res = fcs_pp3mg_set_max_iterations(handle,(fcs_int)fcs_max_iter);
        ASSERT_FCS(res);
      }
      if (fcs_iter_tolerance > 0) {
        res = fcs_pp3mg_set_tol(handle, (fcs_float)fcs_iter_tolerance);
        ASSERT_FCS(res);
      }      
break;
#endif
    default: 
      error("FCS method unknown or not implemented"); 
      break;
  }
  pack_fcs();
  res = fcs_tune(handle, nloc, nloc_max, pos, chg);
  ASSERT_FCS(res);

  /* inform about tuned parameters */
  switch (fcs_method) {
    fcs_int grid_dim[3];
    fcs_float r_cut;
#ifdef FCS_ENABLE_P3M
    case FCS_METH_P3M:
      res = fcs_p3m_get_r_cut(handle, &r_cut);
      ASSERT_FCS(res);
      res = fcs_p3m_get_grid(handle, grid_dim);
      ASSERT_FCS(res);
      if (0==myid) 
        printf("FCS: Tuned grid dimensions, cutoff: %d %d %d, %f\n",
               grid_dim[0], grid_dim[1], grid_dim[2], r_cut);
      break;
#endif
#ifdef FCS_ENABLE_P2NFFT
    case FCS_METH_P2NFFT:
      res = fcs_p2nfft_get_grid(handle, grid_dim, grid_dim+1, grid_dim+2);
      ASSERT_FCS(res);
      res = fcs_p2nfft_get_r_cut(handle, &r_cut);
      ASSERT_FCS(res);
      if (0==myid) 
        printf("FCS: Tuned grid dimensions, cutoff: %d %d %d, %f\n",
               grid_dim[0], grid_dim[1], grid_dim[2], r_cut);
      break;
#endif
#ifdef FCS_ENABLE_PP3MG
    case FCS_METH_PP3MG:
      res = fcs_pp3mg_get_cells_x(handle, grid_dim  );
      ASSERT_FCS(res);
      res = fcs_pp3mg_get_cells_y(handle, grid_dim+1);
      ASSERT_FCS(res);
      res = fcs_pp3mg_get_cells_z(handle, grid_dim+2);
      if (0==myid) 
        printf("FCS: Tuned grid dimensions: %d %d %d\n",
               grid_dim[0], grid_dim[1], grid_dim[2]);
      ASSERT_FCS(res);
      break;
#endif
    default: 
      break;
  }

  /* add near-field potential, after fcs_tune */
  if (0==srf) fcs_update_pottab();
}
Exemplo n.º 2
0
/*
 * Test driver for the "p2nfft" solver.
 *
 * Every process reads the complete particle set (position, charge and
 * reference force per particle) from the data file, keeps the particles
 * whose index modulo the communicator size equals its rank, tunes and
 * runs the solver, prints the virial tensor and finally reports the
 * summed energy and the RMS deviation of the computed forces from the
 * reference forces on rank 0.
 *
 * Returns 0 on success.  An unreadable or truncated data file aborts the
 * whole MPI job with exit code 1.
 */
int main(int argc, char **argv)
{
  /* number of particle records expected in the data file */
  enum { N_PARTICLES = 300 };

  int comm_rank, comm_size;
  const char* method = "p2nfft";
/* DEBUG */
//  const char* datafile = "../inp_data/p2nfft/debug_wall_small.dat";
  const char* datafile = "../inp_data/p3m/p3m_wall.dat";
  MPI_Comm comm = MPI_COMM_WORLD;
  fcs_int periodicity[3] = { 1, 1, 1 };
  fcs_int pid;
  fcs_float tolerance = 1.e-3;

  MPI_Init(&argc, &argv);

  MPI_Comm_size(comm, &comm_size);
  MPI_Comm_rank(comm, &comm_rank);

  if(!comm_rank){
    printf("-------------------\n");
    printf("Running p2nfft test\n");
    printf("-------------------\n");
  }

  fcs_float box_l[3] = { 10.0, 10.0, 10.0 };
  fcs_float offset[3] = {0.0, 0.0, 0.0};
/* DEBUG */
//  int n_particles = 4;
  int n_particles = N_PARTICLES;

  /* global data, identical on every process */
  fcs_float charges[N_PARTICLES];
  fcs_float positions[3*N_PARTICLES];
  fcs_float reference_forces[3*N_PARTICLES];

  /* results for the locally owned particles */
  fcs_float far_fields[3*N_PARTICLES];
  fcs_float forces[3*N_PARTICLES];
  fcs_float far_potentials[N_PARTICLES];
  fcs_float energies[N_PARTICLES];
  fcs_float virial[9];

  /* locally owned subset of the particles */
  fcs_int local_particles = 0;
  fcs_float local_charges[N_PARTICLES];
  fcs_float local_positions[3*N_PARTICLES];
  fcs_int global_particle_indices[N_PARTICLES];


  if(!comm_rank)
    printf("Reading %s...\n", datafile);
  FILE *data = fopen(datafile, "r");
  if (!data) {
    /* perror first: it reports errno, which a later stdio call may change */
    perror("ERROR");
    fprintf(stderr, "ERROR: Can't read %s!\n", datafile);
    /* MPI is initialised, so take the whole job down instead of exiting
       on this process only */
    MPI_Abort(comm, 1);
    exit(1);
  }

  /* one record per particle: position (3), charge (1), reference force (3) */
  for (pid = 0; pid < n_particles; pid++) {
    int n_read = fscanf(data,
        "%" FCS_CONV_FLOAT "f %" FCS_CONV_FLOAT "f %" FCS_CONV_FLOAT "f"
        " %" FCS_CONV_FLOAT "f"
        " %" FCS_CONV_FLOAT "f %" FCS_CONV_FLOAT "f %" FCS_CONV_FLOAT "f",
        &positions[3*pid], &positions[3*pid+1], &positions[3*pid+2],
        &charges[pid],
        &reference_forces[3*pid], &reference_forces[3*pid+1], &reference_forces[3*pid+2]);
    if (n_read != 7) {
      /* do not continue with partially filled arrays */
      fprintf(stderr, "ERROR: Malformed or truncated record %lld in %s\n",
          (long long)pid, datafile);
      fclose(data);
      MPI_Abort(comm, 1);
      exit(1);
    }
  }

  fclose(data);


  FCS handle = NULL;
  FCSResult result = NULL;

  MPI_Barrier(MPI_COMM_WORLD);
  if(!comm_rank)
    printf("Initializing p2nfft...\n");
  result = fcs_init(&handle, method, comm);
  assert_fcs(result);

  /* distribute the particles round-robin over the processes */
  if(!comm_rank)
    printf("Reading particles ... \n");
  for (pid = 0; pid < n_particles; ++pid) {
    if (pid % comm_size == comm_rank) {
      local_charges[local_particles] = charges[pid];
      local_positions[3*local_particles] = positions[3*pid];
      local_positions[3*local_particles+1] = positions[3*pid+1];
      local_positions[3*local_particles+2] = positions[3*pid+2];
      global_particle_indices[local_particles] = pid;
      ++local_particles;
    }
  }

  if(!comm_rank)
    printf("Setting parameters...\n");

  /* box vectors along the coordinate axes with edge lengths box_l */
  fcs_float box_a[3] = { 0.0, 0.0, 0.0 };
  fcs_float box_b[3] = { 0.0, 0.0, 0.0 };
  fcs_float box_c[3] = { 0.0, 0.0, 0.0 };
  box_a[0] = box_l[0];
  box_b[1] = box_l[1];
  box_c[2] = box_l[2];

  result = fcs_set_common(handle, 1, box_a, box_b, box_c,
     offset, periodicity, n_particles);
  assert_fcs(result);

  /* Tuning */
  result = fcs_set_tolerance(handle, FCS_TOLERANCE_TYPE_FIELD, tolerance);
  assert_fcs(result);

  if(!comm_rank)
    printf("Tuning p2nfft to tolerance %" FCS_LMOD_FLOAT "e...\n", tolerance);
  result = fcs_tune(handle, local_particles, local_positions, local_charges);
  assert_fcs(result);

  /* activate virial computation */
  result = fcs_set_compute_virial(handle, 1);
  assert_fcs(result);

  /* Far field computation */
  if(!comm_rank)
    printf("Running p2nfft (computing far fields and potentials)...\n");
  result = fcs_run(handle, local_particles,
      local_positions, local_charges, far_fields, far_potentials);
  assert_fcs(result);

  /* get and print virial */
  result = fcs_get_virial(handle, virial);
  assert_fcs(result);

  if(!comm_rank)
    printf("virial tensor = [%" FCS_LMOD_FLOAT "e %" FCS_LMOD_FLOAT "e %" FCS_LMOD_FLOAT "e; %" FCS_LMOD_FLOAT "e %" FCS_LMOD_FLOAT "e %" FCS_LMOD_FLOAT "e; %" FCS_LMOD_FLOAT "e %" FCS_LMOD_FLOAT "e %" FCS_LMOD_FLOAT "e]\n", virial[0], virial[1], virial[2],
        virial[3], virial[4], virial[5], virial[6], virial[7], virial[8]);

  /* Add components: force = charge * field, energy = charge * potential / 2 */
  for (pid = 0; pid < local_particles; pid++) {
    forces[3*pid]   = local_charges[pid] * far_fields[3*pid];
    forces[3*pid+1] = local_charges[pid] * far_fields[3*pid+1];
    forces[3*pid+2] = local_charges[pid] * far_fields[3*pid+2];

    energies[pid] = 0.5 * local_charges[pid] * far_potentials[pid];
  }

  /* Compare forces to reference field */
  fcs_float sqr_sum = 0.0;
  fcs_float sum_energy = 0.0;
  for (pid = 0; pid < local_particles; pid++) {
    sum_energy += energies[pid];

    /* reference forces are stored by global particle index */
    fcs_float d0 = forces[3*pid]   - reference_forces[3*global_particle_indices[pid]];
    fcs_float d1 = forces[3*pid+1] - reference_forces[3*global_particle_indices[pid]+1];
    fcs_float d2 = forces[3*pid+2] - reference_forces[3*global_particle_indices[pid]+2];
    sqr_sum += d0*d0+d1*d1+d2*d2;
  }

  /* Reduce to global values; the results are only defined on rank 0 */
  fcs_float global_total_energy, global_sqr_sum;
  MPI_Reduce(&sum_energy, &global_total_energy, 1, FCS_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
  MPI_Reduce(&sqr_sum, &global_sqr_sum, 1, FCS_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
  if (!comm_rank) {
    printf("sum_energy=%" FCS_LMOD_FLOAT "f\n", global_total_energy);
    printf("rms_error=%e\n", sqrt(global_sqr_sum / (fcs_float)n_particles));
  }

  if(!comm_rank)
    printf("Finalizing...\n");
  fcs_destroy(handle);

  MPI_Finalize();
  if(!comm_rank)
    printf("Done.\n");

  return 0;
}
Exemplo n.º 3
0
/*
 * Debug aid: print the leading position components held by this process
 * to stderr, each line starting with `prefix`.  At most six values are
 * printed, and never more than the 3*num_particles values that have
 * actually been stored.
 */
static void debug_print_positions(const char *prefix,
    const fcs_float *positions, fcs_int num_particles)
{
  int n_values = (num_particles < 2) ? (int)(3 * num_particles) : 6;

  for (int t = 0; t < n_values; t++)
    fprintf(stderr, "%spositions[%d] = %" FCS_LMOD_FLOAT "f\n", prefix, t, positions[t]);
}

/* Print the nine entries of `virial` as three rows of three values. */
static void print_virial_tensor(const fcs_float *virial)
{
  printf("virial tensor = [%" FCS_LMOD_FLOAT "e %" FCS_LMOD_FLOAT "e %" FCS_LMOD_FLOAT "e; %" FCS_LMOD_FLOAT "e %" FCS_LMOD_FLOAT "e %" FCS_LMOD_FLOAT "e; %" FCS_LMOD_FLOAT "e %" FCS_LMOD_FLOAT "e %" FCS_LMOD_FLOAT "e]\n", virial[0], virial[1], virial[2],
      virial[3], virial[4], virial[5], virial[6], virial[7], virial[8]);
}

/*
 * Compare the "p2nfft" solver against the "direct" solver.
 *
 * Charges of alternating sign (+1/-1) are placed on a cubic grid and
 * distributed round-robin over the processes.  The system is solved with
 * both methods; virial, potentials and fields are printed for each, and
 * rank 0 reports both total energies together with the RMS difference
 * of the fields.
 *
 * Returns 0 on success.
 */
int main(int argc, char **argv)
{
  fcs_int total_num_particles = TEST_N_PARTICLES;
  fcs_int num_particles, max_num_particles;
  fcs_float box_size = TEST_BOX_SIZE;
  fcs_int pid, px, py, pz;
  fcs_float positions[3*TEST_N_PARTICLES];
  fcs_float charges[TEST_N_PARTICLES];
  fcs_float direct_fields[3*TEST_N_PARTICLES];
  fcs_float direct_potentials[TEST_N_PARTICLES];
  fcs_float p2nfft_fields[3*TEST_N_PARTICLES];
  fcs_float p2nfft_potentials[TEST_N_PARTICLES];
  fcs_float direct_virial[9], p2nfft_virial[9];

  MPI_Init(&argc, &argv);
  MPI_Comm comm = MPI_COMM_WORLD;
  int comm_rank, comm_size;
  MPI_Comm_size(comm, &comm_size);
  MPI_Comm_rank(comm, &comm_rank);

  /* One particle at the centre of every unit cell; this process keeps
     those whose running index modulo comm_size equals its rank.
     NOTE(review): assumes TEST_BOX_SIZE^3 <= TEST_N_PARTICLES, otherwise
     the arrays above are too small - confirm against the macros. */
  pid = num_particles = 0;
  for (px = 0; px < TEST_BOX_SIZE; ++px) {
    for (py = 0; py < TEST_BOX_SIZE; ++py) {
      for (pz = 0; pz < TEST_BOX_SIZE; ++pz, ++pid) {
        if (pid % comm_size == comm_rank) {
          positions[3*num_particles] = px + 0.5;
          positions[3*num_particles + 1] = py + 0.5;
          positions[3*num_particles + 2] = pz + 0.5;
          /* +1 where px+py+pz is even, -1 where it is odd */
          charges[num_particles] = 1.0-((px + py + pz) % 2)*2;
          ++num_particles;
        }
      }
    }
  }
  max_num_particles = num_particles;

  /* Debugging */
  debug_print_positions("init ", positions, num_particles);

  fcs_float box_a[] = { box_size, 0.0, 0.0 };
  fcs_float box_b[] = { 0.0, box_size, 0.0 };
  fcs_float box_c[] = { 0.0, 0.0, box_size };
  fcs_float offset[] = {0.0, 0.0, 0.0};
  fcs_int periodicity[] = {0, 0, 0};
//  fcs_int periodicity[] = {1, 1, 1};

  FCS fcs_handle = NULL;
  FCSResult fcs_result = NULL;

  /* Calculate this system via FCS direct solver */
  fcs_result = fcs_init(&fcs_handle, "direct", comm);
  assert_fcs(fcs_result);

  fcs_result = fcs_set_common(fcs_handle, 1, box_a, box_b, box_c, offset, periodicity, total_num_particles);
  assert_fcs(fcs_result);

  fcs_result = fcs_set_max_local_particles(fcs_handle, max_num_particles);
  assert_fcs(fcs_result);

  fcs_result = fcs_tune(fcs_handle, num_particles, positions, charges);
  assert_fcs(fcs_result);

  fcs_result = fcs_set_compute_virial(fcs_handle, 1);
  assert_fcs(fcs_result);

  /* Debugging */
  debug_print_positions("before direct run: ", positions, num_particles);

  fcs_result = fcs_run(fcs_handle, num_particles, positions, charges,
      direct_fields, direct_potentials);
  assert_fcs(fcs_result);

  /* Debugging */
  debug_print_positions("after direct run: ", positions, num_particles);

  fcs_result = fcs_get_virial(fcs_handle, direct_virial);
  assert_fcs(fcs_result);

  /* heading and tensor belong together, so both are printed on rank 0 only */
  if(!comm_rank) {
    printf("Virial via FCS direct:\n");
    print_virial_tensor(direct_virial);
  }

  printf("Potentials via FCS direct:\n");
  for (pid = 0; pid < num_particles; ++pid)
    printf("%" FCS_LMOD_FLOAT "f\n", direct_potentials[pid]);

  printf("Fields via FCS direct:\n");
  for (pid = 0; pid < num_particles; ++pid)
    printf("[%" FCS_LMOD_FLOAT "f %" FCS_LMOD_FLOAT "f %" FCS_LMOD_FLOAT "f]\n", direct_fields[3*pid+0], direct_fields[3*pid+1], direct_fields[3*pid+2]);

  fcs_destroy(fcs_handle);


  /* set p2nfft specific parameters */
  fcs_float tolerance = 1.e-3;

  /* Calculate this system via FCS p2nfft solver */
  fcs_result = fcs_init(&fcs_handle, "p2nfft", comm);
  assert_fcs(fcs_result);

  fcs_result = fcs_set_common(fcs_handle, 1, box_a, box_b, box_c, offset, periodicity, total_num_particles);
  assert_fcs(fcs_result);

  fcs_result = fcs_set_max_local_particles(fcs_handle, max_num_particles);
  assert_fcs(fcs_result);

  fcs_result = fcs_set_tolerance(fcs_handle, FCS_TOLERANCE_TYPE_POTENTIAL, tolerance);
  assert_fcs(fcs_result);
  fcs_result = fcs_tune(fcs_handle, num_particles, positions, charges);
  assert_fcs(fcs_result);

  fcs_result = fcs_set_compute_virial(fcs_handle, 1);
  assert_fcs(fcs_result);

  /* Debugging */
  debug_print_positions("test: ", positions, num_particles);

  fcs_result = fcs_run(fcs_handle, num_particles, positions, charges,
      p2nfft_fields, p2nfft_potentials);
  assert_fcs(fcs_result);

  fcs_result = fcs_get_virial(fcs_handle, p2nfft_virial);
  assert_fcs(fcs_result);

  if(!comm_rank) {
    printf("Virial via FCS p2nfft:\n");
    print_virial_tensor(p2nfft_virial);
  }

  printf("Potentials via FCS p2nfft:\n");
  for (pid = 0; pid < num_particles; ++pid)
    printf("%" FCS_LMOD_FLOAT "f\n", p2nfft_potentials[pid]);

  printf("Fields via FCS p2nfft:\n");
  for (pid = 0; pid < num_particles; ++pid)
    printf("[%" FCS_LMOD_FLOAT "f %" FCS_LMOD_FLOAT "f %" FCS_LMOD_FLOAT "f]\n", p2nfft_fields[3*pid+0], p2nfft_fields[3*pid+1], p2nfft_fields[3*pid+2]);

  fcs_destroy(fcs_handle);

  /* Compare results of direct and p2nfft solver */

  /* local energy contributions: charge * potential / 2 */
  fcs_float direct_energy = 0, p2nfft_energy = 0;
  for (pid = 0; pid < num_particles; pid++) {
    direct_energy += 0.5 * charges[pid] * direct_potentials[pid];
    p2nfft_energy += 0.5 * charges[pid] * p2nfft_potentials[pid];
  }

  /* local sum of squared field differences */
  fcs_float sqr_sum = 0.0;
  for (pid = 0; pid < num_particles; ++pid) {
    fcs_float d0 = p2nfft_fields[3*pid]   - direct_fields[3*pid];
    fcs_float d1 = p2nfft_fields[3*pid+1] - direct_fields[3*pid+1];
    fcs_float d2 = p2nfft_fields[3*pid+2] - direct_fields[3*pid+2];
    sqr_sum += d0*d0+d1*d1+d2*d2;
  }

  /* Reduce to global values; the results are only defined on rank 0 */
  fcs_float direct_total_energy, p2nfft_total_energy, total_sqr_sum;
  MPI_Reduce(&direct_energy, &direct_total_energy, 1, FCS_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
  MPI_Reduce(&p2nfft_energy, &p2nfft_total_energy, 1, FCS_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
  MPI_Reduce(&sqr_sum, &total_sqr_sum, 1, FCS_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);

  if (!comm_rank) {
    printf("direct_energy = %" FCS_LMOD_FLOAT "f\n", direct_total_energy);
    printf("p2nfft_energy = %" FCS_LMOD_FLOAT "f\n", p2nfft_total_energy);
    printf("rms_error = %e\n", sqrt(total_sqr_sum / (fcs_float)total_num_particles));
  }

  MPI_Finalize();

  return 0;
}