int main(int argc, char* argv[]) { MPI_Comm comm; int comm_size, comm_rank; fcs_float *x, *q, *f, *p; fcs_int n_axis, n_total, n_local, n_local_max; fcs_int i, j, k; fcs_int p_c, p_start, p_stop,ip; fcs_float e_local, e_total; fcs_float madelung_approx; const fcs_float madelung = 1.74756459463318219; int mpi_thread_requested = MPI_THREAD_MULTIPLE; int mpi_thread_provided; FCS fcs_handle; FCSResult fcs_result; char method[] = "pepc"; fcs_float box_a[] = { 1.0, 0.0, 0.0 }; fcs_float box_b[] = { 0.0, 1.0, 0.0 }; fcs_float box_c[] = { 0.0, 0.0, 1.0 }; fcs_float offset[] = { 0.0, 0.0, 0.0 }; fcs_int periodic[] = { 1, 1, 1 }; //fcs_int periodic[] = { 0, 0, 0 }; fcs_float theta = 0.2; fcs_float epsilon = 1.23e-6; MPI_Init_thread(&argc, &argv, mpi_thread_requested, &mpi_thread_provided); comm = MPI_COMM_WORLD; MPI_Comm_size(comm, &comm_size); MPI_Comm_rank(comm, &comm_rank); if (mpi_thread_provided < mpi_thread_requested && comm_rank == 0) { printf("Call to MPI_INIT_THREAD failed. Requested/provided level of multithreading: %d / %d. Continuing but expect program crash.\n", mpi_thread_requested, mpi_thread_provided); } n_axis = 16; n_total = n_axis * n_axis * n_axis; n_local = n_total / comm_size; if(comm_rank == comm_size-1) n_local += n_total % comm_size; n_local_max = n_total / comm_size + n_total % comm_size; if (comm_rank == 0) { printf("*** RUNNING pepc TEST ***\n"); printf(" n_total = %" FCS_LMOD_INT "d\n", n_total); printf(" n_procs = %d\n", comm_size); printf(" theta = %e\n", theta); printf(" epsilon = %e\n", epsilon); } x = (fcs_float*)malloc(3 * n_local * sizeof(fcs_float)); q = (fcs_float*)malloc( n_local * sizeof(fcs_float)); f = (fcs_float*)malloc(3 * n_local * sizeof(fcs_float)); p = (fcs_float*)malloc( n_local * sizeof(fcs_float)); p_c = 0; p_start = comm_rank*(n_total/comm_size); p_stop = p_start + n_local; for (ip=p_start; ip<p_stop; ip++, p_c++) { i = ip % n_axis; j = (ip / n_axis) % n_axis; k = ip / (n_axis*n_axis); x[3*p_c ] = offset[0] + (i * box_a[0]) / n_axis; x[3*p_c+1] = offset[1] + (j * box_b[1]) / n_axis; x[3*p_c+2] = offset[2] + (k * box_c[2]) / n_axis; q[p_c] = ((i+j+k)%2 ? 1.0 : -1.0); /* printf("init positions (rank %d) for particle id %d: %e %e %e %e\n", */ /* comm_rank, ip, x[3*p_c ], x[3*p_c+1], x[3*p_c+2], q[p_c]); */ } fcs_result = fcs_init(&fcs_handle, method, comm); assert_fcs(fcs_result); fcs_result = fcs_set_common(fcs_handle, 1, box_a, box_b, box_c, offset, periodic, n_total); assert_fcs(fcs_result); fcs_result = fcs_pepc_setup(fcs_handle, epsilon, theta); assert_fcs(fcs_result); fcs_result = fcs_tune(fcs_handle, n_local, n_local_max, x, q); assert_fcs(fcs_result); fcs_result = fcs_run(fcs_handle, n_local, n_local_max, x, q, f, p); assert_fcs(fcs_result); e_local = 0.0; for (i=0; i<n_local; ++i) e_local += p[i] * q[i]; MPI_Reduce(&e_local, &e_total, 1, MPI_DOUBLE, MPI_SUM, 0, comm); //madelung_approx = 8.0 * M_PI / n_axis * e_total / n_total; madelung_approx = 1.0 / n_axis * e_total / n_total; if (comm_rank == 0) { printf("\n"); printf(" Results:\n"); printf(" Energy: %e\n", e_total); printf(" Madelung constant: %e\n", madelung_approx); printf(" Relative error: %e\n", fabs(madelung-fabs(madelung_approx))/madelung); } p_c = 0; p_start = comm_rank*(n_total/comm_size); p_stop = p_start + n_local; for (ip=p_start; ip<p_stop; ip++, p_c++) { /* printf("results (rank %d) for particle id %d: %e %e %e %e\n", */ /* comm_rank, ip, f[3*p_c ], f[3*p_c+1], f[3*p_c+2], p[p_c]); */ /* printf("dataout: %e %e %e %e %e %e %e %e\n", x[3*p_c ], x[3*p_c+1], x[3*p_c+2], q[p_c], */ /* f[3*p_c ], f[3*p_c+1], f[3*p_c+2], p[p_c]); */ } fcs_destroy(fcs_handle); free(x); free(q); free(f); free(p); if (comm_rank == 0) printf("*** pepc DONE ***\n"); MPI_Finalize(); return 0; }
void init_fcs(void) { FCSResult res; fcs_int srf = 1; char *method; fcs_int pbc [3] = { pbc_dirs.x, pbc_dirs.y, pbc_dirs.z }; fcs_float BoxX[3] = { box_x.x, box_x.y, box_x.z }; fcs_float BoxY[3] = { box_y.x, box_y.y, box_y.z }; fcs_float BoxZ[3] = { box_z.x, box_z.y, box_z.z }; fcs_float off [3] = { 0.0, 0.0, 0.0 }; /* subtract CM momentum */ if (0 == imdrestart) { int i, k; real ptot[4], ptot_2[4], px, py, pz; ptot[0] = 0.0; ptot[1] = 0.0; ptot[2] = 0.0, ptot[3] = 0.0; for (k=0; k<NCELLS; ++k) { /* loop over all cells */ cell *p = CELLPTR(k); for (i=0; i<p->n; i++) { ptot[0] += IMPULS(p,i,X); ptot[1] += IMPULS(p,i,Y); ptot[2] += IMPULS(p,i,Z); ptot[3] += MASSE(p,i); } } #ifdef MPI MPI_Allreduce( ptot, ptot_2, 4, REAL, MPI_SUM, cpugrid); ptot[0] = ptot_2[0]; ptot[1] = ptot_2[1]; ptot[2] = ptot_2[2]; ptot[3] = ptot_2[3]; #endif px = ptot[0]/ptot[3]; py = ptot[1]/ptot[3]; pz = ptot[2]/ptot[3]; for (k=0; k<NCELLS; ++k) { /* loop over all cells */ cell *p = CELLPTR(k); for (i=0; i<p->n; i++) { IMPULS(p,i,X) -= px * MASSE(p,i); IMPULS(p,i,Y) -= py * MASSE(p,i); IMPULS(p,i,Z) -= pz * MASSE(p,i); } } } switch (fcs_method) { case FCS_METH_DIRECT: method = "direct"; break; case FCS_METH_PEPC: method = "pepc"; break; case FCS_METH_FMM: method = "fmm"; break; case FCS_METH_P3M: method = "p3m"; srf = fcs_near_field_flag; break; case FCS_METH_P2NFFT: method = "p2nfft"; srf = fcs_near_field_flag; break; case FCS_METH_VMG: method = "vmg"; break; case FCS_METH_PP3MG: method = "pp3mg"; break; } /* initialize handle and set common parameters */ res = fcs_init(&handle, method, cpugrid); ASSERT_FCS(res); res = fcs_set_common(handle, srf, BoxX, BoxY, BoxZ, off, pbc, natoms); ASSERT_FCS(res); res = fcs_require_virial(handle, 1); ASSERT_FCS(res); /* set method specific parameters */ switch (fcs_method) { #ifdef FCS_ENABLE_DIRECT case FCS_METH_DIRECT: /* nothing to do */ break; #endif #ifdef FCS_ENABLE_PEPC case FCS_METH_PEPC: res = fcs_pepc_setup(handle, (fcs_float)fcs_pepc_eps, (fcs_float)fcs_pepc_theta ); ASSERT_FCS(res); res = fcs_pepc_set_num_walk_threads( handle, (fcs_int)fcs_pepc_nthreads ); ASSERT_FCS(res); break; #endif #ifdef FCS_ENABLE_FMM case FCS_METH_FMM: res = fcs_fmm_set_absrel(handle, (fcs_int)fcs_fmm_absrel); ASSERT_FCS(res); res = fcs_fmm_set_tolerance_energy(handle, (fcs_float)fcs_tolerance); ASSERT_FCS(res); break; #endif #ifdef FCS_ENABLE_P3M case FCS_METH_P3M: if (0==srf) { res = fcs_p3m_set_r_cut(handle, (fcs_float)fcs_rcut); ASSERT_FCS(res); } res = fcs_set_tolerance(handle, FCS_TOLERANCE_TYPE_FIELD, (fcs_float)fcs_tolerance); ASSERT_FCS(res); if (fcs_grid_dim.x) res = fcs_p3m_set_grid(handle, (fcs_int)fcs_grid_dim.x); ASSERT_FCS(res); break; #endif #ifdef FCS_ENABLE_P2NFFT case FCS_METH_P2NFFT: if (0==srf) { res = fcs_p2nfft_set_r_cut(handle, (fcs_float)fcs_rcut); ASSERT_FCS(res); } res = fcs_set_tolerance(handle, FCS_TOLERANCE_TYPE_FIELD, (fcs_float)fcs_tolerance); ASSERT_FCS(res); if (fcs_grid_dim.x) { res = fcs_p2nfft_set_grid(handle, (fcs_int)fcs_grid_dim.x, (fcs_int)fcs_grid_dim.y, (fcs_int)fcs_grid_dim.z); ASSERT_FCS(res); } if (fcs_p2nfft_intpol_order) { res = fcs_p2nfft_set_pnfft_interpolation_order(handle, (fcs_int)fcs_p2nfft_intpol_order); ASSERT_FCS(res); } if (fcs_p2nfft_epsI) { res = fcs_p2nfft_set_epsI(handle, (fcs_float)fcs_p2nfft_epsI); ASSERT_FCS(res); } //res = fcs_p2nfft_set_pnfft_window_by_name(handle, "bspline"); //ASSERT_FCS(res); break; #endif #ifdef FCS_ENABLE_VMG case FCS_METH_VMG: if (fcs_vmg_near_field_cells) { res = fcs_vmg_set_near_field_cells(handle, (fcs_int)fcs_vmg_near_field_cells); ASSERT_FCS(res); } if (fcs_vmg_interpol_order) { res = fcs_vmg_set_interpolation_order(handle, (fcs_int)fcs_vmg_interpol_order); ASSERT_FCS(res); } if (fcs_vmg_discr_order) { res = fcs_vmg_set_discretization_order(handle, (fcs_int)fcs_vmg_discr_order); ASSERT_FCS(res); } if (fcs_iter_tolerance > 0) { res = fcs_vmg_set_precision(handle, (fcs_float)fcs_iter_tolerance); ASSERT_FCS(res); } break; #endif #ifdef FCS_ENABLE_PP3MG case FCS_METH_PP3MG: /* use default values, if not specified otherwise */ if (fcs_grid_dim.x) { res = fcs_pp3mg_set_cells_x(handle, (fcs_int)fcs_grid_dim.x); ASSERT_FCS(res); res = fcs_pp3mg_set_cells_y(handle, (fcs_int)fcs_grid_dim.y); ASSERT_FCS(res); res = fcs_pp3mg_set_cells_z(handle, (fcs_int)fcs_grid_dim.z); ASSERT_FCS(res); } if (fcs_pp3mg_ghosts) { res = fcs_pp3mg_set_ghosts(handle, (fcs_int)fcs_pp3mg_ghosts); ASSERT_FCS(res); } if (fcs_pp3mg_degree) { res = fcs_pp3mg_set_degree(handle, (fcs_int)fcs_pp3mg_degree); ASSERT_FCS(res); } if (fcs_pp3mg_max_part) { res = fcs_pp3mg_set_max_particles(handle, (fcs_int)fcs_pp3mg_max_part); ASSERT_FCS(res); } if (fcs_max_iter) { res = fcs_pp3mg_set_max_iterations(handle,(fcs_int)fcs_max_iter); ASSERT_FCS(res); } if (fcs_iter_tolerance > 0) { res = fcs_pp3mg_set_tol(handle, (fcs_float)fcs_iter_tolerance); ASSERT_FCS(res); } break; #endif default: error("FCS method unknown or not implemented"); break; } pack_fcs(); res = fcs_tune(handle, nloc, nloc_max, pos, chg); ASSERT_FCS(res); /* inform about tuned parameters */ switch (fcs_method) { fcs_int grid_dim[3]; fcs_float r_cut; #ifdef FCS_ENABLE_P3M case FCS_METH_P3M: res = fcs_p3m_get_r_cut(handle, &r_cut); ASSERT_FCS(res); res = fcs_p3m_get_grid(handle, grid_dim); ASSERT_FCS(res); if (0==myid) printf("FCS: Tuned grid dimensions, cutoff: %d %d %d, %f\n", grid_dim[0], grid_dim[1], grid_dim[2], r_cut); break; #endif #ifdef FCS_ENABLE_P2NFFT case FCS_METH_P2NFFT: res = fcs_p2nfft_get_grid(handle, grid_dim, grid_dim+1, grid_dim+2); ASSERT_FCS(res); res = fcs_p2nfft_get_r_cut(handle, &r_cut); ASSERT_FCS(res); if (0==myid) printf("FCS: Tuned grid dimensions, cutoff: %d %d %d, %f\n", grid_dim[0], grid_dim[1], grid_dim[2], r_cut); break; #endif #ifdef FCS_ENABLE_PP3MG case FCS_METH_PP3MG: res = fcs_pp3mg_get_cells_x(handle, grid_dim ); ASSERT_FCS(res); res = fcs_pp3mg_get_cells_y(handle, grid_dim+1); ASSERT_FCS(res); res = fcs_pp3mg_get_cells_z(handle, grid_dim+2); if (0==myid) printf("FCS: Tuned grid dimensions: %d %d %d\n", grid_dim[0], grid_dim[1], grid_dim[2]); ASSERT_FCS(res); break; #endif default: break; } /* add near-field potential, after fcs_tune */ if (0==srf) fcs_update_pottab(); }