int main(int argc, char ** argv) { // Initialize the global VCluster openfpm_init(&argc,&argv); // Vcluster Vcluster & vcl = create_vcluster(); //! [Create CartDecomposition vtk gen] CartDecomposition<2,float> dec(vcl); // Physical domain Box<2,float> box({0.0,0.0},{1.0,1.0}); // division on each direction size_t div[2] = {20,20}; // Define ghost Ghost<2,float> g(0.01); // boundary conditions size_t bc[2] = {PERIODIC,PERIODIC}; // Decompose and write the decomposed graph dec.setParameters(div,box,bc,g); dec.decompose(); // create a ghost border dec.calculateGhostBoxes(); // Write the decomposition dec.write("CartDecomposition/out_"); //! [Create CartDecomposition] // deinitialize the library openfpm_finalize(); }
int main(int argc, char* argv[]) { /*! * \page Grid_1_stencil Grid 1 stencil * * ## Initialization ## {#e1_st_init} * * Initialize the library and several objects * * \see \ref e0_s_initialization * * \snippet Grid/1_stencil/main.cpp parameters * * */ //! \cond [parameters] \endcond openfpm_init(&argc,&argv); // domain Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0}); // grid sizes size_t sz[3] = {100,100,100}; // ghost extension Ghost<3,float> g(0.03); //! \cond [parameters] \endcond /*! * \page Grid_1_stencil Grid 1 stencil * * ## Grid create ## {#e1_st_inst} * * Create a distributed grid in 3D. With typedef we create an alias name for aggregate<float[3],float[3]>. * In practice the type of grid_point == aggregate<float[3],float[3]> * * \see \ref e0_s_grid_inst * * \snippet Grid/1_stencil/main.cpp grid * */ //! \cond [grid] \endcond // a convenient alias for aggregate<...> typedef aggregate<float,float> grid_point; grid_dist_id<3, float, grid_point> g_dist(sz,domain,g); //! \cond [grid] \endcond /*! * \page Grid_1_stencil Grid 1 stencil * * ## Loop over grid points ## {#e1_s_loop_gp} * * Get an iterator that go through the point of the domain (No ghost) * * \see \ref e0_s_loop_gp * * \snippet Grid/1_stencil/main.cpp iterator * \snippet Grid/1_stencil/main.cpp iterator2 * */ //! \cond [iterator] \endcond auto dom = g_dist.getDomainIterator(); while (dom.isNext()) { //! \cond [iterator] \endcond /*! * \page Grid_1_stencil Grid 1 stencil * * Inside the cycle we get the local grid key * * \see \ref e0_s_grid_coord * * \snippet Grid/1_stencil/main.cpp local key * */ //! \cond [local key] \endcond auto key = dom.get(); //! \cond [local key] \endcond /*! * \page Grid_1_stencil Grid 1 stencil * * We convert the local grid position, into global position, key_g contain 3 integers that identify the position * of the grid point in global coordinates * * \see \ref e0_s_grid_coord * * \snippet Grid/1_stencil/main.cpp global key * */ //! 
\cond [global key] \endcond auto key_g = g_dist.getGKey(key); //! \cond [global key] \endcond /*! * \page Grid_1_stencil Grid 1 stencil * * we write on the grid point of position (i,j,k) the value i*i + j*j + k*k on the property A. * Mathematically is equivalent to the function * * \f$ f(x,y,z) = x^2 + y^2 + z^2 \f$ * * \snippet Grid/1_stencil/main.cpp function * */ //! \cond [function] \endcond g_dist.template get<A>(key) = key_g.get(0)*key_g.get(0) + key_g.get(1)*key_g.get(1) + key_g.get(2)*key_g.get(2); //! \cond [function] \endcond //! \cond [iterator2] \endcond ++dom; } //! \cond [iterator2] \endcond /*! * \page Grid_1_stencil Grid 1 stencil * * ## Ghost ## {#e1_s_ghost} * * Each sub-domain has an extended part, that is materially contained into another processor. * In general is not synchronized * ghost_get<A> synchronize the property A in the ghost part * * \snippet Grid/1_stencil/main.cpp ghost * */ //! \cond [ghost] \endcond g_dist.template ghost_get<A>(); //! \cond [ghost] \endcond /*! * \page Grid_1_stencil Grid 1 stencil * * Get again another iterator, iterate across all the domain points, calculating a Laplace stencil. Write the * result on B * * \snippet Grid/1_stencil/main.cpp laplacian * */ //! \cond [laplacian] \endcond auto dom2 = g_dist.getDomainIterator(); while (dom2.isNext()) { auto key = dom2.get(); // Laplace stencil g_dist.template get<B>(key) = g_dist.template get<A>(key.move(x,1)) + g_dist.template get<A>(key.move(x,-1)) + g_dist.template get<A>(key.move(y,1)) + g_dist.template get<A>(key.move(y,-1)) + g_dist.template get<A>(key.move(z,1)) + g_dist.template get<A>(key.move(z,-1)) - 6*g_dist.template get<A>(key); ++dom2; } //! \cond [laplacian] \endcond /*! * \page Grid_1_stencil Grid 1 stencil * * * Finally we want a nice output to visualize the information stored by the distributed grid * * \see \ref e0_s_VTK_vis * * \snippet Grid/1_stencil/main.cpp output * */ //! \cond [output] \endcond g_dist.write("output"); //! 
\cond [output] \endcond /*! * \page Grid_1_stencil Grid 1 stencil * * Deinitialize the library * * \snippet Grid/1_stencil/main.cpp finalize * */ //! \cond [finalize] \endcond openfpm_finalize(); //! \cond [finalize] \endcond /*! * \page Grid_1_stencil Grid 1 stencil * * # Full code # {#code} * * \include Grid/1_stencil/main.cpp * */ }
int main(int argc, char* argv[]) { /*! * \page Vector_5_md_vl_sym_crs Vector 5 molecular dynamic with symmetric Verlet list crossing scheme * * ## Simulation ## {#md_e5_sym_sim_crs} * * The simulation is equal to the simulation explained in the example molecular dynamic * * \see \ref md_e5_sym * * The difference is that we create a symmetric Verlet-list for crossing scheme instead of a normal one * \snippet Vector/5_molecular_dynamic_sym_crs/main.cpp sim verlet * * The rest of the code remain unchanged * * \snippet Vector/5_molecular_dynamic_sym_crs/main.cpp simulation * */ //! \cond [simulation] \endcond double dt = 0.00025; double sigma = 0.1; double r_cut = 3.0*sigma; double r_gskin = 1.3*r_cut; double sigma12 = pow(sigma,12); double sigma6 = pow(sigma,6); openfpm::vector<double> x; openfpm::vector<openfpm::vector<double>> y; openfpm_init(&argc,&argv); Vcluster & v_cl = create_vcluster(); // we will use it do place particles on a 10x10x10 Grid like size_t sz[3] = {10,10,10}; // domain Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0}); // Boundary conditions size_t bc[3]={PERIODIC,PERIODIC,PERIODIC}; // ghost, big enough to contain the interaction radius Ghost<3,float> ghost(r_gskin); ghost.setLow(0,0.0); ghost.setLow(1,0.0); ghost.setLow(2,0.0); vector_dist<3,double, aggregate<double[3],double[3]> > vd(0,box,bc,ghost,BIND_DEC_TO_GHOST); size_t k = 0; size_t start = vd.accum(); auto it = vd.getGridIterator(sz); while (it.isNext()) { vd.add(); auto key = it.get(); vd.getLastPos()[0] = key.get(0) * it.getSpacing(0); vd.getLastPos()[1] = key.get(1) * it.getSpacing(1); vd.getLastPos()[2] = key.get(2) * it.getSpacing(2); vd.template getLastProp<velocity>()[0] = 0.0; vd.template getLastProp<velocity>()[1] = 0.0; vd.template getLastProp<velocity>()[2] = 0.0; vd.template getLastProp<force>()[0] = 0.0; vd.template getLastProp<force>()[1] = 0.0; vd.template getLastProp<force>()[2] = 0.0; k++; ++it; } vd.map(); vd.ghost_get<>(); timer tsim; tsim.start(); //! 
\cond [sim verlet] \endcond // Get the Cell list structure auto NN = vd.getVerletCrs(r_gskin);; //! \cond [sim verlet] \endcond // calculate forces calc_forces(vd,NN,sigma12,sigma6,r_cut); unsigned long int f = 0; int cnt = 0; double max_disp = 0.0; // MD time stepping for (size_t i = 0; i < 10000 ; i++) { // Get the iterator auto it3 = vd.getDomainIterator(); double max_displ = 0.0; // integrate velicity and space based on the calculated forces (Step1) while (it3.isNext()) { auto p = it3.get(); // here we calculate v(tn + 0.5) vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0]; vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1]; vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2]; Point<3,double> disp({vd.template getProp<velocity>(p)[0]*dt,vd.template getProp<velocity>(p)[1]*dt,vd.template getProp<velocity>(p)[2]*dt}); // here we calculate x(tn + 1) vd.getPos(p)[0] += disp.get(0); vd.getPos(p)[1] += disp.get(1); vd.getPos(p)[2] += disp.get(2); if (disp.norm() > max_displ) max_displ = disp.norm(); ++it3; } if (max_disp < max_displ) max_disp = max_displ; // Because we moved the particles in space we have to map them and re-sync the ghost if (cnt % 10 == 0) { vd.map(); vd.template ghost_get<>(); // Get the Cell list structure vd.updateVerlet(NN,r_gskin,VL_CRS_SYMMETRIC); } else { vd.template ghost_get<>(SKIP_LABELLING); } cnt++; // calculate forces or a(tn + 1) Step 2 calc_forces(vd,NN,sigma12,sigma6,r_cut); // Integrate the velocity Step 3 auto it4 = vd.getDomainIterator(); while (it4.isNext()) { auto p = it4.get(); // here we calculate v(tn + 1) vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0]; vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1]; vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2]; ++it4; } // After every iteration collect some statistic about the confoguration if (i % 100 == 0) { // 
We write the particle position for visualization (Without ghost) vd.deleteGhost(); vd.write("particles_",f); // we resync the ghost vd.ghost_get<>(); // We calculate the energy double energy = calc_energy(vd,NN,sigma12,sigma6,r_cut); auto & vcl = create_vcluster(); vcl.sum(energy); vcl.max(max_disp); vcl.execute(); // we save the energy calculated at time step i c contain the time-step y contain the energy x.add(i); y.add({energy}); // We also print on terminal the value of the energy // only one processor (master) write on terminal if (vcl.getProcessUnitID() == 0) std::cout << "Energy: " << energy << " " << max_disp << " " << std::endl; max_disp = 0.0; f++; } } tsim.stop(); std::cout << "Time: " << tsim.getwct() << std::endl; //! \cond [simulation] \endcond // Google charts options, it store the options to draw the X Y graph GCoptions options; // Title of the graph options.title = std::string("Energy with time"); // Y axis name options.yAxis = std::string("Energy"); // X axis name options.xAxis = std::string("iteration"); // width of the line options.lineWidth = 1.0; // Object that draw the X Y graph GoogleChart cg; // Add the graph // The graph that it produce is in svg format that can be opened on browser cg.AddLinesGraph(x,y,options); // Write into html format cg.write("gc_plot2_out.html"); //! \cond [google chart] \endcond /*! * \page Vector_5_md_vl_sym_crs Vector 5 molecular dynamic with symmetric Verlet list crossing scheme * * ## Finalize ## {#finalize_v_e5_md_sym_crs} * * At the very end of the program we have always to de-initialize the library * * \snippet Vector/5_molecular_dynamic_sym_crs/main.cpp finalize * */ //! \cond [finalize] \endcond openfpm_finalize(); //! \cond [finalize] \endcond /*! * \page Vector_5_md_vl_sym_crs Vector 5 molecular dynamic with symmetric Verlet list crossing scheme * * ## Full code ## {#full_code_v_e5_md_sym_crs} * * \include Vector/5_molecular_dynamic_sym_crs/main.cpp * */ }
int main(int argc, char* argv[]) { /*! * * \page Vector_4_complex_prop Vector 4 complex properties * * * ## Initialization and vector creation ## * * We first initialize the library and define useful constants * * \see \ref e0_s_init * * \snippet Vector/4_complex_prop/main.cpp lib init * * We also define a custom structure * * \snippet Vector/4_complex_prop/main.cpp struct A * * After we initialize the library we can create a vector with complex properties * with the following line * * \snippet Vector/4_complex_prop/main.cpp vect create * * In this this particular case every particle carry a scalar, * a vector in form of float[3], a Point, a list * in form of vector of float and a list of custom structures, and a vector of vector. * In general particles can have properties of arbitrary complexity. * * \warning For arbitrary complexity mean that we can use any openfpm data structure with and arbitrary nested complexity. * For example a openfpm::vector<aggregate<grid_cpu<openfpm::vector<aggregate<double,double[3]>>>,openfpm::vector<float>> is valid * \verbatim particle * vector / \ / \ grid vector<float> /\ / \ double double[3] * \endverbatim * * Our custom data-structure A is defined below. Note that this data-structure * does not have pointers * * \snippet Vector/4_complex_prop/main.cpp struct A * * * \warning custom data structure are allowed only if they does not have pointer. * In case they have pointer we have to define how to serialize our data-structure * * \see \ref vector_example_cp_ser * */ //! 
\cond [lib init] \endcond // initialize the library openfpm_init(&argc,&argv); // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y Box<2,float> domain({0.0,0.0},{1.0,1.0}); // Here we define the boundary conditions of our problem size_t bc[2]={PERIODIC,PERIODIC}; // extended boundary around the domain, and the processor domain Ghost<2,float> g(0.01); // the scalar is the element at position 0 in the aggregate constexpr int scalar = 0; // the vector is the element at position 1 in the aggregate constexpr int vector = 1; // the tensor is the element at position 2 in the aggregate constexpr int point = 2; // A list1 constexpr int list = 3; // A listA constexpr int listA = 4; // A list of list constexpr int listlist = 5; //! \cond [lib init] \endcond //! \cond [struct A] \endcond // The custom structure struct A { float p1; int p2; A() {}; A(float p1, int p2) :p1(p1),p2(p2) {} }; //! \cond [struct A] \endcond //! \cond [vect create] \endcond vector_dist<2,float, aggregate<float, float[3], Point<3,double>, openfpm::vector<float>, openfpm::vector<A>, openfpm::vector<openfpm::vector<float>>> > vd(4096,domain,bc,g); //! \cond [vect create] \endcond /*! * * \page Vector_4_complex_prop Vector 4 complex properties * * * ## Assign values to properties ## * * Assign values to properties does not changes, from the simple case. Consider * now that each particle has a list, so when we can get the property listA for particle p * and resize such list with **vd.getProp<listA>(p).resize(...)**. We can add new elements at the * end with **vd.getProp<listA>(p).add(...)** and get some element of this list with **vd.getProp<listA>(p).get(i)**. * More in general vd.getProp<listA>(p) return a reference to the openfpm::vector contained by the particle. * * \snippet Vector/4_complex_prop/main.cpp vect assign * */ //! 
\cond [vect assign] \endcond auto it = vd.getDomainIterator(); while (it.isNext()) { auto p = it.get(); // we define x, assign a random position between 0.0 and 1.0 vd.getPos(p)[0] = (float)rand() / RAND_MAX; // we define y, assign a random position between 0.0 and 1.0 vd.getPos(p)[1] = (float)rand() / RAND_MAX; vd.getProp<scalar>(p) = 1.0; vd.getProp<vector>(p)[0] = 1.0; vd.getProp<vector>(p)[1] = 1.0; vd.getProp<vector>(p)[2] = 1.0; vd.getProp<point>(p).get(0) = 1.0; vd.getProp<point>(p).get(1) = 1.0; vd.getProp<point>(p).get(2) = 1.0; size_t n_cp = (float)10.0 * rand()/RAND_MAX; vd.getProp<listA>(p).resize(n_cp); for (size_t i = 0 ; i < n_cp ; i++) { vd.getProp<list>(p).add(i + 10); vd.getProp<list>(p).add(i + 20); vd.getProp<list>(p).add(i + 30); vd.getProp<listA>(p).get(i) = A(i+10.0,i+20.0); } vd.getProp<listlist>(p).resize(2); vd.getProp<listlist>(p).get(0).resize(2); vd.getProp<listlist>(p).get(1).resize(2); vd.getProp<listlist>(p).get(0).get(0) = 1.0; vd.getProp<listlist>(p).get(0).get(1) = 2.0; vd.getProp<listlist>(p).get(1).get(0) = 3.0; vd.getProp<listlist>(p).get(1).get(1) = 4.0; // next particle ++it; } //! \cond [vect assign] \endcond /*! * * \page Vector_4_complex_prop Vector 4 complex properties * * * ## Mapping and ghost_get ## * * Particles are redistributed across processors all properties are communicated but instead of * using map we use **map_list** that we can use to select properties. * A lot of time complex properties can be recomputed and communicate them is not a good idea. * The same concept also apply for ghost_get. In general we choose which properties to communicate * * * \see \ref e0_s_map * * \see \ref e1_part_ghost * * \snippet Vector/4_complex_prop/main.cpp vect map ghost * */ //! 
\cond [vect map ghost] \endcond // Particles are redistribued across the processors but only the scalar,vector, and point properties // are transfert vd.map_list<scalar,vector,point,list,listA,listlist>(); // Synchronize the ghost vd.ghost_get<scalar,vector,point,listA,listlist>(); //! \cond [vect map ghost] \endcond /*! * * \page Vector_4_complex_prop Vector 4 complex properties * * * ## Output and VTK visualization ## * * Vector with complex properties can be still be visualized, because unknown properties are * automatically excluded * * \see \ref e0_s_vis_vtk * * \snippet Vector/4_complex_prop/main.cpp vtk * */ //! \cond [vtk] \endcond vd.write("particles"); //! \cond [vtk] \endcond /*! * * \page Vector_4_complex_prop Vector 4 complex properties * * ## Print 4 particles in the ghost area ## * * Here we print that the first 4 particles to show that the list of A and the list of list are filled * and the ghosts contain the correct information * * \snippet Vector/4_complex_prop/main.cpp print ghost info * */ //! \cond [print ghost info] \endcond size_t fg = vd.size_local(); Vcluster & v_cl = create_vcluster(); if (v_cl.getProcessUnitID() == 0) { for ( ; fg < vd.size_local()+4 ; fg++) { std::cout << "List of A" << std::endl; for (size_t i = 0 ; i < vd.getProp<listA>(fg).size() ; i++) std::cout << "Element: " << i << " p1=" << vd.getProp<listA>(fg).get(i).p1 << " p2=" << vd.getProp<listA>(fg).get(i).p2 << std::endl; std::cout << "List of list" << std::endl; for (size_t i = 0 ; i < vd.getProp<listlist>(fg).size() ; i++) { for (size_t j = 0 ; j < vd.getProp<listlist>(fg).get(i).size() ; j++) std::cout << "Element: " << i << " " << j << " " << vd.getProp<listlist>(fg).get(i).get(j) << std::endl; } } } //! \cond [print ghost info] \endcond /*! * \page Vector_4_complex_prop Vector 4 complex properties * * ## Finalize ## {#finalize} * * At the very end of the program we have always to de-initialize the library * * \snippet Vector/4_complex_prop/main.cpp finalize * */ //! 
\cond [finalize] \endcond openfpm_finalize(); //! \cond [finalize] \endcond /*! * \page Vector_4_complex_prop Vector 4 complex properties * * # Full code # {#code} * * \include Vector/4_complex_prop/main.cpp * */ }
int main(int argc, char* argv[]) { /*! * \page Stokes_1_3D_petsc Stokes incompressible 3D petsc * * ## Initialization ## {#num_sk_inc_petsc_3D_init} * * After model our equation we: * * Initialize the library * * Define some useful constants * * define Ghost size * * Non-periodic boundary conditions * * Padding domain expansion * * Padding and Ghost differ in the fact the padding extend the domain. * Ghost is an extension for each sub-domain * * \snippet Numerics/Stoke_flow/0_2D_incompressible/main_petsc.cpp init * */ //! \cond [init] \endcond // Initialize openfpm_init(&argc,&argv); // velocity in the grid is the property 0, pressure is the property 1 constexpr int velocity = 0; constexpr int pressure = 1; // Domain Box<3,float> domain({0.0,0.0,0.0},{3.0,1.0,1.0}); // Ghost (Not important in this case but required) Ghost<3,float> g(0.01); // Grid points on x=36 and y=12 z=12 long int sz[] = {36,12,12}; size_t szu[3]; szu[0] = (size_t)sz[0]; szu[1] = (size_t)sz[1]; szu[2] = (size_t)sz[2]; // We need one more point on the left and down part of the domain // This is given by the boundary conditions that we impose. // Padding<3> pd({1,1,1},{0,0,0}); //! \cond [init] \endcond /*! * \page Stokes_1_3D_petsc Stokes incompressible 3D petsc * * Distributed grid that store the solution * * \see \ref e0_s_grid_inst * * \snippet Numerics/Stoke_flow/1_3D_incompressible/main_petsc.cpp grid inst * */ //! \cond [grid inst] \endcond grid_dist_id<3,float,aggregate<float[3],float>> g_dist(szu,domain,g); //! \cond [grid inst] \endcond /*! * \page Stokes_1_3D_petsc Stokes incompressible 3D petsc * * Solving the system above require the solution of a system like that * * \f$ Ax = b \quad x = A^{-1}b\f$ * * where A is the system the discretize the left hand side of the equations + boundary conditions * and b discretize the right hand size + boundary conditions * * FDScheme is the object that we use to produce the Matrix A and the vector b. 
* Such object require the maximum extension of the stencil * * \snippet Numerics/Stoke_flow/1_3D_incompressible/main_petsc.cpp fd scheme * */ //! \cond [fd scheme] \endcond // It is the maximum extension of the stencil (order 2 laplacian stencil has extension 1) Ghost<3,long int> stencil_max(1); // Finite difference scheme FDScheme<lid_nn> fd(pd, stencil_max, domain, g_dist.getGridInfo(), g_dist); //! \cond [fd scheme] \endcond /*! * \page Stokes_1_3D_petsc Stokes incompressible 3D petsc * * ## Impose the equation on the domain ## {#num_sk_inc_3D_ied} * * Here we impose the system of equation, we start from the incompressibility Eq imposed in the bulk with the * exception of the first point {0,0} and than we set P = 0 in {0,0}, why we are doing this is again * mathematical to have a well defined system, an intuitive explanation is that P and P + c are both * solution for the incompressibility equation, this produce an ill-posed problem to make it well posed * we set one point in this case {0,0} the pressure to a fixed constant for convenience P = 0 * * The best way to understand what we are doing is to draw a smaller example like 8x8. * Considering that we have one additional point on the left for padding we have a grid * 9x9. If on each point we have v_x v_y and P unknown we have * 9x9x3 = 243 unknown. In order to fully determine and unique solution we have to * impose 243 condition. The code under impose (in the case of 9x9) between domain * and bulk 243 conditions. * * \snippet Numerics/Stoke_flow/1_3D_incompressible/main_petsc.cpp impose eq dom * * */ //! 
\cond [impose eq dom] \endcond // start and end of the bulk fd.impose(ic_eq(),0.0, EQ_4, {0,0,0},{sz[0]-2,sz[1]-2,sz[2]-2},true); fd.impose(Prs(), 0.0, EQ_4, {0,0,0},{0,0,0}); fd.impose(vx_eq(),0.0, EQ_1, {1,0},{sz[0]-2,sz[1]-2,sz[2]-2}); fd.impose(vy_eq(),0.0, EQ_2, {0,1},{sz[0]-2,sz[1]-2,sz[2]-2}); fd.impose(vz_eq(),0.0, EQ_3, {0,0,1},{sz[0]-2,sz[1]-2,sz[2]-2}); // v_x // R L (Right,Left) fd.impose(v_x(),0.0, EQ_1, {0,0,0}, {0,sz[1]-2,sz[2]-2}); fd.impose(v_x(),0.0, EQ_1, {sz[0]-1,0,0},{sz[0]-1,sz[1]-2,sz[2]-2}); // T B (Top,Bottom) fd.impose(avg_y_vx_f(),0.0, EQ_1, {0,-1,0}, {sz[0]-1,-1,sz[2]-2}); fd.impose(avg_y_vx(),0.0, EQ_1, {0,sz[1]-1,0},{sz[0]-1,sz[1]-1,sz[2]-2}); // A F (Forward,Backward) fd.impose(avg_z_vx_f(),0.0, EQ_1, {0,-1,-1}, {sz[0]-1,sz[1]-1,-1}); fd.impose(avg_z_vx(),0.0, EQ_1, {0,-1,sz[2]-1},{sz[0]-1,sz[1]-1,sz[2]-1}); // v_y // R L fd.impose(avg_x_vy_f(),0.0, EQ_2, {-1,0,0}, {-1,sz[1]-1,sz[2]-2}); fd.impose(avg_x_vy(),1.0, EQ_2, {sz[0]-1,0,0},{sz[0]-1,sz[1]-1,sz[2]-2}); // T B fd.impose(v_y(), 0.0, EQ_2, {0,0,0}, {sz[0]-2,0,sz[2]-2}); fd.impose(v_y(), 0.0, EQ_2, {0,sz[1]-1,0},{sz[0]-2,sz[1]-1,sz[2]-2}); // F A fd.impose(avg_z_vy(),0.0, EQ_2, {-1,0,sz[2]-1}, {sz[0]-1,sz[1]-1,sz[2]-1}); fd.impose(avg_z_vy_f(),0.0, EQ_2, {-1,0,-1}, {sz[0]-1,sz[1]-1,-1}); // v_z // R L fd.impose(avg_x_vz_f(),0.0, EQ_3, {-1,0,0}, {-1,sz[1]-2,sz[2]-1}); fd.impose(avg_x_vz(),1.0, EQ_3, {sz[0]-1,0,0},{sz[0]-1,sz[1]-2,sz[2]-1}); // T B fd.impose(avg_y_vz(),0.0, EQ_3, {-1,sz[1]-1,0},{sz[0]-1,sz[1]-1,sz[2]-1}); fd.impose(avg_y_vz_f(),0.0, EQ_3, {-1,-1,0}, {sz[0]-1,-1,sz[2]-1}); // F A fd.impose(v_z(),0.0, EQ_3, {0,0,0}, {sz[0]-2,sz[1]-2,0}); fd.impose(v_z(),0.0, EQ_3, {0,0,sz[2]-1},{sz[0]-2,sz[1]-2,sz[2]-1}); // When we pad the grid, there are points of the grid that are not // touched by the previous condition. Mathematically this lead // to have too many variables for the conditions that we are imposing. 
// Here we are imposing variables that we do not touch to zero // // L R fd.impose(Prs(), 0.0, EQ_4, {-1,-1,-1},{-1,sz[1]-1,sz[2]-1}); fd.impose(Prs(), 0.0, EQ_4, {sz[0]-1,-1,-1},{sz[0]-1,sz[1]-1,sz[2]-1}); // T B fd.impose(Prs(), 0.0, EQ_4, {0,sz[1]-1,-1}, {sz[0]-2,sz[1]-1,sz[2]-1}); fd.impose(Prs(), 0.0, EQ_4, {0,-1 ,-1}, {sz[0]-2,-1, sz[2]-1}); // F A fd.impose(Prs(), 0.0, EQ_4, {0,0,sz[2]-1}, {sz[0]-2,sz[1]-2,sz[2]-1}); fd.impose(Prs(), 0.0, EQ_4, {0,0,-1}, {sz[0]-2,sz[1]-2,-1}); // Impose v_x v_y v_z padding fd.impose(v_x(), 0.0, EQ_1, {-1,-1,-1},{-1,sz[1]-1,sz[2]-1}); fd.impose(v_y(), 0.0, EQ_2, {-1,-1,-1},{sz[0]-1,-1,sz[2]-1}); fd.impose(v_z(), 0.0, EQ_3, {-1,-1,-1},{sz[0]-1,sz[1]-1,-1}); //! \cond [impose eq dom] \endcond /*! * \page Stokes_1_3D_petsc Stokes incompressible 3D petsc * * ## Solve the system of equation ## {#num_sk_inc_3D_petsc_sse} * * Once we imposed all the equations we can retrieve the Matrix A and the vector b * and pass these two element to the solver. In this example we are using PETSC solvers * direct/Iterative solvers. While Umfpack * has only one solver, PETSC wrap several solvers. The function best_solve set the solver in * the modality to try multiple solvers to solve your system. The subsequent call to solve produce a report * of all the solvers tried comparing them in error-convergence and speed. If you do not use * best_solve try to solve your system with the default solver GMRES (That is the most robust iterative solver * method) * * \snippet Numerics/Stoke_flow/1_3D_incompressible/main_petsc.cpp solver * */ //! \cond [solver] \endcond // Create an PETSC solver petsc_solver<double> solver; // Warning try many solver and collect statistics require a lot of time // To just solve you can comment this line // solver.best_solve(); // Give to the solver A and b, return x, the solution auto x = solver.solve(fd.getA(),fd.getB()); //! \cond [solver] \endcond /*! 
* \page Stokes_1_3D_petsc Stokes incompressible 3D petsc * * ## Copy the solution on the grid and write on VTK ## {#num_sk_inc_3D_petsc_csg} * * Once we have the solution we copy it on the grid * * \snippet Numerics/Stoke_flow/1_3D_incompressible/main_petsc.cpp copy write * */ //! \cond [copy write] \endcond // Bring the solution to grid fd.template copy<velocity,pressure>(x,{0,0},{sz[0]-1,sz[1]-1,sz[2]-1},g_dist); g_dist.write("lid_driven_cavity_p_petsc"); //! \cond [copy write] \endcond /*! * \page Stokes_1_3D_petsc Stokes incompressible 3D petsc * * ## Finalize ## {#num_sk_inc_3D_petsc_fin} * * At the very end of the program we have always to de-initialize the library * * \snippet Numerics/Stoke_flow/1_3D_incompressible/main_petsc.cpp fin lib * */ //! \cond [fin lib] \endcond openfpm_finalize(); //! \cond [fin lib] \endcond /*! * \page Stokes_1_3D_petsc Stokes incompressible 3D petsc * * # Full code # {#num_sk_inc_3D_petsc_code} * * \include Numerics/Stoke_flow/1_3D_incompressible/main_petsc.cpp * */ }
int main(int argc, char* argv[]) { // // ### WIKI 2 ### // // Initialize the library and several objects // openfpm_init(&argc,&argv); // // ### WIKI 3 ### // // Get the vcluster object and the number of processor // Vcluster & v_cl = create_vcluster(); size_t N_prc = v_cl.getProcessingUnits(); // // ### WIKI 3 ### // // We find the maximum of the processors rank, that should be the Number of // processora minus one, only processor 0 print on terminal // size_t id = v_cl.getProcessUnitID(); v_cl.max(id); v_cl.execute(); if (v_cl.getProcessUnitID() == 0) std::cout << "Maximum processor rank: " << id << "\n"; // // ### WIKI 4 ### // // We sum all the processor ranks the maximum, the result should be that should // be $\frac{(n-1)n}{2}$, only processor 0 print on terminal // size_t id2 = v_cl.getProcessUnitID(); v_cl.sum(id2); v_cl.execute(); if (v_cl.getProcessUnitID() == 0) std::cout << "Sum of all processors rank: " << id2 << "\n"; // // ### WIKI 5 ### // // we can collect information from all processors using the function gather // size_t id3 = v_cl.getProcessUnitID(); openfpm::vector<size_t> v; v_cl.allGather(id3,v); v_cl.execute(); if (v_cl.getProcessUnitID() == 0) { std::cout << "Collected ids: "; for(size_t i = 0 ; i < v.size() ; i++) std::cout << " " << v.get(i) << " "; std::cout << "\n"; } // // ### WIKI 5 ### // // we can also send messages to specific processors, with the condition that the receiving // processors know we want to communicate with them, if you are searching for a more // free way to communicate where the receiving processors does not know which one processor // want to communicate with us, see the example 1_dsde // std::stringstream ss_message_1; std::stringstream ss_message_2; ss_message_1 << "Hello from " << std::setw(8) << v_cl.getProcessUnitID() << "\n"; ss_message_2 << "Hello from " << std::setw(8) << v_cl.getProcessUnitID() << "\n"; std::string message_1 = ss_message_1.str(); std::string message_2 = ss_message_2.str(); size_t msg_size 
= message_1.size(); // Processor 0 send to processors 1,2 , 1 to 2,1, 2 to 0,1 v_cl.send(((id3+1)%N_prc + N_prc)%N_prc,0,message_1.c_str(),msg_size); v_cl.send(((id3+2)%N_prc + N_prc)%N_prc,0,message_2.c_str(),msg_size); openfpm::vector<char> v_one; v_one.resize(msg_size); openfpm::vector<char> v_two(msg_size); v_two.resize(msg_size); v_cl.recv(((id3-1)%N_prc + N_prc)%N_prc,0,(void *)v_one.getPointer(),msg_size); v_cl.recv(((id3-2)%N_prc + N_prc)%N_prc,0,(void *)v_two.getPointer(),msg_size); v_cl.execute(); if (v_cl.getProcessUnitID() == 0) { for (size_t i = 0 ; i < msg_size ; i++) std::cout << v_one.get(i); for (size_t i = 0 ; i < msg_size ; i++) std::cout << v_two.get(i); } // // ### WIKI 5 ### // // we can also do what we did before in one shot // id = v_cl.getProcessUnitID(); id2 = v_cl.getProcessUnitID(); id3 = v_cl.getProcessUnitID(); v.clear(); // convert the string into a vector openfpm::vector<char> message_1_v(msg_size); openfpm::vector<char> message_2_v(msg_size); for (size_t i = 0 ; i < msg_size ; i++) message_1_v.get(i) = message_1[i]; for (size_t i = 0 ; i < msg_size ; i++) message_2_v.get(i) = message_2[i]; v_cl.max(id); v_cl.sum(id2); v_cl.allGather(id3,v); // in the case of vector we have special functions that avoid to specify the size v_cl.send(((id+1)%N_prc + N_prc)%N_prc,0,message_1_v); v_cl.send(((id+2)%N_prc + N_prc)%N_prc,0,message_2_v); v_cl.recv(((id-1)%N_prc + N_prc)%N_prc,0,v_one); v_cl.recv(((id-2)%N_prc + N_prc)%N_prc,0,v_two); v_cl.execute(); if (v_cl.getProcessUnitID() == 0) { std::cout << "Maximum processor rank: " << id << "\n"; std::cout << "Sum of all processors rank: " << id << "\n"; std::cout << "Collected ids: "; for(size_t i = 0 ; i < v.size() ; i++) std::cout << " " << v.get(i) << " "; std::cout << "\n"; for (size_t i = 0 ; i < msg_size ; i++) std::cout << v_one.get(i); for (size_t i = 0 ; i < msg_size ; i++) std::cout << v_two.get(i); } openfpm_finalize(); }
int main(int argc, char* argv[]) { /*! * * \page Vector_4_complex_prop_ser Vector 4 property serialization * * * ## Initialization and vector creation ## * * After we initialize the library we can create a vector with complex properties * with the following line * * \snippet Vector/4_complex_prop/main.cpp vect create * * In this this particular case every particle carry two my_struct object * */ // initialize the library openfpm_init(&argc,&argv); // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y Box<2,float> domain({0.0,0.0},{1.0,1.0}); // Here we define the boundary conditions of our problem size_t bc[2]={PERIODIC,PERIODIC}; // extended boundary around the domain, and the processor domain Ghost<2,float> g(0.01); // my_struct at position 0 in the aggregate constexpr int my_s1 = 0; // my_struct at position 1 in the aggregate constexpr int my_s2 = 1; //! \cond [vect create] \endcond vector_dist<2,float, aggregate<my_struct,my_struct>> vd(4096,domain,bc,g); std::cout << "HAS PACK: " << has_pack_agg<aggregate<my_struct,my_struct>>::result::value << std::endl; //! \cond [vect create] \endcond /*! * * \page Vector_4_complex_prop_ser Vector 4 property serialization * * * ## Assign values to properties ## * * In this loop we assign position to particles and we fill the two my_struct * that each particle contain. As demostration the first my_struct is filled * with the string representation of the particle coordinates. The second my struct * is filled with the string representation of the particle position multiplied by 2.0. * The the vectors of the two my_struct are filled respectively with the sequence * 1,2,3 and 1,2,3,4 * * * * \snippet Vector/4_complex_prop/main_ser.cpp vect assign * */ //! 
\cond [vect assign] \endcond auto it = vd.getDomainIterator(); while (it.isNext()) { auto p = it.get(); // we define x, assign a random position between 0.0 and 1.0 vd.getPos(p)[0] = (float)rand() / RAND_MAX; // we define y, assign a random position between 0.0 and 1.0 vd.getPos(p)[1] = (float)rand() / RAND_MAX; // Get the particle position as point Point<2,float> pt = vd.getPos(p); // create a C string from the particle coordinates // and copy into my struct vd.getProp<my_s1>(p).size = 32; vd.getProp<my_s1>(p).ptr = new char[32]; strcpy(vd.getProp<my_s1>(p).ptr,pt.toString().c_str()); // create a C++ string from the particle coordinates vd.getProp<my_s1>(p).str = std::string(pt.toString()); vd.getProp<my_s1>(p).v.add(1); vd.getProp<my_s1>(p).v.add(2); vd.getProp<my_s1>(p).v.add(3); pt = pt * 2.0; // create a C string from the particle coordinates multiplied by 2.0 // and copy into my struct vd.getProp<my_s2>(p).size = 32; vd.getProp<my_s2>(p).ptr = new char[32]; strcpy(vd.getProp<my_s2>(p).ptr,pt.toString().c_str()); // create a C++ string from the particle coordinates vd.getProp<my_s2>(p).str = std::string(pt.toString()); vd.getProp<my_s2>(p).v.add(1); vd.getProp<my_s2>(p).v.add(2); vd.getProp<my_s2>(p).v.add(3); vd.getProp<my_s2>(p).v.add(4); // next particle ++it; } //! \cond [vect assign] \endcond /*! * * \page Vector_4_complex_prop_ser Vector 4 property serialization * * * ## Mapping and ghost_get ## * * Particles are redistributed across processors and we also synchronize the ghost * * \see \ref e0_s_map * * \see \ref e1_part_ghost * * \snippet Vector/4_complex_prop/main_ser.cpp vect map ghost * */ //! \cond [vect map ghost] \endcond // Particles are redistribued across the processors vd.map(); // Synchronize the ghost vd.ghost_get<my_s1,my_s2>(); //! \cond [vect map ghost] \endcond /*! 
* * \page Vector_4_complex_prop_ser Vector 4 property serialization * * * ## Output and VTK visualization ## * * Vector with complex properties can be still be visualized, because unknown properties are * automatically excluded * * \see \ref e0_s_vis_vtk * * \snippet Vector/4_complex_prop/main.cpp vtk * */ //! \cond [vtk] \endcond vd.write("particles"); //! \cond [vtk] \endcond /*! * * \page Vector_4_complex_prop_ser Vector 4 property serialization * * ## Print 4 particles in the ghost area ## * * Here we print that the first 4 particles to show that the two my_struct contain the * right information * * \snippet Vector/4_complex_prop/main_ser.cpp print ghost info * */ //! \cond [print ghost info] \endcond size_t fg = vd.size_local(); Vcluster & v_cl = create_vcluster(); // Only the master processor print if (v_cl.getProcessUnitID() == 0) { // Print 4 particles for ( ; fg < vd.size_local()+4 ; fg++) { // Print my struct1 information std::cout << "my_struct1:" << std::endl; std::cout << "C-string: " << vd.getProp<my_s1>(fg).ptr << std::endl; std::cout << "Cpp-string: " << vd.getProp<my_s1>(fg).str << std::endl; for (size_t i = 0 ; i < vd.getProp<my_s1>(fg).v.size() ; i++) std::cout << "Element: " << i << " " << vd.getProp<my_s1>(fg).v.get(i) << std::endl; // Print my struct 2 information std::cout << "my_struct2" << std::endl; std::cout << "C-string: " << vd.getProp<my_s2>(fg).ptr << std::endl; std::cout << "Cpp-string: " << vd.getProp<my_s2>(fg).str << std::endl; for (size_t i = 0 ; i < vd.getProp<my_s2>(fg).v.size() ; i++) std::cout << "Element: " << i << " " << vd.getProp<my_s2>(fg).v.get(i) << std::endl; } } //! \cond [print ghost info] \endcond /*! * \page Vector_4_complex_prop_ser Vector 4 property serialization * * ## Finalize ## {#finalize} * * At the very end of the program we have always to de-initialize the library * * \snippet Vector/4_complex_prop/main_ser.cpp finalize * */ //! \cond [finalize] \endcond openfpm_finalize(); //! 
\cond [finalize] \endcond /*! * \page Vector_4_complex_prop_ser Vector 4 property serialization * * # Full code # {#code} * * \include Vector/4_complex_prop/main_ser.cpp * */ }
int main(int argc, char* argv[]) { // // ### WIKI 2 ### // // Here we Initialize the library, than we create a uniform random generator between 0 and 1 to to generate particles // randomly in the domain, we create a Box that define our domain, boundary conditions, and ghost // // initialize the library openfpm_init(&argc,&argv); // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y Box<2,float> domain({0.0,0.0},{1.0,1.0}); // Here we define the boundary conditions of our problem size_t bc[2]={PERIODIC,PERIODIC}; // extended boundary around the domain, and the processor domain Ghost<2,float> g(0.01); // // ### WIKI 3 ### // // Here we are creating a distributed vector defined by the following parameters // // * 2 is the Dimensionality of the space where the objects live // * float is the type used for the spatial coordinate of the particles // * float,float[3],float[3][3] is the information stored by each particle a scalar float, a vector float[3] and a tensor of rank 2 float[3][3] // the list of properties must be put into an aggregate data astructure aggregate<prop1,prop2,prop3, ... > // // vd is the instantiation of the object // // The Constructor instead require: // // * Number of particles 4096 in this case // * Domain where is defined this structure // * bc boundary conditions // * g Ghost // // The following construct a vector where each processor has 4096 / N_proc (N_proc = number of processor) // objects with an undefined position in space. 
This non-space decomposition is also called data-driven // decomposition // vector_dist<2,float, aggregate<float,float[3],float[3][3]> > vd(4096,domain,bc,g); // the scalar is the element at position 0 in the aggregate const int scalar = 0; // the vector is the element at position 1 in the aggregate const int vector = 1; // the tensor is the element at position 2 in the aggregate const int tensor = 2; // // ### WIKI 5 ### // // Get an iterator that go through the 4096 particles, in an undefined position state and define its position // auto it = vd.getDomainIterator(); while (it.isNext()) { auto key = it.get(); // we define x, assign a random position between 0.0 and 1.0 vd.getPos(key)[0] = rand() / RAND_MAX; // we define y, assign a random position between 0.0 and 1.0 vd.getPos(key)[1] = rand() / RAND_MAX; // next particle ++it; } // // ### WIKI 6 ### // // Once we define the position, we distribute them according to the default space decomposition // The default decomposition is created even before assigning the position to the object. It determine // which part of space each processor manage // vd.map(); // // ### WIKI 7 ### // // We get the object that store the decomposition, than we iterate again across all the objects, we count them // and we confirm that all the particles are local // //Counter we use it later size_t cnt = 0; // Get the space decomposition auto & ct = vd.getDecomposition(); // Get a particle iterator it = vd.getDomainIterator(); // For each particle ... while (it.isNext()) { // ... 
p auto p = it.get(); // we set the properties of the particle p // the scalar property vd.template getProp<scalar>(p) = 1.0; vd.template getProp<vector>(p)[0] = 1.0; vd.template getProp<vector>(p)[1] = 1.0; vd.template getProp<vector>(p)[2] = 1.0; vd.template getProp<tensor>(p)[0][0] = 1.0; vd.template getProp<tensor>(p)[0][1] = 1.0; vd.template getProp<tensor>(p)[0][2] = 1.0; vd.template getProp<tensor>(p)[1][0] = 1.0; vd.template getProp<tensor>(p)[1][1] = 1.0; vd.template getProp<tensor>(p)[1][2] = 1.0; vd.template getProp<tensor>(p)[2][0] = 1.0; vd.template getProp<tensor>(p)[2][1] = 1.0; vd.template getProp<tensor>(p)[2][2] = 1.0; // increment the counter cnt++; // next particle ++it; } // // ### WIKI 8 ### // // cnt contain the number of object the local processor contain, if we are interested to count the total number across the processor // we can use the function add, to sum across processors. First we have to get an instance of Vcluster, queue an operation of add with // the variable count and finaly execute. All the operations are asynchronous, execute work like a barrier and ensure that all the // queued operations are executed // auto & v_cl = create_vcluster(); v_cl.sum(cnt); v_cl.execute(); // // ### WIKI 9 ### // // Output the particle position for each processor // vd.write("output",VTK_WRITER); // // ### WIKI 10 ### // // Deinitialize the library // openfpm_finalize(); }
int main(int argc, char* argv[]) { /*! * * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness * * ## Initialization ## * * The initialization is the same as the molecular dynamic example. The differences are in the * parameters. We will use a bigger system, with more particles. The delta time for integration * is chosen in order to keep the system stable. * * \see \ref e3_md_init * * \snippet Vector/4_reorder/main_comp_ord.cpp vect create * */ //! \cond [vect create] \endcond double dt = 0.0001; float r_cut = 0.03; double sigma = r_cut/3.0; double sigma12 = pow(sigma,12); double sigma6 = pow(sigma,6); openfpm::vector<double> x; openfpm::vector<openfpm::vector<double>> y; openfpm_init(&argc,&argv); Vcluster & v_cl = create_vcluster(); // we will use it do place particles on a 40x40x40 Grid like size_t sz[3] = {40,40,40}; // domain Box<3,float> box({0.0,0.0,0.0}, {1.0,1.0,1.0}); // Boundary conditions size_t bc[3]= {PERIODIC,PERIODIC,PERIODIC}; // ghost, big enough to contain the interaction radius Ghost<3,float> ghost(r_cut); vector_dist<3,double, aggregate<double[3],double[3]> > vd(0,box,bc,ghost); //! \cond [vect create] \endcond /*! * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness * * ## Particles on a grid like position ## * * Here we place the particles on a grid like manner * * \see \ref e3_md_gl * * \snippet Vector/4_reorder/main_comp_ord.cpp vect grid * */ //! 
\cond [vect grid] \endcond auto it = vd.getGridIterator(sz); while (it.isNext()) { vd.add(); auto key = it.get(); vd.getLastPos()[0] = key.get(0) * it.getSpacing(0); vd.getLastPos()[1] = key.get(1) * it.getSpacing(1); vd.getLastPos()[2] = key.get(2) * it.getSpacing(2); vd.template getLastProp<velocity>()[0] = 0.0; vd.template getLastProp<velocity>()[1] = 0.0; vd.template getLastProp<velocity>()[2] = 0.0; vd.template getLastProp<force>()[0] = 0.0; vd.template getLastProp<force>()[1] = 0.0; vd.template getLastProp<force>()[2] = 0.0; ++it; } //! \cond [vect grid] \endcond /*! * * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness * * ## Molecular dynamic steps ## * * Here we do 30000 MD steps using verlet integrator the cycle is the same as the * molecular dynamic example. with the following changes. * * ### Cell lists ### * * Instead of getting the normal cell list we get an hilbert curve cell-list. Such cell list has a * function called **getIterator** used inside the function **calc_forces** and **calc_energy** * that iterate across all the particles but in a smart-way. In practice * given an r-cut a cell-list is constructed with the provided spacing. Suppose to have a cell-list * \f$ m \times n \f$, an hilbert curve \f$ 2^k \times 2^k \f$ is contructed with \f$ k = ceil(log_2(max(m,n))) \f$. * Cell-lists are explored according to this Hilbert curve, If a cell does not exist is simply skipped. 
* * * \verbatim +------+------+------+------+ Example of Hilbert curve running on a 3 x 3 Cell | | | | | An hilbert curve of k = ceil(log_2(3)) = 4 | X+---->X | X +---> X | | ^ | + | ^ | + | ***|******|******|****---|--+ ******* * + | v | + * v | * * * 7 | 8+---->9 * X | * * = Domain * ^ | | * + | * * *--|-----------------*---|--+ ******* * + | | * v | * 4<----+5 | 6<---+ X | * | ^ | + * | *---------|-------|--*------+ * | + | v * | * 1+---->2 | 3+---> X | * | | * | **********************------+ this mean that we will iterate the following cells 1,2,5,4,7,8,9,6,3 Suppose now that the particles are ordered like described Particles id Cell 0 1 1 7 2 8 3 1 4 9 5 9 6 6 7 7 8 3 9 2 10 4 11 3 The iterator of the cell-list will explore the particles in the following way Cell 1 2 5 4 7 8 9 6 3 | | | | | | | | | | 0,3,9,,10,1,7,2,4,5,6,8 * \endverbatim * * We cannot explain here what is a cache, but in practice is a fast memory in the CPU able * to store chunks of memory. The cache in general is much smaller than RAM, but the big advantage * is its speed. Retrieve data from the cache is much faster than RAM. Unfortunately the factors * that determine what is on cache and what is not are multiples: Type of cache, algorithm ... . * Qualitatively all caches will tend to load chunks of data that you read multiple-time, or chunks * of data that probably you will read based on pattern analysis. A small example is a linear memory copy where * you read consecutively memory and you write on consecutive memory. * Modern CPU recognize such pattern and decide to load on cache the consecutive memory before * you actually require it. * * * Iterating the vector in the way described above has the advantage that when we do computation on particles * and its neighborhood with the sequence described above it will happen that: * * * If to process a particle A we read some neighborhood particles to process the next particle A+1 * we will probably read most of the previous particles. 
* * * In order to show in practice what happen we first show the graph when we do not reorder * * \htmlinclude Vector/4_reorder/no_reorder.html * * The measure has oscillation but we see an asymptotic behavior from 0.04 in the initial condition to * 0.124 . Below we show what happen when we use iterator from the Cell list hilbert * * \htmlinclude Vector/4_reorder/comp_reord.html * * In cases where particles does not move or move very slowly consider to use data-reordering, because it can * give **8-10% speedup** * * \see \ref e4_reo * * ## Timers ## * * In order to collect the time of the force calculation we insert two timers around the function * calc_force. The overall performance is instead calculated with another timer around the time stepping * * \snippet Vector/4_reorder/main_data_ord.cpp timer start * \snippet Vector/4_reorder/main_data_ord.cpp timer stop * * \see \ref e3_md_vi * * */ //! \cond [md steps] \endcond // Get the Cell list structure auto NN = vd.getCellList_hilb(r_cut); // calculate forces calc_forces(vd,NN,sigma12,sigma6); unsigned long int f = 0; timer time2; time2.start(); #ifndef TEST_RUN size_t Nstep = 30000; #else size_t Nstep = 300; #endif // MD time stepping for (size_t i = 0; i < Nstep ; i++) { // Get the iterator auto it3 = vd.getDomainIterator(); // integrate velicity and space based on the calculated forces (Step1) while (it3.isNext()) { auto p = it3.get(); // here we calculate v(tn + 0.5) vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0]; vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1]; vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2]; // here we calculate x(tn + 1) vd.getPos(p)[0] += vd.template getProp<velocity>(p)[0]*dt; vd.getPos(p)[1] += vd.template getProp<velocity>(p)[1]*dt; vd.getPos(p)[2] += vd.template getProp<velocity>(p)[2]*dt; ++it3; } // Because we mooved the particles in space we have to map them and re-sync the ghost vd.map(); 
vd.template ghost_get<>(); timer time; if (i % 10 == 0) time.start(); // calculate forces or a(tn + 1) Step 2 calc_forces(vd,NN,sigma12,sigma6); if (i % 10 == 0) { time.stop(); x.add(i); y.add({time.getwct()}); } // Integrate the velocity Step 3 auto it4 = vd.getDomainIterator(); while (it4.isNext()) { auto p = it4.get(); // here we calculate v(tn + 1) vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0]; vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1]; vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2]; ++it4; } // After every iteration collect some statistic about the confoguration if (i % 100 == 0) { // We write the particle position for visualization (Without ghost) vd.deleteGhost(); vd.write("particles_",f); // we resync the ghost vd.ghost_get<>(); // We calculate the energy double energy = calc_energy(vd,NN,sigma12,sigma6); auto & vcl = create_vcluster(); vcl.sum(energy); vcl.execute(); // We also print on terminal the value of the energy // only one processor (master) write on terminal if (vcl.getProcessUnitID() == 0) std::cout << std::endl << "Energy: " << energy << std::endl; f++; } } time2.stop(); std::cout << "Performance: " << time2.getwct() << std::endl; //! \cond [md steps] \endcond /*! * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness * * ## Plotting graphs ## * * After we terminate the MD steps our vector x contains at which iteration we benchmark the force * calculation time, while y contains the measured time at that time-step. We can produce a graph X Y * * \note The graph produced is an svg graph that can be view with a browser. From the browser we can * also easily save the graph into pure svg format * * \snippet Vector/4_reorder/main_comp_ord.cpp google chart * */ //! 
\cond [google chart] \endcond // Google charts options, it store the options to draw the X Y graph GCoptions options; // Title of the graph options.title = std::string("Force calculation time"); // Y axis name options.yAxis = std::string("Time"); // X axis name options.xAxis = std::string("iteration"); // width of the line options.lineWidth = 1.0; // Object that draw the X Y graph GoogleChart cg; // Add the graph // The graph that it produce is in svg format that can be opened on browser cg.AddLinesGraph(x,y,options); // Write into html format cg.write("gc_plot2_out.html"); //! \cond [google chart] \endcond /*! * * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness * * ## Finalize ## * * At the very end of the program we have always to de-initialize the library * * \snippet Vector/4_reorder/main_comp_ord.cpp finalize * */ //! \cond [finalize] \endcond openfpm_finalize(); //! \cond [finalize] \endcond /*! * * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness * * # Full code # * * \include Vector/4_reorder/main_comp_ord.cpp * */ }
int main(int argc, char* argv[]) { /*! * \page Plot_0_cg Plot 0 Google Chart * * ## Initialization ## * * Here we Initialize the library, and we check the we are on a single processor. GoogleChart * cannot do parallel IO or write big files. So or we collect all data on one processor, or each * processor write a distinct file. In this particular example we simply stop if the program start * on more than one processor * * \snippet Plot/0_simple_graph/main.cpp initialize * */ //! \cond [initialize] \endcond openfpm_init(&argc,&argv); auto & v_cl = create_vcluster(); // Google chart is only single processor if (v_cl.getProcessingUnits() > 1) { std::cerr << "Error: only one processor is allowed" << "\n"; return 1; } //! \cond [initialize] \endcond /*! * \page Plot_0_cg Plot 0 Google Chart * * ## Graph data ## * * Here we have the vectors that will contain the information about the graph. * * \snippet Plot/0_simple_graph/main.cpp datas vector * */ //! \cond [datas vector] \endcond openfpm::vector<std::string> x; openfpm::vector<openfpm::vector<double>> y; openfpm::vector<std::string> yn; //! \cond [datas vector] \endcond /*! * \page Plot_0_cg Plot 0 Google Chart * * We will try now to produce the following situation. Six values on **x** each of them having 4 values on **y** * * This mean that for each x value we have to define 4 y values. Having multiple values on on x can be used for * several purpose. * * * Define multiple lines. For example if we connect all the points # we obtain one line. If we connect * all the @ points we obtain another line, an so on ... 
(figure below) * * * Define error bands * * * Visualize different observables/parameters for the same value x * * * \verbatim y ^ $ dataset1 | * dataset2 0.9 | # dataset3 | @ @ dataset4 | # 0.6 | * * @ * | $ # @ # # | @ $ $ @ @ 0.3 | # * $ # | $ * | * $ 0 |_________________________________ o t t f f s x n w h o i i e o r u v x e r e e \endverbatim * * We start from the first case (Define multiple lines) * * \snippet Plot/0_simple_graph/main.cpp data fill * */ //! \cond [data fill] \endcond // Fill the x values x.add("one"); x.add("two"); x.add("three"); x.add("four"); x.add("five"); x.add("six"); // we have 4 dataset or lines yn.add("dataset1"); yn.add("dataset2"); yn.add("dataset3"); yn.add("dataset4"); // Because we have 6 points on x each containing 4 lines or dataset, we have to provides // 6 point with 4 values at each x point y.add({2,3,5,6}); y.add({5,6,1,6}); y.add({2,1,6,9}); y.add({1,6,3,2}); y.add({3,3,0,6}); y.add({2,1,4,6}); //! \cond [data fill] \endcond /*! * \page Plot_0_cg Plot 0 Google Chart * * ## Graph options ## * * We can specify several options for the graphs. * * * Title of the graph * * Title of the y axis * * Title of the x axis * * * \snippet Plot/0_simple_graph/main.cpp google chart * */ //! \cond [google chart] \endcond GCoptions options; options.title = std::string("Example"); options.yAxis = std::string("Y Axis"); options.xAxis = std::string("X Axis"); options.lineWidth = 5; //! \cond [google chart] \endcond /*! * \page Plot_0_cg Plot 0 Google Chart * * ## Graph write ## * * We create the object to create plots with Google Charts * * A writer can produce several graphs optionally interleaved with HTML code. * Here we write in HTML a description of the graph, than we output the graph * * AddLinesGraph create a typical graph with lines * * \snippet Plot/0_simple_graph/main.cpp google chart write1 * * \htmlonly <div id="chart_div0" style="width: 900px; height: 500px;"></div> \endhtmlonly * */ //! 
\cond [google chart write1] \endcond GoogleChart cg; // cg.addHTML("<h2>First graph</h2>"); cg.AddLinesGraph(x,y,yn,options); //! \cond [google chart write1] \endcond /*! * * \page Plot_0_cg Plot 0 Google Chart * * ## Hist graph ## * * Hist graph is instead a more flexible Graph writer. In particular we can specify * how to draw each dataset. With the option * * * **stype** specify how to draw each dataset * * **stypeext** we can override the default stype option. In this case we say that the third dataset * in must be reppresented as a line instead of a bars * * To note that we can reuse the same Google chart writer to write multiple * Graph on the same page, interleaved with HTML code * * \snippet Plot/0_simple_graph/main.cpp google chart write2 * * \htmlonly <div id="chart_div1" style="width: 900px; height: 500px;"></div> \endhtmlonly * * */ //! \cond [google chart write2] \endcond options.stype = std::string("bars"); // it say that the dataset4 must me represented with a line options.stypeext = std::string("{3: {type: 'line'}}"); cg.addHTML("<h2>Second graph</h2>"); cg.AddHistGraph(x,y,yn,options); //! \cond [google chart write2] \endcond /*! * * \page Plot_0_cg Plot 0 Google Chart * * ## %Error bars ## * * Here we show how to draw error bars. %Error bars are drawn specifying intervals with a min and a max. * Intervals in general does not have to encapsulate any curve. First we construct the vector y with 3 * values the first value contain the curve points, the second and third contain the min,max interval. * * \snippet Plot/0_simple_graph/main.cpp google chart write3 * * \htmlonly <div id="chart_div2" style="width: 900px; height: 500px;"></div> \endhtmlonly * * */ //! 
\cond [google chart write3] \endcond cg.addHTML("<h2>Third graph</h2>"); // The first colum are the values of a line while the other 2 values // are the min and max of an interval, as we can see interval does not // have to encapsulate any curve y.clear(); y.add({0.10,0.20,0.19}); y.add({0.11,0.21,0.18}); y.add({0.12,0.22,0.21}); y.add({0.15,0.25,0.20}); y.add({0.09,0.29,0.25}); y.add({0.08,0.28,0.27}); // Here we mark that the the colum 2 and 3 are intervals yn.clear(); yn.add("line1"); yn.add("interval"); yn.add("interval"); cg.AddLinesGraph(x,y,yn,options); //! \cond [google chart write3] \endcond /*! * * \page Plot_0_cg Plot 0 Google Chart * * The style of each interval can be controlled, and the definition of intervals can be interleaved with definition of * other lines. In this example we show how to define 3 lines and 3 intervals, controlling the style of the last interval * * \snippet Plot/0_simple_graph/main.cpp google chart write4 * * \htmlonly <div id="chart_div3" style="width: 900px; height: 500px;"></div> \endhtmlonly * * */ //! \cond [google chart write4] \endcond cg.addHTML("<h2>Four graph</h2>"); // again 6 point but 9 values y.clear(); y.add({0.10,0.20,0.19,0.22,0.195,0.215,0.35,0.34,0.36}); y.add({0.11,0.21,0.18,0.22,0.19,0.215,0.36,0.35,0.37}); y.add({0.12,0.22,0.21,0.23,0.215,0.225,0.35,0.34,0.36}); y.add({0.15,0.25,0.20,0.26,0.22,0.255,0.36,0.35,0.37}); y.add({0.09,0.29,0.25,0.30,0.26,0.295,0.35,0.34,0.36}); y.add({0.08,0.28,0.27,0.29,0.275,0.285,0.36,0.35,0.37}); // colum 0 and 1 are lines // colums 2-3 and 4-5 are intervals // colum 6 is a line // colum 7-8 is an interval yn.add("line1"); yn.add("line2"); yn.add("interval"); yn.add("interval"); yn.add("interval"); yn.add("interval"); yn.add("line3"); yn.add("interval"); yn.add("interval"); // Intervals are enumerated with iX, for example in this case with 3 intervals we have i0,i1,i2 // with this line we control the style of the intervals. 
In particular we change from the default // values options.intervalext = std::string("{'i2': { 'color': '#4374E0', 'style':'bars', 'lineWidth':4, 'fillOpacity':1 } }"); cg.AddLinesGraph(x,y,yn,options); //! \cond [google chart write4] \endcond /*! * * \page Plot_0_cg Plot 0 Google Chart * * ## More options ## * * In this last example we also show how to: * * * * Make the graph bigger, setting **width** and **height** options * * Give the possibility to to zoom-in and zoom-out with **GC_EXPLORER** * * Use lines instead a smooth function to connect points * * Use logaritmic scale * * \note For more options refer to doxygen and Google Charts * * \snippet Plot/0_simple_graph/main.cpp google chart write5 * * * \htmlonly <div id="chart_div4" style="width: 1280px; height: 700px;"></div> \endhtmlonly * */ //! \cond [google chart write5] \endcond openfpm::vector<double> xn; xn.add(1.0); xn.add(2.0); xn.add(3.0); xn.add(4.0); xn.add(5.0); xn.add(6.0); options.intervalext = ""; options.width = 1280; options.heigh = 720; options.curveType = "line"; options.more = GC_ZOOM + "," + GC_X_LOG + "," + GC_Y_LOG; cg.AddLinesGraph(xn,y,yn,options); cg.write("gc_out.html"); //! \cond [google chart write5] \endcond /*! * \page Plot_0_cg Plot 0 Google Chart * * * ## Finalize ## {#finalize} * * At the very end of the program we have always de-initialize the library * * \snippet Plot/0_simple_graph/main.cpp finalize * */ //! \cond [finalize] \endcond openfpm_finalize(); //! \cond [finalize] \endcond }
/*! \brief Tear down of the unit-test fixture
 *
 * It emits a Boost.Test message and then de-initializes the OpenFPM library
 *
 */
~ut_start()
{
	BOOST_TEST_MESSAGE("Delete global VClster");

	openfpm_finalize();
}