int main(int argc, char* argv[]) { // // ### WIKI 2 ### // // Here we Initialize the library, than we create a uniform random generator between 0 and 1 to to generate particles // randomly in the domain, we create a Box that define our domain, boundary conditions, and ghost // // initialize the library openfpm_init(&argc,&argv); // Here we define our domain a 2D box with internals from 0 to 1.0 for x and y Box<2,float> domain({0.0,0.0},{1.0,1.0}); // Here we define the boundary conditions of our problem size_t bc[2]={PERIODIC,PERIODIC}; // extended boundary around the domain, and the processor domain Ghost<2,float> g(0.01); // // ### WIKI 3 ### // // Here we are creating a distributed vector defined by the following parameters // // * 2 is the Dimensionality of the space where the objects live // * float is the type used for the spatial coordinate of the particles // * float,float[3],float[3][3] is the information stored by each particle a scalar float, a vector float[3] and a tensor of rank 2 float[3][3] // the list of properties must be put into an aggregate data astructure aggregate<prop1,prop2,prop3, ... > // // vd is the instantiation of the object // // The Constructor instead require: // // * Number of particles 4096 in this case // * Domain where is defined this structure // * bc boundary conditions // * g Ghost // // The following construct a vector where each processor has 4096 / N_proc (N_proc = number of processor) // objects with an undefined position in space. 
This non-space decomposition is also called data-driven // decomposition // vector_dist<2,float, aggregate<float,float[3],float[3][3]> > vd(4096,domain,bc,g); // the scalar is the element at position 0 in the aggregate const int scalar = 0; // the vector is the element at position 1 in the aggregate const int vector = 1; // the tensor is the element at position 2 in the aggregate const int tensor = 2; // // ### WIKI 5 ### // // Get an iterator that go through the 4096 particles, in an undefined position state and define its position // auto it = vd.getDomainIterator(); while (it.isNext()) { auto key = it.get(); // we define x, assign a random position between 0.0 and 1.0 vd.getPos(key)[0] = rand() / RAND_MAX; // we define y, assign a random position between 0.0 and 1.0 vd.getPos(key)[1] = rand() / RAND_MAX; // next particle ++it; } // // ### WIKI 6 ### // // Once we define the position, we distribute them according to the default space decomposition // The default decomposition is created even before assigning the position to the object. It determine // which part of space each processor manage // vd.map(); // // ### WIKI 7 ### // // We get the object that store the decomposition, than we iterate again across all the objects, we count them // and we confirm that all the particles are local // //Counter we use it later size_t cnt = 0; // Get the space decomposition auto & ct = vd.getDecomposition(); // Get a particle iterator it = vd.getDomainIterator(); // For each particle ... while (it.isNext()) { // ... 
p auto p = it.get(); // we set the properties of the particle p // the scalar property vd.template getProp<scalar>(p) = 1.0; vd.template getProp<vector>(p)[0] = 1.0; vd.template getProp<vector>(p)[1] = 1.0; vd.template getProp<vector>(p)[2] = 1.0; vd.template getProp<tensor>(p)[0][0] = 1.0; vd.template getProp<tensor>(p)[0][1] = 1.0; vd.template getProp<tensor>(p)[0][2] = 1.0; vd.template getProp<tensor>(p)[1][0] = 1.0; vd.template getProp<tensor>(p)[1][1] = 1.0; vd.template getProp<tensor>(p)[1][2] = 1.0; vd.template getProp<tensor>(p)[2][0] = 1.0; vd.template getProp<tensor>(p)[2][1] = 1.0; vd.template getProp<tensor>(p)[2][2] = 1.0; // increment the counter cnt++; // next particle ++it; } // // ### WIKI 8 ### // // cnt contain the number of object the local processor contain, if we are interested to count the total number across the processor // we can use the function add, to sum across processors. First we have to get an instance of Vcluster, queue an operation of add with // the variable count and finaly execute. All the operations are asynchronous, execute work like a barrier and ensure that all the // queued operations are executed // auto & v_cl = create_vcluster(); v_cl.sum(cnt); v_cl.execute(); // // ### WIKI 9 ### // // Output the particle position for each processor // vd.write("output",VTK_WRITER); // // ### WIKI 10 ### // // Deinitialize the library // openfpm_finalize(); }
int main(int argc, char* argv[]) { /*! * \page Plot_0_cg Plot 0 Google Chart * * ## Initialization ## * * Here we Initialize the library, and we check the we are on a single processor. GoogleChart * cannot do parallel IO or write big files. So or we collect all data on one processor, or each * processor write a distinct file. In this particular example we simply stop if the program start * on more than one processor * * \snippet Plot/0_simple_graph/main.cpp initialize * */ //! \cond [initialize] \endcond openfpm_init(&argc,&argv); auto & v_cl = create_vcluster(); // Google chart is only single processor if (v_cl.getProcessingUnits() > 1) { std::cerr << "Error: only one processor is allowed" << "\n"; return 1; } //! \cond [initialize] \endcond /*! * \page Plot_0_cg Plot 0 Google Chart * * ## Graph data ## * * Here we have the vectors that will contain the information about the graph. * * \snippet Plot/0_simple_graph/main.cpp datas vector * */ //! \cond [datas vector] \endcond openfpm::vector<std::string> x; openfpm::vector<openfpm::vector<double>> y; openfpm::vector<std::string> yn; //! \cond [datas vector] \endcond /*! * \page Plot_0_cg Plot 0 Google Chart * * We will try now to produce the following situation. Six values on **x** each of them having 4 values on **y** * * This mean that for each x value we have to define 4 y values. Having multiple values on on x can be used for * several purpose. * * * Define multiple lines. For example if we connect all the points # we obtain one line. If we connect * all the @ points we obtain another line, an so on ... 
(figure below) * * * Define error bands * * * Visualize different observables/parameters for the same value x * * * \verbatim y ^ $ dataset1 | * dataset2 0.9 | # dataset3 | @ @ dataset4 | # 0.6 | * * @ * | $ # @ # # | @ $ $ @ @ 0.3 | # * $ # | $ * | * $ 0 |_________________________________ o t t f f s x n w h o i i e o r u v x e r e e \endverbatim * * We start from the first case (Define multiple lines) * * \snippet Plot/0_simple_graph/main.cpp data fill * */ //! \cond [data fill] \endcond // Fill the x values x.add("one"); x.add("two"); x.add("three"); x.add("four"); x.add("five"); x.add("six"); // we have 4 dataset or lines yn.add("dataset1"); yn.add("dataset2"); yn.add("dataset3"); yn.add("dataset4"); // Because we have 6 points on x each containing 4 lines or dataset, we have to provides // 6 point with 4 values at each x point y.add({2,3,5,6}); y.add({5,6,1,6}); y.add({2,1,6,9}); y.add({1,6,3,2}); y.add({3,3,0,6}); y.add({2,1,4,6}); //! \cond [data fill] \endcond /*! * \page Plot_0_cg Plot 0 Google Chart * * ## Graph options ## * * We can specify several options for the graphs. * * * Title of the graph * * Title of the y axis * * Title of the x axis * * * \snippet Plot/0_simple_graph/main.cpp google chart * */ //! \cond [google chart] \endcond GCoptions options; options.title = std::string("Example"); options.yAxis = std::string("Y Axis"); options.xAxis = std::string("X Axis"); options.lineWidth = 5; //! \cond [google chart] \endcond /*! * \page Plot_0_cg Plot 0 Google Chart * * ## Graph write ## * * We create the object to create plots with Google Charts * * A writer can produce several graphs optionally interleaved with HTML code. * Here we write in HTML a description of the graph, than we output the graph * * AddLinesGraph create a typical graph with lines * * \snippet Plot/0_simple_graph/main.cpp google chart write1 * * \htmlonly <div id="chart_div0" style="width: 900px; height: 500px;"></div> \endhtmlonly * */ //! 
\cond [google chart write1] \endcond GoogleChart cg; // cg.addHTML("<h2>First graph</h2>"); cg.AddLinesGraph(x,y,yn,options); //! \cond [google chart write1] \endcond /*! * * \page Plot_0_cg Plot 0 Google Chart * * ## Hist graph ## * * Hist graph is instead a more flexible Graph writer. In particular we can specify * how to draw each dataset. With the option * * * **stype** specify how to draw each dataset * * **stypeext** we can override the default stype option. In this case we say that the third dataset * in must be reppresented as a line instead of a bars * * To note that we can reuse the same Google chart writer to write multiple * Graph on the same page, interleaved with HTML code * * \snippet Plot/0_simple_graph/main.cpp google chart write2 * * \htmlonly <div id="chart_div1" style="width: 900px; height: 500px;"></div> \endhtmlonly * * */ //! \cond [google chart write2] \endcond options.stype = std::string("bars"); // it say that the dataset4 must me represented with a line options.stypeext = std::string("{3: {type: 'line'}}"); cg.addHTML("<h2>Second graph</h2>"); cg.AddHistGraph(x,y,yn,options); //! \cond [google chart write2] \endcond /*! * * \page Plot_0_cg Plot 0 Google Chart * * ## %Error bars ## * * Here we show how to draw error bars. %Error bars are drawn specifying intervals with a min and a max. * Intervals in general does not have to encapsulate any curve. First we construct the vector y with 3 * values the first value contain the curve points, the second and third contain the min,max interval. * * \snippet Plot/0_simple_graph/main.cpp google chart write3 * * \htmlonly <div id="chart_div2" style="width: 900px; height: 500px;"></div> \endhtmlonly * * */ //! 
\cond [google chart write3] \endcond cg.addHTML("<h2>Third graph</h2>"); // The first colum are the values of a line while the other 2 values // are the min and max of an interval, as we can see interval does not // have to encapsulate any curve y.clear(); y.add({0.10,0.20,0.19}); y.add({0.11,0.21,0.18}); y.add({0.12,0.22,0.21}); y.add({0.15,0.25,0.20}); y.add({0.09,0.29,0.25}); y.add({0.08,0.28,0.27}); // Here we mark that the the colum 2 and 3 are intervals yn.clear(); yn.add("line1"); yn.add("interval"); yn.add("interval"); cg.AddLinesGraph(x,y,yn,options); //! \cond [google chart write3] \endcond /*! * * \page Plot_0_cg Plot 0 Google Chart * * The style of each interval can be controlled, and the definition of intervals can be interleaved with definition of * other lines. In this example we show how to define 3 lines and 3 intervals, controlling the style of the last interval * * \snippet Plot/0_simple_graph/main.cpp google chart write4 * * \htmlonly <div id="chart_div3" style="width: 900px; height: 500px;"></div> \endhtmlonly * * */ //! \cond [google chart write4] \endcond cg.addHTML("<h2>Four graph</h2>"); // again 6 point but 9 values y.clear(); y.add({0.10,0.20,0.19,0.22,0.195,0.215,0.35,0.34,0.36}); y.add({0.11,0.21,0.18,0.22,0.19,0.215,0.36,0.35,0.37}); y.add({0.12,0.22,0.21,0.23,0.215,0.225,0.35,0.34,0.36}); y.add({0.15,0.25,0.20,0.26,0.22,0.255,0.36,0.35,0.37}); y.add({0.09,0.29,0.25,0.30,0.26,0.295,0.35,0.34,0.36}); y.add({0.08,0.28,0.27,0.29,0.275,0.285,0.36,0.35,0.37}); // colum 0 and 1 are lines // colums 2-3 and 4-5 are intervals // colum 6 is a line // colum 7-8 is an interval yn.add("line1"); yn.add("line2"); yn.add("interval"); yn.add("interval"); yn.add("interval"); yn.add("interval"); yn.add("line3"); yn.add("interval"); yn.add("interval"); // Intervals are enumerated with iX, for example in this case with 3 intervals we have i0,i1,i2 // with this line we control the style of the intervals. 
In particular we change from the default // values options.intervalext = std::string("{'i2': { 'color': '#4374E0', 'style':'bars', 'lineWidth':4, 'fillOpacity':1 } }"); cg.AddLinesGraph(x,y,yn,options); //! \cond [google chart write4] \endcond /*! * * \page Plot_0_cg Plot 0 Google Chart * * ## More options ## * * In this last example we also show how to: * * * * Make the graph bigger, setting **width** and **height** options * * Give the possibility to to zoom-in and zoom-out with **GC_EXPLORER** * * Use lines instead a smooth function to connect points * * Use logaritmic scale * * \note For more options refer to doxygen and Google Charts * * \snippet Plot/0_simple_graph/main.cpp google chart write5 * * * \htmlonly <div id="chart_div4" style="width: 1280px; height: 700px;"></div> \endhtmlonly * */ //! \cond [google chart write5] \endcond openfpm::vector<double> xn; xn.add(1.0); xn.add(2.0); xn.add(3.0); xn.add(4.0); xn.add(5.0); xn.add(6.0); options.intervalext = ""; options.width = 1280; options.heigh = 720; options.curveType = "line"; options.more = GC_ZOOM + "," + GC_X_LOG + "," + GC_Y_LOG; cg.AddLinesGraph(xn,y,yn,options); cg.write("gc_out.html"); //! \cond [google chart write5] \endcond /*! * \page Plot_0_cg Plot 0 Google Chart * * * ## Finalize ## {#finalize} * * At the very end of the program we have always de-initialize the library * * \snippet Plot/0_simple_graph/main.cpp finalize * */ //! \cond [finalize] \endcond openfpm_finalize(); //! \cond [finalize] \endcond }
int main(int argc, char* argv[]) { /*! * * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness * * ## Initialization ## * * The initialization is the same as the molecular dynamic example. The differences are in the * parameters. We will use a bigger system, with more particles. The delta time for integration * is chosen in order to keep the system stable. * * \see \ref e3_md_init * * \snippet Vector/4_reorder/main_comp_ord.cpp vect create * */ //! \cond [vect create] \endcond double dt = 0.0001; float r_cut = 0.03; double sigma = r_cut/3.0; double sigma12 = pow(sigma,12); double sigma6 = pow(sigma,6); openfpm::vector<double> x; openfpm::vector<openfpm::vector<double>> y; openfpm_init(&argc,&argv); Vcluster & v_cl = create_vcluster(); // we will use it do place particles on a 40x40x40 Grid like size_t sz[3] = {40,40,40}; // domain Box<3,float> box({0.0,0.0,0.0}, {1.0,1.0,1.0}); // Boundary conditions size_t bc[3]= {PERIODIC,PERIODIC,PERIODIC}; // ghost, big enough to contain the interaction radius Ghost<3,float> ghost(r_cut); vector_dist<3,double, aggregate<double[3],double[3]> > vd(0,box,bc,ghost); //! \cond [vect create] \endcond /*! * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness * * ## Particles on a grid like position ## * * Here we place the particles on a grid like manner * * \see \ref e3_md_gl * * \snippet Vector/4_reorder/main_comp_ord.cpp vect grid * */ //! 
\cond [vect grid] \endcond auto it = vd.getGridIterator(sz); while (it.isNext()) { vd.add(); auto key = it.get(); vd.getLastPos()[0] = key.get(0) * it.getSpacing(0); vd.getLastPos()[1] = key.get(1) * it.getSpacing(1); vd.getLastPos()[2] = key.get(2) * it.getSpacing(2); vd.template getLastProp<velocity>()[0] = 0.0; vd.template getLastProp<velocity>()[1] = 0.0; vd.template getLastProp<velocity>()[2] = 0.0; vd.template getLastProp<force>()[0] = 0.0; vd.template getLastProp<force>()[1] = 0.0; vd.template getLastProp<force>()[2] = 0.0; ++it; } //! \cond [vect grid] \endcond /*! * * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness * * ## Molecular dynamic steps ## * * Here we do 30000 MD steps using verlet integrator the cycle is the same as the * molecular dynamic example. with the following changes. * * ### Cell lists ### * * Instead of getting the normal cell list we get an hilbert curve cell-list. Such cell list has a * function called **getIterator** used inside the function **calc_forces** and **calc_energy** * that iterate across all the particles but in a smart-way. In practice * given an r-cut a cell-list is constructed with the provided spacing. Suppose to have a cell-list * \f$ m \times n \f$, an hilbert curve \f$ 2^k \times 2^k \f$ is contructed with \f$ k = ceil(log_2(max(m,n))) \f$. * Cell-lists are explored according to this Hilbert curve, If a cell does not exist is simply skipped. 
* * * \verbatim +------+------+------+------+ Example of Hilbert curve running on a 3 x 3 Cell | | | | | An hilbert curve of k = ceil(log_2(3)) = 4 | X+---->X | X +---> X | | ^ | + | ^ | + | ***|******|******|****---|--+ ******* * + | v | + * v | * * * 7 | 8+---->9 * X | * * = Domain * ^ | | * + | * * *--|-----------------*---|--+ ******* * + | | * v | * 4<----+5 | 6<---+ X | * | ^ | + * | *---------|-------|--*------+ * | + | v * | * 1+---->2 | 3+---> X | * | | * | **********************------+ this mean that we will iterate the following cells 1,2,5,4,7,8,9,6,3 Suppose now that the particles are ordered like described Particles id Cell 0 1 1 7 2 8 3 1 4 9 5 9 6 6 7 7 8 3 9 2 10 4 11 3 The iterator of the cell-list will explore the particles in the following way Cell 1 2 5 4 7 8 9 6 3 | | | | | | | | | | 0,3,9,,10,1,7,2,4,5,6,8 * \endverbatim * * We cannot explain here what is a cache, but in practice is a fast memory in the CPU able * to store chunks of memory. The cache in general is much smaller than RAM, but the big advantage * is its speed. Retrieve data from the cache is much faster than RAM. Unfortunately the factors * that determine what is on cache and what is not are multiples: Type of cache, algorithm ... . * Qualitatively all caches will tend to load chunks of data that you read multiple-time, or chunks * of data that probably you will read based on pattern analysis. A small example is a linear memory copy where * you read consecutively memory and you write on consecutive memory. * Modern CPU recognize such pattern and decide to load on cache the consecutive memory before * you actually require it. * * * Iterating the vector in the way described above has the advantage that when we do computation on particles * and its neighborhood with the sequence described above it will happen that: * * * If to process a particle A we read some neighborhood particles to process the next particle A+1 * we will probably read most of the previous particles. 
* * * In order to show in practice what happen we first show the graph when we do not reorder * * \htmlinclude Vector/4_reorder/no_reorder.html * * The measure has oscillation but we see an asymptotic behavior from 0.04 in the initial condition to * 0.124 . Below we show what happen when we use iterator from the Cell list hilbert * * \htmlinclude Vector/4_reorder/comp_reord.html * * In cases where particles does not move or move very slowly consider to use data-reordering, because it can * give **8-10% speedup** * * \see \ref e4_reo * * ## Timers ## * * In order to collect the time of the force calculation we insert two timers around the function * calc_force. The overall performance is instead calculated with another timer around the time stepping * * \snippet Vector/4_reorder/main_data_ord.cpp timer start * \snippet Vector/4_reorder/main_data_ord.cpp timer stop * * \see \ref e3_md_vi * * */ //! \cond [md steps] \endcond // Get the Cell list structure auto NN = vd.getCellList_hilb(r_cut); // calculate forces calc_forces(vd,NN,sigma12,sigma6); unsigned long int f = 0; timer time2; time2.start(); #ifndef TEST_RUN size_t Nstep = 30000; #else size_t Nstep = 300; #endif // MD time stepping for (size_t i = 0; i < Nstep ; i++) { // Get the iterator auto it3 = vd.getDomainIterator(); // integrate velicity and space based on the calculated forces (Step1) while (it3.isNext()) { auto p = it3.get(); // here we calculate v(tn + 0.5) vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0]; vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1]; vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2]; // here we calculate x(tn + 1) vd.getPos(p)[0] += vd.template getProp<velocity>(p)[0]*dt; vd.getPos(p)[1] += vd.template getProp<velocity>(p)[1]*dt; vd.getPos(p)[2] += vd.template getProp<velocity>(p)[2]*dt; ++it3; } // Because we mooved the particles in space we have to map them and re-sync the ghost vd.map(); 
vd.template ghost_get<>(); timer time; if (i % 10 == 0) time.start(); // calculate forces or a(tn + 1) Step 2 calc_forces(vd,NN,sigma12,sigma6); if (i % 10 == 0) { time.stop(); x.add(i); y.add({time.getwct()}); } // Integrate the velocity Step 3 auto it4 = vd.getDomainIterator(); while (it4.isNext()) { auto p = it4.get(); // here we calculate v(tn + 1) vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0]; vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1]; vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2]; ++it4; } // After every iteration collect some statistic about the confoguration if (i % 100 == 0) { // We write the particle position for visualization (Without ghost) vd.deleteGhost(); vd.write("particles_",f); // we resync the ghost vd.ghost_get<>(); // We calculate the energy double energy = calc_energy(vd,NN,sigma12,sigma6); auto & vcl = create_vcluster(); vcl.sum(energy); vcl.execute(); // We also print on terminal the value of the energy // only one processor (master) write on terminal if (vcl.getProcessUnitID() == 0) std::cout << std::endl << "Energy: " << energy << std::endl; f++; } } time2.stop(); std::cout << "Performance: " << time2.getwct() << std::endl; //! \cond [md steps] \endcond /*! * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness * * ## Plotting graphs ## * * After we terminate the MD steps our vector x contains at which iteration we benchmark the force * calculation time, while y contains the measured time at that time-step. We can produce a graph X Y * * \note The graph produced is an svg graph that can be view with a browser. From the browser we can * also easily save the graph into pure svg format * * \snippet Vector/4_reorder/main_comp_ord.cpp google chart * */ //! 
\cond [google chart] \endcond // Google charts options, it store the options to draw the X Y graph GCoptions options; // Title of the graph options.title = std::string("Force calculation time"); // Y axis name options.yAxis = std::string("Time"); // X axis name options.xAxis = std::string("iteration"); // width of the line options.lineWidth = 1.0; // Object that draw the X Y graph GoogleChart cg; // Add the graph // The graph that it produce is in svg format that can be opened on browser cg.AddLinesGraph(x,y,options); // Write into html format cg.write("gc_plot2_out.html"); //! \cond [google chart] \endcond /*! * * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness * * ## Finalize ## * * At the very end of the program we have always to de-initialize the library * * \snippet Vector/4_reorder/main_comp_ord.cpp finalize * */ //! \cond [finalize] \endcond openfpm_finalize(); //! \cond [finalize] \endcond /*! * * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness * * # Full code # * * \include Vector/4_reorder/main_comp_ord.cpp * */ }
/*! \brief Start the global VCluster before any unit test runs
 *
 * Logs a Boost.Test message and initializes the library with the command line
 * arguments stored in the Boost.Test master test suite
 *
 */
ut_start()
{
	BOOST_TEST_MESSAGE("Initialize global VCluster");

	// the master test suite holds the argc/argv the test executable was launched with
	auto & master_suite = boost::unit_test::framework::master_test_suite();

	openfpm_init(&master_suite.argc,&master_suite.argv);
}