// Example: prepares the inputs of a 2D Cartesian domain decomposition
// (CartDecomposition<2,float>) built on the global VCluster.
// NOTE(review): this listing looks like a stripped excerpt -- the function
// braces are absent and several comments below ("Decompose and write ...",
// "create a ghost border", "Write the decomposition", "deinitialize the
// library") announce statements that are not present in this view.
int main(int argc, char ** argv)
	// Initialize the global VCluster
	// NOTE(review): no initialization call is visible here.

	// Vcluster
	// Reference to the object returned by create_vcluster(); it is handed to the decomposition below
	Vcluster & vcl = create_vcluster();

	// NOTE(review): the snippet opened here is named "Create CartDecomposition vtk gen" while the
	// closing marker further down is "Create CartDecomposition" -- confirm which label is intended.
	//! [Create CartDecomposition vtk gen]
	// 2D decomposition with float coordinates, constructed from vcl
	CartDecomposition<2,float> dec(vcl);

	// Physical domain: box from (0,0) to (1,1)
	Box<2,float> box({0.0,0.0},{1.0,1.0});

	// division on each direction (20 x 20)
	size_t div[2] = {20,20};

	// Define ghost, constructed with the value 0.01
	Ghost<2,float> g(0.01);

	// boundary conditions: PERIODIC in both directions
	size_t bc[2] = {PERIODIC,PERIODIC};

	// NOTE(review): div, box, bc and g are declared above but nothing in this view consumes them;
	// presumably the stripped statements below passed them to dec -- confirm against the original example.

	// Decompose and write the decomposed graph

	// create a ghost border

	// Write the decomposition

	//! [Create CartDecomposition]

	// deinitialize the library
Exemplo n.º 2
// Example "Grid 1 stencil": writes i*i + j*j + k*k (global grid indices) into
// property A of a 3D distributed grid, synchronizes the ghost layer of A, then
// stores a 7-point Laplace stencil of A into property B.
// NOTE(review): stripped excerpt -- the function/loop braces, the Doxygen comment
// delimiters around the " * \page ..." text, and the statements that
// initialize/finalize the library, advance the iterators and write the output
// are not present in this view. A, B, x, y and z are not declared here;
// presumably they are file-scope constants -- confirm.
int main(int argc, char* argv[])
	 * \page Grid_1_stencil Grid 1 stencil
	 * ## Initialization ## {#e1_st_init}
	 * Initialize the library and several objects
	 * \see \ref e0_s_initialization
	 * \snippet Grid/1_stencil/main.cpp parameters

	//! \cond [parameters] \endcond


	// domain: box from (0,0,0) to (1,1,1)
	Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});

	// grid sizes: 100 points per direction
	size_t sz[3] = {100,100,100};

	// ghost extension, constructed with the value 0.03
	Ghost<3,float> g(0.03);

	//! \cond [parameters] \endcond

	// NOTE(review): the prose below describes grid_point as aggregate<float[3],float[3]>,
	// but the typedef that follows declares aggregate<float,float> -- one of the two is stale.
	 * \page Grid_1_stencil Grid 1 stencil
	 * ## Grid create ## {#e1_st_inst}
	 * Create a distributed grid in 3D. With typedef we create an alias name for aggregate<float[3],float[3]>.
	 * In practice the type of grid_point == aggregate<float[3],float[3]>
	 * \see \ref e0_s_grid_inst
	 * \snippet Grid/1_stencil/main.cpp grid

	//! \cond [grid] \endcond

	// a convenient alias for aggregate<...>: two float properties per grid point
	typedef aggregate<float,float> grid_point;

	// Distributed 3D grid built from the sizes, the domain and the ghost defined above
	grid_dist_id<3, float, grid_point> g_dist(sz,domain,g);

	//! \cond [grid] \endcond

	 * \page Grid_1_stencil Grid 1 stencil
	 * ## Loop over grid points ## {#e1_s_loop_gp}
	 * Get an iterator that go through the point of the domain (No ghost)
	 * \see \ref e0_s_loop_gp
	 * \snippet Grid/1_stencil/main.cpp iterator
	 * \snippet Grid/1_stencil/main.cpp iterator2

	//! \cond [iterator] \endcond

	auto dom = g_dist.getDomainIterator();

	// NOTE(review): no statement advancing dom is visible in the loop body below.
	while (dom.isNext())

		//! \cond [iterator] \endcond

		 * \page Grid_1_stencil Grid 1 stencil
		 * Inside the cycle we get the local grid key
		 * \see \ref e0_s_grid_coord
		 * \snippet Grid/1_stencil/main.cpp local key

		//! \cond [local key] \endcond

		// key of the current grid point, as returned by the iterator
		auto key = dom.get();

		//! \cond [local key] \endcond

		 * \page Grid_1_stencil Grid 1 stencil
		 * We convert the local grid position, into global position, key_g contain 3 integers that identify the position
		 * of the grid point in global coordinates
		 * \see \ref e0_s_grid_coord
		 * \snippet Grid/1_stencil/main.cpp global key

		//! \cond [global key] \endcond

		// same point expressed through getGKey(); its three components are read below
		auto key_g = g_dist.getGKey(key);

		//! \cond [global key] \endcond

		 * \page Grid_1_stencil Grid 1 stencil
		 * we write on the grid point of position (i,j,k) the value i*i + j*j + k*k on the property A.
		 * Mathematically is equivalent to the function
		 * \f$ f(x,y,z) = x^2 + y^2 + z^2 \f$
		 * \snippet Grid/1_stencil/main.cpp function

		//! \cond [function] \endcond

		// A(key) = sum of the squares of the three components of key_g
		g_dist.template get<A>(key) = key_g.get(0)*key_g.get(0) + key_g.get(1)*key_g.get(1) + key_g.get(2)*key_g.get(2);

		//! \cond [function] \endcond

		//! \cond [iterator2] \endcond


	//! \cond [iterator2] \endcond

	 * \page Grid_1_stencil Grid 1 stencil
	 * ## Ghost ## {#e1_s_ghost}
	 * Each sub-domain has an extended part, that is materially contained into another processor.
	 * In general is not synchronized
	 * ghost_get<A> synchronize the property A in the ghost part
	 * \snippet Grid/1_stencil/main.cpp ghost

	//! \cond [ghost] \endcond

	// Done before the stencil loop below, which reads A at the neighbours key.move(...,+1/-1)
	g_dist.template ghost_get<A>();
	//! \cond [ghost] \endcond

	 * \page Grid_1_stencil Grid 1 stencil
	 * Get again another iterator, iterate across all the domain points, calculating a Laplace stencil. Write the
	 * result on B
	 * \snippet Grid/1_stencil/main.cpp laplacian

	//! \cond [laplacian] \endcond

	auto dom2 = g_dist.getDomainIterator();
	// NOTE(review): no statement advancing dom2 is visible in the loop body below.
	while (dom2.isNext())
		auto key = dom2.get();

		// Laplace stencil: sum of the six axis neighbours of A minus 6 times the centre value, stored in B
		g_dist.template get<B>(key) = g_dist.template get<A>(key.move(x,1)) + g_dist.template get<A>(key.move(x,-1)) +
		                                 g_dist.template get<A>(key.move(y,1)) + g_dist.template get<A>(key.move(y,-1)) +
										 g_dist.template get<A>(key.move(z,1)) + g_dist.template get<A>(key.move(z,-1)) -
										 6*g_dist.template get<A>(key);


	//! \cond [laplacian] \endcond

	 * \page Grid_1_stencil Grid 1 stencil
	 * Finally we want a nice output to visualize the information stored by the distributed grid
	 * \see \ref e0_s_VTK_vis
	 * \snippet Grid/1_stencil/main.cpp output

	//! \cond [output] \endcond

	// NOTE(review): the output statement announced above is not present in this view.

	//! \cond [output] \endcond

	 * \page Grid_1_stencil Grid 1 stencil
	 * Deinitialize the library
	 * \snippet Grid/1_stencil/main.cpp finalize

	//! \cond [finalize] \endcond

	// NOTE(review): the finalize statement announced above is not present in this view.

	//! \cond [finalize] \endcond

	 * \page Grid_1_stencil Grid 1 stencil
	 * # Full code # {#code}
	 * \include Grid/1_stencil/main.cpp
Exemplo n.º 3
// Example "Vector 5 molecular dynamic with symmetric Verlet list crossing
// scheme": places particles on a 10x10x10 lattice, builds a Verlet list with
// getVerletCrs(), runs 10000 time steps that update velocity and position from
// the stored force, prints the energy every 100 steps and sets up a chart.
// NOTE(review): stripped excerpt -- braces, the Doxygen comment delimiters, and
// many statements (library init/finalize, particle insertion, iterator
// increments, force computation, map, output) are not present in this view.
// bc, velocity, force and calc_energy are not declared here; presumably they
// are defined elsewhere in the original file -- confirm.
int main(int argc, char* argv[])
	 * \page Vector_5_md_vl_sym_crs Vector 5 molecular dynamic with symmetric Verlet list crossing scheme
	 * ## Simulation ## {#md_e5_sym_sim_crs}
	 * The simulation is equal to the simulation explained in the example molecular dynamic
	 * \see \ref md_e5_sym
	 * The difference is that we create a symmetric Verlet-list for crossing scheme instead of a normal one
	 * \snippet Vector/5_molecular_dynamic_sym_crs/main.cpp sim verlet
	 * The rest of the code remain unchanged
	 * \snippet Vector/5_molecular_dynamic_sym_crs/main.cpp simulation

	//! \cond [simulation] \endcond

	// time step
	double dt = 0.00025;
	double sigma = 0.1;
	// cut-off radius: 3 sigma
	double r_cut = 3.0*sigma;
	// 1.3 times the cut-off radius; used for the ghost size and for the Verlet list below
	double r_gskin = 1.3*r_cut;
	// sigma^12 and sigma^6, passed to calc_energy() below
	double sigma12 = pow(sigma,12);
	double sigma6 = pow(sigma,6);

	// NOTE(review): x and y are declared here but never filled in this view; a comment in the
	// time loop suggests they collect the time-step and the energy for the chart.
	openfpm::vector<double> x;
	openfpm::vector<openfpm::vector<double>> y;

	Vcluster & v_cl = create_vcluster();

	// we will use it to place particles on a 10x10x10 grid-like layout
	size_t sz[3] = {10,10,10};

	// domain
	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});

	// Boundary conditions
	// NOTE(review): the declaration of bc (used by vd below) is not present in this view.

	// ghost, big enough to contain the interaction radius
	Ghost<3,float> ghost(r_gskin);

	// Distributed particle set: 3D, double coordinates, two double[3] properties per particle
	vector_dist<3,double, aggregate<double[3],double[3]> > vd(0,box,bc,ghost,BIND_DEC_TO_GHOST);

	size_t k = 0;
	size_t start = vd.accum();

	auto it = vd.getGridIterator(sz);

	// NOTE(review): neither a statement adding a particle nor one advancing it is visible in this loop.
	while (it.isNext())

		auto key = it.get();

		// position = lattice index times lattice spacing, per direction
		vd.getLastPos()[0] = key.get(0) * it.getSpacing(0);
		vd.getLastPos()[1] = key.get(1) * it.getSpacing(1);
		vd.getLastPos()[2] = key.get(2) * it.getSpacing(2);

		// start with zero velocity
		vd.template getLastProp<velocity>()[0] = 0.0;
		vd.template getLastProp<velocity>()[1] = 0.0;
		vd.template getLastProp<velocity>()[2] = 0.0;

		// and zero force
		vd.template getLastProp<force>()[0] = 0.0;
		vd.template getLastProp<force>()[1] = 0.0;
		vd.template getLastProp<force>()[2] = 0.0;


	// timer queried with getwct() after the time loop
	timer tsim;

	//! \cond [sim verlet] \endcond

	// Build the Verlet list through getVerletCrs() with radius r_gskin
	// NOTE(review): the statement ends with a doubled ';' (harmless empty statement).
	auto NN = vd.getVerletCrs(r_gskin);;

	//! \cond [sim verlet] \endcond

	// calculate forces
	// NOTE(review): f is not used anywhere in this view.
	unsigned long int f = 0;

	// NOTE(review): cnt is tested with "cnt % 10" below but never modified in this view.
	int cnt = 0;
	// largest displacement seen since the last energy print (reset to 0 there)
	double max_disp = 0.0;

	// MD time stepping
	for (size_t i = 0; i < 10000 ; i++)
		// Get the iterator
		auto it3 = vd.getDomainIterator();

		// largest displacement of this time step
		double max_displ = 0.0;

		// integrate velocity and space based on the calculated forces (Step1)
		// NOTE(review): no statement advancing it3 is visible in this loop.
		while (it3.isNext())
			auto p = it3.get();

			// here we calculate v(tn + 0.5)
			vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
			vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
			vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];

			// displacement of this particle over one step: velocity * dt
			Point<3,double> disp({vd.template getProp<velocity>(p)[0]*dt,vd.template getProp<velocity>(p)[1]*dt,vd.template getProp<velocity>(p)[2]*dt});

			// here we calculate x(tn + 1)
			vd.getPos(p)[0] += disp.get(0);
			vd.getPos(p)[1] += disp.get(1);
			vd.getPos(p)[2] += disp.get(2);

			// track the largest displacement of this step
			if (disp.norm() > max_displ)
				max_displ = disp.norm();


		if (max_disp < max_displ)
			max_disp = max_displ;

		// Because we moved the particles in space we have to map them and re-sync the ghost
		// NOTE(review): no map call and no else branch are visible here; as listed, both ghost_get
		// variants appear back to back, which looks like the result of stripped lines -- confirm.
		if (cnt % 10 == 0)
			vd.template ghost_get<>();
			// NOTE(review): this comment originally read "Get the Cell list structure", but the next
			// visible statement is a ghost_get with SKIP_LABELLING.
			vd.template ghost_get<>(SKIP_LABELLING);


		// calculate forces or a(tn + 1) Step 2
		// NOTE(review): the force computation is not present in this view.

		// Integrate the velocity Step 3
		auto it4 = vd.getDomainIterator();

		// NOTE(review): no statement advancing it4 is visible in this loop.
		while (it4.isNext())
			auto p = it4.get();

			// here we calculate v(tn + 1)
			vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
			vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
			vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];


		// After every iteration collect some statistic about the configuration
		if (i % 100 == 0)
			// We write the particle position for visualization (Without ghost)

			// we resync the ghost

			// We calculate the energy
			double energy = calc_energy(vd,NN,sigma12,sigma6,r_cut);
			auto & vcl = create_vcluster();

			// we save the energy calculated at time step i; presumably x holds the time-step and
			// y holds the energy (the statements doing so are not present in this view)

			// We also print on terminal the value of the energy
			// only one processor (master) write on terminal
			if (vcl.getProcessUnitID() == 0)
				std::cout << "Energy: " << energy << "   " << max_disp << "   " << std::endl;

			// restart the displacement tracking for the next 100 steps
			max_disp = 0.0;


	std::cout << "Time: " << tsim.getwct()  << std::endl;

	//! \cond [simulation] \endcond

	// Google charts options, it store the options to draw the X Y graph
	GCoptions options;

	// Title of the graph
	options.title = std::string("Energy with time");

	// Y axis name
	options.yAxis = std::string("Energy");

	// X axis name
	options.xAxis = std::string("iteration");

	// width of the line
	options.lineWidth = 1.0;

	// Object that draw the X Y graph
	GoogleChart cg;

	// Add the graph
	// The graph that it produce is in svg format that can be opened on browser
	// NOTE(review): the statements adding the graph and writing the file are not present in this view.

	// Write into html format

	//! \cond [google chart] \endcond

	 * \page Vector_5_md_vl_sym_crs Vector 5 molecular dynamic with symmetric Verlet list crossing scheme
	 * ## Finalize ## {#finalize_v_e5_md_sym_crs}
	 *  At the very end of the program we have always to de-initialize the library
	 * \snippet Vector/5_molecular_dynamic_sym_crs/main.cpp finalize

	//! \cond [finalize] \endcond


	//! \cond [finalize] \endcond

	 * \page Vector_5_md_vl_sym_crs Vector 5 molecular dynamic with symmetric Verlet list crossing scheme
	 * ## Full code ## {#full_code_v_e5_md_sym_crs}
	 * \include Vector/5_molecular_dynamic_sym_crs/main.cpp
Exemplo n.º 4
// Example "Vector 4 complex properties": declares a 2D particle set whose
// properties include lists and nested lists, fills them per particle, and
// prints the listA / listlist contents of four particles past size_local().
// NOTE(review): stripped excerpt -- braces, the Doxygen comment delimiters, the
// body of struct A, most of the vector_dist declaration (including the variable
// name vd), and the init / map / ghost_get / write / finalize statements are
// not present in this view.
int main(int argc, char* argv[])
	 * \page Vector_4_complex_prop Vector 4 complex properties
	 * ## Initialization and vector creation ##
	 * We first initialize the library and define useful constants
	 * \see \ref e0_s_init
	 * \snippet Vector/4_complex_prop/main.cpp lib init
	 * We also define a custom structure
	 * \snippet Vector/4_complex_prop/main.cpp struct A
	 * After we initialize the library we can create a vector with complex properties
	 * with the following line
	 * \snippet Vector/4_complex_prop/main.cpp vect create
	 * In this this particular case every particle carry a scalar,
	 * a vector in form of float[3], a Point, a list
	 * in form of vector of float and a list of custom structures, and a vector of vector.
	 * In general particles can have properties of arbitrary complexity.
	 * \warning For arbitrary complexity mean that we can use any openfpm data structure with and arbitrary nested complexity.
	 *          For example a openfpm::vector<aggregate<grid_cpu<openfpm::vector<aggregate<double,double[3]>>>,openfpm::vector<float>> is valid
	 * \verbatim

	         / \
	        /   \
	     grid    vector<float>
	     /  \
	double  double[3]

	 * \endverbatim
	 * Our custom data-structure A is defined below. Note that this data-structure
	 * does not have pointers
	 * \snippet Vector/4_complex_prop/main.cpp struct A
	 * \warning custom data structure are allowed only if they does not have pointer.
	 *          In case they have pointer we have to define how to serialize our data-structure
	 * \see \ref vector_example_cp_ser

	//! \cond [lib init] \endcond

    // initialize the library

	// Here we define our domain: a 2D box with intervals from 0 to 1.0 for x and y
	Box<2,float> domain({0.0,0.0},{1.0,1.0});

	// Here we define the boundary conditions of our problem
    size_t bc[2]={PERIODIC,PERIODIC};

	// extended boundary around the domain, and the processor domain
	Ghost<2,float> g(0.01);
	// the scalar is the element at position 0 in the aggregate
	constexpr int scalar = 0;

	// the vector is the element at position 1 in the aggregate
	constexpr int vector = 1;

	// the point is the element at position 2 in the aggregate
	constexpr int point = 2;

	// A list (position 3)
	constexpr int list = 3;

	// A list of A structures (position 4)
	constexpr int listA = 4;

	// A list of list (position 5)
	constexpr int listlist = 5;

	//! \cond [lib init] \endcond

	//! \cond [struct A] \endcond

	// The custom structure: one float and one int member
	// NOTE(review): the braces and the body of the two-argument constructor are not present in this view.
	struct A
		float p1;
		int p2;

		A() {};

		A(float p1, int p2)

	//! \cond [struct A] \endcond

	//! \cond [vect create] \endcond

	// NOTE(review): this declaration is truncated -- only the first and the last template argument of
	// the aggregate are visible, and the variable name and constructor arguments are missing. The code
	// below refers to the object as vd.
	vector_dist<2,float, aggregate<float,
								   openfpm::vector<openfpm::vector<float>>> >

	//! \cond [vect create] \endcond

	 * \page Vector_4_complex_prop Vector 4 complex properties
	 * ## Assign values to properties ##
	 * Assign values to properties does not changes, from the simple case. Consider
	 * now that each particle has a list, so when we can get the property listA for particle p
	 * and resize such list with **vd.getProp<listA>(p).resize(...)**. We can add new elements at the
	 * end with **vd.getProp<listA>(p).add(...)** and get some element of this list with **vd.getProp<listA>(p).get(i)**.
	 * More in general vd.getProp<listA>(p) return a reference to the openfpm::vector contained by the particle.
	 * \snippet Vector/4_complex_prop/main.cpp vect assign

	//! \cond [vect assign] \endcond

	auto it = vd.getDomainIterator();

	// NOTE(review): no statement advancing it is visible in this loop (only the "next particle" comment).
	while (it.isNext())
		auto p = it.get();

		// we define x, assign a random position between 0.0 and 1.0
		vd.getPos(p)[0] = (float)rand() / RAND_MAX;

		// we define y, assign a random position between 0.0 and 1.0
		vd.getPos(p)[1] = (float)rand() / RAND_MAX;

		vd.getProp<scalar>(p) = 1.0;

		vd.getProp<vector>(p)[0] = 1.0;
		vd.getProp<vector>(p)[1] = 1.0;
		vd.getProp<vector>(p)[2] = 1.0;

		vd.getProp<point>(p).get(0) = 1.0;
		vd.getProp<point>(p).get(1) = 1.0;
		vd.getProp<point>(p).get(2) = 1.0;

		// number of loop iterations below: rand() scaled by 10/RAND_MAX and truncated to size_t
		size_t n_cp = (float)10.0 * rand()/RAND_MAX;

		// NOTE(review): listA and listlist are accessed with get(i) below, but no resize of either
		// list is visible in this view -- presumably stripped; confirm against the original example.

		for (size_t i = 0 ; i < n_cp ; i++)
			// three elements appended to list per iteration
			vd.getProp<list>(p).add(i + 10);
			vd.getProp<list>(p).add(i + 20);
			vd.getProp<list>(p).add(i + 30);

			vd.getProp<listA>(p).get(i) = A(i+10.0,i+20.0);


		// fill a 2x2 block of the list of list with 1,2,3,4
		vd.getProp<listlist>(p).get(0).get(0) = 1.0;
		vd.getProp<listlist>(p).get(0).get(1) = 2.0;
		vd.getProp<listlist>(p).get(1).get(0) = 3.0;
		vd.getProp<listlist>(p).get(1).get(1) = 4.0;

		// next particle

	//! \cond [vect assign] \endcond

	 * \page Vector_4_complex_prop Vector 4 complex properties
	 * ## Mapping and ghost_get ##
	 * Particles are redistributed across processors all properties are communicated but instead of
	 * using map we use **map_list** that we can use to select properties.
	 * A lot of time complex properties can be recomputed and communicate them is not a good idea.
	 * The same concept also apply for ghost_get. In general we choose which properties to communicate
	 * \see \ref e0_s_map
	 * \see \ref e1_part_ghost
	 * \snippet Vector/4_complex_prop/main.cpp vect map ghost

	//! \cond [vect map ghost] \endcond

	// Particles are redistributed across the processors but only the scalar, vector, and point properties
	// are transferred
	// Synchronize the ghost
	// NOTE(review): neither the map nor the ghost synchronization statement is present in this view.

	//! \cond [vect map ghost] \endcond

	 * \page Vector_4_complex_prop Vector 4 complex properties
	 * ## Output and VTK visualization ##
	 * Vector with complex properties can be still be visualized, because unknown properties are
	 * automatically excluded
	 * \see \ref e0_s_vis_vtk
	 * \snippet Vector/4_complex_prop/main.cpp vtk

	//! \cond [vtk] \endcond


	//! \cond [vtk] \endcond

	 * \page Vector_4_complex_prop Vector 4 complex properties
	 * ## Print 4 particles in the ghost area ##
	 * Here we print that the first 4 particles to show that the list of A and the list of list are filled
	 * and the ghosts contain the correct information
	 * \snippet Vector/4_complex_prop/main.cpp print ghost info

	//! \cond [print ghost info] \endcond

	// start index: first position past the local particles
	size_t fg = vd.size_local();

	Vcluster & v_cl = create_vcluster();
	// only processor 0 prints
	if (v_cl.getProcessUnitID() == 0)
		// visits the four indices size_local() .. size_local()+3
		for ( ; fg < vd.size_local()+4 ; fg++)
			std::cout << "List of A" << std::endl;
			for (size_t i = 0 ; i < vd.getProp<listA>(fg).size() ; i++)
				std::cout << "Element: " << i << "   p1=" << vd.getProp<listA>(fg).get(i).p1 << "   p2=" << vd.getProp<listA>(fg).get(i).p2 << std::endl;

			std::cout << "List of list" << std::endl;
			for (size_t i = 0 ; i < vd.getProp<listlist>(fg).size() ; i++)
				for (size_t j = 0 ; j < vd.getProp<listlist>(fg).get(i).size() ; j++)
					std::cout << "Element: " << i << "  " << j << "   " << vd.getProp<listlist>(fg).get(i).get(j) << std::endl;

	//! \cond [print ghost info] \endcond

	 * \page Vector_4_complex_prop Vector 4 complex properties
	 * ## Finalize ## {#finalize}
	 *  At the very end of the program we have always to de-initialize the library
	 * \snippet Vector/4_complex_prop/main.cpp finalize

	//! \cond [finalize] \endcond


	//! \cond [finalize] \endcond

	 * \page Vector_4_complex_prop Vector 4 complex properties
	 * # Full code # {#code}
	 * \include Vector/4_complex_prop/main.cpp
Exemplo n.º 5
// Example "Stokes incompressible 3D petsc": declares a 36x12x12 distributed
// grid with a float[3] velocity and a float pressure, imposes the bulk,
// boundary and padding equations on an FDScheme, solves the resulting system
// with petsc_solver and copies the solution back onto the grid.
// NOTE(review): stripped excerpt -- braces, the Doxygen comment delimiters and
// the init / write / finalize statements are not present in this view. lid_nn
// and the equation objects (ic_eq, Prs, vx_eq, v_x, avg_*, EQ_1..EQ_4) are not
// declared here; presumably they are defined elsewhere in the original file.
int main(int argc, char* argv[])
	// NOTE(review): the \snippet path in the prose below points at 0_2D_incompressible while every
	// other snippet in this listing points at 1_3D_incompressible -- confirm which one is intended.
	 * \page Stokes_1_3D_petsc Stokes incompressible 3D petsc
	 * ## Initialization ## {#num_sk_inc_petsc_3D_init}
	 * After model our equation we:
	 * * Initialize the library
	 * * Define some useful constants
	 * * define Ghost size
	 * * Non-periodic boundary conditions
	 * * Padding domain expansion
	 * Padding and Ghost differ in the fact the padding extend the domain.
	 * Ghost is an extension for each sub-domain
	 * \snippet Numerics/Stoke_flow/0_2D_incompressible/main_petsc.cpp init

	//! \cond [init] \endcond

	// Initialize

	// velocity in the grid is the property 0, pressure is the property 1
	constexpr int velocity = 0;
	constexpr int pressure = 1;

	// Domain: box from (0,0,0) to (3,1,1)
	Box<3,float> domain({0.0,0.0,0.0},{3.0,1.0,1.0});

	// Ghost (Not important in this case but required)
	Ghost<3,float> g(0.01);

	// Grid points on x=36 and y=12 z=12
	// sz is signed because the impose() ranges below are built from expressions such as sz[0]-2 next to -1
	long int sz[] = {36,12,12};
	// unsigned copy of sz, handed to the grid constructor
	size_t szu[3];
	szu[0] = (size_t)sz[0];
	szu[1] = (size_t)sz[1];
	szu[2] = (size_t)sz[2];

	// We need one more point on the left and down part of the domain
	// This is given by the boundary conditions that we impose.
	Padding<3> pd({1,1,1},{0,0,0});

	//! \cond [init] \endcond

	 * \page Stokes_1_3D_petsc Stokes incompressible 3D petsc
	 * Distributed grid that store the solution
	 * \see \ref e0_s_grid_inst
	 * \snippet Numerics/Stoke_flow/1_3D_incompressible/main_petsc.cpp grid inst

	//! \cond [grid inst] \endcond

	// Grid holding a float[3] (property 0, velocity) and a float (property 1, pressure) per point
	grid_dist_id<3,float,aggregate<float[3],float>> g_dist(szu,domain,g);

	//! \cond [grid inst] \endcond

	 * \page Stokes_1_3D_petsc Stokes incompressible 3D petsc
	 * Solving the system above require the solution of a system like that
	 * \f$ Ax = b \quad x = A^{-1}b\f$
	 * where A is the system the discretize the left hand side of the equations + boundary conditions
	 * and b discretize the right hand size + boundary conditions
	 * FDScheme is the object that we use to produce the Matrix A and the vector b.
	 * Such object require the maximum extension of the stencil
	 * \snippet Numerics/Stoke_flow/1_3D_incompressible/main_petsc.cpp fd scheme

	//! \cond [fd scheme] \endcond

	// It is the maximum extension of the stencil (order 2 laplacian stencil has extension 1)
	Ghost<3,long int> stencil_max(1);

	// Finite difference scheme, built from the padding, the stencil extension, the domain and the grid
	FDScheme<lid_nn> fd(pd, stencil_max, domain, g_dist.getGridInfo(), g_dist);

	//! \cond [fd scheme] \endcond

	 * \page Stokes_1_3D_petsc Stokes incompressible 3D petsc
	 * ## Impose the equation on the domain ## {#num_sk_inc_3D_ied}
	 * Here we impose the system of equation, we start from the incompressibility Eq imposed in the bulk with the
	 * exception of the first point {0,0} and than we set P = 0 in {0,0}, why we are doing this is again
	 * mathematical to have a well defined system, an intuitive explanation is that P and P + c are both
	 * solution for the incompressibility equation, this produce an ill-posed problem to make it well posed
	 * we set one point in this case {0,0} the pressure to a fixed constant for convenience P = 0
	 * The best way to understand what we are doing is to draw a smaller example like 8x8.
	 * Considering that we have one additional point on the left for padding we have a grid
	 * 9x9. If on each point we have v_x v_y and P unknown we have
	 * 9x9x3 = 243 unknown. In order to fully determine and unique solution we have to
	 * impose 243 condition. The code under impose (in the case of 9x9) between domain
	 * and bulk 243 conditions.
	 * \snippet Numerics/Stoke_flow/1_3D_incompressible/main_petsc.cpp impose eq dom

	//! \cond [impose eq dom] \endcond

	// start and end of the bulk

	// Each impose() call below takes: the operator, a value, the equation id, and the start and stop
	// grid keys of the box on which it is applied.
	// NOTE(review): the vx_eq and vy_eq calls pass two-component start keys ({1,0} and {0,1}) while
	// every other key in this 3D listing has three components -- presumably {1,0,0} and {0,1,0} are
	// meant; confirm how the key type treats a short initializer list.
	fd.impose(ic_eq(),0.0, EQ_4, {0,0,0},{sz[0]-2,sz[1]-2,sz[2]-2},true);
	fd.impose(Prs(),  0.0, EQ_4, {0,0,0},{0,0,0});
	fd.impose(vx_eq(),0.0, EQ_1, {1,0},{sz[0]-2,sz[1]-2,sz[2]-2});
	fd.impose(vy_eq(),0.0, EQ_2, {0,1},{sz[0]-2,sz[1]-2,sz[2]-2});
	fd.impose(vz_eq(),0.0, EQ_3, {0,0,1},{sz[0]-2,sz[1]-2,sz[2]-2});

	// v_x
	// R L (Right,Left)
	fd.impose(v_x(),0.0, EQ_1, {0,0,0},      {0,sz[1]-2,sz[2]-2});
	fd.impose(v_x(),0.0, EQ_1, {sz[0]-1,0,0},{sz[0]-1,sz[1]-2,sz[2]-2});

	// T B (Top,Bottom)
	fd.impose(avg_y_vx_f(),0.0, EQ_1, {0,-1,0},     {sz[0]-1,-1,sz[2]-2});
	fd.impose(avg_y_vx(),0.0, EQ_1,   {0,sz[1]-1,0},{sz[0]-1,sz[1]-1,sz[2]-2});

	// A F (Forward,Backward)
	fd.impose(avg_z_vx_f(),0.0, EQ_1, {0,-1,-1},     {sz[0]-1,sz[1]-1,-1});
	fd.impose(avg_z_vx(),0.0, EQ_1, {0,-1,sz[2]-1},{sz[0]-1,sz[1]-1,sz[2]-1});

	// v_y
	// R L
	// NOTE(review): the avg_x_vy call here and the avg_x_vz call further down are the only ones
	// that pass 1.0 instead of 0.0.
	fd.impose(avg_x_vy_f(),0.0, EQ_2,  {-1,0,0},     {-1,sz[1]-1,sz[2]-2});
	fd.impose(avg_x_vy(),1.0, EQ_2,    {sz[0]-1,0,0},{sz[0]-1,sz[1]-1,sz[2]-2});

	// T B
	fd.impose(v_y(), 0.0, EQ_2, {0,0,0},      {sz[0]-2,0,sz[2]-2});
	fd.impose(v_y(), 0.0, EQ_2, {0,sz[1]-1,0},{sz[0]-2,sz[1]-1,sz[2]-2});

	// F A
	fd.impose(avg_z_vy(),0.0, EQ_2,   {-1,0,sz[2]-1}, {sz[0]-1,sz[1]-1,sz[2]-1});
	fd.impose(avg_z_vy_f(),0.0, EQ_2, {-1,0,-1},      {sz[0]-1,sz[1]-1,-1});

	// v_z
	// R L
	fd.impose(avg_x_vz_f(),0.0, EQ_3, {-1,0,0},     {-1,sz[1]-2,sz[2]-1});
	fd.impose(avg_x_vz(),1.0, EQ_3,   {sz[0]-1,0,0},{sz[0]-1,sz[1]-2,sz[2]-1});

	// T B
	fd.impose(avg_y_vz(),0.0, EQ_3, {-1,sz[1]-1,0},{sz[0]-1,sz[1]-1,sz[2]-1});
	fd.impose(avg_y_vz_f(),0.0, EQ_3, {-1,-1,0},   {sz[0]-1,-1,sz[2]-1});

	// F A
	fd.impose(v_z(),0.0, EQ_3, {0,0,0},      {sz[0]-2,sz[1]-2,0});
	fd.impose(v_z(),0.0, EQ_3, {0,0,sz[2]-1},{sz[0]-2,sz[1]-2,sz[2]-1});

	// When we pad the grid, there are points of the grid that are not
	// touched by the previous condition. Mathematically this lead
	// to have too many variables for the conditions that we are imposing.
	// Here we are imposing variables that we do not touch to zero

	// L R
	fd.impose(Prs(), 0.0, EQ_4, {-1,-1,-1},{-1,sz[1]-1,sz[2]-1});
	fd.impose(Prs(), 0.0, EQ_4, {sz[0]-1,-1,-1},{sz[0]-1,sz[1]-1,sz[2]-1});

	// T B
	fd.impose(Prs(), 0.0, EQ_4, {0,sz[1]-1,-1}, {sz[0]-2,sz[1]-1,sz[2]-1});
	fd.impose(Prs(), 0.0, EQ_4, {0,-1     ,-1}, {sz[0]-2,-1,     sz[2]-1});

	// F A
	fd.impose(Prs(), 0.0, EQ_4, {0,0,sz[2]-1}, {sz[0]-2,sz[1]-2,sz[2]-1});
	fd.impose(Prs(), 0.0, EQ_4, {0,0,-1},      {sz[0]-2,sz[1]-2,-1});

	// Impose v_x  v_y v_z padding
	fd.impose(v_x(), 0.0, EQ_1, {-1,-1,-1},{-1,sz[1]-1,sz[2]-1});
	fd.impose(v_y(), 0.0, EQ_2, {-1,-1,-1},{sz[0]-1,-1,sz[2]-1});
	fd.impose(v_z(), 0.0, EQ_3, {-1,-1,-1},{sz[0]-1,sz[1]-1,-1});

	//! \cond [impose eq dom] \endcond

	 * \page Stokes_1_3D_petsc Stokes incompressible 3D petsc
	 * ## Solve the system of equation ## {#num_sk_inc_3D_petsc_sse}
	 * Once we imposed all the equations we can retrieve the Matrix A and the vector b
	 * and pass these two element to the solver. In this example we are using  PETSC solvers
	 *  direct/Iterative solvers. While Umfpack
	 * has only one solver, PETSC wrap several solvers. The function best_solve set the solver in
	 * the modality to try multiple solvers to solve your system. The subsequent call to solve produce a report
	 * of all the solvers tried comparing them in error-convergence and speed. If you do not use
	 * best_solve try to solve your system with the default solver GMRES (That is the most robust iterative solver
	 *  method)
	 * \snippet Numerics/Stoke_flow/1_3D_incompressible/main_petsc.cpp solver

	//! \cond [solver] \endcond

	// Create a PETSc solver
	petsc_solver<double> solver;

	// Warning: trying many solvers and collecting statistics requires a lot of time.
	// The call is left commented out here; uncomment it to enable the multi-solver comparison.
//	solver.best_solve();

	// Give to the solver A and b, return x, the solution
	auto x = solver.solve(fd.getA(),fd.getB());

	//! \cond [solver] \endcond

	 * \page Stokes_1_3D_petsc Stokes incompressible 3D petsc
	 * ## Copy the solution on the grid and write on VTK ## {#num_sk_inc_3D_petsc_csg}
	 * Once we have the solution we copy it on the grid
	 * \snippet Numerics/Stoke_flow/1_3D_incompressible/main_petsc.cpp copy write

	//! \cond [copy write] \endcond

	// Bring the solution to grid: x is copied into the velocity and pressure properties of g_dist
	// NOTE(review): the start key {0,0} has two components in this 3D listing -- presumably {0,0,0};
	// confirm. The write-to-VTK statement announced by the heading is not present in this view.
	fd.template copy<velocity,pressure>(x,{0,0},{sz[0]-1,sz[1]-1,sz[2]-1},g_dist);


	//! \cond [copy write] \endcond

	 * \page Stokes_1_3D_petsc Stokes incompressible 3D petsc
	 * ## Finalize ## {#num_sk_inc_3D_petsc_fin}
	 *  At the very end of the program we have always to de-initialize the library
	 * \snippet Numerics/Stoke_flow/1_3D_incompressible/main_petsc.cpp fin lib

	//! \cond [fin lib] \endcond


	//! \cond [fin lib] \endcond

	 * \page Stokes_1_3D_petsc Stokes incompressible 3D petsc
	 * # Full code # {#num_sk_inc_3D_petsc_code}
	 * \include Numerics/Stoke_flow/1_3D_incompressible/main_petsc.cpp
Exemplo n.º 6
// Example: basic VCluster usage -- query the rank and the number of processors,
// print per-rank values on processor 0, and exchange two fixed-size text
// messages with the neighbouring ranks on a ring, first through raw buffers and
// then through openfpm::vector<char>.
//
// Changes relative to the previous listing:
//  * v_one is now created with msg_size elements, like v_two. It was
//    default-constructed, yet msg_size bytes were received into its buffer and
//    msg_size elements were read back from it.
//  * The receive sources are computed as (rank + N_prc - 1) % N_prc and
//    (rank + 2*N_prc - 2) % N_prc. The ranks are size_t, so the former
//    "(rank - 1) % N_prc" wrapped around for rank 0 (and "(rank - 2)" for ranks
//    0 and 1) and produced the wrong neighbour whenever N_prc is not a power
//    of two (e.g. rank 0 of 3 resolved to 0 instead of 2).
//  * The second summary prints id2 under "Sum of all processors rank", matching
//    the first summary; it used to print id.
//
// NOTE(review): this listing is a stripped excerpt -- function/branch braces and
// the library init/finalize calls are not present, and no reduction or gather
// call is visible between reading the rank and printing it (so, as listed, the
// "maximum", "sum" and "collected ids" outputs only show the local rank and an
// empty vector). Restore those from the original example before building.
int main(int argc, char* argv[])
	// ### WIKI 2 ###
	// Initialize the library and several objects
	// ### WIKI 3 ###
	// Get the vcluster object and the number of processors

	Vcluster & v_cl = create_vcluster();
	size_t N_prc = v_cl.getProcessingUnits();

	// ### WIKI 3 ###
	// We find the maximum of the processor ranks, which should be the number of
	// processors minus one; only processor 0 prints on terminal

	size_t id = v_cl.getProcessUnitID();

	if (v_cl.getProcessUnitID() == 0)
		std::cout << "Maximum processor rank: " << id << "\n";

	// ### WIKI 4 ###
	// We sum all the processor ranks; the result should be
	// $\frac{(n-1)n}{2}$; only processor 0 prints on terminal

	size_t id2 = v_cl.getProcessUnitID();

	if (v_cl.getProcessUnitID() == 0)
		std::cout << "Sum of all processors rank: " << id2 << "\n";

	// ### WIKI 5 ###
	// we can collect information from all processors using the function gather

	size_t id3 = v_cl.getProcessUnitID();
	openfpm::vector<size_t> v;
	if (v_cl.getProcessUnitID() == 0)
		std::cout << "Collected ids: ";
		for(size_t i = 0 ; i < v.size() ; i++)
			std::cout << " " << v.get(i) << " ";

		std::cout << "\n";

	// ### WIKI 5 ###
	// we can also send messages to specific processors, with the condition that the receiving
	// processors know we want to communicate with them. If you are looking for a freer way
	// to communicate, where the receiving processors do not know which processor
	// wants to communicate with them, see the example 1_dsde

	std::stringstream ss_message_1;
	std::stringstream ss_message_2;
	// setw(8) pads the rank to a fixed width, so both messages have the same length on every rank
	ss_message_1 << "Hello from " << std::setw(8) << v_cl.getProcessUnitID() << "\n";
	ss_message_2 << "Hello from " << std::setw(8) << v_cl.getProcessUnitID() << "\n";
	std::string message_1 = ss_message_1.str();
	std::string message_2 = ss_message_2.str();
	size_t msg_size = message_1.size();
	// With 3 processors: 0 sends to 1,2; 1 sends to 2,0; 2 sends to 0,1

	v_cl.send(((id3+1)%N_prc + N_prc)%N_prc,0,message_1.c_str(),msg_size);
	v_cl.send(((id3+2)%N_prc + N_prc)%N_prc,0,message_2.c_str(),msg_size);

	// Both receive buffers must hold msg_size bytes before recv() writes into them
	openfpm::vector<char> v_one(msg_size);
	openfpm::vector<char> v_two(msg_size);

	// Sources are the ranks one and two positions behind on the ring; adding a multiple of
	// N_prc before subtracting keeps the unsigned arithmetic from wrapping around
	v_cl.recv((id3 + N_prc - 1)%N_prc,0,(void *)v_one.getPointer(),msg_size);
	v_cl.recv((id3 + 2*N_prc - 2)%N_prc,0,(void *)v_two.getPointer(),msg_size);

	if (v_cl.getProcessUnitID() == 0)
		for (size_t i = 0 ; i < msg_size ; i++)
			std::cout << v_one.get(i);

		for (size_t i = 0 ; i < msg_size ; i++)
			std::cout << v_two.get(i);

	// ### WIKI 5 ###
	// we can also do what we did before in one shot

	id = v_cl.getProcessUnitID();
	id2 = v_cl.getProcessUnitID();
	id3 = v_cl.getProcessUnitID();

	// convert the string into a vector

	openfpm::vector<char> message_1_v(msg_size);
	openfpm::vector<char> message_2_v(msg_size);

	for (size_t i = 0 ; i < msg_size ; i++)
		message_1_v.get(i) = message_1[i];

	for (size_t i = 0 ; i < msg_size ; i++)
		message_2_v.get(i) = message_2[i];


	// in the case of vector we have special functions that avoid specifying the size
	v_cl.send(((id+1)%N_prc + N_prc)%N_prc,0,message_1_v);
	v_cl.send(((id+2)%N_prc + N_prc)%N_prc,0,message_2_v);
	// same wrap-free ring arithmetic as for the raw-buffer receives above
	v_cl.recv((id + N_prc - 1)%N_prc,0,v_one);
	v_cl.recv((id + 2*N_prc - 2)%N_prc,0,v_two);

	if (v_cl.getProcessUnitID() == 0)
		std::cout << "Maximum processor rank: " << id << "\n";
		std::cout << "Sum of all processors rank: " << id2 << "\n";

		std::cout << "Collected ids: ";
		for(size_t i = 0 ; i < v.size() ; i++)
			std::cout << " " << v.get(i) << " ";

		std::cout << "\n";

		for (size_t i = 0 ; i < msg_size ; i++)
			std::cout << v_one.get(i);

		for (size_t i = 0 ; i < msg_size ; i++)
			std::cout << v_two.get(i);

Exemplo n.º 7
int main(int argc, char* argv[])
	 * \page Vector_4_complex_prop_ser Vector 4 property serialization
	 * ## Initialization and vector creation ##
	 * After we initialize the library we can create a vector with complex properties
	 * with the following line
	 * \snippet Vector/4_complex_prop/main.cpp vect create
	 * In this particular case every particle carries two my_struct objects

    // initialize the library

	// Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
	Box<2,float> domain({0.0,0.0},{1.0,1.0});

	// Here we define the boundary conditions of our problem
    size_t bc[2]={PERIODIC,PERIODIC};

	// extended boundary around the domain, and the processor domain
	Ghost<2,float> g(0.01);

	// my_struct at position 0 in the aggregate
	constexpr int my_s1 = 0;

	// my_struct at position 1 in the aggregate
	constexpr int my_s2 = 1;

	//! \cond [vect create] \endcond

	vector_dist<2,float, aggregate<my_struct,my_struct>>

	std::cout << "HAS PACK: " << has_pack_agg<aggregate<my_struct,my_struct>>::result::value << std::endl;

	//! \cond [vect create] \endcond

	 * \page Vector_4_complex_prop_ser Vector 4 property serialization
	 * ## Assign values to properties ##
	 * In this loop we assign position to particles and we fill the two my_struct
	 * that each particle contains. As a demonstration the first my_struct is filled
	 *  with the string representation of the particle coordinates. The second my_struct
	 *   is filled with the string representation of the particle position multiplied by 2.0.
	 * The vectors of the two my_struct are filled respectively with the sequences
	 * 1,2,3 and 1,2,3,4
	 * \snippet Vector/4_complex_prop/main_ser.cpp vect assign

	//! \cond [vect assign] \endcond

	auto it = vd.getDomainIterator();

	// Capacity (terminator included) of the C-string buffer stored in each my_struct
	constexpr int c_str_len = 32;

	// For every domain particle: draw a random position, then fill both
	// my_struct properties with a textual representation of that position
	// (the second one uses the position multiplied by 2.0)
	while (it.isNext())
	{
		auto p = it.get();

		// we define x, assign a random position between 0.0 and 1.0
		vd.getPos(p)[0] = (float)rand() / RAND_MAX;

		// we define y, assign a random position between 0.0 and 1.0
		vd.getPos(p)[1] = (float)rand() / RAND_MAX;

		// Get the particle position as point
		Point<2,float> pt = vd.getPos(p);

		// textual form of the coordinates, used for both the C and the C++ string
		std::string coord(pt.toString());

		// create a C string from the particle coordinates
		// and copy into my struct. The buffer is later printed as a C string,
		// so it must be filled and NUL-terminated; the text is truncated to
		// c_str_len-1 characters if it is longer than the buffer
		vd.getProp<my_s1>(p).size = c_str_len;
		vd.getProp<my_s1>(p).ptr = new char[c_str_len];
		size_t len = coord.copy(vd.getProp<my_s1>(p).ptr,c_str_len-1);
		vd.getProp<my_s1>(p).ptr[len] = '\0';

		// create a C++ string from the particle coordinates
		vd.getProp<my_s1>(p).str = coord;

		// NOTE(review): the page text says the vector of the first my_struct is
		// filled with 1,2,3 here, but no such code is present — confirm

		pt = pt * 2.0;
		coord = std::string(pt.toString());

		// create a C string from the particle coordinates multiplied by 2.0
		// and copy into my struct (same bounded, terminated copy as above)
		vd.getProp<my_s2>(p).size = c_str_len;
		vd.getProp<my_s2>(p).ptr = new char[c_str_len];
		len = coord.copy(vd.getProp<my_s2>(p).ptr,c_str_len-1);
		vd.getProp<my_s2>(p).ptr[len] = '\0';

		// create a C++ string from the particle coordinates
		vd.getProp<my_s2>(p).str = coord;

		// NOTE(review): the page text says the vector of the second my_struct is
		// filled with 1,2,3,4 here, but no such code is present — confirm

		// next particle
		++it;
	}

	//! \cond [vect assign] \endcond

	 * \page Vector_4_complex_prop_ser Vector 4 property serialization
	 * ## Mapping and ghost_get ##
	 * Particles are redistributed across processors and we also synchronize the ghost
	 * \see \ref e0_s_map
	 * \see \ref e1_part_ghost
	 * \snippet Vector/4_complex_prop/main_ser.cpp vect map ghost

	//! \cond [vect map ghost] \endcond

	// Particles are redistributed across the processors;

	// Synchronize the ghost

	//! \cond [vect map ghost] \endcond

	 * \page Vector_4_complex_prop_ser Vector 4 property serialization
	 * ## Output and VTK visualization ##
	 * Vectors with complex properties can still be visualized, because unknown properties are
	 * automatically excluded
	 * \see \ref e0_s_vis_vtk
	 * \snippet Vector/4_complex_prop/main.cpp vtk

	//! \cond [vtk] \endcond


	//! \cond [vtk] \endcond

	 * \page Vector_4_complex_prop_ser Vector 4 property serialization
	 * ## Print 4 particles in the ghost area ##
	 * Here we print the first 4 particles in the ghost area to show that the two my_struct contain the
	 * right information
	 * \snippet Vector/4_complex_prop/main_ser.cpp print ghost info

	//! \cond [print ghost info] \endcond

	// Start from the index right after the last local particle: the loop below
	// prints the 4 entries that follow it (the ghost area according to the page text)
	size_t fg = vd.size_local();

	Vcluster & v_cl = create_vcluster();

	// Only the master processor (process unit 0) prints
	if (v_cl.getProcessUnitID() == 0)
		// Print 4 particles, indices size_local() .. size_local()+3
		// NOTE(review): nothing checks that 4 entries exist past size_local() —
		// confirm that at least 4 ghost particles are always present
		for ( ; fg < vd.size_local()+4 ; fg++)
			// Print my struct1 information
			std::cout << "my_struct1:" << std::endl;
			// ptr is streamed as a C string, so it must point to NUL-terminated text
			std::cout << "C-string: " << vd.getProp<my_s1>(fg).ptr << std::endl;
			std::cout << "Cpp-string: " << vd.getProp<my_s1>(fg).str << std::endl;

			// Print every element of the vector stored in the first my_struct
			for (size_t i = 0 ; i < vd.getProp<my_s1>(fg).v.size() ; i++)
				std::cout << "Element: " << i << "   " << vd.getProp<my_s1>(fg).v.get(i) << std::endl;

			// Print my struct 2 information
			std::cout << "my_struct2" << std::endl;
			// ptr is streamed as a C string, so it must point to NUL-terminated text
			std::cout << "C-string: " << vd.getProp<my_s2>(fg).ptr << std::endl;
			std::cout << "Cpp-string: " << vd.getProp<my_s2>(fg).str << std::endl;

			// Print every element of the vector stored in the second my_struct
			for (size_t i = 0 ; i < vd.getProp<my_s2>(fg).v.size() ; i++)
				std::cout << "Element: " << i << "   " << vd.getProp<my_s2>(fg).v.get(i) << std::endl;

	//! \cond [print ghost info] \endcond

	 * \page Vector_4_complex_prop_ser Vector 4 property serialization
	 * ## Finalize ## {#finalize}
	 *  At the very end of the program we have always to de-initialize the library
	 * \snippet Vector/4_complex_prop/main_ser.cpp finalize

	//! \cond [finalize] \endcond


	//! \cond [finalize] \endcond

	 * \page Vector_4_complex_prop_ser Vector 4 property serialization
	 * # Full code # {#code}
	 * \include Vector/4_complex_prop/main_ser.cpp
Exemplo n.º 8
int main(int argc, char* argv[])
	// ### WIKI 2 ###
	// Here we initialize the library, then we create a uniform random generator between 0 and 1 to generate particles
	// randomly in the domain, we create a Box that defines our domain, boundary conditions, and ghost
        // initialize the library

	// Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
	Box<2,float> domain({0.0,0.0},{1.0,1.0});

	// Here we define the boundary conditions of our problem
    size_t bc[2]={PERIODIC,PERIODIC};

	// extended boundary around the domain, and the processor domain
	Ghost<2,float> g(0.01);
	// ### WIKI 3 ###
	// Here we are creating a distributed vector defined by the following parameters
	// * 2 is the Dimensionality of the space where the objects live
	// * float is the type used for the spatial coordinate of the particles
	// * float,float[3],float[3][3] is the information stored by each particle a scalar float, a vector float[3] and a tensor of rank 2 float[3][3]
	//   the list of properties must be put into an aggregate data structure aggregate<prop1,prop2,prop3, ... >
	// vd is the instantiation of the object
	// The Constructor instead require:
	// * Number of particles 4096 in this case
	// * Domain where is defined this structure
	// * bc boundary conditions
	// * g Ghost
	// The following construct a vector where each processor has 4096 / N_proc (N_proc = number of processor)
	// objects with an undefined position in space. This non-space decomposition is also called data-driven
	// decomposition
	vector_dist<2,float, aggregate<float,float[3],float[3][3]> > vd(4096,domain,bc,g);

	// the scalar is the element at position 0 in the aggregate
	const int scalar = 0;

	// the vector is the element at position 1 in the aggregate
	const int vector = 1;

	// the tensor is the element at position 2 in the aggregate
	const int tensor = 2;

	// ### WIKI 5 ###
	// Get an iterator that go through the 4096 particles, in an undefined position state and define its position
	auto it = vd.getDomainIterator();

	// Give every particle returned by the domain iterator a random position
	while (it.isNext())
	{
		auto key = it.get();

		// we define x, assign a random position between 0.0 and 1.0
		// (the cast is required: rand() / RAND_MAX alone is an integer division
		// that yields 0 for every value except RAND_MAX)
		vd.getPos(key)[0] = (float)rand() / RAND_MAX;

		// we define y, assign a random position between 0.0 and 1.0
		vd.getPos(key)[1] = (float)rand() / RAND_MAX;

		// next particle
		++it;
	}

	// ### WIKI 6 ###
	// Once we define the position, we distribute them according to the default space decomposition
	// The default decomposition is created even before assigning the position to the object. It determine
	// which part of space each processor manage

	// ### WIKI 7 ###
	// We get the object that stores the decomposition, then we iterate again across all the objects, we count them
	// and we confirm that all the particles are local
	//Counter we use it later
	size_t cnt = 0;

	// Get the space decomposition
	auto & ct = vd.getDecomposition();

	// Get a particle iterator
	it = vd.getDomainIterator();

	// For each particle: set all its properties to 1.0 and count it
	while (it.isNext())
	{
		// ... p
		auto p = it.get();

		// we set the properties of the particle p
		// the scalar property
		vd.template getProp<scalar>(p) = 1.0;

		// the 3 components of the vector property
		for (size_t i = 0 ; i < 3 ; i++)
			vd.template getProp<vector>(p)[i] = 1.0;

		// the 3x3 components of the rank 2 tensor property
		for (size_t i = 0 ; i < 3 ; i++)
			for (size_t j = 0 ; j < 3 ; j++)
				vd.template getProp<tensor>(p)[i][j] = 1.0;

		// increment the counter
		cnt++;

		// next particle
		++it;
	}

	// ### WIKI 8 ###
	// cnt contains the number of objects the local processor holds. If we are interested in the total number across the processors
	// we can use the function add, to sum across processors. First we have to get an instance of Vcluster, queue an add operation with
	// the variable cnt and finally execute. All the operations are asynchronous; execute works like a barrier and ensures that all the
	// queued operations are executed
	auto & v_cl = create_vcluster();
	// ### WIKI 9 ###
	// Output the particle position for each processor


	// ### WIKI 10 ###
	// Deinitialize the library
Exemplo n.º 9
int main(int argc, char* argv[])
     * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness
     * ## Initialization ##
     * The initialization is the same as the molecular dynamic example. The differences are in the
     * parameters. We will use a bigger system, with more particles. The delta time for integration
     * is chosen in order to keep the system stable.
     * \see \ref e3_md_init
     * \snippet Vector/4_reorder/main_comp_ord.cpp vect create

    //! \cond [vect create] \endcond

    double dt = 0.0001;
    float r_cut = 0.03;
    double sigma = r_cut/3.0;
    double sigma12 = pow(sigma,12);
    double sigma6 = pow(sigma,6);

    openfpm::vector<double> x;
    openfpm::vector<openfpm::vector<double>> y;

    Vcluster & v_cl = create_vcluster();

    // we will use it to place particles on a 40x40x40 grid-like arrangement
    size_t sz[3] = {40,40,40};

    // domain
    Box<3,float> box({0.0,0.0,0.0}, {1.0,1.0,1.0});

    // Boundary conditions

    // ghost, big enough to contain the interaction radius
    Ghost<3,float> ghost(r_cut);

    vector_dist<3,double, aggregate<double[3],double[3]> > vd(0,box,bc,ghost);

    //! \cond [vect create] \endcond

     * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness
     * ## Particles on a grid like position ##
     * Here we place the particles on a grid like manner
     * \see \ref e3_md_gl
     * \snippet Vector/4_reorder/main_comp_ord.cpp vect grid

    //! \cond [vect grid] \endcond

    auto it = vd.getGridIterator(sz);

    while (it.isNext())

        auto key = it.get();

        vd.getLastPos()[0] = key.get(0) * it.getSpacing(0);
        vd.getLastPos()[1] = key.get(1) * it.getSpacing(1);
        vd.getLastPos()[2] = key.get(2) * it.getSpacing(2);

        vd.template getLastProp<velocity>()[0] = 0.0;
        vd.template getLastProp<velocity>()[1] = 0.0;
        vd.template getLastProp<velocity>()[2] = 0.0;

        vd.template getLastProp<force>()[0] = 0.0;
        vd.template getLastProp<force>()[1] = 0.0;
        vd.template getLastProp<force>()[2] = 0.0;


    //! \cond [vect grid] \endcond

     * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness
     * ## Molecular dynamic steps ##
     * Here we do 30000 MD steps using a Verlet integrator. The cycle is the same as in the
     * molecular dynamics example, with the following changes.
     * ### Cell lists ###
     * Instead of getting the normal cell list we get an hilbert curve cell-list. Such cell list has a
     * function called **getIterator** used inside the function **calc_forces** and **calc_energy**
     * that iterate across all the particles but in a smart-way. In practice
     * given an r-cut a cell-list is constructed with the provided spacing. Suppose to have a cell-list
     * \f$ m \times n \f$, a Hilbert curve \f$ 2^k \times 2^k \f$ is constructed with \f$ k = ceil(log_2(max(m,n))) \f$.
     * Cell-lists are explored according to this Hilbert curve, If a cell does not exist is simply skipped.
     * \verbatim
    +------+------+------+------+     Example of Hilbert curve running on a 3 x 3 Cell
    |      |      |      |      |     A Hilbert curve with k = ceil(log_2(3)) = 2 (4 x 4 cells)
    |  X+---->X   |  X +---> X  |
    |  ^   |  +   |  ^   |   +  |
    ***|******|******|****---|--+      *******
    *  +   |  v   |  +   *   v  |      *     *
    *  7   |  8+---->9   *   X  |      *     *  = Domain
    *  ^   |      |      *   +  |      *     *
    *--|-----------------*---|--+      *******
    *  +   |      |      *   v  |
    *  4<----+5   |   6<---+ X  |
    *      |  ^   |   +  *      |
    *      |  +   |   v  *      |
    *  1+---->2   |   3+---> X  |
    *      |      |      *      |

     this mean that we will iterate the following cells


     Suppose now that the particles are ordered like described

    Particles   id      Cell
                 0         1
                 1         7
                 2         8
    			 3		   1
    			 4		   9
    			 5		   9
    			 6		   6
    			 7		   7
    			 8		   3
    			 9		   2
    			10		   4
    			11		   3

    The iterator of the cell-list will explore the particles in the following way

    Cell     1  2 5 4  7  8  9  6 3
       	   |   | | | |   | |   | | |

     * \endverbatim
     * We cannot explain here what is a cache, but in practice is a fast memory in the CPU able
     * to store chunks of memory. The cache in general is much smaller than RAM, but the big advantage
     * is its speed. Retrieve data from the cache is much faster than RAM. Unfortunately the factors
     *  that determine what is on cache and what is not are multiples: Type of cache, algorithm ... .
     * Qualitatively all caches will tend to load chunks of data that you read multiple-time, or chunks
     *  of data that probably you will read based on pattern analysis. A small example is a linear memory copy where
     * you read consecutively memory and you write on consecutive memory.
     * Modern CPU recognize such pattern and decide to load on cache the consecutive memory before
     *  you actually require it.
     * Iterating the vector in the way described above has the advantage that when we do computation on particles
     * and its neighborhood with the sequence described above it will happen that:
     * * If to process a particle A we read some neighborhood particles to process the next particle A+1
     *   we will probably read most of the previous particles.
     * In order to show in practice what happen we first show the graph when we do not reorder
     * \htmlinclude Vector/4_reorder/no_reorder.html
     * The measure has oscillation but we see an asymptotic behavior from 0.04 in the initial condition to
     * 0.124 . Below we show what happen when we use iterator from the Cell list hilbert
     * \htmlinclude Vector/4_reorder/comp_reord.html
     * In cases where particles do not move or move very slowly, consider using data-reordering, because it can
     * give an **8-10% speedup**
     * \see \ref e4_reo
     *  ## Timers ##
     *  In order to collect the time of the force calculation we insert two timers around the function
     *  calc_force. The overall performance is instead calculated with another timer around the time stepping
     * \snippet Vector/4_reorder/main_data_ord.cpp timer start
     * \snippet Vector/4_reorder/main_data_ord.cpp timer stop
     * \see \ref e3_md_vi

    //! \cond [md steps] \endcond

    // Get the Cell list structure
    auto NN = vd.getCellList_hilb(r_cut);

    // calculate forces
    unsigned long int f = 0;

    timer time2;

#ifndef TEST_RUN
    // number of MD steps of a full run
    size_t Nstep = 30000;
#else
    // shortened run, selected when the example is compiled with TEST_RUN defined
    size_t Nstep = 300;
#endif

    // MD time stepping
    for (size_t i = 0; i < Nstep ; i++)
        // Get the iterator
        auto it3 = vd.getDomainIterator();

        // integrate velocity and space based on the calculated forces (Step1)
        while (it3.isNext())
            auto p = it3.get();

            // here we calculate v(tn + 0.5)
            vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
            vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
            vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];

            // here we calculate x(tn + 1)
            vd.getPos(p)[0] += vd.template getProp<velocity>(p)[0]*dt;
            vd.getPos(p)[1] += vd.template getProp<velocity>(p)[1]*dt;
            vd.getPos(p)[2] += vd.template getProp<velocity>(p)[2]*dt;


        // Because we moved the particles in space we have to map them and re-sync the ghost;
        vd.template ghost_get<>();

        timer time;
        if (i % 10 == 0)

        // calculate forces or a(tn + 1) Step 2

        if (i % 10 == 0)

        // Integrate the velocity Step 3
        auto it4 = vd.getDomainIterator();

        while (it4.isNext())
            auto p = it4.get();

            // here we calculate v(tn + 1)
            vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
            vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
            vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];


        // Every 100 iterations collect some statistics about the configuration
        if (i % 100 == 0)
            // We write the particle position for visualization (Without ghost)

            // we resync the ghost

            // We calculate the energy
            double energy = calc_energy(vd,NN,sigma12,sigma6);
            auto & vcl = create_vcluster();

            // We also print on terminal the value of the energy
            // only one processor (master) write on terminal
            if (vcl.getProcessUnitID() == 0)
                std::cout << std::endl << "Energy: " << energy << std::endl;


    std::cout << "Performance: " << time2.getwct() << std::endl;

    //! \cond [md steps] \endcond

     * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness
     * ## Plotting graphs ##
     * After we terminate the MD steps our vector x contains at which iteration we benchmark the force
     * calculation time, while y contains the measured time at that time-step. We can produce a graph X Y
     * \note The graph produced is an svg graph that can be view with a browser. From the browser we can
     *       also easily save the graph into pure svg format
     * \snippet Vector/4_reorder/main_comp_ord.cpp google chart

    //! \cond [google chart] \endcond

    // Google charts options, it store the options to draw the X Y graph
    GCoptions options;

    // Title of the graph
    options.title = std::string("Force calculation time");

    // Y axis name
    options.yAxis = std::string("Time");

    // X axis name
    options.xAxis = std::string("iteration");

    // width of the line
    options.lineWidth = 1.0;

    // Object that draw the X Y graph
    GoogleChart cg;

    // Add the graph
    // The graph that it produce is in svg format that can be opened on browser

    // Write into html format

    //! \cond [google chart] \endcond

     * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness
     * ## Finalize ##
     *  At the very end of the program we have always to de-initialize the library
     * \snippet Vector/4_reorder/main_comp_ord.cpp finalize

    //! \cond [finalize] \endcond


    //! \cond [finalize] \endcond

     * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness
     * # Full code #
     * \include Vector/4_reorder/main_comp_ord.cpp
Exemplo n.º 10
int main(int argc, char* argv[])
	 * \page Plot_0_cg Plot 0 Google Chart
	 * ## Initialization ##
	 * Here we initialize the library, and we check that we are on a single processor. GoogleChart
	 * cannot do parallel IO or write big files. So either we collect all data on one processor, or each
	 * processor writes a distinct file. In this particular example we simply stop if the program starts
	 * on more than one processor
	 * \snippet Plot/0_simple_graph/main.cpp initialize

	//! \cond [initialize] \endcond

	auto & v_cl = create_vcluster();

	// Google chart is only single processor
	if (v_cl.getProcessingUnits() > 1)
	{
		// Both statements belong to the guard: without the braces the return
		// would be executed unconditionally and the example would always stop here
		std::cerr << "Error: only one processor is allowed" << "\n";
		return 1;
	}

	//! \cond [initialize] \endcond

	 * \page Plot_0_cg Plot 0 Google Chart
	 * ## Graph data ##
	 * Here we have the vectors that will contain the information about the graph.
	 * \snippet Plot/0_simple_graph/main.cpp datas vector

	//! \cond [datas vector] \endcond

	openfpm::vector<std::string> x;
	openfpm::vector<openfpm::vector<double>> y;
	openfpm::vector<std::string> yn;

	//! \cond [datas vector] \endcond

	 * \page Plot_0_cg Plot 0 Google Chart
	 * We will try now to produce the following situation. Six values on **x** each of them having 4 values on **y**.
	 * This means that for each x value we have to define 4 y values. Having multiple values on x can be used for
	 * several purposes.
	 * * Define multiple lines. For example if we connect all the points # we obtain one line. If we connect
	 *   all the @ points we obtain another line, an so on ... (figure below)
	 * * Define error bands
	 * * Visualize different observables/parameters for the same value x
	 * \verbatim

		  y  ^                          $ dataset1
		     |                          * dataset2
		 0.9 |                          # dataset3
		     |       @                  @ dataset4
		     |   #
		 0.6 |       *   *   @       *
		     |   $   #   @   #   #
		     |   @       $   $   @   @
		 0.3 |           #   *   $   #
		     |       $           *
		     |   *                   $
		  0  |_________________________________
				 o   t   t   f   f   s          x
				 n   w   h   o   i   i
				 e   o   r   u   v   x
						 e   r   e

	 * We start from the first case (Define multiple lines)
	 * \snippet Plot/0_simple_graph/main.cpp data fill

	//! \cond [data fill] \endcond

	// Fill the x values

	// we have 4 dataset  or lines

	// Because we have 6 points on x each containing 4 lines or dataset, we have to provides
	// 6 point with 4 values at each x point

	//! \cond [data fill] \endcond

	 * \page Plot_0_cg Plot 0 Google Chart
	 * ## Graph options ##
	 * We can specify several options for the graphs.
	 * * Title of the graph
	 * * Title of the y axis
	 * * Title of the x axis
	 * \snippet Plot/0_simple_graph/main.cpp google chart

	//! \cond [google chart] \endcond

	GCoptions options;

	options.title = std::string("Example");
	options.yAxis = std::string("Y Axis");
	options.xAxis = std::string("X Axis");
	options.lineWidth = 5;

	//! \cond [google chart] \endcond

	 * \page Plot_0_cg Plot 0 Google Chart
	 * ## Graph write ##
	 * We create the object to create plots with Google Charts
	 * A writer can produce several graphs optionally interleaved with HTML code.
	 * Here we write in HTML a description of the graph, than we output the graph
	 * AddLinesGraph create a typical graph with lines
	 * \snippet Plot/0_simple_graph/main.cpp google chart write1
	 * \htmlonly
		<div id="chart_div0" style="width: 900px; height: 500px;"></div>

	//! \cond [google chart write1] \endcond

	GoogleChart cg;
	cg.addHTML("<h2>First graph</h2>");

	//! \cond [google chart write1] \endcond

	 * \page Plot_0_cg Plot 0 Google Chart
	 * ## Hist graph ##
	 * Hist graph is instead a more flexible Graph writer. In particular we can specify
	 * how to draw each dataset. With the option
	 * * **stype** specify how to draw each dataset
	 * * **stypeext** we can override the default stype option. In this case we say that the fourth dataset
	 *            (index 3) must be represented as a line instead of bars
	 * To note that we can reuse the same Google chart writer to write multiple
	 * Graph on the same page, interleaved with HTML code
	 * \snippet Plot/0_simple_graph/main.cpp google chart write2
	 * \htmlonly
		<div id="chart_div1" style="width: 900px; height: 500px;"></div>

	//! \cond [google chart write2] \endcond

	options.stype = std::string("bars");

	// it says that dataset4 must be represented with a line
	options.stypeext = std::string("{3: {type: 'line'}}");

	cg.addHTML("<h2>Second graph</h2>");

	//! \cond [google chart write2] \endcond

	 * \page Plot_0_cg Plot 0 Google Chart
	 * ## %Error bars ##
	 * Here we show how to draw error bars. %Error bars are drawn specifying intervals with a min and a max.
	 * Intervals in general does not have to encapsulate any curve. First we construct the vector y with 3
	 *  values the first value contain the curve points, the second and third contain the min,max interval.
	 * \snippet Plot/0_simple_graph/main.cpp google chart write3
	 * \htmlonly
		<div id="chart_div2" style="width: 900px; height: 500px;"></div>

	//! \cond [google chart write3] \endcond

	cg.addHTML("<h2>Third graph</h2>");

	// The first column contains the values of a line while the other 2 values
	// are the min and max of an interval; as we can see an interval does not
	// have to encapsulate any curve

	// Here we mark that columns 2 and 3 are intervals


	//! \cond [google chart write3] \endcond

	 * \page Plot_0_cg Plot 0 Google Chart
	 * The style of each interval can be controlled, and the definition of intervals can be interleaved with definition of
	 * other lines. In this example we show how to define 3 lines and 3 intervals, controlling the style of the last interval
	 * \snippet Plot/0_simple_graph/main.cpp google chart write4
	 * \htmlonly
		<div id="chart_div3" style="width: 900px; height: 500px;"></div>

	//! \cond [google chart write4] \endcond

	cg.addHTML("<h2>Four graph</h2>");

	// again 6 point but 9 values

	// columns 0 and 1 are lines
	// columns 2-3 and 4-5 are intervals
	// column 6 is a line
	// columns 7-8 are an interval

	// Intervals are enumerated with iX, for example in this case with 3 intervals we have i0,i1,i2
	// with this line we control the style of the intervals. In particular we change from the default
	// values
	options.intervalext = std::string("{'i2': { 'color': '#4374E0', 'style':'bars', 'lineWidth':4, 'fillOpacity':1 } }");


	//! \cond [google chart write4] \endcond

	 * \page Plot_0_cg Plot 0 Google Chart
	 * ## More options ##
	 * In this last example we also show how to:
	 * * Make the graph bigger, setting **width** and **height** options
	 * * Give the possibility to zoom-in and zoom-out with **GC_EXPLORER**
	 * * Use lines instead of a smooth function to connect points
	 * * Use logarithmic scale
	 * \note For more options refer to doxygen and Google Charts
	 * \snippet Plot/0_simple_graph/main.cpp google chart write5
	 * \htmlonly
		<div id="chart_div4" style="width: 1280px; height: 700px;"></div>

	//! \cond [google chart write5] \endcond

	openfpm::vector<double> xn;


	options.intervalext = "";
	options.width = 1280;
	options.heigh = 720;
	options.curveType = "line";
	options.more = GC_ZOOM + "," + GC_X_LOG + "," + GC_Y_LOG;



	//! \cond [google chart write5] \endcond

	 * \page Plot_0_cg Plot 0 Google Chart
	 * ## Finalize ## {#finalize}
	 *  At the very end of the program we always have to de-initialize the library
	 * \snippet Plot/0_simple_graph/main.cpp finalize

	//! \cond [finalize] \endcond


	//! \cond [finalize] \endcond
 ~ut_start()  { BOOST_TEST_MESSAGE("Delete global VClster"); openfpm_finalize(); }