void * msg_alloc(size_t msg_i ,size_t total_msg, size_t total_p, size_t i,size_t ri, void * ptr)
{
	// convert the void pointer argument into a pointer to receiving buffers
	openfpm::vector<openfpm::vector<unsigned char>> * v = static_cast<openfpm::vector<openfpm::vector<unsigned char>> *>(ptr);

	/////////////////////// IGNORE THESE LINES IN VCLUSTER DOCUMENTATION ////////////////////////
	/////////////////////// THEY COME FROM UNIT TESTING /////////////////////////////////////////

	if (create_vcluster().getProcessingUnits() <= 8)
	{if (totp_check) BOOST_REQUIRE_EQUAL(total_p,create_vcluster().getProcessingUnits()-1);}
	else
	{if (totp_check) BOOST_REQUIRE_EQUAL(total_p,(size_t)8);}

	BOOST_REQUIRE_EQUAL(msg_i, global_step);

	//////////////////////////////////////////////////////////////////////////

	// Create the memory to receive the message
	// msg_i contain the size of the message to receive
	// i contain the processor id
	v->get(i).resize(msg_i);

	// return the pointer of the allocated memory
	return &(v->get(i).get(0));
}
Пример #2
0
	/*! \brief constructor
	 *
	 * \param dom Box domain
	 * \param n_sub number of sub-domain to create (it is approximated to the biggest 2^n number)
	 * \param lp Local position of the particles
	 *
	 */
	ORB(Box dom, size_t n_sub, loc_pos & lp)
	:v_cl(create_vcluster()),lp(lp)
	{
		typedef ORB<dim,T,loc_wg,loc_pos,Box,Tree> ORB_class;

		dim_div = 0;

		n_sub = openfpm::math::round_big_2(n_sub);
		size_t nsub = log2(n_sub);

		// number of center or mass needed
		cm.resize(2 << nsub);
		cm_cnt.resize(2 << nsub);

		// Every time we divide from 0 to dim-1, calculate how many cycle from 0 to dim-1 we have
		size_t dim_cycle = nsub / dim;

		// Create the root node in the graph
		grp.addVertex();

		// unroll bisection cycle
		bisect_unroll<dim,ORB_class> bu(*this);
		for (size_t i = 0 ; i < dim_cycle ; i++)
		{
			boost::mpl::for_each< boost::mpl::range_c<int,0,dim> >(bu);
			// bu is recreated several time internaly
		}

		// calculate and execute the remaining cycles
		switch(nsub - dim_cycle * dim)
		{
		case 1:
			do_when_dim_gr_i<dim,1,ORB_class>::bisect_loop(bu);
			break;
		case 2:
			do_when_dim_gr_i<dim,2,ORB_class>::bisect_loop(bu);
			break;
		case 3:
			do_when_dim_gr_i<dim,3,ORB_class>::bisect_loop(bu);
			break;
		case 4:
			do_when_dim_gr_i<dim,4,ORB_class>::bisect_loop(bu);
			break;
		case 5:
			do_when_dim_gr_i<dim,5,ORB_class>::bisect_loop(bu);
			break;
		case 6:
			do_when_dim_gr_i<dim,6,ORB_class>::bisect_loop(bu);
			break;
		case 7:
			do_when_dim_gr_i<dim,7,ORB_class>::bisect_loop(bu);
			break;
		default:
			// To extend on higher dimension create other cases or create runtime
			// version of local_cm and bisect
			std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " ORB is not working for dimension bigger than 8";

		}
	}
	/*! \brief Create an empty Matrix
	 *
	 * \param N1 number of row
	 * \param N2 number of colums
	 * \param N1_loc number of local row
	 *
	 */
	SparseMatrix(size_t N1, size_t N2, size_t n_row_local)
	:l_row(n_row_local),l_col(n_row_local)
	{
		PETSC_SAFE_CALL(MatCreate(PETSC_COMM_WORLD,&mat));
		PETSC_SAFE_CALL(MatSetType(mat,MATMPIAIJ));
		PETSC_SAFE_CALL(MatSetSizes(mat,n_row_local,n_row_local,N1,N2));

		Vcluster & v_cl = create_vcluster();

		openfpm::vector<size_t> vn_row_local;
		v_cl.allGather(l_row,vn_row_local);
		v_cl.execute();

		// Calculate the starting row for this processor

		start_row = 0;
		for (size_t i = 0 ; i < v_cl.getProcessUnitID() ; i++)
			start_row += vn_row_local.get(i);
	}
	/*!
	 *
	 * \brief Write a set of particle position and properties into HDF5
	 *
	 * \tparam Pos Vector of positions type
	 * \taparam Prp Vector of properties type
	 * \tparam prp list of properties to output
	 *
	 * \param pos Vector with the positions
	 * \param prp Vector with the properties
	 * \param stop size of the vector to output
	 *
	 */
	template<typename VPos, typename VPrp, int ... prp > bool write(const std::string & file, openfpm::vector<VPos> & v_pos, openfpm::vector<VPrp> & v_prp, size_t stop)
	{
		Vcluster & v_cl = create_vcluster();

		// Open and HDF5 file in parallel

		hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
		H5Pset_fapl_mpio(plist_id, v_cl.getMPIComm(), MPI_INFO_NULL);
		file_id = H5Fcreate(file.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
		H5Pclose(plist_id);

		// Single coordinate positional vector
		openfpm::vector<typename VPos::coord_type> x_n;
		x_n.resize(stop);

		//for each component, fill x_n
		for (size_t i = 0 ; i < VPos::dims ; i++)
		{
			//
			for (size_t j = 0 ; j < stop ; j++)
				x_n.get(j) = v_pos.template get<0>(j)[i];

			std::stringstream str;
			str << "x" << i;

			HDF5CreateDataSet<typename VPos::coord_type>(file_id,str.str(),x_n.getPointer(),stop*sizeof(typename VPos::coord_type));
		}

		// Now we write the properties

		typedef typename to_boost_vmpl<prp ... >::type v_prp_seq;
		H5_prop_out<openfpm::vector<VPrp>,has_attributes<VPrp>::value> f(file_id,v_prp,stop);

		boost::mpl::for_each_ref<v_prp_seq>(f);

		H5Fclose(file_id);

		return true;
	}
	/*! \brief Resize the Sparse Matrix
	 *
	 * \param row number for row
	 * \param col number of colums
	 * \param local number of row
	 * \param local number of colums
	 *
	 */
	void resize(size_t row, size_t col, size_t l_row, size_t l_col)
	{
		this->g_row = row;
		this->g_col = col;

		this->l_row = l_row;
		this->l_col = l_col;

		PETSC_SAFE_CALL(MatSetSizes(mat,l_row,l_col,g_row,g_col));

		Vcluster & v_cl = create_vcluster();

		openfpm::vector<size_t> vn_row_local;
		v_cl.allGather(l_row,vn_row_local);
		v_cl.execute();

		// Calculate the starting row for this processor

		start_row = 0;
		for (size_t i = 0 ; i < v_cl.getProcessUnitID() ; i++)
			start_row += vn_row_local.get(i);
	}
void create_decomposition2x2(openfpm::vector<openfpm::vector<long unsigned int>> & box_nn_processor, openfpm::vector<SpaceBox<2,float>> & sub_domains)
{
	Vcluster & v_cl = create_vcluster();

	box_nn_processor.add();

	if (v_cl.getProcessUnitID() == 0)
	{
		box_nn_processor.get(0).add(1);
		box_nn_processor.get(0).add(2);
		box_nn_processor.get(0).add(3);

		sub_domains.add(Box<2,float>({0.0,0.0},{0.5,0.5}));
	}
	else if (v_cl.getProcessUnitID() == 1)
	{
		box_nn_processor.get(0).add(0);
		box_nn_processor.get(0).add(2);
		box_nn_processor.get(0).add(3);

		sub_domains.add(Box<2,float>({0.5,0.0},{1.0,0.5}));
	}
	else if (v_cl.getProcessUnitID() == 2)
	{
		box_nn_processor.get(0).add(1);
		box_nn_processor.get(0).add(0);
		box_nn_processor.get(0).add(3);

		sub_domains.add(Box<2,float>({0.0,0.5},{0.5,1.0}));
	}
	else if (v_cl.getProcessUnitID() == 3)
	{
		box_nn_processor.get(0).add(1);
		box_nn_processor.get(0).add(2);
		box_nn_processor.get(0).add(0);

		sub_domains.add(Box<2,float>({0.5,0.5},{1.0,1.0}));
	}
}
int main(int argc, char ** argv)
{
	// Initialize the global VCluster
	openfpm_init(&argc,&argv);

	// Vcluster
	Vcluster & vcl = create_vcluster();

	//! [Create CartDecomposition vtk gen]
	CartDecomposition<2,float> dec(vcl);

	// Physical domain
	Box<2,float> box({0.0,0.0},{1.0,1.0});

	// division on each direction
	size_t div[2] = {20,20};

	// Define ghost
	Ghost<2,float> g(0.01);

	// boundary conditions
	size_t bc[2] = {PERIODIC,PERIODIC};

	// Decompose and write the decomposed graph
	dec.setParameters(div,box,bc,g);
	dec.decompose();

	// create a ghost border
	dec.calculateGhostBoxes();

	// Write the decomposition
	dec.write("CartDecomposition/out_");

	//! [Create CartDecomposition]

	// deinitialize the library
	openfpm_finalize();
}
template<unsigned int ip> void test_no_send_some_peer()
{
	Vcluster & vcl = create_vcluster();

	size_t n_proc = vcl.getProcessingUnits();

	// Check long communication with some peer not comunication

	size_t j = 4567;

	global_step = j;
	// Processor step
	long int ps = n_proc / (8 + 1);

	// send message
	openfpm::vector<openfpm::vector<unsigned char>> message;
	// recv message
	openfpm::vector<openfpm::vector<unsigned char>> recv_message(n_proc);

	openfpm::vector<size_t> prc;

	// only even communicate

	if (vcl.getProcessUnitID() % 2 == 0)
	{
		for (size_t i = 0 ; i < 8  && i < n_proc ; i++)
		{
			size_t p_id = ((i+1) * ps + vcl.getProcessUnitID()) % n_proc;
			if (p_id != vcl.getProcessUnitID())
			{
				prc.add(p_id);
				message.add();
				std::ostringstream msg;
				msg << "Hello from " << vcl.getProcessUnitID() << " to " << p_id;
				std::string str(msg.str());
				message.last().resize(j);
				memset(message.last().getPointer(),0,j);
				std::copy(str.c_str(),&(str.c_str())[msg.str().size()],&(message.last().get(0)));
			}
		}
	}

	recv_message.resize(n_proc);

#ifdef VERBOSE_TEST
	timer t;
	t.start();
#endif

	commFunc_null_odd<ip>(vcl,prc,message,msg_alloc,&recv_message);

#ifdef VERBOSE_TEST
	t.stop();
	double clk = t.getwct();
	double clk_max = clk;

	size_t size_send_recv = 2 * j * (prc.size());
	vcl.sum(size_send_recv);
	vcl.max(clk_max);
	vcl.execute();

	if (vcl.getProcessUnitID() == 0)
		std::cout << "(Long pattern: " << method<ip>() << ")Buffer size: " << j << "    Bandwidth (Average): " << size_send_recv / vcl.getProcessingUnits() / clk / 1e6 << " MB/s  " << "    Bandwidth (Total): " << size_send_recv / clk / 1e6 << " MB/s    Clock: " << clk << "   Clock MAX: " << clk_max <<"\n";
#endif

	// Check the message
	for (long int i = 0 ; i < 8  && i < (long int)n_proc ; i++)
	{
		long int p_id = (- (i+1) * ps + (long int)vcl.getProcessUnitID());
		if (p_id < 0)
			p_id += n_proc;
		else
			p_id = p_id % n_proc;

		if (p_id != (long int)vcl.getProcessUnitID())
		{
			// only even processor communicate
			if (p_id % 2 == 1)
				continue;

			std::ostringstream msg;
			msg << "Hello from " << p_id << " to " << vcl.getProcessUnitID();
			std::string str(msg.str());
			BOOST_REQUIRE_EQUAL(std::equal(str.c_str(),str.c_str() + str.size() ,&(recv_message.get(p_id).get(0))),true);
		}
		else
		{
			BOOST_REQUIRE_EQUAL((size_t)0,recv_message.get(p_id).size());
		}
	}
}
Пример #9
0
int main(int argc, char* argv[])
{
	/*!
	 * \page Vector_5_md_vl_sym_crs Vector 5 molecular dynamic with symmetric Verlet list crossing scheme
	 *
	 * ## Simulation ## {#md_e5_sym_sim_crs}
	 *
	 * The simulation is equal to the simulation explained in the example molecular dynamic
	 *
	 * \see \ref md_e5_sym
	 *
	 * The difference is that we create a symmetric Verlet-list for crossing scheme instead of a normal one
	 * \snippet Vector/5_molecular_dynamic_sym_crs/main.cpp sim verlet
	 *
	 * The rest of the code remain unchanged
	 *
	 * \snippet Vector/5_molecular_dynamic_sym_crs/main.cpp simulation
	 *
	 */

	//! \cond [simulation] \endcond

	double dt = 0.00025;
	double sigma = 0.1;
	double r_cut = 3.0*sigma;
	double r_gskin = 1.3*r_cut;
	double sigma12 = pow(sigma,12);
	double sigma6 = pow(sigma,6);

	openfpm::vector<double> x;
	openfpm::vector<openfpm::vector<double>> y;

	openfpm_init(&argc,&argv);
	Vcluster & v_cl = create_vcluster();

	// we will use it do place particles on a 10x10x10 Grid like
	size_t sz[3] = {10,10,10};

	// domain
	Box<3,float> box({0.0,0.0,0.0},{1.0,1.0,1.0});

	// Boundary conditions
	size_t bc[3]={PERIODIC,PERIODIC,PERIODIC};

	// ghost, big enough to contain the interaction radius
	Ghost<3,float> ghost(r_gskin);
	ghost.setLow(0,0.0);
	ghost.setLow(1,0.0);
	ghost.setLow(2,0.0);

	vector_dist<3,double, aggregate<double[3],double[3]> > vd(0,box,bc,ghost,BIND_DEC_TO_GHOST);

	size_t k = 0;
	size_t start = vd.accum();

	auto it = vd.getGridIterator(sz);

	while (it.isNext())
	{
		vd.add();

		auto key = it.get();

		vd.getLastPos()[0] = key.get(0) * it.getSpacing(0);
		vd.getLastPos()[1] = key.get(1) * it.getSpacing(1);
		vd.getLastPos()[2] = key.get(2) * it.getSpacing(2);

		vd.template getLastProp<velocity>()[0] = 0.0;
		vd.template getLastProp<velocity>()[1] = 0.0;
		vd.template getLastProp<velocity>()[2] = 0.0;

		vd.template getLastProp<force>()[0] = 0.0;
		vd.template getLastProp<force>()[1] = 0.0;
		vd.template getLastProp<force>()[2] = 0.0;

		k++;
		++it;
	}

	vd.map();
	vd.ghost_get<>();

	timer tsim;
	tsim.start();

	//! \cond [sim verlet] \endcond

	// Get the Cell list structure
	auto NN = vd.getVerletCrs(r_gskin);;

	//! \cond [sim verlet] \endcond

	// calculate forces
	calc_forces(vd,NN,sigma12,sigma6,r_cut);
	unsigned long int f = 0;

	int cnt = 0;
	double max_disp = 0.0;

	// MD time stepping
	for (size_t i = 0; i < 10000 ; i++)
	{
		// Get the iterator
		auto it3 = vd.getDomainIterator();

		double max_displ = 0.0;

		// integrate velicity and space based on the calculated forces (Step1)
		while (it3.isNext())
		{
			auto p = it3.get();

			// here we calculate v(tn + 0.5)
			vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
			vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
			vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];

			Point<3,double> disp({vd.template getProp<velocity>(p)[0]*dt,vd.template getProp<velocity>(p)[1]*dt,vd.template getProp<velocity>(p)[2]*dt});

			// here we calculate x(tn + 1)
			vd.getPos(p)[0] += disp.get(0);
			vd.getPos(p)[1] += disp.get(1);
			vd.getPos(p)[2] += disp.get(2);

			if (disp.norm() > max_displ)
				max_displ = disp.norm();

			++it3;
		}

		if (max_disp < max_displ)
			max_disp = max_displ;

		// Because we moved the particles in space we have to map them and re-sync the ghost
		if (cnt % 10 == 0)
		{
			vd.map();
			vd.template ghost_get<>();
			// Get the Cell list structure
			vd.updateVerlet(NN,r_gskin,VL_CRS_SYMMETRIC);
		}
		else
		{
			vd.template ghost_get<>(SKIP_LABELLING);
		}

		cnt++;

		// calculate forces or a(tn + 1) Step 2
		calc_forces(vd,NN,sigma12,sigma6,r_cut);

		// Integrate the velocity Step 3
		auto it4 = vd.getDomainIterator();

		while (it4.isNext())
		{
			auto p = it4.get();

			// here we calculate v(tn + 1)
			vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
			vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
			vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];

			++it4;
		}

		// After every iteration collect some statistic about the confoguration
		if (i % 100 == 0)
		{
			// We write the particle position for visualization (Without ghost)
			vd.deleteGhost();
			vd.write("particles_",f);

			// we resync the ghost
			vd.ghost_get<>();


			// We calculate the energy
			double energy = calc_energy(vd,NN,sigma12,sigma6,r_cut);
			auto & vcl = create_vcluster();
			vcl.sum(energy);
			vcl.max(max_disp);
			vcl.execute();

			// we save the energy calculated at time step i c contain the time-step y contain the energy
			x.add(i);
			y.add({energy});

			// We also print on terminal the value of the energy
			// only one processor (master) write on terminal
			if (vcl.getProcessUnitID() == 0)
				std::cout << "Energy: " << energy << "   " << max_disp << "   " << std::endl;

			max_disp = 0.0;

			f++;
		}
	}

	tsim.stop();
	std::cout << "Time: " << tsim.getwct()  << std::endl;

	//! \cond [simulation] \endcond

	// Google charts options, it store the options to draw the X Y graph
	GCoptions options;

	// Title of the graph
	options.title = std::string("Energy with time");

	// Y axis name
	options.yAxis = std::string("Energy");

	// X axis name
	options.xAxis = std::string("iteration");

	// width of the line
	options.lineWidth = 1.0;

	// Object that draw the X Y graph
	GoogleChart cg;

	// Add the graph
	// The graph that it produce is in svg format that can be opened on browser
	cg.AddLinesGraph(x,y,options);

	// Write into html format
	cg.write("gc_plot2_out.html");

	//! \cond [google chart] \endcond

	/*!
	 * \page Vector_5_md_vl_sym_crs Vector 5 molecular dynamic with symmetric Verlet list crossing scheme
	 *
	 * ## Finalize ## {#finalize_v_e5_md_sym_crs}
	 *
	 *  At the very end of the program we have always to de-initialize the library
	 *
	 * \snippet Vector/5_molecular_dynamic_sym_crs/main.cpp finalize
	 *
	 */

	//! \cond [finalize] \endcond

	openfpm_finalize();

	//! \cond [finalize] \endcond

	/*!
	 * \page Vector_5_md_vl_sym_crs Vector 5 molecular dynamic with symmetric Verlet list crossing scheme
	 *
	 * ## Full code ## {#full_code_v_e5_md_sym_crs}
	 *
	 * \include Vector/5_molecular_dynamic_sym_crs/main.cpp
	 *
	 */
}
Пример #10
0
int main(int argc, char* argv[])
{
	/*!
	 *
	 * \page Vector_4_complex_prop Vector 4 complex properties
	 *
	 *
	 * ## Initialization and vector creation ##
	 *
	 * We first initialize the library and define useful constants
	 *
	 * \see \ref e0_s_init
	 *
	 * \snippet Vector/4_complex_prop/main.cpp lib init
	 *
	 * We also define a custom structure
	 *
	 * \snippet Vector/4_complex_prop/main.cpp struct A
	 *
	 * After we initialize the library we can create a vector with complex properties
	 * with the following line
	 *
	 * \snippet Vector/4_complex_prop/main.cpp vect create
	 *
	 * In this this particular case every particle carry a scalar,
	 * a vector in form of float[3], a Point, a list
	 * in form of vector of float and a list of custom structures, and a vector of vector.
	 * In general particles can have properties of arbitrary complexity.
	 *
	 * \warning For arbitrary complexity mean that we can use any openfpm data structure with and arbitrary nested complexity.
	 *          For example a openfpm::vector<aggregate<grid_cpu<openfpm::vector<aggregate<double,double[3]>>>,openfpm::vector<float>> is valid
	 * \verbatim

	       particle
	          *
	        vector
	         / \
	        /   \
	     grid    vector<float>
	      /\
	     /  \
	double  double[3]

	 * \endverbatim
	 *
	 * Our custom data-structure A is defined below. Note that this data-structure
	 * does not have pointers
	 *
	 * \snippet Vector/4_complex_prop/main.cpp struct A
	 *
	 *
	 * \warning custom data structure are allowed only if they does not have pointer.
	 *          In case they have pointer we have to define how to serialize our data-structure
	 *
	 * \see \ref vector_example_cp_ser
	 *
	 */

	//! \cond [lib init] \endcond

    // initialize the library
	openfpm_init(&argc,&argv);

	// Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
	Box<2,float> domain({0.0,0.0},{1.0,1.0});

	// Here we define the boundary conditions of our problem
    size_t bc[2]={PERIODIC,PERIODIC};

	// extended boundary around the domain, and the processor domain
	Ghost<2,float> g(0.01);
	
	// the scalar is the element at position 0 in the aggregate
	constexpr int scalar = 0;

	// the vector is the element at position 1 in the aggregate
	constexpr int vector = 1;

	// the tensor is the element at position 2 in the aggregate
	constexpr int point = 2;

	// A list1
	constexpr int list = 3;

	// A listA
	constexpr int listA = 4;

	// A list of list
	constexpr int listlist = 5;

	//! \cond [lib init] \endcond

	//! \cond [struct A] \endcond

	// The custom structure
	struct A
	{
		float p1;
		int p2;

		A() {};

		A(float p1, int p2)
		:p1(p1),p2(p2)
		{}
	};

	//! \cond [struct A] \endcond

	//! \cond [vect create] \endcond

	vector_dist<2,float, aggregate<float,
	                               float[3],
								   Point<3,double>,
								   openfpm::vector<float>,
	                               openfpm::vector<A>,
								   openfpm::vector<openfpm::vector<float>>> >
	vd(4096,domain,bc,g);

	//! \cond [vect create] \endcond

	/*!
	 *
	 * \page Vector_4_complex_prop Vector 4 complex properties
	 *
	 *
	 * ## Assign values to properties ##
	 *
	 * Assign values to properties does not changes, from the simple case. Consider
	 * now that each particle has a list, so when we can get the property listA for particle p
	 * and resize such list with **vd.getProp<listA>(p).resize(...)**. We can add new elements at the
	 * end with **vd.getProp<listA>(p).add(...)** and get some element of this list with **vd.getProp<listA>(p).get(i)**.
	 * More in general vd.getProp<listA>(p) return a reference to the openfpm::vector contained by the particle.
	 *
	 * \snippet Vector/4_complex_prop/main.cpp vect assign
	 *
	 */

	//! \cond [vect assign] \endcond

	auto it = vd.getDomainIterator();

	while (it.isNext())
	{
		auto p = it.get();

		// we define x, assign a random position between 0.0 and 1.0
		vd.getPos(p)[0] = (float)rand() / RAND_MAX;

		// we define y, assign a random position between 0.0 and 1.0
		vd.getPos(p)[1] = (float)rand() / RAND_MAX;


		vd.getProp<scalar>(p) = 1.0;

		vd.getProp<vector>(p)[0] = 1.0;
		vd.getProp<vector>(p)[1] = 1.0;
		vd.getProp<vector>(p)[2] = 1.0;

		vd.getProp<point>(p).get(0) = 1.0;
		vd.getProp<point>(p).get(1) = 1.0;
		vd.getProp<point>(p).get(2) = 1.0;

		size_t n_cp = (float)10.0 * rand()/RAND_MAX;

		vd.getProp<listA>(p).resize(n_cp);

		for (size_t i = 0 ; i < n_cp ; i++)
		{
			vd.getProp<list>(p).add(i + 10);
			vd.getProp<list>(p).add(i + 20);
			vd.getProp<list>(p).add(i + 30);

			vd.getProp<listA>(p).get(i) = A(i+10.0,i+20.0);
		}

		vd.getProp<listlist>(p).resize(2);
		vd.getProp<listlist>(p).get(0).resize(2);
		vd.getProp<listlist>(p).get(1).resize(2);

		vd.getProp<listlist>(p).get(0).get(0) = 1.0;
		vd.getProp<listlist>(p).get(0).get(1) = 2.0;
		vd.getProp<listlist>(p).get(1).get(0) = 3.0;
		vd.getProp<listlist>(p).get(1).get(1) = 4.0;

		// next particle
		++it;
	}

	//! \cond [vect assign] \endcond

	/*!
	 *
	 * \page Vector_4_complex_prop Vector 4 complex properties
	 *
	 *
	 * ## Mapping and ghost_get ##
	 *
	 * Particles are redistributed across processors all properties are communicated but instead of
	 * using map we use **map_list** that we can use to select properties.
	 * A lot of time complex properties can be recomputed and communicate them is not a good idea.
	 * The same concept also apply for ghost_get. In general we choose which properties to communicate
	 *
	 *
	 * \see \ref e0_s_map
	 *
	 * \see \ref e1_part_ghost
	 *
	 * \snippet Vector/4_complex_prop/main.cpp vect map ghost
	 *
	 */

	//! \cond [vect map ghost] \endcond

	// Particles are redistribued across the processors but only the scalar,vector, and point properties
	// are transfert
	vd.map_list<scalar,vector,point,list,listA,listlist>();
	
	// Synchronize the ghost
	vd.ghost_get<scalar,vector,point,listA,listlist>();

	//! \cond [vect map ghost] \endcond

	/*!
	 *
	 * \page Vector_4_complex_prop Vector 4 complex properties
	 *
	 *
	 * ## Output and VTK visualization ##
	 *
	 * Vector with complex properties can be still be visualized, because unknown properties are
	 * automatically excluded
	 *
	 * \see \ref e0_s_vis_vtk
	 *
	 * \snippet Vector/4_complex_prop/main.cpp vtk
	 *
	 */

	//! \cond [vtk] \endcond

	vd.write("particles");

	//! \cond [vtk] \endcond

	/*!
	 *
	 * \page Vector_4_complex_prop Vector 4 complex properties
	 *
	 * ## Print 4 particles in the ghost area ##
	 *
	 * Here we print that the first 4 particles to show that the list of A and the list of list are filled
	 * and the ghosts contain the correct information
	 *
	 * \snippet Vector/4_complex_prop/main.cpp print ghost info
	 *
	 */

	//! \cond [print ghost info] \endcond

	size_t fg = vd.size_local();

	Vcluster & v_cl = create_vcluster();
	if (v_cl.getProcessUnitID() == 0)
	{
		for ( ; fg < vd.size_local()+4 ; fg++)
		{
			std::cout << "List of A" << std::endl;
			for (size_t i = 0 ; i < vd.getProp<listA>(fg).size() ; i++)
				std::cout << "Element: " << i << "   p1=" << vd.getProp<listA>(fg).get(i).p1 << "   p2=" << vd.getProp<listA>(fg).get(i).p2 << std::endl;

			std::cout << "List of list" << std::endl;
			for (size_t i = 0 ; i < vd.getProp<listlist>(fg).size() ; i++)
			{
				for (size_t j = 0 ; j < vd.getProp<listlist>(fg).get(i).size() ; j++)
					std::cout << "Element: " << i << "  " << j << "   " << vd.getProp<listlist>(fg).get(i).get(j) << std::endl;
			}
		}
	}

	//! \cond [print ghost info] \endcond

	/*!
	 * \page Vector_4_complex_prop Vector 4 complex properties
	 *
	 * ## Finalize ## {#finalize}
	 *
	 *  At the very end of the program we have always to de-initialize the library
	 *
	 * \snippet Vector/4_complex_prop/main.cpp finalize
	 *
	 */

	//! \cond [finalize] \endcond

	openfpm_finalize();

	//! \cond [finalize] \endcond

	/*!
	 * \page Vector_4_complex_prop Vector 4 complex properties
	 *
	 * # Full code # {#code}
	 *
	 * \include Vector/4_complex_prop/main.cpp
	 *
	 */
}
	/*! \brief solve complex use Krylov solver in combination with Block-Jacobi + Nested
	 *
	 *
	 * Solve the system using block-jacobi preconditioner + nested solver for each block
	 *
	 */
	void solve_complex(Mat & A_, const Vec & b_, Vec & x_)
	{
		// We get the size of the Matrix A
		PetscInt row;
		PetscInt col;
		PetscInt row_loc;
		PetscInt col_loc;
		PetscInt * blks;
		PetscInt nlocal;
		PetscInt first;
		KSP *subksp;
		PC subpc;

		PETSC_SAFE_CALL(MatGetSize(A_,&row,&col));
		PETSC_SAFE_CALL(MatGetLocalSize(A_,&row_loc,&col_loc));

		// Set the preconditioner to Block-jacobi
		PC pc;
		KSPGetPC(ksp,&pc);
		PCSetType(pc,PCBJACOBI);
		PETSC_SAFE_CALL(KSPSetType(ksp,KSPGMRES));

		PetscInt m = 1;

		PetscMalloc1(m,&blks);
		for (size_t i = 0 ; i < m ; i++) blks[i] = row_loc;

		PCBJacobiSetLocalBlocks(pc,m,blks);
		PetscFree(blks);

		KSPSetUp(ksp);
		PCSetUp(pc);

		PCBJacobiGetSubKSP(pc,&nlocal,&first,&subksp);

		for (size_t i = 0; i < nlocal; i++)
		{
			KSPGetPC(subksp[i],&subpc);

			PCSetType(subpc,PCLU);

//			PCSetType(subpc,PCJACOBI);
			KSPSetType(subksp[i],KSPPREONLY);
//			PETSC_SAFE_CALL(KSPSetConvergenceTest(ksp,KSPConvergedDefault,NULL,NULL));
//			KSPSetTolerances(subksp[i],1.e-6,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);

/*			if (!rank)
			{
				if (i%2)
				{
					PCSetType(subpc,PCILU);
				}
				else
				{
					PCSetType(subpc,PCNONE);
					KSPSetType(subksp[i],KSPBCGS);
					KSPSetTolerances(subksp[i],1.e-6,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
				}
			}
			else
			{
					PCSetType(subpc,PCJACOBI);
					KSPSetType(subksp[i],KSPGMRES);
					KSPSetTolerances(subksp[i],1.e-6,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
			}*/
		}


		KSPSolve(ksp,b_,x_);

		auto & v_cl = create_vcluster();
		if (try_solve == true)
		{
			// calculate error statistic about the solution
			solError err = statSolutionError(A_,b_,x_);

			if (v_cl.getProcessUnitID() == 0)
			{
				std::cout << "Method: " << s_type << " " << " pre-conditoner: " << PCJACOBI << "  iterations: " << err.its << std::endl;
				std::cout << "Norm of error: " << err.err_norm << "   Norm infinity: " << err.err_inf << std::endl;
			}
		}
	}
	/*! \brief solve simple use a Krylov solver + Simple selected Parallel Pre-conditioner
	 *
	 */
	void solve_Krylov_simple(Mat & A_, const Vec & b_, Vec & x_)
	{
		PETSC_SAFE_CALL(KSPSetType(ksp,s_type));

		// We set the size of x according to the Matrix A
		PetscInt row;
		PetscInt col;
		PetscInt row_loc;
		PetscInt col_loc;

		PETSC_SAFE_CALL(MatGetSize(A_,&row,&col));
		PETSC_SAFE_CALL(MatGetLocalSize(A_,&row_loc,&col_loc));

		PC pc;

		// We set the Matrix operators
		PETSC_SAFE_CALL(KSPSetOperators(ksp,A_,A_));

		// We set the pre-conditioner
		PETSC_SAFE_CALL(KSPGetPC(ksp,&pc));

		// PETSC_SAFE_CALL(PCSetType(pc,PCJACOBI));

		PETSC_SAFE_CALL(PCSetType(pc,PCHYPRE));
//		PCGAMGSetNSmooths(pc,0);
	    PCFactorSetShiftType(pc, MAT_SHIFT_NONZERO);
	    PCFactorSetShiftAmount(pc, PETSC_DECIDE);
		PCHYPRESetType(pc, "boomeramg");
		MatSetBlockSize(A_,4);
		PetscOptionsSetValue("-pc_hypre_boomeramg_print_statistics","2");
		PetscOptionsSetValue("-pc_hypre_boomeramg_max_iter","1000");
		PetscOptionsSetValue("-pc_hypre_boomeramg_nodal_coarsen","true");
		PetscOptionsSetValue("-pc_hypre_boomeramg_relax_type_all","SOR/Jacobi");
		PetscOptionsSetValue("-pc_hypre_boomeramg_coarsen_type","Falgout");
		PetscOptionsSetValue("-pc_hypre_boomeramg_cycle_type","W");
		PetscOptionsSetValue("-pc_hypre_boomeramg_max_levels","10");
		KSPSetFromOptions(ksp);
		// if we are on on best solve set-up a monitor function

		if (try_solve == true)
		{
			// for bench-mark we are interested in non-preconditioned norm
			PETSC_SAFE_CALL(KSPMonitorSet(ksp,monitor,&vres,NULL));

			// Disable convergence check
			PETSC_SAFE_CALL(KSPSetConvergenceTest(ksp,KSPConvergedSkip,NULL,NULL));
		}

		// Solve the system
		PETSC_SAFE_CALL(KSPSolve(ksp,b_,x_));

		auto & v_cl = create_vcluster();
//		if (try_solve == true)
//		{
			// calculate error statistic about the solution
			solError err = statSolutionError(A_,b_,x_);

			if (v_cl.getProcessUnitID() == 0)
			{
				std::cout << "Method: " << s_type << " " << " pre-conditoner: " << PCJACOBI << "  iterations: " << err.its << std::endl;
				std::cout << "Norm of error: " << err.err_norm << "   Norm infinity: " << err.err_inf << std::endl;
			}
//		}
	}
	void solve_ASM(Mat & A_, const Vec & b_, Vec & x_)
	{
		PETSC_SAFE_CALL(KSPSetType(ksp,s_type));

		// We set the size of x according to the Matrix A
		PetscInt row;
		PetscInt col;
		PetscInt row_loc;
		PetscInt col_loc;

		PETSC_SAFE_CALL(MatGetSize(A_,&row,&col));
		PETSC_SAFE_CALL(MatGetLocalSize(A_,&row_loc,&col_loc));

		PC pc;

		// We set the Matrix operators
		PETSC_SAFE_CALL(KSPSetOperators(ksp,A_,A_));

		// We set the pre-conditioner
		PETSC_SAFE_CALL(KSPGetPC(ksp,&pc));
		PETSC_SAFE_CALL(PCSetType(pc,PCASM));

		////////////////

		/////////////////////


//		PCASMSetLocalSubdomains(pc);

		PCASMSetOverlap(pc,5);

		KSP       *subksp;        /* array of KSP contexts for local subblocks */
		PetscInt  nlocal,first;   /* number of local subblocks, first local subblock */
		PC        subpc;          /* PC context for subblock */
		PetscBool isasm;

		/*
		   Set runtime options
		*/
//		KSPSetFromOptions(ksp);

		/*
		  Flag an error if PCTYPE is changed from the runtime options
		*/
//		PetscObjectTypeCompare((PetscObject)pc,PCASM,&isasm);
//		if (!isasm) SETERRQ(PETSC_COMM_WORLD,1,"Cannot Change the PCTYPE when manually changing the subdomain solver settings");

		/*
		  Call KSPSetUp() to set the block Jacobi data structures (including
		  creation of an internal KSP context for each block).

		  Note: KSPSetUp() MUST be called before PCASMGetSubKSP().
		*/
		KSPSetUp(ksp);

		/*
		   Extract the array of KSP contexts for the local blocks
		*/
		PCASMGetSubKSP(pc,&nlocal,&first,&subksp);

		/*
		   Loop over the local blocks, setting various KSP options
		   for each block.
		*/
		for (size_t i = 0 ; i < nlocal ; i++)
		{
			KSPGetPC(subksp[i],&subpc);
//			PCFactorSetFill(subpc,30);
			PCFactorSetLevels(subpc,5);
			PCSetType(subpc,PCILU);
			KSPSetType(subksp[i],KSPRICHARDSON);
			KSPSetTolerances(subksp[i],1.e-3,0.1,PETSC_DEFAULT,PETSC_DEFAULT);
		}

		// if we are on on best solve set-up a monitor function

		if (try_solve == true)
		{
			// for bench-mark we are interested in non-preconditioned norm
			PETSC_SAFE_CALL(KSPMonitorSet(ksp,monitor,&vres,NULL));

			// Disable convergence check
			PETSC_SAFE_CALL(KSPSetConvergenceTest(ksp,KSPConvergedSkip,NULL,NULL));
		}

//		KSPGMRESSetRestart(ksp,100);

		// Solve the system
		PETSC_SAFE_CALL(KSPSolve(ksp,b_,x_));

		KSPConvergedReason reason;
		KSPGetConvergedReason(ksp,&reason);

		std::cout << "Reason: " << reason << std::endl;

		auto & v_cl = create_vcluster();
//		if (try_solve == true)
//		{
			// calculate error statistic about the solution
			solError err = statSolutionError(A_,b_,x_);

			if (v_cl.getProcessUnitID() == 0)
			{
				std::cout << "Method: " << s_type << " " << " pre-conditoner: " << PCJACOBI << "  iterations: " << err.its << std::endl;
				std::cout << "Norm of error: " << err.err_norm << "   Norm infinity: " << err.err_inf << std::endl;
			}
//		}

	}
template<unsigned int ip> void test_known()
{
	Vcluster & vcl = create_vcluster();

	// send/recv messages

	global_rank = vcl.getProcessUnitID();
	size_t n_proc = vcl.getProcessingUnits();

	// Checking short communication pattern

	for (size_t s = 0 ; s < N_TRY ; s++)
	{
		for (size_t j = 32 ; j < N_LOOP ; j*=2)
		{
			global_step = j;
			// send message
			openfpm::vector<openfpm::vector<unsigned char>> message;
			// recv message
			openfpm::vector<openfpm::vector<unsigned char>> recv_message(n_proc);
			recv_message.reserve(n_proc);

			openfpm::vector<size_t> prc_recv;
			openfpm::vector<size_t> recv_sz;

			openfpm::vector<size_t> prc;

			for (size_t i = 0 ; i < 8  && i < n_proc ; i++)
			{
				size_t p_id = (i + 1 + vcl.getProcessUnitID()) % n_proc;
				if (p_id != vcl.getProcessUnitID())
				{
					prc.add(p_id);
					message.add();
					std::ostringstream msg;
					msg << "Hello from " << vcl.getProcessUnitID() << " to " << p_id;
					std::string str(msg.str());
					message.last().resize(j);
					memset(message.last().getPointer(),0,j);
					std::copy(str.c_str(),&(str.c_str())[msg.str().size()],&(message.last().get(0)));
				}
			}

			recv_message.resize(n_proc);
			// The pattern is not really random preallocate the receive buffer
			for (size_t i = 0 ; i < 8  && i < n_proc ; i++)
			{
				long int p_id = vcl.getProcessUnitID() - i - 1;
				if (p_id < 0)
					p_id += n_proc;
				else
					p_id = p_id % n_proc;

				if (p_id != (long int)vcl.getProcessUnitID())
				{
					prc_recv.add(p_id);
					recv_message.get(p_id).resize(j);
					recv_sz.add(j);
				}
			}

#ifdef VERBOSE_TEST
			timer t;
			t.start();
#endif

			vcl.sendrecvMultipleMessagesNBX(prc,message,prc_recv,recv_sz,msg_alloc,&recv_message);

#ifdef VERBOSE_TEST
			t.stop();

			double clk = t.getwct();
			double clk_max = clk;

			size_t size_send_recv = 2 * j * (prc.size());
			vcl.sum(size_send_recv);
			vcl.max(clk_max);
			vcl.execute();

			if (vcl.getProcessUnitID() == 0)
				std::cout << "(Short pattern: " << method<ip>() << ")Buffer size: " << j << "    Bandwidth (Average): " << size_send_recv / vcl.getProcessingUnits() / clk / 1e6 << " MB/s  " << "    Bandwidth (Total): " << size_send_recv / clk / 1e6 << " MB/s    Clock: " << clk << "   Clock MAX: " << clk_max <<"\n";
#endif

			// Check the message
			for (size_t i = 0 ; i < 8  && i < n_proc ; i++)
			{
				long int p_id = vcl.getProcessUnitID() - i - 1;
				if (p_id < 0)
					p_id += n_proc;
				else
					p_id = p_id % n_proc;

				if (p_id != (long int)vcl.getProcessUnitID())
				{
					std::ostringstream msg;
					msg << "Hello from " << p_id << " to " << vcl.getProcessUnitID();
					std::string str(msg.str());
					BOOST_REQUIRE_EQUAL(std::equal(str.c_str(),str.c_str() + str.size() ,&(recv_message.get(p_id).get(0))),true);
				}
				else
				{
					BOOST_REQUIRE_EQUAL((size_t)0,recv_message.get(p_id).size());
				}
			}
		}
	}
}
Пример #15
0
int main(int argc, char* argv[])
{
	/*!
	 *
	 * \page Vector_4_complex_prop_ser Vector 4 property serialization
	 *
	 *
	 * ## Initialization and vector creation ##
	 *
	 * After we initialize the library we can create a vector with complex properties
	 * with the following line
	 *
	 * \snippet Vector/4_complex_prop/main.cpp vect create
	 *
	 * In this this particular case every particle carry two my_struct object
	 *
	 */

    // initialize the library
	openfpm_init(&argc,&argv);

	// Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
	Box<2,float> domain({0.0,0.0},{1.0,1.0});

	// Here we define the boundary conditions of our problem
    size_t bc[2]={PERIODIC,PERIODIC};

	// extended boundary around the domain, and the processor domain
	Ghost<2,float> g(0.01);

	// my_struct at position 0 in the aggregate
	constexpr int my_s1 = 0;

	// my_struct at position 1 in the aggregate
	constexpr int my_s2 = 1;

	//! \cond [vect create] \endcond

	vector_dist<2,float, aggregate<my_struct,my_struct>>
	vd(4096,domain,bc,g);

	std::cout << "HAS PACK: " << has_pack_agg<aggregate<my_struct,my_struct>>::result::value << std::endl;

	//! \cond [vect create] \endcond

	/*!
	 *
	 * \page Vector_4_complex_prop_ser Vector 4 property serialization
	 *
	 *
	 * ## Assign values to properties ##
	 *
	 * In this loop we assign position to particles and we fill the two my_struct
	 * that each particle contain. As demostration the first my_struct is filled
	 *  with the string representation of the particle coordinates. The second my struct
	 *   is filled with the string representation of the particle position multiplied by 2.0.
	 * The the vectors of the two my_struct are filled respectively with the sequence
	 * 1,2,3 and 1,2,3,4
	 *
	 *
	 *
	 * \snippet Vector/4_complex_prop/main_ser.cpp vect assign
	 *
	 */

	//! \cond [vect assign] \endcond

	auto it = vd.getDomainIterator();

	while (it.isNext())
	{
		auto p = it.get();

		// we define x, assign a random position between 0.0 and 1.0
		vd.getPos(p)[0] = (float)rand() / RAND_MAX;

		// we define y, assign a random position between 0.0 and 1.0
		vd.getPos(p)[1] = (float)rand() / RAND_MAX;

		// Get the particle position as point
		Point<2,float> pt = vd.getPos(p);

		// create a C string from the particle coordinates
		// and copy into my struct
		vd.getProp<my_s1>(p).size = 32;
		vd.getProp<my_s1>(p).ptr = new char[32];
		strcpy(vd.getProp<my_s1>(p).ptr,pt.toString().c_str());

		// create a C++ string from the particle coordinates
		vd.getProp<my_s1>(p).str = std::string(pt.toString());

		vd.getProp<my_s1>(p).v.add(1);
		vd.getProp<my_s1>(p).v.add(2);
		vd.getProp<my_s1>(p).v.add(3);

		pt = pt * 2.0;

		// create a C string from the particle coordinates multiplied by 2.0
		// and copy into my struct
		vd.getProp<my_s2>(p).size = 32;
		vd.getProp<my_s2>(p).ptr = new char[32];
		strcpy(vd.getProp<my_s2>(p).ptr,pt.toString().c_str());

		// create a C++ string from the particle coordinates
		vd.getProp<my_s2>(p).str = std::string(pt.toString());

		vd.getProp<my_s2>(p).v.add(1);
		vd.getProp<my_s2>(p).v.add(2);
		vd.getProp<my_s2>(p).v.add(3);
		vd.getProp<my_s2>(p).v.add(4);

		// next particle
		++it;
	}

	//! \cond [vect assign] \endcond

	/*!
	 *
	 * \page Vector_4_complex_prop_ser Vector 4 property serialization
	 *
	 *
	 * ## Mapping and ghost_get ##
	 *
	 * Particles are redistributed across processors and we also synchronize the ghost
	 *
	 * \see \ref e0_s_map
	 *
	 * \see \ref e1_part_ghost
	 *
	 * \snippet Vector/4_complex_prop/main_ser.cpp vect map ghost
	 *
	 */

	//! \cond [vect map ghost] \endcond

	// Particles are redistribued across the processors
	vd.map();

	// Synchronize the ghost
	vd.ghost_get<my_s1,my_s2>();

	//! \cond [vect map ghost] \endcond

	/*!
	 *
	 * \page Vector_4_complex_prop_ser Vector 4 property serialization
	 *
	 *
	 * ## Output and VTK visualization ##
	 *
	 * Vector with complex properties can be still be visualized, because unknown properties are
	 * automatically excluded
	 *
	 * \see \ref e0_s_vis_vtk
	 *
	 * \snippet Vector/4_complex_prop/main.cpp vtk
	 *
	 */

	//! \cond [vtk] \endcond

	vd.write("particles");

	//! \cond [vtk] \endcond

	/*!
	 *
	 * \page Vector_4_complex_prop_ser Vector 4 property serialization
	 *
	 * ## Print 4 particles in the ghost area ##
	 *
	 * Here we print that the first 4 particles to show that the two my_struct contain the
	 * right information
	 *
	 * \snippet Vector/4_complex_prop/main_ser.cpp print ghost info
	 *
	 */

	//! \cond [print ghost info] \endcond

	size_t fg = vd.size_local();

	Vcluster & v_cl = create_vcluster();

	// Only the master processor print
	if (v_cl.getProcessUnitID() == 0)
	{
		// Print 4 particles
		for ( ; fg < vd.size_local()+4 ; fg++)
		{
			// Print my struct1 information
			std::cout << "my_struct1:" << std::endl;
			std::cout << "C-string: " << vd.getProp<my_s1>(fg).ptr << std::endl;
			std::cout << "Cpp-string: " << vd.getProp<my_s1>(fg).str << std::endl;

			for (size_t i = 0 ; i < vd.getProp<my_s1>(fg).v.size() ; i++)
				std::cout << "Element: " << i << "   " << vd.getProp<my_s1>(fg).v.get(i) << std::endl;

			// Print my struct 2 information
			std::cout << "my_struct2" << std::endl;
			std::cout << "C-string: " << vd.getProp<my_s2>(fg).ptr << std::endl;
			std::cout << "Cpp-string: " << vd.getProp<my_s2>(fg).str << std::endl;

			for (size_t i = 0 ; i < vd.getProp<my_s2>(fg).v.size() ; i++)
				std::cout << "Element: " << i << "   " << vd.getProp<my_s2>(fg).v.get(i) << std::endl;
		}
	}

	//! \cond [print ghost info] \endcond

	/*!
	 * \page Vector_4_complex_prop_ser Vector 4 property serialization
	 *
	 * ## Finalize ## {#finalize}
	 *
	 *  At the very end of the program we have always to de-initialize the library
	 *
	 * \snippet Vector/4_complex_prop/main_ser.cpp finalize
	 *
	 */

	//! \cond [finalize] \endcond

	openfpm_finalize();

	//! \cond [finalize] \endcond

	/*!
	 * \page Vector_4_complex_prop_ser Vector 4 property serialization
	 *
	 * # Full code # {#code}
	 *
	 * \include Vector/4_complex_prop/main_ser.cpp
	 *
	 */
}
Пример #16
0
template<typename solver_type,typename lid_nn> void lid_driven_cavity_2d()
{
	Vcluster & v_cl = create_vcluster();

	if (v_cl.getProcessingUnits() > 3)
		return;

	//! [lid-driven cavity 2D]

	// velocity in the grid is the property 0, pressure is the property 1
	constexpr int velocity = 0;
	constexpr int pressure = 1;

	// Domain, a rectangle
	Box<2,float> domain({0.0,0.0},{3.0,1.0});

	// Ghost (Not important in this case but required)
	Ghost<2,float> g(0.01);

	// Grid points on x=256 and y=64
	long int sz[] = {256,64};
	size_t szu[2];
	szu[0] = (size_t)sz[0];
	szu[1] = (size_t)sz[1];

	// We need one more point on the left and down part of the domain
	// This is given by the boundary conditions that we impose, the
	// reason is mathematical in order to have a well defined system
	// and cannot be discussed here
	Padding<2> pd({1,1},{0,0});

	// Distributed grid that store the solution
	grid_dist_id<2,float,aggregate<float[2],float>,CartDecomposition<2,float>> g_dist(szu,domain,g);

	// It is the maximum extension of the stencil
	Ghost<2,long int> stencil_max(1);

	// Finite difference scheme
	FDScheme<lid_nn> fd(pd, stencil_max, domain, g_dist.getGridInfo(), g_dist);

	// Here we impose the equation, we start from the incompressibility Eq imposed in the bulk with the
	// exception of the first point {0,0} and than we set P = 0 in {0,0}, why we are doing this is again
	// mathematical to have a well defined system, an intuitive explanation is that P and P + c are both
	// solution for the incompressibility equation, this produce an ill-posed problem to make it well posed
	// we set one point in this case {0,0} the pressure to a fixed constant for convenience P = 0
	fd.impose(ic_eq(),0.0, EQ_3, {0,0},{sz[0]-2,sz[1]-2},true);
	fd.impose(Prs(),  0.0, EQ_3, {0,0},{0,0});

	// Here we impose the Eq1 and Eq2
	fd.impose(vx_eq(),0.0, EQ_1, {1,0},{sz[0]-2,sz[1]-2});
	fd.impose(vy_eq(),0.0, EQ_2, {0,1},{sz[0]-2,sz[1]-2});

	// v_x and v_y
	// Imposing B1
	fd.impose(v_x(),0.0, EQ_1, {0,0},{0,sz[1]-2});
	fd.impose(avg_vy_f(),0.0, EQ_2 , {-1,0},{-1,sz[1]-1});
	// Imposing B2
	fd.impose(v_x(),0.0, EQ_1, {sz[0]-1,0},{sz[0]-1,sz[1]-2});
	fd.impose(avg_vy(),1.0, EQ_2,    {sz[0]-1,0},{sz[0]-1,sz[1]-1});

	// Imposing B3
	fd.impose(avg_vx_f(),0.0, EQ_1, {0,-1},{sz[0]-1,-1});
	fd.impose(v_y(), 0.0, EQ_2, {0,0},{sz[0]-2,0});
	// Imposing B4
	fd.impose(avg_vx(),0.0, EQ_1,   {0,sz[1]-1},{sz[0]-1,sz[1]-1});
	fd.impose(v_y(), 0.0, EQ_2, {0,sz[1]-1},{sz[0]-2,sz[1]-1});

	// When we pad the grid, there are points of the grid that are not
	// touched by the previous condition. Mathematically this lead
	// to have too many variables for the conditions that we are imposing.
	// Here we are imposing variables that we do not touch to zero
	//

	// Padding pressure
	fd.impose(Prs(), 0.0, EQ_3, {-1,-1},{sz[0]-1,-1});
	fd.impose(Prs(), 0.0, EQ_3, {-1,sz[1]-1},{sz[0]-1,sz[1]-1});
	fd.impose(Prs(), 0.0, EQ_3, {-1,0},{-1,sz[1]-2});
	fd.impose(Prs(), 0.0, EQ_3, {sz[0]-1,0},{sz[0]-1,sz[1]-2});

	// Impose v_x Padding Impose v_y padding
	fd.impose(v_x(), 0.0, EQ_1, {-1,-1},{-1,sz[1]-1});
	fd.impose(v_y(), 0.0, EQ_2, {-1,-1},{sz[0]-1,-1});

	solver_type solver;
	auto x = solver.solve(fd.getA(),fd.getB());

	//! [lid-driven cavity 2D]

	//! [Copy the solution to grid]

	fd.template copy<velocity,pressure>(x,{0,0},{sz[0]-1,sz[1]-1},g_dist);

	std::string s = std::string(demangle(typeid(solver_type).name()));
	s += "_";

	//! [Copy the solution to grid]

	g_dist.write(s + "lid_driven_cavity_p" + std::to_string(v_cl.getProcessingUnits()) + "_grid");

#ifdef HAVE_OSX

    std::string file1 = std::string("test/") + s + "lid_driven_cavity_p" + std::to_string(v_cl.getProcessingUnits()) + "_grid_" + std::to_string(v_cl.getProcessUnitID()) + "_test_osx.vtk";
    std::string file2 = s + "lid_driven_cavity_p" + std::to_string(v_cl.getProcessingUnits()) + "_grid_" + std::to_string(v_cl.getProcessUnitID()) + ".vtk";

#else

	#if __GNUC__ == 5

		std::string file1 = std::string("test/") + s + "lid_driven_cavity_p" + std::to_string(v_cl.getProcessingUnits()) + "_grid_" + std::to_string(v_cl.getProcessUnitID()) + "_test_GCC5.vtk";
		std::string file2 = s + "lid_driven_cavity_p" + std::to_string(v_cl.getProcessingUnits()) + "_grid_" + std::to_string(v_cl.getProcessUnitID()) + ".vtk";

	#else

		std::string file1 = std::string("test/") + s + "lid_driven_cavity_p" + std::to_string(v_cl.getProcessingUnits()) + "_grid_" + std::to_string(v_cl.getProcessUnitID()) + "_test_GCC4.vtk";
		std::string file2 = s + "lid_driven_cavity_p" + std::to_string(v_cl.getProcessingUnits()) + "_grid_" + std::to_string(v_cl.getProcessUnitID()) + ".vtk";

	#endif


#endif

    std::cout << "File1: " << file1 << std::endl;
    std::cout << "File2: " << file2 << std::endl;

	// Check that match
	bool test = compare(file1,file2);
	BOOST_REQUIRE_EQUAL(test,true);

}
Пример #17
0
int main(int argc, char* argv[])
{
	//
	// ### WIKI 2 ###
	//
	// Here we Initialize the library, than we create a uniform random generator between 0 and 1 to to generate particles
	// randomly in the domain, we create a Box that define our domain, boundary conditions, and ghost
	//
	
        // initialize the library
	openfpm_init(&argc,&argv);

	// Here we define our domain a 2D box with internals from 0 to 1.0 for x and y
	Box<2,float> domain({0.0,0.0},{1.0,1.0});

	// Here we define the boundary conditions of our problem
    size_t bc[2]={PERIODIC,PERIODIC};

	// extended boundary around the domain, and the processor domain
	Ghost<2,float> g(0.01);
	
	//
	// ### WIKI 3 ###
	//
	// Here we are creating a distributed vector defined by the following parameters
	//
	// * 2 is the Dimensionality of the space where the objects live
	// * float is the type used for the spatial coordinate of the particles
	// * float,float[3],float[3][3] is the information stored by each particle a scalar float, a vector float[3] and a tensor of rank 2 float[3][3]
	//   the list of properties must be put into an aggregate data astructure aggregate<prop1,prop2,prop3, ... >
	// 
	// vd is the instantiation of the object
	//
	// The Constructor instead require:
	//
	// * Number of particles 4096 in this case
	// * Domain where is defined this structure
	// * bc boundary conditions
	// * g Ghost
	//
	// The following construct a vector where each processor has 4096 / N_proc (N_proc = number of processor)
	// objects with an undefined position in space. This non-space decomposition is also called data-driven
	// decomposition
	//
	vector_dist<2,float, aggregate<float,float[3],float[3][3]> > vd(4096,domain,bc,g);

	// the scalar is the element at position 0 in the aggregate
	const int scalar = 0;

	// the vector is the element at position 1 in the aggregate
	const int vector = 1;

	// the tensor is the element at position 2 in the aggregate
	const int tensor = 2;

	//
	// ### WIKI 5 ###
	//
	// Get an iterator that go through the 4096 particles, in an undefined position state and define its position
	//
	auto it = vd.getDomainIterator();

	while (it.isNext())
	{
		auto key = it.get();

		// we define x, assign a random position between 0.0 and 1.0
		vd.getPos(key)[0] = rand() / RAND_MAX;

		// we define y, assign a random position between 0.0 and 1.0
		vd.getPos(key)[1] = rand() / RAND_MAX;

		// next particle
		++it;
	}

	//
	// ### WIKI 6 ###
	//
	// Once we define the position, we distribute them according to the default space decomposition
	// The default decomposition is created even before assigning the position to the object. It determine
	// which part of space each processor manage
	//
	vd.map();

	//
	// ### WIKI 7 ###
	//
	// We get the object that store the decomposition, than we iterate again across all the objects, we count them
	// and we confirm that all the particles are local
	//
	//Counter we use it later
	size_t cnt = 0;

	// Get the space decomposition
	auto & ct = vd.getDecomposition();

	// Get a particle iterator
	it = vd.getDomainIterator();

	// For each particle ...
	while (it.isNext())
	{
		// ... p
		auto p = it.get();

		// we set the properties of the particle p
		
                // the scalar property
		vd.template getProp<scalar>(p) = 1.0;

		vd.template getProp<vector>(p)[0] = 1.0;
		vd.template getProp<vector>(p)[1] = 1.0;
		vd.template getProp<vector>(p)[2] = 1.0;

		vd.template getProp<tensor>(p)[0][0] = 1.0;
		vd.template getProp<tensor>(p)[0][1] = 1.0;
		vd.template getProp<tensor>(p)[0][2] = 1.0;
		vd.template getProp<tensor>(p)[1][0] = 1.0;
		vd.template getProp<tensor>(p)[1][1] = 1.0;
		vd.template getProp<tensor>(p)[1][2] = 1.0;
		vd.template getProp<tensor>(p)[2][0] = 1.0;
		vd.template getProp<tensor>(p)[2][1] = 1.0;
		vd.template getProp<tensor>(p)[2][2] = 1.0;

		// increment the counter
		cnt++;

		// next particle
		++it;
	}

	//
	// ### WIKI 8 ###
	//
	// cnt contain the number of object the local processor contain, if we are interested to count the total number across the processor
	// we can use the function add, to sum across processors. First we have to get an instance of Vcluster, queue an operation of add with
	// the variable count and finaly execute. All the operations are asynchronous, execute work like a barrier and ensure that all the 
	// queued operations are executed
	//
	
	auto & v_cl = create_vcluster();
	v_cl.sum(cnt);
	v_cl.execute();
	
	//
	// ### WIKI 9 ###
	//
	// Output the particle position for each processor
	//

	vd.write("output",VTK_WRITER);

	//
	// ### WIKI 10 ###
	//
	// Deinitialize the library
	//
	openfpm_finalize();
}
Пример #18
0
int main(int argc, char* argv[])
{
    /*!
     *
     * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness
     *
     * ## Initialization ##
     *
     * The initialization is the same as the molecular dynamic example. The differences are in the
     * parameters. We will use a bigger system, with more particles. The delta time for integration
     * is chosen in order to keep the system stable.
     *
     * \see \ref e3_md_init
     *
     * \snippet Vector/4_reorder/main_comp_ord.cpp vect create
     *
     */

    //! \cond [vect create] \endcond

    double dt = 0.0001;
    float r_cut = 0.03;
    double sigma = r_cut/3.0;
    double sigma12 = pow(sigma,12);
    double sigma6 = pow(sigma,6);

    openfpm::vector<double> x;
    openfpm::vector<openfpm::vector<double>> y;

    openfpm_init(&argc,&argv);
    Vcluster & v_cl = create_vcluster();

    // we will use it do place particles on a 40x40x40 Grid like
    size_t sz[3] = {40,40,40};

    // domain
    Box<3,float> box({0.0,0.0,0.0}, {1.0,1.0,1.0});

    // Boundary conditions
    size_t bc[3]= {PERIODIC,PERIODIC,PERIODIC};

    // ghost, big enough to contain the interaction radius
    Ghost<3,float> ghost(r_cut);

    vector_dist<3,double, aggregate<double[3],double[3]> > vd(0,box,bc,ghost);

    //! \cond [vect create] \endcond

    /*!
     * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness
     *
     * ## Particles on a grid like position ##
     *
     * Here we place the particles on a grid like manner
     *
     * \see \ref e3_md_gl
     *
     * \snippet Vector/4_reorder/main_comp_ord.cpp vect grid
     *
     */

    //! \cond [vect grid] \endcond

    auto it = vd.getGridIterator(sz);

    while (it.isNext())
    {
        vd.add();

        auto key = it.get();

        vd.getLastPos()[0] = key.get(0) * it.getSpacing(0);
        vd.getLastPos()[1] = key.get(1) * it.getSpacing(1);
        vd.getLastPos()[2] = key.get(2) * it.getSpacing(2);

        vd.template getLastProp<velocity>()[0] = 0.0;
        vd.template getLastProp<velocity>()[1] = 0.0;
        vd.template getLastProp<velocity>()[2] = 0.0;

        vd.template getLastProp<force>()[0] = 0.0;
        vd.template getLastProp<force>()[1] = 0.0;
        vd.template getLastProp<force>()[2] = 0.0;

        ++it;
    }

    //! \cond [vect grid] \endcond

    /*!
     *
     * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness
     *
     * ## Molecular dynamic steps ##
     *
     * Here we do 30000 MD steps using verlet integrator the cycle is the same as the
     * molecular dynamic example. with the following changes.
     *
     * ### Cell lists ###
     *
     * Instead of getting the normal cell list we get an hilbert curve cell-list. Such cell list has a
     * function called **getIterator** used inside the function **calc_forces** and **calc_energy**
     * that iterate across all the particles but in a smart-way. In practice
     * given an r-cut a cell-list is constructed with the provided spacing. Suppose to have a cell-list
     * \f$ m \times n \f$, an hilbert curve \f$ 2^k \times 2^k \f$ is contructed with \f$ k = ceil(log_2(max(m,n))) \f$.
     * Cell-lists are explored according to this Hilbert curve, If a cell does not exist is simply skipped.
     *
     *
     * \verbatim
    +------+------+------+------+     Example of Hilbert curve running on a 3 x 3 Cell
    |      |      |      |      |     An hilbert curve of k = ceil(log_2(3)) = 4
    |  X+---->X   |  X +---> X  |
    |  ^   |  +   |  ^   |   +  |
    ***|******|******|****---|--+      *******
    *  +   |  v   |  +   *   v  |      *     *
    *  7   |  8+---->9   *   X  |      *     *  = Domain
    *  ^   |      |      *   +  |      *     *
    *--|-----------------*---|--+      *******
    *  +   |      |      *   v  |
    *  4<----+5   |   6<---+ X  |
    *      |  ^   |   +  *      |
    *---------|-------|--*------+
    *      |  +   |   v  *      |
    *  1+---->2   |   3+---> X  |
    *      |      |      *      |
    **********************------+

     this mean that we will iterate the following cells

     1,2,5,4,7,8,9,6,3

     Suppose now that the particles are ordered like described




    Particles   id      Cell
                 0         1
                 1         7
                 2         8
    			 3		   1
    			 4		   9
    			 5		   9
    			 6		   6
    			 7		   7
    			 8		   3
    			 9		   2
    			10		   4
    			11		   3


    The iterator of the cell-list will explore the particles in the following way

    Cell     1  2 5 4  7  8  9  6 3
       	   |   | | | |   | |   | | |
        	0,3,9,,10,1,7,2,4,5,6,8


     * \endverbatim
     *
     * We cannot explain here what is a cache, but in practice is a fast memory in the CPU able
     * to store chunks of memory. The cache in general is much smaller than RAM, but the big advantage
     * is its speed. Retrieve data from the cache is much faster than RAM. Unfortunately the factors
     *  that determine what is on cache and what is not are multiples: Type of cache, algorithm ... .
     * Qualitatively all caches will tend to load chunks of data that you read multiple-time, or chunks
     *  of data that probably you will read based on pattern analysis. A small example is a linear memory copy where
     * you read consecutively memory and you write on consecutive memory.
     * Modern CPU recognize such pattern and decide to load on cache the consecutive memory before
     *  you actually require it.
     *
     *
     * Iterating the vector in the way described above has the advantage that when we do computation on particles
     * and its neighborhood with the sequence described above it will happen that:
     *
     * * If to process a particle A we read some neighborhood particles to process the next particle A+1
     *   we will probably read most of the previous particles.
     *
     *
     * In order to show in practice what happen we first show the graph when we do not reorder
     *
     * \htmlinclude Vector/4_reorder/no_reorder.html
     *
     * The measure has oscillation but we see an asymptotic behavior from 0.04 in the initial condition to
     * 0.124 . Below we show what happen when we use iterator from the Cell list hilbert
     *
     * \htmlinclude Vector/4_reorder/comp_reord.html
     *
     * In cases where particles does not move or move very slowly consider to use data-reordering, because it can
     * give **8-10% speedup**
     *
     * \see \ref e4_reo
     *
     *  ## Timers ##
     *
     *  In order to collect the time of the force calculation we insert two timers around the function
     *  calc_force. The overall performance is instead calculated with another timer around the time stepping
     *
     * \snippet Vector/4_reorder/main_data_ord.cpp timer start
     * \snippet Vector/4_reorder/main_data_ord.cpp timer stop
     *
     * \see \ref e3_md_vi
     *
     *
     */

    //! \cond [md steps] \endcond

    // Get the Cell list structure
    auto NN = vd.getCellList_hilb(r_cut);

    // calculate forces
    calc_forces(vd,NN,sigma12,sigma6);
    unsigned long int f = 0;

    timer time2;
    time2.start();

#ifndef TEST_RUN
    size_t Nstep = 30000;
#else
    size_t Nstep = 300;
#endif

    // MD time stepping
    for (size_t i = 0; i < Nstep ; i++)
    {
        // Get the iterator
        auto it3 = vd.getDomainIterator();

        // integrate velicity and space based on the calculated forces (Step1)
        while (it3.isNext())
        {
            auto p = it3.get();

            // here we calculate v(tn + 0.5)
            vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
            vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
            vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];

            // here we calculate x(tn + 1)
            vd.getPos(p)[0] += vd.template getProp<velocity>(p)[0]*dt;
            vd.getPos(p)[1] += vd.template getProp<velocity>(p)[1]*dt;
            vd.getPos(p)[2] += vd.template getProp<velocity>(p)[2]*dt;

            ++it3;
        }

        // Because we mooved the particles in space we have to map them and re-sync the ghost
        vd.map();
        vd.template ghost_get<>();

        timer time;
        if (i % 10 == 0)
            time.start();

        // calculate forces or a(tn + 1) Step 2
        calc_forces(vd,NN,sigma12,sigma6);

        if (i % 10 == 0)
        {
            time.stop();
            x.add(i);
            y.add({time.getwct()});
        }

        // Integrate the velocity Step 3
        auto it4 = vd.getDomainIterator();

        while (it4.isNext())
        {
            auto p = it4.get();

            // here we calculate v(tn + 1)
            vd.template getProp<velocity>(p)[0] += 0.5*dt*vd.template getProp<force>(p)[0];
            vd.template getProp<velocity>(p)[1] += 0.5*dt*vd.template getProp<force>(p)[1];
            vd.template getProp<velocity>(p)[2] += 0.5*dt*vd.template getProp<force>(p)[2];

            ++it4;
        }

        // After every iteration collect some statistic about the confoguration
        if (i % 100 == 0)
        {
            // We write the particle position for visualization (Without ghost)
            vd.deleteGhost();
            vd.write("particles_",f);

            // we resync the ghost
            vd.ghost_get<>();

            // We calculate the energy
            double energy = calc_energy(vd,NN,sigma12,sigma6);
            auto & vcl = create_vcluster();
            vcl.sum(energy);
            vcl.execute();

            // We also print on terminal the value of the energy
            // only one processor (master) write on terminal
            if (vcl.getProcessUnitID() == 0)
                std::cout << std::endl << "Energy: " << energy << std::endl;

            f++;
        }
    }

    time2.stop();
    std::cout << "Performance: " << time2.getwct() << std::endl;

    //! \cond [md steps] \endcond

    /*!
     * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness
     *
     * ## Plotting graphs ##
     *
     * After we terminate the MD steps our vector x contains at which iteration we benchmark the force
     * calculation time, while y contains the measured time at that time-step. We can produce a graph X Y
     *
     * \note The graph produced is an svg graph that can be view with a browser. From the browser we can
     *       also easily save the graph into pure svg format
     *
     * \snippet Vector/4_reorder/main_comp_ord.cpp google chart
     *
     */

    //! \cond [google chart] \endcond

    // Google charts options, it store the options to draw the X Y graph
    GCoptions options;

    // Title of the graph
    options.title = std::string("Force calculation time");

    // Y axis name
    options.yAxis = std::string("Time");

    // X axis name
    options.xAxis = std::string("iteration");

    // width of the line
    options.lineWidth = 1.0;

    // Object that draw the X Y graph
    GoogleChart cg;

    // Add the graph
    // The graph that it produce is in svg format that can be opened on browser
    cg.AddLinesGraph(x,y,options);

    // Write into html format
    cg.write("gc_plot2_out.html");

    //! \cond [google chart] \endcond

    /*!
     *
     * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness
     *
     * ## Finalize ##
     *
     *  At the very end of the program we have always to de-initialize the library
     *
     * \snippet Vector/4_reorder/main_comp_ord.cpp finalize
     *
     */

    //! \cond [finalize] \endcond

    openfpm_finalize();

    //! \cond [finalize] \endcond

    /*!
     *
     * \page Vector_4_comp_reo Vector 4 computational reordering and cache friendliness
     *
     * # Full code #
     *
     * \include Vector/4_reorder/main_comp_ord.cpp
     *
     */
}
Пример #19
0
int main(int argc, char* argv[])
{
	/*!
	 * \page Plot_0_cg Plot 0 Google Chart
	 *
	 * ## Initialization ##
	 *
	 * Here we Initialize the library, and we check the we are on a single processor. GoogleChart
	 * cannot do parallel IO or write big files. So or we collect all data on one processor, or each
	 * processor write a distinct file. In this particular example we simply stop if the program  start
	 * on more than one processor
	 *
	 * \snippet Plot/0_simple_graph/main.cpp initialize
	 *
	 */

	//! \cond [initialize] \endcond

	openfpm_init(&argc,&argv);
	auto & v_cl = create_vcluster();

	// Google chart is only single processor
	if (v_cl.getProcessingUnits() > 1)
	{
		std::cerr << "Error: only one processor is allowed" << "\n";
		return 1;
	}

	//! \cond [initialize] \endcond

	/*!
	 * \page Plot_0_cg Plot 0 Google Chart
	 *
	 * ## Graph data ##
	 *
	 * Here we have the vectors that will contain the information about the graph.
	 *
	 * \snippet Plot/0_simple_graph/main.cpp datas vector
	 *
	 */

	//! \cond [datas vector] \endcond

	openfpm::vector<std::string> x;
	openfpm::vector<openfpm::vector<double>> y;
	openfpm::vector<std::string> yn;

	//! \cond [datas vector] \endcond

	/*!
	 * \page Plot_0_cg Plot 0 Google Chart
	 *
	 * We will try now to produce the following situation. Six values on **x** each of them having 4 values on **y**
	 *
	 * This mean that for each x value we have to define 4 y values. Having multiple values on on x can be used for
	 * several purpose.
	 *
	 * * Define multiple lines. For example if we connect all the points # we obtain one line. If we connect
	 *   all the @ points we obtain another line, an so on ... (figure below)
	 *
	 * * Define error bands
	 *
	 * * Visualize different observables/parameters for the same value x
	 *
	 *
	 * \verbatim

		  y  ^                          $ dataset1
		     |                          * dataset2
		 0.9 |                          # dataset3
		     |       @                  @ dataset4
		     |   #
		 0.6 |       *   *   @       *
		     |   $   #   @   #   #
		     |   @       $   $   @   @
		 0.3 |           #   *   $   #
		     |       $           *
		     |   *                   $
		  0  |_________________________________
				 o   t   t   f   f   s          x
				 n   w   h   o   i   i
				 e   o   r   u   v   x
						 e   r   e
						 e



	  \endverbatim
	 *
	 * We start from the first case (Define multiple lines)
	 *
	 * \snippet Plot/0_simple_graph/main.cpp data fill
	 *
	 */

	//! \cond [data fill] \endcond

	// Fill the x values
	x.add("one");
	x.add("two");
	x.add("three");
	x.add("four");
	x.add("five");
	x.add("six");

	// we have 4 dataset  or lines
	yn.add("dataset1");
	yn.add("dataset2");
	yn.add("dataset3");
	yn.add("dataset4");

	// Because we have 6 points on x each containing 4 lines or dataset, we have to provides
	// 6 point with 4 values at each x point
	y.add({2,3,5,6});
	y.add({5,6,1,6});
	y.add({2,1,6,9});
	y.add({1,6,3,2});
	y.add({3,3,0,6});
	y.add({2,1,4,6});

	//! \cond [data fill] \endcond

	/*!
	 * \page Plot_0_cg Plot 0 Google Chart
	 *
	 * ## Graph options ##
	 *
	 * We can specify several options for the graphs.
	 *
	 * * Title of the graph
	 * * Title of the y axis
	 * * Title of the x axis
	 *
	 *
	 * \snippet Plot/0_simple_graph/main.cpp google chart
	 *
	 */

	//! \cond [google chart] \endcond

	GCoptions options;

	options.title = std::string("Example");
	options.yAxis = std::string("Y Axis");
	options.xAxis = std::string("X Axis");
	options.lineWidth = 5;

	//! \cond [google chart] \endcond

	/*!
	 * \page Plot_0_cg Plot 0 Google Chart
	 *
	 * ## Graph write ##
	 *
	 * We create the object to create plots with Google Charts
	 *
	 * A writer can produce several graphs optionally interleaved with HTML code.
	 * Here we write in HTML a description of the graph, than we output the graph
	 *
	 * AddLinesGraph create a typical graph with lines
	 *
	 * \snippet Plot/0_simple_graph/main.cpp google chart write1
	 *
	 * \htmlonly
		<div id="chart_div0" style="width: 900px; height: 500px;"></div>
	   \endhtmlonly
	 *
	 */

	//! \cond [google chart write1] \endcond

	GoogleChart cg;
	//
	cg.addHTML("<h2>First graph</h2>");
	cg.AddLinesGraph(x,y,yn,options);

	//! \cond [google chart write1] \endcond


	/*!
	 *
	 * \page Plot_0_cg Plot 0 Google Chart
	 *
	 * ## Hist graph ##
	 *
	 * Hist graph is instead a more flexible Graph writer. In particular we can specify
	 * how to draw each dataset. With the option
	 *
	 * * **stype** specify how to draw each dataset
	 * * **stypeext** we can override the default stype option. In this case we say that the third dataset
	 *            in must be reppresented as a line instead of a bars
	 *
	 * To note that we can reuse the same Google chart writer to write multiple
	 * Graph on the same page, interleaved with HTML code
	 *
	 * \snippet Plot/0_simple_graph/main.cpp google chart write2
	 *
	 * \htmlonly
		<div id="chart_div1" style="width: 900px; height: 500px;"></div>
	   \endhtmlonly
	 *
	 *
	 */

	//! \cond [google chart write2] \endcond

	options.stype = std::string("bars");

	// it say that the dataset4 must me represented with a line
	options.stypeext = std::string("{3: {type: 'line'}}");

	cg.addHTML("<h2>Second graph</h2>");
	cg.AddHistGraph(x,y,yn,options);

	//! \cond [google chart write2] \endcond

	/*!
	 *
	 * \page Plot_0_cg Plot 0 Google Chart
	 *
	 * ## %Error bars ##
	 *
	 * Here we show how to draw error bars. %Error bars are drawn specifying intervals with a min and a max.
	 * Intervals in general does not have to encapsulate any curve. First we construct the vector y with 3
	 *  values the first value contain the curve points, the second and third contain the min,max interval.
	 *
	 * \snippet Plot/0_simple_graph/main.cpp google chart write3
	 *
	 * \htmlonly
		<div id="chart_div2" style="width: 900px; height: 500px;"></div>
	   \endhtmlonly
	 *
	 *
	 */

	//! \cond [google chart write3] \endcond

	cg.addHTML("<h2>Third graph</h2>");

	// The first colum are the values of a line while the other 2 values
	// are the min and max of an interval, as we can see interval does not
	// have to encapsulate any curve
	y.clear();
	y.add({0.10,0.20,0.19});
	y.add({0.11,0.21,0.18});
	y.add({0.12,0.22,0.21});
	y.add({0.15,0.25,0.20});
	y.add({0.09,0.29,0.25});
	y.add({0.08,0.28,0.27});

	// Here we mark that the the colum 2 and 3 are intervals
	yn.clear();
	yn.add("line1");
	yn.add("interval");
	yn.add("interval");

	cg.AddLinesGraph(x,y,yn,options);

	//! \cond [google chart write3] \endcond

	/*!
	 *
	 * \page Plot_0_cg Plot 0 Google Chart
	 *
	 * The style of each interval can be controlled, and the definition of intervals can be interleaved with definition of
	 * other lines. In this example we show how to define 3 lines and 3 intervals, controlling the style of the last interval
	 *
	 * \snippet Plot/0_simple_graph/main.cpp google chart write4
	 *
	 * \htmlonly
		<div id="chart_div3" style="width: 900px; height: 500px;"></div>
	  \endhtmlonly
	 *
	 *
	 */

	//! \cond [google chart write4] \endcond

	cg.addHTML("<h2>Four graph</h2>");

	// again 6 point but 9 values
	y.clear();
	y.add({0.10,0.20,0.19,0.22,0.195,0.215,0.35,0.34,0.36});
	y.add({0.11,0.21,0.18,0.22,0.19,0.215,0.36,0.35,0.37});
	y.add({0.12,0.22,0.21,0.23,0.215,0.225,0.35,0.34,0.36});
	y.add({0.15,0.25,0.20,0.26,0.22,0.255,0.36,0.35,0.37});
	y.add({0.09,0.29,0.25,0.30,0.26,0.295,0.35,0.34,0.36});
	y.add({0.08,0.28,0.27,0.29,0.275,0.285,0.36,0.35,0.37});

	// colum  0 and 1 are lines
	// colums 2-3 and 4-5 are intervals
	// colum 6 is a line
	// colum 7-8 is an interval
	yn.add("line1");
	yn.add("line2");
	yn.add("interval");
	yn.add("interval");
	yn.add("interval");
	yn.add("interval");
	yn.add("line3");
	yn.add("interval");
	yn.add("interval");

	// Intervals are enumerated with iX, for example in this case with 3 intervals we have i0,i1,i2
	// with this line we control the style of the intervals. In particular we change from the default
	// values
	options.intervalext = std::string("{'i2': { 'color': '#4374E0', 'style':'bars', 'lineWidth':4, 'fillOpacity':1 } }");

	cg.AddLinesGraph(x,y,yn,options);

	//! \cond [google chart write4] \endcond

	/*!
	 *
	 * \page Plot_0_cg Plot 0 Google Chart
	 *
	 * ## More options ##
	 *
	 * In this last example we also show how to:
	 *
	 *
	 * * Make the graph bigger, setting **width** and **height** options
	 * * Give the possibility to to zoom-in and zoom-out with **GC_EXPLORER**
	 * * Use lines instead a smooth function to connect points
	 * * Use logaritmic scale
	 *
	 * \note For more options refer to doxygen and Google Charts
	 *
	 * \snippet Plot/0_simple_graph/main.cpp google chart write5
	 *
	 *
	 * \htmlonly
		<div id="chart_div4" style="width: 1280px; height: 700px;"></div>
	  \endhtmlonly
	 *
	 */

	//! \cond [google chart write5] \endcond

	openfpm::vector<double> xn;

	xn.add(1.0);
	xn.add(2.0);
	xn.add(3.0);
	xn.add(4.0);
	xn.add(5.0);
	xn.add(6.0);

	options.intervalext = "";
	options.width = 1280;
	options.heigh = 720;
	options.curveType = "line";
	options.more = GC_ZOOM + "," + GC_X_LOG + "," + GC_Y_LOG;

	cg.AddLinesGraph(xn,y,yn,options);

	cg.write("gc_out.html");

	//! \cond [google chart write5] \endcond


	/*!
	 * \page Plot_0_cg Plot 0 Google Chart
	 *
	 *
	 * ## Finalize ## {#finalize}
	 *
	 *  At the very end of the program we have always de-initialize the library
	 *
	 * \snippet Plot/0_simple_graph/main.cpp finalize
	 *
	 */

	//! \cond [finalize] \endcond

	openfpm_finalize();


	//! \cond [finalize] \endcond
}
Пример #20
0
int main(int argc, char* argv[])
{
	//
	// ### WIKI 2 ###
	//
	// Initialize the library and several objects 
	//
	openfpm_init(&argc,&argv);
	
	//
	// ### WIKI 3 ###
	//
	// Get the vcluster object and the number of processor
	//

	Vcluster & v_cl = create_vcluster();
	size_t N_prc = v_cl.getProcessingUnits();

	//
	// ### WIKI 3 ###
	//
	// We find the maximum of the processors rank, that should be the Number of
	// processora minus one, only processor 0 print on terminal
	//

	size_t id = v_cl.getProcessUnitID();

	v_cl.max(id);
	v_cl.execute();
	if (v_cl.getProcessUnitID() == 0)
		std::cout << "Maximum processor rank: " << id << "\n";

	//
	// ### WIKI 4 ###
	//
	// We sum all the processor ranks the maximum, the result should be that should
	// be $\frac{(n-1)n}{2}$, only processor 0 print on terminal
	//

	size_t id2 = v_cl.getProcessUnitID();

	v_cl.sum(id2);
	v_cl.execute();
	if (v_cl.getProcessUnitID() == 0)
		std::cout << "Sum of all processors rank: " << id2 << "\n";

	//
	// ### WIKI 5 ###
	//
	// we can collect information from all processors using the function gather
	//

	size_t id3 = v_cl.getProcessUnitID();
	openfpm::vector<size_t> v;
	
	v_cl.allGather(id3,v);
	v_cl.execute();
	
	if (v_cl.getProcessUnitID() == 0)
	{
		std::cout << "Collected ids: ";
		for(size_t i = 0 ; i < v.size() ; i++)
			std::cout << " " << v.get(i) << " ";

		std::cout << "\n";
	}

	//
	// ### WIKI 5 ###
	//
	// we can also send messages to specific processors, with the condition that the receiving
	// processors know we want to communicate with them, if you are searching for a more
	// free way to communicate where the receiving processors does not know which one processor
	// want to communicate with us, see the example 1_dsde
	//

	std::stringstream ss_message_1;
	std::stringstream ss_message_2;
	ss_message_1 << "Hello from " << std::setw(8) << v_cl.getProcessUnitID() << "\n";
	ss_message_2 << "Hello from " << std::setw(8) << v_cl.getProcessUnitID() << "\n";
	std::string message_1 = ss_message_1.str();
	std::string message_2 = ss_message_2.str();
	size_t msg_size = message_1.size();
	
	// Processor 0 send to processors 1,2 , 1 to 2,1, 2 to 0,1

	v_cl.send(((id3+1)%N_prc + N_prc)%N_prc,0,message_1.c_str(),msg_size);
	v_cl.send(((id3+2)%N_prc + N_prc)%N_prc,0,message_2.c_str(),msg_size);

	openfpm::vector<char> v_one;
	v_one.resize(msg_size);
	openfpm::vector<char> v_two(msg_size);
	v_two.resize(msg_size);

	v_cl.recv(((id3-1)%N_prc + N_prc)%N_prc,0,(void *)v_one.getPointer(),msg_size);
	v_cl.recv(((id3-2)%N_prc + N_prc)%N_prc,0,(void *)v_two.getPointer(),msg_size);
	v_cl.execute();

	if (v_cl.getProcessUnitID() == 0)
	{
		for (size_t i = 0 ; i < msg_size ; i++)
			std::cout << v_one.get(i);

		for (size_t i = 0 ; i < msg_size ; i++)
			std::cout << v_two.get(i);
	}

	//
	// ### WIKI 5 ###
	//
	// we can also do what we did before in one shot
	//

	id = v_cl.getProcessUnitID();
	id2 = v_cl.getProcessUnitID();
	id3 = v_cl.getProcessUnitID();
	v.clear();

	// convert the string into a vector

	openfpm::vector<char> message_1_v(msg_size);
	openfpm::vector<char> message_2_v(msg_size);

	for (size_t i = 0 ; i < msg_size ; i++)
		message_1_v.get(i) = message_1[i];

	for (size_t i = 0 ; i < msg_size ; i++)
		message_2_v.get(i) = message_2[i];

	v_cl.max(id);
	v_cl.sum(id2);
	v_cl.allGather(id3,v);

	// in the case of vector we have special functions that avoid to specify the size
	v_cl.send(((id+1)%N_prc + N_prc)%N_prc,0,message_1_v);
	v_cl.send(((id+2)%N_prc + N_prc)%N_prc,0,message_2_v);
	v_cl.recv(((id-1)%N_prc + N_prc)%N_prc,0,v_one);
	v_cl.recv(((id-2)%N_prc + N_prc)%N_prc,0,v_two);
	v_cl.execute();

	if (v_cl.getProcessUnitID() == 0)
	{
		std::cout << "Maximum processor rank: " << id << "\n";
		std::cout << "Sum of all processors rank: " << id << "\n";

		std::cout << "Collected ids: ";
		for(size_t i = 0 ; i < v.size() ; i++)
			std::cout << " " << v.get(i) << " ";

		std::cout << "\n";

		for (size_t i = 0 ; i < msg_size ; i++)
			std::cout << v_one.get(i);

		for (size_t i = 0 ; i < msg_size ; i++)
			std::cout << v_two.get(i);
	}

	openfpm_finalize();
}
template<unsigned int ip> void test()
{
	Vcluster & vcl = create_vcluster();

	// send/recv messages

	global_rank = vcl.getProcessUnitID();
	size_t n_proc = vcl.getProcessingUnits();

	// Checking short communication pattern

	for (size_t s = 0 ; s < N_TRY ; s++)
	{
		for (size_t j = 32 ; j < N_LOOP ; j*=2)
		{
			global_step = j;

			//! [dsde]

			// We send one message for each processor (one message is an openfpm::vector<unsigned char>)
			// or an array of bytes
			openfpm::vector<openfpm::vector<unsigned char>> message;

			// receving messages. Each receiving message is an openfpm::vector<unsigned char>
			// or an array if bytes
			openfpm::vector<openfpm::vector<unsigned char>> recv_message(n_proc);

			// each processor communicate based on a list of processor
			openfpm::vector<size_t> prc;

			// We construct the processor list in particular in this case
			// each processor communicate with the 8 next (in id) processors
			for (size_t i = 0 ; i < 8  && i < n_proc ; i++)
			{
				size_t p_id = (i + 1 + vcl.getProcessUnitID()) % n_proc;

				// avoid to communicate with yourself
				if (p_id != vcl.getProcessUnitID())
				{
					// Create an hello message
					prc.add(p_id);
					message.add();
					std::ostringstream msg;
					msg << "Hello from " << vcl.getProcessUnitID() << " to " << p_id;
					std::string str(msg.str());
					message.last().resize(j);
					memset(message.last().getPointer(),0,j);
					std::copy(str.c_str(),&(str.c_str())[msg.str().size()],&(message.last().get(0)));
				}
			}

			// For simplicity we create in advance a receiving buffer for all processors
			recv_message.resize(n_proc);

			// The pattern is not really random preallocate the receive buffer
			for (size_t i = 0 ; i < 8  && i < n_proc ; i++)
			{
				long int p_id = vcl.getProcessUnitID() - i - 1;
				if (p_id < 0)
					p_id += n_proc;
				else
					p_id = p_id % n_proc;

				if (p_id != (long int)vcl.getProcessUnitID())
					recv_message.get(p_id).resize(j);
			}

			// Send and receive
			vcl.sendrecvMultipleMessagesNBX(prc,message,msg_alloc,&recv_message);

			//! [dsde]

#ifdef VERBOSE_TEST
			timer t;
			t.start();
#endif


#ifdef VERBOSE_TEST
			t.stop();

			double clk = t.getwct();
			double clk_max = clk;

			size_t size_send_recv = 2 * j * (prc.size());
			vcl.sum(size_send_recv);
			vcl.max(clk_max);
			vcl.execute();

			if (vcl.getProcessUnitID() == 0)
				std::cout << "(Short pattern: " << method<ip>() << ")Buffer size: " << j << "    Bandwidth (Average): " << size_send_recv / vcl.getProcessingUnits() / clk / 1e6 << " MB/s  " << "    Bandwidth (Total): " << size_send_recv / clk / 1e6 << " MB/s    Clock: " << clk << "   Clock MAX: " << clk_max <<"\n";
#endif

			// Check the message
			for (size_t i = 0 ; i < 8  && i < n_proc ; i++)
			{
				long int p_id = vcl.getProcessUnitID() - i - 1;
				if (p_id < 0)
					p_id += n_proc;
				else
					p_id = p_id % n_proc;

				if (p_id != (long int)vcl.getProcessUnitID())
				{
					std::ostringstream msg;
					msg << "Hello from " << p_id << " to " << vcl.getProcessUnitID();
					std::string str(msg.str());
					BOOST_REQUIRE_EQUAL(std::equal(str.c_str(),str.c_str() + str.size() ,&(recv_message.get(p_id).get(0))),true);
				}
				else
				{
					BOOST_REQUIRE_EQUAL((size_t)0,recv_message.get(p_id).size());
				}
			}
		}

		std::srand(create_vcluster().getProcessUnitID());
		std::default_random_engine eg;
		std::uniform_int_distribution<int> d(0,n_proc/8);

		// Check random pattern (maximum 16 processors)

		for (size_t j = 32 ; j < N_LOOP && n_proc < 16 ; j*=2)
		{
			global_step = j;
			// original send
			openfpm::vector<size_t> o_send;
			// send message
			openfpm::vector<openfpm::vector<unsigned char>> message;
			// recv message
			openfpm::vector<openfpm::vector<unsigned char>> recv_message;
//			recv_message.reserve(n_proc);

			openfpm::vector<size_t> prc;

			for (size_t i = 0 ; i < n_proc ; i++)
			{
				// randomly with which processor communicate
				if (d(eg) == 0)
				{
					prc.add(i);
					o_send.add(i);
					message.add();
					message.last().fill(0);
					std::ostringstream msg;
					msg << "Hello from " << vcl.getProcessUnitID() << " to " << i;
					std::string str(msg.str());
					message.last().resize(str.size());
					std::copy(str.c_str(),&(str.c_str())[msg.str().size()],&(message.last().get(0)));
					message.last().resize(j);
				}
			}

			id = 0;
			prc_recv.clear();


#ifdef VERBOSE_TEST
			timer t;
			t.start();
#endif

			commFunc<ip>(vcl,prc,message,msg_alloc3,&recv_message);

#ifdef VERBOSE_TEST
			t.stop();
			double clk = t.getwct();
			double clk_max = clk;

			size_t size_send_recv = (prc.size() + recv_message.size()) * j;
			vcl.sum(size_send_recv);
			vcl.sum(clk);
			vcl.max(clk_max);
			vcl.execute();
			clk /= vcl.getProcessingUnits();

			if (vcl.getProcessUnitID() == 0)
				std::cout << "(Random Pattern: " << method<ip>() << ") Buffer size: " << j << "    Bandwidth (Average): " << size_send_recv / vcl.getProcessingUnits() / clk / 1e6 << " MB/s  " << "    Bandwidth (Total): " << size_send_recv / clk / 1e6 <<  " MB/s    Clock: " << clk << "   Clock MAX: " << clk_max << "\n";
#endif

			// Check the message

			for (size_t i = 0 ; i < recv_message.size() ; i++)
			{
				std::ostringstream msg;
				msg << "Hello from " << prc_recv.get(i) << " to " << vcl.getProcessUnitID();
				std::string str(msg.str());
				BOOST_REQUIRE_EQUAL(std::equal(str.c_str(),str.c_str() + str.size() ,&(recv_message.get(i).get(0))),true);
			}

			// Reply back

			// Create the message

			prc.clear();
			message.clear();
			for (size_t i = 0 ; i < prc_recv.size() ; i++)
			{
				prc.add(prc_recv.get(i));
				message.add();
				std::ostringstream msg;
				msg << "Hey from " << vcl.getProcessUnitID() << " to " << prc_recv.get(i);
				std::string str(msg.str());
				message.last().resize(str.size());
				std::copy(str.c_str(),&(str.c_str())[msg.str().size()],&(message.last().get(0)));
				message.last().resize(j);
			}

			id = 0;
			prc_recv.clear();
			recv_message.clear();

			commFunc<ip>(vcl,prc,message,msg_alloc3,&recv_message);

			// Check if the received hey message match the original send

			BOOST_REQUIRE_EQUAL(o_send.size(),prc_recv.size());

			for (size_t i = 0 ; i < o_send.size() ; i++)
			{
				size_t j = 0;
				for ( ; j < prc_recv.size() ; j++)
				{
					if (o_send.get(i) == prc_recv.get(j))
					{
						// found the message check it

						std::ostringstream msg;
						msg << "Hey from " << prc_recv.get(i) << " to " << vcl.getProcessUnitID();
						std::string str(msg.str());
						BOOST_REQUIRE_EQUAL(std::equal(str.c_str(),str.c_str() + str.size() ,&(recv_message.get(i).get(0))),true);
						break;
					}
				}
				// Check that we find always a match
				BOOST_REQUIRE_EQUAL(j != prc_recv.size(),true);
			}
		}

		// Check long communication pattern

		for (size_t j = 32 ; j < N_LOOP ; j*=2)
		{
			global_step = j;
			// Processor step
			long int ps = n_proc / (8 + 1);

			// send message
			openfpm::vector<openfpm::vector<unsigned char>> message;
			// recv message
			openfpm::vector<openfpm::vector<unsigned char>> recv_message(n_proc);

			openfpm::vector<size_t> prc;

			for (size_t i = 0 ; i < 8  && i < n_proc ; i++)
			{
				size_t p_id = ((i+1) * ps + vcl.getProcessUnitID()) % n_proc;
				if (p_id != vcl.getProcessUnitID())
				{
					prc.add(p_id);
					message.add();
					std::ostringstream msg;
					msg << "Hello from " << vcl.getProcessUnitID() << " to " << p_id;
					std::string str(msg.str());
					message.last().resize(j);
					memset(message.last().getPointer(),0,j);
					std::copy(str.c_str(),&(str.c_str())[msg.str().size()],&(message.last().get(0)));
				}
			}

			recv_message.resize(n_proc);
			// The pattern is not really random preallocate the receive buffer
			for (size_t i = 0 ; i < 8  && i < n_proc ; i++)
			{
				long int p_id = (- (i+1) * ps + (long int)vcl.getProcessUnitID());
				if (p_id < 0)
					p_id += n_proc;
				else
					p_id = p_id % n_proc;

				if (p_id != (long int)vcl.getProcessUnitID())
					recv_message.get(p_id).resize(j);
			}

#ifdef VERBOSE_TEST
			timer t;
			t.start();
#endif

			commFunc<ip>(vcl,prc,message,msg_alloc,&recv_message);

#ifdef VERBOSE_TEST
			t.stop();
			double clk = t.getwct();
			double clk_max = clk;

			size_t size_send_recv = 2 * j * (prc.size());
			vcl.sum(size_send_recv);
			vcl.max(clk_max);
			vcl.execute();

			if (vcl.getProcessUnitID() == 0)
				std::cout << "(Long pattern: " << method<ip>() << ")Buffer size: " << j << "    Bandwidth (Average): " << size_send_recv / vcl.getProcessingUnits() / clk / 1e6 << " MB/s  " << "    Bandwidth (Total): " << size_send_recv / clk / 1e6 << " MB/s    Clock: " << clk << "   Clock MAX: " << clk_max <<"\n";
#endif

			// Check the message
			for (long int i = 0 ; i < 8  && i < (long int)n_proc ; i++)
			{
				long int p_id = (- (i+1) * ps + (long int)vcl.getProcessUnitID());
				if (p_id < 0)
					p_id += n_proc;
				else
					p_id = p_id % n_proc;

				if (p_id != (long int)vcl.getProcessUnitID())
				{
					std::ostringstream msg;
					msg << "Hello from " << p_id << " to " << vcl.getProcessUnitID();
					std::string str(msg.str());
					BOOST_REQUIRE_EQUAL(std::equal(str.c_str(),str.c_str() + str.size() ,&(recv_message.get(p_id).get(0))),true);
				}
				else
				{
					BOOST_REQUIRE_EQUAL((size_t)0,recv_message.get(p_id).size());
				}
			}
		}
	}
}