void buffered_levels( size_t concurrency, Body body ) { // Do for lc = 1 to concurrency level for ( size_t lc = 1; lc <= concurrency; ++lc ) { tbb::flow::graph g; // Set the execute_counter back to zero in the harness harness_graph_executor<InputType, OutputType>::execute_count = 0; // Set the number of current executors to zero. harness_graph_executor<InputType, OutputType>::current_executors = 0; // Set the max allowed executors to lc. There is a check in the functor to make sure this is never exceeded. harness_graph_executor<InputType, OutputType>::max_executors = lc; // Create the function_node with the appropriate concurrency level, and use default buffering tbb::flow::function_node< InputType, OutputType > exe_node( g, lc, body ); tbb::flow::function_node<InputType, InputType> pass_thru( g, tbb::flow::unlimited, pass_through<InputType>()); // Create a vector of identical exe_nodes and pass_thrus std::vector< tbb::flow::function_node< InputType, OutputType > > exe_vec(2, exe_node); std::vector< tbb::flow::function_node< InputType, InputType > > pass_thru_vec(2, pass_thru); // Attach each pass_thru to its corresponding exe_node for (size_t node_idx=0; node_idx<exe_vec.size(); ++node_idx) { tbb::flow::make_edge(pass_thru_vec[node_idx], exe_vec[node_idx]); } // TODO: why the test is executed serially for the node pairs, not concurrently? for (size_t node_idx=0; node_idx<exe_vec.size(); ++node_idx) { // For num_receivers = 1 to MAX_NODES for (size_t num_receivers = 1; num_receivers <= MAX_NODES; ++num_receivers ) { // Create num_receivers counting receivers and connect the exe_vec[node_idx] to them. std::vector< harness_mapped_receiver<OutputType>* > receivers(num_receivers); for (size_t i = 0; i < num_receivers; i++) { receivers[i] = new harness_mapped_receiver<OutputType>(g); } for (size_t r = 0; r < num_receivers; ++r ) { tbb::flow::make_edge( exe_vec[node_idx], *receivers[r] ); } // Do the test with varying numbers of senders harness_counting_sender<InputType> *senders = NULL; for (size_t num_senders = 1; num_senders <= MAX_NODES; ++num_senders ) { // Create num_senders senders, set there message limit each to N, and connect them to pass_thru_vec[node_idx] senders = new harness_counting_sender<InputType>[num_senders]; for (size_t s = 0; s < num_senders; ++s ) { senders[s].my_limit = N; senders[s].register_successor(pass_thru_vec[node_idx] ); } // Initialize the receivers so they know how many senders and messages to check for for (size_t r = 0; r < num_receivers; ++r ) { receivers[r]->initialize_map( N, num_senders ); } // Do the test NativeParallelFor( (int)num_senders, parallel_put_until_limit<InputType>(senders) ); g.wait_for_all(); // confirm that each sender was requested from N times for (size_t s = 0; s < num_senders; ++s ) { size_t n = senders[s].my_received; ASSERT( n == N, NULL ); ASSERT( senders[s].my_receiver == &pass_thru_vec[node_idx], NULL ); } // validate the receivers for (size_t r = 0; r < num_receivers; ++r ) { receivers[r]->validate(); } delete [] senders; } for (size_t r = 0; r < num_receivers; ++r ) { tbb::flow::remove_edge( exe_vec[node_idx], *receivers[r] ); } ASSERT( exe_vec[node_idx].try_put( InputType() ) == true, NULL ); g.wait_for_all(); for (size_t r = 0; r < num_receivers; ++r ) { // since it's detached, nothing should have changed receivers[r]->validate(); } for (size_t i = 0; i < num_receivers; i++) { delete receivers[i]; } } // for num_receivers } // for node_idx } // for concurrency level lc }
void buffered_levels( size_t concurrency, Body body ) { typedef typename std::tuple_element<0,OutputTuple>::type OutputType; // Do for lc = 1 to concurrency level for ( size_t lc = 1; lc <= concurrency; ++lc ) { tbb::flow::graph g; // Set the execute_counter back to zero in the harness harness_graph_multifunction_executor<InputType, OutputTuple,tbb::spin_mutex>::execute_count = 0; // Set the max allowed executors to lc. There is a check in the functor to make sure this is never exceeded. harness_graph_multifunction_executor<InputType, OutputTuple,tbb::spin_mutex>::max_executors = lc; // Create the function_node with the appropriate concurreny level, and use default buffering tbb::flow::multifunction_node< InputType, OutputTuple > exe_node( g, lc, body ); //Create a vector of identical exe_nodes std::vector< tbb::flow::multifunction_node< InputType, OutputTuple > > exe_vec(2, exe_node); // exercise each of the copied nodes for (size_t node_idx=0; node_idx<exe_vec.size(); ++node_idx) { for (size_t num_receivers = 1; num_receivers <= MAX_NODES; ++num_receivers ) { // Create num_receivers counting receivers and connect the exe_vec[node_idx] to them. harness_mapped_receiver<OutputType> *receivers = new harness_mapped_receiver<OutputType>[num_receivers]; for (size_t r = 0; r < num_receivers; ++r ) { tbb::flow::make_edge( tbb::flow::output_port<0>(exe_vec[node_idx]), receivers[r] ); } // Do the test with varying numbers of senders harness_counting_sender<InputType> *senders = NULL; for (size_t num_senders = 1; num_senders <= MAX_NODES; ++num_senders ) { // Create num_senders senders, set there message limit each to N, and connect them to the exe_vec[node_idx] senders = new harness_counting_sender<InputType>[num_senders]; for (size_t s = 0; s < num_senders; ++s ) { senders[s].my_limit = N; tbb::flow::make_edge( senders[s], exe_vec[node_idx] ); } // Initialize the receivers so they know how many senders and messages to check for for (size_t r = 0; r < num_receivers; ++r ) { receivers[r].initialize_map( N, num_senders ); } // Do the test NativeParallelFor( (int)num_senders, parallel_put_until_limit<InputType>(senders) ); g.wait_for_all(); // cofirm that each sender was requested from N times for (size_t s = 0; s < num_senders; ++s ) { size_t n = senders[s].my_received; ASSERT( n == N, NULL ); ASSERT( senders[s].my_receiver == &exe_vec[node_idx], NULL ); } // validate the receivers for (size_t r = 0; r < num_receivers; ++r ) { receivers[r].validate(); } delete [] senders; } for (size_t r = 0; r < num_receivers; ++r ) { tbb::flow::remove_edge( tbb::flow::output_port<0>(exe_vec[node_idx]), receivers[r] ); } ASSERT( exe_vec[node_idx].try_put( InputType() ) == true, NULL ); g.wait_for_all(); for (size_t r = 0; r < num_receivers; ++r ) { // since it's detached, nothing should have changed receivers[r].validate(); } delete [] receivers; } } } }