void unlimited_concurrency( Body body ) {

    for (int p = 1; p < 2*MaxThread; ++p) {
        tbb::flow::graph g;
        tbb::flow::function_node< InputType, OutputType, tbb::flow::rejecting > exe_node( g, tbb::flow::unlimited, body );

        for (size_t num_receivers = 1; num_receivers <= MAX_NODES; ++num_receivers ) {

            std::vector< harness_counting_receiver<OutputType> > receivers(num_receivers, harness_counting_receiver<OutputType>(g));
            harness_graph_executor<InputType, OutputType>::execute_count = 0;

            for (size_t r = 0; r < num_receivers; ++r ) {
                tbb::flow::make_edge( exe_node, receivers[r] );
            }

            NativeParallelFor( p, parallel_puts<InputType>(exe_node) );
            g.wait_for_all();

            // 2) the nodes will receive puts from multiple predecessors simultaneously,
            size_t ec = harness_graph_executor<InputType, OutputType>::execute_count;
            ASSERT( (int)ec == p*N, NULL );
            for (size_t r = 0; r < num_receivers; ++r ) {
                size_t c = receivers[r].my_count;
                // 3) the nodes will send to multiple successors.
                ASSERT( (int)c == p*N, NULL );
            }
            for (size_t r = 0; r < num_receivers; ++r ) {
                tbb::flow::remove_edge( exe_node, receivers[r] );
            }
        }
    }
}
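// Minimal sketch (a hypothetical functor, not part of the harness) of what the Body
// passed to unlimited_concurrency() must look like: something callable as
// OutputType(InputType). Note that the ASSERT on execute_count above additionally
// implies that the real test body increments
// harness_graph_executor<InputType, OutputType>::execute_count on every invocation,
// which this shape-only sketch deliberately omits.
template< typename InputType, typename OutputType >
struct passthrough_body_sketch {
    OutputType operator()( const InputType& v ) const { return OutputType( v ); }
};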
// Helper used by continue_nodes() below: registers N dummy predecessors on exe_node
// and checks fan-in/fan-out for each receiver configuration.
void run_continue_nodes( int p, tbb::flow::graph& g, tbb::flow::continue_node< OutputType >& exe_node ) {

    for (size_t i = 0; i < N; ++i) {
        exe_node.register_predecessor( *reinterpret_cast< tbb::flow::sender< tbb::flow::continue_msg > * >(&exe_node) );
    }

    for (size_t num_receivers = 1; num_receivers <= MAX_NODES; ++num_receivers ) {

        std::vector< harness_counting_receiver<OutputType> > receivers(num_receivers, harness_counting_receiver<OutputType>(g));
        harness_graph_executor<tbb::flow::continue_msg, OutputType>::execute_count = 0;

        for (size_t r = 0; r < num_receivers; ++r ) {
            tbb::flow::make_edge( exe_node, receivers[r] );
        }

        NativeParallelFor( p, parallel_puts<tbb::flow::continue_msg>(exe_node) );
        g.wait_for_all();

        // 2) the nodes will receive puts from multiple predecessors simultaneously,
        size_t ec = harness_graph_executor<tbb::flow::continue_msg, OutputType>::execute_count;
        ASSERT( (int)ec == p, NULL );
        for (size_t r = 0; r < num_receivers; ++r ) {
            size_t c = receivers[r].my_count;
            // 3) the nodes will send to multiple successors.
            ASSERT( (int)c == p, NULL );
        }
        for (size_t r = 0; r < num_receivers; ++r ) {
            tbb::flow::remove_edge( exe_node, receivers[r] );
        }
    }
}
void continue_nodes( Body body ) {
    for (int p = 1; p < 2*MaxThread; ++p) {
        tbb::flow::graph g;
        tbb::flow::continue_node< OutputType > exe_node( g, body );
        run_continue_nodes( p, g, exe_node );
        exe_node.try_put( tbb::flow::continue_msg() );
        tbb::flow::continue_node< OutputType > exe_node_copy( exe_node );
        run_continue_nodes( p, g, exe_node_copy );
    }
}
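// Minimal sketch (hypothetical, not part of the harness): the Body given to
// continue_nodes() is invoked with a tbb::flow::continue_msg once all of the node's
// registered predecessors have signaled, and must return OutputType. The assertions
// in run_continue_nodes() further imply that the real test body bumps
// harness_graph_executor<tbb::flow::continue_msg, OutputType>::execute_count,
// which this sketch does not do.
template< typename OutputType >
struct continue_body_sketch {
    OutputType operator()( tbb::flow::continue_msg ) const { return OutputType(); }
};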
void continue_nodes_with_copy( ) {

    for (int p = 1; p < 2*MaxThread; ++p) {
        tbb::flow::graph g;
        inc_functor<OutputType> cf;
        cf.local_execute_count = Offset;
        global_execute_count = Offset;

        tbb::flow::continue_node< OutputType > exe_node( g, cf );
        fake_continue_sender fake_sender;
        for (size_t i = 0; i < N; ++i) {
            exe_node.register_predecessor( fake_sender );
        }

        for (size_t num_receivers = 1; num_receivers <= MAX_NODES; ++num_receivers ) {

            std::vector< harness_counting_receiver<OutputType> > receivers(num_receivers, harness_counting_receiver<OutputType>(g));

            for (size_t r = 0; r < num_receivers; ++r ) {
                tbb::flow::make_edge( exe_node, receivers[r] );
            }

            NativeParallelFor( p, parallel_puts<tbb::flow::continue_msg>(exe_node) );
            g.wait_for_all();

            // 2) the nodes will receive puts from multiple predecessors simultaneously,
            for (size_t r = 0; r < num_receivers; ++r ) {
                size_t c = receivers[r].my_count;
                // 3) the nodes will send to multiple successors.
                ASSERT( (int)c == p, NULL );
            }
            // detach the receivers before they go out of scope
            for (size_t r = 0; r < num_receivers; ++r ) {
                tbb::flow::remove_edge( exe_node, receivers[r] );
            }
        }

        // validate that the local body matches the global execute_count and both are correct
        inc_functor<OutputType> body_copy = tbb::flow::copy_body< inc_functor<OutputType> >( exe_node );
        const size_t expected_count = p*MAX_NODES + Offset;
        size_t global_count = global_execute_count;
        size_t inc_count = body_copy.local_execute_count;
        ASSERT( global_count == expected_count && global_count == inc_count, NULL );
    }
}
void buffered_levels( size_t concurrency, Body body ) {

    // Do for lc = 1 to concurrency level
    for ( size_t lc = 1; lc <= concurrency; ++lc ) {
        tbb::flow::graph g;

        // Set the execute_counter back to zero in the harness
        harness_graph_executor<InputType, OutputType>::execute_count = 0;
        // Set the number of current executors to zero.
        harness_graph_executor<InputType, OutputType>::current_executors = 0;
        // Set the max allowed executors to lc. There is a check in the functor to make sure this is never exceeded.
        harness_graph_executor<InputType, OutputType>::max_executors = lc;

        // Create the function_node with the appropriate concurrency level, and use default buffering
        tbb::flow::function_node< InputType, OutputType > exe_node( g, lc, body );
        tbb::flow::function_node< InputType, InputType > pass_thru( g, tbb::flow::unlimited, pass_through<InputType>() );

        // Create a vector of identical exe_nodes and pass_thrus
        std::vector< tbb::flow::function_node< InputType, OutputType > > exe_vec(2, exe_node);
        std::vector< tbb::flow::function_node< InputType, InputType > > pass_thru_vec(2, pass_thru);
        // Attach each pass_thru to its corresponding exe_node
        for (size_t node_idx=0; node_idx<exe_vec.size(); ++node_idx) {
            tbb::flow::make_edge(pass_thru_vec[node_idx], exe_vec[node_idx]);
        }

        // TODO: why is the test executed serially for the node pairs, not concurrently?
        for (size_t node_idx=0; node_idx<exe_vec.size(); ++node_idx) {
            // For num_receivers = 1 to MAX_NODES
            for (size_t num_receivers = 1; num_receivers <= MAX_NODES; ++num_receivers ) {
                // Create num_receivers counting receivers and connect exe_vec[node_idx] to them.
                std::vector< harness_mapped_receiver<OutputType>* > receivers(num_receivers);
                for (size_t i = 0; i < num_receivers; i++) {
                    receivers[i] = new harness_mapped_receiver<OutputType>(g);
                }

                for (size_t r = 0; r < num_receivers; ++r ) {
                    tbb::flow::make_edge( exe_vec[node_idx], *receivers[r] );
                }

                // Do the test with varying numbers of senders
                harness_counting_sender<InputType> *senders = NULL;
                for (size_t num_senders = 1; num_senders <= MAX_NODES; ++num_senders ) {
                    // Create num_senders senders, set their message limit to N, and connect them to pass_thru_vec[node_idx]
                    senders = new harness_counting_sender<InputType>[num_senders];
                    for (size_t s = 0; s < num_senders; ++s ) {
                        senders[s].my_limit = N;
                        senders[s].register_successor( pass_thru_vec[node_idx] );
                    }

                    // Initialize the receivers so they know how many senders and messages to check for
                    for (size_t r = 0; r < num_receivers; ++r ) {
                        receivers[r]->initialize_map( N, num_senders );
                    }

                    // Do the test
                    NativeParallelFor( (int)num_senders, parallel_put_until_limit<InputType>(senders) );
                    g.wait_for_all();

                    // Confirm that N messages were requested from each sender
                    for (size_t s = 0; s < num_senders; ++s ) {
                        size_t n = senders[s].my_received;
                        ASSERT( n == N, NULL );
                        ASSERT( senders[s].my_receiver == &pass_thru_vec[node_idx], NULL );
                    }

                    // Validate the receivers
                    for (size_t r = 0; r < num_receivers; ++r ) {
                        receivers[r]->validate();
                    }
                    delete [] senders;
                }
                for (size_t r = 0; r < num_receivers; ++r ) {
                    tbb::flow::remove_edge( exe_vec[node_idx], *receivers[r] );
                }
                ASSERT( exe_vec[node_idx].try_put( InputType() ) == true, NULL );
                g.wait_for_all();
                for (size_t r = 0; r < num_receivers; ++r ) {
                    // since it's detached, nothing should have changed
                    receivers[r]->validate();
                }

                for (size_t i = 0; i < num_receivers; i++) {
                    delete receivers[i];
                }

            } // for num_receivers
        } // for node_idx
    } // for concurrency level lc
}
void concurrency_levels( size_t concurrency, Body body ) {

    for ( size_t lc = 1; lc <= concurrency; ++lc ) {
        tbb::flow::graph g;

        // Set the execute_counter back to zero in the harness
        harness_graph_executor<InputType, OutputType>::execute_count = 0;
        // Set the number of current executors to zero.
        harness_graph_executor<InputType, OutputType>::current_executors = 0;
        // Set the max allowed executors to lc. There is a check in the functor to make sure this is never exceeded.
        harness_graph_executor<InputType, OutputType>::max_executors = lc;

        typedef tbb::flow::function_node< InputType, OutputType, tbb::flow::rejecting > fnode_type;
        fnode_type exe_node( g, lc, body );

        for (size_t num_receivers = 1; num_receivers <= MAX_NODES; ++num_receivers ) {

            std::vector< harness_counting_receiver<OutputType> > receivers(num_receivers, harness_counting_receiver<OutputType>(g));

#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
            ASSERT(exe_node.successor_count() == 0, NULL);
            ASSERT(exe_node.predecessor_count() == 0, NULL);
#endif

            for (size_t r = 0; r < num_receivers; ++r ) {
                tbb::flow::make_edge( exe_node, receivers[r] );
            }

#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
            ASSERT(exe_node.successor_count() == num_receivers, NULL);
            typename fnode_type::successor_list_type my_succs;
            exe_node.copy_successors(my_succs);
            ASSERT(my_succs.size() == num_receivers, NULL);
            typename fnode_type::predecessor_list_type my_preds;
            exe_node.copy_predecessors(my_preds);
            ASSERT(my_preds.size() == 0, NULL);
#endif

            harness_counting_sender<InputType> *senders = NULL;

            for (size_t num_senders = 1; num_senders <= MAX_NODES; ++num_senders ) {
                senders = new harness_counting_sender<InputType>[num_senders];
                {
                    // Exclusively lock m to prevent exe_node from finishing
                    tbb::spin_rw_mutex::scoped_lock l( harness_graph_executor<InputType, OutputType>::template mutex_holder<tbb::spin_rw_mutex>::mutex );

                    // put to lc level, it will accept and then block at m
                    for ( size_t c = 0 ; c < lc ; ++c ) {
                        ASSERT( exe_node.try_put( InputType() ) == true, NULL );
                    }
                    // it only accepts to lc level
                    ASSERT( exe_node.try_put( InputType() ) == false, NULL );

                    for (size_t s = 0; s < num_senders; ++s ) {
                        // register a sender
                        senders[s].my_limit = N;
                        exe_node.register_predecessor( senders[s] );
                    }

                } // release lock at end of scope, setting the exe_node free to continue
                // wait for graph to settle down
                g.wait_for_all();

                // confirm that N messages were requested from each sender
                for (size_t s = 0; s < num_senders; ++s ) {
                    size_t n = senders[s].my_received;
                    ASSERT( n == N, NULL );
                    ASSERT( senders[s].my_receiver == &exe_node, NULL );
                }

                // confirm that each receiver got num_senders*N messages plus the initial lc puts
                for (size_t r = 0; r < num_receivers; ++r ) {
                    size_t n = receivers[r].my_count;
                    ASSERT( n == num_senders*N+lc, NULL );
                    receivers[r].my_count = 0;
                }
                delete [] senders;
            }
            for (size_t r = 0; r < num_receivers; ++r ) {
                tbb::flow::remove_edge( exe_node, receivers[r] );
            }
            ASSERT( exe_node.try_put( InputType() ) == true, NULL );
            g.wait_for_all();
            for (size_t r = 0; r < num_receivers; ++r ) {
                ASSERT( int(receivers[r].my_count) == 0, NULL );
            }
        }
    }
}
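// Illustrative sketch only (this struct is an assumption, not the harness code):
// for concurrency_levels() to work as written, the body must take a *read* lock on
// the same mutex that the test write-locks above, so every accepted input blocks
// inside the body until the test releases the write lock. The real harness body is
// also expected to maintain execute_count and current_executors/max_executors,
// which this sketch omits.
template< typename InputType, typename OutputType >
struct blocking_body_sketch {
    OutputType operator()( const InputType& v ) const {
        tbb::spin_rw_mutex::scoped_lock lock(
            harness_graph_executor<InputType, OutputType>::template mutex_holder<tbb::spin_rw_mutex>::mutex,
            /*write=*/false );
        return OutputType( v );
    }
};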
void buffered_levels_with_copy( size_t concurrency ) {

    // Do for lc = 1 to concurrency level
    for ( size_t lc = 1; lc <= concurrency; ++lc ) {
        tbb::flow::graph g;

        inc_functor cf;
        cf.local_execute_count = Offset;
        global_execute_count = Offset;

        tbb::flow::function_node< InputType, OutputType > exe_node( g, lc, cf );

        for (size_t num_receivers = 1; num_receivers <= MAX_NODES; ++num_receivers ) {

            std::vector< harness_mapped_receiver<OutputType>* > receivers(num_receivers);
            for (size_t i = 0; i < num_receivers; i++) {
                receivers[i] = new harness_mapped_receiver<OutputType>(g);
            }

            for (size_t r = 0; r < num_receivers; ++r ) {
                tbb::flow::make_edge( exe_node, *receivers[r] );
            }

            harness_counting_sender<InputType> *senders = NULL;
            for (size_t num_senders = 1; num_senders <= MAX_NODES; ++num_senders ) {
                senders = new harness_counting_sender<InputType>[num_senders];
                for (size_t s = 0; s < num_senders; ++s ) {
                    senders[s].my_limit = N;
                    tbb::flow::make_edge( senders[s], exe_node );
                }

                for (size_t r = 0; r < num_receivers; ++r ) {
                    receivers[r]->initialize_map( N, num_senders );
                }

                NativeParallelFor( (int)num_senders, parallel_put_until_limit<InputType>(senders) );
                g.wait_for_all();

                for (size_t s = 0; s < num_senders; ++s ) {
                    size_t n = senders[s].my_received;
                    ASSERT( n == N, NULL );
                    ASSERT( senders[s].my_receiver == &exe_node, NULL );
                }
                for (size_t r = 0; r < num_receivers; ++r ) {
                    receivers[r]->validate();
                }
                delete [] senders;
            }
            for (size_t r = 0; r < num_receivers; ++r ) {
                tbb::flow::remove_edge( exe_node, *receivers[r] );
            }
            ASSERT( exe_node.try_put( InputType() ) == true, NULL );
            g.wait_for_all();
            for (size_t r = 0; r < num_receivers; ++r ) {
                receivers[r]->validate();
            }

            for (size_t i = 0; i < num_receivers; i++) {
                delete receivers[i];
            }
        }

        // validate that the local body matches the global execute_count and both are correct:
        // each of the MAX_NODES receiver configurations pushes N * MAX_NODES*(MAX_NODES+1)/2
        // messages through the node plus one extra try_put, and both counters start at Offset
        inc_functor body_copy = tbb::flow::copy_body<inc_functor>( exe_node );
        const size_t expected_count = N/2 * MAX_NODES * MAX_NODES * ( MAX_NODES + 1 ) + MAX_NODES + Offset;
        size_t global_count = global_execute_count;
        size_t inc_count = body_copy.local_execute_count;
        ASSERT( global_count == expected_count && global_count == inc_count, NULL );

        g.reset(tbb::flow::rf_reset_bodies);
        body_copy = tbb::flow::copy_body<inc_functor>( exe_node );
        inc_count = body_copy.local_execute_count;
        ASSERT( Offset == inc_count, "reset(rf_reset_bodies) did not reset functor" );
    }
}
void buffered_levels( size_t concurrency, Body body ) {
    typedef typename std::tuple_element<0,OutputTuple>::type OutputType;

    // Do for lc = 1 to concurrency level
    for ( size_t lc = 1; lc <= concurrency; ++lc ) {
        tbb::flow::graph g;

        // Set the execute_counter back to zero in the harness
        harness_graph_multifunction_executor<InputType, OutputTuple,tbb::spin_mutex>::execute_count = 0;
        // Set the max allowed executors to lc. There is a check in the functor to make sure this is never exceeded.
        harness_graph_multifunction_executor<InputType, OutputTuple,tbb::spin_mutex>::max_executors = lc;

        // Create the multifunction_node with the appropriate concurrency level, and use default buffering
        tbb::flow::multifunction_node< InputType, OutputTuple > exe_node( g, lc, body );

        // Create a vector of identical exe_nodes
        std::vector< tbb::flow::multifunction_node< InputType, OutputTuple > > exe_vec(2, exe_node);

        // exercise each of the copied nodes
        for (size_t node_idx=0; node_idx<exe_vec.size(); ++node_idx) {
            for (size_t num_receivers = 1; num_receivers <= MAX_NODES; ++num_receivers ) {
                // Create num_receivers counting receivers and connect exe_vec[node_idx] to them.
                harness_mapped_receiver<OutputType> *receivers = new harness_mapped_receiver<OutputType>[num_receivers];

                for (size_t r = 0; r < num_receivers; ++r ) {
                    tbb::flow::make_edge( tbb::flow::output_port<0>(exe_vec[node_idx]), receivers[r] );
                }

                // Do the test with varying numbers of senders
                harness_counting_sender<InputType> *senders = NULL;
                for (size_t num_senders = 1; num_senders <= MAX_NODES; ++num_senders ) {
                    // Create num_senders senders, set their message limit to N, and connect them to exe_vec[node_idx]
                    senders = new harness_counting_sender<InputType>[num_senders];
                    for (size_t s = 0; s < num_senders; ++s ) {
                        senders[s].my_limit = N;
                        tbb::flow::make_edge( senders[s], exe_vec[node_idx] );
                    }

                    // Initialize the receivers so they know how many senders and messages to check for
                    for (size_t r = 0; r < num_receivers; ++r ) {
                        receivers[r].initialize_map( N, num_senders );
                    }

                    // Do the test
                    NativeParallelFor( (int)num_senders, parallel_put_until_limit<InputType>(senders) );
                    g.wait_for_all();

                    // confirm that N messages were requested from each sender
                    for (size_t s = 0; s < num_senders; ++s ) {
                        size_t n = senders[s].my_received;
                        ASSERT( n == N, NULL );
                        ASSERT( senders[s].my_receiver == &exe_vec[node_idx], NULL );
                    }

                    // validate the receivers
                    for (size_t r = 0; r < num_receivers; ++r ) {
                        receivers[r].validate();
                    }
                    delete [] senders;
                }
                for (size_t r = 0; r < num_receivers; ++r ) {
                    tbb::flow::remove_edge( tbb::flow::output_port<0>(exe_vec[node_idx]), receivers[r] );
                }
                ASSERT( exe_vec[node_idx].try_put( InputType() ) == true, NULL );
                g.wait_for_all();
                for (size_t r = 0; r < num_receivers; ++r ) {
                    // since it's detached, nothing should have changed
                    receivers[r].validate();
                }
                delete [] receivers;
            }
        }
    }
}
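// Illustrative sketch (hypothetical, not part of the harness): a body compatible
// with the multifunction_node tests above forwards each input to output port 0,
// which is the only port these tests attach receivers to. multifunction_node passes
// its output ports tuple as the second argument of the body; the conversion below
// assumes the first output type is constructible from InputType.
template< typename InputType, typename OutputTuple >
struct mf_body_sketch {
    typedef typename tbb::flow::multifunction_node< InputType, OutputTuple >::output_ports_type ports_type;
    void operator()( const InputType& in, ports_type& p ) {
        // forward the value to the first output port
        std::get<0>( p ).try_put( typename std::tuple_element<0,OutputTuple>::type( in ) );
    }
};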
void unlimited_concurrency( Body body ) {
    typedef typename std::tuple_element<0,OutputTuple>::type OutputType;

    for (int p = 1; p < 2*MaxThread; ++p) {
        tbb::flow::graph g;
        tbb::flow::multifunction_node< InputType, OutputTuple, tbb::flow::rejecting > exe_node( g, tbb::flow::unlimited, body );

        for (size_t num_receivers = 1; num_receivers <= MAX_NODES; ++num_receivers ) {

            harness_counting_receiver<OutputType> *receivers = new harness_counting_receiver<OutputType>[num_receivers];
            harness_graph_multifunction_executor<InputType, OutputTuple>::execute_count = 0;

            for (size_t r = 0; r < num_receivers; ++r ) {
                tbb::flow::make_edge( tbb::flow::output_port<0>(exe_node), receivers[r] );
            }

            NativeParallelFor( p, parallel_puts<InputType>(exe_node) );
            g.wait_for_all();

            // 2) the nodes will receive puts from multiple predecessors simultaneously,
            size_t ec = harness_graph_multifunction_executor<InputType, OutputTuple>::execute_count;
            ASSERT( (int)ec == p*N, NULL );
            for (size_t r = 0; r < num_receivers; ++r ) {
                size_t c = receivers[r].my_count;
                // 3) the nodes will send to multiple successors.
                ASSERT( (int)c == p*N, NULL );
            }

            // detach and free the receivers before the next configuration
            for (size_t r = 0; r < num_receivers; ++r ) {
                tbb::flow::remove_edge( tbb::flow::output_port<0>(exe_node), receivers[r] );
            }
            delete [] receivers;
        }
    }
}
void concurrency_levels( size_t concurrency, Body body ) {
    typedef typename std::tuple_element<0,OutputTuple>::type OutputType;

    for ( size_t lc = 1; lc <= concurrency; ++lc ) {
        tbb::flow::graph g;
        harness_graph_multifunction_executor<InputType, OutputTuple, tbb::spin_mutex>::execute_count = 0;

        tbb::flow::multifunction_node< InputType, OutputTuple, tbb::flow::rejecting > exe_node( g, lc, body );

        for (size_t num_receivers = 1; num_receivers <= MAX_NODES; ++num_receivers ) {

            harness_counting_receiver<OutputType> *receivers = new harness_counting_receiver<OutputType>[num_receivers];

            for (size_t r = 0; r < num_receivers; ++r ) {
                tbb::flow::make_edge( tbb::flow::output_port<0>(exe_node), receivers[r] );
            }

            harness_counting_sender<InputType> *senders = NULL;

            for (size_t num_senders = 1; num_senders <= MAX_NODES; ++num_senders ) {
                {
                    // lock m to prevent exe_node from finishing
                    tbb::spin_mutex::scoped_lock l( harness_graph_multifunction_executor< InputType, OutputTuple, tbb::spin_mutex >::mutex );

                    // put to lc level, it will accept and then block at m
                    for ( size_t c = 0 ; c < lc ; ++c ) {
                        ASSERT( exe_node.try_put( InputType() ) == true, NULL );
                    }
                    // it only accepts to lc level
                    ASSERT( exe_node.try_put( InputType() ) == false, NULL );

                    senders = new harness_counting_sender<InputType>[num_senders];
                    for (size_t s = 0; s < num_senders; ++s ) {
                        // register a sender
                        senders[s].my_limit = N;
                        exe_node.register_predecessor( senders[s] );
                    }

                } // release lock at end of scope, setting the exe_node free to continue
                // wait for graph to settle down
                g.wait_for_all();

                // confirm that N messages were requested from each sender
                for (size_t s = 0; s < num_senders; ++s ) {
                    size_t n = senders[s].my_received;
                    ASSERT( n == N, NULL );
                    ASSERT( senders[s].my_receiver == &exe_node, NULL );
                }

                // confirm that each receiver got num_senders*N messages plus the initial lc puts
                for (size_t r = 0; r < num_receivers; ++r ) {
                    size_t n = receivers[r].my_count;
                    ASSERT( n == num_senders*N+lc, NULL );
                    receivers[r].my_count = 0;
                }
                delete [] senders;
            }
            for (size_t r = 0; r < num_receivers; ++r ) {
                tbb::flow::remove_edge( tbb::flow::output_port<0>(exe_node), receivers[r] );
            }
            ASSERT( exe_node.try_put( InputType() ) == true, NULL );
            g.wait_for_all();
            for (size_t r = 0; r < num_receivers; ++r ) {
                ASSERT( int(receivers[r].my_count) == 0, NULL );
            }
            delete [] receivers;
        }
    }
}
void buffered_levels_with_copy( size_t concurrency ) {
    typedef typename std::tuple_element<0,OutputTuple>::type OutputType;

    // Do for lc = 1 to concurrency level
    for ( size_t lc = 1; lc <= concurrency; ++lc ) {
        tbb::flow::graph g;

        inc_functor cf;
        cf.local_execute_count = Offset;
        global_execute_count = Offset;

        tbb::flow::multifunction_node< InputType, OutputTuple > exe_node( g, lc, cf );

        for (size_t num_receivers = 1; num_receivers <= MAX_NODES; ++num_receivers ) {

            harness_mapped_receiver<OutputType> *receivers = new harness_mapped_receiver<OutputType>[num_receivers];

            for (size_t r = 0; r < num_receivers; ++r ) {
                tbb::flow::make_edge( tbb::flow::output_port<0>(exe_node), receivers[r] );
            }

            harness_counting_sender<InputType> *senders = NULL;
            for (size_t num_senders = 1; num_senders <= MAX_NODES; ++num_senders ) {
                senders = new harness_counting_sender<InputType>[num_senders];
                for (size_t s = 0; s < num_senders; ++s ) {
                    senders[s].my_limit = N;
                    tbb::flow::make_edge( senders[s], exe_node );
                }

                for (size_t r = 0; r < num_receivers; ++r ) {
                    receivers[r].initialize_map( N, num_senders );
                }

                NativeParallelFor( (int)num_senders, parallel_put_until_limit<InputType>(senders) );
                g.wait_for_all();

                for (size_t s = 0; s < num_senders; ++s ) {
                    size_t n = senders[s].my_received;
                    ASSERT( n == N, NULL );
                    ASSERT( senders[s].my_receiver == &exe_node, NULL );
                }
                for (size_t r = 0; r < num_receivers; ++r ) {
                    receivers[r].validate();
                }
                delete [] senders;
            }
            for (size_t r = 0; r < num_receivers; ++r ) {
                tbb::flow::remove_edge( tbb::flow::output_port<0>(exe_node), receivers[r] );
            }
            ASSERT( exe_node.try_put( InputType() ) == true, NULL );
            g.wait_for_all();
            for (size_t r = 0; r < num_receivers; ++r ) {
                receivers[r].validate();
            }
            delete [] receivers;
        }

        // validate that the local body matches the global execute_count and both are correct:
        // each of the MAX_NODES receiver configurations pushes N * MAX_NODES*(MAX_NODES+1)/2
        // messages through the node plus one extra try_put, and both counters start at Offset
        inc_functor body_copy = tbb::flow::copy_body<inc_functor>( exe_node );
        const size_t expected_count = N/2 * MAX_NODES * MAX_NODES * ( MAX_NODES + 1 ) + MAX_NODES + Offset;
        size_t global_count = global_execute_count;
        size_t inc_count = body_copy.local_execute_count;
        ASSERT( global_count == expected_count && global_count == inc_count, NULL );
    }
}