void plain_deferred_arguments() { void_f4_count.store(0); int_f4_count.store(0); { future<void> f1 = dataflow(hpx::launch::deferred, &void_f4, 42); future<int> f2 = dataflow(hpx::launch::deferred, &int_f4, 42); f1.wait(); HPX_TEST_EQ(void_f4_count, 1u); HPX_TEST_EQ(f2.get(), 84); HPX_TEST_EQ(int_f4_count, 1u); } void_f5_count.store(0); int_f5_count.store(0); { future<void> f1 = dataflow(&void_f5, 42, async(hpx::launch::deferred, &int_f)); future<int> f2 = dataflow(&int_f5, 42, async(hpx::launch::deferred, &int_f)); f1.wait(); HPX_TEST_EQ(void_f5_count, 1u); HPX_TEST_EQ(f2.get(), 126); HPX_TEST_EQ(int_f5_count, 1u); } }
void hpx_runtime::create_future_task( int gtid, kmp_task_t *thunk, int ndeps, kmp_depend_info_t *dep_list) { shared_future<raw_data> *output_future; vector<shared_future<raw_data>*> input_futures(ndeps); //if the variables are FP, then the data needs to be copied, if it's shared, then only //pointers need to be set. working with the assumption/requirement that data is FP. for(int i=0; i < ndeps; i++) { input_futures[i] = (**(shared_future<raw_data>***)(dep_list[i].base_addr)); if(dep_list[i].flags.out ) { output_future = (**(shared_future<raw_data>***)(dep_list[i].base_addr)); } } if(ndeps == 1) { *(output_future) = dataflow( unwrapping(future_wrapper), make_ready_future(gtid), make_ready_future(thunk), *(input_futures[0]) ); } else if(ndeps == 2) { *(output_future) = dataflow( unwrapping(future_wrapper2), make_ready_future(gtid), make_ready_future(thunk), *(input_futures[0]), *(input_futures[1]) ); } else if(ndeps == 3) { *(output_future) = dataflow( unwrapping(future_wrapper3), make_ready_future(gtid), make_ready_future(thunk), *(input_futures[0]), *(input_futures[1]), *(input_futures[2]) ); } else { cout << "too many dependencies for now" << endl; } }
// Exercises dataflow() on the given executor with plain (non-future)
// arguments, alone and mixed with a future produced by async().
void plain_arguments(Executor& exec)
{
    // Case 1: only a plain value is passed.
    void_f4_count.store(0);
    int_f4_count.store(0);

    {
        future<void> void_result = dataflow(exec, &void_f4, 42);
        future<int> int_result = dataflow(exec, &int_f4, 42);

        void_result.wait();
        HPX_TEST_EQ(void_f4_count, 1u);

        HPX_TEST_EQ(int_result.get(), 84);
        HPX_TEST_EQ(int_f4_count, 1u);
    }

    // Case 2: a plain value followed by a future argument.
    void_f5_count.store(0);
    int_f5_count.store(0);

    {
        future<void> void_result =
            dataflow(exec, &void_f5, 42, async(&int_f));
        future<int> int_result =
            dataflow(exec, &int_f5, 42, async(&int_f));

        void_result.wait();
        HPX_TEST_EQ(void_f5_count, 1u);

        HPX_TEST_EQ(int_result.get(), 126);
        HPX_TEST_EQ(int_f5_count, 1u);
    }
}
int main() { auto functor = hpx::util::unwrapped(mul<double>( 0.5 )); future_type f1 = hpx::make_ready_future( 1.0 ); future_type f2 = dataflow( hpx::launch::sync, functor , f1 , f1 ); future_type f3 = dataflow( functor , f1 , f1 ); hpx::wait_all(f1, f2, f3); return 0; }
void future_function_pointers() { future_void_f1_count.store(0); future_void_f2_count.store(0); future<void> f1 = dataflow( &future_void_f1, async(&future_void_sf1, shared_future<void>(make_ready_future())) ); f1.wait(); HPX_TEST_EQ(future_void_f1_count, 2u); future_void_f1_count.store(0); future<void> f2 = dataflow( &future_void_f2 , async(&future_void_sf1, shared_future<void>(make_ready_future())) , async(&future_void_sf1, shared_future<void>(make_ready_future())) ); f2.wait(); HPX_TEST_EQ(future_void_f1_count, 2u); HPX_TEST_EQ(future_void_f2_count, 1u); future_void_f1_count.store(0); future_void_f2_count.store(0); future<int> f3 = dataflow( &future_int_f1 , make_ready_future() ); HPX_TEST_EQ(f3.get(), 1); HPX_TEST_EQ(future_int_f1_count, 1u); future_int_f1_count.store(0); future<int> f4 = dataflow( &future_int_f2 , dataflow(&future_int_f1, make_ready_future()) , dataflow(&future_int_f1, make_ready_future()) ); HPX_TEST_EQ(f4.get(), 2); HPX_TEST_EQ(future_int_f1_count, 2u); HPX_TEST_EQ(future_int_f2_count, 1u); future_int_f1_count.store(0); future_int_f2_count.store(0); future_int_f_vector_count.store(0); std::vector<future<int> > vf; for(std::size_t i = 0; i < 10; ++i) { vf.push_back(dataflow(&future_int_f1, make_ready_future())); } future<int> f5 = dataflow(&future_int_f_vector, boost::ref(vf)); HPX_TEST_EQ(f5.get(), 10); }
// do all the work on 'np' partitions, 'nx' data points each, for 'nt' // time steps hpx::future<space> do_work(std::size_t np, std::size_t nx, std::size_t nt, boost::shared_array<double> data) { using hpx::dataflow; using hpx::util::unwrapped; // U[t][i] is the state of position i at time t. std::vector<space> U(2); for (space& s: U) s.resize(np); if (!data) { // Initial conditions: f(0, i) = i std::size_t b = 0; auto range = boost::irange(b, np); using hpx::parallel::execution::par; hpx::parallel::for_each( par, boost::begin(range), boost::end(range), [&U, nx](std::size_t i) { U[0][i] = hpx::make_ready_future( partition_data(nx, double(i))); } ); } else { // Initialize from existing data std::size_t b = 0; auto range = boost::irange(b, np); using hpx::parallel::execution::par; hpx::parallel::for_each( par, boost::begin(range), boost::end(range), [&U, nx, data](std::size_t i) { U[0][i] = hpx::make_ready_future( partition_data(nx, data.get()+(i*nx))); } ); } auto Op = unwrapped(&stepper::heat_part); // Actual time step loop for (std::size_t t = 0; t != nt; ++t) { space const& current = U[t % 2]; space& next = U[(t + 1) % 2]; for (std::size_t i = 0; i != np; ++i) { next[i] = dataflow( hpx::launch::async, Op, current[idx(i, -1, np)], current[i], current[idx(i, +1, np)] ); } } // Return the solution at time-step 'nt'. return hpx::when_all(U[nt % 2]); }
void LU( int numBlocks) { printf("LU\n"); hpx::naming::id_type here = hpx::find_here(); vector<vector<block> > blockList; getBlockList(blockList, numBlocks); vector<vector<vector<shared_future<block> > > > dfArray(numBlocks); shared_future<block> *diag_block, *first_col; for(int i = 0; i < numBlocks; i++){ dfArray[i].resize(numBlocks); for(int j = 0; j < numBlocks; j++){ dfArray[i][j].resize(numBlocks, hpx::make_ready_future(block())); } } //first iteration through matrix, initialized vector of futures dfArray[0][0][0] = async( ProcessDiagonalBlock, blockList[0][0] ); diag_block = &dfArray[0][0][0]; for(int i = 1; i < numBlocks; i++) { dfArray[0][0][i] = dataflow( unwrapped( &ProcessBlockOnRow ), hpx::make_ready_future( blockList[0][i] ), *diag_block); } for(int i = 1; i < numBlocks; i++) { dfArray[0][i][0] = dataflow( unwrapped( &ProcessBlockOnColumn ), hpx::make_ready_future( blockList[i][0] ), *diag_block); first_col = &dfArray[0][i][0]; for(int j = 1; j < numBlocks; j++) { dfArray[0][i][j] = dataflow( unwrapped( &ProcessInnerBlock ), hpx::make_ready_future( blockList[i][j]), dfArray[0][0][j], *first_col ); } } //all calculation after initialization. Each iteration, //the number of tasks/blocks spawned is decreased. for(int i = 1; i < numBlocks; i++) { dfArray[i][i][i] = dataflow( unwrapped( &ProcessDiagonalBlock ), dfArray[i-1][i][i]); diag_block = &dfArray[i][i][i]; for(int j = i + 1; j < numBlocks; j++){ dfArray[i][i][j] = dataflow( unwrapped(&ProcessBlockOnRow), dfArray[i-1][i][j], *diag_block); } for(int j = i + 1; j < numBlocks; j++){ dfArray[i][j][i] = dataflow( unwrapped( &ProcessBlockOnColumn ), dfArray[i-1][j][i], *diag_block); first_col = &dfArray[i][j][i]; for(int k = i + 1; k < numBlocks; k++) { dfArray[i][j][k] = dataflow( unwrapped( &ProcessInnerBlock ), dfArray[i-1][j][k], dfArray[i][i][k], *first_col ); } } } wait_all(dfArray[numBlocks-1][numBlocks-1][numBlocks-1]); }
//[stepper_7 // The partitioned operator, it invokes the heat operator above on all elements // of a partition. static partition heat_part(partition const& left, partition const& middle, partition const& right) { using hpx::dataflow; using hpx::util::unwrapped; hpx::shared_future<partition_data> middle_data = middle.get_data(partition_server::middle_partition); hpx::future<partition_data> next_middle = middle_data.then( unwrapped( [middle](partition_data const& m) -> partition_data { // All local operations are performed once the middle data of // the previous time step becomes available. std::size_t size = m.size(); partition_data next(size); for (std::size_t i = 1; i != size-1; ++i) next[i] = heat(m[i-1], m[i], m[i+1]); return next; } ) ); return dataflow( hpx::launch::async, unwrapped( [left, middle, right](partition_data next, partition_data const& l, partition_data const& m, partition_data const& r) -> partition { // Calculate the missing boundary elements once the // corresponding data has become available. std::size_t size = m.size(); next[0] = heat(l[size-1], m[0], m[1]); next[size-1] = heat(m[size-2], m[size-1], r[0]); // The new partition_data will be allocated on the same locality // as 'middle'. return partition(middle.get_id(), next); } ), std::move(next_middle), left.get_data(partition_server::left_partition), middle_data, right.get_data(partition_server::right_partition) ); }
// Combines the data of three neighbouring partitions into the partition for
// the next time step. The computation is attached via dataflow to the data
// futures of 'left', 'middle' and 'right'.
static partition heat_part(partition const& left, partition const& middle,
    partition const& right)
{
    using hpx::dataflow;
    using hpx::util::unwrapping;

    auto make_next =
        [middle](partition_data const& l, partition_data const& m,
            partition_data const& r)
        {
            // The new partition_data will be allocated on the same
            // locality as 'middle'.
            return partition(middle.get_id(), heat_part_data(l, m, r));
        };

    return dataflow(
        unwrapping(make_next),
        left.get_data(), middle.get_data(), right.get_data());
}
/////////////////////////////////////////////////////////////////////////////// // do all the work on 'np' partitions, 'nx' data points each, for 'nt' // time steps stepper::space stepper::do_work(std::size_t np, std::size_t nx, std::size_t nt) { using hpx::dataflow; std::vector<hpx::id_type> localities = hpx::find_all_localities(); std::size_t nl = localities.size(); // Number of localities // U[t][i] is the state of position i at time t. std::vector<space> U(2); for (space& s: U) s.resize(np); // Initial conditions: f(0, i) = i //[do_work_6 for (std::size_t i = 0; i != np; ++i) U[0][i] = partition(localities[locidx(i, np, nl)], nx, double(i)); //] heat_part_action act; for (std::size_t t = 0; t != nt; ++t) { space const& current = U[t % 2]; space& next = U[(t + 1) % 2]; for (std::size_t i = 0; i != np; ++i) { // we execute the action on the locality of the middle partition using hpx::util::placeholders::_1; using hpx::util::placeholders::_2; using hpx::util::placeholders::_3; auto Op = hpx::util::bind(act, localities[locidx(i, np, nl)], _1, _2, _3); next[i] = dataflow( hpx::launch::async, Op, current[idx(i, -1, np)], current[i], current[idx(i, +1, np)] ); } } // Return the solution at time-step 'nt'. return U[nt % 2]; }
// do all the work on 'np' partitions, 'nx' data points each, for 'nt' // time steps hpx::future<space> do_work(std::size_t np, std::size_t nx, std::size_t nt) { using hpx::util::unwrapped; using hpx::dataflow; using hpx::parallel::for_each; using hpx::parallel::par; // U[t][i] is the state of position i at time t. std::vector<space> U(2); for (space& s: U) s.resize(np); // Initial conditions: f(0, i) = i for (std::size_t i = 0; i != np; ++i) U[0][i] = hpx::make_ready_future(partition_data(nx, double(i))); auto Op = unwrapped(&stepper::heat_part); // Actual time step loop for (std::size_t t = 0; t != nt; ++t) { space const& current = U[t % 2]; space& next = U[(t + 1) % 2]; typedef boost::counting_iterator<std::size_t> iterator; for_each(par, iterator(0), iterator(np), [&next, ¤t, np, &Op](std::size_t i) { next[i] = dataflow( hpx::launch::async, Op, current[idx(i-1, np)], current[i], current[idx(i+1, np)] ); }); } // Return the solution at time-step 'nt'. return hpx::when_all(U[nt % 2]); }
// Combines the data of three neighbouring partitions into the partition for
// the next time step. Each neighbour is asked for the part of its data
// selected by the matching partition_server enumerator, and the computation
// is launched asynchronously once those futures are ready.
static partition heat_part(partition const& left, partition const& middle,
    partition const& right)
{
    using hpx::dataflow;
    using hpx::util::unwrapped;

    auto make_next =
        [left, middle, right](partition_data const& l,
            partition_data const& m, partition_data const& r)
        {
            // The new partition_data will be allocated on the same locality
            // as 'middle'.
            return partition(middle.get_id(), heat_part_data(l, m, r));
        };

    return dataflow(
        hpx::launch::async,
        unwrapped(make_next),
        left.get_data(partition_server::left_partition),
        middle.get_data(partition_server::middle_partition),
        right.get_data(partition_server::right_partition));
}
/////////////////////////////////////////////////////////////////////////////// // do all the work on 'np' partitions, 'nx' data points each, for 'nt' // time steps stepper::space stepper::do_work(std::size_t np, std::size_t nx, std::size_t nt) { using hpx::dataflow; // U[t][i] is the state of position i at time t. std::vector<space> U(2); for (space& s: U) s.resize(np); // Initial conditions: f(0, i) = i for (std::size_t i = 0; i != np; ++i) U[0][i] = partition(hpx::find_here(), nx, double(i)); using hpx::util::placeholders::_1; using hpx::util::placeholders::_2; using hpx::util::placeholders::_3; auto Op = hpx::util::bind(heat_part_action(), hpx::find_here(), _1, _2, _3); // Actual time step loop for (std::size_t t = 0; t != nt; ++t) { space const& current = U[t % 2]; space& next = U[(t + 1) % 2]; for (std::size_t i = 0; i != np; ++i) { next[i] = dataflow( hpx::launch::async, Op, current[idx(i, -1, np)], current[i], current[idx(i, +1, np)] ); } } // Return the solution at time-step 'nt'. return U[nt % 2]; }
// The input on the Intel call is a pair of pointers to arrays of dep structs, // and the length of these arrays. // The structs contain a pointer and a flag for in or out dep void hpx_runtime::create_df_task( int gtid, kmp_task_t *thunk, int ndeps, kmp_depend_info_t *dep_list, int ndeps_noalias, kmp_depend_info_t *noalias_dep_list ) { auto task = get_task_data(); auto team = task->team; if(team->num_threads == 1 ) { create_task(thunk->routine, gtid, thunk); } vector<shared_future<void>> dep_futures; dep_futures.reserve( ndeps + ndeps_noalias); //Populating a vector of futures that the task depends on for(int i = 0; i < ndeps;i++) { if(task->df_map.count( dep_list[i].base_addr) > 0) { dep_futures.push_back(task->df_map[dep_list[i].base_addr]); } } for(int i = 0; i < ndeps_noalias;i++) { if(task->df_map.count( noalias_dep_list[i].base_addr) > 0) { dep_futures.push_back(task->df_map[noalias_dep_list[i].base_addr]); } } shared_future<void> new_task; if(task->in_taskgroup) { } else { *(task->num_child_tasks) += 1; } #ifndef OMP_COMPLIANT team->num_tasks++; #endif if(dep_futures.size() == 0) { #ifdef OMP_COMPLIANT if(task->in_taskgroup) { new_task = hpx::async( *(task->tg_exec), tg_task_setup, gtid, thunk, task->icv, task->tg_exec, team); } else { new_task = hpx::async( *(team->exec), task_setup, gtid, thunk, task->icv, task->num_child_tasks, team); } #else new_task = hpx::async( task_setup, gtid, thunk, task->icv, task->num_child_tasks, team); #endif } else { #ifdef OMP_COMPLIANT //shared_future<shared_ptr<local_priority_queue_executor>> tg_exec = hpx::make_ready_future(task->tg_exec); if(task->in_taskgroup) { new_task = dataflow( *(task->tg_exec), unwrapping(df_tg_task_wrapper), gtid, thunk, task->icv, task->tg_exec, team, hpx::when_all(dep_futures) ); } else { new_task = dataflow( *(team->exec), unwrapping(df_task_wrapper), gtid, thunk, task->icv, task->num_child_tasks, team, hpx::when_all(dep_futures) ); } #else new_task = dataflow( unwrapping(df_task_wrapper), 
gtid, thunk, task->icv, task->num_child_tasks, team, hpx::when_all(dep_futures) ); #endif } for(int i = 0 ; i < ndeps; i++) { if(dep_list[i].flags.out) { task->df_map[dep_list[i].base_addr] = new_task; } } for(int i = 0 ; i < ndeps_noalias; i++) { if(noalias_dep_list[i].flags.out) { task->df_map[noalias_dep_list[i].base_addr] = new_task; } } //task->last_df_task = new_task; }
void function_pointers() { void_f_count.store(0); int_f_count.store(0); void_f1_count.store(0); int_f1_count.store(0); int_f2_count.store(0); future<void> f1 = dataflow(unwrapped(&void_f1), async(&int_f)); future<int> f2 = dataflow( unwrapped(&int_f1) , dataflow( unwrapped(&int_f1) , make_ready_future(42)) ); future<int> f3 = dataflow( unwrapped(&int_f2) , dataflow( unwrapped(&int_f1) , make_ready_future(42) ) , dataflow( unwrapped(&int_f1) , make_ready_future(37) ) ); int_f_vector_count.store(0); std::vector<future<int> > vf; for(std::size_t i = 0; i < 10; ++i) { vf.push_back(dataflow(unwrapped(&int_f1), make_ready_future(42))); } future<int> f4 = dataflow(unwrapped(&int_f_vector), std::move(vf)); future<int> f5 = dataflow( unwrapped(&int_f1) , dataflow( unwrapped(&int_f1) , make_ready_future(42)) , dataflow( unwrapped(&void_f) , make_ready_future()) ); f1.wait(); HPX_TEST_EQ(f2.get(), 126); HPX_TEST_EQ(f3.get(), 163); HPX_TEST_EQ(f4.get(), 10 * 84); HPX_TEST_EQ(f5.get(), 126); HPX_TEST_EQ(void_f_count, 1u); HPX_TEST_EQ(int_f_count, 1u); HPX_TEST_EQ(void_f1_count, 1u); HPX_TEST_EQ(int_f1_count, 16u); HPX_TEST_EQ(int_f2_count, 1u); }