void try_synchronizing_spawns() { BOOST_MESSAGE("Testing synchronizing spawns"); const int N = 1<<8; BOOST_MESSAGE(" private,local"); CompletionEvent ce; int x = 0; for (int i=0; i<N; i++) { spawn(&ce, [&x]{ x++; }); } ce.wait(); BOOST_CHECK_EQUAL(x, N); BOOST_MESSAGE(" private,global"); on_all_cores([N]{ // gce.reset(); // barrier(); int x = 0; for (int i=0; i<N; i++) { spawn<&gce>([&x]{ x++; }); } gce.wait(); BOOST_CHECK_EQUAL(x, N); }); BOOST_MESSAGE(" public,global"); VLOG(1) << "actually in public_global"; on_all_cores([]{ global_x = 0; }); // gce.reset_all(); for (int i=0; i<N; i++) { spawn<unbound,&gce>([]{ global_x++; }); } gce.wait(); int total = 0; auto total_addr = make_global(&total); on_all_cores([total_addr]{ BOOST_MESSAGE("global_x on " << mycore() << ": " << global_x); delegate::fetch_and_add(total_addr, global_x); }); BOOST_CHECK_EQUAL(total, N); }
void try_global_ce() { BOOST_MESSAGE("GlobalCompletionEvent:"); const int64_t N = 128; int64_t x = 0; auto xa = make_global(&x); BOOST_MESSAGE(" block on user_main only"); // gce.reset_all(); (don't need to call `reset` anymore) on_all_cores([xa]{ Core origin = mycore(); gce.enroll(N+1); for (int i=0; i<N; i++) { spawn<unbound>([xa,origin]{ delegate::fetch_and_add(xa, 1); complete(make_global(&gce,origin)); }); } gce.complete(); }); gce.wait(); BOOST_CHECK_EQUAL(x, N*cores()); BOOST_MESSAGE(" block in SPMD tasks"); x = 0; // gce.reset_all(); (don't need this anymore) on_all_cores([xa,N]{ int y = 0; auto ya = make_global(&y); Core origin = mycore(); gce.enroll(N); for (int i=0; i<N; i++) { spawn<unbound>([xa,ya,origin]{ delegate::fetch_and_add(xa, 1); delegate::fetch_and_add(ya, 1); complete(make_global(&gce,origin)); }); } gce.wait(); BOOST_CHECK_EQUAL(y, N); }); BOOST_CHECK_EQUAL(x, N*cores()); }
void bfs_level(int64_t start, int64_t end) { #ifdef VTRACE VT_TRACER("bfs_level"); if (mycore() == 0) { char s[256]; sprintf(s, "<%ld>", end-start); VT_MARKER(marker, s); } #endif vlist_buf.setup(vlist, k2); range_t r = blockDist(start, end, mycore(), cores()); // TODO/FIXME: can't call `forall_global_public` from inside `on_all_cores` because it uses shared GCE pointer and calls `on_all_cores` itself. forall_here<unbound,async,&bfs_gce>(r.start, r.end-r.start, [](int64_t kstart, int64_t kiters) { int64_t buf[kiters]; Incoherent<int64_t>::RO cvlist(vlist+kstart, kiters, buf); for (int64_t i=0; i<kiters; i++) { ++bfs_vertex_visited; const int64_t v = cvlist[i]; int64_t buf[2]; Incoherent<int64_t>::RO cxoff(xoff+2*v, 2, buf); const int64_t vstart = cxoff[0], vend = cxoff[1]; // (xoff[2v], xoff[2v+1]) forall_here<unbound,async,&bfs_gce>(vstart, vend-vstart, [v](int64_t estart, int64_t eiters) { //const int64_t j = read(xadj+vo); //VLOG(1) << "estart: " << estart << ", eiters: " << eiters; int64_t cbuf[eiters]; Incoherent<int64_t>::RO cadj(xadj+estart, eiters, cbuf); for (int64_t i = 0; i < eiters; i++) { ++bfs_neighbors_visited; const int64_t j = cadj[i]; //VLOG(1) << "v = " << v << ", j = " << j << ", i = " << i << ", eiters = " << eiters; if (delegate::compare_and_swap(bfs_tree+j, -1, v)) { vlist_buf.push(j); } } }); } }); bfs_gce.wait(); vlist_buf.flush(); }
/// Runs impl::loop_decomposition over [0, N) with unbound tasks tracked by
/// `my_gce`, logging the chunks whose start index is a multiple of 10000.
/// The calling task enrolls itself before the call and completes afterwards,
/// then waits on `my_gce`.
void test_loop_decomposition_global() {
  BOOST_MESSAGE("Testing loop_decomposition_public...");
  VLOG(1) << "loop_decomposition_public";

  int N = 160000;

  my_gce.enroll();
  impl::loop_decomposition<unbound,&my_gce>(0, N, [](int64_t start, int64_t iters) {
    // Only report a sparse sample of chunks.
    if (start % 10000 != 0) {
      return;
    }
    VLOG(1) << "loop(" << start << ", " << iters << ")";
  });
  my_gce.complete();

  my_gce.wait();
}
void try_global_ce_recursive() { BOOST_MESSAGE("GlobalCompletionEvent (recursive spawns):"); const int64_t N = 128; int64_t x = 0; auto xa = make_global(&x); BOOST_MESSAGE(" block on user_main only"); // gce.reset_all(); on_all_cores([xa]{ gce.enroll(); Core origin = mycore(); for (int i=0; i<N; i++) { gce.enroll(); spawn<unbound>([xa,origin]{ delegate::fetch_and_add(xa, 1); complete(make_global(&gce,origin)); }); } gce.complete(); }); // overload Core0 with extra work rec_spawn(xa, N*2); gce.wait(); BOOST_CHECK_EQUAL(x, N*cores()+N*2); BOOST_MESSAGE(" block in SPMD tasks"); x = 0; // gce.reset_all(); on_all_cores([xa,N]{ int y = 0; auto ya = make_global(&y); Core origin = mycore(); gce.enroll(N); for (int i=0; i<N; i++) { spawn<unbound>([xa,ya,origin]{ delegate::fetch_and_add(xa, 1); delegate::fetch_and_add(ya, 1); complete(make_global(&gce,origin)); }); } if (mycore() == 0) { // overload Core0 with extra work rec_spawn(xa, N*2); } gce.wait(); BOOST_CHECK_EQUAL(y, N); }); BOOST_CHECK_EQUAL(x, N*cores()+N*2); BOOST_MESSAGE("test finish block syntactic sugar"); long xx = 0; auto a = make_global(&xx); finish([=]{ forall<unbound,async>(0, N, [=](int64_t i){ delegate::increment<async>(a, 1); }); }); BOOST_CHECK_EQUAL(xx, N); }
void test_forall_localized() { BOOST_MESSAGE("Testing forall (localized)..."); VLOG(1) << "testing forall (localized)"; const int64_t N = 100; auto array = Grappa::global_alloc<int64_t>(N); VLOG(1) << "checking 'on_cores_localized'"; on_cores_localized_async(array, N, [](int64_t* local_base, size_t nelem){ VLOG(1) << "local_base => " << local_base <<"\nnelem => " << nelem; }); forall(array, N, [](int64_t i, int64_t& e) { e = 1; }); for (int i=0; i<N; i++) { BOOST_CHECK_EQUAL(delegate::read(array+i), 1); } forall(array, N, [](int64_t& e) { e = 2; }); for (int i=0; i<N; i++) { BOOST_CHECK_EQUAL(delegate::read(array+i), 2); } forall(array, N, [](int64_t s, int64_t n, int64_t* e) { for (auto i=0; i<n; i++) { e[i] = 3; } }); for (int i=0; i<N; i++) { BOOST_CHECK_EQUAL(delegate::read(array+i), 3); } BOOST_MESSAGE("Testing forall_async..."); VLOG(1) << "testing forall_async"; VLOG(1) << "start spawning"; forall<async,&my_gce>(array+ 0, 25, [](int64_t i, int64_t& e) { e = 2; }); VLOG(1) << "after async"; forall<async,&my_gce>(array+25, 25, [](int64_t i, int64_t& e) { e = 2; }); VLOG(1) << "after async"; forall<async,&my_gce>(array+50, 25, [](int64_t i, int64_t& e) { e = 2; }); VLOG(1) << "after async"; forall<async,&my_gce>(array+75, 25, [](int64_t i, int64_t& e) { e = 2; }); VLOG(1) << "done spawning"; my_gce.wait(); int npb = block_size / sizeof(int64_t); auto * base = array.localize(); auto * end = (array+N).localize(); for (auto* x = base; x < end; x++) { BOOST_CHECK_EQUAL(*x, 2); } VLOG(1) << "checking indexing..."; VLOG(1) << ">> forall"; Grappa::memset(array, 0, N); forall(array, N, [](int64_t i, int64_t& e){ e = i; }); for (int i=0; i<N; i++) { BOOST_CHECK_EQUAL(delegate::read(array+i), i); } VLOG(1) << ">> forall_async"; VLOG(1) << ">> my_gce => " << &my_gce; Grappa::memset(array, 0, N); forall<async,&my_gce>(array, N, [](int64_t i, int64_t& e){ e = i; }); my_gce.wait(); for (int i=0; i<N; i++) { BOOST_CHECK_EQUAL(delegate::read(array+i), i); } 
Grappa::memset(array, 0, N); struct Pair { int64_t x, y; }; auto pairs = static_cast<GlobalAddress<Pair>>(array); forall<&my_gce>(pairs, N/2, [](int64_t i, Pair& e){ e.x = i; e.y = i; }); for (int i=0; i<N; i++) { BOOST_CHECK_EQUAL(delegate::read(array+i), i/2); } }