/// Runs impl::loop_decomposition over [0, N) with the `unbound` policy,
/// tracking it with `my_gce`, and logs every chunk whose start index is a
/// multiple of 10000.
void test_loop_decomposition_global() {
  BOOST_MESSAGE("Testing loop_decomposition_public...");
  VLOG(1) << "loop_decomposition_public";

  int N = 160000;

  // One enrollment is held on my_gce for the duration of the decomposition
  // call and released by complete() below, before blocking in wait().
  my_gce.enroll();
  impl::loop_decomposition<unbound,&my_gce>(0, N, [](int64_t first, int64_t count) {
    const bool on_boundary = (first % 10000 == 0);
    if (on_boundary) {
      VLOG(1) << "loop(" << first << ", " << count << ")";
    }
  });
  my_gce.complete();

  my_gce.wait();
}
void try_synchronizing_spawns() { BOOST_MESSAGE("Testing synchronizing spawns"); const int N = 1<<8; BOOST_MESSAGE(" private,local"); CompletionEvent ce; int x = 0; for (int i=0; i<N; i++) { spawn(&ce, [&x]{ x++; }); } ce.wait(); BOOST_CHECK_EQUAL(x, N); BOOST_MESSAGE(" private,global"); on_all_cores([N]{ // gce.reset(); // barrier(); int x = 0; for (int i=0; i<N; i++) { spawn<&gce>([&x]{ x++; }); } gce.wait(); BOOST_CHECK_EQUAL(x, N); }); BOOST_MESSAGE(" public,global"); VLOG(1) << "actually in public_global"; on_all_cores([]{ global_x = 0; }); // gce.reset_all(); for (int i=0; i<N; i++) { spawn<unbound,&gce>([]{ global_x++; }); } gce.wait(); int total = 0; auto total_addr = make_global(&total); on_all_cores([total_addr]{ BOOST_MESSAGE("global_x on " << mycore() << ": " << global_x); delegate::fetch_and_add(total_addr, global_x); }); BOOST_CHECK_EQUAL(total, N); }
void try_global_ce() { BOOST_MESSAGE("GlobalCompletionEvent:"); const int64_t N = 128; int64_t x = 0; auto xa = make_global(&x); BOOST_MESSAGE(" block on user_main only"); // gce.reset_all(); (don't need to call `reset` anymore) on_all_cores([xa]{ Core origin = mycore(); gce.enroll(N+1); for (int i=0; i<N; i++) { spawn<unbound>([xa,origin]{ delegate::fetch_and_add(xa, 1); complete(make_global(&gce,origin)); }); } gce.complete(); }); gce.wait(); BOOST_CHECK_EQUAL(x, N*cores()); BOOST_MESSAGE(" block in SPMD tasks"); x = 0; // gce.reset_all(); (don't need this anymore) on_all_cores([xa,N]{ int y = 0; auto ya = make_global(&y); Core origin = mycore(); gce.enroll(N); for (int i=0; i<N; i++) { spawn<unbound>([xa,ya,origin]{ delegate::fetch_and_add(xa, 1); delegate::fetch_and_add(ya, 1); complete(make_global(&gce,origin)); }); } gce.wait(); BOOST_CHECK_EQUAL(y, N); }); BOOST_CHECK_EQUAL(x, N*cores()); }
/// Recursively spawns exactly `N` unbound tasks from the calling core.
///
/// Each task increments the counter at `xa` through a delegate and then
/// signals completion to `gce` on the spawning core. Every task is enrolled
/// with `gce` individually before it is spawned.
///
/// Each recursion level spawns N/2+1 tasks and hands the remainder to the
/// next level.
///
/// @param xa  global address of the counter each task increments
/// @param N   number of tasks to spawn; values <= 0 spawn nothing
void rec_spawn(GlobalAddress<int64_t> xa, int N) {
  // Base case. Previously the recursion was guarded by `N > 1` only, so a
  // remainder of 0 (e.g. N == 2 or N == 6) still recursed and spawned one
  // extra task, and a direct call with N == 0 spawned one task as well.
  if (N <= 0) return;

  Core origin = mycore();
  const int batch = N/2 + 1;   // never exceeds N when N >= 1
  for (int i = 0; i < batch; i++) {
    gce.enroll();
    spawn<unbound>([xa,origin]{
      delegate::fetch_and_add(xa, 1);
      complete(make_global(&gce,origin));
    });
  }

  rec_spawn(xa, N - batch);
}
void bfs_level(int64_t start, int64_t end) { #ifdef VTRACE VT_TRACER("bfs_level"); if (mycore() == 0) { char s[256]; sprintf(s, "<%ld>", end-start); VT_MARKER(marker, s); } #endif vlist_buf.setup(vlist, k2); range_t r = blockDist(start, end, mycore(), cores()); // TODO/FIXME: can't call `forall_global_public` from inside `on_all_cores` because it uses shared GCE pointer and calls `on_all_cores` itself. forall_here<unbound,async,&bfs_gce>(r.start, r.end-r.start, [](int64_t kstart, int64_t kiters) { int64_t buf[kiters]; Incoherent<int64_t>::RO cvlist(vlist+kstart, kiters, buf); for (int64_t i=0; i<kiters; i++) { ++bfs_vertex_visited; const int64_t v = cvlist[i]; int64_t buf[2]; Incoherent<int64_t>::RO cxoff(xoff+2*v, 2, buf); const int64_t vstart = cxoff[0], vend = cxoff[1]; // (xoff[2v], xoff[2v+1]) forall_here<unbound,async,&bfs_gce>(vstart, vend-vstart, [v](int64_t estart, int64_t eiters) { //const int64_t j = read(xadj+vo); //VLOG(1) << "estart: " << estart << ", eiters: " << eiters; int64_t cbuf[eiters]; Incoherent<int64_t>::RO cadj(xadj+estart, eiters, cbuf); for (int64_t i = 0; i < eiters; i++) { ++bfs_neighbors_visited; const int64_t j = cadj[i]; //VLOG(1) << "v = " << v << ", j = " << j << ", i = " << i << ", eiters = " << eiters; if (delegate::compare_and_swap(bfs_tree+j, -1, v)) { vlist_buf.push(j); } } }); } }); bfs_gce.wait(); vlist_buf.flush(); }
/// Sparse matrix-vector multiply over the graph `_g`.
///
/// For every vertex i and every adjacency j of i, accumulates
/// `weights[localj] * vj.v[vx]` into `vi.v[vy]`, using nested asynchronous
/// delegate calls. Completion of all per-edge updates is tracked by
/// `mmjoiner`: each row enrolls `v.nadj` events and every edge sends one
/// completion back to the core that enrolled it.
///
/// @param _g  graph to operate on; published to every core as `g`
/// @param vx  index into each vertex's `v` array to read from
/// @param vy  index into each vertex's `v` array to accumulate into
void spmv_mult( GlobalAddress<Graph<PagerankVertex>> _g, vindex vx, vindex vy ) {
  call_on_all_cores([_g]{ g = _g; });

  // Row index, both vector indices and the origin core, packed into bit-fields
  // so the value captured by the nested lambdas stays small.
  struct Packed { int64_t i:44; vindex x:2, y:2; Core origin:16; };

  // The previous check (`vx < (1<<3) && vy < (1<<3)`) admitted values that do
  // not fit the 2-bit fields above and were silently truncated when packed.
  // Verify instead that both indices survive a round trip through the
  // bit-fields; this holds whether `vindex` is signed or unsigned.
  {
    Packed probe = { 0, vx, vy, 0 };
    CHECK( probe.x == vx && probe.y == vy )
        << "vector index does not fit in the packed 2-bit field";
  }

  // forall rows
  forall<&mmjoiner>(g, [vx,vy](int64_t i, PagerankVertex& v){
    auto weights = v->weights;
    auto origin = mycore();
    mmjoiner.enroll(v.nadj);

    Packed p = { i, vx, vy, origin };

    forall<async,nullptr>(adj(g,v), [weights,p](int64_t localj, GlobalAddress<PagerankVertex> vj){
      auto vjw = weights[localj];
      // Read the source value where vertex j lives...
      delegate::call<async,nullptr>(vj, [vjw,p](PagerankVertex& vj){
        auto yaccum = vjw * vj->v[p.x];
        // ...then accumulate into vertex i where it lives, and report one
        // completion to the core that enrolled this edge.
        delegate::call<async,nullptr>(g->vs+p.i,[yaccum,p](PagerankVertex& vi){
          vi->v[p.y] += yaccum;
          mmjoiner.send_completion(p.origin);
        });
      });
    });
  });
}
void try_global_ce_recursive() { BOOST_MESSAGE("GlobalCompletionEvent (recursive spawns):"); const int64_t N = 128; int64_t x = 0; auto xa = make_global(&x); BOOST_MESSAGE(" block on user_main only"); // gce.reset_all(); on_all_cores([xa]{ gce.enroll(); Core origin = mycore(); for (int i=0; i<N; i++) { gce.enroll(); spawn<unbound>([xa,origin]{ delegate::fetch_and_add(xa, 1); complete(make_global(&gce,origin)); }); } gce.complete(); }); // overload Core0 with extra work rec_spawn(xa, N*2); gce.wait(); BOOST_CHECK_EQUAL(x, N*cores()+N*2); BOOST_MESSAGE(" block in SPMD tasks"); x = 0; // gce.reset_all(); on_all_cores([xa,N]{ int y = 0; auto ya = make_global(&y); Core origin = mycore(); gce.enroll(N); for (int i=0; i<N; i++) { spawn<unbound>([xa,ya,origin]{ delegate::fetch_and_add(xa, 1); delegate::fetch_and_add(ya, 1); complete(make_global(&gce,origin)); }); } if (mycore() == 0) { // overload Core0 with extra work rec_spawn(xa, N*2); } gce.wait(); BOOST_CHECK_EQUAL(y, N); }); BOOST_CHECK_EQUAL(x, N*cores()+N*2); BOOST_MESSAGE("test finish block syntactic sugar"); long xx = 0; auto a = make_global(&xx); finish([=]{ forall<unbound,async>(0, N, [=](int64_t i){ delegate::increment<async>(a, 1); }); }); BOOST_CHECK_EQUAL(xx, N); }
void bfs(GlobalAddress<G> _g, int nbfs, TupleGraph tg) { bool verified = false; double t; auto _frontier = GlobalBag<VertexID>::create(_g->nv); auto _next = GlobalBag<VertexID>::create(_g->nv); call_on_all_cores([=]{ frontier = _frontier; next = _next; g = _g; }); // do BFS from multiple different roots and average their times for (int root_idx = 0; root_idx < nbfs; root_idx++) { // intialize parent to -1 forall(g, [](G::Vertex& v){ v->init(); v->level = -1; }); VertexID root; if (FLAGS_max_degree_source) { forall(g, [](VertexID i, G::Vertex& v){ max_degree << MaxDegree(i, v.nadj); }); root = static_cast<MaxDegree>(max_degree).idx(); } else { root = choose_root(g); } // setup 'root' as the parent of itself delegate::call(g->vs+root, [=](G::Vertex& v){ v->parent = root; v->level = 0; }); // reset frontier queues next->clear(); frontier->clear(); // start with root as only thing in frontier delegate::call((g->vs+root).core(), [=]{ frontier->add(root); }); t = walltime(); bool top_down = true; int64_t prev_nf = -1; int64_t frontier_edges = 0; int64_t remaining_edges = g->nadj; while (!frontier->empty()) { auto nf = frontier->size(); VLOG(1) << "remaining_edges = " << remaining_edges << ", nf = " << nf << ", prev_nf = " << prev_nf << ", frontier_edges: " ; if (top_down && frontier_edges > remaining_edges/FLAGS_beamer_alpha && nf > prev_nf) { VLOG(1) << "switching to bottom-up"; top_down = false; } else if (!top_down && frontier_edges < g->nv/FLAGS_beamer_beta && nf < prev_nf) { VLOG(1) << "switching to top-down"; top_down = true; } edge_count = 0; if (top_down) { // iterate over vertices in this level of the frontier forall(frontier, [](VertexID& i){ // visit all the adjacencies of the vertex // note: this has to be 'async' to prevent deadlock from // running out of available workers forall<async>(adj(g,i), [i](G::Edge& e) { auto j = e.id; // at the core where the vertex is... 
delegate::call<async>(e.ga, [i,j](G::Vertex& vj){ // note: no synchronization needed because 'call' is // guaranteed to be executed atomically because it // does no blocking operations if (vj->parent == -1) { // claim parenthood vj->parent = i; vj->level = current_depth; next->add(j); edge_count += vj.nadj; } }); }); }); } else { // bottom-up forall<&phaser>(g, [](G::Vertex& v){ if (v->level != -1) return; auto va = make_linear(&v); forall<async,&phaser>(adj(g,v), [=,&v](G::Edge& e){ if (v->level != -1) return; phaser.enroll(); auto eva = e.ga; send_heap_message(eva.core(), [=]{ auto& ev = *eva.pointer(); if (ev->level != -1 && ev->level < current_depth) { auto eid = g->id(ev); send_heap_message(va.core(), [=]{ auto& v = *va.pointer(); if (v->level == -1) { next->add(g->id(v)); v->level = current_depth; v->parent = eid; edge_count += v.nadj; } phaser.complete(); }); } else { phaser.send_completion(va.core()); } }); }); }); } call_on_all_cores([=]{ current_depth++; // switch to next frontier level std::swap(frontier, next); }); next->clear(); frontier_edges = edge_count; remaining_edges -= frontier_edges; prev_nf = nf; } // while (frontier not empty) double this_bfs_time = walltime() - t; LOG(INFO) << "(root=" << root << ", time=" << this_bfs_time << ")"; if (!verified) { // only verify the first one to save time t = walltime(); bfs_nedge = verify(tg, g, root); verify_time = (walltime()-t); LOG(INFO) << verify_time; verified = true; Metrics::reset_all_cores(); // don't count the first one } else { total_time += this_bfs_time; } bfs_mteps += bfs_nedge / this_bfs_time / 1.0e6; } }
void test_forall_localized() { BOOST_MESSAGE("Testing forall (localized)..."); VLOG(1) << "testing forall (localized)"; const int64_t N = 100; auto array = Grappa::global_alloc<int64_t>(N); VLOG(1) << "checking 'on_cores_localized'"; on_cores_localized_async(array, N, [](int64_t* local_base, size_t nelem){ VLOG(1) << "local_base => " << local_base <<"\nnelem => " << nelem; }); forall(array, N, [](int64_t i, int64_t& e) { e = 1; }); for (int i=0; i<N; i++) { BOOST_CHECK_EQUAL(delegate::read(array+i), 1); } forall(array, N, [](int64_t& e) { e = 2; }); for (int i=0; i<N; i++) { BOOST_CHECK_EQUAL(delegate::read(array+i), 2); } forall(array, N, [](int64_t s, int64_t n, int64_t* e) { for (auto i=0; i<n; i++) { e[i] = 3; } }); for (int i=0; i<N; i++) { BOOST_CHECK_EQUAL(delegate::read(array+i), 3); } BOOST_MESSAGE("Testing forall_async..."); VLOG(1) << "testing forall_async"; VLOG(1) << "start spawning"; forall<async,&my_gce>(array+ 0, 25, [](int64_t i, int64_t& e) { e = 2; }); VLOG(1) << "after async"; forall<async,&my_gce>(array+25, 25, [](int64_t i, int64_t& e) { e = 2; }); VLOG(1) << "after async"; forall<async,&my_gce>(array+50, 25, [](int64_t i, int64_t& e) { e = 2; }); VLOG(1) << "after async"; forall<async,&my_gce>(array+75, 25, [](int64_t i, int64_t& e) { e = 2; }); VLOG(1) << "done spawning"; my_gce.wait(); int npb = block_size / sizeof(int64_t); auto * base = array.localize(); auto * end = (array+N).localize(); for (auto* x = base; x < end; x++) { BOOST_CHECK_EQUAL(*x, 2); } VLOG(1) << "checking indexing..."; VLOG(1) << ">> forall"; Grappa::memset(array, 0, N); forall(array, N, [](int64_t i, int64_t& e){ e = i; }); for (int i=0; i<N; i++) { BOOST_CHECK_EQUAL(delegate::read(array+i), i); } VLOG(1) << ">> forall_async"; VLOG(1) << ">> my_gce => " << &my_gce; Grappa::memset(array, 0, N); forall<async,&my_gce>(array, N, [](int64_t i, int64_t& e){ e = i; }); my_gce.wait(); for (int i=0; i<N; i++) { BOOST_CHECK_EQUAL(delegate::read(array+i), i); } 
Grappa::memset(array, 0, N); struct Pair { int64_t x, y; }; auto pairs = static_cast<GlobalAddress<Pair>>(array); forall<&my_gce>(pairs, N/2, [](int64_t i, Pair& e){ e.x = i; e.y = i; }); for (int i=0; i<N; i++) { BOOST_CHECK_EQUAL(delegate::read(array+i), i/2); } }