C++ (Cpp) GlobalCompletionEvent 예제들, GlobalCompletionEvent C++ (Cpp) 예제들

예제 #1

0

파일 보기

파일: New_loop_tests.cpp 프로젝트: HTOKORG/grappa

/// Runs `impl::loop_decomposition` over 160000 iterations starting at 0, with
/// `my_gce` as the completion event, and then waits on that event.
///
/// The loop body does no real work: it emits one VLOG(1) line for each chunk
/// whose starting index is a multiple of 10000.
void test_loop_decomposition_global() {
  BOOST_MESSAGE("Testing loop_decomposition_public...");
  VLOG(1) << "loop_decomposition_public";

  int total_iters = 160000;

  // Enroll once on behalf of this task before handing the range over; the
  // matching complete() is issued right after the decomposition call returns.
  my_gce.enroll();
  impl::loop_decomposition<unbound,&my_gce>(0, total_iters, [](int64_t first, int64_t count) {
    if ( first%10000 != 0 ) return;
    VLOG(1) << "loop(" << first << ", " << count << ")";
  });
  my_gce.complete();
  my_gce.wait();
}

예제 #2

0

파일 보기

파일: CompletionEvent_tests.cpp 프로젝트: kawuum/grappa

void try_synchronizing_spawns() {
  BOOST_MESSAGE("Testing synchronizing spawns");
  const int N = 1<<8;
  
  BOOST_MESSAGE("  private,local");
  CompletionEvent ce;
  int x = 0;
  for (int i=0; i<N; i++) {
    spawn(&ce, [&x]{
      x++;
    });
  }
  ce.wait();
  BOOST_CHECK_EQUAL(x, N);
  
  BOOST_MESSAGE("  private,global");
  on_all_cores([N]{
//    gce.reset();
//    barrier();
    
    int x = 0;
    
    for (int i=0; i<N; i++) {
      spawn<&gce>([&x]{
        x++;
      });
    }
    
    gce.wait();
    BOOST_CHECK_EQUAL(x, N);
  });
  
  BOOST_MESSAGE("  public,global"); VLOG(1) << "actually in public_global";
  on_all_cores([]{ global_x = 0; });
  
//  gce.reset_all();
  for (int i=0; i<N; i++) {
    spawn<unbound,&gce>([]{
      global_x++;
    });
  }
  gce.wait();
  
  int total = 0;
  auto total_addr = make_global(&total);
  on_all_cores([total_addr]{
    BOOST_MESSAGE("global_x on " << mycore() << ": " << global_x);
    delegate::fetch_and_add(total_addr, global_x);
  });
  BOOST_CHECK_EQUAL(total, N);
}

예제 #3

0

파일 보기

파일: CompletionEvent_tests.cpp 프로젝트: kawuum/grappa

void try_global_ce() {
  BOOST_MESSAGE("GlobalCompletionEvent:");
  
  const int64_t N = 128;
  int64_t x = 0;
  auto xa = make_global(&x);
  
  BOOST_MESSAGE("  block on user_main only");
//  gce.reset_all(); (don't need to call `reset` anymore)
  on_all_cores([xa]{
    Core origin = mycore();
    gce.enroll(N+1);
    for (int i=0; i<N; i++) {
      spawn<unbound>([xa,origin]{
        delegate::fetch_and_add(xa, 1);
        complete(make_global(&gce,origin));
      });
    }
    
    gce.complete();
  });
  
  gce.wait();
  BOOST_CHECK_EQUAL(x, N*cores());
  
  
  BOOST_MESSAGE("  block in SPMD tasks");
  
  x = 0;
//  gce.reset_all(); (don't need this anymore)
  on_all_cores([xa,N]{
    int y = 0;
    auto ya = make_global(&y);
    
    Core origin = mycore();
    gce.enroll(N);
    for (int i=0; i<N; i++) {
      spawn<unbound>([xa,ya,origin]{
        delegate::fetch_and_add(xa, 1);
        delegate::fetch_and_add(ya, 1);
        complete(make_global(&gce,origin));
      });
    }
    gce.wait();
    BOOST_CHECK_EQUAL(y, N);
  });
  BOOST_CHECK_EQUAL(x, N*cores());
}

예제 #4

0

파일 보기

파일: CompletionEvent_tests.cpp 프로젝트: kawuum/grappa

/// Spawns unbound tasks that each add 1 to `*xa`, in recursively shrinking
/// batches.
///
/// Each call enrolls and spawns `N/2 + 1` tasks against `gce`, then (when
/// `N > 1`) recurses with the remainder `N - N/2 - 1`. Every task reports its
/// completion to the `gce` instance on the core that called this function.
/// Note that a call with `N <= 1` still spawns one task.
///
/// @param xa  global address of the counter to increment
/// @param N   size parameter controlling how many tasks are spawned
void rec_spawn(GlobalAddress<int64_t> xa, int N) {
  Core origin = mycore();
  const int batch = N/2 + 1;

  for (int k = 0; k < batch; ++k) {
    // One enrollment per task, taken just before the task is spawned.
    gce.enroll();
    spawn<unbound>([xa,origin]{
      delegate::fetch_and_add(xa, 1);
      complete(make_global(&gce,origin));
    });
  }

  if (N > 1) {
    rec_spawn(xa, N - batch);
  }
}

예제 #5

0

파일 보기

파일: bfs.cpp 프로젝트: kawuum/grappa

/// Expands one BFS level over the frontier slice `vlist[start, end)`.
///
/// The range is block-distributed across cores with `blockDist`; this core
/// walks its share. For each frontier vertex `v` it reads the adjacency bounds
/// `(xoff[2v], xoff[2v+1])` and scans that range of `xadj`. Every neighbour `j`
/// whose `bfs_tree` entry is successfully swapped from -1 to `v` is pushed onto
/// `vlist_buf`. Both loop nests are asynchronous and joined on `bfs_gce`; the
/// function waits on it and then flushes `vlist_buf`.
///
/// @param start  first index into `vlist` for this level
/// @param end    one past the last index into `vlist` for this level
void bfs_level(int64_t start, int64_t end) {
#ifdef VTRACE
  VT_TRACER("bfs_level");
  if (mycore() == 0) {
    char s[256];
    // Bounded write; the cast keeps the argument in step with "%ld" on
    // platforms where int64_t is not `long`.
    snprintf(s, sizeof(s), "<%ld>", static_cast<long>(end-start));
    VT_MARKER(marker, s);
  }
#endif

  vlist_buf.setup(vlist, k2);
  
  range_t r = blockDist(start, end, mycore(), cores());
  
  // TODO/FIXME: can't call `forall_global_public` from inside `on_all_cores` because it uses shared GCE pointer and calls `on_all_cores` itself.
  forall_here<unbound,async,&bfs_gce>(r.start, r.end-r.start, [](int64_t kstart, int64_t kiters) {
    // Local staging buffer for this chunk of the frontier (variable-length
    // array: a compiler extension in C++).
    int64_t vlist_chunk[kiters];
    Incoherent<int64_t>::RO cvlist(vlist+kstart, kiters, vlist_chunk);

    for (int64_t i=0; i<kiters; i++) {
      ++bfs_vertex_visited;

      const int64_t v = cvlist[i];
      
      // Separate two-element buffer for the adjacency bounds, so it no longer
      // shadows the frontier buffer above.
      int64_t xoff_pair[2];
      Incoherent<int64_t>::RO cxoff(xoff+2*v, 2, xoff_pair);
      const int64_t vstart = cxoff[0], vend = cxoff[1]; // (xoff[2v], xoff[2v+1])
      
      forall_here<unbound,async,&bfs_gce>(vstart, vend-vstart, [v](int64_t estart, int64_t eiters) {
        //const int64_t j = read(xadj+vo);
        //VLOG(1) << "estart: " << estart << ", eiters: " << eiters;

        int64_t cbuf[eiters];
        Incoherent<int64_t>::RO cadj(xadj+estart, eiters, cbuf);
        
        for (int64_t e = 0; e < eiters; e++) {
          ++bfs_neighbors_visited;

          const int64_t j = cadj[e];
          //VLOG(1) << "v = " << v << ", j = " << j << ", e = " << e << ", eiters = " << eiters;

          // Claim `j` for parent `v` only if it is still unvisited (-1).
          if (delegate::compare_and_swap(bfs_tree+j, -1, v)) {
            vlist_buf.push(j);
          }
        }
      });
    }
  });
  
  bfs_gce.wait();
  vlist_buf.flush();
    
}

예제 #6

0

파일 보기

파일: spmv_mult.cpp 프로젝트: HPCProjectsTry/grappa

/// Sparse matrix-vector multiply over the graph `_g`.
///
/// For every row vertex `i` and each of its adjacencies `vj` (at local index
/// `localj`), this accumulates `weights[localj] * vj->v[vx]` into `vi->v[vy]`.
/// The work is a chain of asynchronous delegate calls: first to the adjacent
/// vertex to read its `v[vx]`, then back to the row vertex to add the product.
/// Completion is tracked on `mmjoiner`: each row enrolls `v.nadj` on its own
/// core, and the final delegate sends one completion to that core.
///
/// @param _g  graph to operate on; installed into the per-core `g` first
/// @param vx  index of the input vector slot in each vertex's `v[]`
/// @param vy  index of the output vector slot in each vertex's `v[]`
///
/// NOTE(review): the CHECK admits indices up to 7, but the packed context
/// below stores them in 2-bit fields. Confirm the intended range against the
/// definition of `vindex`, which is not visible here.
void spmv_mult( GlobalAddress<Graph<PagerankVertex>> _g, vindex vx, vindex vy ) {
  call_on_all_cores([_g]{ g = _g; });
  CHECK( vx < (1<<3) && vy < (1<<3) );

  // One iteration per row of the matrix.
  forall<&mmjoiner>(g, [vx,vy](int64_t i, PagerankVertex& v){
    auto row_weights = v->weights;
    auto home_core = mycore();
    mmjoiner.enroll(v.nadj);

    // Row index, both vector slots and the originating core, packed into
    // bit-fields and captured by value by the nested lambdas.
    struct { int64_t row:44; vindex src:2, dst:2; Core home:16; } ctx
         = { i, vx, vy, home_core };

    forall<async,nullptr>(adj(g,v), [row_weights,ctx](int64_t localj, GlobalAddress<PagerankVertex> vj){
      auto edge_weight = row_weights[localj];
      // Hop to the adjacent vertex to read its input slot...
      delegate::call<async,nullptr>(vj, [edge_weight,ctx](PagerankVertex& neighbor){
        auto contribution = edge_weight * neighbor->v[ctx.src];
        // ...then hop to the row vertex to accumulate into its output slot.
        delegate::call<async,nullptr>(g->vs+ctx.row,[contribution,ctx](PagerankVertex& row_vertex){
          row_vertex->v[ctx.dst] += contribution;
          mmjoiner.send_completion(ctx.home);
        });
      });
    });
  });
}

예제 #7

0

파일 보기

파일: CompletionEvent_tests.cpp 프로젝트: kawuum/grappa

void try_global_ce_recursive() {
  BOOST_MESSAGE("GlobalCompletionEvent (recursive spawns):");
  
  const int64_t N = 128;
  int64_t x = 0;
  auto xa = make_global(&x);
  
  BOOST_MESSAGE("  block on user_main only");
//  gce.reset_all();
  on_all_cores([xa]{
    gce.enroll();
    Core origin = mycore();
    for (int i=0; i<N; i++) {
      gce.enroll();
      spawn<unbound>([xa,origin]{
        delegate::fetch_and_add(xa, 1);
        complete(make_global(&gce,origin));
      });
    }
    
    gce.complete();
  });
  
  // overload Core0 with extra work
  rec_spawn(xa, N*2);
  
  gce.wait();
  BOOST_CHECK_EQUAL(x, N*cores()+N*2);
  
  
  BOOST_MESSAGE("  block in SPMD tasks");
  
  x = 0;
//  gce.reset_all();
  on_all_cores([xa,N]{
    int y = 0;
    auto ya = make_global(&y);
    
    Core origin = mycore();
    gce.enroll(N);
    for (int i=0; i<N; i++) {
      spawn<unbound>([xa,ya,origin]{
        delegate::fetch_and_add(xa, 1);
        delegate::fetch_and_add(ya, 1);
        complete(make_global(&gce,origin));
      });
    }
    
    if (mycore() == 0) {
      // overload Core0 with extra work
      rec_spawn(xa, N*2);
    }
    
    gce.wait();
    BOOST_CHECK_EQUAL(y, N);
  });
  BOOST_CHECK_EQUAL(x, N*cores()+N*2);
  
  BOOST_MESSAGE("test finish block syntactic sugar");
  
  long xx = 0;
  auto a = make_global(&xx);
  
  finish([=]{
    forall<unbound,async>(0, N, [=](int64_t i){
      delegate::increment<async>(a, 1);
    });
  });

  BOOST_CHECK_EQUAL(xx, N);
}

예제 #8

0

파일 보기

파일: bfs_beamer.cpp 프로젝트: HTOKORG/grappa

/// Runs `nbfs` breadth-first searches over the graph `_g`, switching between
/// top-down and bottom-up traversal per level, and accumulates timing metrics.
///
/// Per search: vertices are re-initialised with level -1, a root is chosen
/// (the max-degree vertex when FLAGS_max_degree_source is set, otherwise
/// `choose_root(g)`), and levels are expanded until the frontier is empty.
/// The first search is verified with `verify(tg, g, root)` and excluded from
/// `total_time`; later searches add their time to `total_time`. Every search
/// adds to `bfs_mteps`.
///
/// @param _g    graph to search; installed into the per-core `g`
/// @param nbfs  number of BFS runs (roots) to perform
/// @param tg    tuple graph passed to `verify`
void bfs(GlobalAddress<G> _g, int nbfs, TupleGraph tg) {
  bool verified = false;
  double t;
      
  // Two bags sized to the vertex count: the current frontier and the next one.
  auto _frontier = GlobalBag<VertexID>::create(_g->nv);
  auto _next     = GlobalBag<VertexID>::create(_g->nv);
  call_on_all_cores([=]{ frontier = _frontier; next = _next; g = _g; });
    
  // do BFS from multiple different roots and average their times
  for (int root_idx = 0; root_idx < nbfs; root_idx++) {
  
    // initialize each vertex and mark it unvisited (level -1)
    forall(g, [](G::Vertex& v){ v->init(); v->level = -1; });
    
    VertexID root;
    if (FLAGS_max_degree_source) {
      forall(g, [](VertexID i, G::Vertex& v){
        max_degree << MaxDegree(i, v.nadj);
      });
      root = static_cast<MaxDegree>(max_degree).idx();
    } else {
      root = choose_root(g);
    }
    
    // setup 'root' as the parent of itself
    delegate::call(g->vs+root, [=](G::Vertex& v){
      v->parent = root;
      v->level = 0;
    });
    
    // reset frontier queues
    next->clear();
    frontier->clear();
    
    // start with root as only thing in frontier
    delegate::call((g->vs+root).core(), [=]{ frontier->add(root); });
    
    t = walltime();
    
    bool top_down = true;
    int64_t prev_nf = -1;
    int64_t frontier_edges = 0;
    int64_t remaining_edges = g->nadj;
    
    while (!frontier->empty()) {
      
      auto nf = frontier->size();
      // Log the inputs to the direction heuristic, including frontier_edges.
      VLOG(1) << "remaining_edges = " << remaining_edges << ", nf = " << nf << ", prev_nf = " << prev_nf << ", frontier_edges: " << frontier_edges;
      // Direction heuristic: go bottom-up when the frontier is growing and its
      // edge count exceeds remaining_edges/alpha; return to top-down when the
      // frontier is shrinking and its edge count drops below nv/beta.
      if (top_down && frontier_edges > remaining_edges/FLAGS_beamer_alpha && nf > prev_nf) {
        VLOG(1) << "switching to bottom-up";
        top_down = false;
      } else if (!top_down && frontier_edges < g->nv/FLAGS_beamer_beta && nf < prev_nf) {
        VLOG(1) << "switching to top-down";
        top_down = true;
      }
      
      edge_count = 0;
      
      if (top_down) {
                
        // iterate over vertices in this level of the frontier
        forall(frontier, [](VertexID& i){
          // visit all the adjacencies of the vertex
          // note: this has to be 'async' to prevent deadlock from
          //       running out of available workers
          forall<async>(adj(g,i), [i](G::Edge& e) {
            auto j = e.id;
            // at the core where the vertex is...
            delegate::call<async>(e.ga, [i,j](G::Vertex& vj){
              // note: no synchronization needed because 'call' is 
              // guaranteed to be executed atomically because it 
              // does no blocking operations
              if (vj->parent == -1) {
                // claim parenthood
                vj->parent = i;
                vj->level = current_depth;
                next->add(j);
                edge_count += vj.nadj;
              }
            });
          });
        });
      } else { // bottom-up
        
        // Every still-unvisited vertex asks its neighbours whether any of them
        // was reached at an earlier level; completion is tracked on `phaser`.
        forall<&phaser>(g, [](G::Vertex& v){
          if (v->level != -1) return;
          auto va = make_linear(&v);
          forall<async,&phaser>(adj(g,v), [=,&v](G::Edge& e){
            // Skip remaining edges once this vertex has been claimed.
            if (v->level != -1) return;
            
            phaser.enroll();
            auto eva = e.ga;
            send_heap_message(eva.core(), [=]{
              auto& ev = *eva.pointer();
              if (ev->level != -1 && ev->level < current_depth) {
                // Neighbour was reached earlier: go back to the vertex's core
                // and claim it, unless another neighbour got there first.
                auto eid = g->id(ev);
                send_heap_message(va.core(), [=]{
                  auto& v = *va.pointer();
                  if (v->level == -1) {
                    next->add(g->id(v));
                    v->level = current_depth;
                    v->parent = eid;
                    edge_count += v.nadj;
                  }
                  phaser.complete();
                });
              } else {
                // Nothing to claim: release the enrollment on the vertex's core.
                phaser.send_completion(va.core());
              }
            });
          });
        });
      }
      
      call_on_all_cores([=]{
        current_depth++;
        // switch to next frontier level
        std::swap(frontier, next);
      });
      next->clear();
      frontier_edges = edge_count;
      remaining_edges -= frontier_edges;
      prev_nf = nf;
    } // while (frontier not empty)
    
    double this_bfs_time = walltime() - t;
    LOG(INFO) << "(root=" << root << ", time=" << this_bfs_time << ")";
    
    if (!verified) {
      // only verify the first one to save time
      t = walltime();
      bfs_nedge = verify(tg, g, root);
      verify_time = (walltime()-t);
      LOG(INFO) << verify_time;
      verified = true;
      Metrics::reset_all_cores(); // don't count the first one
    } else {
      total_time += this_bfs_time;
    }
    
    bfs_mteps += bfs_nedge / this_bfs_time / 1.0e6;
  }
}

예제 #9

0

파일 보기

파일: New_loop_tests.cpp 프로젝트: HTOKORG/grappa

/// Exercises the `forall` overloads that iterate over a globally allocated
/// array of N (= 100) int64_t elements.
///
/// Covered, in order:
///   - `on_cores_localized_async`, which only logs each local base/count;
///   - blocking `forall` with (index, element), (element) and
///     (start, count, pointer) loop bodies, each verified by reading every
///     element back with `delegate::read`;
///   - asynchronous `forall<async,&my_gce>` over four 25-element slices,
///     joined with `my_gce.wait()` and checked on the locally held range;
///   - index correctness for both blocking and asynchronous forms;
///   - iteration over the same memory viewed as N/2 two-field `Pair`s.
void test_forall_localized() {
  BOOST_MESSAGE("Testing forall (localized)..."); VLOG(1) << "testing forall (localized)";
  const int64_t N = 100;
  
  auto array = Grappa::global_alloc<int64_t>(N);
  
  VLOG(1) << "checking 'on_cores_localized'";
  on_cores_localized_async(array, N, [](int64_t* local_base, size_t nelem){
    VLOG(1) << "local_base => " << local_base <<"\nnelem => " << nelem;
  });
  
  // (index, element) form
  forall(array, N, [](int64_t i, int64_t& e) {
    e = 1;
  });
  for (int i=0; i<N; i++) {
    BOOST_CHECK_EQUAL(delegate::read(array+i), 1);
  }

  // element-only form
  forall(array, N, [](int64_t& e) {
    e = 2;
  });
  for (int i=0; i<N; i++) {
    BOOST_CHECK_EQUAL(delegate::read(array+i), 2);
  }

  // chunked form: start index, element count, pointer to the first element
  forall(array, N, [](int64_t s, int64_t n, int64_t* e) {
    for (auto i=0; i<n; i++) {
      e[i] = 3;
    }
  });
  for (int i=0; i<N; i++) {
    BOOST_CHECK_EQUAL(delegate::read(array+i), 3);
  }
  
  BOOST_MESSAGE("Testing forall_async..."); VLOG(1) << "testing forall_async";
  
  // Four asynchronous loops over consecutive quarters, all joined on my_gce.
  VLOG(1) << "start spawning";
  forall<async,&my_gce>(array+ 0, 25, [](int64_t i, int64_t& e) { e = 2; });
  VLOG(1) << "after async";
  forall<async,&my_gce>(array+25, 25, [](int64_t i, int64_t& e) { e = 2; });
  VLOG(1) << "after async";
  forall<async,&my_gce>(array+50, 25, [](int64_t i, int64_t& e) { e = 2; });
  VLOG(1) << "after async";
  forall<async,&my_gce>(array+75, 25, [](int64_t i, int64_t& e) { e = 2; });
  VLOG(1) << "done spawning";
  
  my_gce.wait();
  
  // Check the elements between the localized start and end pointers directly.
  auto * base = array.localize();
  auto * end = (array+N).localize();
  for (auto* x = base; x < end; x++) {
    BOOST_CHECK_EQUAL(*x, 2);
  }
  
  VLOG(1) << "checking indexing...";
  
  VLOG(1) << ">> forall";
  Grappa::memset(array, 0, N);
  forall(array, N, [](int64_t i, int64_t& e){ e = i; });
  for (int i=0; i<N; i++) {
    BOOST_CHECK_EQUAL(delegate::read(array+i), i);
  }
  
  VLOG(1) << ">> forall_async";
  VLOG(1) << ">>   my_gce => " << &my_gce;
  Grappa::memset(array, 0, N);
  forall<async,&my_gce>(array, N, [](int64_t i, int64_t& e){ e = i; });
  my_gce.wait();
  
  for (int i=0; i<N; i++) {
    BOOST_CHECK_EQUAL(delegate::read(array+i), i);
  }

  // Reinterpret the array as N/2 pairs; pair k covers elements 2k and 2k+1,
  // so element i must end up holding i/2.
  Grappa::memset(array, 0, N);    
  struct Pair { int64_t x, y; };
  auto pairs = static_cast<GlobalAddress<Pair>>(array);
  forall<&my_gce>(pairs, N/2, [](int64_t i, Pair& e){ e.x = i; e.y = i; });
  
  for (int i=0; i<N; i++) {
    BOOST_CHECK_EQUAL(delegate::read(array+i), i/2);
  }  
    
}