Example #1
0
void GlobalQueue<T>::pull( ChunkInfo<T> * result ) {
  CHECK( initialized );
  DVLOG(5) << "pull";

  // Phase 1: blocking request to the home node for the address of a queue entry.
  A_Entry entry_addr;
  D_A_Entry reserve_desc( &entry_addr );
  A_D_A_Entry reserve_desc_addr = make_global( &reserve_desc );
  Grappa_call_on( HOME_NODE, GlobalQueue<T>::pull_reserve_am_g, &reserve_desc_addr ); // FIXME: call_on deprecated
  size_t reserve_request_bytes = Grappa_sizeof_message( &reserve_desc_addr );

  // By design this wait never returns if no more items are ever shared in
  // this queue for the remainder of the program.
  reserve_desc.wait();

  CHECK( entry_addr.pointer() != NULL ) << "Invalid global address. Pull is always blocking";

  Grappa::Metrics::global_queue_stats.record_pull_reserve_request( reserve_request_bytes );

  // Phase 2: fetch the queue element itself (which points to the data) from
  // the core that owns the reserved entry; the reply lands in `result`.
  Descriptor< ChunkInfo<T> > chunk_desc( result );

  pull_entry_args<T> entry_request;
  entry_request.target = entry_addr;
  entry_request.descriptor = make_global( &chunk_desc );
  Grappa_call_on( entry_addr.core(), pull_entry_request_g_am, &entry_request );  // FIXME: call_on deprecated

  size_t entry_request_bytes = Grappa_sizeof_message( &entry_request );
  Grappa::Metrics::global_queue_stats.record_pull_entry_request( entry_request_bytes );
  chunk_desc.wait();
}
Example #2
0
 /// Read the value of the FullEmpty at `fe_addr` with readFF semantics,
 /// whether it lives on this core or on another one.
 T readFF(GlobalAddress<FullEmpty<T>> fe_addr) {
   // Same core: operate on the object directly.
   if (fe_addr.core() == mycore()) {
     DVLOG(2) << "local";
     return fe_addr.pointer()->readFF();
   }
   
   // Other core: the owner fills this local FullEmpty, and we block on it below.
   FullEmpty<T> reply;
   auto reply_addr = make_global(&reply);
   
   send_message(fe_addr.core(), [fe_addr,reply_addr]{
     auto& target = *fe_addr.pointer();
     
     if (!target.full()) {
       // Not full yet: register a suspended delegate as a waiter on the
       // target; when it runs it forwards the value to the requester.
       DVLOG(2) << "setting up to block (" << fe_addr << ")";
       auto* waiter = SuspendedDelegate::create([&target,reply_addr]{
         VLOG(0) << "suspended_delegate!";
         fill_remote(reply_addr, target.readFF());
       });
       add_waiter(&target, waiter);
       return;
     }
     
     // Already full: no need to block, reply immediately.
     fill_remote(reply_addr, target.readFF());
   });
   
   return reply.readFF();
 }
Example #3
0
 /// Decrement this completion event by `decr` on behalf of core `origin`:
 /// directly when `origin` is the current core, otherwise through the global
 /// address of this object on `origin`.
 inline void CompletionEvent::send_completion(Core origin, int64_t decr) {
   if (origin != mycore()) {
     Grappa::complete(make_global(this,origin), decr);
     return;
   }
   this->complete(decr);
 }
Example #4
0
A_Entry GlobalQueue<T>::push_reserve ( bool ignore ) {
  CHECK( isMaster() );

  Grappa::Metrics::global_queue_stats.record_push_reserve_reply( Grappa_sizeof_delegate_func_reply< bool, A_Entry >() );

  DVLOG(5) << "push_reserve";

  CHECK( capacity > 0 );

  // Full queue: head and tail map to the same slot but are not equal.
  const bool same_slot = ( tail % capacity == head % capacity );
  if ( same_slot && (tail != head) ) {
    return make_global( static_cast< QueueEntry<T> * >( NULL ) ); // no room
  }

  // Hand out the slot at the current tail and advance the tail.
  A_Entry reserved = queueBase + (tail % capacity); 
  tail++;

  // If any consumer is waiting, wake the oldest one and give it the address
  // that was just produced.
  if ( pullReserveWaiters.size() > 0 ) {
    CHECK( head == tail-1 ) << "Size should be exactly one, since there are waiters and one value was just produced";
    DVLOG(5) << "push_reserve: found waiters";

    A_Entry granted = reserved;
    head++;

    A_D_A_Entry oldest_waiter = pullReserveWaiters.front();
    pullReserveWaiters.pop();

    pull_reserve_sendreply( oldest_waiter, &granted, true );
  }

  return reserved;
}
Example #5
0
/// helper function run on each core to load edges stored as int32_t
/// tuples in bintsv4 format
void local_load_bintsv4( const char * filename,
                         Grappa::TupleGraph::Edge * local_ptr,
                         Grappa::TupleGraph::Edge * local_end ) {
  Int32Edge * local_load_ptr = reinterpret_cast<Int32Edge*>(local_ptr);
  size_t local_count = local_end - local_ptr;
  
  if( !FLAGS_use_mpi_io ) {
    // use standard C++/POSIX IO
    std::ifstream infile( filename, std::ios_base::in | std::ios_base::binary );

    // TODO: fix this with scan
    local_offset = 0; // do on all cores
    Grappa::barrier();
    int64_t offset = Grappa::delegate::fetch_and_add( make_global( &local_offset, 0 ),
                                              local_count );
    Grappa::barrier();

    infile.seekg( offset * sizeof(Int32Edge) );

    infile.read( (char*) local_ptr, local_count * sizeof(Int32Edge) );
  } else {
    // load int32's into local chunk
    impl::read_unordered_shared( filename, local_ptr, local_count * sizeof(Int32Edge) );
  }

  // expand int32's into int64's
  for( int64_t i = local_count - 1; i >= 0; --i ) {
    auto v0 = local_load_ptr[i].v0;
    auto v1 = local_load_ptr[i].v1;
    local_ptr[i].v0 = v0;
    local_ptr[i].v1 = v1;
  }
}
Example #6
0
  /// Start releasing the local buffer (`*count_` elements of T at
  /// `*pointer_`) to the global address `*request_address_`.
  ///
  /// The data is sent as a sequence of heap messages, each carrying one chunk
  /// as payload. A chunk ends at the block_max() of the current destination
  /// address or at the end of the data, whichever comes first. The receiving
  /// core copies the payload to the destination and sends back a reply that
  /// calls release_reply() on this object.
  void do_release() {
    size_t total_bytes = *count_ * sizeof(T);
    
    RequestArgs args;
    args.request_address = *request_address_;
    DVLOG(5) << "Computing request_bytes from block_max " << request_address_->first_byte().block_max() << " and " << *request_address_;
    // Replies are addressed back to this object.
    args.reply_address = make_global( this );
    size_t offset = 0;         // bytes of the local buffer already handed to messages
    size_t request_bytes = 0;  // size of the chunk sent in the current iteration

    // One iteration per chunk. `i` counts chunks but is not otherwise used.
    for( size_t i = 0;
         offset < total_bytes; 
         offset += request_bytes, i++) {

      // Distance from the current destination byte to the block_max() of its block.
      request_bytes = args.request_address.first_byte().block_max() - args.request_address.first_byte();

      // Never send past the end of the data.
      if( request_bytes > total_bytes - offset ) {
        request_bytes = total_bytes - offset;
      }

      DVLOG(5) << "sending release request with " << request_bytes
               << " of total bytes = " << *count_ * sizeof(T)
               << " to " << args.request_address;

      // `args` is captured by value, so each message carries the destination
      // address as it was for this chunk.
      Grappa::send_heap_message(args.request_address.core(),
        [args](void * payload, size_t payload_size) {
          IRMetrics::count_release_ams( payload_size );
          DVLOG(5) << "Worker " << Grappa::current_worker()
          << " received release request to " << args.request_address
          << " reply to " << args.reply_address;
          // Store the chunk at its destination on the receiving core.
          memcpy( args.request_address.pointer(), payload, payload_size );
    
          // Acknowledge the chunk to the releasing object.
          auto reply_address = args.reply_address;
          Grappa::send_heap_message(args.reply_address.core(), [reply_address]{
            DVLOG(5) << "Worker " << Grappa::current_worker() << " received release reply to " << reply_address;
            reply_address.pointer()->release_reply();
          });
    
          DVLOG(5) << "Worker " << Grappa::current_worker()
          << " sent release reply to " << reply_address;
        },
        (char*)(*pointer_) + offset, request_bytes
      );

      // Advance the destination address by the bytes just sent.
      // TODO: change type so we don't screw with pointer like this
      args.request_address = GlobalAddress<T>::Raw( args.request_address.raw_bits() + request_bytes );
    }
    DVLOG(5) << "release started for " << args.request_address;
    
    // blocks here waiting for messages to be sent
    // NOTE(review): no explicit wait appears in this function; presumably the
    // blocking happens inside send_heap_message or in the caller — confirm.
  }
 /// Combine the value of this metric from every core into `this->value_`.
 /// `static_stat_ptr` is the metric's address, used to locate its
 /// counterpart on each core.
 void SimpleMetric<T>::merge_all(impl::MetricBase* static_stat_ptr) {
   this->value_ = 0;
   
   // TODO: use more generalized `reduce` operation to merge all
   SimpleMetric<T>* static_metric = reinterpret_cast<SimpleMetric<T>*>(static_stat_ptr);
   
   GlobalAddress<SimpleMetric<T>> merged_addr = make_global(this);
   
   // One completion is expected from each core.
   CompletionEvent all_merged(Grappa::cores());
   
   for (Core core = 0; core < Grappa::cores(); core++) {
     // The GlobalAddress can be computed here because pointers to globals are
     // guaranteed to be the same on all nodes.
     GlobalAddress<SimpleMetric<T>> source_addr = make_global(static_metric, core);
     
     send_heap_message(core, [source_addr, merged_addr, &all_merged] {
         // On the source core: read the local value and ship it to the merging core.
         T contribution = source_addr.pointer()->value_;
         
         send_heap_message(merged_addr.core(), [merged_addr, contribution, &all_merged] {
             // On the merging core: fold the contribution into the combined metric.
             SimpleMetric<T>* merged = merged_addr.pointer();
             if (merged->initf_ == NULL) {
               // sum
               merged->value_ += contribution;
             } else if (merged->value_ > contribution) {
               // min
               merged->value_ = contribution;
             }
             
             all_merged.complete();
           });
       });
   }
   all_merged.wait();
 }
Example #8
0
// Builds the native-side view of a Java module: takes global references to
// the wrapper and its module, then records one method entry and one method
// descriptor per descriptor reported by the wrapper.
NewJavaNativeModule::NewJavaNativeModule(
  std::weak_ptr<Instance> instance,
  jni::alias_ref<JavaModuleWrapper::javaobject> wrapper,
  std::shared_ptr<MessageQueueThread> messageQueueThread)
: instance_(std::move(instance))
, wrapper_(make_global(wrapper))
, module_(make_global(wrapper->getModule()))
, messageQueueThread_(std::move(messageQueueThread)) {
  auto descriptors = wrapper_->getMethodDescriptors();
  std::string ownerName = getName();
  methods_.reserve(descriptors->size());

  for (const auto& descriptor : *descriptors) {
    auto methodType = descriptor->getType();
    auto methodName = descriptor->getName();

    // Methods of type "syncHook" are flagged through the last argument.
    const bool isSyncHook = (methodType == "syncHook");
    methods_.emplace_back(
        descriptor->getMethod(),
        descriptor->getSignature(),
        ownerName + "." + methodName,
        isSyncHook);

    methodDescriptors_.emplace_back(methodName, methodType);
  }
}
Example #9
0
// Stores the pushed chunk in the target queue entry and marks it valid.
// Returns true if a sleeping consumer was registered on the entry (it is sent
// a wake reply and cleared), false otherwise.
bool GlobalQueue<T>::push_entry ( push_entry_args<T> args ) {
  QueueEntry<T> * entry = args.target.pointer();

  entry->chunk = args.chunk;
  entry->valid = true;

  // No consumer is waiting on this entry: nothing more to do.
  if ( entry->sleeper.pointer() == NULL ) {
    DVLOG(5) << "push_entry: no consumer";
    return false;
  }

  // A consumer is waiting: send the wake message and clear the sleeper slot.
  DVLOG(5) << "push_entry: was sleeping consumer " << entry->sleeper;

  pull_entry_sendreply( entry->sleeper, entry );
  entry->sleeper = make_global( static_cast<Descriptor< ChunkInfo<T> > * >( NULL ) );
  return true;
}
Example #10
0
 /// Spawn `work` once on every core and block until all of them have
 /// signalled completion.
 void on_all_cores(F work) {
   
   // One completion is expected per core.
   CompletionEvent all_done(cores());
   auto done_addr = make_global(&all_done);
   
   // Empty lambda with the same captures as the message sent below; it is
   // used only to size the message pool.
   auto size_probe = [done_addr,work]{};
   MessagePool pool(cores()*(sizeof(Message<decltype(size_probe)>)));
   
   for (Core target = 0; target < cores(); target++) {
     pool.send_message(target, [done_addr, work] {
       // The message only spawns; the work and the completion signal run
       // inside the spawned lambda.
       spawn([done_addr, work] {
         work();
         complete(done_addr);
       });
     });
   }
   all_done.wait();
 }
Example #11
0
bool ShaderManager::init()
{
    Shader::Params shaders[]{
        {Shader::kTypeVertex, 0, rainbow::shaders::kFixed2Dv,
         rainbow::shaders::integrated::kFixed2Dv},
        {Shader::kTypeFragment, 0, rainbow::shaders::kFixed2Df,
         rainbow::shaders::integrated::kFixed2Df},
        {Shader::kTypeInvalid, 0, nullptr, nullptr}};
    const unsigned int pid = compile(shaders, nullptr);
    if (pid == kInvalidProgram)
    {
        R_ABORT("Failed to compile default shader");
        UNREACHABLE();
        return false;
    }

    make_global();
    return true;
}
Example #12
0
    /// Run `func` on core `dest` and return once it has run.
    ///
    /// If `dest` is the calling core, `func` is invoked directly. Otherwise a
    /// message runs `func` on `dest`, a reply message is sent back to the
    /// calling core, and the caller blocks on a local FullEmpty until that
    /// reply has been processed. Latency is recorded along the way.
    ///
    /// NOTE(review): `mf` is not referenced in this body; presumably it only
    /// selects this overload for callables with a `void operator()() const`
    /// — confirm against the other overloads.
    void call(Core dest, F func, void (F::*mf)() const) {
      delegate_ops++;
      Core origin = Grappa::mycore();

      if (dest == origin) {
        // short-circuit if local
        delegate_short_circuits++;
        func();
      } else {
        
        // Timing record shared between the caller and the reply handler.
        struct Desc {
          int64_t network_time;  // timestamp taken when the reply is handled
          int64_t start_time;    // timestamp taken before the request is sent
        } desc;
        
        desc.network_time = 0;
        desc.start_time = Grappa::timestamp();
        
        // `result` starts out holding &desc.
        // NOTE(review): readFE() presumably leaves `result` empty so that the
        // readFF() below blocks until the reply's writeXF — confirm against
        // FullEmpty.
        FullEmpty<Desc*> result(&desc);
        result.readFE();
        auto ra = make_global(&result);
        
        send_message(dest, [ra,func] {
          delegate_targets++;
  
          func();
  
          // Reply to the core that owns `result` (ra.core()).
          // TODO: replace with handler-safe send_message
          send_heap_message(ra.core(), [ra] {
            // Fetch the Desc pointer, stamp the reply time, record the
            // latency, then write the pointer back with writeXF.
            auto r = ra->readXX();
            r->network_time = Grappa::timestamp();
            record_network_latency(r->start_time);
            ra->writeXF(r);
          });
        }); // send message

        // ... and wait for the call to complete
        result.readFF();
        record_wakeup_latency(desc.start_time, desc.network_time);
      }
    }
Example #13
0
/// Run `remote_work` on core `dest` without waiting for it: inline when
/// `dest` is the calling core, otherwise via a message sent through `pool`.
/// When `GCE` is non-null the remote path enrolls with it before sending and
/// completes it on the origin core after the work has run.
inline void call_async(PoolType& pool, Core dest, F remote_work) {
    static_assert(std::is_same< decltype(remote_work()), void >::value, "return type of callable must be void when not associated with Promise.");
    delegate_ops++;
    delegate_async_ops++;
    Core origin = Grappa::mycore();

    if (dest != origin) {
        if (GCE) GCE->enroll();

        pool.send_message(dest, [origin, remote_work] {
            delegate_targets++;
            remote_work();
            if (GCE) complete(make_global(GCE,origin));
        });
        return;
    }

    // short-circuit if local
    delegate_targets++;
    delegate_short_circuits++;
    remote_work();
}
Example #14
0
 void sync() {
   CompletionEvent ce(keys_to_insert.size()+lookups.size());
   auto cea = make_global(&ce);
   
   for (auto& k : keys_to_insert) {
     ++hashset_insert_msgs;
     auto cell = owner->base+owner->computeIndex(k);
     send_heap_message(cell.core(), [cell,k,cea]{
       Cell * c = cell.localize();
       bool found = false;
       for (auto& e : c->entries) if (e.key == k) { found = true; break; }
       if (!found) c->entries.emplace_back(k);
       complete(cea);
     });
   }
   for (auto& e : lookups) { auto k = e.first;
     ++hashset_lookup_msgs;
     auto re = e.second;
     DVLOG(3) << "lookup " << k << " with re = " << re;
     auto cell = owner->base+owner->computeIndex(k);
     
     send_heap_message(cell.core(), [cell,k,cea,re]{
       Cell * c = cell.localize();
       bool found = false;
       for (auto& e : c->entries) if (e.key == k) { found = true; break; }
       
       send_heap_message(cea.core(), [cea,re,found]{
         ResultEntry * r = re;
         while (r != nullptr) {
           r->result = found;
           r = r->next;
         }
         complete(cea);
       });
     });
   }
   ce.wait();
 }
Example #15
0
TupleGraph TupleGraph::load_tsv( std::string path ) {
  // make sure file exists
  CHECK( fs::exists( path ) ) << "File not found.";
  CHECK( fs::is_regular_file( path ) ) << "File is not a regular file.";

  size_t file_size = fs::file_size( path );

  size_t path_length = path.size() + 1; // include space for terminator

  CHECK_LT( path_length, max_path_length )
    << "Sorry, filename exceeds preset limit. Please change max_path_length constant in this file and rerun.";

  char filename[ max_path_length ];
  strncpy( &filename[0], path.c_str(), max_path_length );

  Core mycore = Grappa::mycore();
  auto bytes_each_core = file_size / Grappa::cores();

  // read into temporary buffer
  on_all_cores( [=] {
      // use standard C++/POSIX IO

      // make one core take any data remaining after truncation
      auto my_bytes_each_core = bytes_each_core;
      if( Grappa::mycore() == 0 ) {
        my_bytes_each_core += file_size - (bytes_each_core * Grappa::cores());
      }
      
      // compute initial offset into ASCII file
      // TODO: fix this with scan
      local_offset = 0; // do on all cores
      Grappa::barrier();
      int64_t start_offset = Grappa::delegate::fetch_and_add( make_global( &local_offset, 0 ),
                                                              my_bytes_each_core );
      Grappa::barrier();
      int64_t end_offset = start_offset + my_bytes_each_core;

      DVLOG(7) << "Reading about " << my_bytes_each_core
               << " bytes starting at " << start_offset
               << " of " << file_size;
      
      // start reading at start offset
      std::ifstream infile( filename, std::ios_base::in );
      infile.seekg( start_offset );

      if( start_offset > 0 ) {
        // move past next newline so we start parsing from a record boundary
        std::string s;
        std::getline( infile, s );
        DVLOG(6) << "Skipped '" << s << "'";
      }
      
      start_offset = infile.tellg();
      DVLOG(6) << "Start reading at " << start_offset;

      // read up to one entry past the end_offset
      while( infile.good() && start_offset < end_offset ) {
        int64_t v0 = -1;
        int64_t v1 = -1;
        if( infile.peek() == '#' ) { // if a comment
          std::string str;
          std::getline( infile, str );
        } else {
          infile >> v0;
          if( !infile.good() ) break;
          infile >> v1;
          Edge e = { v0, v1 };
          DVLOG(6) << "Read " << v0 << " -> " << v1;
          read_edges.push_back( e );
          start_offset = infile.tellg();
        }
      }

      DVLOG(6) << "Done reading at " << start_offset << " end_offset " << end_offset;
      
      // collect sizes
      local_offset = read_edges.size();
      DVLOG(7) << "Read " << local_offset << " edges";
    } );
Example #16
0
int* get_global() {
  static int* g2 = make_global();
  get_calls++;
  return g2;
}
  /// Start acquiring `*count_` elements of T from the global address
  /// `*request_address_`.
  ///
  /// The range is requested as a sequence of heap messages. Each request
  /// covers one chunk that ends at the block_max() of the current source
  /// address or at the end of the range, whichever comes first. The core
  /// owning a chunk replies with the chunk's bytes as payload, and the reply
  /// calls acquire_reply(offset, payload, payload_size) on this object.
  void do_acquire() {
    size_t total_bytes = *count_ * sizeof(T);
    RequestArgs args;
    args.request_address = *request_address_;
    DVLOG(5) << "Computing request_bytes from block_max " << request_address_->first_byte().block_max() << " and " << *request_address_;
    // Replies are addressed back to this object.
    args.reply_address = make_global( this );
    // Byte offset of the current chunk within the whole requested range.
    args.offset = 0;  
    
    // One iteration per chunk. `i` counts chunks but is not otherwise used.
    for(size_t i = 0;
         args.offset < total_bytes; 
         args.offset += args.request_bytes, i++) {

      // Distance from the current source byte to the block_max() of its block.
      args.request_bytes = args.request_address.first_byte().block_max() - args.request_address.first_byte();


      // Never request past the end of the range.
      if( args.request_bytes > total_bytes - args.offset ) {
        args.request_bytes = total_bytes - args.offset;
      }

      DVLOG(5) << "sending acquire request for " << args.request_bytes
               << " of total bytes = " << *count_ * sizeof(T)
               << " from " << args.request_address;

      // `args` is captured by value, so each request carries the address,
      // size and offset of its own chunk.
      Grappa::send_heap_message(args.request_address.core(), [args]{
        IAMetrics::count_acquire_ams( args.request_bytes );
        DVLOG(5) << "Worker " << Grappa::current_worker()
        << " received acquire request to " << args.request_address
        << " size " << args.request_bytes
        << " offset " << args.offset
        << " reply to " << args.reply_address;
          
        DVLOG(5) << "Worker " << Grappa::current_worker()
        << " sending acquire reply to " << args.reply_address
        << " offset " << args.offset
        << " request address " << args.request_address
        << " payload address " << args.request_address.pointer()
        << " payload size " << args.request_bytes;
          
        // note: this will read the payload *later* when the message is copied into the actual send buffer,
        // should be okay because we're already assuming DRF, but something to watch out for
        auto reply_address = args.reply_address;
        auto offset = args.offset;
          
        // The reply carries the requested bytes as payload; the handler passes
        // them to the acquiring object together with the chunk's offset.
        Grappa::send_heap_message(args.reply_address.core(),
          [reply_address, offset](void * payload, size_t payload_size) {
            DVLOG(5) << "Worker " << Grappa::current_worker()
            << " received acquire reply to " << reply_address
            << " offset " << offset
            << " payload size " << payload_size;
            reply_address.pointer()->acquire_reply( offset, payload, payload_size);
          },
          args.request_address.pointer(), args.request_bytes
        );
          
        DVLOG(5) << "Worker " << Grappa::current_worker()
        << " sent acquire reply to " << args.reply_address
        << " offset " << args.offset
        << " request address " << args.request_address
        << " payload address " << args.request_address.pointer()
        << " payload size " << args.request_bytes;
      });

      // Advance the source address by the bytes just requested.
      // TODO: change type so we don't screw with pointer like this
      args.request_address = GlobalAddress<T>::Raw( args.request_address.raw_bits() + args.request_bytes );
    }
    DVLOG(5) << "acquire started for " << args.request_address;      
  }