/// Blocking pull of one chunk descriptor out of the global queue.
///
/// The pull is done in two request/wait phases:
///   1. a reservation request to HOME_NODE that yields the global address
///      of a queue entry, and
///   2. an entry request to the core holding that entry, which fills
///      `*result`.
///
/// @param result  destination that the second phase's descriptor wraps.
void GlobalQueue<T>::pull( ChunkInfo<T> * result ) {
  CHECK( initialized );
  DVLOG(5) << "pull";

  // ---- phase 1: blocking request for the address of a queue entry ----
  A_Entry entry_addr;
  D_A_Entry reserve_desc( &entry_addr );
  A_D_A_Entry reserve_desc_addr = make_global( &reserve_desc );
  Grappa_call_on( HOME_NODE, GlobalQueue<T>::pull_reserve_am_g, &reserve_desc_addr ); // FIXME: call_on deprecated
  size_t reserve_bytes = Grappa_sizeof_message( &reserve_desc_addr );

  // Wait for an element. This is designed to block forever if no more
  // items are shared in this queue for the rest of the program.
  reserve_desc.wait();
  CHECK( entry_addr.pointer() != NULL ) << "Invalid global address. Pull is always blocking";
  Grappa::Metrics::global_queue_stats.record_pull_reserve_request( reserve_bytes );

  // ---- phase 2: fetch the queue element, which points to the data ----
  Descriptor< ChunkInfo<T> > chunk_desc( result );
  pull_entry_args<T> request;
  request.target = entry_addr;
  request.descriptor = make_global( &chunk_desc );
  Grappa_call_on( entry_addr.core(), pull_entry_request_g_am, &request ); // FIXME: call_on deprecated
  size_t entry_bytes = Grappa_sizeof_message( &request );
  Grappa::Metrics::global_queue_stats.record_pull_entry_request( entry_bytes );

  chunk_desc.wait();
}
/// Read the value of a (possibly remote) FullEmpty cell via readFF().
///
/// If the cell lives on the calling core it is read directly. Otherwise a
/// message is sent to the owning core; the handler there either replies
/// right away (cell already full) or registers a suspended delegate that
/// replies later. The caller waits on a local FullEmpty for the reply.
///
/// @param fe_addr  global address of the FullEmpty to read.
/// @return the value obtained from the cell's readFF().
T readFF(GlobalAddress<FullEmpty<T>> fe_addr) {
  if (fe_addr.core() != mycore()) {
    // Local landing spot that the owning core fills in remotely.
    FullEmpty<T> reply;
    auto reply_addr = make_global(&reply);

    send_message(fe_addr.core(), [fe_addr,reply_addr]{
      auto& target = *fe_addr.pointer();

      if (!target.full()) {
        DVLOG(2) << "setting up to block (" << fe_addr << ")";
        auto* waiter = SuspendedDelegate::create([&target,reply_addr]{
          VLOG(0) << "suspended_delegate!";
          fill_remote(reply_addr, target.readFF());
        });
        add_waiter(&target, waiter);
        return;
      }

      // Already full: no need to block, answer immediately.
      fill_remote(reply_addr, target.readFF());
    });

    return reply.readFF();
  }

  DVLOG(2) << "local";
  return fe_addr.pointer()->readFF();
}
/// Deliver `decr` completions to the instance of this event on `origin`.
///
/// @param origin  core whose copy of the event should be completed.
/// @param decr    amount passed through to complete().
inline void CompletionEvent::send_completion(Core origin, int64_t decr) {
  if (origin != mycore()) {
    // Different core: go through the global-address form of complete().
    Grappa::complete(make_global(this,origin), decr);
    return;
  }
  this->complete(decr);
}
/// Reserve the next queue slot for a producer (master only).
///
/// @param ignore  not used by the body.
/// @return the global address of the reserved entry, or a global address
///         wrapping a NULL pointer when there is no room.
A_Entry GlobalQueue<T>::push_reserve ( bool ignore ) {
  CHECK( isMaster() );
  Grappa::Metrics::global_queue_stats.record_push_reserve_reply( Grappa_sizeof_delegate_func_reply< bool, A_Entry >() );
  DVLOG(5) << "push_reserve";
  CHECK( capacity > 0 );

  // Head and tail landing on the same slot while differing in value is
  // treated as "no room".
  const bool same_slot = ( tail % capacity ) == ( head % capacity );
  if ( same_slot && ( tail != head ) ) {
    return make_global( static_cast< QueueEntry<T> * >( NULL ) ); // no room
  }

  A_Entry slot = queueBase + (tail % capacity);
  tail++;

  // If any consumers are waiting, wake the oldest one and hand it the
  // address that was just produced.
  if ( pullReserveWaiters.size() > 0 ) {
    CHECK( head == tail-1 ) << "Size should be exactly one, since there are waiters and one value was just produced";
    DVLOG(5) << "push_reserve: found waiters";

    A_Entry granted = slot;
    head++;

    A_D_A_Entry oldest_waiter = pullReserveWaiters.front();
    pullReserveWaiters.pop();
    pull_reserve_sendreply( oldest_waiter, &granted, true );
  }

  return slot;
}
/// helper function run on each core to load edges stored as int32_t /// tuples in bintsv4 format void local_load_bintsv4( const char * filename, Grappa::TupleGraph::Edge * local_ptr, Grappa::TupleGraph::Edge * local_end ) { Int32Edge * local_load_ptr = reinterpret_cast<Int32Edge*>(local_ptr); size_t local_count = local_end - local_ptr; if( !FLAGS_use_mpi_io ) { // use standard C++/POSIX IO std::ifstream infile( filename, std::ios_base::in | std::ios_base::binary ); // TODO: fix this with scan local_offset = 0; // do on all cores Grappa::barrier(); int64_t offset = Grappa::delegate::fetch_and_add( make_global( &local_offset, 0 ), local_count ); Grappa::barrier(); infile.seekg( offset * sizeof(Int32Edge) ); infile.read( (char*) local_ptr, local_count * sizeof(Int32Edge) ); } else { // load int32's into local chunk impl::read_unordered_shared( filename, local_ptr, local_count * sizeof(Int32Edge) ); } // expand int32's into int64's for( int64_t i = local_count - 1; i >= 0; --i ) { auto v0 = local_load_ptr[i].v0; auto v1 = local_load_ptr[i].v1; local_ptr[i].v0 = v0; local_ptr[i].v1 = v1; } }
void do_release() { size_t total_bytes = *count_ * sizeof(T); RequestArgs args; args.request_address = *request_address_; DVLOG(5) << "Computing request_bytes from block_max " << request_address_->first_byte().block_max() << " and " << *request_address_; args.reply_address = make_global( this ); size_t offset = 0; size_t request_bytes = 0; for( size_t i = 0; offset < total_bytes; offset += request_bytes, i++) { request_bytes = args.request_address.first_byte().block_max() - args.request_address.first_byte(); if( request_bytes > total_bytes - offset ) { request_bytes = total_bytes - offset; } DVLOG(5) << "sending release request with " << request_bytes << " of total bytes = " << *count_ * sizeof(T) << " to " << args.request_address; Grappa::send_heap_message(args.request_address.core(), [args](void * payload, size_t payload_size) { IRMetrics::count_release_ams( payload_size ); DVLOG(5) << "Worker " << Grappa::current_worker() << " received release request to " << args.request_address << " reply to " << args.reply_address; memcpy( args.request_address.pointer(), payload, payload_size ); auto reply_address = args.reply_address; Grappa::send_heap_message(args.reply_address.core(), [reply_address]{ DVLOG(5) << "Worker " << Grappa::current_worker() << " received release reply to " << reply_address; reply_address.pointer()->release_reply(); }); DVLOG(5) << "Worker " << Grappa::current_worker() << " sent release reply to " << reply_address; }, (char*)(*pointer_) + offset, request_bytes ); // TODO: change type so we don't screw with pointer like this args.request_address = GlobalAddress<T>::Raw( args.request_address.raw_bits() + request_bytes ); } DVLOG(5) << "release started for " << args.request_address; // blocks here waiting for messages to be sent }
/// Combine the per-core values of a metric into this object.
///
/// One message goes to every core; each reads its copy's value_ and sends
/// it back to the core owning `combined_addr`, where it is folded into
/// this object's value_. The call returns once all cores have reported.
///
/// @param static_stat_ptr  pointer to the per-core metric instance; it is
///                         reinterpreted as SimpleMetric<T>*.
void SimpleMetric<T>::merge_all(impl::MetricBase* static_stat_ptr) {
  this->value_ = 0;

  // TODO: use more generalized `reduce` operation to merge all
  SimpleMetric<T>* this_static = reinterpret_cast<SimpleMetric<T>*>(static_stat_ptr);
  GlobalAddress<SimpleMetric<T>> combined_addr = make_global(this);

  // One completion is expected per core.
  CompletionEvent ce(Grappa::cores());

  Core c = 0;
  while (c < Grappa::cores()) {
    // we can compute the GlobalAddress here because we have pointers to
    // globals, which are guaranteed to be the same on all nodes
    GlobalAddress<SimpleMetric<T>> remote_stat = make_global(this_static, c);

    send_heap_message(c, [remote_stat, combined_addr, &ce] {
      SimpleMetric<T>* source = remote_stat.pointer();
      T s_value = source->value_;

      // `ce` is only touched in this reply handler, which runs on
      // combined_addr.core() (presumably the core that owns `ce`).
      send_heap_message(combined_addr.core(), [combined_addr, s_value, &ce] {
        // for this simple SimpleMetric, merging is as simple as
        // accumulating the value
        SimpleMetric<T>* combined_ptr = combined_addr.pointer();
        if (combined_ptr->initf_ == NULL) {
          // sum
          combined_ptr->value_ += s_value;
        } else {
          // min
          // NOTE(review): value_ was reset to 0 above, so this minimum can
          // never end up greater than 0 — confirm that is intended.
          if (combined_ptr->value_ > s_value) {
            combined_ptr->value_ = s_value;
          }
        }
        ce.complete();
      });
    });

    c++;
  }

  ce.wait();
}
/// Build the native-side view of a Java module.
///
/// Stores global references to the wrapper and to the module it returns,
/// then walks the wrapper's method descriptors, recording one invoker and
/// one (name, type) descriptor per method.
///
/// @param instance            owning instance, held weakly.
/// @param wrapper             Java-side module wrapper.
/// @param messageQueueThread  queue thread stored for later use.
NewJavaNativeModule::NewJavaNativeModule(
    std::weak_ptr<Instance> instance,
    jni::alias_ref<JavaModuleWrapper::javaobject> wrapper,
    std::shared_ptr<MessageQueueThread> messageQueueThread)
    : instance_(std::move(instance)),
      wrapper_(make_global(wrapper)),
      module_(make_global(wrapper->getModule())),
      messageQueueThread_(std::move(messageQueueThread)) {
  auto descs = wrapper_->getMethodDescriptors();

  // Every method is labelled "<module name>.<method name>".
  std::string namePrefix = getName();
  namePrefix += ".";

  methods_.reserve(descs->size());

  for (const auto& desc : *descs) {
    auto type = desc->getType();
    auto name = desc->getName();
    const bool isSyncHook = (type == "syncHook");

    methods_.emplace_back(
        desc->getMethod(),
        desc->getSignature(),
        namePrefix + name,
        isSyncHook);
    methodDescriptors_.emplace_back(name, type);
  }
}
/// Store a chunk into a previously reserved queue entry and mark it valid.
///
/// @param args  carries the target entry address and the chunk to store.
/// @return true if a consumer was recorded as sleeping on the entry (it is
///         sent a reply and the sleeper field is reset); false otherwise.
bool GlobalQueue<T>::push_entry ( push_entry_args<T> args ) {
  QueueEntry<T> * entry = args.target.pointer();
  entry->chunk = args.chunk;
  entry->valid = true;

  if ( entry->sleeper.pointer() == NULL ) {
    DVLOG(5) << "push_entry: no consumer";
    return false;
  }

  // A consumer is waiting on this entry: send it a wake message and clear
  // the sleeper slot.
  DVLOG(5) << "push_entry: was sleeping consumer " << entry->sleeper;
  pull_entry_sendreply( entry->sleeper, entry );
  entry->sleeper = make_global( static_cast<Descriptor< ChunkInfo<T> > * >( NULL ) );
  return true;
}
/// Run `work` once on every core and return after all of them have
/// finished.
///
/// Each core receives a message that spawns a task; the task calls
/// `work()` and then signals the caller's CompletionEvent.
///
/// @param work  callable invoked with no arguments on each core.
void on_all_cores(F work) {
  CompletionEvent ce(cores());
  auto ce_addr = make_global(&ce);

  // Never invoked: exists only so the pool can be sized for messages that
  // carry the same captures as the real handler below.
  auto lsz = [ce_addr,work]{};
  using SizingMessage = Message<decltype(lsz)>;
  MessagePool pool(cores()*(sizeof(SizingMessage)));

  Core target = 0;
  while (target < cores()) {
    pool.send_message(target, [ce_addr, work] {
      spawn([ce_addr, work] {
        work();
        complete(ce_addr);
      });
    });
    target++;
  }

  ce.wait();
}
/// Compile the default fixed-2D vertex/fragment shader pair.
///
/// @return true after the program compiled and make_global() was called;
///         the failure path aborts via R_ABORT before its `return false`.
bool ShaderManager::init()
{
    // The list is terminated by a kTypeInvalid entry.
    Shader::Params shaders[]{
        {Shader::kTypeVertex,
         0,
         rainbow::shaders::kFixed2Dv,
         rainbow::shaders::integrated::kFixed2Dv},
        {Shader::kTypeFragment,
         0,
         rainbow::shaders::kFixed2Df,
         rainbow::shaders::integrated::kFixed2Df},
        {Shader::kTypeInvalid, 0, nullptr, nullptr}};

    const unsigned int pid = compile(shaders, nullptr);
    if (pid != kInvalidProgram)
    {
        make_global();
        return true;
    }

    R_ABORT("Failed to compile default shader");
    UNREACHABLE();
    return false;
}
/// Blocking delegate call of a callable on core `dest`.
///
/// If `dest` is the calling core, `func` runs inline. Otherwise `func` is
/// shipped in a message, and the caller waits on a FullEmpty that the
/// reply message fills; latency counters are recorded along the way.
///
/// @param dest  core on which to run `func`.
/// @param func  callable to run.
/// @param mf    not used by the body (presumably only selects this
///              overload — confirm against the other overloads).
void call(Core dest, F func, void (F::*mf)() const) {
  delegate_ops++;
  Core origin = Grappa::mycore();

  if (dest == origin) {
    // short-circuit if local
    delegate_short_circuits++;
    func();
    return;
  }

  // Timing record shared between this caller and the reply handler.
  struct Desc {
    int64_t network_time;
    int64_t start_time;
  } desc;
  desc.network_time = 0;
  desc.start_time = Grappa::timestamp();

  // Constructed holding &desc, then read with readFE() before sending;
  // the reply handler writes it again with writeXF().
  FullEmpty<Desc*> result(&desc);
  result.readFE();
  auto ra = make_global(&result);

  send_message(dest, [ra,func] {
    delegate_targets++;
    func();

    // TODO: replace with handler-safe send_message
    send_heap_message(ra.core(), [ra] {
      auto timing = ra->readXX();
      timing->network_time = Grappa::timestamp();
      record_network_latency(timing->start_time);
      ra->writeXF(timing);
    });
  }); // send message

  // ... and wait for the call to complete
  result.readFF();
  record_wakeup_latency(desc.start_time, desc.network_time);
}
/// Asynchronous delegate call with no return value.
///
/// If `dest` is the calling core, `remote_work` runs inline. Otherwise it
/// is sent through `pool`; when GCE is non-null the call is enrolled with
/// it before sending and completed against the origin core once the work
/// has run.
///
/// @param pool         message pool used for the remote send.
/// @param dest         core on which to run the work.
/// @param remote_work  callable returning void.
inline void call_async(PoolType& pool, Core dest, F remote_work) {
  static_assert(std::is_same< decltype(remote_work()), void >::value,
                "return type of callable must be void when not associated with Promise.");

  delegate_ops++;
  delegate_async_ops++;

  Core origin = Grappa::mycore();
  if (dest == origin) {
    // short-circuit if local
    delegate_targets++;
    delegate_short_circuits++;
    remote_work();
    return;
  }

  if (GCE) {
    GCE->enroll();
  }

  pool.send_message(dest, [origin, remote_work] {
    delegate_targets++;
    remote_work();
    if (GCE) {
      complete(make_global(GCE,origin));
    }
  });
}
void sync() { CompletionEvent ce(keys_to_insert.size()+lookups.size()); auto cea = make_global(&ce); for (auto& k : keys_to_insert) { ++hashset_insert_msgs; auto cell = owner->base+owner->computeIndex(k); send_heap_message(cell.core(), [cell,k,cea]{ Cell * c = cell.localize(); bool found = false; for (auto& e : c->entries) if (e.key == k) { found = true; break; } if (!found) c->entries.emplace_back(k); complete(cea); }); } for (auto& e : lookups) { auto k = e.first; ++hashset_lookup_msgs; auto re = e.second; DVLOG(3) << "lookup " << k << " with re = " << re; auto cell = owner->base+owner->computeIndex(k); send_heap_message(cell.core(), [cell,k,cea,re]{ Cell * c = cell.localize(); bool found = false; for (auto& e : c->entries) if (e.key == k) { found = true; break; } send_heap_message(cea.core(), [cea,re,found]{ ResultEntry * r = re; while (r != nullptr) { r->result = found; r = r->next; } complete(cea); }); }); } ce.wait(); }
TupleGraph TupleGraph::load_tsv( std::string path ) { // make sure file exists CHECK( fs::exists( path ) ) << "File not found."; CHECK( fs::is_regular_file( path ) ) << "File is not a regular file."; size_t file_size = fs::file_size( path ); size_t path_length = path.size() + 1; // include space for terminator CHECK_LT( path_length, max_path_length ) << "Sorry, filename exceeds preset limit. Please change max_path_length constant in this file and rerun."; char filename[ max_path_length ]; strncpy( &filename[0], path.c_str(), max_path_length ); Core mycore = Grappa::mycore(); auto bytes_each_core = file_size / Grappa::cores(); // read into temporary buffer on_all_cores( [=] { // use standard C++/POSIX IO // make one core take any data remaining after truncation auto my_bytes_each_core = bytes_each_core; if( Grappa::mycore() == 0 ) { my_bytes_each_core += file_size - (bytes_each_core * Grappa::cores()); } // compute initial offset into ASCII file // TODO: fix this with scan local_offset = 0; // do on all cores Grappa::barrier(); int64_t start_offset = Grappa::delegate::fetch_and_add( make_global( &local_offset, 0 ), my_bytes_each_core ); Grappa::barrier(); int64_t end_offset = start_offset + my_bytes_each_core; DVLOG(7) << "Reading about " << my_bytes_each_core << " bytes starting at " << start_offset << " of " << file_size; // start reading at start offset std::ifstream infile( filename, std::ios_base::in ); infile.seekg( start_offset ); if( start_offset > 0 ) { // move past next newline so we start parsing from a record boundary std::string s; std::getline( infile, s ); DVLOG(6) << "Skipped '" << s << "'"; } start_offset = infile.tellg(); DVLOG(6) << "Start reading at " << start_offset; // read up to one entry past the end_offset while( infile.good() && start_offset < end_offset ) { int64_t v0 = -1; int64_t v1 = -1; if( infile.peek() == '#' ) { // if a comment std::string str; std::getline( infile, str ); } else { infile >> v0; if( !infile.good() ) break; infile >> 
v1; Edge e = { v0, v1 }; DVLOG(6) << "Read " << v0 << " -> " << v1; read_edges.push_back( e ); start_offset = infile.tellg(); } } DVLOG(6) << "Done reading at " << start_offset << " end_offset " << end_offset; // collect sizes local_offset = read_edges.size(); DVLOG(7) << "Read " << local_offset << " edges"; } );
/// Return the pointer produced by make_global(), which is evaluated only
/// on the first call and cached in a function-local static. Every call
/// increments get_calls.
int* get_global()
{
    static int* g2 = make_global();
    get_calls++;
    return g2;
}
void do_acquire() { size_t total_bytes = *count_ * sizeof(T); RequestArgs args; args.request_address = *request_address_; DVLOG(5) << "Computing request_bytes from block_max " << request_address_->first_byte().block_max() << " and " << *request_address_; args.reply_address = make_global( this ); args.offset = 0; for(size_t i = 0; args.offset < total_bytes; args.offset += args.request_bytes, i++) { args.request_bytes = args.request_address.first_byte().block_max() - args.request_address.first_byte(); if( args.request_bytes > total_bytes - args.offset ) { args.request_bytes = total_bytes - args.offset; } DVLOG(5) << "sending acquire request for " << args.request_bytes << " of total bytes = " << *count_ * sizeof(T) << " from " << args.request_address; Grappa::send_heap_message(args.request_address.core(), [args]{ IAMetrics::count_acquire_ams( args.request_bytes ); DVLOG(5) << "Worker " << Grappa::current_worker() << " received acquire request to " << args.request_address << " size " << args.request_bytes << " offset " << args.offset << " reply to " << args.reply_address; DVLOG(5) << "Worker " << Grappa::current_worker() << " sending acquire reply to " << args.reply_address << " offset " << args.offset << " request address " << args.request_address << " payload address " << args.request_address.pointer() << " payload size " << args.request_bytes; // note: this will read the payload *later* when the message is copied into the actual send buffer, // should be okay because we're already assuming DRF, but something to watch out for auto reply_address = args.reply_address; auto offset = args.offset; Grappa::send_heap_message(args.reply_address.core(), [reply_address, offset](void * payload, size_t payload_size) { DVLOG(5) << "Worker " << Grappa::current_worker() << " received acquire reply to " << reply_address << " offset " << offset << " payload size " << payload_size; reply_address.pointer()->acquire_reply( offset, payload, payload_size); }, 
args.request_address.pointer(), args.request_bytes ); DVLOG(5) << "Worker " << Grappa::current_worker() << " sent acquire reply to " << args.reply_address << " offset " << args.offset << " request address " << args.request_address << " payload address " << args.request_address.pointer() << " payload size " << args.request_bytes; }); // TODO: change type so we don't screw with pointer like this args.request_address = GlobalAddress<T>::Raw( args.request_address.raw_bits() + args.request_bytes ); } DVLOG(5) << "acquire started for " << args.request_address; }