// -------------------------------------------------------------------- int rma_receiver::handle_chunks_read_message() { char *chunk_data = chunk_region_->get_address(); HPX_ASSERT(chunk_data); // uint64_t chunkbytes = chunk_region_->get_message_length(); uint64_t num_chunks = chunkbytes/sizeof(chunk_struct); chunks_.resize(num_chunks); std::memcpy(chunks_.data(), chunk_data, chunkbytes); LOG_DEBUG_MSG("receiver " << hexpointer(this) << "Copied chunk data from chunk_region: size " << decnumber(chunkbytes) << "with num chunks " << decnumber(num_chunks)); // rma_regions_.clear(); chunk_fetch_ = false; // for each zerocopy chunk, schedule a read operation uint64_t zc_count = std::count_if(chunks_.begin(), chunks_.end(), [](chunk_struct &c) { return c.type_ == serialization::chunk_type_pointer; }); LOG_DEBUG_MSG("receiver " << hexpointer(this) << "Restarting RMA reads with " << decnumber(rma_count_) << "chunks"); rma_count_ = zc_count; // perform an rma read for each zero copy chunk read_chunk_list(); // do not return rma_count_ as it might already have decremented! (racey) HPX_ASSERT(rma_regions_.size() == zc_count ); return rma_regions_.size(); }
// -------------------------------------------------------------------- void rma_receiver::handle_message_no_chunk_data() { chunk_fetch_ = true; // get the remote chunk block memory region details auto &cb = header_->chunk_header_ptr()->chunk_rma; LOG_DEBUG_MSG("receiver " << hexpointer(this) << "Fetching RMA chunk chunk data with " << "size " << decnumber(cb.size_) << "rkey " << hexpointer(cb.rkey_) << "addr " << hexpointer(cb.data_.cpos_)); // we need a local memory region to read the chunks into chunk_region_ = memory_pool_->allocate_region(cb.size_); chunk_region_->set_message_length(cb.size_); uint64_t rkey1 = cb.rkey_; const void *remoteAddr1 = cb.data_.cpos_; // add it to the list of rma regions to fetch rma_regions_.push_back(chunk_region_); LOG_DEBUG_MSG("receiver " << hexpointer(this) << "Fetching chunk region with size " << decnumber(cb.size_)); rma_count_ = 1; // if the message region requires rma-get, we can get it now as well if (!header_->message_piggy_back()) { auto &mc = header_->message_chunk_ptr()->message_rma; message_region_ = memory_pool_->allocate_region(mc.size_); message_region_->set_message_length(mc.size_); uint64_t rkey2 = mc.rkey_; const void *remoteAddr2 = mc.data_.cpos_; // add it to the list of rma regions to fetch rma_regions_.push_back(message_region_); LOG_DEBUG_MSG("receiver " << hexpointer(this) << "Fetching message region with size " << decnumber(mc.size_)); ++rma_count_; // call the rma read function for the message chunk read_one_chunk(src_addr_, message_region_, remoteAddr2, rkey2); } // call the rma read function for the chunk-info chunk // we do this last so that a completion does not come before the message // chunk read has been triggered - this would cause the rma receive to decrement // the counter and might hit zero before we had sent the second request read_one_chunk(src_addr_, chunk_region_, remoteAddr1, rkey1); }
inline std::ostream& operator<<( std::ostream& os, const rdma_thread_print_helper&) { if (hpx::threads::get_self_id()==hpx::threads::invalid_thread_id) { os << "------------------ "; } else { hpx::threads::thread_data *dummy = hpx::this_thread::get_id().native_handle().get(); os << hexpointer(dummy); } os << nhex(12) << std::this_thread::get_id() << " cpu " << decnumber(sched_getcpu()); return os; }