void reset( ) { DVLOG(5) << "In " << __PRETTY_FUNCTION__; CHECK( !acquire_started_ || acquired_ ) << "inconsistent state for reset"; acquire_started_ = false; acquired_ = false; thread_ = NULL; num_messages_ = 0; response_count_ = 0; expected_reply_payload_ = sizeof( T ) * *count_; total_reply_payload_ = 0; start_time_ = 0; network_time_ = 0; if( *count_ == 0 ) { DVLOG(5) << "Zero-length acquire"; *pointer_ = NULL; acquire_started_ = true; acquired_ = true; } else if( request_address_->is_2D() ) { num_messages_ = 1; if( request_address_->core() == Grappa::mycore() ) { DVLOG(5) << "Short-circuiting to address " << request_address_->pointer(); *pointer_ = request_address_->pointer(); acquire_started_ = true; acquired_ = true; } } else { DVLOG(5) << "Straddle: block_max is " << (*request_address_ + *count_).block_max() ; DVLOG(5) << ", request_address is " << *request_address_; DVLOG(5) << ", sizeof(T) is " << sizeof(T); DVLOG(5) << ", count is " << *count_; DVLOG(5) << ", block_min is " << request_address_->block_min(); DVLOG(5) << "Straddle: address is " << *request_address_ ; DVLOG(5) << ", address + count is " << *request_address_ + *count_; ptrdiff_t byte_diff = ( (*request_address_ + *count_ - 1).last_byte().block_max() - request_address_->first_byte().block_min() ); DVLOG(5) << "Straddle: address block max is " << request_address_->block_max(); DVLOG(5) << " address + count block max is " << (*request_address_ + *count_).block_max(); DVLOG(5) << " address block min " << request_address_->block_min(); DVLOG(5) << "Straddle: diff is " << byte_diff << " bs " << block_size; num_messages_ = byte_diff / block_size; } if( num_messages_ > 1 ) DVLOG(5) << "****************************** MULTI BLOCK CACHE REQUEST ******************************"; DVLOG(5) << "In " << __PRETTY_FUNCTION__ << "; detecting straddle for sizeof(T):" << sizeof(T) << " count:" << *count_ << " num_messages_:" << num_messages_ << " request_address:" << *request_address_; }
void do_release() { size_t total_bytes = *count_ * sizeof(T); RequestArgs args; args.request_address = *request_address_; DVLOG(5) << "Computing request_bytes from block_max " << request_address_->first_byte().block_max() << " and " << *request_address_; args.reply_address = make_global( this ); size_t offset = 0; size_t request_bytes = 0; for( size_t i = 0; offset < total_bytes; offset += request_bytes, i++) { request_bytes = args.request_address.first_byte().block_max() - args.request_address.first_byte(); if( request_bytes > total_bytes - offset ) { request_bytes = total_bytes - offset; } DVLOG(5) << "sending release request with " << request_bytes << " of total bytes = " << *count_ * sizeof(T) << " to " << args.request_address; Grappa::send_heap_message(args.request_address.core(), [args](void * payload, size_t payload_size) { IRMetrics::count_release_ams( payload_size ); DVLOG(5) << "Worker " << Grappa::current_worker() << " received release request to " << args.request_address << " reply to " << args.reply_address; memcpy( args.request_address.pointer(), payload, payload_size ); auto reply_address = args.reply_address; Grappa::send_heap_message(args.reply_address.core(), [reply_address]{ DVLOG(5) << "Worker " << Grappa::current_worker() << " received release reply to " << reply_address; reply_address.pointer()->release_reply(); }); DVLOG(5) << "Worker " << Grappa::current_worker() << " sent release reply to " << reply_address; }, (char*)(*pointer_) + offset, request_bytes ); // TODO: change type so we don't screw with pointer like this args.request_address = GlobalAddress<T>::Raw( args.request_address.raw_bits() + request_bytes ); } DVLOG(5) << "release started for " << args.request_address; // blocks here waiting for messages to be sent }
void reset( ) { CHECK( !release_started_ || released_ ) << "inconsistent state for reset"; release_started_ = false; released_ = false; thread_ = NULL; num_messages_ = 0; response_count_ = 0; if( *count_ == 0 ) { DVLOG(5) << "Zero-length release"; release_started_ = true; released_ = true; } else if( request_address_->is_2D() ) { num_messages_ = 1; if( request_address_->core() == Grappa::mycore() ) { release_started_ = true; released_ = true; } } else { DVLOG(5) << "Straddle: block_max is " << (*request_address_ + *count_).block_max() ; DVLOG(5) << ", request_address is " << *request_address_; DVLOG(5) << ", sizeof(T) is " << sizeof(T); DVLOG(5) << ", count is " << *count_; DVLOG(5) << ", block_min is " << request_address_->block_min(); DVLOG(5) << "Straddle: address is " << *request_address_ ; DVLOG(5) << ", address + count is " << *request_address_ + *count_; ptrdiff_t byte_diff = ( (*request_address_ + *count_ - 1).last_byte().block_max() - request_address_->first_byte().block_min() ); DVLOG(5) << "Straddle: address block max is " << request_address_->block_max(); DVLOG(5) << " address + count block max is " << (*request_address_ + *count_).block_max(); DVLOG(5) << " address + count -1 block max is " << (*request_address_ + *count_ - 1).block_max(); DVLOG(5) << " difference is " << ( (*request_address_ + *count_ - 1).block_max() - request_address_->block_min() ); DVLOG(5) << " multiplied difference is " << ( (*request_address_ + *count_ - 1).block_max() - request_address_->block_min() ) * sizeof(T); DVLOG(5) << " address block min " << request_address_->block_min(); DVLOG(5) << "Straddle: diff is " << byte_diff << " bs " << block_size; num_messages_ = byte_diff / block_size; } if( num_messages_ > 1 ) DVLOG(5) << "****************************** MULTI BLOCK CACHE REQUEST ******************************"; DVLOG(5) << "Detecting straddle for sizeof(T):" << sizeof(T) << " count:" << *count_ << " num_messages_:" << num_messages_ << " request_address:" << *request_address_; }
void do_acquire() { size_t total_bytes = *count_ * sizeof(T); RequestArgs args; args.request_address = *request_address_; DVLOG(5) << "Computing request_bytes from block_max " << request_address_->first_byte().block_max() << " and " << *request_address_; args.reply_address = make_global( this ); args.offset = 0; for(size_t i = 0; args.offset < total_bytes; args.offset += args.request_bytes, i++) { args.request_bytes = args.request_address.first_byte().block_max() - args.request_address.first_byte(); if( args.request_bytes > total_bytes - args.offset ) { args.request_bytes = total_bytes - args.offset; } DVLOG(5) << "sending acquire request for " << args.request_bytes << " of total bytes = " << *count_ * sizeof(T) << " from " << args.request_address; Grappa::send_heap_message(args.request_address.core(), [args]{ IAMetrics::count_acquire_ams( args.request_bytes ); DVLOG(5) << "Worker " << Grappa::current_worker() << " received acquire request to " << args.request_address << " size " << args.request_bytes << " offset " << args.offset << " reply to " << args.reply_address; DVLOG(5) << "Worker " << Grappa::current_worker() << " sending acquire reply to " << args.reply_address << " offset " << args.offset << " request address " << args.request_address << " payload address " << args.request_address.pointer() << " payload size " << args.request_bytes; // note: this will read the payload *later* when the message is copied into the actual send buffer, // should be okay because we're already assuming DRF, but something to watch out for auto reply_address = args.reply_address; auto offset = args.offset; Grappa::send_heap_message(args.reply_address.core(), [reply_address, offset](void * payload, size_t payload_size) { DVLOG(5) << "Worker " << Grappa::current_worker() << " received acquire reply to " << reply_address << " offset " << offset << " payload size " << payload_size; reply_address.pointer()->acquire_reply( offset, payload, payload_size); }, args.request_address.pointer(), args.request_bytes ); DVLOG(5) << "Worker " << Grappa::current_worker() << " sent acquire reply to " << args.reply_address << " offset " << args.offset << " request address " << args.request_address << " payload address " << args.request_address.pointer() << " payload size " << args.request_bytes; }); // TODO: change type so we don't screw with pointer like this args.request_address = GlobalAddress<T>::Raw( args.request_address.raw_bits() + args.request_bytes ); } DVLOG(5) << "acquire started for " << args.request_address; }