/**
 * If the block's state reports that it is pending, waits on it before
 * handing the block back to the caller.
 *
 * When a timer object is installed, the time spent in the wait is recorded
 * under SialxTimer::BLOCKWAITTIME, keyed by the given line.
 *
 * @param b    block whose state is checked and, if pending, waited on
 * @param line line number used as the key for the block-wait timer
 * @return     the same block pointer that was passed in
 */
Block::BlockPtr SialOpsParallel::wait_and_check(Block::BlockPtr b, int line) {
    // Nothing outstanding: the block can be handed back immediately.
    if (!b->state().pending()) {
        return b;
    }

    // Bracket the wait with the timer only when timers are enabled.
    if (sialx_timers_) {
        sialx_timers_->start_timer(line, SialxTimer::BLOCKWAITTIME);
    }
    b->state().wait(b->size());
    if (sialx_timers_) {
        sialx_timers_->pause_timer(line, SialxTimer::BLOCKWAITTIME);
    }
    return b;
}
/**
 * Prepares a block to be read on the host, transferring data from the GPU
 * only when that is required.
 *
 * Cases, checked in order:
 *  - block is on neither host nor GPU: reported through fail();
 *  - block is not on the host: host storage is allocated and filled from
 *    the GPU copy;
 *  - block is dirty on both host and GPU: reported through fail(), since
 *    copying in either direction would discard modifications;
 *  - block is dirty on the GPU only: the host copy is refreshed from the GPU.
 *
 * Afterwards the block is marked as being on the host and no longer dirty
 * on the GPU.
 *
 * @param blk block to make readable on the host
 */
void BlockManager::lazy_gpu_read_on_host(const Block::BlockPtr& blk) {
    if (!blk->is_on_gpu() && !blk->is_on_host()) {
        fail("block allocated neither on host or gpu", current_line());
    } else if (!blk->is_on_host()) {
        blk->allocate_host_data();
        _gpu_device_to_host(blk->get_data(), blk->get_gpu_data(), blk->size());
    } else if (blk->is_dirty_on_all()) {
        // This test has to precede the dirty-on-gpu test: previously it came
        // after it and could never fire, so the host modifications were
        // silently overwritten by the device copy.
        // NOTE(review): assumes is_dirty_on_all() implies is_dirty_on_gpu(),
        // as the error text suggests -- confirm against Block.
        fail("block dirty on host & gpu !", current_line());
    } else if (blk->is_dirty_on_gpu()) {
        _gpu_device_to_host(blk->get_data(), blk->get_gpu_data(), blk->size());
    }
    blk->set_on_host();
    blk->unset_dirty_on_gpu();
}
void SialPrinterForTests::do_print_block(const BlockId& id, Block::BlockPtr block, int line_number){ int MAX_TO_PRINT = 1024; int size = block->size(); int OUTPUT_ROW_SIZE = block->shape().segment_sizes_[0]; double* data = block->get_data(); out_.precision(14); out_.setf(std::ios_base::fixed); out_ << line_number << ": "; if (size == 1) { out_ << "printing " << id.str(sip_tables_) << " = "; out_ << *(data); } else { out_ << "printing " << (size < MAX_TO_PRINT?size:MAX_TO_PRINT); out_ << " of " <<size << " elements of block " << id.str(sip_tables_);//BlockId2String(id); out_ << " in the order stored in memory "; int i; for (i = 0; i < size && i < MAX_TO_PRINT; ++i){ if (i%OUTPUT_ROW_SIZE == 0) out_ << std::endl; out_ << *(data+i) << " "; } if (i == MAX_TO_PRINT){ out_ << "...."; } } out_ << std::endl; }
/**
 * Prepares a block to be written on the GPU, transferring data from the
 * host only when that is required.
 *
 * Cases, checked in order:
 *  - block is on neither host nor GPU: the block is removed from the block
 *    map and replaced by a newly created GPU block (blk is reassigned);
 *  - block is not on the GPU: GPU storage is allocated and filled from the
 *    host copy;
 *  - block is dirty on both host and GPU: reported through fail(), since
 *    copying in either direction would discard modifications;
 *  - block is dirty on the host only: the GPU copy is refreshed from the host.
 *
 * Afterwards the block is marked as being on the GPU, dirty on the GPU and
 * no longer dirty on the host.
 *
 * @param blk   block to make writable on the GPU; may be replaced
 * @param id    id of the block, used when the block has to be recreated
 * @param shape shape of the block, used when the block has to be recreated
 */
void BlockManager::lazy_gpu_write_on_device(Block::BlockPtr& blk, const BlockId &id, const BlockShape& shape) {
    if (!blk->is_on_gpu() && !blk->is_on_host()) {
        block_map_.cached_delete_block(id); // Get rid of block, create a new one
        blk = create_gpu_block(id, shape);
        // if (is_scope_extent) {
        //     temp_block_list_stack_.back()->push_back(id);
        // }
    } else if (!blk->is_on_gpu()) {
        blk->allocate_gpu_data();
        _gpu_host_to_device(blk->get_data(), blk->get_gpu_data(), blk->size());
    } else if (blk->is_dirty_on_all()) {
        // This test has to precede the dirty-on-host test: previously it came
        // after it and could never fire, so the device modifications were
        // silently overwritten by the host copy.
        // NOTE(review): assumes is_dirty_on_all() implies is_dirty_on_host(),
        // as the error text suggests -- confirm against Block.
        fail("block dirty on host & gpu !", current_line());
    } else if (blk->is_dirty_on_host()) {
        _gpu_host_to_device(blk->get_data(), blk->get_gpu_data(), blk->size());
    }
    blk->set_on_gpu();
    blk->set_dirty_on_gpu();
    blk->unset_dirty_on_host();
}
//TODO optimize this. Can reduce searches in block map. void SialOpsParallel::get(BlockId& block_id) { //check for "data race" check_and_set_mode(block_id, READ); //if block already exists, or has pending request, just return Block::BlockPtr block = block_manager_.block(block_id); if (block != NULL) return; //send get message to block's server, and post receive int server_rank = data_distribution_.get_server_rank(block_id); int get_tag; get_tag = barrier_support_.make_mpi_tag_for_GET(); sip::check(server_rank>=0&&server_rank<sip_mpi_attr_.global_size(), "invalid server rank",current_line()); SIP_LOG(std::cout<<"W " << sip_mpi_attr_.global_rank() << " : sending GET for block " << block_id << " to server "<< server_rank << std::endl); // Construct int array to send to server. const int to_send_size = BlockId::MPI_BLOCK_ID_COUNT + 2; const int line_num_offset = BlockId::MPI_BLOCK_ID_COUNT; const int section_num_offset = line_num_offset + 1; int to_send[to_send_size]; // BlockId & line number int *serialized_block_id = block_id.to_mpi_array(); std::copy(serialized_block_id + 0, serialized_block_id + BlockId::MPI_BLOCK_ID_COUNT, to_send); to_send[line_num_offset] = current_line(); to_send[section_num_offset] = barrier_support_.section_number(); SIPMPIUtils::check_err( MPI_Send(to_send, to_send_size, MPI_INT, server_rank, get_tag, MPI_COMM_WORLD)); //allocate block, and insert in block map, using block data as buffer block = block_manager_.get_block_for_writing(block_id, true); //post an asynchronous receive and store the request in the //block's state MPI_Request request; SIPMPIUtils::check_err( MPI_Irecv(block->get_data(), block->size(), MPI_DOUBLE, server_rank, get_tag, MPI_COMM_WORLD, &request)); block->state().mpi_request_ = request; }
/**
 * A put appears in a SIAL program as
 *     put target(i,j,k,l) += source(i,j,k,l)
 * So we need the target block id, but the source block data.
 * Accumulation is done by the server.
 *
 * Two messages are sent with MPI_Send: first the serialized target id
 * together with the current line and section number, then the source
 * block's data.  An ack from the server is then registered as expected.
 *
 * The implementation will be more complicated if asynchronous send is
 * used.
 *
 * @param target_id    id of the block the server accumulates into
 * @param source_block block whose data is sent to the server
 */
void SialOpsParallel::put_accumulate(BlockId& target_id,
        const Block::BlockPtr source_block) {

    // Partial check for data races.
    check_and_set_mode(target_id, WRITE);

    // Determine the destination server and the two tags (id message and
    // data message) for this operation.
    int server_rank = data_distribution_.get_server_rank(target_id);
    int put_accumulate_tag, put_accumulate_data_tag;
    put_accumulate_tag = barrier_support_.make_mpi_tags_for_PUT_ACCUMULATE(
            put_accumulate_data_tag);

    sip::check(server_rank>=0&&server_rank<sip_mpi_attr_.global_size(), "invalid server rank",current_line());

    SIP_LOG(std::cout<<"W " << sip_mpi_attr_.global_rank() << " : sending PUT_ACCUMULATE for block " << target_id << " to server "<< server_rank << std::endl);

    // Construct int array to send to server.
    // Layout: [ serialized block id | line number | section number ].
    const int to_send_size = BlockId::MPI_BLOCK_ID_COUNT + 2;
    const int line_num_offset = BlockId::MPI_BLOCK_ID_COUNT;
    const int section_num_offset = line_num_offset + 1;
    int to_send[to_send_size]; // BlockId, line number & section number
    int *serialized_block_id = target_id.to_mpi_array();
    std::copy(serialized_block_id + 0, serialized_block_id + BlockId::MPI_BLOCK_ID_COUNT, to_send);
    to_send[line_num_offset] = current_line();
    to_send[section_num_offset] = barrier_support_.section_number();

    // Send block id.
    SIPMPIUtils::check_err(
            MPI_Send(to_send, to_send_size, MPI_INT,
                    server_rank, put_accumulate_tag, MPI_COMM_WORLD));

    // Immediately follow with the data.
    SIPMPIUtils::check_err(
            MPI_Send(source_block->get_data(), source_block->size(), MPI_DOUBLE,
                    server_rank, put_accumulate_data_tag, MPI_COMM_WORLD));

    // Register the ack expected from the server for the data message.
    ack_handler_.expect_ack_from(server_rank, put_accumulate_data_tag);

    SIP_LOG(
            std::cout<< "W " << sip_mpi_attr_.global_rank() << " : Done with PUT_ACCUMULATE for block " << target_id << " to server rank " << server_rank << std::endl);
}
/**
 * Prints the data of a contiguous array to the test output stream.
 *
 * Output starts with "<line_number>: " and a header naming the array,
 * followed by at most 1024 elements in memory order.  Each element is
 * written with a field width of 14, a line break precedes every row of
 * segment_sizes_[0] elements, and "...." is appended when the element
 * limit was reached.
 *
 * @param array_slot  slot of the array, used to look up its name
 * @param block       block holding the array's data
 * @param line_number line number written at the start of the output
 */
void SialPrinterForTests::do_print_contiguous(int array_slot, Block::BlockPtr block, int line_number) {
    const int max_to_print = 1024;
    const int num_elems = block->size();
    const int row_length = block->shape().segment_sizes_[0];
    const double* values = block->get_data();

    // Number of elements actually written, capped at the print limit.
    const int shown = (num_elems < max_to_print) ? num_elems : max_to_print;

    out_ << line_number << ": ";
    out_ << "printing " << shown
         << " of " << num_elems << " elements of contiguous array "
         << sip_tables_.array_name(array_slot)
         << " in the order stored in memory ";

    for (int k = 0; k < shown; ++k) {
        // Start a new output row every row_length elements.
        if (k % row_length == 0) {
            out_ << std::endl;
        }
        // width() applies to the next insertion only, so set it per element.
        out_.width(14);
        out_ << values[k] << " ";
    }

    // Signal that the limit was hit.
    if (shown == max_to_print) {
        out_ << "....";
    }
    out_ << std::endl;
}
/**
 * Broadcasts the data of a block over the company communicator.
 *
 * The same block argument serves as the send buffer on the root worker and
 * as the receive buffer everywhere else.  Nothing is done unless
 * num_workers() is greater than zero.
 *
 * @param source_or_dest block whose data is sent (on the root) or
 *                       overwritten (elsewhere)
 * @param source_worker  rank passed to MPI_Bcast as the root
 */
void SialOpsParallel::broadcast_static(Block::BlockPtr source_or_dest, int source_worker) {
    // Skip the collective entirely when there are no workers.
    if (!(sip_mpi_attr_.num_workers() > 0)) {
        return;
    }

    SIPMPIUtils::check_err(
            MPI_Bcast(source_or_dest->get_data(), source_or_dest->size(),
                    MPI_DOUBLE, source_worker,
                    sip_mpi_attr_.company_communicator()));
}