예제 #1
0
/**
 * Makes sure a block has no pending operation before it is used.
 *
 * If the block's state reports pending(), the state is waited on (the block's
 * size is passed to the wait).  When a timer object is installed, the wait is
 * bracketed by start_timer/pause_timer on the BLOCKWAITTIME timer for the
 * given line.
 *
 * @param b    block to check
 * @param line line used to key the BLOCKWAITTIME timer
 * @return the same block pointer that was passed in
 */
Block::BlockPtr SialOpsParallel::wait_and_check(Block::BlockPtr b, int line) {
	// Nothing pending: hand the block straight back.
	if (!b->state().pending()) {
		return b;
	}

	// No timers installed: just wait.
	if (!sialx_timers_) {
		b->state().wait(b->size());
		return b;
	}

	// Charge the time spent waiting to this line's block-wait timer.
	sialx_timers_->start_timer(line, SialxTimer::BLOCKWAITTIME);
	b->state().wait(b->size());
	sialx_timers_->pause_timer(line, SialxTimer::BLOCKWAITTIME);
	return b;
}
예제 #2
0
/**
 * Prepares a block for reading on the host, copying data back from the GPU
 * only when it is needed.
 *
 * Cases, checked in order:
 *  - allocated on neither host nor GPU: reported through fail();
 *  - no host copy: host storage is allocated and filled from the GPU copy;
 *  - dirty on both host and GPU: reported through fail();
 *  - GPU copy dirty: the host copy is refreshed from the GPU copy.
 *
 * Afterwards the block is marked as on host and not dirty on the GPU.
 *
 * @param blk block to make readable on the host
 */
void BlockManager::lazy_gpu_read_on_host(const Block::BlockPtr& blk) {
	if (!blk->is_on_gpu() && !blk->is_on_host()) {
		fail("block allocated neither on host or gpu", current_line());
	} else if (!blk->is_on_host()) {
		blk->allocate_host_data();
		_gpu_device_to_host(blk->get_data(), blk->get_gpu_data(), blk->size());
	} else if (blk->is_dirty_on_all()) {
		// This check must precede the dirty-on-gpu test below; otherwise a
		// block that is dirty on both sides is silently overwritten from the
		// GPU and this error can never be reported.
		// NOTE(review): assumes is_dirty_on_all() is true only when both the
		// host and the GPU dirty flags are set -- confirm against Block.
		fail("block dirty on host & gpu !", current_line());
	} else if (blk->is_dirty_on_gpu()) {
		_gpu_device_to_host(blk->get_data(), blk->get_gpu_data(), blk->size());
	}
	blk->set_on_host();
	blk->unset_dirty_on_gpu();
}
예제 #3
0
/**
 * Writes the contents of a block to the printer's output stream.
 *
 * The stream is switched to fixed notation with precision 14 (these settings
 * are left in place on the stream).  Output starts with the line number.  A
 * block of size 1 is printed as "printing <id> = <value>".  Any other block
 * is printed as a header followed by at most 1024 elements in memory order,
 * with a line break before every element whose index is a multiple of the
 * block's first segment size.  "...." is appended whenever the block has
 * 1024 or more elements (so also when it has exactly 1024).
 *
 * @param id          id of the block, used for the printed name
 * @param block       block whose data is printed
 * @param line_number line number printed as the prefix
 */
void SialPrinterForTests::do_print_block(const BlockId& id, Block::BlockPtr block, int line_number){
	const int limit = 1024;
	const int num_elems = block->size();
	const int row_len = block->shape().segment_sizes_[0];
	double* values = block->get_data();

	out_.precision(14);
	out_.setf(std::ios_base::fixed);
	out_ << line_number << ":  ";

	if (num_elems != 1) {
		const int shown = (num_elems < limit) ? num_elems : limit;
		out_ << "printing " << shown;
		out_ << " of " << num_elems << " elements of block " << id.str(sip_tables_);
		out_ << " in the order stored in memory ";
		for (int k = 0; k < shown; ++k) {
			// Start a new output row every row_len elements.
			if (k % row_len == 0) {
				out_ << std::endl;
			}
			out_ << values[k] << " ";
		}
		// Same condition as "the loop stopped at the limit".
		if (num_elems >= limit) {
			out_ << "....";
		}
	} else {
		out_ << "printing " << id.str(sip_tables_) << " = ";
		out_ << values[0];
	}
	out_ << std::endl;
}
예제 #4
0
/**
 * Prepares a block for writing on the GPU, copying data from the host only
 * when it is needed.
 *
 * Cases, checked in order:
 *  - allocated on neither host nor GPU: the block is deleted from the block
 *    map and replaced by a new GPU block (blk is reassigned);
 *  - no GPU copy: GPU storage is allocated and filled from the host copy;
 *  - dirty on both host and GPU: reported through fail();
 *  - host copy dirty: the GPU copy is refreshed from the host copy.
 *
 * Afterwards the block is marked as on GPU, dirty on the GPU, and not dirty
 * on the host.
 *
 * @param blk   block to make writable on the GPU; may be replaced
 * @param id    id of the block, used when the block must be recreated
 * @param shape shape of the block, used when the block must be recreated
 */
void BlockManager::lazy_gpu_write_on_device(Block::BlockPtr& blk, const BlockId &id, const BlockShape& shape) {
	if (!blk->is_on_gpu() && !blk->is_on_host()) {
		block_map_.cached_delete_block(id); // Get rid of block, create a new one
		blk = create_gpu_block(id, shape);
//		if (is_scope_extent) {
//			temp_block_list_stack_.back()->push_back(id);
//		}
	} else if (!blk->is_on_gpu()) {
		blk->allocate_gpu_data();
		_gpu_host_to_device(blk->get_data(), blk->get_gpu_data(), blk->size());
	} else if (blk->is_dirty_on_all()) {
		// This check must precede the dirty-on-host test below; otherwise a
		// block that is dirty on both sides is silently overwritten from the
		// host and this error can never be reported.
		// NOTE(review): assumes is_dirty_on_all() is true only when both the
		// host and the GPU dirty flags are set -- confirm against Block.
		fail("block dirty on host & gpu !", current_line());
	} else if (blk->is_dirty_on_host()) {
		_gpu_host_to_device(blk->get_data(), blk->get_gpu_data(), blk->size());
	}
	blk->set_on_gpu();
	blk->set_dirty_on_gpu();
	blk->unset_dirty_on_host();
}
예제 #5
0
//TODO optimize this.  Can reduce searches in block map.
/**
 * Requests a block from its server unless the block manager already has an
 * entry for it.
 *
 * The access is first registered as a READ.  If the block manager returns a
 * non-NULL block for the id, nothing else is done.  Otherwise a GET message
 * is sent to the block's server and a non-blocking receive for the data is
 * posted directly into a newly obtained block; the receive request is stored
 * in the block's state.
 *
 * The GET message is an int array holding the serialized block id followed
 * by the current line and the barrier section number.
 *
 * @param block_id id of the block to fetch
 */
void SialOpsParallel::get(BlockId& block_id) {

	//check for "data race"
	check_and_set_mode(block_id, READ);

	//block already exists, or has a pending request: nothing to do
	if (block_manager_.block(block_id) != NULL) {
		return;
	}

	//determine the destination server and the tag for this GET
	const int server_rank = data_distribution_.get_server_rank(block_id);
	const int get_tag = barrier_support_.make_mpi_tag_for_GET();

	sip::check(server_rank>=0&&server_rank<sip_mpi_attr_.global_size(), "invalid server rank",current_line());

	SIP_LOG(std::cout<<"W " << sip_mpi_attr_.global_rank()
			<< " : sending GET for block " << block_id
			<< " to server "<< server_rank << std::endl);

	//assemble the message: [block id ints..., line number, section number]
	const int id_len = BlockId::MPI_BLOCK_ID_COUNT;
	const int msg_len = id_len + 2;
	int msg[msg_len];
	int* const id_words = block_id.to_mpi_array();
	std::copy(id_words, id_words + id_len, msg);
	msg[id_len] = current_line();
	msg[id_len + 1] = barrier_support_.section_number();

	SIPMPIUtils::check_err(
			MPI_Send(msg, msg_len, MPI_INT, server_rank, get_tag,
					MPI_COMM_WORLD));

	//obtain a block from the block manager; its data is the receive buffer
	Block::BlockPtr incoming = block_manager_.get_block_for_writing(block_id, true);

	//post the non-blocking receive and keep the request in the block's state
	MPI_Request recv_request;
	SIPMPIUtils::check_err(
			MPI_Irecv(incoming->get_data(), incoming->size(), MPI_DOUBLE,
					server_rank, get_tag, MPI_COMM_WORLD, &recv_request));
	incoming->state().mpi_request_ = recv_request;
}
예제 #6
0
/**
 * A put appears in a SIAL program as
 * put target(i,j,k,l) += source(i,j,k,l)
 * So we need the target block id, but the source block data.
 * Accumulation is done by the server
 *
 * Two blocking sends go to the target block's server: first an int array
 * holding the serialized target block id followed by the current line and
 * the barrier section number, then the source block's data.  An ack from the
 * server for the data tag is then registered as expected.
 *
 * The implementation will be more complicated if asynchronous send is
 * used
 *
 * @param target_id     id of the block to accumulate into
 * @param source_block  block whose data is sent to the server
 */
void SialOpsParallel::put_accumulate(BlockId& target_id,
		const Block::BlockPtr source_block) {

	//partial check for data races
	check_and_set_mode(target_id, WRITE);

	//determine the destination server and the tags for this operation
	int server_rank = data_distribution_.get_server_rank(target_id);
	int put_accumulate_tag, put_accumulate_data_tag;
	put_accumulate_tag = barrier_support_.make_mpi_tags_for_PUT_ACCUMULATE(
			put_accumulate_data_tag);

	sip::check(server_rank>=0&&server_rank<sip_mpi_attr_.global_size(), "invalid server rank",current_line());

	SIP_LOG(std::cout<<"W " << sip_mpi_attr_.global_rank()
			<< " : sending PUT_ACCUMULATE for block " << target_id
			<< " to server "<< server_rank << std::endl);

	// Construct int array to send to server:
	// [block id ints..., line number, section number]
	const int to_send_size = BlockId::MPI_BLOCK_ID_COUNT + 2;
	const int line_num_offset = BlockId::MPI_BLOCK_ID_COUNT;
	const int section_num_offset = line_num_offset + 1;
	int to_send[to_send_size];
	int *serialized_block_id = target_id.to_mpi_array();
	std::copy(serialized_block_id + 0, serialized_block_id + BlockId::MPI_BLOCK_ID_COUNT, to_send);
	to_send[line_num_offset] = current_line();
	to_send[section_num_offset] = barrier_support_.section_number();

	//send block id
	SIPMPIUtils::check_err(
			MPI_Send(to_send, to_send_size, MPI_INT,
					server_rank, put_accumulate_tag, MPI_COMM_WORLD));
	//immediately follow with the data
	SIPMPIUtils::check_err(
			MPI_Send(source_block->get_data(), source_block->size(), MPI_DOUBLE,
					server_rank, put_accumulate_data_tag, MPI_COMM_WORLD));

	//ack
	ack_handler_.expect_ack_from(server_rank, put_accumulate_data_tag);

	SIP_LOG(
			std::cout<< "W " << sip_mpi_attr_.global_rank() << " : Done with PUT_ACCUMULATE for block " << target_id << " to server rank " << server_rank << std::endl);

}
예제 #7
0
/**
 * Writes the contents of a contiguous array's block to the printer's output
 * stream.
 *
 * Output starts with the line number and a header naming the array, followed
 * by at most 1024 elements in memory order.  Each element is written with a
 * field width of 14, and a line break is emitted before every element whose
 * index is a multiple of the block's first segment size.  "...." is appended
 * whenever the block has 1024 or more elements (so also when it has exactly
 * 1024).
 *
 * @param array_slot  slot of the array, used to look up the printed name
 * @param block       block whose data is printed
 * @param line_number line number printed as the prefix
 */
void SialPrinterForTests::do_print_contiguous(int array_slot, Block::BlockPtr block, int line_number){
	const int limit = 1024;
	const int num_elems = block->size();
	const int row_len = block->shape().segment_sizes_[0];
	double* values = block->get_data();
	const int shown = (num_elems < limit) ? num_elems : limit;

	out_ << line_number << ":  ";
	out_ << "printing " << shown;
	out_ << " of " << num_elems << " elements of contiguous array " << sip_tables_.array_name(array_slot);
	out_ << " in the order stored in memory ";

	for (int k = 0; k < shown; ++k) {
		// Start a new output row every row_len elements.
		if (k % row_len == 0) {
			out_ << std::endl;
		}
		out_.width(14);
		out_ << values[k] << " ";
	}
	// Same condition as "the loop stopped at the limit".
	if (num_elems >= limit) {
		out_ << "....";
	}
	out_ << std::endl;
}
예제 #8
0
/**
 * Broadcasts a block's data over the company communicator.
 *
 * Does nothing when the number of workers is not positive.  Otherwise calls
 * MPI_Bcast on the block's data buffer (size() doubles) with source_worker
 * as the root rank; any MPI error is passed to SIPMPIUtils::check_err.
 *
 * @param source_or_dest block whose data buffer is handed to MPI_Bcast
 * @param source_worker  rank used as the broadcast root
 */
void SialOpsParallel::broadcast_static(Block::BlockPtr source_or_dest, int source_worker){
	if (sip_mpi_attr_.num_workers() <= 0) {
		return;
	}
	SIPMPIUtils::check_err(
			MPI_Bcast(source_or_dest->get_data(), source_or_dest->size(),
					MPI_DOUBLE, source_worker,
					sip_mpi_attr_.company_communicator()));
}