void RemoveBarrierPass::_addEntryPoint(
	analysis::DataflowGraph::iterator block )
{
	analysis::DataflowGraph::iterator entry = _dfg().insert(_dfg().begin());
			
	ir::PTXInstruction move( ir::PTXInstruction::Mov );
	
	move.type = ir::PTXOperand::u32;
	move.addressSpace = ir::PTXInstruction::Local;
	move.a.identifier = "_Zocelot_resume_point";
	move.a.addressMode = ir::PTXOperand::Address;
	move.a.type = ir::PTXOperand::u32;
	
	move.d.reg = _tempRegister();
	move.d.addressMode = ir::PTXOperand::Register;
	move.d.type = ir::PTXOperand::u32;
	
	_dfg().insert( entry, move, 0 );

	ir::PTXInstruction load( ir::PTXInstruction::Ld );

	load.addressSpace = ir::PTXInstruction::Local;
	load.type = ir::PTXOperand::u32;
	load.a = move.d;

	load.d.reg = _tempRegister();
	load.d.addressMode = ir::PTXOperand::Register;
	load.d.type = ir::PTXOperand::u32;
	
	_dfg().insert( entry, load, 1 );

	ir::PTXInstruction setp( ir::PTXInstruction::SetP );
	
	setp.type = ir::PTXOperand::u32;
	setp.comparisonOperator = ir::PTXInstruction::Eq;

	setp.d.reg = _tempRegister();
	setp.d.addressMode = ir::PTXOperand::Register;
	setp.d.type = ir::PTXOperand::pred;
	
	setp.a = load.d;
	
	setp.b.addressMode = ir::PTXOperand::Immediate;
	setp.b.type = ir::PTXOperand::u32;
	setp.b.imm_uint = block->id();
	
	_dfg().insert( entry, setp, 2 );
	
	ir::PTXInstruction branch( ir::PTXInstruction::Bra );
	
	branch.d.addressMode = ir::PTXOperand::Label;
	branch.d.identifier = block->label();
	branch.pg = setp.d;

	_dfg().insert( entry, branch, 3 );
	
	_dfg().target( entry, block );
}
void RemoveBarrierPass::_removeBarrier( analysis::DataflowGraph::iterator block, 
	unsigned int id )
{
	typedef analysis::DataflowGraph::RegisterSet RegisterSet;
	
	analysis::DataflowGraph::InstructionVector::const_iterator 
		_instruction( block->instructions().begin() );
	std::advance( _instruction, id );
	analysis::DataflowGraph::iterator exitBlock( _dfg().end() );
	std::advance( exitBlock, -1 );

	ir::PTXInstruction& instruction = static_cast< ir::PTXInstruction& >( 
		*_instruction->i );

	bool isBarrier = instruction.opcode == ir::PTXInstruction::Bar;

	if( isBarrier )
	{
		report( "  Converting instruction " << instruction.toString() );
		instruction.opcode = ir::PTXInstruction::Call;
		instruction.tailCall = true;
		instruction.branchTargetInstruction = -1;
		instruction.a = ir::PTXOperand(
			ir::PTXOperand::FunctionName, "_ZOcelotBarrierKernel");
		instruction.d.addressMode = ir::PTXOperand::Invalid;

		report( "   Converted to " << instruction.toString() );		
	}
	
	RegisterSet alive = block->alive( _instruction );
	
	analysis::DataflowGraph::iterator bottom = _dfg().split( block, 
		id + 1, false );

	_addSpillCode( block, bottom, alive, isBarrier );
	_addRestoreCode( bottom, alive );
	
	_dfg().redirect( block, bottom, exitBlock );
	
	if( !isBarrier && instruction.pg.condition != ir::PTXOperand::PT )
	{
		_dfg().target( block, bottom, true );
	}
	
	_addEntryPoint( bottom );
}
Example #3
0
void RemoveBarrierPass::_runOnBlock( analysis::DataflowGraph::iterator block )
{
	typedef analysis::DataflowGraph::InstructionVector::const_iterator
		const_iterator;
	for( const_iterator _instruction = block->instructions().begin(); 
		_instruction != block->instructions().end(); ++_instruction )
	{
		ir::PTXInstruction& instruction = static_cast< 
			ir::PTXInstruction& >( *_instruction->i );
		if( instruction.opcode == ir::PTXInstruction::Bar
			|| ( instruction.opcode == ir::PTXInstruction::Call 
				&& !instruction.tailCall ) )
		{
#if 0
			if( _externals != 0
				&& instruction.opcode == ir::PTXInstruction::Call )
			{
				if( _externals->find( instruction.a.identifier ) != 0 )
				{
					report( "Skipping external call "
						<< instruction.toString() );
					continue;
				}
			}
#endif

			unsigned int bytes = _spillBytes;
			_spillBytes = 1;
			usesBarriers = true;
			_removeBarrier( block, std::distance( 
				const_iterator( block->instructions().begin() ),
				_instruction ) );
			_spillBytes = std::max( bytes, _spillBytes );
			++_reentryPoint;
			_dfg().compute();
			break;
		}
	}
}
void RemoveBarrierPass::_addSpillCode( analysis::DataflowGraph::iterator block, 
	analysis::DataflowGraph::iterator target, 
	const analysis::DataflowGraph::RegisterSet& alive, bool isBarrier )
{
	unsigned int bytes = 0;
	
	ir::PTXInstruction move ( ir::PTXInstruction::Mov );
	
	move.type = ir::PTXOperand::u32;
	move.addressSpace = ir::PTXInstruction::Local;
	move.a.identifier = "_Zocelot_spill_area";
	move.a.addressMode = ir::PTXOperand::Address;
	move.a.type = ir::PTXOperand::u32;
	
	move.d.reg = _tempRegister();
	move.d.addressMode = ir::PTXOperand::Register;
	move.d.type = ir::PTXOperand::u32;
	
	_dfg().insert( block, move, block->instructions().size() - 1 );

	report( "   Saving " << alive.size() << " Registers" );
	
	for( analysis::DataflowGraph::RegisterSet::const_iterator 
		reg = alive.begin(); reg != alive.end(); ++reg )
	{
		report( "    r" << reg->id << " (" 
			<< ir::PTXOperand::bytes( reg->type ) << " bytes)" );
		ir::PTXInstruction save( ir::PTXInstruction::St );
	
		save.type = reg->type;
		save.addressSpace = ir::PTXInstruction::Local;

		save.d.addressMode = ir::PTXOperand::Indirect;
		save.d.reg = move.d.reg;
		save.d.type = ir::PTXOperand::u32;
		save.d.offset = bytes;
		
		bytes += ir::PTXOperand::bytes( save.type );
	
		save.a.addressMode = ir::PTXOperand::Register;
		save.a.type = reg->type;
		save.a.reg = reg->id;
		
		_dfg().insert( block, save, 
			block->instructions().size() - 1 );
	}
	
	_spillBytes = std::max( bytes, _spillBytes );
	
	move.type = ir::PTXOperand::u32;
	move.addressSpace = ir::PTXInstruction::Local;
	move.a.identifier = "_Zocelot_resume_point";
	move.a.addressMode = ir::PTXOperand::Address;
	move.a.type = ir::PTXOperand::u32;
	
	move.d.reg = _tempRegister();
	move.d.addressMode = ir::PTXOperand::Register;
	move.d.type = ir::PTXOperand::u32;
	
	_dfg().insert( block, move, block->instructions().size() - 1 );
	
	ir::PTXInstruction save( ir::PTXInstruction::St );

	save.type = ir::PTXOperand::u32;
	save.addressSpace = ir::PTXInstruction::Local;

	save.d.addressMode = ir::PTXOperand::Indirect;
	save.d.reg = move.d.reg;
	save.d.type = ir::PTXOperand::u32;

	save.a.addressMode = ir::PTXOperand::Immediate;
	save.a.type = ir::PTXOperand::u32;
	save.a.imm_uint = target->id();
	
	_dfg().insert( block, save, block->instructions().size() - 1 );

	if( isBarrier )
	{
		move.d.reg = _tempRegister();
		move.a.identifier = "_Zocelot_barrier_next_kernel";
	
		_dfg().insert( block, move,
			block->instructions().size() - 1 );

		save.d.reg = move.d.reg;
		save.a.imm_uint = _kernelId;

		_dfg().insert( block, save,
			block->instructions().size() - 1 );
	}
}