/*!	\brief Prepend a dispatch block that branches to `block` when the stored
		resume point equals the id of `block`.

	A block is inserted in front of the current first block of the dataflow
	graph and filled with four instructions, in this order:
	  0. mov  - take the address of "_Zocelot_resume_point" into a temporary
	  1. ld   - read a u32 through that temporary
	  2. setp - compare the loaded value for equality with block->id()
	  3. bra  - branch to block->label(), guarded by the predicate from (2)

	\param block The block that should be reachable from the new entry block.
*/
void RemoveBarrierPass::_addEntryPoint(
	analysis::DataflowGraph::iterator block )
{
	typedef ir::PTXInstruction Instruction;
	typedef ir::PTXOperand     Operand;

	analysis::DataflowGraph::iterator entry = _dfg().insert( _dfg().begin() );

	// Running position of the next instruction inside the new entry block.
	unsigned int position = 0;

	// Address of the resume point variable.
	Instruction address( Instruction::Mov );
	address.addressSpace  = Instruction::Local;
	address.type          = Operand::u32;
	address.a.addressMode = Operand::Address;
	address.a.identifier  = "_Zocelot_resume_point";
	address.a.type        = Operand::u32;
	address.d.addressMode = Operand::Register;
	address.d.type        = Operand::u32;
	address.d.reg         = _tempRegister();
	_dfg().insert( entry, address, position++ );

	// Value currently stored at the resume point.
	// NOTE(review): the source operand is a plain copy of address.d, so it
	// keeps the Register address mode, while the stores in _addSpillCode use
	// Indirect for their address operand - confirm this is intended.
	Instruction fetch( Instruction::Ld );
	fetch.type          = Operand::u32;
	fetch.addressSpace  = Instruction::Local;
	fetch.a             = address.d;
	fetch.d.addressMode = Operand::Register;
	fetch.d.type        = Operand::u32;
	fetch.d.reg         = _tempRegister();
	_dfg().insert( entry, fetch, position++ );

	// Predicate: does the stored resume point name this block?
	Instruction compare( Instruction::SetP );
	compare.comparisonOperator = Instruction::Eq;
	compare.type               = Operand::u32;
	compare.d.addressMode      = Operand::Register;
	compare.d.type             = Operand::pred;
	compare.d.reg              = _tempRegister();
	compare.a                  = fetch.d;
	compare.b.type             = Operand::u32;
	compare.b.addressMode      = Operand::Immediate;
	compare.b.imm_uint         = block->id();
	_dfg().insert( entry, compare, position++ );

	// Guarded jump to the resumed block.
	Instruction jump( Instruction::Bra );
	jump.pg            = compare.d;
	jump.d.identifier  = block->label();
	jump.d.addressMode = Operand::Label;
	_dfg().insert( entry, jump, position++ );

	// Presumably registers `block` as a branch target of the new entry block
	// in the graph - verify against DataflowGraph::target.
	_dfg().target( entry, block );
}
void RemoveBarrierPass::_addSpillCode( analysis::DataflowGraph::iterator block, analysis::DataflowGraph::iterator target, const analysis::DataflowGraph::RegisterSet& alive, bool isBarrier ) { unsigned int bytes = 0; ir::PTXInstruction move ( ir::PTXInstruction::Mov ); move.type = ir::PTXOperand::u32; move.addressSpace = ir::PTXInstruction::Local; move.a.identifier = "_Zocelot_spill_area"; move.a.addressMode = ir::PTXOperand::Address; move.a.type = ir::PTXOperand::u32; move.d.reg = _tempRegister(); move.d.addressMode = ir::PTXOperand::Register; move.d.type = ir::PTXOperand::u32; _dfg().insert( block, move, block->instructions().size() - 1 ); report( " Saving " << alive.size() << " Registers" ); for( analysis::DataflowGraph::RegisterSet::const_iterator reg = alive.begin(); reg != alive.end(); ++reg ) { report( " r" << reg->id << " (" << ir::PTXOperand::bytes( reg->type ) << " bytes)" ); ir::PTXInstruction save( ir::PTXInstruction::St ); save.type = reg->type; save.addressSpace = ir::PTXInstruction::Local; save.d.addressMode = ir::PTXOperand::Indirect; save.d.reg = move.d.reg; save.d.type = ir::PTXOperand::u32; save.d.offset = bytes; bytes += ir::PTXOperand::bytes( save.type ); save.a.addressMode = ir::PTXOperand::Register; save.a.type = reg->type; save.a.reg = reg->id; _dfg().insert( block, save, block->instructions().size() - 1 ); } _spillBytes = std::max( bytes, _spillBytes ); move.type = ir::PTXOperand::u32; move.addressSpace = ir::PTXInstruction::Local; move.a.identifier = "_Zocelot_resume_point"; move.a.addressMode = ir::PTXOperand::Address; move.a.type = ir::PTXOperand::u32; move.d.reg = _tempRegister(); move.d.addressMode = ir::PTXOperand::Register; move.d.type = ir::PTXOperand::u32; _dfg().insert( block, move, block->instructions().size() - 1 ); ir::PTXInstruction save( ir::PTXInstruction::St ); save.type = ir::PTXOperand::u32; save.addressSpace = ir::PTXInstruction::Local; save.d.addressMode = ir::PTXOperand::Indirect; save.d.reg = move.d.reg; save.d.type = 
ir::PTXOperand::u32; save.a.addressMode = ir::PTXOperand::Immediate; save.a.type = ir::PTXOperand::u32; save.a.imm_uint = target->id(); _dfg().insert( block, save, block->instructions().size() - 1 ); if( isBarrier ) { move.d.reg = _tempRegister(); move.a.identifier = "_Zocelot_barrier_next_kernel"; _dfg().insert( block, move, block->instructions().size() - 1 ); save.d.reg = move.d.reg; save.a.imm_uint = _kernelId; _dfg().insert( block, save, block->instructions().size() - 1 ); } }