/*! \brief Collect the basic blocks of a kernel that contain a call instruction
 *
 * Each block is recorded at most once: scanning of a block stops at the first
 * call that is found in it.
 *
 * \param k The kernel whose control flow graph is scanned.
 * \return The set of blocks that contain at least one call instruction.
 */
static BlockSet getBlocksWithCallsToFuctionsThatObserveSideEffects(
	ir::IRKernel& k)
{
	report(" Getting functions that can observe side-effects");

	BlockSet blocksWithCalls;

	for(auto current = k.cfg()->begin(); current != k.cfg()->end(); ++current)
	{
		for(auto entry : current->instructions)
		{
			auto candidate = static_cast<ir::PTXInstruction*>(entry);

			// TODO: Check that the target can observe side effects
			if(!candidate->isCall()) continue;

			report(" " << candidate->toString());

			// One call is enough to record the block
			blocksWithCalls.insert(current);
			break;
		}
	}

	return blocksWithCalls;
}
/*! \brief Find a dominating block of 'block' that is not contained in a loop
 *
 * Starting at 'block', the dominator tree is followed upwards for as long as
 * the current block is reported to be inside a loop. If the walk would step
 * onto the entry block, the entry block's fallthrough edge is split with a
 * fresh block instead, that new block is returned, and the loop and
 * dominator tree analyses are invalidated because the graph changed.
 *
 * \param k The kernel that owns the control flow graph.
 * \param block The block to start the search from.
 * \return The block selected as the hoisting destination.
 */
ir::ControlFlowGraph::iterator
	HoistParameterLoadsPass::_getTopLevelDominatingBlock(
	ir::IRKernel& k, ir::ControlFlowGraph::iterator block)
{
	auto loops = static_cast<analysis::LoopAnalysis*>(
		getAnalysis(Analysis::LoopAnalysis));
	auto dominators = static_cast<analysis::DominatorTree*>(
		getAnalysis(Analysis::DominatorTreeAnalysis));

	for(;;)
	{
		if(!loops->isContainedInLoop(block)) return block;

		auto dominator = dominators->getDominator(block);

		if(dominator != block->cfg->get_entry_block())
		{
			// Keep climbing the dominator tree
			block = dominator;
			continue;
		}

		// Never hoist into the entry block itself: place a new block on its
		// fallthrough edge and use that one
		block = k.cfg()->split_edge(dominator->get_fallthrough_edge(),
			ir::BasicBlock(k.cfg()->newId())).first->tail;

		invalidateAnalysis(analysis::Analysis::LoopAnalysis);
		invalidateAnalysis(analysis::Analysis::DominatorTreeAnalysis);

		return block;
	}
}
void SimplifyControlFlowGraphPass::runOnKernel(ir::IRKernel& k) { bool changed = true; report("Simplify control flow for " << k.name); while(changed) { changed = _deleteUnconnectedBlocks(k); changed |= _deleteEmptyBlocks(k); #if REPORT_BASE > 1 k.cfg()->write(std::cout); #endif changed |= _simplifyTerminator(k); #if REPORT_BASE > 1 k.cfg()->write(std::cout); #endif changed |= _mergeBlockIntoPredecessor(k); #if REPORT_BASE > 1 k.cfg()->write(std::cout); #endif changed |= _mergeExitBlocks(k); #if REPORT_BASE > 1 k.cfg()->write(std::cout); #endif } }
/*! \brief Remove every block that has no instructions
 *
 * The entry and exit blocks are never removed. When an empty block has a
 * fallthrough edge, each of its incoming edges is re-created so that it
 * targets the fallthrough successor directly; for incoming edges that are
 * not fallthrough edges, the last instruction of the source block is
 * retargeted at the successor's label.
 *
 * \param k The kernel whose control flow graph is modified.
 * \return true if at least one block was deleted, false otherwise.
 */
bool SimplifyControlFlowGraphPass::_deleteEmptyBlocks(ir::IRKernel& k)
{
	report(" Deleting empty blocks...");

	bool any = false;

	for(ir::ControlFlowGraph::iterator block = k.cfg()->begin();
		block != k.cfg()->end(); )
	{
		// Only interior blocks without instructions are candidates
		if(block == k.cfg()->get_entry_block()
			|| block == k.cfg()->get_exit_block()
			|| !block->instructions.empty())
		{
			++block;
			continue;
		}

		// redirect all in_edges to the target
		// (work on a copy, edges are inserted while iterating)
		ir::BasicBlock::EdgePointerVector inEdges = block->in_edges;

		if(block->has_fallthrough_edge())
		{
			ir::ControlFlowGraph::iterator fallthrough =
				block->get_fallthrough_edge()->tail;

			k.cfg()->remove_edge(block->get_fallthrough_edge());

			for(ir::ControlFlowGraph::edge_pointer_iterator
				edge = inEdges.begin(); edge != inEdges.end(); ++edge)
			{
				if((*edge)->type == ir::Edge::FallThrough)
				{
					k.cfg()->insert_edge(ir::Edge((*edge)->head,
						fallthrough, ir::Edge::FallThrough));
				}
				else
				{
					k.cfg()->insert_edge(ir::Edge((*edge)->head,
						fallthrough, ir::Edge::Branch));

					// The terminator of the source block still names the
					// block being deleted, point it at the new target
					ir::PTXInstruction& ptx =
						static_cast<ir::PTXInstruction&>(
						*(*edge)->head->instructions.back());

					ptx.d.identifier = fallthrough->label();
				}
			}
		}

		report(" " << block->label());

		// delete the block, should wipe out all edges
		k.cfg()->remove_block(block++);

		// Report the modification so that the caller runs another round
		any = true;
	}

	return any;
}
/*! \brief Analyze the control and data flow graphs searching for divergent
 *	variables and blocks
 *
 * 1) Makes data flow analysis that detects divergent variables and blocks
 *	based on divergent sources, such as t.id, laneId
 * 2) Makes control flow analysis that detects new divergent variables based
 *	on the dependency of variables created on divergent paths
 *
 * The dataflow graph analysis is fetched by name and converted to the gated
 * SSA form before the two steps run; the previously computed divergence
 * graph and the set of non-divergent blocks are cleared first.
 *
 * \param k The kernel to analyze.
 */
void DivergenceAnalysis::analyze(ir::IRKernel &k)
{
	Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
	assert(dfgAnalysis != 0);

	DataflowGraph &dfg = static_cast<DataflowGraph&>(*dfgAnalysis);

	dfg.convertToSSAType(DataflowGraph::Gated);
	assert(dfg.ssa());

	// Discard the results of any previous run
	_divergGraph.clear();
	_notDivergentBlocks.clear();
	_kernel = &k;

	report("Running Divergence analysis on kernel '" << k.name << "'");
	#if REPORT_PTX > 0
	k.write(std::cout);
	#endif

	/* 1) Makes data flow analysis that detects divergent variables and
		blocks based on divergent sources, such as t.id, laneId */
	_analyzeDataFlow();

	/* 2) Makes control flow analysis that detects new divergent variables
		based on the dependency of variables created on divergent paths */
	_analyzeControlFlow();
}
/*! \brief Tag the first instruction of every non-empty block with its location
 *
 * The location string obtained from the kernel for that instruction is
 * appended to the instruction's metadata as a ".location '...'" entry.
 *
 * \param k The kernel whose instructions are annotated.
 */
void AddLocationMetadataPass::runOnKernel(ir::IRKernel& k)
{
	report("Adding location meta data to kernel " << k.name);

	for(auto& block : *k.cfg())
	{
		if(block.instructions.empty()) continue;

		auto& leader = static_cast<ir::PTXInstruction&>(
			*block.instructions.front());

		// Query the location before the metadata is touched
		const auto location = k.getLocationString(leader);

		leader.metadata += " .location '" + location + "'";

		report(" - " << leader.toString() << " - " << leader.metadata);
	}
}
/*! \brief Find the loads that are safe to move and try to hoist each of them
 *
 * A load is a candidate when the alias analysis reports that it cannot alias
 * any store and it has no register dependencies. All candidates are gathered
 * first and hoisted afterwards. The dataflow graph and alias analyses are
 * invalidated at the end.
 *
 * \param k The kernel to transform.
 */
void HoistParameterLoadsPass::runOnKernel(ir::IRKernel& k)
{
	using Load = std::pair<ir::ControlFlowGraph::iterator,
		ir::PTXInstruction*>;
	using LoadVector = std::vector<Load>;

	auto aliasAnalysis = static_cast<analysis::SimpleAliasAnalysis*>(
		getAnalysis(Analysis::SimpleAliasAnalysis));

	LoadVector candidateLoads;

	report("Hoisting loads in kernel '" << k.name << "'");
	report(" Identifying candidate loads");

	for(auto block = k.cfg()->begin(); block != k.cfg()->end(); ++block)
	{
		for(auto entry : block->instructions)
		{
			auto ptx = static_cast<ir::PTXInstruction*>(entry);

			const bool hoistable = ptx->isLoad()
				&& aliasAnalysis->cannotAliasAnyStore(entry)
				&& hasNoRegisterDependencies(*ptx);

			if(!hoistable) continue;

			report(" " << ptx->toString());

			candidateLoads.push_back(std::make_pair(block, ptx));
		}
	}

	report(" Attempting to hoist loads");

	for(auto& candidate : candidateLoads)
	{
		_tryHoistingLoad(candidate.first, candidate.second, k);
	}

	invalidateAnalysis(analysis::Analysis::DataflowGraphAnalysis);
	invalidateAnalysis(analysis::Analysis::SimpleAliasAnalysis);
}
/*! \brief Append a block to its predecessor and remove it from the graph
 *
 * The branch terminating the predecessor (if getBranch finds one) is
 * destroyed, the instructions of 'block' are moved to the end of the
 * predecessor, 'block' is removed, and every edge that used to leave
 * 'block' is re-created leaving the predecessor.
 *
 * \param k The kernel that owns the control flow graph.
 * \param predecessor The block that receives the instructions.
 * \param block The block that is merged and removed.
 */
static void mergeBlocks(ir::IRKernel& k,
	ir::ControlFlowGraph::iterator predecessor,
	ir::ControlFlowGraph::iterator block)
{
	// The predecessor no longer needs to jump to the merged block
	if(auto terminator = getBranch(predecessor))
	{
		delete terminator;
		predecessor->instructions.pop_back();
	}

	// Hand the instructions over; clearing afterwards leaves the
	// predecessor as the only holder of the pointers
	auto& destination = predecessor->instructions;
	destination.insert(destination.end(),
		block->instructions.begin(), block->instructions.end());
	block->instructions.clear();

	// Edges are destroyed together with the block, so keep copies
	std::vector<ir::Edge> successors;

	for(auto& outEdge : block->out_edges)
	{
		successors.push_back(*outEdge);
	}

	k.cfg()->remove_block(block);

	// Re-attach the successors to the predecessor
	for(auto& successor : successors)
	{
		k.cfg()->insert_edge(ir::Edge(predecessor,
			successor.tail, successor.type));
	}
}
/*! \brief Determine whether a store may be visible to code in this kernel
 *
 * The flag is set immediately for functions, because a function can be
 * called from code that is not visible here. Otherwise the flag is set if
 * any instruction in the kernel is a store.
 *
 * \param kernel The kernel to analyze; a pointer to it is retained.
 */
void SimpleAliasAnalysis::analyze(ir::IRKernel& kernel)
{
	// Functions can be called, so a store may already have happened
	// in the caller
	_aStoreCanReachThisFunction = kernel.function();
	_kernel = &kernel;

	if(_aStoreCanReachThisFunction) return;

	// Top level kernel: a store can only come from the kernel itself
	for(auto block = kernel.cfg()->begin();
		block != kernel.cfg()->end(); ++block)
	{
		for(auto instruction = block->instructions.begin();
			instruction != block->instructions.end(); ++instruction)
		{
			auto ptx = static_cast<ir::PTXInstruction*>(*instruction);

			if(ptx->isStore())
			{
				_aStoreCanReachThisFunction = true;
				return;
			}
		}
	}
}
/*! \brief Merge blocks into their unique predecessor where possible
 *
 * A block is merged when it is neither the entry nor the exit block, it has
 * exactly one incoming edge, the source of that edge is not the entry block
 * and that source has exactly one outgoing edge.
 *
 * \param k The kernel whose control flow graph is modified.
 * \return true if at least one block was merged, false otherwise.
 */
bool SimplifyControlFlowGraphPass::_mergeBlockIntoPredecessor(ir::IRKernel& k)
{
	bool merged = false;

	report(" Merging blocks with predecessors...");

	ir::ControlFlowGraph::iterator next = k.cfg()->begin();

	while(next != k.cfg()->end())
	{
		// Step past the candidate up front, it may be removed below
		ir::ControlFlowGraph::iterator candidate = next++;

		if(candidate == k.cfg()->get_entry_block()) continue;
		if(candidate == k.cfg()->get_exit_block()) continue;

		// Block has a single predecessor
		if(candidate->in_edges.size() != 1) continue;

		auto predecessor = candidate->in_edges.back()->head;

		if(predecessor == k.cfg()->get_entry_block()) continue;

		// Predecessor has single successor
		if(predecessor->out_edges.size() != 1) continue;

		report(" " << predecessor->label() << " <- " << candidate->label());

		// Merge the blocks
		mergeBlocks(k, predecessor, candidate);

		merged = true;
	}

	return merged;
}
/*! \brief Run linear scan register allocation on a gated SSA dataflow graph
 *
 * The dataflow graph analysis is converted to the gated SSA form, the shared
 * state of this pass is cleared and the base linear scan allocator is run.
 * When DIVERGENCE_REGISTER_PROFILE_H_ is enabled, spill statistics are reset
 * beforehand and printed afterwards for kernels that are not functions.
 *
 * \param k The kernel to allocate registers for.
 */
void DivergenceLinearScan::runOnKernel(ir::IRKernel& k)
{
	static_cast<analysis::DataflowGraph*>(
		getAnalysis("DataflowGraphAnalysis"))->convertToSSAType(
		analysis::DataflowGraph::Gated);

	#if DIVERGENCE_REGISTER_PROFILE_H_
	divergenceProfiler::resetSpillData();
	#endif

	_shared.clear();

	LinearScanRegisterAllocationPass::runOnKernel(k);

	#if DIVERGENCE_REGISTER_PROFILE_H_
	const bool isTopLevelKernel = !k.function();

	if(isTopLevelKernel)
	{
		divergenceProfiler::printSpillResults(k.name);
	}
	#endif
}
/*! \brief Reset the affine allocation state and run linear scan allocation
 *
 * The temporary register list, the warp position and the shared state of
 * this pass are reset before the base linear scan allocator is run. When
 * AFFINE_REGISTER_PROFILE_H_ is enabled, spill statistics are reset
 * beforehand and printed afterwards for kernels that are not functions.
 *
 * \param k The kernel to allocate registers for.
 */
void AffineLinearScan::runOnKernel(ir::IRKernel& k)
{
	#if AFFINE_REGISTER_PROFILE_H_
	affineProfiler::resetSpillData();
	#endif

	// Start from a clean state for every kernel
	AffineRegister::tempRegisters.clear();
	AffineRegister::warpPosition = 0;
	_shared.clear();

	LinearScanRegisterAllocationPass::runOnKernel(k);

	#if AFFINE_REGISTER_PROFILE_H_
	const bool isTopLevelKernel = !k.function();

	if(isTopLevelKernel)
	{
		affineProfiler::printSpillResults(k.name);
	}
	#endif
}
/*! \brief Number the blocks of the kernel and build the post-dominator tree
 *
 * The sequence returned by reverse_topological_sequence() is walked back to
 * front. Every block is appended to 'blocks', its position is recorded in
 * 'blocksToIndex' and a -1 placeholder is appended to 'p_dom'. computeDT()
 * is called once all blocks have been numbered.
 *
 * \param kernel The kernel whose control flow graph is analyzed.
 */
void PostdominatorTree::analyze(ir::IRKernel& kernel)
{
	report("Building post-dominator tree.");

	cfg = kernel.cfg();

	report(" Starting with post order sequence");

	// form a vector of the basic blocks in post-order
	ir::ControlFlowGraph::BlockPointerVector sequence =
		cfg->reverse_topological_sequence();

	for(ir::ControlFlowGraph::reverse_pointer_iterator
		current = sequence.rbegin(); current != sequence.rend(); ++current)
	{
		// Index that the block is about to receive in 'blocks'
		const int index = static_cast<int>(blocks.size());

		blocks.push_back(*current);
		blocksToIndex[*current] = index;

		// No post-dominator known yet
		p_dom.push_back(-1);

		report(" " << (*current)->label());
	}

	computeDT();
}
/*! \brief Collect the instructions of a kernel that can observe side effects
 *
 * \param k The kernel whose control flow graph is scanned.
 * \return The set of instructions for which canObserveSideEffects() holds.
 */
static InstructionSet getInstructionsThatCanObserveSideEffects(ir::IRKernel& k)
{
	report(" Getting instructions that can observe side-effects");

	InstructionSet observers;

	for(auto block = k.cfg()->begin(); block != k.cfg()->end(); ++block)
	{
		for(auto entry = block->instructions.begin();
			entry != block->instructions.end(); ++entry)
		{
			auto candidate = static_cast<ir::PTXInstruction*>(*entry);

			if(!candidate->canObserveSideEffects()) continue;

			report(" " << candidate->toString());

			observers.insert(candidate);
		}
	}

	return observers;
}
void ThreadFrontierReconvergencePass::runOnKernel(const ir::IRKernel& k) { report("Running thread frontier reconvergence pass"); typedef analysis::ThreadFrontierAnalysis::Priority Priority; typedef std::multimap<Priority, ir::ControlFlowGraph::const_iterator, std::greater<Priority>> ReversePriorityMap; typedef analysis::ThreadFrontierAnalysis TFAnalysis; typedef ir::ControlFlowGraph::const_pointer_iterator const_pointer_iterator; Analysis* analysis = getAnalysis(Analysis::ThreadFrontierAnalysis); assert(analysis != 0); TFAnalysis* tfAnalysis = static_cast<TFAnalysis*>(analysis); ReversePriorityMap priorityToBlocks; // sort by priority (high to low) for(ir::ControlFlowGraph::const_iterator block = k.cfg()->begin(); block != k.cfg()->end(); ++block) { priorityToBlocks.insert(std::make_pair(tfAnalysis->getPriority(block), block)); } typedef std::unordered_map<ir::BasicBlock::Id, unsigned int> IdToPCMap; typedef std::unordered_map<unsigned int, ir::ControlFlowGraph::const_iterator> PCToBlockMap; IdToPCMap pcs; PCToBlockMap branchPCs; PCToBlockMap fallthroughPCs; // lay the code out in priority order report(" Packing instructions into a vector"); for(ReversePriorityMap::const_iterator priorityAndBlock = priorityToBlocks.begin(); priorityAndBlock != priorityToBlocks.end(); ++priorityAndBlock) { ir::ControlFlowGraph::const_iterator block = priorityAndBlock->second; report(" Basic Block " << block->label() << " (" << block->id << ")"); pcs.insert(std::make_pair(block->id, instructions.size())); for(ir::ControlFlowGraph::InstructionList::const_iterator instruction = block->instructions.begin(); instruction != block->instructions.end(); ++instruction) { const ir::PTXInstruction& ptx = static_cast< const ir::PTXInstruction&>(**instruction); report(" [" << instructions.size() << "] '" << ptx.toString()); instructions.push_back(ptx); instructions.back().pc = instructions.size() - 1; if(ptx.opcode == ir::PTXInstruction::Bra) { 
branchPCs.insert(std::make_pair(instructions.back().pc, block)); } } if(!_gen6) { // Add a branch for the fallthrough if it is in the TF if(block->has_fallthrough_edge()) { ir::ControlFlowGraph::const_iterator target = block->get_fallthrough_edge()->tail; ReversePriorityMap::const_iterator next = priorityAndBlock; ++next; bool needsCheck = target != next->second; TFAnalysis::BlockVector frontier = tfAnalysis->getThreadFrontier(block); for(TFAnalysis::BlockVector::const_iterator stalledBlock = frontier.begin(); stalledBlock != frontier.end(); ++stalledBlock) { if((*stalledBlock)->id == target->id) { needsCheck = true; break; } } if(needsCheck) { fallthroughPCs.insert(std::make_pair( instructions.size(), block)); instructions.push_back(ir::PTXInstruction( ir::PTXInstruction::Bra, ir::PTXOperand(target->label()))); instructions.back().needsReconvergenceCheck = true; instructions.back().branchTargetInstruction = -1; report(" [" << (instructions.size() - 1) << "] '" << instructions.back().toString()); report(" - artificial branch for check on" " fallthrough into TF."); } } } } report(" Updating branch targets"); for(PCToBlockMap::const_iterator pcAndBlock = branchPCs.begin(); pcAndBlock != branchPCs.end(); ++pcAndBlock) { ir::ControlFlowGraph::const_iterator block = pcAndBlock->second; unsigned int pc = pcAndBlock->first; const ir::PTXInstruction& ptx = static_cast< const ir::PTXInstruction&>(instructions[pc]); ir::ControlFlowGraph::const_iterator target = block->get_branch_edge()->tail; IdToPCMap::const_iterator targetPC = pcs.find(target->id); assert(targetPC != pcs.end()); report(" setting branch target of '" << ptx.toString() << "' to " << targetPC->second); instructions[pc].branchTargetInstruction = targetPC->second; TFAnalysis::BlockVector frontier = tfAnalysis->getThreadFrontier(block); if(_gen6) { ir::ControlFlowGraph::const_iterator firstBlock = k.cfg()->end(); TFAnalysis::Priority highest = 0; frontier.push_back(block->get_branch_edge()->tail); 
if(block->has_fallthrough_edge()) { frontier.push_back( block->get_fallthrough_edge()->tail); } // gen6 jumps to the block with the highest priority for(TFAnalysis::BlockVector::const_iterator stalledBlock = frontier.begin(); stalledBlock != frontier.end(); ++stalledBlock) { TFAnalysis::Priority priority = tfAnalysis->getPriority(*stalledBlock); if(priority >= highest) { highest = priority; firstBlock = *stalledBlock; } } // the reconverge point is the first block in the frontier assert(firstBlock != k.cfg()->end()); IdToPCMap::const_iterator reconverge = pcs.find(firstBlock->id); assert(reconverge != pcs.end()); instructions[pc].reconvergeInstruction = reconverge->second; report(" re-converge point " << reconverge->second << ", " << firstBlock->label()); } else { // Does this branch need to check for re-convergence? // Or: are any of the target's predecessors // in the thread frontier? bool needsCheck = false; for(TFAnalysis::BlockVector::const_iterator stalledBlock = frontier.begin(); stalledBlock != frontier.end(); ++stalledBlock) { if((*stalledBlock)->id == target->id) { needsCheck = true; report(" needs re-convergence check."); break; } } instructions[pc].needsReconvergenceCheck = needsCheck; } } report(" Updating fallthrough targets"); for(PCToBlockMap::const_iterator pcAndBlock = fallthroughPCs.begin(); pcAndBlock != fallthroughPCs.end(); ++pcAndBlock) { ir::ControlFlowGraph::const_iterator block = pcAndBlock->second; unsigned int pc = pcAndBlock->first; const ir::PTXInstruction& ptx = static_cast< const ir::PTXInstruction&>(instructions[pc]); ir::ControlFlowGraph::const_iterator target = block->get_fallthrough_edge()->tail; IdToPCMap::const_iterator targetPC = pcs.find(target->id); assert(targetPC != pcs.end()); report(" setting branch target of '" << ptx.toString() << "' to " << targetPC->second); instructions[pc].branchTargetInstruction = targetPC->second; } }
/*! \brief Record every call in the kernel that is eligible for inlining
 *
 * A call is skipped when it is indirect, when the callee is not found in
 * the kernel's module, when the callee name is a builtin, or when the
 * callee has more instructions than thresholdToInline. Every remaining call
 * is appended to _calls. When a block contains several eligible calls, the
 * descriptor of the preceding eligible call in that block is marked as
 * linked before the next one is appended.
 *
 * \param k The kernel that is searched for calls.
 */
void FunctionInliningPass::_getFunctionsToInline(ir::IRKernel& k)
{
	report(" Finding functions that are eligible for inlining...");

	for(auto block = k.cfg()->begin(); block != k.cfg()->end(); ++block)
	{
		// Set once an eligible call has been found in this block
		bool linked = false;

		for(auto instruction = block->instructions.begin();
			instruction != block->instructions.end(); ++instruction)
		{
			// Bind by reference, a plain 'auto' would copy the instruction
			auto& ptx = static_cast<ir::PTXInstruction&>(**instruction);

			if(ptx.opcode != ir::PTXInstruction::Call) continue;

			report(" Examining " << ptx.toString());

			if(ptx.a.addressMode != ir::PTXOperand::FunctionName)
			{
				report(" skipping because it is an indirect call.");
				continue;
			}

			// Get the kernel being called if it is in this module
			auto calledKernel = k.module->getKernel(ptx.a.identifier);

			// Skip kernels in another module
			if(calledKernel == 0)
			{
				report(" skipping because it is in a different module.");
				continue;
			}

			// Skip kernels that are built-in functions
			if(isBuiltin(ptx.a.identifier))
			{
				report(" skipping because it is a reserved keyword.");
				continue;
			}

			// Skip kernels that are too large to inline
			if(calledKernel->cfg()->instructionCount() > thresholdToInline)
			{
				report(" skipping because it is too large ("
					<< calledKernel->cfg()->instructionCount()
					<< " > " << thresholdToInline << ").");
				continue;
			}

			report(" it is eligible for inlining!");

			// The previous eligible call shares this block with the new one
			if(linked)
			{
				_calls.back().linked = true;
			}

			_calls.push_back(FunctionCallDescriptor(
				instruction, block, calledKernel));

			linked = true;
		}
	}
}
/*! \brief Replace a call with a jump into the inlined body and a jump back
 *
 * The calling block is split right after the call. The call becomes a branch
 * to 'functionEntry', the first return found in each inlined block becomes a
 * branch to 'functionExit', and a uniform branch to the block holding the
 * rest of the caller is appended to 'functionExit'.
 *
 * The guard predicate of the call is carried over to the branch that
 * replaces it. A guarded call additionally gets a fallthrough edge to the
 * return block; an unguarded call is marked as a uniform branch.
 *
 * \param newBlocks Mapping from blocks of the callee to their clones.
 * \param functionEntry The clone of the callee's entry block.
 * \param functionExit The clone of the callee's exit block.
 * \param kernel The kernel that contains the call.
 * \param call The call instruction that is replaced.
 * \param block The block that contains the call.
 * \return The block holding the instructions that followed the call.
 */
static ir::ControlFlowGraph::iterator convertCallToJumps(
	const BasicBlockMap& newBlocks,
	ir::ControlFlowGraph::iterator functionEntry,
	ir::ControlFlowGraph::iterator functionExit,
	ir::IRKernel& kernel,
	ir::ControlFlowGraph::instruction_iterator call,
	ir::ControlFlowGraph::iterator block)
{
	// split the block
	auto firstInstructionOfSplitBlock = call;
	++firstInstructionOfSplitBlock;

	auto returnBlock = kernel.cfg()->split_block(block,
		firstInstructionOfSplitBlock, ir::Edge::Invalid);

	kernel.cfg()->remove_edge(block->out_edges.front());

	// add edges
	kernel.cfg()->insert_edge(ir::Edge(block, functionEntry,
		ir::Edge::Branch));
	kernel.cfg()->insert_edge(ir::Edge(functionExit, returnBlock,
		ir::Edge::Branch));

	ir::PTXInstruction& ptxCall = static_cast<ir::PTXInstruction&>(**call);

	// Save the guard now, the instruction is overwritten below
	const ir::PTXOperand guard = ptxCall.pg;
	const bool isGuarded = guard.condition != ir::PTXOperand::PT;

	if(isGuarded)
	{
		// The body is skipped when the guard does not hold
		kernel.cfg()->insert_edge(ir::Edge(block, returnBlock,
			ir::Edge::FallThrough));
	}

	// set branch to function instruction
	ptxCall = ir::PTXInstruction(ir::PTXInstruction::Bra);

	ptxCall.d.addressMode = ir::PTXOperand::Label;
	ptxCall.d.identifier = functionEntry->label();
	ptxCall.pg = guard;

	// Must happen after the assignment above, which resets every field
	if(!isGuarded)
	{
		ptxCall.uni = true;
	}

	// set all return instructions to branches to the exit node
	for(auto mapping = newBlocks.begin();
		mapping != newBlocks.end(); ++mapping)
	{
		auto inlinedBlock = mapping->second;

		for(auto instruction = inlinedBlock->instructions.begin();
			instruction != inlinedBlock->instructions.end(); ++instruction)
		{
			ir::PTXInstruction& ptx = static_cast<ir::PTXInstruction&>(
				**instruction);

			if(ptx.opcode != ir::PTXInstruction::Ret) continue;

			ptx = ir::PTXInstruction(ir::PTXInstruction::Bra);

			ptx.d.addressMode = ir::PTXOperand::Label;
			ptx.d.identifier = functionExit->label();

			if(inlinedBlock->has_fallthrough_edge()
				&& ptx.pg.condition == ir::PTXOperand::PT)
			{
				auto fallthrough = inlinedBlock->get_fallthrough_edge();

				ptx.uni = true;

				// An unconditional branch leaves through a branch edge
				ir::Edge newEdge(fallthrough->head, fallthrough->tail,
					ir::Edge::Branch);

				kernel.cfg()->remove_edge(fallthrough);
				kernel.cfg()->insert_edge(newEdge);
			}

			// Only the first return of a block is converted
			break;
		}
	}

	// set branch back after executing function
	auto ret = new ir::PTXInstruction(ir::PTXInstruction::Bra);

	ret->uni = true;
	ret->d.addressMode = ir::PTXOperand::Label;
	ret->d.identifier = returnBlock->label();

	functionExit->instructions.push_back(ret);

	return returnBlock;
}
static void insertAndConnectBlocks(BasicBlockMap& newBlocks, ir::ControlFlowGraph::iterator& functionEntry, ir::ControlFlowGraph::iterator& functionExit, ir::IRKernel& kernel, unsigned int& nextRegister, const ir::IRKernel& inlinedKernel) { typedef std::unordered_map<ir::PTXOperand::RegisterType, ir::PTXOperand::RegisterType> RegisterMap; ir::IRKernel copy; const ir::IRKernel* inlinedKernelPointer = &inlinedKernel; // create a copy if the call is recursive if(inlinedKernelPointer == &kernel) { copy = inlinedKernel; inlinedKernelPointer = © } // Insert new blocks for(auto block = inlinedKernelPointer->cfg()->begin(); block != inlinedKernelPointer->cfg()->end(); ++block) { auto newBlock = kernel.cfg()->clone_block(block); newBlocks.insert(std::make_pair(block, newBlock)); } // Connect new blocks, rename branch labels for(auto block = newBlocks.begin(); block != newBlocks.end(); ++block) { for(auto edge = block->first->out_edges.begin(); edge != block->first->out_edges.end(); ++edge) { auto headBlock = block->second; auto tail = (*edge)->tail; auto tailBlock = newBlocks.find(tail); assert(tailBlock != newBlocks.end()); kernel.cfg()->insert_edge(ir::Edge(headBlock, tailBlock->second, (*edge)->type)); if((*edge)->type == ir::Edge::Branch) { assert(!headBlock->instructions.empty()); auto instruction = headBlock->instructions.back(); auto branch = static_cast<ir::PTXInstruction*>(instruction); if(branch->opcode == ir::PTXInstruction::Ret) continue; assertM(branch->opcode == ir::PTXInstruction::Bra, "Expecting " << branch->toString() << " to be a branch"); branch->d.identifier = tailBlock->second->label(); } } } // Assign copied blocks new registers RegisterMap newRegisters; for(auto block = newBlocks.begin(); block != newBlocks.end(); ++block) { for(auto instruction = block->second->instructions.begin(); instruction != block->second->instructions.end(); ++instruction) { ir::PTXInstruction& ptx = static_cast<ir::PTXInstruction&>( **instruction); ir::PTXOperand* operands[] 
= {&ptx.pg, &ptx.pq, &ptx.d, &ptx.a, &ptx.b, &ptx.c}; for(unsigned int i = 0; i < 6; ++i) { ir::PTXOperand& operand = *operands[i]; if( operand.addressMode != ir::PTXOperand::Register && operand.addressMode != ir::PTXOperand::Indirect && operand.addressMode != ir::PTXOperand::ArgumentList) { continue; } if(operand.type != ir::PTXOperand::pred) { if(operand.array.empty() && operand.addressMode != ir::PTXOperand::ArgumentList) { auto mapping = newRegisters.find(operand.reg); if(mapping == newRegisters.end()) { mapping = newRegisters.insert(std::make_pair( operand.reg, nextRegister++)).first; } operand.reg = mapping->second; } else { for(auto subOperand = operand.array.begin(); subOperand != operand.array.end(); ++subOperand ) { if(!subOperand->isRegister()) continue; auto mapping = newRegisters.find(subOperand->reg); if(mapping == newRegisters.end()) { mapping = newRegisters.insert(std::make_pair( subOperand->reg, nextRegister++)).first; } subOperand->reg = mapping->second; } } } else if(operand.addressMode != ir::PTXOperand::ArgumentList) { if(operand.condition == ir::PTXOperand::Pred || operand.condition == ir::PTXOperand::InvPred) { auto mapping = newRegisters.find(operand.reg); if(mapping == newRegisters.end()) { mapping = newRegisters.insert(std::make_pair( operand.reg, nextRegister++)).first; } operand.reg = mapping->second; } } } } } // Assign copied blocks new local variables typedef std::unordered_map<std::string, std::string> LocalMap; LocalMap locals; for(auto local = inlinedKernel.locals.begin(); local != inlinedKernel.locals.end(); ++local) { std::string newName = "_Zinlined_" + local->first; locals.insert(std::make_pair(local->first, newName)); auto newLocal = kernel.locals.insert( std::make_pair(newName, local->second)).first; newLocal->second.name = newName; } for(auto block = newBlocks.begin(); block != newBlocks.end(); ++block) { for(auto instruction = block->second->instructions.begin(); instruction != block->second->instructions.end(); 
++instruction) { ir::PTXInstruction& ptx = static_cast<ir::PTXInstruction&>( **instruction); if(!ptx.mayHaveAddressableOperand()) continue; ir::PTXOperand* operands[] = {&ptx.pg, &ptx.pq, &ptx.d, &ptx.a, &ptx.b, &ptx.c}; for(unsigned int i = 0; i < 6; ++i) { ir::PTXOperand& operand = *operands[i]; if(operand.addressMode != ir::PTXOperand::Address) continue; auto local = locals.find(operand.identifier); if(local == locals.end()) continue; operand.identifier = local->second; } } } // Get the entry and exit points auto entryMapping = newBlocks.find( inlinedKernelPointer->cfg()->get_entry_block()); assert(entryMapping != newBlocks.end()); functionEntry = entryMapping->second; auto exitMapping = newBlocks.find( inlinedKernelPointer->cfg()->get_exit_block()); assert(exitMapping != newBlocks.end()); functionExit = exitMapping->second; }
/*! \brief Funnel all exit instructions of a kernel into one exit block
 *
 * The first exit instruction (traps excluded) of every block is collected.
 * With fewer than two such blocks the graph is left alone; a single exit
 * instruction merely has its opcode set to Ret for functions and Exit
 * otherwise. With two or more, a new block is inserted in front of the
 * graph's exit block, all edges into the exit block are redirected to it,
 * the collected exit instructions are deleted (blocks reaching the new
 * block over a branch edge get a uniform branch to it) and the new block
 * receives the only Ret/Exit instruction.
 *
 * \param k The kernel whose control flow graph is modified.
 * \return true if exit blocks were merged, false if nothing was merged.
 */
bool SimplifyControlFlowGraphPass::_mergeExitBlocks(ir::IRKernel& k)
{
	using BlockMap = std::unordered_map<ir::ControlFlowGraph::iterator,
		ir::ControlFlowGraph::instruction_iterator>;

	report(" Merging exit blocks...");

	BlockMap exitBlocks;

	// Find all blocks with exit instructions
	for(auto block = k.cfg()->begin(); block != k.cfg()->end(); ++block)
	{
		for(auto instruction = block->instructions.begin();
			instruction != block->instructions.end(); ++instruction)
		{
			auto& ptx = static_cast<ir::PTXInstruction&>(**instruction);

			if(!ptx.isExit()) continue;
			if(ptx.opcode == ir::PTXInstruction::Trap) continue;

			// There should be an edge to the exit block
			assertM(block->find_out_edge(k.cfg()->get_exit_block())
				!= block->out_edges.end(), "No edge from "
				<< block->label() << " to exit node.");

			exitBlocks.insert(std::make_pair(block, instruction));
			break;
		}
	}

	// If there is only one/zero blocks, then don't change anything
	if(exitBlocks.empty()) return false;

	if(exitBlocks.size() == 1)
	{
		auto& ptx = static_cast<ir::PTXInstruction&>(
			**exitBlocks.begin()->second);

		ptx.opcode = k.function()
			? ir::PTXInstruction::Ret : ir::PTXInstruction::Exit;

		return false;
	}

	// Otherwise...

	// 1) create a new exit block
	auto newExit = k.cfg()->insert_block(
		ir::BasicBlock(k.cfg()->newId()));

	// Copy of the edge list, the original changes while edges are removed
	ir::BasicBlock::EdgePointerVector redirected =
		k.cfg()->get_exit_block()->in_edges;

	// 1a) Create edges targetting the new block
	for(auto edge = redirected.begin(); edge != redirected.end(); ++edge)
	{
		k.cfg()->insert_edge(ir::Edge((*edge)->head, newExit,
			(*edge)->type));
		k.cfg()->remove_edge(*edge);
	}

	k.cfg()->insert_edge(ir::Edge(newExit, k.cfg()->get_exit_block(),
		ir::Edge::FallThrough));

	// 2) Delete the instructions from their blocks
	for(auto entry = exitBlocks.begin(); entry != exitBlocks.end(); ++entry)
	{
		auto exitingBlock = entry->first;
		auto exitInstruction = entry->second;

		report(" merging block " << exitingBlock->label());

		// 2a) Insert a branch from blocks with branch edges
		auto edge = newExit->find_in_edge(exitingBlock);

		if((*edge)->type == ir::Edge::Branch)
		{
			auto jump = new ir::PTXInstruction(ir::PTXInstruction::Bra,
				ir::PTXOperand(newExit->label()));

			jump->uni = true;

			exitingBlock->instructions.push_back(jump);
		}

		delete *exitInstruction;
		exitingBlock->instructions.erase(exitInstruction);
	}

	// 3 Add an appropriate exit instruction to the new exit block
	newExit->instructions.push_back(new ir::PTXInstruction(
		k.function() ? ir::PTXInstruction::Ret : ir::PTXInstruction::Exit));

	return true;
}