/// Returns true when no conditional branch reachable through the elements of
/// \p R is classified as non-uniform.
///
/// \param R               Region whose elements are inspected.
/// \param UniformMDKindID Metadata kind ID looked up on conditional branches
///                        that live inside sub-regions.
/// \param DA              Divergence analysis queried for the conditional
///                        branches of the region's direct (non-sub-region)
///                        elements.
/// \returns false as soon as one offending conditional branch is found,
///          true otherwise. Unconditional branches and non-branch terminators
///          are ignored.
static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
                                   const DivergenceAnalysis &DA) {
  for (auto Node : R->elements()) {
    if (Node->isSubRegion()) {
      // Explicitly refuse to treat regions as uniform if they have
      // non-uniform subregions. We cannot rely on DivergenceAnalysis for
      // branches in subregions because those branches may have been removed
      // and re-created, so we look for our metadata instead.
      //
      // Warning: It would be nice to treat regions as uniform based only on
      // their direct child basic blocks' terminators, regardless of whether
      // subregions are uniform or not. However, this requires a very careful
      // look at SIAnnotateControlFlow to make sure nothing breaks there.
      for (auto SubBB : Node->getNodeAs<Region>()->blocks()) {
        auto SubTerm = dyn_cast<BranchInst>(SubBB->getTerminator());
        const bool IsCondBranch = SubTerm && SubTerm->isConditional();
        if (IsCondBranch && !SubTerm->getMetadata(UniformMDKindID))
          return false;
      }
      continue;
    }

    // Direct child block: ask the divergence analysis about its terminator.
    auto Term = dyn_cast<BranchInst>(Node->getEntry()->getTerminator());
    if (!Term || !Term->isConditional())
      continue;

    if (!DA.isUniform(Term))
      return false;

    LLVM_DEBUG(dbgs() << "BB: " << Term->getParent()->getName()
                      << " has uniform terminator\n");
  }

  return true;
}
/// Flags the trailing branch of every non-divergent block as uniform.
///
/// Runs DivergenceAnalysis on \p k, then walks the kernel's dataflow graph
/// from the second block up to (but excluding) the last block in iteration
/// order. The first and last blocks are presumably the entry and exit blocks
/// -- confirm against DataflowGraph. For each visited block that the analysis
/// does not report as divergent, the last instruction is examined; if it is a
/// PTX `Bra` instruction its `uni` flag is set to true.
///
/// \param k Kernel to analyse and modify in place.
void SyncEliminationPass::runOnKernel(ir::Kernel& k) {
    DivergenceAnalysis divAnalysis;
    divAnalysis.runOnKernel(k);

    DataflowGraph::iterator block = ++k.dfg()->begin();
    DataflowGraph::iterator blockEnd = --k.dfg()->end();

    for (; block != blockEnd; ++block) {
        if (divAnalysis.isDivBlock(block)) {
            continue;
        }

        // A block without instructions has no terminator to inspect;
        // decrementing end() on an empty container is undefined behaviour.
        if (block->_instructions.empty()) {
            continue;
        }

        // Bind by reference: only the instruction pointer is needed, so there
        // is no reason to copy the whole dataflow instruction record.
        DataflowGraph::Instruction& lastInst = block->_instructions.back();

        if (typeid(ir::PTXInstruction) != typeid(*(lastInst.i))) {
            continue;
        }

        ir::PTXInstruction* ptxInst =
            static_cast<ir::PTXInstruction*>(lastInst.i);
        if (ptxInst->opcode == ir::PTXInstruction::Opcode::Bra) {
            ptxInst->uni = true;
        }
    }
}
/// Returns true when the condition of every conditional branch terminating a
/// block of \p R is reported as uniform by \p DA.
///
/// Blocks whose terminator is not a BranchInst, or is an unconditional branch,
/// are skipped. The scan stops and returns false at the first conditional
/// branch whose condition is not uniform.
static bool hasOnlyUniformBranches(const Region *R,
                                   const DivergenceAnalysis &DA) {
  for (const BasicBlock *Block : R->blocks()) {
    const BranchInst *Term = dyn_cast<BranchInst>(Block->getTerminator());

    if (Term && Term->isConditional()) {
      if (!DA.isUniform(Term->getCondition()))
        return false;

      DEBUG(dbgs() << "BB: " << Block->getName()
                   << " has uniform terminator\n");
    }
  }

  return true;
}
/// Looks for the most profitable pair of divergent branch targets in \p k and
/// weaves them together.
///
/// Steps, as performed by the code below:
///  1. Run DivergenceAnalysis on the kernel.
///  2. For every dataflow-graph block that ends with a conditional branch and
///     is reported divergent, take its fallthrough block and the first target
///     that differs from the fallthrough.
///  3. Skip the pair if thereIsPathFromB1toB2 reports a path in either
///     direction between the two targets (the branch block's post-dominator
///     is passed to that helper).
///  4. Otherwise compute BlockMatcher::calculateUnificationGain and remember
///     the pair with the largest gain.
///  5. If the best gain exceeds 10.0, print the two labels and call
///     weaveBlocks on the remembered branch/targets/path.
///  6. Re-run the divergence analysis.
///
/// The enclosing do/while currently executes exactly once (`while (false)`).
///
/// \param k Kernel to analyse and transform in place.
void BlockUnificationPass::runOnKernel( ir::Kernel& k )
{
    typedef std::set<ir::ControlFlowGraph::BasicBlock*> VisitedBlockSet;

    InstructionConverter instConv;
    ir::PTXInstruction::ComputeCapability deviceCapability =
        ir::PTXInstruction::Cap_2_0;

    // Best candidate found so far; only read when largestGain > 10.0, in
    // which case all four have been assigned.
    DataflowGraph::iterator unificationBranch;
    DataflowGraph::iterator unificationTarget1;
    DataflowGraph::iterator unificationTarget2;
    BlockMatcher::MatrixPath bestPath;
    float largestGain = 0.0;

    // analyze kernel for divergence
    DivergenceAnalysis divAnalysis;
    divAnalysis.runOnKernel(k);

    do {
        largestGain = 0.0;

        DataflowGraph::iterator block = k.dfg()->begin();
        for (; block != k.dfg()->end(); ++block) {
            ir::ControlFlowGraph::const_iterator irBlock = block->block();
            DataflowGraph::const_iterator constBlock = block;

            if (irBlock->endsWithConditionalBranch()
                && divAnalysis.isDivBlock(constBlock)) {
                // get the fallthrough block
                DataflowGraph::iterator fallthroughBlock =
                    block->fallthrough();

                // get the branch block: the first target that is not the
                // fallthrough. Bound by const reference so the target set is
                // not copied when targets() returns a reference.
                DataflowGraph::iterator branchBlock = fallthroughBlock;
                const DataflowGraph::BlockPointerSet& branchTargets =
                    block->targets();
                DataflowGraph::BlockPointerSet::const_iterator it =
                    branchTargets.begin();
                for (; it != branchTargets.end(); ++it) {
                    if (*it != fallthroughBlock) {
                        branchBlock = *it;
                        break;
                    }
                }
                assertM(branchBlock != fallthroughBlock,
                    "Block unification pass error: could not find fallthrough");

                ir::PostdominatorTree* pdomTree = k.pdom_tree();
                ir::ControlFlowGraph::const_iterator postDomBlk =
                    pdomTree->getPostDominator(block->block());

                // Scratch sets for the two path queries. They live on the
                // stack so they are released automatically; the previous
                // heap-allocated sets were never freed.
                // NOTE(review): assumes thereIsPathFromB1toB2 neither stores
                // nor deletes the pointer it is given -- confirm.
                VisitedBlockSet visitedFromBranch;
                VisitedBlockSet visitedFromFallthrough;

                bool haveBranch2FallthroughPath = thereIsPathFromB1toB2(
                    branchBlock->block(), fallthroughBlock->block(),
                    postDomBlk, &visitedFromBranch);
                bool haveFallthrough2BranchPath = thereIsPathFromB1toB2(
                    fallthroughBlock->block(), branchBlock->block(),
                    postDomBlk, &visitedFromFallthrough);

                if (!haveBranch2FallthroughPath
                    && !haveFallthrough2BranchPath) {
                    // Calculate branch targets' unification gain
                    BlockMatcher::MatrixPath path;
                    float gain = BlockMatcher::calculateUnificationGain(
                        k.dfg(), *fallthroughBlock, *branchBlock, path,
                        instConv, deviceCapability);

                    if (gain > largestGain) {
                        largestGain = gain;
                        unificationBranch = block;
                        unificationTarget1 = fallthroughBlock;
                        unificationTarget2 = branchBlock;
                        bestPath = path;
                    }
                }
            }
        }

        if (largestGain > 10.0) {
            // Unify the basic block pair with biggest gain (if there's one)
            cout << ">>>>> unifying blocks: "
                 << unificationTarget1->block()->label << " and "
                 << unificationTarget2->block()->label << std::endl;
            weaveBlocks(unificationBranch, unificationTarget1,
                unificationTarget2, bestPath, k.dfg());
        }

        // refresh divergence analysis data
        divAnalysis.run();
    } while (false); //(largestGain > 0.0);
}