/*! \brief Collects the blocks lying on the paths that leave a block

    Each successor of \p block, except the block's post-dominator itself,
    seeds a buildDivergentSubgraph() walk that stops at the post-dominator.
    The blocks gathered by all of the walks are merged into one set.

    \param block The block whose successors are explored
    \return The union of the blocks gathered from every explored successor
*/
DivergenceAnalysis::block_set
DivergenceAnalysis::_getDivergentBlocksInPostdominanceFrontier(
    const DataflowGraph::iterator &block) {
    const Analysis* analysis = getAnalysis("DataflowGraphAnalysis");
    assert(analysis != 0);

    // getCFGtoDFGMap() is accessed through a non-const graph reference
    DataflowGraph &graph = const_cast<DataflowGraph&>(
        static_cast<const DataflowGraph&>(*analysis));

    PostdominatorTree* postDominatorTree = (PostdominatorTree*)
        (getAnalysis("PostDominatorTreeAnalysis"));

    auto reconvergencePoint = graph.getCFGtoDFGMap()[
        postDominatorTree->getPostDominator(block->block())];

    block_set collected;

    for (const auto &next : block->successors()) {
        // an edge straight to the post-dominator contributes no blocks
        if (next == reconvergencePoint) continue;

        block_set reachable;
        buildDivergentSubgraph(reachable, next, reconvergencePoint);
        collected.insert(reachable.begin(), reachable.end());
    }

    return collected;
}
/*! \brief Tests if a block ends with a divergent branch instruction
    (isDivBranchInstr)

    \param block The block to test
    \return false for a block without instructions, otherwise the result of
        isDivBranch() on the block's last instruction
*/
bool DivergenceAnalysis::isDivBlock(const DataflowGraph::iterator &block) const {
    // nothing to test when the block holds no instructions
    if (block->instructions().empty()) {
        return false;
    }

    return isDivBranch(block->instructions().back());
}
/*! \brief Recursively gathers the blocks reachable from a block, stopping
        at the post-dominator

    \param graph Set receiving every visited block; membership in it also
        marks a block as already visited
    \param block The block to visit
    \param postDominator The block at which the walk stops
    \return true if an edge to \p postDominator was found from \p block,
        directly or through the recursion; false if \p block has a barrier
        or was already visited
*/
static bool buildDivergentSubgraph(
    DivergenceAnalysis::block_set& graph,
    const DataflowGraph::iterator &block,
    const DataflowGraph::iterator &postDominator) {
    // don't include blocks with barriers
    if (hasBarrier(block)) return false;

    // skip loops: a block already in the set has been walked before
    const bool firstVisit = graph.insert(block).second;
    if (!firstVisit) return false;

    bool reachesPostDominator = false;

    for (const auto &next : block->successors()) {
        if (next == postDominator) {
            // stop at the post dominator
            reachesPostDominator = true;
        }
        else if (buildDivergentSubgraph(graph, next, postDominator)) {
            // every successor is explored regardless of earlier results
            reachesPostDominator = true;
        }
    }

    return reachesPostDominator;
}
/*! \brief Tests if a block contains a barrier or a texture intrinsic call

    Only instructions whose dynamic type is exactly ir::PTXInstruction are
    inspected.

    \param block The block whose instructions are scanned
    \return true if an instruction has the Bar opcode, or is a call whose
        first operand is a function name starting with
        "_Z_intrinsic_pseudo_tex"; false otherwise
*/
static bool hasBarrier(const DataflowGraph::iterator &block) {
    for (const auto &entry : block->instructions()) {
        if (typeid(ir::PTXInstruction) != typeid(*(entry.i))) continue;

        auto ptx = static_cast<ir::PTXInstruction*>(entry.i);

        if (ptx->opcode == ir::PTXInstruction::Bar) return true;

        // texture instruction intrinsics
        const bool textureIntrinsicCall = ptx->isCall()
            && ptx->a.addressMode == ir::PTXOperand::FunctionName
            && ptx->a.identifier.find("_Z_intrinsic_pseudo_tex") == 0;

        if (textureIntrinsicCall) return true;
    }

    return false;
}
/*! \brief Tests if fewer than two successors of a block do anything other
        than exit immediately

    A successor counts as exiting when following its chain of empty,
    single-successor blocks arrives at the last block of the dataflow graph,
    or at a single-successor block whose only instruction is an exit.

    \param block The block whose successors are examined
    \return true if the number of successors minus the number of exiting
        successors is less than two
*/
bool DivergenceAnalysis::_hasTrivialPathToExit(
    const DataflowGraph::iterator &block) const {
    // We can ignore divergent threads that immediately exit
    const Analysis* analysis = getAnalysis("DataflowGraphAnalysis");
    assert(analysis != 0);

    const DataflowGraph &graph = static_cast<const DataflowGraph&>(*analysis);

    // the last block of the graph is treated as the exit
    auto exitBlock = --graph.end();

    unsigned int trivialExits = 0;

    for (const auto &start : block->successors()) {
        auto cursor = start;

        for (;;) {
            if (cursor == exitBlock) {
                ++trivialExits;
                break;
            }

            // a fork or a dead end is not a trivial path
            if (cursor->successors().size() != 1) break;

            if (!cursor->instructions().empty()) {
                // only a lone exit instruction keeps the path trivial
                if (cursor->instructions().size() == 1) {
                    const ir::PTXInstruction &only =
                        *(static_cast<ir::PTXInstruction *>(
                        cursor->instructions().back().i));

                    if (only.isExit()) ++trivialExits;
                }
                break;
            }

            // empty block: keep following its single successor
            cursor = *cursor->successors().begin();
        }
    }

    return block->successors().size() - trivialExits < 2;
}
/*! \brief Replaces the guard predicate of an instruction with a SelP

    The instruction is made unconditional (guard set to PT) and its
    destination is redirected to a fresh temporary register. A SelP is then
    inserted at position id + 1 with the original destination as d and b,
    the temporary as a, and the original guard predicate as c.

    \param block The block containing the instruction
    \param id The position of the instruction within the block
*/
void ConvertPredicationToSelectPass::_replacePredicate(
    DataflowGraph::iterator block, unsigned int id ) {
    DataflowGraph::InstructionVector::const_iterator target(
        block->instructions().begin() );
    std::advance( target, id );

    report( " Converting instruction " << target->i->toString() );

    ir::PTXInstruction selp( ir::PTXInstruction::SelP );
    ir::PTXInstruction& guarded =
        static_cast< ir::PTXInstruction& >( *target->i );

    // all three value operands start out as copies of the destination
    selp.d = guarded.d;
    selp.b = guarded.d;
    selp.a = guarded.d;

    // a reads the temporary that the guarded instruction now writes
    selp.a.reg = _tempRegister();
    selp.c = guarded.pg;

    guarded.pg.condition = ir::PTXOperand::PT;
    guarded.d.reg = selp.a.reg;

    _kernel->dfg()->insert( block, selp, id + 1 );
}
/*! \brief Converts every predicated instruction of a block, other than
        Bra, Call and Ret, into its SelP form

    \param block The block whose instructions are rewritten in place
*/
void ConvertPredicationToSelectPass::_runOnBlock(
    DataflowGraph::iterator block ) {
    // position of 'instruction' within the block, kept in step with it
    unsigned int index = 0;

    DataflowGraph::InstructionVector::const_iterator instruction =
        block->instructions().begin();

    while( instruction != block->instructions().end() ) {
        ir::PTXInstruction& ptx =
            static_cast< ir::PTXInstruction& >( *instruction->i );

        const bool keepsPredicate =
            ptx.opcode == ir::PTXInstruction::Bra
            || ptx.opcode == ir::PTXInstruction::Call
            || ptx.opcode == ir::PTXInstruction::Ret;

        if( !keepsPredicate && ptx.pg.condition != ir::PTXOperand::PT ) {
            _replacePredicate( block, index );

            // _replacePredicate inserts into the container being walked.
            // Whether that keeps 'instruction' valid depends on the
            // container type, so the iterator is looked up again by index.
            instruction = block->instructions().begin();
            std::advance( instruction, index );
        }

        ++instruction;
        ++index;
    }
}
/*! \brief Counts the successors of a block from which the block's
        post-dominator is reached

    A successor is counted when it is the post-dominator itself, or when
    doAnyDivergentPathsReachThePostDominator() reports true for it.

    \param block The block whose successors are examined
    \return The number of counted successors
*/
unsigned int DivergenceAnalysis::_numberOfDivergentPathsToPostDominator(
    const DataflowGraph::iterator &block) const {
    const Analysis* analysis = getAnalysis("DataflowGraphAnalysis");
    assert(analysis != 0);

    // getCFGtoDFGMap() is accessed through a non-const graph reference
    DataflowGraph &graph = const_cast<DataflowGraph&>(
        static_cast<const DataflowGraph&>(*analysis));

    PostdominatorTree* postDominatorTree = (PostdominatorTree*)
        (getAnalysis("PostDominatorTreeAnalysis"));

    auto reconvergencePoint = graph.getCFGtoDFGMap()[
        postDominatorTree->getPostDominator(block->block())];

    unsigned int pathCount = 0;

    for (const auto &next : block->successors()) {
        if (next == reconvergencePoint) {
            // a direct edge to the post-dominator is a path of its own
            ++pathCount;
        }
        else {
            block_set visited;
            if (doAnyDivergentPathsReachThePostDominator(visited,
                next, reconvergencePoint)) {
                ++pathCount;
            }
        }
    }

    report(" There are " << pathCount << " divergent paths from "
        << block->label() << " to post-dominator "
        << reconvergencePoint->label());

    return pathCount;
}
/*! \brief Unifies the pair of divergent branch targets with the largest
        unification gain

    Every block that ends with a conditional branch and is reported as
    divergent by the divergence analysis is a candidate. A candidate's
    fall-through block and branch target are scored with
    BlockMatcher::calculateUnificationGain(), unless thereIsPathFromB1toB2()
    reports a path from one of them to the other. If the best gain found is
    greater than 10.0, that pair is merged with weaveBlocks(). The search
    currently runs once: the enclosing loop condition is disabled.

    \param k The kernel to transform
*/
void BlockUnificationPass::runOnKernel( ir::Kernel& k ) {
    InstructionConverter instConv;
    ir::PTXInstruction::ComputeCapability deviceCapability =
        ir::PTXInstruction::Cap_2_0;

    // best candidate found so far; only read when largestGain > 10.0
    DataflowGraph::iterator unificationBranch;
    DataflowGraph::iterator unificationTarget1;
    DataflowGraph::iterator unificationTarget2;
    BlockMatcher::MatrixPath bestPath;
    float largestGain = 0.0;

    // analyze kernel for divergence
    DivergenceAnalysis divAnalysis;
    divAnalysis.runOnKernel(k);

    do {
        largestGain = 0.0;

        for (DataflowGraph::iterator block = k.dfg()->begin();
            block != k.dfg()->end(); ++block) {
            ir::ControlFlowGraph::const_iterator irBlock = block->block();
            DataflowGraph::const_iterator constBlock = block;

            if (!(irBlock->endsWithConditionalBranch()
                && divAnalysis.isDivBlock(constBlock))) {
                continue;
            }

            // get the fallthrough block
            DataflowGraph::iterator fallthroughBlock = block->fallthrough();

            // get the branch block: the first target that is not the
            // fallthrough (bound by reference, no copy of the set needed)
            DataflowGraph::iterator branchBlock = fallthroughBlock;
            const DataflowGraph::BlockPointerSet& branchTargets =
                block->targets();
            for (DataflowGraph::BlockPointerSet::const_iterator it =
                branchTargets.begin(); it != branchTargets.end(); ++it) {
                if (*it != fallthroughBlock) {
                    branchBlock = *it;
                    break;
                }
            }

            assertM(branchBlock != fallthroughBlock,
                "Block unification pass error: could not find fallthrough");

            ir::PostdominatorTree* pdomTree = k.pdom_tree();
            ir::ControlFlowGraph::const_iterator postDomBlk =
                pdomTree->getPostDominator(block->block());

            // The scratch sets handed to thereIsPathFromB1toB2 live on the
            // stack so they are released after each query; they used to be
            // allocated with new and never freed.
            // NOTE(review): assumes the callee only borrows the pointer and
            // does not delete or retain it - confirm against its definition.
            std::set<ir::ControlFlowGraph::BasicBlock*> seenFromBranch;
            bool haveBranch2FallthroughPath = thereIsPathFromB1toB2(
                branchBlock->block(), fallthroughBlock->block(),
                postDomBlk, &seenFromBranch);

            std::set<ir::ControlFlowGraph::BasicBlock*> seenFromFallthrough;
            bool haveFallthrough2BranchPath = thereIsPathFromB1toB2(
                fallthroughBlock->block(), branchBlock->block(),
                postDomBlk, &seenFromFallthrough);

            if (haveBranch2FallthroughPath || haveFallthrough2BranchPath) {
                continue;
            }

            // Calculate branch targets' unification gain
            BlockMatcher::MatrixPath path;
            float gain = BlockMatcher::calculateUnificationGain(
                k.dfg(), *fallthroughBlock, *branchBlock, path,
                instConv, deviceCapability);

            if (gain > largestGain) {
                largestGain = gain;
                unificationBranch = block;
                unificationTarget1 = fallthroughBlock;
                unificationTarget2 = branchBlock;
                bestPath = path;
            }
        }

        if (largestGain > 10.0) {
            // Unify the basic block pair with biggest gain (if there's one)
            cout << ">>>>> unifying blocks: "
                << unificationTarget1->block()->label << " and "
                << unificationTarget2->block()->label << std::endl;
            weaveBlocks(unificationBranch, unificationTarget1,
                unificationTarget2, bestPath, k.dfg());
        }

        // refresh divergence analysis data
        divAnalysis.run();
    } while (false); //(largestGain > 0.0);
}
void BlockUnificationPass::weaveBlocks(DataflowGraph::iterator branchBlock, DataflowGraph::iterator target1, DataflowGraph::iterator target2, BlockMatcher::MatrixPath& extractionPath, DataflowGraph* dfg) { DataflowGraph::iterator oldFallthroughBlock = branchBlock; DataflowGraph::iterator oldBranchBlock = branchBlock; // get branch predicate ir::ControlFlowGraph::const_iterator irBlock = branchBlock->block(); ir::Instruction* branchInst = irBlock->getTerminator(); ir::PTXInstruction* branchInstPtx = static_cast<ir::PTXInstruction*>(branchInst); ir::PTXOperand* branchPredicate = &(branchInstPtx->pg); std::string labelPrefix = "$BBweave_" + target1->block()->label + "_" + target2->block()->label; int blockNum = 0; // while not consumed path, generate basic blocks BlockExtractor extractor(dfg, target1, target2, extractionPath, *branchPredicate); while (extractor.hasNext()) { if (extractor.nextStep() == BlockMatcher::Match || extractor.nextStep() == BlockMatcher::Substitution) { // block label std::stringstream blockLabel; blockLabel << labelPrefix << "_uni_" << blockNum++; // create block DataflowGraph::iterator newUnifiedBlock = dfg->insert(oldFallthroughBlock, target1, blockLabel.str()); extractor.extractUnifiedBlock(newUnifiedBlock); // link blocks dfg->addEdge(newUnifiedBlock, target2, ir::ControlFlowGraph::Edge::Branch); if (oldFallthroughBlock == oldBranchBlock) { // oldFallthroughBlock and oldBranchBlock all point to branchBlock. 
// remove oldBranchBlock -> target2 because there's // already a edge from branchBlock to newUnifiedBlock dfg->removeEdge(oldBranchBlock, target2); } else { dfg->removeEdge(oldFallthroughBlock, newUnifiedBlock); dfg->redirect(oldBranchBlock, target2, newUnifiedBlock); dfg->addEdge(oldFallthroughBlock, newUnifiedBlock, ir::ControlFlowGraph::Edge::Branch); // add goto in oldFallthroughBlock to newUnifiedBlock ir::PTXInstruction gotoPtx(ir::PTXInstruction::Bra); ir::PTXOperand gotoLabelOperand(blockLabel.str(), ir::PTXOperand::Label, ir::PTXOperand::s32); gotoPtx.setDestination(gotoLabelOperand); gotoPtx.uni = true; ir::Instruction& gotoInst = gotoPtx; dfg->insert(oldFallthroughBlock, gotoInst); } oldFallthroughBlock = newUnifiedBlock; oldBranchBlock = newUnifiedBlock; } else { // create fallthrough block std::stringstream fallthroughLabel; fallthroughLabel << labelPrefix << "_ft_" << blockNum++; DataflowGraph::iterator newFallthoughBlock = dfg->insert(oldFallthroughBlock, target1, fallthroughLabel.str()); // create branch block std::stringstream branchLabel; branchLabel << labelPrefix << "_bra_" << blockNum++; DataflowGraph::iterator newBranchBlock = dfg->insert(oldBranchBlock, target2, branchLabel.str()); // fill blocks extractor.extractDivergentBlocks(newFallthoughBlock, newBranchBlock); if (oldFallthroughBlock == branchBlock) { const ir::PTXOperand braLabelOperand(branchLabel.str(), ir::PTXOperand::Label, ir::PTXOperand::s32); branchInstPtx->setDestination(braLabelOperand); } else { ir::PTXInstruction braPtx(ir::PTXInstruction::Bra); ir::PTXOperand braLabelOperand(branchLabel.str(), ir::PTXOperand::Label, ir::PTXOperand::s32); braPtx.setDestination(braLabelOperand); braPtx.setPredicate(*branchPredicate); ir::Instruction& bra = braPtx; dfg->insert(oldFallthroughBlock, bra); } // all needed edges were already created in block creation, // no more edge manipulation needed oldFallthroughBlock = newFallthoughBlock; oldBranchBlock = newBranchBlock; } } // remove branch 
instruction on BranchBlock if needed if (branchBlock->targets().size() == 0) { // branchBlock does not have branch at end, remove that instruction unsigned int branchInstPos = branchBlock->instructions().size() - 1; dfg->erase(branchBlock, branchInstPos); } if (oldFallthroughBlock == oldBranchBlock) { // If target1 has no fall-through, then // switch branch and fall-through edges. if ( !(target1->block()->has_fallthrough_edge()) ) { dfg->setEdgeType(oldFallthroughBlock, target1, ir::ControlFlowGraph::BasicBlock::Edge::Branch); dfg->setEdgeType(oldFallthroughBlock, target2, ir::ControlFlowGraph::BasicBlock::Edge::FallThrough); } } // weaved block finishes with a divergent section // copy target2 targets to a ending divergent fallthrough block dfg->copyOutgoingBranchEdges(target1, oldFallthroughBlock); // remove target1 dfg->erase(target1); // copy target2 targets to a ending unified block or divergent branch block dfg->copyOutgoingBranchEdges(target2, oldBranchBlock); // remove target2 dfg->erase(target2); // remove empty blocks or blocks with just a goto // TODO: removing this blocks is not essential for correctness, yet it // might increase performance. // replaces all uses of old registers to use new ones // in code after unified basic blocks std::cerr << "\n\n\nCalling register replacement\n\n\n"; replaceRegisters(dfg, extractor); // recalculate live in and live out //dfg->compute(); //dfg->toSsa(); }
/*! \brief Collects the branches that can be divergent

    A block contributes a branch when its last instruction is an
    ir::PTXInstruction with the Bra opcode that is not marked uni, has
    a source register, and whose block maps to a post-dominator other than
    the end of the dataflow graph.

    \param branches The set receiving a BranchInfo for each such branch
*/
void DivergenceAnalysis::_findBranches(branch_set& branches) {
    Analysis* analysis = getAnalysis("DataflowGraphAnalysis");
    assert(analysis != 0);

    DataflowGraph &graph = static_cast<DataflowGraph&>(*analysis);
    const DataflowGraph::iterator lastBlock = graph.end();

    /* Post-dominator tree */
    PostdominatorTree *postDominatorTree = (PostdominatorTree*)
        (getAnalysis("PostDominatorTreeAnalysis"));

    report(" Finding branches");

    /* Create a list of branches that can be divergent, that is,
        they are not bra.uni and have a predicate */
    for (DataflowGraph::iterator block = graph.begin();
        block != lastBlock; ++block) {
        if (block->instructions().empty()) continue;

        /* Branch instructions can only be the last
            instruction of a basic block */
        DataflowGraph::Instruction& terminator =
            block->instructions().back();

        if (typeid(ir::PTXInstruction) != typeid(*(terminator.i))) continue;

        ir::PTXInstruction *ptx =
            static_cast<ir::PTXInstruction*>(terminator.i);

        if (ptx->opcode != ir::PTXInstruction::Bra) continue;

        report(" examining " << ptx->toString());

        if (ptx->uni == true) {
            report(" eliminated, uniform...");
            continue;
        }

        if (terminator.s.size() == 0) {
            report(" eliminated, wrong source count ("
                << terminator.s.size() << ")...");
            continue;
        }

        assert(terminator.s.size() == 1);

        DataflowGraph::iterator postDomBlock = graph.getCFGtoDFGMap()[
            postDominatorTree->getPostDominator(block->block())];

        if (postDomBlock == graph.end()) {
            report(" eliminated, no post-dominator...");
            continue;
        }

        BranchInfo newBranch(&(*block), &(*postDomBlock),
            terminator, _divergGraph);
        branches.insert(newBranch);
        report(" is potentially divergent...");
    }
}