ir::ControlFlowGraph::iterator HoistParameterLoadsPass::_getTopLevelDominatingBlock(
    ir::IRKernel& k, ir::ControlFlowGraph::iterator block)
{
    auto loopAnalysis = static_cast<analysis::LoopAnalysis*>(
        getAnalysis(Analysis::LoopAnalysis));
    auto dominatorTree = static_cast<analysis::DominatorTree*>(
        getAnalysis(Analysis::DominatorTreeAnalysis));

    // Walk up the dominator tree until the block is no longer inside a loop.
    while(loopAnalysis->isContainedInLoop(block))
    {
        auto dominator = dominatorTree->getDominator(block);

        // If we reach the entry block, split its fallthrough edge to create
        // a fresh block to hoist into, then stop searching.
        if(dominator == block->cfg->get_entry_block())
        {
            block = k.cfg()->split_edge(dominator->get_fallthrough_edge(),
                ir::BasicBlock(k.cfg()->newId())).first->tail;

            invalidateAnalysis(analysis::Analysis::LoopAnalysis);
            invalidateAnalysis(analysis::Analysis::DominatorTreeAnalysis);
            break;
        }

        block = dominator;
    }

    return block;
}
DivergenceAnalysis::block_set
    DivergenceAnalysis::_getDivergentBlocksInPostdominanceFrontier(
    const DataflowGraph::iterator& block)
{
    const Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
    assert(dfgAnalysis != 0);

    const DataflowGraph& cdfg = static_cast<const DataflowGraph&>(*dfgAnalysis);
    DataflowGraph& dfg = const_cast<DataflowGraph&>(cdfg);

    PostdominatorTree* dtree = (PostdominatorTree*)
        (getAnalysis("PostDominatorTreeAnalysis"));

    auto postDominator =
        dfg.getCFGtoDFGMap()[dtree->getPostDominator(block->block())];

    block_set divergentBlocks;

    // Collect every block on a divergent path from the branch up to, but not
    // including, its immediate post-dominator.
    for (auto successor = block->successors().begin();
        successor != block->successors().end(); ++successor)
    {
        if (*successor == postDominator) continue;

        block_set allDivergentPaths;

        buildDivergentSubgraph(allDivergentPaths, *successor, postDominator);

        divergentBlocks.insert(allDivergentPaths.begin(),
            allDivergentPaths.end());
    }

    return divergentBlocks;
}
const DataflowGraph* DivergenceAnalysis::getDFG() const
{
    const Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
    assert(dfgAnalysis != 0);

    return static_cast<const DataflowGraph*>(dfgAnalysis);
}
analysis::DataflowGraph& RemoveBarrierPass::_dfg()
{
    Analysis* dfg_structure = getAnalysis(Analysis::DataflowGraphAnalysis);
    assert(dfg_structure != 0);

    return *static_cast<analysis::DataflowGraph*>(dfg_structure);
}
/*! \brief Analyze the control and data flow graphs searching for divergent
 *  variables and blocks
 *
 *  1) Runs a data flow analysis that detects divergent variables and blocks
 *     based on divergent sources, such as t.id and laneId
 *  2) Runs a control flow analysis that detects new divergent variables
 *     based on the dependency of variables on variables created on
 *     divergent paths
 */
void DivergenceAnalysis::analyze(ir::IRKernel& k)
{
    Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
    assert(dfgAnalysis != 0);

    DataflowGraph& dfg = static_cast<DataflowGraph&>(*dfgAnalysis);

    dfg.convertToSSAType(DataflowGraph::Gated);
    assert(dfg.ssa());

    _divergGraph.clear();
    _notDivergentBlocks.clear();
    _kernel = &k;

    report("Running divergence analysis on kernel '" << k.name << "'");

    #if REPORT_PTX > 0
    k.write(std::cout);
    #endif

    DivergenceGraph::node_set predicates;

    /* 1) Data flow analysis that detects divergent variables and blocks
       based on divergent sources, such as t.id and laneId */
    _analyzeDataFlow();

    /* 2) Control flow analysis that detects new divergent variables based
       on the dependency of variables on variables created on divergent
       paths */
    _analyzeControlFlow();
}
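// A minimal sketch (hypothetical names, not Ocelot's API) of the idea behind
// phase 1: divergence is reachability over a register dependence graph.
// Registers seeded by divergent sources (t.id, laneId, atomics) are
// divergent, and divergence propagates along def-use edges, which is what
// _divergGraph.computeDivergence() does for the graph built by the analysis.
#include <map>
#include <queue>
#include <set>
#include <vector>

using Register = unsigned int;

std::set<Register> propagateDivergence(
    const std::map<Register, std::vector<Register>>& defUseEdges,
    const std::set<Register>& divergenceSources)
{
    std::set<Register> divergent(divergenceSources);
    std::queue<Register> worklist;

    for (Register source : divergenceSources) worklist.push(source);

    while (!worklist.empty())
    {
        Register r = worklist.front();
        worklist.pop();

        auto uses = defUseEdges.find(r);
        if (uses == defUseEdges.end()) continue;

        for (Register user : uses->second)
        {
            // Any register computed from a divergent value is divergent.
            if (divergent.insert(user).second) worklist.push(user);
        }
    }

    return divergent;
}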
void MoveEliminationPass::runOnKernel(ir::IRKernel& k)
{
    report("Eliminating moves in kernel " << k.name);

    auto dfg = static_cast<analysis::DataflowGraph*>(
        getAnalysis("DataflowGraphAnalysis"));
    assert(dfg != 0);

    dfg->convertToSSAType(analysis::DataflowGraph::Minimal);

    auto moves = getMoves(dfg);

    bool eliminatedAny = false;

    report(" Eliminating moves");
    for(auto move = moves.begin(); move != moves.end(); ++move)
    {
        if(canEliminate(*move))
        {
            report(" " << (*move)->i->toString());
            eliminate(*move);
            eliminatedAny = true;
        }
    }

    // Register assignments changed, so cached dataflow results are stale.
    if(eliminatedAny)
    {
        invalidateAnalysis("DataflowGraphAnalysis");
    }

    report("finished...");
}
void DeadCodeEliminationPass::runOnKernel(ir::IRKernel& k)
{
    report("Running dead code elimination on kernel " << k.name);
    reportE(REPORT_PTX, k);

    Analysis* dfgAnalysis = getAnalysis(Analysis::DataflowGraphAnalysis);
    assert(dfgAnalysis != 0);

    analysis::DataflowGraph& dfg =
        *static_cast<analysis::DataflowGraph*>(dfgAnalysis);

    assert(dfg.ssa() != analysis::DataflowGraph::SsaType::None);

    BlockSet blocks;

    report(" Starting by scanning all basic blocks");

    for(iterator block = dfg.begin(); block != dfg.end(); ++block)
    {
        report(" Queueing up BB_" << block->id());
        blocks.insert(block);
    }

    while(!blocks.empty())
    {
        iterator block = *blocks.begin();
        blocks.erase(blocks.begin());

        eliminateDeadInstructions(dfg, blocks, block);
    }

    report("Finished running dead code elimination on kernel " << k.name);
    reportE(REPORT_PTX, k);
}
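// The pass above follows a standard worklist pattern: seed the set with every
// block, pop one at a time, and let eliminateDeadInstructions re-insert any
// block whose liveness facts may have changed. A minimal sketch of that
// driver, with hypothetical names (the real work stays in the callback):
#include <functional>
#include <set>

template <typename Block>
void runToFixedPoint(std::set<Block>& worklist,
    const std::function<void(Block, std::set<Block>&)>& process)
{
    // Terminates because each iteration removes one element and elements
    // are only re-inserted when a dataflow fact actually changes.
    while (!worklist.empty())
    {
        Block block = *worklist.begin();
        worklist.erase(worklist.begin());

        process(block, worklist);
    }
}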
void HoistParameterLoadsPass::_tryHoistingLoad(
    ir::ControlFlowGraph::iterator block, ir::PTXInstruction* ptx,
    ir::IRKernel& k)
{
    report(" " << ptx->toString());

    auto newBlock = _getTopLevelDominatingBlock(k, block);

    if(newBlock == block) return;

    report(" hoisting to " << newBlock->label());

    auto dfg = static_cast<analysis::DataflowGraph*>(
        getAnalysis(Analysis::DataflowGraphAnalysis));

    // Clone the load into the dominating block, writing a fresh register.
    auto load = new ir::PTXInstruction(ir::PTXInstruction::Ld);

    load->addressSpace   = ptx->addressSpace;
    load->type           = ptx->type;
    load->volatility     = ptx->volatility;
    load->cacheOperation = ptx->cacheOperation;

    load->d = ir::PTXOperand(ir::PTXOperand::Register, ptx->d.type,
        dfg->newRegister());
    load->a = ptx->a;

    insertBeforeTerminator(newBlock, load);

    // Rewrite the original load into a move from the hoisted result.
    ptx->opcode = ir::PTXInstruction::Mov;
    ptx->a      = load->d;
}
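// Illustrative before/after of _tryHoistingLoad on PTX (register names and
// the parameter symbol are hypothetical):
//
//   before, inside a loop:                after:
//     LOOP:                                 PREHEADER:
//       ld.param.u64 %r1, [param_0];          ld.param.u64 %r9, [param_0];
//       ...                                 LOOP:
//                                             mov.u64 %r1, %r9;
//                                             ...
//
// The load now executes once in the dominating block; the original
// instruction becomes a register move, so every later use of %r1 is intact.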
void LoopUnrollingPass::runOnKernel(ir::IRKernel& k)
{
    Analysis* loopAnalysis = getAnalysis(Analysis::LoopAnalysis);
    assert(loopAnalysis != 0);

    // TODO actually unroll something.
}
analysis::AffineAnalysis& AffineLinearScan::_afa()
{
    Analysis* aff = getAnalysis(Analysis::AffineAnalysis);
    assert(aff != 0);

    return *static_cast<analysis::AffineAnalysis*>(aff);
}
void DivergenceAnalysis::_convergenceAnalysis()
{
    report("Running convergence analysis.");

    Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
    assert(dfgAnalysis != 0);

    DataflowGraph& dfg = static_cast<DataflowGraph&>(*dfgAnalysis);

    /* Assume all blocks are convergent */
    block_set divergentBlocks;

    /* 1) Mark as divergent all blocks in the post-dominance frontier of
          divergent branches along paths that do not encounter known
          convergent points. This is an optimistic analysis. */
    report(" Marking divergent blocks.");
    for (auto block = dfg.begin(); block != dfg.end(); ++block)
    {
        if (!isDivBlock(block) || _hasTrivialPathToExit(block)) continue;

        block_set divergentBlocksInPostdominanceFrontier =
            _getDivergentBlocksInPostdominanceFrontier(block);

        divergentBlocks.insert(divergentBlocksInPostdominanceFrontier.begin(),
            divergentBlocksInPostdominanceFrontier.end());
    }

    report(" Marking convergent blocks.");
    _notDivergentBlocks.clear();
    for (auto block = dfg.begin(); block != dfg.end(); ++block)
    {
        if (divergentBlocks.count(block) == 0)
        {
            report(" " << block->label() << " is assumed convergent.");
            _notDivergentBlocks.insert(block);
        }
    }
}
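// Illustration of the marking above on a diamond CFG (comments only):
//
//         B0   <- divergent branch
//        /  \
//      B1    B2    <- on divergent paths: marked divergent
//        \  /
//         B3   <- immediate post-dominator of B0: every thread reaches it,
//                 so it is left out of the divergent set and later assumed
//                 convergent
//
// _getDivergentBlocksInPostdominanceFrontier walks each successor of B0 up
// to, but not including, B3, collecting B1 and B2.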
void DataDependenceAnalysis::analyze(ir::IRKernel& kernel)
{
    auto dfg = static_cast<DataflowGraph*>(
        getAnalysis("DataflowGraphAnalysis"));
    assert(dfg != 0);

    report("Running data dependence analysis on kernel " << kernel.name);

    for(auto block = dfg->begin(); block != dfg->end(); ++block)
    {
        analyzeBlock(block, dfg, _nodes, _instructionToNodes);
    }
}
void DependenceAnalysis::analyze(ir::IRKernel& kernel)
{
    report("Running dependence analysis on kernel " << kernel.name);

    auto controlDependenceAnalysis = static_cast<ControlDependenceAnalysis*>(
        getAnalysis("ControlDependenceAnalysis"));
    auto dataDependenceAnalysis = static_cast<DataDependenceAnalysis*>(
        getAnalysis("DataDependenceAnalysis"));
    auto memoryDependenceAnalysis = static_cast<MemoryDependenceAnalysis*>(
        getAnalysis("MemoryDependenceAnalysis"));

    // Create one node per instruction, mirroring the control dependence graph.
    for(auto& node : *controlDependenceAnalysis)
    {
        auto newNode = _nodes.insert(_nodes.end(), Node(node.instruction));

        _instructionToNodes.insert(std::make_pair(node.instruction, newNode));
    }

    // Merge in the edges from all three dependence graphs.
    for(auto& node : *this)
    {
        auto controlDependenceNode = controlDependenceAnalysis->getNode(
            node.instruction);

        addEdges(node, *controlDependenceNode, _instructionToNodes);

        auto dataDependenceNode = dataDependenceAnalysis->getNode(
            node.instruction);

        addEdges(node, *dataDependenceNode, _instructionToNodes);

        auto memoryDependenceNode = memoryDependenceAnalysis->getNode(
            node.instruction);

        // Not every instruction accesses memory.
        if(memoryDependenceNode != memoryDependenceAnalysis->end())
        {
            addEdges(node, *memoryDependenceNode, _instructionToNodes);
        }
    }
}
unsigned int DivergenceAnalysis::_numberOfDivergentPathsToPostDominator(
    const DataflowGraph::iterator& block) const
{
    const Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
    assert(dfgAnalysis != 0);

    const DataflowGraph& cdfg = static_cast<const DataflowGraph&>(*dfgAnalysis);
    DataflowGraph& dfg = const_cast<DataflowGraph&>(cdfg);

    PostdominatorTree* dtree = (PostdominatorTree*)
        (getAnalysis("PostDominatorTreeAnalysis"));

    auto postDominator =
        dfg.getCFGtoDFGMap()[dtree->getPostDominator(block->block())];

    unsigned int divergentPaths = 0;

    for (auto successor = block->successors().begin();
        successor != block->successors().end(); ++successor)
    {
        // A direct edge to the post-dominator counts as one path.
        if (*successor == postDominator)
        {
            ++divergentPaths;
            continue;
        }

        block_set allDivergentPaths;

        if (doAnyDivergentPathsReachThePostDominator(allDivergentPaths,
            *successor, postDominator))
        {
            ++divergentPaths;
        }
    }

    report(" There are " << divergentPaths << " divergent paths from "
        << block->label() << " to post-dominator "
        << postDominator->label());

    return divergentPaths;
}
void SafeRegionAnalysis::analyze(ir::IRKernel& kernel)
{
    // Get analyses
    auto cycleAnalysis = static_cast<CycleAnalysis*>(
        getAnalysis("CycleAnalysis"));
    auto dependenceAnalysis = static_cast<DependenceAnalysis*>(
        getAnalysis("DependenceAnalysis"));
    auto controlDependenceAnalysis = static_cast<ControlDependenceAnalysis*>(
        getAnalysis("ControlDependenceAnalysis"));

    // Find basic blocks that cannot be contained in safe regions
    auto blocksThatDependOnSideEffects = getBlocksThatDependOnSideEffects(
        kernel, cycleAnalysis, dependenceAnalysis, controlDependenceAnalysis);

    // Find hammocks in the program
    auto hammockAnalysis = static_cast<HammockGraphAnalysis*>(
        getAnalysis("HammockGraphAnalysis"));

    // Form safe regions around hammocks that do not contain blocks with
    // side effects
    formSafeRegionsAroundHammocks(_root, _regions, hammockAnalysis,
        blocksThatDependOnSideEffects);
}
bool DivergenceAnalysis::_hasTrivialPathToExit(
    const DataflowGraph::iterator& block) const
{
    // We can ignore divergent threads that immediately exit
    unsigned int exitingPaths = 0;

    const Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
    assert(dfgAnalysis != 0);

    const DataflowGraph& dfg = static_cast<const DataflowGraph&>(*dfgAnalysis);

    auto exit = --dfg.end();

    for (auto successor = block->successors().begin();
        successor != block->successors().end(); ++successor)
    {
        auto path = *successor;

        // Follow single-successor chains of empty blocks until the path
        // reaches the exit block, exits explicitly, or branches again.
        while (true)
        {
            if (path == exit)
            {
                ++exitingPaths;
                break;
            }

            if (path->successors().size() != 1)
            {
                break;
            }

            if (!path->instructions().empty())
            {
                if (path->instructions().size() == 1)
                {
                    const ir::PTXInstruction& ptxI =
                        *(static_cast<ir::PTXInstruction*>(
                        path->instructions().back().i));

                    if (ptxI.isExit())
                    {
                        ++exitingPaths;
                    }
                }

                break;
            }

            path = *path->successors().begin();
        }
    }

    // The branch is trivially convergent if at most one path survives.
    return block->successors().size() - exitingPaths < 2;
}
void DivergenceLinearScan::runOnKernel(ir::IRKernel& k)
{
    auto dfg = static_cast<analysis::DataflowGraph*>(
        getAnalysis("DataflowGraphAnalysis"));

    dfg->convertToSSAType(analysis::DataflowGraph::Gated);

    #if DIVERGENCE_REGISTER_PROFILE_H_
    divergenceProfiler::resetSpillData();
    #endif

    _shared.clear();

    LinearScanRegisterAllocationPass::runOnKernel(k);

    #if DIVERGENCE_REGISTER_PROFILE_H_
    if(!k.function()) divergenceProfiler::printSpillResults(k.name);
    #endif
}
void HoistParameterLoadsPass::runOnKernel(ir::IRKernel& k)
{
    typedef std::pair<ir::ControlFlowGraph::iterator,
        ir::PTXInstruction*> Load;
    typedef std::vector<Load> LoadVector;

    auto aliasAnalysis = static_cast<analysis::SimpleAliasAnalysis*>(
        getAnalysis(Analysis::SimpleAliasAnalysis));

    LoadVector candidateLoads;

    report("Hoisting loads in kernel '" << k.name << "'");

    report(" Identifying candidate loads");
    for(auto block = k.cfg()->begin(); block != k.cfg()->end(); ++block)
    {
        for(auto instruction = block->instructions.begin();
            instruction != block->instructions.end(); ++instruction)
        {
            auto ptx = static_cast<ir::PTXInstruction*>(*instruction);

            // Only loads that cannot alias any store and whose operands do
            // not depend on other registers are safe to move.
            if(ptx->isLoad() &&
                aliasAnalysis->cannotAliasAnyStore(*instruction) &&
                hasNoRegisterDependencies(*ptx))
            {
                report(" " << ptx->toString());
                candidateLoads.push_back(std::make_pair(block, ptx));
            }
        }
    }

    report(" Attempting to hoist loads");
    for(auto load = candidateLoads.begin();
        load != candidateLoads.end(); ++load)
    {
        _tryHoistingLoad(load->first, load->second, k);
    }

    invalidateAnalysis(analysis::Analysis::DataflowGraphAnalysis);
    invalidateAnalysis(analysis::Analysis::SimpleAliasAnalysis);
}
void FunctionInliningPass::runOnKernel(ir::IRKernel& k)
{
    report("Running function inlining pass on kernel " << k.name);

    auto analysis = getAnalysis(Analysis::DataflowGraphAnalysis);
    assert(analysis != 0);

    auto dfg = static_cast<analysis::DataflowGraph*>(analysis);

    _nextRegister = dfg->maxRegister() + 1;

    // Get the set of all function calls that satisfy the inlining criteria
    _getFunctionsToInline(k);

    // Inline all of the functions in this set
    _inlineSelectedFunctions(k);

    if(!_calls.empty())
    {
        invalidateAnalysis(Analysis::DataflowGraphAnalysis);
    }
}
void ConstantPropagationPass::runOnKernel(ir::IRKernel& k)
{
    report("Running constant propagation on kernel " << k.name);

    Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
    assert(dfgAnalysis != 0);

    analysis::DataflowGraph& dfg =
        *static_cast<analysis::DataflowGraph*>(dfgAnalysis);

    dfg.convertToSSAType(analysis::DataflowGraph::Minimal);
    assert(dfg.ssa() == analysis::DataflowGraph::Minimal);

    BlockSet blocks;

    report(" Starting by scanning all basic blocks");

    for(iterator block = dfg.begin(); block != dfg.end(); ++block)
    {
        report(" Queueing up BB_" << block->id());
        blocks.insert(block);
    }

    while(!blocks.empty())
    {
        iterator block = *blocks.begin();
        blocks.erase(blocks.begin());

        eliminateRedundantInstructions(dfg, blocks, block);
    }

    report("Finished running constant propagation on kernel " << k.name);
    reportE(REPORT_PTX, k);
}
void terrama2::services::analysis::core::Context::loadMonitoredObject(
    AnalysisHashCode analysisHashCode)
{
    std::lock_guard<std::recursive_mutex> lock(mutex_);

    auto dataManagerPtr = dataManager_.lock();
    if(!dataManagerPtr)
    {
        QString errMsg(QObject::tr("Invalid data manager."));
        throw terrama2::core::InvalidDataManagerException()
            << terrama2::ErrorDescription(errMsg);
    }

    auto analysis = getAnalysis(analysisHashCode);

    for(auto analysisDataSeries : analysis->analysisDataSeriesList)
    {
        auto dataSeriesPtr =
            dataManagerPtr->findDataSeries(analysisDataSeries.dataSeriesId);
        auto datasets = dataSeriesPtr->datasetList;

        if(analysisDataSeries.type ==
            AnalysisDataSeriesType::DATASERIES_MONITORED_OBJECT_TYPE)
        {
            assert(datasets.size() == 1);
            auto dataset = datasets[0];

            auto dataProvider = dataManagerPtr->findDataProvider(
                dataSeriesPtr->dataProviderId);
            terrama2::core::Filter filter;

            // accessing data
            terrama2::core::DataAccessorPtr accessor =
                terrama2::core::DataAccessorFactory::getInstance().make(
                    dataProvider, dataSeriesPtr);
            auto seriesMap = accessor->getSeries(filter);
            auto series = seriesMap[dataset];

            std::string identifier = analysisDataSeries.metadata["identifier"];

            std::shared_ptr<ContextDataSeries> dataSeriesContext(
                new ContextDataSeries);

            if(!series.syncDataSet)
            {
                QString errMsg(QObject::tr("No data available for DataSeries %1")
                    .arg(dataSeriesPtr->id));
                throw terrama2::InvalidArgumentException()
                    << terrama2::ErrorDescription(errMsg);
            }

            if(!series.syncDataSet->dataset())
            {
                QString errMsg(QObject::tr("Adding an invalid dataset to the "
                    "analysis context: DataSeries %1").arg(dataSeriesPtr->id));
                throw terrama2::InvalidArgumentException()
                    << terrama2::ErrorDescription(errMsg);
            }

            std::size_t geomPropertyPosition = te::da::GetFirstPropertyPos(
                series.syncDataSet->dataset().get(), te::dt::GEOMETRY_TYPE);

            dataSeriesContext->series = series;
            dataSeriesContext->identifier = identifier;
            dataSeriesContext->geometryPos = geomPropertyPosition;

            ContextKey key;
            key.datasetId_ = dataset->id;
            key.analysisHashCode_ = analysisHashCode;
            datasetMap_[key] = dataSeriesContext;
        }
        else if(analysisDataSeries.type ==
            AnalysisDataSeriesType::DATASERIES_PCD_TYPE)
        {
            for(auto dataset : dataSeriesPtr->datasetList)
            {
                auto dataProvider = dataManagerPtr->findDataProvider(
                    dataSeriesPtr->dataProviderId);
                terrama2::core::Filter filter;

                // accessing data
                terrama2::core::DataAccessorPtr accessor =
                    terrama2::core::DataAccessorFactory::getInstance().make(
                        dataProvider, dataSeriesPtr);
                auto seriesMap = accessor->getSeries(filter);
                auto series = seriesMap[dataset];

                std::string identifier =
                    analysisDataSeries.metadata["identifier"];

                std::shared_ptr<ContextDataSeries> dataSeriesContext(
                    new ContextDataSeries);

                std::size_t geomPropertyPosition = te::da::GetFirstPropertyPos(
                    series.syncDataSet->dataset().get(), te::dt::GEOMETRY_TYPE);

                dataSeriesContext->series = series;
                dataSeriesContext->identifier = identifier;
                dataSeriesContext->geometryPos = geomPropertyPosition;

                ContextKey key;
                key.datasetId_ = dataset->id;
                key.analysisHashCode_ = analysisHashCode;
                datasetMap_[key] = dataSeriesContext;
            }
        }
    }
}
void ThreadFrontierReconvergencePass::runOnKernel(const ir::IRKernel& k)
{
    report("Running thread frontier reconvergence pass");

    typedef analysis::ThreadFrontierAnalysis::Priority Priority;
    typedef std::multimap<Priority, ir::ControlFlowGraph::const_iterator,
        std::greater<Priority>> ReversePriorityMap;
    typedef analysis::ThreadFrontierAnalysis TFAnalysis;
    typedef ir::ControlFlowGraph::const_pointer_iterator
        const_pointer_iterator;

    Analysis* analysis = getAnalysis(Analysis::ThreadFrontierAnalysis);
    assert(analysis != 0);

    TFAnalysis* tfAnalysis = static_cast<TFAnalysis*>(analysis);

    ReversePriorityMap priorityToBlocks;

    // sort by priority (high to low)
    for(ir::ControlFlowGraph::const_iterator block = k.cfg()->begin();
        block != k.cfg()->end(); ++block)
    {
        priorityToBlocks.insert(std::make_pair(
            tfAnalysis->getPriority(block), block));
    }

    typedef std::unordered_map<ir::BasicBlock::Id, unsigned int> IdToPCMap;
    typedef std::unordered_map<unsigned int,
        ir::ControlFlowGraph::const_iterator> PCToBlockMap;

    IdToPCMap pcs;
    PCToBlockMap branchPCs;
    PCToBlockMap fallthroughPCs;

    // lay the code out in priority order
    report(" Packing instructions into a vector");
    for(ReversePriorityMap::const_iterator
        priorityAndBlock = priorityToBlocks.begin();
        priorityAndBlock != priorityToBlocks.end(); ++priorityAndBlock)
    {
        ir::ControlFlowGraph::const_iterator block = priorityAndBlock->second;

        report(" Basic Block " << block->label() << " (" << block->id << ")");

        pcs.insert(std::make_pair(block->id, instructions.size()));

        for(ir::ControlFlowGraph::InstructionList::const_iterator
            instruction = block->instructions.begin();
            instruction != block->instructions.end(); ++instruction)
        {
            const ir::PTXInstruction& ptx = static_cast<
                const ir::PTXInstruction&>(**instruction);

            report(" [" << instructions.size() << "] '" << ptx.toString());

            instructions.push_back(ptx);
            instructions.back().pc = instructions.size() - 1;

            if(ptx.opcode == ir::PTXInstruction::Bra)
            {
                branchPCs.insert(std::make_pair(
                    instructions.back().pc, block));
            }
        }

        if(!_gen6)
        {
            // Add a branch for the fallthrough if it is in the TF
            if(block->has_fallthrough_edge())
            {
                ir::ControlFlowGraph::const_iterator target =
                    block->get_fallthrough_edge()->tail;

                ReversePriorityMap::const_iterator next = priorityAndBlock;
                ++next;

                bool needsCheck = target != next->second;

                TFAnalysis::BlockVector frontier =
                    tfAnalysis->getThreadFrontier(block);

                for(TFAnalysis::BlockVector::const_iterator
                    stalledBlock = frontier.begin();
                    stalledBlock != frontier.end(); ++stalledBlock)
                {
                    if((*stalledBlock)->id == target->id)
                    {
                        needsCheck = true;
                        break;
                    }
                }

                if(needsCheck)
                {
                    fallthroughPCs.insert(std::make_pair(
                        instructions.size(), block));

                    instructions.push_back(ir::PTXInstruction(
                        ir::PTXInstruction::Bra,
                        ir::PTXOperand(target->label())));

                    instructions.back().needsReconvergenceCheck = true;
                    instructions.back().branchTargetInstruction = -1;

                    report(" [" << (instructions.size() - 1) << "] '"
                        << instructions.back().toString());
                    report("  - artificial branch for check on"
                        " fallthrough into TF.");
                }
            }
        }
    }

    report(" Updating branch targets");
    for(PCToBlockMap::const_iterator pcAndBlock = branchPCs.begin();
        pcAndBlock != branchPCs.end(); ++pcAndBlock)
    {
        ir::ControlFlowGraph::const_iterator block = pcAndBlock->second;
        unsigned int pc = pcAndBlock->first;

        const ir::PTXInstruction& ptx = static_cast<
            const ir::PTXInstruction&>(instructions[pc]);

        ir::ControlFlowGraph::const_iterator target =
            block->get_branch_edge()->tail;

        IdToPCMap::const_iterator targetPC = pcs.find(target->id);
        assert(targetPC != pcs.end());

        report("  setting branch target of '" << ptx.toString()
            << "' to " << targetPC->second);

        instructions[pc].branchTargetInstruction = targetPC->second;

        TFAnalysis::BlockVector frontier =
            tfAnalysis->getThreadFrontier(block);

        if(_gen6)
        {
            ir::ControlFlowGraph::const_iterator firstBlock = k.cfg()->end();

            TFAnalysis::Priority highest = 0;

            frontier.push_back(block->get_branch_edge()->tail);

            if(block->has_fallthrough_edge())
            {
                frontier.push_back(block->get_fallthrough_edge()->tail);
            }

            // gen6 jumps to the block with the highest priority
            for(TFAnalysis::BlockVector::const_iterator
                stalledBlock = frontier.begin();
                stalledBlock != frontier.end(); ++stalledBlock)
            {
                TFAnalysis::Priority priority =
                    tfAnalysis->getPriority(*stalledBlock);

                if(priority >= highest)
                {
                    highest = priority;
                    firstBlock = *stalledBlock;
                }
            }

            // the reconverge point is the first block in the frontier
            assert(firstBlock != k.cfg()->end());

            IdToPCMap::const_iterator reconverge = pcs.find(firstBlock->id);
            assert(reconverge != pcs.end());

            instructions[pc].reconvergeInstruction = reconverge->second;

            report("   re-converge point " << reconverge->second
                << ", " << firstBlock->label());
        }
        else
        {
            // Does this branch need to check for re-convergence?
            // Or: are any of the target's predecessors
            // in the thread frontier?
            bool needsCheck = false;

            for(TFAnalysis::BlockVector::const_iterator
                stalledBlock = frontier.begin();
                stalledBlock != frontier.end(); ++stalledBlock)
            {
                if((*stalledBlock)->id == target->id)
                {
                    needsCheck = true;
                    report("   needs re-convergence check.");
                    break;
                }
            }

            instructions[pc].needsReconvergenceCheck = needsCheck;
        }
    }

    report(" Updating fallthrough targets");
    for(PCToBlockMap::const_iterator pcAndBlock = fallthroughPCs.begin();
        pcAndBlock != fallthroughPCs.end(); ++pcAndBlock)
    {
        ir::ControlFlowGraph::const_iterator block = pcAndBlock->second;
        unsigned int pc = pcAndBlock->first;

        const ir::PTXInstruction& ptx = static_cast<
            const ir::PTXInstruction&>(instructions[pc]);

        ir::ControlFlowGraph::const_iterator target =
            block->get_fallthrough_edge()->tail;

        IdToPCMap::const_iterator targetPC = pcs.find(target->id);
        assert(targetPC != pcs.end());

        report("  setting branch target of '" << ptx.toString()
            << "' to " << targetPC->second);

        instructions[pc].branchTargetInstruction = targetPC->second;
    }
}
void DivergenceAnalysis::_findBranches(branch_set& branches)
{
    Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
    assert(dfgAnalysis != 0);

    DataflowGraph& dfg = static_cast<DataflowGraph&>(*dfgAnalysis);

    /* Create a list of branches that can be divergent, that is, they are
       not bra.uni and have a predicate */
    DataflowGraph::iterator block = dfg.begin();
    DataflowGraph::iterator endBlock = dfg.end();

    /* Post-dominator tree */
    PostdominatorTree* dtree;
    dtree = (PostdominatorTree*)(getAnalysis("PostDominatorTreeAnalysis"));

    report(" Finding branches");
    for (; block != endBlock; ++block)
    {
        ir::PTXInstruction* ptxInstruction = NULL;

        if (block->instructions().size() > 0)
        {
            /* Branch instructions can only be the last instruction of
               a basic block */
            DataflowGraph::Instruction& lastInstruction =
                *(--block->instructions().end());

            if (typeid(ir::PTXInstruction) == typeid(*(lastInstruction.i)))
            {
                ptxInstruction =
                    static_cast<ir::PTXInstruction*>(lastInstruction.i);

                if (ptxInstruction->opcode == ir::PTXInstruction::Bra)
                {
                    report("  examining " << ptxInstruction->toString());

                    if(ptxInstruction->uni)
                    {
                        report("   eliminated, uniform...");
                        continue;
                    }

                    if(lastInstruction.s.size() == 0)
                    {
                        report("   eliminated, wrong source count ("
                            << lastInstruction.s.size() << ")...");
                        continue;
                    }

                    assert(lastInstruction.s.size() == 1);

                    DataflowGraph::iterator postDomBlock =
                        dfg.getCFGtoDFGMap()[
                            dtree->getPostDominator(block->block())];

                    if (postDomBlock != dfg.end())
                    {
                        BranchInfo newBranch(&(*block), &(*postDomBlock),
                            lastInstruction, _divergGraph);
                        branches.insert(newBranch);
                        report("   is potentially divergent...");
                    }
                    else
                    {
                        report("   eliminated, no post-dominator...");
                    }
                }
            }
        }
    }
}
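// Example of the filtering above on PTX (predicate and label names are
// hypothetical):
//
//   @%p1 bra $BB_2;   // kept: predicated and not uniform, so it may diverge
//   bra.uni $BB_3;    // eliminated: asserted uniform, all threads agree
//
// A kept branch becomes a BranchInfo only if a post-dominator exists to act
// as its re-convergence point.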
void ReversePostOrderTraversal::analyze(Function& function)
{
    typedef util::LargeSet<BasicBlock*> BlockSet;
    typedef std::stack<BasicBlock*> BlockStack;

    order.clear();

    BlockSet visited;
    BlockStack stack;

    auto cfgAnalysis = getAnalysis("ControlFlowGraph");
    auto cfg = static_cast<ControlFlowGraph*>(cfgAnalysis);

    report("Creating reverse post order traversal over function '"
        + function.name() + "'");

    // reverse post order is reversed topological order
    stack.push(&*function.entry_block());

    while(order.size() != function.size())
    {
        // If the stack ran dry before every block was ordered, seed it with
        // an unvisited successor of an already-ordered block. This handles
        // loops (whose headers never see all predecessors visited first)
        // and blocks not reachable through the predecessor-checked path.
        if(stack.empty())
        {
            for(auto block : order)
            {
                auto successors = cfg->getSuccessors(*block);

                for(auto successor : successors)
                {
                    if(visited.insert(successor).second)
                    {
                        stack.push(successor);
                        break;
                    }
                }

                if(!stack.empty()) break;
            }
        }

        assertM(!stack.empty(), (function.size() - order.size())
            << " blocks are not connected.");

        while(!stack.empty())
        {
            BasicBlock* top = stack.top();
            stack.pop();

            auto successors = cfg->getSuccessors(*top);

            for(auto successor : successors)
            {
                assert(successor != nullptr);

                // Only push a successor once all of its predecessors have
                // been visited, so it is ordered after them.
                auto predecessors = cfg->getPredecessors(*successor);

                bool allPredecessorsVisited = true;

                for(auto predecessor : predecessors)
                {
                    if(visited.count(predecessor) == 0)
                    {
                        allPredecessorsVisited = false;
                        break;
                    }
                }

                if(!allPredecessorsVisited) continue;

                if(visited.insert(successor).second)
                {
                    stack.push(successor);
                }
            }

            order.push_back(top);

            report(" " << top->name());
        }
    }

    // reverse the order
    std::reverse(order.begin(), order.end());
}
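// A minimal self-contained sketch of the same idea (hypothetical types, not
// this pass's API): compute a DFS post-order iteratively, then reverse it.
// The pass above instead delays a block until all predecessors are ordered,
// re-seeding the stack for loops and stragglers; both approaches produce a
// reverse post order on an acyclic graph.
#include <algorithm>
#include <unordered_set>
#include <utility>
#include <vector>

struct Node
{
    std::vector<Node*> successors;
};

std::vector<Node*> reversePostOrder(Node* entry)
{
    std::vector<Node*> postOrder;
    std::unordered_set<Node*> visited{entry};

    // (node, index of next successor to visit) emulates the recursion stack
    std::vector<std::pair<Node*, std::size_t>> stack{{entry, 0}};

    while (!stack.empty())
    {
        auto& [node, index] = stack.back();

        if (index < node->successors.size())
        {
            Node* next = node->successors[index++];

            if (visited.insert(next).second) stack.emplace_back(next, 0);
        }
        else
        {
            // All successors finished: emit the node in post order.
            postOrder.push_back(node);
            stack.pop_back();
        }
    }

    std::reverse(postOrder.begin(), postOrder.end());
    return postOrder;
}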
analysis::DivergenceAnalysis& DivergenceLinearScan::_diva()
{
    Analysis* divA = getAnalysis("DivergenceAnalysis");
    assertM(divA != NULL, "Got null divergence analysis");

    return *static_cast<analysis::DivergenceAnalysis*>(divA);
}
void DivergenceAnalysis::_analyzeDataFlow()
{
    Analysis* dfg = getAnalysis("DataflowGraphAnalysis");
    assert(dfg != 0);

    DataflowGraph& nonConstGraph = static_cast<DataflowGraph&>(*dfg);

    DataflowGraph::const_iterator block = nonConstGraph.begin();
    DataflowGraph::const_iterator endBlock = nonConstGraph.end();

    report("Analyzing data flow");

    /* 1) Analyze the data flow adding divergence sources */
    for (; block != endBlock; ++block)
    {
        report(" for block " << block->label());

        DataflowGraph::PhiInstructionVector::const_iterator
            phiInstruction = block->phis().begin();
        DataflowGraph::PhiInstructionVector::const_iterator
            endPhiInstruction = block->phis().end();

        /* Go over the phi functions and add their dependences to the
         * dependence graph. */
        for (; phiInstruction != endPhiInstruction; phiInstruction++)
        {
            for (DataflowGraph::RegisterVector::const_iterator
                si = phiInstruction->s.begin();
                si != phiInstruction->s.end(); ++si)
            {
                _divergGraph.insertEdge(si->id, phiInstruction->d.id);
                report("  phi r" << phiInstruction->d.id
                    << " <- r" << si->id);
            }
        }

        DataflowGraph::InstructionVector::const_iterator
            ii = block->instructions().begin();
        DataflowGraph::InstructionVector::const_iterator
            iiEnd = block->instructions().end();

        for (; ii != iiEnd; ++ii)
        {
            ir::PTXInstruction* ptxInstruction = NULL;

            bool atom = false;
            bool functionStackArgument = false;
            bool localMemoryOperand = false;
            bool isCall = false;

            std::set<const ir::PTXOperand*> divergenceSources;

            /* First we populate divergenceSources with all the
             * source operands that might diverge. */
            if (typeid(ir::PTXInstruction) == typeid(*(ii->i)))
            {
                ptxInstruction = static_cast<ir::PTXInstruction*>(ii->i);

                if (isDivergenceSource(ptxInstruction->a))
                {
                    divergenceSources.insert(&ptxInstruction->a);
                }
                if (isDivergenceSource(ptxInstruction->b))
                {
                    divergenceSources.insert(&ptxInstruction->b);
                }
                if (isDivergenceSource(ptxInstruction->c))
                {
                    divergenceSources.insert(&ptxInstruction->c);
                }

                if (ptxInstruction->opcode == ir::PTXInstruction::Atom)
                {
                    atom = true;
                }

                if (ptxInstruction->mayHaveAddressableOperand())
                {
                    if (_doesOperandUseLocalMemory(ptxInstruction->a))
                    {
                        localMemoryOperand = true;
                    }
                }

                if (ptxInstruction->opcode == ir::PTXInstruction::Call)
                {
                    isCall = true;
                }
            }

            /* Second, if this kernel is a function, mark operands that are
             * function call arguments, since their stack slots may be
             * addressed divergently by callers. */
            if (_kernel->function())
            {
                if (typeid(ir::PTXInstruction) == typeid(*(ii->i)))
                {
                    ptxInstruction = static_cast<ir::PTXInstruction*>(ii->i);

                    if (ptxInstruction->mayHaveAddressableOperand())
                    {
                        if (_isOperandAnArgument(ptxInstruction->a))
                        {
                            functionStackArgument = true;

                            report("  operand '"
                                << ptxInstruction->a.toString()
                                << "' is a function call argument.");
                        }
                    }
                }
            }

            /* Third, we link the source operands to the destination
             * operands, and mark destinations that are themselves
             * divergence sources (atomics, calls, stack arguments, and
             * local memory operands). */
            DataflowGraph::RegisterPointerVector::const_iterator
                destinationReg = ii->d.begin();
            DataflowGraph::RegisterPointerVector::const_iterator
                destinationEndReg = ii->d.end();

            for (; destinationReg != destinationEndReg; destinationReg++)
            {
                if (divergenceSources.size() != 0)
                {
                    std::set<const ir::PTXOperand*>::iterator
                        divergenceSource = divergenceSources.begin();
                    std::set<const ir::PTXOperand*>::iterator
                        divergenceSourceEnd = divergenceSources.end();

                    for (; divergenceSource != divergenceSourceEnd;
                        divergenceSource++)
                    {
                        report("  destination register r"
                            << *destinationReg->pointer
                            << " is derived from divergence source '"
                            << (*divergenceSource)->toString() << "'");

                        _divergGraph.insertEdge(*divergenceSource,
                            *destinationReg->pointer);
                    }
                }

                DataflowGraph::RegisterPointerVector::const_iterator
                    sourceReg = ii->s.begin();
                DataflowGraph::RegisterPointerVector::const_iterator
                    sourceRegEnd = ii->s.end();

                for (; sourceReg != sourceRegEnd; sourceReg++)
                {
                    _divergGraph.insertEdge(*sourceReg->pointer,
                        *destinationReg->pointer);
                    reportE(REPORT_ALL_DEPENDENCES, "  r"
                        << *destinationReg->pointer
                        << " <- r" << *sourceReg->pointer);
                }

                if (atom || functionStackArgument
                    || localMemoryOperand || isCall)
                {
                    report("  destination register r"
                        << *destinationReg->pointer
                        << " is a divergence source.");
                    _divergGraph.insertNode(*destinationReg->pointer);
                    _divergGraph.setAsDiv(*destinationReg->pointer);
                }
            }
        }
    }

    /* 2) Compute the divergence propagation */
    _divergGraph.computeDivergence();
}