ir::ControlFlowGraph::iterator 
	HoistParameterLoadsPass::_getTopLevelDominatingBlock(
		ir::IRKernel& k, ir::ControlFlowGraph::iterator block)
{
	auto loopAnalysis = static_cast<analysis::LoopAnalysis*>(
		getAnalysis(Analysis::LoopAnalysis));
	auto dominatorTree = static_cast<analysis::DominatorTree*>(
		getAnalysis(Analysis::DominatorTreeAnalysis));
		
	while(loopAnalysis->isContainedInLoop(block))
	{
		auto dominator = dominatorTree->getDominator(block);
		
		if(dominator == block->cfg->get_entry_block())
		{
			block = k.cfg()->split_edge(dominator->get_fallthrough_edge(),
				ir::BasicBlock(k.cfg()->newId())).first->tail;
				
			invalidateAnalysis(analysis::Analysis::LoopAnalysis         );
			invalidateAnalysis(analysis::Analysis::DominatorTreeAnalysis);

			break;
		}
		
		block = dominator;
	}
	
	return block;
}
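_getTopLevelDominatingBlock() walks up the dominator tree until it has left every loop, splitting the entry's fallthrough edge when the walk reaches the entry block. A minimal standalone sketch of that dominator-chain walk, using hypothetical Node, insideLoop, and immediateDominator stand-ins instead of Ocelot's CFG, LoopAnalysis, and DominatorTree classes:

// Sketch only: climb toward the entry block and stop at the first block that
// is not contained in any loop, so code hoisted there executes at most once.
struct Node
{
	Node* immediateDominator = nullptr; // stand-in for DominatorTree::getDominator()
	bool  insideLoop         = false;   // stand-in for LoopAnalysis::isContainedInLoop()
};

inline Node* hoistTarget(Node* block)
{
	while(block->insideLoop && block->immediateDominator != nullptr)
	{
		block = block->immediateDominator;
	}

	return block;
}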
Example #2
DivergenceAnalysis::block_set
	DivergenceAnalysis::_getDivergentBlocksInPostdominanceFrontier(
	const DataflowGraph::iterator &block) {
	
	const Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
	assert(dfgAnalysis != 0);

	const DataflowGraph &cdfg =
		static_cast<const DataflowGraph&>(*dfgAnalysis);
	DataflowGraph &dfg = const_cast<DataflowGraph&>(cdfg);
	
	PostdominatorTree* dtree = (PostdominatorTree*)
		(getAnalysis("PostDominatorTreeAnalysis"));
	
	auto postDominator = dfg.getCFGtoDFGMap()[
		dtree->getPostDominator(block->block())];

	block_set divergentBlocks;

	for (auto successor = block->successors().begin();
		successor != block->successors().end(); ++successor) {
		if (*successor == postDominator) continue;
		
		block_set allDivergentPaths;
		
		buildDivergentSubgraph(allDivergentPaths, *successor, postDominator);
		
		divergentBlocks.insert(allDivergentPaths.begin(),
			allDivergentPaths.end());
	}
	
	return divergentBlocks;
}
Example #3
const DataflowGraph* DivergenceAnalysis::getDFG() const
{
	const Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
	assert(dfgAnalysis != 0);

	return static_cast<const DataflowGraph*>(dfgAnalysis);
}
Example #4
analysis::DataflowGraph& RemoveBarrierPass::_dfg()
{
	Analysis* dfg_structure = getAnalysis(Analysis::DataflowGraphAnalysis);
	assert(dfg_structure != 0);

	return *static_cast<analysis::DataflowGraph*>(dfg_structure);
}
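Examples #3 and #4 repeat the same fetch, assert, and downcast idiom for every analysis type. As a sketch only (this helper is not part of Ocelot), the pattern could be factored into a small template; analysisCast is a hypothetical name:

#include <cassert>

// Hypothetical helper: centralizes the null check and downcast that each
// accessor above writes by hand. 'AnalysisT' stands in for the base class
// returned by getAnalysis().
template<typename Derived, typename AnalysisT>
Derived& analysisCast(AnalysisT* analysis)
{
	assert(analysis != 0);

	return *static_cast<Derived*>(analysis);
}

// Possible usage inside a pass, assuming the getAnalysis() member shown above:
//   auto& dfg = analysisCast<analysis::DataflowGraph>(
//       getAnalysis(Analysis::DataflowGraphAnalysis));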
Example #5
/*! \brief Analyze the control and data flow graphs, searching for divergent
 *    variables and blocks.
 *
 * 1) Performs data-flow analysis that detects divergent variables and blocks
 *    based on divergence sources, such as t.id and laneId.
 * 2) Performs control-flow analysis that detects new divergent variables
 *    based on the dependences of variables on variables created along
 *    divergent paths.
 */
void DivergenceAnalysis::analyze(ir::IRKernel &k)
{
	Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
	assert(dfgAnalysis != 0);

	DataflowGraph &dfg = static_cast<DataflowGraph&>(*dfgAnalysis);

	dfg.convertToSSAType(DataflowGraph::Gated);

	assert(dfg.ssa());

	_divergGraph.clear();
	_notDivergentBlocks.clear();
	_kernel = &k;

	report("Running Divergence analysis on kernel '" << k.name << "'")
	#if REPORT_PTX > 0
	k.write(std::cout);
	#endif

	DivergenceGraph::node_set predicates;
		
	/* 1) Perform data-flow analysis that detects divergent variables and
		blocks based on divergence sources, such as t.id and laneId */
	_analyzeDataFlow();
	/* 2) Perform control-flow analysis that detects new divergent variables
		based on the dependences of variables on variables created along
		divergent paths */
	_analyzeControlFlow();
}
void MoveEliminationPass::runOnKernel(ir::IRKernel& k)
{
	report("Eliminating moves in kernel " << k.name << "");
	
	auto dfg = static_cast<analysis::DataflowGraph*>(
		getAnalysis("DataflowGraphAnalysis"));
	assert(dfg != 0);

	dfg->convertToSSAType(analysis::DataflowGraph::Minimal);

	auto moves = getMoves(dfg);
	
	bool eliminatedAny = false;
	
	report(" Eliminating moves");
	
	for(auto move = moves.begin(); move != moves.end(); ++move)
	{
		if(canEliminate(*move))
		{
			report("  " << (*move)->i->toString());
			eliminate(*move);
			eliminatedAny = true;
		}
	}
	
	if(eliminatedAny)
	{
		invalidateAnalysis("DataflowGraphAnalysis");
	}
	
	report("finished...");
}
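MoveEliminationPass calls invalidateAnalysis() only when it actually changed the kernel, which implies a cache behind getAnalysis(): a stored result is handed back until it is invalidated, after which the next request recomputes it. A sketch of such a cache (not Ocelot's actual pass manager; AnalysisBase and buildAnalysis are hypothetical):

#include <map>
#include <memory>
#include <string>

struct AnalysisBase { virtual ~AnalysisBase() {} };

// Assumed to construct the named analysis for the current kernel.
std::unique_ptr<AnalysisBase> buildAnalysis(const std::string& name);

class AnalysisCache
{
public:
	AnalysisBase* getAnalysis(const std::string& name)
	{
		auto cached = _cache.find(name);

		if(cached == _cache.end())
		{
			// Build lazily on first request and remember the result.
			cached = _cache.emplace(name, buildAnalysis(name)).first;
		}

		return cached->second.get();
	}

	void invalidateAnalysis(const std::string& name)
	{
		// Drop the cached result; the next getAnalysis() call rebuilds it
		// against the (possibly modified) kernel.
		_cache.erase(name);
	}

private:
	std::map<std::string, std::unique_ptr<AnalysisBase>> _cache;
};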
Example #7
void DeadCodeEliminationPass::runOnKernel(ir::IRKernel& k)
{
	report("Running dead code elimination on kernel " << k.name);
	reportE(REPORT_PTX, k);
	
	Analysis* dfgAnalysis = getAnalysis(Analysis::DataflowGraphAnalysis);
	assert(dfgAnalysis != 0);

	analysis::DataflowGraph& dfg =
		*static_cast<analysis::DataflowGraph*>(dfgAnalysis);
	
	assert(dfg.ssa() != analysis::DataflowGraph::SsaType::None);
	
	BlockSet blocks;
	
	report(" Starting by scanning all basic blocks");
	
	for(iterator block = dfg.begin(); block != dfg.end(); ++block)
	{
		report("  Queueing up BB_" << block->id());
		blocks.insert(block);
	}
	
	while(!blocks.empty())
	{
		iterator block = *blocks.begin();
		blocks.erase(blocks.begin());
	
		eliminateDeadInstructions(dfg, blocks, block);
	}
	
	report("Finished running dead code elimination on kernel " << k.name);
	reportE(REPORT_PTX, k);
}
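The pass above drains a worklist of blocks, and eliminateDeadInstructions() is expected to push blocks back onto it whenever removing an instruction changes what is live elsewhere, so the loop runs to a fixed point. A generic sketch of that driver (BlockId and the callback are hypothetical stand-ins for DataflowGraph::iterator and the elimination routine):

#include <set>

typedef unsigned int      BlockId;
typedef std::set<BlockId> BlockSet;

// Sketch only: process blocks until no block's result changes. The callback
// may insert blocks back into 'blocks' when the information it computes for
// the current block changes.
template<typename ProcessBlock>
void runToFixedPoint(BlockSet blocks, ProcessBlock processBlock)
{
	while(!blocks.empty())
	{
		BlockId block = *blocks.begin();
		blocks.erase(blocks.begin());

		processBlock(block, blocks);
	}
}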
void HoistParameterLoadsPass::_tryHoistingLoad(
	ir::ControlFlowGraph::iterator block, ir::PTXInstruction* ptx,
	ir::IRKernel& k)
{
	report("  " << ptx->toString());
	
	auto newBlock = _getTopLevelDominatingBlock(k, block);
	
	if(newBlock == block) return;
	
	report("   hoisting to " << newBlock->label());
	
	auto dfg = static_cast<analysis::DataflowGraph*>(
		getAnalysis(Analysis::DataflowGraphAnalysis));
	
	auto load = new ir::PTXInstruction(ir::PTXInstruction::Ld);
	
	load->addressSpace   = ptx->addressSpace;
	load->type           = ptx->type;
	load->volatility     = ptx->volatility;
	load->cacheOperation = ptx->cacheOperation;
	
	load->d = ir::PTXOperand(ir::PTXOperand::Register,
		ptx->d.type, dfg->newRegister());
	load->a = ptx->a;
	
	insertBeforeTerminator(newBlock, load);
	
	ptx->opcode = ir::PTXInstruction::Mov;
	ptx->a      = load->d;
}
Example #9
void LoopUnrollingPass::runOnKernel(ir::IRKernel& k)
{
	Analysis* loopAnalysis = getAnalysis(Analysis::LoopAnalysis);
	assert(loopAnalysis != 0);
	
	// TODO actually unroll something.
}
Example #10
analysis::AffineAnalysis& AffineLinearScan::_afa()
{
	Analysis* aff = getAnalysis(Analysis::AffineAnalysis);
	assert(aff != 0);

	return *static_cast<analysis::AffineAnalysis*>(aff);
}
Example #11
void DivergenceAnalysis::_convergenceAnalysis()
{
	report("Running convergence analysis.");

	Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
	assert(dfgAnalysis != 0);

	DataflowGraph &dfg = static_cast<DataflowGraph&>(*dfgAnalysis);
	
	/* Assume all blocks are convergent */
	block_set divergentBlocks;
	
	/* 1) mark all blocks in the post-dominance frontier of divergent branches
		along paths that do not encounter known convergent points
		as divergent.  This is an optimistic analysis. */
	report(" Marking divergent blocks.");
	for (auto block = dfg.begin() ; block != dfg.end(); ++block) {
		if (!isDivBlock(block) || _hasTrivialPathToExit(block)) continue;
		
		block_set divergentBlocksInPostdominanceFrontier =
			_getDivergentBlocksInPostdominanceFrontier(block);
		
		divergentBlocks.insert(divergentBlocksInPostdominanceFrontier.begin(),
			divergentBlocksInPostdominanceFrontier.end());
	}
	
	report(" Marking convergent blocks.");
	_notDivergentBlocks.clear();
	for (auto block = dfg.begin(); block != dfg.end(); ++block) {
		if (divergentBlocks.count(block) == 0) {
			report("  " << block->label() << " is assumed convergent.");
			_notDivergentBlocks.insert(block);
		}
	}
}
void DataDependenceAnalysis::analyze(ir::IRKernel& kernel)
{
	auto dfg = static_cast<DataflowGraph*>(
		getAnalysis("DataflowGraphAnalysis"));

	report("Running data dependence analysis on kernel " << kernel.name);
	
	for(auto block = dfg->begin(); block != dfg->end(); ++block)
	{
		analyzeBlock(block, dfg, _nodes, _instructionToNodes);
	}
}
void DependenceAnalysis::analyze(ir::IRKernel& kernel)
{
	report("Running dependence analysis on kernel " << kernel.name);
	
	auto controlDependenceAnalysis = static_cast<ControlDependenceAnalysis*>(
		getAnalysis("ControlDependenceAnalysis")); 
	auto dataDependenceAnalysis = static_cast<DataDependenceAnalysis*>(
		getAnalysis("DataDependenceAnalysis")); 
	auto memoryDependenceAnalysis = static_cast<MemoryDependenceAnalysis*>(
		getAnalysis("MemoryDependenceAnalysis")); 
		
	for(auto& node : *controlDependenceAnalysis)
	{
		auto newNode = _nodes.insert(_nodes.end(), Node(node.instruction));
	
		_instructionToNodes.insert(std::make_pair(node.instruction, newNode));
	}
	
	for(auto& node : *this)
	{
		auto controlDependenceNode = controlDependenceAnalysis->getNode(
			node.instruction);
	
		addEdges(node, *controlDependenceNode, _instructionToNodes);

		auto dataDependenceNode = dataDependenceAnalysis->getNode(
			node.instruction);
	
		addEdges(node, *dataDependenceNode, _instructionToNodes);
		
		auto memoryDependenceNode = memoryDependenceAnalysis->getNode(
			node.instruction);
		
		if(memoryDependenceNode != memoryDependenceAnalysis->end())
		{
			addEdges(node, *memoryDependenceNode, _instructionToNodes);
		}
	}
}
Example #14
unsigned int DivergenceAnalysis::_numberOfDivergentPathsToPostDominator(
	const DataflowGraph::iterator &block) const {
	
	const Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
	assert(dfgAnalysis != 0);

	const DataflowGraph &cdfg =
		static_cast<const DataflowGraph&>(*dfgAnalysis);
	DataflowGraph &dfg = const_cast<DataflowGraph&>(cdfg);
	
	PostdominatorTree* dtree = (PostdominatorTree*)
		(getAnalysis("PostDominatorTreeAnalysis"));
	
	auto postDominator = dfg.getCFGtoDFGMap()[
		dtree->getPostDominator(block->block())];

	unsigned int divergentPaths = 0;

	for (auto successor = block->successors().begin();
		successor != block->successors().end(); ++successor) {
		if (*successor == postDominator) {
			++divergentPaths;
			continue;
		}
		
		block_set allDivergentPaths;
		
		if (doAnyDivergentPathsReachThePostDominator(allDivergentPaths,
			*successor, postDominator)) {
			++divergentPaths;
		}
	}
	
	report("  There are " << divergentPaths << " divergent paths from "
		<< block->label() << " to post-dominator " << postDominator->label());

	return divergentPaths;
}
void SafeRegionAnalysis::analyze(ir::IRKernel& kernel)
{
	// Get analyses
	auto cycleAnalysis = static_cast<CycleAnalysis*>(
		getAnalysis("CycleAnalysis"));
	auto dependenceAnalysis = static_cast<DependenceAnalysis*>(
		getAnalysis("DependenceAnalysis"));
	auto controlDependenceAnalysis =
		static_cast<ControlDependenceAnalysis*>(
		getAnalysis("ControlDependenceAnalysis"));
		
	// Find basic blocks that cannot be contained in safe regions
	auto blocksThatDependOnSideEffects = getBlocksThatDependOnSideEffects(
		kernel, cycleAnalysis, dependenceAnalysis, controlDependenceAnalysis);

	// Find hammocks in the program
	auto hammockAnalysis = static_cast<HammockGraphAnalysis*>(
		getAnalysis("HammockGraphAnalysis"));
		
	// Form safe regions around hammocks that do not contain blocks with
	// side effects
	formSafeRegionsAroundHammocks(_root, _regions, hammockAnalysis,
		blocksThatDependOnSideEffects);
}
Example #16
bool DivergenceAnalysis::_hasTrivialPathToExit(
	const DataflowGraph::iterator &block) const {

	// We can ignore divergent threads that immediately exit
	unsigned int exitingPaths = 0;
	
	const Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
	assert(dfgAnalysis != 0);

	const DataflowGraph &dfg =
		static_cast<const DataflowGraph&>(*dfgAnalysis);

	auto exit = --dfg.end();

	for (auto successor = block->successors().begin();
		successor != block->successors().end(); ++successor) {
		auto path = *successor;
		
		while (true) {
			if (path == exit) {
				++exitingPaths;
				break;
			}
			if (path->successors().size() != 1) {
				break;
			}
			if (!path->instructions().empty()) {
				if (path->instructions().size() == 1) {
					const ir::PTXInstruction &ptxI =
						*(static_cast<ir::PTXInstruction *> (
						path->instructions().back().i));
				
					if (ptxI.isExit()) {
						++exitingPaths;
					}
				}
				break;
			}
			path = *path->successors().begin();
		}
	}

	if (block->successors().size() - exitingPaths < 2) {
		return true;
	}
	
	return false;
}
void DivergenceLinearScan::runOnKernel(ir::IRKernel& k)
{
	auto dfg = static_cast<analysis::DataflowGraph*>(
		getAnalysis("DataflowGraphAnalysis"));
	
	dfg->convertToSSAType(analysis::DataflowGraph::Gated);

#if DIVERGENCE_REGISTER_PROFILE_H_
	divergenceProfiler::resetSpillData();
#endif

	_shared.clear();
	LinearScanRegisterAllocationPass::runOnKernel(k);
#if DIVERGENCE_REGISTER_PROFILE_H_
	if(!k.function())
		divergenceProfiler::printSpillResults(k.name);
#endif
}
void HoistParameterLoadsPass::runOnKernel(ir::IRKernel& k)
{
	typedef std::pair<ir::ControlFlowGraph::iterator, ir::PTXInstruction*> Load;
	typedef std::vector<Load> LoadVector;

	auto aliasAnalysis = static_cast<analysis::SimpleAliasAnalysis*>(
		getAnalysis(Analysis::SimpleAliasAnalysis));
	
	LoadVector candidateLoads;
	
	report("Hoisting loads in kernel '" << k.name << "'");

	report(" Identifying candidate loads");
	
	for(auto block = k.cfg()->begin(); block != k.cfg()->end(); ++block)
	{
		for(auto instruction = block->instructions.begin();
			instruction != block->instructions.end(); ++instruction)
		{
			auto ptx = static_cast<ir::PTXInstruction*>(*instruction);
		
			if(ptx->isLoad() &&
				aliasAnalysis->cannotAliasAnyStore(*instruction)
				&& hasNoRegisterDependencies(*ptx))
			{
				report("  " << ptx->toString());
				candidateLoads.push_back(std::make_pair(block, ptx));
			}
		}
	}
	
	report(" Attempting to hoist loads");
	for(auto load = candidateLoads.begin();
		load != candidateLoads.end(); ++load)
	{
		_tryHoistingLoad(load->first, load->second, k);
	}
	
	invalidateAnalysis(analysis::Analysis::DataflowGraphAnalysis);
	invalidateAnalysis(analysis::Analysis::SimpleAliasAnalysis  );
}
Example #19
void FunctionInliningPass::runOnKernel(ir::IRKernel& k)
{
	report("Running function inlining pass on kernel " << k.name);
	
	auto analysis = getAnalysis(Analysis::DataflowGraphAnalysis);
	assert(analysis != 0);
	
	auto dfg = static_cast<analysis::DataflowGraph*>(analysis);

	_nextRegister = dfg->maxRegister() + 1;
	
	// Get the set of all function calls that satisfy the inlining criteria
	_getFunctionsToInline(k);

	// Inline all of the functions in this set
	_inlineSelectedFunctions(k);

	if(!_calls.empty())
	{
		invalidateAnalysis(Analysis::DataflowGraphAnalysis);
	}
}
void ConstantPropagationPass::runOnKernel(ir::IRKernel& k)
{
	report("Running constant propagation on kernel " << k.name);
	
	Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
	assert(dfgAnalysis != 0);
	
	analysis::DataflowGraph& dfg =
		*static_cast<analysis::DataflowGraph*>(dfgAnalysis);
	
	dfg.convertToSSAType(analysis::DataflowGraph::Minimal);
	
	assert(dfg.ssa() == analysis::DataflowGraph::Minimal);
	
	BlockSet blocks;
	
	report(" Starting by scanning all basic blocks");
	
	for(iterator block = dfg.begin(); block != dfg.end(); ++block)
	{
		report("  Queueing up BB_" << block->id());
		blocks.insert(block);
	}
	
	while(!blocks.empty())
	{
		iterator block = *blocks.begin();
		blocks.erase(blocks.begin());
	
		eliminateRedundantInstructions(dfg, blocks, block);
	}

	report("Finished running constant propagation on kernel " << k.name);
	reportE(REPORT_PTX, k);
}
Example #21
void terrama2::services::analysis::core::Context::loadMonitoredObject(AnalysisHashCode analysisHashCode)
{
  std::lock_guard<std::recursive_mutex> lock(mutex_);

  auto dataManagerPtr = dataManager_.lock();
  if(!dataManagerPtr)
  {
    QString errMsg(QObject::tr("Invalid data manager."));
    throw terrama2::core::InvalidDataManagerException() << terrama2::ErrorDescription(errMsg);
  }

  auto analysis = getAnalysis(analysisHashCode);

  for(auto analysisDataSeries : analysis->analysisDataSeriesList)
  {
    auto dataSeriesPtr = dataManagerPtr->findDataSeries(analysisDataSeries.dataSeriesId);
    auto datasets = dataSeriesPtr->datasetList;
    if(analysisDataSeries.type == AnalysisDataSeriesType::DATASERIES_MONITORED_OBJECT_TYPE)
    {
      assert(datasets.size() == 1);
      auto dataset = datasets[0];

      auto dataProvider = dataManagerPtr->findDataProvider(dataSeriesPtr->dataProviderId);
      terrama2::core::Filter filter;

      //accessing data
      terrama2::core::DataAccessorPtr accessor = terrama2::core::DataAccessorFactory::getInstance().make(dataProvider, dataSeriesPtr);
      auto seriesMap = accessor->getSeries(filter);
      auto series = seriesMap[dataset];

      std::string identifier = analysisDataSeries.metadata["identifier"];

      std::shared_ptr<ContextDataSeries> dataSeriesContext(new ContextDataSeries);

      if(!series.syncDataSet)
      {
        QString errMsg(QObject::tr("No data available for DataSeries %1").arg(dataSeriesPtr->id));
        throw terrama2::InvalidArgumentException() << terrama2::ErrorDescription(errMsg);
      }

      if(!series.syncDataSet->dataset())
      {
        QString errMsg(QObject::tr("Adding an invalid dataset to the analysis context: DataSeries %1").arg(dataSeriesPtr->id));
        throw terrama2::InvalidArgumentException() << terrama2::ErrorDescription(errMsg);
      }

      std::size_t geomPropertyPosition = te::da::GetFirstPropertyPos(series.syncDataSet->dataset().get(), te::dt::GEOMETRY_TYPE);

      dataSeriesContext->series = series;
      dataSeriesContext->identifier = identifier;
      dataSeriesContext->geometryPos = geomPropertyPosition;

      ContextKey key;
      key.datasetId_ = dataset->id;
      key.analysisHashCode_ = analysisHashCode;
      datasetMap_[key] = dataSeriesContext;
    }
    else if(analysisDataSeries.type == AnalysisDataSeriesType::DATASERIES_PCD_TYPE)
    {
      for(auto dataset : dataSeriesPtr->datasetList)
      {
        auto dataProvider = dataManagerPtr->findDataProvider(dataSeriesPtr->dataProviderId);
        terrama2::core::Filter filter;

        //accessing data
        terrama2::core::DataAccessorPtr accessor = terrama2::core::DataAccessorFactory::getInstance().make(dataProvider, dataSeriesPtr);
        auto seriesMap = accessor->getSeries(filter);
        auto series = seriesMap[dataset];

        std::string identifier = analysisDataSeries.metadata["identifier"];

        std::shared_ptr<ContextDataSeries> dataSeriesContext(new ContextDataSeries);

        std::size_t geomPropertyPosition = te::da::GetFirstPropertyPos(series.syncDataSet->dataset().get(), te::dt::GEOMETRY_TYPE);

        dataSeriesContext->series = series;
        dataSeriesContext->identifier = identifier;
        dataSeriesContext->geometryPos = geomPropertyPosition;

        ContextKey key;
        key.datasetId_ = dataset->id;
        key.analysisHashCode_ = analysisHashCode;
        datasetMap_[key] = dataSeriesContext;
      }
    }
  }
}
void ThreadFrontierReconvergencePass::runOnKernel(const ir::IRKernel& k)
{
	report("Running thread frontier reconvergence pass");
	typedef analysis::ThreadFrontierAnalysis::Priority Priority;
	typedef std::multimap<Priority, ir::ControlFlowGraph::const_iterator,
		std::greater<Priority>> ReversePriorityMap;
	typedef analysis::ThreadFrontierAnalysis TFAnalysis;
	typedef ir::ControlFlowGraph::const_pointer_iterator const_pointer_iterator;

	Analysis* analysis = getAnalysis(Analysis::ThreadFrontierAnalysis);
	assert(analysis != 0);
	
	TFAnalysis* tfAnalysis = static_cast<TFAnalysis*>(analysis);

	ReversePriorityMap priorityToBlocks;
	
	// sort by priority (high to low)
	for(ir::ControlFlowGraph::const_iterator block = k.cfg()->begin();
		block != k.cfg()->end(); ++block)
	{
		priorityToBlocks.insert(std::make_pair(tfAnalysis->getPriority(block),
			block));
	}
	
	typedef std::unordered_map<ir::BasicBlock::Id, unsigned int> IdToPCMap;
	typedef std::unordered_map<unsigned int,
		ir::ControlFlowGraph::const_iterator> PCToBlockMap;

	IdToPCMap     pcs;
	PCToBlockMap  branchPCs;
	PCToBlockMap  fallthroughPCs;
	
	// lay the code out in priority order
	report(" Packing instructions into a vector");
	for(ReversePriorityMap::const_iterator
		priorityAndBlock = priorityToBlocks.begin();
		priorityAndBlock != priorityToBlocks.end(); ++priorityAndBlock)
	{
		ir::ControlFlowGraph::const_iterator block = priorityAndBlock->second;

		report("  Basic Block " << block->label() << " ("
			<< block->id << ")");
			
		pcs.insert(std::make_pair(block->id, instructions.size()));
		
		for(ir::ControlFlowGraph::InstructionList::const_iterator 
			instruction = block->instructions.begin();
			instruction != block->instructions.end(); ++instruction)
		{
			const ir::PTXInstruction& ptx = static_cast<
				const ir::PTXInstruction&>(**instruction);
				
			report("   [" << instructions.size() << "] '" << ptx.toString());
			
			instructions.push_back(ptx);
			instructions.back().pc = instructions.size() - 1;
			
			if(ptx.opcode == ir::PTXInstruction::Bra)
			{
				branchPCs.insert(std::make_pair(instructions.back().pc, block));
			}
		}
		
		if(!_gen6)
		{
			// Add a branch for the fallthrough if it is in the TF
			if(block->has_fallthrough_edge())
			{
				ir::ControlFlowGraph::const_iterator target =
					block->get_fallthrough_edge()->tail;
				
				ReversePriorityMap::const_iterator next = priorityAndBlock;
				++next;
				
				// Guard against dereferencing the end iterator when this is
				// the lowest-priority block in the layout.
				bool needsCheck = next == priorityToBlocks.end() ||
					target != next->second;
				
				TFAnalysis::BlockVector frontier =
						tfAnalysis->getThreadFrontier(block);
		
				for(TFAnalysis::BlockVector::const_iterator
					stalledBlock = frontier.begin();
					stalledBlock != frontier.end(); ++stalledBlock)
				{
					if((*stalledBlock)->id == target->id)
					{
						needsCheck = true;
						break;
					}
				}
				
				if(needsCheck)
				{
					fallthroughPCs.insert(std::make_pair(
						instructions.size(), block));
					
					instructions.push_back(ir::PTXInstruction(
						ir::PTXInstruction::Bra,
						ir::PTXOperand(target->label())));
		
					instructions.back().needsReconvergenceCheck = true;
					instructions.back().branchTargetInstruction = -1;
					report("   [" << (instructions.size() - 1) << "] '"
						<< instructions.back().toString());
					report("    - artificial branch for check on"
						" fallthrough into TF.");
				}
			}
		}
	}
	
	report(" Updating branch targets");
	for(PCToBlockMap::const_iterator pcAndBlock = branchPCs.begin();
		pcAndBlock != branchPCs.end(); ++pcAndBlock)
	{
		ir::ControlFlowGraph::const_iterator block = pcAndBlock->second;
		unsigned int pc                            = pcAndBlock->first;
		
		const ir::PTXInstruction& ptx = static_cast<
			const ir::PTXInstruction&>(instructions[pc]);
			
		ir::ControlFlowGraph::const_iterator target =
			block->get_branch_edge()->tail;
					
		IdToPCMap::const_iterator targetPC = pcs.find(target->id);
		assert(targetPC != pcs.end());
		
		report("  setting branch target of '" << ptx.toString()
			<< "' to " << targetPC->second);
		
		instructions[pc].branchTargetInstruction = targetPC->second;
		
		TFAnalysis::BlockVector frontier =
			tfAnalysis->getThreadFrontier(block);
		
		if(_gen6)
		{
			ir::ControlFlowGraph::const_iterator firstBlock =
				k.cfg()->end();
		
			TFAnalysis::Priority highest = 0;
		
			frontier.push_back(block->get_branch_edge()->tail);
			
			if(block->has_fallthrough_edge())
			{
				frontier.push_back(
					block->get_fallthrough_edge()->tail);
			}
		
			// gen6 jumps to the block with the highest priority
			for(TFAnalysis::BlockVector::const_iterator
				stalledBlock = frontier.begin();
				stalledBlock != frontier.end(); ++stalledBlock)
			{
				TFAnalysis::Priority priority =
					tfAnalysis->getPriority(*stalledBlock);
				if(priority >= highest)
				{
					highest    = priority;
					firstBlock = *stalledBlock;
				}
			}
		
			// the reconverge point is the first block in the frontier
			assert(firstBlock != k.cfg()->end());
	
			IdToPCMap::const_iterator reconverge =
				pcs.find(firstBlock->id);
			assert(reconverge != pcs.end());
			instructions[pc].reconvergeInstruction =
				reconverge->second;
			report("   re-converge point "  << reconverge->second
				<< ", " << firstBlock->label());
		}
		else
		{
			// Does this branch need to check for re-convergence?
			// Or: are any of the target's predecessors
			//       in the thread frontier?
			bool needsCheck = false;
			
			for(TFAnalysis::BlockVector::const_iterator
				stalledBlock = frontier.begin();
				stalledBlock != frontier.end(); ++stalledBlock)
			{
				if((*stalledBlock)->id == target->id)
				{
					needsCheck = true;
					report("   needs re-convergence check.");
					break;
				}
			}
			
			instructions[pc].needsReconvergenceCheck = needsCheck;
		}
	}
	
	report(" Updating fallthrough targets");
	for(PCToBlockMap::const_iterator pcAndBlock = fallthroughPCs.begin();
		pcAndBlock != fallthroughPCs.end(); ++pcAndBlock)
	{
		ir::ControlFlowGraph::const_iterator block = pcAndBlock->second;
		unsigned int pc                            = pcAndBlock->first;
		
		const ir::PTXInstruction& ptx = static_cast<
			const ir::PTXInstruction&>(instructions[pc]);
			
		ir::ControlFlowGraph::const_iterator target =
			block->get_fallthrough_edge()->tail;
					
		IdToPCMap::const_iterator targetPC = pcs.find(target->id);
		assert(targetPC != pcs.end());
		
		report("  setting branch target of '" << ptx.toString()
			<< "' to " << targetPC->second);
		
		instructions[pc].branchTargetInstruction = targetPC->second;
	}
	
}
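The pass above is essentially a two-pass layout: it first emits instructions block by block in priority order while recording each block's starting PC, then rewrites branch (and artificial fallthrough) targets from block ids into PCs. A standalone sketch of that scheme, with hypothetical Inst and Block types in place of ir::PTXInstruction and ir::BasicBlock:

#include <functional>
#include <map>
#include <unordered_map>
#include <vector>

// Hypothetical stand-ins for ir::PTXInstruction and ir::BasicBlock.
struct Inst  { int targetBlock = -1; int targetPC = -1; };
struct Block { unsigned id; std::vector<Inst> body; };

typedef std::multimap<int, Block, std::greater<int>> PriorityMap;

inline std::vector<Inst> layoutByPriority(const PriorityMap& priorityToBlocks)
{
	std::vector<Inst>                 program;
	std::unordered_map<unsigned, int> blockToPC;

	// Pass 1: emit instructions in priority order, recording start PCs.
	for(auto& priorityAndBlock : priorityToBlocks)
	{
		const Block& block = priorityAndBlock.second;

		blockToPC[block.id] = static_cast<int>(program.size());
		program.insert(program.end(), block.body.begin(), block.body.end());
	}

	// Pass 2: patch branch targets from block ids to instruction PCs.
	for(auto& inst : program)
	{
		if(inst.targetBlock >= 0)
		{
			inst.targetPC = blockToPC.at(inst.targetBlock);
		}
	}

	return program;
}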
Example #23
void DivergenceAnalysis::_findBranches(branch_set& branches)
{
	Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis");
	assert(dfgAnalysis != 0);

	DataflowGraph &dfg = static_cast<DataflowGraph&>(*dfgAnalysis);

	/* Create a list of branches that can be divergent, that is,
		branches that are not bra.uni and that have a predicate */
	DataflowGraph::iterator block = dfg.begin();
	DataflowGraph::iterator endBlock = dfg.end();

	/* Post-dominator tree */
	PostdominatorTree* dtree = (PostdominatorTree*)
		(getAnalysis("PostDominatorTreeAnalysis"));

	report(" Finding branches");
	for (; block != endBlock; ++block) {
		ir::PTXInstruction *ptxInstruction = NULL;

		if (block->instructions().size() > 0) {
			/* Branch instructions can only be the last
			instruction of a basic block */
			DataflowGraph::Instruction& lastInstruction =
			*(--block->instructions().end());

			if (typeid(ir::PTXInstruction) == typeid(*(lastInstruction.i))) {
				ptxInstruction =
					static_cast<ir::PTXInstruction*>(lastInstruction.i);

				if (ptxInstruction->opcode == ir::PTXInstruction::Bra) {
					report("  examining " << ptxInstruction->toString());
					
					if(ptxInstruction->uni == true) { 
						report("   eliminated, uniform...");
						continue;
					}
					
					if(lastInstruction.s.size() == 0) {
						report("   eliminated, wrong source count ("
							<< lastInstruction.s.size() << ")...");
						continue;
					}
					
					assert(lastInstruction.s.size() == 1);
					DataflowGraph::iterator postDomBlock =
						dfg.getCFGtoDFGMap()[
							dtree->getPostDominator(block->block())];
					if (postDomBlock != dfg.end()) {
						BranchInfo newBranch(&(*block), &(*postDomBlock), 
							lastInstruction, _divergGraph);
						branches.insert(newBranch);
						report("   is potentially divergent...");
					}
					else {
						report("   eliminated, no post-dominator...");
					}
				}
			}
		}
	}
}
void ReversePostOrderTraversal::analyze(Function& function)
{
	typedef util::LargeSet<BasicBlock*> BlockSet;
	typedef std::stack<BasicBlock*>     BlockStack;

	order.clear();
	
	BlockSet   visited;
	BlockStack stack;
	
	auto cfgAnalysis = getAnalysis("ControlFlowGraph");
	auto cfg         = static_cast<ControlFlowGraph*>(cfgAnalysis);	

	report("Creating reverse post order traversal over function '" +
		function.name() + "'");

	// reverse post order is reversed topological order
	stack.push(&*function.entry_block());
	
	while(order.size() != function.size())
	{
		if(stack.empty())
		{
			for(auto block : order)
			{
				auto successors = cfg->getSuccessors(*block);
				
				for(auto successor : successors)
				{
					if(visited.insert(successor).second)
					{
						stack.push(successor);
						break;
					}
				}
				
				if(!stack.empty()) break;
			}
		}
		
		assertM(!stack.empty(), (function.size() - order.size())
			<< " blocks are not connected.");
		
		while(!stack.empty())
		{
			BasicBlock* top = stack.top();
			stack.pop();
		
			auto successors = cfg->getSuccessors(*top);
			
			for(auto successor : successors)
			{
				assert(successor != nullptr);
				
				auto predecessors = cfg->getPredecessors(*successor);
				
				bool allPredecessorsVisited = true;
		
				for(auto predecessor : predecessors)
				{
					if(visited.count(predecessor) == 0)
					{
						allPredecessorsVisited = false;
						break;
					}
				}
				
				if(!allPredecessorsVisited) continue;
				
				if(visited.insert(successor).second)
				{
					stack.push(successor);
				}
			}

			order.push_back(top);
		
			report(" " << top->name());
		}
	}
	
	// reverse the order
	std::reverse(order.begin(), order.end());
}
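The pass above computes the ordering over Ocelot's ControlFlowGraph analysis while also handling blocks that are not reachable from the entry. A minimal standalone sketch of the underlying algorithm (post-order DFS followed by a reversal) on a plain adjacency list, ignoring unreachable nodes for brevity:

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

// Sketch only: reverse post order = reversed finish-time order of a DFS
// started at the entry node.
inline std::vector<int> reversePostOrder(
	const std::vector<std::vector<int>>& successors, int entry)
{
	std::vector<bool> visited(successors.size(), false);
	std::vector<int>  order;

	// Iterative post-order DFS with an explicit stack of (node, next child).
	std::vector<std::pair<int, std::size_t>> stack{{entry, 0}};
	visited[entry] = true;

	while(!stack.empty())
	{
		auto& frame = stack.back();

		if(frame.second < successors[frame.first].size())
		{
			int next = successors[frame.first][frame.second++];

			if(!visited[next])
			{
				visited[next] = true;
				stack.push_back({next, 0});
			}
		}
		else
		{
			// All successors finished: record the node in post order.
			order.push_back(frame.first);
			stack.pop_back();
		}
	}

	std::reverse(order.begin(), order.end());

	return order;
}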
analysis::DivergenceAnalysis& DivergenceLinearScan::_diva()
{
	Analysis* divA = getAnalysis("DivergenceAnalysis");
	assertM(divA != NULL, "Got null divergence analysis");
	return *static_cast<analysis::DivergenceAnalysis*>(divA);
}
Example #26
void DivergenceAnalysis::_analyzeDataFlow()
{
	Analysis* dfg = getAnalysis("DataflowGraphAnalysis");
	assert(dfg != 0);

	DataflowGraph &nonConstGraph = static_cast<DataflowGraph&>(*dfg);
	DataflowGraph::const_iterator block = nonConstGraph.begin();
	DataflowGraph::const_iterator endBlock = nonConstGraph.end();

	report("Analyzing data flow");

	/* 1) Analyze the data flow adding divergence sources */
	for (; block != endBlock; ++block) {
		report(" for block " << block->label());
		
		DataflowGraph::PhiInstructionVector::const_iterator
			phiInstruction = block->phis().begin();
		DataflowGraph::PhiInstructionVector::const_iterator
			endPhiInstruction = block->phis().end();
		/* Go over the phi functions and add their dependences to the
		 * dependence graph. */
		for (; phiInstruction != endPhiInstruction; phiInstruction++) {
			for (DataflowGraph::RegisterVector::const_iterator
				si = phiInstruction->s.begin();
				si != phiInstruction->s.end(); ++si) {
				_divergGraph.insertEdge(si->id, phiInstruction->d.id);
				report("  phi r" << phiInstruction->d.id << " <- r" << si->id);
			}
		}

		DataflowGraph::InstructionVector::const_iterator
			ii = block->instructions().begin();
		DataflowGraph::InstructionVector::const_iterator
			iiEnd = block->instructions().end();
		for (; ii != iiEnd; ++ii) {

			ir::PTXInstruction *ptxInstruction = NULL;
			bool atom = false;
			bool functionStackArgument = false;
			bool localMemoryOperand = false;
			bool isCall = false;

			std::set<const ir::PTXOperand*> divergenceSources;

			/* First we populate divergenceSources with all the
			 * source operands that might diverge.
			 */
			if (typeid(ir::PTXInstruction) == typeid(*(ii->i))) {
				ptxInstruction = static_cast<ir::PTXInstruction*> (ii->i);
				if (isDivergenceSource(ptxInstruction->a)) {
					divergenceSources.insert(&ptxInstruction->a);
				}
				if (isDivergenceSource(ptxInstruction->b)) {
					divergenceSources.insert(&ptxInstruction->b);
				}
				if (isDivergenceSource(ptxInstruction->c)) {
					divergenceSources.insert(&ptxInstruction->c);
				}

				if (ptxInstruction->opcode == ir::PTXInstruction::Atom){
					atom = true;
				}
				
				if (ptxInstruction->mayHaveAddressableOperand()) {
					if (_doesOperandUseLocalMemory(ptxInstruction->a)) {
						localMemoryOperand = true;
					}
				}
				
				if (ptxInstruction->opcode == ir::PTXInstruction::Call){
					isCall = true;
				}
			}

			/* Second, if this is a function call, we populate divergenceSources
			 * with all the source operands that might diverge in a call.
			 */
			if (_kernel->function()) {
				if (typeid(ir::PTXInstruction) == typeid(*(ii->i))) {
					ptxInstruction = static_cast<ir::PTXInstruction*> (ii->i);
				
					if (ptxInstruction->mayHaveAddressableOperand()) {
						if (_isOperandAnArgument(ptxInstruction->a)) {
							functionStackArgument = true;
							report("  operand '" << ptxInstruction->a.toString()
								<< "' is a function call argument.");
						}
					}
				}
			}
						
			/* Third, we link the source operands to the
			 * destination operands, and check if the destination
			 * can diverge. This will only happen in case the
			 * instruction is atomic. */
			DataflowGraph::RegisterPointerVector::const_iterator
				destinationReg = ii->d.begin();
			DataflowGraph::RegisterPointerVector::const_iterator
				destinationEndReg = ii->d.end();

			for (; destinationReg != destinationEndReg; destinationReg++) {
				if (divergenceSources.size() != 0) {
					std::set<const ir::PTXOperand*>::iterator
						divergenceSource = divergenceSources.begin();
					std::set<const ir::PTXOperand*>::iterator
						divergenceSourceEnd = divergenceSources.end();

					for (; divergenceSource != divergenceSourceEnd;
						divergenceSource++) {
						report("  destination register r"
							<< *destinationReg->pointer
							<< " is derived from a divergence source r"
							<< *divergenceSource);
						_divergGraph.insertEdge(*divergenceSource,
							*destinationReg->pointer);
					}
				}

				DataflowGraph::RegisterPointerVector::const_iterator
					sourceReg = ii->s.begin();
				DataflowGraph::RegisterPointerVector::const_iterator
					sourceRegEnd = ii->s.end();

				for (; sourceReg != sourceRegEnd; sourceReg++) {
					_divergGraph.insertEdge(*sourceReg->pointer,
						*destinationReg->pointer);
					reportE(REPORT_ALL_DEPENDENCES,
						"  r" << *destinationReg->pointer
						<< " <- r" << *sourceReg->pointer);
				}

				if (atom || functionStackArgument ||
					localMemoryOperand || isCall) {
					
					report("  destination register r"
						<< *destinationReg->pointer
						<< " is a divergence source.");
					_divergGraph.insertNode(*destinationReg->pointer);
					_divergGraph.setAsDiv(*destinationReg->pointer);
				}
			}
		}
	}
	/* 2) Computes the divergence propagation */
	_divergGraph.computeDivergence();
}
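computeDivergence() then propagates divergence through the graph built above. One natural way to implement such a propagation (a sketch, not necessarily Ocelot's DivergenceGraph internals) is forward reachability over the dependence edges, starting from the registers marked as divergence sources:

#include <deque>
#include <set>
#include <unordered_map>
#include <vector>

typedef unsigned int RegisterId;

// Sketch only: every register reachable from a divergence source along a
// dependence edge is marked divergent via a breadth-first worklist.
inline std::set<RegisterId> propagateDivergence(
	const std::unordered_map<RegisterId, std::vector<RegisterId>>& edges,
	const std::set<RegisterId>& sources)
{
	std::set<RegisterId>   divergent(sources);
	std::deque<RegisterId> worklist(sources.begin(), sources.end());

	while(!worklist.empty())
	{
		RegisterId node = worklist.front();
		worklist.pop_front();

		auto successors = edges.find(node);
		if(successors == edges.end()) continue;

		for(RegisterId dependent : successors->second)
		{
			// Any register computed from a divergent register is divergent.
			if(divergent.insert(dependent).second)
			{
				worklist.push_back(dependent);
			}
		}
	}

	return divergent;
}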