Beispiel #1
0
/*! \brief Collects the blocks found on the paths that leave `block`

	Every successor of `block` other than the post-dominator reported by
	the post-dominator tree is used as the root of a
	buildDivergentSubgraph() traversal; the union of the blocks recorded
	by those traversals is returned.

	\param block The block whose outgoing paths are explored
	\return The set of blocks recorded by the traversals */
DivergenceAnalysis::block_set
	DivergenceAnalysis::_getDivergentBlocksInPostdominanceFrontier(
	const DataflowGraph::iterator &block) {

	const Analysis* analysis = getAnalysis("DataflowGraphAnalysis");
	assert(analysis != 0);

	// getCFGtoDFGMap() is called through a non-const reference below,
	// hence the const_cast on the analysis result
	DataflowGraph &graph = const_cast<DataflowGraph&>(
		static_cast<const DataflowGraph&>(*analysis));

	PostdominatorTree* pdomTree = (PostdominatorTree*)
		(getAnalysis("PostDominatorTreeAnalysis"));

	auto postDominator = graph.getCFGtoDFGMap()[
		pdomTree->getPostDominator(block->block())];

	block_set collected;

	for (const auto &successor : block->successors()) {
		// an edge straight to the post dominator contributes no blocks
		if (successor == postDominator) continue;

		block_set reachedFromSuccessor;
		buildDivergentSubgraph(reachedFromSuccessor, successor,
			postDominator);

		collected.insert(reachedFromSuccessor.begin(),
			reachedFromSuccessor.end());
	}

	return collected;
}
Beispiel #2
0
/*! \brief Reports whether the last instruction of a block is a divergent
	branch, as decided by isDivBranch; a block without instructions is
	never reported as divergent */
bool DivergenceAnalysis::isDivBlock(const DataflowGraph::iterator &block) const
{
	const bool hasInstructions = !block->instructions().empty();

	// only the final instruction of the block is inspected
	return hasInstructions && isDivBranch(block->instructions().back());
}
Beispiel #3
0
/*! \brief Recursively records the blocks reachable from `block` without
	passing through `postDominator`

	\param graph Receives every block visited; also serves as the
		visited set that terminates the recursion
	\param block The block to visit next
	\param postDominator The block at which the traversal stops
	\return true if some path explored from `block` has an edge into
		`postDominator` */
static bool buildDivergentSubgraph(
	DivergenceAnalysis::block_set& graph,
	const DataflowGraph::iterator &block,
	const DataflowGraph::iterator &postDominator) {

	// a block holding a barrier ends the walk and is not recorded
	if (hasBarrier(block)) return false;

	// a failed insert means the block was already visited (e.g. a loop)
	const bool firstVisit = graph.insert(block).second;
	if (!firstVisit) return false;

	bool reachedPostDominator = false;

	for (const auto &next : block->successors()) {
		if (next == postDominator) {
			// the post dominator itself is never entered
			reachedPostDominator = true;
		}
		else if (buildDivergentSubgraph(graph, next, postDominator)) {
			// every other successor is always explored, since the
			// traversal records blocks as a side effect
			reachedPostDominator = true;
		}
	}

	return reachedPostDominator;
}
Beispiel #4
0
/*! \brief Scans a block for a barrier-like PTX instruction

	\return true if the block contains a PTX instruction whose opcode is
		Bar, or a call whose first operand is a function name beginning
		with "_Z_intrinsic_pseudo_tex"; false otherwise */
static bool hasBarrier(const DataflowGraph::iterator &block) {

	for (auto &instruction : block->instructions()) {
		// only instructions whose dynamic type is exactly PTXInstruction
		// are examined
		if (typeid(ir::PTXInstruction) != typeid(*(instruction.i))) {
			continue;
		}

		auto ptx = static_cast<ir::PTXInstruction*>(instruction.i);

		if (ptx->opcode == ir::PTXInstruction::Bar) return true;

		// texture instruction intrinsics: calls to a named function
		// whose identifier starts with the pseudo-tex prefix
		const bool isTextureIntrinsic = ptx->isCall()
			&& ptx->a.addressMode == ir::PTXOperand::FunctionName
			&& ptx->a.identifier.find("_Z_intrinsic_pseudo_tex") == 0;

		if (isTextureIntrinsic) return true;
	}

	return false;
}
Beispiel #5
0
/*! \brief Checks whether fewer than two successors of `block` remain once
	trivially exiting paths are discounted

	A successor counts as exiting when, following a chain of
	single-successor blocks without instructions, the walk reaches either
	the last block of the dataflow graph or a block whose only instruction
	reports isExit().

	\param block The block whose successors are examined
	\return true if (number of successors - exiting paths) < 2 */
bool DivergenceAnalysis::_hasTrivialPathToExit(
	const DataflowGraph::iterator &block) const {

	const Analysis* analysis = getAnalysis("DataflowGraphAnalysis");
	assert(analysis != 0);

	const DataflowGraph &dfg =
		static_cast<const DataflowGraph&>(*analysis);

	// the last block of the graph is treated as the exit
	auto exit = --dfg.end();

	// We can ignore divergent threads that immediately exit
	unsigned int exitingPaths = 0;

	for (const auto &successor : block->successors()) {
		auto current = successor;

		for (;;) {
			if (current == exit) {
				++exitingPaths;
				break;
			}

			// only straight-line chains are followed
			if (current->successors().size() != 1) break;

			if (current->instructions().empty()) {
				// a block without instructions is skipped over
				current = *current->successors().begin();
				continue;
			}

			// first block with instructions: it ends the walk, and counts
			// as exiting only if its single instruction is an exit
			if (current->instructions().size() == 1) {
				const ir::PTXInstruction &only =
					*(static_cast<ir::PTXInstruction *> (
					current->instructions().back().i));

				if (only.isExit()) {
					++exitingPaths;
				}
			}
			break;
		}
	}

	return block->successors().size() - exitingPaths < 2;
}
	/*! \brief Replaces the guard predicate of one instruction by a SelP
		instruction inserted right after it

		The instruction at position `id` is made unconditional (its guard
		condition becomes PT) and redirected to write a register obtained
		from _tempRegister(). The inserted SelP has the original
		destination as d and b, the temporary as a, and the original guard
		predicate as c.

		\param block The block that contains the instruction
		\param id The position of the instruction inside the block */
	void ConvertPredicationToSelectPass::_replacePredicate( 
		DataflowGraph::iterator block, unsigned int id )
	{
		DataflowGraph::InstructionVector::const_iterator 
			target( block->instructions().begin() );
		std::advance( target, id );

		report( "  Converting instruction " << target->i->toString() );

		ir::PTXInstruction& original = static_cast< ir::PTXInstruction& >( 
			*target->i );

		ir::PTXInstruction select( ir::PTXInstruction::SelP );

		// d, b and a all start out as copies of the original destination;
		// a is then pointed at the temporary register
		select.d = original.d;
		select.b = select.d;
		select.a = select.b;
		select.a.reg = _tempRegister();

		// the old guard predicate becomes the selector operand
		select.c = original.pg;

		// the original instruction now runs unguarded and writes the
		// temporary instead of its former destination
		original.pg.condition = ir::PTXOperand::PT;
		original.d.reg = select.a.reg;

		_kernel->dfg()->insert( block, select, id + 1 );
	}
	/*! \brief Converts every predicated instruction of a block, except
		Bra, Call and Ret, into an unpredicated instruction followed by a
		select (see _replacePredicate)

		_replacePredicate inserts a new instruction into the container
		being iterated. To stay correct regardless of whether that
		container keeps iterators valid across insertions, the position is
		tracked as an index and the iterator is re-derived after every
		insertion.

		\param block The block whose instructions are rewritten in place */
	void ConvertPredicationToSelectPass::_runOnBlock( 
		DataflowGraph::iterator block )
	{
		unsigned int index = 0;

		for( DataflowGraph::InstructionVector::const_iterator 
			instruction = block->instructions().begin(); 
			instruction != block->instructions().end();
			++instruction, ++index )
		{
			ir::PTXInstruction& ptx = static_cast< ir::PTXInstruction& >( 
				*instruction->i );

			// control-flow instructions keep their guard predicate
			if( ptx.opcode == ir::PTXInstruction::Bra 
				|| ptx.opcode == ir::PTXInstruction::Call 
				|| ptx.opcode == ir::PTXInstruction::Ret )
			{
				continue;
			}

			// nothing to do for instructions that are not predicated
			if( ptx.pg.condition == ir::PTXOperand::PT )
			{
				continue;
			}

			_replacePredicate( block, index );

			// The insertion above may have invalidated 'instruction';
			// point it back at the instruction that was just converted so
			// the loop increment moves on to the inserted select.
			instruction = block->instructions().begin();
			std::advance( instruction, index );
		}
	}
Beispiel #8
0
/*! \brief Counts the successors of `block` from which the post-dominator
	of `block` is reached

	A successor is counted when it is the post-dominator itself or when
	doAnyDivergentPathsReachThePostDominator() returns true for it.

	\param block The block whose successors are examined
	\return The number of counted successors */
unsigned int DivergenceAnalysis::_numberOfDivergentPathsToPostDominator(
	const DataflowGraph::iterator &block) const {

	const Analysis* analysis = getAnalysis("DataflowGraphAnalysis");
	assert(analysis != 0);

	// getCFGtoDFGMap() is called through a non-const reference below,
	// hence the const_cast on the analysis result
	DataflowGraph &graph = const_cast<DataflowGraph&>(
		static_cast<const DataflowGraph&>(*analysis));

	PostdominatorTree* pdomTree = (PostdominatorTree*)
		(getAnalysis("PostDominatorTreeAnalysis"));

	auto postDominator = graph.getCFGtoDFGMap()[
		pdomTree->getPostDominator(block->block())];

	unsigned int pathCount = 0;

	for (const auto &successor : block->successors()) {
		// a direct edge to the post dominator always counts
		bool reaches = (successor == postDominator);

		if (!reaches) {
			// each successor is explored with a fresh visited set
			block_set visited;
			reaches = doAnyDivergentPathsReachThePostDominator(visited,
				successor, postDominator);
		}

		if (reaches) {
			++pathCount;
		}
	}

	report("  There are " << pathCount << " divergent paths from "
		<< block->label() << " to post-dominator " << postDominator->label());

	return pathCount;
}
/*! \brief Searches the kernel for the divergent conditional branch whose
	two targets give the largest unification gain and, if that gain
	exceeds 10.0, weaves the two targets together

	A branch is a candidate when its block ends with a conditional branch,
	DivergenceAnalysis reports the block as divergent, and neither target
	has a path to the other (as reported by thereIsPathFromB1toB2).

	The surrounding do/while currently executes exactly once; the
	iterate-until-no-gain condition is commented out.

	\param k The kernel to transform */
void BlockUnificationPass::runOnKernel( ir::Kernel& k )
{
	typedef std::set<ir::ControlFlowGraph::BasicBlock*> VisitedSet;

	InstructionConverter instConv;
	ir::PTXInstruction::ComputeCapability deviceCapability = ir::PTXInstruction::Cap_2_0;
	DataflowGraph::iterator unificationBranch;
	DataflowGraph::iterator unificationTarget1;
	DataflowGraph::iterator unificationTarget2;
	BlockMatcher::MatrixPath bestPath;
	float largestGain = 0.0;

	// analyze kernel for divergence
	DivergenceAnalysis divAnalysis;
	divAnalysis.runOnKernel(k);

	do {
		largestGain = 0.0;

		DataflowGraph::iterator block = k.dfg()->begin();
		for (; block != k.dfg()->end(); ++block) {
			ir::ControlFlowGraph::const_iterator irBlock = block->block();
			DataflowGraph::const_iterator constBlock = block;

			if (irBlock->endsWithConditionalBranch() &&
				divAnalysis.isDivBlock(constBlock)
			) {
				// get the fallthrough block
				DataflowGraph::iterator fallthroughBlock = block->fallthrough();

				// get the branch block: the first target that is not the
				// fallthrough block
				DataflowGraph::iterator branchBlock = fallthroughBlock;
				DataflowGraph::BlockPointerSet branchTargets = block->targets();
				DataflowGraph::BlockPointerSet::const_iterator it =
						branchTargets.begin();
				for (; it != branchTargets.end(); ++it) {
					if (*it != fallthroughBlock) {
						branchBlock = *it;
						break;
					}
				}
				assertM(branchBlock != fallthroughBlock,
						"Block unification pass error: could not find fallthrough");

				ir::PostdominatorTree* pdomTree = k.pdom_tree();
				ir::ControlFlowGraph::const_iterator postDomBlk =
						pdomTree->getPostDominator(block->block());

				// Scratch sets handed to the path search live on the stack
				// so that nothing is leaked per candidate branch.
				// NOTE(review): assumes thereIsPathFromB1toB2 only borrows
				// the set and does not take ownership of it - confirm.
				VisitedSet visitedFromBranch;
				bool haveBranch2FallthroughPath = thereIsPathFromB1toB2(
						branchBlock->block(), fallthroughBlock->block(),
						postDomBlk, &visitedFromBranch);
				VisitedSet visitedFromFallthrough;
				bool haveFallthrough2BranchPath = thereIsPathFromB1toB2(
						fallthroughBlock->block(), branchBlock->block(),
						postDomBlk, &visitedFromFallthrough);
				if (!haveBranch2FallthroughPath && !haveFallthrough2BranchPath) {
					// Calculate branch targets' unification gain
					BlockMatcher::MatrixPath path;
					float gain = BlockMatcher::calculateUnificationGain(
							k.dfg(), *fallthroughBlock, *branchBlock, path,
							instConv, deviceCapability);

					// remember the best candidate seen so far
					if (gain > largestGain) {
						largestGain = gain;
						unificationBranch = block;
						unificationTarget1 = fallthroughBlock;
						unificationTarget2 = branchBlock;
						bestPath = path;
					}
				}
			}
		}

		if (largestGain > 10.0) {
			// Unify the basic block pair with biggest gain (if there's one)
			cout << ">>>>> unifying blocks: " << unificationTarget1->block()->label
								<< " and " << unificationTarget2->block()->label << std::endl;
			weaveBlocks(unificationBranch, unificationTarget1, unificationTarget2, bestPath, k.dfg());
		}

		// refresh divergence analysis data
		divAnalysis.run();
	} while (false); //(largestGain > 0.0);
}
/*! \brief Replaces two targets of a branch by a chain of newly created
	blocks, driven step by step by a BlockExtractor

	For every step of the extractor either one unified block is created
	(steps Match and Substitution) or a fallthrough/branch pair of blocks
	is created (any other step). Afterwards the terminating branch of
	branchBlock is erased if the block has no targets left, the outgoing
	branch edges of target1 and target2 are copied to the last blocks
	created, both targets are erased, and replaceRegisters() is called.
	A progress message is written to std::cerr.

	\param branchBlock The block ending with the branch to both targets
	\param target1 First target; new blocks are inserted between the
		current fallthrough-side block and target1
	\param target2 Second target; branch-side blocks are inserted before it
	\param extractionPath Path handed to the BlockExtractor
	\param dfg The dataflow graph that is modified in place */
void BlockUnificationPass::weaveBlocks(DataflowGraph::iterator branchBlock, DataflowGraph::iterator target1, DataflowGraph::iterator target2, BlockMatcher::MatrixPath& extractionPath, DataflowGraph* dfg)
{
	// cursors to the most recently created block on each side; both start
	// at branchBlock
	DataflowGraph::iterator oldFallthroughBlock = branchBlock;
	DataflowGraph::iterator oldBranchBlock = branchBlock;

	// get branch predicate
	ir::ControlFlowGraph::const_iterator irBlock = branchBlock->block();
	ir::Instruction* branchInst = irBlock->getTerminator();
	ir::PTXInstruction* branchInstPtx = static_cast<ir::PTXInstruction*>(branchInst);
	ir::PTXOperand* branchPredicate = &(branchInstPtx->pg);

	// every generated label is "$BBweave_<target1>_<target2>_<kind>_<n>"
	std::string labelPrefix = "$BBweave_" + target1->block()->label + "_" + target2->block()->label;
	int blockNum = 0;

	// while not consumed path, generate basic blocks
	BlockExtractor extractor(dfg, target1, target2, extractionPath, *branchPredicate);
	while (extractor.hasNext()) {
		// NOTE(review): nextStep() is queried twice here; presumably it
		// only peeks at the next step without consuming it - confirm
		if (extractor.nextStep() == BlockMatcher::Match ||
				extractor.nextStep() == BlockMatcher::Substitution) {
			// block label
			std::stringstream blockLabel;
			blockLabel << labelPrefix << "_uni_" << blockNum++;

			// create block
			DataflowGraph::iterator newUnifiedBlock = dfg->insert(oldFallthroughBlock, target1, blockLabel.str());
			extractor.extractUnifiedBlock(newUnifiedBlock);

			// link blocks
			dfg->addEdge(newUnifiedBlock, target2, ir::ControlFlowGraph::Edge::Branch);
			if (oldFallthroughBlock == oldBranchBlock) {
				// Both cursors point to the same block (branchBlock, or
				// the unified block created by the previous step).

				// Remove the edge oldBranchBlock -> target2: the block is
				// already connected to newUnifiedBlock by the insert above.
				dfg->removeEdge(oldBranchBlock, target2);
			} else {
				// The previous step produced a divergent pair: make both
				// sides of the pair reach newUnifiedBlock through branch
				// edges.
				dfg->removeEdge(oldFallthroughBlock, newUnifiedBlock);
				dfg->redirect(oldBranchBlock, target2, newUnifiedBlock);
				dfg->addEdge(oldFallthroughBlock, newUnifiedBlock, ir::ControlFlowGraph::Edge::Branch);

				// add goto in oldFallthroughBlock to newUnifiedBlock
				ir::PTXInstruction gotoPtx(ir::PTXInstruction::Bra);
				ir::PTXOperand gotoLabelOperand(blockLabel.str(), ir::PTXOperand::Label, ir::PTXOperand::s32);
				gotoPtx.setDestination(gotoLabelOperand);
				gotoPtx.uni = true;
				ir::Instruction& gotoInst = gotoPtx;
				dfg->insert(oldFallthroughBlock, gotoInst);
			}

			// after a unified step both cursors share the new block
			oldFallthroughBlock = newUnifiedBlock;
			oldBranchBlock = newUnifiedBlock;
		} else {

			// create fallthrough block
			std::stringstream fallthroughLabel;
			fallthroughLabel << labelPrefix << "_ft_" << blockNum++;
			DataflowGraph::iterator newFallthoughBlock = dfg->insert(oldFallthroughBlock, target1, fallthroughLabel.str());

			// create branch block
			std::stringstream branchLabel;
			branchLabel << labelPrefix << "_bra_" << blockNum++;
			DataflowGraph::iterator newBranchBlock = dfg->insert(oldBranchBlock, target2, branchLabel.str());

			// fill blocks
			extractor.extractDivergentBlocks(newFallthoughBlock, newBranchBlock);

			if (oldFallthroughBlock == branchBlock) {
				// still at the original branch: retarget its existing
				// branch instruction to the new branch block
				const ir::PTXOperand braLabelOperand(branchLabel.str(), ir::PTXOperand::Label, ir::PTXOperand::s32);
				branchInstPtx->setDestination(braLabelOperand);
			} else {
				// otherwise append a new branch, guarded by the original
				// branch predicate, to the previous block
				ir::PTXInstruction braPtx(ir::PTXInstruction::Bra);
				ir::PTXOperand braLabelOperand(branchLabel.str(), ir::PTXOperand::Label, ir::PTXOperand::s32);
				braPtx.setDestination(braLabelOperand);
				braPtx.setPredicate(*branchPredicate);
				ir::Instruction& bra = braPtx;
				dfg->insert(oldFallthroughBlock, bra);
			}

			// all needed edges were already created in block creation,
			// no more edge manipulation needed
			oldFallthroughBlock = newFallthoughBlock;
			oldBranchBlock = newBranchBlock;
		}
	}

	// remove branch instruction on BranchBlock if needed
	if (branchBlock->targets().size() == 0) {
		// branchBlock has no branch targets left, so its last instruction
		// (the branch) is erased
		unsigned int branchInstPos = branchBlock->instructions().size() - 1;
		dfg->erase(branchBlock, branchInstPos);
	}

	if (oldFallthroughBlock == oldBranchBlock) {
		// The weave ended with a unified block.
		// If target1 has no fall-through, then
		// switch branch and fall-through edges.

		if ( !(target1->block()->has_fallthrough_edge()) ) {
			dfg->setEdgeType(oldFallthroughBlock, target1, ir::ControlFlowGraph::BasicBlock::Edge::Branch);
			dfg->setEdgeType(oldFallthroughBlock, target2, ir::ControlFlowGraph::BasicBlock::Edge::FallThrough);
		}
	}
	// otherwise the weaved block finishes with a divergent section

	// copy target1's outgoing branch edges to the last fallthrough-side
	// block (which is the last unified block if the weave ended unified)
	dfg->copyOutgoingBranchEdges(target1, oldFallthroughBlock);
	// remove target1
	dfg->erase(target1);

	// copy target2's outgoing branch edges to the last unified block or
	// the last divergent branch block
	dfg->copyOutgoingBranchEdges(target2, oldBranchBlock);
	// remove target2
	dfg->erase(target2);

	// remove empty blocks or blocks with just a goto
	// TODO: removing these blocks is not essential for correctness, yet it
	// might increase performance.

	// replaces all uses of old registers to use new ones
	// in code after unified basic blocks
	std::cerr << "\n\n\nCalling register replacement\n\n\n";
	replaceRegisters(dfg, extractor);

	// recalculate live in and live out (currently disabled)
	//dfg->compute();
	//dfg->toSsa();
}
Beispiel #11
0
/*! \brief Collects the branches of the kernel that may be divergent

	A block contributes a BranchInfo when its last instruction is a PTX
	Bra that is not marked uni, has at least one source in the dataflow
	instruction, and whose block has a post-dominator present in the
	dataflow graph.

	\param branches Receives one BranchInfo per potentially divergent
		branch */
void DivergenceAnalysis::_findBranches(branch_set& branches)
{
	Analysis* analysis = getAnalysis("DataflowGraphAnalysis");
	assert(analysis != 0);

	DataflowGraph &dfg = static_cast<DataflowGraph&>(*analysis);

	/* Post-dominator tree */
	PostdominatorTree *pdomTree =
		(PostdominatorTree*) (getAnalysis("PostDominatorTreeAnalysis"));

	const DataflowGraph::iterator lastBlock = dfg.end();

	report(" Finding branches");
	for (DataflowGraph::iterator block = dfg.begin();
		block != lastBlock; ++block) {

		if (block->instructions().empty()) continue;

		/* Branch instructions can only be the last
			instruction of a basic block */
		DataflowGraph::Instruction& terminator =
			block->instructions().back();

		if (typeid(ir::PTXInstruction) != typeid(*(terminator.i))) {
			continue;
		}

		ir::PTXInstruction *branch =
			static_cast<ir::PTXInstruction*>(terminator.i);

		if (branch->opcode != ir::PTXInstruction::Bra) continue;

		report("  examining " << branch->toString());

		/* bra.uni cannot diverge */
		if (branch->uni == true) {
			report("   eliminated, uniform...");
			continue;
		}

		/* a branch without a source has no predicate to diverge on */
		if (terminator.s.size() == 0) {
			report("   eliminated, wrong source count ("
				<< terminator.s.size() << ")...");
			continue;
		}

		assert(terminator.s.size() == 1);

		DataflowGraph::iterator postDomBlock =
			dfg.getCFGtoDFGMap()[
				pdomTree->getPostDominator(block->block())];

		if (postDomBlock == dfg.end()) {
			report("   eliminated, no post-dominator...");
			continue;
		}

		BranchInfo newBranch(&(*block), &(*postDomBlock),
			terminator, _divergGraph);
		branches.insert(newBranch);
		report("   is potentially divergent...");
	}
}