/*
 * Runs a divergence analysis on the kernel and, for every interior block of
 * the dataflow graph that the analysis does not report as divergent, sets the
 * `uni` flag on the block's terminating `bra` instruction (if it has one).
 *
 * The first and last blocks of the dataflow graph are skipped (presumably the
 * entry and exit blocks -- the graph is assumed to contain at least those two).
 */
void SyncEliminationPass::runOnKernel(ir::Kernel& k)
{
	DivergenceAnalysis divAnalysis;

	divAnalysis.runOnKernel(k);

	// Iterate over [second block, last block): first and last are skipped.
	DataflowGraph::iterator block = ++k.dfg()->begin();
	DataflowGraph::iterator blockEnd = --k.dfg()->end();

	for (; block != blockEnd; ++block) {
		if (divAnalysis.isDivBlock(block)) {
			continue;
		}

		// A block without instructions has no terminator to inspect;
		// looking at the "last" instruction of an empty container would be
		// undefined behaviour.
		if (block->_instructions.empty()) {
			continue;
		}

		// Only the last instruction of the block can be its branch.
		const DataflowGraph::Instruction& inst = block->_instructions.back();
		if (typeid(ir::PTXInstruction) != typeid(*(inst.i))) {
			continue;
		}

		ir::PTXInstruction *ptxInst = static_cast<ir::PTXInstruction*> (inst.i);
		if (ptxInst->opcode == ir::PTXInstruction::Opcode::Bra) {
			ptxInst->uni = true;
		}
	}
}
/*
 * Looks for the most profitable pair of blocks to unify and weaves them.
 *
 * For every block that ends with a conditional branch and is reported as
 * divergent by the divergence analysis, the fallthrough block and the other
 * branch target are considered as a candidate pair. A pair is only eligible
 * when thereIsPathFromB1toB2() reports no path between the two targets in
 * either direction (the branching block's post-dominator is passed to that
 * helper). The eligible pair with the largest gain, as computed by
 * BlockMatcher::calculateUnificationGain(), is woven together with
 * weaveBlocks() if its gain exceeds a fixed threshold.
 *
 * The surrounding do/while currently executes exactly once; the original
 * repeat condition is kept as a comment.
 */
void BlockUnificationPass::runOnKernel( ir::Kernel& k )
{
	// Scratch set handed to thereIsPathFromB1toB2().
	typedef std::set<ir::ControlFlowGraph::BasicBlock*> VisitedSet;

	// Minimum gain required before a pair of blocks is actually unified.
	const float unificationGainThreshold = 10.0f;

	InstructionConverter instConv;
	ir::PTXInstruction::ComputeCapability deviceCapability = ir::PTXInstruction::Cap_2_0;

	// Best candidate found so far; only meaningful once largestGain > 0.
	DataflowGraph::iterator unificationBranch;
	DataflowGraph::iterator unificationTarget1;
	DataflowGraph::iterator unificationTarget2;
	BlockMatcher::MatrixPath bestPath;
	float largestGain = 0.0;

	// analyze kernel for divergence
	DivergenceAnalysis divAnalysis;
	divAnalysis.runOnKernel(k);

	do {
		largestGain = 0.0;

		DataflowGraph::iterator block = k.dfg()->begin();
		for (; block != k.dfg()->end(); ++block) {
			ir::ControlFlowGraph::const_iterator irBlock = block->block();
			DataflowGraph::const_iterator constBlock = block;

			// Only divergent conditional branches are candidates.
			if (!irBlock->endsWithConditionalBranch() ||
				!divAnalysis.isDivBlock(constBlock)
			) {
				continue;
			}

			// get the fallthrough block
			DataflowGraph::iterator fallthroughBlock = block->fallthrough();

			// get the branch block: the first target that is not the
			// fallthrough block
			DataflowGraph::iterator branchBlock = fallthroughBlock;
			const DataflowGraph::BlockPointerSet& branchTargets = block->targets();
			DataflowGraph::BlockPointerSet::const_iterator it =
					branchTargets.begin();
			for (; it != branchTargets.end(); ++it) {
				if (*it != fallthroughBlock) {
					branchBlock = *it;
					break;
				}
			}
			assertM(branchBlock != fallthroughBlock,
					"Block unification pass error: could not find fallthrough");

			ir::PostdominatorTree* pdomTree = k.pdom_tree();
			ir::ControlFlowGraph::const_iterator postDomBlk =
					pdomTree->getPostDominator(block->block());

			// The scratch sets live on the stack so they are released
			// automatically; heap-allocating them here leaked two sets per
			// candidate branch.
			// NOTE(review): assumes thereIsPathFromB1toB2() does not take
			// ownership of the pointer it is given -- confirm against its
			// definition.
			VisitedSet visitedFromBranch;
			bool haveBranch2FallthroughPath = thereIsPathFromB1toB2(
					branchBlock->block(), fallthroughBlock->block(),
					postDomBlk, &visitedFromBranch);
			VisitedSet visitedFromFallthrough;
			bool haveFallthrough2BranchPath = thereIsPathFromB1toB2(
					fallthroughBlock->block(), branchBlock->block(),
					postDomBlk, &visitedFromFallthrough);

			if (haveBranch2FallthroughPath || haveFallthrough2BranchPath) {
				continue;
			}

			// Calculate branch targets' unification gain
			BlockMatcher::MatrixPath path;
			float gain = BlockMatcher::calculateUnificationGain(
					k.dfg(), *fallthroughBlock, *branchBlock, path,
					instConv, deviceCapability);

			if (gain > largestGain) {
				largestGain = gain;
				unificationBranch = block;
				unificationTarget1 = fallthroughBlock;
				unificationTarget2 = branchBlock;
				bestPath = path;
			}
		}

		if (largestGain > unificationGainThreshold) {
			// Unify the basic block pair with biggest gain (if there's one)
			cout << ">>>>> unifying blocks: " << unificationTarget1->block()->label
								<< " and " << unificationTarget2->block()->label << std::endl;
			weaveBlocks(unificationBranch, unificationTarget1, unificationTarget2, bestPath, k.dfg());
		}

		// refresh divergence analysis data
		divAnalysis.run();
	} while (false); //(largestGain > 0.0);
}