示例#1
0
/// \brief Duplicate the innermost loop \p OrigLoop together with its
/// preheader.
///
/// Each clone is recorded in \p VMap under its original block and appended to
/// \p Blocks, cloned preheader first.  The new loop is registered in \p LI as
/// a sibling of \p OrigLoop (child of the same parent loop, or a top-level
/// loop).  \p DT gets a node for every clone: the cloned preheader is placed
/// under \p LoopDomBB, and each cloned loop block ends up under the clone of
/// its original's immediate dominator.  Finally the clones are moved in front
/// of \p Before in the function's block list.
///
/// \returns the newly created loop.
Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
                                   Loop *OrigLoop, ValueToValueMapTy &VMap,
                                   const Twine &NameSuffix, LoopInfo *LI,
                                   DominatorTree *DT,
                                   SmallVectorImpl<BasicBlock *> &Blocks) {
  assert(OrigLoop->getSubLoops().empty() &&
         "Loop to be cloned cannot have inner loop");
  Function *ParentFn = OrigLoop->getHeader()->getParent();
  Loop *EnclosingLoop = OrigLoop->getParentLoop();

  // Register the (still empty) clone at the same nesting level as the
  // original loop.
  Loop *ClonedLoop = new Loop();
  if (!EnclosingLoop)
    LI->addTopLevelLoop(ClonedLoop);
  else
    EnclosingLoop->addChildLoop(ClonedLoop);

  BasicBlock *SrcPreheader = OrigLoop->getLoopPreheader();
  assert(SrcPreheader && "No preheader");
  BasicBlock *ClonedPreheader =
      CloneBasicBlock(SrcPreheader, VMap, NameSuffix, ParentFn);
  // Map the preheader as well so that the loop PHIs can be renamed.
  VMap[SrcPreheader] = ClonedPreheader;
  Blocks.push_back(ClonedPreheader);

  // LoopInfo: the cloned preheader is only added to the enclosing loop (if
  // any), never to the cloned loop itself.
  if (EnclosingLoop)
    EnclosingLoop->addBasicBlockToLoop(ClonedPreheader, *LI);

  // DominatorTree: hang the cloned preheader below the caller-supplied block.
  DT->addNewBlock(ClonedPreheader, LoopDomBB);

  // First pass: clone every loop block.  Each clone is provisionally placed
  // under the cloned preheader in the dominator tree; the real immediate
  // dominators are set in the second pass, once all clones exist in VMap.
  for (BasicBlock *SrcBB : OrigLoop->getBlocks()) {
    BasicBlock *ClonedBB = CloneBasicBlock(SrcBB, VMap, NameSuffix, ParentFn);
    VMap[SrcBB] = ClonedBB;
    ClonedLoop->addBasicBlockToLoop(ClonedBB, *LI);
    DT->addNewBlock(ClonedBB, ClonedPreheader);
    Blocks.push_back(ClonedBB);
  }

  // Second pass: give each clone the clone of its original's immediate
  // dominator.
  for (BasicBlock *SrcBB : OrigLoop->getBlocks()) {
    BasicBlock *SrcIDom = DT->getNode(SrcBB)->getIDom()->getBlock();
    BasicBlock *ClonedBB = cast<BasicBlock>(VMap[SrcBB]);
    BasicBlock *ClonedIDom = cast<BasicBlock>(VMap[SrcIDom]);
    DT->changeImmediateDominator(ClonedBB, ClonedIDom);
  }

  // The clones were appended at the end of the function; move the preheader
  // and then everything from the cloned header onwards in front of Before.
  auto &FnBlocks = ParentFn->getBasicBlockList();
  FnBlocks.splice(Before->getIterator(), FnBlocks, ClonedPreheader);
  FnBlocks.splice(Before->getIterator(), FnBlocks,
                  ClonedLoop->getHeader()->getIterator(), ParentFn->end());

  return ClonedLoop;
}
示例#2
0
// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
// as taken, exiting edges as not-taken.
bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB,
                                                     const LoopInfo &LI) {
  Loop *L = LI.getLoopFor(BB);
  if (!L)
    return false;

  SmallVector<unsigned, 8> BackEdges;
  SmallVector<unsigned, 8> ExitingEdges;
  SmallVector<unsigned, 8> InEdges; // Edges from header to the loop.

  for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
    if (!L->contains(*I))
      ExitingEdges.push_back(I.getSuccessorIndex());
    else if (L->getHeader() == *I)
      BackEdges.push_back(I.getSuccessorIndex());
    else
      InEdges.push_back(I.getSuccessorIndex());
  }

  if (BackEdges.empty() && ExitingEdges.empty())
    return false;

  // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and
  // normalize them so that they sum up to one.
  BranchProbability Probs[] = {BranchProbability::getZero(),
                               BranchProbability::getZero(),
                               BranchProbability::getZero()};
  unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
                   (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
                   (ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT);
  if (!BackEdges.empty())
    Probs[0] = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
  if (!InEdges.empty())
    Probs[1] = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
  if (!ExitingEdges.empty())
    Probs[2] = BranchProbability(LBH_NONTAKEN_WEIGHT, Denom);

  if (uint32_t numBackEdges = BackEdges.size()) {
    auto Prob = Probs[0] / numBackEdges;
    for (unsigned SuccIdx : BackEdges)
      setEdgeProbability(BB, SuccIdx, Prob);
  }

  if (uint32_t numInEdges = InEdges.size()) {
    auto Prob = Probs[1] / numInEdges;
    for (unsigned SuccIdx : InEdges)
      setEdgeProbability(BB, SuccIdx, Prob);
  }

  if (uint32_t numExitingEdges = ExitingEdges.size()) {
    auto Prob = Probs[2] / numExitingEdges;
    for (unsigned SuccIdx : ExitingEdges)
      setEdgeProbability(BB, SuccIdx, Prob);
  }

  return true;
}
// Assign edge weights to the successors of BB using the "Loop Branch
// Heuristics": back edges and in-loop edges share LBH_TAKEN_WEIGHT, exiting
// edges share LBH_NONTAKEN_WEIGHT.
//
// Returns false (and sets nothing) when BB is not in a loop, or when it has
// neither a back edge nor an exiting edge; otherwise returns true.
bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB,
        const LoopInfo &LI) {
    Loop *L = LI.getLoopFor(BB);
    if (!L)
        return false;

    // Successor indices of BB, bucketed by where the edge leads.
    SmallVector<unsigned, 8> BackEdges;    // To the loop header.
    SmallVector<unsigned, 8> ExitingEdges; // Out of the loop.
    SmallVector<unsigned, 8> InEdges;      // To another block of the loop.

    for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
        const unsigned SuccIdx = I.getSuccessorIndex();
        if (!L->contains(*I))
            ExitingEdges.push_back(SuccIdx);
        else if (L->getHeader() == *I)
            BackEdges.push_back(SuccIdx);
        else
            InEdges.push_back(SuccIdx);
    }

    if (BackEdges.empty() && ExitingEdges.empty())
        return false;

    // Back edges: split the taken weight, but never go below NORMAL_WEIGHT.
    const uint32_t numBackEdges = BackEdges.size();
    if (numBackEdges != 0) {
        const uint32_t share = LBH_TAKEN_WEIGHT / numBackEdges;
        const uint32_t backWeight = share < NORMAL_WEIGHT ? NORMAL_WEIGHT : share;
        for (uint32_t i = 0; i != numBackEdges; ++i)
            setEdgeWeight(BB, BackEdges[i], backWeight);
    }

    // In-loop edges: same policy as back edges.
    const uint32_t numInEdges = InEdges.size();
    if (numInEdges != 0) {
        const uint32_t share = LBH_TAKEN_WEIGHT / numInEdges;
        const uint32_t inWeight = share < NORMAL_WEIGHT ? NORMAL_WEIGHT : share;
        for (uint32_t i = 0; i != numInEdges; ++i)
            setEdgeWeight(BB, InEdges[i], inWeight);
    }

    // Exiting edges: split the non-taken weight, clamped below by MIN_WEIGHT.
    const uint32_t numExitingEdges = ExitingEdges.size();
    if (numExitingEdges != 0) {
        const uint32_t share = LBH_NONTAKEN_WEIGHT / numExitingEdges;
        const uint32_t exitWeight = share < MIN_WEIGHT ? MIN_WEIGHT : share;
        for (uint32_t i = 0; i != numExitingEdges; ++i)
            setEdgeWeight(BB, ExitingEdges[i], exitWeight);
    }

    return true;
}
// Assign edge weights to the successors of BB using the "Loop Branch
// Heuristics": successors that are the loop header or another block of the
// loop share LBH_TAKEN_WEIGHT, successors outside the loop share
// LBH_NONTAKEN_WEIGHT.
//
// Returns false when BB is not in a loop, true otherwise.
// NOTE(review): true is returned even when BB has neither a back edge nor an
// exiting edge (only in-loop successors) -- confirm callers expect that.
bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
  Loop *L = LI->getLoopFor(BB);
  if (!L)
    return false;

  typedef SmallPtrSet<BasicBlock *, 8> SuccSet;

  // Distinct successor blocks of BB, bucketed by where the edge leads.
  SuccSet BackEdges;    // The loop header.
  SuccSet ExitingEdges; // Blocks outside the loop.
  SuccSet InEdges;      // Other blocks of the loop.

  for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
    BasicBlock *Succ = *I;
    if (!L->contains(Succ))
      ExitingEdges.insert(Succ);
    else if (L->getHeader() == Succ)
      BackEdges.insert(Succ);
    else
      InEdges.insert(Succ);
  }

  // Back edges: split the taken weight, but never go below NORMAL_WEIGHT.
  const uint32_t numBackEdges = BackEdges.size();
  if (numBackEdges != 0) {
    const uint32_t share = LBH_TAKEN_WEIGHT / numBackEdges;
    const uint32_t backWeight = share < NORMAL_WEIGHT ? NORMAL_WEIGHT : share;
    for (SuccSet::iterator SI = BackEdges.begin(), SE = BackEdges.end();
         SI != SE; ++SI)
      setEdgeWeight(BB, *SI, backWeight);
  }

  // In-loop edges: same policy as back edges.
  const uint32_t numInEdges = InEdges.size();
  if (numInEdges != 0) {
    const uint32_t share = LBH_TAKEN_WEIGHT / numInEdges;
    const uint32_t inWeight = share < NORMAL_WEIGHT ? NORMAL_WEIGHT : share;
    for (SuccSet::iterator SI = InEdges.begin(), SE = InEdges.end();
         SI != SE; ++SI)
      setEdgeWeight(BB, *SI, inWeight);
  }

  // Exiting edges: split the non-taken weight, clamped below by MIN_WEIGHT.
  const uint32_t numExitingEdges = ExitingEdges.size();
  if (numExitingEdges != 0) {
    const uint32_t share = LBH_NONTAKEN_WEIGHT / numExitingEdges;
    const uint32_t exitWeight = share < MIN_WEIGHT ? MIN_WEIGHT : share;
    for (SuccSet::iterator SI = ExitingEdges.begin(), SE = ExitingEdges.end();
         SI != SE; ++SI)
      setEdgeWeight(BB, *SI, exitWeight);
  }

  return true;
}
示例#5
0
/// Print, for every load, store and GEP of the analysed function and for each
/// loop surrounding it, the result of delinearizing its access function: the
/// add-recurrence, the recovered array dimensions and the subscripts, or
/// "failed to delinearize" when no consistent result was produced.
void Delinearization::print(raw_ostream &O, const Module *) const {
  O << "Delinearization on function " << F->getName() << ":\n";
  for (inst_iterator II = inst_begin(F), IE = inst_end(F); II != IE; ++II) {
    Instruction *MemInst = &(*II);

    // Only loads, stores and GEPs are analysed.
    const bool IsCandidate = isa<StoreInst>(MemInst) ||
                             isa<LoadInst>(MemInst) ||
                             isa<GetElementPtrInst>(MemInst);
    if (!IsCandidate)
      continue;

    // Walk outwards through all loops containing the access.  Instructions
    // that are not inside any loop are skipped because the walk is empty.
    const BasicBlock *Parent = MemInst->getParent();
    for (Loop *Scope = LI->getLoopFor(Parent); Scope != nullptr;
         Scope = Scope->getParentLoop()) {
      const SCEV *Access =
          SE->getSCEVAtScope(getPointerOperand(*MemInst), Scope);

      // Give up on this instruction if the base pointer cannot be found.
      const SCEVUnknown *Base =
          dyn_cast<SCEVUnknown>(SE->getPointerBase(Access));
      if (!Base)
        break;

      // Only add-recurrences (relative to the base) can be delinearized.
      Access = SE->getMinusSCEV(Access, Base);
      const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Access);
      if (!AddRec)
        break;

      O << "\n";
      O << "Inst:" << *MemInst << "\n";
      O << "In Loop with Header: " << Scope->getHeader()->getName() << "\n";
      O << "AddRec: " << *AddRec << "\n";

      SmallVector<const SCEV *, 3> Subscripts, Sizes;
      SE->delinearize(AddRec, Subscripts, Sizes, SE->getElementSize(MemInst));
      if (Subscripts.empty() || Sizes.empty() ||
          Subscripts.size() != Sizes.size()) {
        O << "failed to delinearize\n";
        // Still try the next enclosing loop.
        continue;
      }

      O << "Base offset: " << *Base << "\n";

      // The last entry of Sizes is the element size; the others are the
      // inner array dimensions.
      O << "ArrayDecl[UnknownSize]";
      const int NumDims = Subscripts.size();
      for (int Dim = 0; Dim < NumDims - 1; ++Dim)
        O << "[" << *Sizes[Dim] << "]";
      O << " with elements of " << *Sizes.back() << " bytes.\n";

      O << "ArrayRef";
      for (int Dim = 0; Dim < NumDims; ++Dim)
        O << "[" << *Subscripts[Dim] << "]";
      O << "\n";
    }
  }
}
/// MatchLoopHeaderHeuristic - Predict as taken a successor that is the header
/// or the pre-header of its innermost loop and that does not post-dominate
/// \p root.
/// @returns a Prediction whose first element is the successor predicted
/// taken and whose second element is the other successor; the empty
/// prediction when the heuristic matches neither or both successors.
Prediction BranchHeuristicsInfo::MatchLoopHeaderHeuristic(BasicBlock *root)
                                                          const {
  // The two successors of the block's terminator: true and false branches.
  TerminatorInst *TI = root->getTerminator();
  BasicBlock *trueSuccessor = TI->getSuccessor(0);
  BasicBlock *falseSuccessor = TI->getSuccessor(1);

  // Does the true successor head (or pre-head) its innermost loop without
  // post-dominating root?
  Loop *trueLoop = LI->getLoopFor(trueSuccessor);
  const bool trueMatches =
      trueLoop &&
      (trueSuccessor == trueLoop->getHeader() ||
       trueSuccessor == trueLoop->getLoopPreheader()) &&
      !PDT->dominates(trueSuccessor, root);

  // Same question for the false successor.
  Loop *falseLoop = LI->getLoopFor(falseSuccessor);
  const bool falseMatches =
      falseLoop &&
      (falseSuccessor == falseLoop->getHeader() ||
       falseSuccessor == falseLoop->getLoopPreheader()) &&
      !PDT->dominates(falseSuccessor, root);

  // No match, or an ambiguous match on both branches: predict nothing.
  if (trueMatches == falseMatches)
    return empty;

  Prediction pred;
  if (trueMatches)
    pred = std::make_pair(trueSuccessor, falseSuccessor);
  else
    pred = std::make_pair(falseSuccessor, trueSuccessor);
  return pred;
}
/*
 * Loop normalization pass.
 *
 * For every loop returned by iterating LoopInfoEx, the predecessors of the
 * loop header that reach it from a different loop context are collected and
 * handed to normalizePreHeaders(), which is expected to funnel them through a
 * single block: the "Entry Block" of the loop, executed once before the first
 * iteration.
 *
 * The pass was also meant to normalize loop exits, so that leaving the loop
 * always lands in a "Post Exit Block" (a block outside the loop, dominated by
 * the entry block; one per exit point, e.g. one per break). That part is
 * currently disabled; see the commented-out code at the end of the function.
 *
 * Always reports the function as modified.
 */
bool llvm::LoopNormalizer::runOnFunction(Function& F) {

	LoopInfoEx& loopInfo = getAnalysis<LoopInfoEx>();

	//Normalize headers
	for (LoopInfoEx::iterator loopIt = loopInfo.begin(); loopIt != loopInfo.end(); loopIt++){

		BasicBlock* header = (*loopIt)->getHeader();

		// Header predecessors whose innermost loop is not the header's.
		// NOTE(review): as written this also collects predecessors that sit
		// in a nested sub-loop of this loop -- confirm that is intended.
		std::set<BasicBlock*> entryPredecessors;

		for (pred_iterator predIt = pred_begin(header), predEnd = pred_end(header); predIt != predEnd; ++predIt){

			BasicBlock* candidate = *predIt;

			if (loopInfo.getLoopFor(candidate) == loopInfo.getLoopFor(header))
				continue;

			entryPredecessors.insert(candidate);
		}

		normalizePreHeaders(entryPredecessors, header);

		NumNormalizedLoops++;
	}


//	//Normalize exits (disabled)
//	std::set<BasicBlock*> loopExits = nla.getLoopExitBlocks();
//	for (std::set<BasicBlock*>::iterator it = loopExits.begin(); it!= loopExits.end(); it++){
//
//		BasicBlock* exitBlock = *it;
//
//		for(succ_iterator succ = succ_begin(exitBlock); succ != succ_end(exitBlock); succ++){
//
//			BasicBlock* successor = *succ;
//
//			if (li.getLoopDepth(successor) < li.getLoopDepth(exitBlock)){
//
//				//Successor is outside the block
//				normalizePostExit(exitBlock, successor);
//
//			}
//
//		}
//
//	}

	return true;
}
示例#8
0
/// New-pass-manager entry point: run loop predication on \p L.
///
/// Branch probability info is used only if it is already cached for the
/// enclosing function; otherwise a null pointer is passed on.  Returns "all
/// preserved" when the loop was left untouched, and the standard loop-pass
/// preserved set otherwise.
PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
                                           LoopStandardAnalysisResults &AR,
                                           LPMUpdater &U) {
  const auto &FnAM =
      AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
  Function &ParentFn = *L.getHeader()->getParent();
  auto *CachedBPI = FnAM.getCachedResult<BranchProbabilityAnalysis>(ParentFn);

  LoopPredication Predicator(&AR.SE, CachedBPI);
  const bool Changed = Predicator.runOnLoop(&L);
  return Changed ? getLoopPassPreservedAnalyses() : PreservedAnalyses::all();
}
示例#9
0
文件: LoopInfo.cpp 项目: happz/llvm
/// Print \p L to \p OS, preceded by \p Banner.
///
/// When module-scope printing is forced, the banner is followed by the loop
/// header's name and the whole module.  Otherwise the preheader (if any), the
/// loop blocks and the exit blocks (if any) are printed in that order.
void llvm::printLoop(Loop &L, raw_ostream &OS, const std::string &Banner) {

  if (forcePrintModuleIR()) {
    // -print-module-scope: identify the loop, then dump the entire module.
    OS << Banner << " (loop: ";
    L.getHeader()->printAsOperand(OS, false);
    OS << ")\n";
    OS << *L.getHeader()->getModule();
    return;
  }

  // Prints one block, tolerating null entries.
  auto EmitBlock = [&OS](BasicBlock *BB) {
    if (!BB) {
      OS << "Printing <null> block";
      return;
    }
    BB->print(OS);
  };

  OS << Banner;

  if (auto *PreHeader = L.getLoopPreheader()) {
    OS << "\n; Preheader:";
    PreHeader->print(OS);
    OS << "\n; Loop:";
  }

  for (auto *Block : L.blocks())
    EmitBlock(Block);

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L.getExitBlocks(ExitBlocks);
  if (ExitBlocks.empty())
    return;

  OS << "\n; Exit blocks";
  for (auto *Block : ExitBlocks)
    EmitBlock(Block);
}
/*
 * Records, for the header of every loop returned by iterating LoopInfoEx,
 * the instruction holding the loop's trip count.
 *
 * The trip count is looked up in the loop's entry block (as reported by
 * LoopNormalizerAnalysis): the first instruction accepted by IsTripCount()
 * wins. When none is found the header is mapped to NULL; if that happens for
 * every loop, the "Trip Count Generator" pass has probably not been run.
 *
 * The function itself is never modified, hence the false return value.
 */
bool TripCountAnalysis::runOnFunction(Function& F){

	LoopNormalizerAnalysis& normalizer = getAnalysis<LoopNormalizerAnalysis>();
	LoopInfoEx& loopInfo = getAnalysis<LoopInfoEx>();

	for (LoopInfoEx::iterator loopIt = loopInfo.begin(); loopIt != loopInfo.end(); loopIt++){

		BasicBlock* header = (*loopIt)->getHeader();
		BasicBlock* entryBlock = normalizer.getEntryBlock(header);

		//Default: no trip count known for this loop.
		tripCounts[header] = NULL;

		//Scan the entry block for the trip count instruction.
		for (BasicBlock::iterator instIt = entryBlock->begin(), instEnd = entryBlock->end(); instIt != instEnd; instIt++){

			if (!IsTripCount(*instIt))
				continue;

			tripCounts[header] = &*instIt;
			break;
		}

	}

	return false;
}
示例#11
0
/// New-pass-manager entry point: try to delete \p L.
///
/// The dominator tree, scalar evolution and loop info are taken from the
/// function analysis manager's cache.
/// NOTE(review): the cached results are dereferenced without a null check, so
/// this presumably relies on all three having been computed beforehand.
///
/// Returns "all preserved" when nothing changed, and the standard loop-pass
/// preserved set otherwise.
PreservedAnalyses LoopDeletionPass::run(Loop &L, AnalysisManager<Loop> &AM) {
  auto &FnAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
  Function &ParentFn = *L.getHeader()->getParent();

  auto &DT = *FnAM.getCachedResult<DominatorTreeAnalysis>(ParentFn);
  auto &SE = *FnAM.getCachedResult<ScalarEvolutionAnalysis>(ParentFn);
  auto &LI = *FnAM.getCachedResult<LoopAnalysis>(ParentFn);

  if (runImpl(&L, DT, SE, LI))
    return getLoopPassPreservedAnalyses();

  return PreservedAnalyses::all();
}
示例#12
0
// Pass entry point: locate the function and loop selected by strFileName /
// strFuncName / uSrcLine, simplify that loop, and instrument it according to
// uType (0: nothing, 1: InstrumentWorkless0Star1, 2:
// InstrumentWorkless0Or1Star; any other value only prints an error).
//
// Returns false when the function or the loop cannot be found, and true in
// every other case.
bool WorklessInstrument::runOnModule(Module& M)
{
	Function * pTargetFunc = SearchFunctionByName(M, strFileName, strFuncName, uSrcLine);
	if(!pTargetFunc)
	{
		errs() << "Cannot find the input function\n";
		return false;
	}

	LoopInfo * pLI = &(getAnalysis<LoopInfo>(*pTargetFunc));
	Loop * pTargetLoop = SearchLoopByLineNo(pTargetFunc, pLI, uSrcLine);
	if(!pTargetLoop)
	{
		errs() << "Cannot find the input loop\n";
		return false;
	}

	SetupTypes(&M);
	SetupConstants(&M);
	SetupHooks(&M);
	SetupGlobals(&M);

	// Remember the header so that the loop can be looked up again once
	// LoopSimplify has run.
	BasicBlock * pLoopHeader = pTargetLoop->getHeader();
	LoopSimplify(pTargetLoop, this);
	pTargetLoop = pLI->getLoopFor(pLoopHeader);

	if(uType == 1)
	{
		InstrumentWorkless0Star1(&M, pTargetLoop);
	}
	else if(uType == 2)
	{
		set<string> setWorkingBlocks;
		ParseWorkingBlocks(setWorkingBlocks);
		InstrumentWorkless0Or1Star(&M, pTargetLoop, setWorkingBlocks);
	}
	else if(uType != 0)
	{
		// Type 0 deliberately instruments nothing; anything else is invalid.
		errs() << "Wrong Workless Instrument Type\n";
	}

	return true;
}
示例#13
0
文件: branch.cpp 项目: gtanski/lljvm
/**
 * Print a loop.
 * 
 * @param l  the loop
 */
void JVMWriter::printLoop(const Loop *l) {
    printLabel(getLabelName(l->getHeader()));
    for(Loop::block_iterator i = l->block_begin(),
                             e = l->block_end(); i != e; i++) {
        const BasicBlock *block = *i;
        Loop *blockLoop = getAnalysis<LoopInfo>().getLoopFor(block);
        if(l == blockLoop)
            // the loop is the innermost parent of this block
            printBasicBlock(block);
        else if(block == blockLoop->getHeader()
                 && l == blockLoop->getParentLoop())
            // this block is the header of its innermost parent loop,
            // and the loop is the parent of that loop
            printLoop(blockLoop);
    }
    printSimpleInstruction("goto", getLabelName(l->getHeader()));
}
int BranchProbabilities::CheckLoopBranchHeuristic()
{
    for (Loop *L = _LI->getLoopFor(_BB); L; L = L->getParentLoop())
    {
        BasicBlock *loopHeader = L->getHeader();
        bool bLoops[2] = {false, false};
        bool bExits[2] = {false, false};

        for (int i = 0; i < 2; i++)
            if (loopHeader == _Succ[i])
                bLoops[i] = true;

        if (bLoops[0])
        {
            if (!bLoops[1])
                return 0;
            else
                continue;
        }
        else if (bLoops[1])
            return 1;

        for (int i = 0; i < 2; i++)
        {
            BasicBlock *BB = _Succ[i];

            if (!L->contains(BB))
                bExits[i] = true;
        }

        if (bExits[0] == bExits[1])
            continue;

        if (bExits[0])
            return 1;
        else
            return 0;
    }

    return -1;
}
示例#15
0
/// Try to split loop L into two consecutive loops around a loop-invariant
/// split value.
///
/// The function looks for a conditional branch inside L (the "split
/// condition") that compares an induction-variable-based value against a
/// loop-invariant value with an ordering predicate.  If one is found, L is
/// cloned: the original loop ("ALoop") gets a new exit value AEV and keeps
/// only successor 0 of the split branch, the clone ("BLoop") starts at BSV,
/// runs after ALoop and keeps only successor 1 of the split branch.
///
/// \returns true if the loop was split, false if no suitable split condition
/// was found or a precondition failed.  Note that some bail-outs return false
/// after SplitCondition has already been set.
bool LoopIndexSplit::splitLoop() {
  SplitCondition = NULL;
  // Loops whose exit test is an (in)equality comparison are not handled here.
  if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
      || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
    return false;
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  BranchInst *SBR = NULL; // Split Condition Branch
  // Exit Condition Branch: terminator of the block holding the exit condition.
  BranchInst *EBR = cast<BranchInst>(ExitCondition->getParent()->getTerminator());
  // If Exiting block includes loop variant instructions then this
  // loop may not be split safely.
  BasicBlock *ExitingBlock = ExitCondition->getParent();
  if (!cleanBlock(ExitingBlock)) return false;

  LLVMContext &Context = Header->getContext();

  // Search the loop blocks for the first conditional branch that qualifies as
  // a split condition.
  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
       I != E; ++I) {
    BranchInst *BR = dyn_cast<BranchInst>((*I)->getTerminator());
    if (!BR || BR->isUnconditional()) continue;
    // Only integer comparisons other than the exit condition itself, and
    // only ordering predicates (not EQ/NE), are candidates.
    ICmpInst *CI = dyn_cast<ICmpInst>(BR->getCondition());
    if (!CI || CI == ExitCondition 
        || CI->getPredicate() == ICmpInst::ICMP_NE
        || CI->getPredicate() == ICmpInst::ICMP_EQ)
      continue;

    // Unable to handle triangle loops at the moment.
    // In a triangle loop, the split condition is in the header and one of
    // the split destinations is the loop latch. If the split condition is EQ
    // then such loops are already handled in processOneIterationLoop().
    if (Header == (*I)
        && (Latch == BR->getSuccessor(0) || Latch == BR->getSuccessor(1)))
      continue;

    // If the block does not dominate the latch then this is not a diamond.
    // Such loop may not benefit from index split.
    if (!DT->dominates((*I), Latch))
      continue;

    // If the split condition's branch targets do not have a single
    // predecessor (the split condition block), then it is not possible to
    // remove the inactive branch.  This abandons the whole transformation
    // rather than moving on to the next candidate.
    if (!BR->getSuccessor(0)->getSinglePredecessor() 
        || !BR->getSuccessor(1)->getSinglePredecessor())
      return false;

    // If the merge point for BR is not loop latch then skip this condition.
    if (BR->getSuccessor(0) != Latch) {
      DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
      assert (DF0 != DF->end() && "Unable to find dominance frontier");
      if (!DF0->second.count(Latch))
        continue;
    }
    
    if (BR->getSuccessor(1) != Latch) {
      DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
      assert (DF1 != DF->end() && "Unable to find dominance frontier");
      if (!DF1->second.count(Latch))
        continue;
    }
    // First candidate that passes all checks wins.
    SplitCondition = CI;
    SBR = BR;
    break;
  }
   
  if (!SplitCondition)
    return false;

  // If the predicate sign does not match then skip.
  if (ExitCondition->isSigned() != SplitCondition->isSigned())
    return false;

  // EVOpNum: operand index (0 or 1) of the exit value in ExitCondition;
  // 1 when operand 1 is IVExitValue, 0 otherwise.
  unsigned EVOpNum = (ExitCondition->getOperand(1) == IVExitValue);
  // SVOpNum: operand index of the split value in SplitCondition.  The count
  // is used directly as an index (presumably IVBasedValues is a set, so it is
  // 0 or 1): if operand 0 is IV-based, the split value is operand 1.
  unsigned SVOpNum = IVBasedValues.count(SplitCondition->getOperand(0));
  Value *SplitValue = SplitCondition->getOperand(SVOpNum);
  // The split value must be loop invariant and the other operand IV-based.
  if (!L->isLoopInvariant(SplitValue))
    return false;
  if (!IVBasedValues.count(SplitCondition->getOperand(!SVOpNum)))
    return false;

  // Normalize loop conditions so that it is easier to calculate new loop
  // bounds.  A GT/GE condition is replaced by its inverse predicate and the
  // two successors of the corresponding branch are swapped to compensate.
  if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
    ExitCondition->setPredicate(ExitCondition->getInversePredicate());
    BasicBlock *T = EBR->getSuccessor(0);
    EBR->setSuccessor(0, EBR->getSuccessor(1));
    EBR->setSuccessor(1, T);
  }

  if (IVisGT(*SplitCondition) || IVisGE(*SplitCondition)) {
    SplitCondition->setPredicate(SplitCondition->getInversePredicate());
    BasicBlock *T = SBR->getSuccessor(0);
    SBR->setSuccessor(0, SBR->getSuccessor(1));
    SBR->setSuccessor(1, T);
  }

  //[*] Calculate new loop bounds.
  //    AEV becomes the new exit value of ALoop, BSV the start value of
  //    BLoop's induction variable.  Both start out as the split value and
  //    are adjusted by one depending on the LT/LE combination of the exit
  //    and split conditions.  Any helper instructions are requested at the
  //    preheader's terminator (PHTerm).
  Value *AEV = SplitValue;
  Value *BSV = SplitValue;
  bool Sign = SplitCondition->isSigned();
  Instruction *PHTerm = L->getLoopPreheader()->getTerminator();

  if (IVisLT(*ExitCondition)) {
    if (IVisLT(*SplitCondition)) {
      /* Do nothing */
    }
    else if (IVisLE(*SplitCondition)) {
      AEV = getPlusOne(SplitValue, Sign, PHTerm, Context);
      BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
    } else {
      assert (0 && "Unexpected split condition!");
    }
  }
  else if (IVisLE(*ExitCondition)) {
    if (IVisLT(*SplitCondition)) {
      AEV = getMinusOne(SplitValue, Sign, PHTerm, Context);
    }
    else if (IVisLE(*SplitCondition)) {
      BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
    } else {
      assert (0 && "Unexpected split condition!");
    }
  } else {
    assert (0 && "Unexpected exit condition!");
  }
  // Clamp the new bounds against the original exit and start values.
  AEV = getMin(AEV, IVExitValue, Sign, PHTerm);
  BSV = getMax(BSV, IVStartValue, Sign, PHTerm);

  // [*] Clone Loop
  //     ValueMap maps each original value to its clone in BLoop.
  DenseMap<const Value *, Value *> ValueMap;
  Loop *BLoop = CloneLoop(L, LPM, LI, ValueMap, this);
  Loop *ALoop = L;

  // [*] ALoop's exiting edge enters BLoop's header.
  //    ALoop's original exit block becomes BLoop's exit block.
  PHINode *B_IndVar = cast<PHINode>(ValueMap[IndVar]);
  BasicBlock *A_ExitingBlock = ExitCondition->getParent();
  BranchInst *A_ExitInsn =
    dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
  assert (A_ExitInsn && "Unable to find suitable loop exit branch");
  // Assume successor 1 leaves the loop; if it is actually inside ALoop, the
  // exit edge is successor 0 instead.  The exit edge is redirected to
  // BLoop's header.
  BasicBlock *B_ExitBlock = A_ExitInsn->getSuccessor(1);
  BasicBlock *B_Header = BLoop->getHeader();
  if (ALoop->contains(B_ExitBlock)) {
    B_ExitBlock = A_ExitInsn->getSuccessor(0);
    A_ExitInsn->setSuccessor(0, B_Header);
  } else
    A_ExitInsn->setSuccessor(1, B_Header);

  // [*] Update ALoop's exit value using new exit value.
  ExitCondition->setOperand(EVOpNum, AEV);

  // [*] Update BLoop's header phi nodes. Remove incoming PHINode's from
  //     original loop's preheader. Add incoming PHINode values from
  //     ALoop's exiting block. Update BLoop header's dominator info.

  // Collect inverse map of Header PHINodes (BLoop PHI -> ALoop PHI).  PHI
  // nodes are grouped at the top of the block, so stop at the first non-PHI.
  DenseMap<Value *, Value *> InverseMap;
  for (BasicBlock::iterator BI = ALoop->getHeader()->begin(), 
         BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
      PHINode *PNClone = cast<PHINode>(ValueMap[PN]);
      InverseMap[PNClone] = PN;
    } else
      break;
  }

  BasicBlock *A_Preheader = ALoop->getLoopPreheader();
  for (BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
       BI != BE; ++BI) {
    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
      // Remove incoming value from original preheader.
      PN->removeIncomingValue(A_Preheader);

      // Add incoming value from A_ExitingBlock.  BLoop's induction variable
      // starts at BSV; every other PHI continues from the value the
      // corresponding ALoop PHI had when ALoop was left.
      if (PN == B_IndVar)
        PN->addIncoming(BSV, A_ExitingBlock);
      else { 
        PHINode *OrigPN = cast<PHINode>(InverseMap[PN]);
        Value *V2 = NULL;
        // If loop header is also loop exiting block then
        // OrigPN is incoming value for B loop header.
        if (A_ExitingBlock == ALoop->getHeader())
          V2 = OrigPN;
        else
          V2 = OrigPN->getIncomingValueForBlock(A_ExitingBlock);
        PN->addIncoming(V2, A_ExitingBlock);
      }
    } else
      break;
  }

  // BLoop's header is now entered from ALoop's exiting block.
  DT->changeImmediateDominator(B_Header, A_ExitingBlock);
  DF->changeImmediateDominator(B_Header, A_ExitingBlock, DT);
  
  // [*] Update BLoop's exit block. Its new predecessor is BLoop's exiting
  //     block. Remove incoming PHINode values from ALoop's exiting block.
  //     Add new incoming values from BLoop's incoming exiting value.
  //     Update BLoop exit block's dominator info.
  BasicBlock *B_ExitingBlock = cast<BasicBlock>(ValueMap[A_ExitingBlock]);
  for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
       BI != BE; ++BI) {
    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
      // The new incoming value is the BLoop clone of the value that used to
      // arrive from ALoop's exiting block.
      PN->addIncoming(ValueMap[PN->getIncomingValueForBlock(A_ExitingBlock)], 
                                                            B_ExitingBlock);
      PN->removeIncomingValue(A_ExitingBlock);
    } else
      break;
  }

  DT->changeImmediateDominator(B_ExitBlock, B_ExitingBlock);
  DF->changeImmediateDominator(B_ExitBlock, B_ExitingBlock, DT);

  //[*] Split ALoop's exit edge. This creates a new block which
  //    serves two purposes. First, it holds the PHINode definitions
  //    that keep ALoop in LCSSA form. Second, it acts as a preheader
  //    for BLoop.
  BasicBlock *A_ExitBlock = SplitEdge(A_ExitingBlock, B_Header, this);

  //[*] Preserve ALoop's LCSSA form. Create new forwarding PHINodes
  //    in A_ExitBlock to redefine outgoing PHI definitions from ALoop.
  for(BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
      BI != BE; ++BI) {
    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
      // Route the value through a single-entry PHI in A_ExitBlock and make
      // the BLoop header PHI use that instead.
      Value *V1 = PN->getIncomingValueForBlock(A_ExitBlock);
      PHINode *newPHI = PHINode::Create(PN->getType(), PN->getName());
      newPHI->addIncoming(V1, A_ExitingBlock);
      A_ExitBlock->getInstList().push_front(newPHI);
      PN->removeIncomingValue(A_ExitBlock);
      PN->addIncoming(newPHI, A_ExitBlock);
    } else
      break;
  }

  //[*] Eliminate split condition's inactive branch from ALoop.
  //    ALoop keeps successor 0 of the split branch.
  BasicBlock *A_SplitCondBlock = SplitCondition->getParent();
  BranchInst *A_BR = cast<BranchInst>(A_SplitCondBlock->getTerminator());
  BasicBlock *A_InactiveBranch = NULL;
  BasicBlock *A_ActiveBranch = NULL;
  A_ActiveBranch = A_BR->getSuccessor(0);
  A_InactiveBranch = A_BR->getSuccessor(1);
  A_BR->setUnconditionalDest(A_ActiveBranch);
  removeBlocks(A_InactiveBranch, L, A_ActiveBranch);

  //[*] Eliminate split condition's inactive branch from BLoop.
  //    BLoop keeps successor 1 of the (cloned) split branch.
  BasicBlock *B_SplitCondBlock = cast<BasicBlock>(ValueMap[A_SplitCondBlock]);
  BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
  BasicBlock *B_InactiveBranch = NULL;
  BasicBlock *B_ActiveBranch = NULL;
  B_ActiveBranch = B_BR->getSuccessor(1);
  B_InactiveBranch = B_BR->getSuccessor(0);
  B_BR->setUnconditionalDest(B_ActiveBranch);
  removeBlocks(B_InactiveBranch, BLoop, B_ActiveBranch);

  // When the header is also the exiting block, the exit condition is left
  // where it is.
  // NOTE(review): this successful path returns without incrementing
  // NumIndexSplit -- confirm whether that is intended.
  BasicBlock *A_Header = ALoop->getHeader();
  if (A_ExitingBlock == A_Header)
    return true;

  //[*] Move exit condition into split condition block to avoid
  //    executing dead loop iteration.
  ICmpInst *B_ExitCondition = cast<ICmpInst>(ValueMap[ExitCondition]);
  Instruction *B_IndVarIncrement = cast<Instruction>(ValueMap[IVIncrement]);
  ICmpInst *B_SplitCondition = cast<ICmpInst>(ValueMap[SplitCondition]);

  moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
                    cast<ICmpInst>(SplitCondition), IndVar, IVIncrement, 
                    ALoop, EVOpNum);

  moveExitCondition(B_SplitCondBlock, B_ActiveBranch, 
                    B_ExitBlock, B_ExitCondition,
                    B_SplitCondition, B_IndVar, B_IndVarIncrement, 
                    BLoop, EVOpNum);

  NumIndexSplit++;
  return true;
}
/// FindBackAndExitEdges - Search for back and exit edges for all blocks
/// within the function loops, calculated using loop information.
///
/// Every top-level loop is expanded depth-first with an explicit stack, so
/// that the innermost loops are processed first.  For each block, visited
/// once as part of the innermost loop that contains it, every successor edge
/// is classified:
///  - an edge to the header of any loop currently on the stack is recorded in
///    listBackEdges and counted in BackEdgesCount for the source block;
///  - an edge to a block outside the loop being processed is recorded in
///    listExitEdges.
///
/// \param F  the function being analysed; the loops themselves are taken
///           from the LoopInfo member.
void BranchPredictionInfo::FindBackAndExitEdges(Function &F) {
  SmallPtrSet<const BasicBlock *, 64> LoopsVisited;
  SmallPtrSet<const BasicBlock *, 64> BlocksVisited;

  for (LoopInfo::iterator LIT = LI->begin(), LIE = LI->end();
       LIT != LIE; ++LIT) {
    Loop *rootLoop = *LIT;
    BasicBlock *rootHeader = rootLoop->getHeader();

    // Check if we already visited this loop.
    if (LoopsVisited.count(rootHeader))
      continue;

    // Create a stack to hold loops (inner most on the top).  InStack mirrors
    // the stack as a set of loop headers for fast membership tests.
    // SmallVectorImpl is only the size-erased interface class and cannot be
    // instantiated, so the concrete SmallVector is used here.
    SmallVector<Loop *, 8> Stack;
    SmallPtrSet<const BasicBlock *, 8> InStack;

    // Put the current loop into the Stack.
    Stack.push_back(rootLoop);
    InStack.insert(rootHeader);

    do {
      Loop *loop = Stack.back();

      // Search for new inner loops.
      bool foundNew = false;
      for (Loop::iterator I = loop->begin(), E = loop->end(); I != E; ++I) {
        Loop *innerLoop = *I;
        BasicBlock *innerHeader = innerLoop->getHeader();

        // Skip visited inner loops.
        if (!LoopsVisited.count(innerHeader)) {
          Stack.push_back(innerLoop);
          InStack.insert(innerHeader);
          foundNew = true;
          break;
        }
      }

      // If a new loop is found, continue.
      // Otherwise, it is time to expand it, because it is the most inner loop
      // yet unprocessed.
      if (foundNew)
        continue;

      // The variable "loop" is now the unvisited inner most loop.
      BasicBlock *header = loop->getHeader();

      // Search for all basic blocks on the loop.
      for (Loop::block_iterator LBI = loop->block_begin(),
           LBE = loop->block_end(); LBI != LBE; ++LBI) {
        BasicBlock *lpBB = *LBI;
        // Blocks already handled as part of an inner loop are skipped.
        if (!BlocksVisited.insert(lpBB))
          continue;

        // Start with zero back edges leaving this block (lpBB).
        BackEdgesCount[lpBB] = 0;

        // For each loop block successor, check if the block pointing is
        // outside the loop.
        TerminatorInst *TI = lpBB->getTerminator();
        for (unsigned s = 0; s < TI->getNumSuccessors(); ++s) {
          BasicBlock *successor = TI->getSuccessor(s);
          Edge edge = std::make_pair(lpBB, successor);

          // If the successor matches any loop header on the stack,
          // then it is a backedge.
          if (InStack.count(successor)) {
            listBackEdges.insert(edge);
            ++BackEdgesCount[lpBB];
          }

          // If the successor is not present in the loop block list, then it is
          // an exit edge.
          if (!loop->contains(successor))
            listExitEdges.insert(edge);
        }
      }

      // Cleaning the visited loop.
      LoopsVisited.insert(header);
      Stack.pop_back();
      InStack.erase(header);
    } while (!InStack.empty());
  }
}
/// Tries to record a ranged check for the memory access \p I inside loop \p L.
///
/// The access is reduced to an array base pointer plus a subscript expression.
/// The subscript's minimum and maximum are materialized in front of the
/// terminator of the preheader of the loop selected as "Final", and the
/// resulting CallInfo is stored in Calls_. If Calls_ already holds an entry
/// that compares equal, the two ranges are merged (smaller minimum, larger
/// maximum, summed reuse).
///
/// \param L  Loop that contains \p I.
/// \param I  Instruction to inspect; anything but a load or store is rejected.
/// \returns true if a CallInfo was recorded or merged, false if the access
///          could not be handled (the loop is then left uninstrumented for
///          this access).
bool RangedAddressSanitizer::generateCallFor(Loop *L, Instruction *I) {
  if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
    return false;

  // Reduce memory access to array base pointer + offset
  Value *Array;
  unsigned Size;
  Expr Subscript;
  if (!reduceMemoryAccess(I, Array, Subscript, Size)) {
    return false;
  }

#ifdef ENABLE_REUSE
  Loop *Final;
  Expr ReuseEx = RE_->getExecutionsRelativeTo(L, nullptr, Final);
  if (!ReuseEx.isValid()) {
    SPM_DEBUG(dbgs() << "RangedAddressSanitizer: could not calculate reuse for "
                        "loop " << L->getHeader()->getName() << "\n");
    return false;
  }
  SPM_DEBUG(dbgs() << "RangedAddressSanitizer: reuse of "
                   << L->getHeader()->getName() << " relative to "
                   << Final->getHeader()->getName() << ": " << ReuseEx << "\n");
#else /* ! ENABLE_REUSE */
  // TODO find Final loop
  // Without reuse analysis, walk up to the outermost loop enclosing L.
  Loop *Final = L; // FIXME
  for (; Final->getParentLoop() != nullptr; Final = Final->getParentLoop()) {}
#endif

  BasicBlock *Preheader = Final->getLoopPreheader();
  using InverseCFG = GraphTraits<Inverse<BasicBlock *>>;

  if (!Preheader) {
    // No canonical preheader: fall back to the first predecessor of the header
    // that LoopInfo does not attribute to any loop.
    SPM_DEBUG(dbgs() << "RangedAddressSanitizer: trying to recover pre-header\n" );
    BasicBlock *header = Final->getHeader();
    for (pred_iterator itPred = InverseCFG::child_begin(header);
         itPred != InverseCFG::child_end(header); ++itPred) {
      BasicBlock *pred = *itPred;
      if (!LI_->getLoopFor(pred)) {
        Preheader = pred;
        break;
      }
    }
  }

  // Bail out instead of asserting: with assertions compiled out a null
  // preheader would otherwise be dereferenced below.
  if (!Preheader) {
    errs() << "[ERROR] Could not find nor recover pre-header. Leaving loop uninstrumented " << *L << "\n";
    return false;
  }

  BasicBlock *Exit = Final->getExitBlock();
  if (!Exit) {
    errs() << "[ERROR] Non unique exit block in loop. Leaving loop uninstrumented " << *L << "\n";
    return false;
  }

  // FIXME: instead of bailing, we should set the toplevel loop in the call to
  // getExecutionsRelativeTo.
  // The array base must be available where the check is emitted (preheader).
  if (Instruction *AI = dyn_cast<Instruction>(Array)) {
    if (!DT_->dominates(AI->getParent(), Preheader) &&
         AI->getParent() != Preheader) {
      SPM_DEBUG(dbgs() << "RangedAddressSanitizer: array does not dominate "
                          "loop preheader\n");
      return false;
    }
  }

  // Query array offset range
  Expr MinEx, MaxEx;
  if (!RMM_->getMinMax(Subscript, MinEx, MaxEx)) {
    SPM_DEBUG(dbgs() << "RangedAddressSanitizer: could not calculate min/max "
                        "for subscript " << Subscript << "\n");
    return false;
  }
  SPM_DEBUG(dbgs() << "RangedAddressSanitizer: min/max for subscript "
                   << Subscript << ": " << MinEx << ", " << MaxEx << "\n");

  // Materialize expressions in the loop preheader (before its terminator)
  IRBuilder<> IRB(Preheader->getTerminator());
#ifdef ENABLE_REUSE
  Value *Reuse = (ReuseEx * Size).getExprValue(64, IRB, Module_);
#else
  Value *Reuse = ConstantInt::get(IntegerType::get(IRB.getContext(), 64), 0); // bogus
#endif
  Value *Min   = MinEx.getExprValue(64, IRB, Module_);
  Value *Max   = MaxEx.getExprValue(64, IRB, Module_);

  SPM_DEBUG(dbgs() << "RangedAddressSanitizer: values for reuse, min, max: "
                   << *Reuse << ", " << *Min << ", " << *Max << "\n");

  // If there is already a range check for this array and loop cached, merge
  // the intervals
  CallInfo CI = { Final, Preheader, Exit, Array, Min, Max, Reuse };
  auto Call = Calls_.insert(CI);

  if (!Call.second) {
    IRBuilder<> MergeIRB(Preheader->getTerminator());
    CallInfo SCI = *Call.first;

    // Signed minimum of the stored and the new lower bound.
    Value *CmpMin = MergeIRB.CreateICmp(CmpInst::ICMP_SLT, SCI.Min, CI.Min);
    SCI.Min = MergeIRB.CreateSelect(CmpMin, SCI.Min, CI.Min);

    // Signed maximum of the stored and the new upper bound.
    Value *CmpMax = MergeIRB.CreateICmp(CmpInst::ICMP_SGT, SCI.Max, CI.Max);
    SCI.Max = MergeIRB.CreateSelect(CmpMax, SCI.Max, CI.Max);

    SCI.Reuse = MergeIRB.CreateAdd(SCI.Reuse, CI.Reuse);

    // Replace the stored entry with the merged one.
    Calls_.erase(SCI);
    Calls_.insert(SCI);
  }

  return true;
}
/// Instruments the loops of \p F with FASan range checks.
///
/// Steps, in order:
///  1. Fetch the required analyses and print iteration info for every
///     top-level loop.
///  2. Walk the dominator tree in post-order and call generateCallFor on every
///     instruction of each loop that has exactly one back edge and exactly two
///     header predecessors; this fills Calls_.
///  3. For every entry of Calls_, emit a call to __fasan_check in the recorded
///     preheader. The first check on a preheader clones the loop body, marks
///     the array accesses in the clone via markSafeArrayUse and makes the
///     preheader branch to the clone when the check yields true. Further
///     checks on the same preheader only mark the additional array in the
///     existing clone and AND their result into the branch condition.
///
/// \returns false if the pass is disabled through the FASAN_DISABLE
///          environment variable or \p F is filtered out by ClFunc, true
///          otherwise.
bool RangedAddressSanitizer::runOnFunction(Function &F) {
  if (getenv("FASAN_DISABLE")) {
    SPM_DEBUG( dbgs() << "FASan : disabled\n" );
    return false;
  }

  DL_  = &getAnalysis<DataLayout>();
  DT_  = &getAnalysis<DominatorTree>();
  LI_  = &getAnalysis<LoopInfo>();
  RI_  = &getAnalysis<ReduceIndexation>();
#ifdef ENABLE_REUSE
  RE_  = &getAnalysis<RelativeExecutions>();
#endif
  RMM_ = &getAnalysis<RelativeMinMax>();

  Module_  = F.getParent();
  Context_ = &Module_->getContext();

  // VoidTy is only referenced by the disabled init/shutdown block below.
  Type        *VoidTy    = Type::getVoidTy(*Context_);
  IntegerType *IntTy     = IntegerType::getInt64Ty(*Context_);
  IntegerType *BoolTy    = IntegerType::getInt1Ty(*Context_);
  PointerType *VoidPtrTy = PointerType::getInt8PtrTy(*Context_);

  SPM_DEBUG( F.dump() );
  outs() << "[IterationInfo]\n";
  for (Loop * loop : *LI_) {
    ii_visitLoop(loop);
  }
  outs() << "[EndOfIterationInfo]\n";

#if 0 // disabled initialization,shutdown sequence for FASan
  if (F.getName() == "main") {
    SPM_DEBUG(dbgs() << "RangedAddressSanitizer: inserting hwloc calls into "
                        "main function\n");

    FunctionType *FnType = FunctionType::get(VoidTy, ArrayRef<Type*>(), false);
    IRBuilder<> IRB(&(*F.getEntryBlock().begin()));

    Constant *Init = Module_->getOrInsertFunction("__spm_init", FnType);
    IRB.CreateCall(Init);

    Constant *End = Module_->getOrInsertFunction("__spm_end", FnType);
    for (auto &BB : F) {
      TerminatorInst *TI = BB.getTerminator();
      if (isa<ReturnInst>(TI)) {
        IRB.SetInsertPoint(TI);
        IRB.CreateCall(End);
      }
    }
  }
#endif

  if (!ClFunc.empty() && F.getName() != ClFunc) {
    SPM_DEBUG(dbgs() << "RangedAddressSanitizer: skipping function "
                     << F.getName() << "\n");
    return false;
  }

  Calls_.clear();

  SPM_DEBUG(dbgs() << "RangedAddressSanitizer: processing function "
                   << F.getName() << "\n");

  std::vector<Type*> ReuseFnFormals = { VoidPtrTy, IntTy, IntTy, IntTy };
  FunctionType *ReuseFnType = FunctionType::get(BoolTy, ReuseFnFormals, false);
  ReuseFn_ =
    F.getParent()->getOrInsertFunction("__fasan_check", ReuseFnType);
  ReuseFnDestroy_ =
    F.getParent()->getOrInsertFunction("__spm_give", ReuseFnType);

  // Visit all loops in bottom-up order (inner-most loops first)
  std::set<BasicBlock*> Processed;
  auto Entry = DT_->getRootNode();
  for (auto ET = po_begin(Entry), EE = po_end(Entry); ET != EE; ++ET) {
    BasicBlock *Header = (*ET)->getBlock();

    if (LI_->isLoopHeader(Header)) {
      SPM_DEBUG(dbgs() << "RangedAddressSanitizer: processing loop at "
                       << Header->getName() << "\n");
      Loop *L = LI_->getLoopFor(Header);

      if (L->getNumBackEdges() != 1 ||
          std::distance(pred_begin(Header), pred_end(Header)) != 2) {
        SPM_DEBUG(dbgs() << "RangedAddressSanitizer: loop has multiple "
                         << "backedges or multiple incoming outer blocks\n");
        continue;
      }

      // visit all memory accesses in this loop; blocks already handled while
      // visiting another loop are skipped
      for (auto BB = L->block_begin(), BE = L->block_end(); BB != BE; ++BB) {
        if (!Processed.count(*BB)) {
          Processed.insert(*BB);
          for (auto &I : *(*BB))
            generateCallFor(L, &I);
        }
      }
    }
  }

  // FAsan logic goes here

  // keeps track of cloned regions to avoid redundant cloning
  // (original block -> its clone)
  std::map<const BasicBlock*,BasicBlock*> clonedBlockMap;

  std::vector<CallInst*> ToInline;

  for (auto &CI : Calls_) {
    BasicBlock * Preheader = CI.Preheader;

    // TODO decide whether it is worthwhile to optimize for this case

    // insert range check
    IRBuilder<> IRB(Preheader->getTerminator());
    Value *VoidArray = IRB.CreateBitCast(CI.Array, VoidPtrTy);
    std::vector<Value*> Args = { VoidArray, CI.Min, CI.Max, CI.Reuse };
    CallInst *CR = IRB.CreateCall(ReuseFn_, Args);
    ToInline.push_back(CR);

    // verify if this loop was already instrumented
    TerminatorInst * preHeaderTerm = CR->getParent()->getTerminator();
    BranchInst * preHeaderBranch = dyn_cast<BranchInst>(preHeaderTerm);

    if (preHeaderBranch && preHeaderBranch->isConditional()) {

      // discover the structure of the instrumented code (safe and default
      // region); abort, if this does not look like instrumented code.
      // Explicit checks are used so the abort also happens when assertions
      // are compiled out.
      BasicBlock * firstTarget = preHeaderBranch->getSuccessor(0);
      BasicBlock * secondTarget = preHeaderBranch->getSuccessor(1);
      BasicBlock * defHeader = nullptr;
      BasicBlock * safeHeader = nullptr;
      BasicBlock * expectedSafeHeader = nullptr;

      auto itFirst = clonedBlockMap.find(firstTarget);
      if (itFirst != clonedBlockMap.end()) {
        defHeader = firstTarget;
        safeHeader = itFirst->second;
        expectedSafeHeader = secondTarget;
      } else {
        auto itSecond = clonedBlockMap.find(secondTarget);
        if (itSecond != clonedBlockMap.end()) {
          defHeader = secondTarget;
          safeHeader = itSecond->second;
          expectedSafeHeader = firstTarget;
        }
      }

      if (!defHeader || safeHeader != expectedSafeHeader) {
        SPM_DEBUG( dbgs() << "FASan: conditional preheader branch does not look like instrumented code " << * preHeaderBranch << "\n" );
        abort();
      }

      SPM_DEBUG( dbgs() << "FASan: (Unsupported) second array in safe region controlled by " << * preHeaderBranch << "\n" );
      Loop * defLoop = LI_->getLoopFor(defHeader);
      if (!defLoop) {
        SPM_DEBUG( dbgs() << "FASan: default region is not a loop!\n" );
        abort();
      }

      Loop::block_iterator itBodyBlock,S,E;
      S = defLoop->block_begin();
      E = defLoop->block_end();

      // mark accesses in cloned region as safe
      for (itBodyBlock = S;itBodyBlock != E; ++itBodyBlock) {
        BasicBlock * defBodyBlock = *itBodyBlock;
        auto itSafe = clonedBlockMap.find(defBodyBlock);
        if (itSafe == clonedBlockMap.end() || !itSafe->second) {
          SPM_DEBUG( dbgs() << "FASan: no cloned block for " << defBodyBlock->getName() << "\n" );
          abort();
        }
        BasicBlock * safeBodyBlock = itSafe->second;

        for(auto & inst : *safeBodyBlock) {
          markSafeArrayUse(&inst, CI.Array);
        }
      }

      // add conjunctive test
      Value * oldCond = preHeaderBranch->getCondition();
      Value * joinedCond = IRB.CreateAnd(oldCond, CR, "allsafe");
      preHeaderBranch->setCondition(joinedCond);

    } else {

      // get loop
      Loop* finalLoop = CI.FinalLoop;
      Loop::block_iterator itBodyBlock,S,E;
      S = finalLoop->block_begin();
      E = finalLoop->block_end();

      // clone loop body (cloned loop will run unchecked)
      ValueToValueMapTy cloneMap;

      BasicBlock * clonedHeader = nullptr;
      std::vector<BasicBlock*> clonedBlocks;

      for (itBodyBlock = S;itBodyBlock != E; ++itBodyBlock) {

        const BasicBlock * bodyBlock = *itBodyBlock;
        BasicBlock * clonedBlock = CloneBasicBlock(bodyBlock, cloneMap, "_checked", &F, nullptr);

        cloneMap[bodyBlock] = clonedBlock;
        clonedBlockMap[bodyBlock] = clonedBlock;
        clonedBlocks.push_back(clonedBlock);

        if (bodyBlock == finalLoop->getHeader()) {
          clonedHeader = clonedBlock;
          SPM_DEBUG( dbgs() << "FASan: loop header case at " << bodyBlock->getName() << "\n" );
        } else {
          SPM_DEBUG( dbgs() << "FASan: non-header block at " << bodyBlock->getName() << "\n" );
        }
      }

      if (!clonedHeader) {
        // TODO run clean-up code
        SPM_DEBUG( dbgs() << "FASan: could not find header!\n");
        abort();
      }

      // Remap uses inside cloned region (mark pointers in the region as
      // unguarded)
      for (BasicBlock * block : clonedBlocks) {
        for(auto & inst : *block) {
          RemapInstruction(&inst, cloneMap, RF_IgnoreMissingEntries);
          markSafeArrayUse(&inst, CI.Array);
        }
      }

      // TODO fix PHI-nodes in exit blocks

      // Rewire terminator of the range check to branch to the cloned region
      TerminatorInst * checkTermInst = CR->getParent()->getTerminator();

      if (BranchInst * checkBranchInst = dyn_cast<BranchInst>(checkTermInst)) {
        if (checkBranchInst->isUnconditional()) {
          // check result true -> cloned region, false -> original region
          BasicBlock * defTarget = checkBranchInst->getSuccessor(0);
          BranchInst * modifiedBranchInst = BranchInst::Create(clonedHeader, defTarget, CR, checkBranchInst);
          checkBranchInst->replaceAllUsesWith(modifiedBranchInst);
          checkBranchInst->eraseFromParent();
        } else {
          SPM_DEBUG( dbgs() << "FASan: Unexpected conditional branch (preheader should branch unconditional, other array checks will introduce conditional branches) " << * checkTermInst << "\n" );
          abort();
        }
      } else {
        SPM_DEBUG( dbgs() << "FASan: unsupported terminator type " << * checkTermInst << "\n" );
        abort();
      }
    }

#if 0
    IRB.SetInsertPoint(&(*CI.Final->begin()));
    IRB.CreateCall(ReuseFnDestroy_, Args);
#endif
    SPM_DEBUG(dbgs() << "RangedAddressSanitizer: call instruction: " << *CR
                     << "\n");
  }


  // inline calls
#ifdef FASAN_INLINE_RUNTIME
  for (CallInst * call : ToInline) {
    assert(call);
    InlineFunctionInfo IFI;
    InlineFunction(call, IFI, false);
  }
#endif

  SPM_DEBUG( F.dump() );
  return true;
}
/*
  This method performs Unroll and Jam. For a simple loop like:
  for (i = ..)
    Fore(i)
    for (j = ..)
      SubLoop(i, j)
    Aft(i)

  Instead of doing normal inner or outer unrolling, we do:
  for (i = .., i+=2)
    Fore(i)
    Fore(i+1)
    for (j = ..)
      SubLoop(i, j)
      SubLoop(i+1, j)
    Aft(i)
    Aft(i+1)

  So the outer loop is essentially unrolled and then the inner loops are fused
  ("jammed") together into a single loop. This can increase speed when there
  are loads in SubLoop that are invariant to i, as they become shared between
  the now jammed inner loops.

  We do this by splitting the blocks in the loop into Fore, Subloop and Aft.
  Fore blocks are those before the inner loop, Aft are those after. Normal
  Unroll code is used to copy each of these sets of blocks and the results are
  combined together into the final form above.

  isSafeToUnrollAndJam should be used prior to calling this to make sure the
  unrolling will be valid. Checking profitability is also advisable.
*/
/// Unrolls the outer loop \p L by \p Count and jams the copies of its single
/// inner loop together.
///
/// \param L               Outer loop; must have exactly one sub-loop.
/// \param Count           Number of copies of the loop body in the result
///                        (must be > 0).
/// \param TripCount       Trip count of \p L; when it equals \p Count the loop
///                        control is removed altogether. Must be 0 or a
///                        multiple of \p TripMultiple.
/// \param TripMultiple    Known multiple of the trip count (must be > 0). If
///                        it is 1 or not a multiple of \p Count, a runtime
///                        remainder loop is generated first.
/// \param UnrollRemainder Forwarded to UnrollRuntimeLoopRemainder.
/// \param LI, DT          Updated for the newly created blocks.
/// \param SE              May be null; if set, \p L and its sub-loop are
///                        forgotten.
/// \param AC              Receives the cloned llvm.assume intrinsics.
/// \param ORE             Receives the optimization remarks.
/// \returns Unmodified if nothing was done, FullyUnrolled if the loop control
///          was eliminated, PartiallyUnrolled otherwise.
LoopUnrollResult
llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
                       unsigned TripMultiple, bool UnrollRemainder,
                       LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                       AssumptionCache *AC, OptimizationRemarkEmitter *ORE) {

  // When we enter here we should have already checked that it is safe
  BasicBlock *Header = L->getHeader();
  assert(L->getSubLoops().size() == 1);
  Loop *SubLoop = *L->begin();

  // Don't enter the unroll code if there is nothing to do.
  if (TripCount == 0 && Count < 2) {
    LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
    return LoopUnrollResult::Unmodified;
  }

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = (Count == TripCount);

  // We use the runtime remainder in cases where we don't know trip multiple
  if (TripMultiple == 1 || TripMultiple % Count != 0) {
    if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
                                    /*UseEpilogRemainder*/ true,
                                    UnrollRemainder, LI, SE, DT, AC, true)) {
      LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
                           "generated when assuming runtime trip count\n");
      return LoopUnrollResult::Unmodified;
    }
  }

  // Notify ScalarEvolution that the loop will be substantially changed,
  // if not outright eliminated.
  if (SE) {
    SE->forgetLoop(L);
    SE->forgetLoop(SubLoop);
  }

  using namespace ore;
  // Report the unrolling decision.
  if (CompletelyUnroll) {
    LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %"
                      << Header->getName() << " with trip count " << TripCount
                      << "!\n");
    ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
                                 L->getHeader())
              << "completely unroll and jammed loop with "
              << NV("UnrollCount", TripCount) << " iterations");
  } else {
    auto DiagBuilder = [&]() {
      OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
                              L->getHeader());
      return Diag << "unroll and jammed loop by a factor of "
                  << NV("UnrollCount", Count);
    };

    LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName()
                      << " by " << Count);
    if (TripMultiple != 1) {
      LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
      ORE->emit([&]() {
        return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
                             << " trips per branch";
      });
    } else {
      LLVM_DEBUG(dbgs() << " with run-time trip count");
      ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; });
    }
    LLVM_DEBUG(dbgs() << "!\n");
  }

  BasicBlock *Preheader = L->getLoopPreheader();
  BasicBlock *LatchBlock = L->getLoopLatch();
  // Check the blocks before LatchBlock is dereferenced, so a missing latch
  // trips the assertion instead of being undefined behaviour.
  assert(Preheader && LatchBlock && Header);
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
  assert(BI && !BI->isUnconditional());
  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  // Successor index 1 when the loop continues on true, 0 otherwise.
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
  bool SubLoopContinueOnTrue = SubLoop->contains(
      SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0));

  // Partition blocks in an outer/inner loop pair into blocks before and after
  // the loop
  BasicBlockSet SubLoopBlocks;
  BasicBlockSet ForeBlocks;
  BasicBlockSet AftBlocks;
  partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks,
                           DT);

  // We keep track of the entering/first and exiting/last block of each of
  // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of
  // blocks easier.
  std::vector<BasicBlock *> ForeBlocksFirst;
  std::vector<BasicBlock *> ForeBlocksLast;
  std::vector<BasicBlock *> SubLoopBlocksFirst;
  std::vector<BasicBlock *> SubLoopBlocksLast;
  std::vector<BasicBlock *> AftBlocksFirst;
  std::vector<BasicBlock *> AftBlocksLast;
  // Index 0 of each vector is the original (iteration 0) block.
  ForeBlocksFirst.push_back(Header);
  ForeBlocksLast.push_back(SubLoop->getLoopPreheader());
  SubLoopBlocksFirst.push_back(SubLoop->getHeader());
  SubLoopBlocksLast.push_back(SubLoop->getExitingBlock());
  AftBlocksFirst.push_back(SubLoop->getExitBlock());
  AftBlocksLast.push_back(L->getExitingBlock());
  // Maps Blocks[0] -> Blocks[It]
  ValueToValueMapTy LastValueMap;

  // Move any instructions from fore phi operands from AftBlocks into Fore.
  moveHeaderPhiOperandsToForeBlocks(
      Header, LatchBlock, SubLoop->getLoopPreheader()->getTerminator(),
      AftBlocks);

  // The current on-the-fly SSA update requires blocks to be processed in
  // reverse postorder so that LastValueMap contains the correct value at each
  // exit.
  LoopBlocksDFS DFS(L);
  DFS.perform(LI);
  // Stash the DFS iterators before adding blocks to the loop.
  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();

  // Scale the duplication factor of debug locations when profiling debug info
  // is requested for the function.
  if (Header->getParent()->isDebugInfoForProfiling())
    for (BasicBlock *BB : L->getBlocks())
      for (Instruction &I : *BB)
        if (!isa<DbgInfoIntrinsic>(&I))
          if (const DILocation *DIL = I.getDebugLoc())
            I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));

  // Copy all blocks
  for (unsigned It = 1; It != Count; ++It) {
    std::vector<BasicBlock *> NewBlocks;
    // Maps Blocks[It] -> Blocks[It-1]
    DenseMap<Value *, Value *> PrevItValueMap;

    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
      Header->getParent()->getBasicBlockList().push_back(New);

      // Register the clone with the loop its original belongs to and record
      // it if it is the first/last block of its section.
      if (ForeBlocks.count(*BB)) {
        L->addBasicBlockToLoop(New, *LI);

        if (*BB == ForeBlocksFirst[0])
          ForeBlocksFirst.push_back(New);
        if (*BB == ForeBlocksLast[0])
          ForeBlocksLast.push_back(New);
      } else if (SubLoopBlocks.count(*BB)) {
        SubLoop->addBasicBlockToLoop(New, *LI);

        if (*BB == SubLoopBlocksFirst[0])
          SubLoopBlocksFirst.push_back(New);
        if (*BB == SubLoopBlocksLast[0])
          SubLoopBlocksLast.push_back(New);
      } else if (AftBlocks.count(*BB)) {
        L->addBasicBlockToLoop(New, *LI);

        if (*BB == AftBlocksFirst[0])
          AftBlocksFirst.push_back(New);
        if (*BB == AftBlocksLast[0])
          AftBlocksLast.push_back(New);
      } else {
        llvm_unreachable("BB being cloned should be in Fore/Sub/Aft");
      }

      // Update our running maps of newest clones
      PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]);
      LastValueMap[*BB] = New;
      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
           VI != VE; ++VI) {
        PrevItValueMap[VI->second] =
            const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]);
        LastValueMap[VI->first] = VI->second;
      }

      NewBlocks.push_back(New);

      // Update DomTree:
      if (*BB == ForeBlocksFirst[0])
        DT->addNewBlock(New, ForeBlocksLast[It - 1]);
      else if (*BB == SubLoopBlocksFirst[0])
        DT->addNewBlock(New, SubLoopBlocksLast[It - 1]);
      else if (*BB == AftBlocksFirst[0])
        DT->addNewBlock(New, AftBlocksLast[It - 1]);
      else {
        // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree
        // structure.
        auto BBDomNode = DT->getNode(*BB);
        auto BBIDom = BBDomNode->getIDom();
        BasicBlock *OriginalBBIDom = BBIDom->getBlock();
        assert(OriginalBBIDom);
        assert(LastValueMap[cast<Value>(OriginalBBIDom)]);
        DT->addNewBlock(
            New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
      }
    }

    // Remap all instructions in the most recent iteration
    for (BasicBlock *NewBlock : NewBlocks) {
      for (Instruction &I : *NewBlock) {
        ::remapInstruction(&I, LastValueMap);
        if (auto *II = dyn_cast<IntrinsicInst>(&I))
          if (II->getIntrinsicID() == Intrinsic::assume)
            AC->registerAssumption(II);
      }
    }

    // Alter the ForeBlocks phi's, pointing them at the latest version of the
    // value from the previous iteration's phis
    for (PHINode &Phi : ForeBlocksFirst[It]->phis()) {
      Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]);
      assert(OldValue && "should have incoming edge from Aft[It]");
      Value *NewValue = OldValue;
      if (Value *PrevValue = PrevItValueMap[OldValue])
        NewValue = PrevValue;

      assert(Phi.getNumOperands() == 2);
      Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]);
      Phi.setIncomingValue(0, NewValue);
      Phi.removeIncomingValue(1);
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // finish up connecting the blocks and phi nodes. At this point LastValueMap
  // is the last unrolled iterations values.

  // Update Phis in BB from OldBB to point to NewBB
  auto updatePHIBlocks = [](BasicBlock *BB, BasicBlock *OldBB,
                            BasicBlock *NewBB) {
    for (PHINode &Phi : BB->phis()) {
      int I = Phi.getBasicBlockIndex(OldBB);
      Phi.setIncomingBlock(I, NewBB);
    }
  };
  // Update Phis in BB from OldBB to point to NewBB and use the latest value
  // from LastValueMap
  auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB,
                                     BasicBlock *NewBB,
                                     ValueToValueMapTy &LastValueMap) {
    for (PHINode &Phi : BB->phis()) {
      for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) {
        if (Phi.getIncomingBlock(b) == OldBB) {
          Value *OldValue = Phi.getIncomingValue(b);
          if (Value *LastValue = LastValueMap[OldValue])
            Phi.setIncomingValue(b, LastValue);
          Phi.setIncomingBlock(b, NewBB);
          break;
        }
      }
    }
  };
  // Move all the phis from Src into Dest
  auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) {
    Instruction *insertPoint = Dest->getFirstNonPHI();
    while (PHINode *Phi = dyn_cast<PHINode>(Src->begin()))
      Phi->moveBefore(insertPoint);
  };

  // Update the PHI values outside the loop to point to the last block
  updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(),
                           LastValueMap);

  // Update ForeBlocks successors and phi nodes
  BranchInst *ForeTerm =
      cast<BranchInst>(ForeBlocksLast.back()->getTerminator());
  BasicBlock *Dest = SubLoopBlocksFirst[0];
  ForeTerm->setSuccessor(0, Dest);

  if (CompletelyUnroll) {
    // The header is entered only from the preheader now, so each phi collapses
    // to its preheader value.
    while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) {
      Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader));
      Phi->getParent()->getInstList().erase(Phi);
    }
  } else {
    // Update the PHI values to point to the last aft block
    updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0],
                             AftBlocksLast.back(), LastValueMap);
  }

  for (unsigned It = 1; It != Count; It++) {
    // Remap ForeBlock successors from previous iteration to this
    BranchInst *ForeTerm =
        cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator());
    BasicBlock *Dest = ForeBlocksFirst[It];
    ForeTerm->setSuccessor(0, Dest);
  }

  // Subloop successors and phis
  BranchInst *SubTerm =
      cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
  SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
  SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
  updatePHIBlocks(SubLoopBlocksFirst[0], ForeBlocksLast[0],
                  ForeBlocksLast.back());
  updatePHIBlocks(SubLoopBlocksFirst[0], SubLoopBlocksLast[0],
                  SubLoopBlocksLast.back());

  for (unsigned It = 1; It != Count; It++) {
    // Replace the conditional branch of the previous iteration subloop with an
    // unconditional one to this one
    BranchInst *SubTerm =
        cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator());
    BranchInst::Create(SubLoopBlocksFirst[It], SubTerm);
    SubTerm->eraseFromParent();

    updatePHIBlocks(SubLoopBlocksFirst[It], ForeBlocksLast[It],
                    ForeBlocksLast.back());
    updatePHIBlocks(SubLoopBlocksFirst[It], SubLoopBlocksLast[It],
                    SubLoopBlocksLast.back());
    movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
  }

  // Aft blocks successors and phis
  BranchInst *Term = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
  if (CompletelyUnroll) {
    BranchInst::Create(LoopExit, Term);
    Term->eraseFromParent();
  } else {
    Term->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
  }
  updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0],
                  SubLoopBlocksLast.back());

  for (unsigned It = 1; It != Count; It++) {
    // Replace the conditional branch of the previous iteration subloop with an
    // unconditional one to this one
    BranchInst *AftTerm =
        cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator());
    BranchInst::Create(AftBlocksFirst[It], AftTerm);
    AftTerm->eraseFromParent();

    updatePHIBlocks(AftBlocksFirst[It], SubLoopBlocksLast[It],
                    SubLoopBlocksLast.back());
    movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
  }

  // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the
  // new ones required.
  if (Count != 1) {
    SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
    DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0],
                           SubLoopBlocksFirst[0]);
    DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete,
                           SubLoopBlocksLast[0], AftBlocksFirst[0]);

    DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
                           ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
    DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
                           SubLoopBlocksLast.back(), AftBlocksFirst[0]);
    DT->applyUpdates(DTUpdates);
  }

  // Merge adjacent basic blocks, if possible.
  SmallPtrSet<BasicBlock *, 16> MergeBlocks;
  MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
  MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
  MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
  while (!MergeBlocks.empty()) {
    BasicBlock *BB = *MergeBlocks.begin();
    BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
    if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) {
      BasicBlock *Dest = Term->getSuccessor(0);
      if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
        // Don't remove BB and add Fold as they are the same BB
        assert(Fold == BB);
        (void)Fold;
        MergeBlocks.erase(Dest);
      } else
        MergeBlocks.erase(BB);
    } else
      MergeBlocks.erase(BB);
  }

  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC);
  simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC);

  NumCompletelyUnrolledAndJammed += CompletelyUnroll;
  ++NumUnrolledAndJammed;

#ifndef NDEBUG
  // We shouldn't have done anything to break loop simplify form or LCSSA.
  Loop *OuterL = L->getParentLoop();
  Loop *OutestLoop = OuterL ? OuterL : (!CompletelyUnroll ? L : SubLoop);
  assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI));
  if (!CompletelyUnroll)
    assert(L->isLoopSimplifyForm());
  assert(SubLoop->isLoopSimplifyForm());
  assert(DT->verify());
#endif

  // Update LoopInfo if the loop is completely removed.
  if (CompletelyUnroll)
    LI->erase(L);

  return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
                          : LoopUnrollResult::PartiallyUnrolled;
}
/// Instruments the loops of \p F so that, at run time, the observed trip count
/// of each loop can be reported together with the statically estimated one.
///
/// For every loop reported by LoopInfoEx this:
///  - looks up the block controlling the loop and classifies the loop by its
///    exit comparison (0 = interval comparison, 1 = other integer comparison,
///    2 = not controlled by an integer compare; +3 when no estimate exists);
///  - inserts a counter that starts at zero in the loop's entry block and is
///    incremented in the header;
///  - records the counter and the estimate in every non-landing-pad exit block.
///
/// In addition, a call to flushLoopStats is inserted before every program exit
/// point that lives in \p F, and a call to initLoopList is inserted in the
/// entry block of `main` (or `MAIN__`).
///
/// \returns true when instrumentation was attempted; false only for the
///          function that is explicitly skipped below, which is left untouched.
bool TripCountProfiler::runOnFunction(Function &F) {

    // NOTE(review): this looks like a debugging leftover -- the function named
    // "P7Traces2Alignment" is dumped to stderr and not instrumented. The check
    // is performed before anything is created or inserted so that the `false`
    // ("nothing changed") result returned here is accurate.
    if (&F ==  F.getParent()->getFunction("P7Traces2Alignment")) {

        errs() << F << "\n";
        return false;

    }

    IRBuilder<> Builder(F.getEntryBlock().getTerminator());

    // The module identifier string is created once and shared afterwards.
    if (!moduleIdentifierStr) {
        moduleIdentifierStr = Builder.CreateGlobalStringPtr(F.getParent()->getModuleIdentifier(), "moduleIdentifierStr");
    }

    Value* main = F.getParent()->getFunction("main");
    if(!main) main = F.getParent()->getFunction("MAIN__"); // Fortran hack

    bool isMain = (&F == main);

    // The program entry point initializes the run-time loop list.
    if (isMain) {
        Builder.CreateCall(initLoopList, "");
    }

    LoopInfoEx& li = getAnalysis<LoopInfoEx>();
    TripCountAnalysis& tca = getAnalysis<TripCountAnalysis>();


    /*
     * Here we have all the instructions that will stop the program
     *
     * E.g.: abort, exit, return of function main
     *
     * Before those instructions, we will print all the data we have collected.
     */
    ExitInfo& eI = getAnalysis<ExitInfo>();
    for(std::set<Instruction*>::iterator Iit = eI.exitPoints.begin(), Iend = eI.exitPoints.end(); Iit != Iend; Iit++) {

        Instruction* I = *Iit;

        // Only exit points that belong to the function being processed.
        if(I->getParent()->getParent() == &F) {
            Builder.SetInsertPoint(I);

            std::vector<Value*> args;
            args.push_back(moduleIdentifierStr);
            llvm::ArrayRef<llvm::Value *> arrayArgs(args);
            Builder.CreateCall(flushLoopStats, arrayArgs, "");
        }

    }


    LoopNormalizerAnalysis& ln = getAnalysis<LoopNormalizerAnalysis>();

    Constant* constZero = ConstantInt::get(Type::getInt64Ty(F.getContext()), 0);

    // Marker value passed as the estimate when no estimate is available.
    Constant* unknownTripCount = ConstantInt::get(Type::getInt64Ty(F.getContext()), -2);


    for(LoopInfoEx::iterator lit = li.begin(); lit != li.end(); lit++) {

        bool mustInstrument = true;

        Loop* loop = *lit;

        BasicBlock* header = loop->getHeader();
        BasicBlock* entryBlock = ln.entryBlocks[header];

        /*
         * Here we are looking for the predicate that stops the loop.
         *
         * At this moment, we are only considering loops that are controlled by
         * integer comparisons.
         */
        BasicBlock* exitBlock = findLoopControllerBlock(loop);
        assert (exitBlock && "Exit block not found!");


        TerminatorInst* T = exitBlock->getTerminator();
        BranchInst* BI = dyn_cast<BranchInst>(T);
        ICmpInst* CI = BI ? dyn_cast<ICmpInst>(BI->getCondition()) : NULL;

        int LoopClass;

        if (!CI) {
            // Not controlled by an integer comparison.
            LoopClass = 2;
            mustInstrument = false;
        }
        else {

            if (isIntervalComparison(CI)) {
                LoopClass = 0;
            } else {
                LoopClass = 1;
            }

            Value* Op1 = getValueAtEntryPoint(CI->getOperand(0), header);
            Value* Op2 = getValueAtEntryPoint(CI->getOperand(1), header);

            // A missing operand does not disable instrumentation; only a pair
            // of known operands that are not both integers does.
            if (Op1 && Op2 &&
                ((!Op1->getType()->isIntegerTy()) || (!Op2->getType()->isIntegerTy()))) {
                mustInstrument = false;
            }
        }

        Value* estimatedTripCount = tca.getTripCount(header);

        if((!estimatedTripCount) && mustInstrument) {
            estimatedTripCount = unknownTripCount;
            LoopClass += 3; // 3 = UnknownIntervalLoop; 4 = UnknownEqualityLoop
            NumUnknownTripCount++;
        }


        if (estimatedTripCount) {

            //Before the loop starts, the trip count is zero
            AllocaInst* tripCount = insertAlloca(entryBlock, constZero);

            //Every time the loop header is executed, we increment the trip count
            insertAdd(header, tripCount);


            /*
             * We will collect the actual trip count and the estimate trip count in every
             * basic block that is outside the loop
             */
            std::set<BasicBlock*> blocksToInstrument;
            SmallVector<BasicBlock*, 2> exitBlocks;
            loop->getExitBlocks(exitBlocks);
            for (SmallVectorImpl<BasicBlock*>::iterator eb = exitBlocks.begin(); eb !=  exitBlocks.end(); eb++) {

                BasicBlock* CurrentEB = *eb;

                /*
                 * Does not instrument landingPad (exception handling) blocks
                 * TODO: Handle LandingPad blocks (if possible)
                 */
                if(!CurrentEB->isLandingPad())
                    blocksToInstrument.insert(CurrentEB);

            }

            saveTripCount(blocksToInstrument, tripCount, estimatedTripCount, header, LoopClass);

            NumInstrumentedLoops++;

        } else {
            NumIgnoredLoops++;
        }
    }

    return true;

}
/// Simplifies the instructions in the body of loop \p L until no further
/// simplification is found.
///
/// The loop blocks are visited in reverse post-order. Each instruction with
/// uses is handed to SimplifyInstruction; when a replacement value is found and
/// the replacement preserves LCSSA form, all uses are rewritten to it.
/// Instructions that are trivially dead are collected and deleted after each
/// sweep over the loop. Sweeps repeat as long as a rewritten use belonged to a
/// PHI node that had already been visited in the current sweep.
///
/// \param L     Loop whose blocks are simplified.
/// \param DT    Dominator tree, passed to the simplification query.
/// \param LI    Loop info, used for the RPO traversal and the LCSSA check.
/// \param AC    Assumption cache, passed to the simplification query.
/// \param TLI   Target library info, used for the query and deadness checks.
/// \param MSSAU Optional MemorySSA updater; may be null. When present, memory
///              accesses of replaced instructions are redirected and the
///              updater is handed to the dead-instruction deletion.
/// \returns true if any instruction was simplified or deleted.
static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
                             AssumptionCache &AC, const TargetLibraryInfo &TLI,
                             MemorySSAUpdater *MSSAU) {
  const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
  SimplifyQuery SQ(DL, &TLI, &DT, &AC);

  // On the first pass over the loop body we try to simplify every instruction.
  // On subsequent passes, we can restrict this to only simplifying instructions
  // where the inputs have been updated. We end up needing two sets: one
  // containing the instructions we are simplifying in *this* pass, and one for
  // the instructions we will want to simplify in the *next* pass. We use
  // pointers so we can swap between two stably allocated sets.
  SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;

  // Track the PHI nodes that have already been visited during each iteration so
  // that we can identify when it is necessary to iterate.
  SmallPtrSet<PHINode *, 4> VisitedPHIs;

  // While simplifying we may discover dead code or cause code to become dead.
  // Keep track of all such instructions and we will delete them at the end.
  SmallVector<Instruction *, 8> DeadInsts;

  // First we want to create an RPO traversal of the loop body. By processing in
  // RPO we can ensure that definitions are processed prior to uses (for non PHI
  // uses) in all cases. This ensures we maximize the simplifications in each
  // iteration over the loop and minimizes the possible causes for continuing to
  // iterate.
  LoopBlocksRPO RPOT(&L);
  RPOT.perform(&LI);
  // MSSA stays null when no updater was supplied; every use below is guarded
  // by a check of MSSAU.
  MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;

  bool Changed = false;
  // Each trip through this loop is one full sweep over the loop body; the
  // only exit is the `Next->empty()` check at the bottom.
  for (;;) {
    if (MSSAU && VerifyMemorySSA)
      MSSA->verifyMemorySSA();
    for (BasicBlock *BB : RPOT) {
      for (Instruction &I : *BB) {
        if (auto *PI = dyn_cast<PHINode>(&I))
          VisitedPHIs.insert(PI);

        // Nothing to rewrite for an unused instruction; just remember it for
        // deletion if it is trivially dead.
        if (I.use_empty()) {
          if (isInstructionTriviallyDead(&I, &TLI))
            DeadInsts.push_back(&I);
          continue;
        }

        // We special case the first iteration which we can detect due to the
        // empty `ToSimplify` set.
        bool IsFirstIteration = ToSimplify->empty();

        if (!IsFirstIteration && !ToSimplify->count(&I))
          continue;

        Value *V = SimplifyInstruction(&I, SQ.getWithInstruction(&I));
        if (!V || !LI.replacementPreservesLCSSAForm(&I, V))
          continue;

        // The iterator is advanced before the use is rewritten, because
        // `U.set(V)` removes the use from I's use list.
        for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
             UI != UE;) {
          Use &U = *UI++;
          auto *UserI = cast<Instruction>(U.getUser());
          U.set(V);

          // If the instruction is used by a PHI node we have already processed
          // we'll need to iterate on the loop body to converge, so add it to
          // the next set.
          if (auto *UserPI = dyn_cast<PHINode>(UserI))
            if (VisitedPHIs.count(UserPI)) {
              Next->insert(UserPI);
              continue;
            }

          // If we are only simplifying targeted instructions and the user is an
          // instruction in the loop body, add it to our set of targeted
          // instructions. Because we process defs before uses (outside of PHIs)
          // we won't have visited it yet.
          //
          // We also skip any uses outside of the loop being simplified. Those
          // should always be PHI nodes due to LCSSA form, and we don't want to
          // try to simplify those away.
          assert((L.contains(UserI) || isa<PHINode>(UserI)) &&
                 "Uses outside the loop should be PHI nodes due to LCSSA!");
          if (!IsFirstIteration && L.contains(UserI))
            ToSimplify->insert(UserI);
        }

        // When both the replaced instruction and its replacement have a
        // memory access, redirect users of the former to the latter.
        if (MSSAU)
          if (Instruction *SimpleI = dyn_cast_or_null<Instruction>(V))
            if (MemoryAccess *MA = MSSA->getMemoryAccess(&I))
              if (MemoryAccess *ReplacementMA = MSSA->getMemoryAccess(SimpleI))
                MA->replaceAllUsesWith(ReplacementMA);

        assert(I.use_empty() && "Should always have replaced all uses!");
        if (isInstructionTriviallyDead(&I, &TLI))
          DeadInsts.push_back(&I);
        ++NumSimplified;
        Changed = true;
      }
    }

    // Delete any dead instructions found thus far now that we've finished an
    // iteration over all instructions in all the loop blocks.
    if (!DeadInsts.empty()) {
      Changed = true;
      RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, &TLI, MSSAU);
    }

    if (MSSAU && VerifyMemorySSA)
      MSSA->verifyMemorySSA();

    // If we never found a PHI that needs to be simplified in the next
    // iteration, we're done.
    if (Next->empty())
      break;

    // Otherwise, put the next set in place for the next iteration and reset it
    // and the visited PHIs for that iteration.
    std::swap(Next, ToSimplify);
    Next->clear();
    VisitedPHIs.clear();
    DeadInsts.clear();
  }

  return Changed;
}
/// Walks every loop known to LoopInfoEx and, whenever the loop is controlled by
/// an integer comparison whose operands and constant progress vectors can all
/// be determined, emits a vector-based trip count estimate for it. Loops for
/// which any of these pieces is missing are skipped (and counted in the
/// corresponding statistic where one exists).
void TripCountGenerator::generateVectorEstimatedTripCounts(Function &F){

	LoopInfoEx& loopInfo = getAnalysis<LoopInfoEx>();
	LoopNormalizerAnalysis& normalizer = getAnalysis<LoopNormalizerAnalysis>();

	for(LoopInfoEx::iterator lit = loopInfo.begin(); lit != loopInfo.end(); lit++){

		Loop* loop = *lit;

		BasicBlock* header = loop->getHeader();
		BasicBlock* entryBlock = normalizer.entryBlocks[header];

		LoopControllersDepGraph& depGraph = getAnalysis<LoopControllersDepGraph>();
		depGraph.setPerspective(header);

		// Locate the predicate that stops the loop. Only loops controlled by an
		// integer comparison are handled at the moment.
		BasicBlock* exitBlock = findLoopControllerBlock(loop);
		assert(exitBlock && "Exiting Block not found!");

		TerminatorInst* T = exitBlock->getTerminator();
		BranchInst* BI = dyn_cast<BranchInst>(T);
		ICmpInst* CI = BI ? dyn_cast<ICmpInst>(BI->getCondition()) : NULL;

		// No integer comparison: the trip count cannot be determined.
		if (!CI)
			continue;

		// Classify the loop by the kind of comparison that controls it.
		const bool intervalLoop = isIntervalComparison(CI);
		if (intervalLoop)
			NumIntervalLoops++;
		else
			NumEqualityLoops++;

		Value* Op1 = getValueAtEntryPoint(CI->getOperand(0), header);
		Value* Op2 = getValueAtEntryPoint(CI->getOperand(1), header);

		// At least one operand has no known value at the loop entry point.
		if (!Op1 || !Op2) {
			if (intervalLoop)
				NumUnknownConditionsIL++;
			else
				NumUnknownConditionsEL++;
			continue;
		}

		// We only handle loop conditions that compare integer variables.
		if (!Op1->getType()->isIntegerTy() || !Op2->getType()->isIntegerTy()) {
			NumIncompatibleOperandTypes++;
			continue;
		}

		ProgressVector* V1 = generateConstantProgressVector(CI->getOperand(0), header);
		ProgressVector* V2 = generateConstantProgressVector(CI->getOperand(1), header);

		//TODO: Increment a statistic here
		if (!V1 || !V2)
			continue;

		generateVectorEstimatedTripCount(header, entryBlock, Op1, Op2, V1, V2, CI);
		NumVectorEstimatedTCs++;
	}

}
示例#23
0
// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
// as taken, exiting edges as not-taken.
bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB,
                                                     const LoopInfo &LI,
                                                     SccInfo &SccI) {
  int SccNum;
  Loop *L = LI.getLoopFor(BB);
  if (!L) {
    SccNum = getSCCNum(BB, SccI);
    if (SccNum < 0)
      return false;
  }

  SmallPtrSet<const BasicBlock*, 8> UnlikelyBlocks;
  if (L)
    computeUnlikelySuccessors(BB, L, UnlikelyBlocks);

  SmallVector<unsigned, 8> BackEdges;
  SmallVector<unsigned, 8> ExitingEdges;
  SmallVector<unsigned, 8> InEdges; // Edges from header to the loop.
  SmallVector<unsigned, 8> UnlikelyEdges;

  for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
    // Use LoopInfo if we have it, otherwise fall-back to SCC info to catch
    // irreducible loops.
    if (L) {
      if (UnlikelyBlocks.count(*I) != 0)
        UnlikelyEdges.push_back(I.getSuccessorIndex());
      else if (!L->contains(*I))
        ExitingEdges.push_back(I.getSuccessorIndex());
      else if (L->getHeader() == *I)
        BackEdges.push_back(I.getSuccessorIndex());
      else
        InEdges.push_back(I.getSuccessorIndex());
    } else {
      if (getSCCNum(*I, SccI) != SccNum)
        ExitingEdges.push_back(I.getSuccessorIndex());
      else if (isSCCHeader(*I, SccNum, SccI))
        BackEdges.push_back(I.getSuccessorIndex());
      else
        InEdges.push_back(I.getSuccessorIndex());
    }
  }

  if (BackEdges.empty() && ExitingEdges.empty() && UnlikelyEdges.empty())
    return false;

  // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and
  // normalize them so that they sum up to one.
  unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
                   (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
                   (UnlikelyEdges.empty() ? 0 : LBH_UNLIKELY_WEIGHT) +
                   (ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT);

  if (uint32_t numBackEdges = BackEdges.size()) {
    BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
    auto Prob = TakenProb / numBackEdges;
    for (unsigned SuccIdx : BackEdges)
      setEdgeProbability(BB, SuccIdx, Prob);
  }

  if (uint32_t numInEdges = InEdges.size()) {
    BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
    auto Prob = TakenProb / numInEdges;
    for (unsigned SuccIdx : InEdges)
      setEdgeProbability(BB, SuccIdx, Prob);
  }

  if (uint32_t numExitingEdges = ExitingEdges.size()) {
    BranchProbability NotTakenProb = BranchProbability(LBH_NONTAKEN_WEIGHT,
                                                       Denom);
    auto Prob = NotTakenProb / numExitingEdges;
    for (unsigned SuccIdx : ExitingEdges)
      setEdgeProbability(BB, SuccIdx, Prob);
  }

  if (uint32_t numUnlikelyEdges = UnlikelyEdges.size()) {
    BranchProbability UnlikelyProb = BranchProbability(LBH_UNLIKELY_WEIGHT,
                                                       Denom);
    auto Prob = UnlikelyProb / numUnlikelyEdges;
    for (unsigned SuccIdx : UnlikelyEdges)
      setEdgeProbability(BB, SuccIdx, Prob);
  }

  return true;
}
示例#24
0
/// Returns true when edge \p E (source = E.first, target = E.second) is a loop
/// back edge: the target is the header of the loop it belongs to and the source
/// lies inside that same loop.
bool LocalFrequencies::isBackEdge(Edge E)
{
  LoopInfo &LI = getAnalysis<LoopInfo>();
  Loop *TargetLoop = LI.getLoopFor(E.second);

  // The target must be in a loop, and must be that loop's header.
  if (!TargetLoop)
    return false;
  if (TargetLoop->getHeader() != E.second)
    return false;

  return TargetLoop->contains(E.first);
}
/// Computes the frequency of \p BB and of its outgoing edges, then recurses
/// into the successors that are not reached through a back edge.
///
/// A block other than the current head is only processed once all of its
/// non-back-edge predecessors have been visited; otherwise the call returns
/// without doing anything. The incoming frequency of a loop header is scaled
/// by 1 / (1 - cyclic_probability), with the cyclic probability capped at 0.95.
void BranchProbabilities::propagateFreq(BasicBlock *BB)
{
    // Nothing to do for a block that has already been handled.
    if (_Visited.count(BB))
        return;

    // 1. find bfreq(BB)
    if (BB == _head)
    {
        _bfreq[BB] = new float(1.0f);
    }
    else
    {
        // An incoming edge can only be a back edge when BB is the header of
        // its loop and the predecessor is contained in that loop.
        Loop *HeadedLoop = _LI->getLoopFor(BB);
        if (HeadedLoop && (HeadedLoop->getHeader() != BB))
            HeadedLoop = NULL;

        // Bail out while some forward predecessor is still unprocessed.
        for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;
                ++PI)
        {
            BasicBlock *Pred = *PI;
            const bool ViaBackEdge =
                HeadedLoop && HeadedLoop->contains(Pred) &&
                (HeadedLoop->getLoopPreheader() != Pred);
            if (!ViaBackEdge && !_Visited.count(Pred))
                return;
        }

        // Sum the forward-edge frequencies into the block frequency and the
        // back-edge probabilities into the cyclic probability.
        _bfreq[BB] = new float(0.0f);
        float &BlockFreq = *(_bfreq[BB]);
        float CyclicProb = 0.0f;
        for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;
                ++PI)
        {
            BasicBlock *Pred = *PI;
            const bool ViaBackEdge =
                HeadedLoop && HeadedLoop->contains(Pred) &&
                (HeadedLoop->getLoopPreheader() != Pred);
            if (ViaBackEdge)
                CyclicProb += (*(_BackEdgeProb[Pred][BB]));
            else
                BlockFreq += (*(_Freq[Pred][BB]));
        }

        // Cap the cyclic probability so the division below stays bounded.
        if (CyclicProb > 0.95f)
            CyclicProb = 0.95f;
        BlockFreq = BlockFreq / (1.0f - CyclicProb);
    }

    // 2. calculate the frequencies of b's out edges
    _Visited.insert(BB);
    for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
            ++SI)
    {
        BasicBlock *Succ = *SI;
        float EdgeFreq = getProb(BB, Succ) * (*(_bfreq[BB]));
        _Freq[BB][Succ] = new float(EdgeFreq);

        // update back_edge_prob(BB, Succ) so it
        // can be used by outer loops to calculate
        // cyclic_probability of inner loops
        if (Succ == _head)
            (*(_BackEdgeProb[BB][Succ])) = EdgeFreq;
    }

    // 3. propagate to successor blocks
    for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
            ++SI)
    {
        BasicBlock *Succ = *SI;
        Loop *SuccLoop = _LI->getLoopFor(Succ);

        // Do not follow back edges.
        const bool IsBackEdge =
            SuccLoop && (Succ == SuccLoop->getHeader()) &&
            SuccLoop->contains(BB) && (BB != SuccLoop->getLoopPreheader());
        if (IsBackEdge)
            continue;

        propagateFreq(Succ);
    }
}
示例#26
0
/// Create a clone of the blocks in a loop and connect them together.
/// If CreateRemainderLoop is false, loop structure will not be cloned,
/// otherwise a new loop will be created including all cloned blocks, and the
/// iterator of it switches to count NewIter down to 0.
/// The cloned blocks should be inserted between InsertTop and InsertBot.
/// If loop structure is cloned InsertTop should be new preheader, InsertBot
/// new loop exit.
/// Return the new cloned loop that is created when CreateRemainderLoop is true.
///
/// \param L                   Loop whose blocks are cloned.
/// \param NewIter             Initial value of the counter PHI created in the
///                            cloned header when CreateRemainderLoop is true.
/// \param CreateRemainderLoop Whether the clone keeps a back edge (true) or
///                            its latch branches straight to \p InsertBot.
/// \param UseEpilogRemainder  Selects the "epil" (true) or "prol" (false) name
///                            suffix, and how cloned header PHIs are rewritten
///                            when no remainder loop is created.
/// \param UnrollRemainder     When true, the new loop is returned without
///                            attaching "llvm.loop.unroll.disable" metadata.
/// \param InsertTop           Block whose terminator's successor 0 is redirected
///                            to the cloned header.
/// \param InsertBot           Block the cloned latch exits to.
/// \param Preheader           Preheader of \p L; used to find the entry
///                            incoming value of the header PHIs.
/// \param NewBlocks           Receives every cloned block, in RPO order.
/// \param LoopBlocks          Supplies the RPO order of the blocks of \p L.
/// \param VMap                Updated with original-to-clone mappings.
/// \param DT                  Dominator tree to update; may be null.
/// \param LI                  Loop info updated with the cloned blocks.
/// \returns the cloned loop when CreateRemainderLoop is true, nullptr otherwise.
static Loop *
CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
                const bool UseEpilogRemainder, const bool UnrollRemainder,
                BasicBlock *InsertTop,
                BasicBlock *InsertBot, BasicBlock *Preheader,
                std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
                ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
  StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  Function *F = Header->getParent();
  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
  Loop *ParentLoop = L->getParentLoop();
  // Seed the old-loop -> new-loop map. Without a remainder loop, blocks that
  // belonged directly to L are mapped to L's parent instead of a fresh loop.
  NewLoopsMap NewLoops;
  NewLoops[ParentLoop] = ParentLoop;
  if (!CreateRemainderLoop)
    NewLoops[L] = ParentLoop;

  // For each block in the original loop, create a new copy,
  // and update the value map with the newly created values.
  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
    NewBlocks.push_back(NewBB);

    // If we're unrolling the outermost loop, there's no remainder loop,
    // and this block isn't in a nested loop, then the new block is not
    // in any loop. Otherwise, add it to loopinfo.
    if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
      addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);

    VMap[*BB] = NewBB;
    if (Header == *BB) {
      // For the first block, add a CFG connection to this newly
      // created block.
      InsertTop->getTerminator()->setSuccessor(0, NewBB);
    }

    if (DT) {
      if (Header == *BB) {
        // The header is dominated by the preheader.
        DT->addNewBlock(NewBB, InsertTop);
      } else {
        // Copy information from original loop to unrolled loop.
        BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
        DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
      }
    }

    if (Latch == *BB) {
      // For the last block, if CreateRemainderLoop is false, create a direct
      // jump to InsertBot. If not, create a loop back to cloned head.
      VMap.erase((*BB)->getTerminator());
      BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
      BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
      // New instructions are inserted right before the cloned latch branch,
      // which is erased once its replacement has been built.
      IRBuilder<> Builder(LatchBR);
      if (!CreateRemainderLoop) {
        Builder.CreateBr(InsertBot);
      } else {
        // Counter PHI in the cloned header: starts at NewIter, is decremented
        // by one in the latch, and the loop repeats while it is non-zero.
        PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
                                          suffix + ".iter",
                                          FirstLoopBB->getFirstNonPHI());
        Value *IdxSub =
            Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
                              NewIdx->getName() + ".sub");
        Value *IdxCmp =
            Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
        Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
        NewIdx->addIncoming(NewIter, InsertTop);
        NewIdx->addIncoming(IdxSub, NewBB);
      }
      LatchBR->eraseFromParent();
    }
  }

  // Change the incoming values to the ones defined in the preheader or
  // cloned loop.
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
    if (!CreateRemainderLoop) {
      if (UseEpilogRemainder) {
        // Keep the PHI, but with InsertTop as its only incoming block.
        unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
        NewPHI->setIncomingBlock(idx, InsertTop);
        NewPHI->removeIncomingValue(Latch, false);
      } else {
        // Drop the PHI altogether: map the original PHI to the value coming
        // from the preheader and erase the clone.
        VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
        cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
      }
    } else {
      // Redirect both incoming edges: preheader -> InsertTop, and original
      // latch -> cloned latch (remapping the incoming value if it was cloned).
      unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
      NewPHI->setIncomingBlock(idx, InsertTop);
      BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
      idx = NewPHI->getBasicBlockIndex(Latch);
      Value *InVal = NewPHI->getIncomingValue(idx);
      NewPHI->setIncomingBlock(idx, NewLatch);
      if (Value *V = VMap.lookup(InVal))
        NewPHI->setIncomingValue(idx, V);
    }
  }
  if (CreateRemainderLoop) {
    Loop *NewLoop = NewLoops[L];
    assert(NewLoop && "L should have been cloned");

    // Only add loop metadata if the loop is not going to be completely
    // unrolled.
    if (UnrollRemainder)
      return NewLoop;

    // Add unroll disable metadata to disable future unrolling for this loop.
    SmallVector<Metadata *, 4> MDs;
    // Reserve first location for self reference to the LoopID metadata node.
    MDs.push_back(nullptr);
    MDNode *LoopID = NewLoop->getLoopID();
    if (LoopID) {
      // First remove any existing loop unrolling metadata.
      for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
        bool IsUnrollMetadata = false;
        MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
        if (MD) {
          const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
          IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
        }
        if (!IsUnrollMetadata)
          MDs.push_back(LoopID->getOperand(i));
      }
    }

    LLVMContext &Context = NewLoop->getHeader()->getContext();
    SmallVector<Metadata *, 1> DisableOperands;
    DisableOperands.push_back(MDString::get(Context,
                                            "llvm.loop.unroll.disable"));
    MDNode *DisableNode = MDNode::get(Context, DisableOperands);
    MDs.push_back(DisableNode);

    MDNode *NewLoopID = MDNode::get(Context, MDs);
    // Set operand 0 to refer to the loop id itself.
    NewLoopID->replaceOperandWith(0, NewLoopID);
    NewLoop->setLoopID(NewLoopID);
    return NewLoop;
  }
  else
    return nullptr;
}
// Apply the "Loop Branch Heuristics" to the outgoing edges of BB: back edges
// (and, for a loop header, edges into the loop body) are predicted taken,
// while edges leaving BB's loop are predicted not-taken. Returns false when BB
// is not inside any loop, true otherwise.
bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
  uint32_t numSuccs = BB->getTerminator()->getNumSuccessors();

  Loop *L = LI->getLoopFor(BB);
  if (!L)
    return false;

  // Successor blocks, grouped by the kind of edge that leads to them.
  SmallPtrSet<BasicBlock *, 8> BackEdges;
  SmallPtrSet<BasicBlock *, 8> ExitingEdges;
  SmallPtrSet<BasicBlock *, 8> InEdges; // Edges from header to the loop.

  bool isHeader = BB == L->getHeader();

  for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
    BasicBlock *Succ = *I;
    // A successor whose innermost loop differs from L counts as an exit.
    if (LI->getLoopFor(Succ) != L) {
      ExitingEdges.insert(Succ);
      continue;
    }
    if (Succ == L->getHeader()) {
      BackEdges.insert(Succ);
      continue;
    }
    if (isHeader)
      InEdges.insert(Succ);
  }

  // Back edges share LBH_TAKEN_WEIGHT, but never drop below NORMAL_WEIGHT.
  if (!BackEdges.empty()) {
    uint32_t numBackEdges = BackEdges.size();
    uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges;
    backWeight = (backWeight < NORMAL_WEIGHT) ? NORMAL_WEIGHT : backWeight;

    for (BasicBlock *Back : BackEdges)
      BP->setEdgeWeight(BB, Back, backWeight);
  }

  // Same scheme for the edges from the header into the loop.
  if (!InEdges.empty()) {
    uint32_t numInEdges = InEdges.size();
    uint32_t inWeight = LBH_TAKEN_WEIGHT / numInEdges;
    inWeight = (inWeight < NORMAL_WEIGHT) ? NORMAL_WEIGHT : inWeight;

    for (BasicBlock *In : InEdges)
      BP->setEdgeWeight(BB, In, inWeight);
  }

  // NOTE(review): the exit weight is divided by the number of *non-exiting*
  // successors, and exiting edges get no weight at all when every successor
  // exits -- confirm that this is intended.
  uint32_t numExitingEdges = ExitingEdges.size();
  uint32_t numNonExitingEdges = numSuccs - numExitingEdges;
  if (numNonExitingEdges != 0) {
    uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges;
    exitWeight = (exitWeight < MIN_WEIGHT) ? MIN_WEIGHT : exitWeight;

    for (BasicBlock *Exiting : ExitingEdges)
      BP->setEdgeWeight(BB, Exiting, exitWeight);
  }

  return true;
}
/// Returns true when the outer loop \p L, together with its single inner loop,
/// has a shape and dependence structure for which unroll-and-jam is legal.
bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
                                DependenceInfo &DI) {
  /* We currently handle outer loops like this:
        |
    ForeFirst    <----\    }
     Blocks           |    } ForeBlocks
    ForeLast          |    }
        |             |
    SubLoopFirst  <\  |    }
     Blocks        |  |    } SubLoopBlocks
    SubLoopLast   -/  |    }
        |             |
    AftFirst          |    }
     Blocks           |    } AftBlocks
    AftLast     ------/    }
        |

    There are (theoretically) any number of blocks in ForeBlocks, SubLoopBlocks
    and AftBlocks, providing that there is one edge from Fores to SubLoops,
    one edge from SubLoops to Afts and a single outer loop exit (from Afts).
    In practice we currently limit Aft blocks to a single block, and limit
    things further in the profitability checks of the unroll and jam pass.

    Because of the way we rearrange basic blocks, we also require that
    the Fore blocks on all unrolled iterations are safe to move before the
    SubLoop blocks of all iterations. So we require that the phi node looping
    operands of ForeHeader can be moved to at least the end of ForeEnd, so that
    we can arrange cloned Fore Blocks before the subloop and match up Phi's
    correctly.

    i.e. The old order of blocks used to be F1 S1_1 S1_2 A1 F2 S2_1 S2_2 A2.
    It needs to be safe to transform this to F1 F2 S1_1 S2_1 S1_2 S2_2 A1 A2.

    There are then a number of checks along the lines of no calls, no
    exceptions, inner loop IV is consistent, etc. Note that for loops requiring
    runtime unrolling, UnrollRuntimeLoopRemainder can also fail in
    UnrollAndJamLoop if the trip count cannot be easily calculated.
  */

  // Both loops must be in simplified form, and there must be exactly one
  // inner loop.
  if (!L->isLoopSimplifyForm() || L->getSubLoops().size() != 1)
    return false;
  Loop *SubLoop = L->getSubLoops()[0];
  if (!SubLoop->isLoopSimplifyForm())
    return false;

  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *Exit = L->getExitingBlock();
  BasicBlock *SubLoopHeader = SubLoop->getHeader();
  BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
  BasicBlock *SubLoopExit = SubLoop->getExitingBlock();

  // Each loop must exit from its latch, and only from there.
  if (Latch != Exit || SubLoopLatch != SubLoopExit)
    return false;

  if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken())
    return false;

  // Split blocks into Fore/SubLoop/Aft based on dominators
  BasicBlockSet SubLoopBlocks;
  BasicBlockSet ForeBlocks;
  BasicBlockSet AftBlocks;
  if (!partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks,
                                AftBlocks, &DT))
    return false;

  // Aft blocks may need to move instructions to fore blocks, which becomes more
  // difficult if there are multiple (potentially conditionally executed)
  // blocks. For now we just exclude loops with multiple aft blocks.
  if (AftBlocks.size() != 1)
    return false;

  // Check inner loop IV is consistent between all iterations: its exit count
  // must be a computable integer that is invariant in the outer loop.
  const SCEV *SubLoopBECountSC = SE.getExitCount(SubLoop, SubLoopLatch);
  if (isa<SCEVCouldNotCompute>(SubLoopBECountSC) ||
      !SubLoopBECountSC->getType()->isIntegerTy())
    return false;
  if (SE.getLoopDisposition(SubLoopBECountSC, L) !=
      ScalarEvolution::LoopInvariant)
    return false;

  // Check the loop safety info for exceptions.
  LoopSafetyInfo LSI;
  computeLoopSafetyInfo(&LSI, L);
  if (LSI.MayThrow)
    return false;

  // We've ruled out the easy stuff and now need to check that there are no
  // interdependencies which may prevent us from moving the:
  //  ForeBlocks before Subloop and AftBlocks.
  //  Subloop before AftBlocks.
  //  ForeBlock phi operands before the subloop

  // Make sure we can move all instructions we need to before the subloop.
  // Start from the values the header phis receive along the back edge and
  // follow their operands.
  SmallVector<Instruction *, 8> Worklist;
  SmallPtrSet<Instruction *, 8> Visited;
  for (auto &Phi : Header->phis()) {
    Value *Incoming = Phi.getIncomingValueForBlock(Latch);
    if (Instruction *IncomingInst = dyn_cast<Instruction>(Incoming))
      Worklist.push_back(IncomingInst);
  }
  while (!Worklist.empty()) {
    Instruction *Cur = Worklist.back();
    Worklist.pop_back();

    // Each instruction only needs to be examined once.
    if (!Visited.insert(Cur).second)
      continue;

    BasicBlock *Parent = Cur->getParent();

    // A value produced inside the inner loop cannot be hoisted above it.
    if (SubLoop->contains(Parent))
      return false;

    // Instructions outside the aft blocks need no further inspection.
    if (!AftBlocks.count(Parent))
      continue;

    // In the aft blocks, phi nodes (probably LCSSA) and anything touching
    // memory or having side effects block the transformation.
    if (isa<PHINode>(Cur))
      return false;
    if (Cur->mayHaveSideEffects() || Cur->mayReadOrWriteMemory())
      return false;

    for (auto &Op : Cur->operands())
      if (Instruction *OpInst = dyn_cast<Instruction>(Op))
        Worklist.push_back(OpInst);
  }

  // Check for memory dependencies which prohibit the unrolling we are doing.
  // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
  // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
  return checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI);
}
示例#29
0
/// Copy the region statement @p Stmt into the code currently being generated.
///
/// A dedicated entry block is split off at the builder's insert point, every
/// block of the statement's region is copied in breadth-first order starting
/// at the region entry, and a dedicated exit block is split off afterwards.
/// Terminators are copied in a second pass once all block copies exist, and
/// each loop whose header is among the copied blocks gets an i32 counting PHI
/// in the copied header.
///
/// @param Stmt      The statement to copy; must satisfy isRegionStmt().
/// @param GlobalMap Forwarded to the copy helpers; at the end every mapping
///                  collected in RegionMaps is inserted into it.
/// @param LTS       Forwarded to the copy helpers; for each loop headed inside
///                  the region it is set to an SCEVUnknown wrapping the new
///                  counting PHI.
void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap,
                               LoopToScevMapT &LTS) {
  assert(Stmt.isRegionStmt() &&
         "Only region statements can be copied by the block generator");

  // Forget all old mappings.
  BlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();

  // The region represented by the statement.
  Region *R = Stmt.getRegion();

  // Create a dedicated entry for the region where we can reload all demoted
  // inputs.
  BasicBlock *EntryBB = R->getEntry();
  BasicBlock *EntryBBCopy =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
  EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry");
  Builder.SetInsertPoint(EntryBBCopy->begin());

  // Predecessors of the entry that are not part of the region are mapped to
  // the dedicated entry block.
  for (auto *PredBB : make_range(pred_begin(EntryBB), pred_end(EntryBB)))
    if (!R->contains(PredBB))
      BlockMap[PredBB] = EntryBBCopy;

  // Iterate over all blocks in the region in a breadth-first search.
  std::deque<BasicBlock *> Blocks;
  SmallPtrSet<BasicBlock *, 8> SeenBlocks;
  Blocks.push_back(EntryBB);
  SeenBlocks.insert(EntryBB);

  while (!Blocks.empty()) {
    BasicBlock *BB = Blocks.front();
    Blocks.pop_front();

    // First split the block and update dominance information.
    BasicBlock *BBCopy = splitBB(BB);
    BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy);

    // In order to remap PHI nodes we store also basic block mappings.
    BlockMap[BB] = BBCopy;

    // Get the mapping for this block and initialize it with the mapping
    // available at its immediate dominator (in the new region). The
    // dominator's map is read *before* the slot for BBCopy is requested:
    // operator[] may insert, and a reference into RegionMaps must not be held
    // across another operator[] call unless the container guarantees that
    // insertion never relocates existing elements.
    ValueMapT IDomMap = RegionMaps[BBCopyIDom];
    ValueMapT &RegionMap = RegionMaps[BBCopy];
    RegionMap = std::move(IDomMap);

    // Copy the block with the BlockGenerator.
    copyBB(Stmt, BB, BBCopy, RegionMap, GlobalMap, LTS);

    // In order to remap PHI nodes we store also basic block mappings.
    // NOTE(review): this repeats the assignment made before copyBB and looks
    // redundant unless copyBB can modify BlockMap[BB] - confirm before
    // removing.
    BlockMap[BB] = BBCopy;

    // Add values to incomplete PHI nodes waiting for this block to be copied.
    for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB])
      addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB,
                      GlobalMap, LTS);
    IncompletePHINodeMap[BB].clear();

    // And continue with new successors inside the region. SeenBlocks doubles
    // as the visited set, so each block is enqueued at most once.
    for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB)))
      if (R->contains(SuccBB) && SeenBlocks.insert(SuccBB).second)
        Blocks.push_back(SuccBB);
  }

  // Now create a new dedicated region exit block and add it to the region map.
  BasicBlock *ExitBBCopy =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
  ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit");
  BlockMap[R->getExit()] = ExitBBCopy;

  repairDominance(R->getExit(), ExitBBCopy);

  // As the block generator doesn't handle control flow we need to add the
  // region control flow by hand after all blocks have been copied.
  for (BasicBlock *BB : SeenBlocks) {

    // cast<> requires every copied block to end in a BranchInst.
    BranchInst *BI = cast<BranchInst>(BB->getTerminator());

    BasicBlock *BBCopy = BlockMap[BB];
    Instruction *BICopy = BBCopy->getTerminator();

    // Make the old-block -> new-block mappings visible to the instruction
    // copy so the branch targets get remapped.
    ValueMapT &RegionMap = RegionMaps[BBCopy];
    RegionMap.insert(BlockMap.begin(), BlockMap.end());

    // Emit the copied branch, then drop the terminator the copy had before.
    Builder.SetInsertPoint(BBCopy);
    copyInstScalar(Stmt, BI, RegionMap, GlobalMap, LTS);
    BICopy->eraseFromParent();
  }

  // Add counting PHI nodes to all loops in the region that can be used as
  // replacement for SCEVs referring to the old loop.
  for (BasicBlock *BB : SeenBlocks) {
    Loop *L = LI.getLoopFor(BB);
    if (L == nullptr || L->getHeader() != BB)
      continue;

    BasicBlock *BBCopy = BlockMap[BB];
    Value *NullVal = Builder.getInt32(0);
    PHINode *LoopPHI =
        PHINode::Create(Builder.getInt32Ty(), 2, "polly.subregion.iv");
    Instruction *LoopPHIInc = BinaryOperator::CreateAdd(
        LoopPHI, Builder.getInt32(1), "polly.subregion.iv.inc");
    LoopPHI->insertBefore(BBCopy->begin());
    LoopPHIInc->insertBefore(BBCopy->getTerminator());

    // In-region predecessors that belong to the loop feed the incremented
    // value; in-region predecessors outside the loop feed zero.
    for (auto *PredBB : make_range(pred_begin(BB), pred_end(BB))) {
      if (!R->contains(PredBB))
        continue;
      if (L->contains(PredBB))
        LoopPHI->addIncoming(LoopPHIInc, BlockMap[PredBB]);
      else
        LoopPHI->addIncoming(NullVal, BlockMap[PredBB]);
    }

    // Any predecessor of the copy that has no incoming value yet gets zero.
    for (auto *PredBBCopy : make_range(pred_begin(BBCopy), pred_end(BBCopy)))
      if (LoopPHI->getBasicBlockIndex(PredBBCopy) < 0)
        LoopPHI->addIncoming(NullVal, PredBBCopy);

    LTS[L] = SE.getUnknown(LoopPHI);
  }

  // Add all mappings from the region to the global map so outside uses will use
  // the copied instructions.
  for (auto &BBMap : RegionMaps)
    GlobalMap.insert(BBMap.second.begin(), BBMap.second.end());

  // Reset the old insert point for the build.
  Builder.SetInsertPoint(ExitBBCopy->begin());
}
示例#30
0
/// Create a clone of the blocks in a loop and connect them together.
/// If UnrollProlog is true, loop structure will not be cloned, otherwise a new
/// loop will be created including all cloned blocks, and the iterator of it
/// switches to count NewIter down to 0.
///
/// \param L            The loop whose blocks are cloned.
/// \param NewIter      Initial value of the "prol.iter" counter PHI created in
///                     the cloned header; only used when \p UnrollProlog is
///                     false.
/// \param UnrollProlog If true the cloned latch branches straight to
///                     \p InsertBot and no new Loop object is created.
/// \param InsertTop    Block whose terminator gets successor 0 redirected to
///                     the cloned header.
/// \param InsertBot    Block the cloned latch exits to.
/// \param NewBlocks    Receives the cloned blocks in the RPO order of
///                     \p LoopBlocks.
/// \param LoopBlocks   Supplies the RPO traversal of the blocks of \p L.
/// \param VMap         Updated with a mapping from every original block to its
///                     clone; the entries for the header PHIs and the original
///                     latch terminator are adjusted as described below.
/// \param LI           Loop info that the new loop and blocks are added to.
static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
                            BasicBlock *InsertTop, BasicBlock *InsertBot,
                            std::vector<BasicBlock *> &NewBlocks,
                            LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
                            LoopInfo *LI) {
    BasicBlock *Preheader = L->getLoopPreheader();
    BasicBlock *Header = L->getHeader();
    BasicBlock *Latch = L->getLoopLatch();
    Function *F = Header->getParent();
    LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
    LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
    Loop *NewLoop = nullptr;
    Loop *ParentLoop = L->getParentLoop();
    if (!UnrollProlog) {
        // The clone stays a loop: register it as a sibling of the original.
        NewLoop = new Loop();
        if (ParentLoop)
            ParentLoop->addChildLoop(NewLoop);
        else
            LI->addTopLevelLoop(NewLoop);
    }

    // For each block in the original loop, create a new copy,
    // and update the value map with the newly created values.
    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
        BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F);
        NewBlocks.push_back(NewBB);

        // Without a new loop the clones belong to the parent loop, if any.
        if (NewLoop)
            NewLoop->addBasicBlockToLoop(NewBB, *LI);
        else if (ParentLoop)
            ParentLoop->addBasicBlockToLoop(NewBB, *LI);

        VMap[*BB] = NewBB;
        if (Header == *BB) {
            // For the first block, add a CFG connection to this newly
            // created block.
            InsertTop->getTerminator()->setSuccessor(0, NewBB);

        }
        if (Latch == *BB) {
            // For the last block, if UnrollProlog is true, create a direct jump to
            // InsertBot. If not, create a loop back to cloned head.
            // The cloned terminator is replaced below, so drop the mapping
            // from the original terminator to it.
            VMap.erase((*BB)->getTerminator());
            BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
            BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
            IRBuilder<> Builder(LatchBR);
            if (UnrollProlog) {
                Builder.CreateBr(InsertBot);
            } else {
                // Counter that starts at NewIter, is decremented once per
                // iteration and keeps the cloned loop running while the
                // decremented value is non-zero.
                PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter",
                                                  FirstLoopBB->getFirstNonPHI());
                Value *IdxSub =
                    Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
                                      NewIdx->getName() + ".sub");
                Value *IdxCmp =
                    Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
                Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
                NewIdx->addIncoming(NewIter, InsertTop);
                NewIdx->addIncoming(IdxSub, NewBB);
            }
            LatchBR->eraseFromParent();
        }
    }

    // Change the incoming values to the ones defined in the preheader or
    // cloned loop.
    for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
        PHINode *NewPHI = cast<PHINode>(VMap[I]);
        if (UnrollProlog) {
            // No back edge exists in the clone: map the original PHI to the
            // value coming from the preheader and delete the cloned PHI.
            VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
            cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
        } else {
            // Entry edge now comes from InsertTop instead of the preheader.
            unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
            NewPHI->setIncomingBlock(idx, InsertTop);
            // Back edge now comes from the cloned latch and carries the cloned
            // value when one exists.
            BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
            idx = NewPHI->getBasicBlockIndex(Latch);
            Value *InVal = NewPHI->getIncomingValue(idx);
            NewPHI->setIncomingBlock(idx, NewLatch);
            // lookup() performs a single query and, unlike operator[], does
            // not insert a null entry into VMap for values that have no clone.
            if (Value *MappedVal = VMap.lookup(InVal))
                NewPHI->setIncomingValue(idx, MappedVal);
        }
    }
    if (NewLoop) {
        // Add unroll disable metadata to disable future unrolling for this loop.
        SmallVector<Metadata *, 4> MDs;
        // Reserve first location for self reference to the LoopID metadata node.
        MDs.push_back(nullptr);
        MDNode *LoopID = NewLoop->getLoopID();
        if (LoopID) {
            // First remove any existing loop unrolling metadata.
            for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
                bool IsUnrollMetadata = false;
                MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
                if (MD) {
                    const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
                    IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
                }
                if (!IsUnrollMetadata)
                    MDs.push_back(LoopID->getOperand(i));
            }
        }

        LLVMContext &Context = NewLoop->getHeader()->getContext();
        SmallVector<Metadata *, 1> DisableOperands;
        DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
        MDNode *DisableNode = MDNode::get(Context, DisableOperands);
        MDs.push_back(DisableNode);

        MDNode *NewLoopID = MDNode::get(Context, MDs);
        // Set operand 0 to refer to the loop id itself.
        NewLoopID->replaceOperandWith(0, NewLoopID);
        NewLoop->setLoopID(NewLoopID);
    }
}