Esempio n. 1
0
// Tries to remove a sanity check; returns true if it worked.
bool AsapPass::optimizeCheckAway(llvm::Instruction *Inst) {
    BranchInst *BI = cast<BranchInst>(Inst);
    assert(BI->isConditional() && "Sanity check must be conditional branch.");
    
    unsigned int RegularBranch = getRegularBranch(BI, SCI);
    
    bool Changed = false;
    if (RegularBranch == 0) {
        BI->setCondition(ConstantInt::getTrue(Inst->getContext()));
        Changed = true;
    } else if (RegularBranch == 1) {
        BI->setCondition(ConstantInt::getFalse(Inst->getContext()));
        Changed = true;
    } else {
        // This can happen, e.g., in the following case:
        //     array[-1] = a + b;
        // is transformed into
        //     if (a + b overflows)
        //         report_overflow()
        //     else
        //         report_index_out_of_bounds();
        // In this case, removing the sanity check does not help much, so we
        // just do nothing.
        // Thanks to Will Dietz for his explanation at
        // http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-April/071958.html
        dbgs() << "Warning: Sanity check with no regular branch found.\n";
        dbgs() << "The sanity check has been kept intact.\n";
    }
    
    if (PrintRemovedChecks && Changed) {
        DebugLoc DL = getSanityCheckDebugLoc(BI, RegularBranch);
        printDebugLoc(DL, BI->getContext(), dbgs());
        dbgs() << ": SanityCheck with cost ";
        dbgs() << *BI->getMetadata("cost")->getOperand(0);

        if (MDNode *IA = DL.getInlinedAt()) {
            dbgs() << " (inlined at ";
            printDebugLoc(DebugLoc(IA), BI->getContext(), dbgs());
            dbgs() << ")";
        }

        BasicBlock *Succ = BI->getSuccessor(RegularBranch == 0 ? 1 : 0);
        if (const CallInst *CI = SCI->findSanityCheckCall(Succ)) {
            dbgs() << " " << CI->getCalledFunction()->getName();
        }
        dbgs() << "\n";
    }

    return Changed;
}
Esempio n. 2
0
/// \brief Insert the missing branch conditions
void StructurizeCFG::insertConditions(bool Loops) {
  BranchVector &Conds = Loops ? LoopConds : Conditions;
  Value *Default = Loops ? BoolTrue : BoolFalse;
  SSAUpdater PhiInserter;

  for (BranchVector::iterator I = Conds.begin(),
       E = Conds.end(); I != E; ++I) {

    BranchInst *Term = *I;
    assert(Term->isConditional());

    BasicBlock *Parent = Term->getParent();
    BasicBlock *SuccTrue = Term->getSuccessor(0);
    BasicBlock *SuccFalse = Term->getSuccessor(1);

    PhiInserter.Initialize(Boolean, "");
    PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
    PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);

    BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];

    NearestCommonDominator Dominator(DT);
    Dominator.addBlock(Parent, false);

    Value *ParentValue = 0;
    for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
         PI != PE; ++PI) {

      if (PI->first == Parent) {
        ParentValue = PI->second;
        break;
      }
      PhiInserter.AddAvailableValue(PI->first, PI->second);
      Dominator.addBlock(PI->first);
    }

    if (ParentValue) {
      Term->setCondition(ParentValue);
    } else {
      if (!Dominator.wasResultExplicitMentioned())
        PhiInserter.AddAvailableValue(Dominator.getResult(), Default);

      Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
    }
  }
}
bool RangedAddressSanitizer::runOnFunction(Function &F) {
    if (getenv("FASAN_DISABLE")) {
      SPM_DEBUG( dbgs() << "FASan : disabled\n" );
      return false;
    }

    DL_  = &getAnalysis<DataLayout>();
    DT_  = &getAnalysis<DominatorTree>();
    LI_  = &getAnalysis<LoopInfo>();
    RI_  = &getAnalysis<ReduceIndexation>();
#ifdef ENABLE_REUSE
    RE_  = &getAnalysis<RelativeExecutions>();
#endif
    RMM_ = &getAnalysis<RelativeMinMax>();

    Module_  = F.getParent();
    Context_ = &Module_->getContext();

    Type        *VoidTy    = Type::getVoidTy(*Context_);
    IntegerType *IntTy     = IntegerType::getInt64Ty(*Context_);
    IntegerType *BoolTy     = IntegerType::getInt1Ty(*Context_);
    PointerType *IntPtrTy  = PointerType::getUnqual(IntTy);
    PointerType *VoidPtrTy = PointerType::getInt8PtrTy(*Context_);

    SPM_DEBUG( F.dump() );
    outs() << "[IterationInfo]\n";
    for (Loop * loop : *LI_) {
            ii_visitLoop(loop);
    }
    outs() << "[EndOfIterationInfo]\n";
    
#if 0 // disabled initialization,shutdown sequence for FASan
  if (F.getName() == "main") {
    SPM_DEBUG(dbgs() << "RangedAddressSanitizer: inserting hwloc calls into "
                        "main function\n");

    FunctionType *FnType = FunctionType::get(VoidTy, ArrayRef<Type*>(), false);
    IRBuilder<> IRB(&(*F.getEntryBlock().begin()));

    Constant *Init = Module_->getOrInsertFunction("__spm_init", FnType);
    IRB.CreateCall(Init);

    Constant *End = Module_->getOrInsertFunction("__spm_end", FnType);
    for (auto &BB : F) {
      TerminatorInst *TI = BB.getTerminator();
      if (isa<ReturnInst>(TI)) {
        IRB.SetInsertPoint(TI);
        IRB.CreateCall(End);
      }
    }
  }
#endif

  if (!ClFunc.empty() && F.getName() != ClFunc) {
    SPM_DEBUG(dbgs() << "RangedAddressSanitizer: skipping function "
                     << F.getName() << "\n");
    return false;
  }

  Calls_.clear();

  SPM_DEBUG(dbgs() << "RangedAddressSanitizer: processing function "
                   << F.getName() << "\n");

  std::vector<Type*> ReuseFnFormals = { VoidPtrTy, IntTy, IntTy, IntTy };
  FunctionType *ReuseFnType = FunctionType::get(BoolTy, ReuseFnFormals, false);
  ReuseFn_ =
    F.getParent()->getOrInsertFunction("__fasan_check", ReuseFnType);
  ReuseFnDestroy_ =
    F.getParent()->getOrInsertFunction("__spm_give", ReuseFnType);

// Visit all loops in bottom-up order (innter-most loops first)
  std::set<BasicBlock*> Processed;
  auto Entry = DT_->getRootNode();
  for (auto ET = po_begin(Entry), EE = po_end(Entry); ET != EE; ++ET) {
    BasicBlock *Header = (*ET)->getBlock();

    if (LI_->isLoopHeader(Header)) {
      SPM_DEBUG(dbgs() << "RangedAddressSanitizer: processing loop at "
                       << Header->getName() << "\n");
      Loop *L = LI_->getLoopFor(Header);

      if (L->getNumBackEdges() != 1 ||
          std::distance(pred_begin(Header), pred_end(Header)) != 2) {
        SPM_DEBUG(dbgs() << "RangedAddressSanitizer: loop has multiple "
                         << "backedges or multiple incoming outer blocks\n");
        continue;
      }

      SPM_DEBUG(dbgs() << "RangedAddressSanitizer: processing loop at "
                       << Header->getName() << "\n");

    // visit all memory acccesses in this loop
      for (auto BB = L->block_begin(), BE = L->block_end(); BB != BE; ++BB) {
        if (!Processed.count(*BB)) {
          Processed.insert(*BB);
          for (auto &I : *(*BB))
            generateCallFor(L, &I);
        }
      }
    }
  }

  // FAsan logic goes here

  std::map<const BasicBlock*,BasicBlock*> clonedBlockMap; // keeps track of cloned regions to avoid redundant cloning

  std::vector<CallInst*> ToInline;

  for (auto &CI : Calls_) {
    BasicBlock * Preheader = CI.Preheader;
    
  // TODO decide whether it is worthwhile to optimize for this case

  // insert range check
    IRBuilder<> IRB(Preheader->getTerminator());
    Value *VoidArray = IRB.CreateBitCast(CI.Array, VoidPtrTy);
    std::vector<Value*> Args = { VoidArray, CI.Min, CI.Max, CI.Reuse };
    CallInst *CR = IRB.CreateCall(ReuseFn_, Args);
    ToInline.push_back(CR);
    
 // verify if this loop was already instrumented
    TerminatorInst * preHeaderTerm = CR->getParent()->getTerminator();
    BranchInst * preHeaderBranch = dyn_cast<BranchInst>(preHeaderTerm);

    if (preHeaderBranch && preHeaderBranch->isConditional()) {

    // discover the structure of the instrumented code (safe and default region)
    // abort, if this does not look like instrumented code
    	BasicBlock * firstTarget = preHeaderBranch->getSuccessor(0);
    	BasicBlock * secondTarget = preHeaderBranch->getSuccessor(1);
    	BasicBlock * safeHeader, * defHeader;
    	if (clonedBlockMap.count(firstTarget)) {
    		defHeader = firstTarget;
    		safeHeader = clonedBlockMap[firstTarget];
    		assert(safeHeader == secondTarget);
    	} else {
    		assert(clonedBlockMap.count(secondTarget));
    		defHeader = secondTarget;
			safeHeader = clonedBlockMap[secondTarget];
			assert(safeHeader == firstTarget);
    	}

    	SPM_DEBUG( dbgs() << "FASan: (Unsupported) second array in safe region controlled by " << * preHeaderBranch << "\n" );
    	Loop * defLoop = LI_->getLoopFor(defHeader);
    	assert(defLoop && "default region is not a loop!");

		Loop::block_iterator itBodyBlock,S,E;
		S = defLoop->block_begin();
		E = defLoop->block_end();

	// mark accesses in cloned region as safe
    	for (itBodyBlock = S;itBodyBlock != E; ++itBodyBlock) {
    		BasicBlock * defBodyBlock = *itBodyBlock;
    		BasicBlock * safeBodyBlock = clonedBlockMap[defBodyBlock];

    		for(auto & inst : *safeBodyBlock) {
				markSafeArrayUse(&inst, CI.Array);
			}
    	}

    // add conjunctive test
    	Value * oldCond = preHeaderBranch->getCondition();
    	Value * joinedCond = IRB.CreateAnd(oldCond, CR, "allsafe");
    	preHeaderBranch->setCondition(joinedCond);

    } else {

	  // get loop
		Loop* finalLoop = CI.FinalLoop;
		Loop::block_iterator itBodyBlock,S,E;
		S = finalLoop->block_begin();
		E = finalLoop->block_end();

	  // clone loop body (cloned loop will run unchecked)
		ValueToValueMapTy cloneMap;

		BasicBlock * clonedHeader = 0;
		std::vector<BasicBlock*> clonedBlocks;

		for (itBodyBlock = S;itBodyBlock != E; ++itBodyBlock) {

			const BasicBlock * bodyBlock = *itBodyBlock;
			BasicBlock * clonedBlock = CloneBasicBlock(bodyBlock, cloneMap, "_checked", &F, 0);

			cloneMap[bodyBlock] = clonedBlock;
			clonedBlockMap[bodyBlock] = clonedBlock;
			clonedBlocks.push_back(clonedBlock);

			if (bodyBlock == finalLoop->getHeader()) {
				clonedHeader = clonedBlock;
				SPM_DEBUG( dbgs() << "FASan: loop header case at " << bodyBlock->getName() << "\n" );
			} else {
				SPM_DEBUG( dbgs() << "FASan: non-header block at " << bodyBlock->getName() << "\n" );
			}
		}

		if (!clonedHeader) {
			// TODO run clean-up code
			SPM_DEBUG( dbgs() << "FASan: could not find header!\n");
			abort();
		}

	  // Remap uses inside cloned region (mark pointers in the region as unguarded)
		for (BasicBlock * block : clonedBlocks) {
			for(auto & inst : *block) {
				RemapInstruction(&inst, cloneMap, RF_IgnoreMissingEntries);
				markSafeArrayUse(&inst, CI.Array);
			}
		}

	   // TODO fix PHI-nodes in exit blocks

	   // Rewire terminator of the range check to branch to the cloned region
		TerminatorInst * checkTermInst = CR->getParent()->getTerminator();

		if (BranchInst * checkBranchInst = dyn_cast<BranchInst>(checkTermInst)) {
			if (checkBranchInst->isUnconditional()) {
				BasicBlock * defTarget = checkBranchInst->getSuccessor(0);
				BranchInst * modifiedBranchInst = BranchInst::Create(clonedHeader, defTarget, CR, checkBranchInst);
				checkBranchInst->replaceAllUsesWith(modifiedBranchInst);
				checkBranchInst->eraseFromParent();
			} else {
				SPM_DEBUG( dbgs() << "FASan: Unexpected conditional branch (preheader should branch unconditional, other array checks will introduce conditional branches) " << * checkTermInst << "\n" );
				abort();
			}
		} else {
			SPM_DEBUG( dbgs() << "FASan: unsupported terminator type " << * checkTermInst << "\n" );
			abort();
		}
    }
    
#if 0
    IRB.SetInsertPoint(&(*CI.Final->begin()));
    IRB.CreateCall(ReuseFnDestroy_, Args);
#endif
    SPM_DEBUG(dbgs() << "RangedAddressSanitizer: call instruction: " << *CR
                     << "\n");
  }


  // inline calls
#ifdef FASAN_INLINE_RUNTIME
  for (CallInst * call : ToInline) {
	assert(call);
	InlineFunctionInfo IFI;
	InlineFunction(call, IFI, false);
  }
#endif

  SPM_DEBUG( F.dump() );
  return true;
}
Esempio n. 4
0
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
  bool MadeChange = false;

  Triple TT = Triple(L->getHeader()->getParent()->getParent()->
                     getTargetTriple());
  if (!TT.isArch32Bit() && !TT.isArch64Bit())
    return MadeChange; // Unknown arch. type.

  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    MadeChange |= convertToCTRLoop(*I);
  }

  // If a nested loop has been converted, then we can't convert this loop.
  if (MadeChange)
    return MadeChange;

#ifndef NDEBUG
  // Stop trying after reaching the limit (if any).
  int Limit = CTRLoopLimit;
  if (Limit >= 0) {
    if (Counter >= CTRLoopLimit)
      return false;
    Counter++;
  }
#endif

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I)
    if (mightUseCTR(TT, *I))
      return MadeChange;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  BasicBlock *CountedExitBlock = 0;
  const SCEV *ExitCount = 0;
  BranchInst *CountedExitBranch = 0;
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       IE = ExitingBlocks.end(); I != IE; ++I) {
    const SCEV *EC = SE->getExitCount(L, *I);
    DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
                    (*I)->getName() << ": " << *EC << "\n");
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE->isLoopInvariant(EC, L))
      continue;

    if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32))
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // exisiting block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool NotAlways = false;
    for (pred_iterator PI = pred_begin(L->getHeader()),
         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
      if (!L->contains(*PI))
        continue;

      if (!DT->dominates(*I, *PI)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this blocks ends with a conditional branch.
    Instruction *TI = (*I)->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      CountedExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    CountedExitBlock = *I;
    ExitCount = EC;
    break;
  }

  if (!CountedExitBlock)
    return MadeChange;

  BasicBlock *Preheader = L->getLoopPreheader();

  // If we don't have a preheader, then insert one. If we already have a
  // preheader, then we can use it (except if the preheader contains a use of
  // the CTR register because some such uses might be reordered by the
  // selection DAG after the mtctr instruction).
  if (!Preheader || mightUseCTR(TT, Preheader))
    Preheader = InsertPreheaderForLoop(L, this);
  if (!Preheader)
    return MadeChange;

  DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");

  // Insert the count into the preheader and replace the condition used by the
  // selected branch.
  MadeChange = true;

  SCEVExpander SCEVE(*SE, "loopcnt");
  LLVMContext &C = SE->getContext();
  Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
                                       Type::getInt32Ty(C);
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
  ExitCount = SE->getAddExpr(ExitCount,
                             SE->getConstant(CountType, 1)); 
  Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType,
                                       Preheader->getTerminator());

  IRBuilder<> CountBuilder(Preheader->getTerminator());
  Module *M = Preheader->getParent()->getParent();
  Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
                                               CountType);
  CountBuilder.CreateCall(MTCTRFunc, ECValue);

  IRBuilder<> CondBuilder(CountedExitBranch);
  Value *DecFunc =
    Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
  Value *NewCond = CondBuilder.CreateCall(DecFunc);
  Value *OldCond = CountedExitBranch->getCondition();
  CountedExitBranch->setCondition(NewCond);

  // The false branch must exit the loop.
  if (!L->contains(CountedExitBranch->getSuccessor(0)))
    CountedExitBranch->swapSuccessors();

  // The old condition may be dead now, and may have even created a dead PHI
  // (the original induction variable).
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  DeleteDeadPHIs(CountedExitBlock);

  ++NumCTRLoops;
  return MadeChange;
}
Esempio n. 5
0
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                      bool AllowExpensiveTripCount,
                                      bool UseEpilogRemainder,
                                      bool UnrollRemainder,
                                      LoopInfo *LI, ScalarEvolution *SE,
                                      DominatorTree *DT, AssumptionCache *AC,
                                      OptimizationRemarkEmitter *ORE,
                                      bool PreserveLCSSA) {
  DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
  DEBUG(L->dump());
  DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" :
        dbgs() << "Using prolog remainder.\n");

  // Make sure the loop is in canonical form.
  if (!L->isLoopSimplifyForm()) {
    DEBUG(dbgs() << "Not in simplify form!\n");
    return false;
  }

  // Guaranteed by LoopSimplifyForm.
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *Header = L->getHeader();

  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
  unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
  BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
  // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
  // targets of the Latch be an exit block out of the loop. This needs
  // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
  assert(!L->contains(LatchExit) &&
         "one of the loop latch successors should be the exit block!");
  // These are exit blocks other than the target of the latch exiting block.
  SmallVector<BasicBlock *, 4> OtherExits;
  bool isMultiExitUnrollingEnabled =
      canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
                                   UseEpilogRemainder) &&
      canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
                                       UseEpilogRemainder);
  // Support only single exit and exiting block unless multi-exit loop unrolling is enabled.
  if (!isMultiExitUnrollingEnabled &&
      (!L->getExitingBlock() || OtherExits.size())) {
    DEBUG(
        dbgs()
        << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
           "enabled!\n");
    return false;
  }
  // Use Scalar Evolution to compute the trip count. This allows more loops to
  // be unrolled than relying on induction var simplification.
  if (!SE)
    return false;

  // Only unroll loops with a computable trip count, and the trip count needs
  // to be an int value (allowing a pointer type is a TODO item).
  // We calculate the backedge count by using getExitCount on the Latch block,
  // which is proven to be the only exiting block in this loop. This is same as
  // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
  // exiting blocks).
  const SCEV *BECountSC = SE->getExitCount(L, Latch);
  if (isa<SCEVCouldNotCompute>(BECountSC) ||
      !BECountSC->getType()->isIntegerTy()) {
    DEBUG(dbgs() << "Could not compute exit block SCEV\n");
    return false;
  }

  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();

  // Add 1 since the backedge count doesn't include the first loop iteration.
  const SCEV *TripCountSC =
      SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
  if (isa<SCEVCouldNotCompute>(TripCountSC)) {
    DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
    return false;
  }

  BasicBlock *PreHeader = L->getLoopPreheader();
  BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  const DataLayout &DL = Header->getModule()->getDataLayout();
  SCEVExpander Expander(*SE, DL, "loop-unroll");
  if (!AllowExpensiveTripCount &&
      Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) {
    DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
    return false;
  }

  // This constraint lets us deal with an overflowing trip count easily; see the
  // comment on ModVal below.
  if (Log2_32(Count) > BEWidth) {
    DEBUG(dbgs()
          << "Count failed constraint on overflow trip count calculation.\n");
    return false;
  }

  // Loop structure is the following:
  //
  // PreHeader
  //   Header
  //   ...
  //   Latch
  // LatchExit

  BasicBlock *NewPreHeader;
  BasicBlock *NewExit = nullptr;
  BasicBlock *PrologExit = nullptr;
  BasicBlock *EpilogPreHeader = nullptr;
  BasicBlock *PrologPreHeader = nullptr;

  if (UseEpilogRemainder) {
    // If epilog remainder
    // Split PreHeader to insert a branch around loop for unrolling.
    NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
    // Split LatchExit to create phi nodes from branch above.
    SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
    NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
                                     DT, LI, PreserveLCSSA);
    // Split NewExit to insert epilog remainder loop.
    EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
    EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
  } else {
    // If prolog remainder
    // Split the original preheader twice to insert prolog remainder loop
    PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
    PrologPreHeader->setName(Header->getName() + ".prol.preheader");
    PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
                            DT, LI);
    PrologExit->setName(Header->getName() + ".prol.loopexit");
    // Split PrologExit to get NewPreHeader.
    NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
  }
  // Loop structure should be the following:
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // *NewPreHeader     *PrologPreHeader
  //   Header          *PrologExit
  //   ...             *NewPreHeader
  //   Latch             Header
  // *NewExit            ...
  // *EpilogPreHeader    Latch
  // LatchExit              LatchExit

  // Calculate conditions for branch around loop for unrolling
  // in epilog case and around prolog remainder loop in prolog case.
  // Compute the number of extra iterations required, which is:
  //  extra iterations = run-time trip count % loop unroll factor
  PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                            PreHeaderBR);
  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
                                          PreHeaderBR);
  IRBuilder<> B(PreHeaderBR);
  Value *ModVal;
  // Calculate ModVal = (BECount + 1) % Count.
  // Note that TripCount is BECount + 1.
  if (isPowerOf2_32(Count)) {
    // When Count is power of 2 we don't BECount for epilog case, however we'll
    // need it for a branch around unrolling loop for prolog case.
    ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
    //  1. There are no iterations to be run in the prolog/epilog loop.
    // OR
    //  2. The addition computing TripCount overflowed.
    //
    // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
    // the number of iterations that remain to be run in the original loop is a
    // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
    // explicitly check this above).
  } else {
    // As (BECount + 1) can potentially unsigned overflow we count
    // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
    Value *ModValTmp = B.CreateURem(BECount,
                                    ConstantInt::get(BECount->getType(),
                                                     Count));
    Value *ModValAdd = B.CreateAdd(ModValTmp,
                                   ConstantInt::get(ModValTmp->getType(), 1));
    // At that point (BECount % Count) + 1 could be equal to Count.
    // To handle this case we need to take mod by Count one more time.
    ModVal = B.CreateURem(ModValAdd,
                          ConstantInt::get(BECount->getType(), Count),
                          "xtraiter");
  }
  Value *BranchVal =
      UseEpilogRemainder ? B.CreateICmpULT(BECount,
                                           ConstantInt::get(BECount->getType(),
                                                            Count - 1)) :
                           B.CreateIsNotNull(ModVal, "lcmp.mod");
  BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
  BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
  // Branch to either remainder (extra iterations) loop or unrolling loop.
  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
  PreHeaderBR->eraseFromParent();
  if (DT) {
    if (UseEpilogRemainder)
      DT->changeImmediateDominator(NewExit, PreHeader);
    else
      DT->changeImmediateDominator(PrologExit, PreHeader);
  }
  Function *F = Header->getParent();
  // Get an ordered list of blocks in the loop to help with the ordering of the
  // cloned blocks in the prolog/epilog code
  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  //
  // For each extra loop iteration, create a copy of the loop's basic blocks
  // and generate a condition that branches to the copy depending on the
  // number of 'left over' iterations.
  //
  std::vector<BasicBlock *> NewBlocks;
  ValueToValueMapTy VMap;

  // For unroll factor 2 remainder loop will have 1 iterations.
  // Do not create 1 iteration loop.
  bool CreateRemainderLoop = (Count != 2);

  // Clone all the basic blocks in the loop. If Count is 2, we don't clone
  // the loop, otherwise we create a cloned loop to execute the extra
  // iterations. This function adds the appropriate CFG connections.
  BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
  Loop *remainderLoop = CloneLoopBlocks(
      L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
      InsertTop, InsertBot,
      NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);

  // Insert the cloned blocks into the function.
  F->getBasicBlockList().splice(InsertBot->getIterator(),
                                F->getBasicBlockList(),
                                NewBlocks[0]->getIterator(),
                                F->end());

  // Now the loop blocks are cloned and the other exiting blocks from the
  // remainder are connected to the original Loop's exit blocks. The remaining
  // work is to update the phi nodes in the original loop, and take in the
  // values from the cloned region. Also update the dominator info for
  // OtherExits and their immediate successors, since we have new edges into
  // OtherExits.
  SmallSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks;
  for (auto *BB : OtherExits) {
   for (auto &II : *BB) {

     // Given we preserve LCSSA form, we know that the values used outside the
     // loop will be used through these phi nodes at the exit blocks that are
     // transformed below.
     if (!isa<PHINode>(II))
       break;
     PHINode *Phi = cast<PHINode>(&II);
     unsigned oldNumOperands = Phi->getNumIncomingValues();
     // Add the incoming values from the remainder code to the end of the phi
     // node.
     for (unsigned i =0; i < oldNumOperands; i++){
       Value *newVal = VMap[Phi->getIncomingValue(i)];
       // newVal can be a constant or derived from values outside the loop, and
       // hence need not have a VMap value.
       if (!newVal)
         newVal = Phi->getIncomingValue(i);
       Phi->addIncoming(newVal,
                           cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
     }
   }
#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
    for (BasicBlock *SuccBB : successors(BB)) {
      assert(!(any_of(OtherExits,
                      [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
               SuccBB == LatchExit) &&
             "Breaks the definition of dedicated exits!");
    }
#endif
   // Update the dominator info because the immediate dominator is no longer the
   // header of the original Loop. BB has edges both from L and remainder code.
   // Since the preheader determines which loop is run (L or directly jump to
   // the remainder code), we set the immediate dominator as the preheader.
   if (DT) {
     DT->changeImmediateDominator(BB, PreHeader);
     // Also update the IDom for immediate successors of BB.  If the current
     // IDom is the header, update the IDom to be the preheader because that is
     // the nearest common dominator of all predecessors of SuccBB.  We need to
     // check for IDom being the header because successors of exit blocks can
     // have edges from outside the loop, and we should not incorrectly update
     // the IDom in that case.
     for (BasicBlock *SuccBB: successors(BB))
       if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) {
         if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) {
           assert(!SuccBB->getSinglePredecessor() &&
                  "BB should be the IDom then!");
           DT->changeImmediateDominator(SuccBB, PreHeader);
         }
       }
    }
  }

  // Loop structure should be the following:
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // NewPreHeader      PrologPreHeader
  //   Header            PrologHeader
  //   ...               ...
  //   Latch             PrologLatch
  // NewExit           PrologExit
  // EpilogPreHeader   NewPreHeader
  //   EpilogHeader      Header
  //   ...               ...
  //   EpilogLatch       Latch
  // LatchExit              LatchExit

  // Rewrite the cloned instruction operands to use the values created when the
  // clone is created.
  for (BasicBlock *BB : NewBlocks) {
    for (Instruction &I : *BB) {
      RemapInstruction(&I, VMap,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
    }
  }

  if (UseEpilogRemainder) {
    // Connect the epilog code to the original loop and update the
    // PHI functions.
    ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
                  EpilogPreHeader, NewPreHeader, VMap, DT, LI,
                  PreserveLCSSA);

    // Update counter in loop for unrolling.
    // I should be multiply of Count.
    IRBuilder<> B2(NewPreHeader->getTerminator());
    Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
    BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
    B2.SetInsertPoint(LatchBR);
    PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
                                      Header->getFirstNonPHI());
    Value *IdxSub =
        B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
                     NewIdx->getName() + ".nsub");
    Value *IdxCmp;
    if (LatchBR->getSuccessor(0) == Header)
      IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
    else
      IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
    NewIdx->addIncoming(TestVal, NewPreHeader);
    NewIdx->addIncoming(IdxSub, Latch);
    LatchBR->setCondition(IdxCmp);
  } else {
    // Connect the prolog code to the original loop and update the
    // PHI functions.
    ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
                  NewPreHeader, VMap, DT, LI, PreserveLCSSA);
  }

  // If this loop is nested, then the loop unroller changes the code in the
  // parent loop, so the Scalar Evolution pass needs to be run again.
  if (Loop *ParentLoop = L->getParentLoop())
    SE->forgetLoop(ParentLoop);

  // Canonicalize to LoopSimplifyForm both original and remainder loops. We
  // cannot rely on the LoopUnrollPass to do this because it only does
  // canonicalization for parent/subloops and not the sibling loops.
  if (OtherExits.size() > 0) {
    // Generate dedicated exit blocks for the original loop, to preserve
    // LoopSimplifyForm.
    formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
    // Generate dedicated exit blocks for the remainder loop if one exists, to
    // preserve LoopSimplifyForm.
    if (remainderLoop)
      formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
  }

  if (remainderLoop && UnrollRemainder) {
    DEBUG(dbgs() << "Unrolling remainder loop\n");
    UnrollLoop(remainderLoop, /*Count*/Count - 1, /*TripCount*/Count - 1,
               /*Force*/false, /*AllowRuntime*/false,
               /*AllowExpensiveTripCount*/false, /*PreserveCondBr*/true,
               /*PreserveOnlyFirst*/false, /*TripMultiple*/1,
               /*PeelCount*/0, /*UnrollRemainder*/false, LI, SE, DT, AC, ORE,
               PreserveLCSSA);
  }

  NumRuntimeUnrolled++;
  return true;
}
Esempio n. 6
0
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                      bool AllowExpensiveTripCount,
                                      bool UseEpilogRemainder,
                                      LoopInfo *LI, ScalarEvolution *SE,
                                      DominatorTree *DT, bool PreserveLCSSA) {
  // for now, only unroll loops that contain a single exit
  if (!L->getExitingBlock())
    return false;

  // Make sure the loop is in canonical form, and there is a single
  // exit block only.
  if (!L->isLoopSimplifyForm())
    return false;
  BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop
  if (!Exit)
    return false;

  // Use Scalar Evolution to compute the trip count. This allows more loops to
  // be unrolled than relying on induction var simplification.
  if (!SE)
    return false;

  // Only unroll loops with a computable trip count, and the trip count needs
  // to be an int value (allowing a pointer type is a TODO item).
  const SCEV *BECountSC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BECountSC) ||
      !BECountSC->getType()->isIntegerTy())
    return false;

  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();

  // Add 1 since the backedge count doesn't include the first loop iteration.
  const SCEV *TripCountSC =
      SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
  if (isa<SCEVCouldNotCompute>(TripCountSC))
    return false;

  BasicBlock *Header = L->getHeader();
  BasicBlock *PreHeader = L->getLoopPreheader();
  BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  const DataLayout &DL = Header->getModule()->getDataLayout();
  SCEVExpander Expander(*SE, DL, "loop-unroll");
  if (!AllowExpensiveTripCount &&
      Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR))
    return false;

  // This constraint lets us deal with an overflowing trip count easily; see the
  // comment on ModVal below.
  if (Log2_32(Count) > BEWidth)
    return false;

  BasicBlock *Latch = L->getLoopLatch();

  // Loop structure is the following:
  //
  // PreHeader
  //   Header
  //   ...
  //   Latch
  // Exit

  BasicBlock *NewPreHeader;
  BasicBlock *NewExit = nullptr;
  BasicBlock *PrologExit = nullptr;
  BasicBlock *EpilogPreHeader = nullptr;
  BasicBlock *PrologPreHeader = nullptr;

  if (UseEpilogRemainder) {
    // If epilog remainder
    // Split PreHeader to insert a branch around loop for unrolling.
    NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
    // Split Exit to create phi nodes from branch above.
    SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
    NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa",
                                     DT, LI, PreserveLCSSA);
    // Split NewExit to insert epilog remainder loop.
    EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
    EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
  } else {
    // If prolog remainder
    // Split the original preheader twice to insert prolog remainder loop
    PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
    PrologPreHeader->setName(Header->getName() + ".prol.preheader");
    PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
                            DT, LI);
    PrologExit->setName(Header->getName() + ".prol.loopexit");
    // Split PrologExit to get NewPreHeader.
    NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
  }
  // Loop structure should be the following:
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // *NewPreHeader     *PrologPreHeader
  //   Header          *PrologExit
  //   ...             *NewPreHeader
  //   Latch             Header
  // *NewExit            ...
  // *EpilogPreHeader    Latch
  // Exit              Exit

  // Calculate conditions for branch around loop for unrolling
  // in epilog case and around prolog remainder loop in prolog case.
  // Compute the number of extra iterations required, which is:
  //  extra iterations = run-time trip count % loop unroll factor
  PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                            PreHeaderBR);
  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
                                          PreHeaderBR);
  IRBuilder<> B(PreHeaderBR);
  Value *ModVal;
  // Calculate ModVal = (BECount + 1) % Count.
  // Note that TripCount is BECount + 1.
  if (isPowerOf2_32(Count)) {
    // When Count is power of 2 we don't BECount for epilog case, however we'll
    // need it for a branch around unrolling loop for prolog case.
    ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
    //  1. There are no iterations to be run in the prolog/epilog loop.
    // OR
    //  2. The addition computing TripCount overflowed.
    //
    // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
    // the number of iterations that remain to be run in the original loop is a
    // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
    // explicitly check this above).
  } else {
    // As (BECount + 1) can potentially unsigned overflow we count
    // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
    Value *ModValTmp = B.CreateURem(BECount,
                                    ConstantInt::get(BECount->getType(),
                                                     Count));
    Value *ModValAdd = B.CreateAdd(ModValTmp,
                                   ConstantInt::get(ModValTmp->getType(), 1));
    // At that point (BECount % Count) + 1 could be equal to Count.
    // To handle this case we need to take mod by Count one more time.
    ModVal = B.CreateURem(ModValAdd,
                          ConstantInt::get(BECount->getType(), Count),
                          "xtraiter");
  }
  Value *BranchVal =
      UseEpilogRemainder ? B.CreateICmpULT(BECount,
                                           ConstantInt::get(BECount->getType(),
                                                            Count - 1)) :
                           B.CreateIsNotNull(ModVal, "lcmp.mod");
  BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
  BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
  // Branch to either remainder (extra iterations) loop or unrolling loop.
  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
  PreHeaderBR->eraseFromParent();
  Function *F = Header->getParent();
  // Get an ordered list of blocks in the loop to help with the ordering of the
  // cloned blocks in the prolog/epilog code
  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  //
  // For each extra loop iteration, create a copy of the loop's basic blocks
  // and generate a condition that branches to the copy depending on the
  // number of 'left over' iterations.
  //
  std::vector<BasicBlock *> NewBlocks;
  ValueToValueMapTy VMap;

  // For unroll factor 2 remainder loop will have 1 iterations.
  // Do not create 1 iteration loop.
  bool CreateRemainderLoop = (Count != 2);

  // Clone all the basic blocks in the loop. If Count is 2, we don't clone
  // the loop, otherwise we create a cloned loop to execute the extra
  // iterations. This function adds the appropriate CFG connections.
  BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
  CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
                  InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);

  // Insert the cloned blocks into the function.
  F->getBasicBlockList().splice(InsertBot->getIterator(),
                                F->getBasicBlockList(),
                                NewBlocks[0]->getIterator(),
                                F->end());

  // Loop structure should be the following:
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // NewPreHeader      PrologPreHeader
  //   Header            PrologHeader
  //   ...               ...
  //   Latch             PrologLatch
  // NewExit           PrologExit
  // EpilogPreHeader   NewPreHeader
  //   EpilogHeader      Header
  //   ...               ...
  //   EpilogLatch       Latch
  // Exit              Exit

  // Rewrite the cloned instruction operands to use the values created when the
  // clone is created.
  for (BasicBlock *BB : NewBlocks) {
    for (Instruction &I : *BB) {
      RemapInstruction(&I, VMap,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
    }
  }

  if (UseEpilogRemainder) {
    // Connect the epilog code to the original loop and update the
    // PHI functions.
    ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader,
                  EpilogPreHeader, NewPreHeader, VMap, DT, LI,
                  PreserveLCSSA);

    // Update counter in loop for unrolling.
    // I should be multiply of Count.
    IRBuilder<> B2(NewPreHeader->getTerminator());
    Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
    BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
    B2.SetInsertPoint(LatchBR);
    PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
                                      Header->getFirstNonPHI());
    Value *IdxSub =
        B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
                     NewIdx->getName() + ".nsub");
    Value *IdxCmp;
    if (LatchBR->getSuccessor(0) == Header)
      IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
    else
      IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
    NewIdx->addIncoming(TestVal, NewPreHeader);
    NewIdx->addIncoming(IdxSub, Latch);
    LatchBR->setCondition(IdxCmp);
  } else {
    // Connect the prolog code to the original loop and update the
    // PHI functions.
    ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader,
                  VMap, DT, LI, PreserveLCSSA);
  }

  // If this loop is nested, then the loop unroller changes the code in the
  // parent loop, so the Scalar Evolution pass needs to be run again.
  if (Loop *ParentLoop = L->getParentLoop())
    SE->forgetLoop(ParentLoop);

  NumRuntimeUnrolled++;
  return true;
}
Esempio n. 7
0
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
  bool MadeChange = false;

  // Do not convert small short loops to CTR loop.
  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
    SmallPtrSet<const Value *, 32> EphValues;
    auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
        *L->getHeader()->getParent());
    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
    CodeMetrics Metrics;
    for (BasicBlock *BB : L->blocks())
      Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
    // 6 is an approximate latency for the mtctr instruction.
    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
      return false;
  }

  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    MadeChange |= convertToCTRLoop(*I);
    LLVM_DEBUG(dbgs() << "Nested loop converted\n");
  }

  // If a nested loop has been converted, then we can't convert this loop.
  if (MadeChange)
    return MadeChange;

  // Bail out if the loop has irreducible control flow.
  LoopBlocksRPO RPOT(L);
  RPOT.perform(LI);
  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI))
    return false;

#ifndef NDEBUG
  // Stop trying after reaching the limit (if any).
  int Limit = CTRLoopLimit;
  if (Limit >= 0) {
    if (Counter >= CTRLoopLimit)
      return false;
    Counter++;
  }
#endif

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I)
    if (mightUseCTR(*I))
      return MadeChange;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // If there is an exit edge known to be frequently taken,
  // we should not transform this loop.
  for (auto &BB : ExitingBlocks) {
    Instruction *TI = BB->getTerminator();
    if (!TI) continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      uint64_t TrueWeight = 0, FalseWeight = 0;
      if (!BI->isConditional() ||
          !BI->extractProfMetadata(TrueWeight, FalseWeight))
        continue;

      // If the exit path is more frequent than the loop path,
      // we return here without further analysis for this loop.
      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
      if (( TrueIsExit && FalseWeight < TrueWeight) ||
          (!TrueIsExit && FalseWeight > TrueWeight))
        return MadeChange;
    }
  }

  BasicBlock *CountedExitBlock = nullptr;
  const SCEV *ExitCount = nullptr;
  BranchInst *CountedExitBranch = nullptr;
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       IE = ExitingBlocks.end(); I != IE; ++I) {
    const SCEV *EC = SE->getExitCount(L, *I);
    LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block "
                      << (*I)->getName() << ": " << *EC << "\n");
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE->isLoopInvariant(EC, L))
      continue;

    if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
      continue;

    // If this exiting block is contained in a nested loop, it is not eligible
    // for insertion of the branch-and-decrement since the inner loop would
    // end up messing up the value in the CTR.
    if (LI->getLoopFor(*I) != L)
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // existing block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool NotAlways = false;
    for (pred_iterator PI = pred_begin(L->getHeader()),
         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
      if (!L->contains(*PI))
        continue;

      if (!DT->dominates(*I, *PI)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this blocks ends with a conditional branch.
    Instruction *TI = (*I)->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      CountedExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    CountedExitBlock = *I;
    ExitCount = EC;
    break;
  }

  if (!CountedExitBlock)
    return MadeChange;

  BasicBlock *Preheader = L->getLoopPreheader();

  // If we don't have a preheader, then insert one. If we already have a
  // preheader, then we can use it (except if the preheader contains a use of
  // the CTR register because some such uses might be reordered by the
  // selection DAG after the mtctr instruction).
  if (!Preheader || mightUseCTR(Preheader))
    Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
  if (!Preheader)
    return MadeChange;

  LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName()
                    << "\n");

  // Insert the count into the preheader and replace the condition used by the
  // selected branch.
  MadeChange = true;

  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
  LLVMContext &C = SE->getContext();
  Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
  Value *ECValue =
      SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());

  IRBuilder<> CountBuilder(Preheader->getTerminator());
  Module *M = Preheader->getParent()->getParent();
  Function *MTCTRFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType);
  CountBuilder.CreateCall(MTCTRFunc, ECValue);

  IRBuilder<> CondBuilder(CountedExitBranch);
  Function *DecFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
  Value *OldCond = CountedExitBranch->getCondition();
  CountedExitBranch->setCondition(NewCond);

  // The false branch must exit the loop.
  if (!L->contains(CountedExitBranch->getSuccessor(0)))
    CountedExitBranch->swapSuccessors();

  // The old condition may be dead now, and may have even created a dead PHI
  // (the original induction variable).
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  // Run through the basic blocks of the loop and see if any of them have dead
  // PHIs that can be removed.
  for (auto I : L->blocks())
    DeleteDeadPHIs(I);

  ++NumCTRLoops;
  return MadeChange;
}