Esempio n. 1
0
/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
/// turned into an explicit branch.
static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
  // FIXME: This should use the same heuristics as IfConversion to determine
  // whether a select is better represented as a branch.  This requires that
  // branch probability metadata is preserved for the select, which is not the
  // case currently.

  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());

  // If the branch is predicted right, an out of order CPU can avoid blocking on
  // the compare.  Emit cmovs on compares with a memory operand as branches to
  // avoid stalls on the load from memory.  If the compare has more than one use
  // there's probably another cmov or setcc around so it's not worth emitting a
  // branch.
  if (!Cmp)
    return false;

  Value *CmpOp0 = Cmp->getOperand(0);
  Value *CmpOp1 = Cmp->getOperand(1);

  // We check that the memory operand has one use to avoid uses of the loaded
  // value directly after the compare, making branches unprofitable.
  return Cmp->hasOneUse() &&
         ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
          (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
}
// Predict that a comparison in which a register is an operand, the register is
// used before being defined in a successor block, and the successor block
// does not post-dominate will reach the successor block.
int BranchProbabilities::CheckGuardHeuristic()
{
    BranchInst *BI = dyn_cast<BranchInst>(_TI);
    bool bUses[2] = {false, false};

    // If we don't have a conditional branch, abandon
    if ((!BI) || (BI->isUnconditional()))
        return -1;

    // If the condition is not immediately dependent on a comparison, abandon
    CmpInst *cmp = dyn_cast<CmpInst>(BI->getCondition());
    if (!cmp)
        return -1;

    for (int i = 0; i < 2; i++)
    {
        if (_bPostDoms[i])
            continue;

        // Get the values being compared
        Value *v = cmp->getOperand(i);

        // For all uses of the first value check if the use post-dominates
        for (Value::use_iterator UI = v->use_begin(), UE = v->use_end();
                UI != UE; ++UI)
        {
            // if the use is not an instruction, skip it
            Instruction *I = dyn_cast<Instruction>(*UI);
            if (!I)
                continue;

            BasicBlock *UsingBlock = I->getParent();

            // Check if the use is in either successor
            for (int i = 0; i < 2; i++)
                if (UsingBlock == _Succ[i])
                    bUses[i] = true;
        }
    }

    if (bUses[0] == bUses[1])
        return -1;
    if (bUses[0])
        return 0;
    else
        return 1;
}
// Converts LLVM encoding of comparison predicates to the
// corresponding bitcode versions.
static unsigned GetEncodedCmpPredicate(const CmpInst &Cmp) {
  switch (Cmp.getPredicate()) {
  default: report_fatal_error(
      "Comparison predicate not supported by PNaCl bitcode");
  case CmpInst::FCMP_FALSE:
    return naclbitc::FCMP_FALSE;
  case CmpInst::FCMP_OEQ:
    return naclbitc::FCMP_OEQ;
  case CmpInst::FCMP_OGT:
    return naclbitc::FCMP_OGT;
  case CmpInst::FCMP_OGE:
    return naclbitc::FCMP_OGE;
  case CmpInst::FCMP_OLT:
    return naclbitc::FCMP_OLT;
  case CmpInst::FCMP_OLE:
    return naclbitc::FCMP_OLE;
  case CmpInst::FCMP_ONE:
    return naclbitc::FCMP_ONE;
  case CmpInst::FCMP_ORD:
    return naclbitc::FCMP_ORD;
  case CmpInst::FCMP_UNO:
    return naclbitc::FCMP_UNO;
  case CmpInst::FCMP_UEQ:
    return naclbitc::FCMP_UEQ;
  case CmpInst::FCMP_UGT:
    return naclbitc::FCMP_UGT;
  case CmpInst::FCMP_UGE:
    return naclbitc::FCMP_UGE;
  case CmpInst::FCMP_ULT:
    return naclbitc::FCMP_ULT;
  case CmpInst::FCMP_ULE:
    return naclbitc::FCMP_ULE;
  case CmpInst::FCMP_UNE:
    return naclbitc::FCMP_UNE;
  case CmpInst::FCMP_TRUE:
    return naclbitc::FCMP_TRUE;
  case CmpInst::ICMP_EQ:
    return naclbitc::ICMP_EQ;
  case CmpInst::ICMP_NE:
    return naclbitc::ICMP_NE;
  case CmpInst::ICMP_UGT:
    return naclbitc::ICMP_UGT;
  case CmpInst::ICMP_UGE:
    return naclbitc::ICMP_UGE;
  case CmpInst::ICMP_ULT:
    return naclbitc::ICMP_ULT;
  case CmpInst::ICMP_ULE:
    return naclbitc::ICMP_ULE;
  case CmpInst::ICMP_SGT:
    return naclbitc::ICMP_SGT;
  case CmpInst::ICMP_SGE:
    return naclbitc::ICMP_SGE;
  case CmpInst::ICMP_SLT:
    return naclbitc::ICMP_SLT;
  case CmpInst::ICMP_SLE:
    return naclbitc::ICMP_SLE;
  }
}
Esempio n. 4
0
/// Try to simplify cmp instruction.
bool UnrolledInstAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

  // First try to handle simplified comparisons.
  if (!isa<Constant>(LHS))
    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
      LHS = SimpleLHS;
  if (!isa<Constant>(RHS))
    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
      RHS = SimpleRHS;

  if (!isa<Constant>(LHS) && !isa<Constant>(RHS)) {
    auto SimplifiedLHS = SimplifiedAddresses.find(LHS);
    if (SimplifiedLHS != SimplifiedAddresses.end()) {
      auto SimplifiedRHS = SimplifiedAddresses.find(RHS);
      if (SimplifiedRHS != SimplifiedAddresses.end()) {
        SimplifiedAddress &LHSAddr = SimplifiedLHS->second;
        SimplifiedAddress &RHSAddr = SimplifiedRHS->second;
        if (LHSAddr.Base == RHSAddr.Base) {
          LHS = LHSAddr.Offset;
          RHS = RHSAddr.Offset;
        }
      }
    }
  }

  if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
    if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
      if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        return true;
      }
    }
  }

  return Base::visitCmpInst(I);
}
Esempio n. 5
0
/// Returns true if the select instruction has users in the compare-and-add
/// reduction pattern below. The select instruction argument is the last one
/// in the sequence.
///
/// %sum.1 = phi ...
/// ...
/// %cmp = fcmp pred %0, %CFP
/// %add = fadd %0, %sum.1
/// %sum.2 = select %cmp, %add, %sum.1
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isConditionalRdxPattern(
    RecurrenceKind Kind, Instruction *I) {
  SelectInst *SI = dyn_cast<SelectInst>(I);
  if (!SI)
    return InstDesc(false, I);

  CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition());
  // Only handle single use cases for now.
  if (!CI || !CI->hasOneUse())
    return InstDesc(false, I);

  Value *TrueVal = SI->getTrueValue();
  Value *FalseVal = SI->getFalseValue();
  // Handle only when either of operands of select instruction is a PHI
  // node for now.
  if ((isa<PHINode>(*TrueVal) && isa<PHINode>(*FalseVal)) ||
      (!isa<PHINode>(*TrueVal) && !isa<PHINode>(*FalseVal)))
    return InstDesc(false, I);

  Instruction *I1 =
      isa<PHINode>(*TrueVal) ? dyn_cast<Instruction>(FalseVal)
                             : dyn_cast<Instruction>(TrueVal);
  if (!I1 || !I1->isBinaryOp())
    return InstDesc(false, I);

  Value *Op1, *Op2;
  if ((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1)  ||
       m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
      I1->isFast())
    return InstDesc(Kind == RK_FloatAdd, SI);

  if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast()))
    return InstDesc(Kind == RK_FloatMult, SI);

  return InstDesc(false, I);
}
Esempio n. 6
0
/// check if value in memory at `memAddr` was changed when accessing `val`, store result into flag
void RedoBBBuilder::insertCheck(Value *val, Value *memAddr, Value* flag)
{
    for (auto it = val->use_begin(), ite = val->use_end(); it != ite; it++) {
        if (StoreInst *SI = dyn_cast<StoreInst>(*it)) {
            // skip those not in current top loop
            if (!isCurrentTopLoop(*SI)) { continue; }

            // skip instruction we don't interested in
            if (!shouldCheck(*SI)) { continue; }

            // if we have checked this store for this memAddr
            CmpInst *chkRes = 0;
            if (StoreToCheckMap.count(SI)) {
                for (auto pair : StoreToCheckMap[SI]) {
                    if (pair.first == memAddr) {
                        chkRes = pair.second;
                        DEBUG(dbgs() << "        Found existing check '" << chkRes->getName()
                                    << "' for (" << *SI << "  ) in "
                                    << SI->getParent()->getName() << "\n");
                    }
                }
            }
            if (!chkRes) {
                // check if before and after the store, the memore address changed
                //
                // %orig        = load %memAddr
                // the STORE we checking
                // %modified    = load %memAddr
                // %cmp         = icmp ne, %orig, %modified
                LoadInst *orig = new LoadInst(memAddr, "", SI);

                Instruction *next = SI->getNextNode();
                LoadInst *modified = new LoadInst(memAddr, "", next);
                chkRes = CmpInst::Create(Instruction::ICmp,
                                         CmpInst::ICMP_NE,
                                         orig, modified,
                                         "chk", next);

                CheckingInstrs.insert(orig);
                CheckingInstrs.insert(modified);
                CheckingInstrs.insert(chkRes);

                // set consitant name
                orig->setName(chkRes->getName() + ".orig");
                modified->setName(chkRes->getName() + ".mod");

                StoreToCheckMap[SI].push_back({memAddr, chkRes});

                DEBUG(dbgs() << "        Inserted check '" << chkRes->getName()
                            << "' for (" << *SI << "  ) in "
                            << SI->getParent()->getName() << "\n");
            }


            // if we have stored the check result to the flag
            Check pair = {memAddr, chkRes};
            for (auto f : CheckToFlagMap[pair]) {
                if (f == flag) {
                    DEBUG(dbgs() << "        Existing flag store found\n");
                    return;
                }
            }

            DEBUG(dbgs() << "            Check result stored to " << flag->getName() << "\n");
            // merge old value and new value with or
            //
            // %oldflgval   = load %flag
            // %newflgval   = or %oldflgval, %chkRes
            // store  %newflgval, %flag
            // the next instr after STORE we checking
            Instruction *next = chkRes->getNextNode();
            LoadInst *oldflgval = new LoadInst(flag, flag->getName() + ".oldval", next);
            auto *newflgval = BinaryOperator::Create(Instruction::Or,
                                                     oldflgval, chkRes,
                                                     flag->getName() + ".newval",
                                                     next);
            StoreInst *st = new StoreInst(newflgval, flag, next);
            CheckingInstrs.insert(oldflgval);
            CheckingInstrs.insert(newflgval);
            CheckingInstrs.insert(st);

            CheckToFlagMap[pair].push_back(flag);
        }
    }
}
Esempio n. 7
0
void SystemZTDCPass::convertFCmp(CmpInst &I) {
  Value *Op0 = I.getOperand(0);
  auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
  auto Pred = I.getPredicate();
  // Only comparisons with consts are interesting.
  if (!Const)
    return;
  // Compute the smallest normal number (and its negation).
  auto &Sem = Op0->getType()->getFltSemantics();
  APFloat Smallest = APFloat::getSmallestNormalized(Sem);
  APFloat NegSmallest = Smallest;
  NegSmallest.changeSign();
  // Check if Const is one of our recognized consts.
  int WhichConst;
  if (Const->isZero()) {
    // All comparisons with 0 can be converted.
    WhichConst = 0;
  } else if (Const->isInfinity()) {
    // Likewise for infinities.
    WhichConst = Const->isNegative() ? 2 : 1;
  } else if (Const->isExactlyValue(Smallest)) {
    // For Smallest, we cannot do EQ separately from GT.
    if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
        (Pred & CmpInst::FCMP_OGE) != 0)
      return;
    WhichConst = 3;
  } else if (Const->isExactlyValue(NegSmallest)) {
    // Likewise for NegSmallest, we cannot do EQ separately from LT.
    if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
        (Pred & CmpInst::FCMP_OLE) != 0)
      return;
    WhichConst = 4;
  } else {
    // Not one of our special constants.
    return;
  }
  // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
  static const int Masks[][4] = {
    { // 0
      SystemZ::TDCMASK_ZERO,              // eq
      SystemZ::TDCMASK_POSITIVE,          // gt
      SystemZ::TDCMASK_NEGATIVE,          // lt
      SystemZ::TDCMASK_NAN,               // un
    },
    { // inf
      SystemZ::TDCMASK_INFINITY_PLUS,     // eq
      0,                                  // gt
      (SystemZ::TDCMASK_ZERO |
       SystemZ::TDCMASK_NEGATIVE |
       SystemZ::TDCMASK_NORMAL_PLUS |
       SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
      SystemZ::TDCMASK_NAN,               // un
    },
    { // -inf
      SystemZ::TDCMASK_INFINITY_MINUS,    // eq
      (SystemZ::TDCMASK_ZERO |
       SystemZ::TDCMASK_POSITIVE |
       SystemZ::TDCMASK_NORMAL_MINUS |
       SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
      0,                                  // lt
      SystemZ::TDCMASK_NAN,               // un
    },
    { // minnorm
      0,                                  // eq (unsupported)
      (SystemZ::TDCMASK_NORMAL_PLUS |
       SystemZ::TDCMASK_INFINITY_PLUS),   // gt (actually ge)
      (SystemZ::TDCMASK_ZERO |
       SystemZ::TDCMASK_NEGATIVE |
       SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
      SystemZ::TDCMASK_NAN,               // un
    },
    { // -minnorm
      0,                                  // eq (unsupported)
      (SystemZ::TDCMASK_ZERO |
       SystemZ::TDCMASK_POSITIVE |
       SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
      (SystemZ::TDCMASK_NORMAL_MINUS |
       SystemZ::TDCMASK_INFINITY_MINUS),  // lt (actually le)
      SystemZ::TDCMASK_NAN,               // un
    }
  };
  // Construct the mask as a combination of the partial masks.
  int Mask = 0;
  if (Pred & CmpInst::FCMP_OEQ)
    Mask |= Masks[WhichConst][0];
  if (Pred & CmpInst::FCMP_OGT)
    Mask |= Masks[WhichConst][1];
  if (Pred & CmpInst::FCMP_OLT)
    Mask |= Masks[WhichConst][2];
  if (Pred & CmpInst::FCMP_UNO)
    Mask |= Masks[WhichConst][3];
  // A lone fcmp is unworthy of tdc conversion on its own, but may become
  // worthy if combined with fabs.
  bool Worthy = false;
  if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
    Function *F = CI->getCalledFunction();
    if (F && F->getIntrinsicID() == Intrinsic::fabs) {
      // Fold with fabs - adjust the mask appropriately.
      Mask &= SystemZ::TDCMASK_PLUS;
      Mask |= Mask >> 1;
      Op0 = CI->getArgOperand(0);
      // A combination of fcmp with fabs is a win, unless the constant
      // involved is 0 (which is handled by later passes).
      Worthy = WhichConst != 0;
      PossibleJunk.insert(CI);
    }
Esempio n. 8
0
/// \brief Simplify one loop and queue further loops for simplification.
///
/// FIXME: Currently this accepts both lots of analyses that it uses and a raw
/// Pass pointer. The Pass pointer is used by numerous utilities to update
/// specific analyses. Rather than a pass it would be much cleaner and more
/// explicit if they accepted the analysis directly and then updated it.
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
                            DominatorTree *DT, LoopInfo *LI,
                            ScalarEvolution *SE, Pass *PP,
                            AssumptionCache *AC) {
  bool Changed = false;
ReprocessLoop:

  // Check to see that no blocks (other than the header) in this loop have
  // predecessors that are not in the loop.  This is not valid for natural
  // loops, but can occur if the blocks are unreachable.  Since they are
  // unreachable we can just shamelessly delete those CFG edges!
  for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
       BB != E; ++BB) {
    if (*BB == L->getHeader()) continue;

    SmallPtrSet<BasicBlock*, 4> BadPreds;
    for (pred_iterator PI = pred_begin(*BB),
         PE = pred_end(*BB); PI != PE; ++PI) {
      BasicBlock *P = *PI;
      if (!L->contains(P))
        BadPreds.insert(P);
    }

    // Delete each unique out-of-loop (and thus dead) predecessor.
    for (BasicBlock *P : BadPreds) {

      DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
                   << P->getName() << "\n");

      // Inform each successor of each dead pred.
      for (succ_iterator SI = succ_begin(P), SE = succ_end(P); SI != SE; ++SI)
        (*SI)->removePredecessor(P);
      // Zap the dead pred's terminator and replace it with unreachable.
      TerminatorInst *TI = P->getTerminator();
       TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
      P->getTerminator()->eraseFromParent();
      new UnreachableInst(P->getContext(), P);
      Changed = true;
    }
  }

  // If there are exiting blocks with branches on undef, resolve the undef in
  // the direction which will exit the loop. This will help simplify loop
  // trip count computations.
  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       E = ExitingBlocks.end(); I != E; ++I)
    if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
      if (BI->isConditional()) {
        if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {

          DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
                       << (*I)->getName() << "\n");

          BI->setCondition(ConstantInt::get(Cond->getType(),
                                            !L->contains(BI->getSuccessor(0))));

          // This may make the loop analyzable, force SCEV recomputation.
          if (SE)
            SE->forgetLoop(L);

          Changed = true;
        }
      }

  // Does the loop already have a preheader?  If so, don't insert one.
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    Preheader = InsertPreheaderForLoop(L, PP);
    if (Preheader) {
      ++NumInserted;
      Changed = true;
    }
  }

  // Next, check to make sure that all exit nodes of the loop only have
  // predecessors that are inside of the loop.  This check guarantees that the
  // loop preheader/header will dominate the exit blocks.  If the exit block has
  // predecessors from outside of the loop, split the edge now.
  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getExitBlocks(ExitBlocks);

  SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
                                               ExitBlocks.end());
  for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
         E = ExitBlockSet.end(); I != E; ++I) {
    BasicBlock *ExitBlock = *I;
    for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
         PI != PE; ++PI)
      // Must be exactly this loop: no subloops, parent loops, or non-loop preds
      // allowed.
      if (!L->contains(*PI)) {
        if (rewriteLoopExitBlock(L, ExitBlock, DT, LI, PP)) {
          ++NumInserted;
          Changed = true;
        }
        break;
      }
  }

  // If the header has more than two predecessors at this point (from the
  // preheader and from multiple backedges), we must adjust the loop.
  BasicBlock *LoopLatch = L->getLoopLatch();
  if (!LoopLatch) {
    // If this is really a nested loop, rip it out into a child loop.  Don't do
    // this for loops with a giant number of backedges, just factor them into a
    // common backedge instead.
    if (L->getNumBackEdges() < 8) {
      if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE, PP, AC)) {
        ++NumNested;
        // Enqueue the outer loop as it should be processed next in our
        // depth-first nest walk.
        Worklist.push_back(OuterL);

        // This is a big restructuring change, reprocess the whole loop.
        Changed = true;
        // GCC doesn't tail recursion eliminate this.
        // FIXME: It isn't clear we can't rely on LLVM to TRE this.
        goto ReprocessLoop;
      }
    }

    // If we either couldn't, or didn't want to, identify nesting of the loops,
    // insert a new block that all backedges target, then make it jump to the
    // loop header.
    LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI);
    if (LoopLatch) {
      ++NumInserted;
      Changed = true;
    }
  }

  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  // Scan over the PHI nodes in the loop header.  Since they now have only two
  // incoming values (the loop is canonicalized), we may have simplified the PHI
  // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
  PHINode *PN;
  for (BasicBlock::iterator I = L->getHeader()->begin();
       (PN = dyn_cast<PHINode>(I++)); )
    if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
      if (SE) SE->forgetValue(PN);
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
    }

  // If this loop has multiple exits and the exits all go to the same
  // block, attempt to merge the exits. This helps several passes, such
  // as LoopRotation, which do not support loops with multiple exits.
  // SimplifyCFG also does this (and this code uses the same utility
  // function), however this code is loop-aware, where SimplifyCFG is
  // not. That gives it the advantage of being able to hoist
  // loop-invariant instructions out of the way to open up more
  // opportunities, and the disadvantage of having the responsibility
  // to preserve dominator information.
  bool UniqueExit = true;
  if (!ExitBlocks.empty())
    for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
      if (ExitBlocks[i] != ExitBlocks[0]) {
        UniqueExit = false;
        break;
      }
  if (UniqueExit) {
    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
      BasicBlock *ExitingBlock = ExitingBlocks[i];
      if (!ExitingBlock->getSinglePredecessor()) continue;
      BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
      if (!BI || !BI->isConditional()) continue;
      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
      if (!CI || CI->getParent() != ExitingBlock) continue;

      // Attempt to hoist out all instructions except for the
      // comparison and the branch.
      bool AllInvariant = true;
      bool AnyInvariant = false;
      for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
        Instruction *Inst = I++;
        // Skip debug info intrinsics.
        if (isa<DbgInfoIntrinsic>(Inst))
          continue;
        if (Inst == CI)
          continue;
        if (!L->makeLoopInvariant(Inst, AnyInvariant,
                                  Preheader ? Preheader->getTerminator()
                                            : nullptr)) {
          AllInvariant = false;
          break;
        }
      }
      if (AnyInvariant) {
        Changed = true;
        // The loop disposition of all SCEV expressions that depend on any
        // hoisted values have also changed.
        if (SE)
          SE->forgetLoopDispositions(L);
      }
      if (!AllInvariant) continue;

      // The block has now been cleared of all instructions except for
      // a comparison and a conditional branch. SimplifyCFG may be able
      // to fold it now.
      if (!FoldBranchToCommonDest(BI))
        continue;

      // Success. The block is now dead, so remove it from the loop,
      // update the dominator tree and delete it.
      DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
                   << ExitingBlock->getName() << "\n");

      // Notify ScalarEvolution before deleting this block. Currently assume the
      // parent loop doesn't change (spliting edges doesn't count). If blocks,
      // CFG edges, or other values in the parent loop change, then we need call
      // to forgetLoop() for the parent instead.
      if (SE)
        SE->forgetLoop(L);

      assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
      Changed = true;
      LI->removeBlock(ExitingBlock);

      DomTreeNode *Node = DT->getNode(ExitingBlock);
      const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
        Node->getChildren();
      while (!Children.empty()) {
        DomTreeNode *Child = Children.front();
        DT->changeImmediateDominator(Child, Node->getIDom());
      }
      DT->eraseNode(ExitingBlock);

      BI->getSuccessor(0)->removePredecessor(ExitingBlock);
      BI->getSuccessor(1)->removePredecessor(ExitingBlock);
      ExitingBlock->eraseFromParent();
    }
  }

  return Changed;
}
Esempio n. 9
0
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (!isa<Constant>(LHS))
    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
      LHS = SimpleLHS;
  if (!isa<Constant>(RHS))
    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
      RHS = SimpleRHS;
  if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        return true;
      }
  }

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // for any alloca-derived argument.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
    if (isAllocaDerivedArg(I.getOperand(0))) {
      // We can actually predict the result of comparisons between an
      // alloca-derived value and null. Note that this fires regardless of
      // SROA firing.
      bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
      SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                        : ConstantInt::getFalse(I.getType());
      return true;
    }

  // Finally check for SROA candidates in comparisons.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (isa<ConstantPointerNull>(I.getOperand(1))) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}
Esempio n. 10
0
/// If \param [in] BB has more than one predecessor that is a conditional
/// branch, attempt to use parallel and/or for the branch condition. \returns
/// true on success.
///
/// Before:
///   ......
///   %cmp10 = fcmp une float %tmp1, %tmp2
///   br i1 %cmp1, label %if.then, label %lor.rhs
///
/// lor.rhs:
///   ......
///   %cmp11 = fcmp une float %tmp3, %tmp4
///   br i1 %cmp11, label %if.then, label %ifend
///
/// if.end:  // the merge block
///   ......
///
/// if.then: // has two predecessors, both of them contains conditional branch.
///   ......
///   br label %if.end;
///
/// After:
///  ......
///  %cmp10 = fcmp une float %tmp1, %tmp2
///  ......
///  %cmp11 = fcmp une float %tmp3, %tmp4
///  %cmp12 = or i1 %cmp10, %cmp11    // parallel-or mode.
///  br i1 %cmp12, label %if.then, label %ifend
///
///  if.end:
///    ......
///
///  if.then:
///    ......
///    br label %if.end;
///
///  Current implementation handles two cases.
///  Case 1: \param BB is on the else-path.
///
///          BB1
///        /     |
///       BB2    |
///      /   \   |
///     BB3   \  |     where, BB1, BB2 contain conditional branches.
///      \    |  /     BB3 contains unconditional branch.
///       \   | /      BB4 corresponds to \param BB which is also the merge.
///  BB => BB4
///
///
///  Corresponding source code:
///
///  if (a == b && c == d)
///    statement; // BB3
///
///  Case 2: \param BB BB is on the then-path.
///
///             BB1
///          /      |
///         |      BB2
///         \    /    |  where BB1, BB2 contain conditional branches.
///  BB =>   BB3      |  BB3 contains unconditiona branch and corresponds
///           \     /    to \param BB.  BB4 is the merge.
///             BB4
///
///  Corresponding source code:
///
///  if (a == b || c == d)
///    statement;  // BB3
///
///  In both cases,  \param BB is the common successor of conditional branches.
///  In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
///  its predecessor.  In Case 2, \param BB (BB3) only has conditional branches
///  as its predecessors.
///
bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
                                         Pass *P) {
  PHINode *PHI = dyn_cast<PHINode>(BB->begin());
  if (PHI)
    return false; // For simplicity, avoid cases containing PHI nodes.

  BasicBlock *LastCondBlock = NULL;
  BasicBlock *FirstCondBlock = NULL;
  BasicBlock *UnCondBlock = NULL;
  int Idx = -1;

  // Check predecessors of \param BB.
  SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
  for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
       PI != PE; ++PI) {
    BasicBlock *Pred = *PI;
    BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());

    // All predecessors should terminate with a branch.
    if (!PBI)
      return false;

    BasicBlock *PP = Pred->getSinglePredecessor();

    if (PBI->isUnconditional()) {
      // Case 1: Pred (BB3) is an unconditional block, it should
      // have a single predecessor (BB2) that is also a predecessor
      // of \param BB (BB4) and should not have address-taken.
      // There should exist only one such unconditional
      // branch among the predecessors.
      if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
          Pred->hasAddressTaken())
        return false;

      UnCondBlock = Pred;
      continue;
    }

    // Only conditional branches are allowed beyond this point.
    assert(PBI->isConditional());

    // Condition's unique use should be the branch instruction.
    Value *PC = PBI->getCondition();
    if (!PC || !PC->hasOneUse())
      return false;

    if (PP && Preds.count(PP)) {
      // These are internal condition blocks to be merged from, e.g.,
      // BB2 in both cases.
      // Should not be address-taken.
      if (Pred->hasAddressTaken())
        return false;

      // Instructions in the internal condition blocks should be safe
      // to hoist up.
      for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) {
        Instruction *CI = BI++;
        if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
          return false;
      }
    } else {
      // This is the condition block to be merged into, e.g. BB1 in
      // both cases.
      if (FirstCondBlock)
        return false;
      FirstCondBlock = Pred;
    }

    // Find whether BB is uniformly on the true (or false) path
    // for all of its predecessors.
    BasicBlock *PS1 = PBI->getSuccessor(0);
    BasicBlock *PS2 = PBI->getSuccessor(1);
    BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
    int CIdx = (PS1 == BB) ? 0 : 1;

    if (Idx == -1)
      Idx = CIdx;
    else if (CIdx != Idx)
      return false;

    // PS is the successor which is not BB. Check successors to identify
    // the last conditional branch.
    if (Preds.count(PS) == 0) {
      // Case 2.
      LastCondBlock = Pred;
    } else {
      // Case 1
      BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
      if (BPS && BPS->isUnconditional()) {
        // Case 1: PS(BB3) should be an unconditional branch.
        LastCondBlock = Pred;
      }
    }
  }

  if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
    return false;

  TerminatorInst *TBB = LastCondBlock->getTerminator();
  BasicBlock *PS1 = TBB->getSuccessor(0);
  BasicBlock *PS2 = TBB->getSuccessor(1);
  BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
  BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());

  // If PS1 does not jump into PS2, but PS2 jumps into PS1,
  // attempt branch inversion.
  if (!PBI1 || !PBI1->isUnconditional() ||
      (PS1->getTerminator()->getSuccessor(0) != PS2)) {
    // Check whether PS2 jumps into PS1.
    if (!PBI2 || !PBI2->isUnconditional() ||
        (PS2->getTerminator()->getSuccessor(0) != PS1))
      return false;

    // Do branch inversion.
    BasicBlock *CurrBlock = LastCondBlock;
    bool EverChanged = false;
    while (1) {
      BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
      CmpInst::Predicate Predicate = CI->getPredicate();
      // Cannonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
      if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
        CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
        BI->swapSuccessors();
        EverChanged = true;
      }
      if (CurrBlock == FirstCondBlock)
        break;
      CurrBlock = CurrBlock->getSinglePredecessor();
    }
    return EverChanged;
  }

  // PS1 must have a conditional branch.
  if (!PBI1 || !PBI1->isUnconditional())
    return false;

  // PS2 should not contain PHI node.
  PHI = dyn_cast<PHINode>(PS2->begin());
  if (PHI)
    return false;

  // Do the transformation.
  BasicBlock *CB;
  BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
  bool Iteration = true;
  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
  Value *PC = PBI->getCondition();

  do {
    CB = PBI->getSuccessor(1 - Idx);
    // Delete the conditional branch.
    FirstCondBlock->getInstList().pop_back();
    FirstCondBlock->getInstList()
        .splice(FirstCondBlock->end(), CB->getInstList());
    PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
    Value *CC = PBI->getCondition();
    // Merge conditions.
    Builder.SetInsertPoint(PBI);
    Value *NC;
    if (Idx == 0)
      // Case 2, use parallel or.
      NC = Builder.CreateOr(PC, CC);
    else
      // Case 1, use parallel and.
      NC = Builder.CreateAnd(PC, CC);

    PBI->replaceUsesOfWith(CC, NC);
    PC = NC;
    if (CB == LastCondBlock)
      Iteration = false;
    // Remove internal conditional branches.
    CB->dropAllReferences();
    // make CB unreachable and let downstream to delete the block.
    new UnreachableInst(CB->getContext(), CB);
  } while (Iteration);

  Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
  DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
  return true;
}
	void CGGraph::constructGraph() {

		std::vector<CGConstraint*>::iterator it;
		std::string nodeName;
		CGConstraint* curConstraint;
		CGNode* firstNode;
		CGNode* secondNode;
		std::vector<int>::iterator lengthIt;
		int curValue;
		ConstantInt* cInt;

		for (it = constraints.begin(); it != constraints.end(); ++it) {

			curConstraint = *it;
			Instruction* PP = curConstraint->programPoint;

			switch(curConstraint->type) {
				case CGConstraint::C1:
				{

					firstNode = getNode(getNameFromValue(curConstraint->programPoint->getOperand(0), PP));
					secondNode = getNode(getNameFromValue(curConstraint->programPoint->getOperand(1), PP));

					firstNode->connectTo(secondNode, 0);
					
					break;
				}
				case CGConstraint::C2:
				{
					if (isa<StoreInst>(PP)) {
						/*
						operand(0) = constant int or variable of int type
						operand(1)  = var being stored into
						*/
						firstNode = getNode(getNameFromValue(PP->getOperand(0), PP));
						secondNode = getNode(getNameFromValue(PP->getOperand(1), PP));
						firstNode->connectTo(secondNode, 0);
					}
					else if (isa<LoadInst>(PP)) {
						/*
						operand(0) = pointer being loaded from
						(Value*)PP  = var being loaded into
						*/
						firstNode = getNode(getNameFromValue(PP->getOperand(0), PP));
						secondNode = getNode(getNameFromValue(PP, PP));
						firstNode->connectTo(secondNode, 0);
					}
					else if (isa<CastInst>(PP)) {
						/*
						operand(0) = var being casted 
						(Value*)PP  = var getting the result of the cast
						*/
						firstNode = getNode(getNameFromValue(PP->getOperand(0), PP));
						secondNode = getNode(getNameFromValue(PP, PP));
						firstNode->connectTo(secondNode, 0);
					}

					break;
				}
				case CGConstraint::C3:
				{
				
					secondNode = getNode(getNameFromValue(PP, PP));
					if ((cInt = dyn_cast<ConstantInt>(curConstraint->programPoint->getOperand(0)))) {
						firstNode = getNode(getNameFromValue(curConstraint->programPoint->getOperand(1), PP));
					} else if ((cInt = dyn_cast<ConstantInt>(curConstraint->programPoint->getOperand(1)))) {
						firstNode = getNode(getNameFromValue(curConstraint->programPoint->getOperand(0), PP));
					} else {
						return;
					}
					curValue = cInt->getSExtValue();
					firstNode->connectTo(secondNode, curValue);
					break;
				}
				case CGConstraint::C4:
				{

					CmpInst* cmpInst;
					BranchInst* branchInst;
					
					if( !(branchInst = dyn_cast<BranchInst>(curConstraint->programPoint)) ) {
						errs() << "ERROR: BranchInst cast unsuccessful for C4 in CGGraph::constructGraph() \n";
						return;
					}
					if( !(cmpInst = dyn_cast<CmpInst>(owner->branchToCompare[branchInst])) ) {
						errs() << "ERROR: CmpInst not found for C4 in CGGraph::constructGraph() \n";
						return;
					}

					int size1, size2;
					size1 = curConstraint->piAssignments.size();
					size2 = curConstraint->piAssignments2.size();
					/*
					There are two possibilities here: (a) size1 = size2 = 2, or (b) size1 = size2 = 1;
					If (b), there will be one operand in the compare inst that is a literal value.
					That literal value still generates a constraint although it does not generate a pi assignment.
					We need to account for that.
					*/

					if (size1 != size2) {
						errs() << "ERROR: piAssignments not of equal length for C4 in CGGraph::constructGraph()\n"; 
						return;
					}
					if ( (size1 < 1) || (size1 > 2) ) {
						errs() << "ERROR: piAssignments.size() != 1 or 2 for C4 in CGGraph::constructGraph()\n"; 
						return;
					}
						
					//this takes care of the first two constraints for both cases (size = 1 or size = 2)
					//first branch
					for (int i = 0; i < size1; ++i) {
						firstNode = getNode(curConstraint->piAssignments[i]->getOperandName()); //vi - wr
						secondNode = getNode(curConstraint->piAssignments[i]->getAssignedName()); //vj - ws 
						firstNode->connectTo(secondNode, 0); //vi -> vj 0  -  wr -> ws 0
					}
					//second branch
					for (int i = 0; i < size2; ++i) {
						firstNode = getNode(curConstraint->piAssignments2[i]->getOperandName()); //vi - wr
						secondNode = getNode(curConstraint->piAssignments2[i]->getAssignedName()); //vk - wt 
						firstNode->connectTo(secondNode, 0); //vi -> vk 0  -  wr -> wt 0
					}
		
					/*
					- first and second op names for the third constraint for branches 1 and 2
					- each case is stored in the order of the pi assignments, which is also the 
					order of the cmp instruction operands
					*/
					std::string firstOpNameBr1, secondOpNameBr1, firstOpNameBr2, secondOpNameBr2;
					
					if (size1 == 1) {
						/*
						the 2nd constraint in the table will not exist in this case, but there will be a 3rd constraint
						that was missed by the above, e.g.
						if (x1 <= 10)
						1. x2 <= x1
						2. nothing
						3. x2 <= 10  <---- we need to add this here
						*/ 
						
						//prune the int from the CmpInst
						if (cmpInst->getNumOperands() != 2) {
							errs() << "ERROR: cmpInst->getNumOperands() != 2 in CGGraph::constructGraph()\n";
							return;
						}
						if ((cInt = dyn_cast<ConstantInt>(cmpInst->getOperand(0)))) {
							//int is first op
							firstOpNameBr1 = getNameFromValue(cInt, PP);
							secondOpNameBr1 = curConstraint->piAssignments[0]->getAssignedName();
							firstOpNameBr2 = firstOpNameBr1;
							secondOpNameBr2 = curConstraint->piAssignments2[0]->getAssignedName();
						} else if ((cInt = dyn_cast<ConstantInt>(cmpInst->getOperand(1)))) {
							//int is second op 
							firstOpNameBr1 = curConstraint->piAssignments[0]->getAssignedName();
							secondOpNameBr1 = getNameFromValue(cInt, PP);
							firstOpNameBr2 = curConstraint->piAssignments2[0]->getAssignedName();
							secondOpNameBr2 = secondOpNameBr1;
						} else {
							errs() << "ERROR: int not found in cmpInstr in CGGraph::constructGraph()\n";
							return;
						}
					}
					else if (size1 == 2) {
						//store in order of pi assignments
						firstOpNameBr1 = curConstraint->piAssignments[0]->getAssignedName();
						secondOpNameBr1 = curConstraint->piAssignments[1]->getAssignedName();
						firstOpNameBr2 = curConstraint->piAssignments2[0]->getAssignedName();
						secondOpNameBr2 = curConstraint->piAssignments2[1]->getAssignedName();
					}
	
					CGNode* firstNodeBr1 = getNode(firstOpNameBr1);
					CGNode* secondNodeBr1 = getNode(secondOpNameBr1);
					CGNode* firstNodeBr2 = getNode(firstOpNameBr2);
					CGNode* secondNodeBr2 = getNode(secondOpNameBr2);
					
					switch(cmpInst->getPredicate()) {
						case CmpInst::ICMP_SGT: // >
							firstNodeBr1->connectTo(secondNodeBr1, -1); //vj -> ws -1
							secondNodeBr2->connectTo(firstNodeBr2, 0); //wt -> vk 0
							break;
						case CmpInst::ICMP_SLT: // <
							secondNodeBr1->connectTo(firstNodeBr1, -1); //ws -> vj -1
							firstNodeBr2->connectTo(secondNodeBr2, 0); //vk -> wt 0
							break;
						case CmpInst::ICMP_SGE: // >=
							firstNodeBr1->connectTo(secondNodeBr1, 0); //vj -> ws 0
							secondNodeBr2->connectTo(firstNodeBr2, -1); //wt -> vk -1
							break;
						case CmpInst::ICMP_SLE: // <=
							secondNodeBr1->connectTo(firstNodeBr1, 0); //ws -> vj 0
							firstNodeBr2->connectTo(secondNodeBr2, -1); //vk -> wt -1						
							break;
						default:
							break;
					}

					break;
				}
				case CGConstraint::C5:
				{
					//operand 1 = array length
					firstNode = getNode(getNameFromValue(PP->getOperand(1), PP));
					secondNode = getNode(curConstraint->piAssignments[0]->getAssignedName());
					firstNode->connectTo(secondNode, -1);
					break;
				}
				case CGConstraint::CONTROL_FLOW:
				{
					//Done and ready to test

					firstNode = getNode(getNameFromValue(curConstraint->programPoint->getOperand(0), PP));
					secondNode = getNode(getNameFromValue(curConstraint->programPoint, PP));
					firstNode->connectTo(secondNode, 0);
					firstNode = getNode(getNameFromValue(curConstraint->programPoint->getOperand(1), PP));
					firstNode->connectTo(secondNode, 0);
					break;
				}
			} //end switch
		} //end for	
	} //end constructGraph()
Esempio n. 12
0
// Compute the unlikely successors to the block BB in the loop L, specifically
// those that are unlikely because this is a loop, and add them to the
// UnlikelyBlocks set.
static void
computeUnlikelySuccessors(const BasicBlock *BB, Loop *L,
                          SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) {
  // Sometimes in a loop we have a branch whose condition is made false by
  // taking it. This is typically something like
  //  int n = 0;
  //  while (...) {
  //    if (++n >= MAX) {
  //      n = 0;
  //    }
  //  }
  // In this sort of situation taking the branch means that at the very least it
  // won't be taken again in the next iteration of the loop, so we should
  // consider it less likely than a typical branch.
  //
  // We detect this by looking back through the graph of PHI nodes that sets the
  // value that the condition depends on, and seeing if we can reach a successor
  // block which can be determined to make the condition false.
  //
  // FIXME: We currently consider unlikely blocks to be half as likely as other
  // blocks, but if we consider the example above the likelyhood is actually
  // 1/MAX. We could therefore be more precise in how unlikely we consider
  // blocks to be, but it would require more careful examination of the form
  // of the comparison expression.
  const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
  if (!BI || !BI->isConditional())
    return;

  // Check if the branch is based on an instruction compared with a constant
  CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
  if (!CI || !isa<Instruction>(CI->getOperand(0)) ||
      !isa<Constant>(CI->getOperand(1)))
    return;

  // Either the instruction must be a PHI, or a chain of operations involving
  // constants that ends in a PHI which we can then collapse into a single value
  // if the PHI value is known.
  Instruction *CmpLHS = dyn_cast<Instruction>(CI->getOperand(0));
  PHINode *CmpPHI = dyn_cast<PHINode>(CmpLHS);
  Constant *CmpConst = dyn_cast<Constant>(CI->getOperand(1));
  // Collect the instructions until we hit a PHI
  SmallVector<BinaryOperator *, 1> InstChain;
  while (!CmpPHI && CmpLHS && isa<BinaryOperator>(CmpLHS) &&
         isa<Constant>(CmpLHS->getOperand(1))) {
    // Stop if the chain extends outside of the loop
    if (!L->contains(CmpLHS))
      return;
    InstChain.push_back(cast<BinaryOperator>(CmpLHS));
    CmpLHS = dyn_cast<Instruction>(CmpLHS->getOperand(0));
    if (CmpLHS)
      CmpPHI = dyn_cast<PHINode>(CmpLHS);
  }
  if (!CmpPHI || !L->contains(CmpPHI))
    return;

  // Trace the phi node to find all values that come from successors of BB
  SmallPtrSet<PHINode*, 8> VisitedInsts;
  SmallVector<PHINode*, 8> WorkList;
  WorkList.push_back(CmpPHI);
  VisitedInsts.insert(CmpPHI);
  while (!WorkList.empty()) {
    PHINode *P = WorkList.back();
    WorkList.pop_back();
    for (BasicBlock *B : P->blocks()) {
      // Skip blocks that aren't part of the loop
      if (!L->contains(B))
        continue;
      Value *V = P->getIncomingValueForBlock(B);
      // If the source is a PHI add it to the work list if we haven't
      // already visited it.
      if (PHINode *PN = dyn_cast<PHINode>(V)) {
        if (VisitedInsts.insert(PN).second)
          WorkList.push_back(PN);
        continue;
      }
      // If this incoming value is a constant and B is a successor of BB, then
      // we can constant-evaluate the compare to see if it makes the branch be
      // taken or not.
      Constant *CmpLHSConst = dyn_cast<Constant>(V);
      if (!CmpLHSConst ||
          std::find(succ_begin(BB), succ_end(BB), B) == succ_end(BB))
        continue;
      // First collapse InstChain
      for (Instruction *I : llvm::reverse(InstChain)) {
        CmpLHSConst = ConstantExpr::get(I->getOpcode(), CmpLHSConst,
                                        cast<Constant>(I->getOperand(1)), true);
        if (!CmpLHSConst)
          break;
      }
      if (!CmpLHSConst)
        continue;
      // Now constant-evaluate the compare
      Constant *Result = ConstantExpr::getCompare(CI->getPredicate(),
                                                  CmpLHSConst, CmpConst, true);
      // If the result means we don't branch to the block then that block is
      // unlikely.
      if (Result &&
          ((Result->isZeroValue() && B == BI->getSuccessor(0)) ||
           (Result->isOneValue() && B == BI->getSuccessor(1))))
        UnlikelyBlocks.insert(B);
    }
  }
}
/// MatchGuardHeuristic - Predict that a comparison in which a register is
/// an operand, the register is used before being defined in a successor
/// block, and the successor block does not post-dominate will reach the
/// successor block.
/// @returns a Prediction that is a pair in which the first element is the
/// successor taken, and the second the successor not taken.
Prediction BranchHeuristicsInfo::MatchGuardHeuristic(BasicBlock *root) const {
  bool matched = false;
  Prediction pred;

  // Last instruction of basic block.
  TerminatorInst *TI = root->getTerminator();

  // Basic block successors. True and False branches.
  BasicBlock *trueSuccessor = TI->getSuccessor(0);
  BasicBlock *falseSuccessor = TI->getSuccessor(1);

  // Is the last instruction a Branch Instruction?
  BranchInst *BI = dyn_cast<BranchInst>(TI);
  if (!BI || !BI->isConditional())
    return empty;

  // Conditional instruction.
  Value *cond = BI->getCondition();

  // Find if the variable used in the branch instruction is
  // in fact a comparison instruction.
  CmpInst *CI = dyn_cast<CmpInst>(cond);
  if (!CI)
    return empty;

  // Seek over all of the operands of this comparison instruction.
  for (unsigned ops = 0; ops < CI->getNumOperands(); ++ops) {
    // Find the operand.
    Value *operand = CI->getOperand(ops);

    // Check if the operand is neither a function argument or a value.
    if (!isa<Argument>(operand) && !isa<User>(operand))
      continue;

    // Check if this variable was used in the true successor and
    // does not post dominate.
    // Since LLVM is in SSA form, it's impossible for a variable being used
    // before being defined, so that statement is skipped.
    if (operand->isUsedInBasicBlock(trueSuccessor) &&
        !PDT->dominates(trueSuccessor, root)) {
      // If a heuristic was already matched, predict none and abort immediately.
      if (matched)
        return empty;

      matched = true;
      pred = std::make_pair(trueSuccessor, falseSuccessor);
    }

    // Check if this variable was used in the false successor and
    // does not post dominate.
    if (operand->isUsedInBasicBlock(falseSuccessor) &&
        !PDT->dominates(falseSuccessor, root)) {
      // If a heuristic was already matched, predict none and abort immediately.
      if (matched)
        return empty;

      matched = true;
      pred = std::make_pair(falseSuccessor, trueSuccessor);
    }
  }

  return (matched ? pred : empty);
}