Example #1
/// Visitor for ptrtoint instructions.
///
/// Looks up the DSNode reached through the cast's pointer operand and, when
/// the cast has exactly one use, inspects that use.
///
/// NOTE(review): this listing is an excerpt. The bodies of the conditionals,
/// every closing brace, and whatever is done with N are not visible here, so
/// the overall effect of the visitor cannot be stated from this text alone.
void GraphBuilder::visitPtrToIntInst(PtrToIntInst& I) {
  // Node associated with the pointer being converted to an integer.
  DSNode* N = getValueDest(I.getOperand(0)).getNode();
  if(I.hasOneUse()) {
    // Special-cases a cast whose only user is an integer compare.
    if(isa<ICmpInst>(*(I.use_begin()))) {
  if(I.hasOneUse()) {
    Value *V = dyn_cast<Value>(*(I.use_begin()));
    // Values already walked; insert() reporting "already present" ends the
    // loop below, so a chain that loops back on itself cannot spin forever.
    DenseSet<Value *> Seen;
    // Follow the chain of single-use values starting at the cast's user.
    while(V && V->hasOneUse() &&
          Seen.insert(V).second) {
      V = dyn_cast<Value>(*(V->use_begin()));
Example #2
/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0,    i8 %y, 0
/// Here the second instruction inserts values at the same indices, as the
/// first one, making the first one redundant.
/// It should be transformed to:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
///
/// \param I the insertvalue instruction being visited (the candidate "first"
///          instruction of a chain).
/// \returns the result of replaceInstUsesWith() when \p I is redundant and
///          its uses were redirected to its aggregate operand, or nullptr
///          when nothing was changed.
Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction. In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
    // Stop as soon as the single user is not an insertvalue, or uses V as the
    // inserted element rather than as the aggregate being updated. This check
    // must come before any dereference of UserInsInst, which may be null.
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    // Bound the walk so that very long chains stay cheap to analyse.
    ++Depth;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getOperand(0));
  return nullptr;
}
Example #3
/// If the result of a {s|z}ext and its source are both live out of the
/// defining block, rewrite the out-of-block uses of the source so that they
/// use a truncate of the extension result instead.
///
/// \param I the extension instruction; operand 0 is its source value.
/// \returns true if at least one use of the source was rewritten, false if
///          the transform was not applicable or changed nothing.
bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
    BasicBlock *DefBB = I->getParent();

    // If the result of a {s|z}ext and its source are both live out, rewrite all
    // other uses of the source with result of extension.
    Value *Src = I->getOperand(0);
    if (Src->hasOneUse())
        return false;

    // Only do this xform if truncating is free.
    if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
        return false;

    // Only safe to perform the optimization if the source is also defined in
    // this block.
    if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
        return false;

    // The extension result is live out if any of its users sits in a
    // different block.
    bool DefIsLiveOut = false;
    for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
            UI != E; ++UI) {
        Instruction *User = cast<Instruction>(*UI);

        // Figure out which BB this ext is used in.
        BasicBlock *UserBB = User->getParent();
        if (UserBB == DefBB) continue;
        DefIsLiveOut = true;
        break;  // One out-of-block user is enough.
    }
    if (!DefIsLiveOut)
        return false;

    // Make sure none of the uses are PHI nodes.
    for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
            UI != E; ++UI) {
        Instruction *User = cast<Instruction>(*UI);
        BasicBlock *UserBB = User->getParent();
        if (UserBB == DefBB) continue;
        // Be conservative. We don't want this xform to end up introducing
        // reloads just before load / store instructions.
        if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
            return false;
    }

    // InsertedTruncs - Only insert one trunc in each block once.
    DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

    bool MadeChange = false;
    for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
            UI != E; ) {
        Use &TheUse = UI.getUse();
        Instruction *User = cast<Instruction>(*UI);

        // Step to the next use of Src *before* this one may be rewritten:
        // assigning to a Use unlinks it from Src's use list, so advancing
        // afterwards would no longer walk the uses of Src.
        ++UI;

        // Figure out which BB this ext is used in.
        BasicBlock *UserBB = User->getParent();
        if (UserBB == DefBB) continue;

        // Both src and def are live in this block. Rewrite the use.
        Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

        if (!InsertedTrunc) {
            BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
            InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
        }

        // Replace a use of the {s|z}ext source with a use of the result.
        TheUse = InsertedTrunc;
        MadeChange = true;
    }

    return MadeChange;
}
Example #4
/// InstCombine visitor for load instructions.
///
/// In the order they appear below, the visible steps are: canonicalizing the
/// loaded type, comparing the known pointer alignment with the load's
/// effective alignment, replacing GEP indices, forwarding an already
/// available value, turning loads from null/undef into a store-to-null
/// marker, and simplifying loads whose address is a single-use select.
///
/// \param LI the load being visited.
/// \returns the instruction produced by a transform, &LI when the load was
///          updated in place, or nullptr (e.g. for non-simple loads).
///
/// NOTE(review): this listing is an excerpt with lines missing (closing
/// braces, the bodies of several conditionals, trailing call arguments), so
/// it does not compile as shown. Restore the missing statements from the
/// upstream source before relying on it.
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Try to canonicalize the loaded type.
  if (Instruction *Res = combineLoadToOperationType(*this, LI))
    return Res;

  // Attempt to improve the alignment.
  unsigned KnownAlign = getOrEnforceKnownAlignment(
      Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
  unsigned LoadAlign = LI.getAlignment();
  // A recorded alignment of 0 is replaced by the ABI alignment of the loaded
  // type for the comparison below.
  unsigned EffectiveLoadAlign =
      LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());

  // NOTE(review): neither branch has a body in this excerpt; presumably each
  // one updates the load's alignment -- confirm against upstream.
  if (KnownAlign > EffectiveLoadAlign)
  else if (LoadAlign == 0)

  // Replace GEP indices if possible.
  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
      return &LI;

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return nullptr;

  if (Instruction *Res = unpackLoadToAggregate(*this, LI))
    return Res;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI(LI);
  AAMDNodes AATags;
  if (Value *AvailableVal =
      FindAvailableLoadedValue(Op, LI.getParent(), BBI,
                               DefMaxInstsToScan, AA, &AATags)) {
    // When the available value is itself a load, the listed metadata kinds
    // of the two loads are combined.
    if (LoadInst *NLI = dyn_cast<LoadInst>(AvailableVal)) {
      // NOTE(review): the initializer list is not closed in this excerpt.
      unsigned KnownIDs[] = {
          LLVMContext::MD_tbaa,            LLVMContext::MD_alias_scope,
          LLVMContext::MD_noalias,         LLVMContext::MD_range,
          LLVMContext::MD_invariant_load,  LLVMContext::MD_nonnull,
          LLVMContext::MD_invariant_group, LLVMContext::MD_align,
      combineMetadata(NLI, &LI, KnownIDs);

    // The forwarded value is cast to the load's type before replacing it.
    return ReplaceInstUsesWith(
        LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
                                            LI.getName() + ".cast"));

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many others simplifications, and
    // exposes redundancy in the code.
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, SI) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), Align, SI)) {
        // NOTE(review): both CreateLoad calls are cut off after their first
        // argument in this excerpt.
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
        return SelectInst::Create(SI->getCondition(), V1, V2);

      // load (select (cond, null, P)) -> load P
      if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
          LI.getPointerAddressSpace() == 0) {
        LI.setOperand(0, SI->getOperand(2));
        return &LI;

      // load (select (cond, P, null)) -> load P
      if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
          LI.getPointerAddressSpace() == 0) {
        LI.setOperand(0, SI->getOperand(1));
        return &LI;
  return nullptr;
Example #5
/// InstCombine visitor for store instructions.
///
/// In the order they appear below, the visible steps are: canonicalizing the
/// stored type, comparing the known pointer alignment with the store's
/// effective alignment, unpacking aggregate stores, replacing GEP indices,
/// deleting stores into single-use allocas, a short backwards scan for
/// redundant stores, handling stores to null and stores of undef, and trying
/// to move a block-final store into the successor block.
///
/// \param SI the store being visited.
/// \returns the result of EraseInstFromFunction() when the store was
///          removed, &SI when it was updated in place, or nullptr.
///
/// NOTE(review): this listing is an excerpt with lines missing (closing
/// braces and the bodies of several conditionals and loops), so it does not
/// compile as shown. Restore the missing statements from the upstream source
/// before relying on it.
Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Try to canonicalize the stored type.
  if (combineStoreToValueType(*this, SI))
    return EraseInstFromFunction(SI);

  // Attempt to improve the alignment.
  unsigned KnownAlign = getOrEnforceKnownAlignment(
      Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
  unsigned StoreAlign = SI.getAlignment();
  // A recorded alignment of 0 is replaced by the ABI alignment of the stored
  // type for the comparison below.
  unsigned EffectiveStoreAlign =
      StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());

  // NOTE(review): neither branch has a body in this excerpt; presumably each
  // one updates the store's alignment -- confirm against upstream.
  if (KnownAlign > EffectiveStoreAlign)
  else if (StoreAlign == 0)

  // Try to canonicalize the stored type.
  if (unpackStoreToAggregate(*this, SI))
    return EraseInstFromFunction(SI);

  // Replace GEP indices if possible.
  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
      return &SI;

  // Don't hack volatile/ordered stores.
  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
  if (!SI.isUnordered()) return nullptr;

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    // Same idea one level down: a single-use GEP into a single-use alloca.
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations. This
  // situation often occurs with bitfield accesses.
  // NOTE(review): the statement that moves BBI and the actions taken inside
  // the conditionals of this loop are missing from the excerpt.
  BasicBlock::iterator BBI(SI);
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    // Don't count debug info directives, lest they affect codegen,
    // and we skip pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {

    // If this is a load, we have to stop.  However, if the loaded value is from
    // the pointer we're loading and is producing the pointer we're storing,
    // then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
        assert(SI.isUnordered() && "can't eliminate ordering operation");
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    // The stored value is irrelevant here, so it is replaced with undef and
    // its former definition is revisited.
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    return nullptr;  // Do not modify these!

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // The code below needs to be audited and adjusted for unordered atomics
  if (!SI.isSimple())
    return nullptr;

  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = SI.getIterator();
  // NOTE(review): the body of this do/while (the iterator step) is missing
  // from the excerpt.
  do {
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return nullptr;  // xform done!

  return nullptr;
/// InstCombine visitor for integer add instructions.
///
/// Tries a sequence of algebraic rewrites (each described by the comment in
/// front of it) and returns at the first one that applies.
///
/// \param I the add being visited.
/// \returns a replacement instruction, &I when the add itself was changed in
///          place, or 0 when nothing was changed.
///
/// NOTE(review): this listing is an excerpt with lines missing (closing
/// braces, some opening braces of bare scopes, and part of at least one
/// condition), so it does not compile as shown. Restore the missing
/// statements from the upstream source before relying on it.
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
  // Records whether the operands were reassociated/commuted, so that &I can
  // still be reported as changed when no later rewrite fires.
  bool Changed = SimplifyAssociativeOrCommutative(I);
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

  if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
                                 I.hasNoUnsignedWrap(), TD))
    return ReplaceInstUsesWith(I, V);

  // (A*B)+(A*C) -> A*(B+C) etc
  if (Value *V = SimplifyUsingDistributiveLaws(I))
    return ReplaceInstUsesWith(I, V);

  if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
    // X + (signbit) --> X ^ signbit
    const APInt &Val = CI->getValue();
    if (Val.isSignBit())
      return BinaryOperator::CreateXor(LHS, RHS);
    // See if SimplifyDemandedBits can simplify this.  This handles stuff like
    // (X & 254)+1 -> (X&254)|1
    if (SimplifyDemandedInstructionBits(I))
      return &I;

    // zext(bool) + C -> bool ? C + 1 : C
    if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
      if (ZI->getSrcTy()->isIntegerTy(1))
        return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
    Value *XorLHS = 0; ConstantInt *XorRHS = 0;
    if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
      uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
      const APInt &RHSVal = CI->getValue();
      // Number of high bits that the shl/ashr pair below would fill with the
      // sign; 0 means the pattern was not recognized.
      unsigned ExtendAmt = 0;
      // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
      // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
      if (XorRHS->getValue() == -RHSVal) {
        if (RHSVal.isPowerOf2())
          ExtendAmt = TySizeBits - RHSVal.logBase2() - 1;
        else if (XorRHS->getValue().isPowerOf2())
          ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
      // The rewrite is only kept when those high bits of the xor input are
      // known to be zero.
      if (ExtendAmt) {
        APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
        if (!MaskedValueIsZero(XorLHS, Mask))
          ExtendAmt = 0;
      if (ExtendAmt) {
        Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
        Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
        return BinaryOperator::CreateAShr(NewShl, ShAmt);

  if (isa<Constant>(RHS) && isa<PHINode>(LHS))
    if (Instruction *NV = FoldOpIntoPhi(I))
      return NV;

  // On i1, add is rewritten as xor.
  if (I.getType()->isIntegerTy(1))
    return BinaryOperator::CreateXor(LHS, RHS);

  // X + X --> X << 1
  if (LHS == RHS) {
    BinaryOperator *New =
      BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
    return New;

  // -A + B  -->  B - A
  // -A + -B  -->  -(A + B)
  if (Value *LHSV = dyn_castNegVal(LHS)) {
    if (Value *RHSV = dyn_castNegVal(RHS)) {
      Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
      return BinaryOperator::CreateNeg(NewAdd);
    return BinaryOperator::CreateSub(RHS, LHSV);

  // A + -B  -->  A - B
  if (!isa<Constant>(RHS))
    if (Value *V = dyn_castNegVal(RHS))
      return BinaryOperator::CreateSub(LHS, V);

  ConstantInt *C2;
  if (Value *X = dyn_castFoldableMul(LHS, C2)) {
    if (X == RHS)   // X*C + X --> X * (C+1)
      return BinaryOperator::CreateMul(RHS, AddOne(C2));

    // X*C1 + X*C2 --> X * (C1+C2)
    ConstantInt *C1;
    if (X == dyn_castFoldableMul(RHS, C1))
      return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));

  // X + X*C --> X * (C+1)
  if (dyn_castFoldableMul(RHS, C2) == LHS)
    return BinaryOperator::CreateMul(LHS, AddOne(C2));

  // A+B --> A|B iff A and B have no bits set in common.
  if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
    APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
    APInt LHSKnownOne(IT->getBitWidth(), 0);
    APInt LHSKnownZero(IT->getBitWidth(), 0);
    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
    // The RHS is only analysed when at least one LHS bit is known zero.
    if (LHSKnownZero != 0) {
      APInt RHSKnownOne(IT->getBitWidth(), 0);
      APInt RHSKnownZero(IT->getBitWidth(), 0);
      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
      // No bits in common -> bitwise or.
      if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
        return BinaryOperator::CreateOr(LHS, RHS);

  // W*X + Y*Z --> W * (X+Z)  iff W == Y
    Value *W, *X, *Y, *Z;
    if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
        match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
      // Reorder the factors so that a shared factor, if any, ends up in
      // both W and Y.
      if (W != Y) {
        if (W == Z) {
          std::swap(Y, Z);
        } else if (Y == X) {
          std::swap(W, X);
        } else if (X == Z) {
          std::swap(Y, Z);
          std::swap(W, X);

      if (W == Y) {
        Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
        return BinaryOperator::CreateMul(W, NewAdd);

  if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
    Value *X = 0;
    if (match(LHS, m_Not(m_Value(X))))    // ~X + C --> (C-1) - X
      return BinaryOperator::CreateSub(SubOne(CRHS), X);

    // (X & FF00) + xx00  -> (X+xx00) & FF00
    if (LHS->hasOneUse() &&
        match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) &&
        CRHS->getValue() == (CRHS->getValue() & C2->getValue())) {
      // See if all bits from the first bit set in the Add RHS up are included
      // in the mask.  First, get the rightmost bit.
      const APInt &AddRHSV = CRHS->getValue();
      // Form a mask of all bits from the lowest bit added through the top.
      APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));

      // See if the and mask includes all of these bits.
      APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());

      if (AddRHSHighBits == AddRHSHighBitsAnd) {
        // Okay, the xform is safe.  Insert the new add pronto.
        Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
        return BinaryOperator::CreateAnd(NewAdd, C2);

    // Try to fold constant add into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;

  // add (select X 0 (sub n A)) A  -->  select X A n
    // The select may be on either side; A is whichever operand is not the
    // select.
    SelectInst *SI = dyn_cast<SelectInst>(LHS);
    Value *A = RHS;
    if (!SI) {
      SI = dyn_cast<SelectInst>(RHS);
      A = LHS;
    if (SI && SI->hasOneUse()) {
      Value *TV = SI->getTrueValue();
      Value *FV = SI->getFalseValue();
      Value *N;

      // Can we fold the add into the argument of the select?
      // We check both true and false select arguments for a matching subtract.
      if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the true select value.
        return SelectInst::Create(SI->getCondition(), N, A);
      if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the false select value.
        return SelectInst::Create(SI->getCondition(), A, N);

  // Check for (add (sext x), y), see if we can merge this into an
  // integer add followed by a sext.
  if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
    // (add (sext x), cst) --> (sext (add x, cst'))
    if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
      Constant *CI = 
        ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
      // The constant must survive the trunc/sext round trip unchanged, and
      // the narrow add must be known not to overflow.
      if (LHSConv->hasOneUse() &&
          ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
        // Insert the new, smaller add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 
                                              CI, "addconv");
        return new SExtInst(NewAdd, I.getType());
    // (add (sext x), (sext y)) --> (sext (add int x, y))
    if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
      // Only do this if x/y have the same type, if at least one of them has a
      // single use (so we don't increase the number of sexts), and if the
      // integer add will not overflow.
      // NOTE(review): the start of the third conjunct (the overflow check
      // call) is missing from this excerpt, leaving unbalanced parentheses.
      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
                                   RHSConv->getOperand(0))) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 
                                             RHSConv->getOperand(0), "addconv");
        return new SExtInst(NewAdd, I.getType());

  return Changed ? &I : 0;
/// InstCombine visitor for load instructions (variant in which DL is a
/// pointer that may be null).
///
/// In the order they appear below, the visible steps are: comparing the
/// known pointer alignment with the load's effective alignment, sinking a
/// cast on the address through the load, forwarding an already available
/// value, turning loads from null/undef into a store-to-null marker, and
/// simplifying loads whose address is a single-use select.
///
/// \param LI the load being visited.
/// \returns the instruction produced by a transform, &LI when the load was
///          updated in place, or nullptr (e.g. for non-simple loads).
///
/// NOTE(review): this listing is an excerpt with lines missing (closing
/// braces, the bodies of several conditionals, trailing call arguments), so
/// it does not compile as shown. Restore the missing statements from the
/// upstream source before relying on it.
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (DL) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()),DL);
    unsigned LoadAlign = LI.getAlignment();
    // NOTE(review): the fallback operand of this conditional expression is
    // missing from the excerpt.
    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :

    // NOTE(review): neither branch has a body in this excerpt; presumably
    // each one updates the load's alignment -- confirm against upstream.
    if (KnownAlign > EffectiveLoadAlign)
    else if (LoadAlign == 0)

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
      return Res;

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return nullptr;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many others simplifications, and
    // exposes redundancy in the code.
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
        // NOTE(review): both CreateLoad calls are cut off after their first
        // argument in this excerpt.
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
        return SelectInst::Create(SI->getCondition(), V1, V2);

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
  return nullptr;
Example #8
/// If \param [in] BB has more than one predecessor that is a conditional
/// branch, attempt to use parallel and/or for the branch condition. \returns
/// true on success.
/// Before:
///   ......
///   %cmp10 = fcmp une float %tmp1, %tmp2
///   br i1 %cmp10, label %if.then, label %lor.rhs
/// lor.rhs:
///   ......
///   %cmp11 = fcmp une float %tmp3, %tmp4
///   br i1 %cmp11, label %if.then, label %if.end
/// if.end:  // the merge block
///   ......
/// if.then: // has two predecessors, both of them contains conditional branch.
///   ......
///   br label %if.end;
/// After:
///  ......
///  %cmp10 = fcmp une float %tmp1, %tmp2
///  ......
///  %cmp11 = fcmp une float %tmp3, %tmp4
///  %cmp12 = or i1 %cmp10, %cmp11    // parallel-or mode.
///  br i1 %cmp12, label %if.then, label %if.end
///  if.end:
///    ......
///  if.then:
///    ......
///    br label %if.end;
///  Current implementation handles two cases.
///  Case 1: \param BB is on the else-path.
///          BB1
///        /     |
///       BB2    |
///      /   \   |
///     BB3   \  |     where, BB1, BB2 contain conditional branches.
///      \    |  /     BB3 contains unconditional branch.
///       \   | /      BB4 corresponds to \param BB which is also the merge.
///  BB => BB4
///  Corresponding source code:
///  if (a == b && c == d)
///    statement; // BB3
///  Case 2: \param BB is on the then-path.
///             BB1
///          /      |
///         |      BB2
///         \    /    |  where BB1, BB2 contain conditional branches.
///  BB =>   BB3      |  BB3 contains unconditional branch and corresponds
///           \     /    to \param BB.  BB4 is the merge.
///             BB4
///  Corresponding source code:
///  if (a == b || c == d)
///    statement;  // BB3
///  In both cases,  \param BB is the common successor of conditional branches.
///  In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
///  its predecessor.  In Case 2, \param BB (BB3) only has conditional branches
///  as its predecessors.
bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
                                         Pass *P) {
  PHINode *PHI = dyn_cast<PHINode>(BB->begin());
  if (PHI)
    return false; // For simplicity, avoid cases containing PHI nodes.

  BasicBlock *LastCondBlock = NULL;
  BasicBlock *FirstCondBlock = NULL;
  BasicBlock *UnCondBlock = NULL;
  int Idx = -1;

  // Check predecessors of \param BB.
  SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
  for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
       PI != PE; ++PI) {
    BasicBlock *Pred = *PI;
    BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());

    // All predecessors should terminate with a branch.
    if (!PBI)
      return false;

    BasicBlock *PP = Pred->getSinglePredecessor();

    if (PBI->isUnconditional()) {
      // Case 1: Pred (BB3) is an unconditional block, it should
      // have a single predecessor (BB2) that is also a predecessor
      // of \param BB (BB4) and should not have address-taken.
      // There should exist only one such unconditional
      // branch among the predecessors.
      if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
        return false;

      UnCondBlock = Pred;

    // Only conditional branches are allowed beyond this point.

    // Condition's unique use should be the branch instruction.
    Value *PC = PBI->getCondition();
    if (!PC || !PC->hasOneUse())
      return false;

    if (PP && Preds.count(PP)) {
      // These are internal condition blocks to be merged from, e.g.,
      // BB2 in both cases.
      // Should not be address-taken.
      if (Pred->hasAddressTaken())
        return false;

      // Instructions in the internal condition blocks should be safe
      // to hoist up.
      for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) {
        Instruction *CI = BI++;
        if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
          return false;
    } else {
      // This is the condition block to be merged into, e.g. BB1 in
      // both cases.
      if (FirstCondBlock)
        return false;
      FirstCondBlock = Pred;

    // Find whether BB is uniformly on the true (or false) path
    // for all of its predecessors.
    BasicBlock *PS1 = PBI->getSuccessor(0);
    BasicBlock *PS2 = PBI->getSuccessor(1);
    BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
    int CIdx = (PS1 == BB) ? 0 : 1;

    if (Idx == -1)
      Idx = CIdx;
    else if (CIdx != Idx)
      return false;

    // PS is the successor which is not BB. Check successors to identify
    // the last conditional branch.
    if (Preds.count(PS) == 0) {
      // Case 2.
      LastCondBlock = Pred;
    } else {
      // Case 1
      BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
      if (BPS && BPS->isUnconditional()) {
        // Case 1: PS(BB3) should be an unconditional branch.
        LastCondBlock = Pred;

  if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
    return false;

  TerminatorInst *TBB = LastCondBlock->getTerminator();
  BasicBlock *PS1 = TBB->getSuccessor(0);
  BasicBlock *PS2 = TBB->getSuccessor(1);
  BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
  BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());

  // If PS1 does not jump into PS2, but PS2 jumps into PS1,
  // attempt branch inversion.
  if (!PBI1 || !PBI1->isUnconditional() ||
      (PS1->getTerminator()->getSuccessor(0) != PS2)) {
    // Check whether PS2 jumps into PS1.
    if (!PBI2 || !PBI2->isUnconditional() ||
        (PS2->getTerminator()->getSuccessor(0) != PS1))
      return false;

    // Do branch inversion.
    BasicBlock *CurrBlock = LastCondBlock;
    bool EverChanged = false;
    while (1) {
      BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
      CmpInst::Predicate Predicate = CI->getPredicate();
      // Cannonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
      if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
        EverChanged = true;
      if (CurrBlock == FirstCondBlock)
      CurrBlock = CurrBlock->getSinglePredecessor();
    return EverChanged;

  // PS1 must have a conditional branch.
  if (!PBI1 || !PBI1->isUnconditional())
    return false;

  // PS2 should not contain PHI node.
  PHI = dyn_cast<PHINode>(PS2->begin());
  if (PHI)
    return false;

  // Do the transformation.
  BasicBlock *CB;
  BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
  bool Iteration = true;
  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
  Value *PC = PBI->getCondition();

  do {
    CB = PBI->getSuccessor(1 - Idx);
    // Delete the conditional branch.
        .splice(FirstCondBlock->end(), CB->getInstList());
    PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
    Value *CC = PBI->getCondition();
    // Merge conditions.
    Value *NC;
    if (Idx == 0)
      // Case 2, use parallel or.
      NC = Builder.CreateOr(PC, CC);
      // Case 1, use parallel and.
      NC = Builder.CreateAnd(PC, CC);

    PBI->replaceUsesOfWith(CC, NC);
    PC = NC;
    if (CB == LastCondBlock)
      Iteration = false;
    // Remove internal conditional branches.
    // make CB unreachable and let downstream to delete the block.
    new UnreachableInst(CB->getContext(), CB);
  } while (Iteration);

  Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
  DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
  return true;
/// Try to simplify or remove the store \p SI.
/// \returns the result of EraseInstFromFunction(SI) when the store is found to
/// be dead, the instruction produced by InstCombineStoreToCast when a cast on
/// the pointer operand is folded, and 0 otherwise.
Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  // NOTE(review): several statements and closing braces appear to be missing
  // from this copy of the function (e.g. the dangling `?:` in the alignment
  // computation and the empty do/while body near the end); verify against the
  // upstream source before relying on it.

  // Operand 0 is the value being stored, operand 1 the destination pointer.
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  // If the RHS is an alloca with two uses, the other one being a
  // llvm.dbg.declare, zapify the store and the declare, making the
  // alloca dead.  We must do this to prevent declares from affecting
  // codegen.
  // The same is done when the pointer is a single-use GEP whose base is such
  // an alloca.
  if (!SI.isVolatile()) {
    if (Ptr->hasOneUse()) {
      if (isa<AllocaInst>(Ptr)) 
        return EraseInstFromFunction(SI);
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
        if (isa<AllocaInst>(GEP->getOperand(0))) {
          if (GEP->getOperand(0)->hasOneUse())
            return EraseInstFromFunction(SI);
          if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) {
            return EraseInstFromFunction(SI);
    if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) {
      return EraseInstFromFunction(SI);

  // Attempt to improve the alignment.  This is only tried when target data
  // (TD) is available.
  if (TD) {
    unsigned KnownAlign =
      GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
    unsigned StoreAlign = SI.getAlignment();
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :

    if (KnownAlign > EffectiveStoreAlign)
    else if (StoreAlign == 0)

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations. This
  // situation often occurs with bitfield accesses.
  // ScanInsts bounds the backward scan, which also stops at the start of the
  // block.
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    // Don't count debug info directives, lest they affect codegen,
    // and we skip pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (!PrevSI->isVolatile() &&equivalentAddressValues(PrevSI->getOperand(1),
                                                          SI.getOperand(1))) {
    // If this is a load, we have to stop.  However, if the value being stored
    // is this very load and it reads from the address we are storing to,
    // then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
        return EraseInstFromFunction(SI);
      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
  if (SI.isVolatile()) return 0;  // Don't hack volatile stores.

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  // Only applies in address space 0.  The stored value is replaced with undef
  // and the old value, if it is an instruction, is queued for revisiting.
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    return 0;  // Do not modify these!

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.  Both cast instructions and cast constant expressions are
  // handled.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;

  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI; 
  do {
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return 0;  // xform done!
  return 0;
/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
/// instruction.  Operand 1 of \p I must be a SelectInst (it is obtained with
/// cast<>).
/// \returns false when neither arm of the select is a null constant, in which
/// case \p I is left unchanged; true once operand 1 of \p I has been replaced
/// by the non-null arm.
bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
  // NOTE(review): several statements and closing braces appear to be missing
  // from this copy of the function (e.g. the dangling `?:` in the operand
  // replacement loop); verify against the upstream source before relying on
  // it.
  SelectInst *SI = cast<SelectInst>(I.getOperand(1));
  // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
  // NonNullOperand is the index of the select operand to keep (1 = true arm,
  // 2 = false arm), or -1 if neither arm is a null constant.
  int NonNullOperand = -1;
  if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
    if (ST->isNullValue())
      NonNullOperand = 2;
  // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
  if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
    if (ST->isNullValue())
      NonNullOperand = 1;
  if (NonNullOperand == -1)
    return false;
  Value *SelectCond = SI->getOperand(0);
  // Change the div/rem to use 'Y' instead of the select.
  I.setOperand(1, SI->getOperand(NonNullOperand));
  // Okay, we know we replace the operand of the div/rem with 'Y' with no
  // problem.  However, the select, or the condition of the select may have
  // multiple uses.  Based on our knowledge that the operand must be non-zero,
  // propagate the known value for the select into other uses of it, and
  // propagate a known value of the condition into its other users.
  // If the select and condition only have a single use, don't bother with this,
  // early exit.
  // I no longer uses SI after the setOperand above, so use_empty() here means
  // the div/rem was the select's only user.
  if (SI->use_empty() && SelectCond->hasOneUse())
    return true;
  // Scan the current block backward, looking for other uses of SI.
  BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
  while (BBI != BBFront) {
    // If we found a call to a function, we can't assume it will return, so
    // information from below it cannot be propagated above it.
    // Intrinsic calls are exempt from this check.
    if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
    // Replace uses of the select or its condition with the known values.
    // Note that the loop variable I below shadows the BinaryOperator
    // parameter I.
    for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
         I != E; ++I) {
      if (*I == SI) {
        *I = SI->getOperand(NonNullOperand);
      } else if (*I == SelectCond) {
        *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) :
    // If we passed the instruction, quit looking for it.
    if (&*BBI == SI)
      SI = 0;
    if (&*BBI == SelectCond)
      SelectCond = 0;
    // If we ran out of things to eliminate, break out of the loop.
    if (SelectCond == 0 && SI == 0)
  return true;
Example #11
/// LinearizeExprTree - Given an associative binary expression tree, traverse
/// all of the uses putting it into canonical form.  This forces a left-linear
/// form of the expression (((a+b)+c)+d), and collects information about the
/// rank of the non-tree operands.
/// \param I is the root of the (sub)tree to linearize.
/// \param Ops receives one ValueEntry (rank, value) per leaf operand.
/// NOTE: This intentionally destroys the expression tree operands (turning
/// them into undef values) to reduce #uses of the values.  This means that the
/// caller MUST use something like RewriteExprTree to put the values back in.
void Reassociate::LinearizeExprTree(BinaryOperator *I,
                                    SmallVectorImpl<ValueEntry> &Ops) {
  // NOTE(review): several statements and closing braces appear to be missing
  // from this copy of the function (e.g. no statement follows the "Move LHS
  // right before I" comment below); verify against the upstream source before
  // relying on it.
  Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
  unsigned Opcode = I->getOpcode();

  // First step, linearize the expression if it is in ((A+B)+(C+D)) form.
  // LHSBO / RHSBO are non-null only when the corresponding operand is itself
  // a reassociable operation with the same opcode.
  BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
  BinaryOperator *RHSBO = isReassociableOp(RHS, Opcode);

  // If this is a multiply expression tree and it contains internal negations,
  // transform them into multiplies by -1 so they can be reassociated.
  // Only single-use negations that are not already part of the tree are
  // lowered.
  if (I->getOpcode() == Instruction::Mul) {
    if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) {
      LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap);
      LHSBO = isReassociableOp(LHS, Opcode);
    if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) {
      RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap);
      RHSBO = isReassociableOp(RHS, Opcode);

  if (!LHSBO) {
    if (!RHSBO) {
      // Neither the LHS nor the RHS is part of the tree, thus this is a leaf.
      // As such, just remember these operands and their rank.
      Ops.push_back(ValueEntry(getRank(LHS), LHS));
      Ops.push_back(ValueEntry(getRank(RHS), RHS));
      // Clear the leaves out.
      I->setOperand(0, UndefValue::get(I->getType()));
      I->setOperand(1, UndefValue::get(I->getType()));
    // Turn X+(Y+Z) -> (Y+Z)+X
    std::swap(LHSBO, RHSBO);
    std::swap(LHS, RHS);
    // The negation, together with the assert message, implies swapOperands()
    // returns true on failure.  Success is read only by the assert in the
    // visible code.
    bool Success = !I->swapOperands();
    assert(Success && "swapOperands failed");
    MadeChange = true;
  } else if (RHSBO) {
    // Turn (A+B)+(C+D) -> (((A+B)+C)+D).  This guarantees the RHS is not
    // part of the expression tree.
    LHS = LHSBO = cast<BinaryOperator>(I->getOperand(0));
    RHS = I->getOperand(1);
    RHSBO = 0;

  // Okay, now we know that the LHS is a nested expression and that the RHS is
  // not.  Perform reassociation.
  assert(!isReassociableOp(RHS, Opcode) && "LinearizeExpr failed!");

  // Move LHS right before I to make sure that the tree expression dominates all
  // values.

  // Linearize the expression tree on the LHS.
  LinearizeExprTree(LHSBO, Ops);

  // Remember the RHS operand and its rank.
  Ops.push_back(ValueEntry(getRank(RHS), RHS));
  // Clear the RHS leaf out.
  I->setOperand(1, UndefValue::get(I->getType()));