void GraphBuilder::visitPtrToIntInst(PtrToIntInst& I) {
  DSNode* N = getValueDest(I.getOperand(0)).getNode();

  if (I.hasOneUse()) {
    if (isa<ICmpInst>(*(I.use_begin()))) {
      NumBoringIntToPtr++;
      return;
    }
  }
  if (I.hasOneUse()) {
    Value *V = dyn_cast<Value>(*(I.use_begin()));
    DenseSet<Value *> Seen;
    while (V && V->hasOneUse() && Seen.insert(V).second) {
      if (isa<LoadInst>(V))
        break;
      if (isa<StoreInst>(V))
        break;
      if (isa<CallInst>(V))
        break;
      V = dyn_cast<Value>(*(V->use_begin()));
    }
    // Guard against a null V before the final check.
    if (V && isa<BranchInst>(V)) {
      NumBoringIntToPtr++;
      return;
    }
  }
  if (N)
    N->setPtrToIntMarker();
}
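// A minimal sketch of the "boring" case skipped above (value names invented
// for illustration): a ptrtoint whose only user is a comparison, e.g.
//   %i = ptrtoint i32* %p to i64
//   %c = icmp eq i64 %i, 0
// never flows back into memory as a pointer, so the node does not need the
// ptrtoint marker.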
/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0,    i8 %y, 0
/// Here the second instruction inserts values at the same indices as the
/// first one, making the first one redundant.
/// It should be transformed to:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction.  In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    Depth++;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getOperand(0));
  return nullptr;
}
bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If the result of a {s|z}ext and its source are both live out, rewrite all
  // other uses of the source with result of extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative. We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block once.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block. Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;
    ++NumExtUses;
    MadeChange = true;
  }

  return MadeChange;
}
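// A minimal sketch of the rewrite above (value names invented for
// illustration).  If both the extension and its source are live out of the
// defining block:
//   bb0:
//     %x = load i16* %p
//     %e = zext i16 %x to i32
//     br label %bb1
//   bb1:
//     ; ... uses of both %x and %e ...
// then, since the truncate is free, each out-of-block use of %x is rewritten
// to use a locally inserted trunc of %e:
//   bb1:
//     %t = trunc i32 %e to i16
//     ; ... uses of %t and %e ...
// so only %e remains live across the block boundary.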
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Try to canonicalize the loaded type.
  if (Instruction *Res = combineLoadToOperationType(*this, LI))
    return Res;

  // Attempt to improve the alignment.
  unsigned KnownAlign = getOrEnforceKnownAlignment(
      Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
  unsigned LoadAlign = LI.getAlignment();
  unsigned EffectiveLoadAlign =
      LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());

  if (KnownAlign > EffectiveLoadAlign)
    LI.setAlignment(KnownAlign);
  else if (LoadAlign == 0)
    LI.setAlignment(EffectiveLoadAlign);

  // Replace GEP indices if possible.
  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
    Worklist.Add(NewGEPI);
    return &LI;
  }

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return nullptr;

  if (Instruction *Res = unpackLoadToAggregate(*this, LI))
    return Res;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI(LI);
  AAMDNodes AATags;
  if (Value *AvailableVal =
          FindAvailableLoadedValue(Op, LI.getParent(), BBI,
                                   DefMaxInstsToScan, AA, &AATags)) {
    if (LoadInst *NLI = dyn_cast<LoadInst>(AvailableVal)) {
      unsigned KnownIDs[] = {
          LLVMContext::MD_tbaa,            LLVMContext::MD_alias_scope,
          LLVMContext::MD_noalias,         LLVMContext::MD_range,
          LLVMContext::MD_invariant_load,  LLVMContext::MD_nonnull,
          LLVMContext::MD_invariant_group, LLVMContext::MD_align,
          LLVMContext::MD_dereferenceable,
          LLVMContext::MD_dereferenceable_or_null};
      combineMetadata(NLI, &LI, KnownIDs);
    }

    return ReplaceInstUsesWith(
        LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
                                            LI.getName() + ".cast"));
  }

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) &&
        GEPI->getPointerAddressSpace() == 0) {
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, SI) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), Align, SI)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
          LI.getPointerAddressSpace() == 0) {
        LI.setOperand(0, SI->getOperand(2));
        return &LI;
      }

      // load (select (cond, P, null)) -> load P
      if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
          LI.getPointerAddressSpace() == 0) {
        LI.setOperand(0, SI->getOperand(1));
        return &LI;
      }
    }
  }
  return nullptr;
}
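// A minimal sketch of the select transform above (value names invented for
// illustration):
//   %p = select i1 %c, i32* %a, i32* %b
//   %v = load i32, i32* %p
// becomes, when both pointers are known safe to load from unconditionally:
//   %a.val = load i32, i32* %a
//   %b.val = load i32, i32* %b
//   %v     = select i1 %c, i32 %a.val, i32 %b.val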
Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Try to canonicalize the stored type.
  if (combineStoreToValueType(*this, SI))
    return EraseInstFromFunction(SI);

  // Attempt to improve the alignment.
  unsigned KnownAlign = getOrEnforceKnownAlignment(
      Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
  unsigned StoreAlign = SI.getAlignment();
  unsigned EffectiveStoreAlign =
      StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());

  if (KnownAlign > EffectiveStoreAlign)
    SI.setAlignment(KnownAlign);
  else if (StoreAlign == 0)
    SI.setAlignment(EffectiveStoreAlign);

  // Try to canonicalize the stored type.
  if (unpackStoreToAggregate(*this, SI))
    return EraseInstFromFunction(SI);

  // Replace GEP indices if possible.
  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
    Worklist.Add(NewGEPI);
    return &SI;
  }

  // Don't hack volatile/ordered stores.
  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
  if (!SI.isUnordered()) return nullptr;

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations.
  // This situation often occurs with bitfield accesses.
  BasicBlock::iterator BBI(SI);
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen,
    // and we skip pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isUnordered() &&
          equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if the loaded value is from
    // the pointer we're loading and is producing the pointer we're storing,
    // then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
        assert(SI.isUnordered() && "can't eliminate ordering operation");
        return EraseInstFromFunction(SI);
      }

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return nullptr;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // The code below needs to be audited and adjusted for unordered atomics
  if (!SI.isSimple())
    return nullptr;

  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = SI.getIterator();
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return nullptr;  // xform done!

  return nullptr;
}
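// A minimal sketch of the trivial DSE performed by the backward scan above
// (value names invented for illustration):
//   store i32 %old, i32* %p
//   %x = add i32 %a, %b        ; non-memory instructions are scanned over
//   store i32 %new, i32* %p
// The first store is deleted: the second unordered store to the same address
// overwrites it with no intervening read.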
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
  bool Changed = SimplifyAssociativeOrCommutative(I);
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

  if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
                                 I.hasNoUnsignedWrap(), TD))
    return ReplaceInstUsesWith(I, V);

  // (A*B)+(A*C) -> A*(B+C) etc
  if (Value *V = SimplifyUsingDistributiveLaws(I))
    return ReplaceInstUsesWith(I, V);

  if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
    // X + (signbit) --> X ^ signbit
    const APInt &Val = CI->getValue();
    if (Val.isSignBit())
      return BinaryOperator::CreateXor(LHS, RHS);

    // See if SimplifyDemandedBits can simplify this.  This handles stuff like
    // (X & 254)+1 -> (X&254)|1
    if (SimplifyDemandedInstructionBits(I))
      return &I;

    // zext(bool) + C -> bool ? C + 1 : C
    if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
      if (ZI->getSrcTy()->isIntegerTy(1))
        return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);

    Value *XorLHS = 0; ConstantInt *XorRHS = 0;
    if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
      uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
      const APInt &RHSVal = CI->getValue();
      unsigned ExtendAmt = 0;
      // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
      // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
      if (XorRHS->getValue() == -RHSVal) {
        if (RHSVal.isPowerOf2())
          ExtendAmt = TySizeBits - RHSVal.logBase2() - 1;
        else if (XorRHS->getValue().isPowerOf2())
          ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
      }

      if (ExtendAmt) {
        APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
        if (!MaskedValueIsZero(XorLHS, Mask))
          ExtendAmt = 0;
      }

      if (ExtendAmt) {
        Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
        Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
        return BinaryOperator::CreateAShr(NewShl, ShAmt);
      }
    }
  }

  if (isa<Constant>(RHS) && isa<PHINode>(LHS))
    if (Instruction *NV = FoldOpIntoPhi(I))
      return NV;

  if (I.getType()->isIntegerTy(1))
    return BinaryOperator::CreateXor(LHS, RHS);

  // X + X --> X << 1
  if (LHS == RHS) {
    BinaryOperator *New =
      BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
    New->setHasNoSignedWrap(I.hasNoSignedWrap());
    New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
    return New;
  }

  // -A + B  -->  B - A
  // -A + -B  -->  -(A + B)
  if (Value *LHSV = dyn_castNegVal(LHS)) {
    if (Value *RHSV = dyn_castNegVal(RHS)) {
      Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
      return BinaryOperator::CreateNeg(NewAdd);
    }

    return BinaryOperator::CreateSub(RHS, LHSV);
  }

  // A + -B  -->  A - B
  if (!isa<Constant>(RHS))
    if (Value *V = dyn_castNegVal(RHS))
      return BinaryOperator::CreateSub(LHS, V);

  ConstantInt *C2;
  if (Value *X = dyn_castFoldableMul(LHS, C2)) {
    if (X == RHS)   // X*C + X --> X * (C+1)
      return BinaryOperator::CreateMul(RHS, AddOne(C2));

    // X*C1 + X*C2 --> X * (C1+C2)
    ConstantInt *C1;
    if (X == dyn_castFoldableMul(RHS, C1))
      return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
  }

  // X + X*C --> X * (C+1)
  if (dyn_castFoldableMul(RHS, C2) == LHS)
    return BinaryOperator::CreateMul(LHS, AddOne(C2));

  // A+B --> A|B iff A and B have no bits set in common.
  if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
    APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
    APInt LHSKnownOne(IT->getBitWidth(), 0);
    APInt LHSKnownZero(IT->getBitWidth(), 0);
    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
    if (LHSKnownZero != 0) {
      APInt RHSKnownOne(IT->getBitWidth(), 0);
      APInt RHSKnownZero(IT->getBitWidth(), 0);
      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);

      // No bits in common -> bitwise or.
      if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
        return BinaryOperator::CreateOr(LHS, RHS);
    }
  }

  // W*X + Y*Z --> W * (X+Z)  iff W == Y
  {
    Value *W, *X, *Y, *Z;
    if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
        match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
      if (W != Y) {
        if (W == Z) {
          std::swap(Y, Z);
        } else if (Y == X) {
          std::swap(W, X);
        } else if (X == Z) {
          std::swap(Y, Z);
          std::swap(W, X);
        }
      }

      if (W == Y) {
        Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
        return BinaryOperator::CreateMul(W, NewAdd);
      }
    }
  }

  if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
    Value *X = 0;
    if (match(LHS, m_Not(m_Value(X))))    // ~X + C --> (C-1) - X
      return BinaryOperator::CreateSub(SubOne(CRHS), X);

    // (X & FF00) + xx00  -> (X+xx00) & FF00
    if (LHS->hasOneUse() &&
        match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) &&
        CRHS->getValue() == (CRHS->getValue() & C2->getValue())) {
      // See if all bits from the first bit set in the Add RHS up are included
      // in the mask.  First, get the rightmost bit.
      const APInt &AddRHSV = CRHS->getValue();

      // Form a mask of all bits from the lowest bit added through the top.
      APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));

      // See if the and mask includes all of these bits.
      APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());

      if (AddRHSHighBits == AddRHSHighBitsAnd) {
        // Okay, the xform is safe.  Insert the new add pronto.
        Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
        return BinaryOperator::CreateAnd(NewAdd, C2);
      }
    }

    // Try to fold constant add into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;
  }

  // add (select X 0 (sub n A)) A  -->  select X A n
  {
    SelectInst *SI = dyn_cast<SelectInst>(LHS);
    Value *A = RHS;
    if (!SI) {
      SI = dyn_cast<SelectInst>(RHS);
      A = LHS;
    }
    if (SI && SI->hasOneUse()) {
      Value *TV = SI->getTrueValue();
      Value *FV = SI->getFalseValue();
      Value *N;

      // Can we fold the add into the argument of the select?
      // We check both true and false select arguments for a matching subtract.
      if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the true select value.
        return SelectInst::Create(SI->getCondition(), N, A);

      if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the false select value.
        return SelectInst::Create(SI->getCondition(), A, N);
    }
  }

  // Check for (add (sext x), y), see if we can merge this into an
  // integer add followed by a sext.
  if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
    // (add (sext x), cst) --> (sext (add x, cst'))
    if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
      Constant *CI =
        ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
      if (LHSConv->hasOneUse() &&
          ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
        // Insert the new, smaller add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              CI, "addconv");
        return new SExtInst(NewAdd, I.getType());
      }
    }

    // (add (sext x), (sext y)) --> (sext (add int x, y))
    if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
      // Only do this if x/y have the same type, if at least one of them has a
      // single use (so we don't increase the number of sexts), and if the
      // integer add will not overflow.
      if (LHSConv->getOperand(0)->getType() ==
            RHSConv->getOperand(0)->getType() &&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                   RHSConv->getOperand(0))) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              RHSConv->getOperand(0),
                                              "addconv");
        return new SExtInst(NewAdd, I.getType());
      }
    }
  }

  return Changed ? &I : 0;
}
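// A minimal sketch of the final fold above (value names invented for
// illustration):
//   %xs = sext i8 %x to i32
//   %ys = sext i8 %y to i32
//   %r  = add i32 %xs, %ys
// becomes, when the narrow add provably cannot overflow in the signed sense:
//   %addconv = add nsw i8 %x, %y
//   %r       = sext i8 %addconv to i32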
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (DL) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()),
                                 DL);
    unsigned LoadAlign = LI.getAlignment();
    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
      DL->getABITypeAlignment(LI.getType());

    if (KnownAlign > EffectiveLoadAlign)
      LI.setAlignment(KnownAlign);
    else if (LoadAlign == 0)
      LI.setAlignment(EffectiveLoadAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
      return Res;

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return nullptr;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) &&
        GEPI->getPointerAddressSpace() == 0) {
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }
  return nullptr;
}
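// A minimal sketch of the unreachable-load rewrite above: a load such as
//   %v = load i32* null
// is preceded by a new 'store undef, null' (which SimplifyCFG later turns
// into unreachable) and all uses of %v are replaced with undef; the CFG
// itself is deliberately left untouched here.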
/// If \param [in] BB has more than one predecessor that is a conditional
/// branch, attempt to use parallel and/or for the branch condition. \returns
/// true on success.
///
/// Before:
///   ......
///   %cmp10 = fcmp une float %tmp1, %tmp2
///   br i1 %cmp10, label %if.then, label %lor.rhs
///
/// lor.rhs:
///   ......
///   %cmp11 = fcmp une float %tmp3, %tmp4
///   br i1 %cmp11, label %if.then, label %if.end
///
/// if.end:  // the merge block
///   ......
///
/// if.then: // has two predecessors, both of which contain conditional
///          // branches.
///   ......
///   br label %if.end;
///
/// After:
///   ......
///   %cmp10 = fcmp une float %tmp1, %tmp2
///   ......
///   %cmp11 = fcmp une float %tmp3, %tmp4
///   %cmp12 = or i1 %cmp10, %cmp11    // parallel-or mode.
///   br i1 %cmp12, label %if.then, label %if.end
///
/// if.end:
///   ......
///
/// if.then:
///   ......
///   br label %if.end;
///
/// Current implementation handles two cases.
/// Case 1: \param BB is on the else-path.
///
///           BB1
///         /    |
///        BB2   |
///       /   \  |
///      BB3   \ |     where, BB1, BB2 contain conditional branches.
///       \    | /     BB3 contains unconditional branch.
///        \   |/      BB4 corresponds to \param BB which is also the merge.
///  BB =>  BB4
///
/// Corresponding source code:
///
///   if (a == b && c == d)
///     statement; // BB3
///
/// Case 2: \param BB is on the then-path.
///
///           BB1
///          /   |
///         |   BB2
///          \  /  |   where BB1, BB2 contain conditional branches.
///  BB =>   BB3   |   BB3 contains unconditional branch and corresponds
///            \  /    to \param BB.  BB4 is the merge.
///            BB4
///
/// Corresponding source code:
///
///   if (a == b || c == d)
///     statement;  // BB3
///
/// In both cases, \param BB is the common successor of conditional branches.
/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
/// its predecessor.  In Case 2, \param BB (BB3) only has conditional branches
/// as its predecessors.
///
bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
                                         Pass *P) {
  PHINode *PHI = dyn_cast<PHINode>(BB->begin());
  if (PHI)
    return false; // For simplicity, avoid cases containing PHI nodes.

  BasicBlock *LastCondBlock = NULL;
  BasicBlock *FirstCondBlock = NULL;
  BasicBlock *UnCondBlock = NULL;
  int Idx = -1;

  // Check predecessors of \param BB.
  SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
  for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
       PI != PE; ++PI) {
    BasicBlock *Pred = *PI;
    BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());

    // All predecessors should terminate with a branch.
    if (!PBI)
      return false;

    BasicBlock *PP = Pred->getSinglePredecessor();

    if (PBI->isUnconditional()) {
      // Case 1: Pred (BB3) is an unconditional block; it should
      // have a single predecessor (BB2) that is also a predecessor
      // of \param BB (BB4) and should not have its address taken.
      // There should exist only one such unconditional
      // branch among the predecessors.
      if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
          Pred->hasAddressTaken())
        return false;

      UnCondBlock = Pred;
      continue;
    }

    // Only conditional branches are allowed beyond this point.
    assert(PBI->isConditional());

    // Condition's unique use should be the branch instruction.
    Value *PC = PBI->getCondition();
    if (!PC || !PC->hasOneUse())
      return false;

    if (PP && Preds.count(PP)) {
      // These are internal condition blocks to be merged from, e.g.,
      // BB2 in both cases.
      // Should not have its address taken.
      if (Pred->hasAddressTaken())
        return false;

      // Instructions in the internal condition blocks should be safe
      // to hoist up.
      for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) {
        Instruction *CI = BI++;
        if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
          return false;
      }
    } else {
      // This is the condition block to be merged into, e.g. BB1 in
      // both cases.
      if (FirstCondBlock)
        return false;
      FirstCondBlock = Pred;
    }

    // Find whether BB is uniformly on the true (or false) path
    // for all of its predecessors.
    BasicBlock *PS1 = PBI->getSuccessor(0);
    BasicBlock *PS2 = PBI->getSuccessor(1);
    BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
    int CIdx = (PS1 == BB) ? 0 : 1;

    if (Idx == -1)
      Idx = CIdx;
    else if (CIdx != Idx)
      return false;

    // PS is the successor which is not BB. Check successors to identify
    // the last conditional branch.
    if (Preds.count(PS) == 0) {
      // Case 2.
      LastCondBlock = Pred;
    } else {
      // Case 1
      BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
      if (BPS && BPS->isUnconditional()) {
        // Case 1: PS (BB3) should end in an unconditional branch.
        LastCondBlock = Pred;
      }
    }
  }

  if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
    return false;

  TerminatorInst *TBB = LastCondBlock->getTerminator();
  BasicBlock *PS1 = TBB->getSuccessor(0);
  BasicBlock *PS2 = TBB->getSuccessor(1);
  BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
  BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());

  // If PS1 does not jump into PS2, but PS2 jumps into PS1,
  // attempt branch inversion.
  if (!PBI1 || !PBI1->isUnconditional() ||
      (PS1->getTerminator()->getSuccessor(0) != PS2)) {
    // Check whether PS2 jumps into PS1.
    if (!PBI2 || !PBI2->isUnconditional() ||
        (PS2->getTerminator()->getSuccessor(0) != PS1))
      return false;

    // Do branch inversion.
    BasicBlock *CurrBlock = LastCondBlock;
    bool EverChanged = false;
    while (1) {
      BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
      // The condition may not be a compare; dyn_cast returns null then.
      if (CI) {
        CmpInst::Predicate Predicate = CI->getPredicate();
        // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq.
        if ((Predicate == CmpInst::ICMP_NE) ||
            (Predicate == CmpInst::FCMP_ONE)) {
          CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
          BI->swapSuccessors();
          EverChanged = true;
        }
      }
      if (CurrBlock == FirstCondBlock)
        break;
      CurrBlock = CurrBlock->getSinglePredecessor();
    }
    return EverChanged;
  }

  // PS1 must end with an unconditional branch.
  if (!PBI1 || !PBI1->isUnconditional())
    return false;

  // PS2 should not contain a PHI node.
  PHI = dyn_cast<PHINode>(PS2->begin());
  if (PHI)
    return false;

  // Do the transformation.
  BasicBlock *CB;
  BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
  bool Iteration = true;
  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
  Value *PC = PBI->getCondition();

  do {
    CB = PBI->getSuccessor(1 - Idx);
    // Delete the conditional branch.
    FirstCondBlock->getInstList().pop_back();
    FirstCondBlock->getInstList()
        .splice(FirstCondBlock->end(), CB->getInstList());
    PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
    Value *CC = PBI->getCondition();
    // Merge conditions.
    Builder.SetInsertPoint(PBI);
    Value *NC;
    if (Idx == 0)
      // Case 2, use parallel or.
      NC = Builder.CreateOr(PC, CC);
    else
      // Case 1, use parallel and.
      NC = Builder.CreateAnd(PC, CC);

    PBI->replaceUsesOfWith(CC, NC);
    PC = NC;
    if (CB == LastCondBlock)
      Iteration = false;
    // Remove internal conditional branches.
    CB->dropAllReferences();
    // Make CB unreachable and let downstream passes delete the block.
    new UnreachableInst(CB->getContext(), CB);
  } while (Iteration);

  Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
  DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
  return true;
}
Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  // If the RHS is an alloca with two uses, the other one being a
  // llvm.dbg.declare, zapify the store and the declare, making the
  // alloca dead.  We must do this to prevent declares from affecting
  // codegen.
  if (!SI.isVolatile()) {
    if (Ptr->hasOneUse()) {
      if (isa<AllocaInst>(Ptr))
        return EraseInstFromFunction(SI);
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
        if (isa<AllocaInst>(GEP->getOperand(0))) {
          if (GEP->getOperand(0)->hasOneUse())
            return EraseInstFromFunction(SI);
          if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) {
            EraseInstFromFunction(*DI);
            return EraseInstFromFunction(SI);
          }
        }
      }
    }
    if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) {
      EraseInstFromFunction(*DI);
      return EraseInstFromFunction(SI);
    }
  }

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
    unsigned StoreAlign = SI.getAlignment();
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
      TD->getABITypeAlignment(Val->getType());

    if (KnownAlign > EffectiveStoreAlign)
      SI.setAlignment(KnownAlign);
    else if (StoreAlign == 0)
      SI.setAlignment(EffectiveStoreAlign);
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations.
  // This situation often occurs with bitfield accesses.
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen,
    // and we skip pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (!PrevSI->isVolatile() &&
          equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if the loaded value is from
    // the pointer we're loading and is producing the pointer we're storing,
    // then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          !SI.isVolatile())
        return EraseInstFromFunction(SI);
      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  if (SI.isVolatile()) return 0;  // Don't hack volatile stores.

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return 0;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;

  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return 0;  // xform done!

  return 0;
}
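// A minimal sketch of the dead store-to-alloca case above (value names
// invented for illustration):
//   %a = alloca i32
//   store i32 %v, i32* %a      ; the store is %a's only use
// Nothing can ever read the stored value, so the store is erased and the
// alloca becomes trivially dead; the variant with an extra llvm.dbg.declare
// use also drops the declare so it cannot affect codegen.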
/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
/// instruction.
bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
  SelectInst *SI = cast<SelectInst>(I.getOperand(1));

  // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
  int NonNullOperand = -1;
  if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
    if (ST->isNullValue())
      NonNullOperand = 2;
  // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
  if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
    if (ST->isNullValue())
      NonNullOperand = 1;

  if (NonNullOperand == -1)
    return false;

  Value *SelectCond = SI->getOperand(0);

  // Change the div/rem to use 'Y' instead of the select.
  I.setOperand(1, SI->getOperand(NonNullOperand));

  // Okay, we know we can replace the operand of the div/rem with 'Y' with no
  // problem.  However, the select, or the condition of the select, may have
  // multiple uses.  Based on our knowledge that the operand must be non-zero,
  // propagate the known value for the select into other uses of it, and
  // propagate a known value of the condition into its other users.

  // If the select and condition only have a single use, don't bother with
  // this; exit early.
  if (SI->use_empty() && SelectCond->hasOneUse())
    return true;

  // Scan the current block backward, looking for other uses of SI.
  BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();

  while (BBI != BBFront) {
    --BBI;
    // If we found a call to a function, we can't assume it will return, so
    // information from below it cannot be propagated above it.
    if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
      break;

    // Replace uses of the select or its condition with the known values.
    for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
         I != E; ++I) {
      if (*I == SI) {
        *I = SI->getOperand(NonNullOperand);
        Worklist.Add(BBI);
      } else if (*I == SelectCond) {
        *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) :
                                   ConstantInt::getFalse(BBI->getContext());
        Worklist.Add(BBI);
      }
    }

    // If we're past the instruction, quit looking for it.
    if (&*BBI == SI)
      SI = 0;
    if (&*BBI == SelectCond)
      SelectCond = 0;

    // If we ran out of things to eliminate, break out of the loop.
    if (SelectCond == 0 && SI == 0)
      break;
  }
  return true;
}
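// A minimal sketch of the fold above (value names invented for illustration):
//   %d = select i1 %c, i32 0, i32 %y
//   %q = udiv i32 %x, %d
// On any path where the division is defined the divisor must be %y, so this
// becomes
//   %q = udiv i32 %x, %y
// and %y (plus the now-known value of %c) is propagated into earlier uses of
// the select and its condition within the block.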
/// LinearizeExprTree - Given an associative binary expression tree, traverse
/// all of the uses putting it into canonical form.  This forces a left-linear
/// form of the expression (((a+b)+c)+d), and collects information about the
/// rank of the non-tree operands.
///
/// NOTE: This intentionally destroys the expression tree operands (turning
/// them into undef values) to reduce #uses of the values.  This means that the
/// caller MUST use something like RewriteExprTree to put the values back in.
///
void Reassociate::LinearizeExprTree(BinaryOperator *I,
                                    SmallVectorImpl<ValueEntry> &Ops) {
  Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
  unsigned Opcode = I->getOpcode();

  // First step, linearize the expression if it is in ((A+B)+(C+D)) form.
  BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
  BinaryOperator *RHSBO = isReassociableOp(RHS, Opcode);

  // If this is a multiply expression tree and it contains internal negations,
  // transform them into multiplies by -1 so they can be reassociated.
  if (I->getOpcode() == Instruction::Mul) {
    if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) {
      LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap);
      LHSBO = isReassociableOp(LHS, Opcode);
    }
    if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) {
      RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap);
      RHSBO = isReassociableOp(RHS, Opcode);
    }
  }

  if (!LHSBO) {
    if (!RHSBO) {
      // Neither the LHS nor the RHS is part of the tree, thus this is a leaf.
      // As such, just remember these operands and their rank.
      Ops.push_back(ValueEntry(getRank(LHS), LHS));
      Ops.push_back(ValueEntry(getRank(RHS), RHS));

      // Clear the leaves out.
      I->setOperand(0, UndefValue::get(I->getType()));
      I->setOperand(1, UndefValue::get(I->getType()));
      return;
    }

    // Turn X+(Y+Z) -> (Y+Z)+X
    std::swap(LHSBO, RHSBO);
    std::swap(LHS, RHS);
    bool Success = !I->swapOperands();
    assert(Success && "swapOperands failed");
    (void)Success;
    MadeChange = true;
  } else if (RHSBO) {
    // Turn (A+B)+(C+D) -> (((A+B)+C)+D).  This guarantees the RHS is not
    // part of the expression tree.
    LinearizeExpr(I);
    LHS = LHSBO = cast<BinaryOperator>(I->getOperand(0));
    RHS = I->getOperand(1);
    RHSBO = 0;
  }

  // Okay, now we know that the LHS is a nested expression and that the RHS is
  // not.  Perform reassociation.
  assert(!isReassociableOp(RHS, Opcode) && "LinearizeExpr failed!");

  // Move LHS right before I to make sure that the tree expression dominates
  // all values.
  LHSBO->moveBefore(I);

  // Linearize the expression tree on the LHS.
  LinearizeExprTree(LHSBO, Ops);

  // Remember the RHS operand and its rank.
  Ops.push_back(ValueEntry(getRank(RHS), RHS));

  // Clear the RHS leaf out.
  I->setOperand(1, UndefValue::get(I->getType()));
}
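// A minimal sketch of linearization (value names invented for illustration).
// Given the tree
//   %t1 = add i32 %a, %b
//   %t2 = add i32 %c, %d
//   %I  = add i32 %t1, %t2
// LinearizeExpr first reshapes it into the left-linear ((a+b)+c)+d, then the
// recursion walks the left spine, pushing {rank, value} entries for %a, %b,
// %c and %d into Ops while overwriting each consumed tree operand with undef
// (RewriteExprTree is expected to rebuild the tree afterwards).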