void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
                                                  const SCEV *Base,
                                                  uint64_t ElementSize,
                                                  GetElementPtrInst *GEP) {
  // At least, ArrayIdx = ArrayIdx *nsw 1.
  allocateCandidatesAndFindBasisForGEP(
      Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1),
      ArrayIdx, ElementSize, GEP);
  Value *LHS = nullptr;
  ConstantInt *RHS = nullptr;
  // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx
  // itself. This would allow us to handle the shl case for free. However,
  // matching SCEVs has two issues:
  //
  // 1. this would complicate rewriting because the rewriting procedure
  // would have to translate SCEVs back to IR instructions. This translation
  // is difficult when LHS is further evaluated to a composite SCEV.
  //
  // 2. ScalarEvolution is designed to be control-flow oblivious. It tends
  // to strip nsw/nuw flags which are critical for SLSR to trace into
  // sext'ed multiplication.
  if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) {
    // SLSR is currently unsafe if i * S may overflow.
    // GEP = Base + sext(LHS *nsw RHS) * ElementSize
    allocateCandidatesAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
  } else if (match(ArrayIdx, m_NSWShl(m_Value(LHS), m_ConstantInt(RHS)))) {
    // GEP = Base + sext(LHS <<nsw RHS) * ElementSize
    //     = Base + sext(LHS *nsw (1 << RHS)) * ElementSize
    APInt One(RHS->getBitWidth(), 1);
    ConstantInt *PowerOf2 =
        ConstantInt::get(RHS->getContext(), One << RHS->getValue());
    allocateCandidatesAndFindBasisForGEP(Base, PowerOf2, LHS, ElementSize, GEP);
  }
}
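// Standalone illustrative sketch (not part of the pass, plain C++ only): the
// shl branch above relies on the identity (LHS << RHS) == LHS * (1 << RHS),
// which is exactly what the PowerOf2 constant encodes before the candidate is
// recorded. Unsigned values are used to keep the shifts well defined.
#include <cassert>
#include <cstdint>

static void slsrShlToMulDemo(uint64_t LHS) {
  for (unsigned RHS = 0; RHS < 8; ++RHS)
    assert((LHS << RHS) == LHS * (uint64_t(1) << RHS));
}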
bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred,
                                Value *&X, Value *&Y, Value *&Z) {
  ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1));
  if (!C)
    return false;

  switch (I->getPredicate()) {
  default:
    return false;
  case ICmpInst::ICMP_SLT:
    // X < 0 is equivalent to (X & SignBit) != 0.
    if (!C->isZero())
      return false;
    Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth()));
    Pred = ICmpInst::ICMP_NE;
    break;
  case ICmpInst::ICMP_SGT:
    // X > -1 is equivalent to (X & SignBit) == 0.
    if (!C->isAllOnesValue())
      return false;
    Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth()));
    Pred = ICmpInst::ICMP_EQ;
    break;
  case ICmpInst::ICMP_ULT:
    // X <u 2^n is equivalent to (X & ~(2^n-1)) == 0.
    if (!C->getValue().isPowerOf2())
      return false;
    Y = ConstantInt::get(I->getContext(), -C->getValue());
    Pred = ICmpInst::ICMP_EQ;
    break;
  case ICmpInst::ICMP_UGT:
    // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0.
    if (!(C->getValue() + 1).isPowerOf2())
      return false;
    Y = ConstantInt::get(I->getContext(), ~C->getValue());
    Pred = ICmpInst::ICMP_NE;
    break;
  }

  X = I->getOperand(0);
  Z = ConstantInt::getNullValue(C->getType());
  return true;
}
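// Standalone illustrative sketch (not LLVM code, plain C++ only): checks the
// four equivalences the switch above relies on, using 32-bit integers. The
// mask in the signed cases corresponds to APInt::getSignBit, and because
// -(2^n) == ~(2^n - 1) in two's complement, the ICMP_ULT case can build its
// mask as -C->getValue().
#include <cassert>
#include <cstdint>

static void bitTestEquivalenceDemo(int32_t X) {
  const uint32_t SignBit = 0x80000000u;
  const uint32_t U = static_cast<uint32_t>(X);
  const uint32_t Pow2 = 1u << 12; // an arbitrary power of two

  assert((X < 0) == ((U & SignBit) != 0));            // ICMP_SLT against 0
  assert((X > -1) == ((U & SignBit) == 0));           // ICMP_SGT against -1
  assert((U < Pow2) == ((U & -Pow2) == 0));           // ICMP_ULT against 2^n
  assert((U > Pow2 - 1) == ((U & ~(Pow2 - 1)) != 0)); // ICMP_UGT against 2^n-1
}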
void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
    Value *LHS, Value *RHS, Instruction *I) {
  Value *S = nullptr;
  ConstantInt *Idx = nullptr;
  if (match(RHS, m_Mul(m_Value(S), m_ConstantInt(Idx)))) {
    // I = LHS + RHS = LHS + Idx * S
    allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
  } else if (match(RHS, m_Shl(m_Value(S), m_ConstantInt(Idx)))) {
    // I = LHS + RHS = LHS + (S << Idx) = LHS + S * (1 << Idx)
    APInt One(Idx->getBitWidth(), 1);
    Idx = ConstantInt::get(Idx->getContext(), One << Idx->getValue());
    allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
  } else {
    // At least, I = LHS + 1 * RHS
    ConstantInt *One = ConstantInt::get(cast<IntegerType>(I->getType()), 1);
    allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), One, RHS,
                                   I);
  }
}
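// Standalone illustrative sketch (not part of the pass, plain C++ only): each
// branch above records the add in the canonical form LHS + Idx * S. The mul
// path reads Idx off directly, the shl path uses (S << Idx) == S * (1 << Idx),
// and the fallback treats a plain RHS as 1 * RHS.
#include <cassert>
#include <cstdint>

static void addCandidateFormsDemo(uint64_t LHS, uint64_t S) {
  assert(LHS + S * 5 == LHS + 5 * S);                     // mul path, Idx = 5
  assert(LHS + (S << 3) == LHS + S * (uint64_t(1) << 3)); // shl path, Idx = 3
  assert(LHS + S == LHS + 1 * S);                         // fallback, Idx = 1
}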
/// foldSelectICmpAnd - If one of the constants is zero (we know they can't
/// both be) and we have an icmp instruction with zero, and we have an 'and'
/// with the non-constant value and a power of two we can turn the select
/// into a shift on the result of the 'and'.
static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
                                ConstantInt *FalseVal,
                                InstCombiner::BuilderTy *Builder) {
  const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
  if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
    return nullptr;

  if (!match(IC->getOperand(1), m_Zero()))
    return nullptr;

  ConstantInt *AndRHS;
  Value *LHS = IC->getOperand(0);
  if (!match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
    return nullptr;

  // If both select arms are non-zero see if we have a select of the form
  // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic
  // for 'x ? 2^n : 0' and fix the thing up at the end.
  ConstantInt *Offset = nullptr;
  if (!TrueVal->isZero() && !FalseVal->isZero()) {
    if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2())
      Offset = FalseVal;
    else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2())
      Offset = TrueVal;
    else
      return nullptr;

    // Adjust TrueVal and FalseVal to the offset.
    TrueVal = ConstantInt::get(Builder->getContext(),
                               TrueVal->getValue() - Offset->getValue());
    FalseVal = ConstantInt::get(Builder->getContext(),
                                FalseVal->getValue() - Offset->getValue());
  }

  // Make sure the mask in the 'and' and one of the select arms is a power of 2.
  if (!AndRHS->getValue().isPowerOf2() ||
      (!TrueVal->getValue().isPowerOf2() &&
       !FalseVal->getValue().isPowerOf2()))
    return nullptr;

  // Determine which shift is needed to transform result of the 'and' into the
  // desired result.
  ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal;
  unsigned ValZeros = ValC->getValue().logBase2();
  unsigned AndZeros = AndRHS->getValue().logBase2();

  // If types don't match we can still convert the select by introducing a zext
  // or a trunc of the 'and'. The trunc case requires that all of the truncated
  // bits are zero; we can figure that out by looking at the 'and' mask.
  if (AndZeros >= ValC->getBitWidth())
    return nullptr;

  Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType());
  if (ValZeros > AndZeros)
    V = Builder->CreateShl(V, ValZeros - AndZeros);
  else if (ValZeros < AndZeros)
    V = Builder->CreateLShr(V, AndZeros - ValZeros);

  // Okay, now we know that everything is set up, we just don't know whether we
  // have an icmp_ne or icmp_eq and whether the true or false val is the zero.
  bool ShouldNotVal = !TrueVal->isZero();
  ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
  if (ShouldNotVal)
    V = Builder->CreateXor(V, ValC);

  // Apply an offset if needed.
  if (Offset)
    V = Builder->CreateAdd(V, Offset);
  return V;
}
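// Standalone illustrative sketch (not InstCombine code, plain C++ only): shows
// the core rewrite for the simplest arm values, i.e. turning
//   select (icmp ne (X & 2^m), 0), 2^n, 0
// into a shift of (X & 2^m) by |n - m|; the XOR and the Offset add in the code
// above only adjust this result for the icmp_eq and 'x ? 2^n + C : C' forms.
#include <cassert>
#include <cstdint>

static uint32_t selectOfAndAsShift(uint32_t X, unsigned m, unsigned n) {
  const uint32_t Masked = X & (1u << m); // result of the 'and'
  return n > m ? Masked << (n - m) : Masked >> (m - n);
}

static void foldSelectICmpAndDemo(uint32_t X) {
  const unsigned m = 3, n = 7; // arbitrary bit positions
  const uint32_t Expected = (X & (1u << m)) != 0 ? (1u << n) : 0u;
  assert(selectOfAndAsShift(X, m, n) == Expected);
}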
bool IntrinsicCleanerPass::runOnBasicBlock(BasicBlock &b, Module &M) {
  bool dirty = false;
  bool block_split = false;

#if LLVM_VERSION_CODE <= LLVM_VERSION(3, 1)
  unsigned WordSize = TargetData.getPointerSizeInBits() / 8;
#else
  unsigned WordSize = DataLayout.getPointerSizeInBits() / 8;
#endif

  for (BasicBlock::iterator i = b.begin(), ie = b.end();
       (i != ie) && (block_split == false);) {
    IntrinsicInst *ii = dyn_cast<IntrinsicInst>(&*i);
    // increment now since LowerIntrinsic deletion makes iterator invalid.
    ++i;
    if (ii) {
      switch (ii->getIntrinsicID()) {
      case Intrinsic::vastart:
      case Intrinsic::vaend:
        break;

      // Lower vacopy so that object resolution etc is handled by
      // normal instructions.
      //
      // FIXME: This is much more target dependent than just the word size,
      // however this works for x86-32 and x86-64.
      case Intrinsic::vacopy: { // (dst, src) -> *((i8**) dst) = *((i8**) src)
        Value *dst = ii->getArgOperand(0);
        Value *src = ii->getArgOperand(1);

        if (WordSize == 4) {
          Type *i8pp = PointerType::getUnqual(
              PointerType::getUnqual(Type::getInt8Ty(getGlobalContext())));
          Value *castedDst =
              CastInst::CreatePointerCast(dst, i8pp, "vacopy.cast.dst", ii);
          Value *castedSrc =
              CastInst::CreatePointerCast(src, i8pp, "vacopy.cast.src", ii);
          Value *load = new LoadInst(castedSrc, "vacopy.read", ii);
          new StoreInst(load, castedDst, false, ii);
        } else {
          assert(WordSize == 8 && "Invalid word size!");
          Type *i64p =
              PointerType::getUnqual(Type::getInt64Ty(getGlobalContext()));
          Value *pDst =
              CastInst::CreatePointerCast(dst, i64p, "vacopy.cast.dst", ii);
          Value *pSrc =
              CastInst::CreatePointerCast(src, i64p, "vacopy.cast.src", ii);
          Value *val = new LoadInst(pSrc, std::string(), ii);
          new StoreInst(val, pDst, ii);
          Value *off =
              ConstantInt::get(Type::getInt64Ty(getGlobalContext()), 1);
          pDst = GetElementPtrInst::Create(pDst, off, std::string(), ii);
          pSrc = GetElementPtrInst::Create(pSrc, off, std::string(), ii);
          val = new LoadInst(pSrc, std::string(), ii);
          new StoreInst(val, pDst, ii);
          pDst = GetElementPtrInst::Create(pDst, off, std::string(), ii);
          pSrc = GetElementPtrInst::Create(pSrc, off, std::string(), ii);
          val = new LoadInst(pSrc, std::string(), ii);
          new StoreInst(val, pDst, ii);
        }
        ii->removeFromParent();
        delete ii;
        break;
      }

      case Intrinsic::sadd_with_overflow:
      case Intrinsic::ssub_with_overflow:
      case Intrinsic::smul_with_overflow:
      case Intrinsic::uadd_with_overflow:
      case Intrinsic::usub_with_overflow:
      case Intrinsic::umul_with_overflow: {
        IRBuilder<> builder(ii->getParent(), ii);

        Value *op1 = ii->getArgOperand(0);
        Value *op2 = ii->getArgOperand(1);

        Value *result = 0;
        Value *result_ext = 0;
        Value *overflow = 0;

        unsigned int bw = op1->getType()->getPrimitiveSizeInBits();
        unsigned int bw2 = op1->getType()->getPrimitiveSizeInBits() * 2;

        if ((ii->getIntrinsicID() == Intrinsic::uadd_with_overflow) ||
            (ii->getIntrinsicID() == Intrinsic::usub_with_overflow) ||
            (ii->getIntrinsicID() == Intrinsic::umul_with_overflow)) {
          Value *op1ext =
              builder.CreateZExt(op1, IntegerType::get(M.getContext(), bw2));
          Value *op2ext =
              builder.CreateZExt(op2, IntegerType::get(M.getContext(), bw2));
          Value *int_max_s =
              ConstantInt::get(op1->getType(), APInt::getMaxValue(bw));
          Value *int_max = builder.CreateZExt(
              int_max_s, IntegerType::get(M.getContext(), bw2));

          if (ii->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
            result_ext = builder.CreateAdd(op1ext, op2ext);
          } else if (ii->getIntrinsicID() == Intrinsic::usub_with_overflow) {
            result_ext = builder.CreateSub(op1ext, op2ext);
          } else if (ii->getIntrinsicID() == Intrinsic::umul_with_overflow) {
            result_ext =
                builder.CreateMul(op1ext, op2ext);
          }
          overflow = builder.CreateICmpUGT(result_ext, int_max);
        } else if ((ii->getIntrinsicID() == Intrinsic::sadd_with_overflow) ||
                   (ii->getIntrinsicID() == Intrinsic::ssub_with_overflow) ||
                   (ii->getIntrinsicID() == Intrinsic::smul_with_overflow)) {
          Value *op1ext =
              builder.CreateSExt(op1, IntegerType::get(M.getContext(), bw2));
          Value *op2ext =
              builder.CreateSExt(op2, IntegerType::get(M.getContext(), bw2));
          Value *int_max_s =
              ConstantInt::get(op1->getType(), APInt::getSignedMaxValue(bw));
          Value *int_min_s =
              ConstantInt::get(op1->getType(), APInt::getSignedMinValue(bw));
          Value *int_max = builder.CreateSExt(
              int_max_s, IntegerType::get(M.getContext(), bw2));
          Value *int_min = builder.CreateSExt(
              int_min_s, IntegerType::get(M.getContext(), bw2));

          if (ii->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
            result_ext = builder.CreateAdd(op1ext, op2ext);
          } else if (ii->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
            result_ext = builder.CreateSub(op1ext, op2ext);
          } else if (ii->getIntrinsicID() == Intrinsic::smul_with_overflow) {
            result_ext = builder.CreateMul(op1ext, op2ext);
          }

          overflow =
              builder.CreateOr(builder.CreateICmpSGT(result_ext, int_max),
                               builder.CreateICmpSLT(result_ext, int_min));
        }

        // This trunc could be replaced by a more general trunc replacement
        // that also allows detecting undefined behavior in assignments, or
        // overflow in operations on integers whose width is smaller than
        // int's width, e.g.
        //   uint8_t = uint8_t + uint8_t;
        // If wrapping is desired, one should write
        //   uint8_t = (uint8_t + uint8_t) & 0xFF;
        // Before doing this, check whether it has side effects on other
        // operations.
        result = builder.CreateTrunc(result_ext, op1->getType());
        Value *resultStruct = builder.CreateInsertValue(
            UndefValue::get(ii->getType()), result, 0);
        resultStruct = builder.CreateInsertValue(resultStruct, overflow, 1);

        ii->replaceAllUsesWith(resultStruct);
        ii->removeFromParent();
        delete ii;
        dirty = true;
        break;
      }

      case Intrinsic::dbg_value:
      case Intrinsic::dbg_declare:
        // Remove these regardless of lower intrinsics flag. This can
        // be removed once IntrinsicLowering is fixed to not have bad
        // caches.
        ii->eraseFromParent();
        dirty = true;
        break;

      case Intrinsic::trap: {
        // Intrinsic instruction "llvm.trap" found. Directly lower it to
        // a call of the abort() function.
        Function *F = cast<Function>(M.getOrInsertFunction(
            "abort", Type::getVoidTy(getGlobalContext()), NULL));
        F->setDoesNotReturn();
        F->setDoesNotThrow();

        CallInst::Create(F, Twine(), ii);
        new UnreachableInst(getGlobalContext(), ii);

        ii->eraseFromParent();
        dirty = true;
        break;
      }

      case Intrinsic::objectsize: {
        // We don't know the size of an object in general so we replace
        // with 0 or -1 depending on the second argument to the intrinsic.
        assert(ii->getNumArgOperands() == 2 && "wrong number of arguments");
        Value *minArg = ii->getArgOperand(1);
        assert(minArg && "Failed to get second argument");
        ConstantInt *minArgAsInt = dyn_cast<ConstantInt>(minArg);
        assert(minArgAsInt && "Second arg is not a ConstantInt");
        assert(minArgAsInt->getBitWidth() == 1 &&
               "Second argument is not an i1");

        Value *replacement = NULL;
        LLVM_TYPE_Q IntegerType *intType = dyn_cast<IntegerType>(ii->getType());
        assert(intType && "intrinsic does not have integer return type");
        if (minArgAsInt->isZero()) {
          // min=false
          replacement = ConstantInt::get(intType, -1, /*isSigned=*/true);
        } else {
          // min=true
          replacement = ConstantInt::get(intType, 0, /*isSigned=*/false);
        }
        ii->replaceAllUsesWith(replacement);
        ii->eraseFromParent();
        dirty = true;
        break;
      }

      default:
        if (LowerIntrinsics)
          IL->LowerIntrinsicCall(ii);
        dirty = true;
        break;
      }
    }
  }

  return dirty;
}
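// Standalone illustrative sketch (not KLEE code, plain C++ only): mirrors the
// unsigned *_with_overflow lowering above on concrete 32-bit values. The
// operands are widened to twice the bit width, the operation is done there,
// overflow is flagged when the wide result exceeds the unsigned maximum of the
// original width, and the returned value is the truncation back to that width.
#include <cassert>
#include <cstdint>
#include <limits>

static void uaddWithOverflowDemo(uint32_t a, uint32_t b) {
  const uint64_t Wide = uint64_t(a) + uint64_t(b);     // zext + add
  const bool Overflow = Wide > std::numeric_limits<uint32_t>::max();
  const uint32_t Result = static_cast<uint32_t>(Wide); // trunc back

  assert(Result == static_cast<uint32_t>(a + b));                // same wrapped value
  assert(Overflow == (static_cast<uint32_t>(a + b) < a));        // classic carry check
}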