int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    return getUserCost(I);

  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return getCFInstrCost(I->getOpcode());
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    TargetTransformInfo::OperandValueKind Op1VK =
        getOperandInfo(I->getOperand(0));
    TargetTransformInfo::OperandValueKind Op2VK =
        getOperandInfo(I->getOperand(1));
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                  TargetTransformInfo::OP_None,
                                  TargetTransformInfo::OP_None, Operands);
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
  }
  case Instruction::Store: {
    const StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return getMemoryOpCost(I->getOpcode(), ValTy, SI->getAlignment(),
                           SI->getPointerAddressSpace(), I);
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(I);
    return getMemoryOpCost(I->getOpcode(), I->getType(), LI->getAlignment(),
                           LI->getPointerAddressSpace(), I);
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
  }
  case Instruction::ExtractElement: {
    const ExtractElementInst *EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();

    // Try to match a reduction sequence (series of shufflevector and vector
    // adds followed by an extractelement).
    unsigned ReduxOpCode;
    Type *ReduxType;

    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/false);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/true);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    return getVectorInstrCost(I->getOpcode(), EEI->getOperand(0)->getType(),
                              Idx);
  }
  case Instruction::InsertElement: {
    const InsertElementInst *IE = cast<InsertElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return getVectorInstrCost(I->getOpcode(), IE->getType(), Idx);
  }
  case Instruction::ShuffleVector: {
    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
    unsigned NumVecElems = VecTypOp0->getVectorNumElements();
    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();

    if (NumVecElems == Mask.size()) {
      if (isReverseVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0,
                              nullptr);
      if (isAlternateVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_Alternate, VecTypOp0, 0,
                              nullptr);
      if (isZeroEltBroadcastVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTypOp0, 0,
                              nullptr);
      if (isSingleSourceVectorMask(Mask))
        return getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                              VecTypOp0, 0, nullptr);
      return getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, VecTypOp0,
                            0, nullptr);
    }
    return -1;
  }
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      SmallVector<Value *, 4> Args(II->arg_operands());

      FastMathFlags FMF;
      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
        FMF = FPMO->getFastMathFlags();

      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
                                   FMF);
    }
    return -1;
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}
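// Illustrative sketch (not part of the original source): a client could
// estimate a whole block's throughput by summing the per-instruction results
// above. The helper name and the policy of skipping unknown (-1) costs are
// assumptions made for illustration only.
static int getBlockThroughputEstimate(const BasicBlock &BB,
                                      const TargetTransformInfo &TTI) {
  int Total = 0;
  for (const Instruction &I : BB) {
    int Cost = TTI.getInstructionThroughput(&I);
    if (Cost >= 0) // Unknown instructions report -1; skip them here.
      Total += Cost;
  }
  return Total;
}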
unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { if (!TTI) return -1; switch (I->getOpcode()) { case Instruction::GetElementPtr:{ Type *ValTy = I->getOperand(0)->getType()->getPointerElementType(); return TTI->getAddressComputationCost(ValTy); } case Instruction::Ret: case Instruction::PHI: case Instruction::Br: { return TTI->getCFInstrCost(I->getOpcode()); } case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::FDiv: case Instruction::URem: case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: { return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType()); } case Instruction::Select: { const SelectInst *SI = cast<SelectInst>(I); Type *CondTy = SI->getCondition()->getType(); return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy); } case Instruction::Store: { const StoreInst *SI = cast<StoreInst>(I); Type *ValTy = SI->getValueOperand()->getType(); return TTI->getMemoryOpCost(I->getOpcode(), ValTy, SI->getAlignment(), SI->getPointerAddressSpace()); } case Instruction::Load: { const LoadInst *LI = cast<LoadInst>(I); return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), LI->getAlignment(), LI->getPointerAddressSpace()); } case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::FPExt: case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::SIToFP: case Instruction::UIToFP: case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); } case Instruction::ExtractElement: { const ExtractElementInst * EEI = cast<ExtractElementInst>(I); ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); return TTI->getVectorInstrCost(I->getOpcode(), EEI->getOperand(0)->getType(), Idx); } case Instruction::InsertElement: { const InsertElementInst * IE = cast<InsertElementInst>(I); ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); return TTI->getVectorInstrCost(I->getOpcode(), IE->getType(), Idx); } case Instruction::ShuffleVector: { const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); unsigned NumVecElems = VecTypOp0->getVectorNumElements(); SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); if (NumVecElems == Mask.size() && isReverseVectorMask(Mask)) return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0, 0); return -1; } case Instruction::Call: if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { SmallVector<Type*, 4> Tys; for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) Tys.push_back(II->getArgOperand(J)->getType()); return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Tys); } return -1; default: // We don't have any information on this instruction. return -1; } }
/// CloneBlock - The specified block is found to be reachable, clone it and /// anything that it can reach. void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, std::vector<const BasicBlock*> &ToClone){ WeakVH &BBEntry = VMap[BB]; // Have we already cloned this block? if (BBEntry) return; // Nope, clone it now. BasicBlock *NewBB; BBEntry = NewBB = BasicBlock::Create(BB->getContext()); if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); // It is only legal to clone a function if a block address within that // function is never referenced outside of the function. Given that, we // want to map block addresses from the old function to block addresses in // the clone. (This is different from the generic ValueMapper // implementation, which generates an invalid blockaddress when // cloning a function.) // // Note that we don't need to fix the mapping for unreachable blocks; // the default mapping there is safe. if (BB->hasAddressTaken()) { Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc), const_cast<BasicBlock*>(BB)); VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB); } bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; // Loop over all instructions, and copy them over, DCE'ing as we go. This // loop doesn't include the terminator. for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end(); II != IE; ++II) { Instruction *NewInst = II->clone(); // Eagerly remap operands to the newly cloned instruction, except for PHI // nodes for which we defer processing until we update the CFG. if (!isa<PHINode>(NewInst)) { RemapInstruction(NewInst, VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); // If we can simplify this instruction to some other value, simply add // a mapping to that value rather than inserting a new instruction into // the basic block. if (Value *V = SimplifyInstruction(NewInst, TD)) { // On the off-chance that this simplifies to an instruction in the old // function, map it back into the new function. if (Value *MappedV = VMap.lookup(V)) V = MappedV; VMap[II] = V; delete NewInst; continue; } } if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); VMap[II] = NewInst; // Add instruction map to value. NewBB->getInstList().push_back(NewInst); hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { if (isa<ConstantInt>(AI->getArraySize())) hasStaticAllocas = true; else hasDynamicAllocas = true; } } // Finally, clone over the terminator. const TerminatorInst *OldTI = BB->getTerminator(); bool TerminatorDone = false; if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) { if (BI->isConditional()) { // If the condition was a known constant in the callee... ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); // Or is a known constant in the caller... if (Cond == 0) { Value *V = VMap[BI->getCondition()]; Cond = dyn_cast_or_null<ConstantInt>(V); } // Constant fold to uncond branch! if (Cond) { BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue()); VMap[OldTI] = BranchInst::Create(Dest, NewBB); ToClone.push_back(Dest); TerminatorDone = true; } } } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) { // If switching on a value known constant in the caller. ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); if (Cond == 0) { // Or known constant after constant prop in the callee... Value *V = VMap[SI->getCondition()]; Cond = dyn_cast_or_null<ConstantInt>(V); } if (Cond) { // Constant fold to uncond branch! 
SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond); BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor()); VMap[OldTI] = BranchInst::Create(Dest, NewBB); ToClone.push_back(Dest); TerminatorDone = true; } } if (!TerminatorDone) { Instruction *NewInst = OldTI->clone(); if (OldTI->hasName()) NewInst->setName(OldTI->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); VMap[OldTI] = NewInst; // Add instruction map to value. // Recursively clone any reachable successor blocks. const TerminatorInst *TI = BB->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) ToClone.push_back(TI->getSuccessor(i)); } if (CodeInfo) { CodeInfo->ContainsCalls |= hasCalls; CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && BB != &BB->getParent()->front(); } if (ReturnInst *RI = dyn_cast<ReturnInst>(NewBB->getTerminator())) Returns.push_back(RI); }
// If we can determine that all possible objects pointed to by the provided // pointer value are, not only dereferenceable, but also definitively less than // or equal to the provided maximum size, then return true. Otherwise, return // false (constant global values and allocas fall into this category). // // FIXME: This should probably live in ValueTracking (or similar). static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, const DataLayout &DL) { SmallPtrSet<Value *, 4> Visited; SmallVector<Value *, 4> Worklist(1, V); do { Value *P = Worklist.pop_back_val(); P = P->stripPointerCasts(); if (!Visited.insert(P).second) continue; if (SelectInst *SI = dyn_cast<SelectInst>(P)) { Worklist.push_back(SI->getTrueValue()); Worklist.push_back(SI->getFalseValue()); continue; } if (PHINode *PN = dyn_cast<PHINode>(P)) { for (Value *IncValue : PN->incoming_values()) Worklist.push_back(IncValue); continue; } if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) { if (GA->mayBeOverridden()) return false; Worklist.push_back(GA->getAliasee()); continue; } // If we know how big this object is, and it is less than MaxSize, continue // searching. Otherwise, return false. if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) { if (!AI->getAllocatedType()->isSized()) return false; ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize()); if (!CS) return false; uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType()); // Make sure that, even if the multiplication below would wrap as an // uint64_t, we still do the right thing. if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize)) return false; continue; } if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { if (!GV->hasDefinitiveInitializer() || !GV->isConstant()) return false; uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType()); if (InitSize > MaxSize) return false; continue; } return false; } while (!Worklist.empty()); return true; }
static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, LVILatticeVal &Result, const DataLayout &DL, TargetLibraryInfo *TLI) { // If we know the value is a constant, evaluate the conditional. Constant *Res = nullptr; if (Result.isConstant()) { Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL, TLI); if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res)) return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True; return LazyValueInfo::Unknown; } if (Result.isConstantRange()) { ConstantInt *CI = dyn_cast<ConstantInt>(C); if (!CI) return LazyValueInfo::Unknown; ConstantRange CR = Result.getConstantRange(); if (Pred == ICmpInst::ICMP_EQ) { if (!CR.contains(CI->getValue())) return LazyValueInfo::False; if (CR.isSingleElement() && CR.contains(CI->getValue())) return LazyValueInfo::True; } else if (Pred == ICmpInst::ICMP_NE) { if (!CR.contains(CI->getValue())) return LazyValueInfo::True; if (CR.isSingleElement() && CR.contains(CI->getValue())) return LazyValueInfo::False; } // Handle more complex predicates. ConstantRange TrueValues = ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue()); if (TrueValues.contains(CR)) return LazyValueInfo::True; if (TrueValues.inverse().contains(CR)) return LazyValueInfo::False; return LazyValueInfo::Unknown; } if (Result.isNotConstant()) { // If this is an equality comparison, we can try to fold it knowing that // "V != C1". if (Pred == ICmpInst::ICMP_EQ) { // !C1 == C -> false iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, Result.getNotConstant(), C, DL, TLI); if (Res->isNullValue()) return LazyValueInfo::False; } else if (Pred == ICmpInst::ICMP_NE) { // !C1 != C -> true iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, Result.getNotConstant(), C, DL, TLI); if (Res->isNullValue()) return LazyValueInfo::True; } return LazyValueInfo::Unknown; } return LazyValueInfo::Unknown; }
/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
/// copy from MDep's input if we can. MSize is the size of M's copy.
///
bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
                                              uint64_t MSize) {
  // We can only transform memcpys where the dest of one is the source of the
  // other.
  if (M->getSource() != MDep->getDest() || MDep->isVolatile())
    return false;

  // If dep instruction is reading from our current input, then it is a noop
  // transfer and substituting the input won't change this instruction. Just
  // ignore the input and let someone else zap MDep. This handles cases like:
  //    memcpy(a <- a)
  //    memcpy(b <- a)
  if (M->getSource() == MDep->getSource())
    return false;

  // Second, the length of the memcpys must be the same, or the preceding one
  // must be larger than the following one.
  ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
  ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
    return false;

  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

  // Verify that the copied-from memory doesn't change in between the two
  // transfers.  For example, in:
  //    memcpy(a <- b)
  //    *b = 42;
  //    memcpy(c <- a)
  // It would be invalid to transform the second memcpy into memcpy(c <- b).
  //
  // TODO: If the code between M and MDep is transparent to the destination
  // "c", then we could still perform the xform by moving M up to the first
  // memcpy.
  //
  // NOTE: This is conservative, it will stop on any read from the source loc,
  // not just the defining memcpy.
  MemDepResult SourceDep =
      MD->getPointerDependencyFrom(AA.getLocationForSource(MDep), false, M,
                                   M->getParent());
  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
    return false;

  // If the dest of the second might alias the source of the first, then the
  // source and dest might overlap.  We still want to eliminate the
  // intermediate value, but we have to generate a memmove instead of memcpy.
  bool UseMemMove = false;
  if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
    UseMemMove = true;

  // If all checks passed, then we can transform M.

  // Make sure to use the lesser of the alignment of the source and the dest
  // since we're changing where we're reading from, but don't want to increase
  // the alignment past what can be read from or written to.
  // TODO: Is this worth it if we're creating a less aligned memcpy? For
  // example we could be moving from movaps -> movq on x86.
  unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());

  IRBuilder<> Builder(M);
  if (UseMemMove)
    Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(),
                          M->getLength(), Align, M->isVolatile());
  else
    Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
                         Align, M->isVolatile());

  // Remove the instruction we're replacing.
  MD->removeInstruction(M);
  M->eraseFromParent();
  ++NumMemCpyInstr;
  return true;
}
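// For illustration only (IR made up for this note, not taken from the pass):
// given
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 4, i1 false)
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %c, i8* %a, i64 16, i32 4, i1 false)
// and no intervening write to %b, the second copy is rewritten to read
// directly from %b (or becomes a memmove when %c may alias %b).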
bool InductionDescriptor::isInductionPHI(
    PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
    InductionDescriptor &D, const SCEV *Expr,
    SmallVectorImpl<Instruction *> *CastsToIgnore) {
  Type *PhiTy = Phi->getType();
  // We only handle integer and pointer induction variables.
  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
    return false;

  // Check that the PHI is consecutive.
  const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  if (AR->getLoop() != TheLoop) {
    // FIXME: We should treat this as a uniform. Unfortunately, we
    // don't currently know how to handle uniform PHIs.
    LLVM_DEBUG(
        dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
    return false;
  }

  Value *StartValue =
      Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());

  BasicBlock *Latch = AR->getLoop()->getLoopLatch();
  if (!Latch)
    return false;
  BinaryOperator *BOp =
      dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));

  const SCEV *Step = AR->getStepRecurrence(*SE);
  // Calculate the pointer stride and check if it is consecutive.
  // The stride may be a constant or a loop invariant integer value.
  const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
  if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
    return false;

  if (PhiTy->isIntegerTy()) {
    D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
                            CastsToIgnore);
    return true;
  }

  assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
  // Pointer induction should be a constant.
  if (!ConstStep)
    return false;

  ConstantInt *CV = ConstStep->getValue();
  Type *PointerElementType = PhiTy->getPointerElementType();
  // The pointer stride cannot be determined if the pointer element type is
  // not sized.
  if (!PointerElementType->isSized())
    return false;

  const DataLayout &DL = Phi->getModule()->getDataLayout();
  int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
  if (!Size)
    return false;

  int64_t CVSize = CV->getSExtValue();
  if (CVSize % Size)
    return false;
  auto *StepValue =
      SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
  D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue, BOp);
  return true;
}
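// Worked example (illustrative): for a pointer PHI of type i32* whose SCEV
// step is the constant 4, DL.getTypeAllocSize(i32) == 4, so CVSize / Size == 1
// and the descriptor records a pointer-induction step of one element per
// iteration.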
SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { Optional<AllocFnsTy> FnData = getAllocationData(CS.getInstruction(), AnyAlloc, TLI); if (!FnData) return unknown(); // handle strdup-like functions separately if (FnData->AllocTy == StrDupLike) { APInt Size(IntTyBits, GetStringLength(CS.getArgument(0))); if (!Size) return unknown(); // strndup limits strlen if (FnData->FstParam > 0) { ConstantInt *Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam)); if (!Arg) return unknown(); APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits); if (Size.ugt(MaxSize)) Size = MaxSize + 1; } return std::make_pair(Size, Zero); } ConstantInt *Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam)); if (!Arg) return unknown(); // When we're compiling N-bit code, and the user uses parameters that are // greater than N bits (e.g. uint64_t on a 32-bit build), we can run into // trouble with APInt size issues. This function handles resizing + overflow // checks for us. auto CheckedZextOrTrunc = [&](APInt &I) { // More bits than we can handle. Checking the bit width isn't necessary, but // it's faster than checking active bits, and should give `false` in the // vast majority of cases. if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits) return false; if (I.getBitWidth() != IntTyBits) I = I.zextOrTrunc(IntTyBits); return true; }; APInt Size = Arg->getValue(); if (!CheckedZextOrTrunc(Size)) return unknown(); // size determined by just 1 parameter if (FnData->SndParam < 0) return std::make_pair(Size, Zero); Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->SndParam)); if (!Arg) return unknown(); APInt NumElems = Arg->getValue(); if (!CheckedZextOrTrunc(NumElems)) return unknown(); bool Overflow; Size = Size.umul_ov(NumElems, Overflow); return Overflow ? unknown() : std::make_pair(Size, Zero); // TODO: handle more standard functions (+ wchar cousins): // - strdup / strndup // - strcpy / strncpy // - strcat / strncat // - memcpy / memmove // - strcat / strncat // - memset }
int qdp_jit_vec::vectorize_loads( std::vector<std::vector<Instruction*> >& load_instructions )
{
  DEBUG(dbgs() << "Vectorize loads, total of " << load_instructions.size() << "\n");

  //std::vector<std::pair<Value*,Value*> > scalar_vector_loads;
  scalar_vector_pairs.clear();

  if (load_instructions.empty())
    return 0;

  int load_vec_elem = 0;
  for( std::vector<Instruction*>& VI : load_instructions ) {
    DEBUG(dbgs() << "Processing vector of loads number " << load_vec_elem++ << "\n");
    assert( VI.size() == vec_len && "length of vector of loads does not match vec_len" );

    bool loads_consec = true;
    uint64_t lo,hi;
    bool first = true;
    for( Instruction* I : VI ) {
      GetElementPtrInst* GEP;
      if ((GEP = dyn_cast<GetElementPtrInst>(I->getOperand(0)))) {
        if (first) {
          ConstantInt * CI;
          if ((CI = dyn_cast<ConstantInt>(GEP->getOperand(1)))) {
            lo = CI->getZExtValue();
            hi = lo+1;
            first=false;
          } else {
            DEBUG(dbgs() << "First load in the chain: Operand of GEP not a ConstantInt" << *GEP->getOperand(1) << "\n");
            assert( 0 && "First load in the chain: Operand of GEP not a ConstantInt\n");
            exit(0);
          }
        } else {
          ConstantInt * CI;
          if ((CI = dyn_cast<ConstantInt>(GEP->getOperand(1)))) {
            if (hi != CI->getZExtValue()) {
              DEBUG(dbgs() << "Loads not consecutive lo=" << lo << " hi=" << hi << " this=" << CI->getZExtValue() << "\n");
              loads_consec = false;
            } else {
              hi++;
            }
          }
        }
      } else {
        DEBUG(dbgs() << "Operand of load not a GEP " << *I->getOperand(0) << "\n");
        assert( 0 && "Operand of load not a GEP" );
        exit(0);
        loads_consec = false;
      }
    }

    if (loads_consec) {
      DEBUG(dbgs() << "Loads consecutive\n");

      LoadInst* LI = cast<LoadInst>(VI.at(0));
      GetElementPtrInst* GEP = cast<GetElementPtrInst>(LI->getOperand(0));
      Instruction* GEPcl = clone_with_operands(GEP);
      unsigned AS = LI->getPointerAddressSpace();
      VectorType *VecTy = VectorType::get( LI->getType() , vec_len );

      unsigned bitwidth = LI->getType()->getPrimitiveSizeInBits();
      unsigned bytewidth = bitwidth == 1 ? 1 : bitwidth/8;
      DEBUG(dbgs() << "bit/byte width of load instr type: " << bitwidth << "/" << bytewidth << "\n");

      //Builder->SetInsertPoint( GEP );
      Value *VecPtr = Builder->CreateBitCast(GEPcl,VecTy->getPointerTo(AS));
      //Value *VecLoad = Builder->CreateLoad( VecPtr );

      unsigned align = lo % vec_len == 0 ? bytewidth * vec_len : bytewidth;
      Value *VecLoad = Builder->CreateAlignedLoad( VecPtr , align );

      //DEBUG(dbgs() << "created vector load: " << *VecLoad << "\n");
      //function->dump();

      // unsigned AS = LI->getPointerAddressSpace();
      // VectorType *VecTy = VectorType::get( LI->getType() , vec_len );
      // Builder->SetInsertPoint( LI );
      // Value *VecPtr = Builder->CreateBitCast(LI->getPointerOperand(),VecTy->getPointerTo(AS));
      // Value *VecLoad = Builder->CreateLoad( VecPtr );

      scalar_vector_pairs.push_back( std::make_pair( VI.at(0) , VecLoad ) );
    } else {
      DEBUG(dbgs() << "Loads not consecutive:\n");
      for (Value* V: VI) {
        DEBUG(dbgs() << *V << "\n");
      }

      //Instruction* I = dyn_cast<Instruction>(VI.back()->getNextNode());
      //DEBUG(dbgs() << *I << "\n");
      //Builder->SetInsertPoint( VI.at(0) );

      std::vector<Instruction*> VIcl;
      for( Instruction* I : VI ) {
        VIcl.push_back( clone_with_operands(I) );
      }

      VectorType *VecTy = VectorType::get( VI.at(0)->getType() , vec_len );
      Value *Vec = UndefValue::get(VecTy);

      int i=0;
      for( Instruction* I : VIcl ) {
        Vec = Builder->CreateInsertElement(Vec, I, Builder->getInt32(i++));
      }

      scalar_vector_pairs.push_back( std::make_pair( VI.at(0) , Vec ) );
    }
  }

  //vectorize_all_uses( scalar_vector_loads );

  DEBUG(dbgs() << "Searching for the stores:\n");
  //function->dump();

  //
  // Vectorize all StoreInst reachable by the first load of each vector of loads
  //
  {
    SetVector<Instruction*> to_visit;
    SetVector<Instruction*> stores_processed;
    for( std::vector<Instruction*>& VI : load_instructions ) {
      to_visit.insert(VI.at(0));
    }
    while (!to_visit.empty()) {
      Instruction* I = to_visit.back();
      to_visit.pop_back();
      DEBUG(dbgs() << "visiting " << *I << "\n");
      if (StoreInst* SI = dyn_cast<StoreInst>(I)) {
        if (!stores_processed.count(SI)) {
          get_vector_version( SI );
          stores_processed.insert( SI );
        }
      } else {
        for (Use &U : I->uses()) {
          Value* V = U.getUser();
          to_visit.insert(cast<Instruction>(V));
        }
      }
    }
  }

  // DEBUG(dbgs() << "After vectorizing the stores\n");
  // function->dump();

  //
  // Mark all stores as being processed
  //
  SetVector<Instruction*> to_visit;
  for( std::vector<Instruction*>& VI : load_instructions ) {
    for( Instruction* I : VI ) {
      to_visit.insert(I);
      if (GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(I->getOperand(0))) {
        for_erasure.insert(GEP);
      }
    }
  }
  while (!to_visit.empty()) {
    Instruction* I = to_visit.back();
    to_visit.pop_back();
    for_erasure.insert(I);
    if (StoreInst* SI = dyn_cast<StoreInst>(I)) {
      stores_processed.insert(SI);
      if (GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(SI->getOperand(1))) {
        for_erasure.insert(GEP);
      }
    } else {
      for (Use &U : I->uses()) {
        Value* V = U.getUser();
        to_visit.insert(cast<Instruction>(V));
      }
    }
  }

  DEBUG(dbgs() << "----------------------------------------\n");
  DEBUG(dbgs() << "After vectorize_loads\n");
  //function->dump();

  return 0;
}
/// CanEvaluateShifted - See if we can compute the specified value, but shifted
/// logically to the left or right by some number of bits.  This should return
/// true if the expression can be computed for the same cost as the current
/// expression tree.  This is used to eliminate extraneous shifting from things
/// like:
///      %C = shl i128 %A, 64
///      %D = shl i128 %B, 96
///      %E = or i128 %C, %D
///      %F = lshr i128 %E, 64
/// where the client will ask if E can be computed shifted right by 64-bits.
/// If this succeeds, the GetShiftedValue function will be called to produce
/// the value.
static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
                               InstCombiner &IC) {
  // We can always evaluate constants shifted.
  if (isa<Constant>(V))
    return true;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // If this is the opposite shift, we can directly reuse the input of the
  // shift if the needed bits are already zero in the input.  This allows us to
  // reuse the value which means that we don't care if the shift has multiple
  // uses.
  //  TODO:  Handle opposite shift by exact value.
  ConstantInt *CI = 0;
  if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
      (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
    if (CI->getZExtValue() == NumBits) {
      // TODO: Check that the input bits are already zero with MaskedValueIsZero
#if 0
      // If this is a truncate of a logical shr, we can truncate it to a
      // smaller lshr iff we know that the bits we would otherwise be shifting
      // in are already zeros.
      uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
      uint32_t BitWidth = Ty->getScalarSizeInBits();
      if (MaskedValueIsZero(I->getOperand(0),
            APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
          CI->getLimitedValue(BitWidth) < BitWidth) {
        return CanEvaluateTruncated(I->getOperand(0), Ty);
      }
#endif

    }
  }

  // We can't mutate something that has multiple uses: doing so would
  // require duplicating the instruction in general, which isn't profitable.
  if (!I->hasOneUse()) return false;

  switch (I->getOpcode()) {
  default: return false;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be evaluated shifted.
    return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
           CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);

  case Instruction::Shl: {
    // We can often fold the shift into shifts-by-a-constant.
    CI = dyn_cast<ConstantInt>(I->getOperand(1));
    if (CI == 0) return false;

    // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
    if (isLeftShift) return true;

    // We can always turn shl(c)+shr(c) -> and(c2).
    if (CI->getValue() == NumBits) return true;

    unsigned TypeWidth = I->getType()->getScalarSizeInBits();

    // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
    // profitable unless we know the and'd out bits are already zero.
    if (CI->getZExtValue() > NumBits) {
      unsigned LowBits = TypeWidth - CI->getZExtValue();
      if (MaskedValueIsZero(I->getOperand(0),
                       APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
        return true;
    }

    return false;
  }
  case Instruction::LShr: {
    // We can often fold the shift into shifts-by-a-constant.
    CI = dyn_cast<ConstantInt>(I->getOperand(1));
    if (CI == 0) return false;

    // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
    if (!isLeftShift) return true;

    // We can always turn lshr(c)+shl(c) -> and(c2).
    if (CI->getValue() == NumBits) return true;

    unsigned TypeWidth = I->getType()->getScalarSizeInBits();

    // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
    // profitable unless we know the and'd out bits are already zero.
    if (CI->getValue().ult(TypeWidth) && CI->getZExtValue() > NumBits) {
      unsigned LowBits = CI->getZExtValue() - NumBits;
      if (MaskedValueIsZero(I->getOperand(0),
                          APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
        return true;
    }

    return false;
  }
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) &&
           CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC))
        return false;
    return true;
  }
  }
}
bool FastISel::SelectCall(const User *I) { const CallInst *Call = cast<CallInst>(I); // Handle simple inline asms. if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) { // Don't attempt to handle constraints. if (!IA->getConstraintString().empty()) return false; unsigned ExtraInfo = 0; if (IA->hasSideEffects()) ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::INLINEASM)) .addExternalSymbol(IA->getAsmString().c_str()) .addImm(ExtraInfo); return true; } MachineModuleInfo &MMI = FuncInfo.MF->getMMI(); ComputeUsesVAFloatArgument(*Call, &MMI); const Function *F = Call->getCalledFunction(); if (!F) return false; // Handle selected intrinsic function calls. switch (F->getIntrinsicID()) { default: break; // At -O0 we don't care about the lifetime intrinsics. case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: return true; case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); if (!DIVariable(DI->getVariable()).Verify() || !FuncInfo.MF->getMMI().hasDebugInfo()) { DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; } const Value *Address = DI->getAddress(); if (!Address || isa<UndefValue>(Address)) { DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; } unsigned Reg = 0; unsigned Offset = 0; if (const Argument *Arg = dyn_cast<Argument>(Address)) { // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) Reg = TRI.getFrameRegister(*FuncInfo.MF); } if (!Reg) Reg = lookUpRegForValue(Address); if (!Reg && isa<Instruction>(Address) && (!isa<AllocaInst>(Address) || !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) Reg = FuncInfo.InitializeRegForValue(Address); if (Reg) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::DBG_VALUE)) .addReg(Reg, RegState::Debug).addImm(Offset) .addMetadata(DI->getVariable()); else // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. DEBUG(dbgs() << "Dropping debug info for " << DI); return true; } case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. const DbgValueInst *DI = cast<DbgValueInst>(Call); const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. 
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(0U).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { if (CI->getBitWidth() > 64) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addCImm(CI).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addImm(CI->getZExtValue()).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addFPImm(CF).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Reg, RegState::Debug).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. DEBUG(dbgs() << "Dropping debug info for " << DI); } return true; } case Intrinsic::objectsize: { ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1)); unsigned long long Res = CI->isZero() ? -1ULL : 0; Constant *ResCI = ConstantInt::get(Call->getType(), Res); unsigned ResultReg = getRegForValue(ResCI); if (ResultReg == 0) return false; UpdateValueMap(Call, ResultReg); return true; } } // Usually, it does not make sense to initialize a value, // make an unrelated function call and use the value, because // it tends to be spilled on the stack. So, we move the pointer // to the last local value to the beginning of the block, so that // all the values which have already been materialized, // appear after the call. It also makes sense to skip intrinsics // since they tend to be inlined. if (!isa<IntrinsicInst>(F)) flushLocalValueMap(); // An arbitrary call. Bail. return false; }
/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
/// this function inserts the new computation that produces the shifted value.
static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
                              InstCombiner &IC) {
  // We can always evaluate constants shifted.
  if (Constant *C = dyn_cast<Constant>(V)) {
    if (isLeftShift)
      V = IC.Builder->CreateShl(C, NumBits);
    else
      V = IC.Builder->CreateLShr(C, NumBits);
    // If we got a constantexpr back, try to simplify it with TD info.
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      V = ConstantFoldConstantExpression(CE, IC.getDataLayout(),
                                         IC.getTargetLibraryInfo());
    return V;
  }

  Instruction *I = cast<Instruction>(V);
  IC.Worklist.Add(I);

  switch (I->getOpcode()) {
  default: llvm_unreachable("Inconsistency with CanEvaluateShifted");
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be evaluated shifted.
    I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
    return I;

  case Instruction::Shl: {
    BinaryOperator *BO = cast<BinaryOperator>(I);
    unsigned TypeWidth = BO->getType()->getScalarSizeInBits();

    // We only accept shifts-by-a-constant in CanEvaluateShifted.
    ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));

    // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
    if (isLeftShift) {
      // If this is oversized composite shift, then unsigned shifts get 0.
      unsigned NewShAmt = NumBits+CI->getZExtValue();
      if (NewShAmt >= TypeWidth)
        return Constant::getNullValue(I->getType());

      BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
      BO->setHasNoUnsignedWrap(false);
      BO->setHasNoSignedWrap(false);
      return I;
    }

    // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
    // zeros.
    if (CI->getValue() == NumBits) {
      APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
      V = IC.Builder->CreateAnd(BO->getOperand(0),
                                ConstantInt::get(BO->getContext(), Mask));
      if (Instruction *VI = dyn_cast<Instruction>(V)) {
        VI->moveBefore(BO);
        VI->takeName(BO);
      }
      return V;
    }

    // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
    // the and won't be needed.
    assert(CI->getZExtValue() > NumBits);
    BO->setOperand(1, ConstantInt::get(BO->getType(),
                                       CI->getZExtValue() - NumBits));
    BO->setHasNoUnsignedWrap(false);
    BO->setHasNoSignedWrap(false);
    return BO;
  }
  case Instruction::LShr: {
    BinaryOperator *BO = cast<BinaryOperator>(I);
    unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
    // We only accept shifts-by-a-constant in CanEvaluateShifted.
    ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));

    // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
    if (!isLeftShift) {
      // If this is oversized composite shift, then unsigned shifts get 0.
      unsigned NewShAmt = NumBits+CI->getZExtValue();
      if (NewShAmt >= TypeWidth)
        return Constant::getNullValue(BO->getType());

      BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
      BO->setIsExact(false);
      return I;
    }

    // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
    // zeros.
    if (CI->getValue() == NumBits) {
      APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
      V = IC.Builder->CreateAnd(I->getOperand(0),
                                ConstantInt::get(BO->getContext(), Mask));
      if (Instruction *VI = dyn_cast<Instruction>(V)) {
        VI->moveBefore(I);
        VI->takeName(I);
      }
      return V;
    }

    // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
    // the and won't be needed.
    assert(CI->getZExtValue() > NumBits);
    BO->setOperand(1, ConstantInt::get(BO->getType(),
                                       CI->getZExtValue() - NumBits));
    BO->setIsExact(false);
    return BO;
  }

  case Instruction::Select:
    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
    I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
    return I;
  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), NumBits,
                                              isLeftShift, IC));
    return PN;
  }
  }
}
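// Worked example (illustrative): with NumBits == 64 and isLeftShift == false,
//   %E = shl i128 %X, 64
//   %F = lshr i128 %E, 64
// takes the Shl path above with CI == NumBits, so %E is replaced by
//   and i128 %X, 18446744073709551615   ; keep only the low 64 bits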
/// Return true if we can evaluate the specified expression tree if the vector /// elements were shuffled in a different order. static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask, unsigned Depth = 5) { // We can always reorder the elements of a constant. if (isa<Constant>(V)) return true; // We won't reorder vector arguments. No IPO here. Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; // Two users may expect different orders of the elements. Don't try it. if (!I->hasOneUse()) return false; if (Depth == 0) return false; switch (I->getOpcode()) { case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::FDiv: case Instruction::URem: case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: case Instruction::ICmp: case Instruction::FCmp: case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::UIToFP: case Instruction::SIToFP: case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::GetElementPtr: { for (Value *Operand : I->operands()) { if (!CanEvaluateShuffled(Operand, Mask, Depth-1)) return false; } return true; } case Instruction::InsertElement: { ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2)); if (!CI) return false; int ElementNumber = CI->getLimitedValue(); // Verify that 'CI' does not occur twice in Mask. A single 'insertelement' // can't put an element into multiple indices. bool SeenOnce = false; for (int i = 0, e = Mask.size(); i != e; ++i) { if (Mask[i] == ElementNumber) { if (SeenOnce) return false; SeenOnce = true; } } return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1); } } return false; }
/// \brief Check if Value is always a dereferenceable pointer. /// /// Test if V is always a pointer to allocated and suitably aligned memory for /// a simple load or store. static bool isDereferenceablePointer(const Value *V, const DataLayout *DL, SmallPtrSetImpl<const Value *> &Visited) { // Note that it is not safe to speculate into a malloc'd region because // malloc may return null. // These are obviously ok. if (isa<AllocaInst>(V)) return true; // It's not always safe to follow a bitcast, for example: // bitcast i8* (alloca i8) to i32* // would result in a 4-byte load from a 1-byte alloca. However, // if we're casting from a pointer from a type of larger size // to a type of smaller size (or the same size), and the alignment // is at least as large as for the resulting pointer type, then // we can look through the bitcast. if (DL) if (const BitCastInst* BC = dyn_cast<BitCastInst>(V)) { Type *STy = BC->getSrcTy()->getPointerElementType(), *DTy = BC->getDestTy()->getPointerElementType(); if (STy->isSized() && DTy->isSized() && (DL->getTypeStoreSize(STy) >= DL->getTypeStoreSize(DTy)) && (DL->getABITypeAlignment(STy) >= DL->getABITypeAlignment(DTy))) return isDereferenceablePointer(BC->getOperand(0), DL, Visited); } // Global variables which can't collapse to null are ok. if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) return !GV->hasExternalWeakLinkage(); // byval arguments are okay. Arguments specifically marked as // dereferenceable are okay too. if (const Argument *A = dyn_cast<Argument>(V)) { if (A->hasByValAttr()) return true; else if (uint64_t Bytes = A->getDereferenceableBytes()) { Type *Ty = V->getType()->getPointerElementType(); if (Ty->isSized() && DL && DL->getTypeStoreSize(Ty) <= Bytes) return true; } return false; } // Return values from call sites specifically marked as dereferenceable are // also okay. if (ImmutableCallSite CS = V) { if (uint64_t Bytes = CS.getDereferenceableBytes(0)) { Type *Ty = V->getType()->getPointerElementType(); if (Ty->isSized() && DL && DL->getTypeStoreSize(Ty) <= Bytes) return true; } } // For GEPs, determine if the indexing lands within the allocated object. if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { // Conservatively require that the base pointer be fully dereferenceable. if (!Visited.insert(GEP->getOperand(0)).second) return false; if (!isDereferenceablePointer(GEP->getOperand(0), DL, Visited)) return false; // Check the indices. gep_type_iterator GTI = gep_type_begin(GEP); for (User::const_op_iterator I = GEP->op_begin()+1, E = GEP->op_end(); I != E; ++I) { Value *Index = *I; Type *Ty = *GTI++; // Struct indices can't be out of bounds. if (isa<StructType>(Ty)) continue; ConstantInt *CI = dyn_cast<ConstantInt>(Index); if (!CI) return false; // Zero is always ok. if (CI->isZero()) continue; // Check to see that it's within the bounds of an array. ArrayType *ATy = dyn_cast<ArrayType>(Ty); if (!ATy) return false; if (CI->getValue().getActiveBits() > 64) return false; if (CI->getZExtValue() >= ATy->getNumElements()) return false; } // Indices check out; this is dereferenceable. return true; } if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V)) return isDereferenceablePointer(ASC->getOperand(0), DL, Visited); // If we don't know, assume the worst. return false; }
void TartGCPrinter::finishAssembly(AsmPrinter &AP) { unsigned nextLabel = 1; SafePointList safePoints; // Set up for emitting addresses. int pointerSize = AP.TM.getTargetData()->getPointerSize(); int addressAlignLog; if (pointerSize == sizeof(int32_t)) { addressAlignLog = 2; } else { addressAlignLog = 3; } MCStreamer & outStream = AP.OutStreamer; // Put this in the data section. outStream.SwitchSection(AP.getObjFileLowering().getDataSection()); // For each function... for (iterator FI = begin(), FE = end(); FI != FE; ++FI) { GCFunctionInfo & gcFn = **FI; // if (optShowGC) { // errs() << "GCStrategy: Function: " << gcFn.getFunction().getName() << "\n"; // } // And each safe point... for (GCFunctionInfo::iterator sp = gcFn.begin(); sp != gcFn.end(); ++sp) { StackTraceTable::FieldOffsetList fieldOffsets; StackTraceTable::TraceMethodList traceMethods; // And for each live root... for (GCFunctionInfo::live_iterator rt = gcFn.live_begin(sp); rt != gcFn.live_end(sp); ++rt) { int64_t offset = rt->StackOffset; const Constant * meta = rt->Metadata; if (meta != NULL && !meta->isNullValue()) { // Meta is non-null, so it's a value type. const ConstantArray * traceArray = cast<ConstantArray>(getGlobalValue(meta)); // For each trace descriptor in thre meta array... for (ConstantArray::const_op_iterator it = traceArray->op_begin(); it != traceArray->op_end(); ++it) { ConstantStruct * descriptor = cast<ConstantStruct>(*it); ConstantInt * fieldCount = cast<ConstantInt>(descriptor->getOperand(1)); int64_t dscOffset = toInt(descriptor->getOperand(2), AP.TM); if (fieldCount->isZero()) { // A zero field count means that this is a trace method descriptor. const Constant * traceMethod = descriptor->getOperand(3); assert(offset > -1000 && offset < 1000); assert(dscOffset > -1000 && dscOffset < 1000); traceMethods.push_back(TraceMethodEntry(offset + dscOffset, traceMethod)); } else { // Otherwise it's a field offset descriptor. const GlobalVariable * fieldOffsetsVar = cast<GlobalVariable>( descriptor->getOperand(3)->getOperand(0)); // Handle case where the array value is just a ConstantAggregateZero, which // can be generated by llvm::ConstantArray::get() if the array values // are all zero. if (const ConstantAggregateZero * zero = dyn_cast<ConstantAggregateZero>(fieldOffsetsVar->getInitializer())) { // Array should never contain duplicate offsets, so an all-zero array // can only have one entry. (void)zero; assert(fieldCount->isOne()); fieldOffsets.push_back(offset + dscOffset); } else { // Get the field offset array and add to field offsets for this // safe point. const ConstantArray * fieldOffsetArray = cast<ConstantArray>( fieldOffsetsVar->getInitializer()); for (ConstantArray::const_op_iterator el = fieldOffsetArray->op_begin(); el != fieldOffsetArray->op_end(); ++el) { fieldOffsets.push_back( offset + dscOffset + toInt(cast<llvm::Constant>(*el), AP.TM)); } } } } } else { // No metadata, so it's an object reference - just add the field offset. fieldOffsets.push_back(offset); } } // Nothing to trace? Then we're done. if (fieldOffsets.empty() && traceMethods.empty()) { continue; } // Create a folding set node and merge with any identical trace tables. 
std::sort(fieldOffsets.begin(), fieldOffsets.end()); llvm::FoldingSetNodeID id; StackTraceTable::ProfileEntries(id, fieldOffsets, traceMethods); void * insertPos; StackTraceTable * sTable = traceTables.FindNodeOrInsertPos(id, insertPos); if (sTable == NULL) { sTable = new StackTraceTable(fieldOffsets, traceMethods); // Generate the labels for the trace table and field offset table. sTable->fieldOffsetsLabel = AP.GetTempSymbol("gc_stack_offsets", nextLabel); sTable->traceTableLabel = AP.GetTempSymbol("gc_stack", nextLabel++); // Add to folding set traceTables.InsertNode(sTable, insertPos); // Generate the trace table outStream.AddBlankLine(); AP.EmitAlignment(addressAlignLog); // First the field offset descriptor outStream.EmitLabel(sTable->traceTableLabel); size_t traceMethodCount = sTable->traceMethods.size(); if (!sTable->fieldOffsets.empty()) { outStream.EmitIntValue(traceMethodCount == 0 ? 1 : 0, 2, 0); outStream.EmitIntValue(sTable->fieldOffsets.size(), 2, 0); outStream.EmitIntValue(0, 4, 0); outStream.EmitSymbolValue(sTable->fieldOffsetsLabel, pointerSize, 0); } // Next the trace method descriptors for (size_t i = 0; i < traceMethodCount; ++i) { const TraceMethodEntry * tm = &sTable->traceMethods[i]; const Function * method = dyn_cast<Function>(tm->method()); if (method == NULL) { method = cast<Function>(tm->method()->getOperand(0)); } outStream.EmitIntValue((i + 1 == traceMethodCount ? 1 : 0), 2, 0); outStream.EmitIntValue(0, 2, 0); outStream.EmitIntValue(tm->offset(), 4, 0); MCSymbol * methodSym = AP.Mang->getSymbol(method); outStream.EmitSymbolValue(methodSym, pointerSize, 0); } // Now emit the field offset array outStream.AddBlankLine(); AP.EmitAlignment(addressAlignLog); outStream.EmitLabel(sTable->fieldOffsetsLabel); for (StackTraceTable::FieldOffsetList::const_iterator it = fieldOffsets.begin(); it != fieldOffsets.end(); ++it) { outStream.EmitIntValue(*it, pointerSize, 0); } } safePoints.push_back(std::pair<MCSymbol *, MCSymbol *>(sp->Label, sTable->traceTableLabel)); // if (optShowGC) { // if (!sTable->fieldOffsets.empty()) { // errs() << "GCStrategy: Field offset descriptor:"; // for (StackTraceTable::FieldOffsetList::const_iterator it = sTable->fieldOffsets.begin(); // it != sTable->fieldOffsets.end(); ++it) { // errs() << " " << *it; // } // errs() << "\n"; // } // if (!sTable->traceMethods.empty()) { // errs() << "GCStrategy: Trace method descriptor: " << "\n"; // } // } } } // Finally, generate the safe point map. outStream.AddBlankLine(); MCSymbol * gcSafepointSymbol = AP.GetExternalSymbolSymbol("GC_safepoint_map"); outStream.EmitSymbolAttribute(gcSafepointSymbol, MCSA_Global); outStream.EmitLabel(gcSafepointSymbol); outStream.EmitIntValue(safePoints.size(), pointerSize, 0); for (SafePointList::const_iterator it = safePoints.begin(); it != safePoints.end(); ++it) { outStream.EmitSymbolValue(it->first, pointerSize, 0); outStream.EmitSymbolValue(it->second, pointerSize, 0); } }
// Propagate existing explicit probabilities from either profile data or // 'expect' intrinsic processing. bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { const TerminatorInst *TI = BB->getTerminator(); if (TI->getNumSuccessors() == 1) return false; if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) return false; MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof); if (!WeightsNode) return false; // Check that the number of successors is manageable. assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors"); // Ensure there are weights for all of the successors. Note that the first // operand to the metadata node is a name, not a weight. if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1) return false; // Build up the final weights that will be used in a temporary buffer. // Compute the sum of all weights to later decide whether they need to // be scaled to fit in 32 bits. uint64_t WeightSum = 0; SmallVector<uint32_t, 2> Weights; Weights.reserve(TI->getNumSuccessors()); for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) { ConstantInt *Weight = mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(i)); if (!Weight) return false; assert(Weight->getValue().getActiveBits() <= 32 && "Too many bits for uint32_t"); Weights.push_back(Weight->getZExtValue()); WeightSum += Weights.back(); } assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); // If the sum of weights does not fit in 32 bits, scale every weight down // accordingly. uint64_t ScalingFactor = (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1; WeightSum = 0; for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { Weights[i] /= ScalingFactor; WeightSum += Weights[i]; } if (WeightSum == 0) { for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) setEdgeProbability(BB, i, {1, e}); } else { for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) setEdgeProbability(BB, i, {Weights[i], static_cast<uint32_t>(WeightSum)}); } assert(WeightSum <= UINT32_MAX && "Expected weights to scale down to 32 bits"); return true; }
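// Illustrative IR (assumed, not from this file): the metadata consumed above
// looks like
//   br i1 %cmp, label %taken, label %nottaken, !prof !0
//   !0 = !{!"branch_weights", i32 2000, i32 1}
// where the leading "branch_weights" string is the name operand that is
// skipped when the per-successor weights are read.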
bool HexagonGenExtract::convert(Instruction *In) { using namespace PatternMatch; Value *BF = 0; ConstantInt *CSL = 0, *CSR = 0, *CM = 0; BasicBlock *BB = In->getParent(); LLVMContext &Ctx = BB->getContext(); bool LogicalSR; // (and (shl (lshr x, #sr), #sl), #m) LogicalSR = true; bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)), m_ConstantInt(CSL)), m_ConstantInt(CM))); if (!Match) { // (and (shl (ashr x, #sr), #sl), #m) LogicalSR = false; Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)), m_ConstantInt(CSL)), m_ConstantInt(CM))); } if (!Match) { // (and (shl x, #sl), #m) LogicalSR = true; CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0); Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)), m_ConstantInt(CM))); if (Match && NoSR0) return false; } if (!Match) { // (and (lshr x, #sr), #m) LogicalSR = true; CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0); Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)), m_ConstantInt(CM))); } if (!Match) { // (and (ashr x, #sr), #m) LogicalSR = false; CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0); Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)), m_ConstantInt(CM))); } if (!Match) { CM = 0; // (shl (lshr x, #sr), #sl) LogicalSR = true; Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)), m_ConstantInt(CSL))); } if (!Match) { CM = 0; // (shl (ashr x, #sr), #sl) LogicalSR = false; Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)), m_ConstantInt(CSL))); } if (!Match) return false; Type *Ty = BF->getType(); if (!Ty->isIntegerTy()) return false; unsigned BW = Ty->getPrimitiveSizeInBits(); if (BW != 32 && BW != 64) return false; uint32_t SR = CSR->getZExtValue(); uint32_t SL = CSL->getZExtValue(); if (!CM) { // If there was no and, and the shift left did not remove all potential // sign bits created by the shift right, then extractu cannot reproduce // this value. if (!LogicalSR && (SR > SL)) return false; APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL); CM = ConstantInt::get(Ctx, A); } // CM is the shifted-left mask. Shift it back right to remove the zero // bits on least-significant positions. APInt M = CM->getValue().lshr(SL); uint32_t T = M.countTrailingOnes(); // During the shifts some of the bits will be lost. Calculate how many // of the original value will remain after shift right and then left. uint32_t U = BW - std::max(SL, SR); // The width of the extracted field is the minimum of the original bits // that remain after the shifts and the number of contiguous 1s in the mask. uint32_t W = std::min(U, T); if (W == 0) return false; // Check if the extracted bits are contained within the mask that it is // and-ed with. The extract operation will copy these bits, and so the // mask cannot any holes in it that would clear any of the bits of the // extracted field. if (!LogicalSR) { // If the shift right was arithmetic, it could have included some 1 bits. // It is still ok to generate extract, but only if the mask eliminates // those bits (i.e. M does not have any bits set beyond U). APInt C = APInt::getHighBitsSet(BW, BW-U); if (M.intersects(C) || !APIntOps::isMask(W, M)) return false; } else { // Check if M starts with a contiguous sequence of W times 1 bits. Get // the low U bits of M (which eliminates the 0 bits shifted in on the // left), and check if the result is APInt's "mask": if (!APIntOps::isMask(W, M.getLoBits(U))) return false; } IRBuilder<> IRB(In); Intrinsic::ID IntId = (BW == 32) ? 
Intrinsic::hexagon_S2_extractu : Intrinsic::hexagon_S2_extractup; Module *Mod = BB->getParent()->getParent(); Value *ExtF = Intrinsic::getDeclaration(Mod, IntId); Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)}); if (SL != 0) NewIn = IRB.CreateShl(NewIn, SL, CSL->getName()); In->replaceAllUsesWith(NewIn); return true; }
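// Standalone scalar model (plain C++, not Hexagon or LLVM code) of the
// field-extract arithmetic above: for ((x >> SR) << SL) & M with a contiguous
// mask M, the result equals extracting W bits of x starting at bit SR and then
// shifting them left by SL. The extractu helper and the constants below are
// illustrative assumptions, not the real intrinsic.
#include <cassert>
#include <cstdint>

static unsigned countTrailingOnes(uint32_t v) {
  unsigned n = 0;
  while (v & 1) { v >>= 1; ++n; }
  return n;
}

static uint32_t extractu(uint32_t x, unsigned W, unsigned SR) {
  return (x >> SR) & ((W >= 32) ? ~0u : ((1u << W) - 1));
}

int main() {
  const unsigned BW = 32, SR = 4, SL = 2;
  const uint32_t M = 0x3FCu;                 // contiguous mask after the shift left
  uint32_t x = 0xDEADBEEFu;

  unsigned T = countTrailingOnes(M >> SL);   // ones in the unshifted mask
  unsigned U = BW - (SL > SR ? SL : SR);     // bits surviving both shifts
  unsigned W = T < U ? T : U;                // extracted field width

  uint32_t viaShifts  = ((x >> SR) << SL) & M;
  uint32_t viaExtract = extractu(x, W, SR) << SL;
  assert(viaShifts == viaExtract);
  return 0;
}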
bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB) { const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; Value *Cond = BI->getCondition(); ICmpInst *CI = dyn_cast<ICmpInst>(Cond); if (!CI) return false; Value *RHS = CI->getOperand(1); ConstantInt *CV = dyn_cast<ConstantInt>(RHS); if (!CV) return false; // If the LHS is the result of AND'ing a value with a single bit bitmask, // we don't have information about probabilities. if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0))) if (LHS->getOpcode() == Instruction::And) if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) if (AndRHS->getUniqueInteger().isPowerOf2()) return false; bool isProb; if (CV->isZero()) { switch (CI->getPredicate()) { case CmpInst::ICMP_EQ: // X == 0 -> Unlikely isProb = false; break; case CmpInst::ICMP_NE: // X != 0 -> Likely isProb = true; break; case CmpInst::ICMP_SLT: // X < 0 -> Unlikely isProb = false; break; case CmpInst::ICMP_SGT: // X > 0 -> Likely isProb = true; break; default: return false; } } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) { // InstCombine canonicalizes X <= 0 into X < 1. // X <= 0 -> Unlikely isProb = false; } else if (CV->isAllOnesValue()) { switch (CI->getPredicate()) { case CmpInst::ICMP_EQ: // X == -1 -> Unlikely isProb = false; break; case CmpInst::ICMP_NE: // X != -1 -> Likely isProb = true; break; case CmpInst::ICMP_SGT: // InstCombine canonicalizes X >= 0 into X > -1. // X >= 0 -> Likely isProb = true; break; default: return false; } } else { return false; } unsigned TakenIdx = 0, NonTakenIdx = 1; if (!isProb) std::swap(TakenIdx, NonTakenIdx); BranchProbability TakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); setEdgeProbability(BB, TakenIdx, TakenProb); setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); return true; }
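// Minimal sketch (plain C++, not the BranchProbabilityInfo API) of how the
// zero heuristic above turns an "is likely taken" decision into an edge
// probability pair. The weight values are made up for illustration; the real
// ZH_TAKEN_WEIGHT / ZH_NONTAKEN_WEIGHT constants live elsewhere in the pass.
#include <cstdio>
#include <utility>

struct Prob { unsigned Num, Den; };

static std::pair<Prob, Prob> zeroHeuristic(bool isProb) {
  const unsigned Taken = 20, NonTaken = 12;   // illustrative weights only
  Prob TakenProb    = {Taken, Taken + NonTaken};
  Prob NonTakenProb = {NonTaken, Taken + NonTaken};
  // Successor 0 is the "true" edge; swap the pair if the comparison is unlikely.
  return isProb ? std::make_pair(TakenProb, NonTakenProb)
                : std::make_pair(NonTakenProb, TakenProb);
}

int main() {
  auto P = zeroHeuristic(/*isProb: e.g. X != 0*/ true);
  std::printf("edge0 %u/%u, edge1 %u/%u\n",
              P.first.Num, P.first.Den, P.second.Num, P.second.Den);
}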
/// performCallSlotOptzn - takes a memcpy and a call that it depends on, /// and checks for the possibility of a call slot optimization by having /// the call write its result directly into the destination of the memcpy. bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // The general transformation to keep in mind is // // call @func(..., src, ...) // memcpy(dest, src, ...) // // -> // // memcpy(dest, src, ...) // call @func(..., dest, ...) // // Since moving the memcpy is technically awkward, we additionally check that // src only holds uninitialized values at the moment of the call, meaning that // the memcpy can be discarded rather than moved. // Deliberately get the source and destination with bitcasts stripped away, // because we'll need to do type comparisons based on the underlying type. Value *cpyDest = cpy->getDest(); Value *cpySrc = cpy->getSource(); CallSite CS(C); // We need to be able to reason about the size of the memcpy, so we require // that it be a constant. ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength()); if (!cpyLength) return false; // Require that src be an alloca. This simplifies the reasoning considerably. AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc); if (!srcAlloca) return false; // Check that all of src is copied to dest. TargetData *TD = getAnalysisIfAvailable<TargetData>(); if (!TD) return false; ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); if (!srcArraySize) return false; uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) * srcArraySize->getZExtValue(); if (cpyLength->getZExtValue() < srcSize) return false; // Check that accessing the first srcSize bytes of dest will not cause a // trap. Otherwise the transform is invalid since it might cause a trap // to occur earlier than it otherwise would. if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) { // The destination is an alloca. Check it is larger than srcSize. ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize()); if (!destArraySize) return false; uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) * destArraySize->getZExtValue(); if (destSize < srcSize) return false; } else if (Argument *A = dyn_cast<Argument>(cpyDest)) { // If the destination is an sret parameter then only accesses that are // outside of the returned struct type can trap. if (!A->hasStructRetAttr()) return false; const Type *StructTy = cast<PointerType>(A->getType())->getElementType(); uint64_t destSize = TD->getTypeAllocSize(StructTy); if (destSize < srcSize) return false; } else { return false; } // Check that src is not accessed except via the call and the memcpy. This // guarantees that it holds only undefined values when passed in (so the final // memcpy can be dropped), that it is not read or written between the call and // the memcpy, and that writing beyond the end of it is undefined. 
SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(), srcAlloca->use_end()); while (!srcUseList.empty()) { User *UI = srcUseList.pop_back_val(); if (isa<BitCastInst>(UI)) { for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); I != E; ++I) srcUseList.push_back(*I); } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) { if (G->hasAllZeroIndices()) for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); I != E; ++I) srcUseList.push_back(*I); else return false; } else if (UI != C && UI != cpy) { return false; } } // Since we're changing the parameter to the callsite, we need to make sure // that what would be the new parameter dominates the callsite. DominatorTree &DT = getAnalysis<DominatorTree>(); if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest)) if (!DT.dominates(cpyDestInst, C)) return false; // In addition to knowing that the call does not access src in some // unexpected manner, for example via a global, which we deduce from // the use analysis, we also need to know that it does not sneakily // access dest. We rely on AA to figure this out for us. AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) != AliasAnalysis::NoModRef) return false; // All the checks have passed, so do the transformation. bool changedArgument = false; for (unsigned i = 0; i < CS.arg_size(); ++i) if (CS.getArgument(i)->stripPointerCasts() == cpySrc) { if (cpySrc->getType() != cpyDest->getType()) cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(), cpyDest->getName(), C); changedArgument = true; if (CS.getArgument(i)->getType() == cpyDest->getType()) CS.setArgument(i, cpyDest); else CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, CS.getArgument(i)->getType(), cpyDest->getName(), C)); } if (!changedArgument) return false; // Drop any cached information about the call, because we may have changed // its dependence information by changing its parameter. MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); MD.removeInstruction(C); // Remove the memcpy MD.removeInstruction(cpy); cpy->eraseFromParent(); ++NumMemCpyInstr; return true; }
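// Illustrative C-level picture (not LLVM IR and not the pass itself) of the
// call slot optimization performed above: roughly, if the temporary is only
// written by the call and then copied into dest, the call can write straight
// into dest and the copy disappears. The Big type and producer function are
// hypothetical names for this example.
#include <cstring>

struct Big { char Bytes[64]; };

static void producer(Big *out) { std::memset(out, 0x2A, sizeof(Big)); }

static void before(Big *dest) {
  Big tmp;                                // srcAlloca: holds only uninitialized bytes
  producer(&tmp);                         // call @func(..., src, ...)
  std::memcpy(dest, &tmp, sizeof(Big));   // memcpy(dest, src, ...)
}

static void after(Big *dest) {
  producer(dest);                         // call rewritten to write straight into dest
}

int main() {
  Big a, b;
  before(&a);
  after(&b);
  return std::memcmp(&a, &b, sizeof(Big)) != 0;  // 0: the rewrite preserved behavior
}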
// switchConvert - Convert the switch statement into a binary lookup of // the case values. The function recursively builds this tree. // LowerBound and UpperBound are used to keep track of the bounds for Val // that have already been checked by a block emitted by one of the previous // calls to switchConvert in the call stack. BasicBlock * LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, ConstantInt *UpperBound, Value *Val, BasicBlock *Predecessor, BasicBlock *OrigBlock, BasicBlock *Default, const std::vector<IntRange> &UnreachableRanges) { unsigned Size = End - Begin; if (Size == 1) { // Check if the Case Range is perfectly squeezed in between // already checked Upper and Lower bounds. If it is then we can avoid // emitting the code that checks if the value actually falls in the range // because the bounds already tell us so. if (Begin->Low == LowerBound && Begin->High == UpperBound) { unsigned NumMergedCases = 0; if (LowerBound && UpperBound) NumMergedCases = UpperBound->getSExtValue() - LowerBound->getSExtValue(); fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases); return Begin->BB; } return newLeafBlock(*Begin, Val, OrigBlock, Default); } unsigned Mid = Size / 2; std::vector<CaseRange> LHS(Begin, Begin + Mid); DEBUG(dbgs() << "LHS: " << LHS << "\n"); std::vector<CaseRange> RHS(Begin + Mid, End); DEBUG(dbgs() << "RHS: " << RHS << "\n"); CaseRange &Pivot = *(Begin + Mid); DEBUG(dbgs() << "Pivot ==> " << Pivot.Low->getValue() << " -" << Pivot.High->getValue() << "\n"); // NewLowerBound here should never be the integer minimal value. // This is because it is computed from a case range that is never // the smallest, so there is always a case range that has at least // a smaller value. ConstantInt *NewLowerBound = Pivot.Low; // Because NewLowerBound is never the smallest representable integer // it is safe here to subtract one. ConstantInt *NewUpperBound = ConstantInt::get(NewLowerBound->getContext(), NewLowerBound->getValue() - 1); if (!UnreachableRanges.empty()) { // Check if the gap between LHS's highest and NewLowerBound is unreachable. int64_t GapLow = LHS.back().High->getSExtValue() + 1; int64_t GapHigh = NewLowerBound->getSExtValue() - 1; IntRange Gap = { GapLow, GapHigh }; if (GapHigh >= GapLow && IsInRanges(Gap, UnreachableRanges)) NewUpperBound = LHS.back().High; } DEBUG(dbgs() << "LHS Bounds ==> "; if (LowerBound) { dbgs() << LowerBound->getSExtValue(); } else { dbgs() << "NONE"; } dbgs() << " - " << NewUpperBound->getSExtValue() << "\n"; dbgs() << "RHS Bounds ==> "; dbgs() << NewLowerBound->getSExtValue() << " - "; if (UpperBound) { dbgs() << UpperBound->getSExtValue() << "\n"; } else { dbgs() << "NONE\n"; });
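// Standalone sketch (no LLVM types) of the divide-and-conquer shape used by
// switchConvert above: sorted case values are split at the midpoint, the pivot
// becomes a "Val < Pivot" test, and each half is lowered recursively until a
// single case remains. This models only the control structure, not PHI fixup
// or unreachable-range handling.
#include <cstdio>
#include <vector>

static int lowerSwitch(const std::vector<int> &Cases, size_t Begin, size_t End,
                       int Val, int DefaultResult) {
  size_t Size = End - Begin;
  if (Size == 1)                                 // leaf block: one explicit test
    return Val == Cases[Begin] ? (int)Begin : DefaultResult;
  size_t Mid = Size / 2;
  int Pivot = Cases[Begin + Mid];
  if (Val < Pivot)                               // "LHS" half
    return lowerSwitch(Cases, Begin, Begin + Mid, Val, DefaultResult);
  return lowerSwitch(Cases, Begin + Mid, End, Val, DefaultResult);  // "RHS" half
}

int main() {
  std::vector<int> Cases = {2, 5, 9, 14, 21};    // already sorted, as in the pass
  for (int V : {5, 14, 7})
    std::printf("value %d -> case index %d\n", V,
                lowerSwitch(Cases, 0, Cases.size(), V, /*default*/ -1));
}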
int InductionDescriptor::getConsecutiveDirection() const { ConstantInt *ConstStep = getConstIntStepValue(); if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne())) return ConstStep->getSExtValue(); return 0; }
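// Tiny model of the helper above: a constant step of +1 or -1 marks the
// induction as consecutive (forward or reverse), while any other step,
// including a non-constant one, reports 0. The optional stands in for the
// "constant step or not" distinction.
#include <cassert>
#include <optional>

static int consecutiveDirection(std::optional<long long> ConstStep) {
  if (ConstStep && (*ConstStep == 1 || *ConstStep == -1))
    return (int)*ConstStep;
  return 0;
}

int main() {
  assert(consecutiveDirection(1) == 1);
  assert(consecutiveDirection(-1) == -1);
  assert(consecutiveDirection(2) == 0);
  assert(consecutiveDirection(std::nullopt) == 0);
  return 0;
}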
/// Evaluate all instructions in block BB, returning true if successful, false /// if we can't evaluate it. NewBB returns the next BB that control flows into, /// or null upon return. bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB) { // This is the main evaluation loop. while (1) { Constant *InstResult = nullptr; DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { if (!SI->isSimple()) { DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); return false; // no volatile/atomic accesses. } Constant *Ptr = getVal(SI->getOperand(1)); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr); Ptr = ConstantFoldConstantExpression(CE, DL, TLI); DEBUG(dbgs() << "; To: " << *Ptr << "\n"); } if (!isSimpleEnoughPointerToCommit(Ptr)) { // If this is too complex for us to commit, reject it. DEBUG(dbgs() << "Pointer is too complex for us to evaluate store."); return false; } Constant *Val = getVal(SI->getOperand(0)); // If this might be too difficult for the backend to handle (e.g. the addr // of one global variable divided by another) then we can't commit it. if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) { DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val << "\n"); return false; } if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { if (CE->getOpcode() == Instruction::BitCast) { DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n"); // If we're evaluating a store through a bitcast, then we need // to pull the bitcast off the pointer type and push it onto the // stored value. Ptr = CE->getOperand(0); Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType(); // In order to push the bitcast onto the stored value, a bitcast // from NewTy to Val's type must be legal. If it's not, we can try // introspecting NewTy to find a legal conversion. while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) { // If NewTy is a struct, we can convert the pointer to the struct // into a pointer to its first member. // FIXME: This could be extended to support arrays as well. if (StructType *STy = dyn_cast<StructType>(NewTy)) { NewTy = STy->getTypeAtIndex(0U); IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32); Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); Constant * const IdxList[] = {IdxZero, IdxZero}; Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) Ptr = ConstantFoldConstantExpression(CE, DL, TLI); // If we can't improve the situation by introspecting NewTy, // we have to give up. } else { DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " "evaluate.\n"); return false; } } // If we found compatible types, go ahead and push the bitcast // onto the stored value. Val = ConstantExpr::getBitCast(Val, NewTy); DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); } } MutatedMemory[Ptr] = Val; } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { InstResult = ConstantExpr::get(BO->getOpcode(), getVal(BO->getOperand(0)), getVal(BO->getOperand(1))); DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult << "\n"); } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) { InstResult = ConstantExpr::getCompare(CI->getPredicate(), getVal(CI->getOperand(0)), getVal(CI->getOperand(1))); DEBUG(dbgs() << "Found a CmpInst! 
Simplifying: " << *InstResult << "\n"); } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) { InstResult = ConstantExpr::getCast(CI->getOpcode(), getVal(CI->getOperand(0)), CI->getType()); DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult << "\n"); } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)), getVal(SI->getOperand(1)), getVal(SI->getOperand(2))); DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult << "\n"); } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) { InstResult = ConstantExpr::getExtractValue( getVal(EVI->getAggregateOperand()), EVI->getIndices()); DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult << "\n"); } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) { InstResult = ConstantExpr::getInsertValue( getVal(IVI->getAggregateOperand()), getVal(IVI->getInsertedValueOperand()), IVI->getIndices()); DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult << "\n"); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { Constant *P = getVal(GEP->getOperand(0)); SmallVector<Constant*, 8> GEPOps; for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; ++i) GEPOps.push_back(getVal(*i)); InstResult = ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps, cast<GEPOperator>(GEP)->isInBounds()); DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult << "\n"); } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { if (!LI->isSimple()) { DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); return false; // no volatile/atomic accesses. } Constant *Ptr = getVal(LI->getOperand(0)); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { Ptr = ConstantFoldConstantExpression(CE, DL, TLI); DEBUG(dbgs() << "Found a constant pointer expression, constant " "folding: " << *Ptr << "\n"); } InstResult = ComputeLoadResult(Ptr); if (!InstResult) { DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load." "\n"); return false; // Could not evaluate load. } DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n"); } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { if (AI->isArrayAllocation()) { DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n"); return false; // Cannot handle array allocs. } Type *Ty = AI->getAllocatedType(); AllocaTmps.push_back( make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), AI->getName())); InstResult = AllocaTmps.back().get(); DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) { CallSite CS(&*CurInst); // Debug info can safely be ignored here. if (isa<DbgInfoIntrinsic>(CS.getInstruction())) { DEBUG(dbgs() << "Ignoring debug info.\n"); ++CurInst; continue; } // Cannot handle inline asm. if (isa<InlineAsm>(CS.getCalledValue())) { DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); return false; } if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) { if (MSI->isVolatile()) { DEBUG(dbgs() << "Can not optimize a volatile memset " << "intrinsic.\n"); return false; } Constant *Ptr = getVal(MSI->getDest()); Constant *Val = getVal(MSI->getValue()); Constant *DestVal = ComputeLoadResult(getVal(Ptr)); if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { // This memset is a no-op. 
DEBUG(dbgs() << "Ignoring no-op memset.\n"); ++CurInst; continue; } } if (II->getIntrinsicID() == Intrinsic::lifetime_start || II->getIntrinsicID() == Intrinsic::lifetime_end) { DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n"); ++CurInst; continue; } if (II->getIntrinsicID() == Intrinsic::invariant_start) { // We don't insert an entry into Values, as it doesn't have a // meaningful return value. if (!II->use_empty()) { DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n"); return false; } ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0)); Value *PtrArg = getVal(II->getArgOperand(1)); Value *Ptr = PtrArg->stripPointerCasts(); if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { Type *ElemTy = GV->getValueType(); if (!Size->isAllOnesValue() && Size->getValue().getLimitedValue() >= DL.getTypeStoreSize(ElemTy)) { Invariants.insert(GV); DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV << "\n"); } else { DEBUG(dbgs() << "Found a global var, but can not treat it as an " "invariant.\n"); } } // Continue even if we do nothing. ++CurInst; continue; } else if (II->getIntrinsicID() == Intrinsic::assume) { DEBUG(dbgs() << "Skipping assume intrinsic.\n"); ++CurInst; continue; } DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); return false; } // Resolve function pointers. Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue())); if (!Callee || Callee->mayBeOverridden()) { DEBUG(dbgs() << "Can not resolve function pointer.\n"); return false; // Cannot resolve. } SmallVector<Constant*, 8> Formals; for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) Formals.push_back(getVal(*i)); if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { InstResult = C; DEBUG(dbgs() << "Constant folded function call. Result: " << *InstResult << "\n"); } else { DEBUG(dbgs() << "Can not constant fold function call.\n"); return false; } } else { if (Callee->getFunctionType()->isVarArg()) { DEBUG(dbgs() << "Can not constant fold vararg function call.\n"); return false; } Constant *RetVal = nullptr; // Execute the call, if successful, use the return value. ValueStack.emplace_back(); if (!EvaluateFunction(Callee, RetVal, Formals)) { DEBUG(dbgs() << "Failed to evaluate function.\n"); return false; } ValueStack.pop_back(); InstResult = RetVal; if (InstResult) { DEBUG(dbgs() << "Successfully evaluated function. Result: " << *InstResult << "\n\n"); } else { DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n"); } } } else if (isa<TerminatorInst>(CurInst)) { DEBUG(dbgs() << "Found a terminator instruction.\n"); if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { if (BI->isUnconditional()) { NextBB = BI->getSuccessor(0); } else { ConstantInt *Cond = dyn_cast<ConstantInt>(getVal(BI->getCondition())); if (!Cond) return false; // Cannot determine. NextBB = BI->getSuccessor(!Cond->getZExtValue()); } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { ConstantInt *Val = dyn_cast<ConstantInt>(getVal(SI->getCondition())); if (!Val) return false; // Cannot determine. NextBB = SI->findCaseValue(Val).getCaseSuccessor(); } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) { Value *Val = getVal(IBI->getAddress())->stripPointerCasts(); if (BlockAddress *BA = dyn_cast<BlockAddress>(Val)) NextBB = BA->getBasicBlock(); else return false; // Cannot determine. 
} else if (isa<ReturnInst>(CurInst)) { NextBB = nullptr; } else { // invoke, unwind, resume, unreachable. DEBUG(dbgs() << "Can not handle terminator."); return false; // Cannot handle this terminator. } // We succeeded at evaluating this block! DEBUG(dbgs() << "Successfully evaluated block.\n"); return true; } else { // Did not know how to evaluate this! DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction." "\n"); return false; } if (!CurInst->use_empty()) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult)) InstResult = ConstantFoldConstantExpression(CE, DL, TLI); setVal(&*CurInst, InstResult); } // If we just processed an invoke, we finished evaluating the block. if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) { NextBB = II->getNormalDest(); DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n"); return true; } // Advance program counter. ++CurInst; } }
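// Highly simplified model (not the Evaluator class) of the evaluation loop
// above: walk instructions in order, fold each one to a constant using the
// already-known values of its operands, and give up on anything unrecognized.
// The opcode encoding and the Inst struct are invented for this sketch.
#include <cassert>
#include <map>
#include <string>
#include <vector>

struct Inst { char Op; std::string Dst, A, B; };  // 'c' constant, '+' add, '*' mul

static bool evaluateBlock(const std::vector<Inst> &Block,
                          std::map<std::string, long> &Vals) {
  for (const Inst &I : Block) {
    if (I.Op == 'c')      Vals[I.Dst] = std::stol(I.A);
    else if (I.Op == '+') Vals[I.Dst] = Vals.at(I.A) + Vals.at(I.B);
    else if (I.Op == '*') Vals[I.Dst] = Vals.at(I.A) * Vals.at(I.B);
    else return false;    // unknown instruction: bail out, as the real code does
  }
  return true;
}

int main() {
  std::map<std::string, long> Vals;
  bool OK = evaluateBlock({{'c', "x", "6", ""},
                           {'c', "y", "7", ""},
                           {'*', "z", "x", "y"},
                           {'+', "w", "z", "z"}}, Vals);
  assert(OK && Vals["w"] == 84);
  return 0;
}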
/// foldSelectICmpAnd - If one of the constants is zero (we know they can't /// both be) and we have an icmp instruction with zero, and we have an 'and' /// with the non-constant value and a power of two we can turn the select /// into a shift on the result of the 'and'. static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, ConstantInt *FalseVal, InstCombiner::BuilderTy *Builder) { const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy()) return nullptr; if (!match(IC->getOperand(1), m_Zero())) return nullptr; ConstantInt *AndRHS; Value *LHS = IC->getOperand(0); if (!match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS)))) return nullptr; // If both select arms are non-zero see if we have a select of the form // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic // for 'x ? 2^n : 0' and fix the thing up at the end. ConstantInt *Offset = nullptr; if (!TrueVal->isZero() && !FalseVal->isZero()) { if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2()) Offset = FalseVal; else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2()) Offset = TrueVal; else return nullptr; // Adjust TrueVal and FalseVal to the offset. TrueVal = ConstantInt::get(Builder->getContext(), TrueVal->getValue() - Offset->getValue()); FalseVal = ConstantInt::get(Builder->getContext(), FalseVal->getValue() - Offset->getValue()); } // Make sure the mask in the 'and' and one of the select arms is a power of 2. if (!AndRHS->getValue().isPowerOf2() || (!TrueVal->getValue().isPowerOf2() && !FalseVal->getValue().isPowerOf2())) return nullptr; // Determine which shift is needed to transform result of the 'and' into the // desired result. ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal; unsigned ValZeros = ValC->getValue().logBase2(); unsigned AndZeros = AndRHS->getValue().logBase2(); // If types don't match we can still convert the select by introducing a zext // or a trunc of the 'and'. The trunc case requires that all of the truncated // bits are zero, we can figure that out by looking at the 'and' mask. if (AndZeros >= ValC->getBitWidth()) return nullptr; Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType()); if (ValZeros > AndZeros) V = Builder->CreateShl(V, ValZeros - AndZeros); else if (ValZeros < AndZeros) V = Builder->CreateLShr(V, AndZeros - ValZeros); // Okay, now we know that everything is set up, we just don't know whether we // have a icmp_ne or icmp_eq and whether the true or false val is the zero. bool ShouldNotVal = !TrueVal->isZero(); ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; if (ShouldNotVal) V = Builder->CreateXor(V, ValC); // Apply an offset if needed. if (Offset) V = Builder->CreateAdd(V, Offset); return V; }
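// Scalar sanity check (plain C++, not InstCombine) of the rewrite above:
// "(x & AndMask) == 0 ? 0 : ValC", with both constants powers of two, can be
// computed by shifting the masked value so its single set bit lands on ValC's
// bit position. The constants below are chosen purely for illustration.
#include <cassert>
#include <cstdint>

static uint32_t viaSelect(uint32_t x) { return (x & 0x8u) == 0 ? 0u : 0x20u; }

static uint32_t viaShift(uint32_t x) {
  // AndZeros = log2(0x8) = 3, ValZeros = log2(0x20) = 5 -> shift left by 2.
  return (x & 0x8u) << 2;
}

int main() {
  for (uint32_t x = 0; x < 1024; ++x)
    assert(viaSelect(x) == viaShift(x));
  return 0;
}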
/// Perform simplification of memcpy's. If we have memcpy A /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite /// B to be a memcpy from X to Z (or potentially a memmove, depending on /// circumstances). This allows later passes to remove the first memcpy /// altogether. bool MemCpyOpt::processMemCpy(MemCpyInst *M) { // We can only optimize non-volatile memcpy's. if (M->isVolatile()) return false; // If the source and destination of the memcpy are the same, then zap it. if (M->getSource() == M->getDest()) { MD->removeInstruction(M); M->eraseFromParent(); return false; } // If copying from a constant, try to turn the memcpy into a memset. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource())) if (GV->isConstant() && GV->hasDefinitiveInitializer()) if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) { IRBuilder<> Builder(M); Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), M->getAlignment(), false); MD->removeInstruction(M); M->eraseFromParent(); ++NumCpyToSet; return true; } MemDepResult DepInfo = MD->getDependency(M); // Try to turn a partially redundant memset + memcpy into // memcpy + smaller memset. We don't need the memcpy size for this. if (DepInfo.isClobber()) if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst())) if (processMemSetMemCpyDependence(M, MDep)) return true; // The optimizations after this point require the memcpy size. ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); if (!CopySize) return false; // There are four possible optimizations we can do for memcpy: // a) memcpy-memcpy xform which exposes redundance for DSE. // b) call-memcpy xform for return slot optimization. // c) memcpy from freshly alloca'd space or space that has just started its // lifetime copies undefined data, and we can therefore eliminate the // memcpy in favor of the data that was already at the destination. // d) memcpy from a just-memset'd source can be turned into memset. if (DepInfo.isClobber()) { if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) { if (performCallSlotOptzn(M, M->getDest(), M->getSource(), CopySize->getZExtValue(), M->getAlignment(), C)) { MD->removeInstruction(M); M->eraseFromParent(); return true; } } } MemoryLocation SrcLoc = MemoryLocation::getForSource(M); MemDepResult SrcDepInfo = MD->getPointerDependencyFrom( SrcLoc, true, M->getIterator(), M->getParent()); if (SrcDepInfo.isClobber()) { if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst())) return processMemCpyMemCpyDependence(M, MDep); } else if (SrcDepInfo.isDef()) { Instruction *I = SrcDepInfo.getInst(); bool hasUndefContents = false; if (isa<AllocaInst>(I)) { hasUndefContents = true; } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { if (II->getIntrinsicID() == Intrinsic::lifetime_start) if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0))) if (LTSize->getZExtValue() >= CopySize->getZExtValue()) hasUndefContents = true; } if (hasUndefContents) { MD->removeInstruction(M); M->eraseFromParent(); ++NumMemCpyInstr; return true; } } if (SrcDepInfo.isClobber()) if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst())) if (performMemCpyToMemSetOptzn(M, MDep)) { MD->removeInstruction(M); M->eraseFromParent(); ++NumCpyToSet; return true; } return false; }
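// C-level illustration (not the MemCpyOpt pass itself) of the first rewrite in
// processMemCpy above: a memcpy whose source is a constant made of one
// repeated byte value can be replaced by a memset of that byte. The Pattern
// array and sizes here are invented for the example.
#include <cassert>
#include <cstring>

static const char Pattern[16] = {7, 7, 7, 7, 7, 7, 7, 7,
                                 7, 7, 7, 7, 7, 7, 7, 7};  // "bytewise" constant

int main() {
  char A[16], B[16];
  std::memcpy(A, Pattern, sizeof(A));  // original form
  std::memset(B, 7, sizeof(B));        // rewritten form
  assert(std::memcmp(A, B, sizeof(A)) == 0);
  return 0;
}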
// Set outgoing edges alive dependent on the terminator instruction SI. // If the terminator is an Invoke instruction, the call has already been run. // Return true if anything changed. bool IntegrationAttempt::checkBlockOutgoingEdges(ShadowInstruction* SI) { // TOCHECK: I think this only returns false if the block ends with an Unreachable inst? switch(SI->invar->I->getOpcode()) { case Instruction::Br: case Instruction::Switch: case Instruction::Invoke: case Instruction::Resume: break; default: return false; } if(inst_is<InvokeInst>(SI)) { InlineAttempt* IA = getInlineAttempt(SI); bool changed = false; // !localStore indicates the invoke instruction doesn't return normally if(SI->parent->localStore) { changed |= !SI->parent->succsAlive[0]; SI->parent->succsAlive[0] = true; } // I mark the exceptional edge reachable here if the call is disabled, even though // we might have proved it isn't feasible. This could be improved by converting the // invoke into a call in the final program. if((!IA) || (!IA->isEnabled()) || IA->mayUnwind) { changed |= !SI->parent->succsAlive[1]; SI->parent->succsAlive[1] = true; } return changed; } else if(inst_is<ResumeInst>(SI)) { bool changed = !mayUnwind; mayUnwind = true; return changed; } else if(BranchInst* BI = dyn_cast_inst<BranchInst>(SI)) { if(BI->isUnconditional()) { bool changed = !SI->parent->succsAlive[0]; SI->parent->succsAlive[0] = true; return changed; } } // Both switches and conditional branches use operand 0 for the condition. ShadowValue Condition = SI->getOperand(0); bool changed = false; ConstantInt* ConstCondition = dyn_cast_or_null<ConstantInt>(getConstReplacement(Condition)); if(!ConstCondition) { if(Condition.t == SHADOWVAL_INST || Condition.t == SHADOWVAL_ARG) { // Switch statements can operate on a ptrtoint operand, of which only ptrtoint(null) is useful: if(ImprovedValSetSingle* IVS = dyn_cast_or_null<ImprovedValSetSingle>(getIVSRef(Condition))) { if(IVS->onlyContainsNulls()) { ConstCondition = cast<ConstantInt>(Constant::getNullValue(SI->invar->I->getOperand(0)->getType())); } } } } if(!ConstCondition) { std::pair<ValSetType, ImprovedVal> PathVal; if(tryGetPathValue(Condition, SI->parent, PathVal)) ConstCondition = dyn_cast_val<ConstantInt>(PathVal.second.V); } TerminatorInst* TI = cast_inst<TerminatorInst>(SI); const unsigned NumSucc = TI->getNumSuccessors(); if(ConstCondition) { BasicBlock* takenTarget = 0; if(BranchInst* BI = dyn_cast_inst<BranchInst>(SI)) { // This ought to be a boolean. if(ConstCondition->isZero()) takenTarget = BI->getSuccessor(1); else takenTarget = BI->getSuccessor(0); } else { SwitchInst* SwI = cast_inst<SwitchInst>(SI); SwitchInst::CaseIt targetidx = SwI->findCaseValue(ConstCondition); takenTarget = targetidx.getCaseSuccessor(); } if(takenTarget) { // We know where the instruction is going -- remove this block as a predecessor for its other targets. LPDEBUG("Branch or switch instruction given known target: " << takenTarget->getName() << "\n"); return setEdgeAlive(TI, SI->parent, takenTarget); } // Else fall through to set all alive. } SwitchInst* Switch; ImprovedValSetSingle* IVS; if((Switch = dyn_cast_inst<SwitchInst>(SI)) && (IVS = dyn_cast<ImprovedValSetSingle>(getIVSRef(Condition))) && IVS->SetType == ValSetTypeScalar && !IVS->Values.empty()) { // A set of values feeding a switch. Set each corresponding edge alive. 
bool changed = false; for (unsigned i = 0, ilim = IVS->Values.size(); i != ilim; ++i) { SwitchInst::CaseIt targetit = Switch->findCaseValue(cast<ConstantInt>(getConstReplacement(IVS->Values[i].V))); BasicBlock* target = targetit.getCaseSuccessor(); changed |= setEdgeAlive(TI, SI->parent, target); } return changed; } // Condition unknown -- set all successors alive. for (unsigned I = 0; I != NumSucc; ++I) { // Mark outgoing edge alive if(!SI->parent->succsAlive[I]) changed = true; SI->parent->succsAlive[I] = true; } return changed; }
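// Small model (plain C++, no ShadowInstruction machinery) of the edge-liveness
// update above: a branch whose condition folds to a constant wakes exactly one
// successor, otherwise every successor edge is marked alive. The return value
// mirrors the function's "did anything change" contract.
#include <cassert>
#include <optional>
#include <vector>

static bool markOutgoingEdges(std::vector<bool> &SuccsAlive,
                              std::optional<bool> FoldedCond) {
  bool Changed = false;
  auto setAlive = [&](size_t I) {
    if (!SuccsAlive[I]) { SuccsAlive[I] = true; Changed = true; }
  };
  if (FoldedCond)                       // known condition: one live target
    setAlive(*FoldedCond ? 0 : 1);      // convention: true edge is successor 0
  else                                  // unknown: all successors stay reachable
    for (size_t I = 0; I != SuccsAlive.size(); ++I)
      setAlive(I);
  return Changed;
}

int main() {
  std::vector<bool> Alive(2, false);
  assert(markOutgoingEdges(Alive, true) && Alive[0] && !Alive[1]);
  assert(!markOutgoingEdges(Alive, true));        // second call: nothing changes
  assert(markOutgoingEdges(Alive, std::nullopt)); // unknown cond: edge 1 wakes up
  return 0;
}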
bool Instruction::isSafeToSpeculativelyExecute() const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) if (Constant *C = dyn_cast<Constant>(getOperand(i))) if (C->canTrap()) return false; switch (getOpcode()) { default: return true; case UDiv: case URem: { // x / y is undefined if y == 0, but calcuations like x / 3 are safe. ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1)); return Op && !Op->isNullValue(); } case SDiv: case SRem: { // x / y is undefined if y == 0, and might be undefined if y == -1, // but calcuations like x / 3 are safe. ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1)); return Op && !Op->isNullValue() && !Op->isAllOnesValue(); } case Load: { const LoadInst *LI = cast<LoadInst>(this); if (LI->isVolatile()) return false; return LI->getPointerOperand()->isDereferenceablePointer(); } case Call: if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(this)) { switch (II->getIntrinsicID()) { case Intrinsic::gla_abs: case Intrinsic::gla_addCarry: case Intrinsic::gla_all: case Intrinsic::gla_any: case Intrinsic::gla_bitCount: case Intrinsic::gla_bitFieldInsert: case Intrinsic::gla_bitReverse: // case Intrinsic::gla_discard: // case Intrinsic::gla_discardConditional: case Intrinsic::gla_fAbs: case Intrinsic::gla_fAcos: case Intrinsic::gla_fAcosh: case Intrinsic::gla_fAsin: case Intrinsic::gla_fAsinh: case Intrinsic::gla_fAtan: case Intrinsic::gla_fAtan2: case Intrinsic::gla_fAtanh: case Intrinsic::gla_fCeiling: case Intrinsic::gla_fClamp: case Intrinsic::gla_fCos: case Intrinsic::gla_fCosh: case Intrinsic::gla_fCross: case Intrinsic::gla_fDFdx: case Intrinsic::gla_fDFdy: case Intrinsic::gla_fDegrees: case Intrinsic::gla_fDistance: case Intrinsic::gla_fDot2: case Intrinsic::gla_fDot3: case Intrinsic::gla_fDot4: case Intrinsic::gla_fExp: case Intrinsic::gla_fExp10: case Intrinsic::gla_fExp2: case Intrinsic::gla_fFaceForward: case Intrinsic::gla_fFilterWidth: case Intrinsic::gla_fFixedTransform: case Intrinsic::gla_fFloatBitsToInt: case Intrinsic::gla_fFloor: case Intrinsic::gla_fFma: case Intrinsic::gla_fFraction: case Intrinsic::gla_fFrexp: case Intrinsic::gla_fIntBitsTofloat: case Intrinsic::gla_fInverseSqrt: case Intrinsic::gla_fIsInf: case Intrinsic::gla_fIsNan: case Intrinsic::gla_fLdexp: case Intrinsic::gla_fLength: case Intrinsic::gla_fLit: case Intrinsic::gla_fLog: case Intrinsic::gla_fLog10: case Intrinsic::gla_fLog2: case Intrinsic::gla_fMax: case Intrinsic::gla_fMin: case Intrinsic::gla_fMix: case Intrinsic::gla_fModF: case Intrinsic::gla_fMultiInsert: case Intrinsic::gla_fNormalize: case Intrinsic::gla_fNormalize3D: case Intrinsic::gla_fPackDouble2x32: case Intrinsic::gla_fPackSnorm4x8: case Intrinsic::gla_fPackUnorm2x16: case Intrinsic::gla_fPackUnorm4x8: case Intrinsic::gla_fPow: case Intrinsic::gla_fPowi: case Intrinsic::gla_fQueryTextureLod: case Intrinsic::gla_fRTextureSample1: case Intrinsic::gla_fRTextureSample2: case Intrinsic::gla_fRTextureSample3: case Intrinsic::gla_fRTextureSample4: case Intrinsic::gla_fRTextureSampleLodRefZ1: case Intrinsic::gla_fRTextureSampleLodRefZ2: case Intrinsic::gla_fRTextureSampleLodRefZ3: case Intrinsic::gla_fRTextureSampleLodRefZ4: case Intrinsic::gla_fRTextureSampleLodRefZOffset1: case Intrinsic::gla_fRTextureSampleLodRefZOffset2: case Intrinsic::gla_fRTextureSampleLodRefZOffset3: case Intrinsic::gla_fRTextureSampleLodRefZOffset4: case Intrinsic::gla_fRTextureSampleLodRefZOffsetGrad1: case Intrinsic::gla_fRTextureSampleLodRefZOffsetGrad2: case Intrinsic::gla_fRTextureSampleLodRefZOffsetGrad3: case 
Intrinsic::gla_fRTextureSampleLodRefZOffsetGrad4: case Intrinsic::gla_fRadians: case Intrinsic::gla_fReadData: case Intrinsic::gla_fReadInterpolant: case Intrinsic::gla_fReadInterpolantOffset: case Intrinsic::gla_fReflect: case Intrinsic::gla_fRefract: case Intrinsic::gla_fRoundEven: case Intrinsic::gla_fRoundFast: case Intrinsic::gla_fRoundZero: case Intrinsic::gla_fSign: case Intrinsic::gla_fSin: case Intrinsic::gla_fSinh: case Intrinsic::gla_fSmoothStep: case Intrinsic::gla_fSqrt: case Intrinsic::gla_fStep: case Intrinsic::gla_fSwizzle: case Intrinsic::gla_fTan: case Intrinsic::gla_fTanh: case Intrinsic::gla_fTexelFetchOffset: case Intrinsic::gla_fTexelGather: case Intrinsic::gla_fTexelGatherOffset: case Intrinsic::gla_fTexelGatherOffsets: case Intrinsic::gla_fTextureSample: case Intrinsic::gla_fTextureSampleLodRefZ: case Intrinsic::gla_fTextureSampleLodRefZOffset: case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad: case Intrinsic::gla_fUnpackDouble2x32: case Intrinsic::gla_fUnpackSnorm4x8: case Intrinsic::gla_fUnpackUnorm2x16: case Intrinsic::gla_fUnpackUnorm4x8: // case Intrinsic::gla_fWriteData: // case Intrinsic::gla_fWriteInterpolant: case Intrinsic::gla_findLSB: case Intrinsic::gla_getInterpolant: case Intrinsic::gla_multiInsert: case Intrinsic::gla_not: case Intrinsic::gla_queryTextureSize: case Intrinsic::gla_rTextureSample1: case Intrinsic::gla_rTextureSample2: case Intrinsic::gla_rTextureSample3: case Intrinsic::gla_rTextureSample4: case Intrinsic::gla_rTextureSampleLodRefZ1: case Intrinsic::gla_rTextureSampleLodRefZ2: case Intrinsic::gla_rTextureSampleLodRefZ3: case Intrinsic::gla_rTextureSampleLodRefZ4: case Intrinsic::gla_rTextureSampleLodRefZOffset1: case Intrinsic::gla_rTextureSampleLodRefZOffset2: case Intrinsic::gla_rTextureSampleLodRefZOffset3: case Intrinsic::gla_rTextureSampleLodRefZOffset4: case Intrinsic::gla_rTextureSampleLodRefZOffsetGrad1: case Intrinsic::gla_rTextureSampleLodRefZOffsetGrad2: case Intrinsic::gla_rTextureSampleLodRefZOffsetGrad3: case Intrinsic::gla_rTextureSampleLodRefZOffsetGrad4: case Intrinsic::gla_readData: case Intrinsic::gla_sBitFieldExtract: case Intrinsic::gla_sClamp: case Intrinsic::gla_sFindMSB: case Intrinsic::gla_sFma: case Intrinsic::gla_sMax: case Intrinsic::gla_sMin: case Intrinsic::gla_smulExtended: case Intrinsic::gla_subBorrow: case Intrinsic::gla_swizzle: case Intrinsic::gla_texelFetchOffset: case Intrinsic::gla_texelGather: case Intrinsic::gla_texelGatherOffset: case Intrinsic::gla_texelGatherOffsets: case Intrinsic::gla_textureSample: case Intrinsic::gla_textureSampleLodRefZ: case Intrinsic::gla_textureSampleLodRefZOffset: case Intrinsic::gla_textureSampleLodRefZOffsetGrad: case Intrinsic::gla_uBitFieldExtract: case Intrinsic::gla_uClamp: case Intrinsic::gla_uFindMSB: case Intrinsic::gla_uFma: case Intrinsic::gla_uMax: case Intrinsic::gla_uMin: case Intrinsic::gla_umulExtended: // case Intrinsic::gla_writeData: return true; default: break; } } return false; // The called function could have undefined behavior or // side-effects. // FIXME: We should special-case some intrinsics (bswap, // overflow-checking arithmetic, etc.) case VAArg: case Alloca: case Invoke: case PHI: case Store: case Ret: case Br: case IndirectBr: case Switch: case Unwind: case Unreachable: return false; // Misc instructions which have effects } }
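// Standalone restatement (not the Instruction API) of the division guards in
// isSafeToSpeculativelyExecute above: an unsigned divide can be hoisted only
// when the divisor is a known non-zero constant, and a signed divide
// additionally rules out -1 because INT_MIN / -1 overflows.
#include <cassert>
#include <optional>

static bool safeToSpeculateUDiv(std::optional<long long> Divisor) {
  return Divisor && *Divisor != 0;
}

static bool safeToSpeculateSDiv(std::optional<long long> Divisor) {
  return Divisor && *Divisor != 0 && *Divisor != -1;
}

int main() {
  assert(safeToSpeculateUDiv(3) && !safeToSpeculateUDiv(0));
  assert(!safeToSpeculateUDiv(std::nullopt));   // unknown divisor: not safe
  assert(safeToSpeculateSDiv(3) && !safeToSpeculateSDiv(-1));
  return 0;
}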
/// MatchOperationAddr - Given an instruction or constant expr, see if we can /// fold the operation into the addressing mode. If so, update the addressing /// mode and return true, otherwise return false without modifying AddrMode. bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth) { // Avoid exponential behavior on extremely deep expression trees. if (Depth >= 5) return false; switch (Opcode) { case Instruction::PtrToInt: // PtrToInt is always a noop, as we know that the int type is pointer sized. return MatchAddr(AddrInst->getOperand(0), Depth); case Instruction::IntToPtr: // This inttoptr is a no-op if the integer type is pointer sized. if (TLI.getValueType(AddrInst->getOperand(0)->getType()) == TLI.getPointerTy()) return MatchAddr(AddrInst->getOperand(0), Depth); return false; case Instruction::BitCast: // BitCast is always a noop, and we can handle it as long as it is // int->int or pointer->pointer (we don't want int<->fp or something). if ((AddrInst->getOperand(0)->getType()->isPointerTy() || AddrInst->getOperand(0)->getType()->isIntegerTy()) && // Don't touch identity bitcasts. These were probably put here by LSR, // and we don't want to mess around with them. Assume it knows what it // is doing. AddrInst->getOperand(0)->getType() != AddrInst->getType()) return MatchAddr(AddrInst->getOperand(0), Depth); return false; case Instruction::Add: { // Check to see if we can merge in the RHS then the LHS. If so, we win. ExtAddrMode BackupAddrMode = AddrMode; unsigned OldSize = AddrModeInsts.size(); if (MatchAddr(AddrInst->getOperand(1), Depth+1) && MatchAddr(AddrInst->getOperand(0), Depth+1)) return true; // Restore the old addr mode info. AddrMode = BackupAddrMode; AddrModeInsts.resize(OldSize); // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. if (MatchAddr(AddrInst->getOperand(0), Depth+1) && MatchAddr(AddrInst->getOperand(1), Depth+1)) return true; // Otherwise we definitely can't merge the ADD in. AddrMode = BackupAddrMode; AddrModeInsts.resize(OldSize); break; } //case Instruction::Or: // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. //break; case Instruction::Mul: case Instruction::Shl: { // Can only handle X*C and X << C. ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); if (!RHS) return false; int64_t Scale = RHS->getSExtValue(); if (Opcode == Instruction::Shl) Scale = 1LL << Scale; return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); } case Instruction::GetElementPtr: { // Scan the GEP. We check it if it contains constant offsets and at most // one variable offset. int VariableOperand = -1; unsigned VariableScale = 0; int64_t ConstantOffset = 0; const TargetData *TD = TLI.getTargetData(); gep_type_iterator GTI = gep_type_begin(AddrInst); for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { if (const StructType *STy = dyn_cast<StructType>(*GTI)) { const StructLayout *SL = TD->getStructLayout(STy); unsigned Idx = cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); ConstantOffset += SL->getElementOffset(Idx); } else { uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType()); if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { ConstantOffset += CI->getSExtValue()*TypeSize; } else if (TypeSize) { // Scales of zero don't do anything. // We only allow one variable index at the moment. if (VariableOperand != -1) return false; // Remember the variable index. 
VariableOperand = i; VariableScale = TypeSize; } } } // A common case is for the GEP to only do a constant offset. In this case, // just add it to the disp field and check validity. if (VariableOperand == -1) { AddrMode.BaseOffs += ConstantOffset; if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){ // Check to see if we can fold the base pointer in too. if (MatchAddr(AddrInst->getOperand(0), Depth+1)) return true; } AddrMode.BaseOffs -= ConstantOffset; return false; } // Save the valid addressing mode in case we can't match. ExtAddrMode BackupAddrMode = AddrMode; unsigned OldSize = AddrModeInsts.size(); // See if the scale and offset amount is valid for this target. AddrMode.BaseOffs += ConstantOffset; // Match the base operand of the GEP. if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) { // If it couldn't be matched, just stuff the value in a register. if (AddrMode.HasBaseReg) { AddrMode = BackupAddrMode; AddrModeInsts.resize(OldSize); return false; } AddrMode.HasBaseReg = true; AddrMode.BaseReg = AddrInst->getOperand(0); } // Match the remaining variable portion of the GEP. if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, Depth)) { // If it couldn't be matched, try stuffing the base into a register // instead of matching it, and retrying the match of the scale. AddrMode = BackupAddrMode; AddrModeInsts.resize(OldSize); if (AddrMode.HasBaseReg) return false; AddrMode.HasBaseReg = true; AddrMode.BaseReg = AddrInst->getOperand(0); AddrMode.BaseOffs += ConstantOffset; if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, Depth)) { // If even that didn't work, bail. AddrMode = BackupAddrMode; AddrModeInsts.resize(OldSize); return false; } } return true; } } return false; }
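// Simplified model (not AddressingModeMatcher) of the GEP scan above: constant
// indices accumulate into a single byte offset, while at most one variable
// index is remembered together with its scale (the element size). The Index
// struct and the layout numbers below are invented for the example.
#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

struct Index { std::optional<int64_t> Const; uint64_t ElemSize; };

static bool scanGEP(const std::vector<Index> &Indices, int64_t &ConstOffset,
                    int &VariableOperand, uint64_t &VariableScale) {
  ConstOffset = 0;
  VariableOperand = -1;
  for (int i = 0, e = (int)Indices.size(); i != e; ++i) {
    if (Indices[i].Const) {
      ConstOffset += *Indices[i].Const * (int64_t)Indices[i].ElemSize;
    } else if (Indices[i].ElemSize) {      // scale-zero indices contribute nothing
      if (VariableOperand != -1)
        return false;                      // only one variable index is allowed
      VariableOperand = i;
      VariableScale = Indices[i].ElemSize;
    }
  }
  return true;
}

int main() {
  int64_t Off; int VarOp; uint64_t Scale = 0;
  // A constant index 2 over 16-byte elements, then one variable 4-byte index.
  bool OK = scanGEP({{2, 16}, {std::nullopt, 4}}, Off, VarOp, Scale);
  assert(OK && Off == 32 && VarOp == 1 && Scale == 4);
  return 0;
}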
int BranchProbabilities::CheckIntegerHeuristic() { // Heuristic fails if the last instruction is not a conditional branch BranchInst *BI = dyn_cast<BranchInst>(_TI); if ((!BI) || (BI->isUnconditional())) return -1; // All integer comparisons are done with the icmp instruction ICmpInst *icmp = dyn_cast<ICmpInst>(BI->getCondition()); if (!icmp) return -1; Value *v[2]; v[0] = icmp->getOperand(0); v[1] = icmp->getOperand(1); // If neither is a constant, nothing to do if (!isa<ConstantInt>(v[0]) && !isa<ConstantInt>(v[1])) return -1; // If we're dealing with something other than ints, nothing to do if (!isa<IntegerType>(v[0]->getType())) return -1; // Get comparison ICmpInst::Predicate pred = icmp->getPredicate(); // Eq and Not Eq are easy cases if (pred == ICmpInst::ICMP_EQ) return 1; else if (pred == ICmpInst::ICMP_NE) return 0; ConstantInt *CI = dyn_cast<ConstantInt>(v[1]); // If the right side isn't a constant, swap the predicate so we can pretend // the constant is on the right. if (!CI) { pred = icmp->getSwappedPredicate(); CI = cast<ConstantInt>(v[0]); } // Choose the appropriate branch depending on the const val and predicate if (CI->isZero()) { switch (pred) { case ICmpInst::ICMP_UGE: assert(false && "UGE zero always returns true"); return -1; case ICmpInst::ICMP_ULT: assert(false && "ULT zero always returns false"); return -1; case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: return 0; case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: return 1; default: return -1; } } else if (CI->isOne()) { switch (pred) { case ICmpInst::ICMP_UGE: case ICmpInst::ICMP_SGE: return 0; case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: return 1; default: return -1; } } else if (CI->isAllOnesValue()) { switch (pred) { case ICmpInst::ICMP_SGT: return 0; case ICmpInst::ICMP_SLE: return 1; default: return -1; } } return -1; }
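// Sketch of the predicate-swap trick used above, in plain C++: when the
// constant sits on the left of the comparison, flipping the predicate lets the
// same "constant on the right" decision table be reused. The predicate names
// here are local to this example.
#include <cassert>

enum Pred { SLT, SGT, SLE, SGE };

static Pred swapped(Pred P) {
  switch (P) {
  case SLT: return SGT;   // c < x  <=>  x > c
  case SGT: return SLT;
  case SLE: return SGE;
  case SGE: return SLE;
  }
  return P;
}

static bool eval(Pred P, int L, int R) {
  switch (P) {
  case SLT: return L < R;
  case SGT: return L > R;
  case SLE: return L <= R;
  case SGE: return L >= R;
  }
  return false;
}

int main() {
  // "0 < x" with the constant on the left behaves like "x > 0".
  for (int x = -3; x <= 3; ++x)
    assert(eval(SLT, 0, x) == eval(swapped(SLT), x, 0));
  return 0;
}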
/// performCallSlotOptzn - takes a memcpy and a call that it depends on, /// and checks for the possibility of a call slot optimization by having /// the call write its result directly into the destination of the memcpy. bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, Value *cpySrc, uint64_t cpyLen, unsigned cpyAlign, CallInst *C) { // The general transformation to keep in mind is // // call @func(..., src, ...) // memcpy(dest, src, ...) // // -> // // memcpy(dest, src, ...) // call @func(..., dest, ...) // // Since moving the memcpy is technically awkward, we additionally check that // src only holds uninitialized values at the moment of the call, meaning that // the memcpy can be discarded rather than moved. // Deliberately get the source and destination with bitcasts stripped away, // because we'll need to do type comparisons based on the underlying type. CallSite CS(C); // Require that src be an alloca. This simplifies the reasoning considerably. AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc); if (!srcAlloca) return false; // Check that all of src is copied to dest. if (!DL) return false; ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); if (!srcArraySize) return false; uint64_t srcSize = DL->getTypeAllocSize(srcAlloca->getAllocatedType()) * srcArraySize->getZExtValue(); if (cpyLen < srcSize) return false; // Check that accessing the first srcSize bytes of dest will not cause a // trap. Otherwise the transform is invalid since it might cause a trap // to occur earlier than it otherwise would. if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) { // The destination is an alloca. Check it is larger than srcSize. ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize()); if (!destArraySize) return false; uint64_t destSize = DL->getTypeAllocSize(A->getAllocatedType()) * destArraySize->getZExtValue(); if (destSize < srcSize) return false; } else if (Argument *A = dyn_cast<Argument>(cpyDest)) { // If the destination is an sret parameter then only accesses that are // outside of the returned struct type can trap. if (!A->hasStructRetAttr()) return false; Type *StructTy = cast<PointerType>(A->getType())->getElementType(); if (!StructTy->isSized()) { // The call may never return and hence the copy-instruction may never // be executed, and therefore it's not safe to say "the destination // has at least <cpyLen> bytes, as implied by the copy-instruction", return false; } uint64_t destSize = DL->getTypeAllocSize(StructTy); if (destSize < srcSize) return false; } else { return false; } // Check that dest points to memory that is at least as aligned as src. unsigned srcAlign = srcAlloca->getAlignment(); if (!srcAlign) srcAlign = DL->getABITypeAlignment(srcAlloca->getAllocatedType()); bool isDestSufficientlyAligned = srcAlign <= cpyAlign; // If dest is not aligned enough and we can't increase its alignment then // bail out. if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) return false; // Check that src is not accessed except via the call and the memcpy. This // guarantees that it holds only undefined values when passed in (so the final // memcpy can be dropped), that it is not read or written between the call and // the memcpy, and that writing beyond the end of it is undefined. 
SmallVector<User*, 8> srcUseList(srcAlloca->user_begin(), srcAlloca->user_end()); while (!srcUseList.empty()) { User *U = srcUseList.pop_back_val(); if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) { for (User *UU : U->users()) srcUseList.push_back(UU); } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) { if (G->hasAllZeroIndices()) for (User *UU : U->users()) srcUseList.push_back(UU); else return false; } else if (U != C && U != cpy) { return false; } } // Check that src isn't captured by the called function since the // transformation can cause aliasing issues in that case. for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) if (CS.getArgument(i) == cpySrc && !CS.doesNotCapture(i)) return false; // Since we're changing the parameter to the callsite, we need to make sure // that what would be the new parameter dominates the callsite. DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest)) if (!DT.dominates(cpyDestInst, C)) return false; // In addition to knowing that the call does not access src in some // unexpected manner, for example via a global, which we deduce from // the use analysis, we also need to know that it does not sneakily // access dest. We rely on AA to figure this out for us. AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize); // If necessary, perform additional analysis. if (MR != AliasAnalysis::NoModRef) MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT); if (MR != AliasAnalysis::NoModRef) return false; // All the checks have passed, so do the transformation. bool changedArgument = false; for (unsigned i = 0; i < CS.arg_size(); ++i) if (CS.getArgument(i)->stripPointerCasts() == cpySrc) { Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(), cpyDest->getName(), C); changedArgument = true; if (CS.getArgument(i)->getType() == Dest->getType()) CS.setArgument(i, Dest); else CS.setArgument(i, CastInst::CreatePointerCast(Dest, CS.getArgument(i)->getType(), Dest->getName(), C)); } if (!changedArgument) return false; // If the destination wasn't sufficiently aligned then increase its alignment. if (!isDestSufficientlyAligned) { assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!"); cast<AllocaInst>(cpyDest)->setAlignment(srcAlign); } // Drop any cached information about the call, because we may have changed // its dependence information by changing its parameter. MD->removeInstruction(C); // Remove the memcpy. MD->removeInstruction(cpy); ++NumMemCpyInstr; return true; }
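// Scalar restatement (not MemCpyOpt itself) of the alignment gate added in
// this version of performCallSlotOptzn: the rewrite is allowed when the
// destination is at least as aligned as the source slot, and otherwise only
// when the destination is a local alloca whose alignment can be raised
// afterwards. The helper name is hypothetical.
#include <cassert>

static bool alignmentAllowsCallSlotOpt(unsigned srcAlign, unsigned destAlign,
                                       bool destIsAlloca) {
  bool destSufficientlyAligned = srcAlign <= destAlign;
  if (!destSufficientlyAligned && !destIsAlloca)
    return false;               // cannot prove or fix the alignment: bail out
  return true;                  // otherwise OK (an alloca's alignment may be bumped)
}

int main() {
  assert(alignmentAllowsCallSlotOpt(16, 16, false));  // already aligned enough
  assert(!alignmentAllowsCallSlotOpt(16, 8, false));  // under-aligned non-alloca dest
  assert(alignmentAllowsCallSlotOpt(16, 8, true));    // alloca: can be realigned
  return 0;
}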
static ReductionKind matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, unsigned &Opcode, Type *&Ty) { if (!EnableReduxCost) return RK_None; // Need to extract the first element. ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); unsigned Idx = ~0u; if (CI) Idx = CI->getZExtValue(); if (Idx != 0) return RK_None; auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0)); if (!RdxStart) return RK_None; Optional<ReductionData> RD = getReductionData(RdxStart); if (!RD) return RK_None; Type *VecTy = ReduxRoot->getOperand(0)->getType(); unsigned NumVecElems = VecTy->getVectorNumElements(); if (!isPowerOf2_32(NumVecElems)) return RK_None; // We look for a sequence of shuffles and adds like the following, matching one // fadd/shufflevector pair at a time. // // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 // %r = extractelement <4 x float> %bin.rdx8, i32 0 unsigned MaskStart = 1; Instruction *RdxOp = RdxStart; SmallVector<int, 32> ShuffleMask(NumVecElems, 0); unsigned NumVecElemsRemain = NumVecElems; while (NumVecElemsRemain - 1) { // Check for the right reduction operation. if (!RdxOp) return RK_None; Optional<ReductionData> RDLevel = getReductionData(RdxOp); if (!RDLevel || !RDLevel->hasSameData(*RD)) return RK_None; Value *NextRdxOp; ShuffleVectorInst *Shuffle; std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS); // Check that the current reduction operation and the shuffle use the same value. if (Shuffle == nullptr) return RK_None; if (Shuffle->getOperand(0) != NextRdxOp) return RK_None; // Check that the shuffle masks match. for (unsigned j = 0; j != MaskStart; ++j) ShuffleMask[j] = MaskStart + j; // Fill the rest of the mask with -1 for undef. std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); if (ShuffleMask != Mask) return RK_None; RdxOp = dyn_cast<Instruction>(NextRdxOp); NumVecElemsRemain /= 2; MaskStart *= 2; } Opcode = RD->Opcode; Ty = VecTy; return RD->Kind; }
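// Plain-array illustration (no LLVM vectors) of the log2(N) splitting
// reduction that matchVectorSplittingReduction recognizes: each step shuffles
// the upper half down and adds it to the lower half, and lane 0 of the final
// vector holds the total, which the extractelement then reads.
#include <cassert>

int main() {
  float v[4] = {1.0f, 2.0f, 3.0f, 4.0f};

  // Step 1: shuffle <2, 3, undef, undef> and add -> lanes {v0+v2, v1+v3, ...}.
  float s1[4] = {v[0] + v[2], v[1] + v[3], 0.0f, 0.0f};

  // Step 2: shuffle <1, undef, undef, undef> and add -> lane 0 holds the sum.
  float s2 = s1[0] + s1[1];

  assert(s2 == 1.0f + 2.0f + 3.0f + 4.0f);   // extractelement ..., i32 0
  return 0;
}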