// Delete the instructions that we scalarized. If a full vector result // is still needed, recreate it using InsertElements. bool Scalarizer::finish() { // The presence of data in Gathered or Scattered indicates changes // made to the Function. if (Gathered.empty() && Scattered.empty()) return false; for (const auto &GMI : Gathered) { Instruction *Op = GMI.first; ValueVector &CV = *GMI.second; if (!Op->use_empty()) { // The value is still needed, so recreate it using a series of // InsertElements. Type *Ty = Op->getType(); Value *Res = UndefValue::get(Ty); BasicBlock *BB = Op->getParent(); unsigned Count = Ty->getVectorNumElements(); IRBuilder<> Builder(Op); if (isa<PHINode>(Op)) Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); for (unsigned I = 0; I < Count; ++I) Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I), Op->getName() + ".upto" + Twine(I)); Res->takeName(Op); Op->replaceAllUsesWith(Res); } Op->eraseFromParent(); } Gathered.clear(); Scattered.clear(); return true; }
// Delete the instructions that we scalarized. If a full vector result // is still needed, recreate it using InsertElements. bool Scalarizer::finish() { if (Gathered.empty()) return false; for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end(); GMI != GME; ++GMI) { Instruction *Op = GMI->first; ValueVector &CV = *GMI->second; if (!Op->use_empty()) { // The value is still needed, so recreate it using a series of // InsertElements. Type *Ty = Op->getType(); Value *Res = UndefValue::get(Ty); unsigned Count = Ty->getVectorNumElements(); IRBuilder<> Builder(Op->getParent(), Op); for (unsigned I = 0; I < Count; ++I) Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I), Op->getName() + ".upto" + Twine(I)); Res->takeName(Op); Op->replaceAllUsesWith(Res); } Op->eraseFromParent(); } Gathered.clear(); Scattered.clear(); return true; }
bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { InsertAutomaticGCRoots(F, S); // FIXME: Turn this back on after fixing gcregroot in SelectionDAG. //InsertGCRegisterRoots(F, S); bool LowerWr = !S.customWriteBarrier(); bool LowerRd = !S.customReadBarrier(); bool InitRoots = S.initializeRoots(); SmallVector<Instruction *, 32> Roots; bool MadeChange = false; for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB) { for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) { Function *F = CI->getCalledFunction(); switch (F->getIntrinsicID()) { case Intrinsic::gcwrite: if (LowerWr) { // Replace a write barrier with a simple store. Value *St = new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI); CI->replaceAllUsesWith(St); CI->eraseFromParent(); } break; case Intrinsic::gcread: if (LowerRd) { // Replace a read barrier with a simple load. Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); Ld->takeName(CI); CI->replaceAllUsesWith(Ld); CI->eraseFromParent(); } break; case Intrinsic::gcroot: if (InitRoots) { // Initialize the GC root, but do not delete the intrinsic. The // backend needs the intrinsic to flag the stack slot. Value *V = CI->getArgOperand(0)->stripPointerCastsOnly(); Roots.push_back(cast<Instruction>(V)); } break; default: continue; } MadeChange = true; } } } if (Roots.size()) MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); return MadeChange; }
void FuncRewriter::expandFunc() { Type *I32 = Type::getInt32Ty(Func->getContext()); // We need to do two passes: When we process an invoke we need to // look at its landingpad, so we can't remove the landingpads until // all the invokes have been processed. for (Function::iterator BB = Func->begin(), E = Func->end(); BB != E; ++BB) { for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) { Instruction *Inst = Iter++; if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) { expandInvokeInst(Invoke); } else if (ResumeInst *Resume = dyn_cast<ResumeInst>(Inst)) { expandResumeInst(Resume); } else if (IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(Inst)) { if (Intrinsic->getIntrinsicID() == Intrinsic::eh_typeid_for) { Value *ExcType = Intrinsic->getArgOperand(0); Value *Val = ConstantInt::get( I32, ExcInfoWriter->getIDForExceptionType(ExcType)); Intrinsic->replaceAllUsesWith(Val); Intrinsic->eraseFromParent(); } } } } for (Function::iterator BB = Func->begin(), E = Func->end(); BB != E; ++BB) { for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) { Instruction *Inst = Iter++; if (LandingPadInst *LP = dyn_cast<LandingPadInst>(Inst)) { initializeFrame(); Value *LPPtr = new BitCastInst( FrameJmpBuf, LP->getType()->getPointerTo(), "landingpad_ptr", LP); Value *LPVal = CopyDebug(new LoadInst(LPPtr, "", LP), LP); LPVal->takeName(LP); LP->replaceAllUsesWith(LPVal); LP->eraseFromParent(); } } } }
// Try to move an addrspacecast from the pointer operand of GEP to the result
// of the GEP:
//
//   GEP    = gep (addrspacecast X), indices
//   =>
//   NewGEP = gep X, indices
//   NewASC = addrspacecast NewGEP
//
// Returns the new addrspacecast (an instruction or a constant expression,
// mirroring what GEP is), or nullptr when hoistAddrSpaceCastFrom finds
// nothing to hoist from the pointer operand.
Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
    GEPOperator *GEP, int Depth) {
  Value *HoistedPtr =
      hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1);
  if (!HoistedPtr)
    return nullptr;

  // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
  assert(isEliminableAddrSpaceCast(HoistedPtr));
  Operator *Cast = cast<Operator>(HoistedPtr);

  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());

  Instruction *GEPI = dyn_cast<Instruction>(GEP);
  if (!GEPI) {
    // GEP is a constant expression, so build the replacement from constant
    // expressions as well.
    Constant *ConstGEP = ConstantExpr::getGetElementPtr(
        GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
        Indices, GEP->isInBounds());
    return ConstantExpr::getAddrSpaceCast(ConstGEP, GEP->getType());
  }

  // GEP is an instruction: emit the new GEP and the cast right before it.
  GetElementPtrInst *InstGEP = GetElementPtrInst::Create(
      GEP->getSourceElementType(), Cast->getOperand(0), Indices, "", GEPI);
  InstGEP->setIsInBounds(GEP->isInBounds());

  Value *HoistedCast = new AddrSpaceCastInst(InstGEP, GEP->getType(), "", GEPI);
  HoistedCast->takeName(GEP);
  // Without RAUWing GEP, the compiler would visit GEP again and emit
  // redundant instructions. This is exercised in test @rauw in
  // access-non-generic.ll.
  GEP->replaceAllUsesWith(HoistedCast);
  return HoistedCast;
}
void StraightLineStrengthReduce::rewriteCandidateWithBasis( const Candidate &C, const Candidate &Basis) { // An instruction can correspond to multiple candidates. Therefore, instead of // simply deleting an instruction when we rewrite it, we mark its parent as // nullptr (i.e. unlink it) so that we can skip the candidates whose // instruction is already rewritten. if (!C.Ins->getParent()) return; assert(C.Base == Basis.Base && C.Stride == Basis.Stride); // Basis = (B + i) * S // C = (B + i') * S // ==> // C = Basis + (i' - i) * S IRBuilder<> Builder(C.Ins); ConstantInt *IndexOffset = ConstantInt::get( C.Ins->getContext(), C.Index->getValue() - Basis.Index->getValue()); Value *Reduced; // TODO: preserve nsw/nuw in some cases. if (IndexOffset->isOne()) { // If (i' - i) is 1, fold C into Basis + S. Reduced = Builder.CreateAdd(Basis.Ins, C.Stride); } else if (IndexOffset->isMinusOne()) { // If (i' - i) is -1, fold C into Basis - S. Reduced = Builder.CreateSub(Basis.Ins, C.Stride); } else { Value *Bump = Builder.CreateMul(C.Stride, IndexOffset); Reduced = Builder.CreateAdd(Basis.Ins, Bump); } Reduced->takeName(C.Ins); C.Ins->replaceAllUsesWith(Reduced); C.Ins->dropAllReferences(); // Unlink C.Ins so that we can skip other candidates also corresponding to // C.Ins. The actual deletion is postponed to the end of runOnFunction. C.Ins->removeFromParent(); UnlinkedInstructions.insert(C.Ins); }
// Try to move an addrspacecast from the operand of bitcast BC to its result:
//
//   Cast  = addrspacecast Src
//   BC    = bitcast Cast
//   =>
//   Cast' = bitcast Src
//   BC'   = addrspacecast Cast'
//
// Returns the new addrspacecast (an instruction or a constant expression,
// mirroring what BC is), or nullptr when hoistAddrSpaceCastFrom finds nothing
// to hoist from BC's operand.
Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast(
    BitCastOperator *BC, int Depth) {
  Value *HoistedOp = hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1);
  if (!HoistedOp)
    return nullptr;

  // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
  assert(isEliminableAddrSpaceCast(HoistedOp));
  Operator *Cast = cast<Operator>(HoistedOp);

  // The intermediate bitcast keeps BC's pointee type but stays in the
  // address space of the original source pointer.
  Value *Src = Cast->getOperand(0);
  Type *IntermediateTy =
      PointerType::get(BC->getType()->getPointerElementType(),
                       Src->getType()->getPointerAddressSpace());

  BitCastInst *BCI = dyn_cast<BitCastInst>(BC);
  if (!BCI) {
    // BC is a constant expression, so fold everything into constants.
    Constant *ConstCast =
        ConstantExpr::getBitCast(cast<Constant>(Src), IntermediateTy);
    return ConstantExpr::getAddrSpaceCast(ConstCast, BC->getType());
  }

  // BC is an instruction: emit both replacement casts right before it.
  Value *InnerCast = new BitCastInst(Src, IntermediateTy, "", BCI);
  Value *OuterCast = new AddrSpaceCastInst(InnerCast, BC->getType(), "", BCI);
  OuterCast->takeName(BC);
  // Without RAUWing BC, the compiler would visit BC again and emit
  // redundant instructions. This is exercised in test @rauw in
  // access-non-generic.ll.
  BC->replaceAllUsesWith(OuterCast);
  return OuterCast;
}
/// visitMul - Try a sequence of simplifications and canonicalizations on the
/// integer multiply I. The patterns are attempted in the order written below
/// and the first one that matches decides the result.
///
/// Returns the value produced by ReplaceInstUsesWith when I folds to an
/// existing value, a newly created replacement instruction when I is
/// rewritten, &I when only SimplifyAssociativeOrCommutative reported a change,
/// and 0 when nothing applied.
Instruction *InstCombiner::visitMul(BinaryOperator &I) {
  bool Changed = SimplifyAssociativeOrCommutative(I);
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  // Generic simplifications that fold the whole multiply to an existing value.
  if (Value *V = SimplifyMulInst(Op0, Op1, TD))
    return ReplaceInstUsesWith(I, V);

  if (Value *V = SimplifyUsingDistributiveLaws(I))
    return ReplaceInstUsesWith(I, V);

  if (match(Op1, m_AllOnes()))  // X * -1 == 0 - X
    return BinaryOperator::CreateNeg(Op0, I.getName());

  // Patterns that need a constant integer on the right-hand side.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {

    // ((X << C1)*C2) == (X * (C2 << C1))
    if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
      if (SI->getOpcode() == Instruction::Shl)
        if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
          return BinaryOperator::CreateMul(SI->getOperand(0),
                                           ConstantExpr::getShl(CI, ShOp));

    const APInt &Val = CI->getValue();
    if (Val.isPowerOf2()) {          // Replace X*(2^C) with X << C
      Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
      BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, NewCst);
      // Carry the no-wrap flags of the multiply over to the shift.
      if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap();
      if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
      return Shl;
    }

    // Canonicalize (X+C1)*CI -> X*CI+C1*CI.
    // Only done when the add has no other user.
    { Value *X; ConstantInt *C1;
      if (Op0->hasOneUse() &&
          match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) {
        Value *Add = Builder->CreateMul(X, CI);
        return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI));
      }
    }

    // (Y - X) * (-(2**n)) -> (X - Y) * (2**n), for positive nonzero n
    // (Y + const) * (-(2**n)) -> (-const - Y) * (2**n), for positive nonzero n
    // The "* (2**n)" thus becomes a potential shifting opportunity.
    {
      const APInt &   Val = CI->getValue();
      const APInt &PosVal = Val.abs();
      if (Val.isNegative() && PosVal.isPowerOf2()) {
        Value *X = 0, *Y = 0;
        if (Op0->hasOneUse()) {
          ConstantInt *C1;
          // Sub stays null unless one of the two shapes above matched.
          Value *Sub = 0;
          if (match(Op0, m_Sub(m_Value(Y), m_Value(X))))
            Sub = Builder->CreateSub(X, Y, "suba");
          else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1))))
            Sub = Builder->CreateSub(Builder->CreateNeg(C1), Y, "subc");
          if (Sub)
            return
              BinaryOperator::CreateMul(Sub,
                                        ConstantInt::get(Y->getType(), PosVal));
        }
      }
    }
  }

  // Simplify mul instructions with a constant RHS.
  if (isa<Constant>(Op1)) {
    // Try to fold constant mul into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;

    // Likewise, try to fold the multiply into the operands of a PHI.
    if (isa<PHINode>(Op0))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;
  }

  if (Value *Op0v = dyn_castNegVal(Op0))     // -X * -Y = X*Y
    if (Value *Op1v = dyn_castNegVal(Op1))
      return BinaryOperator::CreateMul(Op0v, Op1v);

  // (X / Y) *  Y = X - (X % Y)
  // (X / Y) * -Y = (X % Y) - X
  {
    // Look for the division in Op0 first; if Op0 is not a udiv/sdiv, swap
    // roles and look for it in Op1 instead. Op1C is the "other" operand.
    Value *Op1C = Op1;
    BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
    if (!BO ||
        (BO->getOpcode() != Instruction::UDiv &&
         BO->getOpcode() != Instruction::SDiv)) {
      Op1C = Op0;
      BO = dyn_cast<BinaryOperator>(Op1);
    }
    Value *Neg = dyn_castNegVal(Op1C);
    // The divisor must be the other operand itself or the value it negates,
    // and the division must have no other user.
    if (BO && BO->hasOneUse() &&
        (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) &&
        (BO->getOpcode() == Instruction::UDiv ||
         BO->getOpcode() == Instruction::SDiv)) {
      Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);

      // If the division is exact, X % Y is zero, so we end up with X or -X.
      if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO))
        if (SDiv->isExact()) {
          if (Op1BO == Op1C)
            return ReplaceInstUsesWith(I, Op0BO);
          return BinaryOperator::CreateNeg(Op0BO);
        }

      // Otherwise materialize the remainder with the matching signedness.
      Value *Rem;
      if (BO->getOpcode() == Instruction::UDiv)
        Rem = Builder->CreateURem(Op0BO, Op1BO);
      else
        Rem = Builder->CreateSRem(Op0BO, Op1BO);
      Rem->takeName(BO);

      // Divisor equals the other operand: X - (X % Y).
      if (Op1BO == Op1C)
        return BinaryOperator::CreateSub(Op0BO, Rem);
      // Divisor is the negation of the other operand: (X % Y) - X.
      return BinaryOperator::CreateSub(Rem, Op0BO);
    }
  }

  /// i1 mul -> i1 and.
  if (I.getType()->isIntegerTy(1))
    return BinaryOperator::CreateAnd(Op0, Op1);

  // X*(1 << Y) --> X << Y
  // (1 << Y)*X --> X << Y
  {
    Value *Y;
    if (match(Op0, m_Shl(m_One(), m_Value(Y))))
      return BinaryOperator::CreateShl(Op1, Y);
    if (match(Op1, m_Shl(m_One(), m_Value(Y))))
      return BinaryOperator::CreateShl(Op0, Y);
  }

  // If one of the operands of the multiply is a cast from a boolean value, then
  // we know the bool is either zero or one, so this is a 'masking' multiply.
  //   X * Y (where Y is 0 or 1) -> X & (0-Y)
  // This is only attempted for non-vector types.
  if (!I.getType()->isVectorTy()) {
    // -2 is "-1 << 1" so it is all bits set except the low one.
    APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);

    // Pick whichever operand is known to have every bit but the lowest clear.
    Value *BoolCast = 0, *OtherOp = 0;
    if (MaskedValueIsZero(Op0, Negative2))
      BoolCast = Op0, OtherOp = Op1;
    else if (MaskedValueIsZero(Op1, Negative2))
      BoolCast = Op1, OtherOp = Op0;

    if (BoolCast) {
      // 0 - BoolCast is all-zeros or all-ones, i.e. a mask for OtherOp.
      Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
                                    BoolCast);
      return BinaryOperator::CreateAnd(V, OtherOp);
    }
  }

  // No rewrite applied; report I itself only if it was changed in place.
  return Changed ? &I : 0;
}
/// DoPromotion - This method actually performs the promotion of the specified
/// arguments.  At this point, we know that it's safe to do so.
///
/// A new function NF is created with the rewritten prototype, every call site
/// of F is rewritten to call NF, the body of F is spliced into NF, and the
/// uses of the old arguments are redirected to the new ones.
///
/// \param F                     the function whose arguments are rewritten.
/// \param ArgsToPromote         pointer arguments whose loaded values are
///                              passed instead of the pointer.
/// \param ByValArgsToTransform  byval struct arguments that are expanded into
///                              one argument per struct element.
/// \returns the call graph node of the newly created function.
CallGraphNode *ArgPromotion::DoPromotion(Function *F,
                               SmallPtrSet<Argument*, 8> &ArgsToPromote,
                              SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {

  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  const FunctionType *FTy = F->getFunctionType();
  std::vector<const Type*> Params;

  typedef std::set<IndicesVector> ScalarizeTable;

  // ScalarizedElements - If we are promoting a pointer that has elements
  // accessed out of it, keep track of which elements are accessed so that we
  // can add one argument for each.
  //
  // Arguments that are directly loaded will have a zero element value here, to
  // handle cases where there are both a direct load and GEP accesses.
  //
  std::map<Argument*, ScalarizeTable> ScalarizedElements;

  // OriginalLoads - Keep track of a representative load instruction from the
  // original function so that we can tell the alias analysis implementation
  // what the new GEP/Load instructions we are inserting look like.
  std::map<IndicesVector, LoadInst*> OriginalLoads;

  // Attributes - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the parameter
  // attributes are lost
  SmallVector<AttributeWithIndex, 8> AttributesVec;
  const AttrListPtr &PAL = F->getAttributes();

  // Add any return attributes.
  if (Attributes attrs = PAL.getRetAttributes())
    AttributesVec.push_back(AttributeWithIndex::get(0, attrs));

  // First, determine the new argument list
  // ArgIndex is the 1-based attribute index of the current old argument.
  unsigned ArgIndex = 1;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgIndex) {
    if (ByValArgsToTransform.count(I)) {
      // Simple byval argument? Just add all the struct element types.
      const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      const StructType *STy = cast<StructType>(AgTy);
      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
        Params.push_back(STy->getElementType(i));
      ++NumByValArgsPromoted;
    } else if (!ArgsToPromote.count(I)) {
      // Unchanged argument
      Params.push_back(I->getType());
      // Re-index the attributes by the argument's position in the new list.
      if (Attributes attrs = PAL.getParamAttributes(ArgIndex))
        AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
    } else if (I->use_empty()) {
      // Dead argument (which are always marked as promotable)
      ++NumArgumentsDead;
    } else {
      // Okay, this is being promoted. This means that the only uses are loads
      // or GEPs which are only used by loads

      // In this table, we will track which indices are loaded from the argument
      // (where direct loads are tracked as no indices).
      ScalarizeTable &ArgIndices = ScalarizedElements[I];
      for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
           ++UI) {
        Instruction *User = cast<Instruction>(*UI);
        assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User));
        IndicesVector Indices;
        Indices.reserve(User->getNumOperands() - 1);
        // Since loads will only have a single operand, and GEPs only a single
        // non-index operand, this will record direct loads without any indices,
        // and gep+loads with the GEP indices.
        for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end();
             II != IE; ++II)
          Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
        // GEPs with a single 0 index can be merged with direct loads
        if (Indices.size() == 1 && Indices.front() == 0)
          Indices.clear();
        ArgIndices.insert(Indices);
        LoadInst *OrigLoad;
        if (LoadInst *L = dyn_cast<LoadInst>(User))
          OrigLoad = L;
        else
          // Take any load, we will use it only to update Alias Analysis
          OrigLoad = cast<LoadInst>(User->use_back());
        OriginalLoads[Indices] = OrigLoad;
      }

      // Add a parameter to the function for each element passed in.
      for (ScalarizeTable::iterator SI = ArgIndices.begin(),
             E = ArgIndices.end(); SI != E; ++SI) {
        // not allowed to dereference ->begin() if size() is 0
        Params.push_back(GetElementPtrInst::getIndexedType(I->getType(),
                                                           SI->begin(),
                                                           SI->end()));
        assert(Params.back());
      }

      // A single entry with no indices means only direct loads were seen.
      if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
        ++NumArgumentsPromoted;
      else
        ++NumAggregatesPromoted;
    }
  }

  // Add any function attributes.
  if (Attributes attrs = PAL.getFnAttributes())
    AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));

  const Type *RetTy = FTy->getReturnType();

  // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
  // have zero fixed arguments.
  bool ExtraArgHack = false;
  if (Params.empty() && FTy->isVarArg()) {
    ExtraArgHack = true;
    Params.push_back(Type::getInt32Ty(F->getContext()));
  }

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
  NF->copyAttributesFrom(F);


  DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
        << "From: " << *F);

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                     AttributesVec.end()));
  // AttributesVec is reused below for the attributes of each call site.
  AttributesVec.clear();

  F->getParent()->getFunctionList().insert(F, NF);
  NF->takeName(F);

  // Get the alias analysis information that we need to update to reflect our
  // changes.
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

  // Get the callgraph information that we need to update to reflect our
  // changes.
  CallGraph &CG = getAnalysis<CallGraph>();

  // Get a new callgraph node for NF.
  CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);

  // Loop over all of the callers of the function, transforming the call sites
  // to pass in the loaded pointers.
  //
  SmallVector<Value*, 16> Args;
  // Each iteration erases one call, which removes one use of F.
  while (!F->use_empty()) {
    CallSite CS = CallSite::get(F->use_back());
    assert(CS.getCalledFunction() == F);
    Instruction *Call = CS.getInstruction();
    const AttrListPtr &CallPAL = CS.getAttributes();

    // Add any return attributes.
    if (Attributes attrs = CallPAL.getRetAttributes())
      AttributesVec.push_back(AttributeWithIndex::get(0, attrs));

    // Loop over the operands, inserting GEP and loads in the caller as
    // appropriate.
    CallSite::arg_iterator AI = CS.arg_begin();
    ArgIndex = 1;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
         I != E; ++I, ++AI, ++ArgIndex)
      if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
        Args.push_back(*AI);          // Unmodified argument

        if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
          AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));

      } else if (ByValArgsToTransform.count(I)) {
        // Emit a GEP and load for each element of the struct.
        const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
        const StructType *STy = cast<StructType>(AgTy);
        // Idxs[0] is the constant 0; Idxs[1] is set to the element number.
        Value *Idxs[2] = {
              ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
          Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2,
                                                 (*AI)->getName()+"."+utostr(i),
                                                 Call);
          // TODO: Tell AA about the new values?
          Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
        }
      } else if (!I->use_empty()) {
        // Non-dead argument: insert GEPs and loads as appropriate.
        ScalarizeTable &ArgIndices = ScalarizedElements[I];
        // Store the Value* version of the indices in here, but declare it now
        // for reuse.
        std::vector<Value*> Ops;
        for (ScalarizeTable::iterator SI = ArgIndices.begin(),
               E = ArgIndices.end(); SI != E; ++SI) {
          Value *V = *AI;
          LoadInst *OrigLoad = OriginalLoads[*SI];
          if (!SI->empty()) {
            Ops.reserve(SI->size());
            const Type *ElTy = V->getType();
            for (IndicesVector::const_iterator II = SI->begin(),
                 IE = SI->end(); II != IE; ++II) {
              // Use i32 to index structs, and i64 for others (pointers/arrays).
              // This satisfies GEP constraints.
              const Type *IdxTy = (ElTy->isStructTy() ?
                    Type::getInt32Ty(F->getContext()) :
                    Type::getInt64Ty(F->getContext()));
              Ops.push_back(ConstantInt::get(IdxTy, *II));
              // Keep track of the type we're currently indexing.
              ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
            }
            // And create a GEP to extract those indices.
            V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(),
                                          V->getName()+".idx", Call);
            Ops.clear();
            AA.copyValue(OrigLoad->getOperand(0), V);
          }
          // Since we're replacing a load make sure we take the alignment
          // of the previous load.
          LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
          newLoad->setAlignment(OrigLoad->getAlignment());
          Args.push_back(newLoad);
          AA.copyValue(OrigLoad, Args.back());
        }
      }

    // Pass a null i32 for the dummy parameter added by the PR56 workaround.
    if (ExtraArgHack)
      Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext())));

    // Push any varargs arguments on the list.
    for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
      Args.push_back(*AI);
      if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
        AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
    }

    // Add any function attributes.
    if (Attributes attrs = CallPAL.getFnAttributes())
      AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));

    // Build the replacement call or invoke right before the old one, copying
    // the calling convention, the recomputed attributes and, for calls, the
    // tail-call marker.
    Instruction *New;
    if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
      New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                               Args.begin(), Args.end(), "", Call);
      cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
      cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                                          AttributesVec.end()));
    } else {
      New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
      cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
      cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                                        AttributesVec.end()));
      if (cast<CallInst>(Call)->isTailCall())
        cast<CallInst>(New)->setTailCall();
    }
    // Reset the scratch vectors for the next call site.
    Args.clear();
    AttributesVec.clear();

    // Update the alias analysis implementation to know that we are replacing
    // the old call with a new one.
    AA.replaceWithNewValue(Call, New);

    // Update the callgraph to know that the callsite has been transformed.
    // (Despite its name, CalleeNode is the node of the function containing
    // the call.)
    CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
    CalleeNode->replaceCallEdge(Call, New, NF_CGN);

    if (!Call->use_empty()) {
      Call->replaceAllUsesWith(New);
      New->takeName(Call);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    Call->eraseFromParent();
  }

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());

  // Loop over the argument list, transfering uses of the old arguments over to
  // the new arguments, also transfering over the names as well.
  //
  // I walks the old arguments; I2 walks the new ones and is advanced by as
  // many new arguments as each old argument produced.
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
       I2 = NF->arg_begin(); I != E; ++I) {
    if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      I->replaceAllUsesWith(I2);
      I2->takeName(I);
      AA.replaceWithNewValue(I, I2);
      ++I2;
      continue;
    }

    if (ByValArgsToTransform.count(I)) {
      // In the callee, we create an alloca, and store each of the new incoming
      // arguments into the alloca.
      Instruction *InsertPt = NF->begin()->begin();

      // Just add all the struct element types.
      const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
      const StructType *STy = cast<StructType>(AgTy);
      Value *Idxs[2] = {
            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };

      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
        Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
        Value *Idx =
          GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2,
                                    TheAlloca->getName()+"."+Twine(i),
                                    InsertPt);
        I2->setName(I->getName()+"."+Twine(i));
        // Store the new argument into its field and move on to the next one.
        new StoreInst(I2++, Idx, InsertPt);
      }

      // Anything that used the arg should now use the alloca.
      I->replaceAllUsesWith(TheAlloca);
      TheAlloca->takeName(I);
      AA.replaceWithNewValue(I, TheAlloca);
      continue;
    }

    // Dead promoted argument: no new argument was added for it, so I2 is not
    // advanced.
    if (I->use_empty()) {
      AA.deleteValue(I);
      continue;
    }

    // Otherwise, if we promoted this argument, then all users are load
    // instructions (or GEPs with only load users), and all loads should be
    // using the new argument that we added.
    ScalarizeTable &ArgIndices = ScalarizedElements[I];

    while (!I->use_empty()) {
      if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) {
        assert(ArgIndices.begin()->empty() &&
               "Load element should sort to front!");
        I2->setName(I->getName()+".val");
        LI->replaceAllUsesWith(I2);
        AA.replaceWithNewValue(LI, I2);
        LI->eraseFromParent();
        DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
              << "' in function '" << F->getName() << "'\n");
      } else {
        GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());
        IndicesVector Operands;
        Operands.reserve(GEP->getNumIndices());
        for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
             II != IE; ++II)
          Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());

        // GEPs with a single 0 index can be merged with direct loads
        if (Operands.size() == 1 && Operands.front() == 0)
          Operands.clear();

        // Find the new argument that corresponds to this index list by
        // walking ArgIndices and the new arguments in lockstep.
        // NOTE(review): the loop condition dereferences It before the assert
        // in the body compares it against end(), so a missing entry would be
        // dereferenced before the assert can fire.
        Function::arg_iterator TheArg = I2;
        for (ScalarizeTable::iterator It = ArgIndices.begin();
             *It != Operands; ++It, ++TheArg) {
          assert(It != ArgIndices.end() && "GEP not handled??");
        }

        // Name the argument "<arg>.<idx0>.<idx1>....val".
        // NOTE(review): Operands holds sign-extended values but is formatted
        // with utostr; a negative index would presumably print as a large
        // unsigned number - confirm.
        std::string NewName = I->getName();
        for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
            NewName += "." + utostr(Operands[i]);
        }
        NewName += ".val";
        TheArg->setName(NewName);

        DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
              << "' of function '" << NF->getName() << "'\n");

        // All of the uses must be load instructions.  Replace them all with
        // the argument specified by ArgNo.
        while (!GEP->use_empty()) {
          LoadInst *L = cast<LoadInst>(GEP->use_back());
          L->replaceAllUsesWith(TheArg);
          AA.replaceWithNewValue(L, TheArg);
          L->eraseFromParent();
        }
        AA.deleteValue(GEP);
        GEP->eraseFromParent();
      }
    }

    // Increment I2 past all of the arguments added for this promoted pointer.
    for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i)
      ++I2;
  }

  // Notify the alias analysis implementation that we inserted a new argument.
  if (ExtraArgHack)
    AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())),
                 NF->arg_begin());


  // Tell the alias analysis that the old function is about to disappear.
  AA.replaceWithNewValue(F, NF);


  // Transfer the call edges recorded for F to the node of NF.
  NF_CGN->stealCalledFunctionsFrom(CG[F]);

  // Now that the old function is dead, delete it.  If there is a dangling
  // reference to the CallgraphNode, just leave the dead function around for
  // someone else to nuke.
  CallGraphNode *CGN = CG[F];
  if (CGN->getNumReferences() == 0)
    delete CG.removeFunctionFromModule(CGN);
  else
    F->setLinkage(Function::ExternalLinkage);

  return NF_CGN;
}
// Rewrite every induction-variable use recorded in IU for loop L in terms of
// the canonical induction variable, expanding the replacement expressions
// with Rewriter. Replaced operands that end up dead are deleted at the end.
//
// Offsets and casts for induction variables of other sizes are added to the
// primary induction variable by hand, so the expander does not need to
// introduce induction variables of different sizes.
void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<WeakVH, 16> MaybeDead;

  for (IVUsers::iterator Use = IU->begin(), UseEnd = IU->end();
       Use != UseEnd; ++Use) {
    Value *OldOp = Use->getOperandValToReplace();
    const Type *OpTy = OldOp->getType();
    Instruction *UserInst = Use->getUser();

    // Compute the final addrec to expand into code.
    const SCEV *Expr = IU->getReplacementExpr(*Use);

    // For a user outside the loop, prefer the value the expression has on
    // exit, provided that value is invariant in L.
    if (!L->contains(Use->getUser())) {
      const SCEV *AtExit = SE->getSCEVAtScope(Expr, L->getParentLoop());
      if (AtExit->isLoopInvariant(L))
        Expr = AtExit;
    }

    // FIXME: It is an extremely bad idea to indvar substitute anything more
    // complex than affine induction variables.  Doing so will put expensive
    // polynomial evaluations inside of the loop, and the str reduction pass
    // currently can only reduce affine polynomials.  For now just disable
    // indvar subst on anything more complex than an affine addrec, unless
    // it can be expanded to a trivial value.
    if (!isSafe(Expr, L))
      continue;

    // Choose where to expand. The default is right before the user; the
    // SCEVExpander hoists loop invariants out of the loop on its own. A PHI
    // may use the operand through several incoming edges, so for PHIs use
    // the terminator of the nearest common dominator of those incoming
    // blocks.
    Instruction *Where = UserInst;
    if (PHINode *Phi = dyn_cast<PHINode>(Where)) {
      for (unsigned Idx = 0, NumIn = Phi->getNumIncomingValues();
           Idx != NumIn; ++Idx) {
        if (Phi->getIncomingValue(Idx) != OldOp)
          continue;
        if (Where == UserInst)
          // First matching edge: start from that block's terminator.
          Where = Phi->getIncomingBlock(Idx)->getTerminator();
        else
          // Further matching edges: widen to the common dominator.
          Where =
            DT->findNearestCommonDominator(Where->getParent(),
                                           Phi->getIncomingBlock(Idx))
                      ->getTerminator();
      }
    }

    // Now expand it into actual Instructions and patch it into place.
    Value *Replacement = Rewriter.expandCodeFor(Expr, OpTy, Where);

    // Inform ScalarEvolution that this value is changing. The change doesn't
    // affect its value, but it does potentially affect which use lists the
    // value will be on after the replacement, which affects ScalarEvolution's
    // ability to walk use lists and drop dangling pointers when a value is
    // deleted.
    SE->forgetValue(UserInst);

    // Patch the new value into place, keeping the old name if there was one.
    if (OldOp->hasName())
      Replacement->takeName(OldOp);
    UserInst->replaceUsesOfWith(OldOp, Replacement);
    Use->setOperandValToReplace(Replacement);

    DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *Expr << "' " << *OldOp << '\n'
                 << " into = " << *Replacement << "\n");
    ++NumRemoved;
    Changed = true;

    // The old value may be dead now.
    MaybeDead.push_back(OldOp);
  }

  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Iteration over the use list is finished, so it is now safe to delete
  // whatever became dead. Handles that were nulled in the meantime are
  // skipped by dyn_cast_or_null.
  while (!MaybeDead.empty()) {
    if (Instruction *Dead =
          dyn_cast_or_null<Instruction>(MaybeDead.pop_back_val()))
      RecursivelyDeleteTriviallyDeadInstructions(Dead);
  }
}
void StraightLineStrengthReduce::rewriteCandidateWithBasis( const Candidate &C, const Candidate &Basis) { assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base && C.Stride == Basis.Stride); // We run rewriteCandidateWithBasis on all candidates in a post-order, so the // basis of a candidate cannot be unlinked before the candidate. assert(Basis.Ins->getParent() != nullptr && "the basis is unlinked"); // An instruction can correspond to multiple candidates. Therefore, instead of // simply deleting an instruction when we rewrite it, we mark its parent as // nullptr (i.e. unlink it) so that we can skip the candidates whose // instruction is already rewritten. if (!C.Ins->getParent()) return; IRBuilder<> Builder(C.Ins); bool BumpWithUglyGEP; Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP); Value *Reduced = nullptr; // equivalent to but weaker than C.Ins switch (C.CandidateKind) { case Candidate::Add: case Candidate::Mul: // C = Basis + Bump if (BinaryOperator::isNeg(Bump)) { // If Bump is a neg instruction, emit C = Basis - (-Bump). Reduced = Builder.CreateSub(Basis.Ins, BinaryOperator::getNegArgument(Bump)); // We only use the negative argument of Bump, and Bump itself may be // trivially dead. RecursivelyDeleteTriviallyDeadInstructions(Bump); } else { // It's tempting to preserve nsw on Bump and/or Reduced. However, it's // usually unsound, e.g., // // X = (-2 +nsw 1) *nsw INT_MAX // Y = (-2 +nsw 3) *nsw INT_MAX // => // Y = X + 2 * INT_MAX // // Neither + and * in the resultant expression are nsw. 
Reduced = Builder.CreateAdd(Basis.Ins, Bump); } break; case Candidate::GEP: { Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType()); bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds(); if (BumpWithUglyGEP) { // C = (char *)Basis + Bump unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS); Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); if (InBounds) Reduced = Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump); else Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump); Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); } else { // C = gep Basis, Bump // Canonicalize bump to pointer size. Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy); if (InBounds) Reduced = Builder.CreateInBoundsGEP(nullptr, Basis.Ins, Bump); else Reduced = Builder.CreateGEP(nullptr, Basis.Ins, Bump); } } break; default: llvm_unreachable("C.CandidateKind is invalid"); }; Reduced->takeName(C.Ins); C.Ins->replaceAllUsesWith(Reduced); // Unlink C.Ins so that we can skip other candidates also corresponding to // C.Ins. The actual deletion is postponed to the end of runOnFunction. C.Ins->removeFromParent(); UnlinkedInstructions.push_back(C.Ins); }
//
// Declare the SoftBoundCETS dereference-check handlers in the module, build
// the internal function __softboundcets_global_init (which calls
// __softboundcets_init(1) and returns), and append that function to
// llvm.global_ctors with priority 0.
//
void InitializeSoftBound::constructCheckHandlers(Module &module) {
  Type *void_ty = Type::getVoidTy(module.getContext());
  Type *void_ptr_ty =
      PointerType::getUnqual(Type::getInt8Ty(module.getContext()));
  Type *size_ty = Type::getInt64Ty(module.getContext());
  Type *int32_ty = Type::getInt32Ty(module.getContext());

  // Spatial checks take (ptr, ptr, ptr, size); temporal checks take
  // (ptr, size, ptr, ptr). The argument lists are NULL-terminated.
  module.getOrInsertFunction("__softboundcets_spatial_load_dereference_check",
                             void_ty, void_ptr_ty, void_ptr_ty, void_ptr_ty,
                             size_ty, NULL);
  module.getOrInsertFunction("__softboundcets_spatial_store_dereference_check",
                             void_ty, void_ptr_ty, void_ptr_ty, void_ptr_ty,
                             size_ty, NULL);
  module.getOrInsertFunction("__softboundcets_temporal_load_dereference_check",
                             void_ty, void_ptr_ty, size_ty, void_ptr_ty,
                             void_ptr_ty, NULL);
  module.getOrInsertFunction("__softboundcets_temporal_store_dereference_check",
                             void_ty, void_ptr_ty, size_ty, void_ptr_ty,
                             void_ptr_ty, NULL);

  // Use a checked cast instead of a C-style cast: if a symbol of this name
  // already exists with another type, getOrInsertFunction hands back a
  // bitcast constant, and treating that as a Function would be undefined.
  Function *global_init = cast<Function>(
      module.getOrInsertFunction("__softboundcets_global_init", void_ty, NULL));
  global_init->setDoesNotThrow();
  global_init->setLinkage(GlobalValue::InternalLinkage);

  BasicBlock *BB =
      BasicBlock::Create(module.getContext(), "entry", global_init);

  Function *softboundcets_init = cast<Function>(module.getOrInsertFunction(
      "__softboundcets_init", void_ty, int32_ty, NULL));

  // Body of the constructor: call __softboundcets_init(1); ret void.
  SmallVector<Value *, 8> args;
  args.push_back(ConstantInt::get(int32_ty, 1));
  Instruction *ret = ReturnInst::Create(module.getContext(), BB);
  CallInst::Create(softboundcets_init, args, "", ret);

  // Constructor record: { i32 priority = 0, function pointer }.
  std::vector<Constant *> CtorInits;
  CtorInits.push_back(ConstantInt::get(int32_ty, 0));
  CtorInits.push_back(global_init);
  StructType *ST = ConstantStruct::getTypeForElements(CtorInits, false);
  Constant *RuntimeCtorInit = ConstantStruct::get(ST, CtorInits);

  //
  // Get the current set of static global constructors and add the new ctor
  // to the list.
  //
  // NOTE(review): existing entries are assumed to have the same struct type
  // as RuntimeCtorInit; the array built below uses that type for every
  // element -- confirm for modules whose ctor records carry extra fields.
  //
  std::vector<Constant *> CurrentCtors;
  GlobalVariable *GVCtor = module.getNamedGlobal("llvm.global_ctors");
  // getInitializer() must not be called on a variable without an initializer,
  // so check hasInitializer() first.
  if (GVCtor && GVCtor->hasInitializer()) {
    Constant *C = GVCtor->getInitializer();
    for (unsigned index = 0; index < C->getNumOperands(); ++index) {
      // Operands of a Constant are Constants, so use a checked cast rather
      // than dyn_cast, whose null result would be stored unchecked.
      CurrentCtors.push_back(cast<Constant>(C->getOperand(index)));
    }
  }
  CurrentCtors.push_back(RuntimeCtorInit);

  //
  // Create a new initializer.
  //
  ArrayType *AT =
      ArrayType::get(RuntimeCtorInit->getType(), CurrentCtors.size());
  Constant *NewInit = ConstantArray::get(AT, CurrentCtors);

  //
  // Create the new llvm.global_ctors global variable and remove the old one
  // if it existed.
  //
  Value *newGVCtor =
      new GlobalVariable(module, NewInit->getType(), false,
                         GlobalValue::AppendingLinkage, NewInit,
                         "llvm.global_ctors");
  if (GVCtor) {
    newGVCtor->takeName(GVCtor);
    GVCtor->eraseFromParent();
  }
}
/// FoldSelectIntoOp - Try fold the select into one of the operands to /// facilitate further optimization. Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, Value *FalseVal) { // See the comment above GetSelectFoldableOperands for a description of the // transformation we are doing here. if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) { if (TVI->hasOneUse() && TVI->getNumOperands() == 2 && !isa<Constant>(FalseVal)) { if (unsigned SFO = GetSelectFoldableOperands(TVI)) { unsigned OpToFold = 0; if ((SFO & 1) && FalseVal == TVI->getOperand(0)) { OpToFold = 1; } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { OpToFold = 2; } if (OpToFold) { Constant *C = GetSelectFoldableConstant(TVI); Value *OOp = TVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting // between 0, 1 and -1. if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { Value *NewSel = Builder->CreateSelect(SI.getCondition(), OOp, C); NewSel->takeName(TVI); BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI); BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(), FalseVal, NewSel); if (isa<PossiblyExactOperator>(BO)) BO->setIsExact(TVI_BO->isExact()); if (isa<OverflowingBinaryOperator>(BO)) { BO->setHasNoUnsignedWrap(TVI_BO->hasNoUnsignedWrap()); BO->setHasNoSignedWrap(TVI_BO->hasNoSignedWrap()); } return BO; } } } } } if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) { if (FVI->hasOneUse() && FVI->getNumOperands() == 2 && !isa<Constant>(TrueVal)) { if (unsigned SFO = GetSelectFoldableOperands(FVI)) { unsigned OpToFold = 0; if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { OpToFold = 1; } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { OpToFold = 2; } if (OpToFold) { Constant *C = GetSelectFoldableConstant(FVI); Value *OOp = FVI->getOperand(2-OpToFold); // Avoid creating select between 2 constants unless it's selecting // between 0, 1 and -1. 
if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { Value *NewSel = Builder->CreateSelect(SI.getCondition(), C, OOp); NewSel->takeName(FVI); BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI); BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(), TrueVal, NewSel); if (isa<PossiblyExactOperator>(BO)) BO->setIsExact(FVI_BO->isExact()); if (isa<OverflowingBinaryOperator>(BO)) { BO->setHasNoUnsignedWrap(FVI_BO->hasNoUnsignedWrap()); BO->setHasNoSignedWrap(FVI_BO->hasNoSignedWrap()); } return BO; } } } } } return nullptr; }
/// runOnFunction - Insert code to maintain the shadow stack. bool ShadowStackGCLowering::runOnFunction(Function &F) { // Quick exit for functions that do not use the shadow stack GC. if (!F.hasGC() || F.getGC() != std::string("shadow-stack")) return false; LLVMContext &Context = F.getContext(); // Find calls to llvm.gcroot. CollectRoots(F); // If there are no roots in this function, then there is no need to add a // stack map entry for it. if (Roots.empty()) return false; // Build the constant map and figure the type of the shadow stack entry. Value *FrameMap = GetFrameMap(F); Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); // Build the shadow stack entry at the very start of the function. BasicBlock::iterator IP = F.getEntryBlock().begin(); IRBuilder<> AtEntry(IP->getParent(), IP); Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, "gc_frame"); while (isa<AllocaInst>(IP)) ++IP; AtEntry.SetInsertPoint(IP->getParent(), IP); // Initialize the map pointer and load the current head of the shadow stack. Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, StackEntry, 0, 1, "gc_frame.map"); AtEntry.CreateStore(FrameMap, EntryMapPtr); // After all the allocas... for (unsigned I = 0, E = Roots.size(); I != E; ++I) { // For each root, find the corresponding slot in the aggregate... Value *SlotPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, StackEntry, 1 + I, "gc_root"); // And use it in lieu of the alloca. AllocaInst *OriginalAlloca = Roots[I].second; SlotPtr->takeName(OriginalAlloca); OriginalAlloca->replaceAllUsesWith(SlotPtr); } // Move past the original stores inserted by GCStrategy::InitRoots. This isn't // really necessary (the collector would never see the intermediate state at // runtime), but it's nicer not to push the half-initialized entry onto the // shadow stack. 
while (isa<StoreInst>(IP)) ++IP; AtEntry.SetInsertPoint(IP->getParent(), IP); // Push the entry onto the shadow stack. Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, StackEntry, 0, 0, "gc_frame.next"); Instruction *NewHeadVal = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, StackEntry, 0, "gc_newhead"); AtEntry.CreateStore(CurrentHead, EntryNextPtr); AtEntry.CreateStore(NewHeadVal, Head); // For each instruction that escapes... EscapeEnumerator EE(F, "gc_cleanup"); while (IRBuilder<> *AtExit = EE.Next()) { // Pop the entry from the shadow stack. Don't reuse CurrentHead from // AtEntry, since that would make the value live for the entire function. Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0, "gc_frame.next"); Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); AtExit->CreateStore(SavedHead, Head); } // Delete the original allocas (which are no longer used) and the intrinsic // calls (which are no longer valid). Doing this last avoids invalidating // iterators. for (unsigned I = 0, E = Roots.size(); I != E; ++I) { Roots[I].first->eraseFromParent(); Roots[I].second->eraseFromParent(); } Roots.clear(); return true; }
/// visitFMul - Try to simplify or canonicalize the floating-point multiply I.
///
/// Returns a replacement for I when one of the folds below fires, &I when
/// only SimplifyAssociativeOrCommutative reported a change, and nullptr when
/// nothing was done.
Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
  bool Changed = SimplifyAssociativeOrCommutative(I);
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  if (Value *V = SimplifyVectorOp(I))
    return ReplaceInstUsesWith(I, V);

  // Work on local copies with any constant moved to the right-hand side; the
  // operands of I itself are not touched by this swap.
  if (isa<Constant>(Op0))
    std::swap(Op0, Op1);

  if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL))
    return ReplaceInstUsesWith(I, V);

  bool AllowReassociate = I.hasUnsafeAlgebra();

  // Simplify mul instructions with a constant RHS.
  if (isa<Constant>(Op1)) {
    // Try to fold constant mul into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;

    if (isa<PHINode>(Op0))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;

    // (fmul X, -1.0) --> (fsub -0.0, X)
    if (match(Op1, m_SpecificFP(-1.0))) {
      Constant *NegZero = ConstantFP::getNegativeZero(Op1->getType());
      Instruction *RI = BinaryOperator::CreateFSub(NegZero, Op0);
      RI->copyFastMathFlags(&I);
      return RI;
    }

    Constant *C = cast<Constant>(Op1);
    if (AllowReassociate && isFiniteNonZeroFp(C)) {
      // Let MDC denote an expression in one of these forms:
      // X * C, C/X, X/C, where C is a constant.
      //
      // Try to simplify "MDC * Constant"
      if (isFMulOrFDivWithConstant(Op0))
        if (Value *V = foldFMulConst(cast<Instruction>(Op0), C, &I))
          return ReplaceInstUsesWith(I, V);

      // (MDC +/- C1) * C => (MDC * C) +/- (C1 * C)
      Instruction *FAddSub = dyn_cast<Instruction>(Op0);
      if (FAddSub &&
          (FAddSub->getOpcode() == Instruction::FAdd ||
           FAddSub->getOpcode() == Instruction::FSub)) {
        Value *Opnd0 = FAddSub->getOperand(0);
        Value *Opnd1 = FAddSub->getOperand(1);
        Constant *C0 = dyn_cast<Constant>(Opnd0);
        Constant *C1 = dyn_cast<Constant>(Opnd1);
        // Normalize so that the constant operand (if any) is C1/Opnd1, and
        // remember whether the operands were exchanged to do so.
        bool Swap = false;
        if (C0) {
          std::swap(C0, C1);
          std::swap(Opnd0, Opnd1);
          Swap = true;
        }

        if (C1 && isFiniteNonZeroFp(C1) && isFMulOrFDivWithConstant(Opnd0)) {
          // M1 = C1 * C, folded as a constant expression. M0 = MDC * C is
          // only attempted when isNormalFp accepts M1.
          Value *M1 = ConstantExpr::getFMul(C1, C);
          Value *M0 = isNormalFp(cast<Constant>(M1)) ?
                      foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
                      nullptr;
          if (M0 && M1) {
            // FSub is not commutative: undo the earlier operand exchange so
            // the subtraction keeps its original operand order.
            if (Swap && FAddSub->getOpcode() == Instruction::FSub)
              std::swap(M0, M1);

            Instruction *RI = (FAddSub->getOpcode() == Instruction::FAdd)
                                  ? BinaryOperator::CreateFAdd(M0, M1)
                                  : BinaryOperator::CreateFSub(M0, M1);
            RI->copyFastMathFlags(&I);
            return RI;
          }
        }
      }
    }
  }

  // Under unsafe algebra do:
  // X * log2(0.5*Y) = X*log2(Y) - X
  if (I.hasUnsafeAlgebra()) {
    Value *OpX = nullptr;
    Value *OpY = nullptr;
    // Log2 is only assigned through detectLog2OfHalf's reference parameter.
    // NOTE(review): presumably it is set whenever OpY is set -- confirm
    // against detectLog2OfHalf, since it is dereferenced below when OpY is
    // non-null.
    IntrinsicInst *Log2;
    detectLog2OfHalf(Op0, OpY, Log2);
    if (OpY) {
      OpX = Op1;
    } else {
      detectLog2OfHalf(Op1, OpY, Log2);
      if (OpY) {
        OpX = Op0;
      }
    }
    // if pattern detected emit alternate sequence
    if (OpX && OpY) {
      // The guard object restores the builder's fast-math flags when it goes
      // out of scope.
      BuilderTy::FastMathFlagGuard Guard(*Builder);
      Builder->SetFastMathFlags(Log2->getFastMathFlags());
      // The existing log2 call is modified in place to take Y directly.
      Log2->setArgOperand(0, OpY);
      Value *FMulVal = Builder->CreateFMul(OpX, Log2);
      Value *FSub = Builder->CreateFSub(FMulVal, OpX);
      FSub->takeName(&I);
      return ReplaceInstUsesWith(I, FSub);
    }
  }

  // Handle symmetric situation in a 2-iteration loop
  Value *Opnd0 = Op0;
  Value *Opnd1 = Op1;
  for (int i = 0; i < 2; i++) {
    bool IgnoreZeroSign = I.hasNoSignedZeros();
    if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
      BuilderTy::FastMathFlagGuard Guard(*Builder);
      Builder->SetFastMathFlags(I.getFastMathFlags());

      Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
      Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);

      // -X * -Y => X*Y
      if (N1) {
        Value *FMul = Builder->CreateFMul(N0, N1);
        FMul->takeName(&I);
        return ReplaceInstUsesWith(I, FMul);
      }

      if (Opnd0->hasOneUse()) {
        // -X * Y => -(X*Y) (Promote negation as high as possible)
        Value *T = Builder->CreateFMul(N0, Opnd1);
        Value *Neg = Builder->CreateFNeg(T);
        Neg->takeName(&I);
        return ReplaceInstUsesWith(I, Neg);
      }
    }

    // (X*Y) * X => (X*X) * Y where Y != X
    //  The purpose is two-fold:
    //   1) to form a power expression (of X).
    //   2) potentially shorten the critical path: After transformation, the
    //  latency of the instruction Y is amortized by the expression of X*X,
    //  and therefore Y is in a "less critical" position compared to what it
    //  was before the transformation.
    //
    if (AllowReassociate) {
      Value *Opnd0_0, *Opnd0_1;
      if (Opnd0->hasOneUse() &&
          match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) {
        // Y is whichever factor of Opnd0 differs from Opnd1; it stays null
        // when neither factor equals Opnd1 or when both do.
        Value *Y = nullptr;
        if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1)
          Y = Opnd0_1;
        else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1)
          Y = Opnd0_0;

        if (Y) {
          BuilderTy::FastMathFlagGuard Guard(*Builder);
          Builder->SetFastMathFlags(I.getFastMathFlags());
          Value *T = Builder->CreateFMul(Opnd1, Opnd1);

          Value *R = Builder->CreateFMul(T, Y);
          R->takeName(&I);
          return ReplaceInstUsesWith(I, R);
        }
      }
    }

    // Second pass looks at the operands the other way round; with a constant
    // RHS there is no second pass.
    if (!isa<Constant>(Op1))
      std::swap(Opnd0, Opnd1);
    else
      break;
  }

  return Changed ? &I : nullptr;
}
// Creates the helper function that will do the setjmp() call and // function call for implementing Invoke. Creates the call to the // helper function. Returns a Value which is zero on the normal // execution path and non-zero if the landingpad block should be // entered. Value *FuncRewriter::createSetjmpWrappedCall(InvokeInst *Invoke) { Type *I32 = Type::getInt32Ty(Func->getContext()); // Allocate space for storing the invoke's result temporarily (so // that the helper function can return multiple values). We don't // need to do this if the result is unused, and we can't if its type // is void. Instruction *ResultAlloca = NULL; if (!Invoke->use_empty()) { ResultAlloca = new AllocaInst(Invoke->getType(), "invoke_result_ptr"); Func->getEntryBlock().getInstList().push_front(ResultAlloca); } // Create type for the helper function. SmallVector<Type *, 10> ArgTypes; for (unsigned I = 0, E = Invoke->getNumArgOperands(); I < E; ++I) ArgTypes.push_back(Invoke->getArgOperand(I)->getType()); ArgTypes.push_back(Invoke->getCalledValue()->getType()); ArgTypes.push_back(FrameJmpBuf->getType()); if (ResultAlloca) ArgTypes.push_back(Invoke->getType()->getPointerTo()); FunctionType *FTy = FunctionType::get(I32, ArgTypes, false); // Create the helper function. Function *HelperFunc = Function::Create( FTy, GlobalValue::InternalLinkage, Func->getName() + "_setjmp_caller"); Func->getParent()->getFunctionList().insertAfter(Func, HelperFunc); BasicBlock *EntryBB = BasicBlock::Create(Func->getContext(), "", HelperFunc); BasicBlock *NormalBB = BasicBlock::Create(Func->getContext(), "normal", HelperFunc); BasicBlock *ExceptionBB = BasicBlock::Create(Func->getContext(), "exception", HelperFunc); // Unpack the helper function's arguments. 
Function::arg_iterator ArgIter = HelperFunc->arg_begin(); SmallVector<Value *, 10> InnerCallArgs; for (unsigned I = 0, E = Invoke->getNumArgOperands(); I < E; ++I) { ArgIter->setName("arg"); InnerCallArgs.push_back(ArgIter++); } Argument *CalleeArg = ArgIter++; Argument *JmpBufArg = ArgIter++; CalleeArg->setName("func_ptr"); JmpBufArg->setName("jmp_buf"); // Create setjmp() call. Value *SetjmpArgs[] = { JmpBufArg }; CallInst *SetjmpCall = CallInst::Create(SetjmpIntrinsic, SetjmpArgs, "invoke_sj", EntryBB); CopyDebug(SetjmpCall, Invoke); // Setting the "returns_twice" attribute here prevents optimization // passes from inlining HelperFunc into its caller. SetjmpCall->setCanReturnTwice(); // Check setjmp()'s result. Value *IsZero = CopyDebug(new ICmpInst(*EntryBB, CmpInst::ICMP_EQ, SetjmpCall, ConstantInt::get(I32, 0), "invoke_sj_is_zero"), Invoke); CopyDebug(BranchInst::Create(NormalBB, ExceptionBB, IsZero, EntryBB), Invoke); // Handle the normal, non-exceptional code path. CallInst *InnerCall = CallInst::Create(CalleeArg, InnerCallArgs, "", NormalBB); CopyDebug(InnerCall, Invoke); InnerCall->setAttributes(Invoke->getAttributes()); InnerCall->setCallingConv(Invoke->getCallingConv()); if (ResultAlloca) { InnerCall->setName("result"); Argument *ResultArg = ArgIter++; ResultArg->setName("result_ptr"); CopyDebug(new StoreInst(InnerCall, ResultArg, NormalBB), Invoke); } ReturnInst::Create(Func->getContext(), ConstantInt::get(I32, 0), NormalBB); // Handle the exceptional code path. ReturnInst::Create(Func->getContext(), ConstantInt::get(I32, 1), ExceptionBB); // Create the outer call to the helper function. 
SmallVector<Value *, 10> OuterCallArgs; for (unsigned I = 0, E = Invoke->getNumArgOperands(); I < E; ++I) OuterCallArgs.push_back(Invoke->getArgOperand(I)); OuterCallArgs.push_back(Invoke->getCalledValue()); OuterCallArgs.push_back(FrameJmpBuf); if (ResultAlloca) OuterCallArgs.push_back(ResultAlloca); CallInst *OuterCall = CallInst::Create(HelperFunc, OuterCallArgs, "invoke_is_exc", Invoke); CopyDebug(OuterCall, Invoke); // Retrieve the function return value stored in the alloca. We only // need to do this on the non-exceptional path, but we currently do // it unconditionally because that is simpler. if (ResultAlloca) { Value *Result = new LoadInst(ResultAlloca, "", Invoke); Result->takeName(Invoke); Invoke->replaceAllUsesWith(Result); } return OuterCall; }
/// FoldShiftByConstant - Try to simplify the shift I, whose shifted value is
/// Op0 and whose shift amount is the constant Op1.
///
/// Op1 must be a ConstantInt or a vector constant with a ConstantInt splat
/// value; otherwise nothing is done. Returns the replacement instruction when
/// a fold applies and nullptr otherwise.
Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
                                               BinaryOperator &I) {
  bool isLeftShift = I.getOpcode() == Instruction::Shl;

  // COp1 is the scalar shift amount: Op1 itself, or the splat element when
  // Op1 is a vector constant. It stays null for non-splat vectors.
  ConstantInt *COp1 = nullptr;
  if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(Op1))
    COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue());
  else if (ConstantVector *CV = dyn_cast<ConstantVector>(Op1))
    COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue());
  else
    COp1 = dyn_cast<ConstantInt>(Op1);

  if (!COp1)
    return nullptr;

  // See if we can propagate this shift into the input, this covers the trivial
  // cast of lshr(shl(x,c1),c2) as well as other more complex cases.
  if (I.getOpcode() != Instruction::AShr &&
      CanEvaluateShifted(Op0, COp1->getZExtValue(), isLeftShift, *this)) {
    DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
              " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");

    return ReplaceInstUsesWith(I,
                 GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this));
  }

  // See if we can simplify any instructions used by the instruction whose sole
  // purpose is to compute bits we don't care about.
  uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();

  assert(!COp1->uge(TypeBits) &&
         "Shift over the type width should have been removed already");

  // ((X*C1) << C2) == (X * (C1 << C2))
  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
    if (BO->getOpcode() == Instruction::Mul && isLeftShift)
      if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
        return BinaryOperator::CreateMul(BO->getOperand(0),
                                        ConstantExpr::getShl(BOOp, Op1));

  // Try to fold constant and into select arguments.
  if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
    if (Instruction *R = FoldOpIntoSelect(I, SI))
      return R;
  if (isa<PHINode>(Op0))
    if (Instruction *NV = FoldOpIntoPhi(I))
      return NV;

  // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
  if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
    Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
    // If 'shift2' is an ashr, we would have to get the sign bit into a funny
    // place.  Don't try to do this transformation in this case.  Also, we
    // require that the input operand is a shift-by-constant so that we have
    // confidence that the shifts will get folded together.  We could do this
    // xform in more cases, but it is unlikely to be profitable.
    if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
        isa<ConstantInt>(TrOp->getOperand(1))) {
      // Okay, we'll do this xform.  Make the shift of shift.
      Constant *ShAmt = ConstantExpr::getZExt(COp1, TrOp->getType());
      // (shift2 (shift1 & 0x00FF), c2)
      Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());

      // For logical shifts, the truncation has the effect of making the high
      // part of the register be zeros.  Emulate this by inserting an AND to
      // clear the top bits as needed.  This 'and' will usually be zapped by
      // other xforms later if dead.
      unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
      unsigned DstSize = TI->getType()->getScalarSizeInBits();
      APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));

      // The mask we constructed says what the trunc would do if occurring
      // between the shifts.  We want to know the effect *after* the second
      // shift.  We know that it is a logical shift by a constant, so adjust the
      // mask as appropriate.
      if (I.getOpcode() == Instruction::Shl)
        MaskV <<= COp1->getZExtValue();
      else {
        assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
        MaskV = MaskV.lshr(COp1->getZExtValue());
      }

      // shift1 & 0x00FF
      Value *And = Builder->CreateAnd(NSh,
                                      ConstantInt::get(I.getContext(), MaskV),
                                      TI->getName());

      // Return the value truncated to the interesting size.
      return new TruncInst(And, I.getType());
    }
  }

  if (Op0->hasOneUse()) {
    if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
      // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
      Value *V1, *V2;
      ConstantInt *CC;
      switch (Op0BO->getOpcode()) {
      default: break;
      case Instruction::Add:
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor: {
        // These operators commute.
        // Turn (Y + (X >> C)) << C  ->  (X + (Y << C)) & (~0 << C)
        if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
            match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
                  m_Specific(Op1)))) {
          Value *YS =         // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
          // (X + (Y << C))
          Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
                                          Op0BO->getOperand(1)->getName());
          uint32_t Op1Val = COp1->getLimitedValue(TypeBits);

          // Mask keeps the high (TypeBits - C) bits; it is splatted when the
          // operation is on vectors.
          APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
          Constant *Mask = ConstantInt::get(I.getContext(), Bits);
          if (VectorType *VT = dyn_cast<VectorType>(X->getType()))
            Mask = ConstantVector::getSplat(VT->getNumElements(), Mask);
          return BinaryOperator::CreateAnd(X, Mask);
        }

        // Turn (Y + ((X >> C) & CC)) << C  ->  ((X & (CC << C)) + (Y << C))
        Value *Op0BOOp1 = Op0BO->getOperand(1);
        if (isLeftShift && Op0BOOp1->hasOneUse() &&
            match(Op0BOOp1,
                  m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
                        m_ConstantInt(CC)))) {
          Value *YS =   // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(0), Op1,
                                         Op0BO->getName());
          // X & (CC << C)
          Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
                                         V1->getName()+".mask");
          return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
        }
      }

      // FALL THROUGH.
      // (Intentional: the commutative opcodes also try the patterns below,
      // which look for the shift in operand 0 instead of operand 1.)
      case Instruction::Sub: {
        // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
        if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
            match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
                  m_Specific(Op1)))) {
          Value *YS =  // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
          // (X + (Y << C))
          Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
                                          Op0BO->getOperand(0)->getName());
          uint32_t Op1Val = COp1->getLimitedValue(TypeBits);

          APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
          Constant *Mask = ConstantInt::get(I.getContext(), Bits);
          if (VectorType *VT = dyn_cast<VectorType>(X->getType()))
            Mask = ConstantVector::getSplat(VT->getNumElements(), Mask);
          return BinaryOperator::CreateAnd(X, Mask);
        }

        // Turn (((X >> C)&CC) + Y) << C  ->  (X + (Y << C)) & (CC << C)
        if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
            match(Op0BO->getOperand(0),
                  m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))),
                        m_ConstantInt(CC))) && V2 == Op1) {
          Value *YS = // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
          // X & (CC << C)
          Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
                                         V1->getName()+".mask");

          return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
        }

        break;
      }
      }

      // If the operand is an bitwise operator with a constant RHS, and the
      // shift is the only use, we can pull it out of the shift.
      if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
        bool isValid = true;     // Valid only for And, Or, Xor
        bool highBitSet = false; // Transform if high bit of constant set?

        switch (Op0BO->getOpcode()) {
        default: isValid = false; break;   // Do not perform transform!
        case Instruction::Add:
          // Add is only handled for left shifts.
          isValid = isLeftShift;
          break;
        case Instruction::Or:
        case Instruction::Xor:
          highBitSet = false;
          break;
        case Instruction::And:
          highBitSet = true;
          break;
        }

        // If this is a signed shift right, and the high bit is modified
        // by the logical operation, do not perform the transformation.
        // The highBitSet boolean indicates the value of the high bit of
        // the constant which would cause it to be modified for this
        // operation.
        //
        if (isValid && I.getOpcode() == Instruction::AShr)
          isValid = Op0C->getValue()[TypeBits-1] == highBitSet;

        if (isValid) {
          Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);

          Value *NewShift =
            Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
          NewShift->takeName(Op0BO);

          return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
                                        NewRHS);
        }
      }
    }
  }

  // Find out if this is a shift of a shift by a constant.
  BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
  if (ShiftOp && !ShiftOp->isShift())
    ShiftOp = nullptr;

  if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {

    // This is a constant shift of a constant shift. Be careful about hiding
    // shl instructions behind bit masks. They are used to represent multiplies
    // by a constant, and it is important that simple arithmetic expressions
    // are still recognizable by scalar evolution.
    //
    // The transforms applied to shl are very similar to the transforms applied
    // to mul by constant. We can be more aggressive about optimizing right
    // shifts.
    //
    // Combinations of right and left shifts will still be optimized in
    // DAGCombine where scalar evolution no longer applies.

    ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
    uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
    uint32_t ShiftAmt2 = COp1->getLimitedValue(TypeBits);
    assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
    if (ShiftAmt1 == 0) return nullptr;  // Will be simplified in the future.
    Value *X = ShiftOp->getOperand(0);

    // NOTE(review): this cast requires I to have a scalar integer type;
    // presumably guaranteed here because the inner shift amount is a
    // ConstantInt -- confirm.
    IntegerType *Ty = cast<IntegerType>(I.getType());

    // Check for (X << c1) << c2  and  (X >> c1) >> c2
    if (I.getOpcode() == ShiftOp->getOpcode()) {
      uint32_t AmtSum = ShiftAmt1+ShiftAmt2;   // Fold into one big shift.
      // If this is oversized composite shift, then unsigned shifts get 0, ashr
      // saturates.
      if (AmtSum >= TypeBits) {
        if (I.getOpcode() != Instruction::AShr)
          return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
        AmtSum = TypeBits-1;  // Saturate to 31 for i32 ashr.
      }

      return BinaryOperator::Create(I.getOpcode(), X,
                                    ConstantInt::get(Ty, AmtSum));
    }

    if (ShiftAmt1 == ShiftAmt2) {
      // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
      if (I.getOpcode() == Instruction::LShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
        return BinaryOperator::CreateAnd(X,
                                        ConstantInt::get(I.getContext(), Mask));
      }
    } else if (ShiftAmt1 < ShiftAmt2) {
      uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;

      // (X >>?,exact C1) << C2 --> X << (C2-C1)
      // The inexact version is deferred to DAGCombine so we don't hide shl
      // behind a bit mask.
      if (I.getOpcode() == Instruction::Shl &&
          ShiftOp->getOpcode() != Instruction::Shl &&
          ShiftOp->isExact()) {
        assert(ShiftOp->getOpcode() == Instruction::LShr ||
               ShiftOp->getOpcode() == Instruction::AShr);
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
                                                        X, ShiftDiffCst);
        // The new shl inherits the wrap flags of the outer shl.
        NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
        NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
        return NewShl;
      }

      // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
      if (I.getOpcode() == Instruction::LShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        // (X <<nuw C1) >>u C2 --> X >>u (C2-C1)
        if (ShiftOp->hasNoUnsignedWrap()) {
          BinaryOperator *NewLShr = BinaryOperator::Create(Instruction::LShr,
                                                           X, ShiftDiffCst);
          NewLShr->setIsExact(I.isExact());
          return NewLShr;
        }
        Value *Shift = Builder->CreateLShr(X, ShiftDiffCst);

        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(I.getContext(),Mask));
      }

      // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
      // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
      if (I.getOpcode() == Instruction::AShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        if (ShiftOp->hasNoSignedWrap()) {
          // (X <<nsw C1) >>s C2 --> X >>s (C2-C1)
          ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
          BinaryOperator *NewAShr = BinaryOperator::Create(Instruction::AShr,
                                                           X, ShiftDiffCst);
          NewAShr->setIsExact(I.isExact());
          return NewAShr;
        }
      }
    } else {
      assert(ShiftAmt2 < ShiftAmt1);
      uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;

      // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
      // The inexact version is deferred to DAGCombine so we don't hide shl
      // behind a bit mask.
      if (I.getOpcode() == Instruction::Shl &&
          ShiftOp->getOpcode() != Instruction::Shl &&
          ShiftOp->isExact()) {
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
                                                        X, ShiftDiffCst);
        NewShr->setIsExact(true);
        return NewShr;
      }

      // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
      if (I.getOpcode() == Instruction::LShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        if (ShiftOp->hasNoUnsignedWrap()) {
          // (X <<nuw C1) >>u C2 --> X <<nuw (C1-C2)
          BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
                                                          X, ShiftDiffCst);
          NewShl->setHasNoUnsignedWrap(true);
          return NewShl;
        }
        Value *Shift = Builder->CreateShl(X, ShiftDiffCst);

        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(I.getContext(),Mask));
      }

      // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
      // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
      if (I.getOpcode() == Instruction::AShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        if (ShiftOp->hasNoSignedWrap()) {
          // (X <<nsw C1) >>s C2 --> X <<nsw (C1-C2)
          ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
          BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
                                                          X, ShiftDiffCst);
          NewShl->setHasNoSignedWrap(true);
          return NewShl;
        }
      }
    }
  }

  return nullptr;
}
// Insert an intrinsic for fast fdiv for safe math situations where we can // reduce precision. Leave fdiv for situations where the generic node is // expected to be optimized. bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) { Type *Ty = FDiv.getType(); // TODO: Handle half if (!Ty->getScalarType()->isFloatTy()) return false; MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath); if (!FPMath) return false; const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv); float ULP = FPOp->getFPAccuracy(); if (ULP < 2.5f) return false; FastMathFlags FMF = FPOp->getFastMathFlags(); bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() || FMF.allowReciprocal(); if (ST->hasFP32Denormals() && !UnsafeDiv) return false; IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath); Builder.setFastMathFlags(FMF); Builder.SetCurrentDebugLocation(FDiv.getDebugLoc()); const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo(); Function *Decl = II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {}); Value *Num = FDiv.getOperand(0); Value *Den = FDiv.getOperand(1); Value *NewFDiv = nullptr; if (VectorType *VT = dyn_cast<VectorType>(Ty)) { NewFDiv = UndefValue::get(VT); // FIXME: Doesn't do the right thing for cases where the vector is partially // constant. This works when the scalarizer pass is run first. for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) { Value *NumEltI = Builder.CreateExtractElement(Num, I); Value *DenEltI = Builder.CreateExtractElement(Den, I); Value *NewElt; if (shouldKeepFDivF32(NumEltI, UnsafeDiv)) { NewElt = Builder.CreateFDiv(NumEltI, DenEltI); } else { NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI }); } NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I); } } else { if (!shouldKeepFDivF32(Num, UnsafeDiv)) NewFDiv = Builder.CreateCall(Decl, { Num, Den }); } if (NewFDiv) { FDiv.replaceAllUsesWith(NewFDiv); NewFDiv->takeName(&FDiv); FDiv.eraseFromParent(); } return true; }
/// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. static Function * doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, SmallPtrSetImpl<Argument *> &ByValArgsToTransform, Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>> ReplaceCallSite) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. FunctionType *FTy = F->getFunctionType(); std::vector<Type *> Params; using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>; // ScalarizedElements - If we are promoting a pointer that has elements // accessed out of it, keep track of which elements are accessed so that we // can add one argument for each. // // Arguments that are directly loaded will have a zero element value here, to // handle cases where there are both a direct load and GEP accesses. std::map<Argument *, ScalarizeTable> ScalarizedElements; // OriginalLoads - Keep track of a representative load instruction from the // original function so that we can tell the alias analysis implementation // what the new GEP/Load instructions we are inserting look like. // We need to keep the original loads for each argument and the elements // of the argument that are accessed. std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads; // Attribute - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost SmallVector<AttributeSet, 8> ArgAttrVec; AttributeList PAL = F->getAttributes(); // First, determine the new argument list unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgNo) { if (ByValArgsToTransform.count(&*I)) { // Simple byval argument? Just add all the struct element types. 
Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Params.insert(Params.end(), STy->element_begin(), STy->element_end()); ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(), AttributeSet()); ++NumByValArgsPromoted; } else if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo)); } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; // There may be remaining metadata uses of the argument for things like // llvm.dbg.value. Replace them with undef. I->replaceAllUsesWith(UndefValue::get(I->getType())); } else { // Okay, this is being promoted. This means that the only uses are loads // or GEPs which are only used by loads // In this table, we will track which indices are loaded from the argument // (where direct loads are tracked as no indices). ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; for (User *U : I->users()) { Instruction *UI = cast<Instruction>(U); Type *SrcTy; if (LoadInst *L = dyn_cast<LoadInst>(UI)) SrcTy = L->getType(); else SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType(); IndicesVector Indices; Indices.reserve(UI->getNumOperands() - 1); // Since loads will only have a single operand, and GEPs only a single // non-index operand, this will record direct loads without any indices, // and gep+loads with the GEP indices. 
for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); II != IE; ++II) Indices.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Indices.size() == 1 && Indices.front() == 0) Indices.clear(); ArgIndices.insert(std::make_pair(SrcTy, Indices)); LoadInst *OrigLoad; if (LoadInst *L = dyn_cast<LoadInst>(UI)) OrigLoad = L; else // Take any load, we will use it only to update Alias Analysis OrigLoad = cast<LoadInst>(UI->user_back()); OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; } // Add a parameter to the function for each element passed in. for (const auto &ArgIndex : ArgIndices) { // not allowed to dereference ->begin() if size() is 0 Params.push_back(GetElementPtrInst::getIndexedType( cast<PointerType>(I->getType()->getScalarType())->getElementType(), ArgIndex.second)); ArgAttrVec.push_back(AttributeSet()); assert(Params.back()); } if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) ++NumArgumentsPromoted; else ++NumAggregatesPromoted; } } Type *RetTy = FTy->getReturnType(); // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); // Create the new function body and insert it into the module. Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); NF->copyAttributesFrom(F); // Patch the pointer to LLVM function in debug info descriptor. NF->setSubprogram(F->getSubprogram()); F->setSubprogram(nullptr); DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); // Recompute the parameter attributes list based on the new arguments for // the function. 
NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(), PAL.getRetAttributes(), ArgAttrVec)); ArgAttrVec.clear(); F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); // Loop over all of the callers of the function, transforming the call sites // to pass in the loaded pointers. // SmallVector<Value *, 16> Args; while (!F->use_empty()) { CallSite CS(F->user_back()); assert(CS.getCalledFunction() == F); Instruction *Call = CS.getInstruction(); const AttributeList &CallPAL = CS.getAttributes(); // Loop over the operands, inserting GEP and loads in the caller as // appropriate. CallSite::arg_iterator AI = CS.arg_begin(); ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++AI, ++ArgNo) if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { Args.push_back(*AI); // Unmodified argument ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } else if (ByValArgsToTransform.count(&*I)) { // Emit a GEP and load for each element of the struct. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create( STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call); // TODO: Tell AA about the new values? Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call)); ArgAttrVec.push_back(AttributeSet()); } } else if (!I->use_empty()) { // Non-dead argument: insert GEPs and loads as appropriate. ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; // Store the Value* version of the indices in here, but declare it now // for reuse. 
std::vector<Value *> Ops; for (const auto &ArgIndex : ArgIndices) { Value *V = *AI; LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, ArgIndex.second)]; if (!ArgIndex.second.empty()) { Ops.reserve(ArgIndex.second.size()); Type *ElTy = V->getType(); for (auto II : ArgIndex.second) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. Type *IdxTy = (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, II)); // Keep track of the type we're currently indexing. if (auto *ElPTy = dyn_cast<PointerType>(ElTy)) ElTy = ElPTy->getElementType(); else ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II); } // And create a GEP to extract those indices. V = GetElementPtrInst::Create(ArgIndex.first, V, Ops, V->getName() + ".idx", Call); Ops.clear(); } // Since we're replacing a load make sure we take the alignment // of the previous load. LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call); newLoad->setAlignment(OrigLoad->getAlignment()); // Transfer the AA info too. AAMDNodes AAInfo; OrigLoad->getAAMetadata(AAInfo); newLoad->setAAMetadata(AAInfo); Args.push_back(newLoad); ArgAttrVec.push_back(AttributeSet()); } } // Push any varargs arguments on the list. 
for (; AI != CS.arg_end(); ++AI, ++ArgNo) { Args.push_back(*AI); ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } SmallVector<OperandBundleDef, 1> OpBundles; CS.getOperandBundlesAsDefs(OpBundles); CallSite NewCS; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args, OpBundles, "", Call); } else { auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call); NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind()); NewCS = NewCall; } NewCS.setCallingConv(CS.getCallingConv()); NewCS.setAttributes( AttributeList::get(F->getContext(), CallPAL.getFnAttributes(), CallPAL.getRetAttributes(), ArgAttrVec)); NewCS->setDebugLoc(Call->getDebugLoc()); uint64_t W; if (Call->extractProfTotalWeight(W)) NewCS->setProfWeight(W); Args.clear(); ArgAttrVec.clear(); // Update the callgraph to know that the callsite has been transformed. if (ReplaceCallSite) (*ReplaceCallSite)(CS, NewCS); if (!Call->use_empty()) { Call->replaceAllUsesWith(NewCS.getInstruction()); NewCS->takeName(Call); } // Finally, remove the old call from the program, reducing the use-count of // F. Call->eraseFromParent(); } const DataLayout &DL = F->getParent()->getDataLayout(); // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { // If this is an unmodified argument, move the name and users over to the // new version. 
I->replaceAllUsesWith(&*I2); I2->takeName(&*I); ++I2; continue; } if (ByValArgsToTransform.count(&*I)) { // In the callee, we create an alloca, and store each of the new incoming // arguments into the alloca. Instruction *InsertPt = &NF->begin()->front(); // Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr, I->getParamAlignment(), "", InsertPt); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create( AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), InsertPt); I2->setName(I->getName() + "." + Twine(i)); new StoreInst(&*I2++, Idx, InsertPt); } // Anything that used the arg should now use the alloca. I->replaceAllUsesWith(TheAlloca); TheAlloca->takeName(&*I); // If the alloca is used in a call, we must clear the tail flag since // the callee now uses an alloca from the caller. for (User *U : TheAlloca->users()) { CallInst *Call = dyn_cast<CallInst>(U); if (!Call) continue; Call->setTailCall(false); } continue; } if (I->use_empty()) continue; // Otherwise, if we promoted this argument, then all users are load // instructions (or GEPs with only load users), and all loads should be // using the new argument that we added. 
ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) { assert(ArgIndices.begin()->second.empty() && "Load element should sort to front!"); I2->setName(I->getName() + ".val"); LI->replaceAllUsesWith(&*I2); LI->eraseFromParent(); DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); } else { GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back()); IndicesVector Operands; Operands.reserve(GEP->getNumIndices()); for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); II != IE; ++II) Operands.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Operands.size() == 1 && Operands.front() == 0) Operands.clear(); Function::arg_iterator TheArg = I2; for (ScalarizeTable::iterator It = ArgIndices.begin(); It->second != Operands; ++It, ++TheArg) { assert(It != ArgIndices.end() && "GEP not handled??"); } std::string NewName = I->getName(); for (unsigned i = 0, e = Operands.size(); i != e; ++i) { NewName += "." + utostr(Operands[i]); } NewName += ".val"; TheArg->setName(NewName); DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() << "' of function '" << NF->getName() << "'\n"); // All of the uses must be load instructions. Replace them all with // the argument specified by ArgNo. while (!GEP->use_empty()) { LoadInst *L = cast<LoadInst>(GEP->user_back()); L->replaceAllUsesWith(&*TheArg); L->eraseFromParent(); } GEP->eraseFromParent(); } } // Increment I2 past all of the arguments added for this promoted pointer. std::advance(I2, ArgIndices.size()); } return NF; }