void IndVarSimplify::EliminateIVRemainders() { // Look for SRem and URem users. for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { IVStrideUse &UI = *I; BinaryOperator *Rem = dyn_cast<BinaryOperator>(UI.getUser()); if (!Rem) continue; bool isSigned = Rem->getOpcode() == Instruction::SRem; if (!isSigned && Rem->getOpcode() != Instruction::URem) continue; // We're only interested in the case where we know something about // the numerator. if (UI.getOperandValToReplace() != Rem->getOperand(0)) continue; // Get the SCEVs for the ICmp operands. const SCEV *S = SE->getSCEV(Rem->getOperand(0)); const SCEV *X = SE->getSCEV(Rem->getOperand(1)); // Simplify unnecessary loops away. const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); S = SE->getSCEVAtScope(S, ICmpLoop); X = SE->getSCEVAtScope(X, ICmpLoop); // i % n --> i if i is in [0,n). if ((!isSigned || SE->isKnownNonNegative(S)) && SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, S, X)) Rem->replaceAllUsesWith(Rem->getOperand(0)); else { // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). const SCEV *LessOne = SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); if ((!isSigned || SE->isKnownNonNegative(LessOne)) && SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, LessOne, X)) { ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, Rem->getOperand(0), Rem->getOperand(1), "tmp"); SelectInst *Sel = SelectInst::Create(ICmp, ConstantInt::get(Rem->getType(), 0), Rem->getOperand(0), "tmp", Rem); Rem->replaceAllUsesWith(Sel); } else continue; } // Inform IVUsers about the new users. if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) IU->AddUsersIfInteresting(I); DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); DeadInsts.push_back(Rem); } }
// Insert an intrinsic for fast fdiv for safe math situations where we can // reduce precision. Leave fdiv for situations where the generic node is // expected to be optimized. bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) { Type *Ty = FDiv.getType(); // TODO: Handle half if (!Ty->getScalarType()->isFloatTy()) return false; MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath); if (!FPMath) return false; const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv); float ULP = FPOp->getFPAccuracy(); if (ULP < 2.5f) return false; FastMathFlags FMF = FPOp->getFastMathFlags(); bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() || FMF.allowReciprocal(); if (ST->hasFP32Denormals() && !UnsafeDiv) return false; IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath); Builder.setFastMathFlags(FMF); Builder.SetCurrentDebugLocation(FDiv.getDebugLoc()); const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo(); Function *Decl = II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {}); Value *Num = FDiv.getOperand(0); Value *Den = FDiv.getOperand(1); Value *NewFDiv = nullptr; if (VectorType *VT = dyn_cast<VectorType>(Ty)) { NewFDiv = UndefValue::get(VT); // FIXME: Doesn't do the right thing for cases where the vector is partially // constant. This works when the scalarizer pass is run first. for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) { Value *NumEltI = Builder.CreateExtractElement(Num, I); Value *DenEltI = Builder.CreateExtractElement(Den, I); Value *NewElt; if (shouldKeepFDivF32(NumEltI, UnsafeDiv)) { NewElt = Builder.CreateFDiv(NumEltI, DenEltI); } else { NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI }); } NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I); } } else { if (!shouldKeepFDivF32(Num, UnsafeDiv)) NewFDiv = Builder.CreateCall(Decl, { Num, Den }); } if (NewFDiv) { FDiv.replaceAllUsesWith(NewFDiv); NewFDiv->takeName(&FDiv); FDiv.eraseFromParent(); } return true; }
// Peephole optimize the following instructions: // %t1 = cast ? to x * // %t2 = add x * %SP, %t1 ;; Constant must be 2nd operand // // Into: %t3 = getelementptr {<...>} * %SP, <element indices> // %t2 = cast <eltype> * %t3 to {<...>}* // static bool HandleCastToPointer(BasicBlock::iterator BI, const PointerType *DestPTy, const TargetData &TD) { CastInst &CI = cast<CastInst>(*BI); if (CI.use_empty()) return false; // Scan all of the uses, looking for any uses that are not add or sub // instructions. If we have non-adds, do not make this transformation. // bool HasSubUse = false; // Keep track of any subtracts... for (Value::use_iterator I = CI.use_begin(), E = CI.use_end(); I != E; ++I) if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*I)) { if ((BO->getOpcode() != Instruction::Add && BO->getOpcode() != Instruction::Sub) || // Avoid add sbyte* %X, %X cases... BO->getOperand(0) == BO->getOperand(1)) return false; else HasSubUse |= BO->getOpcode() == Instruction::Sub; } else { return false; } std::vector<Value*> Indices; Value *Src = CI.getOperand(0); const Type *Result = ConvertibleToGEP(DestPTy, Src, Indices, TD, &BI); if (Result == 0) return false; // Not convertible... // Cannot handle subtracts if there is more than one index required... if (HasSubUse && Indices.size() != 1) return false; PRINT_PEEPHOLE2("cast-add-to-gep:in", *Src, CI); // If we have a getelementptr capability... transform all of the // add instruction uses into getelementptr's. while (!CI.use_empty()) { BinaryOperator *I = cast<BinaryOperator>(*CI.use_begin()); assert((I->getOpcode() == Instruction::Add || I->getOpcode() == Instruction::Sub) && "Use is not a valid add instruction!"); // Get the value added to the cast result pointer... Value *OtherPtr = I->getOperand((I->getOperand(0) == &CI) ? 1 : 0); Instruction *GEP = new GetElementPtrInst(OtherPtr, Indices, I->getName()); PRINT_PEEPHOLE1("cast-add-to-gep:i", *I); // If the instruction is actually a subtract, we are guaranteed to only have // one index (from code above), so we just need to negate the pointer index // long value. if (I->getOpcode() == Instruction::Sub) { Instruction *Neg = BinaryOperator::createNeg(GEP->getOperand(1), GEP->getOperand(1)->getName()+".neg", I); GEP->setOperand(1, Neg); } if (GEP->getType() == I->getType()) { // Replace the old add instruction with the shiny new GEP inst ReplaceInstWithInst(I, GEP); } else { // If the type produced by the gep instruction differs from the original // add instruction type, insert a cast now. // // Insert the GEP instruction before the old add instruction... I->getParent()->getInstList().insert(I, GEP); PRINT_PEEPHOLE1("cast-add-to-gep:o", *GEP); GEP = new CastInst(GEP, I->getType()); // Replace the old add instruction with the shiny new GEP inst ReplaceInstWithInst(I, GEP); } PRINT_PEEPHOLE1("cast-add-to-gep:o", *GEP); } return true; }
/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select /// instruction. bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { SelectInst *SI = cast<SelectInst>(I.getOperand(1)); // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y int NonNullOperand = -1; if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1))) if (ST->isNullValue()) NonNullOperand = 2; // div/rem X, (Cond ? Y : 0) -> div/rem X, Y if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2))) if (ST->isNullValue()) NonNullOperand = 1; if (NonNullOperand == -1) return false; Value *SelectCond = SI->getOperand(0); // Change the div/rem to use 'Y' instead of the select. I.setOperand(1, SI->getOperand(NonNullOperand)); // Okay, we know we replace the operand of the div/rem with 'Y' with no // problem. However, the select, or the condition of the select may have // multiple uses. Based on our knowledge that the operand must be non-zero, // propagate the known value for the select into other uses of it, and // propagate a known value of the condition into its other users. // If the select and condition only have a single use, don't bother with this, // early exit. if (SI->use_empty() && SelectCond->hasOneUse()) return true; // Scan the current block backward, looking for other uses of SI. BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin(); while (BBI != BBFront) { --BBI; // If we found a call to a function, we can't assume it will return, so // information from below it cannot be propagated above it. if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI)) break; // Replace uses of the select or its condition with the known values. for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end(); I != E; ++I) { if (*I == SI) { *I = SI->getOperand(NonNullOperand); Worklist.Add(BBI); } else if (*I == SelectCond) { *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) : ConstantInt::getFalse(BBI->getContext()); Worklist.Add(BBI); } } // If we past the instruction, quit looking for it. if (&*BBI == SI) SI = 0; if (&*BBI == SelectCond) SelectCond = 0; // If we ran out of things to eliminate, break out of the loop. if (SelectCond == 0 && SI == 0) break; } return true; }
// NegateValue - Insert instructions before the instruction pointed to by BI, // that computes the negative version of the value specified. The negative // version of the value is returned, and BI is left pointing at the instruction // that should be processed next by the reassociation pass. // static Value *NegateValue(Value *V, Instruction *BI) { if (Constant *C = dyn_cast<Constant>(V)) return ConstantExpr::getNeg(C); // We are trying to expose opportunity for reassociation. One of the things // that we want to do to achieve this is to push a negation as deep into an // expression chain as possible, to expose the add instructions. In practice, // this means that we turn this: // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate // the constants. We assume that instcombine will clean up the mess later if // we introduce tons of unnecessary negation instructions. // if (Instruction *I = dyn_cast<Instruction>(V)) if (I->getOpcode() == Instruction::Add && I->hasOneUse()) { // Push the negates through the add. I->setOperand(0, NegateValue(I->getOperand(0), BI)); I->setOperand(1, NegateValue(I->getOperand(1), BI)); // We must move the add instruction here, because the neg instructions do // not dominate the old add instruction in general. By moving it, we are // assured that the neg instructions we just inserted dominate the // instruction we are about to insert after them. // I->moveBefore(BI); I->setName(I->getName()+".neg"); return I; } // Okay, we need to materialize a negated version of V with an instruction. // Scan the use lists of V to see if we have one already. for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ User *U = *UI; if (!BinaryOperator::isNeg(U)) continue; // We found one! Now we have to make sure that the definition dominates // this use. We do this by moving it to the entry block (if it is a // non-instruction value) or right after the definition. These negates will // be zapped by reassociate later, so we don't need much finesse here. BinaryOperator *TheNeg = cast<BinaryOperator>(U); // Verify that the negate is in this function, V might be a constant expr. if (TheNeg->getParent()->getParent() != BI->getParent()->getParent()) continue; BasicBlock::iterator InsertPt; if (Instruction *InstInput = dyn_cast<Instruction>(V)) { if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) { InsertPt = II->getNormalDest()->begin(); } else { InsertPt = InstInput; ++InsertPt; } while (isa<PHINode>(InsertPt)) ++InsertPt; } else { InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin(); } TheNeg->moveBefore(InsertPt); return TheNeg; } // Insert a 'neg' instruction that subtracts the value from zero to get the // negation. return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI); }