bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  if (I.getOpcode() == Instruction::SDiv ||
      I.getOpcode() == Instruction::UDiv)
    return false;

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Value *ExtOp0 = nullptr;
  Value *ExtOp1 = nullptr;
  Value *ExtRes = nullptr;
  Value *TruncRes = nullptr;

  if (isSigned(I)) {
    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
  } else {
    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
  }

  ExtRes = copyFlags(I, Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1));
  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}
Value *X86RegisterSema::computeEFLAGSForDef(Value *Def, Value *OldEFLAGS,
                                            bool DontUpdateCF) {
  // FIXME: This describes the general semantics of EFLAGS update, but this
  // needs to handle the differences between instructions.
  // This would be done by keeping more information on the instruction with
  // LastEFLAGSChangingDef.
  // For now we only do DontUpdateCF, for INC/DEC instructions.
  setSF(X86::ZF, Builder->CreateIsNull(Def));
  setSF(X86::SF, Builder->CreateICmpSLT(
                     Def, ConstantInt::getNullValue(Def->getType())));
  // FIXME: We need to generate AF as well.
  setSF(X86::AF, Builder->getFalse());

  // FIXME: CF/OF need a smarter trick.
  Intrinsic::ID OverflowIntrinsic = Intrinsic::not_intrinsic,
                CarryIntrinsic = Intrinsic::not_intrinsic;
  BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Def);
  if (BinOp && BinOp->getOpcode() == BinaryOperator::Add) {
    OverflowIntrinsic = Intrinsic::sadd_with_overflow;
    CarryIntrinsic = Intrinsic::uadd_with_overflow;
  } else if (BinOp && BinOp->getOpcode() == BinaryOperator::Sub) {
    OverflowIntrinsic = Intrinsic::ssub_with_overflow;
    CarryIntrinsic = Intrinsic::usub_with_overflow;
  }
  if (BinOp && OverflowIntrinsic && CarryIntrinsic) {
    Value *Args[] = {BinOp->getOperand(0), BinOp->getOperand(1)};
    setSF(X86::OF,
          Builder->CreateExtractValue(
              Builder->CreateCall(
                  Intrinsic::getDeclaration(TheModule, OverflowIntrinsic,
                                            BinOp->getType()),
                  Args),
              1));
    if (!DontUpdateCF)
      setSF(X86::CF,
            Builder->CreateExtractValue(
                Builder->CreateCall(
                    Intrinsic::getDeclaration(TheModule, CarryIntrinsic,
                                              BinOp->getType()),
                    Args),
                1));
  } else {
    if (!DontUpdateCF)
      setSF(X86::CF, Builder->getFalse());
    setSF(X86::OF, Builder->getFalse());
  }

  Type *I8Ty = Builder->getInt8Ty();
  setSF(X86::PF,
        Builder->CreateIsNull(Builder->CreateTrunc(
            Builder->CreateCall(
                Intrinsic::getDeclaration(TheModule, Intrinsic::ctpop, I8Ty),
                {Builder->CreateTrunc(Def, I8Ty)}),
            Builder->getInt1Ty())));

  return createEFLAGSFromSFs(OldEFLAGS);
}
//---------------------------------------------------------
void unrollRecordAssigns(StmtEditor& editor, Stmt* S)
{
  const RecordType *RT;
  for (stmt_iterator<BinaryOperator> i = stmt_ibegin(S), e = stmt_iend(S);
       i != e;)
  {
    BinaryOperator* BO = *i;
    if (BO->getOpcode() == BO_Assign &&
        (RT = BO->getType()->getAsStructureType()) != 0 &&
        editor.getStatementOfExpression(BO) == BO) // ensures top level assign
    {
      CompoundStmt* CS = editor.ensureCompoundParent(BO);
      std::vector<Stmt*> compoundStmts(CS->child_begin(), CS->child_end());
      std::vector<Stmt*>::iterator insertPos =
        std::find(compoundStmts.begin(), compoundStmts.end(), BO);
      assert(insertPos != compoundStmts.end());
      insertPos = compoundStmts.erase(insertPos);
      RecordAssignUnroller unroller(editor, RT->getDecl(), BO);
      compoundStmts.insert(insertPos, unroller.compoundStmts.begin(),
                           unroller.compoundStmts.end());
      editor.replaceStmts(CS, &compoundStmts[0], compoundStmts.size());
      i = stmt_ibegin(S);
    }
    else
    {
      ++i;
    }
  }
}
void ModuloSchedulerDriverPass::foldAddInstructions(Instruction* add) {
  if (dyn_cast<BinaryOperator>(add) && add->getOpcode() == Instruction::Add) {
    BinaryOperator *bin = dyn_cast<BinaryOperator>(add);
    unsigned int bitWidth = cast<IntegerType>(bin->getType())->getBitWidth();
    Value *p0 = add->getOperand(0); // param0
    Value *p1 = add->getOperand(1); // param1
    if (dyn_cast<ConstantInt>(p0) && dyn_cast<ConstantInt>(p1)) {
      ConstantInt *c0 = dyn_cast<ConstantInt>(p0);
      ConstantInt *c1 = dyn_cast<ConstantInt>(p1);
      // TODO: May overflow
      unsigned int val =
        (c0->getValue().getZExtValue() + c1->getValue().getZExtValue());
      ConstantInt *ncon = ConstantInt::get(APInt(bitWidth, val));
      add->replaceAllUsesWith(ncon);
      add->eraseFromParent();
      return;
    }
    if (ConstantInt *c0 = dyn_cast<ConstantInt>(p0)) {
      if (0 == c0->getValue().getZExtValue()) {
        add->replaceAllUsesWith(p1);
        add->eraseFromParent();
        return;
      }
    }
    if (ConstantInt *c1 = dyn_cast<ConstantInt>(p1)) {
      if (0 == c1->getValue().getZExtValue()) {
        add->replaceAllUsesWith(p0);
        add->eraseFromParent();
        return;
      }
    }
  }
}
bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  if (I.getOpcode() == Instruction::SDiv ||
      I.getOpcode() == Instruction::UDiv ||
      I.getOpcode() == Instruction::SRem ||
      I.getOpcode() == Instruction::URem)
    return false;

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Value *ExtOp0 = nullptr;
  Value *ExtOp1 = nullptr;
  Value *ExtRes = nullptr;
  Value *TruncRes = nullptr;

  if (isSigned(I)) {
    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
  } else {
    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
  }

  ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
  if (Instruction *Inst = dyn_cast<Instruction>(ExtRes)) {
    if (promotedOpIsNSW(cast<Instruction>(I)))
      Inst->setHasNoSignedWrap();

    if (promotedOpIsNUW(cast<Instruction>(I)))
      Inst->setHasNoUnsignedWrap();

    if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
      Inst->setIsExact(ExactOp->isExact());
  }

  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}
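// Illustrative sketch (not part of the original source): assuming a uniform,
// signed 16-bit add, the promotion above rewrites the IR roughly as follows.
//
//   %r = add i16 %a, %b
//
// becomes
//
//   %a32 = sext i16 %a to i32
//   %b32 = sext i16 %b to i32
//   %r32 = add i32 %a32, %b32      ; nsw/nuw/exact copied when provably safe
//   %r   = trunc i32 %r32 to i16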
void IndVarSimplify::EliminateIVRemainders() {
  // Look for SRem and URem users.
  for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
    IVStrideUse &UI = *I;
    BinaryOperator *Rem = dyn_cast<BinaryOperator>(UI.getUser());
    if (!Rem) continue;

    bool isSigned = Rem->getOpcode() == Instruction::SRem;
    if (!isSigned && Rem->getOpcode() != Instruction::URem)
      continue;

    // We're only interested in the case where we know something about
    // the numerator.
    if (UI.getOperandValToReplace() != Rem->getOperand(0))
      continue;

    // Get the SCEVs for the ICmp operands.
    const SCEV *S = SE->getSCEV(Rem->getOperand(0));
    const SCEV *X = SE->getSCEV(Rem->getOperand(1));

    // Simplify unnecessary loops away.
    const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
    S = SE->getSCEVAtScope(S, ICmpLoop);
    X = SE->getSCEVAtScope(X, ICmpLoop);

    // i % n  -->  i  if i is in [0,n).
    if ((!isSigned || SE->isKnownNonNegative(S)) &&
        SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
                             S, X))
      Rem->replaceAllUsesWith(Rem->getOperand(0));
    else {
      // (i+1) % n  -->  (i+1)==n ? 0 : (i+1)  if i is in [0,n).
      const SCEV *LessOne =
        SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
      if ((!isSigned || SE->isKnownNonNegative(LessOne)) &&
          SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
                               LessOne, X)) {
        ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
                                      Rem->getOperand(0), Rem->getOperand(1),
                                      "tmp");
        SelectInst *Sel = SelectInst::Create(ICmp,
                                             ConstantInt::get(Rem->getType(), 0),
                                             Rem->getOperand(0),
                                             "tmp", Rem);
        Rem->replaceAllUsesWith(Sel);
      } else
        continue;
    }

    // Inform IVUsers about the new users.
    if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
      IU->AddUsersIfInteresting(I);

    DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
    DeadInsts.push_back(Rem);
  }
}
bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
  bool Changed = false;

  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I))
    Changed |= promoteUniformOpToI32(I);

  return Changed;
}
bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
  if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
      DA->isUniform(&I) && promoteUniformOpToI32(I))
    return true;

  bool Changed = false;
  Instruction::BinaryOps Opc = I.getOpcode();
  Type *Ty = I.getType();
  Value *NewDiv = nullptr;
  if ((Opc == Instruction::URem || Opc == Instruction::UDiv ||
       Opc == Instruction::SRem || Opc == Instruction::SDiv) &&
      Ty->getScalarSizeInBits() <= 32) {
    Value *Num = I.getOperand(0);
    Value *Den = I.getOperand(1);
    IRBuilder<> Builder(&I);
    Builder.SetCurrentDebugLocation(I.getDebugLoc());

    if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
      NewDiv = UndefValue::get(VT);

      for (unsigned N = 0, E = VT->getNumElements(); N != E; ++N) {
        Value *NumEltN = Builder.CreateExtractElement(Num, N);
        Value *DenEltN = Builder.CreateExtractElement(Den, N);
        Value *NewElt = expandDivRem32(Builder, I, NumEltN, DenEltN);
        if (!NewElt)
          NewElt = Builder.CreateBinOp(Opc, NumEltN, DenEltN);
        NewDiv = Builder.CreateInsertElement(NewDiv, NewElt, N);
      }
    } else {
      NewDiv = expandDivRem32(Builder, I, Num, Den);
    }

    if (NewDiv) {
      I.replaceAllUsesWith(NewDiv);
      I.eraseFromParent();
      Changed = true;
    }
  }

  return Changed;
}
bool IRTranslator::translateBinaryOp(unsigned Opcode,
                                     const BinaryOperator &Inst) {
  // FIXME: handle signed/unsigned wrapping flags.

  // Get or create a virtual register for each value.
  // Unless the value is a Constant => loadimm cst?
  // or inline constant each time?
  // Creation of a virtual register needs to have a size.
  unsigned Op0 = getOrCreateVReg(*Inst.getOperand(0));
  unsigned Op1 = getOrCreateVReg(*Inst.getOperand(1));
  unsigned Res = getOrCreateVReg(Inst);
  MIRBuilder.buildInstr(Opcode, LLT{*Inst.getType()})
      .addDef(Res)
      .addUse(Op0)
      .addUse(Op1);
  return true;
}
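// Illustrative sketch (not part of the original source): for IR such as
// "%c = add i32 %a, %b", the buildInstr call above emits a generic MachineIR
// instruction along the lines of
//
//   %2(s32) = G_ADD %0, %1
//
// where %0, %1 and %2 are the virtual registers returned by getOrCreateVReg.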
void Lint::visitShl(BinaryOperator &I) {
  if (ConstantInt *CI =
        dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
    Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
           "Undefined result: Shift count out of range", &I);
}
/// HandleFloatingPointIV - If the loop has floating induction variable
/// then insert corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
///
void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  ConstantFP *InitValueVal =
    dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return;

  // Check IV increment. Reject this PN if increment operation is not
  // an add or increment value can not be represented by an integer.
  BinaryOperator *Incr =
    dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::use_iterator IncrUse = Incr->use_begin();
  Instruction *U1 = cast<Instruction>(IncrUse++);
  if (IncrUse == Incr->use_end()) return;
  Instruction *U2 = cast<Instruction>(IncrUse++);
  if (IncrUse != Incr->use_end()) return;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (Compare == 0 || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->use_back()))
    return;

  BranchInst *TheBr = cast<BranchInst>(Compare->use_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == 0 ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for
  // this now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value and an equality or less than comparison.
    if (InitValue >= ExitValue ||
        NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
      return;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    if (NewPred == CmpInst::ICMP_SLE) {
      // Normalize SLE -> SLT, check for infinite loop.
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return;

  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value and an equality or greater than comparison.
    if (InitValue >= ExitValue ||
        NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
      return;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    if (NewPred == CmpInst::ICMP_SGE) {
      // Normalize SGE -> SGT, check for infinite loop.
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return;
  }

  const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakVH to observe whether this happens.
  WeakVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using
  // the new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr);

  // If the FP induction variable still has uses, this is because something
  // else in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 PN->getParent()->getFirstNonPHI());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN);
  }

  // Add a new IVUsers entry for the newly-created integer PHI.
  IU->AddUsersIfInteresting(NewPHI);
}
// Insert an intrinsic for fast fdiv for safe math situations where we can
// reduce precision. Leave fdiv for situations where the generic node is
// expected to be optimized.
bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
  Type *Ty = FDiv.getType();

  // TODO: Handle half
  if (!Ty->getScalarType()->isFloatTy())
    return false;

  MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath);
  if (!FPMath)
    return false;

  const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
  float ULP = FPOp->getFPAccuracy();
  if (ULP < 2.5f)
    return false;

  FastMathFlags FMF = FPOp->getFastMathFlags();
  bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() ||
                   FMF.allowReciprocal();
  if (ST->hasFP32Denormals() && !UnsafeDiv)
    return false;

  IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
  Builder.setFastMathFlags(FMF);
  Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());

  const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo();
  Function *Decl
    = II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {});

  Value *Num = FDiv.getOperand(0);
  Value *Den = FDiv.getOperand(1);

  Value *NewFDiv = nullptr;

  if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
    NewFDiv = UndefValue::get(VT);

    // FIXME: Doesn't do the right thing for cases where the vector is
    // partially constant. This works when the scalarizer pass is run first.
    for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
      Value *NumEltI = Builder.CreateExtractElement(Num, I);
      Value *DenEltI = Builder.CreateExtractElement(Den, I);
      Value *NewElt;

      if (shouldKeepFDivF32(NumEltI, UnsafeDiv)) {
        NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
      } else {
        NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
      }

      NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
    }
  } else {
    if (!shouldKeepFDivF32(Num, UnsafeDiv))
      NewFDiv = Builder.CreateCall(Decl, { Num, Den });
  }

  if (NewFDiv) {
    FDiv.replaceAllUsesWith(NewFDiv);
    NewFDiv->takeName(&FDiv);
    FDiv.eraseFromParent();
  }

  return true;
}
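// Illustrative sketch (not part of the original source): with !fpmath metadata
// permitting 2.5 ULP or more of error, a scalar division such as
//
//   %d = fdiv float %x, %y, !fpmath !0    ; !0 = !{float 2.500000e+00}
//
// is rewritten above into a call to the fast-division intrinsic, roughly
//
//   %d = call float @llvm.amdgcn.fdiv.fast(float %x, float %y)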
void Lint::visitShl(BinaryOperator &I) {
  if (ConstantInt *CI =
        dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts()))
    Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
            "Undefined result: Shift count out of range", &I);
}
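// Illustrative sketch (not part of the original source): either variant of
// this check flags IR such as
//
//   %r = shl i32 %x, 37
//
// because the constant shift amount 37 is not less than the 32-bit width of
// the shifted type, so the result is undefined.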
// Peephole optimize the following instructions:
// %t1 = cast ? to x *
// %t2 = add x * %SP, %t1              ;; Constant must be 2nd operand
//
// Into: %t3 = getelementptr {<...>} * %SP, <element indices>
//       %t2 = cast <eltype> * %t3 to {<...>}*
//
static bool HandleCastToPointer(BasicBlock::iterator BI,
                                const PointerType *DestPTy,
                                const TargetData &TD) {
  CastInst &CI = cast<CastInst>(*BI);
  if (CI.use_empty()) return false;

  // Scan all of the uses, looking for any uses that are not add or sub
  // instructions.  If we have non-adds, do not make this transformation.
  //
  bool HasSubUse = false;  // Keep track of any subtracts...
  for (Value::use_iterator I = CI.use_begin(), E = CI.use_end();
       I != E; ++I)
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*I)) {
      if ((BO->getOpcode() != Instruction::Add &&
           BO->getOpcode() != Instruction::Sub) ||
          // Avoid add sbyte* %X, %X cases...
          BO->getOperand(0) == BO->getOperand(1))
        return false;
      else
        HasSubUse |= BO->getOpcode() == Instruction::Sub;
    } else {
      return false;
    }

  std::vector<Value*> Indices;
  Value *Src = CI.getOperand(0);
  const Type *Result = ConvertibleToGEP(DestPTy, Src, Indices, TD, &BI);
  if (Result == 0) return false;  // Not convertible...

  // Cannot handle subtracts if there is more than one index required...
  if (HasSubUse && Indices.size() != 1) return false;

  PRINT_PEEPHOLE2("cast-add-to-gep:in", *Src, CI);

  // If we have a getelementptr capability... transform all of the
  // add instruction uses into getelementptr's.
  while (!CI.use_empty()) {
    BinaryOperator *I = cast<BinaryOperator>(*CI.use_begin());
    assert((I->getOpcode() == Instruction::Add ||
            I->getOpcode() == Instruction::Sub) &&
           "Use is not a valid add instruction!");

    // Get the value added to the cast result pointer...
    Value *OtherPtr = I->getOperand((I->getOperand(0) == &CI) ? 1 : 0);

    Instruction *GEP = new GetElementPtrInst(OtherPtr, Indices, I->getName());
    PRINT_PEEPHOLE1("cast-add-to-gep:i", *I);

    // If the instruction is actually a subtract, we are guaranteed to only
    // have one index (from code above), so we just need to negate the pointer
    // index long value.
    if (I->getOpcode() == Instruction::Sub) {
      Instruction *Neg = BinaryOperator::createNeg(GEP->getOperand(1),
                                     GEP->getOperand(1)->getName()+".neg", I);
      GEP->setOperand(1, Neg);
    }

    if (GEP->getType() == I->getType()) {
      // Replace the old add instruction with the shiny new GEP inst
      ReplaceInstWithInst(I, GEP);
    } else {
      // If the type produced by the gep instruction differs from the original
      // add instruction type, insert a cast now.
      //

      // Insert the GEP instruction before the old add instruction...
      I->getParent()->getInstList().insert(I, GEP);

      PRINT_PEEPHOLE1("cast-add-to-gep:o", *GEP);
      GEP = new CastInst(GEP, I->getType());

      // Replace the old add instruction with the shiny new GEP inst
      ReplaceInstWithInst(I, GEP);
    }

    PRINT_PEEPHOLE1("cast-add-to-gep:o", *GEP);
  }
  return true;
}
/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
/// this value inserts the new computation that produces the shifted value.
static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
                              InstCombiner &IC) {
  // We can always evaluate constants shifted.
  if (Constant *C = dyn_cast<Constant>(V)) {
    if (isLeftShift)
      V = IC.Builder->CreateShl(C, NumBits);
    else
      V = IC.Builder->CreateLShr(C, NumBits);
    // If we got a constantexpr back, try to simplify it with TD info.
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      V = ConstantFoldConstantExpression(CE, IC.getDataLayout(),
                                         IC.getTargetLibraryInfo());
    return V;
  }

  Instruction *I = cast<Instruction>(V);
  IC.Worklist.Add(I);

  switch (I->getOpcode()) {
  default: llvm_unreachable("Inconsistency with CanEvaluateShifted");
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be arbitrarily evaluated shifted.
    I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
    return I;

  case Instruction::Shl: {
    BinaryOperator *BO = cast<BinaryOperator>(I);
    unsigned TypeWidth = BO->getType()->getScalarSizeInBits();

    // We only accept shifts-by-a-constant in CanEvaluateShifted.
    ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));

    // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
    if (isLeftShift) {
      // If this is oversized composite shift, then unsigned shifts get 0.
      unsigned NewShAmt = NumBits+CI->getZExtValue();
      if (NewShAmt >= TypeWidth)
        return Constant::getNullValue(I->getType());

      BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
      BO->setHasNoUnsignedWrap(false);
      BO->setHasNoSignedWrap(false);
      return I;
    }

    // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
    // zeros.
    if (CI->getValue() == NumBits) {
      APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
      V = IC.Builder->CreateAnd(BO->getOperand(0),
                                ConstantInt::get(BO->getContext(), Mask));
      if (Instruction *VI = dyn_cast<Instruction>(V)) {
        VI->moveBefore(BO);
        VI->takeName(BO);
      }
      return V;
    }

    // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
    // the and won't be needed.
    assert(CI->getZExtValue() > NumBits);
    BO->setOperand(1, ConstantInt::get(BO->getType(),
                                       CI->getZExtValue() - NumBits));
    BO->setHasNoUnsignedWrap(false);
    BO->setHasNoSignedWrap(false);
    return BO;
  }
  case Instruction::LShr: {
    BinaryOperator *BO = cast<BinaryOperator>(I);
    unsigned TypeWidth = BO->getType()->getScalarSizeInBits();

    // We only accept shifts-by-a-constant in CanEvaluateShifted.
    ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));

    // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
    if (!isLeftShift) {
      // If this is oversized composite shift, then unsigned shifts get 0.
      unsigned NewShAmt = NumBits+CI->getZExtValue();
      if (NewShAmt >= TypeWidth)
        return Constant::getNullValue(BO->getType());

      BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
      BO->setIsExact(false);
      return I;
    }

    // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
    // zeros.
    if (CI->getValue() == NumBits) {
      APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
      V = IC.Builder->CreateAnd(I->getOperand(0),
                                ConstantInt::get(BO->getContext(), Mask));
      if (Instruction *VI = dyn_cast<Instruction>(V)) {
        VI->moveBefore(I);
        VI->takeName(I);
      }
      return V;
    }

    // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
    // the and won't be needed.
    assert(CI->getZExtValue() > NumBits);
    BO->setOperand(1, ConstantInt::get(BO->getType(),
                                       CI->getZExtValue() - NumBits));
    BO->setIsExact(false);
    return BO;
  }

  case Instruction::Select:
    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
    I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
    return I;
  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), NumBits,
                                              isLeftShift, IC));
    return PN;
  }
  }
}
/// HandleFloatingPointIV - If the loop has floating induction variable
/// then insert corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
///
void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
  unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  ConstantFP *InitValue =
    dyn_cast<ConstantFP>(PH->getIncomingValue(IncomingEdge));
  if (!InitValue) return;
  uint64_t newInitValue =
    Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();
  if (!convertToInt(InitValue->getValueAPF(), &newInitValue))
    return;

  // Check IV increment. Reject this PH if increment operation is not
  // an add or increment value can not be represented by an integer.
  BinaryOperator *Incr =
    dyn_cast<BinaryOperator>(PH->getIncomingValue(BackEdge));
  if (!Incr) return;
  if (Incr->getOpcode() != Instruction::FAdd) return;
  ConstantFP *IncrValue = NULL;
  unsigned IncrVIndex = 1;
  if (Incr->getOperand(1) == PH)
    IncrVIndex = 0;
  IncrValue = dyn_cast<ConstantFP>(Incr->getOperand(IncrVIndex));
  if (!IncrValue) return;
  uint64_t newIncrValue =
    Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();
  if (!convertToInt(IncrValue->getValueAPF(), &newIncrValue))
    return;

  // Check Incr uses. One user is PH and the other user is the exit condition
  // used by the conditional terminator.
  Value::use_iterator IncrUse = Incr->use_begin();
  Instruction *U1 = cast<Instruction>(IncrUse++);
  if (IncrUse == Incr->use_end()) return;
  Instruction *U2 = cast<Instruction>(IncrUse++);
  if (IncrUse != Incr->use_end()) return;

  // Find exit condition.
  FCmpInst *EC = dyn_cast<FCmpInst>(U1);
  if (!EC)
    EC = dyn_cast<FCmpInst>(U2);
  if (!EC) return;

  if (BranchInst *BI = dyn_cast<BranchInst>(EC->getParent()->getTerminator())) {
    if (!BI->isConditional()) return;
    if (BI->getCondition() != EC) return;
  }

  // Find exit value. If exit value can not be represented as an integer then
  // do not handle this floating point PH.
  ConstantFP *EV = NULL;
  unsigned EVIndex = 1;
  if (EC->getOperand(1) == Incr)
    EVIndex = 0;
  EV = dyn_cast<ConstantFP>(EC->getOperand(EVIndex));
  if (!EV) return;
  uint64_t intEV =
    Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();
  if (!convertToInt(EV->getValueAPF(), &intEV))
    return;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (EC->getPredicate()) {
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_UGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_UGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_ULT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_ULE; break;
  default: break;
  }
  if (NewPred == CmpInst::BAD_ICMP_PREDICATE) return;

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Type::getInt32Ty(PH->getContext()),
                                    PH->getName()+".int", PH);
  NewPHI->addIncoming(ConstantInt::get(Type::getInt32Ty(PH->getContext()),
                                       newInitValue),
                      PH->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI,
                              ConstantInt::get(Type::getInt32Ty(PH->getContext()),
                                               newIncrValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PH->getIncomingBlock(BackEdge));

  // The back edge is edge 1 of newPHI, whatever it may have been in the
  // original PHI.
  ConstantInt *NewEV = ConstantInt::get(Type::getInt32Ty(PH->getContext()),
                                        intEV);
  Value *LHS = (EVIndex == 1 ? NewPHI->getIncomingValue(1) : NewEV);
  Value *RHS = (EVIndex == 1 ? NewEV : NewPHI->getIncomingValue(1));
  ICmpInst *NewEC = new ICmpInst(EC->getParent()->getTerminator(),
                                 NewPred, LHS, RHS, EC->getName());

  // In the following deletions, PH may become dead and may be deleted.
  // Use a WeakVH to observe whether this happens.
  WeakVH WeakPH = PH;

  // Delete old, floating point, exit comparison instruction.
  NewEC->takeName(EC);
  EC->replaceAllUsesWith(NewEC);
  RecursivelyDeleteTriviallyDeadInstructions(EC);

  // Delete old, floating point, increment instruction.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr);

  // Replace floating induction variable, if it isn't already deleted.
  // Give SIToFPInst preference over UIToFPInst because it is faster on
  // platforms that are widely used.
  if (WeakPH && !PH->use_empty()) {
    if (useSIToFPInst(*InitValue, *EV, newInitValue, intEV)) {
      SIToFPInst *Conv = new SIToFPInst(NewPHI, PH->getType(), "indvar.conv",
                                        PH->getParent()->getFirstNonPHI());
      PH->replaceAllUsesWith(Conv);
    } else {
      UIToFPInst *Conv = new UIToFPInst(NewPHI, PH->getType(), "indvar.conv",
                                        PH->getParent()->getFirstNonPHI());
      PH->replaceAllUsesWith(Conv);
    }
    RecursivelyDeleteTriviallyDeadInstructions(PH);
  }

  // Add a new IVUsers entry for the newly-created integer PHI.
  IU->AddUsersIfInteresting(NewPHI);
}