unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); if (BitSize == 0) return ~0U; switch (IID) { default: return TCC_Free; case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: case Intrinsic::ssub_with_overflow: case Intrinsic::usub_with_overflow: case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) return TCC_Free; break; case Intrinsic::experimental_stackmap: if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) return TCC_Free; break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) return TCC_Free; break; } return X86TTI::getIntImmCost(Imm, Ty); }
int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { if (DisablePPCConstHoist) return BaseT::getIntImmCost(Imm, Ty); assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); if (BitSize == 0) return ~0U; if (Imm == 0) return TTI::TCC_Free; if (Imm.getBitWidth() <= 64) { if (isInt<16>(Imm.getSExtValue())) return TTI::TCC_Basic; if (isInt<32>(Imm.getSExtValue())) { // A constant that can be materialized using lis. if ((Imm.getZExtValue() & 0xFFFF) == 0) return TTI::TCC_Basic; return 2 * TTI::TCC_Basic; } } return 4 * TTI::TCC_Basic; }
int X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); // There is no cost model for constants with a bit size of 0. Return TCC_Free // here, so that constant hoisting will ignore this constant. if (BitSize == 0) return TTI::TCC_Free; switch (IID) { default: return TTI::TCC_Free; case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: case Intrinsic::ssub_with_overflow: case Intrinsic::usub_with_overflow: case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) return TTI::TCC_Free; break; case Intrinsic::experimental_stackmap: if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) return TTI::TCC_Free; break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) return TTI::TCC_Free; break; } return X86TTIImpl::getIntImmCost(Imm, Ty); }
Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis, const Candidate &C, IRBuilder<> &Builder, const DataLayout *DL, bool &BumpWithUglyGEP) { APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue(); unifyBitWidth(Idx, BasisIdx); APInt IndexOffset = Idx - BasisIdx; BumpWithUglyGEP = false; if (Basis.CandidateKind == Candidate::GEP) { APInt ElementSize( IndexOffset.getBitWidth(), DL->getTypeAllocSize( cast<GetElementPtrInst>(Basis.Ins)->getType()->getElementType())); APInt Q, R; APInt::sdivrem(IndexOffset, ElementSize, Q, R); if (R.getSExtValue() == 0) IndexOffset = Q; else BumpWithUglyGEP = true; } // Compute Bump = C - Basis = (i' - i) * S. // Common case 1: if (i' - i) is 1, Bump = S. if (IndexOffset.getSExtValue() == 1) return C.Stride; // Common case 2: if (i' - i) is -1, Bump = -S. if (IndexOffset.getSExtValue() == -1) return Builder.CreateNeg(C.Stride); // Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may // have different bit widths. IntegerType *DeltaType = IntegerType::get(Basis.Ins->getContext(), IndexOffset.getBitWidth()); Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, DeltaType); if (IndexOffset.isPowerOf2()) { // If (i' - i) is a power of 2, Bump = sext/trunc(S) << log(i' - i). ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2()); return Builder.CreateShl(ExtendedStride, Exponent); } if ((-IndexOffset).isPowerOf2()) { // If (i - i') is a power of 2, Bump = -sext/trunc(S) << log(i' - i). ConstantInt *Exponent = ConstantInt::get(DeltaType, (-IndexOffset).logBase2()); return Builder.CreateNeg(Builder.CreateShl(ExtendedStride, Exponent)); } Constant *Delta = ConstantInt::get(DeltaType, IndexOffset); return Builder.CreateMul(ExtendedStride, Delta); }
int SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); // There is no cost model for constants with a bit size of 0. Return TCC_Free // here, so that constant hoisting will ignore this constant. if (BitSize == 0) return TTI::TCC_Free; // No cost model for operations on integers larger than 64 bit implemented yet. if (BitSize > 64) return TTI::TCC_Free; switch (IID) { default: return TTI::TCC_Free; case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: case Intrinsic::ssub_with_overflow: case Intrinsic::usub_with_overflow: // These get expanded to include a normal addition/subtraction. if (Idx == 1 && Imm.getBitWidth() <= 64) { if (isUInt<32>(Imm.getZExtValue())) return TTI::TCC_Free; if (isUInt<32>(-Imm.getSExtValue())) return TTI::TCC_Free; } break; case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: // These get expanded to include a normal multiplication. if (Idx == 1 && Imm.getBitWidth() <= 64) { if (isInt<32>(Imm.getSExtValue())) return TTI::TCC_Free; } break; case Intrinsic::experimental_stackmap: if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) return TTI::TCC_Free; break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) return TTI::TCC_Free; break; } return SystemZTTIImpl::getIntImmCost(Imm, Ty); }
int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); if (BitSize == 0) return ~0U; // Never hoist constants larger than 128bit, because this might lead to // incorrect code generation or assertions in codegen. // Fixme: Create a cost model for types larger than i128 once the codegen // issues have been fixed. if (BitSize > 128) return TTI::TCC_Free; if (Imm == 0) return TTI::TCC_Free; // Sign-extend all constants to a multiple of 64-bit. APInt ImmVal = Imm; if (BitSize & 0x3f) ImmVal = Imm.sext((BitSize + 63) & ~0x3fU); // Split the constant into 64-bit chunks and calculate the cost for each // chunk. int Cost = 0; for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) { APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64); int64_t Val = Tmp.getSExtValue(); Cost += getIntImmCost(Val); } // We need at least one instruction to materialze the constant. return std::max(1, Cost); }
int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned Bits = Ty->getPrimitiveSizeInBits(); if (Bits == 0 || Imm.getActiveBits() >= 64) return 4; int64_t SImmVal = Imm.getSExtValue(); uint64_t ZImmVal = Imm.getZExtValue(); if (!ST->isThumb()) { if ((SImmVal >= 0 && SImmVal < 65536) || (ARM_AM::getSOImmVal(ZImmVal) != -1) || (ARM_AM::getSOImmVal(~ZImmVal) != -1)) return 1; return ST->hasV6T2Ops() ? 2 : 3; } if (ST->isThumb2()) { if ((SImmVal >= 0 && SImmVal < 65536) || (ARM_AM::getT2SOImmVal(ZImmVal) != -1) || (ARM_AM::getT2SOImmVal(~ZImmVal) != -1)) return 1; return ST->hasV6T2Ops() ? 2 : 3; } // Thumb1, any i8 imm cost 1. if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256)) return 1; if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal)) return 2; // Load from constantpool. return 3; }
/// \brief Finds and combines constants that can be easily rematerialized with /// an add from a common base constant. void ConstantHoisting::FindBaseConstants() { // Sort the constants by value and type. This invalidates the mapping. std::sort(ConstantMap.begin(), ConstantMap.end(), ConstantMapLessThan); // Simple linear scan through the sorted constant map for viable merge // candidates. ConstantMapType::iterator MinValItr = ConstantMap.begin(); for (ConstantMapType::iterator I = llvm::next(ConstantMap.begin()), E = ConstantMap.end(); I != E; ++I) { if (MinValItr->first->getType() == I->first->getType()) { // Check if the constant is in range of an add with immediate. APInt Diff = I->first->getValue() - MinValItr->first->getValue(); if ((Diff.getBitWidth() <= 64) && TTI->isLegalAddImmediate(Diff.getSExtValue())) continue; } // We either have now a different constant type or the constant is not in // range of an add with immediate anymore. FindAndMakeBaseConstant(MinValItr, I); // Start a new base constant search. MinValItr = I; } // Finalize the last base constant search. FindAndMakeBaseConstant(MinValItr, ConstantMap.end()); }
int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); // There is no cost model for constants with a bit size of 0. Return TCC_Free // here, so that constant hoisting will ignore this constant. if (BitSize == 0) return TTI::TCC_Free; // No cost model for operations on integers larger than 64 bit implemented yet. if (BitSize > 64) return TTI::TCC_Free; if (Imm == 0) return TTI::TCC_Free; if (Imm.getBitWidth() <= 64) { // Constants loaded via lgfi. if (isInt<32>(Imm.getSExtValue())) return TTI::TCC_Basic; // Constants loaded via llilf. if (isUInt<32>(Imm.getZExtValue())) return TTI::TCC_Basic; // Constants loaded via llihf: if ((Imm.getZExtValue() & 0xffffffff) == 0) return TTI::TCC_Basic; return 2 * TTI::TCC_Basic; } return 4 * TTI::TCC_Basic; }
unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned Bits = Ty->getPrimitiveSizeInBits(); if (Bits == 0 || Bits > 32) return 4; int32_t SImmVal = Imm.getSExtValue(); uint32_t ZImmVal = Imm.getZExtValue(); if (!ST->isThumb()) { if ((SImmVal >= 0 && SImmVal < 65536) || (ARM_AM::getSOImmVal(ZImmVal) != -1) || (ARM_AM::getSOImmVal(~ZImmVal) != -1)) return 1; return ST->hasV6T2Ops() ? 2 : 3; } else if (ST->isThumb2()) { if ((SImmVal >= 0 && SImmVal < 65536) || (ARM_AM::getT2SOImmVal(ZImmVal) != -1) || (ARM_AM::getT2SOImmVal(~ZImmVal) != -1)) return 1; return ST->hasV6T2Ops() ? 2 : 3; } else /*Thumb1*/ { if (SImmVal >= 0 && SImmVal < 256) return 1; if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal)) return 2; // Load from constantpool. return 3; } return 2; }
unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); if (BitSize == 0) return ~0U; unsigned ImmIdx = ~0U; switch (Opcode) { default: return TCC_Free; case Instruction::GetElementPtr: if (Idx == 0) return 2 * TCC_Basic; return TCC_Free; case Instruction::Store: ImmIdx = 0; break; case Instruction::Add: case Instruction::Sub: case Instruction::Mul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::URem: case Instruction::SRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: case Instruction::ICmp: ImmIdx = 1; break; case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: case Instruction::IntToPtr: case Instruction::PtrToInt: case Instruction::BitCast: case Instruction::PHI: case Instruction::Call: case Instruction::Select: case Instruction::Ret: case Instruction::Load: break; } if ((Idx == ImmIdx) && Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) return TCC_Free; return X86TTI::getIntImmCost(Imm, Ty); }
/// We do not support symbolic projections yet, only 32-bit unsigned integers. bool swift::getIntegerIndex(SILValue IndexVal, unsigned &IndexConst) { if (auto *IndexLiteral = dyn_cast<IntegerLiteralInst>(IndexVal)) { APInt ConstInt = IndexLiteral->getValue(); // IntegerLiterals are signed. if (ConstInt.isIntN(32) && ConstInt.isNonNegative()) { IndexConst = (unsigned)ConstInt.getSExtValue(); return true; } } return false; }
static void EmitAPInt(SmallVectorImpl<uint64_t> &Vals, unsigned &Code, unsigned &AbbrevToUse, const APInt &Val) { if (Val.getBitWidth() <= 64) { uint64_t V = Val.getSExtValue(); emitSignedInt64(Vals, V); Code = naclbitc::CST_CODE_INTEGER; AbbrevToUse = Val == 0 ? CONSTANTS_INTEGER_ZERO_ABBREV : CONSTANTS_INTEGER_ABBREV; } else { report_fatal_error("Wide integers are not supported"); } }
unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); if (BitSize == 0) return ~0U; if (Imm.getBitWidth() <= 64 && (isInt<32>(Imm.getSExtValue()) || isUInt<32>(Imm.getZExtValue()))) return TCC_Basic; else return 2 * TCC_Basic; }
unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); // There is no cost model for constants with a bit size of 0. Return TCC_Free // here, so that constant hoisting will ignore this constant. if (BitSize == 0) return TCC_Free; switch (IID) { default: return TCC_Free; case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: case Intrinsic::ssub_with_overflow: case Intrinsic::usub_with_overflow: case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: if (Idx == 1) { unsigned NumConstants = (BitSize + 63) / 64; unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty); return (Cost <= NumConstants * TCC_Basic) ? static_cast<unsigned>(TCC_Free) : Cost; } break; case Intrinsic::experimental_stackmap: if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) return TCC_Free; break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) return TCC_Free; break; } return AArch64TTI::getIntImmCost(Imm, Ty); }
unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); if (BitSize == 0) return ~0U; switch (Opcode) { case Instruction::Add: case Instruction::Sub: case Instruction::Mul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::URem: case Instruction::SRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: case Instruction::ICmp: if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) return TCC_Free; else return X86TTI::getIntImmCost(Imm, Ty); case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: case Instruction::IntToPtr: case Instruction::PtrToInt: case Instruction::BitCast: case Instruction::Call: case Instruction::Select: case Instruction::Ret: case Instruction::Load: case Instruction::Store: return X86TTI::getIntImmCost(Imm, Ty); } return TargetTransformInfo::getIntImmCost(Opcode, Imm, Ty); }
unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); if (BitSize == 0) return ~0U; int64_t Val = Imm.getSExtValue(); if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize)) return 1; if ((int64_t)Val < 0) Val = ~Val; if (BitSize == 32) Val &= (1LL << 32) - 1; unsigned LZ = countLeadingZeros((uint64_t)Val); unsigned Shift = (63 - LZ) / 16; // MOVZ is free so return true for one or fewer MOVK. return (Shift == 0) ? 1 : Shift; }
int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) { // Division by a constant can be turned into multiplication, but only if we // know it's constant. So it's not so much that the immediate is cheap (it's // not), but that the alternative is worse. // FIXME: this is probably unneeded with GlobalISel. if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv || Opcode == Instruction::SRem || Opcode == Instruction::URem) && Idx == 1) return 0; if (Opcode == Instruction::And) { // UXTB/UXTH if (Imm == 255 || Imm == 65535) return 0; // Conversion to BIC is free, and means we can use ~Imm instead. return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty)); } if (Opcode == Instruction::Add) // Conversion to SUB is free, and means we can use -Imm instead. return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty)); if (Opcode == Instruction::ICmp && Imm.isNegative() && Ty->getIntegerBitWidth() == 32) { int64_t NegImm = -Imm.getSExtValue(); if (ST->isThumb2() && NegImm < 1<<12) // icmp X, #-C -> cmn X, #C return 0; if (ST->isThumb() && NegImm < 1<<8) // icmp X, #-C -> adds X, #C return 0; } // xor a, -1 can always be folded to MVN if (Opcode == Instruction::Xor && Imm.isAllOnesValue()) return 0; return getIntImmCost(Imm, Ty); }
/// \brief Calculate the cost of materializing the given constant. int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); if (BitSize == 0) return ~0U; // Sign-extend all constants to a multiple of 64-bit. APInt ImmVal = Imm; if (BitSize & 0x3f) ImmVal = Imm.sext((BitSize + 63) & ~0x3fU); // Split the constant into 64-bit chunks and calculate the cost for each // chunk. int Cost = 0; for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) { APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64); int64_t Val = Tmp.getSExtValue(); Cost += getIntImmCost(Val); } // We need at least one instruction to materialze the constant. return std::max(1, Cost); }
Expr::Expr(APInt Int) : Expr_((long int)Int.getSExtValue()) { }
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); const SIRegisterInfo &TRI = TII->getRegisterInfo(); for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { MachineBasicBlock &MBB = *BI; MachineBasicBlock::iterator I, Next; for (I = MBB.begin(); I != MBB.end(); I = Next) { Next = std::next(I); MachineInstr &MI = *I; if (!isSafeToFold(MI.getOpcode())) continue; unsigned OpSize = TII->getOpSize(MI, 1); MachineOperand &OpToFold = MI.getOperand(1); bool FoldingImm = OpToFold.isImm(); // FIXME: We could also be folding things like FrameIndexes and // TargetIndexes. if (!FoldingImm && !OpToFold.isReg()) continue; // Folding immediates with more than one use will increase program size. // FIXME: This will also reduce register usage, which may be better // in some cases. A better heuristic is needed. if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) && !MRI.hasOneUse(MI.getOperand(0).getReg())) continue; // FIXME: Fold operands with subregs. if (OpToFold.isReg() && (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) || OpToFold.getSubReg())) continue; std::vector<FoldCandidate> FoldList; for (MachineRegisterInfo::use_iterator Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end(); Use != E; ++Use) { MachineInstr *UseMI = Use->getParent(); const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo()); // FIXME: Fold operands with subregs. if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) || UseOp.isImplicit())) { continue; } APInt Imm; if (FoldingImm) { unsigned UseReg = UseOp.getReg(); const TargetRegisterClass *UseRC = TargetRegisterInfo::isVirtualRegister(UseReg) ? MRI.getRegClass(UseReg) : TRI.getRegClass(UseReg); Imm = APInt(64, OpToFold.getImm()); // Split 64-bit constants into 32-bits for folding. if (UseOp.getSubReg()) { if (UseRC->getSize() != 8) continue; if (UseOp.getSubReg() == AMDGPU::sub0) { Imm = Imm.getLoBits(32); } else { assert(UseOp.getSubReg() == AMDGPU::sub1); Imm = Imm.getHiBits(32); } } // In order to fold immediates into copies, we need to change the // copy to a MOV. if (UseMI->getOpcode() == AMDGPU::COPY) { unsigned DestReg = UseMI->getOperand(0).getReg(); const TargetRegisterClass *DestRC = TargetRegisterInfo::isVirtualRegister(DestReg) ? MRI.getRegClass(DestReg) : TRI.getRegClass(DestReg); unsigned MovOp = TII->getMovOpcode(DestRC); if (MovOp == AMDGPU::COPY) continue; UseMI->setDesc(TII->get(MovOp)); } } const MCInstrDesc &UseDesc = UseMI->getDesc(); // Don't fold into target independent nodes. Target independent opcodes // don't have defined register classes. if (UseDesc.isVariadic() || UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1) continue; if (FoldingImm) { MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue()); tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII); continue; } tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII); // FIXME: We could try to change the instruction from 64-bit to 32-bit // to enable more folding opportunites. The shrink operands pass // already does this. } for (FoldCandidate &Fold : FoldList) { if (updateOperand(Fold, TRI)) { // Clear kill flags. if (!Fold.isImm()) { assert(Fold.OpToFold && Fold.OpToFold->isReg()); Fold.OpToFold->setIsKill(false); } DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " << Fold.UseOpNo << " of " << *Fold.UseMI << '\n'); } } } } return false; }
int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); // There is no cost model for constants with a bit size of 0. Return TCC_Free // here, so that constant hoisting will ignore this constant. if (BitSize == 0) return TTI::TCC_Free; // No cost model for operations on integers larger than 64 bit implemented yet. if (BitSize > 64) return TTI::TCC_Free; switch (Opcode) { default: return TTI::TCC_Free; case Instruction::GetElementPtr: // Always hoist the base address of a GetElementPtr. This prevents the // creation of new constants for every base constant that gets constant // folded with the offset. if (Idx == 0) return 2 * TTI::TCC_Basic; return TTI::TCC_Free; case Instruction::Store: if (Idx == 0 && Imm.getBitWidth() <= 64) { // Any 8-bit immediate store can by implemented via mvi. if (BitSize == 8) return TTI::TCC_Free; // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi. if (isInt<16>(Imm.getSExtValue())) return TTI::TCC_Free; } break; case Instruction::ICmp: if (Idx == 1 && Imm.getBitWidth() <= 64) { // Comparisons against signed 32-bit immediates implemented via cgfi. if (isInt<32>(Imm.getSExtValue())) return TTI::TCC_Free; // Comparisons against unsigned 32-bit immediates implemented via clgfi. if (isUInt<32>(Imm.getZExtValue())) return TTI::TCC_Free; } break; case Instruction::Add: case Instruction::Sub: if (Idx == 1 && Imm.getBitWidth() <= 64) { // We use algfi/slgfi to add/subtract 32-bit unsigned immediates. if (isUInt<32>(Imm.getZExtValue())) return TTI::TCC_Free; // Or their negation, by swapping addition vs. subtraction. if (isUInt<32>(-Imm.getSExtValue())) return TTI::TCC_Free; } break; case Instruction::Mul: if (Idx == 1 && Imm.getBitWidth() <= 64) { // We use msgfi to multiply by 32-bit signed immediates. if (isInt<32>(Imm.getSExtValue())) return TTI::TCC_Free; } break; case Instruction::Or: case Instruction::Xor: if (Idx == 1 && Imm.getBitWidth() <= 64) { // Masks supported by oilf/xilf. if (isUInt<32>(Imm.getZExtValue())) return TTI::TCC_Free; // Masks supported by oihf/xihf. if ((Imm.getZExtValue() & 0xffffffff) == 0) return TTI::TCC_Free; } break; case Instruction::And: if (Idx == 1 && Imm.getBitWidth() <= 64) { // Any 32-bit AND operation can by implemented via nilf. if (BitSize <= 32) return TTI::TCC_Free; // 64-bit masks supported by nilf. if (isUInt<32>(~Imm.getZExtValue())) return TTI::TCC_Free; // 64-bit masks supported by nilh. if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff) return TTI::TCC_Free; // Some 64-bit AND operations can be implemented via risbg. const SystemZInstrInfo *TII = ST->getInstrInfo(); unsigned Start, End; if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End)) return TTI::TCC_Free; } break; case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: // Always return TCC_Free for the shift value of a shift instruction. if (Idx == 1) return TTI::TCC_Free; break; case Instruction::UDiv: case Instruction::SDiv: case Instruction::URem: case Instruction::SRem: case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: case Instruction::IntToPtr: case Instruction::PtrToInt: case Instruction::BitCast: case Instruction::PHI: case Instruction::Call: case Instruction::Select: case Instruction::Ret: case Instruction::Load: break; } return SystemZTTIImpl::getIntImmCost(Imm, Ty); }
static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx, std::vector<FoldCandidate> &FoldList, SmallVectorImpl<MachineInstr *> &CopiesToReplace, const SIInstrInfo *TII, const SIRegisterInfo &TRI, MachineRegisterInfo &MRI) { const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx); // FIXME: Fold operands with subregs. if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) || UseOp.isImplicit())) { return; } bool FoldingImm = OpToFold.isImm(); APInt Imm; if (FoldingImm) { unsigned UseReg = UseOp.getReg(); const TargetRegisterClass *UseRC = TargetRegisterInfo::isVirtualRegister(UseReg) ? MRI.getRegClass(UseReg) : TRI.getPhysRegClass(UseReg); Imm = APInt(64, OpToFold.getImm()); const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode()); const TargetRegisterClass *FoldRC = TRI.getRegClass(FoldDesc.OpInfo[0].RegClass); // Split 64-bit constants into 32-bits for folding. if (FoldRC->getSize() == 8 && UseOp.getSubReg()) { if (UseRC->getSize() != 8) return; if (UseOp.getSubReg() == AMDGPU::sub0) { Imm = Imm.getLoBits(32); } else { assert(UseOp.getSubReg() == AMDGPU::sub1); Imm = Imm.getHiBits(32); } } // In order to fold immediates into copies, we need to change the // copy to a MOV. if (UseMI->getOpcode() == AMDGPU::COPY) { unsigned DestReg = UseMI->getOperand(0).getReg(); const TargetRegisterClass *DestRC = TargetRegisterInfo::isVirtualRegister(DestReg) ? MRI.getRegClass(DestReg) : TRI.getPhysRegClass(DestReg); unsigned MovOp = TII->getMovOpcode(DestRC); if (MovOp == AMDGPU::COPY) return; UseMI->setDesc(TII->get(MovOp)); CopiesToReplace.push_back(UseMI); } } // Special case for REG_SEQUENCE: We can't fold literals into // REG_SEQUENCE instructions, so we have to fold them into the // uses of REG_SEQUENCE. if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) { unsigned RegSeqDstReg = UseMI->getOperand(0).getReg(); unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm(); for (MachineRegisterInfo::use_iterator RSUse = MRI.use_begin(RegSeqDstReg), RSE = MRI.use_end(); RSUse != RSE; ++RSUse) { MachineInstr *RSUseMI = RSUse->getParent(); if (RSUse->getSubReg() != RegSeqDstSubReg) continue; foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList, CopiesToReplace, TII, TRI, MRI); } return; } const MCInstrDesc &UseDesc = UseMI->getDesc(); // Don't fold into target independent nodes. Target independent opcodes // don't have defined register classes. if (UseDesc.isVariadic() || UseDesc.OpInfo[UseOpIdx].RegClass == -1) return; if (FoldingImm) { MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue()); tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII); return; } tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII); // FIXME: We could try to change the instruction from 64-bit to 32-bit // to enable more folding opportunites. The shrink operands pass // already does this. return; }
int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) { if (DisablePPCConstHoist) return BaseT::getIntImmCost(Opcode, Idx, Imm, Ty); assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); if (BitSize == 0) return ~0U; unsigned ImmIdx = ~0U; bool ShiftedFree = false, RunFree = false, UnsignedFree = false, ZeroFree = false; switch (Opcode) { default: return TTI::TCC_Free; case Instruction::GetElementPtr: // Always hoist the base address of a GetElementPtr. This prevents the // creation of new constants for every base constant that gets constant // folded with the offset. if (Idx == 0) return 2 * TTI::TCC_Basic; return TTI::TCC_Free; case Instruction::And: RunFree = true; // (for the rotate-and-mask instructions) LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::Or: case Instruction::Xor: ShiftedFree = true; LLVM_FALLTHROUGH; case Instruction::Sub: case Instruction::Mul: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: ImmIdx = 1; break; case Instruction::ICmp: UnsignedFree = true; ImmIdx = 1; // Zero comparisons can use record-form instructions. LLVM_FALLTHROUGH; case Instruction::Select: ZeroFree = true; break; case Instruction::PHI: case Instruction::Call: case Instruction::Ret: case Instruction::Load: case Instruction::Store: break; } if (ZeroFree && Imm == 0) return TTI::TCC_Free; if (Idx == ImmIdx && Imm.getBitWidth() <= 64) { if (isInt<16>(Imm.getSExtValue())) return TTI::TCC_Free; if (RunFree) { if (Imm.getBitWidth() <= 32 && (isShiftedMask_32(Imm.getZExtValue()) || isShiftedMask_32(~Imm.getZExtValue()))) return TTI::TCC_Free; if (ST->isPPC64() && (isShiftedMask_64(Imm.getZExtValue()) || isShiftedMask_64(~Imm.getZExtValue()))) return TTI::TCC_Free; } if (UnsignedFree && isUInt<16>(Imm.getZExtValue())) return TTI::TCC_Free; if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0) return TTI::TCC_Free; } return PPCTTIImpl::getIntImmCost(Imm, Ty); }