int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a vector select gets lowered to vbsl. if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { // Lowering of some vector selects is currently far from perfect. static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = { { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 }, { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 }, { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } }; EVT SelCondTy = TLI->getValueType(DL, CondTy); EVT SelValTy = TLI->getValueType(DL, ValTy); if (SelCondTy.isSimple() && SelValTy.isSimple()) { if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD, SelCondTy.getSimpleVT(), SelValTy.getSimpleVT())) return Entry->Cost; } std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); return LT.first; } return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); }
int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // We don't lower some vector selects well that are wider than the register // width. if (ValTy->isVectorTy() && ISD == ISD::SELECT) { // We would need this many instructions to hide the scalarization happening. const int AmortizationCost = 20; static const TypeConversionCostTblEntry<MVT::SimpleValueType> VectorSelectTbl[] = { { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 }, { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 }, { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 }, { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost }, { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost }, { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost } }; EVT SelCondTy = TLI->getValueType(DL, CondTy); EVT SelValTy = TLI->getValueType(DL, ValTy); if (SelCondTy.isSimple() && SelValTy.isSimple()) { int Idx = ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(), SelValTy.getSimpleVT()); if (Idx != -1) return VectorSelectTbl[Idx].Cost; } } return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); }
bool WebAssemblyFastISel::selectBitCast(const Instruction *I) { // Target-independent code can handle this, except it doesn't set the dead // flag on the ARGUMENTS clobber, so we have to do that manually in order // to satisfy code that expects this of isBitcast() instructions. EVT VT = TLI.getValueType(DL, I->getOperand(0)->getType()); EVT RetVT = TLI.getValueType(DL, I->getType()); if (!VT.isSimple() || !RetVT.isSimple()) return false; if (VT == RetVT) { // No-op bitcast. updateValueMap(I, getRegForValue(I->getOperand(0))); return true; } unsigned Reg = fastEmit_ISD_BITCAST_r(VT.getSimpleVT(), RetVT.getSimpleVT(), getRegForValue(I->getOperand(0)), I->getOperand(0)->hasOneUse()); if (!Reg) return false; MachineBasicBlock::iterator Iter = FuncInfo.InsertPt; --Iter; assert(Iter->isBitcast()); Iter->setPhysRegsDeadExcept(ArrayRef<unsigned>(), TRI); updateValueMap(I, Reg); return true; }
unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a a vector select gets lowered to vbsl. if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { // Lowering of some vector selects is currently far from perfect. static const TypeConversionCostTblEntry<MVT::SimpleValueType> NEONVectorSelectTbl[] = { { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 }, { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 }, { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 }, { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 }, { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 }, { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } }; EVT SelCondTy = TLI->getValueType(CondTy); EVT SelValTy = TLI->getValueType(ValTy); if (SelCondTy.isSimple() && SelValTy.isSimple()) { int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD, SelCondTy.getSimpleVT(), SelValTy.getSimpleVT()); if (Idx != -1) return NEONVectorSelectTbl[Idx].Cost; } std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); return LT.first; } return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); }
bool FastISel::SelectCast(const User *I, unsigned Opcode) { EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || !DstVT.isSimple()) // Unhandled type. Halt "fast" selection and bail. return false; // Check if the destination type is legal. if (!TLI.isTypeLegal(DstVT)) return false; // Check if the source operand is legal. if (!TLI.isTypeLegal(SrcVT)) return false; unsigned InputReg = getRegForValue(I->getOperand(0)); if (!InputReg) // Unhandled operand. Halt "fast" selection and bail. return false; bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opcode, InputReg, InputRegIsKill); if (!ResultReg) return false; UpdateValueMap(I, ResultReg); return true; }
bool FastISel::SelectCast(const User *I, unsigned Opcode) { EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || !DstVT.isSimple()) // Unhandled type. Halt "fast" selection and bail. return false; // Check if the destination type is legal. Or as a special case, // it may be i1 if we're doing a truncate because that's // easy and somewhat common. if (!TLI.isTypeLegal(DstVT)) if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE) // Unhandled type. Halt "fast" selection and bail. return false; // Check if the source operand is legal. Or as a special case, // it may be i1 if we're doing zero-extension because that's // easy and somewhat common. if (!TLI.isTypeLegal(SrcVT)) if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND) // Unhandled type. Halt "fast" selection and bail. return false; unsigned InputReg = getRegForValue(I->getOperand(0)); if (!InputReg) // Unhandled operand. Halt "fast" selection and bail. return false; bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); // If the operand is i1, arrange for the high bits in the register to be zero. if (SrcVT == MVT::i1) { SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT); InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg, InputRegIsKill); if (!InputReg) return false; InputRegIsKill = true; } // If the result is i1, truncate to the target's type for i1 first. if (DstVT == MVT::i1) DstVT = TLI.getTypeToTransformTo(I->getContext(), DstVT); unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opcode, InputReg, InputRegIsKill); if (!ResultReg) return false; UpdateValueMap(I, ResultReg); return true; }
bool FastISel::SelectBitCast(const User *I) { // If the bitcast doesn't change the type, just use the operand value. if (I->getType() == I->getOperand(0)->getType()) { unsigned Reg = getRegForValue(I->getOperand(0)); if (Reg == 0) return false; UpdateValueMap(I, Reg); return true; } // Bitcasts of other values become reg-reg copies or BITCAST operators. EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || !DstVT.isSimple() || !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT)) // Unhandled type. Halt "fast" selection and bail. return false; unsigned Op0 = getRegForValue(I->getOperand(0)); if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. return false; bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); // Don't attempt a cross-class copy. It will likely fail. if (SrcClass == DstClass) { ResultReg = createResultReg(DstClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0); } } // If the reg-reg copy failed, select a BITCAST opcode. if (!ResultReg) ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), ISD::BITCAST, Op0, Op0IsKill); if (!ResultReg) return false; UpdateValueMap(I, ResultReg); return true; }
/// Select a bitcast.
///
/// A bitcast that keeps the IR type reuses the operand's register. Otherwise
/// a reg-reg copy is attempted when both value types are identical, and a
/// BIT_CONVERT node is emitted through FastEmit_r if no copy was inserted.
///
/// \returns true on success; false to halt "fast" selection and bail.
bool FastISel::SelectBitCast(User *I) {
  Value *Operand = I->getOperand(0);

  // Same IR type on both sides: just forward the operand's register.
  if (I->getType() == Operand->getType()) {
    unsigned SameReg = getRegForValue(Operand);
    if (SameReg == 0)
      return false;
    UpdateValueMap(I, SameReg);
    return true;
  }

  // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators.
  EVT SrcVT = TLI.getValueType(Operand->getType());
  EVT DstVT = TLI.getValueType(I->getType());

  // Unhandled type.
  if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
      DstVT == MVT::Other || !DstVT.isSimple() ||
      !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
    return false;

  // Unhandled operand.
  unsigned Op0 = getRegForValue(Operand);
  if (Op0 == 0)
    return false;

  // First preference: a plain reg-reg copy.
  unsigned ResultReg = 0;
  if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
    TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
    TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
    ResultReg = createResultReg(DstClass);

    // Forget the freshly created register again if the copy was not inserted.
    if (!TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
                          Op0, DstClass, SrcClass))
      ResultReg = 0;
  }

  // No copy was made: select a BIT_CONVERT opcode instead.
  if (!ResultReg)
    ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
                           ISD::BIT_CONVERT, Op0);

  if (!ResultReg)
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}
/// Lower SIGN_EXTEND_INREG to a shift-left / arithmetic-shift-right pair.
///
/// Operand 0 is the data and operand 1 the VTSDNode naming the type to extend
/// from. Data narrower than 32 bits is first zero-extended to a 32-bit
/// integer type, shifted there, and then converted back to its original type.
SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                     SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  SDValue Data = Op.getOperand(0);
  EVT OrigVT = Data.getValueType();
  EVT BVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

  unsigned BaseBits = BVT.getScalarType().getSizeInBits();
  unsigned SrcBits =
      OrigVT.isSimple() ? OrigVT.getScalarType().getSizeInBits() : 1;
  const bool Widen = SrcBits < 32;

  // The type the shifts are performed in.
  EVT WorkVT = OrigVT;
  if (Widen) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    WorkVT = genIntType(32,
                        OrigVT.isVector() ? OrigVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, WorkVT, Data);
  }

  // Shift amount is measured against the (possibly widened) element width.
  unsigned ShiftBits = (Widen ? 32 : SrcBits) - BaseBits;
  SDValue Shift = DAG.getConstant(ShiftBits, WorkVT);

  // Shift left, then signed shift right, both by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, WorkVT, Data, Shift);
  Data = DAG.getNode(ISD::SRA, DL, WorkVT, Data, Shift);

  // Once the sign extension is done, a widened op needs to be converted back
  // to its original type.
  if (Widen)
    Data = DAG.getSExtOrTrunc(Data, DL, OrigVT);

  return Data;
}
unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { assert(!Src->isVoidTy() && "Invalid type"); std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src); // Assuming that all loads of legal types cost 1. unsigned Cost = LT.first; if (Src->isVectorTy() && Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) { // This is a vector load that legalizes to a larger type than the vector // itself. Unless the corresponding extending load or truncating store is // legal, then this will scalarize. TargetLowering::LegalizeAction LA = TargetLowering::Expand; EVT MemVT = getTLI()->getValueType(Src, true); if (MemVT.isSimple() && MemVT != MVT::Other) { if (Opcode == Instruction::Store) LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT()); else LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT.getSimpleVT()); } if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { // This is a vector load/store for some illegal type that is scalarized. // We must account for the cost of building or decomposing the vector. Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, Opcode == Instruction::Store); } } return Cost; }
/// Select a READ_PARAM node into the PTX READPARAM* machine node that matches
/// the node's result type.
///
/// Operand 0 of \p Node is the chain and operand 1 the parameter index.
SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) {
  SDValue Chain = Node->getOperand(0);
  SDValue Index = Node->getOperand(1);

  // Get the type of parameter we are reading
  EVT VT = Node->getValueType(0);
  assert(VT.isSimple() && "READ_PARAM only implemented for MVT types");
  MVT Type = VT.getSimpleVT();

  // Pick the opcode variant for that type.
  int OpCode;
  switch (Type.SimpleTy) {
  case MVT::i1:
    OpCode = PTX::READPARAMPRED;
    break;
  case MVT::i16:
    OpCode = PTX::READPARAMI16;
    break;
  case MVT::i32:
    OpCode = PTX::READPARAMI32;
    break;
  case MVT::i64:
    OpCode = PTX::READPARAMI64;
    break;
  case MVT::f32:
    OpCode = PTX::READPARAMF32;
    break;
  default:
    // Anything left is expected to be f64.
    assert(Type == MVT::f64 && "Unexpected type!");
    OpCode = PTX::READPARAMF64;
    break;
  }

  // Predicate operands: no predicate register, PTXPredicate::None.
  SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
  SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
  DebugLoc dl = Node->getDebugLoc();

  SDValue Ops[] = { Index, Pred, PredOp, Chain };
  return CurDAG->getMachineNode(OpCode, dl, VT, Ops, 4);
}
unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { EVT VT = TLI.getValueType(C->getType(), true); // Only handle simple types. if (!VT.isSimple()) return 0; // TODO: This should be safe for fp because they're just bits from the // Constant. // TODO: Theoretically we could materialize fp constants with instructions // from VFP3. // MachineConstantPool wants an explicit alignment. unsigned Align = TD.getPrefTypeAlignment(C->getType()); if (Align == 0) { // TODO: Figure out if this is correct. Align = TD.getTypeAllocSize(C->getType()); } unsigned Idx = MCP.getConstantPoolIndex(C, Align); unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); // Different addressing modes between ARM/Thumb2 for constant pool loads. if (isThumb) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::t2LDRpci)) .addReg(DestReg).addConstantPoolIndex(Idx)); else AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp)) .addReg(DestReg).addConstantPoolIndex(Idx) .addReg(0).addImm(0)); return DestReg; }
unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); EVT SrcTy = TLI->getValueType(Src); EVT DstTy = TLI->getValueType(Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = { // LowerVectorINT_TO_FP: { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, // LowerVectorFP_TO_INT { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 }, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 }, { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 }, { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 4 }, { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 4 }, { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 }, { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4 }, { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 }, { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 }, }; int Idx = ConvertCostTableLookup<MVT>( ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) return ConversionTbl[Idx].Cost; return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); }
bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) { VT = TLI.getValueType(Ty, true); // Only handle simple types. if (VT == MVT::Other || !VT.isSimple()) return false; // Handle all legal types, i.e. a register that will directly hold this // value. return TLI.isTypeLegal(VT); }
/// Compute the simple value type of \p Ty into \p VT and report whether
/// fast-isel can handle it.
///
/// \p VT is written only when the type is simple and not MVT::Other. The
/// result is true only if the target also reports that type as legal, i.e. a
/// register that will directly hold this value.
bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT TyVT = TLI.getValueType(Ty, true);

  // Only handle simple types.
  const bool IsSimpleKnownType = TyVT != MVT::Other && TyVT.isSimple();
  if (!IsSimpleKnownType)
    return false;

  VT = TyVT.getSimpleVT();
  return TLI.isTypeLegal(VT);
}
unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); EVT SrcTy = TLI->getValueType(Src); EVT DstTy = TLI->getValueType(Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); static const TypeConversionCostTblEntry<MVT> AVXConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, }; if (ST->hasAVX()) { int Idx = ConvertCostTableLookup<MVT>(AVXConversionTbl, array_lengthof(AVXConversionTbl), ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) return AVXConversionTbl[Idx].Cost; } return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); }
unsigned FastISel::getRegForValue(const Value *V) { EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); // Don't handle non-simple values in FastISel. if (!RealVT.isSimple()) return 0; // Ignore illegal types. We must do this before looking up the value // in ValueMap because Arguments are given virtual registers regardless // of whether FastISel can handle them. MVT VT = RealVT.getSimpleVT(); if (!TLI.isTypeLegal(VT)) { // Promote MVT::i1 to a legal type though, because it's common and easy. if (VT == MVT::i1) VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT(); else return 0; } // Look up the value to see if we already have a register for it. We // cache values defined by Instructions across blocks, and other values // only locally. This is because Instructions already have the SSA // def-dominates-use requirement enforced. DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); if (I != FuncInfo.ValueMap.end()) { unsigned Reg = I->second; return Reg; } unsigned Reg = LocalValueMap[V]; if (Reg != 0) return Reg; // In bottom-up mode, just create the virtual register which will be used // to hold the value. It will be materialized later. if (isa<Instruction>(V) && (!isa<AllocaInst>(V) || !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) return FuncInfo.InitializeRegForValue(V); SavePoint SaveInsertPt = enterLocalValueArea(); // Materialize the value in a register. Emit any instructions in the // local value area. Reg = materializeRegForValue(V, VT); leaveLocalValueArea(SaveInsertPt); return Reg; }
/// Lower a global address node: wrap the target global address of the
/// referenced global in a PTXISD::COPY_ADDRESS node of pointer type.
SDValue PTXTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();

  assert(PtrVT.isSimple() && "Pointer must be to primitive type.");

  SDValue TargetAddr = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
  return DAG.getNode(PTXISD::COPY_ADDRESS, dl, PtrVT.getSimpleVT(),
                     TargetAddr);
}
// Materialize a constant into a register, and return the register // number (or zero if we failed to handle it). unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) { EVT CEVT = TLI.getValueType(C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) return 0; MVT VT = CEVT.getSimpleVT(); if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return (UnsupportedFPMode) ? 0 : materializeFP(CFP, VT); else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) return materializeGV(GV, VT); else if (isa<ConstantInt>(C)) return materializeInt(C, VT); return 0; }
/// Select a WRITE_PARAM node into the PTX WRITEPARAM* machine node that
/// matches the type of the value being written.
///
/// Operand 0 of \p Node is the chain and operand 1 the value to write.
SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) {
  SDValue Chain = Node->getOperand(0);
  SDValue Value = Node->getOperand(1);

  //Node->dumpr(CurDAG);

  // Get the type of parameter we are writing
  EVT VT = Value->getValueType(0);
  assert(VT.isSimple() && "WRITE_PARAM only implemented for MVT types");
  MVT Type = VT.getSimpleVT();

  // Pick the opcode variant for that type.
  int OpCode;
  switch (Type.SimpleTy) {
  case MVT::i1:
    OpCode = PTX::WRITEPARAMPRED;
    break;
  case MVT::i16:
    OpCode = PTX::WRITEPARAMI16;
    break;
  case MVT::i32:
    OpCode = PTX::WRITEPARAMI32;
    break;
  case MVT::i64:
    OpCode = PTX::WRITEPARAMI64;
    break;
  case MVT::f32:
    OpCode = PTX::WRITEPARAMF32;
    break;
  case MVT::f64:
    OpCode = PTX::WRITEPARAMF64;
    break;
  default:
    llvm_unreachable("Invalid type in SelectWRITEPARAM");
  }

  // Predicate operands: no predicate register, PTXPredicate::None.
  SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
  SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
  DebugLoc dl = Node->getDebugLoc();

  SDValue Ops[] = { Value, Pred, PredOp, Chain };
  SDNode* Ret = CurDAG->getMachineNode(OpCode, dl, MVT::Other, Ops, 4);

  //dbgs() << "SelectWRITEPARAM produced:\n\t";
  //Ret->dumpr(CurDAG);

  return Ret;
}
unsigned FastISel::getRegForValue(const Value *V) { EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); // Don't handle non-simple values in FastISel. if (!RealVT.isSimple()) return 0; // Ignore illegal types. We must do this before looking up the value // in ValueMap because Arguments are given virtual registers regardless // of whether FastISel can handle them. MVT VT = RealVT.getSimpleVT(); if (!TLI.isTypeLegal(VT)) { // Handle integer promotions, though, because they're common and easy. if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT(); else return 0; } // Look up the value to see if we already have a register for it. unsigned Reg = lookUpRegForValue(V); if (Reg != 0) return Reg; // In bottom-up mode, just create the virtual register which will be used // to hold the value. It will be materialized later. if (isa<Instruction>(V) && (!isa<AllocaInst>(V) || !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) return FuncInfo.InitializeRegForValue(V); SavePoint SaveInsertPt = enterLocalValueArea(); // Materialize the value in a register. Emit any instructions in the // local value area. Reg = materializeRegForValue(V, VT); leaveLocalValueArea(SaveInsertPt); return Reg; }
bool FastISel::SelectExtractValue(const User *U) { const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U); if (!EVI) return false; // Make sure we only try to handle extracts with a legal result. But also // allow i1 because it's easy. EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true); if (!RealVT.isSimple()) return false; MVT VT = RealVT.getSimpleVT(); if (!TLI.isTypeLegal(VT) && VT != MVT::i1) return false; const Value *Op0 = EVI->getOperand(0); const Type *AggTy = Op0->getType(); // Get the base result register. unsigned ResultReg; DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0); if (I != FuncInfo.ValueMap.end()) ResultReg = I->second; else if (isa<Instruction>(Op0)) ResultReg = FuncInfo.InitializeRegForValue(Op0); else return false; // fast-isel can't handle aggregate constants at the moment // Get the actual result register, which is an offset from the base register. unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->idx_begin(), EVI->idx_end()); SmallVector<EVT, 4> AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); for (unsigned i = 0; i < VTIndex; i++) ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]); UpdateValueMap(EVI, ResultReg); return true; }
/// Emit a load of type \p VT from base register \p Reg plus \p Offset.
///
/// Only i8, i16 and i32 are handled; i8 and i16 results are placed in a
/// register of the i32 class.
///
/// \param[out] ResultReg receives the newly created result register.
/// \returns true on success, false for an unhandled type.
bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
                              unsigned Reg, int Offset) {
  assert(VT.isSimple() && "Non-simple types are invalid here!");

  unsigned Opc;
  switch (VT.getSimpleVT().SimpleTy) {
    case MVT::i8:
      Opc = isThumb ? ARM::tLDRB : ARM::LDRB;
      VT = MVT::i32;
      break;
    case MVT::i16:
      Opc = isThumb ? ARM::tLDRH : ARM::LDRH;
      VT = MVT::i32;
      break;
    case MVT::i32:
      Opc = isThumb ? ARM::tLDR : ARM::LDR;
      break;
    default:
      assert(false && "Trying to emit for an unhandled type!");
      return false;
  }

  ResultReg = createResultReg(TLI.getRegClassFor(VT));

  // TODO: Fix the Addressing modes so that these can share some code.
  // Since this is a Thumb1 load this will work in Thumb1 or 2 mode.
  // The two modes differ only in the order of the trailing operands.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(Opc), ResultReg)
                            .addReg(Reg);
  if (isThumb)
    MIB.addImm(Offset).addReg(0);
  else
    MIB.addReg(0).addImm(Offset);
  AddOptionalDefs(MIB);

  return true;
}
int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); EVT SrcTy = TLI->getValueType(DL, Src); EVT DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) return BaseT::getCastInstrCost(Opcode, Dst, Src); static const TypeConversionCostTblEntry<MVT::SimpleValueType> ConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, // The number of shll instructions for the extension. { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 }, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, // LowerVectorINT_TO_FP: { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, // Complex: to v2f32 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 }, { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 }, // Complex: to v4f32 { ISD::SINT_TO_FP, 
MVT::v4f32, MVT::v4i8, 4 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, // Complex: to v8f32 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, // Complex: to v16f32 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 }, { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 }, // Complex: to v2f64 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, // LowerVectorFP_TO_INT { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 }, { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 }, { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 }, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext). { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 }, { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 }, { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 }, { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 }, { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 }, { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 }, // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 }, { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 }, // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2. 
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 }, { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 }, { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 }, { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 }, }; int Idx = ConvertCostTableLookup(ConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) return ConversionTbl[Idx].Cost; return BaseT::getCastInstrCost(Opcode, Dst, Src); }
unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); // Single to/from double precision conversions. static const CostTblEntry<MVT::SimpleValueType> NEONFltDblTbl[] = { // Vector fptrunc/fpext conversions. { ISD::FP_ROUND, MVT::v2f64, 2 }, { ISD::FP_EXTEND, MVT::v2f32, 2 }, { ISD::FP_EXTEND, MVT::v4f32, 4 } }; if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND || ISD == ISD::FP_EXTEND)) { std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second); if (Idx != -1) return LT.first * NEONFltDblTbl[Idx].Cost; } EVT SrcTy = TLI->getValueType(Src); EVT DstTy = TLI->getValueType(Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); // Some arithmetic, load and store operations have specific instructions // to cast up/down their types automatically at no extra cost. // TODO: Get these tables to know at least what the related operations are. static const TypeConversionCostTblEntry<MVT::SimpleValueType> NEONVectorConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, // The number of vmovl instructions for the extension. 
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, // Operations that we legalize using splitting. { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 }, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, // Vector float <-> i32 conversions. { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 }, { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 }, { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, { ISD::FP_TO_UINT, MVT::v4i16, 
MVT::v4f32, 2 }, // Vector double <-> i32 conversions. { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 }, { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 }, { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 }, { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 } }; if (SrcTy.isVector() && ST->hasNEON()) { int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) return NEONVectorConversionTbl[Idx].Cost; } // Scalar float to integer conversions. static const TypeConversionCostTblEntry<MVT::SimpleValueType> NEONFloatConversionTbl[] = { { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 }, { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 }, { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 }, { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 }, { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 }, { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 }, { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 }, { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 }, { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 }, { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 }, { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 }, { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 }, { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 }, { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 }, { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 }, { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 }, { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 }, { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 }, { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 }, { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 } }; if (SrcTy.isFloatingPoint() && ST->hasNEON()) { int 
Idx = ConvertCostTableLookup(NEONFloatConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) return NEONFloatConversionTbl[Idx].Cost; } // Scalar integer to float conversions. static const TypeConversionCostTblEntry<MVT::SimpleValueType> NEONIntegerConversionTbl[] = { { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 }, { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 }, { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 }, { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 }, { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 }, { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 }, { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 }, { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 }, { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 }, { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 }, { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 }, { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 }, { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 }, { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 }, { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 } }; if (SrcTy.isInteger() && ST->hasNEON()) { int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) return NEONIntegerConversionTbl[Idx].Cost; } // Scalar integer conversion costs. static const TypeConversionCostTblEntry<MVT::SimpleValueType> ARMIntegerConversionTbl[] = { // i16 -> i64 requires two dependent operations. { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 }, // Truncates on i64 are assumed to be free. 
{ ISD::TRUNCATE, MVT::i32, MVT::i64, 0 }, { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 }, { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 }, { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 } }; if (SrcTy.isInteger()) { int Idx = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) return ARMIntegerConversionTbl[Idx].Cost; } return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); }
unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src); std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst); static const TypeConversionCostTblEntry<MVT::SimpleValueType> SSE2ConvTbl[] = { // These are somewhat magic numbers justified by looking at the output of // Intel's IACA, running some kernels and making sure when we take // legalization into account the throughput will be overestimated. { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, // There are faster sequences for float conversions. { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, }; if (ST->hasSSE2() && !ST->hasAVX()) { int Idx = ConvertCostTableLookup(SSE2ConvTbl, ISD, LTDest.second, LTSrc.second); if (Idx != -1) return LTSrc.first * SSE2ConvTbl[Idx].Cost; } EVT SrcTy = TLI->getValueType(Src); EVT DstTy = TLI->getValueType(Dst); // The function getSimpleVT only handles simple value types. 
if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); static const TypeConversionCostTblEntry<MVT::SimpleValueType> AVX2ConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 3 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 3 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 2 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2 }, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 }, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 }, }; static const TypeConversionCostTblEntry<MVT::SimpleValueType> AVXConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, { ISD::ZERO_EXTEND, MVT::v4i64, 
MVT::v4i16, 3 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 4 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 4 }, { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 4 }, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 4 }, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 9 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 }, // The generic code to compute the scalar overhead is currently broken. // Workaround this limitation by estimating the scalarization overhead // here. We have roughly 10 instructions per scalar element. // Multiply that by the vector width. // FIXME: remove that when PR19268 is fixed. 
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 4*10 }, { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 }, { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, // This node is expanded into scalarized operations but BasicTTI is overly // optimistic estimating its cost. It computes 3 per element (one // vector-extract, one scalar conversion and one vector-insert). The // problem is that the inserts form a read-modify-write chain so latency // should be factored in too. Inflating the cost per element by 1. { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 8*4 }, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 }, }; if (ST->hasAVX2()) { int Idx = ConvertCostTableLookup(AVX2ConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) return AVX2ConversionTbl[Idx].Cost; } if (ST->hasAVX()) { int Idx = ConvertCostTableLookup(AVXConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) return AVXConversionTbl[Idx].Cost; } return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); }
/// SelectBinaryOp - Select and emit code for a binary operator instruction, /// which has an opcode which directly corresponds to the given ISD opcode. /// bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !VT.isSimple()) // Unhandled type. Halt "fast" selection and bail. return false; // We only handle legal types. For example, on x86-32 the instruction // selector contains all of the 64-bit instructions from x86-64, // under the assumption that i64 won't be used if the target doesn't // support it. if (!TLI.isTypeLegal(VT)) { // MVT::i1 is special. Allow AND, OR, or XOR because they // don't require additional zeroing, which makes them easy. if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || ISDOpcode == ISD::XOR)) VT = TLI.getTypeToTransformTo(I->getContext(), VT); else return false; } unsigned Op0 = getRegForValue(I->getOperand(0)); if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. return false; bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // Check if the second operand is a constant and handle it appropriately. if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, CI->getZExtValue()); if (ResultReg != 0) { // We successfully emitted code for the given LLVM Instruction. UpdateValueMap(I, ResultReg); return true; } } // Check if the second operand is a constant float. if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, CF); if (ResultReg != 0) { // We successfully emitted code for the given LLVM Instruction. UpdateValueMap(I, ResultReg); return true; } } unsigned Op1 = getRegForValue(I->getOperand(1)); if (Op1 == 0) // Unhandled operand. Halt "fast" selection and bail. 
return false; bool Op1IsKill = hasTrivialKill(I->getOperand(1)); // Now we have both operands in registers. Emit the instruction. unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill); if (ResultReg == 0) // Target-specific code wasn't able to find a machine opcode for // the given ISD opcode and type. Halt "fast" selection and bail. return false; // We successfully emitted code for the given LLVM Instruction. UpdateValueMap(I, ResultReg); return true; }
/// getRegForValue - Return the register holding \p V, materializing the value
/// if it is a constant-like value that has no register yet.
///
/// \returns the register number, or 0 if the value's type is not simple, is
///          illegal (other than i1, which is promoted), or the value could
///          not be materialized.
unsigned FastISel::getRegForValue(Value *V) {
  EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);

  // Non-simple values are not handled in FastISel.
  if (!RealVT.isSimple())
    return 0;

  // Reject illegal types. This has to happen before consulting ValueMap
  // because Arguments are given virtual registers regardless of whether
  // FastISel can handle them.
  MVT VT = RealVT.getSimpleVT();
  if (!TLI.isTypeLegal(VT)) {
    // MVT::i1 is common and easy, so promote it to a legal type instead.
    if (!(VT == MVT::i1))
      return 0;
    VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
  }

  // See whether a register already exists for this value. Values defined by
  // Instructions are cached across blocks, other values only locally, because
  // Instructions already have the SSA def-dominates-use requirement enforced.
  if (ValueMap.count(V))
    return ValueMap[V];

  unsigned Reg = LocalValueMap[V];
  if (Reg != 0)
    return Reg;

  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
    // Only integers that fit in 64 bits are emitted as immediates here.
    if (CI->getValue().getActiveBits() <= 64)
      Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
  } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
    Reg = TargetMaterializeAlloca(AI);
  } else if (isa<ConstantPointerNull>(V)) {
    // Translate this as an integer zero so that it can be
    // local-CSE'd with actual integer zeros.
    Reg = getRegForValue(
        Constant::getNullValue(TD.getIntPtrType(V->getContext())));
  } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
    Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);

    if (Reg == 0) {
      // Direct emission failed. If the value converts exactly to a
      // pointer-width signed integer, materialize that integer and convert
      // it with SINT_TO_FP.
      const APFloat &FPVal = CF->getValueAPF();
      EVT IntVT = TLI.getPointerTy();

      uint64_t Words[2];
      uint32_t IntBitWidth = IntVT.getSizeInBits();
      bool IsExact;
      (void) FPVal.convertToInteger(Words, IntBitWidth, /*isSigned=*/true,
                                    APFloat::rmTowardZero, &IsExact);
      if (IsExact) {
        APInt IntVal(IntBitWidth, 2, Words);

        unsigned IntegerReg =
            getRegForValue(ConstantInt::get(V->getContext(), IntVal));
        if (IntegerReg != 0)
          Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
                           IntegerReg);
      }
    }
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    if (!SelectOperator(CE, CE->getOpcode()))
      return 0;
    Reg = LocalValueMap[CE];
  } else if (isa<UndefValue>(V)) {
    Reg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(MBB, DL, TII.get(TargetInstrInfo::IMPLICIT_DEF), Reg);
  }

  // If target-independent code couldn't handle the value, give
  // target-specific code a try.
  if (Reg == 0) {
    if (Constant *C = dyn_cast<Constant>(V))
      Reg = TargetMaterializeConstant(C);
  }

  // Constant materializations are not cached in the general ValueMap;
  // doing so would require tracking what uses they dominate.
  if (Reg != 0)
    LocalValueMap[V] = Reg;
  return Reg;
}
unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src); std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst); static const TypeConversionCostTblEntry<MVT::SimpleValueType> SSE2ConvTbl[] = { // These are somewhat magic numbers justified by looking at the output of // Intel's IACA, running some kernels and making sure when we take // legalization into account the throughput will be overestimated. { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, // There are faster sequences for float conversions. { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, }; if (ST->hasSSE2() && !ST->hasAVX()) { int Idx = ConvertCostTableLookup(SSE2ConvTbl, ISD, LTDest.second, LTSrc.second); if (Idx != -1) return LTSrc.first * SSE2ConvTbl[Idx].Cost; } EVT SrcTy = TLI->getValueType(Src); EVT DstTy = TLI->getValueType(Dst); // The function getSimpleVT only handles simple value types. 
if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); static const TypeConversionCostTblEntry<MVT::SimpleValueType> AVXConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 }, { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 
}, }; if (ST->hasAVX()) { int Idx = ConvertCostTableLookup(AVXConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) return AVXConversionTbl[Idx].Cost; } return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); }
int GCNTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args ) { EVT OrigTy = TLI->getValueType(DL, Ty); if (!OrigTy.isSimple()) { return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); } // Legalize the type. std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); // Because we don't have any legal vector operations, but the legal types, we // need to account for split vectors. unsigned NElts = LT.second.isVector() ? LT.second.getVectorNumElements() : 1; MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy; switch (ISD) { case ISD::SHL: case ISD::SRL: case ISD::SRA: if (SLT == MVT::i64) return get64BitInstrCost() * LT.first * NElts; // i32 return getFullRateInstrCost() * LT.first * NElts; case ISD::ADD: case ISD::SUB: case ISD::AND: case ISD::OR: case ISD::XOR: if (SLT == MVT::i64){ // and, or and xor are typically split into 2 VALU instructions. return 2 * getFullRateInstrCost() * LT.first * NElts; } return LT.first * NElts * getFullRateInstrCost(); case ISD::MUL: { const int QuarterRateCost = getQuarterRateInstrCost(); if (SLT == MVT::i64) { const int FullRateCost = getFullRateInstrCost(); return (4 * QuarterRateCost + (2 * 2) * FullRateCost) * LT.first * NElts; } // i32 return QuarterRateCost * NElts * LT.first; } case ISD::FADD: case ISD::FSUB: case ISD::FMUL: if (SLT == MVT::f64) return LT.first * NElts * get64BitInstrCost(); if (SLT == MVT::f32 || SLT == MVT::f16) return LT.first * NElts * getFullRateInstrCost(); break; case ISD::FDIV: case ISD::FREM: // FIXME: frem should be handled separately. The fdiv in it is most of it, // but the current lowering is also not entirely correct. 
if (SLT == MVT::f64) { int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost(); // Add cost of workaround. if (ST->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) Cost += 3 * getFullRateInstrCost(); return LT.first * Cost * NElts; } if (!Args.empty() && match(Args[0], PatternMatch::m_FPOne())) { // TODO: This is more complicated, unsafe flags etc. if ((SLT == MVT::f32 && !ST->hasFP32Denormals()) || (SLT == MVT::f16 && ST->has16BitInsts())) { return LT.first * getQuarterRateInstrCost() * NElts; } } if (SLT == MVT::f16 && ST->has16BitInsts()) { // 2 x v_cvt_f32_f16 // f32 rcp // f32 fmul // v_cvt_f16_f32 // f16 div_fixup int Cost = 4 * getFullRateInstrCost() + 2 * getQuarterRateInstrCost(); return LT.first * Cost * NElts; } if (SLT == MVT::f32 || SLT == MVT::f16) { int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost(); if (!ST->hasFP32Denormals()) { // FP mode switches. Cost += 2 * getFullRateInstrCost(); } return LT.first * NElts * Cost; } break; default: break; } return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); }