// Make sure the returned mutation makes sense for the match type.
static bool mutationIsSane(const LegalizeRule &Rule,
                           const LegalityQuery &Q,
                           std::pair<unsigned, LLT> Mutation) {
  const unsigned TypeIdx = Mutation.first;
  const LLT OldTy = Q.Types[TypeIdx];
  const LLT NewTy = Mutation.second;

  switch (Rule.getAction()) {
  case FewerElements:
  case MoreElements: {
    if (!OldTy.isVector())
      return false;

    if (NewTy.isVector()) {
      if (Rule.getAction() == FewerElements) {
        // Make sure the element count really decreased.
        if (NewTy.getNumElements() >= OldTy.getNumElements())
          return false;
      } else {
        // Make sure the element count really increased.
        if (NewTy.getNumElements() <= OldTy.getNumElements())
          return false;
      }
    }

    // Make sure the element type didn't change.
    return NewTy.getScalarType() == OldTy.getElementType();
  }
  case NarrowScalar:
  case WidenScalar: {
    if (OldTy.isVector()) {
      // Number of elements should not change.
      if (!NewTy.isVector() ||
          OldTy.getNumElements() != NewTy.getNumElements())
        return false;
    } else {
      // Both types must be scalars.
      if (NewTy.isVector())
        return false;
    }

    if (Rule.getAction() == NarrowScalar) {
      // Make sure the size really decreased.
      if (NewTy.getScalarSizeInBits() >= OldTy.getScalarSizeInBits())
        return false;
    } else {
      // Make sure the size really increased.
      if (NewTy.getScalarSizeInBits() <= OldTy.getScalarSizeInBits())
        return false;
    }

    return true;
  }
  default:
    return true;
  }
}
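// A minimal sketch of what mutationIsSane accepts, assuming a rule on type
// index 0 (the surrounding rule/query objects are hypothetical; only the
// type arithmetic performed above is illustrated):
//
//   FewerElements, Q.Types[0] == <4 x s16>:
//     {0, LLT::vector(2, 16)}  -> sane: element count shrank, s16 preserved.
//     {0, LLT::vector(8, 16)}  -> insane: element count grew.
//     {0, LLT::vector(2, 32)}  -> insane: element type changed.
//   WidenScalar, Q.Types[0] == s16:
//     {0, LLT::scalar(32)}     -> sane: scalar size grew.
//     {0, LLT::scalar(8)}      -> insane: scalar size shrank.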
static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    return Ty.isVector() &&
           Ty.getNumElements() % 2 != 0 &&
           Ty.getElementType().getSizeInBits() < 32;
  };
}
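// A usage sketch, assuming this runs inside a target's LegalizerInfo
// constructor and that the standard LegalizeMutations helpers are available
// (the opcode choice is illustrative): pad small odd-element vectors such as
// <3 x s16> up to the next power-of-2 element count so that later rules see
// an even-sized type.
//
//   getActionDefinitionsBuilder(TargetOpcode::G_AND)
//       .moreElementsIf(isSmallOddVector(0),
//                       LegalizeMutations::moreElementsToNextPow2(0));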
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or fewer operands");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands have a similar size and type.
  // This should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes differs between
  // operands. If we want that level of detail, it is probably best to check
  // that the types are the same, period. Currently, we just check that the
  // register banks are the same for each type.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}
void MachineIRBuilder::validateTruncExt(unsigned Dst, unsigned Src,
                                        bool IsExtend) {
#ifndef NDEBUG
  LLT SrcTy = MRI->getType(Src);
  LLT DstTy = MRI->getType(Dst);

  if (DstTy.isVector()) {
    assert(SrcTy.isVector() &&
           "mismatched cast between vector and non-vector");
    assert(SrcTy.getNumElements() == DstTy.getNumElements() &&
           "different number of elements in a trunc/ext");
  } else
    assert(DstTy.isScalar() && SrcTy.isScalar() && "invalid extend/trunc");

  if (IsExtend)
    assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
           "invalid narrowing extend");
  else
    assert(DstTy.getSizeInBits() < SrcTy.getSizeInBits() &&
           "invalid widening trunc");
#endif
}
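// Illustrative builder calls exercising the checks above (the register and
// builder names are hypothetical):
//
//   unsigned S32 = MRI.createGenericVirtualRegister(LLT::scalar(32));
//   unsigned S64 = MRI.createGenericVirtualRegister(LLT::scalar(64));
//   MIRBuilder.buildSExt(S64, S32);   // OK: 32 -> 64 widens.
//   MIRBuilder.buildTrunc(S32, S64);  // OK: 64 -> 32 narrows.
//   MIRBuilder.buildSExt(S32, S64);   // asserts: "invalid narrowing extend".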
void MachineLegalizer::computeTables() {
  for (auto &Op : Actions) {
    LLT Ty = Op.first.second;
    if (!Ty.isVector())
      continue;

    auto &Entry = MaxLegalVectorElts[std::make_pair(Op.first.first,
                                                    Ty.getElementType())];
    Entry = std::max(Entry, Ty.getNumElements());
  }

  TablesInitialized = true;
}
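// A worked example of the table this builds, assuming the early
// setAction(Opcode, Type, Action) registration API that populates Actions:
//
//   Legalizer.setAction(TargetOpcode::G_ADD, LLT::vector(2, 32), Legal);
//   Legalizer.setAction(TargetOpcode::G_ADD, LLT::vector(4, 32), Legal);
//   Legalizer.computeTables();
//   // MaxLegalVectorElts[{G_ADD, s32}] == 4: the widest legal element
//   // count recorded per (opcode, element type) pair.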
// FIXME: inefficient implementation for now. Without ComputeValueVTs we're
// probably going to need specialized lookup structures for various types
// before we have any hope of doing well with something like <13 x i3>. Even
// the common cases should do better than what we have now.
std::pair<MachineLegalizer::LegalizeAction, LLT>
MachineLegalizer::getAction(const InstrAspect &Aspect) const {
  assert(TablesInitialized && "backend forgot to call computeTables");

  // These *have* to be implemented for now, they're the fundamental basis of
  // how everything else is transformed.
  // FIXME: the long-term plan calls for expansion in terms of load/store (if
  // they're not legal).
  if (Aspect.Opcode == TargetOpcode::G_SEQUENCE ||
      Aspect.Opcode == TargetOpcode::G_EXTRACT)
    return std::make_pair(Legal, Aspect.Type);

  LegalizeAction Action = findInActions(Aspect);
  if (Action != NotFound)
    return findLegalAction(Aspect, Action);

  unsigned Opcode = Aspect.Opcode;
  LLT Ty = Aspect.Type;
  if (!Ty.isVector()) {
    auto DefaultAction = DefaultActions.find(Aspect.Opcode);
    if (DefaultAction != DefaultActions.end() &&
        DefaultAction->second == Legal)
      return std::make_pair(Legal, Ty);

    assert(DefaultAction != DefaultActions.end() &&
           DefaultAction->second == NarrowScalar && "unexpected default");
    return findLegalAction(Aspect, NarrowScalar);
  }

  LLT EltTy = Ty.getElementType();
  int NumElts = Ty.getNumElements();

  auto ScalarAction =
      ScalarInVectorActions.find(std::make_pair(Opcode, EltTy));
  if (ScalarAction != ScalarInVectorActions.end() &&
      ScalarAction->second != Legal)
    return findLegalAction(Aspect, ScalarAction->second);

  // The element type is legal in principle, but the number of elements is
  // wrong.
  auto MaxLegalElts = MaxLegalVectorElts.lookup(std::make_pair(Opcode, EltTy));
  if (MaxLegalElts > NumElts)
    return findLegalAction(Aspect, MoreElements);

  if (MaxLegalElts == 0) {
    // Scalarize if there's no legal vector type, which is just a special case
    // of FewerElements.
    return std::make_pair(FewerElements, EltTy);
  }

  return findLegalAction(Aspect, FewerElements);
}
void LegalizerInfo::computeTables() {
  for (unsigned Opcode = 0; Opcode <= LastOp - FirstOp; ++Opcode) {
    for (unsigned Idx = 0; Idx != Actions[Opcode].size(); ++Idx) {
      for (auto &Action : Actions[Opcode][Idx]) {
        LLT Ty = Action.first;
        if (!Ty.isVector())
          continue;

        auto &Entry = MaxLegalVectorElts[std::make_pair(Opcode + FirstOp,
                                                        Ty.getElementType())];
        Entry = std::max(Entry, Ty.getNumElements());
      }
    }
  }

  TablesInitialized = true;
}
RegisterBankInfo::InstructionMapping
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
  if (Mapping.isValid())
    return Mapping;

  // As a top-level guess, vectors go in FPRs, scalars in GPRs. Obviously this
  // won't work for normal floating-point types (or NZCV). When such
  // instructions exist we'll need to look at the MI's opcode.
  LLT Ty = MI.getType();
  unsigned BankID;
  if (Ty.isVector())
    BankID = AArch64::FPRRegBankID;
  else
    BankID = AArch64::GPRRegBankID;

  Mapping = InstructionMapping{1, 1, MI.getNumOperands()};
  int Size = Ty.isSized() ? Ty.getSizeInBits() : 0;
  for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx)
    Mapping.setOperandMapping(Idx, Size, getRegBank(BankID));

  return Mapping;
}
bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    MachineIRBuilder &MIRBuilder) const {
  MachineFunction &MF = MIRBuilder.getMF();

  MIRBuilder.setInstr(MI);

  unsigned Dst = MI.getOperand(0).getReg();
  unsigned Src = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  unsigned DestAS = DstTy.getAddressSpace();
  unsigned SrcAS = SrcTy.getAddressSpace();

  // TODO: Avoid reloading from the queue ptr for each cast, or at least each
  // vector element.
  assert(!DstTy.isVector());

  const AMDGPUTargetMachine &TM =
      static_cast<const AMDGPUTargetMachine &>(MF.getTarget());

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
    MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BITCAST));
    return true;
  }

  if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
    assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
           DestAS == AMDGPUAS::PRIVATE_ADDRESS);
    unsigned NullVal = TM.getNullPointerValue(DestAS);

    auto SegmentNull = MIRBuilder.buildConstant(DstTy, NullVal);
    auto FlatNull = MIRBuilder.buildConstant(SrcTy, 0);

    unsigned PtrLo32 = MRI.createGenericVirtualRegister(DstTy);

    // Extract low 32-bits of the pointer.
    MIRBuilder.buildExtract(PtrLo32, Src, 0);

    unsigned CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0));
    MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0));

    MI.eraseFromParent();
    return true;
  }

  assert(SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
         SrcAS == AMDGPUAS::PRIVATE_ADDRESS);

  auto SegmentNull =
      MIRBuilder.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
  auto FlatNull =
      MIRBuilder.buildConstant(DstTy, TM.getNullPointerValue(DestAS));

  unsigned ApertureReg = getSegmentAperture(DestAS, MRI, MIRBuilder);

  unsigned CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
  MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0));

  unsigned BuildPtr = MRI.createGenericVirtualRegister(DstTy);

  // Coerce the type of the low half of the result so we can use merge_values.
  unsigned SrcAsInt = MRI.createGenericVirtualRegister(LLT::scalar(32));
  MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
      .addDef(SrcAsInt)
      .addUse(Src);

  // TODO: Should we allow mismatched types but matching sizes in merges to
  // avoid the ptrtoint?
  MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
  MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0));

  MI.eraseFromParent();
  return true;
}
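// Rough shape of the MIR emitted by the flat -> local/private branch above
// (virtual register names are illustrative):
//
//   %lo:_(p3)  = G_EXTRACT %src:_(p0), 0          ; low 32 bits of the pointer
//   %ne:_(s1)  = G_ICMP intpred(ne), %src, %fnull ; null check in the flat AS
//   %dst:_(p3) = G_SELECT %ne, %lo, %snull        ; null maps to segment null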
static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT QueryTy = Query.Types[TypeIdx];
    return QueryTy.isVector() && QueryTy.getNumElements() % 2 != 0;
  };
}
static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
  return [=](const LegalityQuery &Query) {
    const LLT QueryTy = Query.Types[TypeIdx];
    return QueryTy.isVector() && QueryTy.getSizeInBits() > Size;
  };
}
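// A usage sketch for this predicate, assuming a LegalizerInfo constructor
// context and an illustrative 128-bit register budget: split over-wide
// vectors in half until they fit. The inline lambda is a hypothetical
// mutation, written the way targets usually supply one.
//
//   getActionDefinitionsBuilder(TargetOpcode::G_FADD)
//       .fewerElementsIf(vectorWiderThan(0, 128),
//                        [](const LegalityQuery &Query) {
//                          const LLT Ty = Query.Types[0];
//                          return std::make_pair(
//                              0u, LLT::vector(Ty.getNumElements() / 2,
//                                              Ty.getElementType()));
//                        });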
RegisterBankInfo::InstructionMapping
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if (!isPreISelGenericOpcode(Opc)) {
    RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  RegisterBankInfo::InstructionMapping Mapping =
      InstructionMapping{DefaultMappingID, 1, MI.getNumOperands()};

  // Track the size and bank of each register. We don't do partial mappings.
  SmallVector<unsigned, 4> OpBaseIdx(MI.getNumOperands());
  SmallVector<unsigned, 4> OpFinalIdx(MI.getNumOperands());
  for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    unsigned RBIdx = AArch64::getRegBankBaseIdx(Ty.getSizeInBits());
    OpBaseIdx[Idx] = RBIdx;

    // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc)) {
      assert(RBIdx < (AArch64::LastFPR - AArch64::FirstFPR) + 1 &&
             "Index out of bound");
      OpFinalIdx[Idx] = AArch64::FirstFPR + RBIdx;
    } else {
      assert(RBIdx < (AArch64::LastGPR - AArch64::FirstGPR) + 1 &&
             "Index out of bound");
      OpFinalIdx[Idx] = AArch64::FirstGPR + RBIdx;
    }
  }

  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    OpFinalIdx = {OpBaseIdx[0] + AArch64::FirstFPR,
                  OpBaseIdx[1] + AArch64::FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    OpFinalIdx = {OpBaseIdx[0] + AArch64::FirstGPR,
                  OpBaseIdx[1] + AArch64::FirstFPR};
    break;
  }
  case TargetOpcode::G_FCMP: {
    OpFinalIdx = {OpBaseIdx[0] + AArch64::FirstGPR, /* Predicate */ 0,
                  OpBaseIdx[2] + AArch64::FirstFPR,
                  OpBaseIdx[3] + AArch64::FirstFPR};
    break;
  }
  }

  // Finally construct the computed mapping.
  for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx)
    if (MI.getOperand(Idx).isReg())
      Mapping.setOperandMapping(
          Idx, ValueMapping{&AArch64::PartMappings[OpFinalIdx[Idx]], 1});

  return Mapping;
}
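// Example of the G_FCMP fine-tuning above on illustrative MIR:
//
//   %r:_(s1) = G_FCMP floatpred(olt), %a:_(s32), %b:_(s32)
//
// The boolean result (operand 0) stays on GPR, the predicate immediate gets
// no mapping, and both floating-point inputs are assigned to FPR.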
// FIXME: inefficient implementation for now. Without ComputeValueVTs we're
// probably going to need specialized lookup structures for various types
// before we have any hope of doing well with something like <13 x i3>. Even
// the common cases should do better than what we have now.
std::pair<LegalizerInfo::LegalizeAction, LLT>
LegalizerInfo::getAction(const InstrAspect &Aspect) const {
  assert(TablesInitialized && "backend forgot to call computeTables");

  // These *have* to be implemented for now, they're the fundamental basis of
  // how everything else is transformed.
  // FIXME: the long-term plan calls for expansion in terms of load/store (if
  // they're not legal).
  if (Aspect.Opcode == TargetOpcode::G_MERGE_VALUES ||
      Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES)
    return std::make_pair(Legal, Aspect.Type);

  LLT Ty = Aspect.Type;
  LegalizeAction Action = findInActions(Aspect);
  // LegalizerHelper is not able to handle non-power-of-2 types right now, so
  // do not try to legalize them unless they are marked as Legal or Custom.
  // FIXME: This is a temporary hack until the general non-power-of-2
  // legalization works.
  if (!isPowerOf2_64(Ty.getSizeInBits()) &&
      !(Action == Legal || Action == Custom))
    return std::make_pair(Unsupported, LLT());

  if (Action != NotFound)
    return findLegalAction(Aspect, Action);

  unsigned Opcode = Aspect.Opcode;
  if (!Ty.isVector()) {
    auto DefaultAction = DefaultActions.find(Aspect.Opcode);
    if (DefaultAction != DefaultActions.end() &&
        DefaultAction->second == Legal)
      return std::make_pair(Legal, Ty);

    if (DefaultAction != DefaultActions.end() &&
        DefaultAction->second == Lower)
      return std::make_pair(Lower, Ty);

    if (DefaultAction == DefaultActions.end() ||
        DefaultAction->second != NarrowScalar)
      return std::make_pair(Unsupported, LLT());
    return findLegalAction(Aspect, NarrowScalar);
  }

  LLT EltTy = Ty.getElementType();
  int NumElts = Ty.getNumElements();

  auto ScalarAction =
      ScalarInVectorActions.find(std::make_pair(Opcode, EltTy));
  if (ScalarAction != ScalarInVectorActions.end() &&
      ScalarAction->second != Legal)
    return findLegalAction(Aspect, ScalarAction->second);

  // The element type is legal in principle, but the number of elements is
  // wrong.
  auto MaxLegalElts = MaxLegalVectorElts.lookup(std::make_pair(Opcode, EltTy));
  if (MaxLegalElts > NumElts)
    return findLegalAction(Aspect, MoreElements);

  if (MaxLegalElts == 0) {
    // Scalarize if there's no legal vector type, which is just a special case
    // of FewerElements.
    return std::make_pair(FewerElements, EltTy);
  }

  return findLegalAction(Aspect, FewerElements);
}
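// A sketch of the non-power-of-2 guard above, assuming nothing was
// registered for s65:
//
//   getAction(InstrAspect(TargetOpcode::G_ADD, LLT::scalar(65)))
//       == {Unsupported, LLT()}
//
// The query bails out before reaching the narrowing/widening logic, which
// cannot yet split such odd-sized types.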
RegisterBankInfo::InstructionMapping
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if (!isPreISelGenericOpcode(Opc)) {
    RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  switch (Opc) {
    // G_{F|S|U}REM are not listed because they are not legal.
    // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_GEP:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
    // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_BITCAST: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    unsigned Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector();
    bool SrcIsGPR = !SrcTy.isVector();
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return InstructionMapping{
        DefaultMappingID, copyCost(DstRB, SrcRB, Size),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        /*NumOperands*/ 2};
  }
  case TargetOpcode::G_SEQUENCE:
    // FIXME: support this, but the generic code is really not going to do
    // anything sane.
    return InstructionMapping();
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();

  // Track the size and bank of each register. We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    OpSize[Idx] = Ty.getSizeInBits();

    // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc))
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    OpRegBankIdx = {PMI_FirstGPR,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross register bank copy and this is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          OpSize[0]);
    break;
  case TargetOpcode::G_LOAD:
    // Loading in the vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] >= PMI_FirstFPR)
      Cost = 2;
    break;
  }

  // Finally construct the computed mapping.
  RegisterBankInfo::InstructionMapping Mapping =
      InstructionMapping{DefaultMappingID, Cost, nullptr, NumOperands};
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
      if (!Mapping->isValid())
        return InstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
  return Mapping;
}
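// A worked example of the bitcast costing above (MIR illustrative):
//
//   %d:_(s64) = G_BITCAST %s:_(<2 x s32>)
//
// Operand 0 defaults to GPR (scalar) and operand 1 to FPR (vector), so the
// banks differ and Cost becomes copyCost(GPR, FPR, 64) rather than 1,
// letting the greedy mode weigh the cross-bank copy against alternatives.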
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if (!isPreISelGenericOpcode(Opc) || Opc == TargetOpcode::G_PHI) {
    const RegisterBankInfo::InstructionMapping &Mapping =
        getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  switch (Opc) {
    // G_{F|S|U}REM are not listed because they are not legal.
    // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_GEP:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
    // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_BITCAST: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    unsigned Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector();
    bool SrcIsGPR = !SrcTy.isVector();
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return getInstructionMapping(
        DefaultMappingID, copyCost(DstRB, SrcRB, Size),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        /*NumOperands*/ 2);
  }
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();

  // Track the size and bank of each register. We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    OpSize[Idx] = Ty.getSizeInBits();

    // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
        Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_FCMP:
    OpRegBankIdx = {PMI_FirstGPR,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross register bank copy and this is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          OpSize[0]);
    break;
  case TargetOpcode::G_LOAD:
    // Loading in the vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      Cost = 2;
    else
      // Check if that load feeds fp instructions.
      // In that case, we want the default mapping to be on FPR
      // instead of blindly mapping every scalar to GPR.
      for (const MachineInstr &UseMI :
           MRI.use_instructions(MI.getOperand(0).getReg()))
        // If we have at least one direct use in a FP instruction,
        // assume this was a floating point load in the IR.
        // If it was not, we would have had a bitcast before
        // reaching that instruction.
        if (isPreISelGenericFloatingPointOpcode(UseMI.getOpcode())) {
          OpRegBankIdx[0] = PMI_FirstFPR;
          break;
        }
    break;
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      unsigned VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (isPreISelGenericFloatingPointOpcode(DefMI->getOpcode()))
        OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }
  }

  // Finally construct the computed mapping.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(DefaultMappingID, Cost,
                               getOperandsMapping(OpdsMapping), NumOperands);
}
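// Example of the load/store bank-propagation heuristics above on an
// illustrative fragment:
//
//   %v:_(s32) = G_LOAD %p:_(p0)
//   %r:_(s32) = G_FADD %v, %w
//
// The load's only use is a floating-point opcode, so OpRegBankIdx[0] for the
// G_LOAD is flipped from the default GPR to FPR, avoiding a GPR -> FPR copy
// before the G_FADD. The G_STORE case applies the mirror-image check to the
// instruction defining the stored value.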
void LegalizerInfo::computeTables() {
  assert(TablesInitialized == false);

  for (unsigned OpcodeIdx = 0; OpcodeIdx <= LastOp - FirstOp; ++OpcodeIdx) {
    const unsigned Opcode = FirstOp + OpcodeIdx;
    for (unsigned TypeIdx = 0; TypeIdx != SpecifiedActions[OpcodeIdx].size();
         ++TypeIdx) {
      // 0. Collect information specified through the setAction API, i.e.
      // for specific bit sizes.
      // For scalar types:
      SizeAndActionsVec ScalarSpecifiedActions;
      // For pointer types:
      std::map<uint16_t, SizeAndActionsVec> AddressSpace2SpecifiedActions;
      // For vector types:
      std::map<uint16_t, SizeAndActionsVec> ElemSize2SpecifiedActions;
      for (auto LLT2Action : SpecifiedActions[OpcodeIdx][TypeIdx]) {
        const LLT Type = LLT2Action.first;
        const LegalizeAction Action = LLT2Action.second;

        auto SizeAction = std::make_pair(Type.getSizeInBits(), Action);
        if (Type.isPointer())
          AddressSpace2SpecifiedActions[Type.getAddressSpace()].push_back(
              SizeAction);
        else if (Type.isVector())
          ElemSize2SpecifiedActions[Type.getElementType().getSizeInBits()]
              .push_back(SizeAction);
        else
          ScalarSpecifiedActions.push_back(SizeAction);
      }

      // 1. Handle scalar types
      {
        // Decide how to handle bit sizes for which no explicit specification
        // was given.
        SizeChangeStrategy S = &unsupportedForDifferentSizes;
        if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() &&
            ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
          S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx];
        std::sort(ScalarSpecifiedActions.begin(),
                  ScalarSpecifiedActions.end());
        checkPartialSizeAndActionsVector(ScalarSpecifiedActions);
        setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions));
      }

      // 2. Handle pointer types
      for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) {
        std::sort(PointerSpecifiedActions.second.begin(),
                  PointerSpecifiedActions.second.end());
        checkPartialSizeAndActionsVector(PointerSpecifiedActions.second);
        // For pointer types, we assume that there isn't a meaningful way to
        // change the number of bits used in the pointer.
        setPointerAction(
            Opcode, TypeIdx, PointerSpecifiedActions.first,
            unsupportedForDifferentSizes(PointerSpecifiedActions.second));
      }

      // 3. Handle vector types
      SizeAndActionsVec ElementSizesSeen;
      for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) {
        std::sort(VectorSpecifiedActions.second.begin(),
                  VectorSpecifiedActions.second.end());
        const uint16_t ElementSize = VectorSpecifiedActions.first;
        ElementSizesSeen.push_back({ElementSize, Legal});
        checkPartialSizeAndActionsVector(VectorSpecifiedActions.second);
        // For vector types, we assume that the best way to adapt the number
        // of elements is to widen to the next larger element count for which
        // the vector type is legal, unless there is no such type. In that
        // case, legalize towards a vector type with a smaller number of
        // elements.
        SizeAndActionsVec NumElementsActions;
        for (SizeAndAction BitsizeAndAction : VectorSpecifiedActions.second) {
          assert(BitsizeAndAction.first % ElementSize == 0);
          const uint16_t NumElements = BitsizeAndAction.first / ElementSize;
          NumElementsActions.push_back({NumElements, BitsizeAndAction.second});
        }
        setVectorNumElementAction(
            Opcode, TypeIdx, ElementSize,
            moreToWiderTypesAndLessToWidest(NumElementsActions));
      }
      std::sort(ElementSizesSeen.begin(), ElementSizesSeen.end());
      SizeChangeStrategy VectorElementSizeChangeStrategy =
          &unsupportedForDifferentSizes;
      if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() &&
          VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
        VectorElementSizeChangeStrategy =
            VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx];
      setScalarInVectorAction(
          Opcode, TypeIdx, VectorElementSizeChangeStrategy(ElementSizesSeen));
    }
  }

  TablesInitialized = true;
}
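// A worked example of the vector handling above, assuming these
// specifications were made for G_ADD at type index 0:
//
//   setAction({G_ADD, LLT::vector(2, 32)}, Legal);   // <2 x s32>, 64 bits
//   setAction({G_ADD, LLT::vector(4, 32)}, Legal);   // <4 x s32>, 128 bits
//
// ElemSize2SpecifiedActions[32] sorts to {{64, Legal}, {128, Legal}}, which
// is converted to the per-element-count vector {{2, Legal}, {4, Legal}}.
// Under moreToWiderTypesAndLessToWidest, <3 x s32> is widened to <4 x s32>,
// while <8 x s32> is narrowed toward the widest legal count, 4.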