void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
                                  StringRef Annot,
                                  const MCSubtargetInfo &STI) {
  // If verbose assembly is enabled, we can print some informative comments.
  if (CommentStream)
    HasCustomInstComment = EmitAnyX86InstComments(MI, *CommentStream, MII);

  printInstFlags(MI, OS);

  // Output CALLpcrel32 as "callq" in 64-bit mode.
  // In Intel annotation it's always emitted as "call".
  //
  // TODO: Probably this hack should be redesigned via InstAlias in
  // InstrInfo.td as soon as Requires clause is supported properly
  // for InstAlias.
  if (MI->getOpcode() == X86::CALLpcrel32 &&
      (STI.getFeatureBits()[X86::Mode64Bit])) {
    OS << "\tcallq\t";
    printPCRelImm(MI, 0, OS);
  }
  // data16 and data32 both have the same encoding of 0x66. While data32 is
  // valid only in 16 bit systems, data16 is valid in the rest.
  // There seems to be some lack of support of the Requires clause that causes
  // 0x66 to be interpreted as "data16" by the asm printer.
  // Thus we add an adjustment here in order to print the "right" instruction.
  else if (MI->getOpcode() == X86::DATA16_PREFIX &&
           STI.getFeatureBits()[X86::Mode16Bit]) {
    OS << "\tdata32";
  }
  // Try to print any aliases first.
  else if (!printAliasInstr(MI, OS))
    printInstruction(MI, OS);

  // Next always print the annotation.
  printAnnotation(OS, Annot);
}
void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
                                          SmallVectorImpl<MCFixup> &Fixups,
                                          const MCSubtargetInfo &STI) const {
  verifyInstructionPredicates(MI,
                              computeAvailableFeatures(STI.getFeatureBits()));

  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  if (MI.getOpcode() == R600::RETURN ||
      MI.getOpcode() == R600::FETCH_CLAUSE ||
      MI.getOpcode() == R600::ALU_CLAUSE ||
      MI.getOpcode() == R600::BUNDLE ||
      MI.getOpcode() == R600::KILL) {
    return;
  } else if (IS_VTX(Desc)) {
    uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups, STI);
    uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
    if (!(STI.getFeatureBits()[R600::FeatureCaymanISA])) {
      InstWord2 |= 1 << 19; // Mega-Fetch bit
    }

    Emit(InstWord01, OS);
    Emit(InstWord2, OS);
    Emit((uint32_t) 0, OS);
  } else if (IS_TEX(Desc)) {
    int64_t Sampler = MI.getOperand(14).getImm();

    int64_t SrcSelect[4] = {
      MI.getOperand(2).getImm(),
      MI.getOperand(3).getImm(),
      MI.getOperand(4).getImm(),
      MI.getOperand(5).getImm()
    };
    int64_t Offsets[3] = {
      MI.getOperand(6).getImm() & 0x1F,
      MI.getOperand(7).getImm() & 0x1F,
      MI.getOperand(8).getImm() & 0x1F
    };

    uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups, STI);
    uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
                     SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
                     SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 |
                     Offsets[1] << 5 | Offsets[2] << 10;

    Emit(Word01, OS);
    Emit(Word2, OS);
    Emit((uint32_t) 0, OS);
  } else {
    uint64_t Inst = getBinaryCodeForInstr(MI, Fixups, STI);
    if ((STI.getFeatureBits()[R600::FeatureR600ALUInst]) &&
        ((Desc.TSFlags & R600_InstFlag::OP1) ||
         Desc.TSFlags & R600_InstFlag::OP2)) {
      uint64_t ISAOpCode = Inst & (0x3FFULL << 39);
      Inst &= ~(0x3FFULL << 39);
      Inst |= ISAOpCode << 1;
    }
    Emit(Inst, OS);
  }
}
Optional<double>
MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI,
                                      const MCSchedClassDesc &SCDesc) {
  Optional<double> Throughput;
  const MCSchedModel &SM = STI.getSchedModel();
  const MCWriteProcResEntry *I = STI.getWriteProcResBegin(&SCDesc);
  const MCWriteProcResEntry *E = STI.getWriteProcResEnd(&SCDesc);
  for (; I != E; ++I) {
    if (!I->Cycles)
      continue;
    unsigned NumUnits = SM.getProcResource(I->ProcResourceIdx)->NumUnits;
    double Temp = NumUnits * 1.0 / I->Cycles;
    Throughput = Throughput ? std::min(Throughput.getValue(), Temp) : Temp;
  }
  return Throughput ? 1 / Throughput.getValue() : Throughput;
}
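// [Sketch, not from the source] The query above returns None when no
// write-proc-res entry consumes any cycles, so callers need a fallback
// policy. A minimal caller might look like this; the helper name and the
// fallback text are assumptions.
static void printRThroughput(const MCSubtargetInfo &STI,
                             const MCSchedClassDesc &SCDesc,
                             raw_ostream &OS) {
  if (Optional<double> RT = MCSchedModel::getReciprocalThroughput(STI, SCDesc))
    OS << "reciprocal throughput: " << RT.getValue() << '\n';
  else
    OS << "reciprocal throughput: unknown\n"; // no resource consumed cycles
}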
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
                                  StringRef Annot,
                                  const MCSubtargetInfo &STI) {
  const MCInstrDesc &Desc = MII.get(MI->getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // If verbose assembly is enabled, we can print some informative comments.
  if (CommentStream)
    HasCustomInstComment =
        EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);

  if (TSFlags & X86II::LOCK)
    OS << "\tlock\t";

  // Output CALLpcrel32 as "callq" in 64-bit mode.
  // In Intel annotation it's always emitted as "call".
  //
  // TODO: Probably this hack should be redesigned via InstAlias in
  // InstrInfo.td as soon as Requires clause is supported properly
  // for InstAlias.
  if (MI->getOpcode() == X86::CALLpcrel32 &&
      (STI.getFeatureBits()[X86::Mode64Bit])) {
    OS << "\tcallq\t";
    printPCRelImm(MI, 0, OS);
  }
  // Try to print any aliases first.
  else if (!printAliasInstr(MI, OS))
    printInstruction(MI, OS);

  // Next always print the annotation.
  printAnnotation(OS, Annot);
}
static bool getARMLoadDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
                                      std::string &Info) {
  assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
         "cannot predicate thumb instructions");

  assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments");
  bool ListContainsPC = false, ListContainsLR = false;
  for (unsigned OI = 4, OE = MI.getNumOperands(); OI < OE; ++OI) {
    assert(MI.getOperand(OI).isReg() && "expected register");
    switch (MI.getOperand(OI).getReg()) {
    default:
      break;
    case ARM::LR:
      ListContainsLR = true;
      break;
    case ARM::PC:
      ListContainsPC = true;
      break;
    case ARM::SP:
      Info = "use of SP in the list is deprecated";
      return true;
    }
  }

  if (ListContainsPC && ListContainsLR) {
    Info = "use of LR and PC simultaneously in the list is deprecated";
    return true;
  }

  return false;
}
static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
                                  std::string &Info) {
  // FeatureBitset is indexed by feature, not masked; '&' against a feature
  // enum does not compile, so use operator[] as elsewhere in this code.
  if (STI.getFeatureBits()[llvm::ARM::HasV7Ops] &&
      (MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) &&
      (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) &&
      // Checks for the deprecated CP15ISB encoding:
      // mcr p15, #0, rX, c7, c5, #4
      (MI.getOperand(3).isImm() && MI.getOperand(3).getImm() == 7)) {
    if ((MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 4)) {
      if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 5) {
        Info = "deprecated since v7, use 'isb'";
        return true;
      }

      // Checks for the deprecated CP15DSB encoding:
      // mcr p15, #0, rX, c7, c10, #4
      if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10) {
        Info = "deprecated since v7, use 'dsb'";
        return true;
      }
    }

    // Checks for the deprecated CP15DMB encoding:
    // mcr p15, #0, rX, c7, c10, #5
    if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10 &&
        (MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 5)) {
      Info = "deprecated since v7, use 'dmb'";
      return true;
    }
  }
  return false;
}
void WebAssemblyMCCodeEmitter::encodeInstruction(
    const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
    const MCSubtargetInfo &STI) const {
  // FIXME: This is not the real binary encoding. This is an extremely
  // over-simplified encoding where we just use uint64_t for everything. This
  // is a temporary measure.
  support::endian::Writer<support::little>(OS).write<uint64_t>(MI.getOpcode());
  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  if (Desc.isVariadic())
    support::endian::Writer<support::little>(OS).write<uint64_t>(
        MI.getNumOperands() - Desc.NumOperands);
  for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
    const MCOperand &MO = MI.getOperand(i);
    if (MO.isReg()) {
      support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getReg());
    } else if (MO.isImm()) {
      support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getImm());
    } else if (MO.isFPImm()) {
      support::endian::Writer<support::little>(OS).write<double>(MO.getFPImm());
    } else if (MO.isExpr()) {
      support::endian::Writer<support::little>(OS).write<uint64_t>(0);
      Fixups.push_back(MCFixup::create(
          (1 + MCII.get(MI.getOpcode()).isVariadic() + i) * sizeof(uint64_t),
          MO.getExpr(),
          STI.getTargetTriple().isArch64Bit() ? FK_Data_8 : FK_Data_4,
          MI.getLoc()));
      ++MCNumFixups;
    } else {
      llvm_unreachable("unexpected operand kind");
    }
  }

  ++MCNumEmitted; // Keep track of the # of mi's emitted.
}
PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI,
                               const MCSubtargetInfo &STI)
    : MCInstPrinter(MAI, MRI) {
  // Initialize the set of available features.
  setAvailableFeatures(STI.getFeatureBits());
}
void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
                                           SmallVectorImpl<MCFixup> &Fixups,
                                           const MCSubtargetInfo &STI) const {
  verifyInstructionPredicates(MI,
                              computeAvailableFeatures(STI.getFeatureBits()));

  unsigned Bits = getBinaryCodeForInstr(MI, Fixups, STI);

  if (Ctx.getAsmInfo()->isLittleEndian()) {
    // Output the bits in little-endian byte order.
    support::endian::Writer<support::little>(OS).write<uint32_t>(Bits);
  } else {
    // Output the bits in big-endian byte order.
    support::endian::Writer<support::big>(OS).write<uint32_t>(Bits);
  }
  unsigned tlsOpNo = 0;
  switch (MI.getOpcode()) {
  default: break;
  case SP::TLS_CALL:   tlsOpNo = 1; break;
  case SP::TLS_ADDrr:
  case SP::TLS_ADDXrr:
  case SP::TLS_LDrr:
  case SP::TLS_LDXrr:  tlsOpNo = 3; break;
  }
  if (tlsOpNo != 0) {
    const MCOperand &MO = MI.getOperand(tlsOpNo);
    uint64_t op = getMachineOpValue(MI, MO, Fixups, STI);
    assert(op == 0 && "Unexpected operand value!");
    (void)op; // suppress warning.
  }

  ++MCNumEmitted; // Keep track of the # of mi's emitted.
}
/// Return the slots this instruction can execute out of.
unsigned HexagonMCInstrInfo::getUnits(MCInstrInfo const &MCII,
                                      MCSubtargetInfo const &STI,
                                      MCInst const &MCI) {
  const InstrItinerary *II = STI.getSchedModel().InstrItineraries;
  int SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass();
  return ((II[SchedClass].FirstStage + HexagonStages)->getUnits());
}
static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
                                 std::string &Info) {
  // Index the FeatureBitset directly; masking with '&' was the pre-bitset API.
  if (STI.getFeatureBits()[llvm::ARM::HasV8Ops] && MI.getOperand(1).isImm() &&
      MI.getOperand(1).getImm() != 8) {
    Info = "applying IT instruction to more than one subsequent instruction is "
           "deprecated";
    return true;
  }

  return false;
}
void DispatchStage::updateRAWDependencies(ReadState &RS,
                                          const MCSubtargetInfo &STI) {
  SmallVector<WriteRef, 4> DependentWrites;

  collectWrites(DependentWrites, RS.getRegisterID());
  RS.setDependentWrites(DependentWrites.size());

  // We know that this read depends on all the writes in DependentWrites.
  // For each write, check if we have ReadAdvance information, and use it
  // to figure out in how many cycles this read becomes available.
  const ReadDescriptor &RD = RS.getDescriptor();
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID);
  for (WriteRef &WR : DependentWrites) {
    WriteState &WS = *WR.getWriteState();
    unsigned WriteResID = WS.getWriteResourceID();
    int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID);
    WS.addUser(&RS, ReadAdvance);
  }
}
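// [Illustrative, not from the source] The net effect of the ReadAdvance
// bookkeeping above: a dependent read sees the producing write become
// available ReadAdvance cycles early, clamped at zero. The helper name is
// an assumption.
static int cyclesUntilReady(int WriteLatency, int ReadAdvance) {
  return std::max(0, WriteLatency - ReadAdvance);
}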
bool MCInstrDesc::getDeprecatedInfo(MCInst &MI, const MCSubtargetInfo &STI,
                                    std::string &Info) const {
  if (ComplexDeprecationInfo)
    return ComplexDeprecationInfo(MI, STI, Info);
  if (DeprecatedFeature != -1 && STI.getFeatureBits()[DeprecatedFeature]) {
    // FIXME: it would be nice to include the subtarget feature here.
    Info = "deprecated";
    return true;
  }
  return false;
}
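// [Sketch, not from the source] A typical call site: an emitter asks the
// opcode descriptor whether the instruction is deprecated on this subtarget
// (either via a ComplexDeprecationInfo callback such as the ARM predicates
// above, or via a single DeprecatedFeature bit) and reports the message.
// The wrapper and the reporting path are assumptions.
static void checkDeprecation(MCInst &Inst, const MCInstrInfo &MCII,
                             const MCSubtargetInfo &STI) {
  std::string Info;
  if (MCII.get(Inst.getOpcode()).getDeprecatedInfo(Inst, STI, Info))
    errs() << "warning: " << Info << '\n'; // assumed diagnostic sink
}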
unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op,
                                         const MCSubtargetInfo &STI) const {
  bool HasThumb2 = STI.getFeatureBits()[ARM::FeatureThumb2];
  bool HasV8MBaselineOps = STI.getFeatureBits()[ARM::HasV8MBaselineOps];

  switch (Op) {
  default:
    return Op;
  case ARM::tBcc:
    return HasThumb2 ? (unsigned)ARM::t2Bcc : Op;
  case ARM::tLDRpci:
    return HasThumb2 ? (unsigned)ARM::t2LDRpci : Op;
  case ARM::tADR:
    return HasThumb2 ? (unsigned)ARM::t2ADR : Op;
  case ARM::tB:
    return HasV8MBaselineOps ? (unsigned)ARM::t2B : Op;
  case ARM::tCBZ:
    return ARM::tHINT;
  case ARM::tCBNZ:
    return ARM::tHINT;
  }
}
int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
                                      const MCSchedClassDesc &SCDesc) {
  int Latency = 0;
  for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries;
       DefIdx != DefEnd; ++DefIdx) {
    // Lookup the definition's write latency in SubtargetInfo.
    const MCWriteLatencyEntry *WLEntry =
        STI.getWriteLatencyEntry(&SCDesc, DefIdx);
    // Early exit if we found an invalid latency.
    if (WLEntry->Cycles < 0)
      return WLEntry->Cycles;
    Latency = std::max(Latency, static_cast<int>(WLEntry->Cycles));
  }
  return Latency;
}
AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
                                                 const MCSubtargetInfo &STI)
    : AMDGPUTargetStreamer(S), Streamer(S) {
  MCAssembler &MCA = getStreamer().getAssembler();
  unsigned EFlags = MCA.getELFHeaderEFlags();

  EFlags &= ~ELF::EF_AMDGPU_MACH;
  EFlags |= getMACH(STI.getCPU());

  EFlags &= ~ELF::EF_AMDGPU_XNACK;
  if (AMDGPU::hasXNACK(STI))
    EFlags |= ELF::EF_AMDGPU_XNACK;

  MCA.setELFHeaderEFlags(EFlags);
}
static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
                                       std::string &Info) {
  assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
         "cannot predicate thumb instructions");

  assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments");
  for (unsigned OI = 4, OE = MI.getNumOperands(); OI < OE; ++OI) {
    assert(MI.getOperand(OI).isReg() && "expected register");
    if (MI.getOperand(OI).getReg() == ARM::SP ||
        MI.getOperand(OI).getReg() == ARM::PC) {
      Info = "use of SP or PC in the list is deprecated";
      return true;
    }
  }

  return false;
}
void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
                                             SmallVectorImpl<MCFixup> &Fixups,
                                             const MCSubtargetInfo &STI) const {
  verifyInstructionPredicates(MI,
                              computeAvailableFeatures(STI.getFeatureBits()));

  if (MI.getOpcode() == AArch64::TLSDESCCALL) {
    // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the
    // following (BLR) instruction. It doesn't emit any code itself so it
    // doesn't go through the normal TableGenerated channels.
    MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call);
    Fixups.push_back(MCFixup::create(0, MI.getOperand(0).getExpr(), Fixup));
    return;
  }

  uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
  support::endian::Writer<support::little>(OS).write<uint32_t>(Binary);
  ++MCNumEmitted; // Keep track of the # of mi's emitted.
}
int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
                                      const MCInstrInfo &MCII,
                                      const MCInst &Inst) const {
  unsigned SchedClass = MCII.get(Inst.getOpcode()).getSchedClass();
  const MCSchedClassDesc *SCDesc = getSchedClassDesc(SchedClass);
  if (!SCDesc->isValid())
    return 0;

  unsigned CPUID = getProcessorID();
  while (SCDesc->isVariant()) {
    SchedClass = STI.resolveVariantSchedClass(SchedClass, &Inst, CPUID);
    SCDesc = getSchedClassDesc(SchedClass);
  }

  if (SchedClass)
    return MCSchedModel::computeInstrLatency(STI, *SCDesc);

  llvm_unreachable("unsupported variant scheduling class");
}
Optional<double>
MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI,
                                      const MCInstrInfo &MCII,
                                      const MCInst &Inst) const {
  Optional<double> Throughput;
  unsigned SchedClass = MCII.get(Inst.getOpcode()).getSchedClass();
  const MCSchedClassDesc *SCDesc = getSchedClassDesc(SchedClass);
  if (!SCDesc->isValid())
    return Throughput;

  unsigned CPUID = getProcessorID();
  while (SCDesc->isVariant()) {
    SchedClass = STI.resolveVariantSchedClass(SchedClass, &Inst, CPUID);
    SCDesc = getSchedClassDesc(SchedClass);
  }

  if (SchedClass)
    return MCSchedModel::getReciprocalThroughput(STI, *SCDesc);

  llvm_unreachable("unsupported variant scheduling class");
}
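// [Sketch, not from the source] The two per-MCInst queries above resolve
// variant scheduling classes internally, so a driver only needs the
// instruction, the instruction info, and the subtarget. The function name
// and the output format are assumptions.
static void profileInst(const MCInst &Inst, const MCInstrInfo &MCII,
                        const MCSubtargetInfo &STI) {
  const MCSchedModel &SM = STI.getSchedModel();
  int Latency = SM.computeInstrLatency(STI, MCII, Inst);
  Optional<double> RT = SM.getReciprocalThroughput(STI, MCII, Inst);
  errs() << "latency=" << Latency << " rthroughput="
         << (RT ? RT.getValue() : 0.0) << '\n';
}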
unsigned HexagonMCInstrInfo::getOtherReservedSlots(MCInstrInfo const &MCII,
                                                   MCSubtargetInfo const &STI,
                                                   MCInst const &MCI) {
  const InstrItinerary *II = STI.getSchedModel().InstrItineraries;
  int SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass();
  unsigned Slots = 0;

  // FirstStage are slots that this instruction can execute in.
  // FirstStage+1 are slots that are also consumed by this instruction.
  // For example: vmemu can only execute in slot 0 but also consumes slot 1.
  for (unsigned Stage = II[SchedClass].FirstStage + 1;
       Stage < II[SchedClass].LastStage; ++Stage) {
    unsigned Units = (Stage + HexagonStages)->getUnits();
    if (Units > HexagonGetLastSlot())
      break;
    // fyi: getUnits() will return 0x1, 0x2, 0x4 or 0x8
    Slots |= Units;
  }

  // If 0 is returned, then no additional slots are consumed by this inst.
  return Slots;
}
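// [Illustrative, not from the source] A packet checker could combine the two
// itinerary queries to obtain the complete slot reservation for an
// instruction: slots it may issue in (getUnits), plus slots it additionally
// consumes (the vmemu case above). The helper name is an assumption.
static unsigned getFullSlotMask(MCInstrInfo const &MCII,
                                MCSubtargetInfo const &STI,
                                MCInst const &MCI) {
  return HexagonMCInstrInfo::getUnits(MCII, STI, MCI) |
         HexagonMCInstrInfo::getOtherReservedSlots(MCII, STI, MCI);
}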
// Expand PseudoAddTPRel to a simple ADD with the correct relocation.
void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI, raw_ostream &OS,
                                        SmallVectorImpl<MCFixup> &Fixups,
                                        const MCSubtargetInfo &STI) const {
  MCOperand DestReg = MI.getOperand(0);
  MCOperand SrcReg = MI.getOperand(1);
  MCOperand TPReg = MI.getOperand(2);
  assert(TPReg.isReg() && TPReg.getReg() == RISCV::X4 &&
         "Expected thread pointer as second input to TP-relative add");

  MCOperand SrcSymbol = MI.getOperand(3);
  assert(SrcSymbol.isExpr() &&
         "Expected expression as third input to TP-relative add");

  const RISCVMCExpr *Expr = dyn_cast<RISCVMCExpr>(SrcSymbol.getExpr());
  assert(Expr && Expr->getKind() == RISCVMCExpr::VK_RISCV_TPREL_ADD &&
         "Expected tprel_add relocation on TP-relative symbol");

  // Emit the correct tprel_add relocation for the symbol.
  Fixups.push_back(MCFixup::create(
      0, Expr, MCFixupKind(RISCV::fixup_riscv_tprel_add), MI.getLoc()));

  // Emit fixup_riscv_relax for tprel_add where the relax feature is enabled.
  if (STI.getFeatureBits()[RISCV::FeatureRelax]) {
    const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx);
    Fixups.push_back(MCFixup::create(
        0, Dummy, MCFixupKind(RISCV::fixup_riscv_relax), MI.getLoc()));
  }

  // Emit a normal ADD instruction with the given operands.
  MCInst TmpInst = MCInstBuilder(RISCV::ADD)
                       .addOperand(DestReg)
                       .addOperand(SrcReg)
                       .addOperand(TPReg);
  uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
  support::endian::write(OS, Binary, support::little);
}
static void populateWrites(InstrDesc &ID, const MCInst &MCI,
                           const MCInstrDesc &MCDesc,
                           const MCSchedClassDesc &SCDesc,
                           const MCSubtargetInfo &STI) {
  // Set if writes through this opcode may update super registers.
  // TODO: on x86-64, a 4 byte write of a general purpose register always
  // fully updates the super-register.
  // More in general, (at least on x86) not all register writes perform
  // a partial (super-)register update.
  // For example, an AVX instruction that writes on a XMM register implicitly
  // zeroes the upper half of every aliasing super-register.
  //
  // For now, we pessimistically assume that writes are all potentially
  // partial register updates. This is a good default for most targets, except
  // for those like x86 which implement a special semantic for certain opcodes.
  // At least on x86, this may lead to an inaccurate prediction of the
  // instruction level parallelism.
  bool FullyUpdatesSuperRegisters = false;

  // Now populate Writes.
  // This algorithm currently works under the strong (and potentially
  // incorrect) assumption that information related to register def/uses can
  // be obtained from MCInstrDesc.
  //
  // However, class MCInstrDesc is used to describe MachineInstr objects and
  // not MCInst objects. To be more specific, MCInstrDesc objects are opcode
  // descriptors that are automatically generated via tablegen based on the
  // instruction set information available from the target .td files. That
  // means the number of (explicit) definitions according to MCInstrDesc
  // always matches the cardinality of the `(outs)` set in tablegen.
  //
  // By construction, definitions must appear first in the operand sequence of
  // a MachineInstr. Also, the (outs) sequence is preserved (example: the first
  // element in the outs set is the first operand in the corresponding
  // MachineInstr). That's the reason why MCInstrDesc only needs to declare the
  // total number of register definitions, and not where those definitions are
  // in the machine operand sequence.
  //
  // Unfortunately, it is not safe to use the information from MCInstrDesc to
  // also describe MCInst objects. An MCInst object can be obtained from a
  // MachineInstr through a lowering step which may restructure the operand
  // sequence (and even remove or introduce new operands). So, there is a high
  // risk that the lowering step breaks the assumption that register
  // definitions are always at the beginning of the machine operand sequence.
  //
  // This is a fundamental problem, and it is still an open problem.
  // Essentially, we have to find a way to correlate def/use operands of a
  // MachineInstr to operands of an MCInst. Otherwise, we cannot correctly
  // reconstruct data dependencies, nor can we correctly interpret the
  // scheduling model, which heavily uses machine operand indices to define
  // processor read-advance information and to identify processor write
  // resources. Essentially, we either need something like a MCInstrDesc, but
  // for MCInst, or a way to map MCInst operands back to MachineInstr operands.
  //
  // Unfortunately, we don't have that information now. So, this prototype
  // currently works under the strong assumption that we can always safely
  // trust the content of an MCInstrDesc. For example, we can query a
  // MCInstrDesc to obtain the number of explicit and implicit register
  // definitions. We also assume that register definitions always come first
  // in the operand sequence.
  // This last assumption usually makes sense for MachineInstr, where register
  // definitions always appear at the beginning of the operands sequence. In
  // reality, these assumptions could be broken by the lowering step, which can
  // decide to lay out operands in a different order than the original order of
  // operands as specified by the MachineInstr.
  //
  // Things get even more complicated in the presence of "optional" register
  // definitions. For MachineInstr, optional register definitions are always at
  // the end of the operand sequence. Some ARM instructions that may update the
  // status flags specify that register as an optional operand. Since we don't
  // have operand descriptors for MCInst, we assume for now that the optional
  // definition is always the last operand of a MCInst. Again, this assumption
  // may be okay for most targets. However, there is no guarantee that targets
  // would respect that.
  //
  // In conclusion: these are for now the strong assumptions made by the tool:
  //  * The number of explicit and implicit register definitions in a MCInst
  //    matches the number of explicit and implicit definitions according to
  //    the opcode descriptor (MCInstrDesc).
  //  * Register definitions take precedence over register uses in the
  //    operands list.
  //  * If an opcode specifies an optional definition, then the optional
  //    definition is always the last operand in the sequence, and it can be
  //    set to zero (i.e. "no register").
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // In the longer term, we need to find a proper solution for this issue.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;
  ID.Writes.resize(TotalDefs);

  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
  unsigned CurrentDef = 0;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency = WLE.Cycles == -1 ? ID.MaxLatency : WLE.Cycles;
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.FullyUpdatesSuperRegs = FullyUpdatesSuperRegisters;
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\tOpIdx=" << Write.OpIndex << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }
NVPTXInstPrinter::NVPTXInstPrinter(const MCAsmInfo &MAI,
                                   const MCInstrInfo &MII,
                                   const MCRegisterInfo &MRI,
                                   const MCSubtargetInfo &STI)
    : MCInstPrinter(MAI, MII, MRI) {
  setAvailableFeatures(STI.getFeatureBits());
}
bool MipsMCCodeEmitter::isMips32r6(const MCSubtargetInfo &STI) const {
  return STI.getFeatureBits()[Mips::FeatureMips32r6];
}
bool MipsMCCodeEmitter::isMicroMips(const MCSubtargetInfo &STI) const {
  // Index the FeatureBitset as in isMips32r6 above; '&' against a feature
  // enum was the pre-bitset API and no longer compiles.
  return STI.getFeatureBits()[Mips::FeatureMicroMips];
}
/// Emit the build attributes that only depend on the hardware that we expect
/// to be available, and not on the ABI, or any source-language choices.
void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
  switchVendor("aeabi");

  const StringRef CPUString = STI.getCPU();
  if (!CPUString.empty() && !CPUString.startswith("generic")) {
    // FIXME: remove krait check when GNU tools support krait cpu
    if (STI.hasFeature(ARM::ProcKrait)) {
      emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
      // We consider krait as a "cortex-a9" + hwdiv CPU
      // Enable hwdiv through ".arch_extension idiv"
      if (STI.hasFeature(ARM::FeatureHWDivThumb) ||
          STI.hasFeature(ARM::FeatureHWDivARM))
        emitArchExtension(ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM);
    } else {
      emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
    }
  }

  emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(STI));

  if (STI.hasFeature(ARM::FeatureAClass)) {
    emitAttribute(ARMBuildAttrs::CPU_arch_profile,
                  ARMBuildAttrs::ApplicationProfile);
  } else if (STI.hasFeature(ARM::FeatureRClass)) {
    emitAttribute(ARMBuildAttrs::CPU_arch_profile,
                  ARMBuildAttrs::RealTimeProfile);
  } else if (STI.hasFeature(ARM::FeatureMClass)) {
    emitAttribute(ARMBuildAttrs::CPU_arch_profile,
                  ARMBuildAttrs::MicroControllerProfile);
  }

  emitAttribute(ARMBuildAttrs::ARM_ISA_use, STI.hasFeature(ARM::FeatureNoARM)
                                                ? ARMBuildAttrs::Not_Allowed
                                                : ARMBuildAttrs::Allowed);

  if (isV8M(STI)) {
    emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
                  ARMBuildAttrs::AllowThumbDerived);
  } else if (STI.hasFeature(ARM::FeatureThumb2)) {
    emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::AllowThumb32);
  } else if (STI.hasFeature(ARM::HasV4TOps)) {
    emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed);
  }

  if (STI.hasFeature(ARM::FeatureNEON)) {
    /* NEON is not exactly a VFP architecture, but GAS emit one of
     * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
    if (STI.hasFeature(ARM::FeatureFPARMv8)) {
      if (STI.hasFeature(ARM::FeatureCrypto))
        emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8);
      else
        emitFPU(ARM::FK_NEON_FP_ARMV8);
    } else if (STI.hasFeature(ARM::FeatureVFP4))
      emitFPU(ARM::FK_NEON_VFPV4);
    else
      emitFPU(STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_NEON_FP16
                                               : ARM::FK_NEON);
    // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
    if (STI.hasFeature(ARM::HasV8Ops))
      emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
                    STI.hasFeature(ARM::HasV8_1aOps)
                        ? ARMBuildAttrs::AllowNeonARMv8_1a
                        : ARMBuildAttrs::AllowNeonARMv8);
  } else {
    if (STI.hasFeature(ARM::FeatureFPARMv8))
      // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
      // FPU, but there are two different names for it depending on the CPU.
      emitFPU(STI.hasFeature(ARM::FeatureD16)
                  ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV5_SP_D16
                                                           : ARM::FK_FPV5_D16)
                  : ARM::FK_FP_ARMV8);
    else if (STI.hasFeature(ARM::FeatureVFP4))
      emitFPU(STI.hasFeature(ARM::FeatureD16)
                  ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV4_SP_D16
                                                           : ARM::FK_VFPV4_D16)
                  : ARM::FK_VFPV4);
    else if (STI.hasFeature(ARM::FeatureVFP3))
      emitFPU(
          STI.hasFeature(ARM::FeatureD16)
              // +d16
              ? (STI.hasFeature(ARM::FeatureVFPOnlySP)
                     ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
                                                         : ARM::FK_VFPV3XD)
                     : (STI.hasFeature(ARM::FeatureFP16)
                            ? ARM::FK_VFPV3_D16_FP16
                            : ARM::FK_VFPV3_D16))
              // -d16
              : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16
                                                  : ARM::FK_VFPV3));
    else if (STI.hasFeature(ARM::FeatureVFP2))
      emitFPU(ARM::FK_VFPV2);
  }

  // ABI_HardFP_use attribute to indicate single precision FP.
  if (STI.hasFeature(ARM::FeatureVFPOnlySP))
    emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
                  ARMBuildAttrs::HardFPSinglePrecision);

  if (STI.hasFeature(ARM::FeatureFP16))
    emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);

  if (STI.hasFeature(ARM::FeatureMP))
    emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);

  // Hardware divide in ARM mode is part of base arch, starting from ARMv8.
  // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
  // It is not possible to produce DisallowDIV: if hwdiv is present in the base
  // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits.
  // AllowDIVExt is only emitted if hwdiv isn't available in the base arch;
  // otherwise, the default value (AllowDIVIfExists) applies.
  if (STI.hasFeature(ARM::FeatureHWDivARM) && !STI.hasFeature(ARM::HasV8Ops))
    emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);

  if (STI.hasFeature(ARM::FeatureDSP) && isV8M(STI))
    emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed);

  if (STI.hasFeature(ARM::FeatureStrictAlign))
    emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
                  ARMBuildAttrs::Not_Allowed);
  else
    emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
                  ARMBuildAttrs::Allowed);

  if (STI.hasFeature(ARM::FeatureTrustZone) &&
      STI.hasFeature(ARM::FeatureVirtualization))
    emitAttribute(ARMBuildAttrs::Virtualization_use,
                  ARMBuildAttrs::AllowTZVirtualization);
  else if (STI.hasFeature(ARM::FeatureTrustZone))
    emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZ);
  else if (STI.hasFeature(ARM::FeatureVirtualization))
    emitAttribute(ARMBuildAttrs::Virtualization_use,
                  ARMBuildAttrs::AllowVirtualization);
}
static bool isV8M(const MCSubtargetInfo &STI) {
  // Note that v8M Baseline is a subset of v6T2!
  return (STI.hasFeature(ARM::HasV8MBaselineOps) &&
          !STI.hasFeature(ARM::HasV6T2Ops)) ||
         STI.hasFeature(ARM::HasV8MMainlineOps);
}
static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) {
  if (STI.getCPU() == "xscale")
    return ARMBuildAttrs::v5TEJ;

  if (STI.hasFeature(ARM::HasV8Ops)) {
    if (STI.hasFeature(ARM::FeatureRClass))
      return ARMBuildAttrs::v8_R;
    return ARMBuildAttrs::v8_A;
  } else if (STI.hasFeature(ARM::HasV8MMainlineOps))
    return ARMBuildAttrs::v8_M_Main;
  else if (STI.hasFeature(ARM::HasV7Ops)) {
    if (STI.hasFeature(ARM::FeatureMClass) && STI.hasFeature(ARM::FeatureDSP))
      return ARMBuildAttrs::v7E_M;
    return ARMBuildAttrs::v7;
  } else if (STI.hasFeature(ARM::HasV6T2Ops))
    return ARMBuildAttrs::v6T2;
  else if (STI.hasFeature(ARM::HasV8MBaselineOps))
    return ARMBuildAttrs::v8_M_Base;
  else if (STI.hasFeature(ARM::HasV6MOps))
    return ARMBuildAttrs::v6S_M;
  else if (STI.hasFeature(ARM::HasV6Ops))
    return ARMBuildAttrs::v6;
  else if (STI.hasFeature(ARM::HasV5TEOps))
    return ARMBuildAttrs::v5TE;
  else if (STI.hasFeature(ARM::HasV5TOps))
    return ARMBuildAttrs::v5T;
  else if (STI.hasFeature(ARM::HasV4TOps))
    return ARMBuildAttrs::v4T;
  else
    return ARMBuildAttrs::v4;
}
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR,
    uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
    bool ReserveXNACK) {
  IsaVersion IVersion = getIsaVersion(STI.getCPU());

  OS << "\t.amdhsa_kernel " << KernelName << '\n';

#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME)   \
  STREAM << "\t\t" << DIRECTIVE << " "                                         \
         << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';

  OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
     << '\n';
  OS << "\t\t.amdhsa_private_segment_fixed_size "
     << KD.private_segment_fixed_size << '\n';

  PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
  PRINT_FIELD(
      OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  // These directives are required.
OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n'; OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n'; if (!ReserveVCC) OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n'; if (IVersion.Major >= 7 && !ReserveFlatScr) OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n'; if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI)) OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n'; PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD, compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD, compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD, compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD, compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD, compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD, compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); if (IVersion.Major >= 9) PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD, compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL); PRINT_FIELD( OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD, compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); PRINT_FIELD( OS, ".amdhsa_exception_fp_ieee_div_zero", KD, compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD, compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD, compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD, compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD, compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); #undef PRINT_FIELD OS << "\t.end_amdhsa_kernel\n"; }