static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) {
  if (STI.getCPU() == "xscale")
    return ARMBuildAttrs::v5TEJ;

  if (STI.hasFeature(ARM::HasV8Ops)) {
    if (STI.hasFeature(ARM::FeatureRClass))
      return ARMBuildAttrs::v8_R;
    return ARMBuildAttrs::v8_A;
  } else if (STI.hasFeature(ARM::HasV8MMainlineOps))
    return ARMBuildAttrs::v8_M_Main;
  else if (STI.hasFeature(ARM::HasV7Ops)) {
    if (STI.hasFeature(ARM::FeatureMClass) && STI.hasFeature(ARM::FeatureDSP))
      return ARMBuildAttrs::v7E_M;
    return ARMBuildAttrs::v7;
  } else if (STI.hasFeature(ARM::HasV6T2Ops))
    return ARMBuildAttrs::v6T2;
  else if (STI.hasFeature(ARM::HasV8MBaselineOps))
    return ARMBuildAttrs::v8_M_Base;
  else if (STI.hasFeature(ARM::HasV6MOps))
    return ARMBuildAttrs::v6S_M;
  else if (STI.hasFeature(ARM::HasV6Ops))
    return ARMBuildAttrs::v6;
  else if (STI.hasFeature(ARM::HasV5TEOps))
    return ARMBuildAttrs::v5TE;
  else if (STI.hasFeature(ARM::HasV5TOps))
    return ARMBuildAttrs::v5T;
  else if (STI.hasFeature(ARM::HasV4TOps))
    return ARMBuildAttrs::v4T;
  else
    return ARMBuildAttrs::v4;
}
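// A minimal standalone sketch (illustrative, not LLVM's API) of the
// precedence the fall-through above encodes: the first feature test that
// passes decides the reported architecture, so newer architectures must be
// tested before the older ones whose feature bits they also imply.
#include <cstdio>
#include <utility>
#include <vector>

static const char *
pickArch(const std::vector<std::pair<bool, const char *>> &Tests) {
  for (const auto &T : Tests)
    if (T.first) // first matching feature bit wins
      return T.second;
  return "v4"; // default when no feature bit is set
}

int main() {
  // A v7 core also sets the v6T2 feature bit; the test order makes "v7" win.
  std::printf("%s\n",
              pickArch({{false, "v8-A"}, {true, "v7"}, {true, "v6T2"}}));
}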
AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
                                                 const MCSubtargetInfo &STI)
    : AMDGPUTargetStreamer(S), Streamer(S) {
  MCAssembler &MCA = getStreamer().getAssembler();
  unsigned EFlags = MCA.getELFHeaderEFlags();

  EFlags &= ~ELF::EF_AMDGPU_MACH;
  EFlags |= getMACH(STI.getCPU());

  EFlags &= ~ELF::EF_AMDGPU_XNACK;
  if (AMDGPU::hasXNACK(STI))
    EFlags |= ELF::EF_AMDGPU_XNACK;

  MCA.setELFHeaderEFlags(EFlags);
}
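// A minimal sketch of the clear-then-set e_flags update used above: clear the
// whole multi-bit field before inserting the new value, and reset a feature
// bit before conditionally setting it. The mask and bit values here are made
// up for illustration; they are not the real ELF::EF_AMDGPU_* constants.
#include <cassert>
#include <cstdint>

constexpr uint32_t MACH_MASK = 0x0FF; // hypothetical machine-field mask
constexpr uint32_t XNACK_BIT = 0x100; // hypothetical feature bit

static uint32_t updateEFlags(uint32_t EFlags, uint32_t Mach, bool XNack) {
  EFlags &= ~MACH_MASK; // clear the whole field before inserting
  EFlags |= Mach;       // insert the new machine value
  EFlags &= ~XNACK_BIT; // reset, then conditionally set the feature bit
  if (XNack)
    EFlags |= XNACK_BIT;
  return EFlags;
}

int main() { assert(updateEFlags(0xFFF, 0x2A, false) == 0xE2A); }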
/// Emit the build attributes that only depend on the hardware that we expect
/// to be available, and not on the ABI, or any source-language choices.
void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
  switchVendor("aeabi");

  const StringRef CPUString = STI.getCPU();
  if (!CPUString.empty() && !CPUString.startswith("generic")) {
    // FIXME: remove krait check when GNU tools support krait cpu
    if (STI.hasFeature(ARM::ProcKrait)) {
      emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
      // We consider krait as a "cortex-a9" + hwdiv CPU
      // Enable hwdiv through ".arch_extension idiv"
      if (STI.hasFeature(ARM::FeatureHWDivThumb) ||
          STI.hasFeature(ARM::FeatureHWDivARM))
        emitArchExtension(ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM);
    } else {
      emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
    }
  }

  emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(STI));

  if (STI.hasFeature(ARM::FeatureAClass)) {
    emitAttribute(ARMBuildAttrs::CPU_arch_profile,
                  ARMBuildAttrs::ApplicationProfile);
  } else if (STI.hasFeature(ARM::FeatureRClass)) {
    emitAttribute(ARMBuildAttrs::CPU_arch_profile,
                  ARMBuildAttrs::RealTimeProfile);
  } else if (STI.hasFeature(ARM::FeatureMClass)) {
    emitAttribute(ARMBuildAttrs::CPU_arch_profile,
                  ARMBuildAttrs::MicroControllerProfile);
  }

  emitAttribute(ARMBuildAttrs::ARM_ISA_use, STI.hasFeature(ARM::FeatureNoARM)
                                                ? ARMBuildAttrs::Not_Allowed
                                                : ARMBuildAttrs::Allowed);

  if (isV8M(STI)) {
    emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
                  ARMBuildAttrs::AllowThumbDerived);
  } else if (STI.hasFeature(ARM::FeatureThumb2)) {
    emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::AllowThumb32);
  } else if (STI.hasFeature(ARM::HasV4TOps)) {
    emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed);
  }

  if (STI.hasFeature(ARM::FeatureNEON)) {
    /* NEON is not exactly a VFP architecture, but GAS emits one of
     * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
    if (STI.hasFeature(ARM::FeatureFPARMv8)) {
      if (STI.hasFeature(ARM::FeatureCrypto))
        emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8);
      else
        emitFPU(ARM::FK_NEON_FP_ARMV8);
    } else if (STI.hasFeature(ARM::FeatureVFP4))
      emitFPU(ARM::FK_NEON_VFPV4);
    else
      emitFPU(STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_NEON_FP16
                                               : ARM::FK_NEON);
    // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
    if (STI.hasFeature(ARM::HasV8Ops))
      emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
                    STI.hasFeature(ARM::HasV8_1aOps)
                        ? ARMBuildAttrs::AllowNeonARMv8_1a
                        : ARMBuildAttrs::AllowNeonARMv8);
  } else {
    if (STI.hasFeature(ARM::FeatureFPARMv8))
      // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
      // FPU, but there are two different names for it depending on the CPU.
      emitFPU(STI.hasFeature(ARM::FeatureD16)
                  ? (STI.hasFeature(ARM::FeatureVFPOnlySP)
                         ? ARM::FK_FPV5_SP_D16
                         : ARM::FK_FPV5_D16)
                  : ARM::FK_FP_ARMV8);
    else if (STI.hasFeature(ARM::FeatureVFP4))
      emitFPU(STI.hasFeature(ARM::FeatureD16)
                  ? (STI.hasFeature(ARM::FeatureVFPOnlySP)
                         ? ARM::FK_FPV4_SP_D16
                         : ARM::FK_VFPV4_D16)
                  : ARM::FK_VFPV4);
    else if (STI.hasFeature(ARM::FeatureVFP3))
      emitFPU(
          STI.hasFeature(ARM::FeatureD16)
              // +d16
              ? (STI.hasFeature(ARM::FeatureVFPOnlySP)
                     ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
                                                         : ARM::FK_VFPV3XD)
                     : (STI.hasFeature(ARM::FeatureFP16)
                            ? ARM::FK_VFPV3_D16_FP16
                            : ARM::FK_VFPV3_D16))
              // -d16
              : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16
                                                  : ARM::FK_VFPV3));
    else if (STI.hasFeature(ARM::FeatureVFP2))
      emitFPU(ARM::FK_VFPV2);
  }

  // ABI_HardFP_use attribute to indicate single precision FP.
  if (STI.hasFeature(ARM::FeatureVFPOnlySP))
    emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
                  ARMBuildAttrs::HardFPSinglePrecision);

  if (STI.hasFeature(ARM::FeatureFP16))
    emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);

  if (STI.hasFeature(ARM::FeatureMP))
    emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);

  // Hardware divide in ARM mode is part of base arch, starting from ARMv8.
  // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
  // It is not possible to produce DisallowDIV: if hwdiv is present in the base
  // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits.
  // AllowDIVExt is only emitted if hwdiv isn't available in the base arch;
  // otherwise, the default value (AllowDIVIfExists) applies.
  if (STI.hasFeature(ARM::FeatureHWDivARM) && !STI.hasFeature(ARM::HasV8Ops))
    emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);

  if (STI.hasFeature(ARM::FeatureDSP) && isV8M(STI))
    emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed);

  if (STI.hasFeature(ARM::FeatureStrictAlign))
    emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
                  ARMBuildAttrs::Not_Allowed);
  else
    emitAttribute(ARMBuildAttrs::CPU_unaligned_access, ARMBuildAttrs::Allowed);

  if (STI.hasFeature(ARM::FeatureTrustZone) &&
      STI.hasFeature(ARM::FeatureVirtualization))
    emitAttribute(ARMBuildAttrs::Virtualization_use,
                  ARMBuildAttrs::AllowTZVirtualization);
  else if (STI.hasFeature(ARM::FeatureTrustZone))
    emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZ);
  else if (STI.hasFeature(ARM::FeatureVirtualization))
    emitAttribute(ARMBuildAttrs::Virtualization_use,
                  ARMBuildAttrs::AllowVirtualization);
}
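// Standalone sketch (illustrative names, not LLVM's) of the VFPv3 variant
// selection buried in the nested conditionals above: the d16,
// single-precision-only, and half-precision feature bits pick one of six
// FPU names, matching the FK_VFPV3* constants.
#include <cstdio>

static const char *vfpv3Name(bool D16, bool OnlySP, bool FP16) {
  if (!D16) // -d16: full register file
    return FP16 ? "vfpv3-fp16" : "vfpv3";
  if (OnlySP) // +d16, single precision only
    return FP16 ? "vfpv3xd-fp16" : "vfpv3xd";
  return FP16 ? "vfpv3-d16-fp16" : "vfpv3-d16"; // +d16, double precision
}

int main() {
  std::printf("%s\n", vfpv3Name(true, false, true)); // prints vfpv3-d16-fp16
}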
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR,
    uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
    bool ReserveXNACK) {
  IsaVersion IVersion = getIsaVersion(STI.getCPU());

  OS << "\t.amdhsa_kernel " << KernelName << '\n';

#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME)  \
  STREAM << "\t\t" << DIRECTIVE << " "                                        \
         << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';

  OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
     << '\n';
  OS << "\t\t.amdhsa_private_segment_fixed_size "
     << KD.private_segment_fixed_size << '\n';

  PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
  PRINT_FIELD(
      OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  // These directives are required.
  OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
  OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';

  if (!ReserveVCC)
    OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
  if (IVersion.Major >= 7 && !ReserveFlatScr)
    OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
  if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
    OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';

  PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
  PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
  PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
  if (IVersion.Major >= 9)
    PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
                compute_pgm_rsrc1,
                amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
  PRINT_FIELD(
      OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_FIELD(
      OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
#undef PRINT_FIELD

  OS << "\t.end_amdhsa_kernel\n";
}
/// Non-symmetrical. See if these two instructions are fit for a duplex pair.
bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII,
                                             MCInst const &MIa, bool ExtendedA,
                                             MCInst const &MIb, bool ExtendedB,
                                             bool bisReversable,
                                             MCSubtargetInfo const &STI) {
  // Slot 1 cannot be extended in duplexes (PRM 10.5).
  if (ExtendedA)
    return false;
  // Only A2_addi and A2_tfrsi can be extended in duplex form (PRM 10.5).
  if (ExtendedB) {
    unsigned Opcode = MIb.getOpcode();
    if ((Opcode != Hexagon::A2_addi) && (Opcode != Hexagon::A2_tfrsi))
      return false;
  }

  unsigned MIaG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIa),
           MIbG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIb);

  static std::map<unsigned, unsigned> subinstOpcodeMap(std::begin(opcodeData),
                                                       std::end(opcodeData));

  // If a duplex contains 2 insns in the same group, the insns must be
  // ordered such that the numerically smaller opcode is in slot 1.
  if ((MIaG != HexagonII::HSIG_None) && (MIaG == MIbG) && bisReversable) {
    MCInst SubInst0 = HexagonMCInstrInfo::deriveSubInst(MIa);
    MCInst SubInst1 = HexagonMCInstrInfo::deriveSubInst(MIb);

    unsigned zeroedSubInstS0 =
        subinstOpcodeMap.find(SubInst0.getOpcode())->second;
    unsigned zeroedSubInstS1 =
        subinstOpcodeMap.find(SubInst1.getOpcode())->second;

    if (zeroedSubInstS0 < zeroedSubInstS1)
      // subinstS0 (maps to slot 0) must be greater than
      // subinstS1 (maps to slot 1).
      return false;
  }

  // allocframe must always be in slot 0.
  if (MIb.getOpcode() == Hexagon::S2_allocframe)
    return false;

  if ((MIaG != HexagonII::HSIG_None) && (MIbG != HexagonII::HSIG_None)) {
    // Prevent 2 instructions with extenders from duplexing.
    // Note that MIb (slot 1) can be extended and MIa (slot 0) can never be
    // extended.
    if (subInstWouldBeExtended(MIa))
      return false;
    // If duplexing produces an extender, but the original did not have an
    // extender, do not duplex.
    if (subInstWouldBeExtended(MIb) && !ExtendedB)
      return false;
  }

  // If jumpr r31 appears, it must be in slot 0, and never slot 1 (MIb).
  if (MIbG == HexagonII::HSIG_L2) {
    if ((MIb.getNumOperands() > 1) && MIb.getOperand(1).isReg() &&
        (MIb.getOperand(1).getReg() == Hexagon::R31))
      return false;
    if ((MIb.getNumOperands() > 0) && MIb.getOperand(0).isReg() &&
        (MIb.getOperand(0).getReg() == Hexagon::R31))
      return false;
  }

  if (STI.getCPU().equals_lower("hexagonv4") ||
      STI.getCPU().equals_lower("hexagonv5") ||
      STI.getCPU().equals_lower("hexagonv55") ||
      STI.getCPU().equals_lower("hexagonv60")) {
    // If a store appears, it must be in slot 0 (MIa) first, and then slot 1
    // (MIb); therefore, the pair is not duplexable if slot 1 is a store and
    // slot 0 is not.
    if ((MIbG == HexagonII::HSIG_S1) || (MIbG == HexagonII::HSIG_S2))
      if ((MIaG != HexagonII::HSIG_S1) && (MIaG != HexagonII::HSIG_S2))
        return false;
  }

  return isDuplexPairMatch(MIaG, MIbG);
}
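// Illustrative reduction (not LLVM's API) of the same-group ordering rule
// above: given a table like subinstOpcodeMap from real opcodes to zeroed
// sub-instruction opcodes, the slot-0 opcode must not compare less than the
// slot-1 opcode, so the numerically smaller opcode always lands in slot 1.
#include <cassert>
#include <map>

static bool slotOrderOK(const std::map<unsigned, unsigned> &ZeroedOpc,
                        unsigned Slot0Opc, unsigned Slot1Opc) {
  return ZeroedOpc.at(Slot0Opc) >= ZeroedOpc.at(Slot1Opc);
}

int main() {
  // Hypothetical opcodes 100 and 200 mapping to zeroed values 7 and 3.
  const std::map<unsigned, unsigned> ZeroedOpc = {{100, 7}, {200, 3}};
  assert(slotOrderOK(ZeroedOpc, 100, 200));  // 7 >= 3: ordering is valid
  assert(!slotOrderOK(ZeroedOpc, 200, 100)); // 3 < 7: pair must be swapped
}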
bool X86MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI,
                                              const MCInst &Inst) const {
  if (STI.getCPU() == "btver2") {
    // Reference: Agner Fog's microarchitecture.pdf - Section 20 "AMD Bobcat
    // and Jaguar pipeline", subsection 8 "Dependency-breaking instructions".
    switch (Inst.getOpcode()) {
    default:
      return false;
    case X86::SUB32rr:
    case X86::SUB64rr:
    case X86::SBB32rr:
    case X86::SBB64rr:
    case X86::XOR32rr:
    case X86::XOR64rr:
    case X86::XORPSrr:
    case X86::XORPDrr:
    case X86::VXORPSrr:
    case X86::VXORPDrr:
    case X86::ANDNPSrr:
    case X86::VANDNPSrr:
    case X86::ANDNPDrr:
    case X86::VANDNPDrr:
    case X86::PXORrr:
    case X86::VPXORrr:
    case X86::PANDNrr:
    case X86::VPANDNrr:
    case X86::PSUBBrr:
    case X86::PSUBWrr:
    case X86::PSUBDrr:
    case X86::PSUBQrr:
    case X86::VPSUBBrr:
    case X86::VPSUBWrr:
    case X86::VPSUBDrr:
    case X86::VPSUBQrr:
    case X86::PCMPEQBrr:
    case X86::PCMPEQWrr:
    case X86::PCMPEQDrr:
    case X86::PCMPEQQrr:
    case X86::VPCMPEQBrr:
    case X86::VPCMPEQWrr:
    case X86::VPCMPEQDrr:
    case X86::VPCMPEQQrr:
    case X86::PCMPGTBrr:
    case X86::PCMPGTWrr:
    case X86::PCMPGTDrr:
    case X86::PCMPGTQrr:
    case X86::VPCMPGTBrr:
    case X86::VPCMPGTWrr:
    case X86::VPCMPGTDrr:
    case X86::VPCMPGTQrr:
    case X86::MMX_PXORirr:
    case X86::MMX_PANDNirr:
    case X86::MMX_PSUBBirr:
    case X86::MMX_PSUBDirr:
    case X86::MMX_PSUBQirr:
    case X86::MMX_PSUBWirr:
    case X86::MMX_PCMPGTBirr:
    case X86::MMX_PCMPGTDirr:
    case X86::MMX_PCMPGTWirr:
    case X86::MMX_PCMPEQBirr:
    case X86::MMX_PCMPEQDirr:
    case X86::MMX_PCMPEQWirr:
      return Inst.getOperand(1).getReg() == Inst.getOperand(2).getReg();
    case X86::CMP32rr:
    case X86::CMP64rr:
      return Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg();
    }
  }
  return false;
}
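// Hedged usage sketch: `vpxor %xmm1, %xmm1, %xmm1` produces zero regardless
// of the register's prior value, so it should be reported as
// dependency-breaking on btver2. This assumes a caller that has already
// constructed the analysis object and an MCSubtargetInfo for the "btver2"
// CPU (target registration is omitted); the X86::* enums come from the
// backend's generated headers.
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCSubtargetInfo.h"

static bool isZeroIdiomOnBtver2(const llvm::MCInstrAnalysis &MCIA,
                                const llvm::MCSubtargetInfo &STI) {
  using namespace llvm;
  MCInst Inst;
  Inst.setOpcode(X86::VPXORrr);
  Inst.addOperand(MCOperand::createReg(X86::XMM1)); // destination
  Inst.addOperand(MCOperand::createReg(X86::XMM1)); // source 1
  Inst.addOperand(MCOperand::createReg(X86::XMM1)); // source 2
  // Identical sources mean the result is always zero: no true dependency.
  return MCIA.isDependencyBreaking(STI, Inst);
}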
HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII,
                                 MCSubtargetInfo const &STI)
    : MCII(MCII), STI(STI) {
  reset();
  HexagonCVIResource::SetupTUL(&TUL, STI.getCPU());
}