void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
                                  StringRef Annot, const MCSubtargetInfo &STI) {
  // If verbose assembly is enabled, we can print some informative comments.
  if (CommentStream)
    HasCustomInstComment = EmitAnyX86InstComments(MI, *CommentStream, MII);

  printInstFlags(MI, OS);

  // Output CALLpcrel32 as "callq" in 64-bit mode.
  // In Intel annotation it's always emitted as "call".
  //
  // TODO: Probably this hack should be redesigned via InstAlias in
  // InstrInfo.td as soon as Requires clause is supported properly
  // for InstAlias.
  if (MI->getOpcode() == X86::CALLpcrel32 &&
      (STI.getFeatureBits()[X86::Mode64Bit])) {
    OS << "\tcallq\t";
    printPCRelImm(MI, 0, OS);
  }
  // data16 and data32 both have the same encoding of 0x66. While data32 is
  // valid only in 16-bit systems, data16 is valid in the rest.
  // The Requires clause does not appear to be fully supported here, which
  // causes 0x66 to be interpreted as "data16" by the asm printer.
  // Thus we add an adjustment here in order to print the "right" instruction.
  else if (MI->getOpcode() == X86::DATA16_PREFIX &&
           STI.getFeatureBits()[X86::Mode16Bit]) {
    OS << "\tdata32";
  }
  // Try to print any aliases first.
  else if (!printAliasInstr(MI, OS))
    printInstruction(MI, OS);

  // Next always print the annotation.
  printAnnotation(OS, Annot);
}
Example #2
void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
                                       SmallVectorImpl<MCFixup> &Fixups,
                                       const MCSubtargetInfo &STI) const {
  verifyInstructionPredicates(MI,
                              computeAvailableFeatures(STI.getFeatureBits()));

  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  if (MI.getOpcode() == R600::RETURN ||
      MI.getOpcode() == R600::FETCH_CLAUSE ||
      MI.getOpcode() == R600::ALU_CLAUSE ||
      MI.getOpcode() == R600::BUNDLE ||
      MI.getOpcode() == R600::KILL) {
    return;
  } else if (IS_VTX(Desc)) {
    uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups, STI);
    uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
    if (!(STI.getFeatureBits()[R600::FeatureCaymanISA])) {
      InstWord2 |= 1 << 19; // Mega-Fetch bit
    }

    Emit(InstWord01, OS);
    Emit(InstWord2, OS);
    Emit((uint32_t) 0, OS);
  } else if (IS_TEX(Desc)) {
    int64_t Sampler = MI.getOperand(14).getImm();

    int64_t SrcSelect[4] = {
      MI.getOperand(2).getImm(),
      MI.getOperand(3).getImm(),
      MI.getOperand(4).getImm(),
      MI.getOperand(5).getImm()
    };
    int64_t Offsets[3] = {
      MI.getOperand(6).getImm() & 0x1F,
      MI.getOperand(7).getImm() & 0x1F,
      MI.getOperand(8).getImm() & 0x1F
    };

    uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups, STI);
    uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
        SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
        SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
        Offsets[2] << 10;

    Emit(Word01, OS);
    Emit(Word2, OS);
    Emit((uint32_t) 0, OS);
  } else {
    uint64_t Inst = getBinaryCodeForInstr(MI, Fixups, STI);
    if (STI.getFeatureBits()[R600::FeatureR600ALUInst] &&
        ((Desc.TSFlags & R600_InstFlag::OP1) ||
         (Desc.TSFlags & R600_InstFlag::OP2))) {
      uint64_t ISAOpCode = Inst & (0x3FFULL << 39);
      Inst &= ~(0x3FFULL << 39);
      Inst |= ISAOpCode << 1;
    }
    Emit(Inst, OS);
  }
}
Example #3
Optional<double>
MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI,
                                      const MCSchedClassDesc &SCDesc) {
  Optional<double> Throughput;
  const MCSchedModel &SM = STI.getSchedModel();
  const MCWriteProcResEntry *I = STI.getWriteProcResBegin(&SCDesc);
  const MCWriteProcResEntry *E = STI.getWriteProcResEnd(&SCDesc);
  for (; I != E; ++I) {
    if (!I->Cycles)
      continue;
    unsigned NumUnits = SM.getProcResource(I->ProcResourceIdx)->NumUnits;
    double Temp = NumUnits * 1.0 / I->Cycles;
    Throughput = Throughput ? std::min(Throughput.getValue(), Temp) : Temp;
  }
  return Throughput ? 1 / Throughput.getValue() : Throughput;
}
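As a side note (not part of the LLVM sources): a self-contained toy rendition of the arithmetic above, with made-up resource counts and cycle figures. The loop keeps the smallest per-resource rate NumUnits / Cycles, and the reciprocal throughput is its inverse, in cycles per instruction.

#include <algorithm>
#include <cstdio>
#include <optional>

// Toy model of one write-proc-resource entry: the instruction occupies a
// resource that has NumUnits identical copies for Cycles cycles.
struct ToyProcResEntry {
  unsigned NumUnits;
  unsigned Cycles;
};

int main() {
  // Hypothetical entries: two ALUs busy for 1 cycle, one port busy for 2 cycles.
  ToyProcResEntry Entries[] = {{2, 1}, {1, 2}};
  std::optional<double> Throughput;
  for (const ToyProcResEntry &E : Entries) {
    if (!E.Cycles)
      continue;
    double Temp = E.NumUnits * 1.0 / E.Cycles;
    // Keep the smallest rate: the most constrained resource is the bottleneck.
    Throughput = Throughput ? std::min(*Throughput, Temp) : Temp;
  }
  if (Throughput)
    std::printf("reciprocal throughput = %.2f cycles/instr\n", 1 / *Throughput);
  // Prints 2.00: the single port busy for 2 cycles limits the rate to 0.5 per cycle.
  return 0;
}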
Example #4
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
                                  StringRef Annot, const MCSubtargetInfo &STI) {
  const MCInstrDesc &Desc = MII.get(MI->getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // If verbose assembly is enabled, we can print some informative comments.
  if (CommentStream)
    HasCustomInstComment =
        EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);

  if (TSFlags & X86II::LOCK)
    OS << "\tlock\t";

  // Output CALLpcrel32 as "callq" in 64-bit mode.
  // In Intel annotation it's always emitted as "call".
  //
  // TODO: Probably this hack should be redesigned via InstAlias in
  // InstrInfo.td as soon as Requires clause is supported properly
  // for InstAlias.
  if (MI->getOpcode() == X86::CALLpcrel32 &&
      (STI.getFeatureBits()[X86::Mode64Bit])) {
    OS << "\tcallq\t";
    printPCRelImm(MI, 0, OS);
  }
  // Try to print any aliases first.
  else if (!printAliasInstr(MI, OS))
    printInstruction(MI, OS);

  // Next always print the annotation.
  printAnnotation(OS, Annot);
}
Example #5
static bool getARMLoadDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
                                      std::string &Info) {
  assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
         "cannot predicate thumb instructions");

  assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments");
  bool ListContainsPC = false, ListContainsLR = false;
  for (unsigned OI = 4, OE = MI.getNumOperands(); OI < OE; ++OI) {
    assert(MI.getOperand(OI).isReg() && "expected register");
    switch (MI.getOperand(OI).getReg()) {
    default:
      break;
    case ARM::LR:
      ListContainsLR = true;
      break;
    case ARM::PC:
      ListContainsPC = true;
      break;
    case ARM::SP:
      Info = "use of SP in the list is deprecated";
      return true;
    }
  }

  if (ListContainsPC && ListContainsLR) {
    Info = "use of LR and PC simultaneously in the list is deprecated";
    return true;
  }

  return false;
}
Example #6
static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
                                  std::string &Info) {
  if (STI.getFeatureBits() & llvm::ARM::HasV7Ops &&
      (MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) &&
      (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) &&
      // Checks for the deprecated CP15ISB encoding:
      // mcr p15, #0, rX, c7, c5, #4
      (MI.getOperand(3).isImm() && MI.getOperand(3).getImm() == 7)) {
    if ((MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 4)) {
      if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 5) {
        Info = "deprecated since v7, use 'isb'";
        return true;
      }

      // Checks for the deprecated CP15DSB encoding:
      // mcr p15, #0, rX, c7, c10, #4
      if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10) {
        Info = "deprecated since v7, use 'dsb'";
        return true;
      }
    }
    // Checks for the deprecated CP15DMB encoding:
    // mcr p15, #0, rX, c7, c10, #5
    if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10 &&
        (MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 5)) {
      Info = "deprecated since v7, use 'dmb'";
      return true;
    }
  }
  return false;
}
void WebAssemblyMCCodeEmitter::encodeInstruction(
    const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
    const MCSubtargetInfo &STI) const {
  // FIXME: This is not the real binary encoding. This is an extremely
  // over-simplified encoding where we just use uint64_t for everything. This
  // is a temporary measure.
  support::endian::Writer<support::little>(OS).write<uint64_t>(MI.getOpcode());
  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  if (Desc.isVariadic())
    support::endian::Writer<support::little>(OS).write<uint64_t>(
        MI.getNumOperands() - Desc.NumOperands);
  for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
    const MCOperand &MO = MI.getOperand(i);
    if (MO.isReg()) {
      support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getReg());
    } else if (MO.isImm()) {
      support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getImm());
    } else if (MO.isFPImm()) {
      support::endian::Writer<support::little>(OS).write<double>(MO.getFPImm());
    } else if (MO.isExpr()) {
      support::endian::Writer<support::little>(OS).write<uint64_t>(0);
      Fixups.push_back(MCFixup::create(
          (1 + MCII.get(MI.getOpcode()).isVariadic() + i) * sizeof(uint64_t),
          MO.getExpr(),
          STI.getTargetTriple().isArch64Bit() ? FK_Data_8 : FK_Data_4,
          MI.getLoc()));
      ++MCNumFixups;
    } else {
      llvm_unreachable("unexpected operand kind");
    }
  }

  ++MCNumEmitted; // Keep track of the # of mi's emitted.
}
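As an aside (not LLVM code): the FIXME above describes a temporary format in which every field is written as one 8-byte little-endian word. A minimal, self-contained sketch of that byte layout, with invented opcode and operand values:

#include <cstdint>
#include <cstdio>
#include <vector>

// Toy little-endian serializer (not LLVM's support::endian API): each field
// becomes one 8-byte word with the least significant byte first.
static void writeLE64(std::vector<uint8_t> &Out, uint64_t V) {
  for (int i = 0; i < 8; ++i)
    Out.push_back(uint8_t(V >> (8 * i)));
}

int main() {
  std::vector<uint8_t> Stream;
  writeLE64(Stream, 0x2A);  // hypothetical opcode word
  writeLE64(Stream, 3);     // hypothetical register operand
  writeLE64(Stream, 100);   // hypothetical immediate operand
  std::printf("%zu bytes emitted (%zu words)\n", Stream.size(), Stream.size() / 8);
  return 0; // prints "24 bytes emitted (3 words)"
}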
Example #8
PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI,
                               const MCRegisterInfo &MRI,
                               const MCSubtargetInfo &STI) :
  MCInstPrinter(MAI, MRI) {
  // Initialize the set of available features.
  setAvailableFeatures(STI.getFeatureBits());
}
void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
                                           SmallVectorImpl<MCFixup> &Fixups,
                                           const MCSubtargetInfo &STI) const {
  verifyInstructionPredicates(MI,
                              computeAvailableFeatures(STI.getFeatureBits()));

  unsigned Bits = getBinaryCodeForInstr(MI, Fixups, STI);

  if (Ctx.getAsmInfo()->isLittleEndian()) {
    // Output the bits in little-endian byte order.
    support::endian::Writer<support::little>(OS).write<uint32_t>(Bits);
  } else {
    // Output the bits in big-endian byte order.
    support::endian::Writer<support::big>(OS).write<uint32_t>(Bits);
  }
  unsigned tlsOpNo = 0;
  switch (MI.getOpcode()) {
  default: break;
  case SP::TLS_CALL:   tlsOpNo = 1; break;
  case SP::TLS_ADDrr:
  case SP::TLS_ADDXrr:
  case SP::TLS_LDrr:
  case SP::TLS_LDXrr:  tlsOpNo = 3; break;
  }
  if (tlsOpNo != 0) {
    const MCOperand &MO = MI.getOperand(tlsOpNo);
    uint64_t op = getMachineOpValue(MI, MO, Fixups, STI);
    assert(op == 0 && "Unexpected operand value!");
    (void)op; // suppress warning.
  }

  ++MCNumEmitted;  // Keep track of the # of mi's emitted.
}
Example #10
/// Return the slots this instruction can execute out of
unsigned HexagonMCInstrInfo::getUnits(MCInstrInfo const &MCII,
                                      MCSubtargetInfo const &STI,
                                      MCInst const &MCI) {
  const InstrItinerary *II = STI.getSchedModel().InstrItineraries;
  int SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass();
  return ((II[SchedClass].FirstStage + HexagonStages)->getUnits());
}
Example #11
static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
                                  std::string &Info) {
  if (STI.getFeatureBits() & llvm::ARM::HasV8Ops &&
      MI.getOperand(1).isImm() && MI.getOperand(1).getImm() != 8) {
    Info = "applying IT instruction to more than one subsequent instruction is deprecated";
    return true;
  }

  return false;
}
void DispatchStage::updateRAWDependencies(ReadState &RS,
                                          const MCSubtargetInfo &STI) {
  SmallVector<WriteRef, 4> DependentWrites;

  collectWrites(DependentWrites, RS.getRegisterID());
  RS.setDependentWrites(DependentWrites.size());
  // We know that this read depends on all the writes in DependentWrites.
  // For each write, check if we have ReadAdvance information, and use it
  // to figure out in how many cycles this read becomes available.
  const ReadDescriptor &RD = RS.getDescriptor();
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID);
  for (WriteRef &WR : DependentWrites) {
    WriteState &WS = *WR.getWriteState();
    unsigned WriteResID = WS.getWriteResourceID();
    int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID);
    WS.addUser(&RS, ReadAdvance);
  }
}
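A toy illustration (invented numbers, not llvm-mca's implementation) of how a ReadAdvance value is typically interpreted: it moves the dependent read earlier by that many cycles relative to the producer's write latency, clamped at zero.

#include <algorithm>
#include <cstdio>

// Hypothetical numbers: a dependent read normally waits for the producer's
// full write latency, but a positive ReadAdvance lets it start that many
// cycles earlier, never becoming available before cycle 0.
static int cyclesUntilAvailable(int WriteLatency, int ReadAdvance) {
  return std::max(0, WriteLatency - ReadAdvance);
}

int main() {
  std::printf("%d\n", cyclesUntilAvailable(5, 0)); // 5: no forwarding
  std::printf("%d\n", cyclesUntilAvailable(5, 3)); // 2: operand forwarded early
  std::printf("%d\n", cyclesUntilAvailable(2, 4)); // 0: clamped
  return 0;
}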
Example #13
bool MCInstrDesc::getDeprecatedInfo(MCInst &MI, const MCSubtargetInfo &STI,
                                    std::string &Info) const {
  if (ComplexDeprecationInfo)
    return ComplexDeprecationInfo(MI, STI, Info);
  if (DeprecatedFeature != -1 && STI.getFeatureBits()[DeprecatedFeature]) {
    // FIXME: it would be nice to include the subtarget feature here.
    Info = "deprecated";
    return true;
  }
  return false;
}
Example #14
unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op,
                                         const MCSubtargetInfo &STI) const {
  bool HasThumb2 = STI.getFeatureBits()[ARM::FeatureThumb2];
  bool HasV8MBaselineOps = STI.getFeatureBits()[ARM::HasV8MBaselineOps];

  switch (Op) {
  default:
    return Op;
  case ARM::tBcc:
    return HasThumb2 ? (unsigned)ARM::t2Bcc : Op;
  case ARM::tLDRpci:
    return HasThumb2 ? (unsigned)ARM::t2LDRpci : Op;
  case ARM::tADR:
    return HasThumb2 ? (unsigned)ARM::t2ADR : Op;
  case ARM::tB:
    return HasV8MBaselineOps ? (unsigned)ARM::t2B : Op;
  case ARM::tCBZ:
    return ARM::tHINT;
  case ARM::tCBNZ:
    return ARM::tHINT;
  }
}
Example #15
int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
                                      const MCSchedClassDesc &SCDesc) {
  int Latency = 0;
  for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries;
       DefIdx != DefEnd; ++DefIdx) {
    // Lookup the definition's write latency in SubtargetInfo.
    const MCWriteLatencyEntry *WLEntry =
        STI.getWriteLatencyEntry(&SCDesc, DefIdx);
    // Early exit if we found an invalid latency.
    if (WLEntry->Cycles < 0)
      return WLEntry->Cycles;
    Latency = std::max(Latency, static_cast<int>(WLEntry->Cycles));
  }
  return Latency;
}
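For illustration only (made-up latencies, plain C++ rather than the MC API): the scan above takes the maximum over the per-definition write latencies, and a negative entry acts as an invalid-latency sentinel that is returned unchanged.

#include <algorithm>
#include <cstdio>
#include <vector>

// Hypothetical per-definition write latencies for one scheduling class.
static int toyComputeLatency(const std::vector<int> &WriteLatencies) {
  int Latency = 0;
  for (int Cycles : WriteLatencies) {
    if (Cycles < 0)
      return Cycles; // invalid latency: propagate the sentinel unchanged
    Latency = std::max(Latency, Cycles);
  }
  return Latency;
}

int main() {
  std::printf("%d\n", toyComputeLatency({1, 4, 3}));  // 4: slowest definition wins
  std::printf("%d\n", toyComputeLatency({2, -1, 3})); // -1: invalid entry short-circuits
  return 0;
}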
Example #16
AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
    MCStreamer &S, const MCSubtargetInfo &STI)
    : AMDGPUTargetStreamer(S), Streamer(S) {
  MCAssembler &MCA = getStreamer().getAssembler();
  unsigned EFlags = MCA.getELFHeaderEFlags();

  EFlags &= ~ELF::EF_AMDGPU_MACH;
  EFlags |= getMACH(STI.getCPU());

  EFlags &= ~ELF::EF_AMDGPU_XNACK;
  if (AMDGPU::hasXNACK(STI))
    EFlags |= ELF::EF_AMDGPU_XNACK;

  MCA.setELFHeaderEFlags(EFlags);
}
Example #17
static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
                                       std::string &Info) {
  assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
         "cannot predicate thumb instructions");

  assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments");
  for (unsigned OI = 4, OE = MI.getNumOperands(); OI < OE; ++OI) {
    assert(MI.getOperand(OI).isReg() && "expected register");
    if (MI.getOperand(OI).getReg() == ARM::SP ||
        MI.getOperand(OI).getReg() == ARM::PC) {
      Info = "use of SP or PC in the list is deprecated";
      return true;
    }
  }
  return false;
}
Example #18
void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
                                             SmallVectorImpl<MCFixup> &Fixups,
                                             const MCSubtargetInfo &STI) const {
  verifyInstructionPredicates(MI,
                              computeAvailableFeatures(STI.getFeatureBits()));

  if (MI.getOpcode() == AArch64::TLSDESCCALL) {
    // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the
    // following (BLR) instruction. It doesn't emit any code itself so it
    // doesn't go through the normal TableGenerated channels.
    MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call);
    Fixups.push_back(MCFixup::create(0, MI.getOperand(0).getExpr(), Fixup));
    return;
  }

  uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
  support::endian::Writer<support::little>(OS).write<uint32_t>(Binary);
  ++MCNumEmitted; // Keep track of the # of mi's emitted.
}
Example #19
int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
                                      const MCInstrInfo &MCII,
                                      const MCInst &Inst) const {
  unsigned SchedClass = MCII.get(Inst.getOpcode()).getSchedClass();
  const MCSchedClassDesc *SCDesc = getSchedClassDesc(SchedClass);
  if (!SCDesc->isValid())
    return 0;

  unsigned CPUID = getProcessorID();
  while (SCDesc->isVariant()) {
    SchedClass = STI.resolveVariantSchedClass(SchedClass, &Inst, CPUID);
    SCDesc = getSchedClassDesc(SchedClass);
  }

  if (SchedClass)
    return MCSchedModel::computeInstrLatency(STI, *SCDesc);

  llvm_unreachable("unsupported variant scheduling class");
}
Example #20
Optional<double>
MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI,
                                      const MCInstrInfo &MCII,
                                      const MCInst &Inst) const {
  Optional<double> Throughput;
  unsigned SchedClass = MCII.get(Inst.getOpcode()).getSchedClass();
  const MCSchedClassDesc *SCDesc = getSchedClassDesc(SchedClass);
  if (!SCDesc->isValid())
    return Throughput;

  unsigned CPUID = getProcessorID();
  while (SCDesc->isVariant()) {
    SchedClass = STI.resolveVariantSchedClass(SchedClass, &Inst, CPUID);
    SCDesc = getSchedClassDesc(SchedClass);
  }

  if (SchedClass)
    return MCSchedModel::getReciprocalThroughput(STI, *SCDesc);

  llvm_unreachable("unsupported variant scheduling class");
}
Example #21
unsigned HexagonMCInstrInfo::getOtherReservedSlots(MCInstrInfo const &MCII,
                                                   MCSubtargetInfo const &STI,
                                                   MCInst const &MCI) {
  const InstrItinerary *II = STI.getSchedModel().InstrItineraries;
  int SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass();
  unsigned Slots = 0;

  // FirstStage are slots that this instruction can execute in.
  // FirstStage+1 are slots that are also consumed by this instruction.
  // For example: vmemu can only execute in slot 0 but also consumes slot 1.
  for (unsigned Stage = II[SchedClass].FirstStage + 1;
       Stage < II[SchedClass].LastStage; ++Stage) {
    unsigned Units = (Stage + HexagonStages)->getUnits();
    if (Units > HexagonGetLastSlot())
      break;
    // fyi: getUnits() will return 0x1, 0x2, 0x4 or 0x8
    Slots |= Units;
  }

  // if 0 is returned, then no additional slots are consumed by this inst.
  return Slots;
}
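To make the bitmask convention concrete, here is a toy version with invented stage data (not real Hexagon itineraries): an instruction that executes in slot 0 but also consumes slot 1, like the vmemu case mentioned above, produces the extra-slots mask 0x2.

#include <cstdio>

int main() {
  // Invented per-stage unit masks for one itinerary entry (not real Hexagon
  // data). Stage 0 is the slot the instruction executes in; later stages are
  // slots that are merely consumed. Each mask is one of 0x1, 0x2, 0x4, 0x8.
  unsigned StageUnits[] = {0x1, 0x2};
  unsigned Slots = 0;
  for (unsigned Stage = 1; Stage < sizeof(StageUnits) / sizeof(StageUnits[0]); ++Stage)
    Slots |= StageUnits[Stage]; // accumulate the additionally reserved slots
  std::printf("extra reserved slots mask = 0x%x\n", Slots); // 0x2
  return 0;
}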
Example #22
// Expand PseudoAddTPRel to a simple ADD with the correct relocation.
void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI, raw_ostream &OS,
                                        SmallVectorImpl<MCFixup> &Fixups,
                                        const MCSubtargetInfo &STI) const {
  MCOperand DestReg = MI.getOperand(0);
  MCOperand SrcReg = MI.getOperand(1);
  MCOperand TPReg = MI.getOperand(2);
  assert(TPReg.isReg() && TPReg.getReg() == RISCV::X4 &&
         "Expected thread pointer as second input to TP-relative add");

  MCOperand SrcSymbol = MI.getOperand(3);
  assert(SrcSymbol.isExpr() &&
         "Expected expression as third input to TP-relative add");

  const RISCVMCExpr *Expr = dyn_cast<RISCVMCExpr>(SrcSymbol.getExpr());
  assert(Expr && Expr->getKind() == RISCVMCExpr::VK_RISCV_TPREL_ADD &&
         "Expected tprel_add relocation on TP-relative symbol");

  // Emit the correct tprel_add relocation for the symbol.
  Fixups.push_back(MCFixup::create(
      0, Expr, MCFixupKind(RISCV::fixup_riscv_tprel_add), MI.getLoc()));

  // Emit fixup_riscv_relax for tprel_add where the relax feature is enabled.
  if (STI.getFeatureBits()[RISCV::FeatureRelax]) {
    const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx);
    Fixups.push_back(MCFixup::create(
        0, Dummy, MCFixupKind(RISCV::fixup_riscv_relax), MI.getLoc()));
  }

  // Emit a normal ADD instruction with the given operands.
  MCInst TmpInst = MCInstBuilder(RISCV::ADD)
                       .addOperand(DestReg)
                       .addOperand(SrcReg)
                       .addOperand(TPReg);
  uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
  support::endian::write(OS, Binary, support::little);
}
Example #23
static void populateWrites(InstrDesc &ID, const MCInst &MCI,
                           const MCInstrDesc &MCDesc,
                           const MCSchedClassDesc &SCDesc,
                           const MCSubtargetInfo &STI) {
  // Set if writes through this opcode may update super registers.
  // TODO: on x86-64, a 4-byte write of a general purpose register always
  // fully updates the super-register.
  // More generally, (at least on x86) not all register writes perform
  // a partial (super-)register update.
  // For example, an AVX instruction that writes to an XMM register implicitly
  // zeroes the upper half of every aliasing super-register.
  //
  // For now, we pessimistically assume that writes are all potentially
  // partial register updates. This is a good default for most targets, except
  // for those like x86 which implement a special semantic for certain opcodes.
  // At least on x86, this may lead to an inaccurate prediction of the
  // instruction level parallelism.
  bool FullyUpdatesSuperRegisters = false;

  // Now Populate Writes.

  // This algorithm currently works under the strong (and potentially incorrect)
  // assumption that information related to register def/uses can be obtained
  // from MCInstrDesc.
  //
  // However class MCInstrDesc is used to describe MachineInstr objects and not
  // MCInst objects. To be more specific, MCInstrDesc objects are opcode
  // descriptors that are automatically generated via tablegen based on the
  // instruction set information available from the target .td files.  That
  // means, the number of (explicit) definitions according to MCInstrDesc always
  // matches the cardinality of the `(outs)` set in tablegen.
  //
  // By construction, definitions must appear first in the operand sequence of
  // a MachineInstr. Also, the (outs) sequence is preserved (example: the first
  // element in the outs set is the first operand in the corresponding
  // MachineInstr).  That's the reason why MCInstrDesc only needs to declare the
  // total number of register definitions, and not where those definitions are
  // in the machine operand sequence.
  //
  // Unfortunately, it is not safe to use the information from MCInstrDesc to
  // also describe MCInst objects. An MCInst object can be obtained from a
  // MachineInstr through a lowering step which may restructure the operand
  // sequence (and even remove or introduce new operands). So, there is a high
  // risk that the lowering step breaks the assumptions that register
  // definitions are always at the beginning of the machine operand sequence.
  //
  // This is a fundamental problem, and it is still an open problem. Essentially
  // we have to find a way to correlate def/use operands of a MachineInstr to
  // operands of an MCInst. Otherwise, we cannot correctly reconstruct data
  // dependencies, nor can we correctly interpret the scheduling model, which
  // heavily uses machine operand indices to define processor read-advance
  // information, and to identify processor write resources.  Essentially, we
  // either need something like a MCInstrDesc, but for MCInst, or a way
  // to map MCInst operands back to MachineInstr operands.
  //
  // Unfortunately, we don't have that information now. So, this prototype
  // currently works under the strong assumption that we can always safely trust
  // the content of an MCInstrDesc.  For example, we can query a MCInstrDesc to
  // obtain the number of explicit and implicit register definitions.  We also
  // assume that register definitions always come first in the operand sequence.
  // This last assumption usually makes sense for MachineInstr, where register
  // definitions always appear at the beginning of the operands sequence. In
  // reality, these assumptions could be broken by the lowering step, which can
  // decide to lay out operands in a different order than the original order of
  // operands as specified by the MachineInstr.
  //
  // Things get even more complicated in the presence of "optional" register
  // definitions. For MachineInstr, optional register definitions are always at
  // the end of the operand sequence. Some ARM instructions that may update the
  // status flags specify that register as an optional operand.  Since we don't
  // have operand descriptors for MCInst, we assume for now that the optional
  // definition is always the last operand of a MCInst.  Again, this assumption
  // may be okay for most targets. However, there is no guarantee that targets
  // would respect that.
  //
  // In conclusion: these are for now the strong assumptions made by the tool:
  //  * The number of explicit and implicit register definitions in a MCInst
  //    matches the number of explicit and implicit definitions according to
  //    the opcode descriptor (MCInstrDesc).
  //  * Register definitions take precedence over register uses in the operands
  //    list.
  //  * If an opcode specifies an optional definition, then the optional
  //    definition is always the last operand in the sequence, and it can be
  //    set to zero (i.e. "no register").
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // In the longer term, we need to find a proper solution for this issue.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;
  ID.Writes.resize(TotalDefs);
  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
  unsigned CurrentDef = 0;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency = WLE.Cycles == -1 ? ID.MaxLatency : WLE.Cycles;
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.FullyUpdatesSuperRegs = FullyUpdatesSuperRegisters;
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\tOpIdx=" << Write.OpIndex << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }
Example #24
NVPTXInstPrinter::NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
                                   const MCRegisterInfo &MRI,
                                   const MCSubtargetInfo &STI)
  : MCInstPrinter(MAI, MII, MRI) {
  setAvailableFeatures(STI.getFeatureBits());
}
Example #25
bool MipsMCCodeEmitter::isMips32r6(const MCSubtargetInfo &STI) const {
  return STI.getFeatureBits()[Mips::FeatureMips32r6];
}
Example #26
bool MipsMCCodeEmitter::isMicroMips(const MCSubtargetInfo &STI) const {
    return STI.getFeatureBits() & Mips::FeatureMicroMips;
}
/// Emit the build attributes that only depend on the hardware that we expect
/// to be available, and not on the ABI, or any source-language choices.
void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
  switchVendor("aeabi");

  const StringRef CPUString = STI.getCPU();
  if (!CPUString.empty() && !CPUString.startswith("generic")) {
    // FIXME: remove krait check when GNU tools support krait cpu
    if (STI.hasFeature(ARM::ProcKrait)) {
      emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
      // We consider krait as a "cortex-a9" + hwdiv CPU
      // Enable hwdiv through ".arch_extension idiv"
      if (STI.hasFeature(ARM::FeatureHWDivThumb) ||
          STI.hasFeature(ARM::FeatureHWDivARM))
        emitArchExtension(ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM);
    } else {
      emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
    }
  }

  emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(STI));

  if (STI.hasFeature(ARM::FeatureAClass)) {
    emitAttribute(ARMBuildAttrs::CPU_arch_profile,
                      ARMBuildAttrs::ApplicationProfile);
  } else if (STI.hasFeature(ARM::FeatureRClass)) {
    emitAttribute(ARMBuildAttrs::CPU_arch_profile,
                      ARMBuildAttrs::RealTimeProfile);
  } else if (STI.hasFeature(ARM::FeatureMClass)) {
    emitAttribute(ARMBuildAttrs::CPU_arch_profile,
                      ARMBuildAttrs::MicroControllerProfile);
  }

  emitAttribute(ARMBuildAttrs::ARM_ISA_use, STI.hasFeature(ARM::FeatureNoARM)
                                                ? ARMBuildAttrs::Not_Allowed
                                                : ARMBuildAttrs::Allowed);

  if (isV8M(STI)) {
    emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
                      ARMBuildAttrs::AllowThumbDerived);
  } else if (STI.hasFeature(ARM::FeatureThumb2)) {
    emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
                      ARMBuildAttrs::AllowThumb32);
  } else if (STI.hasFeature(ARM::HasV4TOps)) {
    emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed);
  }

  if (STI.hasFeature(ARM::FeatureNEON)) {
    /* NEON is not exactly a VFP architecture, but GAS emits one of
     * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
    if (STI.hasFeature(ARM::FeatureFPARMv8)) {
      if (STI.hasFeature(ARM::FeatureCrypto))
        emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8);
      else
        emitFPU(ARM::FK_NEON_FP_ARMV8);
    } else if (STI.hasFeature(ARM::FeatureVFP4))
      emitFPU(ARM::FK_NEON_VFPV4);
    else
      emitFPU(STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_NEON_FP16
                                               : ARM::FK_NEON);
    // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
    if (STI.hasFeature(ARM::HasV8Ops))
      emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
                    STI.hasFeature(ARM::HasV8_1aOps)
                        ? ARMBuildAttrs::AllowNeonARMv8_1a
                        : ARMBuildAttrs::AllowNeonARMv8);
  } else {
    if (STI.hasFeature(ARM::FeatureFPARMv8))
      // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
      // FPU, but there are two different names for it depending on the CPU.
      emitFPU(STI.hasFeature(ARM::FeatureD16)
                  ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV5_SP_D16
                                                           : ARM::FK_FPV5_D16)
                  : ARM::FK_FP_ARMV8);
    else if (STI.hasFeature(ARM::FeatureVFP4))
      emitFPU(STI.hasFeature(ARM::FeatureD16)
                  ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV4_SP_D16
                                                           : ARM::FK_VFPV4_D16)
                  : ARM::FK_VFPV4);
    else if (STI.hasFeature(ARM::FeatureVFP3))
      emitFPU(
          STI.hasFeature(ARM::FeatureD16)
              // +d16
              ? (STI.hasFeature(ARM::FeatureVFPOnlySP)
                     ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
                                                         : ARM::FK_VFPV3XD)
                     : (STI.hasFeature(ARM::FeatureFP16)
                            ? ARM::FK_VFPV3_D16_FP16
                            : ARM::FK_VFPV3_D16))
              // -d16
              : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16
                                                  : ARM::FK_VFPV3));
    else if (STI.hasFeature(ARM::FeatureVFP2))
      emitFPU(ARM::FK_VFPV2);
  }

  // ABI_HardFP_use attribute to indicate single precision FP.
  if (STI.hasFeature(ARM::FeatureVFPOnlySP))
    emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
                  ARMBuildAttrs::HardFPSinglePrecision);

  if (STI.hasFeature(ARM::FeatureFP16))
    emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);

  if (STI.hasFeature(ARM::FeatureMP))
    emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);

  // Hardware divide in ARM mode is part of base arch, starting from ARMv8.
  // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
  // It is not possible to produce DisallowDIV: if hwdiv is present in the base
  // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits.
  // AllowDIVExt is only emitted if hwdiv isn't available in the base arch;
  // otherwise, the default value (AllowDIVIfExists) applies.
  if (STI.hasFeature(ARM::FeatureHWDivARM) && !STI.hasFeature(ARM::HasV8Ops))
    emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);

  if (STI.hasFeature(ARM::FeatureDSP) && isV8M(STI))
    emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed);

  if (STI.hasFeature(ARM::FeatureStrictAlign))
    emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
                  ARMBuildAttrs::Not_Allowed);
  else
    emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
                  ARMBuildAttrs::Allowed);

  if (STI.hasFeature(ARM::FeatureTrustZone) &&
      STI.hasFeature(ARM::FeatureVirtualization))
    emitAttribute(ARMBuildAttrs::Virtualization_use,
                  ARMBuildAttrs::AllowTZVirtualization);
  else if (STI.hasFeature(ARM::FeatureTrustZone))
    emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZ);
  else if (STI.hasFeature(ARM::FeatureVirtualization))
    emitAttribute(ARMBuildAttrs::Virtualization_use,
                  ARMBuildAttrs::AllowVirtualization);
}
static bool isV8M(const MCSubtargetInfo &STI) {
  // Note that v8M Baseline is a subset of v6T2!
  return (STI.hasFeature(ARM::HasV8MBaselineOps) &&
          !STI.hasFeature(ARM::HasV6T2Ops)) ||
         STI.hasFeature(ARM::HasV8MMainlineOps);
}
static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) {
  if (STI.getCPU() == "xscale")
    return ARMBuildAttrs::v5TEJ;

  if (STI.hasFeature(ARM::HasV8Ops)) {
    if (STI.hasFeature(ARM::FeatureRClass))
      return ARMBuildAttrs::v8_R;
    return ARMBuildAttrs::v8_A;
  } else if (STI.hasFeature(ARM::HasV8MMainlineOps))
    return ARMBuildAttrs::v8_M_Main;
  else if (STI.hasFeature(ARM::HasV7Ops)) {
    if (STI.hasFeature(ARM::FeatureMClass) && STI.hasFeature(ARM::FeatureDSP))
      return ARMBuildAttrs::v7E_M;
    return ARMBuildAttrs::v7;
  } else if (STI.hasFeature(ARM::HasV6T2Ops))
    return ARMBuildAttrs::v6T2;
  else if (STI.hasFeature(ARM::HasV8MBaselineOps))
    return ARMBuildAttrs::v8_M_Base;
  else if (STI.hasFeature(ARM::HasV6MOps))
    return ARMBuildAttrs::v6S_M;
  else if (STI.hasFeature(ARM::HasV6Ops))
    return ARMBuildAttrs::v6;
  else if (STI.hasFeature(ARM::HasV5TEOps))
    return ARMBuildAttrs::v5TE;
  else if (STI.hasFeature(ARM::HasV5TOps))
    return ARMBuildAttrs::v5T;
  else if (STI.hasFeature(ARM::HasV4TOps))
    return ARMBuildAttrs::v4T;
  else
    return ARMBuildAttrs::v4;
}
Example #30
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
    bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
  IsaVersion IVersion = getIsaVersion(STI.getCPU());

  OS << "\t.amdhsa_kernel " << KernelName << '\n';

#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME)   \
  STREAM << "\t\t" << DIRECTIVE << " "                                         \
         << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';

  OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
     << '\n';
  OS << "\t\t.amdhsa_private_segment_fixed_size "
     << KD.private_segment_fixed_size << '\n';

  PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
  PRINT_FIELD(
      OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  // These directives are required.
  OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
  OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';

  if (!ReserveVCC)
    OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
  if (IVersion.Major >= 7 && !ReserveFlatScr)
    OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
  if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
    OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';

  PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
  PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
  PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
  if (IVersion.Major >= 9)
    PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
                compute_pgm_rsrc1,
                amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
  PRINT_FIELD(
      OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_FIELD(
      OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
#undef PRINT_FIELD

  OS << "\t.end_amdhsa_kernel\n";
}