Example #1
0
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
    MachineFunction &MF, unsigned StackBumpBytes) const {
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  if (AFI->getLocalStackSize() == 0)
    return false;

  // 512 is the maximum immediate for stp/ldp that will be used for
  // callee-save save/restores
  if (StackBumpBytes >= 512)
    return false;

  if (MFI.hasVarSizedObjects())
    return false;

  if (RegInfo->needsStackRealignment(MF))
    return false;

  // This isn't strictly necessary, but it simplifies things a bit since the
  // current RedZone handling code assumes the SP is adjusted by the
  // callee-save save/restore code.
  if (canUseRedZone(MF))
    return false;

  return true;
}
Example #2
0
/// Based on the use to defs information (in ADRPMode), compute the
/// opportunities of LOH ADRP-related.
static void computeADRP(const InstrToInstrs &UseToDefs,
                        AArch64FunctionInfo &AArch64FI,
                        const MachineDominatorTree *MDT) {
  DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
  for (const auto &Entry : UseToDefs) {
    unsigned Size = Entry.second.size();
    if (Size == 0)
      continue;
    if (Size == 1) {
      const MachineInstr *L2 = *Entry.second.begin();
      const MachineInstr *L1 = Entry.first;
      if (!MDT->dominates(L2, L1)) {
        DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
                     << '\n');
        continue;
      }
      DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
      SmallVector<const MachineInstr *, 2> Args;
      Args.push_back(L2);
      Args.push_back(L1);
      AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, Args);
      ++NumADRPSimpleCandidate;
    }
#ifdef DEBUG
    else if (Size == 2)
      ++NumADRPComplexCandidate2;
    else if (Size == 3)
      ++NumADRPComplexCandidate3;
    else
      ++NumADRPComplexCandidateOther;
#endif
    // if Size < 1, the use should have been removed from the candidates
    assert(Size >= 1 && "No reaching defs for that use!");
  }
}
Example #3
0
static bool registerADRCandidate(const MachineInstr &Use,
                                 const InstrToInstrs &UseToDefs,
                                 const InstrToInstrs *DefsPerColorToUses,
                                 AArch64FunctionInfo &AArch64FI,
                                 SetOfMachineInstr *InvolvedInLOHs,
                                 const MapRegToId &RegToId) {
  // Look for opportunities to turn ADRP -> ADD or
  // ADRP -> LDR GOTPAGEOFF into ADR.
  // If ADRP has more than one use. Give up.
  if (Use.getOpcode() != AArch64::ADDXri &&
      (Use.getOpcode() != AArch64::LDRXui ||
       !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT)))
    return false;
  InstrToInstrs::const_iterator It = UseToDefs.find(&Use);
  // The map may contain garbage that we need to ignore.
  if (It == UseToDefs.end() || It->second.empty())
    return false;
  const MachineInstr &Def = **It->second.begin();
  if (Def.getOpcode() != AArch64::ADRP)
    return false;
  // Check the number of users of ADRP.
  const SetOfMachineInstr *Users =
      getUses(DefsPerColorToUses,
              RegToId.find(Def.getOperand(0).getReg())->second, Def);
  if (Users->size() > 1) {
    ++NumADRComplexCandidate;
    return false;
  }
  ++NumADRSimpleCandidate;
  assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) &&
         "ADRP already involved in LOH.");
  assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) &&
         "ADD already involved in LOH.");
  DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n');

  SmallVector<const MachineInstr *, 2> Args;
  Args.push_back(&Def);
  Args.push_back(&Use);

  AArch64FI.addLOHDirective(Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd
                                                           : MCLOH_AdrpLdrGot,
                          Args);
  return true;
}
Example #4
0
/// Update state when seeing and ADRP instruction.
static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI,
                       LOHInfo &Info) {
  if (Info.LastADRP != nullptr) {
    DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n"
                 << '\t' << MI << '\n'
                 << '\t' << *Info.LastADRP << '\n');
    AFI.addLOHDirective(MCLOH_AdrpAdrp, {&MI, Info.LastADRP});
    ++NumADRPSimpleCandidate;
  }

  // Produce LOH directive if possible.
  if (Info.IsCandidate) {
    switch (Info.Type) {
    case MCLOH_AdrpAdd:
      DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n"
                   << '\t' << MI << '\n'
                   << '\t' << *Info.MI0 << '\n');
      AFI.addLOHDirective(MCLOH_AdrpAdd, {&MI, Info.MI0});
      ++NumADRSimpleCandidate;
      break;
    case MCLOH_AdrpLdr:
      if (supportLoadFromLiteral(*Info.MI0)) {
        DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n"
                     << '\t' << MI << '\n'
                     << '\t' << *Info.MI0 << '\n');
        AFI.addLOHDirective(MCLOH_AdrpLdr, {&MI, Info.MI0});
        ++NumADRPToLDR;
      }
      break;
    case MCLOH_AdrpAddLdr:
      DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n"
                   << '\t' << MI << '\n'
                   << '\t' << *Info.MI1 << '\n'
                   << '\t' << *Info.MI0 << '\n');
      AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0});
      ++NumADDToLDR;
      break;
    case MCLOH_AdrpAddStr:
      if (Info.MI1 != nullptr) {
        DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n"
                     << '\t' << MI << '\n'
                     << '\t' << *Info.MI1 << '\n'
                     << '\t' << *Info.MI0 << '\n');
        AFI.addLOHDirective(MCLOH_AdrpAddStr, {&MI, Info.MI1, Info.MI0});
        ++NumADDToSTR;
      }
      break;
    case MCLOH_AdrpLdrGotLdr:
      DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n"
                   << '\t' << MI << '\n'
                   << '\t' << *Info.MI1 << '\n'
                   << '\t' << *Info.MI0 << '\n');
      AFI.addLOHDirective(MCLOH_AdrpLdrGotLdr, {&MI, Info.MI1, Info.MI0});
      ++NumLDRToLDR;
      break;
    case MCLOH_AdrpLdrGotStr:
      DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n"
                   << '\t' << MI << '\n'
                   << '\t' << *Info.MI1 << '\n'
                   << '\t' << *Info.MI0 << '\n');
      AFI.addLOHDirective(MCLOH_AdrpLdrGotStr, {&MI, Info.MI1, Info.MI0});
      ++NumLDRToSTR;
      break;
    case MCLOH_AdrpLdrGot:
      DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n"
                   << '\t' << MI << '\n'
                   << '\t' << *Info.MI0 << '\n');
      AFI.addLOHDirective(MCLOH_AdrpLdrGot, {&MI, Info.MI0});
      break;
    case MCLOH_AdrpAdrp:
      llvm_unreachable("MCLOH_AdrpAdrp not used in state machine");
    }
  }

  handleClobber(Info);
  Info.LastADRP = &MI;
}
Example #5
0
bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                               const Function &F,
                                               ArrayRef<unsigned> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();

  SmallVector<ArgInfo, 8> SplitArgs;
  unsigned i = 0;
  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;
    ArgInfo OrigArg{VRegs[i], Arg.getType()};
    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
    bool Split = false;
    LLT Ty = MRI.getType(VRegs[i]);
    unsigned Dst = VRegs[i];

    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
                      [&](unsigned Reg, uint64_t Offset) {
                        if (!Split) {
                          Split = true;
                          Dst = MRI.createGenericVirtualRegister(Ty);
                          MIRBuilder.buildUndef(Dst);
                        }
                        unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
                        MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset);
                        Dst = Tmp;
                      });

    if (Dst != VRegs[i])
      MIRBuilder.buildCopy(VRegs[i], Dst);
    ++i;
  }

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *AssignFn =
      TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);

  FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
    return false;

  if (F.isVarArg()) {
    if (!MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) {
      // FIXME: we need to reimplement saveVarArgsRegisters from
      // AArch64ISelLowering.
      return false;
    }

    // We currently pass all varargs at 8-byte alignment.
    uint64_t StackOffset = alignTo(Handler.StackUsed, 8);

    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
    FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
  }

  auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  if (Subtarget.hasCustomCallingConv())
    Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}
void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                BitVector &SavedRegs,
                                                RegScavenger *RS) const {
  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  SmallVector<unsigned, 4> UnspilledCSGPRs;
  SmallVector<unsigned, 4> UnspilledCSFPRs;

  // The frame record needs to be created by saving the appropriate registers
  if (hasFP(MF)) {
    SavedRegs.set(AArch64::FP);
    SavedRegs.set(AArch64::LR);
  }

  // Spill the BasePtr if it's used. Do this first thing so that the
  // getCalleeSavedRegs() below will get the right answer.
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.set(RegInfo->getBaseRegister());

  if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
    SavedRegs.set(AArch64::X9);

  // If any callee-saved registers are used, the frame cannot be eliminated.
  unsigned NumGPRSpilled = 0;
  unsigned NumFPRSpilled = 0;
  bool ExtraCSSpill = false;
  bool CanEliminateFrame = true;
  DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:");
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);

  // Check pairs of consecutive callee-saved registers.
  for (unsigned i = 0; CSRegs[i]; i += 2) {
    assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");

    const unsigned OddReg = CSRegs[i];
    const unsigned EvenReg = CSRegs[i + 1];
    assert((AArch64::GPR64RegClass.contains(OddReg) &&
            AArch64::GPR64RegClass.contains(EvenReg)) ^
               (AArch64::FPR64RegClass.contains(OddReg) &&
                AArch64::FPR64RegClass.contains(EvenReg)) &&
           "Register class mismatch!");

    const bool OddRegUsed = SavedRegs.test(OddReg);
    const bool EvenRegUsed = SavedRegs.test(EvenReg);

    // Early exit if none of the registers in the register pair is actually
    // used.
    if (!OddRegUsed && !EvenRegUsed) {
      if (AArch64::GPR64RegClass.contains(OddReg)) {
        UnspilledCSGPRs.push_back(OddReg);
        UnspilledCSGPRs.push_back(EvenReg);
      } else {
        UnspilledCSFPRs.push_back(OddReg);
        UnspilledCSFPRs.push_back(EvenReg);
      }
      continue;
    }

    unsigned Reg = AArch64::NoRegister;
    // If only one of the registers of the register pair is used, make sure to
    // mark the other one as used as well.
    if (OddRegUsed ^ EvenRegUsed) {
      // Find out which register is the additional spill.
      Reg = OddRegUsed ? EvenReg : OddReg;
      SavedRegs.set(Reg);
    }

    DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
    DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));

    assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
            (RegInfo->getEncodingValue(OddReg) + 1 ==
             RegInfo->getEncodingValue(EvenReg))) &&
           "Register pair of non-adjacent registers!");
    if (AArch64::GPR64RegClass.contains(OddReg)) {
      NumGPRSpilled += 2;
      // If it's not a reserved register, we can use it in lieu of an
      // emergency spill slot for the register scavenger.
      // FIXME: It would be better to instead keep looking and choose another
      // unspilled register that isn't reserved, if there is one.
      if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
        ExtraCSSpill = true;
    } else
      NumFPRSpilled += 2;

    CanEliminateFrame = false;
  }

  // FIXME: Set BigStack if any stack slot references may be out of range.
  // For now, just conservatively guestimate based on unscaled indexing
  // range. We'll end up allocating an unnecessary spill slot a lot, but
  // realistically that's not a big deal at this stage of the game.
  // The CSR spill slots have not been allocated yet, so estimateStackSize
  // won't include them.
  MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned CFSize =
      MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
  DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
  bool BigStack = (CFSize >= 256);
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    AFI->setHasStackFrame(true);

  // Estimate if we might need to scavenge a register at some point in order
  // to materialize a stack offset. If so, either spill one additional
  // callee-saved register or reserve a special spill slot to facilitate
  // register scavenging. If we already spilled an extra callee-saved register
  // above to keep the number of spills even, we don't need to do anything else
  // here.
  if (BigStack && !ExtraCSSpill) {

    // If we're adding a register to spill here, we have to add two of them
    // to keep the number of regs to spill even.
    assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
    unsigned Count = 0;
    while (!UnspilledCSGPRs.empty() && Count < 2) {
      unsigned Reg = UnspilledCSGPRs.back();
      UnspilledCSGPRs.pop_back();
      DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
                   << " to get a scratch register.\n");
      SavedRegs.set(Reg);
      ExtraCSSpill = true;
      ++Count;
    }

    // If we didn't find an extra callee-saved register to spill, create
    // an emergency spill slot.
    if (!ExtraCSSpill) {
      const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
      int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
      RS->addScavengingFrameIndex(FI);
      DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                   << " as the emergency spill slot.\n");
    }
  }
}
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    // Label used to tie together the PROLOG_LABEL and the MachineMoves.
    MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();

    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (NumBytes && !canUseRedZone(MF)) {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else if (NumBytes) {
      ++NumRedZoneFunctions;
    }

    return;
  }

  // Only set up FP if we actually need to.
  int FPOffset = 0;
  if (HasFP)
    FPOffset = getFPOffsetInPrologue(MBBI);

  // Move past the saves of the callee-saved registers.
  while (isCSSave(MBBI)) {
    ++MBBI;
    NumBytes -= 16;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  if (HasFP) {
    // Issue    sub fp, sp, FPOffset or
    //          mov fp,sp          when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes);

  // Allocate space for the rest of the frame.

  const unsigned Alignment = MFI->getMaxAlignment();
  const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
  unsigned scratchSPReg = AArch64::SP;
  if (NumBytes && NeedsRealignment) {
    // Use the first callee-saved register as a scratch register.
    scratchSPReg = AArch64::X9;
  }

  // If we're a leaf function, try using the red zone.
  if (NumBytes && !canUseRedZone(MF))
    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
    // the correct value here, as NumBytes also includes padding bytes,
    // which shouldn't be counted here.
    emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                    MachineInstr::FrameSetup);

  if (NumBytes && NeedsRealignment) {
    const unsigned NrBitsToZero = countTrailingZeros(Alignment);
    assert(NrBitsToZero > 1);
    assert(scratchSPReg != AArch64::SP);

    // SUB X9, SP, NumBytes
    //   -- X9 is temporary register, so shouldn't contain any live data here,
    //   -- free to use. This is already produced by emitFrameOffset above.
    // AND SP, X9, 0b11111...0000
    // The logical immediates have a non-trivial encoding. The following
    // formula computes the encoded immediate with all ones but
    // NrBitsToZero zero bits as least significant bits.
    uint32_t andMaskEncoded =
        (1                   <<12) // = N
      | ((64-NrBitsToZero)   << 6) // immr
      | ((64-NrBitsToZero-1) << 0) // imms
      ;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
      .addReg(scratchSPReg, RegState::Kill)
      .addImm(andMaskEncoded);
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);

      // Record the location of the stored LR
      unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);

      // Record the location of the stored FP
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have.
    emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
  }
}
Example #8
0
void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    // Label used to tie together the PROLOG_LABEL and the MachineMoves.
    MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();

    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (NumBytes && !canUseRedZone(MF)) {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    } else if (NumBytes) {
      ++NumRedZoneFunctions;
    }

    return;
  }

  // Only set up FP if we actually need to.
  int FPOffset = 0;
  if (HasFP) {
    // First instruction must a) allocate the stack  and b) have an immediate
    // that is a multiple of -2.
    assert((MBBI->getOpcode() == AArch64::STPXpre ||
            MBBI->getOpcode() == AArch64::STPDpre) &&
           MBBI->getOperand(3).getReg() == AArch64::SP &&
           MBBI->getOperand(4).getImm() < 0 &&
           (MBBI->getOperand(4).getImm() & 1) == 0);

    // Frame pointer is fp = sp - 16. Since the  STPXpre subtracts the space
    // required for the callee saved register area we get the frame pointer
    // by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
    FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
    assert(FPOffset >= 0 && "Bad Framepointer Offset");
  }

  // Move past the saves of the callee-saved registers.
  while (MBBI->getOpcode() == AArch64::STPXi ||
         MBBI->getOpcode() == AArch64::STPDi ||
         MBBI->getOpcode() == AArch64::STPXpre ||
         MBBI->getOpcode() == AArch64::STPDpre) {
    ++MBBI;
    NumBytes -= 16;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  if (HasFP) {
    // Issue    sub fp, sp, FPOffset or
    //          mov fp,sp          when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes);

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  //
  if (RegInfo->hasBasePointer(MF))
    TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);

  if (needsFrameMoves) {
    const DataLayout *TD = MF.getSubtarget().getDataLayout();
    const int StackGrowth = -TD->getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);

    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Record the location of the stored LR
      unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Record the location of the stored FP
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Now emit the moves for whatever callee saved regs we have.
    emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
  }
}
static void computeCalleeSaveRegisterPairs(
    MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {

  if (CSI.empty())
    return;

  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  unsigned Count = CSI.size();
  (void)CC;
  // MachO's compact unwind format relies on all registers being stored in
  // pairs.
  assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() ||
          CC == CallingConv::PreserveMost ||
          (Count & 1) == 0) &&
         "Odd number of callee-saved regs to spill!");
  unsigned Offset = AFI->getCalleeSavedStackSize();

  for (unsigned i = 0; i < Count; ++i) {
    RegPairInfo RPI;
    RPI.Reg1 = CSI[i].getReg();

    assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
           AArch64::FPR64RegClass.contains(RPI.Reg1));
    RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);

    // Add the next reg to the pair if it is in the same register class.
    if (i + 1 < Count) {
      unsigned NextReg = CSI[i + 1].getReg();
      if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
          (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
        RPI.Reg2 = NextReg;
    }

    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    //
    // The order of the registers in the list is controlled by
    // getCalleeSavedRegs(), so they will always be in-order, as well.
    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
           "Out of order callee saved regs!");

    // MachO's compact unwind format relies on all registers being stored in
    // adjacent register pairs.
    assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() ||
            CC == CallingConv::PreserveMost ||
            (RPI.isPaired() &&
             ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
              RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");

    RPI.FrameIdx = CSI[i].getFrameIdx();

    if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
      // Round up size of non-pair to pair size if we need to pad the
      // callee-save area to ensure 16-byte alignment.
      Offset -= 16;
      assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
      MFI->setObjectSize(RPI.FrameIdx, 16);
    } else
      Offset -= RPI.isPaired() ? 16 : 8;
    assert(Offset % 8 == 0);
    RPI.Offset = Offset / 8;
    assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
           "Offset out of bounds for LDP/STP immediate");

    RegPairs.push_back(RPI);
    if (RPI.isPaired())
      ++i;
  }

  // Align first offset to even 16-byte boundary to avoid additional SP
  // adjustment instructions.
  // Last pair offset is size of whole callee-save region for SP
  // pre-dec/post-inc.
  RegPairInfo &LastPair = RegPairs.back();
  assert(AFI->getCalleeSavedStackSize() % 8 == 0);
  LastPair.Offset = AFI->getCalleeSavedStackSize() / 8;
}
Example #10
0
/// Based on the use to defs information (in non-ADRPMode), compute the
/// opportunities of LOH non-ADRP-related
static void computeOthers(const InstrToInstrs &UseToDefs,
                          const InstrToInstrs *DefsPerColorToUses,
                          AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId,
                          const MachineDominatorTree *MDT) {
  SetOfMachineInstr *InvolvedInLOHs = nullptr;
#ifdef DEBUG
  SetOfMachineInstr InvolvedInLOHsStorage;
  InvolvedInLOHs = &InvolvedInLOHsStorage;
#endif // DEBUG
  DEBUG(dbgs() << "*** Compute LOH for Others\n");
  // ADRP -> ADD/LDR -> LDR/STR pattern.
  // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern.

  // FIXME: When the statistics are not important,
  // This initial filtering loop can be merged into the next loop.
  // Currently, we didn't do it to have the same code for both DEBUG and
  // NDEBUG builds. Indeed, the iterator of the second loop would need
  // to be changed.
  SetOfMachineInstr PotentialCandidates;
  SetOfMachineInstr PotentialADROpportunities;
  for (auto &Use : UseToDefs) {
    // If no definition is available, this is a non candidate.
    if (Use.second.empty())
      continue;
    // Keep only instructions that are load or store and at the end of
    // a ADRP -> ADD/LDR/Nothing chain.
    // We already filtered out the no-chain cases.
    if (!isCandidate(Use.first, UseToDefs, MDT)) {
      PotentialADROpportunities.insert(Use.first);
      continue;
    }
    PotentialCandidates.insert(Use.first);
  }

  // Make the following distinctions for statistics as the linker does
  // know how to decode instructions:
  // - ADD/LDR/Nothing make there different patterns.
  // - LDR/STR make two different patterns.
  // Hence, 6 - 1 base patterns.
  // (because ADRP-> Nothing -> STR is not simplifiable)

  // The linker is only able to have a simple semantic, i.e., if pattern A
  // do B.
  // However, we want to see the opportunity we may miss if we were able to
  // catch more complex cases.

  // PotentialCandidates are result of a chain ADRP -> ADD/LDR ->
  // A potential candidate becomes a candidate, if its current immediate
  // operand is zero and all nodes of the chain have respectively only one user
#ifdef DEBUG
  SetOfMachineInstr DefsOfPotentialCandidates;
#endif
  for (const MachineInstr *Candidate : PotentialCandidates) {
    // Get the definition of the candidate i.e., ADD or LDR.
    const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
    // Record the elements of the chain.
    const MachineInstr *L1 = Def;
    const MachineInstr *L2 = nullptr;
    unsigned ImmediateDefOpc = Def->getOpcode();
    if (Def->getOpcode() != AArch64::ADRP) {
      // Check the number of users of this node.
      const SetOfMachineInstr *Users =
          getUses(DefsPerColorToUses,
                  RegToId.find(Def->getOperand(0).getReg())->second, *Def);
      if (Users->size() > 1) {
#ifdef DEBUG
        // if all the uses of this def are in potential candidate, this is
        // a complex candidate of level 2.
        bool IsLevel2 = true;
        for (const MachineInstr *MI : *Users) {
          if (!PotentialCandidates.count(MI)) {
            ++NumTooCplxLvl2;
            IsLevel2 = false;
            break;
          }
        }
        if (IsLevel2)
          ++NumCplxLvl2;
#endif // DEBUG
        PotentialADROpportunities.insert(Def);
        continue;
      }
      L2 = Def;
      Def = *UseToDefs.find(Def)->second.begin();
      L1 = Def;
    } // else the element in the middle of the chain is nothing, thus
      // Def already contains the first element of the chain.

    // Check the number of users of the first node in the chain, i.e., ADRP
    const SetOfMachineInstr *Users =
        getUses(DefsPerColorToUses,
                RegToId.find(Def->getOperand(0).getReg())->second, *Def);
    if (Users->size() > 1) {
#ifdef DEBUG
      // if all the uses of this def are in the defs of the potential candidate,
      // this is a complex candidate of level 1
      if (DefsOfPotentialCandidates.empty()) {
        // lazy init
        DefsOfPotentialCandidates = PotentialCandidates;
        for (const MachineInstr *Candidate : PotentialCandidates) {
          if (!UseToDefs.find(Candidate)->second.empty())
            DefsOfPotentialCandidates.insert(
                *UseToDefs.find(Candidate)->second.begin());
        }
      }
      bool Found = false;
      for (auto &Use : *Users) {
        if (!DefsOfPotentialCandidates.count(Use)) {
          ++NumTooCplxLvl1;
          Found = true;
          break;
        }
      }
      if (!Found)
        ++NumCplxLvl1;
#endif // DEBUG
      continue;
    }

    bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
    // If the chain is three instructions long and ldr is the second element,
    // then this ldr must load form GOT, otherwise this is not a correct chain.
    if (L2 && !IsL2Add &&
        !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT))
      continue;
    SmallVector<const MachineInstr *, 3> Args;
    MCLOHType Kind;
    if (isCandidateLoad(Candidate)) {
      if (!L2) {
        // At this point, the candidate LOH indicates that the ldr instruction
        // may use a direct access to the symbol. There is not such encoding
        // for loads of byte and half.
        if (!supportLoadFromLiteral(Candidate))
          continue;

        DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate
                     << '\n');
        Kind = MCLOH_AdrpLdr;
        Args.push_back(L1);
        Args.push_back(Candidate);
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
               "L1 already involved in LOH.");
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
               "Candidate already involved in LOH.");
        ++NumADRPToLDR;
      } else {
        DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
                     << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
                     << '\n');

        Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr;
        Args.push_back(L1);
        Args.push_back(L2);
        Args.push_back(Candidate);

        PotentialADROpportunities.remove(L2);
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
               "L1 already involved in LOH.");
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
               "L2 already involved in LOH.");
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
               "Candidate already involved in LOH.");
#ifdef DEBUG
        // get the immediate of the load
        if (Candidate->getOperand(2).getImm() == 0)
          if (ImmediateDefOpc == AArch64::ADDXri)
            ++NumADDToLDR;
          else
            ++NumLDRToLDR;
        else if (ImmediateDefOpc == AArch64::ADDXri)
          ++NumADDToLDRWithImm;
        else
          ++NumLDRToLDRWithImm;
#endif // DEBUG
      }
    } else {
      if (ImmediateDefOpc == AArch64::ADRP)
        continue;
      else {

        DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
                     << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
                     << '\n');

        Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr;
        Args.push_back(L1);
        Args.push_back(L2);
        Args.push_back(Candidate);

        PotentialADROpportunities.remove(L2);
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
               "L1 already involved in LOH.");
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
               "L2 already involved in LOH.");
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
               "Candidate already involved in LOH.");
#ifdef DEBUG
        // get the immediate of the store
        if (Candidate->getOperand(2).getImm() == 0)
          if (ImmediateDefOpc == AArch64::ADDXri)
            ++NumADDToSTR;
          else
            ++NumLDRToSTR;
        else if (ImmediateDefOpc == AArch64::ADDXri)
          ++NumADDToSTRWithImm;
        else
          ++NumLDRToSTRWithImm;
#endif // DEBUG
      }
    }
    AArch64FI.addLOHDirective(Kind, Args);
  }

  // Now, we grabbed all the big patterns, check ADR opportunities.
  for (const MachineInstr *Candidate : PotentialADROpportunities)
    registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI,
                         InvolvedInLOHs, RegToId);
}