Beispiel #1
0
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  const ThumbRegisterInfo *RegInfo =
      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
  const Thumb1InstrInfo &TII =
      *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());

  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  unsigned NumBytes = MFI.getStackSize();
  assert(NumBytes >= ArgRegsSaveSize &&
         "ArgRegsSaveSize is included in NumBytes");
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;
  
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned BasePtr = RegInfo->getBaseRegister();
  int CFAOffset = 0;

  // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
  NumBytes = (NumBytes + 3) & ~3;
  MFI.setStackSize(NumBytes);

  // Determine the sizes of each callee-save spill areas and record which frame
  // belongs to which callee-save spill areas.
  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
  int FramePtrSpillFI = 0;

  if (ArgRegsSaveSize) {
    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
                 MachineInstr::FrameSetup);
    CFAOffset -= ArgRegsSaveSize;
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }

  if (!AFI->hasStackFrame()) {
    if (NumBytes - ArgRegsSaveSize != 0) {
      emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -(NumBytes - ArgRegsSaveSize),
                   MachineInstr::FrameSetup);
      CFAOffset -= NumBytes - ArgRegsSaveSize;
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    int FI = CSI[i].getFrameIdx();
    switch (Reg) {
    case ARM::R8:
    case ARM::R9:
    case ARM::R10:
    case ARM::R11:
      if (STI.splitFramePushPop(MF)) {
        GPRCS2Size += 4;
        break;
      }
      LLVM_FALLTHROUGH;
    case ARM::R4:
    case ARM::R5:
    case ARM::R6:
    case ARM::R7:
    case ARM::LR:
      if (Reg == FramePtr)
        FramePtrSpillFI = FI;
      GPRCS1Size += 4;
      break;
    default:
      DPRCSSize += 8;
    }
  }

  if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
    ++MBBI;
  }

  // Determine starting offsets of spill areas.
  unsigned DPRCSOffset  = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize);
  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
  bool HasFP = hasFP(MF);
  if (HasFP)
    AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
                                NumBytes);
  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
  NumBytes = DPRCSOffset;

  int FramePtrOffsetInBlock = 0;
  unsigned adjustedGPRCS1Size = GPRCS1Size;
  if (GPRCS1Size > 0 && GPRCS2Size == 0 &&
      tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
    FramePtrOffsetInBlock = NumBytes;
    adjustedGPRCS1Size += NumBytes;
    NumBytes = 0;
  }

  if (adjustedGPRCS1Size) {
    CFAOffset -= adjustedGPRCS1Size;
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
  for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
         E = CSI.end(); I != E; ++I) {
    unsigned Reg = I->getReg();
    int FI = I->getFrameIdx();
    switch (Reg) {
    case ARM::R8:
    case ARM::R9:
    case ARM::R10:
    case ARM::R11:
    case ARM::R12:
      if (STI.splitFramePushPop(MF))
        break;
      LLVM_FALLTHROUGH;
    case ARM::R0:
    case ARM::R1:
    case ARM::R2:
    case ARM::R3:
    case ARM::R4:
    case ARM::R5:
    case ARM::R6:
    case ARM::R7:
    case ARM::LR:
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
          nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }
  }

  // Adjust FP so it point to the stack slot that contains the previous FP.
  if (HasFP) {
    FramePtrOffsetInBlock +=
        MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
        .addReg(ARM::SP)
        .addImm(FramePtrOffsetInBlock / 4)
        .setMIFlags(MachineInstr::FrameSetup)
        .add(predOps(ARMCC::AL));
    if(FramePtrOffsetInBlock) {
      CFAOffset += FramePtrOffsetInBlock;
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
          nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      unsigned CFIIndex =
          MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
              nullptr, MRI->getDwarfRegNum(FramePtr, true)));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    if (NumBytes > 508)
      // If offset is > 508 then sp cannot be adjusted in a single instruction,
      // try restoring from fp instead.
      AFI->setShouldRestoreSPFromFP(true);
  }

  // Skip past the spilling of r8-r11, which could consist of multiple tPUSH
  // and tMOVr instructions. We don't need to add any call frame information
  // in-between these instructions, because they do not modify the high
  // registers.
  while (true) {
    MachineBasicBlock::iterator OldMBBI = MBBI;
    // Skip a run of tMOVr instructions
    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr)
      MBBI++;
    if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
      MBBI++;
    } else {
      // We have reached an instruction which is not a push, so the previous
      // run of tMOVr instructions (which may have been empty) was not part of
      // the prologue. Reset MBBI back to the last PUSH of the prologue.
      MBBI = OldMBBI;
      break;
    }
  }

  // Emit call frame information for the callee-saved high registers.
  for (auto &I : CSI) {
    unsigned Reg = I.getReg();
    int FI = I.getFrameIdx();
    switch (Reg) {
    case ARM::R8:
    case ARM::R9:
    case ARM::R10:
    case ARM::R11:
    case ARM::R12: {
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
          nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }
    default:
      break;
    }
  }

  if (NumBytes) {
    // Insert it after all the callee-save spills.
    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
                 MachineInstr::FrameSetup);
    if (!HasFP) {
      CFAOffset -= NumBytes;
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }

  if (STI.isTargetELF() && HasFP)
    MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
                            AFI->getFramePtrSpillOffset());

  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);

  if (RegInfo->needsStackRealignment(MF)) {
    const unsigned NrBitsToZero = countTrailingZeros(MFI.getMaxAlignment());
    // Emit the following sequence, using R4 as a temporary, since we cannot use
    // SP as a source or destination register for the shifts:
    // mov  r4, sp
    // lsrs r4, r4, #NrBitsToZero
    // lsls r4, r4, #NrBitsToZero
    // mov  sp, r4
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
      .addReg(ARM::SP, RegState::Kill)
      .add(predOps(ARMCC::AL));

    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4)
      .addDef(ARM::CPSR)
      .addReg(ARM::R4, RegState::Kill)
      .addImm(NrBitsToZero)
      .add(predOps(ARMCC::AL));

    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4)
      .addDef(ARM::CPSR)
      .addReg(ARM::R4, RegState::Kill)
      .addImm(NrBitsToZero)
      .add(predOps(ARMCC::AL));

    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
      .addReg(ARM::R4, RegState::Kill)
      .add(predOps(ARMCC::AL));

    AFI->setShouldRestoreSPFromFP(true);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  if (RegInfo->hasBasePointer(MF))
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL));

  // If the frame has variable sized objects then the epilogue must restore
  // the sp from fp. We can assume there's an FP here since hasFP already
  // checks for hasVarSizedObjects.
  if (MFI.hasVarSizedObjects())
    AFI->setShouldRestoreSPFromFP(true);

  // In some cases, virtual registers have been introduced, e.g. by uses of
  // emitThumbRegPlusImmInReg.
  MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs);
}
Beispiel #2
0
/// Insert prolog code into the function.
/// For ARC, this inserts a call to a function that puts required callee saved
/// registers onto the stack, when enough callee saved registers are required.
void ARCFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  LLVM_DEBUG(dbgs() << "Emit Prologue: " << MF.getName() << "\n");
  auto *AFI = MF.getInfo<ARCFunctionInfo>();
  MachineModuleInfo &MMI = MF.getMMI();
  MCContext &Context = MMI.getContext();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARCInstrInfo *TII = MF.getSubtarget<ARCSubtarget>().getInstrInfo();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  unsigned Last = determineLastCalleeSave(CSI);
  unsigned StackSlotsUsedByFunclet = 0;
  bool SavedBlink = false;
  unsigned AlreadyAdjusted = 0;
  if (MF.getFunction().isVarArg()) {
    // Add in the varargs area here first.
    LLVM_DEBUG(dbgs() << "Varargs\n");
    unsigned VarArgsBytes = MFI.getObjectSize(AFI->getVarArgsFrameIndex());
    BuildMI(MBB, MBBI, dl, TII->get(ARC::SUB_rru6))
        .addReg(ARC::SP)
        .addReg(ARC::SP)
        .addImm(VarArgsBytes);
  }
  if (hasFP(MF)) {
    LLVM_DEBUG(dbgs() << "Saving FP\n");
    BuildMI(MBB, MBBI, dl, TII->get(ARC::ST_AW_rs9))
        .addReg(ARC::SP, RegState::Define)
        .addReg(ARC::FP)
        .addReg(ARC::SP)
        .addImm(-4);
    AlreadyAdjusted += 4;
  }
  if (UseSaveRestoreFunclet && Last > ARC::R14) {
    LLVM_DEBUG(dbgs() << "Creating store funclet.\n");
    // BL to __save_r13_to_<TRI->getRegAsmName()>
    StackSlotsUsedByFunclet = Last - ARC::R12;
    BuildMI(MBB, MBBI, dl, TII->get(ARC::PUSH_S_BLINK));
    BuildMI(MBB, MBBI, dl, TII->get(ARC::SUB_rru6))
        .addReg(ARC::SP)
        .addReg(ARC::SP)
        .addImm(4 * StackSlotsUsedByFunclet);
    BuildMI(MBB, MBBI, dl, TII->get(ARC::BL))
        .addExternalSymbol(store_funclet_name[Last - ARC::R15])
        .addReg(ARC::BLINK, RegState::Implicit | RegState::Kill);
    AlreadyAdjusted += 4 * (StackSlotsUsedByFunclet + 1);
    SavedBlink = true;
  }
  // If we haven't saved BLINK, but we need to...do that now.
  if (MFI.hasCalls() && !SavedBlink) {
    LLVM_DEBUG(dbgs() << "Creating save blink.\n");
    BuildMI(MBB, MBBI, dl, TII->get(ARC::PUSH_S_BLINK));
    AlreadyAdjusted += 4;
  }
  if (AFI->MaxCallStackReq > 0)
    MFI.setStackSize(MFI.getStackSize() + AFI->MaxCallStackReq);
  // We have already saved some of the stack...
  LLVM_DEBUG(dbgs() << "Adjusting stack by: "
                    << (MFI.getStackSize() - AlreadyAdjusted) << "\n");
  generateStackAdjustment(MBB, MBBI, *ST.getInstrInfo(), dl,
                          -(MFI.getStackSize() - AlreadyAdjusted), ARC::SP);

  if (hasFP(MF)) {
    LLVM_DEBUG(dbgs() << "Setting FP from SP.\n");
    BuildMI(MBB, MBBI, dl,
            TII->get(isUInt<6>(MFI.getStackSize()) ? ARC::ADD_rru6
                                                   : ARC::ADD_rrlimm),
            ARC::FP)
        .addReg(ARC::SP)
        .addImm(MFI.getStackSize());
  }

  // Emit CFI records:
  // .cfi_def_cfa_offset StackSize
  // .cfi_offset fp, -StackSize
  // .cfi_offset blink, -StackSize+4
  unsigned CFIIndex = MF.addFrameInst(
      MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
  BuildMI(MBB, MBBI, dl, TII->get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlags(MachineInstr::FrameSetup);

  int CurOffset = -4;
  if (hasFP(MF)) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ARC::FP, true), CurOffset));
    BuildMI(MBB, MBBI, dl, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
    CurOffset -= 4;
  }

  if (MFI.hasCalls()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ARC::BLINK, true), CurOffset));
    BuildMI(MBB, MBBI, dl, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
  // CFI for the rest of the registers.
  for (const auto &Entry : CSI) {
    unsigned Reg = Entry.getReg();
    int FI = Entry.getFrameIdx();
    // Skip BLINK and FP.
    if ((hasFP(MF) && Reg == ARC::FP) || (MFI.hasCalls() && Reg == ARC::BLINK))
      continue;
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
    BuildMI(MBB, MBBI, dl, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}
Beispiel #3
0
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI.getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    if (!NumBytes)
      return;
    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (canUseRedZone(MF))
      ++NumRedZoneFunctions;
    else {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Label used to tie together the PROLOG_LABEL and the MachineMoves.
      MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  auto CSStackSize = AFI->getCalleeSavedStackSize();
  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes - CSStackSize);

  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  if (CombineSPBump) {
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                    MachineInstr::FrameSetup);
    NumBytes = 0;
  } else if (CSStackSize != 0) {
    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
                                                     -CSStackSize);
    NumBytes -= CSStackSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Move past the saves of the callee-saved registers, fixing up the offsets
  // and pre-inc if we decided to combine the callee-save and local stack
  // pointer bump above.
  MachineBasicBlock::iterator End = MBB.end();
  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
    if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
    ++MBBI;
  }
  if (HasFP) {
    // Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
    int FPOffset = CSStackSize - 16;
    if (CombineSPBump)
      FPOffset += AFI->getLocalStackSize();

    // Issue    sub fp, sp, FPOffset or
    //          mov fp,sp          when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
    unsigned scratchSPReg = AArch64::SP;

    if (NeedsRealignment) {
      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
      assert(scratchSPReg != AArch64::NoRegister);
    }

    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
      emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

    if (NeedsRealignment) {
      const unsigned Alignment = MFI.getMaxAlignment();
      const unsigned NrBitsToZero = countTrailingZeros(Alignment);
      assert(NrBitsToZero > 1);
      assert(scratchSPReg != AArch64::SP);

      // SUB X9, SP, NumBytes
      //   -- X9 is temporary register, so shouldn't contain any live data here,
      //   -- free to use. This is already produced by emitFrameOffset above.
      // AND SP, X9, 0b11111...0000
      // The logical immediates have a non-trivial encoding. The following
      // formula computes the encoded immediate with all ones but
      // NrBitsToZero zero bits as least significant bits.
      uint32_t andMaskEncoded = (1 << 12)                         // = N
                                | ((64 - NrBitsToZero) << 6)      // immr
                                | ((64 - NrBitsToZero - 1) << 0); // imms

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(scratchSPReg, RegState::Kill)
          .addImm(andMaskEncoded);
      AFI->setStackRealigned(true);
    }
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have (including FP,
    // LR if those are saved).
    emitCalleeSavedFrameMoves(MBB, MBBI);
  }
}