static void computeCalleeSaveRegisterPairs( MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) { if (CSI.empty()) return; AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); MachineFrameInfo *MFI = MF.getFrameInfo(); CallingConv::ID CC = MF.getFunction()->getCallingConv(); unsigned Count = CSI.size(); (void)CC; // MachO's compact unwind format relies on all registers being stored in // pairs. assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() || CC == CallingConv::PreserveMost || (Count & 1) == 0) && "Odd number of callee-saved regs to spill!"); unsigned Offset = AFI->getCalleeSavedStackSize(); for (unsigned i = 0; i < Count; ++i) { RegPairInfo RPI; RPI.Reg1 = CSI[i].getReg(); assert(AArch64::GPR64RegClass.contains(RPI.Reg1) || AArch64::FPR64RegClass.contains(RPI.Reg1)); RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1); // Add the next reg to the pair if it is in the same register class. if (i + 1 < Count) { unsigned NextReg = CSI[i + 1].getReg(); if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) || (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg))) RPI.Reg2 = NextReg; } // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI // list to come in sorted by frame index so that we can issue the store // pair instructions directly. Assert if we see anything otherwise. // // The order of the registers in the list is controlled by // getCalleeSavedRegs(), so they will always be in-order, as well. assert((!RPI.isPaired() || (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) && "Out of order callee saved regs!"); // MachO's compact unwind format relies on all registers being stored in // adjacent register pairs. assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() || CC == CallingConv::PreserveMost || (RPI.isPaired() && ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) || RPI.Reg1 + 1 == RPI.Reg2))) && "Callee-save registers not saved as adjacent register pair!"); RPI.FrameIdx = CSI[i].getFrameIdx(); if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) { // Round up size of non-pair to pair size if we need to pad the // callee-save area to ensure 16-byte alignment. Offset -= 16; assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16); MFI->setObjectSize(RPI.FrameIdx, 16); } else Offset -= RPI.isPaired() ? 16 : 8; assert(Offset % 8 == 0); RPI.Offset = Offset / 8; assert((RPI.Offset >= -64 && RPI.Offset <= 63) && "Offset out of bounds for LDP/STP immediate"); RegPairs.push_back(RPI); if (RPI.isPaired()) ++i; } // Align first offset to even 16-byte boundary to avoid additional SP // adjustment instructions. // Last pair offset is size of whole callee-save region for SP // pre-dec/post-inc. RegPairInfo &LastPair = RegPairs.back(); assert(AFI->getCalleeSavedStackSize() % 8 == 0); LastPair.Offset = AFI->getCalleeSavedStackSize() / 8; }
void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); bool HasFP = hasFP(MF); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc DL; // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; int NumBytes = (int)MFI->getStackSize(); if (!AFI->hasStackFrame()) { assert(!HasFP && "unexpected function without stack frame but with FP"); // All of the stack allocation is for locals. AFI->setLocalStackSize(NumBytes); if (!NumBytes) return; // REDZONE: If the stack size is less than 128 bytes, we don't need // to actually allocate. if (canUseRedZone(MF)) ++NumRedZoneFunctions; else { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); // Label used to tie together the PROLOG_LABEL and the MachineMoves. MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); // Encode the stack size of the leaf function. unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } return; } NumBytes -= AFI->getCalleeSavedStackSize(); assert(NumBytes >= 0 && "Negative stack allocation size!?"); // All of the remaining stack allocations are for locals. AFI->setLocalStackSize(NumBytes); // Move past the saves of the callee-saved registers. MachineBasicBlock::iterator End = MBB.end(); while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) ++MBBI; if (HasFP) { // Only set up FP if we actually need to. Frame pointer is fp = sp - 16. int FPOffset = AFI->getCalleeSavedStackSize() - 16; // Issue sub fp, sp, FPOffset or // mov fp,sp when FPOffset is zero. // Note: All stores of callee-saved registers are marked as "FrameSetup". // This code marks the instruction(s) that set the FP also. emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII, MachineInstr::FrameSetup); } // Allocate space for the rest of the frame. if (NumBytes) { const bool NeedsRealignment = RegInfo->needsStackRealignment(MF); unsigned scratchSPReg = AArch64::SP; if (NeedsRealignment) { scratchSPReg = findScratchNonCalleeSaveRegister(&MBB); assert(scratchSPReg != AArch64::NoRegister); } // If we're a leaf function, try using the red zone. if (!canUseRedZone(MF)) // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have // the correct value here, as NumBytes also includes padding bytes, // which shouldn't be counted here. emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); if (NeedsRealignment) { const unsigned Alignment = MFI->getMaxAlignment(); const unsigned NrBitsToZero = countTrailingZeros(Alignment); assert(NrBitsToZero > 1); assert(scratchSPReg != AArch64::SP); // SUB X9, SP, NumBytes // -- X9 is temporary register, so shouldn't contain any live data here, // -- free to use. This is already produced by emitFrameOffset above. // AND SP, X9, 0b11111...0000 // The logical immediates have a non-trivial encoding. The following // formula computes the encoded immediate with all ones but // NrBitsToZero zero bits as least significant bits. uint32_t andMaskEncoded = (1 << 12) // = N | ((64 - NrBitsToZero) << 6) // immr | ((64 - NrBitsToZero - 1) << 0); // imms BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) .addReg(scratchSPReg, RegState::Kill) .addImm(andMaskEncoded); AFI->setStackRealigned(true); } } // If we need a base pointer, set it up here. It's whatever the value of the // stack pointer is at this point. Any variable size objects will be allocated // after this, so we can still use the base pointer to reference locals. // // FIXME: Clarify FrameSetup flags here. // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is // needed. if (RegInfo->hasBasePointer(MF)) { TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, false); } if (needsFrameMoves) { const DataLayout &TD = MF.getDataLayout(); const int StackGrowth = -TD.getPointerSize(0); unsigned FramePtr = RegInfo->getFrameRegister(MF); // An example of the prologue: // // .globl __foo // .align 2 // __foo: // Ltmp0: // .cfi_startproc // .cfi_personality 155, ___gxx_personality_v0 // Leh_func_begin: // .cfi_lsda 16, Lexception33 // // stp xa,bx, [sp, -#offset]! // ... // stp x28, x27, [sp, #offset-32] // stp fp, lr, [sp, #offset-16] // add fp, sp, #offset - 16 // sub sp, sp, #1360 // // The Stack: // +-------------------------------------------+ // 10000 | ........ | ........ | ........ | ........ | // 10004 | ........ | ........ | ........ | ........ | // +-------------------------------------------+ // 10008 | ........ | ........ | ........ | ........ | // 1000c | ........ | ........ | ........ | ........ | // +===========================================+ // 10010 | X28 Register | // 10014 | X28 Register | // +-------------------------------------------+ // 10018 | X27 Register | // 1001c | X27 Register | // +===========================================+ // 10020 | Frame Pointer | // 10024 | Frame Pointer | // +-------------------------------------------+ // 10028 | Link Register | // 1002c | Link Register | // +===========================================+ // 10030 | ........ | ........ | ........ | ........ | // 10034 | ........ | ........ | ........ | ........ | // +-------------------------------------------+ // 10038 | ........ | ........ | ........ | ........ | // 1003c | ........ | ........ | ........ | ........ | // +-------------------------------------------+ // // [sp] = 10030 :: >>initial value<< // sp = 10020 :: stp fp, lr, [sp, #-16]! // fp = sp == 10020 :: mov fp, sp // [sp] == 10020 :: stp x28, x27, [sp, #-16]! // sp == 10010 :: >>final value<< // // The frame pointer (w29) points to address 10020. If we use an offset of // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24 // for w27, and -32 for w28: // // Ltmp1: // .cfi_def_cfa w29, 16 // Ltmp2: // .cfi_offset w30, -8 // Ltmp3: // .cfi_offset w29, -16 // Ltmp4: // .cfi_offset w27, -24 // Ltmp5: // .cfi_offset w28, -32 if (HasFP) { // Define the current CFA rule to use the provided FP. unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else { // Encode the stack size of the leaf function. unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize())); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } // Now emit the moves for whatever callee saved regs we have (including FP, // LR if those are saved). emitCalleeSavedFrameMoves(MBB, MBBI); } }