bool AArch64FrameLowering::spillCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc DL; SmallVector<RegPairInfo, 8> RegPairs; computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; ++RPII) { RegPairInfo RPI = *RPII; unsigned Reg1 = RPI.Reg1; unsigned Reg2 = RPI.Reg2; unsigned StrOpc; // Issue sequence of spills for cs regs. The first spill may be converted // to a pre-decrement store later by emitPrologue if the callee-save stack // area allocation can't be combined with the local stack area allocation. // For example: // stp x22, x21, [sp, #0] // addImm(+0) // stp x20, x19, [sp, #16] // addImm(+2) // stp fp, lr, [sp, #32] // addImm(+4) // Rationale: This sequence saves uop updates compared to a sequence of // pre-increment spills like stp xi,xj,[sp,#-16]! // Note: Similar rationale and sequence for restores in epilog. if (RPI.IsGPR) StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui; else StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui; DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1); if (RPI.isPaired()) dbgs() << ", " << TRI->getName(Reg2); dbgs() << ") -> fi#(" << RPI.FrameIdx; if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx+1; dbgs() << ")\n"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); MBB.addLiveIn(Reg1); if (RPI.isPaired()) { MBB.addLiveIn(Reg2); MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)); MIB.addMemOperand(MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), MachineMemOperand::MOStore, 8, 8)); } MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)) .addReg(AArch64::SP) .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit .setMIFlag(MachineInstr::FrameSetup); MIB.addMemOperand(MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), MachineMemOperand::MOStore, 8, 8)); } return true; }
bool AArch64FrameLowering::restoreCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc DL; SmallVector<RegPairInfo, 8> RegPairs; if (MI != MBB.end()) DL = MI->getDebugLoc(); computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE; ++RPII) { RegPairInfo RPI = *RPII; unsigned Reg1 = RPI.Reg1; unsigned Reg2 = RPI.Reg2; // Issue sequence of restores for cs regs. The last restore may be converted // to a post-increment load later by emitEpilogue if the callee-save stack // area allocation can't be combined with the local stack area allocation. // For example: // ldp fp, lr, [sp, #32] // addImm(+4) // ldp x20, x19, [sp, #16] // addImm(+2) // ldp x22, x21, [sp, #0] // addImm(+0) // Note: see comment in spillCalleeSavedRegisters() unsigned LdrOpc; if (RPI.IsGPR) LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui; else LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui; DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1); if (RPI.isPaired()) dbgs() << ", " << TRI->getName(Reg2); dbgs() << ") -> fi#(" << RPI.FrameIdx; if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx+1; dbgs() << ")\n"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); if (RPI.isPaired()) { MIB.addReg(Reg2, getDefRegState(true)); MIB.addMemOperand(MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), MachineMemOperand::MOLoad, 8, 8)); } MIB.addReg(Reg1, getDefRegState(true)) .addReg(AArch64::SP) .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit .setMIFlag(MachineInstr::FrameDestroy); MIB.addMemOperand(MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), MachineMemOperand::MOLoad, 8, 8)); } return true; }
bool AArch64FrameLowering::restoreCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc DL; SmallVector<RegPairInfo, 8> RegPairs; if (MI != MBB.end()) DL = MI->getDebugLoc(); computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE; ++RPII) { RegPairInfo RPI = *RPII; unsigned Reg1 = RPI.Reg1; unsigned Reg2 = RPI.Reg2; // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only // the last load is sp-pi post-increment and de-allocates the stack: // For example: // ldp fp, lr, [sp, #32] // addImm(+4) // ldp x20, x19, [sp, #16] // addImm(+2) // ldp x22, x21, [sp], #48 // addImm(+6) // Note: see comment in spillCalleeSavedRegisters() unsigned LdrOpc; bool BumpSP = RPII == std::prev(RegPairs.end()); if (RPI.IsGPR) { if (BumpSP) LdrOpc = RPI.isPaired() ? AArch64::LDPXpost : AArch64::LDRXpost; else LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui; } else { if (BumpSP) LdrOpc = RPI.isPaired() ? AArch64::LDPDpost : AArch64::LDRDpost; else LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui; } DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1); if (RPI.isPaired()) dbgs() << ", " << TRI->getName(Reg2); dbgs() << ") -> fi#(" << RPI.FrameIdx; if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx+1; dbgs() << ")\n"); const int Offset = RPI.Offset; MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); if (BumpSP) MIB.addReg(AArch64::SP, RegState::Define); if (RPI.isPaired()) MIB.addReg(Reg2, getDefRegState(true)) .addReg(Reg1, getDefRegState(true)) .addReg(AArch64::SP) .addImm(Offset) // [sp], #offset * 8 or [sp, #offset * 8] // where the factor * 8 is implicit .setMIFlag(MachineInstr::FrameDestroy); else MIB.addReg(Reg1, getDefRegState(true)) .addReg(AArch64::SP) .addImm(BumpSP ? Offset * 8 : Offset) // post-dec version is unscaled .setMIFlag(MachineInstr::FrameDestroy); } return true; }
bool AArch64FrameLowering::spillCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc DL; SmallVector<RegPairInfo, 8> RegPairs; computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; ++RPII) { RegPairInfo RPI = *RPII; unsigned Reg1 = RPI.Reg1; unsigned Reg2 = RPI.Reg2; unsigned StrOpc; // Issue sequence of non-sp increment and pi sp spills for cs regs. The // first spill is a pre-increment that allocates the stack. // For example: // stp x22, x21, [sp, #-48]! // addImm(-6) // stp x20, x19, [sp, #16] // addImm(+2) // stp fp, lr, [sp, #32] // addImm(+4) // Rationale: This sequence saves uop updates compared to a sequence of // pre-increment spills like stp xi,xj,[sp,#-16]! // Note: Similar rationale and sequence for restores in epilog. bool BumpSP = RPII == RegPairs.rbegin(); if (RPI.IsGPR) { // For first spill use pre-increment store. if (BumpSP) StrOpc = RPI.isPaired() ? AArch64::STPXpre : AArch64::STRXpre; else StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui; } else { // For first spill use pre-increment store. if (BumpSP) StrOpc = RPI.isPaired() ? AArch64::STPDpre : AArch64::STRDpre; else StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui; } DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1); if (RPI.isPaired()) dbgs() << ", " << TRI->getName(Reg2); dbgs() << ") -> fi#(" << RPI.FrameIdx; if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx+1; dbgs() << ")\n"); const int Offset = BumpSP ? -RPI.Offset : RPI.Offset; MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); if (BumpSP) MIB.addReg(AArch64::SP, RegState::Define); if (RPI.isPaired()) { MBB.addLiveIn(Reg1); MBB.addLiveIn(Reg2); MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)) .addReg(Reg1, getPrologueDeath(MF, Reg1)) .addReg(AArch64::SP) .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit .setMIFlag(MachineInstr::FrameSetup); } else { MBB.addLiveIn(Reg1); MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)) .addReg(AArch64::SP) .addImm(BumpSP ? Offset * 8 : Offset) // pre-inc version is unscaled .setMIFlag(MachineInstr::FrameSetup); } } return true; }
static void computeCalleeSaveRegisterPairs( MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) { if (CSI.empty()) return; AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); MachineFrameInfo *MFI = MF.getFrameInfo(); CallingConv::ID CC = MF.getFunction()->getCallingConv(); unsigned Count = CSI.size(); (void)CC; // MachO's compact unwind format relies on all registers being stored in // pairs. assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() || CC == CallingConv::PreserveMost || (Count & 1) == 0) && "Odd number of callee-saved regs to spill!"); unsigned Offset = AFI->getCalleeSavedStackSize(); for (unsigned i = 0; i < Count; ++i) { RegPairInfo RPI; RPI.Reg1 = CSI[i].getReg(); assert(AArch64::GPR64RegClass.contains(RPI.Reg1) || AArch64::FPR64RegClass.contains(RPI.Reg1)); RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1); // Add the next reg to the pair if it is in the same register class. if (i + 1 < Count) { unsigned NextReg = CSI[i + 1].getReg(); if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) || (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg))) RPI.Reg2 = NextReg; } // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI // list to come in sorted by frame index so that we can issue the store // pair instructions directly. Assert if we see anything otherwise. // // The order of the registers in the list is controlled by // getCalleeSavedRegs(), so they will always be in-order, as well. assert((!RPI.isPaired() || (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) && "Out of order callee saved regs!"); // MachO's compact unwind format relies on all registers being stored in // adjacent register pairs. assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() || CC == CallingConv::PreserveMost || (RPI.isPaired() && ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) || RPI.Reg1 + 1 == RPI.Reg2))) && "Callee-save registers not saved as adjacent register pair!"); RPI.FrameIdx = CSI[i].getFrameIdx(); if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) { // Round up size of non-pair to pair size if we need to pad the // callee-save area to ensure 16-byte alignment. Offset -= 16; assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16); MFI->setObjectSize(RPI.FrameIdx, 16); } else Offset -= RPI.isPaired() ? 16 : 8; assert(Offset % 8 == 0); RPI.Offset = Offset / 8; assert((RPI.Offset >= -64 && RPI.Offset <= 63) && "Offset out of bounds for LDP/STP immediate"); RegPairs.push_back(RPI); if (RPI.isPaired()) ++i; } // Align first offset to even 16-byte boundary to avoid additional SP // adjustment instructions. // Last pair offset is size of whole callee-save region for SP // pre-dec/post-inc. RegPairInfo &LastPair = RegPairs.back(); assert(AFI->getCalleeSavedStackSize() % 8 == 0); LastPair.Offset = AFI->getCalleeSavedStackSize() / 8; }