static void computeCalleeSaveRegisterPairs(MachineFunction &MF,
                                           const std::vector<CalleeSavedInfo> &CSI,
                                           const TargetRegisterInfo *TRI,
                                           SmallVectorImpl<RegPairInfo> &RegPairs) {
  if (CSI.empty())
    return;

  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  unsigned Count = CSI.size();
  (void)CC;
  // MachO's compact unwind format relies on all registers being stored in
  // pairs.
  assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
          (Count & 1) == 0) &&
         "Odd number of callee-saved regs to spill!");
  unsigned Offset = AFI->getCalleeSavedStackSize();

  for (unsigned i = 0; i < Count; ++i) {
    RegPairInfo RPI;
    RPI.Reg1 = CSI[i].getReg();

    assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
           AArch64::FPR64RegClass.contains(RPI.Reg1));
    RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);

    // Add the next reg to the pair if it is in the same register class.
    if (i + 1 < Count) {
      unsigned NextReg = CSI[i + 1].getReg();
      if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
          (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
        RPI.Reg2 = NextReg;
    }

    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    //
    // The order of the registers in the list is controlled by
    // getCalleeSavedRegs(), so they will always be in-order, as well.
    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
           "Out of order callee saved regs!");

    // MachO's compact unwind format relies on all registers being stored in
    // adjacent register pairs.
    assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
            (RPI.isPaired() &&
             ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
              RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");

    RPI.FrameIdx = CSI[i].getFrameIdx();

    if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
      // Round up size of non-pair to pair size if we need to pad the
      // callee-save area to ensure 16-byte alignment.
      Offset -= 16;
      assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
      MFI->setObjectSize(RPI.FrameIdx, 16);
    } else
      Offset -= RPI.isPaired() ? 16 : 8;
    assert(Offset % 8 == 0);
    RPI.Offset = Offset / 8;
    assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
           "Offset out of bounds for LDP/STP immediate");

    RegPairs.push_back(RPI);
    if (RPI.isPaired())
      ++i;
  }

  // Align first offset to even 16-byte boundary to avoid additional SP
  // adjustment instructions.
  // Last pair offset is size of whole callee-save region for SP
  // pre-dec/post-inc.
  RegPairInfo &LastPair = RegPairs.back();
  assert(AFI->getCalleeSavedStackSize() % 8 == 0);
  LastPair.Offset = AFI->getCalleeSavedStackSize() / 8;
}
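// A minimal standalone sketch (not LLVM API) of the offset bookkeeping above:
// walking down from getCalleeSavedStackSize(), each pair consumes 16 bytes
// and an unpaired register 8 bytes (16 when padded for 16-byte alignment),
// and the byte offset is divided by 8 because LDP/STP immediates are scaled
// by the 8-byte access size. All names here are illustrative only.
#include <cassert>

static int nextScaledOffset(unsigned &ByteOffset, bool Paired, bool Padded) {
  ByteOffset -= (Paired || Padded) ? 16 : 8;
  assert(ByteOffset % 8 == 0 && "LDP/STP offsets are 8-byte scaled");
  int Scaled = static_cast<int>(ByteOffset / 8);
  assert(Scaled >= -64 && Scaled <= 63 && "outside LDP/STP immediate range");
  return Scaled;
}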
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
                       RetOpcode == AArch64::TCRETURNri;
  }
  int NumBytes = MFI->getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note
  // that in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments; this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (CalleeSavedStackSize)|      |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.

  // Move past the restores of the callee-saved registers.
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
      ++LastPopI;
      break;
    }
  }
  NumBytes -= AFI->getCalleeSavedStackSize();
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    bool RedZone = canUseRedZone(MF);
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer (but we may need to pop stack args for fastcc).
    if (RedZone && ArgumentPopSize == 0)
      return;

    bool NoCalleeSaveRestore = AFI->getCalleeSavedStackSize() == 0;
    int StackRestoreBytes = RedZone ? 0 : NumBytes;
    if (NoCalleeSaveRestore)
      StackRestoreBytes += ArgumentPopSize;
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
    // If we were able to combine the local stack pop with the argument pop,
    // then we're done.
    if (NoCalleeSaveRestore || ArgumentPopSize == 0)
      return;
    NumBytes = 0;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (MFI->hasVarSizedObjects() || AFI->isStackRealigned())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -AFI->getCalleeSavedStackSize() + 16, TII,
                    MachineInstr::FrameDestroy);
  else if (NumBytes)
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes,
                    TII, MachineInstr::FrameDestroy);

  // This must be placed after the callee-save restore code because that code
  // assumes the SP is at the same location as it was after the callee-save
  // save code in the prologue.
  if (ArgumentPopSize)
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP,
                    AArch64::SP, ArgumentPopSize, TII,
                    MachineInstr::FrameDestroy);
}
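// Worked example (illustrative) of the FP-relative restore above: with a
// 48-byte callee-save area, the frame record (FP, LR) occupies the top 16
// bytes of that area and FP points at it, so the SP value that existed just
// after the callee-save spills is FP - 48 + 16 = FP - 32, which is exactly
// the "-AFI->getCalleeSavedStackSize() + 16" offset used when VLAs or stack
// realignment make the current SP untrustworthy.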
int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
                                                     int FI, unsigned &FrameReg,
                                                     bool PreferFP) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  int FPOffset = MFI->getObjectOffset(FI) + 16;
  int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
  bool isFixed = MFI->isFixedObjectIndex(FI);

  // Use frame pointer to reference fixed objects. Use it for locals if
  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
  // reliable as a base). Make sure useFPForScavengingIndex() does the
  // right thing for the emergency spill slot.
  bool UseFP = false;
  if (AFI->hasStackFrame()) {
    // Note: Keeping the following as multiple 'if' statements rather than
    // merging to a single expression for readability.
    //
    // Argument access should always use the FP.
    if (isFixed) {
      UseFP = hasFP(MF);
    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
               !RegInfo->needsStackRealignment(MF)) {
      // Use SP or FP, whichever gives us the best chance of the offset
      // being in range for direct access. If the FPOffset is positive,
      // that'll always be best, as the SP will be even further away.
      // If the FPOffset is negative, we have to keep in mind that the
      // available offset range for negative offsets is smaller than for
      // positive ones. If we have variable sized objects, we're stuck with
      // using the FP regardless, though, as the SP offset is unknown
      // and we don't have a base pointer available. If an offset is
      // available via the FP and the SP, use whichever is closest.
      if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
          (FPOffset >= -256 && Offset > -FPOffset))
        UseFP = true;
    }
  }

  assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
         "In the presence of dynamic stack pointer realignment, "
         "non-argument objects cannot be accessed through the frame pointer");

  if (UseFP) {
    FrameReg = RegInfo->getFrameRegister(MF);
    return FPOffset;
  }

  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else {
    FrameReg = AArch64::SP;
    // If we're using the red zone for this function, the SP won't actually
    // be adjusted, so the offsets will be negative. They're also all
    // within range of the signed 9-bit immediate instructions.
    if (canUseRedZone(MF))
      Offset -= AFI->getLocalStackSize();
  }

  return Offset;
}
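// Illustrative sketch (not LLVM API) of the FP/SP choice above, in isolation.
// The assumed inputs mirror the locals in resolveFrameIndexReference: FPOffset
// is the object's offset from FP, SPOffset its offset from SP. Negative FP
// offsets only reach -256 with the unscaled addressing forms, hence the
// asymmetric test. The helper name is hypothetical.
static bool preferFramePointer(int FPOffset, int SPOffset, bool PreferFP,
                               bool HasVarSizedObjects) {
  if (PreferFP || HasVarSizedObjects)
    return true;  // SP offset unknown, or the caller asked for FP.
  if (FPOffset >= 0)
    return true;  // A positive FP offset always beats the farther SP.
  // Negative FP offsets have a smaller reachable range; use FP only when it
  // is both in range and strictly closer than the SP-relative offset.
  return FPOffset >= -256 && SPOffset > -FPOffset;
}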
bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
  if (MF.empty())
    return false;
  DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');

  init(MF);

  for (MachineBasicBlock &MBB : MF) {
    DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName()
                 << '\n');

    for (const MachineInstr &MI : MBB) {
      if (!useOrDefCSROrFI(MI))
        continue;
      // Save (resp. restore) point must dominate (resp. post dominate)
      // MI. Look for the proper basic block for those.
      updateSaveRestorePoints(MBB);
      // If we are at a point where we cannot improve the placement of
      // save/restore instructions, just give up.
      if (!ArePointsInteresting()) {
        DEBUG(dbgs() << "No Shrink wrap candidate found\n");
        return false;
      }
      // No need to look for other instructions, this basic block
      // will already be part of the handled region.
      break;
    }
  }
  if (!ArePointsInteresting()) {
    // If the points are not interesting at this point, then they must be
    // null because it means we did not encounter any frame/CSR related code.
    // Otherwise, we would have returned from the previous loop.
    assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!");
    DEBUG(dbgs() << "Nothing to shrink-wrap\n");
    return false;
  }

  DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
               << '\n');

  do {
    DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
                 << Save->getNumber() << ' ' << Save->getName() << ' '
                 << MBFI->getBlockFreq(Save).getFrequency() << "\nRestore: "
                 << Restore->getNumber() << ' ' << Restore->getName() << ' '
                 << MBFI->getBlockFreq(Restore).getFrequency() << '\n');

    bool IsSaveCheap;
    if ((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) &&
        EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency())
      break;
    DEBUG(dbgs() << "New points are too expensive\n");
    MachineBasicBlock *NewBB;
    if (!IsSaveCheap) {
      Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
      if (!Save)
        break;
      NewBB = Save;
    } else {
      // Restore is expensive.
      Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
      if (!Restore)
        break;
      NewBB = Restore;
    }
    updateSaveRestorePoints(*NewBB);
  } while (Save && Restore);

  if (!ArePointsInteresting()) {
    ++NumCandidatesDropped;
    return false;
  }

  DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " << Save->getNumber()
               << ' ' << Save->getName() << "\nRestore: "
               << Restore->getNumber() << ' ' << Restore->getName() << '\n');

  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setSavePoint(Save);
  MFI->setRestorePoint(Restore);
  ++NumCandidates;
  return false;
}
/// insertCSRSpillsAndRestores - Insert spill and restore code for
/// callee saved registers used in the function, handling shrink wrapping.
///
void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
  // Get callee saved register information.
  MachineFrameInfo *FFI = Fn.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();

  FFI->setCalleeSavedInfoValid(true);

  // Early exit if no callee saved registers are modified!
  if (CSI.empty())
    return;

  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
  MachineBasicBlock::iterator I;

  if (!ShrinkWrapThisFunction) {
    // Spill using target interface.
    I = EntryBlock->begin();
    if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) {
      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
        // Add the callee-saved register as live-in.
        // It's killed at the spill.
        EntryBlock->addLiveIn(CSI[i].getReg());

        // Insert the spill to the stack frame.
        TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true,
                                CSI[i].getFrameIdx(), CSI[i].getRegClass());
      }
    }

    // Restore using target interface.
    for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
      MachineBasicBlock *MBB = ReturnBlocks[ri];
      I = MBB->end();
      --I;

      // Skip over all terminator instructions, which are part of the return
      // sequence.
      MachineBasicBlock::iterator I2 = I;
      while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
        I = I2;

      bool AtStart = I == MBB->begin();
      MachineBasicBlock::iterator BeforeI = I;
      if (!AtStart)
        --BeforeI;

      // Restore all registers immediately before the return and any
      // terminators that precede it.
      if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) {
        for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
          TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
                                   CSI[i].getFrameIdx(), CSI[i].getRegClass());
          assert(I != MBB->begin() &&
                 "loadRegFromStackSlot didn't insert any code!");
          // Insert in reverse order.  loadRegFromStackSlot can insert
          // multiple instructions.
          if (AtStart)
            I = MBB->begin();
          else {
            I = BeforeI;
            ++I;
          }
        }
      }
    }
    return;
  }

  // Insert spills.
  std::vector<CalleeSavedInfo> blockCSI;
  for (CSRegBlockMap::iterator BI = CSRSave.begin(), BE = CSRSave.end();
       BI != BE; ++BI) {
    MachineBasicBlock *MBB = BI->first;
    CSRegSet save = BI->second;

    if (save.empty())
      continue;

    blockCSI.clear();
    for (CSRegSet::iterator RI = save.begin(), RE = save.end(); RI != RE;
         ++RI) {
      blockCSI.push_back(CSI[*RI]);
    }
    assert(blockCSI.size() > 0 &&
           "Could not collect callee saved register info");

    I = MBB->begin();

    // When shrink wrapping, use stack slot stores/loads.
    for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
      // Add the callee-saved register as live-in.
      // It's killed at the spill.
      MBB->addLiveIn(blockCSI[i].getReg());

      // Insert the spill to the stack frame.
      TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(), true,
                              blockCSI[i].getFrameIdx(),
                              blockCSI[i].getRegClass());
    }
  }

  for (CSRegBlockMap::iterator BI = CSRRestore.begin(), BE = CSRRestore.end();
       BI != BE; ++BI) {
    MachineBasicBlock *MBB = BI->first;
    CSRegSet restore = BI->second;

    if (restore.empty())
      continue;

    blockCSI.clear();
    for (CSRegSet::iterator RI = restore.begin(), RE = restore.end(); RI != RE;
         ++RI) {
      blockCSI.push_back(CSI[*RI]);
    }
    assert(blockCSI.size() > 0 && "Could not find callee saved register info");

    // If MBB is empty and needs restores, insert at the _beginning_.
    if (MBB->empty()) {
      I = MBB->begin();
    } else {
      I = MBB->end();
      --I;

      // Skip over all terminator instructions, which are part of the
      // return sequence.
      if (!I->getDesc().isTerminator()) {
        ++I;
      } else {
        MachineBasicBlock::iterator I2 = I;
        while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
          I = I2;
      }
    }

    bool AtStart = I == MBB->begin();
    MachineBasicBlock::iterator BeforeI = I;
    if (!AtStart)
      --BeforeI;

    // Restore all registers immediately before the return and any
    // terminators that precede it.
    for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
      TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(),
                               blockCSI[i].getFrameIdx(),
                               blockCSI[i].getRegClass());
      assert(I != MBB->begin() &&
             "loadRegFromStackSlot didn't insert any code!");
      // Insert in reverse order.  loadRegFromStackSlot can insert
      // multiple instructions.
      if (AtStart)
        I = MBB->begin();
      else {
        I = BeforeI;
        ++I;
      }
    }
  }
}
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
    : AMDGPUMachineFunction(MF), TIDReg(AMDGPU::NoRegister),
      ScratchRSrcReg(AMDGPU::NoRegister),
      ScratchWaveOffsetReg(AMDGPU::NoRegister),
      PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
      DispatchPtrUserSGPR(AMDGPU::NoRegister),
      QueuePtrUserSGPR(AMDGPU::NoRegister),
      KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
      DispatchIDUserSGPR(AMDGPU::NoRegister),
      FlatScratchInitUserSGPR(AMDGPU::NoRegister),
      PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
      GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
      GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
      GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
      WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
      WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
      WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
      WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
      PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
      PSInputAddr(0), ReturnsVoid(true), MaximumWorkGroupSize(0),
      DebuggerReservedVGPRCount(0), LDSWaveSpillSize(0), PSInputEna(0),
      NumUserSGPRs(0), NumSystemSGPRs(0), HasSpilledSGPRs(false),
      HasSpilledVGPRs(false), HasNonSpillStackObjects(false),
      HasFlatInstructions(false), PrivateSegmentBuffer(false),
      DispatchPtr(false), QueuePtr(false), DispatchID(false),
      KernargSegmentPtr(false), FlatScratchInit(false),
      GridWorkgroupCountX(false), GridWorkgroupCountY(false),
      GridWorkgroupCountZ(false), WorkGroupIDX(false), WorkGroupIDY(false),
      WorkGroupIDZ(false), WorkGroupInfo(false),
      PrivateSegmentWaveByteOffset(false), WorkItemIDX(false),
      WorkItemIDY(false), WorkItemIDZ(false) {
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  const Function *F = MF.getFunction();

  PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();

  if (!AMDGPU::isShader(F->getCallingConv())) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  }

  if (F->hasFnAttribute("amdgpu-work-group-id-y"))
    WorkGroupIDY = true;

  if (F->hasFnAttribute("amdgpu-work-group-id-z"))
    WorkGroupIDZ = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-y"))
    WorkItemIDY = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-z"))
    WorkItemIDZ = true;

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo->hasStackObjects();

  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;

  if (ST.isAmdHsaOS()) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
  if (HasStackObjects && ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS &&
      ST.isAmdHsaOS())
    FlatScratchInit = true;

  if (AMDGPU::isCompute(F->getCallingConv()))
    MaximumWorkGroupSize = AMDGPU::getMaximumWorkGroupSize(*F);
  else
    MaximumWorkGroupSize = ST.getWavefrontSize();

  if (ST.debuggerReserveRegs())
    DebuggerReservedVGPRCount = 4;
}
bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
  if (skipOptnoneFunction(*MF.getFunction()))
    return false;

  Changed = FirstInLoop = false;
  const TargetSubtargetInfo &ST = MF.getSubtarget();
  TII = ST.getInstrInfo();
  TLI = ST.getTargetLowering();
  TRI = ST.getRegisterInfo();
  MFI = MF.getFrameInfo();
  MRI = &MF.getRegInfo();
  SchedModel.init(ST.getSchedModel(), &ST, TII);

  PreRegAlloc = MRI->isSSA();

  if (PreRegAlloc)
    DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
  else
    DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
  DEBUG(dbgs() << MF.getName() << " ********\n");

  if (PreRegAlloc) {
    // Estimate register pressure during pre-regalloc pass.
    unsigned NumRPS = TRI->getNumRegPressureSets();
    RegPressure.resize(NumRPS);
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
    RegLimit.resize(NumRPS);
    for (unsigned i = 0, e = NumRPS; i != e; ++i)
      RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
  }

  // Get our Loop information...
  MLI = &getAnalysis<MachineLoopInfo>();
  DT = &getAnalysis<MachineDominatorTree>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
  while (!Worklist.empty()) {
    CurLoop = Worklist.pop_back_val();
    CurPreheader = nullptr;
    ExitBlocks.clear();

    // If this is done before regalloc, only visit outer-most
    // preheader-sporting loops.
    if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
      Worklist.append(CurLoop->begin(), CurLoop->end());
      continue;
    }

    CurLoop->getExitBlocks(ExitBlocks);

    if (!PreRegAlloc)
      HoistRegionPostRA();
    else {
      // CSEMap is initialized for loop header when the first instruction is
      // being hoisted.
      MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
      FirstInLoop = true;
      HoistOutOfLoop(N);
      CSEMap.clear();

      if (SinkInstsToAvoidSpills)
        SinkIntoLoop();
    }
  }

  return Changed;
}
void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();

  const MipsSEInstrInfo &TII =
      *static_cast<const MipsSEInstrInfo *>(STI.getInstrInfo());
  const MipsRegisterInfo &RegInfo =
      *static_cast<const MipsRegisterInfo *>(STI.getRegisterInfo());

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
  MipsABIInfo ABI = STI.getABI();
  unsigned SP = ABI.GetStackPtr();
  unsigned FP = ABI.GetFramePtr();
  unsigned ZERO = ABI.GetNullPtr();
  unsigned ADDu = ABI.GetPtrAdduOp();
  unsigned ADDiu = ABI.GetPtrAddiuOp();
  unsigned AND = ABI.IsN64() ? Mips::AND64 : Mips::AND;

  const TargetRegisterClass *RC = ABI.ArePtrs64bit() ?
        &Mips::GPR64RegClass : &Mips::GPR32RegClass;

  // First, compute final stack size.
  uint64_t StackSize = MFI->getStackSize();

  // No need to allocate space on the stack.
  if (StackSize == 0 && !MFI->adjustsStack())
    return;

  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  MachineLocation DstML, SrcML;

  // Adjust stack.
  TII.adjustStackPtr(SP, -StackSize, MBB, MBBI);

  // emit ".cfi_def_cfa_offset StackSize"
  unsigned CFIIndex = MMI.addFrameInst(
      MCCFIInstruction::createDefCfaOffset(nullptr, -StackSize));
  BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);

  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();

  if (CSI.size()) {
    // Find the instruction past the last instruction that saves a
    // callee-saved register to the stack.
    for (unsigned i = 0; i < CSI.size(); ++i)
      ++MBBI;

    // Iterate over list of callee-saved registers and emit .cfi_offset
    // directives.
    for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
           E = CSI.end(); I != E; ++I) {
      int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
      unsigned Reg = I->getReg();

      // If Reg is a double precision register, emit two cfa_offsets,
      // one for each of the paired single precision registers.
      if (Mips::AFGR64RegClass.contains(Reg)) {
        unsigned Reg0 =
            MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_lo), true);
        unsigned Reg1 =
            MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_hi), true);

        if (!STI.isLittle())
          std::swap(Reg0, Reg1);

        unsigned CFIIndex = MMI.addFrameInst(
            MCCFIInstruction::createOffset(nullptr, Reg0, Offset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);

        CFIIndex = MMI.addFrameInst(
            MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
      } else if (Mips::FGR64RegClass.contains(Reg)) {
        unsigned Reg0 = MRI->getDwarfRegNum(Reg, true);
        unsigned Reg1 = MRI->getDwarfRegNum(Reg, true) + 1;

        if (!STI.isLittle())
          std::swap(Reg0, Reg1);

        unsigned CFIIndex = MMI.addFrameInst(
            MCCFIInstruction::createOffset(nullptr, Reg0, Offset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);

        CFIIndex = MMI.addFrameInst(
            MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
      } else {
        // Reg is either in GPR32 or FGR32.
        unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, 1), Offset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
      }
    }
  }

  if (MipsFI->callsEhReturn()) {
    // Insert instructions that spill eh data registers.
    for (int I = 0; I < 4; ++I) {
      if (!MBB.isLiveIn(ABI.GetEhDataReg(I)))
        MBB.addLiveIn(ABI.GetEhDataReg(I));
      TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false,
                              MipsFI->getEhDataRegFI(I), RC, &RegInfo);
    }

    // Emit .cfi_offset directives for eh data registers.
    for (int I = 0; I < 4; ++I) {
      int64_t Offset = MFI->getObjectOffset(MipsFI->getEhDataRegFI(I));
      unsigned Reg = MRI->getDwarfRegNum(ABI.GetEhDataReg(I), true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, Offset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }
  }

  // If the frame pointer is enabled, set it to point to the stack pointer.
  if (hasFP(MF)) {
    // Insert instruction "move $fp, $sp" at this location.
    BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO)
        .setMIFlag(MachineInstr::FrameSetup);

    // emit ".cfi_def_cfa_register $fp"
    unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
        nullptr, MRI->getDwarfRegNum(FP, true)));
    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    if (RegInfo.needsStackRealignment(MF)) {
      // addiu $Reg, $zero, -MaxAlignment
      // andi $sp, $sp, $Reg
      unsigned VR = MF.getRegInfo().createVirtualRegister(RC);
      assert(isInt<16>(MFI->getMaxAlignment()) &&
             "Function's alignment size requirement is not supported.");
      int MaxAlign = -(signed)MFI->getMaxAlignment();

      BuildMI(MBB, MBBI, dl, TII.get(ADDiu), VR).addReg(ZERO).addImm(MaxAlign);
      BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR);

      if (hasBP(MF)) {
        // move $s7, $sp
        unsigned BP = STI.isABI_N64() ? Mips::S7_64 : Mips::S7;
        BuildMI(MBB, MBBI, dl, TII.get(ADDu), BP)
            .addReg(SP)
            .addReg(ZERO);
      }
    }
  }
}
void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    // Label used to tie together the PROLOG_LABEL and the MachineMoves.
    MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();

    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (NumBytes && !canUseRedZone(MF)) {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else if (NumBytes) {
      ++NumRedZoneFunctions;
    }

    return;
  }

  // Only set up FP if we actually need to.
  int FPOffset = 0;
  if (HasFP) {
    // First instruction must a) allocate the stack and b) have an immediate
    // that is a multiple of -2.
    assert((MBBI->getOpcode() == AArch64::STPXpre ||
            MBBI->getOpcode() == AArch64::STPDpre) &&
           MBBI->getOperand(3).getReg() == AArch64::SP &&
           MBBI->getOperand(4).getImm() < 0 &&
           (MBBI->getOperand(4).getImm() & 1) == 0);

    // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
    // required for the callee saved register area we get the frame pointer
    // by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
    FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
    assert(FPOffset >= 0 && "Bad Framepointer Offset");
  }

  // Move past the saves of the callee-saved registers.
  while (MBBI->getOpcode() == AArch64::STPXi ||
         MBBI->getOpcode() == AArch64::STPDi ||
         MBBI->getOpcode() == AArch64::STPXpre ||
         MBBI->getOpcode() == AArch64::STPDpre) {
    ++MBBI;
    NumBytes -= 16;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  if (HasFP) {
    // Issue   sub fp, sp, FPOffset   or
    //         mov fp, sp             when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes);

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  //
  if (RegInfo->hasBasePointer(MF))
    TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);

  if (needsFrameMoves) {
    const DataLayout *TD = MF.getTarget().getDataLayout();
    const int StackGrowth = -TD->getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);

    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    //  Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    //  Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //  stp  xa, bx, [sp, -#offset]!
    //  ...
    //  stp  x28, x27, [sp, #offset-32]
    //  stp  fp, lr, [sp, #offset-16]
    //  add  fp, sp, #offset - 16
    //  sub  sp, sp, #1360
    //
    //  The Stack:
    //        +-------------------------------------------+
    //  10000 | ........ | ........ | ........ | ........ |
    //  10004 | ........ | ........ | ........ | ........ |
    //        +-------------------------------------------+
    //  10008 | ........ | ........ | ........ | ........ |
    //  1000c | ........ | ........ | ........ | ........ |
    //        +===========================================+
    //  10010 |                X28 Register               |
    //  10014 |                X28 Register               |
    //        +-------------------------------------------+
    //  10018 |                X27 Register               |
    //  1001c |                X27 Register               |
    //        +===========================================+
    //  10020 |               Frame Pointer               |
    //  10024 |               Frame Pointer               |
    //        +-------------------------------------------+
    //  10028 |               Link Register               |
    //  1002c |               Link Register               |
    //        +===========================================+
    //  10030 | ........ | ........ | ........ | ........ |
    //  10034 | ........ | ........ | ........ | ........ |
    //        +-------------------------------------------+
    //  10038 | ........ | ........ | ........ | ........ |
    //  1003c | ........ | ........ | ........ | ........ |
    //        +-------------------------------------------+
    //
    //  [sp] = 10030        ::    >>initial value<<
    //  sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //  fp = sp == 10020    ::  mov fp, sp
    //  [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //  sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);

      // Record the location of the stored LR
      unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);

      // Record the location of the stored FP
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have.
    emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
  }
}
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert((MBBI->getOpcode() == ARM::tBX_RET ||
          MBBI->getOpcode() == ARM::tPOP_RET) &&
         "Can only insert epilog into returning blocks");
  DebugLoc dl = MBBI->getDebugLoc();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const Thumb1RegisterInfo *RegInfo =
      static_cast<const Thumb1RegisterInfo *>(MF.getTarget().getRegisterInfo());
  const Thumb1InstrInfo &TII =
      *static_cast<const Thumb1InstrInfo *>(MF.getTarget().getInstrInfo());

  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
  int NumBytes = (int)MFI->getStackSize();
  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  if (!AFI->hasStackFrame()) {
    if (NumBytes != 0)
      emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    if (MBBI != MBB.begin()) {
      do
        --MBBI;
      while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
      if (!isCSRestore(MBBI, CSRegs))
        ++MBBI;
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
                 AFI->getGPRCalleeSavedArea2Size() +
                 AFI->getDPRCalleeSavedAreaSize());

    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      // Reset SP based on frame pointer only if the stack frame extends
      // beyond the frame pointer stack slot, the target is ELF and the
      // function has FP, or the target uses var sized objects.
      if (NumBytes) {
        assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
               "No scratch register to restore SP from FP!");
        emitThumbRegPlusImmediate(MBB, MBBI, ARM::R4, FramePtr, -NumBytes,
                                  TII, *RegInfo, dl);
        BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
            .addReg(ARM::R4);
      } else
        BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
            .addReg(FramePtr);
    } else {
      if (MBBI->getOpcode() == ARM::tBX_RET && &MBB.front() != MBBI &&
          prior(MBBI)->getOpcode() == ARM::tPOP) {
        MachineBasicBlock::iterator PMBBI = prior(MBBI);
        emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
      } else
        emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
    }
  }

  if (VARegSaveSize) {
    // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
    // to LR, and we can't pop the value directly to the PC since
    // we need to update the SP after popping the value. Therefore, we
    // pop the old LR into R3 as a temporary.

    // Move back past the callee-saved register restoration
    while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs))
      ++MBBI;
    // Epilogue for vararg functions: pop LR to R3 and branch off it.
    AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
        .addReg(ARM::R3, RegState::Define);

    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);

    BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
        .addReg(ARM::R3, RegState::Kill);
    // erase the old tBX_RET instruction
    MBB.erase(MBBI);
  }
}
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const Thumb1RegisterInfo *RegInfo =
      static_cast<const Thumb1RegisterInfo *>(MF.getTarget().getRegisterInfo());
  const Thumb1InstrInfo &TII =
      *static_cast<const Thumb1InstrInfo *>(MF.getTarget().getInstrInfo());

  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
  unsigned NumBytes = MFI->getStackSize();
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned BasePtr = RegInfo->getBaseRegister();

  // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
  NumBytes = (NumBytes + 3) & ~3;
  MFI->setStackSize(NumBytes);

  // Determine the size of each callee-save spill area and record which frame
  // index belongs to which callee-save spill area.
  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
  int FramePtrSpillFI = 0;

  if (VARegSaveSize)
    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize);

  if (!AFI->hasStackFrame()) {
    if (NumBytes != 0)
      emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);
    return;
  }

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    int FI = CSI[i].getFrameIdx();
    switch (Reg) {
    case ARM::R4:
    case ARM::R5:
    case ARM::R6:
    case ARM::R7:
    case ARM::LR:
      if (Reg == FramePtr)
        FramePtrSpillFI = FI;
      AFI->addGPRCalleeSavedArea1Frame(FI);
      GPRCS1Size += 4;
      break;
    case ARM::R8:
    case ARM::R9:
    case ARM::R10:
    case ARM::R11:
      if (Reg == FramePtr)
        FramePtrSpillFI = FI;
      if (STI.isTargetDarwin()) {
        AFI->addGPRCalleeSavedArea2Frame(FI);
        GPRCS2Size += 4;
      } else {
        AFI->addGPRCalleeSavedArea1Frame(FI);
        GPRCS1Size += 4;
      }
      break;
    default:
      AFI->addDPRCalleeSavedAreaFrame(FI);
      DPRCSSize += 8;
    }
  }

  if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
    ++MBBI;
    if (MBBI != MBB.end())
      dl = MBBI->getDebugLoc();
  }

  // Determine starting offsets of spill areas.
  unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
  AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);

  NumBytes = DPRCSOffset;

  // Adjust FP so it points to the stack slot that contains the previous FP.
  if (hasFP(MF)) {
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
        .addFrameIndex(FramePtrSpillFI).addImm(0);
    if (NumBytes > 7)
      // If offset is > 7 then sp cannot be adjusted in a single instruction,
      // try restoring from fp instead.
      AFI->setShouldRestoreSPFromFP(true);
  }

  if (NumBytes)
    // Insert it after all the callee-save spills.
    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);

  if (STI.isTargetELF() && hasFP(MF))
    MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
                             AFI->getFramePtrSpillOffset());

  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  if (RegInfo->hasBasePointer(MF))
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);

  // If the frame has variable sized objects then the epilogue must restore
  // the sp from fp. We can assume there's an FP here since hasFP already
  // checks for hasVarSizedObjects.
  if (MFI->hasVarSizedObjects())
    AFI->setShouldRestoreSPFromFP(true);
}
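// Illustrative sketch of the rounding used at the top of emitPrologue above.
// Thumb1 sp-relative add/sub immediates are implicitly scaled by 4, so the
// frame size is first rounded up to a word multiple, e.g. 13 -> 16 and
// 16 -> 16. The helper name is hypothetical.
static unsigned roundUpToWord(unsigned NumBytes) {
  return (NumBytes + 3) & ~3u;
}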
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
///
void PEI::replaceFrameIndices(MachineFunction &Fn) {
  if (!Fn.getFrameInfo()->hasStackObjects())
    return; // Nothing to do?

  const TargetMachine &TM = Fn.getTarget();
  assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
  const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
  const TargetFrameInfo *TFI = TM.getFrameInfo();
  bool StackGrowsDown =
      TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
  int FrameSetupOpcode = TRI.getCallFrameSetupOpcode();
  int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();

  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E;
       ++BB) {
    int SPAdj = 0; // SP offset due to call frame setup / destroy.
    if (RS && !FrameIndexVirtualScavenging)
      RS->enterBasicBlock(BB);

    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end();) {
      if (I->getOpcode() == FrameSetupOpcode ||
          I->getOpcode() == FrameDestroyOpcode) {
        // Remember how much SP has been adjusted to create the call
        // frame.
        int Size = I->getOperand(0).getImm();

        if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
            (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
          Size = -Size;

        SPAdj += Size;

        MachineBasicBlock::iterator PrevI = BB->end();
        if (I != BB->begin())
          PrevI = prior(I);
        TRI.eliminateCallFramePseudoInstr(Fn, *BB, I);

        // Visit the instructions created by eliminateCallFramePseudoInstr().
        if (PrevI == BB->end())
          I = BB->begin(); // The replaced instr was the first in the block.
        else
          I = next(PrevI);
        continue;
      }

      MachineInstr *MI = I;
      bool DoIncr = true;
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
        if (MI->getOperand(i).isFI()) {
          // Some instructions (e.g. inline asm instructions) can have
          // multiple frame indices and/or cause eliminateFrameIndex
          // to insert more than one instruction. We need the register
          // scavenger to go through all of these instructions so that
          // it can update its register information. We keep the
          // iterator at the point before insertion so that we can
          // revisit them in full.
          bool AtBeginning = (I == BB->begin());
          if (!AtBeginning) --I;

          // If this instruction has a FrameIndex operand, we need to
          // use that target machine register info object to eliminate
          // it.
          int Value;
          unsigned VReg =
              TRI.eliminateFrameIndex(MI, SPAdj, &Value,
                                      FrameIndexVirtualScavenging ? NULL : RS);
          if (VReg) {
            assert(FrameIndexVirtualScavenging &&
                   "Not scavenging, but virtual returned from "
                   "eliminateFrameIndex()!");
            FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj);
          }

          // Reset the iterator if we were at the beginning of the BB.
          if (AtBeginning) {
            I = BB->begin();
            DoIncr = false;
          }

          MI = 0;
          break;
        }

      if (DoIncr && I != BB->end()) ++I;

      // Update register states.
      if (RS && !FrameIndexVirtualScavenging && MI)
        RS->forward(MI);
    }

    assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
  }
}
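// Worked trace (illustrative) of the SPAdj bookkeeping above on a
// down-growing stack: a call-frame setup of 32 bytes moves SP down, so the
// pending adjustment passed to eliminateFrameIndex becomes +32 inside the
// call sequence and returns to 0 at the matching destroy; the Size negation
// handles the mirrored cases (setup on an up-growing stack, destroy on a
// down-growing one), which is why the per-block assert requires SPAdj == 0.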
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
///
bool PEI::runOnMachineFunction(MachineFunction &Fn) {
  const Function *F = Fn.getFunction();
  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
  RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);

  // Get MachineModuleInfo so that we can track the construction of the
  // frame.
  if (MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>())
    Fn.getFrameInfo()->setMachineModuleInfo(MMI);

  // Calculate the MaxCallFrameSize and HasCalls variables for the function's
  // frame information. Also eliminates call frame pseudo instructions.
  calculateCallsInformation(Fn);

  // Allow the target machine to make some adjustments to the function
  // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
  TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);

  // Scan the function for modified callee saved registers and insert spill
  // code for any callee saved registers that are modified.
  calculateCalleeSavedRegisters(Fn);

  // Determine placement of CSR spill/restore code:
  //  - with shrink wrapping, place spills and restores to tightly
  //    enclose regions in the Machine CFG of the function where
  //    they are used.
  //  - default (no shrink wrapping), place all spills in the
  //    entry block, all restores in return blocks.
  placeCSRSpillsAndRestores(Fn);

  // Add the code to save and restore the callee saved registers.
  if (!F->hasFnAttr(Attribute::Naked))
    insertCSRSpillsAndRestores(Fn);

  // Allow the target machine to make final modifications to the function
  // before the frame layout is finalized.
  TRI->processFunctionBeforeFrameFinalized(Fn);

  // Calculate actual frame offsets for all abstract stack objects...
  calculateFrameObjectOffsets(Fn);

  // Add prolog and epilog code to the function.  This function is required
  // to align the stack frame as necessary for any stack variables or
  // called functions.  Because of this, calculateCalleeSavedRegisters
  // must be called before this function in order to set the HasCalls
  // and MaxCallFrameSize variables.
  if (!F->hasFnAttr(Attribute::Naked))
    insertPrologEpilogCode(Fn);

  // Replace all MO_FrameIndex operands with physical register references
  // and actual offsets.
  replaceFrameIndices(Fn);

  // If register scavenging is needed, as we've enabled doing it as a
  // post-pass, scavenge the virtual registers that frame index elimination
  // inserted.
  if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
    scavengeFrameVirtualRegs(Fn);

  delete RS;
  clearAllSets();
  return true;
}
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of
/// the abstract stack objects.
///
void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
  const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();

  bool StackGrowsDown =
      TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;

  // Loop over all of the stack objects, assigning sequential addresses...
  MachineFrameInfo *FFI = Fn.getFrameInfo();

  unsigned MaxAlign = 1;

  // Start at the beginning of the local area.
  // The Offset is the distance from the stack top in the direction
  // of stack growth -- so it's always nonnegative.
  int LocalAreaOffset = TFI.getOffsetOfLocalArea();
  if (StackGrowsDown)
    LocalAreaOffset = -LocalAreaOffset;
  assert(LocalAreaOffset >= 0 &&
         "Local area offset should be in direction of stack growth");
  int64_t Offset = LocalAreaOffset;

  // If there are fixed sized objects that are preallocated in the local area,
  // non-fixed objects can't be allocated right at the start of local area.
  // We currently don't support filling in holes in between fixed sized
  // objects, so we adjust 'Offset' to point to the end of last fixed sized
  // preallocated object.
  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
    int64_t FixedOff;
    if (StackGrowsDown) {
      // The maximum distance from the stack pointer is at lower address of
      // the object -- which is given by offset. For down growing stack
      // the offset is negative, so we negate the offset to get the distance.
      FixedOff = -FFI->getObjectOffset(i);
    } else {
      // The maximum distance from the start pointer is at the upper
      // address of the object.
      FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i);
    }
    if (FixedOff > Offset) Offset = FixedOff;
  }

  // First assign frame offsets to stack objects that are used to spill
  // callee saved registers.
  if (StackGrowsDown) {
    for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
      // If the stack grows down, we need to add the size to find the lowest
      // address of the object.
      Offset += FFI->getObjectSize(i);

      unsigned Align = FFI->getObjectAlignment(i);
      // If the alignment of this object is greater than that of the stack,
      // then increase the stack alignment to match.
      MaxAlign = std::max(MaxAlign, Align);
      // Adjust to alignment boundary
      Offset = (Offset+Align-1)/Align*Align;

      FFI->setObjectOffset(i, -Offset);        // Set the computed offset
    }
  } else {
    int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
    for (int i = MaxCSFI; i >= MinCSFI ; --i) {
      unsigned Align = FFI->getObjectAlignment(i);
      // If the alignment of this object is greater than that of the stack,
      // then increase the stack alignment to match.
      MaxAlign = std::max(MaxAlign, Align);
      // Adjust to alignment boundary
      Offset = (Offset+Align-1)/Align*Align;

      FFI->setObjectOffset(i, Offset);
      Offset += FFI->getObjectSize(i);
    }
  }

  // Make sure the special register scavenging spill slot is closest to the
  // frame pointer if a frame pointer is required.
  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
  if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) {
    int SFI = RS->getScavengingFrameIndex();
    if (SFI >= 0)
      AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
  }

  // Make sure that the stack protector comes before the local variables on
  // the stack.
  if (FFI->getStackProtectorIndex() >= 0)
    AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown,
                      Offset, MaxAlign);

  // Then assign frame offsets to stack objects that are not used to spill
  // callee saved registers.
  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
    if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
      continue;
    if (RS && (int)i == RS->getScavengingFrameIndex())
      continue;
    if (FFI->isDeadObjectIndex(i))
      continue;
    if (FFI->getStackProtectorIndex() == (int)i)
      continue;

    AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign);
  }

  // Make sure the special register scavenging spill slot is closest to the
  // stack pointer.
  if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) {
    int SFI = RS->getScavengingFrameIndex();
    if (SFI >= 0)
      AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
  }

  if (!RegInfo->targetHandlesStackFrameRounding()) {
    // If we have reserved argument space for call sites in the function
    // immediately on entry to the current function, count it as part of the
    // overall stack size.
    if (FFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn))
      Offset += FFI->getMaxCallFrameSize();

    // Round up the size to a multiple of the alignment.  If the function has
    // any calls or alloca's, align to the target's StackAlignment value to
    // ensure that the callee's frame or the alloca data is suitably aligned;
    // otherwise, for leaf functions, align to the TransientStackAlignment
    // value.
    unsigned StackAlign;
    if (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
        (RegInfo->needsStackRealignment(Fn) && FFI->getObjectIndexEnd() != 0))
      StackAlign = TFI.getStackAlignment();
    else
      StackAlign = TFI.getTransientStackAlignment();
    // If the frame pointer is eliminated, all frame offsets will be relative
    // to SP not FP; align to MaxAlign so this works.
    StackAlign = std::max(StackAlign, MaxAlign);
    unsigned AlignMask = StackAlign - 1;
    Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
  }

  // Update frame info to pretend that this is part of the stack...
  FFI->setStackSize(Offset - LocalAreaOffset);

  // Remember the required stack alignment in case targets need it to perform
  // dynamic stack alignment.
  if (MaxAlign > FFI->getMaxAlignment())
    FFI->setMaxAlignment(MaxAlign);
}
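// Illustrative helper (hypothetical name) showing the alignment idiom used
// repeatedly in the function above: round Offset up to the next multiple of
// Align. E.g. alignUp(20, 16) == 32 and alignUp(32, 16) == 32; for a
// power-of-two Align this equals (Offset + Align - 1) & ~int64_t(Align - 1),
// the bitmask form used in the final frame-size rounding.
static int64_t alignUp(int64_t Offset, unsigned Align) {
  return (Offset + Align - 1) / Align * Align;
}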
void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
  unsigned RegSize = Subtarget.isGP32bit() ? 4 : 8;
  bool HasGP = MipsFI->needGPSaveRestore();

  // Min and Max CSI FrameIndex.
  int MinCSFI = -1, MaxCSFI = -1;

  // See the description at MipsMachineFunction.h
  int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1;

  // Replace the dummy '0' SPOffset by the negative offsets, as explained in
  // LowerFormalArguments. Leaving '0' for a while is necessary to avoid the
  // approach done by calculateFrameObjectOffsets to the stack frame.
  MipsFI->adjustLoadArgsFI(MFI);
  MipsFI->adjustStoreVarArgsFI(MFI);

  // It happens that the default stack frame allocation order does not
  // directly map to the convention used for Mips, so we must fix it. We move
  // the callee-save register slots after the local variables area, as
  // described in the stack frame above.
  unsigned CalleeSavedAreaSize = 0;
  if (!CSI.empty()) {
    MinCSFI = CSI[0].getFrameIdx();
    MaxCSFI = CSI[CSI.size()-1].getFrameIdx();
  }
  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
    CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx());

  unsigned StackOffset = HasGP ? (MipsFI->getGPStackOffset()+RegSize)
                               : (Subtarget.isABI_O32() ? 16 : 0);

  // Adjust local variables. They should come on the stack right
  // after the arguments.
  int LastOffsetFI = -1;
  for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
    if (i >= MinCSFI && i <= MaxCSFI)
      continue;
    if (MFI->isDeadObjectIndex(i))
      continue;
    unsigned Offset =
        StackOffset + MFI->getObjectOffset(i) - CalleeSavedAreaSize;
    if (LastOffsetFI == -1)
      LastOffsetFI = i;
    if (Offset > MFI->getObjectOffset(LastOffsetFI))
      LastOffsetFI = i;
    MFI->setObjectOffset(i, Offset);
  }

  // Adjust CPU Callee Saved Registers Area. Registers RA and FP must
  // be saved in this CPU Area. This whole area must be aligned to the
  // default Stack Alignment requirements.
  if (LastOffsetFI >= 0)
    StackOffset = MFI->getObjectOffset(LastOffsetFI) +
                  MFI->getObjectSize(LastOffsetFI);
  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);

  for (unsigned i = 0, e = CSI.size(); i != e ; ++i) {
    if (CSI[i].getRegClass() != Mips::CPURegsRegisterClass)
      break;
    MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
    TopCPUSavedRegOff = StackOffset;
    StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
  }

  // Stack locations for FP and RA. If only one of them is used,
  // the space must be allocated for both, otherwise no space at all.
  if (hasFP(MF) || MFI->hasCalls()) {
    // FP stack location
    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
                         StackOffset);
    MipsFI->setFPStackOffset(StackOffset);
    TopCPUSavedRegOff = StackOffset;
    StackOffset += RegSize;

    // SP stack location
    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
                         StackOffset);
    MipsFI->setRAStackOffset(StackOffset);
    StackOffset += RegSize;

    if (MFI->hasCalls())
      TopCPUSavedRegOff += RegSize;
  }

  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);

  // Adjust FPU Callee Saved Registers Area. This Area must be
  // aligned to the default Stack Alignment requirements.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass)
      continue;
    MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
    TopFPUSavedRegOff = StackOffset;
    StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
  }
  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);

  // Update frame info
  MFI->setStackSize(StackOffset);

  // Recalculate the final tops offset. The final values must be '0'
  // if there isn't a callee saved register for CPU or FPU, otherwise
  // a negative offset is needed.
  if (TopCPUSavedRegOff >= 0)
    MipsFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);

  if (TopFPUSavedRegOff >= 0)
    MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset);
}
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64InstrInfo *TII =
      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  DebugLoc DL = MBBI->getDebugLoc();
  unsigned RetOpcode = MBBI->getOpcode();

  int NumBytes = MFI->getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note
  // that in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments; this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (NumRestores * 16) |         |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.
  NumBytes += ArgumentPopSize;

  unsigned NumRestores = 0;
  // Move past the restores of the callee-saved registers.
  MachineBasicBlock::iterator LastPopI = MBBI;
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  if (LastPopI != MBB.begin()) {
    do {
      ++NumRestores;
      --LastPopI;
    } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
    if (!isCSRestore(LastPopI, CSRegs)) {
      ++LastPopI;
      --NumRestores;
    }
  }
  NumBytes -= NumRestores * 16;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes,
                      TII);
    return;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
}
// hasFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas
// or if frame pointer elimination is disabled.
bool MipsRegisterInfo::
hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  return NoFramePointerElim || MFI->hasVarSizedObjects();
}
void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
    MachineFunction &MF, RegScavenger *RS) const {
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  SmallVector<unsigned, 4> UnspilledCSGPRs;
  SmallVector<unsigned, 4> UnspilledCSFPRs;

  // The frame record needs to be created by saving the appropriate registers.
  if (hasFP(MF)) {
    MRI->setPhysRegUsed(AArch64::FP);
    MRI->setPhysRegUsed(AArch64::LR);
  }

  // Spill the BasePtr if it's used. Do this first thing so that the
  // getCalleeSavedRegs() below will get the right answer.
  if (RegInfo->hasBasePointer(MF))
    MRI->setPhysRegUsed(RegInfo->getBaseRegister());

  // If any callee-saved registers are used, the frame cannot be eliminated.
  unsigned NumGPRSpilled = 0;
  unsigned NumFPRSpilled = 0;
  bool ExtraCSSpill = false;
  bool CanEliminateFrame = true;
  DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:");
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);

  // Check pairs of consecutive callee-saved registers.
  for (unsigned i = 0; CSRegs[i]; i += 2) {
    assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");

    const unsigned OddReg = CSRegs[i];
    const unsigned EvenReg = CSRegs[i + 1];
    assert((AArch64::GPR64RegClass.contains(OddReg) &&
            AArch64::GPR64RegClass.contains(EvenReg)) ^
               (AArch64::FPR64RegClass.contains(OddReg) &&
                AArch64::FPR64RegClass.contains(EvenReg)) &&
           "Register class mismatch!");

    const bool OddRegUsed = MRI->isPhysRegUsed(OddReg);
    const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg);

    // Early exit if none of the registers in the register pair is actually
    // used.
    if (!OddRegUsed && !EvenRegUsed) {
      if (AArch64::GPR64RegClass.contains(OddReg)) {
        UnspilledCSGPRs.push_back(OddReg);
        UnspilledCSGPRs.push_back(EvenReg);
      } else {
        UnspilledCSFPRs.push_back(OddReg);
        UnspilledCSFPRs.push_back(EvenReg);
      }
      continue;
    }

    unsigned Reg = AArch64::NoRegister;
    // If only one of the registers of the register pair is used, make sure to
    // mark the other one as used as well.
    if (OddRegUsed ^ EvenRegUsed) {
      // Find out which register is the additional spill.
      Reg = OddRegUsed ? EvenReg : OddReg;
      MRI->setPhysRegUsed(Reg);
    }

    DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
    DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));

    assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
            (RegInfo->getEncodingValue(OddReg) + 1 ==
             RegInfo->getEncodingValue(EvenReg))) &&
           "Register pair of non-adjacent registers!");

    if (AArch64::GPR64RegClass.contains(OddReg)) {
      NumGPRSpilled += 2;
      // If it's not a reserved register, we can use it in lieu of an
      // emergency spill slot for the register scavenger.
      // FIXME: It would be better to instead keep looking and choose another
      // unspilled register that isn't reserved, if there is one.
      if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
        ExtraCSSpill = true;
    } else
      NumFPRSpilled += 2;

    CanEliminateFrame = false;
  }

  // FIXME: Set BigStack if any stack slot references may be out of range.
  // For now, just conservatively guesstimate based on unscaled indexing
  // range. We'll end up allocating an unnecessary spill slot a lot, but
  // realistically that's not a big deal at this stage of the game.
  // The CSR spill slots have not been allocated yet, so estimateStackSize
  // won't include them.
  MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned CFSize =
      estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
  DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
  bool BigStack = (CFSize >= 256);
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    AFI->setHasStackFrame(true);

  // Estimate if we might need to scavenge a register at some point in order
  // to materialize a stack offset. If so, either spill one additional
  // callee-saved register or reserve a special spill slot to facilitate
  // register scavenging. If we already spilled an extra callee-saved register
  // above to keep the number of spills even, we don't need to do anything
  // else here.
  if (BigStack && !ExtraCSSpill) {
    // If we're adding a register to spill here, we have to add two of them
    // to keep the number of regs to spill even.
    assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
    unsigned Count = 0;
    while (!UnspilledCSGPRs.empty() && Count < 2) {
      unsigned Reg = UnspilledCSGPRs.back();
      UnspilledCSGPRs.pop_back();
      DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
                   << " to get a scratch register.\n");
      MRI->setPhysRegUsed(Reg);
      ExtraCSSpill = true;
      ++Count;
    }

    // If we didn't find an extra callee-saved register to spill, create
    // an emergency spill slot.
    if (!ExtraCSSpill) {
      const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
      int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
      RS->addScavengingFrameIndex(FI);
      DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                   << " as the emergency spill slot.\n");
    }
  }
}
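// The 256-byte BigStack threshold above corresponds to the reach of AArch64's
// unscaled load/store forms (LDUR/STUR), whose immediate offset is 9 bits
// signed, i.e. in [-256, 255]. Once the estimated frame may exceed that
// range, materializing a stack offset can require a scratch register, hence
// the extra callee-saved spill or the emergency scavenging slot set up above.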
void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
                                     const MachineFunction &MF,
                                     ModuleSlotTracker &MST) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();

  // Process fixed stack objects.
  unsigned ID = 0;
  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
    if (MFI.isDeadObjectIndex(I))
      continue;

    yaml::FixedMachineStackObject YamlObject;
    YamlObject.ID = ID;
    YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
                          ? yaml::FixedMachineStackObject::SpillSlot
                          : yaml::FixedMachineStackObject::DefaultType;
    YamlObject.Offset = MFI.getObjectOffset(I);
    YamlObject.Size = MFI.getObjectSize(I);
    YamlObject.Alignment = MFI.getObjectAlignment(I);
    YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
    YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
    YMF.FixedStackObjects.push_back(YamlObject);
    StackObjectOperandMapping.insert(
        std::make_pair(I, FrameIndexOperand::createFixed(ID++)));
  }

  // Process ordinary stack objects.
  ID = 0;
  for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I) {
    if (MFI.isDeadObjectIndex(I))
      continue;

    yaml::MachineStackObject YamlObject;
    YamlObject.ID = ID;
    if (const auto *Alloca = MFI.getObjectAllocation(I))
      YamlObject.Name.Value =
          Alloca->hasName() ? Alloca->getName() : "<unnamed alloca>";
    YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
                          ? yaml::MachineStackObject::SpillSlot
                          : MFI.isVariableSizedObjectIndex(I)
                                ? yaml::MachineStackObject::VariableSized
                                : yaml::MachineStackObject::DefaultType;
    YamlObject.Offset = MFI.getObjectOffset(I);
    YamlObject.Size = MFI.getObjectSize(I);
    YamlObject.Alignment = MFI.getObjectAlignment(I);

    YMF.StackObjects.push_back(YamlObject);
    StackObjectOperandMapping.insert(std::make_pair(
        I, FrameIndexOperand::create(YamlObject.Name.Value, ID++)));
  }

  for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
    yaml::StringValue Reg;
    printReg(CSInfo.getReg(), Reg, TRI);
    auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
    assert(StackObjectInfo != StackObjectOperandMapping.end() &&
           "Invalid stack object index");
    const FrameIndexOperand &StackObject = StackObjectInfo->second;
    if (StackObject.IsFixed)
      YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
    else
      YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
  }

  for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
    auto LocalObject = MFI.getLocalFrameObjectMap(I);
    auto StackObjectInfo = StackObjectOperandMapping.find(LocalObject.first);
    assert(StackObjectInfo != StackObjectOperandMapping.end() &&
           "Invalid stack object index");
    const FrameIndexOperand &StackObject = StackObjectInfo->second;
    assert(!StackObject.IsFixed && "Expected a locally mapped stack object");
    YMF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second;
  }

  // Print the stack object references in the frame information class after
  // converting the stack objects.
  if (MFI.hasStackProtectorIndex()) {
    raw_string_ostream StrOS(YMF.FrameInfo.StackProtector.Value);
    MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
        .printStackObjectReference(MFI.getStackProtectorIndex());
  }

  // Print the debug variable information.
  for (const MachineFunction::VariableDbgInfo &DebugVar :
       MF.getVariableDbgInfo()) {
    auto StackObjectInfo = StackObjectOperandMapping.find(DebugVar.Slot);
    assert(StackObjectInfo != StackObjectOperandMapping.end() &&
           "Invalid stack object index");
    const FrameIndexOperand &StackObject = StackObjectInfo->second;
    assert(!StackObject.IsFixed && "Expected a non-fixed stack object");
    auto &Object = YMF.StackObjects[StackObject.ID];
    {
      raw_string_ostream StrOS(Object.DebugVar.Value);
      DebugVar.Var->printAsOperand(StrOS, MST);
    }
    {
      raw_string_ostream StrOS(Object.DebugExpr.Value);
      DebugVar.Expr->printAsOperand(StrOS, MST);
    }
    {
      raw_string_ostream StrOS(Object.DebugLoc.Value);
      DebugVar.Loc->printAsOperand(StrOS, MST);
    }
  }
}
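// For orientation, the YAML these conversions feed appears in printed MIR
// roughly as follows (illustrative values only; the exact set of keys
// emitted depends on the object and the LLVM version):
//
//   fixedStack:
//     - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16,
//         callee-saved-register: '%lr' }
//   stack:
//     - { id: 0, name: buf, offset: -32, size: 16, alignment: 8 }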
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}
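// Illustrative effect of returning true here: the call-frame setup/teardown
// around each call site can be folded away, so instead of
//
//   sub sp, sp, #16      // materialize outgoing-argument space
//   bl  callee
//   add sp, sp, #16      // tear it down again
//
// the maximum outgoing-argument area is carved out once in the prologue and
// released once in the epilogue.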
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
  // SGPR register spill
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                         &AMDGPU::SGPR_32RegClass, i);
      struct SIMachineFunctionInfo::SpilledReg Spill =
          MFI->getSpilledReg(MF, Index, i);

      if (Spill.VGPR == AMDGPU::NoRegister) {
        LLVMContext &Ctx = MF->getFunction()->getContext();
        Ctx.emitError("Ran out of VGPRs for spilling SGPR");
      }

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
          .addReg(SubReg)
          .addImm(Spill.Lane);
    }
    MI->eraseFromParent();
    break;
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
                                         &AMDGPU::SGPR_32RegClass, i);
      struct SIMachineFunctionInfo::SpilledReg Spill =
          MFI->getSpilledReg(MF, Index, i);

      if (Spill.VGPR == AMDGPU::NoRegister) {
        LLVMContext &Ctx = MF->getFunction()->getContext();
        Ctx.emitError("Ran out of VGPRs for spilling SGPR");
      }

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
          .addReg(Spill.VGPR)
          .addImm(Spill.Lane);
    }
    TII->insertNOPs(MI, 3);
    MI->eraseFromParent();
    break;
  }

  // VGPR register spill
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V32_SAVE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
    unsigned SrcReg = MI->getOperand(0).getReg();
    int64_t Offset = FrameInfo->getObjectOffset(Index);
    unsigned Size = NumSubRegs * 4;
    unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);

    for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
      unsigned SubReg = NumSubRegs > 1 ?
          getPhysRegSubReg(SrcReg, &AMDGPU::VGPR_32RegClass, i) : SrcReg;
      Offset += (i * 4);
      MFI->LDSWaveSpillSize = std::max((unsigned)Offset + 4,
                                       (unsigned)MFI->LDSWaveSpillSize);

      unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
                                                       Offset, Size);

      if (AddrReg == AMDGPU::NoRegister) {
        LLVMContext &Ctx = MF->getFunction()->getContext();
        Ctx.emitError("Ran out of VGPRs for spilling VGPRs");
        AddrReg = AMDGPU::VGPR0;
      }

      // Store the value in LDS
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_WRITE_B32))
          .addImm(0)                        // gds
          .addReg(AddrReg, RegState::Kill)  // addr
          .addReg(SubReg)                   // data0
          .addImm(0);                       // offset
    }
    MI->eraseFromParent();
    break;
  }

  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_V512_RESTORE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
    unsigned DstReg = MI->getOperand(0).getReg();
    int64_t Offset = FrameInfo->getObjectOffset(Index);
    unsigned Size = NumSubRegs * 4;
    unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);

    // FIXME: We could use DS_READ_B64 here to optimize for larger registers.
    for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
      unsigned SubReg = NumSubRegs > 1 ?
          getPhysRegSubReg(DstReg, &AMDGPU::VGPR_32RegClass, i) : DstReg;

      Offset += (i * 4);
      unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
                                                       Offset, Size);
      if (AddrReg == AMDGPU::NoRegister) {
        LLVMContext &Ctx = MF->getFunction()->getContext();
        Ctx.emitError("Ran out of VGPRs for spilling VGPRs");
        AddrReg = AMDGPU::VGPR0;
      }

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_READ_B32), SubReg)
          .addImm(0)                        // gds
          .addReg(AddrReg, RegState::Kill)  // addr
          .addImm(0);                       // offset
    }
    MI->eraseFromParent();
    break;
  }

  default: {
    int64_t Offset = FrameInfo->getObjectOffset(Index);
    FIOp.ChangeToImmediate(Offset);
    if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VReg_32RegClass, MI,
                                             SPAdj);
      BuildMI(*MBB, MI, MI->getDebugLoc(),
              TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
          .addImm(Offset);
      FIOp.ChangeToRegister(TmpReg, false);
    }
  }
  }
}
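// Illustrative shape of the SGPR spill lowering above for, e.g.,
// SI_SPILL_S128_SAVE of s[8:11] (NumSubRegs == 4). Assuming getSpilledReg
// hands back lanes 0..3 of v63 (a hypothetical assignment), the pseudo
// expands to:
//   v_writelane_b32 v63, s8, 0
//   v_writelane_b32 v63, s9, 1
//   v_writelane_b32 v63, s10, 2
//   v_writelane_b32 v63, s11, 3
// and the matching RESTORE replays the same mapping with v_readlane_b32.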
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    if (!NumBytes)
      return;

    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (canUseRedZone(MF))
      ++NumRedZoneFunctions;
    else {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Label used to tie together the PROLOG_LABEL and the MachineMoves.
      MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  NumBytes -= AFI->getCalleeSavedStackSize();
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes);

  // Move past the saves of the callee-saved registers.
  MachineBasicBlock::iterator End = MBB.end();
  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup))
    ++MBBI;

  if (HasFP) {
    // Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
    int FPOffset = AFI->getCalleeSavedStackSize() - 16;

    // Issue    sub fp, sp, FPOffset
    //       or mov fp, sp    when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
    unsigned scratchSPReg = AArch64::SP;

    if (NeedsRealignment) {
      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
      assert(scratchSPReg != AArch64::NoRegister);
    }

    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
      emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

    if (NeedsRealignment) {
      const unsigned Alignment = MFI->getMaxAlignment();
      const unsigned NrBitsToZero = countTrailingZeros(Alignment);
      assert(NrBitsToZero > 1);
      assert(scratchSPReg != AArch64::SP);

      // SUB X9, SP, NumBytes
      //   -- X9 is a temporary register, so it shouldn't contain any live
      //      data here and is free to use. This is already produced by
      //      emitFrameOffset above.
      // AND SP, X9, 0b11111...0000
      // The logical immediates have a non-trivial encoding. The following
      // formula computes the encoded immediate with all ones but
      // NrBitsToZero zero bits as least significant bits (see the worked
      // encoding example after this function).
      uint32_t andMaskEncoded =
          (1 << 12)                         // = N
          | ((64 - NrBitsToZero) << 6)      // immr
          | ((64 - NrBitsToZero - 1) << 0); // imms

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(scratchSPReg, RegState::Kill)
          .addImm(andMaskEncoded);
      AFI->setStackRealigned(true);
    }
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    //  Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    //  Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |               Frame Pointer               |
    // 10024 |               Frame Pointer               |
    //       +-------------------------------------------+
    // 10028 |               Link Register               |
    // 1002c |               Link Register               |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //  [sp] = 10030        ::    >>initial value<<
    //  sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //  fp = sp == 10020    ::  mov fp, sp
    //  [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //  sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020.
    // If we use an offset of '16' from 'w29', we get the CFI offsets of
    // -8 for w30, -16 for w29, -24 for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have (including
    // FP, LR if those are saved).
    emitCalleeSavedFrameMoves(MBB, MBBI);
  }
}
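// Worked example of the ANDXri mask encoding in emitPrologue above, for a
// 16-byte realignment (Alignment == 16, so NrBitsToZero == 4):
//   andMaskEncoded = (1 << 12) | ((64 - 4) << 6) | (64 - 4 - 1)
//                  = 0x1000 | 0xF00 | 0x3B = 0x1F3B
// That decodes as N=1, immr=60, imms=59: a 64-bit element of sixty 1-bits
// rotated right by 60 (equivalently left by 4), i.e. 0xFFFFFFFFFFFFFFF0:
// all ones except the low four bits, exactly the mask that rounds SP down
// to a 16-byte boundary.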
bool
SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
  return Fn.getFrameInfo()->hasStackObjects();
}
/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
/// registers.
void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
  const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
  MachineFrameInfo *FFI = Fn.getFrameInfo();

  // Get the callee saved register list...
  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);

  // These are used to keep track of the callee-save area. Initialize them.
  MinCSFrameIndex = INT_MAX;
  MaxCSFrameIndex = 0;

  // Early exit for targets which have no callee saved registers.
  if (CSRegs == 0 || CSRegs[0] == 0)
    return;

  // Figure out which *callee saved* registers are modified by the current
  // function, thus needing to be saved and restored in the prolog/epilog.
  const TargetRegisterClass * const *CSRegClasses =
      RegInfo->getCalleeSavedRegClasses(&Fn);

  std::vector<CalleeSavedInfo> CSI;
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
      // If the reg is modified, save it!
      CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
    } else {
      for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
           *AliasSet; ++AliasSet) { // Check alias registers too.
        if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
          CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
          break;
        }
      }
    }
  }

  if (CSI.empty())
    return; // Early exit if no callee saved registers are modified!

  unsigned NumFixedSpillSlots;
  const TargetFrameInfo::SpillSlot *FixedSpillSlots =
      TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);

  // Now that we know which registers need to be saved and restored, allocate
  // stack slots for them.
  for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end();
       I != E; ++I) {
    unsigned Reg = I->getReg();
    const TargetRegisterClass *RC = I->getRegClass();

    int FrameIdx;
    if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) {
      I->setFrameIdx(FrameIdx);
      continue;
    }

    // Check to see if this physreg must be spilled to a particular stack slot
    // on this target.
    const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots;
    while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
           FixedSlot->Reg != Reg)
      ++FixedSlot;

    if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
      // Nope, just spill it anywhere convenient.
      unsigned Align = RC->getAlignment();
      unsigned StackAlign = TFI->getStackAlignment();

      // We may not be able to satisfy the desired alignment specification of
      // the TargetRegisterClass if the stack alignment is smaller. Use the
      // min.
      Align = std::min(Align, StackAlign);
      FrameIdx = FFI->CreateStackObject(RC->getSize(), Align, true);
      if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
      if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
    } else {
      // Spill it to the stack where we must.
      FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset,
                                        true, false);
    }

    I->setFrameIdx(FrameIdx);
  }

  FFI->setCalleeSavedInfo(CSI);
}
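// Worked example of the alignment clamp above: spilling a register class
// that prefers 16-byte alignment on a target whose stack is only 8-byte
// aligned gives Align = std::min(16u, 8u) == 8, so CreateStackObject never
// requests more alignment than the stack itself can guarantee here.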