/// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const { MachineFrameInfo *MFI = MF.getFrameInfo(); // Get the number of bytes to allocate from the FrameInfo. unsigned FrameSize = MFI->getStackSize(); // Get the alignments provided by the target. unsigned TargetAlign = MF.getSubtarget().getFrameLowering()->getStackAlignment(); // Get the maximum call frame size of all the calls. unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); // If we have dynamic alloca then maxCallFrameSize needs to be aligned so // that allocations will be aligned. if (MFI->hasVarSizedObjects()) maxCallFrameSize = RoundUpToAlignment(maxCallFrameSize, TargetAlign); // Update maximum call frame size. MFI->setMaxCallFrameSize(maxCallFrameSize); // Include call frame size in total. FrameSize += maxCallFrameSize; // Make sure the frame is aligned. FrameSize = RoundUpToAlignment(FrameSize, TargetAlign); // Update frame info. MFI->setStackSize(FrameSize); }
void SystemZRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { // Determine whether R15/R14 will ever be clobbered inside the function. And // if yes - mark it as 'callee' saved. MachineFrameInfo *FFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); // Check whether high FPRs are ever used, if yes - we need to save R15 as // well. static const unsigned HighFPRs[] = { SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L, SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L, SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S, }; bool HighFPRsUsed = false; for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i) HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]); if (FFI->hasCalls()) /* FIXME: function is varargs */ /* FIXME: function grabs RA */ /* FIXME: function calls eh_return */ MRI.setPhysRegUsed(SystemZ::R14D); if (HighFPRsUsed || FFI->hasCalls() || FFI->getObjectIndexEnd() != 0 || // Contains automatic variables FFI->hasVarSizedObjects() // Function calls dynamic alloca's /* FIXME: function is varargs */) MRI.setPhysRegUsed(SystemZ::R15D); }
void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo()); MachineFrameInfo *MFI = MF.getFrameInfo(); const AArch64InstrInfo &TII = *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo()); if (hasFP(MF)) { MF.getRegInfo().setPhysRegUsed(AArch64::X29); MF.getRegInfo().setPhysRegUsed(AArch64::X30); } // If addressing of local variables is going to be more complicated than // shoving a base register and an offset into the instruction then we may well // need to scavenge registers. We should either specifically add an // callee-save register for this purpose or allocate an extra spill slot. bool BigStack = MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF) || MFI->hasVarSizedObjects() // Access will be from X29: messes things up || (MFI->adjustsStack() && !hasReservedCallFrame(MF)); if (!BigStack) return; // We certainly need some slack space for the scavenger, preferably an extra // register. const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(); MCPhysReg ExtraReg = AArch64::NoRegister; for (unsigned i = 0; CSRegs[i]; ++i) { if (AArch64::GPR64RegClass.contains(CSRegs[i]) && !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) { ExtraReg = CSRegs[i]; break; } } if (ExtraReg != 0) { MF.getRegInfo().setPhysRegUsed(ExtraReg); } else { assert(RS && "Expect register scavenger to be available"); // Create a stack slot for scavenging purposes. PrologEpilogInserter // helpfully places it near either SP or FP for us to avoid // infinitely-regression during scavenging. const TargetRegisterClass *RC = &AArch64::GPR64RegClass; RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); } }
// Generates the following sequence for function entry: // st %fp,-4[*%sp] !push old FP // add %sp,8,%fp !generate new FP // sub %sp,0x4,%sp !allocate stack space (as needed) void LanaiFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo *MFI = MF.getFrameInfo(); const LanaiInstrInfo &LII = *static_cast<const LanaiInstrInfo *>(STI.getInstrInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc DL; // Determine the correct frame layout determineFrameLayout(MF); // FIXME: This appears to be overallocating. Needs investigation. // Get the number of bytes to allocate from the FrameInfo. unsigned StackSize = MFI->getStackSize(); // Push old FP // st %fp,-4[*%sp] BuildMI(MBB, MBBI, DL, LII.get(Lanai::SW_RI)) .addReg(Lanai::FP) .addReg(Lanai::SP) .addImm(-4) .addImm(LPAC::makePreOp(LPAC::ADD)) .setMIFlag(MachineInstr::FrameSetup); // Generate new FP // add %sp,8,%fp BuildMI(MBB, MBBI, DL, LII.get(Lanai::ADD_I_LO), Lanai::FP) .addReg(Lanai::SP) .addImm(8) .setMIFlag(MachineInstr::FrameSetup); // Allocate space on the stack if needed // sub %sp,StackSize,%sp if (StackSize != 0) { BuildMI(MBB, MBBI, DL, LII.get(Lanai::SUB_I_LO), Lanai::SP) .addReg(Lanai::SP) .addImm(StackSize) .setMIFlag(MachineInstr::FrameSetup); } // Replace ADJDYNANALLOC if (MFI->hasVarSizedObjects()) replaceAdjDynAllocPseudo(MF); }
void VideoCore4FrameLowering::determineFrameLayout(MachineFunction& MF) const { MachineFrameInfo* MFI = MF.getFrameInfo(); unsigned alignment = MF.getTarget().getFrameLowering()->getStackAlignment(); unsigned SLsize = MFI->getStackSize(); unsigned Asize = MFI->getMaxCallFrameSize(); if (MFI->hasVarSizedObjects()) Asize = RoundUpToAlignment(Asize, alignment); MFI->setMaxCallFrameSize(Asize); unsigned SLAsize = SLsize + Asize; SLAsize = RoundUpToAlignment(SLAsize, alignment); MFI->setStackSize(SLAsize); }
bool X86SelectionDAGInfo::isBaseRegConflictPossible( SelectionDAG &DAG, ArrayRef<unsigned> ClobberSet) const { // We cannot use TRI->hasBasePointer() until *after* we select all basic // blocks. Legalization may introduce new stack temporaries with large // alignment requirements. Fall back to generic code if there are any // dynamic stack adjustments (hopefully rare) and the base pointer would // conflict if we had to use it. MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); if (!MFI->hasVarSizedObjects() && !MFI->hasOpaqueSPAdjustment()) return false; const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>( DAG.getSubtarget().getRegisterInfo()); unsigned BaseReg = TRI->getBaseRegister(); for (unsigned R : ClobberSet) if (BaseReg == R) return true; return false; }
/// needsFrameBaseReg - Returns true if the instruction's frame index /// reference would be better served by a base register other than FP /// or SP. Used by LocalStackFrameAllocation to determine which frame index /// references it should create new base registers for. bool ARMBaseRegisterInfo:: needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) { assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!"); } // It's the load/store FI references that cause issues, as it can be difficult // to materialize the offset if it won't fit in the literal field. Estimate // based on the size of the local frame and some conservative assumptions // about the rest of the stack frame (note, this is pre-regalloc, so // we don't know everything for certain yet) whether this offset is likely // to be out of range of the immediate. Return true if so. // We only generate virtual base registers for loads and stores, so // return false for everything else. unsigned Opc = MI->getOpcode(); switch (Opc) { case ARM::LDRi12: case ARM::LDRH: case ARM::LDRBi12: case ARM::STRi12: case ARM::STRH: case ARM::STRBi12: case ARM::t2LDRi12: case ARM::t2LDRi8: case ARM::t2STRi12: case ARM::t2STRi8: case ARM::VLDRS: case ARM::VLDRD: case ARM::VSTRS: case ARM::VSTRD: case ARM::tSTRspi: case ARM::tLDRspi: if (ForceAllBaseRegAlloc) return true; break; default: return false; } // Without a virtual base register, if the function has variable sized // objects, all fixed-size local references will be via the frame pointer, // Approximate the offset and see if it's legal for the instruction. // Note that the incoming offset is based on the SP value at function entry, // so it'll be negative. MachineFunction &MF = *MI->getParent()->getParent(); const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); // Estimate an offset from the frame pointer. // Conservatively assume all callee-saved registers get pushed. R4-R6 // will be earlier than the FP, so we ignore those. // R7, LR int64_t FPOffset = Offset - 8; // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15 if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction()) FPOffset -= 80; // Estimate an offset from the stack pointer. // The incoming offset is relating to the SP at the start of the function, // but when we access the local it'll be relative to the SP after local // allocation, so adjust our SP-relative offset by that allocation size. Offset = -Offset; Offset += MFI->getLocalFrameSize(); // Assume that we'll have at least some spill slots allocated. // FIXME: This is a total SWAG number. We should run some statistics // and pick a real one. Offset += 128; // 128 bytes of spill slots // If there is a frame pointer, try using it. // The FP is only available if there is no dynamic realignment. We // don't know for sure yet whether we'll need that, so we guess based // on whether there are any local variables that would trigger it. unsigned StackAlign = TFI->getStackAlignment(); if (TFI->hasFP(MF) && !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) { if (isFrameOffsetLegal(MI, FPOffset)) return false; } // If we can reference via the stack pointer, try that. // FIXME: This (and the code that resolves the references) can be improved // to only disallow SP relative references in the live range of // the VLA(s). In practice, it's unclear how much difference that // would make, but it may be worth doing. if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset)) return false; // The offset likely isn't legal, we want to allocate a virtual base register. return true; }
void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB MachineFrameInfo *MFI = MF.getFrameInfo(); const Function* Fn = MF.getFunction(); const X86Subtarget* Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>(); MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); MachineBasicBlock::iterator MBBI = MBB.begin(); bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) || !Fn->doesNotThrow() || UnwindTablesMandatory; // Prepare for frame info. unsigned FrameLabelId = 0; // Get the number of bytes to allocate from the FrameInfo. uint64_t StackSize = MFI->getStackSize(); // Get desired stack alignment uint64_t MaxAlign = MFI->getMaxAlignment(); // Add RETADDR move area to callee saved frame size. int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); if (TailCallReturnAddrDelta < 0) X86FI->setCalleeSavedFrameSize( X86FI->getCalleeSavedFrameSize() +(-TailCallReturnAddrDelta)); // If this is x86-64 and the Red Zone is not disabled, if we are a leaf // function, and use up to 128 bytes of stack space, don't have a frame // pointer, calls, or dynamic alloca then we do not need to adjust the // stack pointer (we fit in the Red Zone). if (Is64Bit && !DisableRedZone && !needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->hasCalls()) { // No calls. uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (hasFP(MF)) MinSize += SlotSize; StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); MFI->setStackSize(StackSize); } // Insert stack pointer adjustment for later moving of return addr. Only // applies to tail call optimized functions where the callee argument stack // size is bigger than the callers. if (TailCallReturnAddrDelta < 0) { MachineInstr *MI = BuildMI(MBB, MBBI, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri), StackPtr).addReg(StackPtr).addImm(-TailCallReturnAddrDelta); // The EFLAGS implicit def is dead. MI->getOperand(3).setIsDead(); } uint64_t NumBytes = 0; if (hasFP(MF)) { // Calculate required stack adjustment uint64_t FrameSize = StackSize - SlotSize; if (needsStackRealignment(MF)) FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign; NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); // Get the offset of the stack slot for the EBP register... which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. // Update the frame offset adjustment. MFI->setOffsetAdjustment(-NumBytes); // Save EBP into the appropriate stack slot... BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) .addReg(FramePtr, /*isDef=*/false, /*isImp=*/false, /*isKill=*/true); if (needsFrameMoves) { // Mark effective beginning of when frame pointer becomes valid. FrameLabelId = MMI->NextLabelID(); BuildMI(MBB, MBBI, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId); } // Update EBP with the new base value... BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) .addReg(StackPtr); // Mark the FramePtr as live-in in every block except the entry. for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); I != E; ++I) I->addLiveIn(FramePtr); // Realign stack if (needsStackRealignment(MF)) { MachineInstr *MI = BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr).addReg(StackPtr).addImm(-MaxAlign); // The EFLAGS implicit def is dead. MI->getOperand(3).setIsDead(); } } else NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); unsigned ReadyLabelId = 0; if (needsFrameMoves) { // Mark effective beginning of when frame pointer is ready. ReadyLabelId = MMI->NextLabelID(); BuildMI(MBB, MBBI, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); } // Skip the callee-saved push instructions. while (MBBI != MBB.end() && (MBBI->getOpcode() == X86::PUSH32r || MBBI->getOpcode() == X86::PUSH64r)) ++MBBI; if (NumBytes) { // adjust stack pointer: ESP -= numbytes if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) { // Check, whether EAX is livein for this function bool isEAXAlive = false; for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) { unsigned Reg = II->first; isEAXAlive = (Reg == X86::EAX || Reg == X86::AX || Reg == X86::AH || Reg == X86::AL); } // Function prologue calls _alloca to probe the stack when allocating // more than 4k bytes in one go. Touching the stack at 4K increments is // necessary to ensure that the guard pages used by the OS virtual memory // manager are allocated in correct sequence. if (!isEAXAlive) { BuildMI(MBB, MBBI, TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes); BuildMI(MBB, MBBI, TII.get(X86::CALLpcrel32)) .addExternalSymbol("_alloca"); } else { // Save EAX BuildMI(MBB, MBBI, TII.get(X86::PUSH32r)) .addReg(X86::EAX, /*isDef=*/false, /*isImp=*/false, /*isKill=*/true); // Allocate NumBytes-4 bytes on stack. We'll also use 4 already // allocated bytes for EAX. BuildMI(MBB, MBBI, TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes-4); BuildMI(MBB, MBBI, TII.get(X86::CALLpcrel32)) .addExternalSymbol("_alloca"); // Restore EAX MachineInstr *MI = addRegOffset(BuildMI(MF, TII.get(X86::MOV32rm),X86::EAX), StackPtr, false, NumBytes-4); MBB.insert(MBBI, MI); } } else { // If there is an SUB32ri of ESP immediately before this instruction, // merge the two. This can be the case when tail call elimination is // enabled and the callee has more arguments then the caller. NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); // If there is an ADD32ri or SUB32ri of ESP immediately after this // instruction, merge the two instructions. mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); if (NumBytes) emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); } } if (needsFrameMoves) emitFrameMoves(MF, FrameLabelId, ReadyLabelId); }
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. /// void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo *MFI = Fn.getFrameInfo(); // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction // of stack growth -- so it's always nonnegative. int LocalAreaOffset = TFI.getOffsetOfLocalArea(); if (StackGrowsDown) LocalAreaOffset = -LocalAreaOffset; assert(LocalAreaOffset >= 0 && "Local area offset should be in direction of stack growth"); int64_t Offset = LocalAreaOffset; // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. // We currently don't support filling in holes in between fixed sized // objects, so we adjust 'Offset' to point to the end of last fixed sized // preallocated object. for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { // The maximum distance from the stack pointer is at lower address of // the object -- which is given by offset. For down growing stack // the offset is negative, so we negate the offset to get the distance. FixedOff = -MFI->getObjectOffset(i); } else { // The maximum distance from the start pointer is at the upper // address of the object. FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); } if (FixedOff > Offset) Offset = FixedOff; } // First assign frame offsets to stack objects that are used to spill // callee saved registers. if (StackGrowsDown) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { // If the stack grows down, we need to add the size to find the lowest // address of the object. Offset += MFI->getObjectSize(i); unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; MFI->setObjectOffset(i, -Offset); // Set the computed offset } } else { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; MFI->setObjectOffset(i, Offset); Offset += MFI->getObjectSize(i); } } unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) && !RegInfo->needsStackRealignment(Fn)) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } // FIXME: Once this is working, then enable flag will change to a target // check for whether the frame is large enough to want to use virtual // frame index registers. Functions which don't want/need this optimization // will continue to use the existing code path. if (MFI->getUseLocalStackAllocationBlock()) { unsigned Align = MFI->getLocalFrameMaxAlign(); // Adjust to alignment boundary. Offset = (Offset + Align - 1) / Align * Align; DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); // Resolve offsets for objects in the local block. for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) { std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i); int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second; DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << FIOffset << "]\n"); MFI->setObjectOffset(Entry.first, FIOffset); } // Allocate the local block Offset += MFI->getLocalFrameSize(); MaxAlign = std::max(Align, MaxAlign); } // Make sure that the stack protector comes before the local variables on the // stack. SmallSet<int, 16> LargeStackObjs; if (MFI->getStackProtectorIndex() >= 0) { AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign); // Assign large stack objects first. for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (MFI->isObjectPreAllocated(i) && MFI->getUseLocalStackAllocationBlock()) continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && (int)i == RS->getScavengingFrameIndex()) continue; if (MFI->isDeadObjectIndex(i)) continue; if (MFI->getStackProtectorIndex() == (int)i) continue; if (!MFI->MayNeedStackProtector(i)) continue; AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); LargeStackObjs.insert(i); } } // Then assign frame offsets to stack objects that are not used to spill // callee saved registers. for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (MFI->isObjectPreAllocated(i) && MFI->getUseLocalStackAllocationBlock()) continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && (int)i == RS->getScavengingFrameIndex()) continue; if (MFI->isDeadObjectIndex(i)) continue; if (MFI->getStackProtectorIndex() == (int)i) continue; if (LargeStackObjs.count(i)) continue; AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); } // Make sure the special register scavenging spill slot is closest to the // stack pointer. if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) || !RegInfo->useFPForScavengingIndex(Fn))) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } if (!TFI.targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn)) Offset += MFI->getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has // any calls or alloca's, align to the target's StackAlignment value to // ensure that the callee's frame or the alloca data is suitably aligned; // otherwise, for leaf functions, align to the TransientStackAlignment // value. unsigned StackAlign; if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlignment(); else StackAlign = TFI.getTransientStackAlignment(); // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); unsigned AlignMask = StackAlign - 1; Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); } // Update frame info to pretend that this is part of the stack... int64_t StackSize = Offset - LocalAreaOffset; MFI->setStackSize(StackSize); NumBytesStackSpace += StackSize; }
static bool CantUseSP(const MachineFrameInfo &MFI) { return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment(); }
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); MachineFrameInfo *MFI = MF.getFrameInfo(); const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL; bool IsTailCallReturn = false; if (MBB.end() != MBBI) { DL = MBBI->getDebugLoc(); unsigned RetOpcode = MBBI->getOpcode(); IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri; } int NumBytes = MFI->getStackSize(); const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; // Initial and residual are named for consistency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t ArgumentPopSize = 0; if (IsTailCallReturn) { MachineOperand &StackAdjust = MBBI->getOperand(1); // For a tail-call in a callee-pops-arguments environment, some or all of // the stack may actually be in use for the call's arguments, this is // calculated during LowerCall and consumed here... ArgumentPopSize = StackAdjust.getImm(); } else { // ... otherwise the amount to pop is *all* of the argument space, // conveniently stored in the MachineFunctionInfo by // LowerFormalArguments. This will, of course, be zero for the C calling // convention. ArgumentPopSize = AFI->getArgumentStackToRestore(); } // The stack frame should be like below, // // ---------------------- --- // | | | // | BytesInStackArgArea| CalleeArgStackSize // | (NumReusableBytes) | (of tail call) // | | --- // | | | // ---------------------| --- | // | | | | // | CalleeSavedReg | | | // | (NumRestores * 8) | | | // | | | | // ---------------------| | NumBytes // | | StackSize (StackAdjustUp) // | LocalStackSize | | | // | (covering callee | | | // | args) | | | // | | | | // ---------------------- --- --- // // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize // = StackSize + ArgumentPopSize // // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps // it as the 2nd argument of AArch64ISD::TC_RETURN. NumBytes += ArgumentPopSize; unsigned NumRestores = 0; // Move past the restores of the callee-saved registers. MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); MachineBasicBlock::iterator Begin = MBB.begin(); while (LastPopI != Begin) { --LastPopI; unsigned Restores = getNumCSRestores(*LastPopI, CSRegs); NumRestores += Restores; if (Restores == 0) { ++LastPopI; break; } } NumBytes -= NumRestores * 8; assert(NumBytes >= 0 && "Negative stack allocation size!?"); if (!hasFP(MF)) { // If this was a redzone leaf function, we don't need to restore the // stack pointer. if (!canUseRedZone(MF)) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII); return; } // Restore the original stack pointer. // FIXME: Rather than doing the math here, we should instead just use // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. if (NumBytes || MFI->hasVarSizedObjects()) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, -(NumRestores - 2) * 8, TII, MachineInstr::NoFlags); }
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. /// void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo *MFI = Fn.getFrameInfo(); // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction // of stack growth -- so it's always nonnegative. int LocalAreaOffset = TFI.getOffsetOfLocalArea(); if (StackGrowsDown) LocalAreaOffset = -LocalAreaOffset; assert(LocalAreaOffset >= 0 && "Local area offset should be in direction of stack growth"); int64_t Offset = LocalAreaOffset; // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. // We currently don't support filling in holes in between fixed sized // objects, so we adjust 'Offset' to point to the end of last fixed sized // preallocated object. for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { // The maximum distance from the stack pointer is at lower address of // the object -- which is given by offset. For down growing stack // the offset is negative, so we negate the offset to get the distance. FixedOff = -MFI->getObjectOffset(i); } else { // The maximum distance from the start pointer is at the upper // address of the object. FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); } if (FixedOff > Offset) Offset = FixedOff; } // First assign frame offsets to stack objects that are used to spill // callee saved registers. if (StackGrowsDown) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { // If the stack grows down, we need to add the size to find the lowest // address of the object. Offset += MFI->getObjectSize(i); unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; MFI->setObjectOffset(i, -Offset); // Set the computed offset } } else { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; MFI->setObjectOffset(i, Offset); Offset += MFI->getObjectSize(i); } } unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } // Make sure that the stack protector comes before the local variables on the // stack. if (MFI->getStackProtectorIndex() >= 0) AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign); // Then assign frame offsets to stack objects that are not used to spill // callee saved registers. for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && (int)i == RS->getScavengingFrameIndex()) continue; if (MFI->isDeadObjectIndex(i)) continue; if (MFI->getStackProtectorIndex() == (int)i) continue; AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); } // Make sure the special register scavenging spill slot is closest to the // stack pointer. if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } if (!RegInfo->targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. if (MFI->adjustsStack() && RegInfo->hasReservedCallFrame(Fn)) Offset += MFI->getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has // any calls or alloca's, align to the target's StackAlignment value to // ensure that the callee's frame or the alloca data is suitably aligned; // otherwise, for leaf functions, align to the TransientStackAlignment // value. unsigned StackAlign; if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlignment(); else StackAlign = TFI.getTransientStackAlignment(); // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); unsigned AlignMask = StackAlign - 1; Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); } // Update frame info to pretend that this is part of the stack... MFI->setStackSize(Offset - LocalAreaOffset); }
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. /// void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); StackProtector *SP = &getAnalysis<StackProtector>(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo *MFI = Fn.getFrameInfo(); // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction // of stack growth -- so it's always nonnegative. int LocalAreaOffset = TFI.getOffsetOfLocalArea(); if (StackGrowsDown) LocalAreaOffset = -LocalAreaOffset; assert(LocalAreaOffset >= 0 && "Local area offset should be in direction of stack growth"); int64_t Offset = LocalAreaOffset; // Skew to be applied to alignment. unsigned Skew = TFI.getStackAlignmentSkew(Fn); // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. // We currently don't support filling in holes in between fixed sized // objects, so we adjust 'Offset' to point to the end of last fixed sized // preallocated object. for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { // The maximum distance from the stack pointer is at lower address of // the object -- which is given by offset. For down growing stack // the offset is negative, so we negate the offset to get the distance. FixedOff = -MFI->getObjectOffset(i); } else { // The maximum distance from the start pointer is at the upper // address of the object. FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); } if (FixedOff > Offset) Offset = FixedOff; } // First assign frame offsets to stack objects that are used to spill // callee saved registers. if (StackGrowsDown) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { // If the stack grows down, we need to add the size to find the lowest // address of the object. Offset += MFI->getObjectSize(i); unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = alignTo(Offset, Align, Skew); DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n"); MFI->setObjectOffset(i, -Offset); // Set the computed offset } } else { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = alignTo(Offset, Align, Skew); DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n"); MFI->setObjectOffset(i, Offset); Offset += MFI->getObjectSize(i); } } unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the // incoming stack pointer if a frame pointer is required and is closer // to the incoming rather than the final stack pointer. const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo(); bool EarlyScavengingSlots = (TFI.hasFP(Fn) && TFI.isFPCloseToIncomingSP() && RegInfo->useFPForScavengingIndex(Fn) && !RegInfo->needsStackRealignment(Fn)); if (RS && EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); for (SmallVectorImpl<int>::iterator I = SFIs.begin(), IE = SFIs.end(); I != IE; ++I) AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); } // FIXME: Once this is working, then enable flag will change to a target // check for whether the frame is large enough to want to use virtual // frame index registers. Functions which don't want/need this optimization // will continue to use the existing code path. if (MFI->getUseLocalStackAllocationBlock()) { unsigned Align = MFI->getLocalFrameMaxAlign(); // Adjust to alignment boundary. Offset = alignTo(Offset, Align, Skew); DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); // Resolve offsets for objects in the local block. for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) { std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i); int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second; DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << FIOffset << "]\n"); MFI->setObjectOffset(Entry.first, FIOffset); } // Allocate the local block Offset += MFI->getLocalFrameSize(); MaxAlign = std::max(Align, MaxAlign); } // Make sure that the stack protector comes before the local variables on the // stack. SmallSet<int, 16> ProtectedObjs; if (MFI->getStackProtectorIndex() >= 0) { StackObjSet LargeArrayObjs; StackObjSet SmallArrayObjs; StackObjSet AddrOfObjs; AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign, Skew); // Assign large stack objects first. for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (MFI->isObjectPreAllocated(i) && MFI->getUseLocalStackAllocationBlock()) continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && RS->isScavengingFrameIndex((int)i)) continue; if (MFI->isDeadObjectIndex(i)) continue; if (MFI->getStackProtectorIndex() == (int)i) continue; switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { case StackProtector::SSPLK_None: continue; case StackProtector::SSPLK_SmallArray: SmallArrayObjs.insert(i); continue; case StackProtector::SSPLK_AddrOf: AddrOfObjs.insert(i); continue; case StackProtector::SSPLK_LargeArray: LargeArrayObjs.insert(i); continue; } llvm_unreachable("Unexpected SSPLayoutKind."); } AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, Offset, MaxAlign, Skew); AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, Offset, MaxAlign, Skew); AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, Offset, MaxAlign, Skew); } SmallVector<int, 8> ObjectsToAllocate; int EHRegNodeFrameIndex = INT_MAX; if (const WinEHFuncInfo *FuncInfo = Fn.getWinEHFuncInfo()) EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex; // Then prepare to assign frame offsets to stack objects that are not used to // spill callee saved registers. for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (MFI->isObjectPreAllocated(i) && MFI->getUseLocalStackAllocationBlock()) continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && RS->isScavengingFrameIndex((int)i)) continue; if (MFI->isDeadObjectIndex(i)) continue; if (MFI->getStackProtectorIndex() == (int)i) continue; if (EHRegNodeFrameIndex == (int)i) continue; if (ProtectedObjs.count(i)) continue; // Add the objects that we need to allocate to our working set. ObjectsToAllocate.push_back(i); } // Allocate the EH registration node first if one is present. if (EHRegNodeFrameIndex != INT_MAX) AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset, MaxAlign, Skew); // Give the targets a chance to order the objects the way they like it. if (Fn.getTarget().getOptLevel() != CodeGenOpt::None && Fn.getTarget().Options.StackSymbolOrdering) TFI.orderFrameObjects(Fn, ObjectsToAllocate); // Now walk the objects and actually assign base offsets to them. for (auto &Object : ObjectsToAllocate) AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew); // Make sure the special register scavenging spill slot is closest to the // stack pointer. if (RS && !EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); for (SmallVectorImpl<int>::iterator I = SFIs.begin(), IE = SFIs.end(); I != IE; ++I) AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); } if (!TFI.targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn)) Offset += MFI->getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has // any calls or alloca's, align to the target's StackAlignment value to // ensure that the callee's frame or the alloca data is suitably aligned; // otherwise, for leaf functions, align to the TransientStackAlignment // value. unsigned StackAlign; if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlignment(); else StackAlign = TFI.getTransientStackAlignment(); // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); Offset = alignTo(Offset, StackAlign, Skew); } // Update frame info to pretend that this is part of the stack... int64_t StackSize = Offset - LocalAreaOffset; MFI->setStackSize(StackSize); NumBytesStackSpace += StackSize; }
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); const ThumbRegisterInfo *RegInfo = static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); assert(NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc dl; unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned BasePtr = RegInfo->getBaseRegister(); int CFAOffset = 0; // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. NumBytes = (NumBytes + 3) & ~3; MFI->setStackSize(NumBytes); // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; if (ArgRegsSaveSize) { emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize, MachineInstr::FrameSetup); CFAOffset -= ArgRegsSaveSize; unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } if (!AFI->hasStackFrame()) { if (NumBytes - ArgRegsSaveSize != 0) { emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -(NumBytes - ArgRegsSaveSize), MachineInstr::FrameSetup); CFAOffset -= NumBytes - ArgRegsSaveSize; unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } return; } for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); int FI = CSI[i].getFrameIdx(); switch (Reg) { case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: if (STI.isTargetMachO()) { GPRCS2Size += 4; break; } // fallthrough case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: if (Reg == FramePtr) FramePtrSpillFI = FI; GPRCS1Size += 4; break; default: DPRCSSize += 8; } } if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { ++MBBI; } // Determine starting offsets of spill areas. unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; bool HasFP = hasFP(MF); if (HasFP) AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); NumBytes = DPRCSOffset; int FramePtrOffsetInBlock = 0; unsigned adjustedGPRCS1Size = GPRCS1Size; if (tryFoldSPUpdateIntoPushPop(STI, MF, std::prev(MBBI), NumBytes)) { FramePtrOffsetInBlock = NumBytes; adjustedGPRCS1Size += NumBytes; NumBytes = 0; } if (adjustedGPRCS1Size) { CFAOffset -= adjustedGPRCS1Size; unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { unsigned Reg = I->getReg(); int FI = I->getFrameIdx(); switch (Reg) { case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: case ARM::R12: if (STI.isTargetMachO()) break; // fallthough case ARM::R0: case ARM::R1: case ARM::R2: case ARM::R3: case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); break; } } // Adjust FP so it point to the stack slot that contains the previous FP. if (HasFP) { FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4) .setMIFlags(MachineInstr::FrameSetup)); if(FramePtrOffsetInBlock) { CFAOffset += FramePtrOffsetInBlock; unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else { unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FramePtr, true))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } if (NumBytes > 508) // If offset is > 508 then sp cannot be adjusted in a single instruction, // try restoring from fp instead. AFI->setShouldRestoreSPFromFP(true); } if (NumBytes) { // Insert it after all the callee-save spills. emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, MachineInstr::FrameSetup); if (!HasFP) { CFAOffset -= NumBytes; unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } } if (STI.isTargetELF() && HasFP) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); // Thumb1 does not currently support dynamic stack realignment. Report a // fatal error rather then silently generate bad code. if (RegInfo->needsStackRealignment(MF)) report_fatal_error("Dynamic stack realignment not supported for thumb1."); // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. if (RegInfo->hasBasePointer(MF)) AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) .addReg(ARM::SP)); // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already // checks for hasVarSizedObjects. if (MFI->hasVarSizedObjects()) AFI->setShouldRestoreSPFromFP(true); }
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned BasePtr = RegInfo->getBaseRegister(); // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. NumBytes = (NumBytes + 3) & ~3; MFI->setStackSize(NumBytes); // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; if (VARegSaveSize) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize, MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, MachineInstr::FrameSetup); return; } for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); int FI = CSI[i].getFrameIdx(); switch (Reg) { case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: if (Reg == FramePtr) FramePtrSpillFI = FI; AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; break; case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: if (Reg == FramePtr) FramePtrSpillFI = FI; if (STI.isTargetDarwin()) { AFI->addGPRCalleeSavedArea2Frame(FI); GPRCS2Size += 4; } else { AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; } break; default: AFI->addDPRCalleeSavedAreaFrame(FI); DPRCSSize += 8; } } if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { ++MBBI; if (MBBI != MBB.end()) dl = MBBI->getDebugLoc(); } // Determine starting offsets of spill areas. unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); NumBytes = DPRCSOffset; // Adjust FP so it point to the stack slot that contains the previous FP. if (hasFP(MF)) { AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0) .setMIFlags(MachineInstr::FrameSetup)); if (NumBytes > 508) // If offset is > 508 then sp cannot be adjusted in a single instruction, // try restoring from fp instead. AFI->setShouldRestoreSPFromFP(true); } if (NumBytes) // Insert it after all the callee-save spills. emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, MachineInstr::FrameSetup); if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. if (RegInfo->hasBasePointer(MF)) AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) .addReg(ARM::SP)); // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already // checks for hasVarSizedObjects. if (MFI->hasVarSizedObjects()) AFI->setShouldRestoreSPFromFP(true); }
/// emitPrologue - Push callee-saved registers onto the stack, which /// automatically adjust the stack pointer. Adjust the stack pointer to allocate /// space for local variables. Also emit labels used by the exception handler to /// generate the exception handling frames. void X86FrameInfo::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); const X86InstrInfo &TII = *TM.getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); bool needsFrameMoves = MMI.hasDebugInfo() || !Fn->doesNotThrow() || UnwindTablesMandatory; uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. bool HasFP = hasFP(MF); bool Is64Bit = STI.is64Bit(); bool IsWin64 = STI.isTargetWin64(); unsigned StackAlign = getStackAlignment(); unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned StackPtr = RegInfo->getStackRegister(); DebugLoc DL; // If we're forcing a stack realignment we can't rely on just the frame // info, we need to know the ABI stack alignment as well in case we // have a call out. Otherwise just make sure we have some alignment - we'll // go with the minimum SlotSize. if (ForceStackAlign) { if (MFI->hasCalls()) MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; else if (MaxAlign < SlotSize) MaxAlign = SlotSize; } // Add RETADDR move area to callee saved frame size. int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); if (TailCallReturnAddrDelta < 0) X86FI->setCalleeSavedFrameSize( X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); // If this is x86-64 and the Red Zone is not disabled, if we are a leaf // function, and use up to 128 bytes of stack space, don't have a frame // pointer, calls, or dynamic alloca then we do not need to adjust the // stack pointer (we fit in the Red Zone). if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && !RegInfo->needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. !IsWin64) { // Win64 has no Red Zone uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); MFI->setStackSize(StackSize); } else if (IsWin64) { // We need to always allocate 32 bytes as register spill area. // FIXME: We might reuse these 32 bytes for leaf functions. StackSize += 32; MFI->setStackSize(StackSize); } // Insert stack pointer adjustment for later moving of return addr. Only // applies to tail call optimized functions where the callee argument stack // size is bigger than the callers. if (TailCallReturnAddrDelta < 0) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)), StackPtr) .addReg(StackPtr) .addImm(-TailCallReturnAddrDelta); MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. } // Mapping for machine moves: // // DST: VirtualFP AND // SRC: VirtualFP => DW_CFA_def_cfa_offset // ELSE => DW_CFA_def_cfa // // SRC: VirtualFP AND // DST: Register => DW_CFA_def_cfa_register // // ELSE // OFFSET < 0 => DW_CFA_offset_extended_sf // REG < 64 => DW_CFA_offset + Reg // ELSE => DW_CFA_offset_extended std::vector<MachineMove> &Moves = MMI.getFrameMoves(); const TargetData *TD = MF.getTarget().getTargetData(); uint64_t NumBytes = 0; int stackGrowth = -TD->getPointerSize(); if (HasFP) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; if (RegInfo->needsStackRealignment(MF)) FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); // Get the offset of the stack slot for the EBP register, which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. // Update the frame offset adjustment. MFI->setOffsetAdjustment(-NumBytes); // Save EBP/RBP into the appropriate stack slot. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) .addReg(FramePtr, RegState::Kill); if (needsFrameMoves) { // Mark the place where EBP/RBP was saved. MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); // Define the current CFA rule to use the provided offset. if (StackSize) { MachineLocation SPDst(MachineLocation::VirtualFP); MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth); Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); } else { // FIXME: Verify & implement for FP MachineLocation SPDst(StackPtr); MachineLocation SPSrc(StackPtr, stackGrowth); Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); } // Change the rule for the FramePtr to be an "offset" rule. MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth); MachineLocation FPSrc(FramePtr); Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); } // Update EBP with the new base value... BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) .addReg(StackPtr); if (needsFrameMoves) { // Mark effective beginning of when frame pointer becomes valid. MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); // Define the current CFA to use the EBP/RBP register. MachineLocation FPDst(FramePtr); MachineLocation FPSrc(MachineLocation::VirtualFP); Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); } // Mark the FramePtr as live-in in every block except the entry. for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) I->addLiveIn(FramePtr); // Realign stack if (RegInfo->needsStackRealignment(MF)) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr).addReg(StackPtr).addImm(-MaxAlign); // The EFLAGS implicit def is dead. MI->getOperand(3).setIsDead(); } } else { NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); } // Skip the callee-saved push instructions. bool PushedRegs = false; int StackOffset = 2 * stackGrowth; while (MBBI != MBB.end() && (MBBI->getOpcode() == X86::PUSH32r || MBBI->getOpcode() == X86::PUSH64r)) { PushedRegs = true; ++MBBI; if (!HasFP && needsFrameMoves) { // Mark callee-saved push instruction. MCSymbol *Label = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); // Define the current CFA rule to use the provided offset. unsigned Ptr = StackSize ? MachineLocation::VirtualFP : StackPtr; MachineLocation SPDst(Ptr); MachineLocation SPSrc(Ptr, StackOffset); Moves.push_back(MachineMove(Label, SPDst, SPSrc)); StackOffset += stackGrowth; } } DL = MBB.findDebugLoc(MBBI); // If there is an SUB32ri of ESP immediately before this instruction, merge // the two. This can be the case when tail call elimination is enabled and // the callee has more arguments then the caller. NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); // If there is an ADD32ri or SUB32ri of ESP immediately after this // instruction, merge the two instructions. mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); // Adjust stack pointer: ESP -= numbytes. // Windows and cygwin/mingw require a prologue helper routine when allocating // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the // stack and adjust the stack pointer in one go. The 64-bit version of // __chkstk is only responsible for probing the stack. The 64-bit prologue is // responsible for adjusting the stack pointer. Touching the stack at 4K // increments is necessary to ensure that the guard pages used by the OS // virtual memory manager are allocated in correct sequence. if (NumBytes >= 4096 && (STI.isTargetCygMing() || STI.isTargetWin32())) { // Check whether EAX is livein for this function. bool isEAXAlive = isEAXLiveIn(MF); const char *StackProbeSymbol = STI.isTargetWindows() ? "_chkstk" : "_alloca"; unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; if (!isEAXAlive) { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(NumBytes); BuildMI(MBB, MBBI, DL, TII.get(CallOp)) .addExternalSymbol(StackProbeSymbol) .addReg(StackPtr, RegState::Define | RegState::Implicit) .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); } else { // Save EAX BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) .addReg(X86::EAX, RegState::Kill); // Allocate NumBytes-4 bytes on stack. We'll also use 4 already // allocated bytes for EAX. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(NumBytes - 4); BuildMI(MBB, MBBI, DL, TII.get(CallOp)) .addExternalSymbol(StackProbeSymbol) .addReg(StackPtr, RegState::Define | RegState::Implicit) .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); // Restore EAX MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX), StackPtr, false, NumBytes - 4); MBB.insert(MBBI, MI); } } else if (NumBytes >= 4096 && STI.isTargetWin64()) { // Sanity check that EAX is not livein for this function. It should // should not be, so throw an assert. assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!"); // Handle the 64-bit Windows ABI case where we need to call __chkstk. // Function prologue is responsible for adjusting the stack pointer. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(NumBytes); BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32)) .addExternalSymbol("__chkstk") .addReg(StackPtr, RegState::Define | RegState::Implicit); emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); } else if (NumBytes) emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); if ((NumBytes || PushedRegs) && needsFrameMoves) { // Mark end of stack pointer adjustment. MCSymbol *Label = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); if (!HasFP && NumBytes) { // Define the current CFA rule to use the provided offset. if (StackSize) { MachineLocation SPDst(MachineLocation::VirtualFP); MachineLocation SPSrc(MachineLocation::VirtualFP, -StackSize + stackGrowth); Moves.push_back(MachineMove(Label, SPDst, SPSrc)); } else { // FIXME: Verify & implement for FP MachineLocation SPDst(StackPtr); MachineLocation SPSrc(StackPtr, stackGrowth); Moves.push_back(MachineMove(Label, SPDst, SPSrc)); } } // Emit DWARF info specifying the offsets of the callee-saved registers. if (PushedRegs) emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr); } }
void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, int64_t SPOffset) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); MipsABIInfo ABI = static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI(); const MipsRegisterInfo *RegInfo = static_cast<const MipsRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); int MinCSFI = 0; int MaxCSFI = -1; if (CSI.size()) { MinCSFI = CSI[0].getFrameIdx(); MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); } bool EhDataRegFI = MipsFI->isEhDataRegFI(FrameIndex); // The following stack frame objects are always referenced relative to $sp: // 1. Outgoing arguments. // 2. Pointer to dynamically allocated stack space. // 3. Locations for callee-saved registers. // 4. Locations for eh data registers. // Everything else is referenced relative to whatever register // getFrameRegister() returns. unsigned FrameReg; if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI) FrameReg = ABI.GetStackPtr(); else if (RegInfo->needsStackRealignment(MF)) { if (MFI->hasVarSizedObjects() && !MFI->isFixedObjectIndex(FrameIndex)) FrameReg = ABI.GetBasePtr(); else if (MFI->isFixedObjectIndex(FrameIndex)) FrameReg = getFrameRegister(MF); else FrameReg = ABI.GetStackPtr(); } else FrameReg = getFrameRegister(MF); // Calculate final offset. // - There is no need to change the offset if the frame object is one of the // following: an outgoing argument, pointer to a dynamically allocated // stack space or a $gp restore location, // - If the frame object is any of the following, its offset must be adjusted // by adding the size of the stack: // incoming argument, callee-saved register location or local variable. bool IsKill = false; int64_t Offset; Offset = SPOffset + (int64_t)StackSize; Offset += MI.getOperand(OpNo + 1).getImm(); DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); if (!MI.isDebugValue()) { // Make sure Offset fits within the field available. // For MSA instructions, this is a 10-bit signed immediate (scaled by // element size), otherwise it is a 16-bit signed immediate. unsigned OffsetBitSize = getLoadStoreOffsetSizeInBits(MI.getOpcode()); unsigned OffsetAlign = getLoadStoreOffsetAlign(MI.getOpcode()); if (OffsetBitSize < 16 && isInt<16>(Offset) && (!isIntN(OffsetBitSize, Offset) || OffsetToAlignment(Offset, OffsetAlign) != 0)) { // If we have an offset that needs to fit into a signed n-bit immediate // (where n < 16) and doesn't, but does fit into 16-bits then use an ADDiu MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = II->getDebugLoc(); const TargetRegisterClass *PtrRC = ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); unsigned Reg = RegInfo.createVirtualRegister(PtrRC); const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>( MBB.getParent()->getSubtarget().getInstrInfo()); BuildMI(MBB, II, DL, TII.get(ABI.GetPtrAddiuOp()), Reg) .addReg(FrameReg) .addImm(Offset); FrameReg = Reg; Offset = 0; IsKill = true; } else if (!isInt<16>(Offset)) { // Otherwise split the offset into 16-bit pieces and add it in multiple // instructions. MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = II->getDebugLoc(); unsigned NewImm = 0; const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>( MBB.getParent()->getSubtarget().getInstrInfo()); unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL, OffsetBitSize == 16 ? &NewImm : nullptr); BuildMI(MBB, II, DL, TII.get(ABI.GetPtrAdduOp()), Reg).addReg(FrameReg) .addReg(Reg, RegState::Kill); FrameReg = Reg; Offset = SignExtend64<16>(NewImm); IsKill = true; } } MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false, IsKill); MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); }
void ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { // This tells PEI to spill the FP as if it is any other callee-save register // to take advantage the eliminateFrameIndex machinery. This also ensures it // is spilled in the order specified by getCalleeSavedRegs() to make it easier // to combine multiple loads / stores. bool CanEliminateFrame = true; bool CS1Spilled = false; bool LRSpilled = false; unsigned NumGPRSpills = 0; SmallVector<unsigned, 4> UnspilledCS1GPRs; SmallVector<unsigned, 4> UnspilledCS2GPRs; const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned FramePtr = RegInfo->getFrameRegister(MF); // Spill R4 if Thumb2 function requires stack realignment - it will be used as // scratch register. Also spill R4 if Thumb2 function has varsized objects, // since it's not always possible to restore sp from fp in a single // instruction. // FIXME: It will be better just to find spare register here. if (AFI->isThumb2Function() && (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) MF.getRegInfo().setPhysRegUsed(ARM::R4); if (AFI->isThumb1OnlyFunction()) { // Spill LR if Thumb1 function uses variable length argument lists. if (AFI->getVarArgsRegSaveSize() > 0) MF.getRegInfo().setPhysRegUsed(ARM::LR); // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know // for sure what the stack size will be, but for this, an estimate is good // enough. If there anything changes it, it'll be a spill, which implies // we've used all the registers and so R4 is already used, so not marking // it here will be OK. Also spill R4 if Thumb1 function requires stack // realignment. // FIXME: It will be better just to find spare register here. unsigned StackSize = estimateStackSize(MF); if (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) || StackSize > 508) MF.getRegInfo().setPhysRegUsed(ARM::R4); } // Spill the BasePtr if it's used. if (RegInfo->hasBasePointer(MF)) MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; if (MF.getRegInfo().isPhysRegUsed(Reg)) { Spilled = true; CanEliminateFrame = false; } else { // Check alias registers too. for (const unsigned *Aliases = RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) { if (MF.getRegInfo().isPhysRegUsed(*Aliases)) { Spilled = true; CanEliminateFrame = false; } } } if (!ARM::GPRRegisterClass->contains(Reg)) continue; if (Spilled) { NumGPRSpills++; if (!STI.isTargetDarwin()) { if (Reg == ARM::LR) LRSpilled = true; CS1Spilled = true; continue; } // Keep track if LR and any of R4, R5, R6, and R7 is spilled. switch (Reg) { case ARM::LR: LRSpilled = true; // Fallthrough case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: CS1Spilled = true; break; default: break; } } else { if (!STI.isTargetDarwin()) { UnspilledCS1GPRs.push_back(Reg); continue; } switch (Reg) { case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: UnspilledCS1GPRs.push_back(Reg); break; default: UnspilledCS2GPRs.push_back(Reg); break; } } } bool ForceLRSpill = false; if (!LRSpilled && AFI->isThumb1OnlyFunction()) { unsigned FnSize = GetFunctionSizeInBytes(MF, TII); // Force LR to be spilled if the Thumb function size is > 2048. This enables // use of BL to implement far jump. If it turns out that it's not needed // then the branch fix up path will undo it. if (FnSize >= (1 << 11)) { CanEliminateFrame = false; ForceLRSpill = true; } } // If any of the stack slot references may be out of range of an immediate // offset, make sure a register (or a spill slot) is available for the // register scavenger. Note that if we're indexing off the frame pointer, the // effective stack size is 4 bytes larger since the FP points to the stack // slot of the previous FP. Also, if we have variable sized objects in the // function, stack slot references will often be negative, and some of // our instructions are positive-offset only, so conservatively consider // that case to want a spill slot (or register) as well. Similarly, if // the function adjusts the stack pointer during execution and the // adjustments aren't already part of our stack size estimate, our offset // calculations may be off, so be conservative. // FIXME: We could add logic to be more precise about negative offsets // and which instructions will need a scratch register for them. Is it // worth the effort and added fragility? bool BigStack = (RS && (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= estimateRSStackSizeLimit(MF, this))) || MFI->hasVarSizedObjects() || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); bool ExtraCSSpill = false; if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. // Spill LR as well so we can fold BX_RET to the registers restore (LDM). if (!LRSpilled && CS1Spilled) { MF.getRegInfo().setPhysRegUsed(ARM::LR); NumGPRSpills++; UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), (unsigned)ARM::LR)); ForceLRSpill = false; ExtraCSSpill = true; } if (hasFP(MF)) { MF.getRegInfo().setPhysRegUsed(FramePtr); NumGPRSpills++; } // If stack and double are 8-byte aligned and we are spilling an odd number // of GPRs, spill one extra callee save GPR so we won't have to pad between // the integer and double callee save areas. unsigned TargetAlign = getStackAlignment(); if (TargetAlign == 8 && (NumGPRSpills & 1)) { if (CS1Spilled && !UnspilledCS1GPRs.empty()) { for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { unsigned Reg = UnspilledCS1GPRs[i]; // Don't spill high register if the function is thumb1 if (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || Reg == ARM::LR) { MF.getRegInfo().setPhysRegUsed(Reg); if (!RegInfo->isReservedReg(MF, Reg)) ExtraCSSpill = true; break; } } } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { unsigned Reg = UnspilledCS2GPRs.front(); MF.getRegInfo().setPhysRegUsed(Reg); if (!RegInfo->isReservedReg(MF, Reg)) ExtraCSSpill = true; } } // Estimate if we might need to scavenge a register at some point in order // to materialize a stack offset. If so, either spill one additional // callee-saved register or reserve a special spill slot to facilitate // register scavenging. Thumb1 needs a spill slot for stack pointer // adjustments also, even when the frame itself is small. if (BigStack && !ExtraCSSpill) { // If any non-reserved CS register isn't spilled, just spill one or two // extra. That should take care of it! unsigned NumExtras = TargetAlign / 4; SmallVector<unsigned, 2> Extras; while (NumExtras && !UnspilledCS1GPRs.empty()) { unsigned Reg = UnspilledCS1GPRs.back(); UnspilledCS1GPRs.pop_back(); if (!RegInfo->isReservedReg(MF, Reg) && (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || Reg == ARM::LR)) { Extras.push_back(Reg); NumExtras--; } } // For non-Thumb1 functions, also check for hi-reg CS registers if (!AFI->isThumb1OnlyFunction()) { while (NumExtras && !UnspilledCS2GPRs.empty()) { unsigned Reg = UnspilledCS2GPRs.back(); UnspilledCS2GPRs.pop_back(); if (!RegInfo->isReservedReg(MF, Reg)) { Extras.push_back(Reg); NumExtras--; } } } if (Extras.size() && NumExtras == 0) { for (unsigned i = 0, e = Extras.size(); i != e; ++i) { MF.getRegInfo().setPhysRegUsed(Extras[i]); } } else if (!AFI->isThumb1OnlyFunction()) { // note: Thumb1 functions spill to R12, not the stack. Reserve a slot // closest to SP or frame pointer. const TargetRegisterClass *RC = ARM::GPRRegisterClass; RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); } } } if (ForceLRSpill) { MF.getRegInfo().setPhysRegUsed(ARM::LR); AFI->setLRIsSpilledForFarJump(true); } }
void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); unsigned FramePtr = RegInfo->getFrameRegister(MF); // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; // Allocate the vararg register save area. This is not counted in NumBytes. if (VARegSaveSize) emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize, MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, MachineInstr::FrameSetup); return; } for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); int FI = CSI[i].getFrameIdx(); switch (Reg) { case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: if (Reg == FramePtr) FramePtrSpillFI = FI; AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; break; case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: if (Reg == FramePtr) FramePtrSpillFI = FI; if (STI.isTargetDarwin()) { AFI->addGPRCalleeSavedArea2Frame(FI); GPRCS2Size += 4; } else { AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; } break; default: AFI->addDPRCalleeSavedAreaFrame(FI); DPRCSSize += 8; } } // Move past area 1. if (GPRCS1Size > 0) MBBI++; // Set FP to point to the stack slot that contains the previous FP. // For Darwin, FP is R7, which has now been stored in spill area 1. // Otherwise, if this is not Darwin, all the callee-saved registers go // into spill area 1, including the FP in R11. In either case, it is // now safe to emit this assignment. bool HasFP = hasFP(MF); if (HasFP) { unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0) .setMIFlag(MachineInstr::FrameSetup); AddDefaultCC(AddDefaultPred(MIB)); } // Move past area 2. if (GPRCS2Size > 0) MBBI++; // Determine starting offsets of spill areas. unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; if (HasFP) AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); // Move past area 3. if (DPRCSSize > 0) { MBBI++; // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) MBBI++; } NumBytes = DPRCSOffset; if (NumBytes) { // Adjust SP after all the callee-save spills. emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, MachineInstr::FrameSetup); if (HasFP && isARM) // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 // Note it's not safe to do this in Thumb2 mode because it would have // taken two instructions: // mov sp, r7 // sub sp, #24 // If an interrupt is taken between the two instructions, then sp is in // an inconsistent state (pointing to the middle of callee-saved area). // The interrupt handler can end up clobbering the registers. AFI->setShouldRestoreSPFromFP(true); } if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); // If we need dynamic stack realignment, do it here. Be paranoid and make // sure if we also have VLAs, we have a base pointer for frame access. if (RegInfo->needsStackRealignment(MF)) { unsigned MaxAlign = MFI->getMaxAlignment(); assert (!AFI->isThumb1OnlyFunction()); if (!AFI->isThumbFunction()) { // Emit bic sp, sp, MaxAlign AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::BICri), ARM::SP) .addReg(ARM::SP, RegState::Kill) .addImm(MaxAlign-1))); } else { // We cannot use sp as source/dest register here, thus we're emitting the // following sequence: // mov r4, sp // bic r4, r4, MaxAlign // mov sp, r4 // FIXME: It will be better just to find spare register here. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) .addReg(ARM::SP, RegState::Kill)); AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2BICri), ARM::R4) .addReg(ARM::R4, RegState::Kill) .addImm(MaxAlign-1))); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(ARM::R4, RegState::Kill)); } AFI->setShouldRestoreSPFromFP(true); } // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. // FIXME: Clarify FrameSetup flags here. if (RegInfo->hasBasePointer(MF)) { if (isARM) BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister()) .addReg(ARM::SP) .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); else AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister()) .addReg(ARM::SP)); } // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already // checks for hasVarSizedObjects. if (MFI->hasVarSizedObjects()) AFI->setShouldRestoreSPFromFP(true); }