void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const ThumbRegisterInfo *RegInfo = static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); assert((unsigned)NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { if (NumBytes - ArgRegsSaveSize != 0) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes - ArgRegsSaveSize); } else { // Unwind MBBI to point to first LDR / VLDRD. if (MBBI != MBB.begin()) { do --MBBI; while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); if (!isCSRestore(MBBI, CSRegs)) ++MBBI; } // Move SP to start of FP callee save spill area. NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize() + ArgRegsSaveSize); if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot, the target is ELF and the function has FP, or // the target uses var sized objects. if (NumBytes) { assert(!MFI->getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(ARM::R4)); } else AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(FramePtr)); } else { if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { MachineBasicBlock::iterator PMBBI = std::prev(MBBI); if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes)) emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes)) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); } } if (needPopSpecialFixUp(MF)) { bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true); (void)Done; assert(Done && "Emission of the special fixup failed!?"); } }
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. /// void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo *MFI = Fn.getFrameInfo(); // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction // of stack growth -- so it's always nonnegative. int LocalAreaOffset = TFI.getOffsetOfLocalArea(); if (StackGrowsDown) LocalAreaOffset = -LocalAreaOffset; assert(LocalAreaOffset >= 0 && "Local area offset should be in direction of stack growth"); int64_t Offset = LocalAreaOffset; // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. // We currently don't support filling in holes in between fixed sized // objects, so we adjust 'Offset' to point to the end of last fixed sized // preallocated object. for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { // The maximum distance from the stack pointer is at lower address of // the object -- which is given by offset. For down growing stack // the offset is negative, so we negate the offset to get the distance. FixedOff = -MFI->getObjectOffset(i); } else { // The maximum distance from the start pointer is at the upper // address of the object. FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); } if (FixedOff > Offset) Offset = FixedOff; } // First assign frame offsets to stack objects that are used to spill // callee saved registers. if (StackGrowsDown) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { // If the stack grows down, we need to add the size to find the lowest // address of the object. Offset += MFI->getObjectSize(i); unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; MFI->setObjectOffset(i, -Offset); // Set the computed offset } } else { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; MFI->setObjectOffset(i, Offset); Offset += MFI->getObjectSize(i); } } unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) && !RegInfo->needsStackRealignment(Fn)) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } // FIXME: Once this is working, then enable flag will change to a target // check for whether the frame is large enough to want to use virtual // frame index registers. Functions which don't want/need this optimization // will continue to use the existing code path. if (MFI->getUseLocalStackAllocationBlock()) { unsigned Align = MFI->getLocalFrameMaxAlign(); // Adjust to alignment boundary. Offset = (Offset + Align - 1) / Align * Align; DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); // Resolve offsets for objects in the local block. for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) { std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i); int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second; DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << FIOffset << "]\n"); MFI->setObjectOffset(Entry.first, FIOffset); } // Allocate the local block Offset += MFI->getLocalFrameSize(); MaxAlign = std::max(Align, MaxAlign); } // Make sure that the stack protector comes before the local variables on the // stack. SmallSet<int, 16> LargeStackObjs; if (MFI->getStackProtectorIndex() >= 0) { AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign); // Assign large stack objects first. for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (MFI->isObjectPreAllocated(i) && MFI->getUseLocalStackAllocationBlock()) continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && (int)i == RS->getScavengingFrameIndex()) continue; if (MFI->isDeadObjectIndex(i)) continue; if (MFI->getStackProtectorIndex() == (int)i) continue; if (!MFI->MayNeedStackProtector(i)) continue; AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); LargeStackObjs.insert(i); } } // Then assign frame offsets to stack objects that are not used to spill // callee saved registers. for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (MFI->isObjectPreAllocated(i) && MFI->getUseLocalStackAllocationBlock()) continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && (int)i == RS->getScavengingFrameIndex()) continue; if (MFI->isDeadObjectIndex(i)) continue; if (MFI->getStackProtectorIndex() == (int)i) continue; if (LargeStackObjs.count(i)) continue; AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); } // Make sure the special register scavenging spill slot is closest to the // stack pointer. if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) || !RegInfo->useFPForScavengingIndex(Fn))) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } if (!TFI.targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn)) Offset += MFI->getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has // any calls or alloca's, align to the target's StackAlignment value to // ensure that the callee's frame or the alloca data is suitably aligned; // otherwise, for leaf functions, align to the TransientStackAlignment // value. unsigned StackAlign; if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlignment(); else StackAlign = TFI.getTransientStackAlignment(); // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); unsigned AlignMask = StackAlign - 1; Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); } // Update frame info to pretend that this is part of the stack... int64_t StackSize = Offset - LocalAreaOffset; MFI->setStackSize(StackSize); NumBytesStackSpace += StackSize; }
void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( MachineFunction &MF, RegScavenger *RS) const { const ARM64RegisterInfo *RegInfo = static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo()); ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>(); MachineRegisterInfo *MRI = &MF.getRegInfo(); SmallVector<unsigned, 4> UnspilledCSGPRs; SmallVector<unsigned, 4> UnspilledCSFPRs; // The frame record needs to be created by saving the appropriate registers if (hasFP(MF)) { MRI->setPhysRegUsed(ARM64::FP); MRI->setPhysRegUsed(ARM64::LR); } // Spill the BasePtr if it's used. Do this first thing so that the // getCalleeSavedRegs() below will get the right answer. if (RegInfo->hasBasePointer(MF)) MRI->setPhysRegUsed(RegInfo->getBaseRegister()); // If any callee-saved registers are used, the frame cannot be eliminated. unsigned NumGPRSpilled = 0; unsigned NumFPRSpilled = 0; bool ExtraCSSpill = false; bool CanEliminateFrame = true; DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:"); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); // Check pairs of consecutive callee-saved registers. for (unsigned i = 0; CSRegs[i]; i += 2) { assert(CSRegs[i + 1] && "Odd number of callee-saved registers!"); const unsigned OddReg = CSRegs[i]; const unsigned EvenReg = CSRegs[i + 1]; assert((ARM64::GPR64RegClass.contains(OddReg) && ARM64::GPR64RegClass.contains(EvenReg)) ^ (ARM64::FPR64RegClass.contains(OddReg) && ARM64::FPR64RegClass.contains(EvenReg)) && "Register class mismatch!"); const bool OddRegUsed = MRI->isPhysRegUsed(OddReg); const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg); // Early exit if none of the registers in the register pair is actually // used. if (!OddRegUsed && !EvenRegUsed) { if (ARM64::GPR64RegClass.contains(OddReg)) { UnspilledCSGPRs.push_back(OddReg); UnspilledCSGPRs.push_back(EvenReg); } else { UnspilledCSFPRs.push_back(OddReg); UnspilledCSFPRs.push_back(EvenReg); } continue; } unsigned Reg = ARM64::NoRegister; // If only one of the registers of the register pair is used, make sure to // mark the other one as used as well. if (OddRegUsed ^ EvenRegUsed) { // Find out which register is the additional spill. Reg = OddRegUsed ? EvenReg : OddReg; MRI->setPhysRegUsed(Reg); } DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo)); DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo)); assert(((OddReg == ARM64::LR && EvenReg == ARM64::FP) || (RegInfo->getEncodingValue(OddReg) + 1 == RegInfo->getEncodingValue(EvenReg))) && "Register pair of non-adjacent registers!"); if (ARM64::GPR64RegClass.contains(OddReg)) { NumGPRSpilled += 2; // If it's not a reserved register, we can use it in lieu of an // emergency spill slot for the register scavenger. // FIXME: It would be better to instead keep looking and choose another // unspilled register that isn't reserved, if there is one. if (Reg != ARM64::NoRegister && !RegInfo->isReservedReg(MF, Reg)) ExtraCSSpill = true; } else NumFPRSpilled += 2; CanEliminateFrame = false; } // FIXME: Set BigStack if any stack slot references may be out of range. // For now, just conservatively guestimate based on unscaled indexing // range. We'll end up allocating an unnecessary spill slot a lot, but // realistically that's not a big deal at this stage of the game. // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); bool BigStack = (CFSize >= 256); if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) AFI->setHasStackFrame(true); // Estimate if we might need to scavenge a register at some point in order // to materialize a stack offset. If so, either spill one additional // callee-saved register or reserve a special spill slot to facilitate // register scavenging. If we already spilled an extra callee-saved register // above to keep the number of spills even, we don't need to do anything else // here. if (BigStack && !ExtraCSSpill) { // If we're adding a register to spill here, we have to add two of them // to keep the number of regs to spill even. assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!"); unsigned Count = 0; while (!UnspilledCSGPRs.empty() && Count < 2) { unsigned Reg = UnspilledCSGPRs.back(); UnspilledCSGPRs.pop_back(); DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo) << " to get a scratch register.\n"); MRI->setPhysRegUsed(Reg); ExtraCSSpill = true; ++Count; } // If we didn't find an extra callee-saved register to spill, create // an emergency spill slot. if (!ExtraCSSpill) { const TargetRegisterClass *RC = &ARM64::GPR64RegClass; int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false); RS->addScavengingFrameIndex(FI); DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI << " as the emergency spill slot.\n"); } } }
void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); bool FP = hasFP(MF); // Work out frame sizes. int FrameSize = MFI->getStackSize(); assert(FrameSize%4 == 0 && "Misaligned frame size"); FrameSize/=4; bool isU6 = isImmU6(FrameSize); if (!isU6 && !isImmU16(FrameSize)) { // FIXME could emit multiple instructions. cerr << "emitPrologue Frame size too big: " << FrameSize << "\n"; abort(); } bool emitFrameMoves = needsFrameMoves(MF); // Do we need to allocate space on the stack? if (FrameSize) { bool saveLR = XFI->getUsesLR(); bool LRSavedOnEntry = false; int Opcode; if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) { Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6; MBB.addLiveIn(XCore::LR); saveLR = false; LRSavedOnEntry = true; } else { Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6; } BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize); if (emitFrameMoves) { std::vector<MachineMove> &Moves = MMI->getFrameMoves(); // Show update of SP. unsigned FrameLabelId = MMI->NextLabelID(); BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(FrameLabelId); MachineLocation SPDst(MachineLocation::VirtualFP); MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4); Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); if (LRSavedOnEntry) { MachineLocation CSDst(MachineLocation::VirtualFP, 0); MachineLocation CSSrc(XCore::LR); Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc)); } } if (saveLR) { int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot()); storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl); MBB.addLiveIn(XCore::LR); if (emitFrameMoves) { unsigned SaveLRLabelId = MMI->NextLabelID(); BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(SaveLRLabelId); MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset); MachineLocation CSSrc(XCore::LR); MMI->getFrameMoves().push_back(MachineMove(SaveLRLabelId, CSDst, CSSrc)); } } } if (FP) { // Save R10 to the stack. int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot()); storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl); // R10 is live-in. It is killed at the spill. MBB.addLiveIn(XCore::R10); if (emitFrameMoves) { unsigned SaveR10LabelId = MMI->NextLabelID(); BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(SaveR10LabelId); MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset); MachineLocation CSSrc(XCore::R10); MMI->getFrameMoves().push_back(MachineMove(SaveR10LabelId, CSDst, CSSrc)); } // Set the FP from the SP. unsigned FramePtr = XCore::R10; BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr) .addImm(0); if (emitFrameMoves) { // Show FP is now valid. unsigned FrameLabelId = MMI->NextLabelID(); BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(FrameLabelId); MachineLocation SPDst(FramePtr); MachineLocation SPSrc(MachineLocation::VirtualFP); MMI->getFrameMoves().push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); } } if (emitFrameMoves) { // Frame moves for callee saved. std::vector<MachineMove> &Moves = MMI->getFrameMoves(); std::vector<std::pair<unsigned, CalleeSavedInfo> >&SpillLabels = XFI->getSpillLabels(); for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) { unsigned SpillLabel = SpillLabels[I].first; CalleeSavedInfo &CSI = SpillLabels[I].second; int Offset = MFI->getObjectOffset(CSI.getFrameIdx()); unsigned Reg = CSI.getReg(); MachineLocation CSDst(MachineLocation::VirtualFP, Offset); MachineLocation CSSrc(Reg); Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc)); } } }
/// calculateCalleeSavedRegisters - Scan the function for modified callee saved /// registers. void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); MachineFrameInfo *MFI = Fn.getFrameInfo(); // Get the callee saved register list... const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); // These are used to keep track the callee-save area. Initialize them. MinCSFrameIndex = INT_MAX; MaxCSFrameIndex = 0; // Early exit for targets which have no callee saved registers. if (CSRegs == 0 || CSRegs[0] == 0) return; // In Naked functions we aren't going to save any registers. if (Fn.getFunction()->hasFnAttr(Attribute::Naked)) return; std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; if (Fn.getRegInfo().isPhysRegOrOverlapUsed(Reg)) { // If the reg is modified, save it! CSI.push_back(CalleeSavedInfo(Reg)); } } if (CSI.empty()) return; // Early exit if no callee saved registers are modified! unsigned NumFixedSpillSlots; const TargetFrameLowering::SpillSlot *FixedSpillSlots = TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); // Now that we know which registers need to be saved and restored, allocate // stack slots for them. for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { unsigned Reg = I->getReg(); const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); int FrameIdx; if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) { I->setFrameIdx(FrameIdx); continue; } // Check to see if this physreg must be spilled to a particular stack slot // on this target. const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && FixedSlot->Reg != Reg) ++FixedSlot; if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { // Nope, just spill it anywhere convenient. unsigned Align = RC->getAlignment(); unsigned StackAlign = TFI->getStackAlignment(); // We may not be able to satisfy the desired alignment specification of // the TargetRegisterClass if the stack alignment is smaller. Use the // min. Align = std::min(Align, StackAlign); FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true); } I->setFrameIdx(FrameIdx); } MFI->setCalleeSavedInfo(CSI); }
/// needsFrameBaseReg - Returns true if the instruction's frame index /// reference would be better served by a base register other than FP /// or SP. Used by LocalStackFrameAllocation to determine which frame index /// references it should create new base registers for. bool ARMBaseRegisterInfo:: needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) { assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!"); } // It's the load/store FI references that cause issues, as it can be difficult // to materialize the offset if it won't fit in the literal field. Estimate // based on the size of the local frame and some conservative assumptions // about the rest of the stack frame (note, this is pre-regalloc, so // we don't know everything for certain yet) whether this offset is likely // to be out of range of the immediate. Return true if so. // We only generate virtual base registers for loads and stores, so // return false for everything else. unsigned Opc = MI->getOpcode(); switch (Opc) { case ARM::LDRi12: case ARM::LDRH: case ARM::LDRBi12: case ARM::STRi12: case ARM::STRH: case ARM::STRBi12: case ARM::t2LDRi12: case ARM::t2LDRi8: case ARM::t2STRi12: case ARM::t2STRi8: case ARM::VLDRS: case ARM::VLDRD: case ARM::VSTRS: case ARM::VSTRD: case ARM::tSTRspi: case ARM::tLDRspi: break; default: return false; } // Without a virtual base register, if the function has variable sized // objects, all fixed-size local references will be via the frame pointer, // Approximate the offset and see if it's legal for the instruction. // Note that the incoming offset is based on the SP value at function entry, // so it'll be negative. MachineFunction &MF = *MI->getParent()->getParent(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); // Estimate an offset from the frame pointer. // Conservatively assume all callee-saved registers get pushed. R4-R6 // will be earlier than the FP, so we ignore those. // R7, LR int64_t FPOffset = Offset - 8; // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15 if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction()) FPOffset -= 80; // Estimate an offset from the stack pointer. // The incoming offset is relating to the SP at the start of the function, // but when we access the local it'll be relative to the SP after local // allocation, so adjust our SP-relative offset by that allocation size. Offset = -Offset; Offset += MFI->getLocalFrameSize(); // Assume that we'll have at least some spill slots allocated. // FIXME: This is a total SWAG number. We should run some statistics // and pick a real one. Offset += 128; // 128 bytes of spill slots // If there is a frame pointer, try using it. // The FP is only available if there is no dynamic realignment. We // don't know for sure yet whether we'll need that, so we guess based // on whether there are any local variables that would trigger it. unsigned StackAlign = TFI->getStackAlignment(); if (TFI->hasFP(MF) && !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) { if (isFrameOffsetLegal(MI, FPOffset)) return false; } // If we can reference via the stack pointer, try that. // FIXME: This (and the code that resolves the references) can be improved // to only disallow SP relative references in the live range of // the VLA(s). In practice, it's unclear how much difference that // would make, but it may be worth doing. if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset)) return false; // The offset likely isn't legal, we want to allocate a virtual base register. return true; }
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned BasePtr = RegInfo->getBaseRegister(); // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. NumBytes = (NumBytes + 3) & ~3; MFI->setStackSize(NumBytes); // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; if (VARegSaveSize) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize, MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, MachineInstr::FrameSetup); return; } for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); int FI = CSI[i].getFrameIdx(); switch (Reg) { case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: if (Reg == FramePtr) FramePtrSpillFI = FI; AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; break; case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: if (Reg == FramePtr) FramePtrSpillFI = FI; if (STI.isTargetDarwin()) { AFI->addGPRCalleeSavedArea2Frame(FI); GPRCS2Size += 4; } else { AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; } break; default: AFI->addDPRCalleeSavedAreaFrame(FI); DPRCSSize += 8; } } if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { ++MBBI; if (MBBI != MBB.end()) dl = MBBI->getDebugLoc(); } // Determine starting offsets of spill areas. unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); NumBytes = DPRCSOffset; // Adjust FP so it point to the stack slot that contains the previous FP. if (hasFP(MF)) { BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0) .setMIFlags(MachineInstr::FrameSetup); if (NumBytes > 508) // If offset is > 508 then sp cannot be adjusted in a single instruction, // try restoring from fp instead. AFI->setShouldRestoreSPFromFP(true); } if (NumBytes) // Insert it after all the callee-save spills. emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, MachineInstr::FrameSetup); if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. if (RegInfo->hasBasePointer(MF)) AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) .addReg(ARM::SP)); // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already // checks for hasVarSizedObjects. if (MFI->hasVarSizedObjects()) AFI->setShouldRestoreSPFromFP(true); }
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); MachineFrameInfo *MFI = MF.getFrameInfo(); const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL; bool IsTailCallReturn = false; if (MBB.end() != MBBI) { DL = MBBI->getDebugLoc(); unsigned RetOpcode = MBBI->getOpcode(); IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri; } int NumBytes = MFI->getStackSize(); const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; // Initial and residual are named for consistency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t ArgumentPopSize = 0; if (IsTailCallReturn) { MachineOperand &StackAdjust = MBBI->getOperand(1); // For a tail-call in a callee-pops-arguments environment, some or all of // the stack may actually be in use for the call's arguments, this is // calculated during LowerCall and consumed here... ArgumentPopSize = StackAdjust.getImm(); } else { // ... otherwise the amount to pop is *all* of the argument space, // conveniently stored in the MachineFunctionInfo by // LowerFormalArguments. This will, of course, be zero for the C calling // convention. ArgumentPopSize = AFI->getArgumentStackToRestore(); } // The stack frame should be like below, // // ---------------------- --- // | | | // | BytesInStackArgArea| CalleeArgStackSize // | (NumReusableBytes) | (of tail call) // | | --- // | | | // ---------------------| --- | // | | | | // | CalleeSavedReg | | | // | (CalleeSavedStackSize)| | | // | | | | // ---------------------| | NumBytes // | | StackSize (StackAdjustUp) // | LocalStackSize | | | // | (covering callee | | | // | args) | | | // | | | | // ---------------------- --- --- // // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize // = StackSize + ArgumentPopSize // // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps // it as the 2nd argument of AArch64ISD::TC_RETURN. // Move past the restores of the callee-saved registers. MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); MachineBasicBlock::iterator Begin = MBB.begin(); while (LastPopI != Begin) { --LastPopI; if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) { ++LastPopI; break; } } NumBytes -= AFI->getCalleeSavedStackSize(); assert(NumBytes >= 0 && "Negative stack allocation size!?"); if (!hasFP(MF)) { bool RedZone = canUseRedZone(MF); // If this was a redzone leaf function, we don't need to restore the // stack pointer (but we may need to pop stack args for fastcc). if (RedZone && ArgumentPopSize == 0) return; bool NoCalleeSaveRestore = AFI->getCalleeSavedStackSize() == 0; int StackRestoreBytes = RedZone ? 0 : NumBytes; if (NoCalleeSaveRestore) StackRestoreBytes += ArgumentPopSize; emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, StackRestoreBytes, TII, MachineInstr::FrameDestroy); // If we were able to combine the local stack pop with the argument pop, // then we're done. if (NoCalleeSaveRestore || ArgumentPopSize == 0) return; NumBytes = 0; } // Restore the original stack pointer. // FIXME: Rather than doing the math here, we should instead just use // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. if (NumBytes || MFI->hasVarSizedObjects()) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, -AFI->getCalleeSavedStackSize() + 16, TII, MachineInstr::FrameDestroy); // This must be placed after the callee-save restore code because that code // assumes the SP is at the same location as it was after the callee-save save // code in the prologue. if (ArgumentPopSize) emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, ArgumentPopSize, TII, MachineInstr::FrameDestroy); }
static void computeCalleeSaveRegisterPairs( MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) { if (CSI.empty()) return; AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); MachineFrameInfo *MFI = MF.getFrameInfo(); CallingConv::ID CC = MF.getFunction()->getCallingConv(); unsigned Count = CSI.size(); (void)CC; // MachO's compact unwind format relies on all registers being stored in // pairs. assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() || CC == CallingConv::PreserveMost || (Count & 1) == 0) && "Odd number of callee-saved regs to spill!"); unsigned Offset = AFI->getCalleeSavedStackSize(); for (unsigned i = 0; i < Count; ++i) { RegPairInfo RPI; RPI.Reg1 = CSI[i].getReg(); assert(AArch64::GPR64RegClass.contains(RPI.Reg1) || AArch64::FPR64RegClass.contains(RPI.Reg1)); RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1); // Add the next reg to the pair if it is in the same register class. if (i + 1 < Count) { unsigned NextReg = CSI[i + 1].getReg(); if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) || (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg))) RPI.Reg2 = NextReg; } // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI // list to come in sorted by frame index so that we can issue the store // pair instructions directly. Assert if we see anything otherwise. // // The order of the registers in the list is controlled by // getCalleeSavedRegs(), so they will always be in-order, as well. assert((!RPI.isPaired() || (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) && "Out of order callee saved regs!"); // MachO's compact unwind format relies on all registers being stored in // adjacent register pairs. assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() || CC == CallingConv::PreserveMost || (RPI.isPaired() && ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) || RPI.Reg1 + 1 == RPI.Reg2))) && "Callee-save registers not saved as adjacent register pair!"); RPI.FrameIdx = CSI[i].getFrameIdx(); if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) { // Round up size of non-pair to pair size if we need to pad the // callee-save area to ensure 16-byte alignment. Offset -= 16; assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16); MFI->setObjectSize(RPI.FrameIdx, 16); } else Offset -= RPI.isPaired() ? 16 : 8; assert(Offset % 8 == 0); RPI.Offset = Offset / 8; assert((RPI.Offset >= -64 && RPI.Offset <= 63) && "Offset out of bounds for LDP/STP immediate"); RegPairs.push_back(RPI); if (RPI.isPaired()) ++i; } // Align first offset to even 16-byte boundary to avoid additional SP // adjustment instructions. // Last pair offset is size of whole callee-save region for SP // pre-dec/post-inc. RegPairInfo &LastPair = RegPairs.back(); assert(AFI->getCalleeSavedStackSize() % 8 == 0); LastPair.Offset = AFI->getCalleeSavedStackSize() / 8; }
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. /// void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); StackProtector *SP = &getAnalysis<StackProtector>(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo *MFI = Fn.getFrameInfo(); // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction // of stack growth -- so it's always nonnegative. int LocalAreaOffset = TFI.getOffsetOfLocalArea(); if (StackGrowsDown) LocalAreaOffset = -LocalAreaOffset; assert(LocalAreaOffset >= 0 && "Local area offset should be in direction of stack growth"); int64_t Offset = LocalAreaOffset; // Skew to be applied to alignment. unsigned Skew = TFI.getStackAlignmentSkew(Fn); // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. // We currently don't support filling in holes in between fixed sized // objects, so we adjust 'Offset' to point to the end of last fixed sized // preallocated object. for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { // The maximum distance from the stack pointer is at lower address of // the object -- which is given by offset. For down growing stack // the offset is negative, so we negate the offset to get the distance. FixedOff = -MFI->getObjectOffset(i); } else { // The maximum distance from the start pointer is at the upper // address of the object. FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); } if (FixedOff > Offset) Offset = FixedOff; } // First assign frame offsets to stack objects that are used to spill // callee saved registers. if (StackGrowsDown) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { // If the stack grows down, we need to add the size to find the lowest // address of the object. Offset += MFI->getObjectSize(i); unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = alignTo(Offset, Align, Skew); DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n"); MFI->setObjectOffset(i, -Offset); // Set the computed offset } } else { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = alignTo(Offset, Align, Skew); DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n"); MFI->setObjectOffset(i, Offset); Offset += MFI->getObjectSize(i); } } unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the // incoming stack pointer if a frame pointer is required and is closer // to the incoming rather than the final stack pointer. const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo(); bool EarlyScavengingSlots = (TFI.hasFP(Fn) && TFI.isFPCloseToIncomingSP() && RegInfo->useFPForScavengingIndex(Fn) && !RegInfo->needsStackRealignment(Fn)); if (RS && EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); for (SmallVectorImpl<int>::iterator I = SFIs.begin(), IE = SFIs.end(); I != IE; ++I) AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); } // FIXME: Once this is working, then enable flag will change to a target // check for whether the frame is large enough to want to use virtual // frame index registers. Functions which don't want/need this optimization // will continue to use the existing code path. if (MFI->getUseLocalStackAllocationBlock()) { unsigned Align = MFI->getLocalFrameMaxAlign(); // Adjust to alignment boundary. Offset = alignTo(Offset, Align, Skew); DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); // Resolve offsets for objects in the local block. for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) { std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i); int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second; DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << FIOffset << "]\n"); MFI->setObjectOffset(Entry.first, FIOffset); } // Allocate the local block Offset += MFI->getLocalFrameSize(); MaxAlign = std::max(Align, MaxAlign); } // Make sure that the stack protector comes before the local variables on the // stack. SmallSet<int, 16> ProtectedObjs; if (MFI->getStackProtectorIndex() >= 0) { StackObjSet LargeArrayObjs; StackObjSet SmallArrayObjs; StackObjSet AddrOfObjs; AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign, Skew); // Assign large stack objects first. for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (MFI->isObjectPreAllocated(i) && MFI->getUseLocalStackAllocationBlock()) continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && RS->isScavengingFrameIndex((int)i)) continue; if (MFI->isDeadObjectIndex(i)) continue; if (MFI->getStackProtectorIndex() == (int)i) continue; switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { case StackProtector::SSPLK_None: continue; case StackProtector::SSPLK_SmallArray: SmallArrayObjs.insert(i); continue; case StackProtector::SSPLK_AddrOf: AddrOfObjs.insert(i); continue; case StackProtector::SSPLK_LargeArray: LargeArrayObjs.insert(i); continue; } llvm_unreachable("Unexpected SSPLayoutKind."); } AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, Offset, MaxAlign, Skew); AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, Offset, MaxAlign, Skew); AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, Offset, MaxAlign, Skew); } SmallVector<int, 8> ObjectsToAllocate; int EHRegNodeFrameIndex = INT_MAX; if (const WinEHFuncInfo *FuncInfo = Fn.getWinEHFuncInfo()) EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex; // Then prepare to assign frame offsets to stack objects that are not used to // spill callee saved registers. for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (MFI->isObjectPreAllocated(i) && MFI->getUseLocalStackAllocationBlock()) continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && RS->isScavengingFrameIndex((int)i)) continue; if (MFI->isDeadObjectIndex(i)) continue; if (MFI->getStackProtectorIndex() == (int)i) continue; if (EHRegNodeFrameIndex == (int)i) continue; if (ProtectedObjs.count(i)) continue; // Add the objects that we need to allocate to our working set. ObjectsToAllocate.push_back(i); } // Allocate the EH registration node first if one is present. if (EHRegNodeFrameIndex != INT_MAX) AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset, MaxAlign, Skew); // Give the targets a chance to order the objects the way they like it. if (Fn.getTarget().getOptLevel() != CodeGenOpt::None && Fn.getTarget().Options.StackSymbolOrdering) TFI.orderFrameObjects(Fn, ObjectsToAllocate); // Now walk the objects and actually assign base offsets to them. for (auto &Object : ObjectsToAllocate) AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew); // Make sure the special register scavenging spill slot is closest to the // stack pointer. if (RS && !EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); for (SmallVectorImpl<int>::iterator I = SFIs.begin(), IE = SFIs.end(); I != IE; ++I) AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); } if (!TFI.targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn)) Offset += MFI->getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has // any calls or alloca's, align to the target's StackAlignment value to // ensure that the callee's frame or the alloca data is suitably aligned; // otherwise, for leaf functions, align to the TransientStackAlignment // value. unsigned StackAlign; if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlignment(); else StackAlign = TFI.getTransientStackAlignment(); // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); Offset = alignTo(Offset, StackAlign, Skew); } // Update frame info to pretend that this is part of the stack... int64_t StackSize = Offset - LocalAreaOffset; MFI->setStackSize(StackSize); NumBytesStackSpace += StackSize; }
void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>(STI.getInstrInfo()); const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(STI.getRegisterInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MipsABIInfo ABI = STI.getABI(); unsigned SP = ABI.GetStackPtr(); unsigned FP = ABI.GetFramePtr(); unsigned ZERO = ABI.GetNullPtr(); unsigned ADDu = ABI.GetPtrAdduOp(); unsigned ADDiu = ABI.GetPtrAddiuOp(); unsigned AND = ABI.IsN64() ? Mips::AND64 : Mips::AND; const TargetRegisterClass *RC = ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; // First, compute final stack size. uint64_t StackSize = MFI->getStackSize(); // No need to allocate space on the stack. if (StackSize == 0 && !MFI->adjustsStack()) return; MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); MachineLocation DstML, SrcML; // Adjust stack. TII.adjustStackPtr(SP, -StackSize, MBB, MBBI); // emit ".cfi_def_cfa_offset StackSize" unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, -StackSize)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); if (CSI.size()) { // Find the instruction past the last instruction that saves a callee-saved // register to the stack. for (unsigned i = 0; i < CSI.size(); ++i) ++MBBI; // Iterate over list of callee-saved registers and emit .cfi_offset // directives. for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); unsigned Reg = I->getReg(); // If Reg is a double precision register, emit two cfa_offsets, // one for each of the paired single precision registers. if (Mips::AFGR64RegClass.contains(Reg)) { unsigned Reg0 = MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_lo), true); unsigned Reg1 = MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_hi), true); if (!STI.isLittle()) std::swap(Reg0, Reg1); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg0, Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } else if (Mips::FGR64RegClass.contains(Reg)) { unsigned Reg0 = MRI->getDwarfRegNum(Reg, true); unsigned Reg1 = MRI->getDwarfRegNum(Reg, true) + 1; if (!STI.isLittle()) std::swap(Reg0, Reg1); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg0, Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } else { // Reg is either in GPR32 or FGR32. unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, 1), Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } } if (MipsFI->callsEhReturn()) { // Insert instructions that spill eh data registers. for (int I = 0; I < 4; ++I) { if (!MBB.isLiveIn(ABI.GetEhDataReg(I))) MBB.addLiveIn(ABI.GetEhDataReg(I)); TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false, MipsFI->getEhDataRegFI(I), RC, &RegInfo); } // Emit .cfi_offset directives for eh data registers. for (int I = 0; I < 4; ++I) { int64_t Offset = MFI->getObjectOffset(MipsFI->getEhDataRegFI(I)); unsigned Reg = MRI->getDwarfRegNum(ABI.GetEhDataReg(I), true); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg, Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { // Insert instruction "move $fp, $sp" at this location. BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO) .setMIFlag(MachineInstr::FrameSetup); // emit ".cfi_def_cfa_register $fp" unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FP, true))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); if (RegInfo.needsStackRealignment(MF)) { // addiu $Reg, $zero, -MaxAlignment // andi $sp, $sp, $Reg unsigned VR = MF.getRegInfo().createVirtualRegister(RC); assert(isInt<16>(MFI->getMaxAlignment()) && "Function's alignment size requirement is not supported."); int MaxAlign = - (signed) MFI->getMaxAlignment(); BuildMI(MBB, MBBI, dl, TII.get(ADDiu), VR).addReg(ZERO) .addImm(MaxAlign); BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR); if (hasBP(MF)) { // move $s7, $sp unsigned BP = STI.isABI_N64() ? Mips::S7_64 : Mips::S7; BuildMI(MBB, MBBI, dl, TII.get(ADDu), BP) .addReg(SP) .addReg(ZERO); } } } }
void PEI::assignCalleeSavedSpillSlots(MachineFunction &F, const BitVector &SavedRegs) { // These are used to keep track the callee-save area. Initialize them. MinCSFrameIndex = INT_MAX; MaxCSFrameIndex = 0; if (SavedRegs.empty()) return; const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo(); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F); std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; if (SavedRegs.test(Reg)) CSI.push_back(CalleeSavedInfo(Reg)); } const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); MachineFrameInfo *MFI = F.getFrameInfo(); if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) { // If target doesn't implement this, use generic code. if (CSI.empty()) return; // Early exit if no callee saved registers are modified! unsigned NumFixedSpillSlots; const TargetFrameLowering::SpillSlot *FixedSpillSlots = TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); // Now that we know which registers need to be saved and restored, allocate // stack slots for them. for (auto &CS : CSI) { unsigned Reg = CS.getReg(); const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); int FrameIdx; if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { CS.setFrameIdx(FrameIdx); continue; } // Check to see if this physreg must be spilled to a particular stack slot // on this target. const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots && FixedSlot->Reg != Reg) ++FixedSlot; if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { // Nope, just spill it anywhere convenient. unsigned Align = RC->getAlignment(); unsigned StackAlign = TFI->getStackAlignment(); // We may not be able to satisfy the desired alignment specification of // the TargetRegisterClass if the stack alignment is smaller. Use the // min. Align = std::min(Align, StackAlign); FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. FrameIdx = MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); } CS.setFrameIdx(FrameIdx); } } MFI->setCalleeSavedInfo(CSI); }
void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); const MipsRegisterInfo *RegInfo = static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo()); const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; // First, compute final stack size. uint64_t StackSize = MFI->getStackSize(); // No need to allocate space on the stack. if (StackSize == 0 && !MFI->adjustsStack()) return; MachineModuleInfo &MMI = MF.getMMI(); std::vector<MachineMove> &Moves = MMI.getFrameMoves(); MachineLocation DstML, SrcML; // Adjust stack. TII.adjustStackPtr(SP, -StackSize, MBB, MBBI); // emit ".cfi_def_cfa_offset StackSize" MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel); DstML = MachineLocation(MachineLocation::VirtualFP); SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize); Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML)); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); if (CSI.size()) { // Find the instruction past the last instruction that saves a callee-saved // register to the stack. for (unsigned i = 0; i < CSI.size(); ++i) ++MBBI; // Iterate over list of callee-saved registers and emit .cfi_offset // directives. MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel); for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); unsigned Reg = I->getReg(); // If Reg is a double precision register, emit two cfa_offsets, // one for each of the paired single precision registers. if (Mips::AFGR64RegClass.contains(Reg)) { MachineLocation DstML0(MachineLocation::VirtualFP, Offset); MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4); MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven)); MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd)); if (!STI.isLittle()) std::swap(SrcML0, SrcML1); Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0)); Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1)); } else { // Reg is either in CPURegs or FGR32. DstML = MachineLocation(MachineLocation::VirtualFP, Offset); SrcML = MachineLocation(Reg); Moves.push_back(MachineMove(CSLabel, DstML, SrcML)); } } } // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { // Insert instruction "move $fp, $sp" at this location. BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO); // emit ".cfi_def_cfa_register $fp" MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel); DstML = MachineLocation(FP); SrcML = MachineLocation(MachineLocation::VirtualFP); Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML)); } }
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); const ThumbRegisterInfo *RegInfo = static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); assert(NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc dl; unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned BasePtr = RegInfo->getBaseRegister(); int CFAOffset = 0; // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. NumBytes = (NumBytes + 3) & ~3; MFI->setStackSize(NumBytes); // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; if (ArgRegsSaveSize) { emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize, MachineInstr::FrameSetup); CFAOffset -= ArgRegsSaveSize; unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } if (!AFI->hasStackFrame()) { if (NumBytes - ArgRegsSaveSize != 0) { emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -(NumBytes - ArgRegsSaveSize), MachineInstr::FrameSetup); CFAOffset -= NumBytes - ArgRegsSaveSize; unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } return; } for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); int FI = CSI[i].getFrameIdx(); switch (Reg) { case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: if (STI.isTargetMachO()) { GPRCS2Size += 4; break; } // fallthrough case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: if (Reg == FramePtr) FramePtrSpillFI = FI; GPRCS1Size += 4; break; default: DPRCSSize += 8; } } if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { ++MBBI; } // Determine starting offsets of spill areas. unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; bool HasFP = hasFP(MF); if (HasFP) AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); NumBytes = DPRCSOffset; int FramePtrOffsetInBlock = 0; unsigned adjustedGPRCS1Size = GPRCS1Size; if (tryFoldSPUpdateIntoPushPop(STI, MF, std::prev(MBBI), NumBytes)) { FramePtrOffsetInBlock = NumBytes; adjustedGPRCS1Size += NumBytes; NumBytes = 0; } if (adjustedGPRCS1Size) { CFAOffset -= adjustedGPRCS1Size; unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { unsigned Reg = I->getReg(); int FI = I->getFrameIdx(); switch (Reg) { case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: case ARM::R12: if (STI.isTargetMachO()) break; // fallthough case ARM::R0: case ARM::R1: case ARM::R2: case ARM::R3: case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); break; } } // Adjust FP so it point to the stack slot that contains the previous FP. if (HasFP) { FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4) .setMIFlags(MachineInstr::FrameSetup)); if(FramePtrOffsetInBlock) { CFAOffset += FramePtrOffsetInBlock; unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else { unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FramePtr, true))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } if (NumBytes > 508) // If offset is > 508 then sp cannot be adjusted in a single instruction, // try restoring from fp instead. AFI->setShouldRestoreSPFromFP(true); } if (NumBytes) { // Insert it after all the callee-save spills. emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, MachineInstr::FrameSetup); if (!HasFP) { CFAOffset -= NumBytes; unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } } if (STI.isTargetELF() && HasFP) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); // Thumb1 does not currently support dynamic stack realignment. Report a // fatal error rather then silently generate bad code. if (RegInfo->needsStackRealignment(MF)) report_fatal_error("Dynamic stack realignment not supported for thumb1."); // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. if (RegInfo->hasBasePointer(MF)) AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) .addReg(ARM::SP)); // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already // checks for hasVarSizedObjects. if (MFI->hasVarSizedObjects()) AFI->setShouldRestoreSPFromFP(true); }
SDValue Cpu0TargetLowering::LowerCall(SDValue InChain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { // Cpu0 target does not yet support tail call optimization. isTailCall = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering(); bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; Cpu0FunctionInfo *Cpu0FI = MF.getInfo<Cpu0FunctionInfo>(); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CC_Cpu0); // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); // If this is the first call, create a stack frame object that points to // a location to which .cprestore saves $gp. if (IsPIC && Cpu0FI->globalBaseRegFixed() && !Cpu0FI->getGPFI()) Cpu0FI->setGPFI(MFI->CreateFixedObject(4, 0, true)); // Get the frame index of the stack frame object that points to the location // of dynamically allocated area on the stack. int DynAllocFI = Cpu0FI->getDynAllocFI(); unsigned MaxCallFrameSize = Cpu0FI->getMaxCallFrameSize(); if (MaxCallFrameSize < NextStackOffset) { Cpu0FI->setMaxCallFrameSize(NextStackOffset); // Set the offsets relative to $sp of the $gp restore slot and dynamically // allocated stack space. These offsets must be aligned to a boundary // determined by the stack alignment of the ABI. unsigned StackAlignment = TFL->getStackAlignment(); NextStackOffset = (NextStackOffset + StackAlignment - 1) / StackAlignment * StackAlignment; MFI->setObjectOffset(DynAllocFI, NextStackOffset); } // Chain is the output chain of the last Load/Store or CopyToReg node. // ByValChain is the output chain of the last Memcpy node created for copying // byval arguments to the stack. SDValue Chain, CallSeqStart, ByValChain; SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal); ByValChain = InChain; // With EABI is it possible to have 16 args on registers. SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; SmallVector<SDValue, 8> MemOpChains; int FirstFI = -MFI->getNumFixedObjects() - 1, LastFI = 0; // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { SDValue Arg = OutVals[i]; CCValAssign &VA = ArgLocs[i]; MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT(); ISD::ArgFlagsTy Flags = Outs[i].Flags; // ByVal Arg. if (Flags.isByVal()) { assert("!!!Error!!!, Flags.isByVal()==true"); assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); continue; } // Register can't get to this point... assert(VA.isMemLoc()); // Create the frame index object for this incoming parameter LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), true); SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy()); // emit ISD::STORE whichs stores the // parameter value to a stack Location MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); } // Extend range of indices of frame objects for outgoing arguments that were // created during this function call. Skip this step if no such objects were // created. if (LastFI) Cpu0FI->extendOutArgFIRange(FirstFI, LastFI); // If a memcpy has been created to copy a byval arg to a stack, replace the // chain input of CallSeqStart with ByValChain. if (InChain != ByValChain) DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain, NextStackOffsetVal); // Transform all store nodes into one single node because all store // nodes are independent of each other. if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. unsigned char OpFlag; bool IsPICCall = IsPIC; // true if calls are translated to jalr $25 bool GlobalOrExternal = false; SDValue CalleeLo; if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { OpFlag = IsPICCall ? Cpu0II::MO_GOT_CALL : Cpu0II::MO_NO_FLAG; Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0, OpFlag); GlobalOrExternal = true; } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { if (!IsPIC) // static OpFlag = Cpu0II::MO_NO_FLAG; else // O32 & PIC OpFlag = Cpu0II::MO_GOT_CALL; Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), OpFlag); GlobalOrExternal = true; } SDValue InFlag; // Create nodes that load address of callee and copy it to T9 if (IsPICCall) { if (GlobalOrExternal) { // Load callee address Callee = DAG.getNode(Cpu0ISD::Wrapper, dl, getPointerTy(), GetGlobalReg(DAG, getPointerTy()), Callee); SDValue LoadValue = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(), false, false, false, 0); // Use GOT+LO if callee has internal linkage. if (CalleeLo.getNode()) { SDValue Lo = DAG.getNode(Cpu0ISD::Lo, dl, getPointerTy(), CalleeLo); Callee = DAG.getNode(ISD::ADD, dl, getPointerTy(), LoadValue, Lo); } else Callee = LoadValue; } } // T9 should contain the address of the callee function if // -reloction-model=pic or it is an indirect call. if (IsPICCall || !GlobalOrExternal) { // copy to T9 unsigned T9Reg = Cpu0::T9; Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0)); InFlag = Chain.getValue(1); Callee = DAG.getRegister(T9Reg, getPointerTy()); } // Cpu0JmpLink = #chain, #target_address, #opt_in_flags... // = Chain, Callee, Reg#1, Reg#2, ... // // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); Ops.push_back(Callee); // Add argument registers to the end of the list so that they are // known live into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); if (InFlag.getNode()) Ops.push_back(InFlag); Chain = DAG.getNode(Cpu0ISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NextStackOffset, true), DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, InVals); }
/// LowerCCCArguments - transform physical registers into virtual registers and /// generate load operations for arguments places on the stack. // FIXME: struct return stuff // FIXME: varargs SDValue SystemZTargetLowering::LowerCCCArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); if (isVarArg) report_fatal_error("Varargs not supported yet"); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { SDValue ArgValue; CCValAssign &VA = ArgLocs[i]; EVT LocVT = VA.getLocVT(); if (VA.isRegLoc()) { // Arguments passed in registers TargetRegisterClass *RC; switch (LocVT.getSimpleVT().SimpleTy) { default: #ifndef NDEBUG errs() << "LowerFormalArguments Unhandled argument type: " << LocVT.getSimpleVT().SimpleTy << "\n"; #endif llvm_unreachable(0); case MVT::i64: RC = SystemZ::GR64RegisterClass; break; case MVT::f32: RC = SystemZ::FP32RegisterClass; break; case MVT::f64: RC = SystemZ::FP64RegisterClass; break; } unsigned VReg = RegInfo.createVirtualRegister(RC); RegInfo.addLiveIn(VA.getLocReg(), VReg); ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT); } else { // Sanity check assert(VA.isMemLoc()); // Create the nodes corresponding to a load from this parameter slot. // Create the frame index object for this incoming parameter... int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8, VA.getLocMemOffset(), true, false); // Create the SelectionDAG nodes corresponding to a load // from this parameter SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN, PseudoSourceValue::getFixedStack(FI), 0, false, false, 0); } // If this is an 8/16/32-bit value, it is really passed promoted to 64 // bits. Insert an assert[sz]ext to capture this, then truncate to the // right size. if (VA.getLocInfo() == CCValAssign::SExt) ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::ZExt) ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue, DAG.getValueType(VA.getValVT())); if (VA.getLocInfo() != CCValAssign::Full) ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); InVals.push_back(ArgValue); } return Chain; }
/// LowerFormalArguments - transform physical registers into virtual registers /// and generate load operations for arguments places on the stack. SDValue Cpu0TargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); Cpu0FunctionInfo *Cpu0FI = MF.getInfo<Cpu0FunctionInfo>(); Cpu0FI->setVarArgsFrameIndex(0); // Used with vargs to acumulate store chains. std::vector<SDValue> OutChains; // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_Cpu0); Function::const_arg_iterator FuncArg = DAG.getMachineFunction().getFunction()->arg_begin(); int LastFI = 0;// Cpu0FI->LastInArgFI is 0 at the entry of this function. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++FuncArg) { CCValAssign &VA = ArgLocs[i]; EVT ValVT = VA.getValVT(); ISD::ArgFlagsTy Flags = Ins[i].Flags; bool IsRegLoc = VA.isRegLoc(); if (Flags.isByVal()) { assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); continue; } // sanity check assert(VA.isMemLoc()); // The stack pointer offset is relative to the caller stack frame. LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), true); // Create load nodes to retrieve arguments from the stack SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy()); InVals.push_back(DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo::getFixedStack(LastFI), false, false, false, 0)); } Cpu0FI->setLastInArgFI(LastFI); // All stores are grouped in one node to allow the matching between // the size of Ins and InVals. This only happens when on varg functions if (!OutChains.empty()) { OutChains.push_back(Chain); Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0], OutChains.size()); } return Chain; }
void IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, SmallVectorImpl<SDValue> &ArgValues, DebugLoc dl) { // // add beautiful description of IA64 stack frame format // here (from intel 24535803.pdf most likely) // MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); GP = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); SP = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); RP = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); MachineBasicBlock& BB = MF.front(); unsigned args_int[] = {IA64::r32, IA64::r33, IA64::r34, IA64::r35, IA64::r36, IA64::r37, IA64::r38, IA64::r39}; unsigned args_FP[] = {IA64::F8, IA64::F9, IA64::F10, IA64::F11, IA64::F12,IA64::F13,IA64::F14, IA64::F15}; unsigned argVreg[8]; unsigned argPreg[8]; unsigned argOpc[8]; unsigned used_FPArgs = 0; // how many FP args have been used so far? unsigned ArgOffset = 0; int count = 0; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { SDValue newroot, argt; if(count < 8) { // need to fix this logic? maybe. switch (getValueType(I->getType()).getSimpleVT()) { default: assert(0 && "ERROR in LowerArgs: can't lower this type of arg.\n"); case MVT::f32: // fixme? (well, will need to for weird FP structy stuff, // see intel ABI docs) case MVT::f64: //XXX BuildMI(&BB, IA64::IDEF, 0, args_FP[used_FPArgs]); MF.getRegInfo().addLiveIn(args_FP[used_FPArgs]); // mark this reg as liveIn // floating point args go into f8..f15 as-needed, the increment argVreg[count] = // is below..: MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::f64)); // FP args go into f8..f15 as needed: (hence the ++) argPreg[count] = args_FP[used_FPArgs++]; argOpc[count] = IA64::FMOV; argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), argVreg[count], MVT::f64); if (I->getType() == Type::FloatTy) argt = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, argt, DAG.getIntPtrConstant(0)); break; case MVT::i1: // NOTE: as far as C abi stuff goes, // bools are just boring old ints case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: //XXX BuildMI(&BB, IA64::IDEF, 0, args_int[count]); MF.getRegInfo().addLiveIn(args_int[count]); // mark this register as liveIn argVreg[count] = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); argPreg[count] = args_int[count]; argOpc[count] = IA64::MOV; argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), argVreg[count], MVT::i64); if ( getValueType(I->getType()) != MVT::i64) argt = DAG.getNode(ISD::TRUNCATE, dl, getValueType(I->getType()), newroot); break; } } else { // more than 8 args go into the frame // Create the frame index object for this incoming parameter... ArgOffset = 16 + 8 * (count - 8); int FI = MFI->CreateFixedObject(8, ArgOffset); // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i64); argt = newroot = DAG.getLoad(getValueType(I->getType()), dl, DAG.getEntryNode(), FIN, NULL, 0); } ++count; DAG.setRoot(newroot.getValue(1)); ArgValues.push_back(argt); } // Create a vreg to hold the output of (what will become) // the "alloc" instruction VirtGPR = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); BuildMI(&BB, TII->get(IA64::PSEUDO_ALLOC), VirtGPR); // we create a PSEUDO_ALLOC (pseudo)instruction for now /* BuildMI(&BB, IA64::IDEF, 0, IA64::r1); // hmm: BuildMI(&BB, IA64::IDEF, 0, IA64::r12); BuildMI(&BB, IA64::IDEF, 0, IA64::rp); // ..hmm. BuildMI(&BB, IA64::MOV, 1, GP).addReg(IA64::r1); // hmm: BuildMI(&BB, IA64::MOV, 1, SP).addReg(IA64::r12); BuildMI(&BB, IA64::MOV, 1, RP).addReg(IA64::rp); // ..hmm. */ unsigned tempOffset=0; // if this is a varargs function, we simply lower llvm.va_start by // pointing to the first entry if(F.isVarArg()) { tempOffset=0; VarArgsFrameIndex = MFI->CreateFixedObject(8, tempOffset); } // here we actually do the moving of args, and store them to the stack // too if this is a varargs function: for (int i = 0; i < count && i < 8; ++i) { BuildMI(&BB, TII->get(argOpc[i]), argVreg[i]).addReg(argPreg[i]); if(F.isVarArg()) { // if this is a varargs function, we copy the input registers to the stack int FI = MFI->CreateFixedObject(8, tempOffset); tempOffset+=8; //XXX: is it safe to use r22 like this? BuildMI(&BB, TII->get(IA64::MOV), IA64::r22).addFrameIndex(FI); // FIXME: we should use st8.spill here, one day BuildMI(&BB, TII->get(IA64::ST8), IA64::r22).addReg(argPreg[i]); } } // Finally, inform the code generator which regs we return values in. // (see the ISD::RET: case in the instruction selector) switch (getValueType(F.getReturnType()).getSimpleVT()) { default: assert(0 && "i have no idea where to return this type!"); case MVT::isVoid: break; case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: MF.getRegInfo().addLiveOut(IA64::r8); break; case MVT::f32: case MVT::f64: MF.getRegInfo().addLiveOut(IA64::F8); break; } }
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert((MBBI->getOpcode() == ARM::tBX_RET || MBBI->getOpcode() == ARM::tPOP_RET) && "Can only insert epilog into returning blocks"); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. if (MBBI != MBB.begin()) { do --MBBI; while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); if (!isCSRestore(MBBI, CSRegs)) ++MBBI; } // Move SP to start of FP callee save spill area. NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot, the target is ELF and the function has FP, or // the target uses var sized objects. if (NumBytes) { assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(ARM::R4)); } else AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(FramePtr)); } else { if (MBBI->getOpcode() == ARM::tBX_RET && &MBB.front() != MBBI && prior(MBBI)->getOpcode() == ARM::tPOP) { MachineBasicBlock::iterator PMBBI = prior(MBBI); emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); } else emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); } } if (VARegSaveSize) { // Unlike T2 and ARM mode, the T1 pop instruction cannot restore // to LR, and we can't pop the value directly to the PC since // we need to update the SP after popping the value. Therefore, we // pop the old LR into R3 as a temporary. // Move back past the callee-saved register restoration while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs)) ++MBBI; // Epilogue for vararg functions: pop LR to R3 and branch off it. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(ARM::R3, RegState::Define); emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) .addReg(ARM::R3, RegState::Kill)); // erase the old tBX_RET instruction MBB.erase(MBBI); } }
void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo *MMI = &MF.getMMI(); const XCoreRegisterInfo *RegInfo = static_cast<const XCoreRegisterInfo*>(MF.getTarget().getRegisterInfo()); const XCoreInstrInfo &TII = *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo()); XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); bool FP = hasFP(MF); bool Nested = MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::Nest); if (Nested) { loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII); } // Work out frame sizes. int FrameSize = MFI->getStackSize(); assert(FrameSize%4 == 0 && "Misaligned frame size"); FrameSize/=4; bool isU6 = isImmU6(FrameSize); if (!isU6 && !isImmU16(FrameSize)) { // FIXME could emit multiple instructions. report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize)); } bool emitFrameMoves = RegInfo->needsFrameMoves(MF); // Do we need to allocate space on the stack? if (FrameSize) { bool saveLR = XFI->getUsesLR(); bool LRSavedOnEntry = false; int Opcode; if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) { Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6; MBB.addLiveIn(XCore::LR); saveLR = false; LRSavedOnEntry = true; } else { Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6; } BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize); if (emitFrameMoves) { std::vector<MachineMove> &Moves = MMI->getFrameMoves(); // Show update of SP. MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel); MachineLocation SPDst(MachineLocation::VirtualFP); MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4); Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); if (LRSavedOnEntry) { MachineLocation CSDst(MachineLocation::VirtualFP, 0); MachineLocation CSSrc(XCore::LR); Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc)); } } if (saveLR) { int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot()); storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl, TII); MBB.addLiveIn(XCore::LR); if (emitFrameMoves) { MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel); MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset); MachineLocation CSSrc(XCore::LR); MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc)); } } } if (FP) { // Save R10 to the stack. int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot()); storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl, TII); // R10 is live-in. It is killed at the spill. MBB.addLiveIn(XCore::R10); if (emitFrameMoves) { MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label); MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset); MachineLocation CSSrc(XCore::R10); MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc)); } // Set the FP from the SP. unsigned FramePtr = XCore::R10; BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr) .addImm(0); if (emitFrameMoves) { // Show FP is now valid. MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel); MachineLocation SPDst(FramePtr); MachineLocation SPSrc(MachineLocation::VirtualFP); MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc)); } } if (emitFrameMoves) { // Frame moves for callee saved. std::vector<MachineMove> &Moves = MMI->getFrameMoves(); std::vector<std::pair<MCSymbol*, CalleeSavedInfo> >&SpillLabels = XFI->getSpillLabels(); for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) { MCSymbol *SpillLabel = SpillLabels[I].first; CalleeSavedInfo &CSI = SpillLabels[I].second; int Offset = MFI->getObjectOffset(CSI.getFrameIdx()); unsigned Reg = CSI.getReg(); MachineLocation CSDst(MachineLocation::VirtualFP, Offset); MachineLocation CSSrc(Reg); Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc)); } } }
void MCS51FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB MachineFrameInfo *MFI = MF.getFrameInfo(); MCS51MachineFunctionInfo *MCS51FI = MF.getInfo<MCS51MachineFunctionInfo>(); const MCS51InstrInfo &TII = *static_cast<const MCS51InstrInfo*>(MF.getTarget().getInstrInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Get the number of bytes to allocate from the FrameInfo. uint64_t StackSize = MFI->getStackSize(); uint64_t NumBytes = 0; if (hasFP(MF)) { // Calculate required stack adjustment uint64_t FrameSize = StackSize - 2; NumBytes = FrameSize - MCS51FI->getCalleeSavedFrameSize(); // Get the offset of the stack slot for the EBP register... which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. // Update the frame offset adjustment. MFI->setOffsetAdjustment(-NumBytes); // Save FPW into the appropriate stack slot... BuildMI(MBB, MBBI, DL, TII.get(MCS51::PUSH16r)) .addReg(MCS51::FPW, RegState::Kill); // Update FPW with the new base value... BuildMI(MBB, MBBI, DL, TII.get(MCS51::MOV16rr), MCS51::FPW) .addReg(MCS51::SPW); // Mark the FramePtr as live-in in every block except the entry. for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) I->addLiveIn(MCS51::FPW); } else NumBytes = StackSize - MCS51FI->getCalleeSavedFrameSize(); // Skip the callee-saved push instructions. while (MBBI != MBB.end() && (MBBI->getOpcode() == MCS51::PUSH16r)) ++MBBI; if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); if (NumBytes) { // adjust stack pointer: SPW -= numbytes // If there is an SUB16ri of SPW immediately before this instruction, merge // the two. //NumBytes -= mergeSPUpdates(MBB, MBBI, true); // If there is an ADD16ri or SUB16ri of SPW immediately after this // instruction, merge the two instructions. // mergeSPUpdatesDown(MBB, MBBI, &NumBytes); if (NumBytes) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(MCS51::SUB16ri), MCS51::SPW) .addReg(MCS51::SPW).addImm(NumBytes); // The SRW implicit def is dead. MI->getOperand(3).setIsDead(); } } }
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. /// bool PEI::runOnMachineFunction(MachineFunction &Fn) { const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs"); RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); // Calculate the MaxCallFrameSize and AdjustsStack variables for the // function's frame information. Also eliminates call frame pseudo // instructions. calculateCallsInformation(Fn); // Allow the target machine to make some adjustments to the function // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. TFI->processFunctionBeforeCalleeSavedScan(Fn, RS); // Scan the function for modified callee saved registers and insert spill code // for any callee saved registers that are modified. calculateCalleeSavedRegisters(Fn); // Determine placement of CSR spill/restore code: // place all spills in the entry block, all restores in return blocks. calculateSets(Fn); // Add the code to save and restore the callee saved registers if (!F->hasFnAttribute(Attribute::Naked)) insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function // before the frame layout is finalized. TFI->processFunctionBeforeFrameFinalized(Fn, RS); // Calculate actual frame offsets for all abstract stack objects... calculateFrameObjectOffsets(Fn); // Add prolog and epilog code to the function. This function is required // to align the stack frame as necessary for any stack variables or // called functions. Because of this, calculateCalleeSavedRegisters() // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. if (!F->hasFnAttribute(Attribute::Naked)) insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references // and actual offsets. // replaceFrameIndices(Fn); // If register scavenging is needed, as we've enabled doing it as a // post-pass, scavenge the virtual registers that frame index elimination // inserted. if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) scavengeFrameVirtualRegs(Fn); // Clear any vregs created by virtual scavenging. Fn.getRegInfo().clearVirtRegs(); // Warn on stack size when we exceeds the given limit. MachineFrameInfo *MFI = Fn.getFrameInfo(); uint64_t StackSize = MFI->getStackSize(); if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) { DiagnosticInfoStackSize DiagStackSize(*F, StackSize); F->getContext().diagnose(DiagStackSize); } delete RS; ReturnBlocks.clear(); return true; }
void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const { MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); unsigned RegSize = Subtarget.isGP32bit() ? 4 : 8; bool HasGP = MipsFI->needGPSaveRestore(); // Min and Max CSI FrameIndex. int MinCSFI = -1, MaxCSFI = -1; // See the description at MipsMachineFunction.h int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1; // Replace the dummy '0' SPOffset by the negative offsets, as explained on // LowerFormalArguments. Leaving '0' for while is necessary to avoid // the approach done by calculateFrameObjectOffsets to the stack frame. MipsFI->adjustLoadArgsFI(MFI); MipsFI->adjustStoreVarArgsFI(MFI); // It happens that the default stack frame allocation order does not directly // map to the convention used for mips. So we must fix it. We move the callee // save register slots after the local variables area, as described in the // stack frame above. unsigned CalleeSavedAreaSize = 0; if (!CSI.empty()) { MinCSFI = CSI[0].getFrameIdx(); MaxCSFI = CSI[CSI.size()-1].getFrameIdx(); } for (unsigned i = 0, e = CSI.size(); i != e; ++i) CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx()); unsigned StackOffset = HasGP ? (MipsFI->getGPStackOffset()+RegSize) : (Subtarget.isABI_O32() ? 16 : 0); // Adjust local variables. They should come on the stack right // after the arguments. int LastOffsetFI = -1; for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (i >= MinCSFI && i <= MaxCSFI) continue; if (MFI->isDeadObjectIndex(i)) continue; unsigned Offset = StackOffset + MFI->getObjectOffset(i) - CalleeSavedAreaSize; if (LastOffsetFI == -1) LastOffsetFI = i; if (Offset > MFI->getObjectOffset(LastOffsetFI)) LastOffsetFI = i; MFI->setObjectOffset(i, Offset); } // Adjust CPU Callee Saved Registers Area. Registers RA and FP must // be saved in this CPU Area. This whole area must be aligned to the // default Stack Alignment requirements. if (LastOffsetFI >= 0) StackOffset = MFI->getObjectOffset(LastOffsetFI)+ MFI->getObjectSize(LastOffsetFI); StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign); for (unsigned i = 0, e = CSI.size(); i != e ; ++i) { if (CSI[i].getRegClass() != Mips::CPURegsRegisterClass) break; MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset); TopCPUSavedRegOff = StackOffset; StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx()); } // Stack locations for FP and RA. If only one of them is used, // the space must be allocated for both, otherwise no space at all. if (hasFP(MF) || MFI->hasCalls()) { // FP stack location MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); MipsFI->setFPStackOffset(StackOffset); TopCPUSavedRegOff = StackOffset; StackOffset += RegSize; // SP stack location MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); MipsFI->setRAStackOffset(StackOffset); StackOffset += RegSize; if (MFI->hasCalls()) TopCPUSavedRegOff += RegSize; } StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign); // Adjust FPU Callee Saved Registers Area. This Area must be // aligned to the default Stack Alignment requirements. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass) continue; MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset); TopFPUSavedRegOff = StackOffset; StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx()); } StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign); // Update frame info MFI->setStackSize(StackOffset); // Recalculate the final tops offset. The final values must be '0' // if there isn't a callee saved register for CPU or FPU, otherwise // a negative offset is needed. if (TopCPUSavedRegOff >= 0) MipsFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset); if (TopFPUSavedRegOff >= 0) MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset); }
/// insertCSRSpillsAndRestores - Insert spill and restore code for /// callee saved registers used in the function. /// void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Get callee saved register information. MachineFrameInfo *MFI = Fn.getFrameInfo(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); MFI->setCalleeSavedInfoValid(true); // Early exit if no callee saved registers are modified! if (CSI.empty()) return; const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); MachineBasicBlock::iterator I; // Spill using target interface. I = EntryBlock->begin(); if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { // Add the callee-saved register as live-in. // It's killed at the spill. EntryBlock->addLiveIn(CSI[i].getReg()); // Insert the spill to the stack frame. unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(), RC, TRI); } } // Restore using target interface. for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { MachineBasicBlock *MBB = ReturnBlocks[ri]; I = MBB->end(); --I; // Skip over all terminator instructions, which are part of the return // sequence. MachineBasicBlock::iterator I2 = I; while (I2 != MBB->begin() && (--I2)->isTerminator()) I = I2; bool AtStart = I == MBB->begin(); MachineBasicBlock::iterator BeforeI = I; if (!AtStart) --BeforeI; // Restore all registers immediately before the return and any // terminators that precede it. if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert // multiple instructions. if (AtStart) I = MBB->begin(); else { I = BeforeI; ++I; } } } } }
/// insertCSRSpillsAndRestores - Insert spill and restore code for /// callee saved registers used in the function, handling shrink wrapping. /// void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Get callee saved register information. MachineFrameInfo *MFI = Fn.getFrameInfo(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); MFI->setCalleeSavedInfoValid(true); // Early exit if no callee saved registers are modified! if (CSI.empty()) return; const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); MachineBasicBlock::iterator I; if (! ShrinkWrapThisFunction) { // Spill using target interface. I = EntryBlock->begin(); if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { // Add the callee-saved register as live-in. // It's killed at the spill. EntryBlock->addLiveIn(CSI[i].getReg()); // Insert the spill to the stack frame. unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(), RC, TRI); } } // Restore using target interface. for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { MachineBasicBlock* MBB = ReturnBlocks[ri]; I = MBB->end(); --I; // Skip over all terminator instructions, which are part of the return // sequence. MachineBasicBlock::iterator I2 = I; while (I2 != MBB->begin() && (--I2)->isTerminator()) I = I2; bool AtStart = I == MBB->begin(); MachineBasicBlock::iterator BeforeI = I; if (!AtStart) --BeforeI; // Restore all registers immediately before the return and any // terminators that precede it. if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert // multiple instructions. if (AtStart) I = MBB->begin(); else { I = BeforeI; ++I; } } } } return; } // Insert spills. std::vector<CalleeSavedInfo> blockCSI; for (CSRegBlockMap::iterator BI = CSRSave.begin(), BE = CSRSave.end(); BI != BE; ++BI) { MachineBasicBlock* MBB = BI->first; CSRegSet save = BI->second; if (save.empty()) continue; blockCSI.clear(); for (CSRegSet::iterator RI = save.begin(), RE = save.end(); RI != RE; ++RI) { blockCSI.push_back(CSI[*RI]); } assert(blockCSI.size() > 0 && "Could not collect callee saved register info"); I = MBB->begin(); // When shrink wrapping, use stack slot stores/loads. for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { // Add the callee-saved register as live-in. // It's killed at the spill. MBB->addLiveIn(blockCSI[i].getReg()); // Insert the spill to the stack frame. unsigned Reg = blockCSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(*MBB, I, Reg, true, blockCSI[i].getFrameIdx(), RC, TRI); } } for (CSRegBlockMap::iterator BI = CSRRestore.begin(), BE = CSRRestore.end(); BI != BE; ++BI) { MachineBasicBlock* MBB = BI->first; CSRegSet restore = BI->second; if (restore.empty()) continue; blockCSI.clear(); for (CSRegSet::iterator RI = restore.begin(), RE = restore.end(); RI != RE; ++RI) { blockCSI.push_back(CSI[*RI]); } assert(blockCSI.size() > 0 && "Could not find callee saved register info"); // If MBB is empty and needs restores, insert at the _beginning_. if (MBB->empty()) { I = MBB->begin(); } else { I = MBB->end(); --I; // Skip over all terminator instructions, which are part of the // return sequence. if (! I->isTerminator()) { ++I; } else { MachineBasicBlock::iterator I2 = I; while (I2 != MBB->begin() && (--I2)->isTerminator()) I = I2; } } bool AtStart = I == MBB->begin(); MachineBasicBlock::iterator BeforeI = I; if (!AtStart) --BeforeI; // Restore all registers immediately before the return and any // terminators that precede it. for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { unsigned Reg = blockCSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.loadRegFromStackSlot(*MBB, I, Reg, blockCSI[i].getFrameIdx(), RC, TRI); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert // multiple instructions. if (AtStart) I = MBB->begin(); else { I = BeforeI; ++I; } } } }
void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { AArch64MachineFunctionInfo *FuncInfo = MF.getInfo<AArch64MachineFunctionInfo>(); MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); bool NeedsFrameMoves = MMI.hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); uint64_t NumInitialBytes, NumResidualBytes; // Currently we expect the stack to be laid out by // sub sp, sp, #initial // stp x29, x30, [sp, #offset] // ... // str xxx, [sp, #offset] // sub sp, sp, #rest (possibly via extra instructions). if (MFI->getCalleeSavedInfo().size()) { // If there are callee-saved registers, we want to store them efficiently as // a block, and virtual base assignment happens too early to do it for us so // we adjust the stack in two phases: first just for callee-saved fiddling, // then to allocate the rest of the frame. splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes); } else { // If there aren't any callee-saved registers, two-phase adjustment is // inefficient. It's more efficient to adjust with NumInitialBytes too // because when we're in a "callee pops argument space" situation, that pop // must be tacked onto Initial for correctness. NumInitialBytes = MFI->getStackSize(); NumResidualBytes = 0; } // Tell everyone else how much adjustment we're expecting them to use. In // particular if an adjustment is required for a tail call the epilogue could // have a different view of things. FuncInfo->setInitialStackAdjust(NumInitialBytes); emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes, MachineInstr::FrameSetup); if (NeedsFrameMoves && NumInitialBytes) { // We emit this update even if the CFA is set from a frame pointer later so // that the CFA is valid in the interim. MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) .addSym(SPLabel); MachineLocation Dst(MachineLocation::VirtualFP); unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true); MMI.addFrameInst( MCCFIInstruction::createDefCfa(SPLabel, Reg, -NumInitialBytes)); } // Otherwise we need to set the frame pointer and/or add a second stack // adjustment. bool FPNeedsSetting = hasFP(MF); for (; MBBI != MBB.end(); ++MBBI) { // Note that this search makes strong assumptions about the operation used // to store the frame-pointer: it must be "STP x29, x30, ...". This could // change in future, but until then there's no point in implementing // untestable more generic cases. if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR && MBBI->getOperand(0).getReg() == AArch64::X29) { int64_t X29FrameIdx = MBBI->getOperand(2).getIndex(); FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx)); ++MBBI; emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP, AArch64::X29, NumInitialBytes + MFI->getObjectOffset(X29FrameIdx), MachineInstr::FrameSetup); // The offset adjustment used when emitting debugging locations relative // to whatever frame base is set. AArch64 uses the default frame base (FP // or SP) and this adjusts the calculations to be correct. MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx) - MFI->getStackSize()); if (NeedsFrameMoves) { MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) .addSym(FPLabel); unsigned Reg = MRI->getDwarfRegNum(AArch64::X29, true); unsigned Offset = MFI->getObjectOffset(X29FrameIdx); MMI.addFrameInst(MCCFIInstruction::createDefCfa(FPLabel, Reg, Offset)); } FPNeedsSetting = false; } if (!MBBI->getFlag(MachineInstr::FrameSetup)) break; } assert(!FPNeedsSetting && "Frame pointer couldn't be set"); emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes, MachineInstr::FrameSetup); // Now we emit the rest of the frame setup information, if necessary: we've // already noted the FP and initial SP moves so we're left with the prologue's // final SP update and callee-saved register locations. if (!NeedsFrameMoves) return; // Reuse the label if appropriate, so create it in this outer scope. MCSymbol *CSLabel = 0; // The rest of the stack adjustment if (!hasFP(MF) && NumResidualBytes) { CSLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) .addSym(CSLabel); MachineLocation Dst(MachineLocation::VirtualFP); unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true); unsigned Offset = NumResidualBytes + NumInitialBytes; MMI.addFrameInst(MCCFIInstruction::createDefCfa(CSLabel, Reg, -Offset)); } // And any callee-saved registers (it's fine to leave them to the end here, // because the old values are still valid at this point. const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); if (CSI.size()) { if (!CSLabel) { CSLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) .addSym(CSLabel); } for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { unsigned Offset = MFI->getObjectOffset(I->getFrameIdx()); unsigned Reg = MRI->getDwarfRegNum(I->getReg(), true); MMI.addFrameInst(MCCFIInstruction::createOffset(CSLabel, Reg, Offset)); } } }
void ARM64FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); MachineFrameInfo *MFI = MF.getFrameInfo(); const ARM64InstrInfo *TII = static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo()); const ARM64RegisterInfo *RegInfo = static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo()); DebugLoc DL = MBBI->getDebugLoc(); unsigned RetOpcode = MBBI->getOpcode(); int NumBytes = MFI->getStackSize(); const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>(); // Initial and residual are named for consitency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t ArgumentPopSize = 0; if (RetOpcode == ARM64::TCRETURNdi || RetOpcode == ARM64::TCRETURNri) { MachineOperand &StackAdjust = MBBI->getOperand(1); // For a tail-call in a callee-pops-arguments environment, some or all of // the stack may actually be in use for the call's arguments, this is // calculated during LowerCall and consumed here... ArgumentPopSize = StackAdjust.getImm(); } else { // ... otherwise the amount to pop is *all* of the argument space, // conveniently stored in the MachineFunctionInfo by // LowerFormalArguments. This will, of course, be zero for the C calling // convention. ArgumentPopSize = AFI->getArgumentStackToRestore(); } // The stack frame should be like below, // // ---------------------- --- // | | | // | BytesInStackArgArea| CalleeArgStackSize // | (NumReusableBytes) | (of tail call) // | | --- // | | | // ---------------------| --- | // | | | | // | CalleeSavedReg | | | // | (NumRestores * 16) | | | // | | | | // ---------------------| | NumBytes // | | StackSize (StackAdjustUp) // | LocalStackSize | | | // | (covering callee | | | // | args) | | | // | | | | // ---------------------- --- --- // // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize // = StackSize + ArgumentPopSize // // ARM64TargetLowering::LowerCall figures out ArgumentPopSize and keeps // it as the 2nd argument of ARM64ISD::TC_RETURN. NumBytes += ArgumentPopSize; unsigned NumRestores = 0; // Move past the restores of the callee-saved registers. MachineBasicBlock::iterator LastPopI = MBBI; const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); if (LastPopI != MBB.begin()) { do { ++NumRestores; --LastPopI; } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs)); if (!isCSRestore(LastPopI, CSRegs)) { ++LastPopI; --NumRestores; } } NumBytes -= NumRestores * 16; assert(NumBytes >= 0 && "Negative stack allocation size!?"); if (!hasFP(MF)) { // If this was a redzone leaf function, we don't need to restore the // stack pointer. if (!canUseRedZone(MF)) emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::SP, NumBytes, TII); return; } // Restore the original stack pointer. // FIXME: Rather than doing the math here, we should instead just use // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. if (NumBytes || MFI->hasVarSizedObjects()) emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::FP, -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags); }
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { MachineFunction *MF = MI->getParent()->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); MachineBasicBlock *MBB = MI->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); MachineFrameInfo *FrameInfo = MF->getFrameInfo(); const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); MachineOperand &FIOp = MI->getOperand(FIOperandNum); int Index = MI->getOperand(FIOperandNum).getIndex(); switch (MI->getOpcode()) { // SGPR register spill case AMDGPU::SI_SPILL_S512_SAVE: case AMDGPU::SI_SPILL_S256_SAVE: case AMDGPU::SI_SPILL_S128_SAVE: case AMDGPU::SI_SPILL_S64_SAVE: case AMDGPU::SI_SPILL_S32_SAVE: { unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned SuperReg = MI->getOperand(0).getReg(); bool IsKill = MI->getOperand(0).isKill(); // SubReg carries the "Kill" flag when SubReg == SuperReg. unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill); for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { unsigned SubReg = getPhysRegSubReg(SuperReg, &AMDGPU::SGPR_32RegClass, i); struct SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i); if (Spill.hasReg()) { BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), Spill.VGPR) .addReg(SubReg, getKillRegState(IsKill)) .addImm(Spill.Lane); // FIXME: Since this spills to another register instead of an actual // frame index, we should delete the frame index when all references to // it are fixed. } else { // Spill SGPR to a frame index. // FIXME we should use S_STORE_DWORD here for VI. MachineInstrBuilder Mov = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) .addReg(SubReg, SubKillState); // There could be undef components of a spilled super register. // TODO: Can we detect this and skip the spill? if (NumSubRegs > 1) { // The last implicit use of the SuperReg carries the "Kill" flag. unsigned SuperKillState = 0; if (i + 1 == e) SuperKillState |= getKillRegState(IsKill); Mov.addReg(SuperReg, RegState::Implicit | SuperKillState); } unsigned Size = FrameInfo->getObjectSize(Index); unsigned Align = FrameInfo->getObjectAlignment(Index); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(*MF, Index); MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, Size, Align); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE)) .addReg(TmpReg, RegState::Kill) // src .addFrameIndex(Index) // frame_idx .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset .addImm(i * 4) // offset .addMemOperand(MMO); } } MI->eraseFromParent(); break; } // SGPR register restore case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_S256_RESTORE: case AMDGPU::SI_SPILL_S128_RESTORE: case AMDGPU::SI_SPILL_S64_RESTORE: case AMDGPU::SI_SPILL_S32_RESTORE: { unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(), &AMDGPU::SGPR_32RegClass, i); struct SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i); if (Spill.hasReg()) { BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg) .addReg(Spill.VGPR) .addImm(Spill.Lane) .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine); } else { // Restore SGPR from a stack slot. // FIXME: We should use S_LOAD_DWORD here for VI. unsigned Align = FrameInfo->getObjectAlignment(Index); unsigned Size = FrameInfo->getObjectSize(Index); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(*MF, Index); MachineMemOperand *MMO = MF->getMachineMemOperand( PtrInfo, MachineMemOperand::MOLoad, Size, Align); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg) .addFrameIndex(Index) // frame_idx .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset .addImm(i * 4) // offset .addMemOperand(MMO); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) .addReg(TmpReg, RegState::Kill) .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine); } } MI->eraseFromParent(); break; } // VGPR register spill case AMDGPU::SI_SPILL_V512_SAVE: case AMDGPU::SI_SPILL_V256_SAVE: case AMDGPU::SI_SPILL_V128_SAVE: case AMDGPU::SI_SPILL_V96_SAVE: case AMDGPU::SI_SPILL_V64_SAVE: case AMDGPU::SI_SPILL_V32_SAVE: buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET, TII->getNamedOperand(*MI, AMDGPU::OpName::src), TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(), FrameInfo->getObjectOffset(Index) + TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS); MI->eraseFromParent(); break; case AMDGPU::SI_SPILL_V32_RESTORE: case AMDGPU::SI_SPILL_V64_RESTORE: case AMDGPU::SI_SPILL_V96_RESTORE: case AMDGPU::SI_SPILL_V128_RESTORE: case AMDGPU::SI_SPILL_V256_RESTORE: case AMDGPU::SI_SPILL_V512_RESTORE: { buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET, TII->getNamedOperand(*MI, AMDGPU::OpName::dst), TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(), FrameInfo->getObjectOffset(Index) + TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS); MI->eraseFromParent(); break; } default: { int64_t Offset = FrameInfo->getObjectOffset(Index); FIOp.ChangeToImmediate(Offset); if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) { unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) .addImm(Offset); FIOp.ChangeToRegister(TmpReg, false, false, true); } } } }
/// LowerCCCArguments - transform physical registers into virtual registers and /// generate load operations for arguments places on the stack. // FIXME: struct return stuff // FIXME: varargs SDValue MSP430TargetLowering::LowerCCCArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_MSP430); assert(!isVarArg && "Varargs not supported yet"); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (VA.isRegLoc()) { // Arguments passed in registers EVT RegVT = VA.getLocVT(); switch (RegVT.getSimpleVT().SimpleTy) { default: { #ifndef NDEBUG errs() << "LowerFormalArguments Unhandled argument type: " << RegVT.getSimpleVT().SimpleTy << "\n"; #endif llvm_unreachable(0); } case MVT::i16: unsigned VReg = RegInfo.createVirtualRegister(MSP430::GR16RegisterClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, RegVT); // If this is an 8-bit value, it is really passed promoted to 16 // bits. Insert an assert[sz]ext to capture this, then truncate to the // right size. if (VA.getLocInfo() == CCValAssign::SExt) ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::ZExt) ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); if (VA.getLocInfo() != CCValAssign::Full) ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); InVals.push_back(ArgValue); } } else { // Sanity check assert(VA.isMemLoc()); // Load the argument to a virtual register unsigned ObjSize = VA.getLocVT().getSizeInBits()/8; if (ObjSize > 2) { errs() << "LowerFormalArguments Unhandled argument type: " << EVT(VA.getLocVT()).getEVTString() << "\n"; } // Create the frame index object for this incoming parameter... int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true); // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i16); InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0)); } } return Chain; }
void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = prior(MBB.end()); assert((MBBI->getOpcode() == ARM::tBX_RET || MBBI->getOpcode() == ARM::tPOP_RET) && "Can only insert epilog into returning blocks"); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. const unsigned *CSRegs = getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do --MBBI; while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); if (!isCSRestore(MBBI, CSRegs)) ++MBBI; } // Move SP to start of FP callee save spill area. NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); if (hasFP(MF)) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot or target is ELF and the function has FP. if (NumBytes) emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, TII, *this, dl); else BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) .addReg(FramePtr); } else { if (MBBI->getOpcode() == ARM::tBX_RET && &MBB.front() != MBBI && prior(MBBI)->getOpcode() == ARM::tPOP) { MachineBasicBlock::iterator PMBBI = prior(MBBI); emitSPUpdate(MBB, PMBBI, TII, dl, *this, NumBytes); } else emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); } } if (VARegSaveSize) { // Epilogue for vararg functions: pop LR to R3 and branch off it. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(0) // No write back. .addReg(ARM::R3, RegState::Define); emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize); BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) .addReg(ARM::R3, RegState::Kill); MBB.erase(MBBI); } }