/// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. /// bool PEI::runOnMachineFunction(MachineFunction &Fn) { const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs"); RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); // Calculate the MaxCallFrameSize and AdjustsStack variables for the // function's frame information. Also eliminates call frame pseudo // instructions. calculateCallsInformation(Fn); // Allow the target machine to make some adjustments to the function // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. TFI->processFunctionBeforeCalleeSavedScan(Fn, RS); // Scan the function for modified callee saved registers and insert spill code // for any callee saved registers that are modified. calculateCalleeSavedRegisters(Fn); // Determine placement of CSR spill/restore code: // place all spills in the entry block, all restores in return blocks. calculateSets(Fn); // Add the code to save and restore the callee saved registers if (!F->hasFnAttribute(Attribute::Naked)) insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function // before the frame layout is finalized. TFI->processFunctionBeforeFrameFinalized(Fn, RS); // Calculate actual frame offsets for all abstract stack objects... calculateFrameObjectOffsets(Fn); // Add prolog and epilog code to the function. This function is required // to align the stack frame as necessary for any stack variables or // called functions. Because of this, calculateCalleeSavedRegisters() // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. if (!F->hasFnAttribute(Attribute::Naked)) insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references // and actual offsets. // replaceFrameIndices(Fn); // If register scavenging is needed, as we've enabled doing it as a // post-pass, scavenge the virtual registers that frame index elimination // inserted. if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) scavengeFrameVirtualRegs(Fn); // Clear any vregs created by virtual scavenging. Fn.getRegInfo().clearVirtRegs(); // Warn on stack size when we exceeds the given limit. MachineFrameInfo *MFI = Fn.getFrameInfo(); uint64_t StackSize = MFI->getStackSize(); if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) { DiagnosticInfoStackSize DiagStackSize(*F, StackSize); F->getContext().diagnose(DiagStackSize); } delete RS; RestoreBlocks.clear(); return true; }
bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) { DEBUG(dbgs() << "********** Compute Spill Weights **********\n" << "********** Function: " << fn.getFunction()->getName() << '\n'); LiveIntervals *lis = &getAnalysis<LiveIntervals>(); MachineLoopInfo *loopInfo = &getAnalysis<MachineLoopInfo>(); const TargetInstrInfo *tii = fn.getTarget().getInstrInfo(); MachineRegisterInfo *mri = &fn.getRegInfo(); SmallSet<unsigned, 4> processed; for (MachineFunction::iterator mbbi = fn.begin(), mbbe = fn.end(); mbbi != mbbe; ++mbbi) { MachineBasicBlock* mbb = mbbi; SlotIndex mbbEnd = lis->getMBBEndIdx(mbb); MachineLoop* loop = loopInfo->getLoopFor(mbb); unsigned loopDepth = loop ? loop->getLoopDepth() : 0; bool isExiting = loop ? loop->isLoopExiting(mbb) : false; for (MachineBasicBlock::const_iterator mii = mbb->begin(), mie = mbb->end(); mii != mie; ++mii) { const MachineInstr *mi = mii; if (tii->isIdentityCopy(*mi) || mi->isImplicitDef() || mi->isDebugValue()) continue; for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { const MachineOperand &mopi = mi->getOperand(i); if (!mopi.isReg() || mopi.getReg() == 0) continue; unsigned reg = mopi.getReg(); if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg())) continue; // Multiple uses of reg by the same instruction. It should not // contribute to spill weight again. if (!processed.insert(reg)) continue; bool hasDef = mopi.isDef(); bool hasUse = !hasDef; for (unsigned j = i+1; j != e; ++j) { const MachineOperand &mopj = mi->getOperand(j); if (!mopj.isReg() || mopj.getReg() != reg) continue; hasDef |= mopj.isDef(); hasUse |= mopj.isUse(); if (hasDef && hasUse) break; } LiveInterval ®Int = lis->getInterval(reg); float weight = lis->getSpillWeight(hasDef, hasUse, loopDepth); if (hasDef && isExiting) { // Looks like this is a loop count variable update. SlotIndex defIdx = lis->getInstructionIndex(mi).getDefIndex(); const LiveRange *dlr = lis->getInterval(reg).getLiveRangeContaining(defIdx); if (dlr->end >= mbbEnd) weight *= 3.0F; } regInt.weight += weight; } processed.clear(); } } for (LiveIntervals::iterator I = lis->begin(), E = lis->end(); I != E; ++I) { LiveInterval &li = *I->second; if (TargetRegisterInfo::isVirtualRegister(li.reg)) { // If the live interval length is essentially zero, i.e. in every live // range the use follows def immediately, it doesn't make sense to spill // it and hope it will be easier to allocate for this li. if (isZeroLengthInterval(&li)) { li.weight = HUGE_VALF; continue; } bool isLoad = false; SmallVector<LiveInterval*, 4> spillIs; if (lis->isReMaterializable(li, spillIs, isLoad)) { // If all of the definitions of the interval are re-materializable, // it is a preferred candidate for spilling. If non of the defs are // loads, then it's potentially very cheap to re-materialize. // FIXME: this gets much more complicated once we support non-trivial // re-materialization. if (isLoad) li.weight *= 0.9F; else li.weight *= 0.5F; } // Slightly prefer live interval that has been assigned a preferred reg. std::pair<unsigned, unsigned> Hint = mri->getRegAllocationHint(li.reg); if (Hint.first || Hint.second) li.weight *= 1.01F; lis->normalizeSpillWeight(li); } } return false; }
void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { AArch64MachineFunctionInfo *FuncInfo = MF.getInfo<AArch64MachineFunctionInfo>(); MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MachineModuleInfo &MMI = MF.getMMI(); std::vector<MachineMove> &Moves = MMI.getFrameMoves(); bool NeedsFrameMoves = MMI.hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); uint64_t NumInitialBytes, NumResidualBytes; // Currently we expect the stack to be laid out by // sub sp, sp, #initial // stp x29, x30, [sp, #offset] // ... // str xxx, [sp, #offset] // sub sp, sp, #rest (possibly via extra instructions). if (MFI->getCalleeSavedInfo().size()) { // If there are callee-saved registers, we want to store them efficiently as // a block, and virtual base assignment happens too early to do it for us so // we adjust the stack in two phases: first just for callee-saved fiddling, // then to allocate the rest of the frame. splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes); } else { // If there aren't any callee-saved registers, two-phase adjustment is // inefficient. It's more efficient to adjust with NumInitialBytes too // because when we're in a "callee pops argument space" situation, that pop // must be tacked onto Initial for correctness. NumInitialBytes = MFI->getStackSize(); NumResidualBytes = 0; } // Tell everyone else how much adjustment we're expecting them to use. In // particular if an adjustment is required for a tail call the epilogue could // have a different view of things. FuncInfo->setInitialStackAdjust(NumInitialBytes); emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes, MachineInstr::FrameSetup); if (NeedsFrameMoves && NumInitialBytes) { // We emit this update even if the CFA is set from a frame pointer later so // that the CFA is valid in the interim. MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) .addSym(SPLabel); MachineLocation Dst(MachineLocation::VirtualFP); MachineLocation Src(AArch64::XSP, NumInitialBytes); Moves.push_back(MachineMove(SPLabel, Dst, Src)); } // Otherwise we need to set the frame pointer and/or add a second stack // adjustment. bool FPNeedsSetting = hasFP(MF); for (; MBBI != MBB.end(); ++MBBI) { // Note that this search makes strong assumptions about the operation used // to store the frame-pointer: it must be "STP x29, x30, ...". This could // change in future, but until then there's no point in implementing // untestable more generic cases. if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR && MBBI->getOperand(0).getReg() == AArch64::X29) { int64_t X29FrameIdx = MBBI->getOperand(2).getIndex(); FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx)); ++MBBI; emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP, AArch64::X29, NumInitialBytes + MFI->getObjectOffset(X29FrameIdx), MachineInstr::FrameSetup); // The offset adjustment used when emitting debugging locations relative // to whatever frame base is set. AArch64 uses the default frame base (FP // or SP) and this adjusts the calculations to be correct. MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx) - MFI->getStackSize()); if (NeedsFrameMoves) { MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) .addSym(FPLabel); MachineLocation Dst(MachineLocation::VirtualFP); MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx)); Moves.push_back(MachineMove(FPLabel, Dst, Src)); } FPNeedsSetting = false; } if (!MBBI->getFlag(MachineInstr::FrameSetup)) break; } assert(!FPNeedsSetting && "Frame pointer couldn't be set"); emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes, MachineInstr::FrameSetup); // Now we emit the rest of the frame setup information, if necessary: we've // already noted the FP and initial SP moves so we're left with the prologue's // final SP update and callee-saved register locations. if (!NeedsFrameMoves) return; // Reuse the label if appropriate, so create it in this outer scope. MCSymbol *CSLabel = 0; // The rest of the stack adjustment if (!hasFP(MF) && NumResidualBytes) { CSLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) .addSym(CSLabel); MachineLocation Dst(MachineLocation::VirtualFP); MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes); Moves.push_back(MachineMove(CSLabel, Dst, Src)); } // And any callee-saved registers (it's fine to leave them to the end here, // because the old values are still valid at this point. const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); if (CSI.size()) { if (!CSLabel) { CSLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) .addSym(CSLabel); } for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { MachineLocation Dst(MachineLocation::VirtualFP, MFI->getObjectOffset(I->getFrameIdx())); MachineLocation Src(I->getReg()); Moves.push_back(MachineMove(CSLabel, Dst, Src)); } } }
bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n" << "********** Function: " << MF.getName() << '\n'); if (skipFunction(*MF.getFunction())) return false; TII = MF.getSubtarget().getInstrInfo(); DomTree = &getAnalysis<MachineDominatorTree>(); MRI = &MF.getRegInfo(); bool Changed = false; // Visit blocks in dominator tree pre-order. The pre-order enables multiple // cmp-conversions from the same head block. // Note that updateDomTree() modifies the children of the DomTree node // currently being visited. The df_iterator supports that; it doesn't look at // child_begin() / child_end() until after a node has been visited. for (MachineDomTreeNode *I : depth_first(DomTree)) { MachineBasicBlock *HBB = I->getBlock(); SmallVector<MachineOperand, 4> HeadCond; MachineBasicBlock *TBB = nullptr, *FBB = nullptr; if (TII->analyzeBranch(*HBB, TBB, FBB, HeadCond)) { continue; } // Equivalence check is to skip loops. if (!TBB || TBB == HBB) { continue; } SmallVector<MachineOperand, 4> TrueCond; MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr; if (TII->analyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) { continue; } MachineInstr *HeadCmpMI = findSuitableCompare(HBB); if (!HeadCmpMI) { continue; } MachineInstr *TrueCmpMI = findSuitableCompare(TBB); if (!TrueCmpMI) { continue; } AArch64CC::CondCode HeadCmp; if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) { continue; } AArch64CC::CondCode TrueCmp; if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) { continue; } const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm(); const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm(); DEBUG(dbgs() << "Head branch:\n"); DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(HeadCmp) << '\n'); DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n'); DEBUG(dbgs() << "True branch:\n"); DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(TrueCmp) << '\n'); DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n'); if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) || (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) && std::abs(TrueImm - HeadImm) == 2) { // This branch transforms machine instructions that correspond to // // 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...) // 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...) // // into // // 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...) // 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...) CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp); CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp); if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) && std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) { modifyCmp(HeadCmpMI, HeadCmpInfo); modifyCmp(TrueCmpMI, TrueCmpInfo); Changed = true; } } else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) || (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) && std::abs(TrueImm - HeadImm) == 1) { // This branch transforms machine instructions that correspond to // // 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...) // 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...) // // into // // 1) (a <= {NewImm} && ...) || (a > {NewImm} && ...) // 2) (a < {NewImm} && ...) || (a >= {NewImm} && ...) // GT -> GE transformation increases immediate value, so picking the // smaller one; LT -> LE decreases immediate value so invert the choice. bool adjustHeadCond = (HeadImm < TrueImm); if (HeadCmp == AArch64CC::LT) { adjustHeadCond = !adjustHeadCond; } if (adjustHeadCond) { Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm); } else { Changed |= adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm); } } // Other transformation cases almost never occur due to generation of < or > // comparisons instead of <= and >=. } return Changed; }
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; MRI = &MF.getRegInfo(); ST = &MF.getSubtarget<SISubtarget>(); TII = ST->getInstrInfo(); TRI = &TII->getRegisterInfo(); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); // omod is ignored by hardware if IEEE bit is enabled. omod also does not // correctly handle signed zeros. // // TODO: Check nsz on instructions when fast math flags are preserved to MI // level. bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath(); for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { MachineBasicBlock &MBB = *BI; MachineBasicBlock::iterator I, Next; for (I = MBB.begin(); I != MBB.end(); I = Next) { Next = std::next(I); MachineInstr &MI = *I; tryFoldInst(TII, &MI); if (!TII->isFoldableCopy(MI)) { if (IsIEEEMode || !tryFoldOMod(MI)) tryFoldClamp(MI); continue; } MachineOperand &OpToFold = MI.getOperand(1); bool FoldingImm = OpToFold.isImm() || OpToFold.isFI(); // FIXME: We could also be folding things like TargetIndexes. if (!FoldingImm && !OpToFold.isReg()) continue; if (OpToFold.isReg() && !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg())) continue; // Prevent folding operands backwards in the function. For example, // the COPY opcode must not be replaced by 1 in this example: // // %vreg3<def> = COPY %VGPR0; VGPR_32:%vreg3 // ... // %VGPR0<def> = V_MOV_B32_e32 1, %EXEC<imp-use> MachineOperand &Dst = MI.getOperand(0); if (Dst.isReg() && !TargetRegisterInfo::isVirtualRegister(Dst.getReg())) continue; foldInstOperand(MI, OpToFold); } } return false; }
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const X86FrameLowering *TFI = getFrameLowering(MF); // Set the floating point control register as reserved. Reserved.set(X86::FPCW); // Set the stack-pointer register and its aliases as reserved. for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid(); ++I) Reserved.set(*I); // Set the Shadow Stack Pointer as reserved. Reserved.set(X86::SSP); // Set the instruction pointer register and its aliases as reserved. for (MCSubRegIterator I(X86::RIP, this, /*IncludeSelf=*/true); I.isValid(); ++I) Reserved.set(*I); // Set the frame-pointer register and its aliases as reserved if needed. if (TFI->hasFP(MF)) { for (MCSubRegIterator I(X86::RBP, this, /*IncludeSelf=*/true); I.isValid(); ++I) Reserved.set(*I); } // Set the base-pointer register and its aliases as reserved if needed. if (hasBasePointer(MF)) { CallingConv::ID CC = MF.getFunction().getCallingConv(); const uint32_t *RegMask = getCallPreservedMask(MF, CC); if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister())) report_fatal_error( "Stack realignment in presence of dynamic allocas is not supported with" "this calling convention."); unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), 64); for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true); I.isValid(); ++I) Reserved.set(*I); } // Mark the segment registers as reserved. Reserved.set(X86::CS); Reserved.set(X86::SS); Reserved.set(X86::DS); Reserved.set(X86::ES); Reserved.set(X86::FS); Reserved.set(X86::GS); // Mark the floating point stack registers as reserved. for (unsigned n = 0; n != 8; ++n) Reserved.set(X86::ST0 + n); // Reserve the registers that only exist in 64-bit mode. if (!Is64Bit) { // These 8-bit registers are part of the x86-64 extension even though their // super-registers are old 32-bits. Reserved.set(X86::SIL); Reserved.set(X86::DIL); Reserved.set(X86::BPL); Reserved.set(X86::SPL); Reserved.set(X86::SIH); Reserved.set(X86::DIH); Reserved.set(X86::BPH); Reserved.set(X86::SPH); for (unsigned n = 0; n != 8; ++n) { // R8, R9, ... for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI) Reserved.set(*AI); // XMM8, XMM9, ... for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI) Reserved.set(*AI); } } if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) { for (unsigned n = 16; n != 32; ++n) { for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI) Reserved.set(*AI); } } assert(checkAllSuperRegsMarked(Reserved, {X86::SIL, X86::DIL, X86::BPL, X86::SPL, X86::SIH, X86::DIH, X86::BPH, X86::SPH})); return Reserved; }
void AVRFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); bool isHandler = (CallConv == CallingConv::AVR_INTR || CallConv == CallingConv::AVR_SIGNAL); // Early exit if the frame pointer is not needed in this function except for // signal/interrupt handlers where special code generation is required. if (!hasFP(MF) && !isHandler) { return; } MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert(MBBI->getDesc().isReturn() && "Can only insert epilog into returning blocks"); DebugLoc DL = MBBI->getDebugLoc(); const MachineFrameInfo &MFI = MF.getFrameInfo(); const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>(); unsigned FrameSize = MFI.getStackSize() - AFI->getCalleeSavedFrameSize(); const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>(); const AVRInstrInfo &TII = *STI.getInstrInfo(); // Emit special epilogue code to restore R1, R0 and SREG in interrupt/signal // handlers at the very end of the function, just before reti. if (isHandler) { BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0); BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr)) .addImm(0x3f) .addReg(AVR::R0, RegState::Kill); BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0); } if (hasFP(MF)) BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R29R28); // Early exit if there is no need to restore the frame pointer. if (!FrameSize) { return; } // Skip the callee-saved pop instructions. while (MBBI != MBB.begin()) { MachineBasicBlock::iterator PI = std::prev(MBBI); int Opc = PI->getOpcode(); if (Opc != AVR::POPRd && Opc != AVR::POPWRd && !PI->isTerminator()) { break; } --MBBI; } unsigned Opcode; // Select the optimal opcode depending on how big it is. if (isUInt<6>(FrameSize)) { Opcode = AVR::ADIWRdK; } else { Opcode = AVR::SUBIWRdK; FrameSize = -FrameSize; } // Restore the frame pointer by doing FP += <size>. MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opcode), AVR::R29R28) .addReg(AVR::R29R28, RegState::Kill) .addImm(FrameSize); // The SREG implicit def is dead. MI->getOperand(3).setIsDead(); // Write back R29R28 to SP and temporarily disable interrupts. BuildMI(MBB, MBBI, DL, TII.get(AVR::SPWRITE), AVR::SP) .addReg(AVR::R29R28, RegState::Kill); }
void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); bool HasFP = hasFP(MF); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc DL; // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; int NumBytes = (int)MFI->getStackSize(); if (!AFI->hasStackFrame()) { assert(!HasFP && "unexpected function without stack frame but with FP"); // All of the stack allocation is for locals. AFI->setLocalStackSize(NumBytes); // Label used to tie together the PROLOG_LABEL and the MachineMoves. MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); // REDZONE: If the stack size is less than 128 bytes, we don't need // to actually allocate. if (NumBytes && !canUseRedZone(MF)) { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); // Encode the stack size of the leaf function. unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else if (NumBytes) { ++NumRedZoneFunctions; } return; } // Only set up FP if we actually need to. int FPOffset = 0; if (HasFP) FPOffset = getFPOffsetInPrologue(MBBI); // Move past the saves of the callee-saved registers. while (isCSSave(MBBI)) { ++MBBI; NumBytes -= 16; } assert(NumBytes >= 0 && "Negative stack allocation size!?"); if (HasFP) { // Issue sub fp, sp, FPOffset or // mov fp,sp when FPOffset is zero. // Note: All stores of callee-saved registers are marked as "FrameSetup". // This code marks the instruction(s) that set the FP also. emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII, MachineInstr::FrameSetup); } // All of the remaining stack allocations are for locals. AFI->setLocalStackSize(NumBytes); // Allocate space for the rest of the frame. const unsigned Alignment = MFI->getMaxAlignment(); const bool NeedsRealignment = RegInfo->needsStackRealignment(MF); unsigned scratchSPReg = AArch64::SP; if (NumBytes && NeedsRealignment) { // Use the first callee-saved register as a scratch register. scratchSPReg = AArch64::X9; } // If we're a leaf function, try using the red zone. if (NumBytes && !canUseRedZone(MF)) // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have // the correct value here, as NumBytes also includes padding bytes, // which shouldn't be counted here. emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); if (NumBytes && NeedsRealignment) { const unsigned NrBitsToZero = countTrailingZeros(Alignment); assert(NrBitsToZero > 1); assert(scratchSPReg != AArch64::SP); // SUB X9, SP, NumBytes // -- X9 is temporary register, so shouldn't contain any live data here, // -- free to use. This is already produced by emitFrameOffset above. // AND SP, X9, 0b11111...0000 // The logical immediates have a non-trivial encoding. The following // formula computes the encoded immediate with all ones but // NrBitsToZero zero bits as least significant bits. uint32_t andMaskEncoded = (1 <<12) // = N | ((64-NrBitsToZero) << 6) // immr | ((64-NrBitsToZero-1) << 0) // imms ; BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) .addReg(scratchSPReg, RegState::Kill) .addImm(andMaskEncoded); } // If we need a base pointer, set it up here. It's whatever the value of the // stack pointer is at this point. Any variable size objects will be allocated // after this, so we can still use the base pointer to reference locals. // // FIXME: Clarify FrameSetup flags here. // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is // needed. if (RegInfo->hasBasePointer(MF)) { TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, false); } if (needsFrameMoves) { const DataLayout &TD = MF.getDataLayout(); const int StackGrowth = -TD.getPointerSize(0); unsigned FramePtr = RegInfo->getFrameRegister(MF); // An example of the prologue: // // .globl __foo // .align 2 // __foo: // Ltmp0: // .cfi_startproc // .cfi_personality 155, ___gxx_personality_v0 // Leh_func_begin: // .cfi_lsda 16, Lexception33 // // stp xa,bx, [sp, -#offset]! // ... // stp x28, x27, [sp, #offset-32] // stp fp, lr, [sp, #offset-16] // add fp, sp, #offset - 16 // sub sp, sp, #1360 // // The Stack: // +-------------------------------------------+ // 10000 | ........ | ........ | ........ | ........ | // 10004 | ........ | ........ | ........ | ........ | // +-------------------------------------------+ // 10008 | ........ | ........ | ........ | ........ | // 1000c | ........ | ........ | ........ | ........ | // +===========================================+ // 10010 | X28 Register | // 10014 | X28 Register | // +-------------------------------------------+ // 10018 | X27 Register | // 1001c | X27 Register | // +===========================================+ // 10020 | Frame Pointer | // 10024 | Frame Pointer | // +-------------------------------------------+ // 10028 | Link Register | // 1002c | Link Register | // +===========================================+ // 10030 | ........ | ........ | ........ | ........ | // 10034 | ........ | ........ | ........ | ........ | // +-------------------------------------------+ // 10038 | ........ | ........ | ........ | ........ | // 1003c | ........ | ........ | ........ | ........ | // +-------------------------------------------+ // // [sp] = 10030 :: >>initial value<< // sp = 10020 :: stp fp, lr, [sp, #-16]! // fp = sp == 10020 :: mov fp, sp // [sp] == 10020 :: stp x28, x27, [sp, #-16]! // sp == 10010 :: >>final value<< // // The frame pointer (w29) points to address 10020. If we use an offset of // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24 // for w27, and -32 for w28: // // Ltmp1: // .cfi_def_cfa w29, 16 // Ltmp2: // .cfi_offset w30, -8 // Ltmp3: // .cfi_offset w29, -16 // Ltmp4: // .cfi_offset w27, -24 // Ltmp5: // .cfi_offset w28, -32 if (HasFP) { // Define the current CFA rule to use the provided FP. unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); // Record the location of the stored LR unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, LR, StackGrowth)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); // Record the location of the stored FP CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else { // Encode the stack size of the leaf function. unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize())); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } // Now emit the moves for whatever callee saved regs we have. emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr); } }
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); MachineFrameInfo *MFI = MF.getFrameInfo(); const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL; bool IsTailCallReturn = false; if (MBB.end() != MBBI) { DL = MBBI->getDebugLoc(); unsigned RetOpcode = MBBI->getOpcode(); IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri; } int NumBytes = MFI->getStackSize(); const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; // Initial and residual are named for consistency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t ArgumentPopSize = 0; if (IsTailCallReturn) { MachineOperand &StackAdjust = MBBI->getOperand(1); // For a tail-call in a callee-pops-arguments environment, some or all of // the stack may actually be in use for the call's arguments, this is // calculated during LowerCall and consumed here... ArgumentPopSize = StackAdjust.getImm(); } else { // ... otherwise the amount to pop is *all* of the argument space, // conveniently stored in the MachineFunctionInfo by // LowerFormalArguments. This will, of course, be zero for the C calling // convention. ArgumentPopSize = AFI->getArgumentStackToRestore(); } // The stack frame should be like below, // // ---------------------- --- // | | | // | BytesInStackArgArea| CalleeArgStackSize // | (NumReusableBytes) | (of tail call) // | | --- // | | | // ---------------------| --- | // | | | | // | CalleeSavedReg | | | // | (NumRestores * 8) | | | // | | | | // ---------------------| | NumBytes // | | StackSize (StackAdjustUp) // | LocalStackSize | | | // | (covering callee | | | // | args) | | | // | | | | // ---------------------- --- --- // // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize // = StackSize + ArgumentPopSize // // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps // it as the 2nd argument of AArch64ISD::TC_RETURN. NumBytes += ArgumentPopSize; unsigned NumRestores = 0; // Move past the restores of the callee-saved registers. MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); MachineBasicBlock::iterator Begin = MBB.begin(); while (LastPopI != Begin) { --LastPopI; unsigned Restores = getNumCSRestores(*LastPopI, CSRegs); NumRestores += Restores; if (Restores == 0) { ++LastPopI; break; } } NumBytes -= NumRestores * 8; assert(NumBytes >= 0 && "Negative stack allocation size!?"); if (!hasFP(MF)) { // If this was a redzone leaf function, we don't need to restore the // stack pointer. if (!canUseRedZone(MF)) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII); return; } // Restore the original stack pointer. // FIXME: Rather than doing the math here, we should instead just use // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. if (NumBytes || MFI->hasVarSizedObjects()) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, -(NumRestores - 2) * 8, TII, MachineInstr::NoFlags); }
bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF, const yaml::MachineFunction &YamlMF, PerFunctionMIParsingState &PFS) { MachineFrameInfo &MFI = *MF.getFrameInfo(); const Function &F = *MF.getFunction(); const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo; MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken); MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken); MFI.setHasStackMap(YamlMFI.HasStackMap); MFI.setHasPatchPoint(YamlMFI.HasPatchPoint); MFI.setStackSize(YamlMFI.StackSize); MFI.setOffsetAdjustment(YamlMFI.OffsetAdjustment); if (YamlMFI.MaxAlignment) MFI.ensureMaxAlignment(YamlMFI.MaxAlignment); MFI.setAdjustsStack(YamlMFI.AdjustsStack); MFI.setHasCalls(YamlMFI.HasCalls); MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize); MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment); MFI.setHasVAStart(YamlMFI.HasVAStart); MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); if (!YamlMFI.SavePoint.Value.empty()) { MachineBasicBlock *MBB = nullptr; if (parseMBBReference(MBB, YamlMFI.SavePoint, MF, PFS)) return true; MFI.setSavePoint(MBB); } if (!YamlMFI.RestorePoint.Value.empty()) { MachineBasicBlock *MBB = nullptr; if (parseMBBReference(MBB, YamlMFI.RestorePoint, MF, PFS)) return true; MFI.setRestorePoint(MBB); } std::vector<CalleeSavedInfo> CSIInfo; // Initialize the fixed frame objects. for (const auto &Object : YamlMF.FixedStackObjects) { int ObjectIdx; if (Object.Type != yaml::FixedMachineStackObject::SpillSlot) ObjectIdx = MFI.CreateFixedObject(Object.Size, Object.Offset, Object.IsImmutable, Object.IsAliased); else ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset); MFI.setObjectAlignment(ObjectIdx, Object.Alignment); // TODO: Report an error when objects are redefined. PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID, ObjectIdx)); if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister, ObjectIdx)) return true; } // Initialize the ordinary frame objects. for (const auto &Object : YamlMF.StackObjects) { int ObjectIdx; const AllocaInst *Alloca = nullptr; const yaml::StringValue &Name = Object.Name; if (!Name.Value.empty()) { Alloca = dyn_cast_or_null<AllocaInst>( F.getValueSymbolTable().lookup(Name.Value)); if (!Alloca) return error(Name.SourceRange.Start, "alloca instruction named '" + Name.Value + "' isn't defined in the function '" + F.getName() + "'"); } if (Object.Type == yaml::MachineStackObject::VariableSized) ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca); else ObjectIdx = MFI.CreateStackObject( Object.Size, Object.Alignment, Object.Type == yaml::MachineStackObject::SpillSlot, Alloca); MFI.setObjectOffset(ObjectIdx, Object.Offset); // TODO: Report an error when objects are redefined. PFS.StackObjectSlots.insert(std::make_pair(Object.ID, ObjectIdx)); if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister, ObjectIdx)) return true; } MFI.setCalleeSavedInfo(CSIInfo); if (!CSIInfo.empty()) MFI.setCalleeSavedInfoValid(true); return false; }
/// Returns true if the local user-space stack pointer needs to be written back /// to memory by this function (this is not meaningful if needsSP is false). If /// false, the stack red zone can be used and only a local SP is needed. bool WebAssemblyFrameLowering::needsSPWriteback( const MachineFunction &MF, const MachineFrameInfo &MFI) const { assert(needsSP(MF, MFI)); return MFI.getStackSize() > RedZoneSize || MFI.hasCalls() || MF.getFunction().hasFnAttribute(Attribute::NoRedZone); }
bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { auto It = Functions.find(MF.getName()); if (It == Functions.end()) return error(Twine("no machine function information for function '") + MF.getName() + "' in the MIR file"); // TODO: Recreate the machine function. const yaml::MachineFunction &YamlMF = *It->getValue(); if (YamlMF.Alignment) MF.setAlignment(YamlMF.Alignment); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); MF.setHasInlineAsm(YamlMF.HasInlineAsm); PerFunctionMIParsingState PFS; if (initializeRegisterInfo(MF, YamlMF, PFS)) return true; if (!YamlMF.Constants.empty()) { auto *ConstantPool = MF.getConstantPool(); assert(ConstantPool && "Constant pool must be created"); if (initializeConstantPool(*ConstantPool, YamlMF, MF, PFS.ConstantPoolSlots)) return true; } const auto &F = *MF.getFunction(); for (const auto &YamlMBB : YamlMF.BasicBlocks) { const BasicBlock *BB = nullptr; const yaml::StringValue &Name = YamlMBB.Name; const yaml::StringValue &IRBlock = YamlMBB.IRBlock; if (!Name.Value.empty()) { BB = dyn_cast_or_null<BasicBlock>( F.getValueSymbolTable().lookup(Name.Value)); if (!BB) return error(Name.SourceRange.Start, Twine("basic block '") + Name.Value + "' is not defined in the function '" + MF.getName() + "'"); } if (!IRBlock.Value.empty()) { // TODO: Report an error when both name and ir block are specified. SMDiagnostic Error; if (parseIRBlockReference(BB, SM, MF, IRBlock.Value, PFS, IRSlots, Error)) return error(Error, IRBlock.SourceRange); } auto *MBB = MF.CreateMachineBasicBlock(BB); MF.insert(MF.end(), MBB); bool WasInserted = PFS.MBBSlots.insert(std::make_pair(YamlMBB.ID, MBB)).second; if (!WasInserted) return error(Twine("redefinition of machine basic block with id #") + Twine(YamlMBB.ID)); } if (YamlMF.BasicBlocks.empty()) return error(Twine("machine function '") + Twine(MF.getName()) + "' requires at least one machine basic block in its body"); // Initialize the frame information after creating all the MBBs so that the // MBB references in the frame information can be resolved. if (initializeFrameInfo(MF, YamlMF, PFS)) return true; // Initialize the jump table after creating all the MBBs so that the MBB // references can be resolved. if (!YamlMF.JumpTableInfo.Entries.empty() && initializeJumpTableInfo(MF, YamlMF.JumpTableInfo, PFS)) return true; // Initialize the machine basic blocks after creating them all so that the // machine instructions parser can resolve the MBB references. unsigned I = 0; for (const auto &YamlMBB : YamlMF.BasicBlocks) { if (initializeMachineBasicBlock(MF, *MF.getBlockNumbered(I++), YamlMBB, PFS)) return true; } // FIXME: This is a temporary workaround until the reserved registers can be // serialized. MF.getRegInfo().freezeReservedRegs(MF); MF.verify(); return false; }
bool WebAssemblyRegisterInfo::canRealignStack(const MachineFunction &MF) const { return !MF.getFunction()->hasFnAttribute("no-realign-stack"); }
bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { LiveIntervals &LIS = getAnalysis<LiveIntervals>(); MachineBlockFrequencyInfo &MBFI = getAnalysis<MachineBlockFrequencyInfo>(); calculateSpillWeightsAndHints(LIS, MF, getAnalysis<MachineLoopInfo>(), MBFI); VirtRegMap &VRM = getAnalysis<VirtRegMap>(); std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM)); MF.getRegInfo().freezeReservedRegs(MF); DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n"); // Allocator main loop: // // * Map current regalloc problem to a PBQP problem // * Solve the PBQP problem // * Map the solution back to a register allocation // * Spill if necessary // // This process is continued till no more spills are generated. // Find the vreg intervals in need of allocation. findVRegIntervalsToAlloc(MF, LIS); #ifndef NDEBUG const Function &F = *MF.getFunction(); std::string FullyQualifiedName = F.getParent()->getModuleIdentifier() + "." + F.getName().str(); #endif // If there are non-empty intervals allocate them using pbqp. if (!VRegsToAlloc.empty()) { const TargetSubtargetInfo &Subtarget = *MF.getTarget().getSubtargetImpl(); std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot = llvm::make_unique<PBQPRAConstraintList>(); ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>()); ConstraintsRoot->addConstraint(llvm::make_unique<Interference>()); if (PBQPCoalescing) ConstraintsRoot->addConstraint(llvm::make_unique<Coalescing>()); ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints()); bool PBQPAllocComplete = false; unsigned Round = 0; while (!PBQPAllocComplete) { DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n"); PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI)); initializeGraph(G); ConstraintsRoot->apply(G); #ifndef NDEBUG if (PBQPDumpGraphs) { std::ostringstream RS; RS << Round; std::string GraphFileName = FullyQualifiedName + "." + RS.str() + ".pbqpgraph"; std::error_code EC; raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text); DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" << GraphFileName << "\"\n"); G.dumpToStream(OS); } #endif PBQP::Solution Solution = PBQP::RegAlloc::solve(G); PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller); ++Round; } } // Finalise allocation, allocate empty ranges. finalizeAlloc(MF, LIS, VRM); VRegsToAlloc.clear(); EmptyIntervalVRegs.clear(); DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n"); return true; }
/// calculateCalleeSavedRegisters - Scan the function for modified callee saved /// registers. void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); MachineFrameInfo *MFI = Fn.getFrameInfo(); // Get the callee saved register list... const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); // These are used to keep track the callee-save area. Initialize them. MinCSFrameIndex = INT_MAX; MaxCSFrameIndex = 0; // Early exit for targets which have no callee saved registers. if (CSRegs == 0 || CSRegs[0] == 0) return; // In Naked functions we aren't going to save any registers. if (Fn.getFunction()->hasFnAttr(Attribute::Naked)) return; std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; if (Fn.getRegInfo().isPhysRegUsed(Reg)) { // If the reg is modified, save it! CSI.push_back(CalleeSavedInfo(Reg)); } else { for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); *AliasSet; ++AliasSet) { // Check alias registers too. if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) { CSI.push_back(CalleeSavedInfo(Reg)); break; } } } } if (CSI.empty()) return; // Early exit if no callee saved registers are modified! unsigned NumFixedSpillSlots; const TargetFrameInfo::SpillSlot *FixedSpillSlots = TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); // Now that we know which registers need to be saved and restored, allocate // stack slots for them. for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { unsigned Reg = I->getReg(); const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); int FrameIdx; if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) { I->setFrameIdx(FrameIdx); continue; } // Check to see if this physreg must be spilled to a particular stack slot // on this target. const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots; while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && FixedSlot->Reg != Reg) ++FixedSlot; if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { // Nope, just spill it anywhere convenient. unsigned Align = RC->getAlignment(); unsigned StackAlign = TFI->getStackAlignment(); // We may not be able to satisfy the desired alignment specification of // the TargetRegisterClass if the stack alignment is smaller. Use the // min. Align = std::min(Align, StackAlign); FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true); } I->setFrameIdx(FrameIdx); } MFI->setCalleeSavedInfo(CSI); }
void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>( MF.getSubtarget().getRegisterInfo()); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); SmallVector<unsigned, 4> UnspilledCSGPRs; SmallVector<unsigned, 4> UnspilledCSFPRs; // The frame record needs to be created by saving the appropriate registers if (hasFP(MF)) { SavedRegs.set(AArch64::FP); SavedRegs.set(AArch64::LR); } // Spill the BasePtr if it's used. Do this first thing so that the // getCalleeSavedRegs() below will get the right answer. if (RegInfo->hasBasePointer(MF)) SavedRegs.set(RegInfo->getBaseRegister()); if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF)) SavedRegs.set(AArch64::X9); // If any callee-saved registers are used, the frame cannot be eliminated. unsigned NumGPRSpilled = 0; unsigned NumFPRSpilled = 0; bool ExtraCSSpill = false; bool CanEliminateFrame = true; DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); // Check pairs of consecutive callee-saved registers. for (unsigned i = 0; CSRegs[i]; i += 2) { assert(CSRegs[i + 1] && "Odd number of callee-saved registers!"); const unsigned OddReg = CSRegs[i]; const unsigned EvenReg = CSRegs[i + 1]; assert((AArch64::GPR64RegClass.contains(OddReg) && AArch64::GPR64RegClass.contains(EvenReg)) ^ (AArch64::FPR64RegClass.contains(OddReg) && AArch64::FPR64RegClass.contains(EvenReg)) && "Register class mismatch!"); const bool OddRegUsed = SavedRegs.test(OddReg); const bool EvenRegUsed = SavedRegs.test(EvenReg); // Early exit if none of the registers in the register pair is actually // used. if (!OddRegUsed && !EvenRegUsed) { if (AArch64::GPR64RegClass.contains(OddReg)) { UnspilledCSGPRs.push_back(OddReg); UnspilledCSGPRs.push_back(EvenReg); } else { UnspilledCSFPRs.push_back(OddReg); UnspilledCSFPRs.push_back(EvenReg); } continue; } unsigned Reg = AArch64::NoRegister; // If only one of the registers of the register pair is used, make sure to // mark the other one as used as well. if (OddRegUsed ^ EvenRegUsed) { // Find out which register is the additional spill. Reg = OddRegUsed ? EvenReg : OddReg; SavedRegs.set(Reg); } DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo)); DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo)); assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) || (RegInfo->getEncodingValue(OddReg) + 1 == RegInfo->getEncodingValue(EvenReg))) && "Register pair of non-adjacent registers!"); if (AArch64::GPR64RegClass.contains(OddReg)) { NumGPRSpilled += 2; // If it's not a reserved register, we can use it in lieu of an // emergency spill slot for the register scavenger. // FIXME: It would be better to instead keep looking and choose another // unspilled register that isn't reserved, if there is one. if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg)) ExtraCSSpill = true; } else NumFPRSpilled += 2; CanEliminateFrame = false; } // FIXME: Set BigStack if any stack slot references may be out of range. // For now, just conservatively guestimate based on unscaled indexing // range. We'll end up allocating an unnecessary spill slot a lot, but // realistically that's not a big deal at this stage of the game. // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned CFSize = MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); bool BigStack = (CFSize >= 256); if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) AFI->setHasStackFrame(true); // Estimate if we might need to scavenge a register at some point in order // to materialize a stack offset. If so, either spill one additional // callee-saved register or reserve a special spill slot to facilitate // register scavenging. If we already spilled an extra callee-saved register // above to keep the number of spills even, we don't need to do anything else // here. if (BigStack && !ExtraCSSpill) { // If we're adding a register to spill here, we have to add two of them // to keep the number of regs to spill even. assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!"); unsigned Count = 0; while (!UnspilledCSGPRs.empty() && Count < 2) { unsigned Reg = UnspilledCSGPRs.back(); UnspilledCSGPRs.pop_back(); DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo) << " to get a scratch register.\n"); SavedRegs.set(Reg); ExtraCSSpill = true; ++Count; } // If we didn't find an extra callee-saved register to spill, create // an emergency spill slot. if (!ExtraCSSpill) { const TargetRegisterClass *RC = &AArch64::GPR64RegClass; int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false); RS->addScavengingFrameIndex(FI); DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI << " as the emergency spill slot.\n"); } } }
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. /// bool PEI::runOnMachineFunction(MachineFunction &Fn) { const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); FrameConstantRegMap.clear(); // Calculate the MaxCallFrameSize and AdjustsStack variables for the // function's frame information. Also eliminates call frame pseudo // instructions. calculateCallsInformation(Fn); // Allow the target machine to make some adjustments to the function // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. TRI->processFunctionBeforeCalleeSavedScan(Fn, RS); // Scan the function for modified callee saved registers and insert spill code // for any callee saved registers that are modified. calculateCalleeSavedRegisters(Fn); // Determine placement of CSR spill/restore code: // - with shrink wrapping, place spills and restores to tightly // enclose regions in the Machine CFG of the function where // they are used. Without shrink wrapping // - default (no shrink wrapping), place all spills in the // entry block, all restores in return blocks. placeCSRSpillsAndRestores(Fn); // Add the code to save and restore the callee saved registers if (!F->hasFnAttr(Attribute::Naked)) insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function // before the frame layout is finalized. TRI->processFunctionBeforeFrameFinalized(Fn); // Calculate actual frame offsets for all abstract stack objects... calculateFrameObjectOffsets(Fn); // Add prolog and epilog code to the function. This function is required // to align the stack frame as necessary for any stack variables or // called functions. Because of this, calculateCalleeSavedRegisters() // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. if (!F->hasFnAttr(Attribute::Naked)) insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references // and actual offsets. // replaceFrameIndices(Fn); // If register scavenging is needed, as we've enabled doing it as a // post-pass, scavenge the virtual registers that frame index elimiation // inserted. if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) scavengeFrameVirtualRegs(Fn); delete RS; clearAllSets(); return true; }
DenseMap<const MachineBasicBlock *, int> llvm::getFuncletMembership(const MachineFunction &MF) { DenseMap<const MachineBasicBlock *, int> FuncletMembership; // We don't have anything to do if there aren't any EH pads. if (!MF.getMMI().hasEHFunclets()) return FuncletMembership; int EntryBBNumber = MF.front().getNumber(); bool IsSEH = isAsynchronousEHPersonality( classifyEHPersonality(MF.getFunction()->getPersonalityFn())); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); SmallVector<const MachineBasicBlock *, 16> FuncletBlocks; SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks; SmallVector<const MachineBasicBlock *, 16> SEHCatchPads; SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors; for (const MachineBasicBlock &MBB : MF) { if (MBB.isEHFuncletEntry()) { FuncletBlocks.push_back(&MBB); } else if (IsSEH && MBB.isEHPad()) { SEHCatchPads.push_back(&MBB); } else if (MBB.pred_empty()) { UnreachableBlocks.push_back(&MBB); } MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator(); // CatchPads are not funclets for SEH so do not consider CatchRet to // transfer control to another funclet. if (MBBI->getOpcode() != TII->getCatchReturnOpcode()) continue; // FIXME: SEH CatchPads are not necessarily in the parent function: // they could be inside a finally block. const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB(); const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB(); CatchRetSuccessors.push_back( {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()}); } // We don't have anything to do if there aren't any EH pads. if (FuncletBlocks.empty()) return FuncletMembership; // Identify all the basic blocks reachable from the function entry. collectFuncletMembers(FuncletMembership, EntryBBNumber, &MF.front()); // All blocks not part of a funclet are in the parent function. for (const MachineBasicBlock *MBB : UnreachableBlocks) collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB); // Next, identify all the blocks inside the funclets. for (const MachineBasicBlock *MBB : FuncletBlocks) collectFuncletMembers(FuncletMembership, MBB->getNumber(), MBB); // SEH CatchPads aren't really funclets, handle them separately. for (const MachineBasicBlock *MBB : SEHCatchPads) collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB); // Finally, identify all the targets of a catchret. for (std::pair<const MachineBasicBlock *, int> CatchRetPair : CatchRetSuccessors) collectFuncletMembers(FuncletMembership, CatchRetPair.second, CatchRetPair.first); return FuncletMembership; }
bool StackColoring::runOnMachineFunction(MachineFunction &Func) { DEBUG(dbgs() << "********** Stack Coloring **********\n" << "********** Function: " << ((const Value*)Func.getFunction())->getName() << '\n'); MF = &Func; MFI = MF->getFrameInfo(); Indexes = &getAnalysis<SlotIndexes>(); BlockLiveness.clear(); BasicBlocks.clear(); BasicBlockNumbering.clear(); Markers.clear(); Intervals.clear(); VNInfoAllocator.Reset(); unsigned NumSlots = MFI->getObjectIndexEnd(); // If there are no stack slots then there are no markers to remove. if (!NumSlots) return false; SmallVector<int, 8> SortedSlots; SortedSlots.reserve(NumSlots); Intervals.reserve(NumSlots); unsigned NumMarkers = collectMarkers(NumSlots); unsigned TotalSize = 0; DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n"); DEBUG(dbgs()<<"Slot structure:\n"); for (int i=0; i < MFI->getObjectIndexEnd(); ++i) { DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n"); TotalSize += MFI->getObjectSize(i); } DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n"); // Don't continue because there are not enough lifetime markers, or the // stack is too small, or we are told not to optimize the slots. if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) { DEBUG(dbgs()<<"Will not try to merge slots.\n"); return removeAllMarkers(); } for (unsigned i=0; i < NumSlots; ++i) { LiveInterval *LI = new LiveInterval(i, 0); Intervals.push_back(LI); LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator); SortedSlots.push_back(i); } // Calculate the liveness of each block. calculateLocalLiveness(); // Propagate the liveness information. calculateLiveIntervals(NumSlots); // Search for allocas which are used outside of the declared lifetime // markers. if (ProtectFromEscapedAllocas) removeInvalidSlotRanges(); // Maps old slots to new slots. DenseMap<int, int> SlotRemap; unsigned RemovedSlots = 0; unsigned ReducedSize = 0; // Do not bother looking at empty intervals. for (unsigned I = 0; I < NumSlots; ++I) { if (Intervals[SortedSlots[I]]->empty()) SortedSlots[I] = -1; } // This is a simple greedy algorithm for merging allocas. First, sort the // slots, placing the largest slots first. Next, perform an n^2 scan and look // for disjoint slots. When you find disjoint slots, merge the samller one // into the bigger one and update the live interval. Remove the small alloca // and continue. // Sort the slots according to their size. Place unused slots at the end. std::sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI)); bool Chanded = true; while (Chanded) { Chanded = false; for (unsigned I = 0; I < NumSlots; ++I) { if (SortedSlots[I] == -1) continue; for (unsigned J=I+1; J < NumSlots; ++J) { if (SortedSlots[J] == -1) continue; int FirstSlot = SortedSlots[I]; int SecondSlot = SortedSlots[J]; LiveInterval *First = Intervals[FirstSlot]; LiveInterval *Second = Intervals[SecondSlot]; assert (!First->empty() && !Second->empty() && "Found an empty range"); // Merge disjoint slots. if (!First->overlaps(*Second)) { Chanded = true; First->MergeRangesInAsValue(*Second, First->getValNumInfo(0)); SlotRemap[SecondSlot] = FirstSlot; SortedSlots[J] = -1; DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<< SecondSlot<<" together.\n"); unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot), MFI->getObjectAlignment(SecondSlot)); assert(MFI->getObjectSize(FirstSlot) >= MFI->getObjectSize(SecondSlot) && "Merging a small object into a larger one"); RemovedSlots+=1; ReducedSize += MFI->getObjectSize(SecondSlot); MFI->setObjectAlignment(FirstSlot, MaxAlignment); MFI->RemoveStackObject(SecondSlot); } } } }// While changed. // Record statistics. StackSpaceSaved += ReducedSize; StackSlotMerged += RemovedSlots; DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<< ReducedSize<<" bytes\n"); // Scan the entire function and update all machine operands that use frame // indices to use the remapped frame index. expungeSlotMap(SlotRemap, NumSlots); remapInstructions(SlotRemap); // Release the intervals. for (unsigned I = 0; I < NumSlots; ++I) { delete Intervals[I]; } return removeAllMarkers(); }
/// shouldTailDuplicate - Determine if it is profitable to duplicate this block. bool TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, bool IsSimple, MachineBasicBlock &TailBB) { // Only duplicate blocks that end with unconditional branches. if (TailBB.canFallThrough()) return false; // Don't try to tail-duplicate single-block loops. if (TailBB.isSuccessor(&TailBB)) return false; // Set the limit on the cost to duplicate. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. unsigned MaxDuplicateCount; if (TailDuplicateSize.getNumOccurrences() == 0 && MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) MaxDuplicateCount = 1; else MaxDuplicateCount = TailDuplicateSize; // If the target has hardware branch prediction that can handle indirect // branches, duplicating them can often make them predictable when there // are common paths through the code. The limit needs to be high enough // to allow undoing the effects of tail merging and other optimizations // that rearrange the predecessors of the indirect branch. bool HasIndirectbr = false; if (!TailBB.empty()) HasIndirectbr = TailBB.back().isIndirectBranch(); if (HasIndirectbr && PreRegAlloc) MaxDuplicateCount = 20; // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) { // Non-duplicable things shouldn't be tail-duplicated. if (I->isNotDuplicable()) return false; // Do not duplicate 'return' instructions if this is a pre-regalloc run. // A return may expand into a lot more instructions (e.g. reload of callee // saved registers) after PEI. if (PreRegAlloc && I->isReturn()) return false; // Avoid duplicating calls before register allocation. Calls presents a // barrier to register allocation so duplicating them may end up increasing // spills. if (PreRegAlloc && I->isCall()) return false; if (!I->isPHI() && !I->isDebugValue()) InstrCount += 1; if (InstrCount > MaxDuplicateCount) return false; } if (HasIndirectbr && PreRegAlloc) return true; if (IsSimple) return true; if (!PreRegAlloc) return true; return canCompletelyDuplicateBB(TailBB); }
void AVRFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); DebugLoc DL = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc(); const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>(); const AVRInstrInfo &TII = *STI.getInstrInfo(); bool HasFP = hasFP(MF); // Interrupt handlers re-enable interrupts in function entry. if (CallConv == CallingConv::AVR_INTR) { BuildMI(MBB, MBBI, DL, TII.get(AVR::BSETs)) .addImm(0x07) .setMIFlag(MachineInstr::FrameSetup); } // Save the frame pointer if we have one. if (HasFP) { BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr)) .addReg(AVR::R29R28, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); } // Emit special prologue code to save R1, R0 and SREG in interrupt/signal // handlers before saving any other registers. if (CallConv == CallingConv::AVR_INTR || CallConv == CallingConv::AVR_SIGNAL) { BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr)) .addReg(AVR::R1R0, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, TII.get(AVR::INRdA), AVR::R0) .addImm(0x3f) .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHRr)) .addReg(AVR::R0, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, TII.get(AVR::EORRdRr)) .addReg(AVR::R0, RegState::Define) .addReg(AVR::R0, RegState::Kill) .addReg(AVR::R0, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); } // Early exit if the frame pointer is not needed in this function. if (!HasFP) { return; } const MachineFrameInfo &MFI = MF.getFrameInfo(); const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>(); unsigned FrameSize = MFI.getStackSize() - AFI->getCalleeSavedFrameSize(); // Skip the callee-saved push instructions. while ( (MBBI != MBB.end()) && MBBI->getFlag(MachineInstr::FrameSetup) && (MBBI->getOpcode() == AVR::PUSHRr || MBBI->getOpcode() == AVR::PUSHWRr)) { ++MBBI; } // Update Y with the new base value. BuildMI(MBB, MBBI, DL, TII.get(AVR::SPREAD), AVR::R29R28) .addReg(AVR::SP) .setMIFlag(MachineInstr::FrameSetup); // Mark the FramePtr as live-in in every block except the entry. for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); I != E; ++I) { I->addLiveIn(AVR::R29R28); } if (!FrameSize) { return; } // Reserve the necessary frame memory by doing FP -= <size>. unsigned Opcode = (isUInt<6>(FrameSize)) ? AVR::SBIWRdK : AVR::SUBIWRdK; MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opcode), AVR::R29R28) .addReg(AVR::R29R28, RegState::Kill) .addImm(FrameSize) .setMIFlag(MachineInstr::FrameSetup); // The SREG implicit def is dead. MI->getOperand(3).setIsDead(); // Write back R29R28 to SP and temporarily disable interrupts. BuildMI(MBB, MBBI, DL, TII.get(AVR::SPWRITE), AVR::SP) .addReg(AVR::R29R28) .setMIFlag(MachineInstr::FrameSetup); }
void XTCFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); const XTCInstrInfo &TII = *static_cast<const XTCInstrInfo*>(MF.getTarget().getInstrInfo()); XTCFunctionInfo *XTCFI = MF.getInfo<XTCFunctionInfo>(); MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); bool requiresRA = false; //CallConv == CallingConv::MBLAZE_INTR; // Determine the correct frame layout determineFrameLayout(MF); // Get the number of bytes to allocate from the FrameInfo. unsigned StackSize = MFI->getStackSize(); // No need to allocate space on the stack. if (StackSize == 0 && !MFI->adjustsStack() && !requiresRA) return; int FPOffset = XTCFI->getFPStackOffset(); int RAOffset = XTCFI->getRAStackOffset(); if (hasFP(MF)) { // Save frame pointer /* BuildMI(MBB, MBBI, DL, TII.get(XTC::STWPREI)) .addReg(XTC::r15).addReg(XTC::r14).addImm(-4); */ BuildMI(MBB, MBBI, DL, TII.get(XTC::STW)) .addReg(XTC::r15).addReg(XTC::r14).addImm(-4); // Set it to SP BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY)) .addReg(XTC::r15).addReg(XTC::r14); } // Adjust stack : addi R1, R1, -imm BuildMI(MBB, MBBI, DL, TII.get(XTC::ADDI), XTC::r15).addReg(XTC::r15).addImm(-StackSize); /* // swi R15, R1, stack_loc if (MFI->adjustsStack() || requiresRA) { BuildMI(MBB, MBBI, DL, TII.get(XTC::SWI)) .addReg(XTC::R15).addReg(XTC::R1).addImm(RAOffset); } if (hasFP(MF)) { // swi R19, R1, stack_loc BuildMI(MBB, MBBI, DL, TII.get(XTC::SWI)) .addReg(XTC::R19).addReg(XTC::R1).addImm(FPOffset); // add R19, R1, R0 BuildMI(MBB, MBBI, DL, TII.get(XTC::ADD), XTC::R19) .addReg(XTC::R1).addReg(XTC::R0); } */ }
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; MachineRegisterInfo &MRI = MF.getRegInfo(); const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); std::vector<unsigned> I1Defs; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { MachineBasicBlock &MBB = *BI; MachineBasicBlock::iterator I, Next; for (I = MBB.begin(); I != MBB.end(); I = Next) { Next = std::next(I); MachineInstr &MI = *I; if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) { // If this has a literal constant source that is the same as the // reversed bits of an inline immediate, replace with a bitreverse of // that constant. This saves 4 bytes in the common case of materializing // sign bits. // Test if we are after regalloc. We only want to do this after any // optimizations happen because this will confuse them. // XXX - not exactly a check for post-regalloc run. MachineOperand &Src = MI.getOperand(1); if (Src.isImm() && TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) { int32_t ReverseImm; if (isReverseInlineImm(TII, Src, ReverseImm)) { MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32)); Src.setImm(ReverseImm); continue; } } } // Combine adjacent s_nops to use the immediate operand encoding how long // to wait. // // s_nop N // s_nop M // => // s_nop (N + M) if (MI.getOpcode() == AMDGPU::S_NOP && Next != MBB.end() && (*Next).getOpcode() == AMDGPU::S_NOP) { MachineInstr &NextMI = *Next; // The instruction encodes the amount to wait with an offset of 1, // i.e. 0 is wait 1 cycle. Convert both to cycles and then convert back // after adding. uint8_t Nop0 = MI.getOperand(0).getImm() + 1; uint8_t Nop1 = NextMI.getOperand(0).getImm() + 1; // Make sure we don't overflow the bounds. if (Nop0 + Nop1 <= 8) { NextMI.getOperand(0).setImm(Nop0 + Nop1 - 1); MI.eraseFromParent(); } continue; } // FIXME: We also need to consider movs of constant operands since // immediate operands are not folded if they have more than one use, and // the operand folding pass is unaware if the immediate will be free since // it won't know if the src == dest constraint will end up being // satisfied. if (MI.getOpcode() == AMDGPU::S_ADD_I32 || MI.getOpcode() == AMDGPU::S_MUL_I32) { const MachineOperand *Dest = &MI.getOperand(0); MachineOperand *Src0 = &MI.getOperand(1); MachineOperand *Src1 = &MI.getOperand(2); if (!Src0->isReg() && Src1->isReg()) { if (TII->commuteInstruction(MI, false, 1, 2)) std::swap(Src0, Src1); } // FIXME: This could work better if hints worked with subregisters. If // we have a vector add of a constant, we usually don't get the correct // allocation due to the subregister usage. if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) && Src0->isReg()) { MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg()); MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg()); continue; } if (Src0->isReg() && Src0->getReg() == Dest->getReg()) { if (Src1->isImm() && isKImmOperand(TII, *Src1)) { unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ? AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32; MI.setDesc(TII->get(Opc)); MI.tieOperands(0, 1); } } } // Try to use s_cmpk_* if (MI.isCompare() && TII->isSOPC(MI)) { shrinkScalarCompare(TII, MI); continue; } // Try to use S_MOVK_I32, which will save 4 bytes for small immediates. if (MI.getOpcode() == AMDGPU::S_MOV_B32) { const MachineOperand &Dst = MI.getOperand(0); MachineOperand &Src = MI.getOperand(1); if (Src.isImm() && TargetRegisterInfo::isPhysicalRegister(Dst.getReg())) { int32_t ReverseImm; if (isKImmOperand(TII, Src)) MI.setDesc(TII->get(AMDGPU::S_MOVK_I32)); else if (isReverseInlineImm(TII, Src, ReverseImm)) { MI.setDesc(TII->get(AMDGPU::S_BREV_B32)); Src.setImm(ReverseImm); } } continue; } if (!TII->hasVALU32BitEncoding(MI.getOpcode())) continue; if (!canShrink(MI, TII, TRI, MRI)) { // Try commuting the instruction and see if that enables us to shrink // it. if (!MI.isCommutable() || !TII->commuteInstruction(MI) || !canShrink(MI, TII, TRI, MRI)) continue; } // getVOPe32 could be -1 here if we started with an instruction that had // a 32-bit encoding and then commuted it to an instruction that did not. if (!TII->hasVALU32BitEncoding(MI.getOpcode())) continue; int Op32 = AMDGPU::getVOPe32(MI.getOpcode()); if (TII->isVOPC(Op32)) { unsigned DstReg = MI.getOperand(0).getReg(); if (TargetRegisterInfo::isVirtualRegister(DstReg)) { // VOPC instructions can only write to the VCC register. We can't // force them to use VCC here, because this is only one register and // cannot deal with sequences which would require multiple copies of // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...) // // So, instead of forcing the instruction to write to VCC, we provide // a hint to the register allocator to use VCC and then we we will run // this pass again after RA and shrink it if it outputs to VCC. MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC); continue; } if (DstReg != AMDGPU::VCC) continue; } if (Op32 == AMDGPU::V_CNDMASK_B32_e32) { // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC // instructions. const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); if (!Src2->isReg()) continue; unsigned SReg = Src2->getReg(); if (TargetRegisterInfo::isVirtualRegister(SReg)) { MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC); continue; } if (SReg != AMDGPU::VCC) continue; } // We can shrink this instruction DEBUG(dbgs() << "Shrinking " << MI); MachineInstrBuilder Inst32 = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32)); // Add the dst operand if the 32-bit encoding also has an explicit $vdst. // For VOPC instructions, this is replaced by an implicit def of vcc. int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst); if (Op32DstIdx != -1) { // dst Inst32.addOperand(MI.getOperand(0)); } else { assert(MI.getOperand(0).getReg() == AMDGPU::VCC && "Unexpected case"); } Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0)); const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); if (Src1) Inst32.addOperand(*Src1); const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); if (Src2) { int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2); if (Op32Src2Idx != -1) { Inst32.addOperand(*Src2); } else { // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is // replaced with an implicit read of vcc. This was already added // during the initial BuildMI, so find it to preserve the flags. copyFlagsToImplicitVCC(*Inst32, *Src2); } } ++NumInstructionsShrunk; // Copy extra operands not present in the instruction definition. copyExtraImplicitOps(*Inst32, MF, MI); MI.eraseFromParent(); foldImmediates(*Inst32, TII, MRI); DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n'); } } return false; }
/// Insert prolog code into the function. /// For ARC, this inserts a call to a function that puts required callee saved /// registers onto the stack, when enough callee saved registers are required. void ARCFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { LLVM_DEBUG(dbgs() << "Emit Prologue: " << MF.getName() << "\n"); auto *AFI = MF.getInfo<ARCFunctionInfo>(); MachineModuleInfo &MMI = MF.getMMI(); MCContext &Context = MMI.getContext(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); const ARCInstrInfo *TII = MF.getSubtarget<ARCSubtarget>().getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc dl; MachineFrameInfo &MFI = MF.getFrameInfo(); const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); unsigned Last = determineLastCalleeSave(CSI); unsigned StackSlotsUsedByFunclet = 0; bool SavedBlink = false; unsigned AlreadyAdjusted = 0; if (MF.getFunction().isVarArg()) { // Add in the varargs area here first. LLVM_DEBUG(dbgs() << "Varargs\n"); unsigned VarArgsBytes = MFI.getObjectSize(AFI->getVarArgsFrameIndex()); BuildMI(MBB, MBBI, dl, TII->get(ARC::SUB_rru6)) .addReg(ARC::SP) .addReg(ARC::SP) .addImm(VarArgsBytes); } if (hasFP(MF)) { LLVM_DEBUG(dbgs() << "Saving FP\n"); BuildMI(MBB, MBBI, dl, TII->get(ARC::ST_AW_rs9)) .addReg(ARC::SP, RegState::Define) .addReg(ARC::FP) .addReg(ARC::SP) .addImm(-4); AlreadyAdjusted += 4; } if (UseSaveRestoreFunclet && Last > ARC::R14) { LLVM_DEBUG(dbgs() << "Creating store funclet.\n"); // BL to __save_r13_to_<TRI->getRegAsmName()> StackSlotsUsedByFunclet = Last - ARC::R12; BuildMI(MBB, MBBI, dl, TII->get(ARC::PUSH_S_BLINK)); BuildMI(MBB, MBBI, dl, TII->get(ARC::SUB_rru6)) .addReg(ARC::SP) .addReg(ARC::SP) .addImm(4 * StackSlotsUsedByFunclet); BuildMI(MBB, MBBI, dl, TII->get(ARC::BL)) .addExternalSymbol(store_funclet_name[Last - ARC::R15]) .addReg(ARC::BLINK, RegState::Implicit | RegState::Kill); AlreadyAdjusted += 4 * (StackSlotsUsedByFunclet + 1); SavedBlink = true; } // If we haven't saved BLINK, but we need to...do that now. if (MFI.hasCalls() && !SavedBlink) { LLVM_DEBUG(dbgs() << "Creating save blink.\n"); BuildMI(MBB, MBBI, dl, TII->get(ARC::PUSH_S_BLINK)); AlreadyAdjusted += 4; } if (AFI->MaxCallStackReq > 0) MFI.setStackSize(MFI.getStackSize() + AFI->MaxCallStackReq); // We have already saved some of the stack... LLVM_DEBUG(dbgs() << "Adjusting stack by: " << (MFI.getStackSize() - AlreadyAdjusted) << "\n"); generateStackAdjustment(MBB, MBBI, *ST.getInstrInfo(), dl, -(MFI.getStackSize() - AlreadyAdjusted), ARC::SP); if (hasFP(MF)) { LLVM_DEBUG(dbgs() << "Setting FP from SP.\n"); BuildMI(MBB, MBBI, dl, TII->get(isUInt<6>(MFI.getStackSize()) ? ARC::ADD_rru6 : ARC::ADD_rrlimm), ARC::FP) .addReg(ARC::SP) .addImm(MFI.getStackSize()); } // Emit CFI records: // .cfi_def_cfa_offset StackSize // .cfi_offset fp, -StackSize // .cfi_offset blink, -StackSize+4 unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize())); BuildMI(MBB, MBBI, dl, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); int CurOffset = -4; if (hasFP(MF)) { CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(ARC::FP, true), CurOffset)); BuildMI(MBB, MBBI, dl, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); CurOffset -= 4; } if (MFI.hasCalls()) { CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(ARC::BLINK, true), CurOffset)); BuildMI(MBB, MBBI, dl, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } // CFI for the rest of the registers. for (const auto &Entry : CSI) { unsigned Reg = Entry.getReg(); int FI = Entry.getFrameIdx(); // Skip BLINK and FP. if ((hasFP(MF) && Reg == ARC::FP) || (MFI.hasCalls() && Reg == ARC::BLINK)) continue; CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); BuildMI(MBB, MBBI, dl, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } }
bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n" << "********** Function: " << MF.getName() << "\n"); if (skipFunction(*MF.getFunction())) return false; // If we move NewValueJump before register allocation we'll need live variable // analysis here too. QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo()); QRI = static_cast<const HexagonRegisterInfo *>( MF.getSubtarget().getRegisterInfo()); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); if (DisableNewValueJumps) { return false; } int nvjCount = DbgNVJCount; int nvjGenerated = 0; // Loop through all the bb's of the function for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); MBBb != MBBe; ++MBBb) { MachineBasicBlock *MBB = &*MBBb; DEBUG(dbgs() << "** dumping bb ** " << MBB->getNumber() << "\n"); DEBUG(MBB->dump()); DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n"); bool foundJump = false; bool foundCompare = false; bool invertPredicate = false; unsigned predReg = 0; // predicate reg of the jump. unsigned cmpReg1 = 0; int cmpOp2 = 0; bool MO1IsKill = false; bool MO2IsKill = false; MachineBasicBlock::iterator jmpPos; MachineBasicBlock::iterator cmpPos; MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr; MachineBasicBlock *jmpTarget = nullptr; bool afterRA = false; bool isSecondOpReg = false; bool isSecondOpNewified = false; // Traverse the basic block - bottom up for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin(); MII != E;) { MachineInstr &MI = *--MII; if (MI.isDebugValue()) { continue; } if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated)) break; DEBUG(dbgs() << "Instr: "; MI.dump(); dbgs() << "\n"); if (!foundJump && (MI.getOpcode() == Hexagon::J2_jumpt || MI.getOpcode() == Hexagon::J2_jumpf || MI.getOpcode() == Hexagon::J2_jumptnewpt || MI.getOpcode() == Hexagon::J2_jumptnew || MI.getOpcode() == Hexagon::J2_jumpfnewpt || MI.getOpcode() == Hexagon::J2_jumpfnew)) { // This is where you would insert your compare and // instr that feeds compare jmpPos = MII; jmpInstr = &MI; predReg = MI.getOperand(0).getReg(); afterRA = TargetRegisterInfo::isPhysicalRegister(predReg); // If ifconverter had not messed up with the kill flags of the // operands, the following check on the kill flag would suffice. // if(!jmpInstr->getOperand(0).isKill()) break; // This predicate register is live out out of BB // this would only work if we can actually use Live // variable analysis on phy regs - but LLVM does not // provide LV analysis on phys regs. //if(LVs.isLiveOut(predReg, *MBB)) break; // Get all the successors of this block - which will always // be 2. Check if the predicate register is live in in those // successor. If yes, we can not delete the predicate - // I am doing this only because LLVM does not provide LiveOut // at the BB level. bool predLive = false; for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), SIE = MBB->succ_end(); SI != SIE; ++SI) { MachineBasicBlock* succMBB = *SI; if (succMBB->isLiveIn(predReg)) { predLive = true; } } if (predLive) break; if (!MI.getOperand(1).isMBB()) continue; jmpTarget = MI.getOperand(1).getMBB(); foundJump = true; if (MI.getOpcode() == Hexagon::J2_jumpf || MI.getOpcode() == Hexagon::J2_jumpfnewpt || MI.getOpcode() == Hexagon::J2_jumpfnew) { invertPredicate = true; } continue; } // No new value jump if there is a barrier. A barrier has to be in its // own packet. A barrier has zero operands. We conservatively bail out // here if we see any instruction with zero operands. if (foundJump && MI.getNumOperands() == 0) break; if (foundJump && !foundCompare && MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == predReg) { // Not all compares can be new value compare. Arch Spec: 7.6.1.1 if (isNewValueJumpCandidate(MI)) { assert( (MI.getDesc().isCompare()) && "Only compare instruction can be collapsed into New Value Jump"); isSecondOpReg = MI.getOperand(2).isReg(); if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg, afterRA, jmpPos, MF)) break; cmpInstr = &MI; cmpPos = MII; foundCompare = true; // We need cmpReg1 and cmpOp2(imm or reg) while building // new value jump instruction. cmpReg1 = MI.getOperand(1).getReg(); if (MI.getOperand(1).isKill()) MO1IsKill = true; if (isSecondOpReg) { cmpOp2 = MI.getOperand(2).getReg(); if (MI.getOperand(2).isKill()) MO2IsKill = true; } else cmpOp2 = MI.getOperand(2).getImm(); continue; } } if (foundCompare && foundJump) { // If "common" checks fail, bail out on this BB. if (!commonChecksToProhibitNewValueJump(afterRA, MII)) break; bool foundFeeder = false; MachineBasicBlock::iterator feederPos = MII; if (MI.getOperand(0).isReg() && MI.getOperand(0).isDef() && (MI.getOperand(0).getReg() == cmpReg1 || (isSecondOpReg && MI.getOperand(0).getReg() == (unsigned)cmpOp2))) { unsigned feederReg = MI.getOperand(0).getReg(); // First try to see if we can get the feeder from the first operand // of the compare. If we can not, and if secondOpReg is true // (second operand of the compare is also register), try that one. // TODO: Try to come up with some heuristic to figure out which // feeder would benefit. if (feederReg == cmpReg1) { if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) { if (!isSecondOpReg) break; else continue; } else foundFeeder = true; } if (!foundFeeder && isSecondOpReg && feederReg == (unsigned) cmpOp2) if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) break; if (isSecondOpReg) { // In case of CMPLT, or CMPLTU, or EQ with the second register // to newify, swap the operands. if (cmpInstr->getOpcode() == Hexagon::C2_cmpeq && feederReg == (unsigned) cmpOp2) { unsigned tmp = cmpReg1; bool tmpIsKill = MO1IsKill; cmpReg1 = cmpOp2; MO1IsKill = MO2IsKill; cmpOp2 = tmp; MO2IsKill = tmpIsKill; } // Now we have swapped the operands, all we need to check is, // if the second operand (after swap) is the feeder. // And if it is, make a note. if (feederReg == (unsigned)cmpOp2) isSecondOpNewified = true; } // Now that we are moving feeder close the jump, // make sure we are respecting the kill values of // the operands of the feeder. bool updatedIsKill = false; for (unsigned i = 0; i < MI.getNumOperands(); i++) { MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isUse()) { unsigned feederReg = MO.getReg(); for (MachineBasicBlock::iterator localII = feederPos, end = jmpPos; localII != end; localII++) { MachineInstr &localMI = *localII; for (unsigned j = 0; j < localMI.getNumOperands(); j++) { MachineOperand &localMO = localMI.getOperand(j); if (localMO.isReg() && localMO.isUse() && localMO.isKill() && feederReg == localMO.getReg()) { // We found that there is kill of a use register // Set up a kill flag on the register localMO.setIsKill(false); MO.setIsKill(); updatedIsKill = true; break; } } if (updatedIsKill) break; } } if (updatedIsKill) break; } MBB->splice(jmpPos, MI.getParent(), MI); MBB->splice(jmpPos, MI.getParent(), cmpInstr); DebugLoc dl = MI.getDebugLoc(); MachineInstr *NewMI; assert((isNewValueJumpCandidate(*cmpInstr)) && "This compare is not a New Value Jump candidate."); unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2, isSecondOpNewified, jmpTarget, MBPI); if (invertPredicate) opc = QII->getInvertedPredicatedOpcode(opc); if (isSecondOpReg) NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc)) .addReg(cmpReg1, getKillRegState(MO1IsKill)) .addReg(cmpOp2, getKillRegState(MO2IsKill)) .addMBB(jmpTarget); else if ((cmpInstr->getOpcode() == Hexagon::C2_cmpeqi || cmpInstr->getOpcode() == Hexagon::C2_cmpgti) && cmpOp2 == -1 ) // Corresponding new-value compare jump instructions don't have the // operand for -1 immediate value. NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc)) .addReg(cmpReg1, getKillRegState(MO1IsKill)) .addMBB(jmpTarget); else NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc)) .addReg(cmpReg1, getKillRegState(MO1IsKill)) .addImm(cmpOp2) .addMBB(jmpTarget); assert(NewMI && "New Value Jump Instruction Not created!"); (void)NewMI; if (cmpInstr->getOperand(0).isReg() && cmpInstr->getOperand(0).isKill()) cmpInstr->getOperand(0).setIsKill(false); if (cmpInstr->getOperand(1).isReg() && cmpInstr->getOperand(1).isKill()) cmpInstr->getOperand(1).setIsKill(false); cmpInstr->eraseFromParent(); jmpInstr->eraseFromParent(); ++nvjGenerated; ++NumNVJGenerated; break; } } } } return true; }
/// Insert epilog code into the function. /// For ARC, this inserts a call to a function that restores callee saved /// registers onto the stack, when enough callee saved registers are required. void ARCFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { LLVM_DEBUG(dbgs() << "Emit Epilogue: " << MF.getName() << "\n"); auto *AFI = MF.getInfo<ARCFunctionInfo>(); const ARCInstrInfo *TII = MF.getSubtarget<ARCSubtarget>().getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); MachineFrameInfo &MFI = MF.getFrameInfo(); uint64_t StackSize = MF.getFrameInfo().getStackSize(); bool SavedBlink = false; unsigned AmountAboveFunclet = 0; // If we have variable sized frame objects, then we have to move // the stack pointer to a known spot (fp - StackSize). // Then, replace the frame pointer by (new) [sp,StackSize-4]. // Then, move the stack pointer the rest of the way (sp = sp + StackSize). if (hasFP(MF)) { BuildMI(MBB, MBBI, DebugLoc(), TII->get(ARC::SUB_rru6), ARC::SP) .addReg(ARC::FP) .addImm(StackSize); AmountAboveFunclet += 4; } // Now, move the stack pointer to the bottom of the save area for the funclet. const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); unsigned Last = determineLastCalleeSave(CSI); unsigned StackSlotsUsedByFunclet = 0; // Now, restore the callee save registers. if (UseSaveRestoreFunclet && Last > ARC::R14) { // BL to __ld_r13_to_<TRI->getRegAsmName()> StackSlotsUsedByFunclet = Last - ARC::R12; AmountAboveFunclet += 4 * (StackSlotsUsedByFunclet + 1); SavedBlink = true; } if (MFI.hasCalls() && !SavedBlink) { AmountAboveFunclet += 4; SavedBlink = true; } // Move the stack pointer up to the point of the funclet. if (StackSize - AmountAboveFunclet) { BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::ADD_rru6)) .addReg(ARC::SP) .addReg(ARC::SP) .addImm(StackSize - AmountAboveFunclet); } if (StackSlotsUsedByFunclet) { BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::BL)) .addExternalSymbol(load_funclet_name[Last - ARC::R15]) .addReg(ARC::BLINK, RegState::Implicit | RegState::Kill); BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::ADD_rru6)) .addReg(ARC::SP) .addReg(ARC::SP) .addImm(4 * (StackSlotsUsedByFunclet)); } // Now, pop blink if necessary. if (SavedBlink) { BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::POP_S_BLINK)); } // Now, pop fp if necessary. if (hasFP(MF)) { BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::LD_AB_rs9)) .addReg(ARC::SP, RegState::Define) .addReg(ARC::FP, RegState::Define) .addReg(ARC::SP) .addImm(4); } // Relieve the varargs area if necessary. if (MF.getFunction().isVarArg()) { // Add in the varargs area here first. LLVM_DEBUG(dbgs() << "Varargs\n"); unsigned VarArgsBytes = MFI.getObjectSize(AFI->getVarArgsFrameIndex()); BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::ADD_rru6)) .addReg(ARC::SP) .addReg(ARC::SP) .addImm(VarArgsBytes); } }
static void interruptFrameLayout(MachineFunction &MF) { const Function *F = MF.getFunction(); llvm::CallingConv::ID CallConv = F->getCallingConv(); // If this function is not using either the interrupt_handler // calling convention or the save_volatiles calling convention // then we don't need to do any additional frame layout. if (CallConv != llvm::CallingConv::MBLAZE_INTR && CallConv != llvm::CallingConv::MBLAZE_SVOL) return; MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo &MRI = MF.getRegInfo(); const MBlazeInstrInfo &TII = *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo()); // Determine if the calling convention is the interrupt_handler // calling convention. Some pieces of the prologue and epilogue // only need to be emitted if we are lowering and interrupt handler. bool isIntr = CallConv == llvm::CallingConv::MBLAZE_INTR; // Determine where to put prologue and epilogue additions MachineBasicBlock &MENT = MF.front(); MachineBasicBlock &MEXT = MF.back(); MachineBasicBlock::iterator MENTI = MENT.begin(); MachineBasicBlock::iterator MEXTI = prior(MEXT.end()); DebugLoc ENTDL = MENTI != MENT.end() ? MENTI->getDebugLoc() : DebugLoc(); DebugLoc EXTDL = MEXTI != MEXT.end() ? MEXTI->getDebugLoc() : DebugLoc(); // Store the frame indexes generated during prologue additions for use // when we are generating the epilogue additions. SmallVector<int, 10> VFI; // Build the prologue SWI for R3 - R12 if needed. Note that R11 must // always have a SWI because it is used when processing RMSR. for (unsigned r = MBlaze::R3; r <= MBlaze::R12; ++r) { if (!MRI.isPhysRegUsed(r) && !(isIntr && r == MBlaze::R11)) continue; int FI = MFI->CreateStackObject(4,4,false,false); VFI.push_back(FI); BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), r) .addFrameIndex(FI).addImm(0); } // Build the prologue SWI for R17, R18 int R17FI = MFI->CreateStackObject(4,4,false,false); int R18FI = MFI->CreateStackObject(4,4,false,false); BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R17) .addFrameIndex(R17FI).addImm(0); BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R18) .addFrameIndex(R18FI).addImm(0); // Buid the prologue SWI and the epilogue LWI for RMSR if needed if (isIntr) { int MSRFI = MFI->CreateStackObject(4,4,false,false); BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::MFS), MBlaze::R11) .addReg(MBlaze::RMSR); BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R11) .addFrameIndex(MSRFI).addImm(0); BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R11) .addFrameIndex(MSRFI).addImm(0); BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::MTS), MBlaze::RMSR) .addReg(MBlaze::R11); } // Build the epilogue LWI for R17, R18 BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R18) .addFrameIndex(R18FI).addImm(0); BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R17) .addFrameIndex(R17FI).addImm(0); // Build the epilogue LWI for R3 - R12 if needed for (unsigned r = MBlaze::R12, i = VFI.size(); r >= MBlaze::R3; --r) { if (!MRI.isPhysRegUsed(r)) continue; BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), r) .addFrameIndex(VFI[--i]).addImm(0); } }
bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { if (skipFunction(*mf.getFunction())) return false; MF = &mf; TII = MF->getSubtarget().getInstrInfo(); TRI = MF->getSubtarget().getRegisterInfo(); RegClassInfo.runOnMachineFunction(mf); LiveRegs = nullptr; assert(NumRegs == RC->getNumRegs() && "Bad regclass"); DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: " << TRI->getRegClassName(RC) << " **********\n"); // If no relevant registers are used in the function, we can skip it // completely. bool anyregs = false; const MachineRegisterInfo &MRI = mf.getRegInfo(); for (unsigned Reg : *RC) { if (MRI.isPhysRegUsed(Reg)) { anyregs = true; break; } } if (!anyregs) return false; // Initialize the AliasMap on the first use. if (AliasMap.empty()) { // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and // therefore the LiveRegs array. AliasMap.resize(TRI->getNumRegs()); for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid(); ++AI) AliasMap[*AI].push_back(i); } // Initialize the MMBInfos for (auto &MBB : mf) { MBBInfo InitialInfo; MBBInfos.insert(std::make_pair(&MBB, InitialInfo)); } /* * We want to visit every instruction in every basic block in order to update * it's execution domain or break any false dependencies. However, for the * dependency breaking, we need to know clearances from all predecessors * (including any backedges). One way to do so would be to do two complete * passes over all basic blocks/instructions, the first for recording * clearances, the second to break the dependencies. However, for functions * without backedges, or functions with a lot of straight-line code, and * a small loop, that would be a lot of unnecessary work (since only the * BBs that are part of the loop require two passes). As an example, * consider the following loop. * * * PH -> A -> B (xmm<Undef> -> xmm<Def>) -> C -> D -> EXIT * ^ | * +----------------------------------+ * * The iteration order is as follows: * Naive: PH A B C D A' B' C' D' * Optimized: PH A B C A' B' C' D * * Note that we avoid processing D twice, because we can entirely process * the predecessors before getting to D. We call a block that is ready * for its second round of processing `done` (isBlockDone). Once we finish * processing some block, we update the counters in MBBInfos and re-process * any successors that are now done. */ MachineBasicBlock *Entry = &*MF->begin(); ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry); for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock *MBB = *MBBI; // N.B: IncomingProcessed and IncomingCompleted were already updated while // processing this block's predecessors. MBBInfos[MBB].PrimaryCompleted = true; MBBInfos[MBB].PrimaryIncoming = MBBInfos[MBB].IncomingProcessed; processBasicBlock(MBB, true); updateSuccessors(MBB, true); } // We need to go through again and finalize any blocks that are not done yet. // This is possible if blocks have dead predecessors, so we didn't visit them // above. for (ReversePostOrderTraversal<MachineBasicBlock *>::rpo_iterator MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock *MBB = *MBBI; if (!isBlockDone(MBB)) { processBasicBlock(MBB, false); // Don't update successors here. We'll get to them anyway through this // loop. } } // Clear the LiveOuts vectors and collapse any remaining DomainValues. for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { auto FI = MBBInfos.find(*MBBI); if (FI == MBBInfos.end() || !FI->second.OutRegs) continue; for (unsigned i = 0, e = NumRegs; i != e; ++i) if (FI->second.OutRegs[i].Value) release(FI->second.OutRegs[i].Value); delete[] FI->second.OutRegs; } MBBInfos.clear(); UndefReads.clear(); Avail.clear(); Allocator.DestroyAll(); return false; }
void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); MachineBasicBlock::iterator MBBI = MBB.begin(); const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo()); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); determineFrameLayout(MF); // Check if frame moves are needed for EH. bool needsFrameMoves = MMI.hasDebugInfo() || !MF.getFunction()->needsUnwindTableEntry(); // Get the number of bytes to allocate from the FrameInfo. int NumBytes = (int) MFI->getStackSize(); // LLVM expects allocframe not to be the first instruction in the // basic block. MachineBasicBlock::iterator InsertPt = MBB.begin(); // // ALLOCA adjust regs. Iterate over ADJDYNALLOC nodes and change the offset. // HexagonMachineFunctionInfo *FuncInfo = MF.getInfo<HexagonMachineFunctionInfo>(); const std::vector<MachineInstr*>& AdjustRegs = FuncInfo->getAllocaAdjustInsts(); for (std::vector<MachineInstr*>::const_iterator i = AdjustRegs.begin(), e = AdjustRegs.end(); i != e; ++i) { MachineInstr* MI = *i; assert((MI->getOpcode() == Hexagon::ADJDYNALLOC) && "Expected adjust alloca node"); MachineOperand& MO = MI->getOperand(2); assert(MO.isImm() && "Expected immediate"); MO.setImm(MFI->getMaxCallFrameSize()); } std::vector<MachineMove> &Moves = MMI.getFrameMoves(); if (needsFrameMoves) { // Advance CFA. DW_CFA_def_cfa unsigned FPReg = QRI->getFrameRegister(); unsigned RAReg = QRI->getRARegister(); MachineLocation Dst(MachineLocation::VirtualFP); MachineLocation Src(FPReg, -8); Moves.push_back(MachineMove(0, Dst, Src)); // R31 = (R31 - #4) MachineLocation LRDst(RAReg, -4); MachineLocation LRSrc(RAReg); Moves.push_back(MachineMove(0, LRDst, LRSrc)); // R30 = (R30 - #8) MachineLocation SPDst(FPReg, -8); MachineLocation SPSrc(FPReg); Moves.push_back(MachineMove(0, SPDst, SPSrc)); } // // Only insert ALLOCFRAME if we need to. // if (hasFP(MF)) { // Check for overflow. // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used? const int ALLOCFRAME_MAX = 16384; const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); if (NumBytes >= ALLOCFRAME_MAX) { // Emit allocframe(#0). BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(0); // Subtract offset from frame pointer. BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real), HEXAGON_RESERVED_REG_1).addImm(NumBytes); BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::SUB_rr), QRI->getStackRegister()). addReg(QRI->getStackRegister()). addReg(HEXAGON_RESERVED_REG_1); } else { BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(NumBytes); } } }
void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. MachineBasicBlock::iterator MBBI = MBB.begin(); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>( MF.getSubtarget().getRegisterInfo()); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); bool HasFP = hasFP(MF); DebugLoc DL = MBB.findDebugLoc(MBBI); int NumBytes = (int)MFI->getStackSize(); if (!AFI->hasStackFrame()) { assert(!HasFP && "unexpected function without stack frame but with FP"); // All of the stack allocation is for locals. AFI->setLocalStackSize(NumBytes); // Label used to tie together the PROLOG_LABEL and the MachineMoves. MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); // REDZONE: If the stack size is less than 128 bytes, we don't need // to actually allocate. if (NumBytes && !canUseRedZone(MF)) { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); // Encode the stack size of the leaf function. unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } else if (NumBytes) { ++NumRedZoneFunctions; } return; } // Only set up FP if we actually need to. int FPOffset = 0; if (HasFP) { // First instruction must a) allocate the stack and b) have an immediate // that is a multiple of -2. assert((MBBI->getOpcode() == AArch64::STPXpre || MBBI->getOpcode() == AArch64::STPDpre) && MBBI->getOperand(3).getReg() == AArch64::SP && MBBI->getOperand(4).getImm() < 0 && (MBBI->getOperand(4).getImm() & 1) == 0); // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space // required for the callee saved register area we get the frame pointer // by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8. FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8; assert(FPOffset >= 0 && "Bad Framepointer Offset"); } // Move past the saves of the callee-saved registers. while (MBBI->getOpcode() == AArch64::STPXi || MBBI->getOpcode() == AArch64::STPDi || MBBI->getOpcode() == AArch64::STPXpre || MBBI->getOpcode() == AArch64::STPDpre) { ++MBBI; NumBytes -= 16; } assert(NumBytes >= 0 && "Negative stack allocation size!?"); if (HasFP) { // Issue sub fp, sp, FPOffset or // mov fp,sp when FPOffset is zero. // Note: All stores of callee-saved registers are marked as "FrameSetup". // This code marks the instruction(s) that set the FP also. emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII, MachineInstr::FrameSetup); } // All of the remaining stack allocations are for locals. AFI->setLocalStackSize(NumBytes); // Allocate space for the rest of the frame. if (NumBytes) { // If we're a leaf function, try using the red zone. if (!canUseRedZone(MF)) emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); } // If we need a base pointer, set it up here. It's whatever the value of the // stack pointer is at this point. Any variable size objects will be allocated // after this, so we can still use the base pointer to reference locals. // // FIXME: Clarify FrameSetup flags here. // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is // needed. // if (RegInfo->hasBasePointer(MF)) TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false); if (needsFrameMoves) { const DataLayout *TD = MF.getSubtarget().getDataLayout(); const int StackGrowth = -TD->getPointerSize(0); unsigned FramePtr = RegInfo->getFrameRegister(MF); // An example of the prologue: // // .globl __foo // .align 2 // __foo: // Ltmp0: // .cfi_startproc // .cfi_personality 155, ___gxx_personality_v0 // Leh_func_begin: // .cfi_lsda 16, Lexception33 // // stp xa,bx, [sp, -#offset]! // ... // stp x28, x27, [sp, #offset-32] // stp fp, lr, [sp, #offset-16] // add fp, sp, #offset - 16 // sub sp, sp, #1360 // // The Stack: // +-------------------------------------------+ // 10000 | ........ | ........ | ........ | ........ | // 10004 | ........ | ........ | ........ | ........ | // +-------------------------------------------+ // 10008 | ........ | ........ | ........ | ........ | // 1000c | ........ | ........ | ........ | ........ | // +===========================================+ // 10010 | X28 Register | // 10014 | X28 Register | // +-------------------------------------------+ // 10018 | X27 Register | // 1001c | X27 Register | // +===========================================+ // 10020 | Frame Pointer | // 10024 | Frame Pointer | // +-------------------------------------------+ // 10028 | Link Register | // 1002c | Link Register | // +===========================================+ // 10030 | ........ | ........ | ........ | ........ | // 10034 | ........ | ........ | ........ | ........ | // +-------------------------------------------+ // 10038 | ........ | ........ | ........ | ........ | // 1003c | ........ | ........ | ........ | ........ | // +-------------------------------------------+ // // [sp] = 10030 :: >>initial value<< // sp = 10020 :: stp fp, lr, [sp, #-16]! // fp = sp == 10020 :: mov fp, sp // [sp] == 10020 :: stp x28, x27, [sp, #-16]! // sp == 10010 :: >>final value<< // // The frame pointer (w29) points to address 10020. If we use an offset of // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24 // for w27, and -32 for w28: // // Ltmp1: // .cfi_def_cfa w29, 16 // Ltmp2: // .cfi_offset w30, -8 // Ltmp3: // .cfi_offset w29, -16 // Ltmp4: // .cfi_offset w27, -24 // Ltmp5: // .cfi_offset w28, -32 if (HasFP) { // Define the current CFA rule to use the provided FP. unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); // Record the location of the stored LR unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, LR, StackGrowth)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); // Record the location of the stored FP CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } else { // Encode the stack size of the leaf function. unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize())); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } // Now emit the moves for whatever callee saved regs we have. emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr); } }