bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
    MachineFunction &MF, unsigned StackBumpBytes) const {
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  if (AFI->getLocalStackSize() == 0)
    return false;

  // 512 is the maximum immediate for stp/ldp that will be used for
  // callee-save save/restores
  if (StackBumpBytes >= 512)
    return false;

  if (MFI.hasVarSizedObjects())
    return false;

  if (RegInfo->needsStackRealignment(MF))
    return false;

  // This isn't strictly necessary, but it simplifies things a bit since the
  // current RedZone handling code assumes the SP is adjusted by the
  // callee-save save/restore code.
  if (canUseRedZone(MF))
    return false;

  return true;
}
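The 512 cutoff above reflects the STP/LDP addressing mode used for the combined callee-save save/restore: a signed 7-bit immediate scaled by the 8-byte access size, i.e. byte offsets in [-512, 504] for X/D register pairs. A minimal standalone sketch of that range check follows; isValidLdpStpOffset64 is a hypothetical helper for illustration, not backend code.

#include <cstdint>

// Returns true if ByteOffset is encodable as the scaled immediate of a
// 64-bit LDP/STP: a multiple of 8 whose scaled value fits in 7 signed bits.
static bool isValidLdpStpOffset64(int64_t ByteOffset) {
  if (ByteOffset % 8 != 0)
    return false;
  int64_t Scaled = ByteOffset / 8;
  return Scaled >= -64 && Scaled <= 63;
}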
/// Based on the use to defs information (in ADRPMode), compute the
/// LOH opportunities related to ADRP.
static void computeADRP(const InstrToInstrs &UseToDefs,
                        AArch64FunctionInfo &AArch64FI,
                        const MachineDominatorTree *MDT) {
  DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
  for (const auto &Entry : UseToDefs) {
    unsigned Size = Entry.second.size();
    if (Size == 0)
      continue;
    if (Size == 1) {
      const MachineInstr *L2 = *Entry.second.begin();
      const MachineInstr *L1 = Entry.first;
      if (!MDT->dominates(L2, L1)) {
        DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
                     << '\n');
        continue;
      }
      DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
      SmallVector<const MachineInstr *, 2> Args;
      Args.push_back(L2);
      Args.push_back(L1);
      AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, Args);
      ++NumADRPSimpleCandidate;
    }
#ifdef DEBUG
    else if (Size == 2)
      ++NumADRPComplexCandidate2;
    else if (Size == 3)
      ++NumADRPComplexCandidate3;
    else
      ++NumADRPComplexCandidateOther;
#endif
    // if Size < 1, the use should have been removed from the candidates
    assert(Size >= 1 && "No reaching defs for that use!");
  }
}
static bool registerADRCandidate(const MachineInstr &Use,
                                 const InstrToInstrs &UseToDefs,
                                 const InstrToInstrs *DefsPerColorToUses,
                                 AArch64FunctionInfo &AArch64FI,
                                 SetOfMachineInstr *InvolvedInLOHs,
                                 const MapRegToId &RegToId) {
  // Look for opportunities to turn ADRP -> ADD or
  // ADRP -> LDR GOTPAGEOFF into ADR.
  // If ADRP has more than one use, give up.
  if (Use.getOpcode() != AArch64::ADDXri &&
      (Use.getOpcode() != AArch64::LDRXui ||
       !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT)))
    return false;
  InstrToInstrs::const_iterator It = UseToDefs.find(&Use);
  // The map may contain garbage that we need to ignore.
  if (It == UseToDefs.end() || It->second.empty())
    return false;

  const MachineInstr &Def = **It->second.begin();
  if (Def.getOpcode() != AArch64::ADRP)
    return false;

  // Check the number of users of ADRP.
  const SetOfMachineInstr *Users =
      getUses(DefsPerColorToUses,
              RegToId.find(Def.getOperand(0).getReg())->second, Def);
  if (Users->size() > 1) {
    ++NumADRComplexCandidate;
    return false;
  }
  ++NumADRSimpleCandidate;
  assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) &&
         "ADRP already involved in LOH.");
  assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) &&
         "ADD already involved in LOH.");
  DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n');

  SmallVector<const MachineInstr *, 2> Args;
  Args.push_back(&Def);
  Args.push_back(&Use);

  AArch64FI.addLOHDirective(
      Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd : MCLOH_AdrpLdrGot,
      Args);
  return true;
}
/// Update state when seeing an ADRP instruction.
static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI,
                       LOHInfo &Info) {
  if (Info.LastADRP != nullptr) {
    DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n" << '\t' << MI << '\n'
                 << '\t' << *Info.LastADRP << '\n');
    AFI.addLOHDirective(MCLOH_AdrpAdrp, {&MI, Info.LastADRP});
    ++NumADRPSimpleCandidate;
  }

  // Produce LOH directive if possible.
  if (Info.IsCandidate) {
    switch (Info.Type) {
    case MCLOH_AdrpAdd:
      DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n" << '\t' << MI << '\n'
                   << '\t' << *Info.MI0 << '\n');
      AFI.addLOHDirective(MCLOH_AdrpAdd, {&MI, Info.MI0});
      ++NumADRSimpleCandidate;
      break;
    case MCLOH_AdrpLdr:
      if (supportLoadFromLiteral(*Info.MI0)) {
        DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n" << '\t' << MI << '\n'
                     << '\t' << *Info.MI0 << '\n');
        AFI.addLOHDirective(MCLOH_AdrpLdr, {&MI, Info.MI0});
        ++NumADRPToLDR;
      }
      break;
    case MCLOH_AdrpAddLdr:
      DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n" << '\t' << MI << '\n'
                   << '\t' << *Info.MI1 << '\n' << '\t' << *Info.MI0 << '\n');
      AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0});
      ++NumADDToLDR;
      break;
    case MCLOH_AdrpAddStr:
      if (Info.MI1 != nullptr) {
        DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n" << '\t' << MI << '\n'
                     << '\t' << *Info.MI1 << '\n' << '\t' << *Info.MI0
                     << '\n');
        AFI.addLOHDirective(MCLOH_AdrpAddStr, {&MI, Info.MI1, Info.MI0});
        ++NumADDToSTR;
      }
      break;
    case MCLOH_AdrpLdrGotLdr:
      DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n" << '\t' << MI << '\n'
                   << '\t' << *Info.MI1 << '\n' << '\t' << *Info.MI0 << '\n');
      AFI.addLOHDirective(MCLOH_AdrpLdrGotLdr, {&MI, Info.MI1, Info.MI0});
      ++NumLDRToLDR;
      break;
    case MCLOH_AdrpLdrGotStr:
      DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n" << '\t' << MI << '\n'
                   << '\t' << *Info.MI1 << '\n' << '\t' << *Info.MI0 << '\n');
      AFI.addLOHDirective(MCLOH_AdrpLdrGotStr, {&MI, Info.MI1, Info.MI0});
      ++NumLDRToSTR;
      break;
    case MCLOH_AdrpLdrGot:
      DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n" << '\t' << MI << '\n'
                   << '\t' << *Info.MI0 << '\n');
      AFI.addLOHDirective(MCLOH_AdrpLdrGot, {&MI, Info.MI0});
      break;
    case MCLOH_AdrpAdrp:
      llvm_unreachable("MCLOH_AdrpAdrp not used in state machine");
    }
  }

  handleClobber(Info);
  Info.LastADRP = &MI;
}
bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                               const Function &F,
                                               ArrayRef<unsigned> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();

  SmallVector<ArgInfo, 8> SplitArgs;
  unsigned i = 0;
  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;
    ArgInfo OrigArg{VRegs[i], Arg.getType()};
    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
    bool Split = false;
    LLT Ty = MRI.getType(VRegs[i]);
    unsigned Dst = VRegs[i];

    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
                      [&](unsigned Reg, uint64_t Offset) {
                        if (!Split) {
                          Split = true;
                          Dst = MRI.createGenericVirtualRegister(Ty);
                          MIRBuilder.buildUndef(Dst);
                        }
                        unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
                        MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset);
                        Dst = Tmp;
                      });

    if (Dst != VRegs[i])
      MIRBuilder.buildCopy(VRegs[i], Dst);
    ++i;
  }

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *AssignFn =
      TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);

  FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
    return false;

  if (F.isVarArg()) {
    if (!MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) {
      // FIXME: we need to reimplement saveVarArgsRegisters from
      // AArch64ISelLowering.
      return false;
    }

    // We currently pass all varargs at 8-byte alignment.
    uint64_t StackOffset = alignTo(Handler.StackUsed, 8);

    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
    FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
  }

  auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  if (Subtarget.hasCustomCallingConv())
    Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}
void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                BitVector &SavedRegs,
                                                RegScavenger *RS) const {
  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  SmallVector<unsigned, 4> UnspilledCSGPRs;
  SmallVector<unsigned, 4> UnspilledCSFPRs;

  // The frame record needs to be created by saving the appropriate registers
  if (hasFP(MF)) {
    SavedRegs.set(AArch64::FP);
    SavedRegs.set(AArch64::LR);
  }

  // Spill the BasePtr if it's used. Do this first thing so that the
  // getCalleeSavedRegs() below will get the right answer.
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.set(RegInfo->getBaseRegister());

  if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
    SavedRegs.set(AArch64::X9);

  // If any callee-saved registers are used, the frame cannot be eliminated.
  unsigned NumGPRSpilled = 0;
  unsigned NumFPRSpilled = 0;
  bool ExtraCSSpill = false;
  bool CanEliminateFrame = true;
  DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:");
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);

  // Check pairs of consecutive callee-saved registers.
  for (unsigned i = 0; CSRegs[i]; i += 2) {
    assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");

    const unsigned OddReg = CSRegs[i];
    const unsigned EvenReg = CSRegs[i + 1];
    assert((AArch64::GPR64RegClass.contains(OddReg) &&
            AArch64::GPR64RegClass.contains(EvenReg)) ^
               (AArch64::FPR64RegClass.contains(OddReg) &&
                AArch64::FPR64RegClass.contains(EvenReg)) &&
           "Register class mismatch!");

    const bool OddRegUsed = SavedRegs.test(OddReg);
    const bool EvenRegUsed = SavedRegs.test(EvenReg);

    // Early exit if none of the registers in the register pair is actually
    // used.
    if (!OddRegUsed && !EvenRegUsed) {
      if (AArch64::GPR64RegClass.contains(OddReg)) {
        UnspilledCSGPRs.push_back(OddReg);
        UnspilledCSGPRs.push_back(EvenReg);
      } else {
        UnspilledCSFPRs.push_back(OddReg);
        UnspilledCSFPRs.push_back(EvenReg);
      }
      continue;
    }

    unsigned Reg = AArch64::NoRegister;
    // If only one of the registers of the register pair is used, make sure to
    // mark the other one as used as well.
    if (OddRegUsed ^ EvenRegUsed) {
      // Find out which register is the additional spill.
      Reg = OddRegUsed ? EvenReg : OddReg;
      SavedRegs.set(Reg);
    }

    DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
    DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));

    assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
            (RegInfo->getEncodingValue(OddReg) + 1 ==
             RegInfo->getEncodingValue(EvenReg))) &&
           "Register pair of non-adjacent registers!");

    if (AArch64::GPR64RegClass.contains(OddReg)) {
      NumGPRSpilled += 2;
      // If it's not a reserved register, we can use it in lieu of an
      // emergency spill slot for the register scavenger.
      // FIXME: It would be better to instead keep looking and choose another
      // unspilled register that isn't reserved, if there is one.
      if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
        ExtraCSSpill = true;
    } else
      NumFPRSpilled += 2;

    CanEliminateFrame = false;
  }

  // FIXME: Set BigStack if any stack slot references may be out of range.
  // For now, just conservatively guesstimate based on unscaled indexing
  // range. We'll end up allocating an unnecessary spill slot a lot, but
  // realistically that's not a big deal at this stage of the game.
  // The CSR spill slots have not been allocated yet, so estimateStackSize
  // won't include them.
  MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned CFSize =
      MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
  DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
  bool BigStack = (CFSize >= 256);
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    AFI->setHasStackFrame(true);

  // Estimate if we might need to scavenge a register at some point in order
  // to materialize a stack offset. If so, either spill one additional
  // callee-saved register or reserve a special spill slot to facilitate
  // register scavenging. If we already spilled an extra callee-saved register
  // above to keep the number of spills even, we don't need to do anything else
  // here.
  if (BigStack && !ExtraCSSpill) {
    // If we're adding a register to spill here, we have to add two of them
    // to keep the number of regs to spill even.
    assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
    unsigned Count = 0;
    while (!UnspilledCSGPRs.empty() && Count < 2) {
      unsigned Reg = UnspilledCSGPRs.back();
      UnspilledCSGPRs.pop_back();
      DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
                   << " to get a scratch register.\n");
      SavedRegs.set(Reg);
      ExtraCSSpill = true;
      ++Count;
    }

    // If we didn't find an extra callee-saved register to spill, create
    // an emergency spill slot.
    if (!ExtraCSSpill) {
      const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
      int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
      RS->addScavengingFrameIndex(FI);
      DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                   << " as the emergency spill slot.\n");
    }
  }
}
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    // Label used to tie together the PROLOG_LABEL and the MachineMoves.
    MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();

    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (NumBytes && !canUseRedZone(MF)) {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else if (NumBytes) {
      ++NumRedZoneFunctions;
    }

    return;
  }

  // Only set up FP if we actually need to.
  int FPOffset = 0;
  if (HasFP)
    FPOffset = getFPOffsetInPrologue(MBBI);

  // Move past the saves of the callee-saved registers.
  while (isCSSave(MBBI)) {
    ++MBBI;
    NumBytes -= 16;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  if (HasFP) {
    // Issue    sub fp, sp, FPOffset or
    //          mov fp, sp  when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes);

  // Allocate space for the rest of the frame.
  const unsigned Alignment = MFI->getMaxAlignment();
  const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
  unsigned scratchSPReg = AArch64::SP;
  if (NumBytes && NeedsRealignment) {
    // Use the first callee-saved register as a scratch register.
    scratchSPReg = AArch64::X9;
  }

  // If we're a leaf function, try using the red zone.
  if (NumBytes && !canUseRedZone(MF))
    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
    // the correct value here, as NumBytes also includes padding bytes,
    // which shouldn't be counted here.
    emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                    MachineInstr::FrameSetup);

  if (NumBytes && NeedsRealignment) {
    const unsigned NrBitsToZero = countTrailingZeros(Alignment);
    assert(NrBitsToZero > 1);
    assert(scratchSPReg != AArch64::SP);

    // SUB X9, SP, NumBytes
    //   -- X9 is temporary register, so shouldn't contain any live data here,
    //   -- free to use. This is already produced by emitFrameOffset above.
    // AND SP, X9, 0b11111...0000
    // The logical immediates have a non-trivial encoding. The following
    // formula computes the encoded immediate with all ones but
    // NrBitsToZero zero bits as least significant bits.
    uint32_t andMaskEncoded = (1 << 12)                         // = N
                              | ((64 - NrBitsToZero) << 6)      // immr
                              | ((64 - NrBitsToZero - 1) << 0); // imms

    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
        .addReg(scratchSPReg, RegState::Kill)
        .addImm(andMaskEncoded);
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |               Frame Pointer               |
    // 10024 |               Frame Pointer               |
    //       +-------------------------------------------+
    // 10028 |               Link Register               |
    // 1002c |               Link Register               |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);

      // Record the location of the stored LR
      unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);

      // Record the location of the stored FP
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have.
    emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
  }
}
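As a sanity check on the logical-immediate formula used for the stack-realigning AND above: for 16-byte alignment, NrBitsToZero is 4, giving immr = 60 and imms = 59, which decodes to a run of sixty ones rotated into the 64-bit mask 0xFFFFFFFFFFFFFFF0. A small standalone sketch follows; encodeAlignMask is a hypothetical helper for illustration, not backend code.

#include <cstdint>
#include <cstdio>

// Mirrors the andMaskEncoded formula from emitPrologue:
//   N = 1, immr = 64 - NrBitsToZero, imms = 64 - NrBitsToZero - 1.
static uint32_t encodeAlignMask(unsigned NrBitsToZero) {
  return (1u << 12)                          // N
         | ((64u - NrBitsToZero) << 6)       // immr
         | ((64u - NrBitsToZero - 1u) << 0); // imms
}

int main() {
  // 16-byte alignment: the encoded mask clears the low 4 bits of SP.
  std::printf("0x%x\n", encodeAlignMask(4)); // prints 0x1f3b
  return 0;
}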
void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    // Label used to tie together the PROLOG_LABEL and the MachineMoves.
    MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();

    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (NumBytes && !canUseRedZone(MF)) {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    } else if (NumBytes) {
      ++NumRedZoneFunctions;
    }

    return;
  }

  // Only set up FP if we actually need to.
  int FPOffset = 0;
  if (HasFP) {
    // First instruction must a) allocate the stack and b) have an immediate
    // that is a multiple of -2.
    assert((MBBI->getOpcode() == AArch64::STPXpre ||
            MBBI->getOpcode() == AArch64::STPDpre) &&
           MBBI->getOperand(3).getReg() == AArch64::SP &&
           MBBI->getOperand(4).getImm() < 0 &&
           (MBBI->getOperand(4).getImm() & 1) == 0);

    // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
    // required for the callee saved register area we get the frame pointer
    // by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
    FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
    assert(FPOffset >= 0 && "Bad Framepointer Offset");
  }

  // Move past the saves of the callee-saved registers.
  while (MBBI->getOpcode() == AArch64::STPXi ||
         MBBI->getOpcode() == AArch64::STPDi ||
         MBBI->getOpcode() == AArch64::STPXpre ||
         MBBI->getOpcode() == AArch64::STPDpre) {
    ++MBBI;
    NumBytes -= 16;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  if (HasFP) {
    // Issue    sub fp, sp, FPOffset or
    //          mov fp, sp  when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes);

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  //
  if (RegInfo->hasBasePointer(MF))
    TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);

  if (needsFrameMoves) {
    const DataLayout *TD = MF.getSubtarget().getDataLayout();
    const int StackGrowth = -TD->getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |               Frame Pointer               |
    // 10024 |               Frame Pointer               |
    //       +-------------------------------------------+
    // 10028 |               Link Register               |
    // 1002c |               Link Register               |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Record the location of the stored LR
      unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Record the location of the stored FP
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Now emit the moves for whatever callee saved regs we have.
    emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
  }
}
static void computeCalleeSaveRegisterPairs(
    MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {

  if (CSI.empty())
    return;

  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  unsigned Count = CSI.size();
  (void)CC;
  // MachO's compact unwind format relies on all registers being stored in
  // pairs.
  assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() ||
          CC == CallingConv::PreserveMost ||
          (Count & 1) == 0) &&
         "Odd number of callee-saved regs to spill!");
  unsigned Offset = AFI->getCalleeSavedStackSize();

  for (unsigned i = 0; i < Count; ++i) {
    RegPairInfo RPI;
    RPI.Reg1 = CSI[i].getReg();

    assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
           AArch64::FPR64RegClass.contains(RPI.Reg1));
    RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);

    // Add the next reg to the pair if it is in the same register class.
    if (i + 1 < Count) {
      unsigned NextReg = CSI[i + 1].getReg();
      if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
          (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
        RPI.Reg2 = NextReg;
    }

    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    //
    // The order of the registers in the list is controlled by
    // getCalleeSavedRegs(), so they will always be in-order, as well.
    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
           "Out of order callee saved regs!");

    // MachO's compact unwind format relies on all registers being stored in
    // adjacent register pairs.
    assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() ||
            CC == CallingConv::PreserveMost ||
            (RPI.isPaired() &&
             ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
              RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");

    RPI.FrameIdx = CSI[i].getFrameIdx();

    if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
      // Round up size of non-pair to pair size if we need to pad the
      // callee-save area to ensure 16-byte alignment.
      Offset -= 16;
      assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
      MFI->setObjectSize(RPI.FrameIdx, 16);
    } else
      Offset -= RPI.isPaired() ? 16 : 8;
    assert(Offset % 8 == 0);
    RPI.Offset = Offset / 8;
    assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
           "Offset out of bounds for LDP/STP immediate");

    RegPairs.push_back(RPI);
    if (RPI.isPaired())
      ++i;
  }

  // Align first offset to even 16-byte boundary to avoid additional SP
  // adjustment instructions.
  // Last pair offset is size of whole callee-save region for SP
  // pre-dec/post-inc.
  RegPairInfo &LastPair = RegPairs.back();
  assert(AFI->getCalleeSavedStackSize() % 8 == 0);
  LastPair.Offset = AFI->getCalleeSavedStackSize() / 8;
}
/// Based on the use to defs information (in non-ADRPMode), compute the
/// LOH opportunities that are not related to ADRP.
static void computeOthers(const InstrToInstrs &UseToDefs,
                          const InstrToInstrs *DefsPerColorToUses,
                          AArch64FunctionInfo &AArch64FI,
                          const MapRegToId &RegToId,
                          const MachineDominatorTree *MDT) {
  SetOfMachineInstr *InvolvedInLOHs = nullptr;
#ifdef DEBUG
  SetOfMachineInstr InvolvedInLOHsStorage;
  InvolvedInLOHs = &InvolvedInLOHsStorage;
#endif // DEBUG
  DEBUG(dbgs() << "*** Compute LOH for Others\n");
  // ADRP -> ADD/LDR -> LDR/STR pattern.
  // Fall back to the ADRP -> ADD pattern if we fail to catch the bigger one.
  // FIXME: When the statistics are not important, this initial filtering loop
  // can be merged into the next loop.
  // Currently we do not, so that the code stays the same for both DEBUG and
  // NDEBUG builds. Indeed, the iterator of the second loop would need to be
  // changed.
  SetOfMachineInstr PotentialCandidates;
  SetOfMachineInstr PotentialADROpportunities;
  for (auto &Use : UseToDefs) {
    // If no definition is available, this is not a candidate.
    if (Use.second.empty())
      continue;
    // Keep only instructions that are load or store and at the end of
    // an ADRP -> ADD/LDR/Nothing chain.
    // We already filtered out the no-chain cases.
    if (!isCandidate(Use.first, UseToDefs, MDT)) {
      PotentialADROpportunities.insert(Use.first);
      continue;
    }
    PotentialCandidates.insert(Use.first);
  }

  // Make the following distinctions for statistics as the linker does
  // know how to decode instructions:
  // - ADD/LDR/Nothing make three different patterns.
  // - LDR/STR make two different patterns.
  // Hence, 6 - 1 base patterns
  // (because ADRP -> Nothing -> STR is not simplifiable).
  // The linker is only able to have a simple semantic, i.e., if pattern A,
  // do B.
  // However, we want to see the opportunity we may miss if we were able to
  // catch more complex cases.

  // PotentialCandidates are the result of a chain ADRP -> ADD/LDR -> LDR/STR.
  // A potential candidate becomes a candidate if its current immediate
  // operand is zero and all nodes of the chain have respectively only one user.
#ifdef DEBUG
  SetOfMachineInstr DefsOfPotentialCandidates;
#endif
  for (const MachineInstr *Candidate : PotentialCandidates) {
    // Get the definition of the candidate, i.e., ADD or LDR.
    const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
    // Record the elements of the chain.
    const MachineInstr *L1 = Def;
    const MachineInstr *L2 = nullptr;
    unsigned ImmediateDefOpc = Def->getOpcode();
    if (Def->getOpcode() != AArch64::ADRP) {
      // Check the number of users of this node.
      const SetOfMachineInstr *Users =
          getUses(DefsPerColorToUses,
                  RegToId.find(Def->getOperand(0).getReg())->second, *Def);
      if (Users->size() > 1) {
#ifdef DEBUG
        // If all the uses of this def are in a potential candidate, this is
        // a complex candidate of level 2.
        bool IsLevel2 = true;
        for (const MachineInstr *MI : *Users) {
          if (!PotentialCandidates.count(MI)) {
            ++NumTooCplxLvl2;
            IsLevel2 = false;
            break;
          }
        }
        if (IsLevel2)
          ++NumCplxLvl2;
#endif // DEBUG
        PotentialADROpportunities.insert(Def);
        continue;
      }
      L2 = Def;
      Def = *UseToDefs.find(Def)->second.begin();
      L1 = Def;
    } // else the element in the middle of the chain is nothing, thus
      // Def already contains the first element of the chain.

    // Check the number of users of the first node in the chain, i.e., ADRP.
    const SetOfMachineInstr *Users =
        getUses(DefsPerColorToUses,
                RegToId.find(Def->getOperand(0).getReg())->second, *Def);
    if (Users->size() > 1) {
#ifdef DEBUG
      // If all the uses of this def are in the defs of the potential
      // candidates, this is a complex candidate of level 1.
      if (DefsOfPotentialCandidates.empty()) {
        // Lazy init.
        DefsOfPotentialCandidates = PotentialCandidates;
        for (const MachineInstr *Candidate : PotentialCandidates) {
          if (!UseToDefs.find(Candidate)->second.empty())
            DefsOfPotentialCandidates.insert(
                *UseToDefs.find(Candidate)->second.begin());
        }
      }
      bool Found = false;
      for (auto &Use : *Users) {
        if (!DefsOfPotentialCandidates.count(Use)) {
          ++NumTooCplxLvl1;
          Found = true;
          break;
        }
      }
      if (!Found)
        ++NumCplxLvl1;
#endif // DEBUG
      continue;
    }

    bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
    // If the chain is three instructions long and ldr is the second element,
    // then this ldr must load from GOT, otherwise this is not a correct chain.
    if (L2 && !IsL2Add &&
        !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT))
      continue;
    SmallVector<const MachineInstr *, 3> Args;
    MCLOHType Kind;
    if (isCandidateLoad(Candidate)) {
      if (!L2) {
        // At this point, the candidate LOH indicates that the ldr instruction
        // may use a direct access to the symbol. There is no such encoding
        // for loads of byte and half.
        if (!supportLoadFromLiteral(Candidate))
          continue;

        DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate
                     << '\n');
        Kind = MCLOH_AdrpLdr;
        Args.push_back(L1);
        Args.push_back(Candidate);
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
               "L1 already involved in LOH.");
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
               "Candidate already involved in LOH.");
        ++NumADRPToLDR;
      } else {
        DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
                     << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
                     << '\n');

        Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr;
        Args.push_back(L1);
        Args.push_back(L2);
        Args.push_back(Candidate);

        PotentialADROpportunities.remove(L2);
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
               "L1 already involved in LOH.");
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
               "L2 already involved in LOH.");
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
               "Candidate already involved in LOH.");
#ifdef DEBUG
        // Get the immediate of the load.
        if (Candidate->getOperand(2).getImm() == 0)
          if (ImmediateDefOpc == AArch64::ADDXri)
            ++NumADDToLDR;
          else
            ++NumLDRToLDR;
        else if (ImmediateDefOpc == AArch64::ADDXri)
          ++NumADDToLDRWithImm;
        else
          ++NumLDRToLDRWithImm;
#endif // DEBUG
      }
    } else {
      if (ImmediateDefOpc == AArch64::ADRP)
        continue;
      else {
        DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
                     << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
                     << '\n');

        Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr;
        Args.push_back(L1);
        Args.push_back(L2);
        Args.push_back(Candidate);

        PotentialADROpportunities.remove(L2);
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
               "L1 already involved in LOH.");
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
               "L2 already involved in LOH.");
        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
               "Candidate already involved in LOH.");
#ifdef DEBUG
        // Get the immediate of the store.
        if (Candidate->getOperand(2).getImm() == 0)
          if (ImmediateDefOpc == AArch64::ADDXri)
            ++NumADDToSTR;
          else
            ++NumLDRToSTR;
        else if (ImmediateDefOpc == AArch64::ADDXri)
          ++NumADDToSTRWithImm;
        else
          ++NumLDRToSTRWithImm;
#endif // DEBUG
      }
    }
    AArch64FI.addLOHDirective(Kind, Args);
  }

  // Now that we have grabbed all the big patterns, check ADR opportunities.
  for (const MachineInstr *Candidate : PotentialADROpportunities)
    registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI,
                         InvolvedInLOHs, RegToId);
}