MachineInstr * LanaiInstrInfo::optimizeSelect(MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &SeenMIs, bool PreferFalse) const { assert(MI.getOpcode() == Lanai::SELECT && "unknown select instruction"); MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); MachineInstr *DefMI = canFoldIntoSelect(MI.getOperand(1).getReg(), MRI, this); bool Invert = !DefMI; if (!DefMI) DefMI = canFoldIntoSelect(MI.getOperand(2).getReg(), MRI, this); if (!DefMI) return nullptr; // Find new register class to use. MachineOperand FalseReg = MI.getOperand(Invert ? 1 : 2); unsigned DestReg = MI.getOperand(0).getReg(); const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); if (!MRI.constrainRegClass(DestReg, PreviousClass)) return nullptr; // Create a new predicated version of DefMI. MachineInstrBuilder NewMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg); // Copy all the DefMI operands, excluding its (null) predicate. const MCInstrDesc &DefDesc = DefMI->getDesc(); for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e && !DefDesc.OpInfo[i].isPredicate(); ++i) NewMI.addOperand(DefMI->getOperand(i)); unsigned CondCode = MI.getOperand(3).getImm(); if (Invert) NewMI.addImm(getOppositeCondition(LPCC::CondCode(CondCode))); else NewMI.addImm(CondCode); NewMI.copyImplicitOps(MI); // The output register value when the predicate is false is an implicit // register operand tied to the first def. The tie makes the register // allocator ensure the FalseReg is allocated the same register as operand 0. FalseReg.setImplicit(); NewMI.addOperand(FalseReg); NewMI->tieOperands(0, NewMI->getNumOperands() - 1); // Update SeenMIs set: register newly created MI and erase removed DefMI. SeenMIs.insert(NewMI); SeenMIs.erase(DefMI); // If MI is inside a loop, and DefMI is outside the loop, then kill flags on // DefMI would be invalid when transferred inside the loop. Checking for a // loop is expensive, but at least remove kill flags if they are in different // BBs. if (DefMI->getParent() != MI.getParent()) NewMI->clearKillInfo(); // The caller will erase MI, but not DefMI. DefMI->eraseFromParent(); return NewMI; }
void HexagonFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = std::prev(MBB.end()); DebugLoc dl = MBBI->getDebugLoc(); // // Only insert deallocframe if we need to. Also at -O0. See comment // in emitPrologue above. // if (hasFP(MF) || MF.getTarget().getOptLevel() == CodeGenOpt::None) { MachineBasicBlock::iterator MBBI = std::prev(MBB.end()); MachineBasicBlock::iterator MBBI_end = MBB.end(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); // Handle EH_RETURN. if (MBBI->getOpcode() == Hexagon::EH_RETURN_JMPR) { assert(MBBI->getOperand(0).isReg() && "Offset should be in register!"); BuildMI(MBB, MBBI, dl, TII.get(Hexagon::L2_deallocframe)); BuildMI(MBB, MBBI, dl, TII.get(Hexagon::A2_add), Hexagon::R29).addReg(Hexagon::R29).addReg(Hexagon::R28); return; } // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher // versions. if (MF.getSubtarget<HexagonSubtarget>().hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPret && !DisableDeallocRet) { // Check for RESTORE_DEALLOC_RET_JMP_V4 call. Don't emit an extra DEALLOC // instruction if we encounter it. MachineBasicBlock::iterator BeforeJMPR = MBB.begin() == MBBI ? MBBI : std::prev(MBBI); if (BeforeJMPR != MBBI && BeforeJMPR->getOpcode() == Hexagon::RESTORE_DEALLOC_RET_JMP_V4) { // Remove the JMPR node. MBB.erase(MBBI); return; } // Add dealloc_return. MachineInstrBuilder MIB = BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::L4_return)); // Transfer the function live-out registers. MIB->copyImplicitOps(*MBB.getParent(), &*MBBI); // Remove the JUMPR node. MBB.erase(MBBI); } else { // Add deallocframe for V2 and V3, and V4 tail calls. // Check for RESTORE_DEALLOC_BEFORE_TAILCALL_V4. We don't need an extra // DEALLOCFRAME instruction after it. MachineBasicBlock::iterator Term = MBB.getFirstTerminator(); MachineBasicBlock::iterator I = Term == MBB.begin() ? MBB.end() : std::prev(Term); if (I != MBB.end() && I->getOpcode() == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4) return; BuildMI(MBB, MBBI, dl, TII.get(Hexagon::L2_deallocframe)); } } }
bool Thumb1FrameLowering:: restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const TargetInstrInfo &TII = *STI.getInstrInfo(); bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)); AddDefaultPred(MIB); bool NeedsPop = false; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (Reg == ARM::LR) { if (MBB.succ_empty()) { // Special epilogue for vararg functions. See emitEpilogue if (isVarArg) continue; // ARMv4T requires BX, see emitEpilogue if (!STI.hasV5TOps()) continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); if (MI != MBB.end()) MIB.copyImplicitOps(*MI); MI = MBB.erase(MI); } else // LR may only be popped into PC, as part of return sequence. // If this isn't the return sequence, we'll need emitPopSpecialFixUp // to restore LR the hard way. continue; } MIB.addReg(Reg, getDefRegState(true)); NeedsPop = true; } // It's illegal to emit pop instruction without operands. if (NeedsPop) MBB.insert(MI, &*MIB); else MF.DeleteMachineInstr(MIB); return true; }
bool Thumb1FrameLowering:: restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const TargetInstrInfo &TII = *STI.getInstrInfo(); bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)); AddDefaultPred(MIB); bool NumRegs = false; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (Reg == ARM::LR) { // Special epilogue for vararg functions. See emitEpilogue if (isVarArg) continue; // ARMv4T requires BX, see emitEpilogue if (STI.hasV4TOps() && !STI.hasV5TOps()) continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); MIB.copyImplicitOps(&*MI); MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true)); NumRegs = true; } // It's illegal to emit pop instruction without operands. if (NumRegs) MBB.insert(MI, &*MIB); else MF.DeleteMachineInstr(MIB); return true; }
MachineInstrBuilder MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc, MachineBasicBlock::iterator I) const { MachineInstrBuilder MIB; // Certain branches have two forms: e.g beq $1, $zero, dst vs beqz $1, dest // Pick the zero form of the branch for readable assembly and for greater // branch distance in non-microMIPS mode. // FIXME: Certain atomic sequences on mips64 generate 32bit references to // Mips::ZERO, which is incorrect. This test should be updated to use // Subtarget.getABI().GetZeroReg() when those atomic sequences and others // are fixed. bool BranchWithZeroOperand = (I->isBranch() && !I->isPseudo() && I->getOperand(1).isReg() && (I->getOperand(1).getReg() == Mips::ZERO || I->getOperand(1).getReg() == Mips::ZERO_64)); if (BranchWithZeroOperand) { switch (NewOpc) { case Mips::BEQC: NewOpc = Mips::BEQZC; break; case Mips::BNEC: NewOpc = Mips::BNEZC; break; case Mips::BGEC: NewOpc = Mips::BGEZC; break; case Mips::BLTC: NewOpc = Mips::BLTZC; break; } } MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc)); // For MIPSR6 JI*C requires an immediate 0 as an operand, JIALC(64) an // immediate 0 as an operand and requires the removal of it's %RA<imp-def> // implicit operand as copying the implicit operations of the instructio we're // looking at will give us the correct flags. if (NewOpc == Mips::JIC || NewOpc == Mips::JIALC || NewOpc == Mips::JIC64 || NewOpc == Mips::JIALC64) { if (NewOpc == Mips::JIALC || NewOpc == Mips::JIALC64) MIB->RemoveOperand(0); for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { MIB.addOperand(I->getOperand(J)); } MIB.addImm(0); } else if (BranchWithZeroOperand) { // For MIPSR6 and microMIPS branches with an explicit zero operand, copy // everything after the zero. MIB.addOperand(I->getOperand(0)); for (unsigned J = 2, E = I->getDesc().getNumOperands(); J < E; ++J) { MIB.addOperand(I->getOperand(J)); } } else { // All other cases copy all other operands. for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { MIB.addOperand(I->getOperand(J)); } } MIB.copyImplicitOps(*I); MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end()); return MIB; }
bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, bool DoIt) const { MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); const TargetInstrInfo &TII = *STI.getInstrInfo(); const ThumbRegisterInfo *RegInfo = static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); // If MBBI is a return instruction, we may be able to directly restore // LR in the PC. // This is possible if we do not need to emit any SP update. // Otherwise, we need a temporary register to pop the value // and copy that value into LR. auto MBBI = MBB.getFirstTerminator(); if (!ArgRegsSaveSize && MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET) { if (!DoIt) return true; MachineInstrBuilder MIB = AddDefaultPred( BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))) .addReg(ARM::PC, RegState::Define); MIB.copyImplicitOps(&*MBBI); // erase the old tBX_RET instruction MBB.erase(MBBI); return true; } // Look for a temporary register to use. // First, compute the liveness information. LivePhysRegs UsedRegs(STI.getRegisterInfo()); UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true); // The semantic of pristines changed recently and now, // the callee-saved registers that are touched in the function // are not part of the pristines set anymore. // Add those callee-saved now. const TargetRegisterInfo *TRI = STI.getRegisterInfo(); const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) UsedRegs.addReg(CSRegs[i]); DebugLoc dl = DebugLoc(); if (MBBI != MBB.end()) { dl = MBBI->getDebugLoc(); auto InstUpToMBBI = MBB.end(); // The post-decrement is on purpose here. // We want to have the liveness right before MBBI. while (InstUpToMBBI-- != MBBI) UsedRegs.stepBackward(*InstUpToMBBI); } // Look for a register that can be directly use in the POP. unsigned PopReg = 0; // And some temporary register, just in case. unsigned TemporaryReg = 0; BitVector PopFriendly = TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID)); assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); // Rebuild the GPRs from the high registers because they are removed // form the GPR reg class for thumb1. BitVector GPRsNoLRSP = TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID)); GPRsNoLRSP |= PopFriendly; GPRsNoLRSP.reset(ARM::LR); GPRsNoLRSP.reset(ARM::SP); GPRsNoLRSP.reset(ARM::PC); for (int Register = GPRsNoLRSP.find_first(); Register != -1; Register = GPRsNoLRSP.find_next(Register)) { if (!UsedRegs.contains(Register)) { // Remember the first pop-friendly register and exit. if (PopFriendly.test(Register)) { PopReg = Register; TemporaryReg = 0; break; } // Otherwise, remember that the register will be available to // save a pop-friendly register. TemporaryReg = Register; } } if (!DoIt && !PopReg && !TemporaryReg) return false; assert((PopReg || TemporaryReg) && "Cannot get LR"); if (TemporaryReg) { assert(!PopReg && "Unnecessary MOV is about to be inserted"); PopReg = PopFriendly.find_first(); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(TemporaryReg, RegState::Define) .addReg(PopReg, RegState::Kill)); } assert(PopReg && "Do not know how to get LR"); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(PopReg, RegState::Define); emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); if (!TemporaryReg && MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET) { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) .addReg(PopReg, RegState::Kill); AddDefaultPred(MIB); MIB.copyImplicitOps(&*MBBI); // erase the old tBX_RET instruction MBB.erase(MBBI); return true; } AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(ARM::LR, RegState::Define) .addReg(PopReg, RegState::Kill)); if (TemporaryReg) { AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(PopReg, RegState::Define) .addReg(TemporaryReg, RegState::Kill)); } return true; }
MachineInstrBuilder MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc, MachineBasicBlock::iterator I) const { MachineInstrBuilder MIB; // Certain branches have two forms: e.g beq $1, $zero, dest vs beqz $1, dest // Pick the zero form of the branch for readable assembly and for greater // branch distance in non-microMIPS mode. // Additional MIPSR6 does not permit the use of register $zero for compact // branches. // FIXME: Certain atomic sequences on mips64 generate 32bit references to // Mips::ZERO, which is incorrect. This test should be updated to use // Subtarget.getABI().GetZeroReg() when those atomic sequences and others // are fixed. int ZeroOperandPosition = -1; bool BranchWithZeroOperand = false; if (I->isBranch() && !I->isPseudo()) { auto TRI = I->getParent()->getParent()->getSubtarget().getRegisterInfo(); ZeroOperandPosition = I->findRegisterUseOperandIdx(Mips::ZERO, false, TRI); BranchWithZeroOperand = ZeroOperandPosition != -1; } if (BranchWithZeroOperand) { switch (NewOpc) { case Mips::BEQC: NewOpc = Mips::BEQZC; break; case Mips::BNEC: NewOpc = Mips::BNEZC; break; case Mips::BGEC: NewOpc = Mips::BGEZC; break; case Mips::BLTC: NewOpc = Mips::BLTZC; break; case Mips::BEQC64: NewOpc = Mips::BEQZC64; break; case Mips::BNEC64: NewOpc = Mips::BNEZC64; break; } } MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc)); // For MIPSR6 JI*C requires an immediate 0 as an operand, JIALC(64) an // immediate 0 as an operand and requires the removal of it's %RA<imp-def> // implicit operand as copying the implicit operations of the instructio we're // looking at will give us the correct flags. if (NewOpc == Mips::JIC || NewOpc == Mips::JIALC || NewOpc == Mips::JIC64 || NewOpc == Mips::JIALC64) { if (NewOpc == Mips::JIALC || NewOpc == Mips::JIALC64) MIB->RemoveOperand(0); for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { MIB.add(I->getOperand(J)); } MIB.addImm(0); } else { for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J) continue; MIB.add(I->getOperand(J)); } } MIB.copyImplicitOps(*I); MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end()); return MIB; }
bool Thumb1FrameLowering:: restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const TargetInstrInfo &TII = *STI.getInstrInfo(); const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( MF.getSubtarget().getRegisterInfo()); bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); ARMRegSet LoRegsToRestore; ARMRegSet HiRegsToRestore; // Low registers (r0-r7) which can be used to restore the high registers. ARMRegSet CopyRegs; for (CalleeSavedInfo I : CSI) { unsigned Reg = I.getReg(); if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { LoRegsToRestore[Reg] = true; } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { HiRegsToRestore[Reg] = true; } else { llvm_unreachable("callee-saved register of unexpected class"); } // If this is a low register not used as the frame pointer, we may want to // use it for restoring the high registers. if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) CopyRegs[Reg] = true; } // If this is a return block, we may be able to use some unused return value // registers for restoring the high regs. auto Terminator = MBB.getFirstTerminator(); if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { CopyRegs[ARM::R0] = true; CopyRegs[ARM::R1] = true; CopyRegs[ARM::R2] = true; CopyRegs[ARM::R3] = true; for (auto Op : Terminator->implicit_operands()) { if (Op.isReg()) CopyRegs[Op.getReg()] = false; } } static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, ARM::R5, ARM::R6, ARM::R7}; static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11}; const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); const unsigned *AllHighRegsEnd = std::end(AllHighRegs); // Find the first register to restore. auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs), HiRegsToRestore, AllHighRegsEnd); while (HiRegToRestore != AllHighRegsEnd) { assert(!CopyRegs.none()); // Find the first low register to use. auto CopyReg = findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); // Create the POP instruction. MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { // Add the low register to the POP. PopMIB.addReg(*CopyReg, RegState::Define); // Create the MOV from low to high register. BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) .addReg(*HiRegToRestore, RegState::Define) .addReg(*CopyReg, RegState::Kill) .add(predOps(ARMCC::AL)); CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); HiRegToRestore = findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd); } } MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); bool NeedsPop = false; for (unsigned i = CSI.size(); i != 0; --i) { CalleeSavedInfo &Info = CSI[i-1]; unsigned Reg = Info.getReg(); // High registers (excluding lr) have already been dealt with if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR)) continue; if (Reg == ARM::LR) { Info.setRestored(false); if (!MBB.succ_empty() || MI->getOpcode() == ARM::TCRETURNdi || MI->getOpcode() == ARM::TCRETURNri) // LR may only be popped into PC, as part of return sequence. // If this isn't the return sequence, we'll need emitPopSpecialFixUp // to restore LR the hard way. // FIXME: if we don't pass any stack arguments it would be actually // advantageous *and* correct to do the conversion to an ordinary call // instruction here. continue; // Special epilogue for vararg functions. See emitEpilogue if (isVarArg) continue; // ARMv4T requires BX, see emitEpilogue if (!STI.hasV5TOps()) continue; // Pop LR into PC. Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); if (MI != MBB.end()) MIB.copyImplicitOps(*MI); MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true)); NeedsPop = true; } // It's illegal to emit pop instruction without operands. if (NeedsPop) MBB.insert(MI, &*MIB); else MF.DeleteMachineInstr(MIB); return true; }
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert((MBBI->getOpcode() == ARM::tBX_RET || MBBI->getOpcode() == ARM::tPOP_RET) && "Can only insert epilog into returning blocks"); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const ThumbRegisterInfo *RegInfo = static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); assert((unsigned)NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { if (NumBytes - ArgRegsSaveSize != 0) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes - ArgRegsSaveSize); } else { // Unwind MBBI to point to first LDR / VLDRD. if (MBBI != MBB.begin()) { do --MBBI; while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); if (!isCSRestore(MBBI, CSRegs)) ++MBBI; } // Move SP to start of FP callee save spill area. NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize() + ArgRegsSaveSize); if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot, the target is ELF and the function has FP, or // the target uses var sized objects. if (NumBytes) { assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(ARM::R4)); } else AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(FramePtr)); } else { if (MBBI->getOpcode() == ARM::tBX_RET && &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { MachineBasicBlock::iterator PMBBI = std::prev(MBBI); if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes)) emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes)) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); } } bool IsV4PopReturn = false; for (const CalleeSavedInfo &CSI : MFI->getCalleeSavedInfo()) if (CSI.getReg() == ARM::LR) IsV4PopReturn = true; IsV4PopReturn &= STI.hasV4TOps() && !STI.hasV5TOps(); // Unlike T2 and ARM mode, the T1 pop instruction cannot restore // to LR, and we can't pop the value directly to the PC since // we need to update the SP after popping the value. So instead // we have to emit: // POP {r3} // ADD sp, #offset // BX r3 // If this would clobber a return value, then generate this sequence instead: // MOV ip, r3 // POP {r3} // ADD sp, #offset // MOV lr, r3 // MOV r3, ip // BX lr if (ArgRegsSaveSize || IsV4PopReturn) { // Get the last instruction, tBX_RET MBBI = MBB.getLastNonDebugInstr(); assert (MBBI->getOpcode() == ARM::tBX_RET); DebugLoc dl = MBBI->getDebugLoc(); if (AFI->getReturnRegsCount() <= 3) { // Epilogue: pop saved LR to R3 and branch off it. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(ARM::R3, RegState::Define); emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) .addReg(ARM::R3, RegState::Kill); AddDefaultPred(MIB); MIB.copyImplicitOps(&*MBBI); // erase the old tBX_RET instruction MBB.erase(MBBI); } else { AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(ARM::R12, RegState::Define) .addReg(ARM::R3, RegState::Kill)); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(ARM::R3, RegState::Define); emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(ARM::LR, RegState::Define) .addReg(ARM::R3, RegState::Kill)); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(ARM::R3, RegState::Define) .addReg(ARM::R12, RegState::Kill)); // Keep the tBX_RET instruction } } }
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert((MBBI->getOpcode() == ARM::tBX_RET || MBBI->getOpcode() == ARM::tPOP_RET) && "Can only insert epilog into returning blocks"); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. if (MBBI != MBB.begin()) { do --MBBI; while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); if (!isCSRestore(MBBI, CSRegs)) ++MBBI; } // Move SP to start of FP callee save spill area. NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot, the target is ELF and the function has FP, or // the target uses var sized objects. if (NumBytes) { assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(ARM::R4)); } else AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(FramePtr)); } else { if (MBBI->getOpcode() == ARM::tBX_RET && &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { MachineBasicBlock::iterator PMBBI = std::prev(MBBI); if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes)) emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes)) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); } } if (ArgRegsSaveSize) { // Unlike T2 and ARM mode, the T1 pop instruction cannot restore // to LR, and we can't pop the value directly to the PC since // we need to update the SP after popping the value. Therefore, we // pop the old LR into R3 as a temporary. // Get the last instruction, tBX_RET MBBI = MBB.getLastNonDebugInstr(); assert (MBBI->getOpcode() == ARM::tBX_RET); // Epilogue for vararg functions: pop LR to R3 and branch off it. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(ARM::R3, RegState::Define); emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) .addReg(ARM::R3, RegState::Kill); AddDefaultPred(MIB); MIB.copyImplicitOps(&*MBBI); // erase the old tBX_RET instruction MBB.erase(MBBI); } }
void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, bool(*Func)(unsigned, bool)) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); unsigned RetOpcode = MI->getOpcode(); bool isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND || RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND); SmallVector<unsigned, 4> Regs; unsigned i = CSI.size(); while (i != 0) { unsigned LastReg = 0; bool DeleteRet = false; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (!(Func)(Reg, STI.isTargetDarwin())) continue; if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) { Reg = ARM::PC; LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; // Fold the return instruction into the LDM. DeleteRet = true; } // If NoGap is true, pop consecutive registers and then leave the rest // for other instructions. e.g. // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11} if (NoGap && LastReg && LastReg != Reg-1) break; LastReg = Reg; Regs.push_back(Reg); } if (Regs.empty()) continue; if (Regs.size() > 1 || LdrOpc == 0) { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) .addReg(ARM::SP)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i], getDefRegState(true)); if (DeleteRet) { MIB->copyImplicitOps(&*MI); MI->eraseFromParent(); } MI = MIB; } else if (Regs.size() == 1) { // If we adjusted the reg to PC from LR above, switch it back here. We // only do that for LDM. if (Regs[0] == ARM::PC) Regs[0] = ARM::LR; MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0]) .addReg(ARM::SP, RegState::Define) .addReg(ARM::SP); // ARM mode needs an extra reg0 here due to addrmode2. Will go away once // that refactoring is complete (eventually). if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) { MIB.addReg(0); MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift)); } else MIB.addImm(4); AddDefaultPred(MIB); } Regs.clear(); } }