void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert((MBBI->getOpcode() == ARM::tBX_RET || MBBI->getOpcode() == ARM::tPOP_RET) && "Can only insert epilog into returning blocks"); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. if (MBBI != MBB.begin()) { do --MBBI; while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); if (!isCSRestore(MBBI, CSRegs)) ++MBBI; } // Move SP to start of FP callee save spill area. NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot, the target is ELF and the function has FP, or // the target uses var sized objects. if (NumBytes) { assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(ARM::R4)); } else AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(FramePtr)); } else { if (MBBI->getOpcode() == ARM::tBX_RET && &MBB.front() != MBBI && prior(MBBI)->getOpcode() == ARM::tPOP) { MachineBasicBlock::iterator PMBBI = prior(MBBI); emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); } else emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); } } if (ArgRegsSaveSize) { // Unlike T2 and ARM mode, the T1 pop instruction cannot restore // to LR, and we can't pop the value directly to the PC since // we need to update the SP after popping the value. Therefore, we // pop the old LR into R3 as a temporary. // Move back past the callee-saved register restoration while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs)) ++MBBI; // Epilogue for vararg functions: pop LR to R3 and branch off it. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(ARM::R3, RegState::Define); emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) .addReg(ARM::R3, RegState::Kill); AddDefaultPred(MIB); MIB.copyImplicitOps(&*MBBI); // erase the old tBX_RET instruction MBB.erase(MBBI); } }
void MipsSEInstrInfo:: loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, int64_t Offset) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); unsigned Opc = 0; const Function *Func = MBB.getParent()->getFunction(); bool ReqIndirectLoad = Func->hasFnAttribute("interrupt") && (DestReg == Mips::LO0 || DestReg == Mips::LO0_64 || DestReg == Mips::HI0 || DestReg == Mips::HI0_64); if (Mips::GPR32RegClass.hasSubClassEq(RC)) Opc = Mips::LW; else if (Mips::GPR64RegClass.hasSubClassEq(RC)) Opc = Mips::LD; else if (Mips::ACC64RegClass.hasSubClassEq(RC)) Opc = Mips::LOAD_ACC64; else if (Mips::ACC64DSPRegClass.hasSubClassEq(RC)) Opc = Mips::LOAD_ACC64DSP; else if (Mips::ACC128RegClass.hasSubClassEq(RC)) Opc = Mips::LOAD_ACC128; else if (Mips::DSPCCRegClass.hasSubClassEq(RC)) Opc = Mips::LOAD_CCOND_DSP; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) Opc = Mips::LWC1; else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) Opc = Mips::LDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) Opc = Mips::LDC164; else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) Opc = Mips::LD_B; else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) || TRI->isTypeLegalForClass(*RC, MVT::v8f16)) Opc = Mips::LD_H; else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || TRI->isTypeLegalForClass(*RC, MVT::v4f32)) Opc = Mips::LD_W; else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || TRI->isTypeLegalForClass(*RC, MVT::v2f64)) Opc = Mips::LD_D; else if (Mips::HI32RegClass.hasSubClassEq(RC)) Opc = Mips::LW; else if (Mips::HI64RegClass.hasSubClassEq(RC)) Opc = Mips::LD; else if (Mips::LO32RegClass.hasSubClassEq(RC)) Opc = Mips::LW; else if (Mips::LO64RegClass.hasSubClassEq(RC)) Opc = Mips::LD; assert(Opc && "Register class not handled!"); if (!ReqIndirectLoad) BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI) .addImm(Offset) .addMemOperand(MMO); else { // Load HI/LO through K0. Notably the DestReg is encoded into the // instruction itself. unsigned Reg = Mips::K0; unsigned LdOp = Mips::MTLO; if (DestReg == Mips::HI0) LdOp = Mips::MTHI; if (Subtarget.getABI().ArePtrs64bit()) { Reg = Mips::K0_64; if (DestReg == Mips::HI0_64) LdOp = Mips::MTHI64; else LdOp = Mips::MTLO64; } BuildMI(MBB, I, DL, get(Opc), Reg) .addFrameIndex(FI) .addImm(Offset) .addMemOperand(MMO); BuildMI(MBB, I, DL, get(LdOp)).addReg(Reg); } }
void MipsSEInstrInfo::expandPseudoMFHiLo(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned NewOpc) const { BuildMI(MBB, I, I->getDebugLoc(), get(NewOpc), I->getOperand(0).getReg()); }
void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); const MipsRegisterInfo *RegInfo = static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo()); const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; // First, compute final stack size. uint64_t StackSize = MFI->getStackSize(); // No need to allocate space on the stack. if (StackSize == 0 && !MFI->adjustsStack()) return; MachineModuleInfo &MMI = MF.getMMI(); std::vector<MachineMove> &Moves = MMI.getFrameMoves(); MachineLocation DstML, SrcML; // Adjust stack. TII.adjustStackPtr(SP, -StackSize, MBB, MBBI); // emit ".cfi_def_cfa_offset StackSize" MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel); DstML = MachineLocation(MachineLocation::VirtualFP); SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize); Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML)); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); if (CSI.size()) { // Find the instruction past the last instruction that saves a callee-saved // register to the stack. for (unsigned i = 0; i < CSI.size(); ++i) ++MBBI; // Iterate over list of callee-saved registers and emit .cfi_offset // directives. MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel); for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); unsigned Reg = I->getReg(); // If Reg is a double precision register, emit two cfa_offsets, // one for each of the paired single precision registers. if (Mips::AFGR64RegClass.contains(Reg)) { MachineLocation DstML0(MachineLocation::VirtualFP, Offset); MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4); MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven)); MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd)); if (!STI.isLittle()) std::swap(SrcML0, SrcML1); Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0)); Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1)); } else { // Reg is either in CPURegs or FGR32. DstML = MachineLocation(MachineLocation::VirtualFP, Offset); SrcML = MachineLocation(Reg); Moves.push_back(MachineMove(CSLabel, DstML, SrcML)); } } } // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { // Insert instruction "move $fp, $sp" at this location. BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO); // emit ".cfi_def_cfa_register $fp" MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel); DstML = MachineLocation(FP); SrcML = MachineLocation(MachineLocation::VirtualFP); Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML)); } }
/// converToHardwareLoop - check if the loop is a candidate for /// converting to a hardware loop. If so, then perform the /// transformation. /// /// This function works on innermost loops first. A loop can /// be converted if it is a counting loop; either a register /// value or an immediate. /// /// The code makes several assumptions about the representation /// of the loop in llvm. bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { bool Changed = false; // Process nested loops first. for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { Changed |= convertToHardwareLoop(*I); } // If a nested loop has been converted, then we can't convert this loop. if (Changed) { return Changed; } // Are we able to determine the trip count for the loop? CountValue *TripCount = getTripCount(L); if (TripCount == 0) { return false; } // Does the loop contain any invalid instructions? if (containsInvalidInstruction(L)) { return false; } MachineBasicBlock *Preheader = L->getLoopPreheader(); // No preheader means there's not place for the loop instr. if (Preheader == 0) { return false; } MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); MachineBasicBlock *LastMBB = L->getExitingBlock(); // Don't generate hw loop if the loop has more than one exit. if (LastMBB == 0) { return false; } MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); // Determine the loop start. MachineBasicBlock *LoopStart = L->getTopBlock(); if (L->getLoopLatch() != LastMBB) { // When the exit and latch are not the same, use the latch block as the // start. // The loop start address is used only after the 1st iteration, and the loop // latch may contains instrs. that need to be executed after the 1st iter. LoopStart = L->getLoopLatch(); // Make sure the latch is a successor of the exit, otherwise it won't work. if (!LastMBB->isSuccessor(LoopStart)) { return false; } } // Convert the loop to a hardware loop DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); if (TripCount->isReg()) { // Create a copy of the loop count register. MachineFunction *MF = LastMBB->getParent(); const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(TripCount->getReg()); unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC); BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg()); if (TripCount->isNeg()) { unsigned CountReg1 = CountReg; CountReg = MF->getRegInfo().createVirtualRegister(RC); BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), TII->get(Hexagon::NEG), CountReg).addReg(CountReg1); } // Add the Loop instruction to the begining of the loop. BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg); } else { assert(TripCount->isImm() && "Expecting immedate vaule for trip count"); // Add the Loop immediate instruction to the beginning of the loop. int64_t CountImm = TripCount->getImm(); BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm); } // Make sure the loop start always has a reference in the CFG. We need to // create a BlockAddress operand to get this mechanism to work both the // MachineBasicBlock and BasicBlock objects need the flag set. LoopStart->setHasAddressTaken(); // This line is needed to set the hasAddressTaken flag on the BasicBlock // object BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock())); // Replace the loop branch with an endloop instruction. DebugLoc dl = LastI->getDebugLoc(); BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart); // The loop ends with either: // - a conditional branch followed by an unconditional branch, or // - a conditional branch to the loop start. if (LastI->getOpcode() == Hexagon::JMP_c || LastI->getOpcode() == Hexagon::JMP_cNot) { // delete one and change/add an uncond. branch to out of the loop MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); LastI = LastMBB->erase(LastI); if (!L->contains(BranchTarget)) { if (LastI != LastMBB->end()) { TII->RemoveBranch(*LastMBB); } SmallVector<MachineOperand, 0> Cond; TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl); } } else { // Conditional branch to loop start; just delete it. LastMBB->erase(LastI); } delete TripCount; ++NumHWLoops; return true; }
bool AArch64FrameLowering::restoreCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); unsigned Count = CSI.size(); DebugLoc DL; assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); if (MI != MBB.end()) DL = MI->getDebugLoc(); for (unsigned i = 0; i < Count; i += 2) { unsigned Reg1 = CSI[i].getReg(); unsigned Reg2 = CSI[i + 1].getReg(); // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI // list to come in sorted by frame index so that we can issue the store // pair instructions directly. Assert if we see anything otherwise. assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() && "Out of order callee saved regs!"); // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only // the last load is sp-pi post-increment and de-allocates the stack: // For example: // ldp fp, lr, [sp, #32] // addImm(+4) // ldp x20, x19, [sp, #16] // addImm(+2) // ldp x22, x21, [sp], #48 // addImm(+6) // Note: see comment in spillCalleeSavedRegisters() unsigned LdrOpc; assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); assert((i & 1) == 0 && "Odd index for callee-saved reg spill!"); if (AArch64::GPR64RegClass.contains(Reg1)) { assert(AArch64::GPR64RegClass.contains(Reg2) && "Expected GPR64 callee-saved register pair!"); if (i == Count - 2) LdrOpc = AArch64::LDPXpost; else LdrOpc = AArch64::LDPXi; } else if (AArch64::FPR64RegClass.contains(Reg1)) { assert(AArch64::FPR64RegClass.contains(Reg2) && "Expected FPR64 callee-saved register pair!"); if (i == Count - 2) LdrOpc = AArch64::LDPDpost; else LdrOpc = AArch64::LDPDi; } else llvm_unreachable("Unexpected callee saved register!"); DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", " << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx() << ", " << CSI[i + 1].getFrameIdx() << ")\n"); // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4; // etc. const int Offset = (i == Count - 2) ? Count : Count - i - 2; assert((Offset >= -64 && Offset <= 63) && "Offset out of bounds for LDP immediate"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); if (LdrOpc == AArch64::LDPXpost || LdrOpc == AArch64::LDPDpost) MIB.addReg(AArch64::SP, RegState::Define); MIB.addReg(Reg2, getDefRegState(true)) .addReg(Reg1, getDefRegState(true)) .addReg(AArch64::SP) .addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8] // where the factor * 8 is implicit } return true; }
void MSP430FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>(); const MSP430InstrInfo &TII = *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo()); MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc DL = MBBI->getDebugLoc(); switch (RetOpcode) { case MSP430::RET: case MSP430::RETI: break; // These are ok default: llvm_unreachable("Can only insert epilog into returning blocks"); } // Get the number of bytes to allocate from the FrameInfo uint64_t StackSize = MFI->getStackSize(); unsigned CSSize = MSP430FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; if (hasFP(MF)) { // Calculate required stack adjustment uint64_t FrameSize = StackSize - 2; NumBytes = FrameSize - CSSize; // pop FPW. BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW); } else NumBytes = StackSize - CSSize; // Skip the callee-saved pop instructions. while (MBBI != MBB.begin()) { MachineBasicBlock::iterator PI = prior(MBBI); unsigned Opc = PI->getOpcode(); if (Opc != MSP430::POP16r && !PI->isTerminator()) break; --MBBI; } DL = MBBI->getDebugLoc(); // If there is an ADD16ri or SUB16ri of SPW immediately before this // instruction, merge the two instructions. //if (NumBytes || MFI->hasVarSizedObjects()) // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); if (MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW); if (CSSize) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW) .addReg(MSP430::SPW).addImm(CSSize); // The SRW implicit def is dead. MI->getOperand(3).setIsDead(); } } else { // adjust stack pointer back: SPW += numbytes if (NumBytes) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW) .addReg(MSP430::SPW).addImm(NumBytes); // The SRW implicit def is dead. MI->getOperand(3).setIsDead(); } } }
void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, bool(*Func)(unsigned, bool)) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); unsigned RetOpcode = MI->getOpcode(); bool isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND || RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND); SmallVector<unsigned, 4> Regs; unsigned i = CSI.size(); while (i != 0) { unsigned LastReg = 0; bool DeleteRet = false; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (!(Func)(Reg, STI.isTargetDarwin())) continue; if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) { Reg = ARM::PC; LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; // Fold the return instruction into the LDM. DeleteRet = true; } // If NoGap is true, pop consecutive registers and then leave the rest // for other instructions. e.g. // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11} if (NoGap && LastReg && LastReg != Reg-1) break; LastReg = Reg; Regs.push_back(Reg); } if (Regs.empty()) continue; if (Regs.size() > 1 || LdrOpc == 0) { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) .addReg(ARM::SP)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i], getDefRegState(true)); if (DeleteRet) MI->eraseFromParent(); MI = MIB; } else if (Regs.size() == 1) { // If we adjusted the reg to PC from LR above, switch it back here. We // only do that for LDM. if (Regs[0] == ARM::PC) Regs[0] = ARM::LR; MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0]) .addReg(ARM::SP, RegState::Define) .addReg(ARM::SP); // ARM mode needs an extra reg0 here due to addrmode2. Will go away once // that refactoring is complete (eventually). if (LdrOpc == ARM::LDR_POST) { MIB.addReg(0); MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift)); } else MIB.addImm(4); AddDefaultPred(MIB); } Regs.clear(); } }
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); MachineFrameInfo &MFI = MF.getFrameInfo(); const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL; bool IsTailCallReturn = false; if (MBB.end() != MBBI) { DL = MBBI->getDebugLoc(); unsigned RetOpcode = MBBI->getOpcode(); IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri; } int NumBytes = MFI.getStackSize(); const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; // Initial and residual are named for consistency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t ArgumentPopSize = 0; if (IsTailCallReturn) { MachineOperand &StackAdjust = MBBI->getOperand(1); // For a tail-call in a callee-pops-arguments environment, some or all of // the stack may actually be in use for the call's arguments, this is // calculated during LowerCall and consumed here... ArgumentPopSize = StackAdjust.getImm(); } else { // ... otherwise the amount to pop is *all* of the argument space, // conveniently stored in the MachineFunctionInfo by // LowerFormalArguments. This will, of course, be zero for the C calling // convention. ArgumentPopSize = AFI->getArgumentStackToRestore(); } // The stack frame should be like below, // // ---------------------- --- // | | | // | BytesInStackArgArea| CalleeArgStackSize // | (NumReusableBytes) | (of tail call) // | | --- // | | | // ---------------------| --- | // | | | | // | CalleeSavedReg | | | // | (CalleeSavedStackSize)| | | // | | | | // ---------------------| | NumBytes // | | StackSize (StackAdjustUp) // | LocalStackSize | | | // | (covering callee | | | // | args) | | | // | | | | // ---------------------- --- --- // // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize // = StackSize + ArgumentPopSize // // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps // it as the 2nd argument of AArch64ISD::TC_RETURN. auto CSStackSize = AFI->getCalleeSavedStackSize(); bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); if (!CombineSPBump && CSStackSize != 0) convertCalleeSaveRestoreToSPPrePostIncDec( MBB, std::prev(MBB.getFirstTerminator()), DL, TII, CSStackSize); // Move past the restores of the callee-saved registers. MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); MachineBasicBlock::iterator Begin = MBB.begin(); while (LastPopI != Begin) { --LastPopI; if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) { ++LastPopI; break; } else if (CombineSPBump) fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize()); } // If there is a single SP update, insert it before the ret and we're done. if (CombineSPBump) { emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, NumBytes + ArgumentPopSize, TII, MachineInstr::FrameDestroy); return; } NumBytes -= CSStackSize; assert(NumBytes >= 0 && "Negative stack allocation size!?"); if (!hasFP(MF)) { bool RedZone = canUseRedZone(MF); // If this was a redzone leaf function, we don't need to restore the // stack pointer (but we may need to pop stack args for fastcc). if (RedZone && ArgumentPopSize == 0) return; bool NoCalleeSaveRestore = CSStackSize == 0; int StackRestoreBytes = RedZone ? 0 : NumBytes; if (NoCalleeSaveRestore) StackRestoreBytes += ArgumentPopSize; emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, StackRestoreBytes, TII, MachineInstr::FrameDestroy); // If we were able to combine the local stack pop with the argument pop, // then we're done. if (NoCalleeSaveRestore || ArgumentPopSize == 0) return; NumBytes = 0; } // Restore the original stack pointer. // FIXME: Rather than doing the math here, we should instead just use // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. if (MFI.hasVarSizedObjects() || AFI->isStackRealigned()) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, -CSStackSize + 16, TII, MachineInstr::FrameDestroy); else if (NumBytes) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII, MachineInstr::FrameDestroy); // This must be placed after the callee-save restore code because that code // assumes the SP is at the same location as it was after the callee-save save // code in the prologue. if (ArgumentPopSize) emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, ArgumentPopSize, TII, MachineInstr::FrameDestroy); }
void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert(MBBI->getDesc().isReturn() && "Can only insert epilog into returning blocks"); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do --MBBI; while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); if (!isCSRestore(MBBI, TII, CSRegs)) ++MBBI; } // Move SP to start of FP callee save spill area. NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot or target is ELF and the function has FP. if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; if (NumBytes) { if (isARM) emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, ARMCC::AL, 0, TII); else { // It's not possible to restore SP from FP in a single instruction. // For Darwin, this looks like: // mov sp, r7 // sub sp, #24 // This is bad, if an interrupt is taken after the mov, sp is in an // inconsistent state. // Use the first callee-saved register as a scratch register. assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && "No scratch register to restore SP from FP!"); emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, ARMCC::AL, 0, TII); BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) .addReg(ARM::R4); } } else { // Thumb2 or ARM. if (isARM) BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); else BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) .addReg(FramePtr); } } else if (NumBytes) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); // Increment past our save areas. if (AFI->getDPRCalleeSavedAreaSize()) { MBBI++; // Since vpop register list cannot have gaps, there may be multiple vpop // instructions in the epilogue. while (MBBI->getOpcode() == ARM::VLDMDIA_UPD) MBBI++; } if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; } if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND || RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) { // Tail call return: adjust the stack pointer and jump to callee. MBBI = MBB.getLastNonDebugInstr(); MachineOperand &JumpTarget = MBBI->getOperand(0); // Jump to label or value in register. if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) { unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi) ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd) : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), JumpTarget.getTargetFlags()); else { assert(JumpTarget.isSymbol()); MIB.addExternalSymbol(JumpTarget.getSymbolName(), JumpTarget.getTargetFlags()); } } else if (RetOpcode == ARM::TCRETURNri) { BuildMI(MBB, MBBI, dl, TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). addReg(JumpTarget.getReg(), RegState::Kill); } else if (RetOpcode == ARM::TCRETURNriND) { BuildMI(MBB, MBBI, dl, TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)). addReg(JumpTarget.getReg(), RegState::Kill); } MachineInstr *NewMI = prior(MBBI); for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) NewMI->addOperand(MBBI->getOperand(i)); // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); MBBI = NewMI; } if (VARegSaveSize) emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); }
void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc, unsigned StrOpc, bool NoGap, bool(*Func)(unsigned, bool), unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); SmallVector<std::pair<unsigned,bool>, 4> Regs; unsigned i = CSI.size(); while (i != 0) { unsigned LastReg = 0; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (!(Func)(Reg, STI.isTargetDarwin())) continue; // Add the callee-saved register as live-in unless it's LR and // @llvm.returnaddress is called. If LR is returned for // @llvm.returnaddress then it's already added to the function and // entry block live-in sets. bool isKill = true; if (Reg == ARM::LR) { if (MF.getFrameInfo()->isReturnAddressTaken() && MF.getRegInfo().isLiveIn(Reg)) isKill = false; } if (isKill) MBB.addLiveIn(Reg); // If NoGap is true, push consecutive registers and then leave the rest // for other instructions. e.g. // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11} if (NoGap && LastReg && LastReg != Reg-1) break; LastReg = Reg; Regs.push_back(std::make_pair(Reg, isKill)); } if (Regs.empty()) continue; if (Regs.size() > 1 || StrOpc== 0) { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) .addReg(ARM::SP).setMIFlags(MIFlags)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second)); } else if (Regs.size() == 1) { MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP) .addReg(Regs[0].first, getKillRegState(Regs[0].second)) .addReg(ARM::SP).setMIFlags(MIFlags); // ARM mode needs an extra reg0 here due to addrmode2. Will go away once // that refactoring is complete (eventually). if (StrOpc == ARM::STR_PRE) { MIB.addReg(0); MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::sub, 4, ARM_AM::no_shift)); } else MIB.addImm(-4); AddDefaultPred(MIB); } Regs.clear(); } }
void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); unsigned FramePtr = RegInfo->getFrameRegister(MF); // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; // Allocate the vararg register save area. This is not counted in NumBytes. if (VARegSaveSize) emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize, MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, MachineInstr::FrameSetup); return; } for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); int FI = CSI[i].getFrameIdx(); switch (Reg) { case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: if (Reg == FramePtr) FramePtrSpillFI = FI; AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; break; case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: if (Reg == FramePtr) FramePtrSpillFI = FI; if (STI.isTargetDarwin()) { AFI->addGPRCalleeSavedArea2Frame(FI); GPRCS2Size += 4; } else { AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; } break; default: AFI->addDPRCalleeSavedAreaFrame(FI); DPRCSSize += 8; } } // Move past area 1. if (GPRCS1Size > 0) MBBI++; // Set FP to point to the stack slot that contains the previous FP. // For Darwin, FP is R7, which has now been stored in spill area 1. // Otherwise, if this is not Darwin, all the callee-saved registers go // into spill area 1, including the FP in R11. In either case, it is // now safe to emit this assignment. bool HasFP = hasFP(MF); if (HasFP) { unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0) .setMIFlag(MachineInstr::FrameSetup); AddDefaultCC(AddDefaultPred(MIB)); } // Move past area 2. if (GPRCS2Size > 0) MBBI++; // Determine starting offsets of spill areas. unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; if (HasFP) AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); // Move past area 3. if (DPRCSSize > 0) { MBBI++; // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) MBBI++; } NumBytes = DPRCSOffset; if (NumBytes) { // Adjust SP after all the callee-save spills. emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, MachineInstr::FrameSetup); if (HasFP && isARM) // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 // Note it's not safe to do this in Thumb2 mode because it would have // taken two instructions: // mov sp, r7 // sub sp, #24 // If an interrupt is taken between the two instructions, then sp is in // an inconsistent state (pointing to the middle of callee-saved area). // The interrupt handler can end up clobbering the registers. AFI->setShouldRestoreSPFromFP(true); } if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); // If we need dynamic stack realignment, do it here. Be paranoid and make // sure if we also have VLAs, we have a base pointer for frame access. if (RegInfo->needsStackRealignment(MF)) { unsigned MaxAlign = MFI->getMaxAlignment(); assert (!AFI->isThumb1OnlyFunction()); if (!AFI->isThumbFunction()) { // Emit bic sp, sp, MaxAlign AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::BICri), ARM::SP) .addReg(ARM::SP, RegState::Kill) .addImm(MaxAlign-1))); } else { // We cannot use sp as source/dest register here, thus we're emitting the // following sequence: // mov r4, sp // bic r4, r4, MaxAlign // mov sp, r4 // FIXME: It will be better just to find spare register here. BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R4) .addReg(ARM::SP, RegState::Kill); AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2BICri), ARM::R4) .addReg(ARM::R4, RegState::Kill) .addImm(MaxAlign-1))); BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) .addReg(ARM::R4, RegState::Kill); } AFI->setShouldRestoreSPFromFP(true); } // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. // FIXME: Clarify FrameSetup flags here. if (RegInfo->hasBasePointer(MF)) { if (isARM) BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister()) .addReg(ARM::SP) .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); else BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), RegInfo->getBaseRegister()) .addReg(ARM::SP); } // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already // checks for hasVarSizedObjects. if (MFI->hasVarSizedObjects()) AFI->setShouldRestoreSPFromFP(true); }
void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); bool FP = hasFP(MF); //handle GOP offset BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29) .addGlobalAddress(const_cast<Function*>(MF.getFunction())) .addReg(Alpha::R27).addImm(++curgpdist); BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29) .addGlobalAddress(const_cast<Function*>(MF.getFunction())) .addReg(Alpha::R29).addImm(curgpdist); //evil const_cast until MO stuff setup to handle const BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT)) .addGlobalAddress(const_cast<Function*>(MF.getFunction())); // Get the number of bytes to allocate from the FrameInfo long NumBytes = MFI->getStackSize(); if (FP) NumBytes += 8; //reserve space for the old FP // Do we need to allocate space on the stack? if (NumBytes == 0) return; unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); NumBytes = (NumBytes+Align-1)/Align*Align; // Update frame info to pretend that this is part of the stack... MFI->setStackSize(NumBytes); // adjust stack pointer: r30 -= numbytes NumBytes = -NumBytes; if (NumBytes >= IMM_LOW) { BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes) .addReg(Alpha::R30); } else if (getUpper16(NumBytes) >= IMM_LOW) { BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30) .addImm(getUpper16(NumBytes)).addReg(Alpha::R30); BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30) .addImm(getLower16(NumBytes)).addReg(Alpha::R30); } else { std::string msg; raw_string_ostream Msg(msg); Msg << "Too big a stack frame at " + NumBytes; llvm_report_error(Msg.str()); } //now if we need to, save the old FP and set the new if (FP) { BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ)) .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30); //this must be the last instr in the prolog BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15) .addReg(Alpha::R30).addReg(Alpha::R30); } }
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned BasePtr = RegInfo->getBaseRegister(); // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. NumBytes = (NumBytes + 3) & ~3; MFI->setStackSize(NumBytes); // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; if (ArgRegsSaveSize) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize, MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, MachineInstr::FrameSetup); return; } for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); int FI = CSI[i].getFrameIdx(); switch (Reg) { case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: if (Reg == FramePtr) FramePtrSpillFI = FI; AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; break; case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: if (Reg == FramePtr) FramePtrSpillFI = FI; if (STI.isTargetIOS()) { AFI->addGPRCalleeSavedArea2Frame(FI); GPRCS2Size += 4; } else { AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; } break; default: AFI->addDPRCalleeSavedAreaFrame(FI); DPRCSSize += 8; } } if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { ++MBBI; if (MBBI != MBB.end()) dl = MBBI->getDebugLoc(); } // Determine starting offsets of spill areas. unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; bool HasFP = hasFP(MF); if (HasFP) AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); NumBytes = DPRCSOffset; // Adjust FP so it point to the stack slot that contains the previous FP. if (HasFP) { AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0) .setMIFlags(MachineInstr::FrameSetup)); if (NumBytes > 508) // If offset is > 508 then sp cannot be adjusted in a single instruction, // try restoring from fp instead. AFI->setShouldRestoreSPFromFP(true); } if (NumBytes) // Insert it after all the callee-save spills. emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, MachineInstr::FrameSetup); if (STI.isTargetELF() && HasFP) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); // Thumb1 does not currently support dynamic stack realignment. Report a // fatal error rather then silently generate bad code. if (RegInfo->needsStackRealignment(MF)) report_fatal_error("Dynamic stack realignment not supported for thumb1."); // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. if (RegInfo->hasBasePointer(MF)) AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) .addReg(ARM::SP)); // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already // checks for hasVarSizedObjects. if (MFI->hasVarSizedObjects()) AFI->setShouldRestoreSPFromFP(true); }
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); MachineFrameInfo *MFI = MF.getFrameInfo(); const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>( MF.getSubtarget().getRegisterInfo()); DebugLoc DL = MBBI->getDebugLoc(); unsigned RetOpcode = MBBI->getOpcode(); int NumBytes = MFI->getStackSize(); const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); // Initial and residual are named for consitency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t ArgumentPopSize = 0; if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) { MachineOperand &StackAdjust = MBBI->getOperand(1); // For a tail-call in a callee-pops-arguments environment, some or all of // the stack may actually be in use for the call's arguments, this is // calculated during LowerCall and consumed here... ArgumentPopSize = StackAdjust.getImm(); } else { // ... otherwise the amount to pop is *all* of the argument space, // conveniently stored in the MachineFunctionInfo by // LowerFormalArguments. This will, of course, be zero for the C calling // convention. ArgumentPopSize = AFI->getArgumentStackToRestore(); } // The stack frame should be like below, // // ---------------------- --- // | | | // | BytesInStackArgArea| CalleeArgStackSize // | (NumReusableBytes) | (of tail call) // | | --- // | | | // ---------------------| --- | // | | | | // | CalleeSavedReg | | | // | (NumRestores * 16) | | | // | | | | // ---------------------| | NumBytes // | | StackSize (StackAdjustUp) // | LocalStackSize | | | // | (covering callee | | | // | args) | | | // | | | | // ---------------------- --- --- // // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize // = StackSize + ArgumentPopSize // // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps // it as the 2nd argument of AArch64ISD::TC_RETURN. NumBytes += ArgumentPopSize; unsigned NumRestores = 0; // Move past the restores of the callee-saved registers. MachineBasicBlock::iterator LastPopI = MBBI; const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); if (LastPopI != MBB.begin()) { do { ++NumRestores; --LastPopI; } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs)); if (!isCSRestore(LastPopI, CSRegs)) { ++LastPopI; --NumRestores; } } NumBytes -= NumRestores * 16; assert(NumBytes >= 0 && "Negative stack allocation size!?"); if (!hasFP(MF)) { // If this was a redzone leaf function, we don't need to restore the // stack pointer. if (!canUseRedZone(MF)) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII); return; } // Restore the original stack pointer. // FIXME: Rather than doing the math here, we should instead just use // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. if (NumBytes || MFI->hasVarSizedObjects()) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags); }
void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. NumBytes = (NumBytes + 3) & ~3; MFI->setStackSize(NumBytes); // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; if (VARegSaveSize) emitSPUpdate(MBB, MBBI, TII, dl, *this, -VARegSaveSize); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); return; } for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); int FI = CSI[i].getFrameIdx(); switch (Reg) { case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: if (Reg == FramePtr) FramePtrSpillFI = FI; AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; break; case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: if (Reg == FramePtr) FramePtrSpillFI = FI; if (STI.isTargetDarwin()) { AFI->addGPRCalleeSavedArea2Frame(FI); GPRCS2Size += 4; } else { AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; } break; default: AFI->addDPRCalleeSavedAreaFrame(FI); DPRCSSize += 8; } } if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { ++MBBI; if (MBBI != MBB.end()) dl = MBBI->getDebugLoc(); } // Darwin ABI requires FP to point to the stack slot that contains the // previous FP. if (STI.isTargetDarwin() || hasFP(MF)) { BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0); } // Determine starting offsets of spill areas. unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); NumBytes = DPRCSOffset; if (NumBytes) { // Insert it after all the callee-save spills. emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); } if (STI.isTargetELF() && hasFP(MF)) { MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); } AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); }
bool AArch64FrameLowering::spillCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); unsigned Count = CSI.size(); DebugLoc DL; assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); if (MI != MBB.end()) DL = MI->getDebugLoc(); for (unsigned i = 0; i < Count; i += 2) { unsigned idx = Count - i - 2; unsigned Reg1 = CSI[idx].getReg(); unsigned Reg2 = CSI[idx + 1].getReg(); // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI // list to come in sorted by frame index so that we can issue the store // pair instructions directly. Assert if we see anything otherwise. // // The order of the registers in the list is controlled by // getCalleeSavedRegs(), so they will always be in-order, as well. assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() && "Out of order callee saved regs!"); unsigned StrOpc; assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); assert((i & 1) == 0 && "Odd index for callee-saved reg spill!"); // Issue sequence of non-sp increment and pi sp spills for cs regs. The // first spill is a pre-increment that allocates the stack. // For example: // stp x22, x21, [sp, #-48]! // addImm(-6) // stp x20, x19, [sp, #16] // addImm(+2) // stp fp, lr, [sp, #32] // addImm(+4) // Rationale: This sequence saves uop updates compared to a sequence of // pre-increment spills like stp xi,xj,[sp,#-16]! // Note: Similar rational and sequence for restores in epilog. if (AArch64::GPR64RegClass.contains(Reg1)) { assert(AArch64::GPR64RegClass.contains(Reg2) && "Expected GPR64 callee-saved register pair!"); // For first spill use pre-increment store. if (i == 0) StrOpc = AArch64::STPXpre; else StrOpc = AArch64::STPXi; } else if (AArch64::FPR64RegClass.contains(Reg1)) { assert(AArch64::FPR64RegClass.contains(Reg2) && "Expected FPR64 callee-saved register pair!"); // For first spill use pre-increment store. if (i == 0) StrOpc = AArch64::STPDpre; else StrOpc = AArch64::STPDi; } else llvm_unreachable("Unexpected callee saved register!"); DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", " << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx() << ", " << CSI[idx + 1].getFrameIdx() << ")\n"); // Compute offset: i = 0 => offset = -Count; // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc. const int Offset = (i == 0) ? -Count : i; assert((Offset >= -64 && Offset <= 63) && "Offset out of bounds for STP immediate"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre) MIB.addReg(AArch64::SP, RegState::Define); MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)) .addReg(Reg1, getPrologueDeath(MF, Reg1)) .addReg(AArch64::SP) .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit .setMIFlag(MachineInstr::FrameSetup); } return true; }
void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = prior(MBB.end()); assert((MBBI->getOpcode() == ARM::tBX_RET || MBBI->getOpcode() == ARM::tPOP_RET) && "Can only insert epilog into returning blocks"); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. const unsigned *CSRegs = getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do --MBBI; while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); if (!isCSRestore(MBBI, CSRegs)) ++MBBI; } // Move SP to start of FP callee save spill area. NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); if (hasFP(MF)) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot or target is ELF and the function has FP. if (NumBytes) emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, TII, *this, dl); else BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) .addReg(FramePtr); } else { if (MBBI->getOpcode() == ARM::tBX_RET && &MBB.front() != MBBI && prior(MBBI)->getOpcode() == ARM::tPOP) { MachineBasicBlock::iterator PMBBI = prior(MBBI); emitSPUpdate(MBB, PMBBI, TII, dl, *this, NumBytes); } else emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); } } if (VARegSaveSize) { // Epilogue for vararg functions: pop LR to R3 and branch off it. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(0) // No write back. .addReg(ARM::R3, RegState::Define); emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize); BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) .addReg(ARM::R3, RegState::Kill); MBB.erase(MBBI); } }
MachineInstrBuilder MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc, MachineBasicBlock::iterator I) const { MachineInstrBuilder MIB; // Certain branches have two forms: e.g beq $1, $zero, dest vs beqz $1, dest // Pick the zero form of the branch for readable assembly and for greater // branch distance in non-microMIPS mode. // Additional MIPSR6 does not permit the use of register $zero for compact // branches. // FIXME: Certain atomic sequences on mips64 generate 32bit references to // Mips::ZERO, which is incorrect. This test should be updated to use // Subtarget.getABI().GetZeroReg() when those atomic sequences and others // are fixed. int ZeroOperandPosition = -1; bool BranchWithZeroOperand = false; if (I->isBranch() && !I->isPseudo()) { auto TRI = I->getParent()->getParent()->getSubtarget().getRegisterInfo(); ZeroOperandPosition = I->findRegisterUseOperandIdx(Mips::ZERO, false, TRI); BranchWithZeroOperand = ZeroOperandPosition != -1; } if (BranchWithZeroOperand) { switch (NewOpc) { case Mips::BEQC: NewOpc = Mips::BEQZC; break; case Mips::BNEC: NewOpc = Mips::BNEZC; break; case Mips::BGEC: NewOpc = Mips::BGEZC; break; case Mips::BLTC: NewOpc = Mips::BLTZC; break; case Mips::BEQC64: NewOpc = Mips::BEQZC64; break; case Mips::BNEC64: NewOpc = Mips::BNEZC64; break; } } MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc)); // For MIPSR6 JI*C requires an immediate 0 as an operand, JIALC(64) an // immediate 0 as an operand and requires the removal of it's implicit-def %ra // implicit operand as copying the implicit operations of the instructio we're // looking at will give us the correct flags. if (NewOpc == Mips::JIC || NewOpc == Mips::JIALC || NewOpc == Mips::JIC64 || NewOpc == Mips::JIALC64) { if (NewOpc == Mips::JIALC || NewOpc == Mips::JIALC64) MIB->RemoveOperand(0); for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { MIB.add(I->getOperand(J)); } MIB.addImm(0); } else { for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J) continue; MIB.add(I->getOperand(J)); } } MIB.copyImplicitOps(*I); MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end()); return MIB; }
void SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { assert(SPAdj == 0 && "Outgoing arguments should be part of the frame"); MachineBasicBlock &MBB = *MI->getParent(); MachineFunction &MF = *MBB.getParent(); auto *TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); const SystemZFrameLowering *TFI = getFrameLowering(MF); DebugLoc DL = MI->getDebugLoc(); // Decompose the frame index into a base and offset. int FrameIndex = MI->getOperand(FIOperandNum).getIndex(); unsigned BasePtr; int64_t Offset = (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr) + MI->getOperand(FIOperandNum + 1).getImm()); // Special handling of dbg_value instructions. if (MI->isDebugValue()) { MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, /*isDef*/ false); MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); return; } // See if the offset is in range, or if an equivalent instruction that // accepts the offset exists. unsigned Opcode = MI->getOpcode(); unsigned OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset); if (OpcodeForOffset) { if (OpcodeForOffset == SystemZ::LE && MF.getSubtarget<SystemZSubtarget>().hasVector()) { // If LE is ok for offset, use LDE instead on z13. OpcodeForOffset = SystemZ::LDE32; } MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); } else { // Create an anchor point that is in range. Start at 0xffff so that // can use LLILH to load the immediate. int64_t OldOffset = Offset; int64_t Mask = 0xffff; do { Offset = OldOffset & Mask; OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset); Mask >>= 1; assert(Mask && "One offset must be OK"); } while (!OpcodeForOffset); unsigned ScratchReg = MF.getRegInfo().createVirtualRegister(&SystemZ::ADDR64BitRegClass); int64_t HighOffset = OldOffset - Offset; if (MI->getDesc().TSFlags & SystemZII::HasIndex && MI->getOperand(FIOperandNum + 2).getReg() == 0) { // Load the offset into the scratch register and use it as an index. // The scratch register then dies here. TII->loadImmediate(MBB, MI, ScratchReg, HighOffset); MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); MI->getOperand(FIOperandNum + 2).ChangeToRegister(ScratchReg, false, false, true); } else { // Load the anchor address into a scratch register. unsigned LAOpcode = TII->getOpcodeForOffset(SystemZ::LA, HighOffset); if (LAOpcode) BuildMI(MBB, MI, DL, TII->get(LAOpcode),ScratchReg) .addReg(BasePtr).addImm(HighOffset).addReg(0); else { // Load the high offset into the scratch register and use it as // an index. TII->loadImmediate(MBB, MI, ScratchReg, HighOffset); BuildMI(MBB, MI, DL, TII->get(SystemZ::AGR),ScratchReg) .addReg(ScratchReg, RegState::Kill).addReg(BasePtr); } // Use the scratch register as the base. It then dies here. MI->getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true); } } MI->setDesc(TII->get(OpcodeForOffset)); MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); }
void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB MachineFrameInfo *MFI = MF.getFrameInfo(); MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>(); const MSP430InstrInfo &TII = *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Get the number of bytes to allocate from the FrameInfo. uint64_t StackSize = MFI->getStackSize(); uint64_t NumBytes = 0; if (hasFP(MF)) { // Calculate required stack adjustment uint64_t FrameSize = StackSize - 2; NumBytes = FrameSize - MSP430FI->getCalleeSavedFrameSize(); // Get the offset of the stack slot for the EBP register... which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. // Update the frame offset adjustment. MFI->setOffsetAdjustment(-NumBytes); // Save FPW into the appropriate stack slot... BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r)) .addReg(MSP430::FPW, RegState::Kill); // Update FPW with the new base value... BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW) .addReg(MSP430::SPW); // Mark the FramePtr as live-in in every block except the entry. for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) I->addLiveIn(MSP430::FPW); } else NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize(); // Skip the callee-saved push instructions. while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r)) ++MBBI; if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); if (NumBytes) { // adjust stack pointer: SPW -= numbytes // If there is an SUB16ri of SPW immediately before this instruction, merge // the two. //NumBytes -= mergeSPUpdates(MBB, MBBI, true); // If there is an ADD16ri or SUB16ri of SPW immediately after this // instruction, merge the two instructions. // mergeSPUpdatesDown(MBB, MBBI, &NumBytes); if (NumBytes) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW) .addReg(MSP430::SPW).addImm(NumBytes); // The SRW implicit def is dead. MI->getOperand(3).setIsDead(); } } }
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const ThumbRegisterInfo *RegInfo = static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); assert((unsigned)NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { if (NumBytes - ArgRegsSaveSize != 0) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes - ArgRegsSaveSize); } else { // Unwind MBBI to point to first LDR / VLDRD. if (MBBI != MBB.begin()) { do --MBBI; while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); if (!isCSRestore(MBBI, CSRegs)) ++MBBI; } // Move SP to start of FP callee save spill area. NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize() + ArgRegsSaveSize); if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot, the target is ELF and the function has FP, or // the target uses var sized objects. if (NumBytes) { assert(!MFI->getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(ARM::R4)); } else AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) .addReg(FramePtr)); } else { if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { MachineBasicBlock::iterator PMBBI = std::prev(MBBI); if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes)) emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes)) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); } } if (needPopSpecialFixUp(MF)) { bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true); (void)Done; assert(Done && "Emission of the special fixup failed!?"); } }
bool GCMachineCodeFixup::runOnMachineFunction(MachineFunction &MF) { // Quick exit for functions that do not use GC. if (!MF.getFunction()->hasGC()) return false; const TargetMachine &TM = MF.getTarget(); const TargetInstrInfo *TII = TM.getInstrInfo(); GCModuleInfo &GMI = getAnalysis<GCModuleInfo>(); GCFunctionInfo &GCFI = GMI.getFunctionInfo(*MF.getFunction()); for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end(); MBBI != MBBE; ++MBBI) { for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end(); MII != MIE;) { if (!MII->isGCRegRoot() || !MII->getOperand(0).isReg()) { ++MII; continue; } // Trace the register back to its location at the site of the call (either // a physical reg or a frame index). bool TracingReg = true; unsigned TracedReg = MII->getOperand(0).getReg(); int FrameIndex; MachineBasicBlock::iterator PrevII = MII; for (--PrevII;; --PrevII) { if (PrevII->isGCRegRoot() && PrevII->getOperand(0).isReg()) break; if (PrevII->isCall()) break; int FI; // Trace back through register reloads. unsigned Reg = TM.getInstrInfo()->isLoadFromStackSlotPostFE(&*PrevII, FI); if (Reg) { // This is a reload. If we're tracing this register, start tracing the // frame index instead. if (TracingReg && TracedReg == Reg) { TracingReg = false; FrameIndex = FI; } continue; } // Trace back through spills. if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&*PrevII, FI)) continue; // Trace back through register-to-register copies. if (PrevII->isCopy()) { if (TracingReg && TracedReg == PrevII->getOperand(0).getReg()) TracedReg = PrevII->getOperand(1).getReg(); continue; } // Trace back through non-register GC_REG_ROOT instructions. if (PrevII->isGCRegRoot() && !PrevII->getOperand(0).isReg()) continue; DEBUG(dbgs() << "Bad instruction: " << *PrevII); llvm_unreachable("GC_REG_ROOT found in an unexpected location!"); } // Now we've reached either a call or another GC_REG_ROOT instruction. // Move the GC_REG_ROOT instruction we're considering to the right place, // and rewrite it if necessary. // // Also, tell the GCFunctionInfo about the frame index, since this is // our only chance -- the frame indices will be deleted by the time // GCMachineCodeAnalysis runs. ++PrevII; unsigned RootIndex = MII->getOperand(1).getImm(); MachineInstr *NewMI; if (TracingReg) { MachineInstrBuilder MIB = BuildMI(MF, MII->getDebugLoc(), TII->get(TargetOpcode::GC_REG_ROOT)); MIB.addReg(TracedReg).addImm(RootIndex); NewMI = MIB; } else { NewMI = TII->emitFrameIndexGCRegRoot(MF, FrameIndex, RootIndex, MII->getDebugLoc()); GCFI.spillRegRoot(RootIndex, FrameIndex); } MBBI->insert(PrevII, NewMI); MachineBasicBlock::iterator NextII = MII; ++NextII; MII->eraseFromParent(); MII = NextII; } } return true; }
void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); if (!MipsFI->globalBaseRegSet()) return; MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator I = MBB.begin(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg(); const TargetRegisterClass *RC; if (Subtarget.isABI_N64()) RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; else RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass; V0 = RegInfo.createVirtualRegister(RC); V1 = RegInfo.createVirtualRegister(RC); if (Subtarget.isABI_N64()) { MF.getRegInfo().addLiveIn(Mips::T9_64); MBB.addLiveIn(Mips::T9_64); // lui $v0, %hi(%neg(%gp_rel(fname))) // daddu $v1, $v0, $t9 // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) const GlobalValue *FName = MF.getFunction(); BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0) .addReg(Mips::T9_64); BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); return; } if (MF.getTarget().getRelocationModel() == Reloc::Static) { // Set global register to __gnu_local_gp. // // lui $v0, %hi(__gnu_local_gp) // addiu $globalbasereg, $v0, %lo(__gnu_local_gp) BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI); BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0) .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); return; } MF.getRegInfo().addLiveIn(Mips::T9); MBB.addLiveIn(Mips::T9); if (Subtarget.isABI_N32()) { // lui $v0, %hi(%neg(%gp_rel(fname))) // addu $v1, $v0, $t9 // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) const GlobalValue *FName = MF.getFunction(); BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); return; } assert(Subtarget.isABI_O32()); // For O32 ABI, the following instruction sequence is emitted to initialize // the global base register: // // 0. lui $2, %hi(_gp_disp) // 1. addiu $2, $2, %lo(_gp_disp) // 2. addu $globalbasereg, $2, $t9 // // We emit only the last instruction here. // // GNU linker requires that the first two instructions appear at the beginning // of a function and no instructions be inserted before or between them. // The two instructions are emitted during lowering to MC layer in order to // avoid any reordering. // // Register $2 (Mips::V0) is added to the list of live-in registers to ensure // the value instruction 1 (addiu) defines is valid when instruction 2 (addu) // reads it. MF.getRegInfo().addLiveIn(Mips::V0); MBB.addLiveIn(Mips::V0); BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) .addReg(Mips::V0).addReg(Mips::T9); }
MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, const LdStPairFlags &Flags) { MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need // to skip one further. Either way we merge will invalidate the iterator, // and we don't need to scan the new instruction, as it's a pairwise // instruction, which we're not considering for further action anyway. if (NextI == Paired) ++NextI; int SExtIdx = Flags.getSExtIdx(); unsigned Opc = SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode()); bool IsUnscaled = isUnscaledLdSt(Opc); int OffsetStride = IsUnscaled ? getMemScale(I) : 1; bool MergeForward = Flags.getMergeForward(); unsigned NewOpc = getMatchingPairOpcode(Opc); // Insert our new paired instruction after whichever of the paired // instructions MergeForward indicates. MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; // Also based on MergeForward is from where we copy the base register operand // so we get the flags compatible with the input code. const MachineOperand &BaseRegOp = MergeForward ? getLdStBaseOp(Paired) : getLdStBaseOp(I); // Which register is Rt and which is Rt2 depends on the offset order. MachineInstr *RtMI, *Rt2MI; if (getLdStOffsetOp(I).getImm() == getLdStOffsetOp(Paired).getImm() + OffsetStride) { RtMI = Paired; Rt2MI = I; // Here we swapped the assumption made for SExtIdx. // I.e., we turn ldp I, Paired into ldp Paired, I. // Update the index accordingly. if (SExtIdx != -1) SExtIdx = (SExtIdx + 1) % 2; } else { RtMI = I; Rt2MI = Paired; } int OffsetImm = getLdStOffsetOp(RtMI).getImm(); if (isSmallTypeLdMerge(Opc)) { // Change the scaled offset from small to large type. if (!IsUnscaled) OffsetImm /= 2; MachineInstr *RtNewDest = MergeForward ? I : Paired; // Construct the new load instruction. // FIXME: currently we support only halfword unsigned load. We need to // handle byte type, signed, and store instructions as well. MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2; NewMemMI = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) .addOperand(getLdStRegOp(RtNewDest)) .addOperand(BaseRegOp) .addImm(OffsetImm); // Copy MachineMemOperands from the original loads. concatenateMemOperands(NewMemMI, I, Paired); DEBUG( dbgs() << "Creating the new load and extract. Replacing instructions:\n "); DEBUG(I->print(dbgs())); DEBUG(dbgs() << " "); DEBUG(Paired->print(dbgs())); DEBUG(dbgs() << " with instructions:\n "); DEBUG((NewMemMI)->print(dbgs())); MachineInstr *ExtDestMI = MergeForward ? Paired : I; if (ExtDestMI == Rt2MI) { // Create the bitfield extract for high half. BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), TII->get(AArch64::UBFMWri)) .addOperand(getLdStRegOp(Rt2MI)) .addReg(getLdStRegOp(RtNewDest).getReg()) .addImm(16) .addImm(31); // Create the bitfield extract for low half. BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), TII->get(AArch64::ANDWri)) .addOperand(getLdStRegOp(RtMI)) .addReg(getLdStRegOp(RtNewDest).getReg()) .addImm(15); } else { // Create the bitfield extract for low half. BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), TII->get(AArch64::ANDWri)) .addOperand(getLdStRegOp(RtMI)) .addReg(getLdStRegOp(RtNewDest).getReg()) .addImm(15); // Create the bitfield extract for high half. BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), TII->get(AArch64::UBFMWri)) .addOperand(getLdStRegOp(Rt2MI)) .addReg(getLdStRegOp(RtNewDest).getReg()) .addImm(16) .addImm(31); } DEBUG(dbgs() << " "); DEBUG((BitExtMI1)->print(dbgs())); DEBUG(dbgs() << " "); DEBUG((BitExtMI2)->print(dbgs())); DEBUG(dbgs() << "\n"); // Erase the old instructions. I->eraseFromParent(); Paired->eraseFromParent(); return NextI; } // Handle Unscaled if (IsUnscaled) OffsetImm /= OffsetStride; // Construct the new instruction. MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), TII->get(NewOpc)) .addOperand(getLdStRegOp(RtMI)) .addOperand(getLdStRegOp(Rt2MI)) .addOperand(BaseRegOp) .addImm(OffsetImm); (void)MIB; // FIXME: Do we need/want to copy the mem operands from the source // instructions? Probably. What uses them after this? DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n "); DEBUG(I->print(dbgs())); DEBUG(dbgs() << " "); DEBUG(Paired->print(dbgs())); DEBUG(dbgs() << " with instruction:\n "); if (SExtIdx != -1) { // Generate the sign extension for the proper result of the ldp. // I.e., with X1, that would be: // %W1<def> = KILL %W1, %X1<imp-def> // %X1<def> = SBFMXri %X1<kill>, 0, 31 MachineOperand &DstMO = MIB->getOperand(SExtIdx); // Right now, DstMO has the extended register, since it comes from an // extended opcode. unsigned DstRegX = DstMO.getReg(); // Get the W variant of that register. unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); // Update the result of LDP to use the W instead of the X variant. DstMO.setReg(DstRegW); DEBUG(((MachineInstr *)MIB)->print(dbgs())); DEBUG(dbgs() << "\n"); // Make the machine verifier happy by providing a definition for // the X register. // Insert this definition right after the generated LDP, i.e., before // InsertionPoint. MachineInstrBuilder MIBKill = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), TII->get(TargetOpcode::KILL), DstRegW) .addReg(DstRegW) .addReg(DstRegX, RegState::Define); MIBKill->getOperand(2).setImplicit(); // Create the sign extension. MachineInstrBuilder MIBSXTW = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), TII->get(AArch64::SBFMXri), DstRegX) .addReg(DstRegX) .addImm(0) .addImm(31); (void)MIBSXTW; DEBUG(dbgs() << " Extend operand:\n "); DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs())); DEBUG(dbgs() << "\n"); } else { DEBUG(((MachineInstr *)MIB)->print(dbgs())); DEBUG(dbgs() << "\n"); } // Erase the old instructions. I->eraseFromParent(); Paired->eraseFromParent(); return NextI; }
void X86RegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); bool reseveCallFrame = TFI->hasReservedCallFrame(MF); int Opcode = I->getOpcode(); bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); DebugLoc DL = I->getDebugLoc(); uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; I = MBB.erase(I); if (!reseveCallFrame) { // If the stack pointer can be changed after prologue, turn the // adjcallstackup instruction into a 'sub ESP, <amt>' and the // adjcallstackdown instruction into 'add ESP, <amt>' // TODO: consider using push / pop instead of sub + store / add if (Amount == 0) return; // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; MachineInstr *New = 0; if (Opcode == TII.getCallFrameSetupOpcode()) { New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)), StackPtr) .addReg(StackPtr) .addImm(Amount); } else { assert(Opcode == TII.getCallFrameDestroyOpcode()); // Factor out the amount the callee already popped. Amount -= CalleeAmt; if (Amount) { unsigned Opc = getADDriOpcode(Is64Bit, Amount); New = BuildMI(MF, DL, TII.get(Opc), StackPtr) .addReg(StackPtr).addImm(Amount); } } if (New) { // The EFLAGS implicit def is dead. New->getOperand(3).setIsDead(); // Replace the pseudo instruction with a new instruction. MBB.insert(I, New); } return; } if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) { // If we are performing frame pointer elimination and if the callee pops // something off the stack pointer, add it back. We do this until we have // more advanced stack pointer tracking ability. unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt); MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr) .addReg(StackPtr).addImm(CalleeAmt); // The EFLAGS implicit def is dead. New->getOperand(3).setIsDead(); // We are not tracking the stack pointer adjustment by the callee, so make // sure we restore the stack pointer immediately after the call, there may // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. MachineBasicBlock::iterator B = MBB.begin(); while (I != B && !llvm::prior(I)->isCall()) --I; MBB.insert(I, New); } }
void MipsSEInstrInfo::expandERet(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { BuildMI(MBB, I, I->getDebugLoc(), get(Mips::ERET)); }
void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, const CallContext &Context) { // Ok, we can in fact do the transformation for this call. // Do not remove the FrameSetup instruction, but adjust the parameters. // PEI will end up finalizing the handling of this. MachineBasicBlock::iterator FrameSetup = Context.FrameSetup; MachineBasicBlock &MBB = *(FrameSetup->getParent()); FrameSetup->getOperand(1).setImm(Context.ExpectedDist); DebugLoc DL = FrameSetup->getDebugLoc(); bool Is64Bit = STI->is64Bit(); // Now, iterate through the vector in reverse order, and replace the movs // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to // replace uses. for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) { MachineBasicBlock::iterator MOV = *Context.MovVector[Idx]; MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands); MachineBasicBlock::iterator Push = nullptr; unsigned PushOpcode; switch (MOV->getOpcode()) { default: llvm_unreachable("Unexpected Opcode!"); case X86::MOV32mi: case X86::MOV64mi32: PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSHi32; // If the operand is a small (8-bit) immediate, we can use a // PUSH instruction with a shorter encoding. // Note that isImm() may fail even though this is a MOVmi, because // the operand can also be a symbol. if (PushOp.isImm()) { int64_t Val = PushOp.getImm(); if (isInt<8>(Val)) PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8; } Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)) .addOperand(PushOp); break; case X86::MOV32mr: case X86::MOV64mr: unsigned int Reg = PushOp.getReg(); // If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg // in preparation for the PUSH64. The upper 32 bits can be undef. if (Is64Bit && MOV->getOpcode() == X86::MOV32mr) { unsigned UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass); Reg = MRI->createVirtualRegister(&X86::GR64RegClass); BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg); BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg) .addReg(UndefReg) .addOperand(PushOp) .addImm(X86::sub_32bit); } // If PUSHrmm is not slow on this target, try to fold the source of the // push into the instruction. bool SlowPUSHrmm = STI->isAtom() || STI->isSLM(); // Check that this is legal to fold. Right now, we're extremely // conservative about that. MachineInstr *DefMov = nullptr; if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) { PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm; Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)); unsigned NumOps = DefMov->getDesc().getNumOperands(); for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) Push->addOperand(DefMov->getOperand(i)); DefMov->eraseFromParent(); } else { PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r; Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)) .addReg(Reg) .getInstr(); } break; } // For debugging, when using SP-based CFA, we need to adjust the CFA // offset after each push. // TODO: This is needed only if we require precise CFA. if (!TFL->hasFP(MF)) TFL->BuildCFI( MBB, std::next(Push), DL, MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize)); MBB.erase(MOV); } // The stack-pointer copy is no longer used in the call sequences. // There should not be any other users, but we can't commit to that, so: if (Context.SPCopy && MRI->use_empty(Context.SPCopy->getOperand(0).getReg())) Context.SPCopy->eraseFromParent(); // Once we've done this, we need to make sure PEI doesn't assume a reserved // frame. X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); FuncInfo->setHasPushSequences(true); }
void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>(STI.getInstrInfo()); const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(STI.getRegisterInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MipsABIInfo ABI = STI.getABI(); unsigned SP = ABI.GetStackPtr(); unsigned FP = ABI.GetFramePtr(); unsigned ZERO = ABI.GetNullPtr(); unsigned ADDu = ABI.GetPtrAdduOp(); unsigned ADDiu = ABI.GetPtrAddiuOp(); unsigned AND = ABI.IsN64() ? Mips::AND64 : Mips::AND; const TargetRegisterClass *RC = ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; // First, compute final stack size. uint64_t StackSize = MFI->getStackSize(); // No need to allocate space on the stack. if (StackSize == 0 && !MFI->adjustsStack()) return; MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); MachineLocation DstML, SrcML; // Adjust stack. TII.adjustStackPtr(SP, -StackSize, MBB, MBBI); // emit ".cfi_def_cfa_offset StackSize" unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, -StackSize)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); if (CSI.size()) { // Find the instruction past the last instruction that saves a callee-saved // register to the stack. for (unsigned i = 0; i < CSI.size(); ++i) ++MBBI; // Iterate over list of callee-saved registers and emit .cfi_offset // directives. for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); unsigned Reg = I->getReg(); // If Reg is a double precision register, emit two cfa_offsets, // one for each of the paired single precision registers. if (Mips::AFGR64RegClass.contains(Reg)) { unsigned Reg0 = MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_lo), true); unsigned Reg1 = MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_hi), true); if (!STI.isLittle()) std::swap(Reg0, Reg1); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg0, Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } else if (Mips::FGR64RegClass.contains(Reg)) { unsigned Reg0 = MRI->getDwarfRegNum(Reg, true); unsigned Reg1 = MRI->getDwarfRegNum(Reg, true) + 1; if (!STI.isLittle()) std::swap(Reg0, Reg1); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg0, Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } else { // Reg is either in GPR32 or FGR32. unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, 1), Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } } if (MipsFI->callsEhReturn()) { // Insert instructions that spill eh data registers. for (int I = 0; I < 4; ++I) { if (!MBB.isLiveIn(ABI.GetEhDataReg(I))) MBB.addLiveIn(ABI.GetEhDataReg(I)); TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false, MipsFI->getEhDataRegFI(I), RC, &RegInfo); } // Emit .cfi_offset directives for eh data registers. for (int I = 0; I < 4; ++I) { int64_t Offset = MFI->getObjectOffset(MipsFI->getEhDataRegFI(I)); unsigned Reg = MRI->getDwarfRegNum(ABI.GetEhDataReg(I), true); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg, Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { // Insert instruction "move $fp, $sp" at this location. BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO) .setMIFlag(MachineInstr::FrameSetup); // emit ".cfi_def_cfa_register $fp" unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FP, true))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); if (RegInfo.needsStackRealignment(MF)) { // addiu $Reg, $zero, -MaxAlignment // andi $sp, $sp, $Reg unsigned VR = MF.getRegInfo().createVirtualRegister(RC); assert(isInt<16>(MFI->getMaxAlignment()) && "Function's alignment size requirement is not supported."); int MaxAlign = - (signed) MFI->getMaxAlignment(); BuildMI(MBB, MBBI, dl, TII.get(ADDiu), VR).addReg(ZERO) .addImm(MaxAlign); BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR); if (hasBP(MF)) { // move $s7, $sp unsigned BP = STI.isABI_N64() ? Mips::S7_64 : Mips::S7; BuildMI(MBB, MBBI, dl, TII.get(ADDu), BP) .addReg(SP) .addReg(ZERO); } } } }
void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo *MMI = &MF.getMMI(); const XCoreInstrInfo &TII = *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo()); XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); bool FP = hasFP(MF); const AttrListPtr &PAL = MF.getFunction()->getAttributes(); for (unsigned I = 0, E = PAL.getNumAttrs(); I != E; ++I) if (PAL.getAttributesAtIndex(I).hasAttribute(Attributes::Nest)) { loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII); break; } // Work out frame sizes. int FrameSize = MFI->getStackSize(); assert(FrameSize%4 == 0 && "Misaligned frame size"); FrameSize/=4; bool isU6 = isImmU6(FrameSize); if (!isU6 && !isImmU16(FrameSize)) { // FIXME could emit multiple instructions. report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize)); } bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(MF); // Do we need to allocate space on the stack? if (FrameSize) { bool saveLR = XFI->getUsesLR(); bool LRSavedOnEntry = false; int Opcode; if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) { Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6; MBB.addLiveIn(XCore::LR); saveLR = false; LRSavedOnEntry = true; } else { Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6; } BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize); if (emitFrameMoves) { std::vector<MachineMove> &Moves = MMI->getFrameMoves(); // Show update of SP. MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel); MachineLocation SPDst(MachineLocation::VirtualFP); MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4); Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); if (LRSavedOnEntry) { MachineLocation CSDst(MachineLocation::VirtualFP, 0); MachineLocation CSSrc(XCore::LR); Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc)); } } if (saveLR) { int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot()); storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl, TII); MBB.addLiveIn(XCore::LR); if (emitFrameMoves) { MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel); MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset); MachineLocation CSSrc(XCore::LR); MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc)); } } } if (FP) { // Save R10 to the stack. int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot()); storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl, TII); // R10 is live-in. It is killed at the spill. MBB.addLiveIn(XCore::R10); if (emitFrameMoves) { MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label); MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset); MachineLocation CSSrc(XCore::R10); MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc)); } // Set the FP from the SP. unsigned FramePtr = XCore::R10; BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr) .addImm(0); if (emitFrameMoves) { // Show FP is now valid. MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel); MachineLocation SPDst(FramePtr); MachineLocation SPSrc(MachineLocation::VirtualFP); MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc)); } } if (emitFrameMoves) { // Frame moves for callee saved. std::vector<MachineMove> &Moves = MMI->getFrameMoves(); std::vector<std::pair<MCSymbol*, CalleeSavedInfo> >&SpillLabels = XFI->getSpillLabels(); for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) { MCSymbol *SpillLabel = SpillLabels[I].first; CalleeSavedInfo &CSI = SpillLabels[I].second; int Offset = MFI->getObjectOffset(CSI.getFrameIdx()); unsigned Reg = CSI.getReg(); MachineLocation CSDst(MachineLocation::VirtualFP, Offset); MachineLocation CSSrc(Reg); Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc)); } } }