void MIRPrinter::print(const MachineFunction &MF) {
  initRegisterMaskIds(MF);

  yaml::MachineFunction YamlMF;
  YamlMF.Name = MF.getName();
  YamlMF.Alignment = MF.getAlignment();
  YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();

  YamlMF.Legalized = MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::Legalized);
  YamlMF.RegBankSelected = MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::RegBankSelected);
  YamlMF.Selected = MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::Selected);

  convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
  ModuleSlotTracker MST(MF.getFunction().getParent());
  MST.incorporateFunction(MF.getFunction());
  convert(MST, YamlMF.FrameInfo, MF.getFrameInfo());
  convertStackObjects(YamlMF, MF, MST);
  if (const auto *ConstantPool = MF.getConstantPool())
    convert(YamlMF, *ConstantPool);
  if (const auto *JumpTableInfo = MF.getJumpTableInfo())
    convert(MST, YamlMF.JumpTableInfo, *JumpTableInfo);
  raw_string_ostream StrOS(YamlMF.Body.Value.Value);
  bool IsNewlineNeeded = false;
  for (const auto &MBB : MF) {
    if (IsNewlineNeeded)
      StrOS << "\n";
    MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
        .print(MBB);
    IsNewlineNeeded = true;
  }
  StrOS.flush();
  yaml::Output Out(OS);
  if (!SimplifyMIR)
    Out.setWriteDefaultValues(true);
  Out << YamlMF;
}
void PEI::spillCalleeSavedRegs(MachineFunction &Fn) {
  // We can't list this requirement in getRequiredProperties because some
  // targets (WebAssembly) use virtual registers past this point, and the pass
  // pipeline is set up without giving the passes a chance to look at the
  // TargetMachine.
  // FIXME: Find a way to express this in getRequiredProperties.
  assert(Fn.getProperties().hasProperty(
      MachineFunctionProperties::Property::NoVRegs));

  const Function &F = Fn.getFunction();
  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
  MachineFrameInfo &MFI = Fn.getFrameInfo();
  MinCSFrameIndex = std::numeric_limits<unsigned>::max();
  MaxCSFrameIndex = 0;

  // Determine which of the registers in the callee save list should be saved.
  BitVector SavedRegs;
  TFI->determineCalleeSaves(Fn, SavedRegs, RS);

  // Assign stack slots for any callee-saved registers that must be spilled.
  assignCalleeSavedSpillSlots(Fn, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex);

  // Add the code to save and restore the callee saved registers.
  if (!F.hasFnAttribute(Attribute::Naked)) {
    MFI.setCalleeSavedInfoValid(true);

    std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
    if (!CSI.empty()) {
      for (MachineBasicBlock *SaveBlock : SaveBlocks)
        insertCSRSaves(*SaveBlock, CSI);

      // Update the live-in information of all the blocks up to the save
      // point. This only needs to run once, after all save blocks have had
      // their spills inserted.
      updateLiveness(Fn);

      for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
        insertCSRRestores(*RestoreBlock, CSI);
    }
  }
}
void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const {
  if (Objects.empty())
    return;

  const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering();
  int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);

  OS << "Frame Objects:\n";

  for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
    const StackObject &SO = Objects[i];
    OS << "  fi#" << (int)(i - NumFixedObjects) << ": ";
    if (SO.StackID != 0)
      OS << "id=" << static_cast<unsigned>(SO.StackID) << ' ';

    if (SO.Size == ~0ULL) {
      OS << "dead\n";
      continue;
    }
    if (SO.Size == 0)
      OS << "variable sized";
    else
      OS << "size=" << SO.Size;
    OS << ", align=" << SO.Alignment;

    if (i < NumFixedObjects)
      OS << ", fixed";
    if (i < NumFixedObjects || SO.SPOffset != -1) {
      int64_t Off = SO.SPOffset - ValOffset;
      OS << ", at location [SP";
      if (Off > 0)
        OS << "+" << Off;
      else if (Off < 0)
        OS << Off;
      OS << "]";
    }
    OS << "\n";
  }
}
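// A hedged illustration of the dump format produced above (made-up frame, not
// taken from a real target): a function with one fixed 8-byte incoming
// argument slot and one 4-byte local would print roughly:
//
//   Frame Objects:
//     fi#-1: size=8, align=8, fixed, at location [SP+8]
//     fi#0: size=4, align=4, at location [SP-4]
//
// Negative indices denote fixed (incoming) objects, since the printed index
// is (int)(i - NumFixedObjects); offsets are shown relative to SP after
// subtracting getOffsetOfLocalArea().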
unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                                  MachineFunction &MF) const {
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();

  switch (RC->getID()) {
  default:
    return 0;
  case AArch64::GPR32RegClassID:
  case AArch64::GPR32spRegClassID:
  case AArch64::GPR32allRegClassID:
  case AArch64::GPR64spRegClassID:
  case AArch64::GPR64allRegClassID:
  case AArch64::GPR64RegClassID:
  case AArch64::GPR32commonRegClassID:
  case AArch64::GPR64commonRegClassID:
    return 32 - 1                                    // XZR/SP
               - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
               - (TT.isOSDarwin() ||
                  ReserveX18) // X18 reserved as platform register
               - hasBasePointer(MF); // X19
  case AArch64::FPR8RegClassID:
  case AArch64::FPR16RegClassID:
  case AArch64::FPR32RegClassID:
  case AArch64::FPR64RegClassID:
  case AArch64::FPR128RegClassID:
    return 32;

  case AArch64::DDRegClassID:
  case AArch64::DDDRegClassID:
  case AArch64::DDDDRegClassID:
  case AArch64::QQRegClassID:
  case AArch64::QQQRegClassID:
  case AArch64::QQQQRegClassID:
    return 32;

  case AArch64::FPR128_loRegClassID:
    return 16;
  }
}
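// Worked example of the GPR arithmetic above (hypothetical configuration, for
// illustration only): on Darwin with a frame pointer, both the FP and X18
// terms evaluate to 1, so the limit is 32 - 1 (XZR/SP) - 1 (FP) - 1 (X18)
// = 29 registers, dropping to 28 if hasBasePointer(MF) also claims X19.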
bool IRTranslator::runOnMachineFunction(MachineFunction &MF) {
  const Function &F = *MF.getFunction();
  if (F.empty())
    return false;
  CLI = MF.getSubtarget().getCallLowering();
  MIRBuilder.setMF(MF);
  MRI = &MF.getRegInfo();

  // Setup the arguments.
  MachineBasicBlock &MBB = getOrCreateBB(F.front());
  MIRBuilder.setMBB(MBB);
  SmallVector<unsigned, 8> VRegArgs;
  for (const Argument &Arg : F.args())
    VRegArgs.push_back(getOrCreateVReg(Arg));
  bool Succeeded =
      CLI->lowerFormalArguments(MIRBuilder, F.getArgumentList(), VRegArgs);
  if (!Succeeded)
    report_fatal_error("Unable to lower arguments");

  for (const BasicBlock &BB : F) {
    MachineBasicBlock &MBB = getOrCreateBB(BB);
    // Set the insertion point of all the following translations to
    // the end of this basic block.
    MIRBuilder.setMBB(MBB);
    for (const Instruction &Inst : BB) {
      bool Succeeded = translate(Inst);
      if (!Succeeded) {
        DEBUG(dbgs() << "Cannot translate: " << Inst << '\n');
        report_fatal_error("Unable to translate instruction");
      }
    }
  }

  // Now that the MachineFrameInfo has been configured, no further changes to
  // the reserved registers are possible.
  MRI->freezeReservedRegs(MF);

  return false;
}
bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
  const ARMSubtarget &STI =
      static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
  TRI = STI.getRegisterInfo();
  restrictIT = STI.restrictIT();

  if (!AFI->isThumbFunction())
    return false;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;) {
    MachineBasicBlock &MBB = *MFI;
    ++MFI;
    Modified |= InsertITInstructions(MBB);
  }

  if (Modified)
    AFI->setHasITBlocks(true);

  return Modified;
}
bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
  // Quick exit for functions that do not use GC.
  if (!MF.getFunction()->hasGC())
    return false;

  FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
  if (!FI->getStrategy().needsSafePoints())
    return false;

  MMI = &getAnalysis<MachineModuleInfo>();
  TII = MF.getSubtarget().getInstrInfo();

  // Find the size of the stack frame.
  FI->setFrameSize(MF.getFrameInfo()->getStackSize());

  // Find all safe points.
  FindSafePoints(MF);

  // Find the stack offsets for all roots.
  FindStackOffsets(MF);

  return false;
}
void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
                                      MachineBasicBlock &MBB) const {
  SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  const SparcInstrInfo &TII =
      *static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
  DebugLoc dl = MBBI->getDebugLoc();
  assert(MBBI->getOpcode() == SP::RETL &&
         "Can only put epilog before 'retl' instruction!");
  if (!FuncInfo->isLeafProc()) {
    BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0)
        .addReg(SP::G0)
        .addReg(SP::G0);
    return;
  }
  MachineFrameInfo *MFI = MF.getFrameInfo();

  int NumBytes = (int)MFI->getStackSize();
  if (NumBytes == 0)
    return;

  NumBytes = MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes);
  emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
}
void NyuziFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) const {
  MachineInstr &MI = *MBBI;
  const NyuziInstrInfo &TII =
      *static_cast<const NyuziInstrInfo *>(MF.getSubtarget().getInstrInfo());

  // Note the check for hasReservedCallFrame. If it returns true,
  // PEI::calculateFrameObjectOffsets has already reserved stack locations for
  // these variables and we don't need to adjust the stack here.
  int Amount = MI.getOperand(0).getImm();
  if (Amount != 0 && !hasReservedCallFrame(MF)) {
    assert(hasFP(MF) &&
           "Cannot adjust stack mid-function without a frame pointer");
    if (MI.getOpcode() == Nyuzi::ADJCALLSTACKDOWN)
      Amount = -Amount;

    TII.adjustStackPointer(MBB, MBBI, Amount);
  }

  MBB.erase(MBBI);
}
bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // We can't realign the stack if:
  // 1. Dynamic stack realignment is explicitly disabled,
  // 2. This is a Thumb1 function (it's not useful, so we don't bother), or
  // 3. There are VLAs in the function and the base pointer is disabled.
  if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
    return false;
  if (AFI->isThumb1OnlyFunction())
    return false;
  // Stack realignment requires a frame pointer. If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget<ARMSubtarget>())))
    return false;
  // We may also need a base pointer if there are dynamic allocas or stack
  // pointer adjustments around calls.
  if (MF.getSubtarget().getFrameLowering()->hasReservedCallFrame(MF))
    return true;
  // A base pointer is required and allowed. Check that it isn't too late to
  // reserve it.
  return MRI->canReserveReg(BasePtr);
}
bool EquivSubstPass::runOnMachineFunction(MachineFunction &Fn) {
  const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
  if (!RNG)
    RNG.reset(Fn.getFunction()->getParent()->createRNG(this));

  bool Changed = false;
  std::vector<const EquivInsnFilter *> Candidates;
  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end();) {
      ++PreEquivSubstInstructionCount;

      Candidates.clear();
      for (size_t i = 0; i < array_lengthof(Filters); i++)
        if (Filters[i]->check(*BB, *I))
          Candidates.push_back(Filters[i]);
      if (Candidates.empty()) {
        ++I;
        continue;
      }

      unsigned int Roll = RNG->Random(100);
      ++EquivSubstCandidates;
      if (Roll >= multicompiler::EquivSubstPercentage) {
        ++I;
        continue;
      }

      unsigned int Pick = RNG->Random(Candidates.size());
      MachineBasicBlock::iterator J = I;
      ++I;
      Candidates[Pick]->subst(*BB, TII, J);
      Changed = true;
      ++EquivSubstituted;
    }
  return Changed;
}
void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) {
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
  unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
  assert(FrameSetupOpcode != ~0u && FrameDestroyOpcode != ~0u &&
         "Can only compute MaxCallFrameSize if Setup/Destroy opcode are known");

  MaxCallFrameSize = 0;
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      unsigned Opcode = MI.getOpcode();
      if (Opcode == FrameSetupOpcode || Opcode == FrameDestroyOpcode) {
        unsigned Size = TII.getFrameSize(MI);
        MaxCallFrameSize = std::max(MaxCallFrameSize, Size);
        AdjustsStack = true;
      } else if (MI.isInlineAsm()) {
        // Some inline asm's need a stack frame, as indicated by operand 1.
        unsigned ExtraInfo = MI.getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
        if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
          AdjustsStack = true;
      }
    }
  }
}
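// Hedged sketch of the pattern this scan recognizes. On a target whose call
// frame pseudos are named ADJCALLSTACKDOWN/ADJCALLSTACKUP (names vary by
// target; these are illustrative), a call site lowers to something like:
//
//   ADJCALLSTACKDOWN 32, ...   ; FrameSetupOpcode, TII.getFrameSize(MI) == 32
//   ; ...stores of outgoing arguments to the reserved area...
//   CALL @callee
//   ADJCALLSTACKUP 32, ...     ; FrameDestroyOpcode
//
// Encountering either pseudo sets AdjustsStack and folds the 32 bytes into
// MaxCallFrameSize via std::max.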
/// insertPrologEpilogCode - Scan the function for modified callee saved
/// registers, insert spill code for these callee saved registers, then add
/// prolog and epilog code to the function.
void PEI::insertPrologEpilogCode(MachineFunction &MF) {
  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();

  // Add prologue to the function...
  for (MachineBasicBlock *SaveBlock : SaveBlocks)
    TFI.emitPrologue(MF, *SaveBlock);

  // Add epilogue to restore the callee-save registers in each exiting block.
  for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
    TFI.emitEpilogue(MF, *RestoreBlock);

  for (MachineBasicBlock *SaveBlock : SaveBlocks)
    TFI.inlineStackProbe(MF, *SaveBlock);

  // Emit additional code that is required to support segmented stacks, if
  // we've been asked for it. This, when linked with a runtime with support
  // for segmented stacks (libgcc is one), will result in allocating stack
  // space in small chunks instead of one large contiguous block.
  if (MF.shouldSplitStack()) {
    for (MachineBasicBlock *SaveBlock : SaveBlocks)
      TFI.adjustForSegmentedStacks(MF, *SaveBlock);
    // Record that there are split-stack functions, so we will emit a
    // special section to tell the linker.
    MF.getMMI().setHasSplitStack(true);
  } else
    MF.getMMI().setHasNosplitStack(true);

  // Emit additional code that is required to explicitly handle the stack in
  // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
  // approach is rather similar to that of Segmented Stacks, but it uses a
  // different conditional check and another BIF for allocating more stack
  // space.
  if (MF.getFunction().getCallingConv() == CallingConv::HiPE)
    for (MachineBasicBlock *SaveBlock : SaveBlocks)
      TFI.adjustForHiPEPrologue(MF, *SaveBlock);
}
bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) {
  // Skip this pass if "amdgpu-debugger-insert-nops" attribute was not
  // specified.
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  if (!ST.debuggerInsertNops())
    return false;

  // Skip machine functions without debug info.
  if (!MF.getMMI().hasDebugInfo())
    return false;

  // Target instruction info.
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());

  // Set containing line numbers that have nop inserted.
  DenseSet<unsigned> NopInserted;

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      // Skip DBG_VALUE instructions and instructions without location.
      if (MI->isDebugValue() || !MI->getDebugLoc())
        continue;

      // Insert nop instruction if line number does not have nop inserted.
      auto DL = MI->getDebugLoc();
      if (NopInserted.find(DL.getLine()) == NopInserted.end()) {
        BuildMI(MBB, *MI, DL, TII->get(AMDGPU::S_NOP)).addImm(0);
        NopInserted.insert(DL.getLine());
      }
    }
  }

  return true;
}
bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
               << "********** Function: " << MF.getName() << "\n");

  if (skipFunction(*MF.getFunction()))
    return false;

  // If we move NewValueJump before register allocation we'll need live
  // variable analysis here too.

  QII = static_cast<const HexagonInstrInfo *>(
      MF.getSubtarget().getInstrInfo());
  QRI = static_cast<const HexagonRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();

  if (DisableNewValueJumps)
    return false;

  int nvjCount = DbgNVJCount;
  int nvjGenerated = 0;

  // Loop through all the bb's of the function.
  for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
       MBBb != MBBe; ++MBBb) {
    MachineBasicBlock *MBB = &*MBBb;

    DEBUG(dbgs() << "** dumping bb ** " << MBB->getNumber() << "\n");
    DEBUG(MBB->dump());
    DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n");
    bool foundJump = false;
    bool foundCompare = false;
    bool invertPredicate = false;
    unsigned predReg = 0; // predicate reg of the jump.
    unsigned cmpReg1 = 0;
    int cmpOp2 = 0;
    bool MO1IsKill = false;
    bool MO2IsKill = false;
    MachineBasicBlock::iterator jmpPos;
    MachineBasicBlock::iterator cmpPos;
    MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr;
    MachineBasicBlock *jmpTarget = nullptr;
    bool afterRA = false;
    bool isSecondOpReg = false;
    bool isSecondOpNewified = false;
    // Traverse the basic block - bottom up.
    for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin();
         MII != E;) {
      MachineInstr &MI = *--MII;
      if (MI.isDebugValue())
        continue;

      if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated))
        break;

      DEBUG(dbgs() << "Instr: "; MI.dump(); dbgs() << "\n");

      if (!foundJump && (MI.getOpcode() == Hexagon::J2_jumpt ||
                         MI.getOpcode() == Hexagon::J2_jumpf ||
                         MI.getOpcode() == Hexagon::J2_jumptnewpt ||
                         MI.getOpcode() == Hexagon::J2_jumptnew ||
                         MI.getOpcode() == Hexagon::J2_jumpfnewpt ||
                         MI.getOpcode() == Hexagon::J2_jumpfnew)) {
        // This is where you would insert your compare and
        // instr that feeds compare.
        jmpPos = MII;
        jmpInstr = &MI;
        predReg = MI.getOperand(0).getReg();
        afterRA = TargetRegisterInfo::isPhysicalRegister(predReg);

        // If ifconverter had not messed up with the kill flags of the
        // operands, the following check on the kill flag would suffice.
        // if(!jmpInstr->getOperand(0).isKill()) break;

        // This predicate register is live out of the BB. This would only
        // work if we could actually use live variable analysis on phys
        // regs - but LLVM does not provide LV analysis on phys regs.
        // if(LVs.isLiveOut(predReg, *MBB)) break;

        // Get all the successors of this block - which will always
        // be 2. Check if the predicate register is live-in in those
        // successors. If yes, we can not delete the predicate -
        // I am doing this only because LLVM does not provide LiveOut
        // at the BB level.
        bool predLive = false;
        for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
                                                    SIE = MBB->succ_end();
             SI != SIE; ++SI) {
          MachineBasicBlock *succMBB = *SI;
          if (succMBB->isLiveIn(predReg))
            predLive = true;
        }
        if (predLive)
          break;

        if (!MI.getOperand(1).isMBB())
          continue;
        jmpTarget = MI.getOperand(1).getMBB();
        foundJump = true;
        if (MI.getOpcode() == Hexagon::J2_jumpf ||
            MI.getOpcode() == Hexagon::J2_jumpfnewpt ||
            MI.getOpcode() == Hexagon::J2_jumpfnew) {
          invertPredicate = true;
        }
        continue;
      }

      // No new value jump if there is a barrier. A barrier has to be in its
      // own packet. A barrier has zero operands. We conservatively bail out
      // here if we see any instruction with zero operands.
      if (foundJump && MI.getNumOperands() == 0)
        break;

      if (foundJump && !foundCompare && MI.getOperand(0).isReg() &&
          MI.getOperand(0).getReg() == predReg) {
        // Not all compares can be new value compare. Arch Spec: 7.6.1.1
        if (isNewValueJumpCandidate(MI)) {
          assert(
              (MI.getDesc().isCompare()) &&
              "Only compare instruction can be collapsed into New Value Jump");
          isSecondOpReg = MI.getOperand(2).isReg();

          if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg,
                                        afterRA, jmpPos, MF))
            break;

          cmpInstr = &MI;
          cmpPos = MII;
          foundCompare = true;

          // We need cmpReg1 and cmpOp2 (imm or reg) while building
          // new value jump instruction.
          cmpReg1 = MI.getOperand(1).getReg();
          if (MI.getOperand(1).isKill())
            MO1IsKill = true;

          if (isSecondOpReg) {
            cmpOp2 = MI.getOperand(2).getReg();
            if (MI.getOperand(2).isKill())
              MO2IsKill = true;
          } else
            cmpOp2 = MI.getOperand(2).getImm();
          continue;
        }
      }

      if (foundCompare && foundJump) {
        // If "common" checks fail, bail out on this BB.
        if (!commonChecksToProhibitNewValueJump(afterRA, MII))
          break;

        bool foundFeeder = false;
        MachineBasicBlock::iterator feederPos = MII;
        if (MI.getOperand(0).isReg() && MI.getOperand(0).isDef() &&
            (MI.getOperand(0).getReg() == cmpReg1 ||
             (isSecondOpReg &&
              MI.getOperand(0).getReg() == (unsigned)cmpOp2))) {

          unsigned feederReg = MI.getOperand(0).getReg();

          // First try to see if we can get the feeder from the first operand
          // of the compare. If we can not, and if secondOpReg is true
          // (second operand of the compare is also register), try that one.
          // TODO: Try to come up with some heuristic to figure out which
          // feeder would benefit.
          if (feederReg == cmpReg1) {
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) {
              if (!isSecondOpReg)
                break;
              else
                continue;
            } else
              foundFeeder = true;
          }

          if (!foundFeeder && isSecondOpReg && feederReg == (unsigned)cmpOp2)
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF))
              break;

          if (isSecondOpReg) {
            // In case of CMPLT, or CMPLTU, or EQ with the second register
            // to newify, swap the operands.
            if (cmpInstr->getOpcode() == Hexagon::C2_cmpeq &&
                feederReg == (unsigned)cmpOp2) {
              unsigned tmp = cmpReg1;
              bool tmpIsKill = MO1IsKill;
              cmpReg1 = cmpOp2;
              MO1IsKill = MO2IsKill;
              cmpOp2 = tmp;
              MO2IsKill = tmpIsKill;
            }

            // Now that we have swapped the operands, all we need to check is
            // whether the second operand (after the swap) is the feeder.
            // And if it is, make a note.
            if (feederReg == (unsigned)cmpOp2)
              isSecondOpNewified = true;
          }

          // Now that we are moving the feeder closer to the jump, make sure
          // we are respecting the kill values of the operands of the feeder.
          bool updatedIsKill = false;
          for (unsigned i = 0; i < MI.getNumOperands(); i++) {
            MachineOperand &MO = MI.getOperand(i);
            if (MO.isReg() && MO.isUse()) {
              unsigned feederReg = MO.getReg();
              for (MachineBasicBlock::iterator localII = feederPos,
                   end = jmpPos; localII != end; localII++) {
                MachineInstr &localMI = *localII;
                for (unsigned j = 0; j < localMI.getNumOperands(); j++) {
                  MachineOperand &localMO = localMI.getOperand(j);
                  if (localMO.isReg() && localMO.isUse() &&
                      localMO.isKill() && feederReg == localMO.getReg()) {
                    // We found a kill of a use register between the feeder
                    // and the jump; move the kill flag onto the feeder's
                    // operand, which will now be the last use.
                    localMO.setIsKill(false);
                    MO.setIsKill();
                    updatedIsKill = true;
                    break;
                  }
                }
                if (updatedIsKill)
                  break;
              }
            }
            if (updatedIsKill)
              break;
          }

          MBB->splice(jmpPos, MI.getParent(), MI);
          MBB->splice(jmpPos, MI.getParent(), cmpInstr);
          DebugLoc dl = MI.getDebugLoc();
          MachineInstr *NewMI;

          assert((isNewValueJumpCandidate(*cmpInstr)) &&
                 "This compare is not a New Value Jump candidate.");
          unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
                                               isSecondOpNewified,
                                               jmpTarget, MBPI);
          if (invertPredicate)
            opc = QII->getInvertedPredicatedOpcode(opc);

          if (isSecondOpReg)
            NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc))
                        .addReg(cmpReg1, getKillRegState(MO1IsKill))
                        .addReg(cmpOp2, getKillRegState(MO2IsKill))
                        .addMBB(jmpTarget);
          else if ((cmpInstr->getOpcode() == Hexagon::C2_cmpeqi ||
                    cmpInstr->getOpcode() == Hexagon::C2_cmpgti) &&
                   cmpOp2 == -1)
            // Corresponding new-value compare jump instructions don't have the
            // operand for -1 immediate value.
            NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc))
                        .addReg(cmpReg1, getKillRegState(MO1IsKill))
                        .addMBB(jmpTarget);
          else
            NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc))
                        .addReg(cmpReg1, getKillRegState(MO1IsKill))
                        .addImm(cmpOp2)
                        .addMBB(jmpTarget);

          assert(NewMI && "New Value Jump Instruction Not created!");
          (void)NewMI;
          if (cmpInstr->getOperand(0).isReg() &&
              cmpInstr->getOperand(0).isKill())
            cmpInstr->getOperand(0).setIsKill(false);
          if (cmpInstr->getOperand(1).isReg() &&
              cmpInstr->getOperand(1).isKill())
            cmpInstr->getOperand(1).setIsKill(false);
          cmpInstr->eraseFromParent();
          jmpInstr->eraseFromParent();
          ++nvjGenerated;
          ++NumNVJGenerated;
          break;
        }
      }
    }
  }

  return true;
}
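// Schematic of the rewrite performed above (illustrative Hexagon assembly,
// not verified compiler output): a compare feeding a conditional jump,
//
//   r1 = <feeder>
//   p0 = cmp.eq(r1, r2)
//   if (p0) jump <target>
//
// is collapsed so the feeder's result is consumed as a "new value" by a
// single compare-and-jump instruction packeted with the feeder:
//
//   r1 = <feeder>
//   if (cmp.eq(r1.new, r2)) jump:t <target>
//
// which is why the code moves the feeder and compare next to the jump and is
// so careful about kill flags and predicate liveness along the way.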
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
/// abstract stack objects.
void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();

  bool StackGrowsDown =
      TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;

  // Loop over all of the stack objects, assigning sequential addresses...
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Start at the beginning of the local area.
  // The Offset is the distance from the stack top in the direction
  // of stack growth -- so it's always nonnegative.
  int LocalAreaOffset = TFI.getOffsetOfLocalArea();
  if (StackGrowsDown)
    LocalAreaOffset = -LocalAreaOffset;
  assert(LocalAreaOffset >= 0 &&
         "Local area offset should be in direction of stack growth");
  int64_t Offset = LocalAreaOffset;

  // Skew to be applied to alignment.
  unsigned Skew = TFI.getStackAlignmentSkew(MF);

  // If there are fixed sized objects that are preallocated in the local area,
  // non-fixed objects can't be allocated right at the start of local area.
  // Adjust 'Offset' to point to the end of last fixed sized preallocated
  // object.
  for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) {
    int64_t FixedOff;
    if (StackGrowsDown) {
      // The maximum distance from the stack pointer is at lower address of
      // the object -- which is given by offset. For down growing stack
      // the offset is negative, so we negate the offset to get the distance.
      FixedOff = -MFI.getObjectOffset(i);
    } else {
      // The maximum distance from the start pointer is at the upper
      // address of the object.
      FixedOff = MFI.getObjectOffset(i) + MFI.getObjectSize(i);
    }
    if (FixedOff > Offset)
      Offset = FixedOff;
  }

  // First assign frame offsets to stack objects that are used to spill
  // callee saved registers.
  if (StackGrowsDown) {
    for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
      // If the stack grows down, we need to add the size to find the lowest
      // address of the object.
      Offset += MFI.getObjectSize(i);

      unsigned Align = MFI.getObjectAlignment(i);
      // Adjust to alignment boundary.
      Offset = alignTo(Offset, Align, Skew);

      LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n");
      MFI.setObjectOffset(i, -Offset); // Set the computed offset.
    }
  } else if (MaxCSFrameIndex >= MinCSFrameIndex) {
    // Be careful about underflow in comparisons against MinCSFrameIndex.
    for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) {
      if (MFI.isDeadObjectIndex(i))
        continue;

      unsigned Align = MFI.getObjectAlignment(i);
      // Adjust to alignment boundary.
      Offset = alignTo(Offset, Align, Skew);

      LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n");
      MFI.setObjectOffset(i, Offset);
      Offset += MFI.getObjectSize(i);
    }
  }

  // FixedCSEnd is the stack offset to the end of the fixed and callee-save
  // stack area.
  int64_t FixedCSEnd = Offset;

  unsigned MaxAlign = MFI.getMaxAlignment();

  // Make sure the special register scavenging spill slot is closest to the
  // incoming stack pointer if a frame pointer is required and is closer
  // to the incoming rather than the final stack pointer.
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  bool EarlyScavengingSlots = (TFI.hasFP(MF) && TFI.isFPCloseToIncomingSP() &&
                               RegInfo->useFPForScavengingIndex(MF) &&
                               !RegInfo->needsStackRealignment(MF));
  if (RS && EarlyScavengingSlots) {
    SmallVector<int, 2> SFIs;
    RS->getScavengingFrameIndices(SFIs);
    for (SmallVectorImpl<int>::iterator I = SFIs.begin(), IE = SFIs.end();
         I != IE; ++I)
      AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
  }

  // FIXME: Once this is working, then enable flag will change to a target
  // check for whether the frame is large enough to want to use virtual
  // frame index registers. Functions which don't want/need this optimization
  // will continue to use the existing code path.
  if (MFI.getUseLocalStackAllocationBlock()) {
    unsigned Align = MFI.getLocalFrameMaxAlign();

    // Adjust to alignment boundary.
    Offset = alignTo(Offset, Align, Skew);

    LLVM_DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");

    // Resolve offsets for objects in the local block.
    for (unsigned i = 0, e = MFI.getLocalFrameObjectCount(); i != e; ++i) {
      std::pair<int, int64_t> Entry = MFI.getLocalFrameObjectMap(i);
      int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
      LLVM_DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << FIOffset
                        << "]\n");
      MFI.setObjectOffset(Entry.first, FIOffset);
    }
    // Allocate the local block.
    Offset += MFI.getLocalFrameSize();

    MaxAlign = std::max(Align, MaxAlign);
  }

  // Retrieve the Exception Handler registration node.
  int EHRegNodeFrameIndex = std::numeric_limits<int>::max();
  if (const WinEHFuncInfo *FuncInfo = MF.getWinEHFuncInfo())
    EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex;

  // Make sure that the stack protector comes before the local variables on
  // the stack.
  SmallSet<int, 16> ProtectedObjs;
  if (MFI.getStackProtectorIndex() >= 0) {
    StackObjSet LargeArrayObjs;
    StackObjSet SmallArrayObjs;
    StackObjSet AddrOfObjs;

    AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), StackGrowsDown,
                      Offset, MaxAlign, Skew);

    // Assign large stack objects first.
    for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
      if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock())
        continue;
      if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
        continue;
      if (RS && RS->isScavengingFrameIndex((int)i))
        continue;
      if (MFI.isDeadObjectIndex(i))
        continue;
      if (MFI.getStackProtectorIndex() == (int)i ||
          EHRegNodeFrameIndex == (int)i)
        continue;

      switch (MFI.getObjectSSPLayout(i)) {
      case MachineFrameInfo::SSPLK_None:
        continue;
      case MachineFrameInfo::SSPLK_SmallArray:
        SmallArrayObjs.insert(i);
        continue;
      case MachineFrameInfo::SSPLK_AddrOf:
        AddrOfObjs.insert(i);
        continue;
      case MachineFrameInfo::SSPLK_LargeArray:
        LargeArrayObjs.insert(i);
        continue;
      }
      llvm_unreachable("Unexpected SSPLayoutKind.");
    }

    AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign, Skew);
    AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign, Skew);
    AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign, Skew);
  }

  SmallVector<int, 8> ObjectsToAllocate;

  // Then prepare to assign frame offsets to stack objects that are not used
  // to spill callee saved registers.
  for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
    if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock())
      continue;
    if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
      continue;
    if (RS && RS->isScavengingFrameIndex((int)i))
      continue;
    if (MFI.isDeadObjectIndex(i))
      continue;
    if (MFI.getStackProtectorIndex() == (int)i ||
        EHRegNodeFrameIndex == (int)i)
      continue;
    if (ProtectedObjs.count(i))
      continue;

    // Add the objects that we need to allocate to our working set.
    ObjectsToAllocate.push_back(i);
  }

  // Allocate the EH registration node first if one is present.
  if (EHRegNodeFrameIndex != std::numeric_limits<int>::max())
    AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset,
                      MaxAlign, Skew);

  // Give the targets a chance to order the objects the way they like it.
  if (MF.getTarget().getOptLevel() != CodeGenOpt::None &&
      MF.getTarget().Options.StackSymbolOrdering)
    TFI.orderFrameObjects(MF, ObjectsToAllocate);

  // Keep track of which bytes in the fixed and callee-save range are used so
  // we can use the holes when allocating later stack objects. Only do this if
  // stack protector isn't being used and the target requests it and we're
  // optimizing.
  BitVector StackBytesFree;
  if (!ObjectsToAllocate.empty() &&
      MF.getTarget().getOptLevel() != CodeGenOpt::None &&
      MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(MF))
    computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex,
                          MaxCSFrameIndex, FixedCSEnd, StackBytesFree);

  // Now walk the objects and actually assign base offsets to them.
  for (auto &Object : ObjectsToAllocate)
    if (!scavengeStackSlot(MFI, Object, StackGrowsDown, MaxAlign,
                           StackBytesFree))
      AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew);

  // Make sure the special register scavenging spill slot is closest to the
  // stack pointer.
  if (RS && !EarlyScavengingSlots) {
    SmallVector<int, 2> SFIs;
    RS->getScavengingFrameIndices(SFIs);
    for (SmallVectorImpl<int>::iterator I = SFIs.begin(), IE = SFIs.end();
         I != IE; ++I)
      AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
  }

  if (!TFI.targetHandlesStackFrameRounding()) {
    // If we have reserved argument space for call sites in the function
    // immediately on entry to the current function, count it as part of the
    // overall stack size.
    if (MFI.adjustsStack() && TFI.hasReservedCallFrame(MF))
      Offset += MFI.getMaxCallFrameSize();

    // Round up the size to a multiple of the alignment. If the function has
    // any calls or alloca's, align to the target's StackAlignment value to
    // ensure that the callee's frame or the alloca data is suitably aligned;
    // otherwise, for leaf functions, align to the TransientStackAlignment
    // value.
    unsigned StackAlign;
    if (MFI.adjustsStack() || MFI.hasVarSizedObjects() ||
        (RegInfo->needsStackRealignment(MF) && MFI.getObjectIndexEnd() != 0))
      StackAlign = TFI.getStackAlignment();
    else
      StackAlign = TFI.getTransientStackAlignment();

    // If the frame pointer is eliminated, all frame offsets will be relative
    // to SP not FP. Align to MaxAlign so this works.
    StackAlign = std::max(StackAlign, MaxAlign);
    Offset = alignTo(Offset, StackAlign, Skew);
  }

  // Update frame info to pretend that this is part of the stack...
  int64_t StackSize = Offset - LocalAreaOffset;
  MFI.setStackSize(StackSize);
  NumBytesStackSpace += StackSize;
}
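// Minimal sketch of the skewed alignment helper used throughout the layout
// code above. This is an assumption-level restatement for illustration, not
// a copy of LLVM's alignTo: it rounds Offset up to the next value congruent
// to Skew modulo Align, which is what stack layout needs when the incoming SP
// is known to be displaced from an aligned boundary (e.g. by a pushed return
// address).
#include <cstdint>

static uint64_t alignToSkewed(uint64_t Offset, uint64_t Align, uint64_t Skew) {
  Skew %= Align; // Only the skew's residue modulo the alignment matters.
  return ((Offset + Align - 1 - Skew) / Align) * Align + Skew;
}

// alignToSkewed(13, 8, 0) == 16, while alignToSkewed(13, 8, 4) == 20: the
// first boundary at or above 13 that is congruent to 4 (mod 8).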
static void assignCalleeSavedSpillSlots(MachineFunction &F,
                                        const BitVector &SavedRegs,
                                        unsigned &MinCSFrameIndex,
                                        unsigned &MaxCSFrameIndex) {
  if (SavedRegs.empty())
    return;

  const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
  const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs();

  std::vector<CalleeSavedInfo> CSI;
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    if (SavedRegs.test(Reg))
      CSI.push_back(CalleeSavedInfo(Reg));
  }

  const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
  MachineFrameInfo &MFI = F.getFrameInfo();
  if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
    // If target doesn't implement this, use generic code.

    if (CSI.empty())
      return; // Early exit if no callee saved registers are modified!

    unsigned NumFixedSpillSlots;
    const TargetFrameLowering::SpillSlot *FixedSpillSlots =
        TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);

    // Now that we know which registers need to be saved and restored,
    // allocate stack slots for them.
    for (auto &CS : CSI) {
      unsigned Reg = CS.getReg();
      const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);

      int FrameIdx;
      if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
        CS.setFrameIdx(FrameIdx);
        continue;
      }

      // Check to see if this physreg must be spilled to a particular stack
      // slot on this target.
      const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
      while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
             FixedSlot->Reg != Reg)
        ++FixedSlot;

      unsigned Size = RegInfo->getSpillSize(*RC);
      if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
        // Nope, just spill it anywhere convenient.
        unsigned Align = RegInfo->getSpillAlignment(*RC);
        unsigned StackAlign = TFI->getStackAlignment();

        // We may not be able to satisfy the desired alignment specification
        // of the TargetRegisterClass if the stack alignment is smaller. Use
        // the min.
        Align = std::min(Align, StackAlign);
        FrameIdx = MFI.CreateStackObject(Size, Align, true);
        if ((unsigned)FrameIdx < MinCSFrameIndex)
          MinCSFrameIndex = FrameIdx;
        if ((unsigned)FrameIdx > MaxCSFrameIndex)
          MaxCSFrameIndex = FrameIdx;
      } else {
        // Spill it to the stack where we must.
        FrameIdx = MFI.CreateFixedSpillStackObject(Size, FixedSlot->Offset);
      }

      CS.setFrameIdx(FrameIdx);
    }
  }

  MFI.setCalleeSavedInfo(CSI);
}
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
bool PEI::runOnMachineFunction(MachineFunction &MF) {
  const Function &F = MF.getFunction();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();

  RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr;
  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);
  FrameIndexEliminationScavenging =
      (RS && !FrameIndexVirtualScavenging) ||
      TRI->requiresFrameIndexReplacementScavenging(MF);
  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();

  // Calculate the MaxCallFrameSize and AdjustsStack variables for the
  // function's frame information. Also eliminates call frame pseudo
  // instructions.
  calculateCallFrameInfo(MF);

  // Determine placement of CSR spill/restore code and prolog/epilog code:
  // place all spills in the entry block, all restores in return blocks.
  calculateSaveRestoreBlocks(MF);

  // Handle CSR spilling and restoring, for targets that need it.
  if (MF.getTarget().usesPhysRegsForPEI())
    spillCalleeSavedRegs(MF);

  // Allow the target machine to make final modifications to the function
  // before the frame layout is finalized.
  TFI->processFunctionBeforeFrameFinalized(MF, RS);

  // Calculate actual frame offsets for all abstract stack objects...
  calculateFrameObjectOffsets(MF);

  // Add prolog and epilog code to the function. This function is required
  // to align the stack frame as necessary for any stack variables or
  // called functions. Because of this, calculateCalleeSavedRegisters()
  // must be called before this function in order to set the AdjustsStack
  // and MaxCallFrameSize variables.
  if (!F.hasFnAttribute(Attribute::Naked))
    insertPrologEpilogCode(MF);

  // Replace all MO_FrameIndex operands with physical register references
  // and actual offsets.
  replaceFrameIndices(MF);

  // If register scavenging is needed, as we've enabled doing it as a
  // post-pass, scavenge the virtual registers that frame index elimination
  // inserted.
  if (TRI->requiresRegisterScavenging(MF) && FrameIndexVirtualScavenging)
    scavengeFrameVirtualRegs(MF, *RS);

  // Warn on stack size when we exceed the given limit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  uint64_t StackSize = MFI.getStackSize();
  if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
    DiagnosticInfoStackSize DiagStackSize(F, StackSize);
    F.getContext().diagnose(DiagStackSize);
  }

  ORE->emit([&]() {
    return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize",
                                             MF.getFunction().getSubprogram(),
                                             &MF.front())
           << ore::NV("NumStackBytes", StackSize)
           << " stack bytes in function";
  });

  delete RS;
  SaveBlocks.clear();
  RestoreBlocks.clear();
  MFI.setSavePoint(nullptr);
  MFI.setRestorePoint(nullptr);
  return true;
}
void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
                              int &SPAdj) {
  assert(MF.getSubtarget().getRegisterInfo() &&
         "getRegisterInfo() must be implemented!");
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();

  if (RS && FrameIndexEliminationScavenging)
    RS->enterBasicBlock(*BB);

  bool InsideCallSequence = false;

  for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end();) {
    if (TII.isFrameInstr(*I)) {
      InsideCallSequence = TII.isFrameSetup(*I);
      SPAdj += TII.getSPAdjust(*I);
      I = TFI->eliminateCallFramePseudoInstr(MF, *BB, I);
      continue;
    }

    MachineInstr &MI = *I;
    bool DoIncr = true;
    bool DidFinishLoop = true;
    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      if (!MI.getOperand(i).isFI())
        continue;

      // Frame indices in debug values are encoded in a target independent
      // way with simply the frame index and offset rather than any
      // target-specific addressing mode.
      if (MI.isDebugValue()) {
        assert(i == 0 && "Frame indices can only appear as the first "
                         "operand of a DBG_VALUE machine instruction");
        unsigned Reg;
        int64_t Offset =
            TFI->getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg);
        MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
        MI.getOperand(0).setIsDebug();
        auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(),
                                             DIExpression::NoDeref, Offset);
        MI.getOperand(3).setMetadata(DIExpr);
        continue;
      }

      // TODO: This code should be commoned with the code for
      // PATCHPOINT. There's no good reason for the difference in
      // implementation other than historical accident. The only
      // remaining difference is the unconditional use of the stack
      // pointer as the base register.
      if (MI.getOpcode() == TargetOpcode::STATEPOINT) {
        assert((!MI.isDebugValue() || i == 0) &&
               "Frame indices can only appear as the first operand of a "
               "DBG_VALUE machine instruction");
        unsigned Reg;
        MachineOperand &Offset = MI.getOperand(i + 1);
        int refOffset = TFI->getFrameIndexReferencePreferSP(
            MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false);
        Offset.setImm(Offset.getImm() + refOffset);
        MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
        continue;
      }

      // Some instructions (e.g. inline asm instructions) can have
      // multiple frame indices and/or cause eliminateFrameIndex
      // to insert more than one instruction. We need the register
      // scavenger to go through all of these instructions so that
      // it can update its register information. We keep the
      // iterator at the point before insertion so that we can
      // revisit them in full.
      bool AtBeginning = (I == BB->begin());
      if (!AtBeginning)
        --I;

      // If this instruction has a FrameIndex operand, we need to
      // use that target machine register info object to eliminate
      // it.
      TRI.eliminateFrameIndex(MI, SPAdj, i,
                              FrameIndexEliminationScavenging ? RS : nullptr);

      // Reset the iterator if we were at the beginning of the BB.
      if (AtBeginning) {
        I = BB->begin();
        DoIncr = false;
      }

      DidFinishLoop = false;
      break;
    }

    // If we are looking at a call sequence, we need to keep track of
    // the SP adjustment made by each instruction in the sequence.
    // This includes both the frame setup/destroy pseudos (handled above),
    // as well as other instructions that have side effects w.r.t the SP.
    // Note that this must come after eliminateFrameIndex, because
    // if I itself referred to a frame index, we shouldn't count its own
    // adjustment.
    if (DidFinishLoop && InsideCallSequence)
      SPAdj += TII.getSPAdjust(MI);

    if (DoIncr && I != BB->end())
      ++I;

    // Update register states.
    if (RS && FrameIndexEliminationScavenging && DidFinishLoop)
      RS->forward(MI);
  }
}
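// Illustrative walk-through of the SPAdj bookkeeping above (hypothetical
// target, made-up operands): inside a call sequence,
//
//   ADJCALLSTACKDOWN 16      ; frame setup: SPAdj += 16, pseudo eliminated
//   STR %r0, %fixed-stack.0  ; eliminateFrameIndex sees SPAdj == 16 and
//                            ; rewrites the index against the displaced SP
//   ADJCALLSTACKUP 16        ; frame destroy: getSPAdjust() cancels the 16
//
// so frame-index references stay correct while the stack pointer is
// temporarily displaced around the call.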
bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
    return false;

  LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');

  init(MF);

  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
  if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
    // If MF is irreducible, a block may be in a loop without
    // MachineLoopInfo reporting it. I.e., we may use the
    // post-dominance property in loops, which leads to incorrect
    // results. Moreover, we may miss that the prologue and
    // epilogue are not in the same loop, leading to unbalanced
    // construction/deconstruction of the stack frame.
    LLVM_DEBUG(dbgs() << "Irreducible CFGs are not supported yet\n");
    return false;
  }

  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  std::unique_ptr<RegScavenger> RS(
      TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);

  for (MachineBasicBlock &MBB : MF) {
    LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' '
                      << MBB.getName() << '\n');

    if (MBB.isEHFuncletEntry()) {
      LLVM_DEBUG(dbgs() << "EH Funclets are not supported yet.\n");
      return false;
    }

    if (MBB.isEHPad()) {
      // Push the prologue and epilogue outside of the region that may throw
      // by making sure that all the landing pads are at least at the boundary
      // of the save and restore points. The problem with exceptions is that
      // the throw is not properly modeled and in particular, a basic block
      // can jump out from the middle.
      updateSaveRestorePoints(MBB, RS.get());
      if (!ArePointsInteresting()) {
        LLVM_DEBUG(dbgs() << "EHPad prevents shrink-wrapping\n");
        return false;
      }
      continue;
    }

    for (const MachineInstr &MI : MBB) {
      if (!useOrDefCSROrFI(MI, RS.get()))
        continue;
      // Save (resp. restore) point must dominate (resp. post dominate)
      // MI. Look for the proper basic block for those.
      updateSaveRestorePoints(MBB, RS.get());
      // If we are at a point where we cannot improve the placement of
      // save/restore instructions, just give up.
      if (!ArePointsInteresting()) {
        LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
        return false;
      }
      // No need to look for other instructions, this basic block
      // will already be part of the handled region.
      break;
    }
  }
  if (!ArePointsInteresting()) {
    // If the points are not interesting at this point, then they must be null
    // because it means we did not encounter any frame/CSR related code.
    // Otherwise, we would have returned from the previous loop.
    assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!");
    LLVM_DEBUG(dbgs() << "Nothing to shrink-wrap\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
                    << '\n');

  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  do {
    LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
                      << Save->getNumber() << ' ' << Save->getName() << ' '
                      << MBFI->getBlockFreq(Save).getFrequency()
                      << "\nRestore: " << Restore->getNumber() << ' '
                      << Restore->getName() << ' '
                      << MBFI->getBlockFreq(Restore).getFrequency() << '\n');

    bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
    if (((IsSaveCheap =
              EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) &&
         EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) &&
        ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) &&
         TFI->canUseAsEpilogue(*Restore)))
      break;
    LLVM_DEBUG(
        dbgs() << "New points are too expensive or invalid for the target\n");
    MachineBasicBlock *NewBB;
    if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) {
      Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
      if (!Save)
        break;
      NewBB = Save;
    } else {
      // Restore is expensive.
      Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
      if (!Restore)
        break;
      NewBB = Restore;
    }
    updateSaveRestorePoints(*NewBB, RS.get());
  } while (Save && Restore);

  if (!ArePointsInteresting()) {
    ++NumCandidatesDropped;
    return false;
  }

  LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
                    << Save->getNumber() << ' ' << Save->getName()
                    << "\nRestore: " << Restore->getNumber() << ' '
                    << Restore->getName() << '\n');

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setSavePoint(Save);
  MFI.setRestorePoint(Restore);
  ++NumCandidates;
  return false;
}
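// Net effect of the pass, on an assumed CFG (illustration only):
//
//   entry:  if (cond) goto work; else goto exit;
//   work:   ...touches CSRs / frame indices...
//   exit:   ret
//
// The save point sinks from entry to work (the first block that dominates
// every CSR/frame use) and the restore point rises to the corresponding
// post-dominator, so the fast path through exit never executes the prologue
// or epilogue.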
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
bool PEI::runOnMachineFunction(MachineFunction &Fn) {
  if (!SpillCalleeSavedRegisters) {
    const TargetMachine &TM = Fn.getTarget();
    if (!TM.usesPhysRegsForPEI()) {
      SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *,
                                     unsigned &, unsigned &, const MBBVector &,
                                     const MBBVector &) {};
      ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger &) {};
    } else {
      SpillCalleeSavedRegisters = doSpillCalleeSavedRegs;
      ScavengeFrameVirtualRegs = scavengeFrameVirtualRegs;
      UsesCalleeSaves = true;
    }
  }

  const Function *F = Fn.getFunction();
  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();

  RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
  FrameIndexEliminationScavenging =
      (RS && !FrameIndexVirtualScavenging) ||
      TRI->requiresFrameIndexReplacementScavenging(Fn);
  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();

  // Calculate the MaxCallFrameSize and AdjustsStack variables for the
  // function's frame information. Also eliminates call frame pseudo
  // instructions.
  calculateCallFrameInfo(Fn);

  // Determine placement of CSR spill/restore code and prolog/epilog code:
  // place all spills in the entry block, all restores in return blocks.
  calculateSaveRestoreBlocks(Fn);

  // Handle CSR spilling and restoring, for targets that need it.
  SpillCalleeSavedRegisters(Fn, RS, MinCSFrameIndex, MaxCSFrameIndex,
                            SaveBlocks, RestoreBlocks);

  // Allow the target machine to make final modifications to the function
  // before the frame layout is finalized.
  TFI->processFunctionBeforeFrameFinalized(Fn, RS);

  // Calculate actual frame offsets for all abstract stack objects...
  calculateFrameObjectOffsets(Fn);

  // Add prolog and epilog code to the function. This function is required
  // to align the stack frame as necessary for any stack variables or
  // called functions. Because of this, calculateCalleeSavedRegisters()
  // must be called before this function in order to set the AdjustsStack
  // and MaxCallFrameSize variables.
  if (!F->hasFnAttribute(Attribute::Naked))
    insertPrologEpilogCode(Fn);

  // Replace all MO_FrameIndex operands with physical register references
  // and actual offsets.
  replaceFrameIndices(Fn);

  // If register scavenging is needed, as we've enabled doing it as a
  // post-pass, scavenge the virtual registers that frame index elimination
  // inserted.
  if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) {
    ScavengeFrameVirtualRegs(Fn, *RS);

    // Clear any vregs created by virtual scavenging.
    Fn.getRegInfo().clearVirtRegs();
  }

  // Warn on stack size when we exceed the given limit.
  MachineFrameInfo &MFI = Fn.getFrameInfo();
  uint64_t StackSize = MFI.getStackSize();
  if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
    DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
    F->getContext().diagnose(DiagStackSize);
  }

  delete RS;
  SaveBlocks.clear();
  RestoreBlocks.clear();
  MFI.setSavePoint(nullptr);
  MFI.setRestorePoint(nullptr);
  return true;
}
MachineBasicBlock *
MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
  // Splitting the critical edge to a landing pad block is non-trivial. Don't
  // do it in this generic function.
  if (Succ->isLandingPad())
    return nullptr;

  MachineFunction *MF = getParent();
  DebugLoc dl; // FIXME: this is nowhere

  // Performance might be harmed on HW that implements branching using exec
  // mask where both sides of the branches are always executed.
  if (MF->getTarget().requiresStructuredCFG())
    return nullptr;

  // We may need to update this's terminator, but we can't do that if
  // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
    return nullptr;

  // Avoid bugpoint weirdness: A block may end with a conditional branch but
  // jumps to the same MBB in either case. We have duplicate CFG edges in that
  // case that we can't handle. Since this never happens in properly optimized
  // code, just skip those edges.
  if (TBB && TBB == FBB) {
    DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
                 << getNumber() << '\n');
    return nullptr;
  }

  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
  MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
  DEBUG(dbgs() << "Splitting critical edge:"
                  " BB#" << getNumber()
               << " -- BB#" << NMBB->getNumber()
               << " -- BB#" << Succ->getNumber() << '\n');

  LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>();
  SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>();
  if (LIS)
    LIS->insertMBBInMaps(NMBB);
  else if (Indexes)
    Indexes->insertMBBInMaps(NMBB);

  // On some targets like Mips, branches may kill virtual registers. Make sure
  // that LiveVariables is properly updated after updateTerminator replaces
  // the terminators.
  LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();

  // Collect a list of virtual registers killed by the terminators.
  SmallVector<unsigned, 4> KilledRegs;
  if (LV)
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = I;
      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0 ||
            !OI->isUse() || !OI->isKill() || OI->isUndef())
          continue;
        unsigned Reg = OI->getReg();
        if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
            LV->getVarInfo(Reg).removeKill(MI)) {
          KilledRegs.push_back(Reg);
          DEBUG(dbgs() << "Removing terminator kill: " << *MI);
          OI->setIsKill(false);
        }
      }
    }

  SmallVector<unsigned, 4> UsedRegs;
  if (LIS) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I) {
      MachineInstr *MI = I;

      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || OI->getReg() == 0)
          continue;

        unsigned Reg = OI->getReg();
        if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end())
          UsedRegs.push_back(Reg);
      }
    }
  }

  ReplaceUsesOfBlockWith(Succ, NMBB);

  // If updateTerminator() removes instructions, we need to remove them from
  // SlotIndexes.
  SmallVector<MachineInstr*, 4> Terminators;
  if (Indexes) {
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      Terminators.push_back(I);
  }

  updateTerminator();

  if (Indexes) {
    SmallVector<MachineInstr*, 4> NewTerminators;
    for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
         I != E; ++I)
      NewTerminators.push_back(I);

    for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
        E = Terminators.end(); I != E; ++I) {
      if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
          NewTerminators.end())
        Indexes->removeMachineInstrFromMaps(*I);
    }
  }

  // Insert unconditional "jump Succ" instruction in NMBB if necessary.
  NMBB->addSuccessor(Succ);
  if (!NMBB->isLayoutSuccessor(Succ)) {
    Cond.clear();
    MF->getSubtarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond,
                                                    dl);

    if (Indexes) {
      for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
           I != E; ++I) {
        // Some instructions may have been moved to NMBB by updateTerminator(),
        // so we first remove any instruction that already has an index.
        if (Indexes->hasIndex(I))
          Indexes->removeMachineInstrFromMaps(I);
        Indexes->insertMachineInstrInMaps(I);
      }
    }
  }

  // Fix PHI nodes in Succ so they refer to NMBB instead of this.
  for (MachineBasicBlock::instr_iterator
         i = Succ->instr_begin(), e = Succ->instr_end();
       i != e && i->isPHI(); ++i)
    for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
      if (i->getOperand(ni+1).getMBB() == this)
        i->getOperand(ni+1).setMBB(NMBB);

  // Inherit live-ins from the successor.
  for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(),
         E = Succ->livein_end(); I != E; ++I)
    NMBB->addLiveIn(*I);

  // Update LiveVariables.
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  if (LV) {
    // Restore kills of virtual registers that were killed by the terminators.
    while (!KilledRegs.empty()) {
      unsigned Reg = KilledRegs.pop_back_val();
      for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
        if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
          continue;
        if (TargetRegisterInfo::isVirtualRegister(Reg))
          LV->getVarInfo(Reg).Kills.push_back(I);
        DEBUG(dbgs() << "Restored terminator kill: " << *I);
        break;
      }
    }
    // Update relevant live-through information.
    LV->addNewBlock(NMBB, this, Succ);
  }

  if (LIS) {
    // After splitting the edge and updating SlotIndexes, live intervals may
    // be in one of two situations, depending on whether this block was the
    // last in the function. If the original block was the last in the
    // function, all live intervals will end prior to the beginning of the new
    // split block. If the original block was not at the end of the function,
    // all live intervals will extend to the end of the new split block.

    bool isLastMBB =
        std::next(MachineFunction::iterator(NMBB)) == getParent()->end();

    SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
    SlotIndex PrevIndex = StartIndex.getPrevSlot();
    SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);

    // Find the registers used from NMBB in PHIs in Succ.
    SmallSet<unsigned, 8> PHISrcRegs;
    for (MachineBasicBlock::instr_iterator
           I = Succ->instr_begin(), E = Succ->instr_end();
         I != E && I->isPHI(); ++I) {
      for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
        if (I->getOperand(ni+1).getMBB() == NMBB) {
          MachineOperand &MO = I->getOperand(ni);
          unsigned Reg = MO.getReg();
          PHISrcRegs.insert(Reg);
          if (MO.isUndef())
            continue;

          LiveInterval &LI = LIS->getInterval(Reg);
          VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
          assert(VNI &&
                 "PHI sources should be live out of their predecessors.");
          LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
        }
      }
    }

    MachineRegisterInfo *MRI = &getParent()->getRegInfo();
    for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
      unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
      if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
        continue;

      LiveInterval &LI = LIS->getInterval(Reg);
      if (!LI.liveAt(PrevIndex))
        continue;

      bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ));
      if (isLiveOut && isLastMBB) {
        VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
        assert(VNI && "LiveInterval should have VNInfo where it is live.");
        LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
      } else if (!isLiveOut && !isLastMBB) {
        LI.removeSegment(StartIndex, EndIndex);
      }
    }

    // Update all intervals for registers whose uses may have been modified by
    // updateTerminator().
    LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
  }

  if (MachineDominatorTree *MDT =
          P->getAnalysisIfAvailable<MachineDominatorTree>())
    MDT->recordSplitCriticalEdge(this, Succ, NMBB);

  if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
      // If one or the other blocks were not in a loop, the new block is not
      // either, and thus LI doesn't need to be updated.
      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
        if (TIL == DestLoop) {
          // Both in the same loop, the NMBB joins loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop. Add to the outer loop.
          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop. Add to the outer loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else {
          // Edge from two loops with no containment relation. Because these
          // are natural loops, we know that the destination block must be the
          // header of its loop (adding a branch into a loop elsewhere would
          // create an irreducible loop).
          assert(DestLoop->getHeader() == Succ &&
                 "Should not create irreducible loops!");
          if (MachineLoop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NMBB, MLI->getBase());
        }
      }
    }

  return NMBB;
}
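// Conceptual before/after for the splitting above (assumed CFG): an edge
// BB#1 -> BB#3 is critical when BB#1 has several successors and BB#3 has
// several predecessors. The function rewires it as BB#1 -> NMBB -> BB#3,
// giving later phases (PHI elimination, spilling) a block whose code runs
// only on that edge, then patches PHIs, live-ins, LiveVariables,
// LiveIntervals, the dominator tree, and loop info to match.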
bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &static_cast<const MipsSubtarget &>(MF.getSubtarget());
  if (Subtarget->inMips16Mode())
    return false;
  return MipsDAGToDAGISel::runOnMachineFunction(MF);
}
LegalizerHelper::LegalizerHelper(MachineFunction &MF)
    : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) {
  MIRBuilder.setMF(MF);
}
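The constructor above only captures state; it legalizes nothing by itself. A
minimal sketch of a driver loop, assuming the legalizeInstrStep() entry point
of this GlobalISel snapshot (the in-tree Legalizer pass additionally tracks
instructions created during legalization, which this sketch omits):

static bool legalizeAllInstrs(MachineFunction &MF) {
  // Sketch only, not the in-tree pass. Collect instructions up front because
  // legalizeInstrStep() may insert or erase instructions and would otherwise
  // invalidate the iterators.
  LegalizerHelper Helper(MF);
  SmallVector<MachineInstr *, 16> Worklist;
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      Worklist.push_back(&MI);

  bool Changed = false;
  for (MachineInstr *MI : Worklist) {
    LegalizerHelper::LegalizeResult Res = Helper.legalizeInstrStep(*MI);
    if (Res == LegalizerHelper::UnableToLegalize)
      break; // A real driver would report an error here.
    Changed |= (Res == LegalizerHelper::Legalized);
  }
  return Changed;
}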
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();

  // Set the stack-pointer register and its aliases as reserved.
  for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
       ++I)
    Reserved.set(*I);

  // Set the instruction pointer register and its aliases as reserved.
  for (MCSubRegIterator I(X86::RIP, this, /*IncludeSelf=*/true); I.isValid();
       ++I)
    Reserved.set(*I);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF)) {
    for (MCSubRegIterator I(X86::RBP, this, /*IncludeSelf=*/true); I.isValid();
         ++I)
      Reserved.set(*I);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    CallingConv::ID CC = MF.getFunction()->getCallingConv();
    const uint32_t *RegMask = getCallPreservedMask(MF, CC);
    if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
      report_fatal_error(
        "Stack realignment in presence of dynamic allocas is not supported "
        "with this calling convention.");

    unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), MVT::i64,
                                              false);
    for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true); I.isValid();
         ++I)
      Reserved.set(*I);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are old 32-bits.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid();
           ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 16; n != 32; ++n) {
      for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid();
           ++AI)
        Reserved.set(*AI);
    }
  }

  return Reserved;
}
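As an aside (my own illustration, not code from this file): clients normally
do not call getReservedRegs() directly. They query the copy that
MachineRegisterInfo caches once the reserved set has been frozen:

// Sketch assuming the usual freeze-then-query discipline.
static bool isStackPointerReserved(const MachineFunction &MF) {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  assert(MRI.reservedRegsFrozen() && "Reserved registers not computed yet");
  return MRI.isReserved(X86::RSP); // Always true given getReservedRegs() above.
}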
void MIRPrinter::initRegisterMaskIds(const MachineFunction &MF) {
  const auto *TRI = MF.getSubtarget().getRegisterInfo();
  unsigned I = 0;
  for (const uint32_t *Mask : TRI->getRegMasks())
    RegisterMaskIds.insert(std::make_pair(Mask, I++));
}
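A hypothetical lookup helper (the name and shape are mine) showing how the
map built above is meant to be consulted: a register-mask operand carries the
same mask pointer that getRegMasks() handed out, so the pointer keys directly
into RegisterMaskIds.

static void printRegMaskId(
    raw_ostream &OS, const MachineOperand &MO,
    const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds) {
  // The mask pointer is the key; the value is the index assigned above.
  auto MaskIt = RegisterMaskIds.find(MO.getRegMask());
  if (MaskIt != RegisterMaskIds.end())
    OS << "regmask #" << MaskIt->second;
  else
    OS << "<unknown regmask>"; // Mask not owned by TRI, e.g. a custom one.
}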
bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
               << "********** Function: " << MF.getName() << '\n');
  TII = MF.getSubtarget().getInstrInfo();
  DomTree = &getAnalysis<MachineDominatorTree>();
  MRI = &MF.getRegInfo();

  bool Changed = false;

  // Visit blocks in dominator tree pre-order. The pre-order enables multiple
  // cmp-conversions from the same head block.
  // Note that updateDomTree() modifies the children of the DomTree node
  // currently being visited. The df_iterator supports that; it doesn't look at
  // child_begin() / child_end() until after a node has been visited.
  for (MachineDomTreeNode *I : depth_first(DomTree)) {
    MachineBasicBlock *HBB = I->getBlock();

    SmallVector<MachineOperand, 4> HeadCond;
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    if (TII->AnalyzeBranch(*HBB, TBB, FBB, HeadCond)) {
      continue;
    }

    // Equivalence check is to skip loops.
    if (!TBB || TBB == HBB) {
      continue;
    }

    SmallVector<MachineOperand, 4> TrueCond;
    MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
    if (TII->AnalyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) {
      continue;
    }

    MachineInstr *HeadCmpMI = findSuitableCompare(HBB);
    if (!HeadCmpMI) {
      continue;
    }

    MachineInstr *TrueCmpMI = findSuitableCompare(TBB);
    if (!TrueCmpMI) {
      continue;
    }

    AArch64CC::CondCode HeadCmp;
    if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) {
      continue;
    }

    AArch64CC::CondCode TrueCmp;
    if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) {
      continue;
    }

    const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
    const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();

    DEBUG(dbgs() << "Head branch:\n");
    DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(HeadCmp)
                 << '\n');
    DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');

    DEBUG(dbgs() << "True branch:\n");
    DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(TrueCmp)
                 << '\n');
    DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');

    if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
         (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
        std::abs(TrueImm - HeadImm) == 2) {
      // This branch transforms machine instructions that correspond to
      //
      // 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
      // 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)
      //
      // into
      //
      // 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
      // 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)
      CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp);
      CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp);
      if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) &&
          std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) {
        modifyCmp(HeadCmpMI, HeadCmpInfo);
        modifyCmp(TrueCmpMI, TrueCmpInfo);
        Changed = true;
      }
    } else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||
                (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&
               std::abs(TrueImm - HeadImm) == 1) {
      // This branch transforms machine instructions that correspond to
      //
      // 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
      // 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)
      //
      // into
      //
      // 1) (a <= {NewImm} && ...) || (a > {NewImm} && ...)
      // 2) (a < {NewImm} && ...) || (a >= {NewImm} && ...)

      // GT -> GE transformation increases immediate value, so picking the
      // smaller one; LT -> LE decreases immediate value so invert the choice.
      bool adjustHeadCond = (HeadImm < TrueImm);
      if (HeadCmp == AArch64CC::LT) {
        adjustHeadCond = !adjustHeadCond;
      }

      if (adjustHeadCond) {
        Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);
      } else {
        Changed |= adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm);
      }
    }
    // Other transformation cases almost never occur due to generation of
    // < or > comparisons instead of <= and >=.
  }

  return Changed;
}
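A concrete instance of the first case above, with numbers of my own choosing:
HeadCmp == LT with HeadImm == 3 and TrueCmp == GT with TrueImm == 1 gives
std::abs(TrueImm - HeadImm) == 2, so adjustCmp() rewrites "a < 3" as "a <= 2"
and "a > 1" as "a >= 2". Both compares now use the immediate 2, which is
exactly what the std::get<0> / std::get<1> equality check verifies before
modifyCmp() commits either change.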
void NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo();

  bool StackGrowsDown =
    TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;

  // Loop over all of the stack objects, assigning sequential addresses...
  MachineFrameInfo *MFI = Fn.getFrameInfo();

  // Start at the beginning of the local area.
  // The Offset is the distance from the stack top in the direction
  // of stack growth -- so it's always nonnegative.
  int LocalAreaOffset = TFI.getOffsetOfLocalArea();
  if (StackGrowsDown)
    LocalAreaOffset = -LocalAreaOffset;
  assert(LocalAreaOffset >= 0 &&
         "Local area offset should be in direction of stack growth");
  int64_t Offset = LocalAreaOffset;

  // If there are fixed sized objects that are preallocated in the local area,
  // non-fixed objects can't be allocated right at the start of local area.
  // We currently don't support filling in holes in between fixed sized
  // objects, so we adjust 'Offset' to point to the end of last fixed sized
  // preallocated object.
  for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
    int64_t FixedOff;
    if (StackGrowsDown) {
      // The maximum distance from the stack pointer is at lower address of
      // the object -- which is given by offset. For down growing stack
      // the offset is negative, so we negate the offset to get the distance.
      FixedOff = -MFI->getObjectOffset(i);
    } else {
      // The maximum distance from the start pointer is at the upper
      // address of the object.
      FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
    }
    if (FixedOff > Offset) Offset = FixedOff;
  }

  // NOTE: We do not have a call stack

  unsigned MaxAlign = MFI->getMaxAlignment();

  // No scavenger

  // FIXME: Once this is working, then enable flag will change to a target
  // check for whether the frame is large enough to want to use virtual
  // frame index registers. Functions which don't want/need this optimization
  // will continue to use the existing code path.
  if (MFI->getUseLocalStackAllocationBlock()) {
    unsigned Align = MFI->getLocalFrameMaxAlign();

    // Adjust to alignment boundary.
    Offset = (Offset + Align - 1) / Align * Align;

    DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");

    // Resolve offsets for objects in the local block.
    for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
      std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
      int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
      DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
            FIOffset << "]\n");
      MFI->setObjectOffset(Entry.first, FIOffset);
    }
    // Allocate the local block
    Offset += MFI->getLocalFrameSize();

    MaxAlign = std::max(Align, MaxAlign);
  }

  // No stack protector

  // Then assign frame offsets to stack objects that are not used to spill
  // callee saved registers.
  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
    if (MFI->isObjectPreAllocated(i) && MFI->getUseLocalStackAllocationBlock())
      continue;
    if (MFI->isDeadObjectIndex(i))
      continue;

    AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
  }

  // No scavenger

  if (!TFI.targetHandlesStackFrameRounding()) {
    // If we have reserved argument space for call sites in the function
    // immediately on entry to the current function, count it as part of the
    // overall stack size.
    if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
      Offset += MFI->getMaxCallFrameSize();

    // Round up the size to a multiple of the alignment.  If the function has
    // any calls or alloca's, align to the target's StackAlignment value to
    // ensure that the callee's frame or the alloca data is suitably aligned;
    // otherwise, for leaf functions, align to the TransientStackAlignment
    // value.
    unsigned StackAlign;
    if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
        (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
      StackAlign = TFI.getStackAlignment();
    else
      StackAlign = TFI.getTransientStackAlignment();

    // If the frame pointer is eliminated, all frame offsets will be relative
    // to SP not FP. Align to MaxAlign so this works.
    StackAlign = std::max(StackAlign, MaxAlign);
    unsigned AlignMask = StackAlign - 1;
    Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
  }

  // Update frame info to pretend that this is part of the stack...
  int64_t StackSize = Offset - LocalAreaOffset;
  MFI->setStackSize(StackSize);
}
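To make the final rounding concrete (numbers mine): with Offset == 37 and
StackAlign == 16, AlignMask == 15 and (37 + 15) & ~15 == 48, so the frame size
is bumped to the next 16-byte boundary. Because StackAlign is a power of two,
the mask trick is equivalent to rounding up by division, as the earlier
(Offset + Align - 1) / Align * Align expression does for the local block.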
unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {
    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases.  A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

      // We need to mutate the operands of new mov instructions to add implicit
      // uses of EXEC, but adding them invalidates the use_iterator, so defer
      // this.
      SmallVector<MachineInstr *, 4> CopiesToReplace;

      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
             Use = MRI.use_begin(MI.getOperand(0).getReg()),
             E = MRI.use_end();
           Use != E; ++Use) {
        MachineInstr *UseMI = Use->getParent();

        foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
                    CopiesToReplace, TII, TRI, MRI);
      }

      // Make sure we add EXEC uses to any new v_mov instructions created.
      for (MachineInstr *Copy : CopiesToReplace)
        Copy->addImplicitDefUseOperands(MF);

      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return false;
}