/// findDelayInstr - Starting from the delay-slot instruction, search
/// backwards through the basic block for an instruction that can be moved
/// into the delay slot without creating a hazard. Returns MBB.end() if no
/// suitable filler is found.
MachineBasicBlock::iterator
Filler::findDelayInstr(MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator slot) {
  SmallSet<unsigned, 32> RegDefs;
  SmallSet<unsigned, 32> RegUses;
  bool sawLoad = false;
  bool sawStore = false;

  if (slot == MBB.begin())
    return MBB.end();

  if (slot->getOpcode() == SP::RET || slot->getOpcode() == SP::TLS_CALL)
    return MBB.end();

  if (slot->getOpcode() == SP::RETL) {
    MachineBasicBlock::iterator J = slot;
    --J;

    if (J->getOpcode() == SP::RESTORErr
        || J->getOpcode() == SP::RESTOREri) {
      // Change retl to ret.
      slot->setDesc(Subtarget->getInstrInfo()->get(SP::RET));
      return J;
    }
  }

  // Call's delay filler can def some of call's uses.
  if (slot->isCall())
    insertCallDefsUses(slot, RegDefs, RegUses);
  else
    insertDefsUses(slot, RegDefs, RegUses);

  bool done = false;

  MachineBasicBlock::iterator I = slot;

  while (!done) {
    done = (I == MBB.begin());

    if (!done)
      --I;

    // Skip debug instructions.
    if (I->isDebugInstr())
      continue;

    if (I->hasUnmodeledSideEffects() || I->isInlineAsm() || I->isPosition() ||
        I->hasDelaySlot() || I->isBundledWithSucc())
      break;

    if (delayHasHazard(I, sawLoad, sawStore, RegDefs, RegUses)) {
      insertDefsUses(I, RegDefs, RegUses);
      continue;
    }

    return I;
  }
  return MBB.end();
}
/// RemoveDeadStores - Scan through a basic block and look for loads followed
/// by stores. If they're both using the same stack slot, then the store is
/// definitely dead. This could obviously be much more aggressive (consider
/// pairs with instructions between them), but such extensions might have a
/// considerable compile time impact.
bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
  // FIXME: This could be much more aggressive, but we need to investigate
  // the compile time impact of doing so.
  bool changed = false;

  SmallVector<MachineInstr*, 4> toErase;

  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
       I != E; ++I) {
    if (DCELimit != -1 && (int)NumDead >= DCELimit)
      break;
    int FirstSS, SecondSS;
    if (TII->isStackSlotCopy(*I, FirstSS, SecondSS) && FirstSS == SecondSS &&
        FirstSS != -1) {
      ++NumDead;
      changed = true;
      toErase.push_back(&*I);
      continue;
    }

    MachineBasicBlock::iterator NextMI = std::next(I);
    MachineBasicBlock::iterator ProbableLoadMI = I;

    unsigned LoadReg = 0;
    unsigned StoreReg = 0;
    unsigned LoadSize = 0;
    unsigned StoreSize = 0;
    if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS, LoadSize)))
      continue;
    // Skip the ...pseudo debugging... instructions between a load and store.
    while ((NextMI != E) && NextMI->isDebugInstr()) {
      ++NextMI;
      ++I;
    }
    if (NextMI == E) continue;
    if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS, StoreSize)))
      continue;
    if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1 ||
        LoadSize != StoreSize)
      continue;

    ++NumDead;
    changed = true;

    // If the store kills the loaded register, the loaded value has no other
    // uses, so the load is dead as well.
    if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) {
      ++NumDead;
      toErase.push_back(&*ProbableLoadMI);
    }

    toErase.push_back(&*NextMI);
    ++I;
  }

  for (SmallVectorImpl<MachineInstr *>::iterator I = toErase.begin(),
       E = toErase.end(); I != E; ++I)
    (*I)->eraseFromParent();

  return changed;
}
/// saveScavengerRegister - Spill the register so it can be used by the
/// register scavenger. Return true.
bool ThumbRegisterInfo::saveScavengerRegister(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC,
    unsigned Reg) const {

  const ARMSubtarget &STI = MBB.getParent()->getSubtarget<ARMSubtarget>();
  if (!STI.isThumb1Only())
    return ARMBaseRegisterInfo::saveScavengerRegister(MBB, I, UseMI, RC, Reg);

  // Thumb1 can't use the emergency spill slot on the stack because
  // ldr/str immediate offsets must be positive, and if we're referencing
  // off the frame pointer (if, for example, there are alloca() calls in
  // the function) the offset will be negative. Use R12 instead since that's
  // a call clobbered register that we know won't be used in Thumb1 mode.
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  DebugLoc DL;
  BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
      .addReg(ARM::R12, RegState::Define)
      .addReg(Reg, RegState::Kill)
      .add(predOps(ARMCC::AL));

  // The UseMI is where we would like to restore the register. If there's
  // interference with R12 before then, however, we'll need to restore it
  // before that instead and adjust the UseMI.
  bool done = false;
  for (MachineBasicBlock::iterator II = I; !done && II != UseMI; ++II) {
    if (II->isDebugInstr())
      continue;
    // If this instruction affects R12, adjust our restore point.
    for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = II->getOperand(i);
      if (MO.isRegMask() && MO.clobbersPhysReg(ARM::R12)) {
        UseMI = II;
        done = true;
        break;
      }
      if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
          TargetRegisterInfo::isVirtualRegister(MO.getReg()))
        continue;
      if (MO.getReg() == ARM::R12) {
        UseMI = II;
        done = true;
        break;
      }
    }
  }
  // Restore the register from R12.
  BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr))
      .addReg(Reg, RegState::Define)
      .addReg(ARM::R12, RegState::Kill)
      .add(predOps(ARMCC::AL));

  return true;
}
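/// collectCallInfo - Analyze the call sequence beginning at the frame setup
/// instruction I and fill in Context with the frame setup, the argument
/// stores, the stack-pointer copy (if any), and the call itself. Sets
/// Context.UsePush only if the sequence matches the simple gap-free store
/// pattern this transformation can handle.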
void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator I,
                                               CallContext &Context) {
  // Check that this particular call sequence is amenable to the
  // transformation.
  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());

  // We expect to enter this at the beginning of a call sequence
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;
  Context.FrameSetup = FrameSetup;

  // How much do we adjust the stack? This puts an upper bound on
  // the number of parameters actually passed on it.
  unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;

  // A zero adjustment means no stack parameters
  if (!MaxAdjust) {
    Context.NoStackParams = true;
    return;
  }

  // Skip over DEBUG_VALUE.
  // For globals in PIC mode, we can have some LEAs here. Skip them as well.
  // TODO: Extend this to something that covers more cases.
  while (I->getOpcode() == X86::LEA32r || I->isDebugInstr())
    ++I;

  unsigned StackPtr = RegInfo.getStackRegister();
  auto StackPtrCopyInst = MBB.end();
  // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual
  // register. If it's there, use that virtual register as stack pointer
  // instead. Also, we need to locate this instruction so that we can later
  // safely ignore it while doing the conservative processing of the call
  // chain. The COPY can be located anywhere between the call-frame setup
  // instruction and its first use. We use the call instruction as a boundary
  // because it is usually cheaper to check if an instruction is a call than
  // checking if an instruction uses a register.
  for (auto J = I; !J->isCall(); ++J)
    if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
        J->getOperand(1).getReg() == StackPtr) {
      StackPtrCopyInst = J;
      Context.SPCopy = &*J++;
      StackPtr = Context.SPCopy->getOperand(0).getReg();
      break;
    }

  // Scan the call setup sequence for the pattern we're looking for.
  // We only handle a simple case - a sequence of store instructions that
  // push a sequence of stack-slot-aligned values onto the stack, with
  // no gaps between them.
  if (MaxAdjust > 4)
    Context.ArgStoreVector.resize(MaxAdjust, nullptr);

  DenseSet<unsigned int> UsedRegs;

  for (InstClassification Classification = Skip; Classification != Exit;
       ++I) {
    // If this is the COPY of the stack pointer, it's ok to ignore.
    if (I == StackPtrCopyInst)
      continue;
    Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);
    if (Classification != Convert)
      continue;
    // We know the instruction has a supported store opcode.
    // We only want movs of the form:
    // mov imm/reg, k(%StackPtr)
    // If we run into something else, bail.
    // Note that AddrBaseReg may, counter to its name, not be a register,
    // but rather a frame index.
    // TODO: Support the fi case. This should probably work now that we
    // have the infrastructure to track the stack pointer within a call
    // sequence.
    if (!I->getOperand(X86::AddrBaseReg).isReg() ||
        (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
        !I->getOperand(X86::AddrScaleAmt).isImm() ||
        (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
        (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
        (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
        !I->getOperand(X86::AddrDisp).isImm())
      return;

    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
    assert(StackDisp >= 0 &&
           "Negative stack displacement when passing parameters");

    // We really don't want to consider the unaligned case.
    if (StackDisp & (SlotSize - 1))
      return;
    StackDisp >>= Log2SlotSize;

    assert((size_t)StackDisp < Context.ArgStoreVector.size() &&
           "Function call has more parameters than the stack is adjusted for.");

    // If the same stack slot is being filled twice, something's fishy.
    if (Context.ArgStoreVector[StackDisp] != nullptr)
      return;
    Context.ArgStoreVector[StackDisp] = &*I;

    for (const MachineOperand &MO : I->uses()) {
      if (!MO.isReg())
        continue;
      unsigned int Reg = MO.getReg();
      if (RegInfo.isPhysicalRegister(Reg))
        UsedRegs.insert(Reg);
    }
  }

  --I;

  // We now expect the end of the sequence. If we stopped early,
  // or reached the end of the block without finding a call, bail.
  if (I == MBB.end() || !I->isCall())
    return;

  Context.Call = &*I;
  if ((++I)->getOpcode() != TII->getCallFrameDestroyOpcode())
    return;

  // Now, go through the vector, and see that we don't have any gaps,
  // but only a series of storing instructions.
  auto MMI = Context.ArgStoreVector.begin(), MME = Context.ArgStoreVector.end();
  for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)
    if (*MMI == nullptr)
      break;

  // If the call had no parameters, do nothing
  if (MMI == Context.ArgStoreVector.begin())
    return;

  // We are either at the last parameter, or a gap.
  // Make sure it's not a gap
  for (; MMI != MME; ++MMI)
    if (*MMI != nullptr)
      return;

  Context.UsePush = true;
}
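/// runOnMachineFunction - Greedily move instructions up, to just after the
/// last definition of their use operands, so that the live ranges of those
/// operands are shortened. An instruction is only moved when it is safe to
/// do so, no barrier is crossed, and more than one live range would shrink.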
bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();

  LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');

  InstOrderMap IOM;
  // Map from register to instruction order (value of IOM) where the
  // register is used last. When moving instructions up, we need to
  // make sure all its defs (including dead def) will not cross its
  // last use when moving up.
  DenseMap<unsigned, std::pair<unsigned, MachineInstr *>> UseMap;

  for (MachineBasicBlock &MBB : MF) {
    if (MBB.empty())
      continue;
    bool SawStore = false;
    BuildInstOrderMap(MBB.begin(), IOM);
    UseMap.clear();

    for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) {
      MachineInstr &MI = *Next;
      ++Next;
      if (MI.isPHI() || MI.isDebugInstr())
        continue;
      if (MI.mayStore())
        SawStore = true;

      unsigned CurrentOrder = IOM[&MI];
      unsigned Barrier = 0;
      MachineInstr *BarrierMI = nullptr;
      for (const MachineOperand &MO : MI.operands()) {
        if (!MO.isReg() || MO.isDebug())
          continue;
        if (MO.isUse())
          UseMap[MO.getReg()] = std::make_pair(CurrentOrder, &MI);
        else if (MO.isDead() && UseMap.count(MO.getReg()))
          // Barrier is the last instruction where MO gets used. MI should not
          // be moved above Barrier.
          if (Barrier < UseMap[MO.getReg()].first) {
            Barrier = UseMap[MO.getReg()].first;
            BarrierMI = UseMap[MO.getReg()].second;
          }
      }

      if (!MI.isSafeToMove(nullptr, SawStore)) {
        // If MI has side effects, it should become a barrier for code motion.
        // IOM is rebuilt from the next instruction to prevent later
        // instructions from being moved before this MI.
        if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
          BuildInstOrderMap(Next, IOM);
          SawStore = false;
        }
        continue;
      }

      const MachineOperand *DefMO = nullptr;
      MachineInstr *Insert = nullptr;

      // Number of live-ranges that will be shortened. We do not count
      // live-ranges that are defined by a COPY as it could be coalesced later.
      unsigned NumEligibleUse = 0;

      for (const MachineOperand &MO : MI.operands()) {
        if (!MO.isReg() || MO.isDead() || MO.isDebug())
          continue;
        unsigned Reg = MO.getReg();
        // Do not move the instruction if it def/uses a physical register,
        // unless it is a constant physical register or a noreg.
        if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
          if (!Reg || MRI.isConstantPhysReg(Reg))
            continue;
          Insert = nullptr;
          break;
        }
        if (MO.isDef()) {
          // Do not move if there is more than one def.
          if (DefMO) {
            Insert = nullptr;
            break;
          }
          DefMO = &MO;
        } else if (MRI.hasOneNonDBGUse(Reg) && MRI.hasOneDef(Reg) && DefMO &&
                   MRI.getRegClass(DefMO->getReg()) ==
                       MRI.getRegClass(MO.getReg())) {
          // The heuristic does not handle different register classes yet
          // (registers of different sizes, looser/tighter constraints). This
          // is because it needs a more accurate model to handle register
          // pressure correctly.
          MachineInstr &DefInstr = *MRI.def_instr_begin(Reg);
          if (!DefInstr.isCopy())
            NumEligibleUse++;
          Insert = FindDominatedInstruction(DefInstr, Insert, IOM);
        } else {
          Insert = nullptr;
          break;
        }
      }

      // If Barrier equals IOM[I], traverse forward to find if BarrierMI is
      // after Insert, if yes, then we should not hoist.
      for (MachineInstr *I = Insert; I && IOM[I] == Barrier;
           I = I->getNextNode())
        if (I == BarrierMI) {
          Insert = nullptr;
          break;
        }

      // Move the instruction when # of shrunk live range > 1.
      if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) {
        MachineBasicBlock::iterator I = std::next(Insert->getIterator());
        // Skip all the PHI and debug instructions.
        while (I != MBB.end() && (I->isPHI() || I->isDebugInstr()))
          I = std::next(I);
        if (I == MI.getIterator())
          continue;

        // Update the dominator order to be the same as the insertion point.
        // We do this to maintain a non-decreasing order without need to update
        // all instruction orders after the insertion point.
        unsigned NewOrder = IOM[&*I];
        IOM[&MI] = NewOrder;
        NumInstrsHoistedToShrinkLiveRange++;

        // Find MI's debug value following MI.
        MachineBasicBlock::iterator EndIter = std::next(MI.getIterator());
        if (MI.getOperand(0).isReg())
          for (; EndIter != MBB.end() && EndIter->isDebugValue() &&
                 EndIter->getOperand(0).isReg() &&
                 EndIter->getOperand(0).getReg() == MI.getOperand(0).getReg();
               ++EndIter, ++Next)
            IOM[&*EndIter] = NewOrder;
        MBB.splice(I, &MBB, MI.getIterator(), EndIter);
      }
    }
  }
  return false;
}
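/// findSurvivorReg - Return the candidate register that is unused for the
/// longest after StartMI. UseMI is set to the instruction where the search
/// stopped.
///
/// No more than InstrLimit instructions are inspected.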
unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
                                       BitVector &Candidates,
                                       unsigned InstrLimit,
                                       MachineBasicBlock::iterator &UseMI) {
  int Survivor = Candidates.find_first();
  assert(Survivor > 0 && "No candidates for scavenging");

  MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
  assert(StartMI != ME && "MI already at terminator");
  MachineBasicBlock::iterator RestorePointMI = StartMI;
  MachineBasicBlock::iterator MI = StartMI;

  bool inVirtLiveRange = false;
  for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
    if (MI->isDebugInstr()) {
      ++InstrLimit; // Don't count debug instructions
      continue;
    }
    bool isVirtKillInsn = false;
    bool isVirtDefInsn = false;
    // Remove any candidates touched by instruction.
    for (const MachineOperand &MO : MI->operands()) {
      if (MO.isRegMask())
        Candidates.clearBitsNotInMask(MO.getRegMask());
      if (!MO.isReg() || MO.isUndef() || !MO.getReg())
        continue;
      if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        if (MO.isDef())
          isVirtDefInsn = true;
        else if (MO.isKill())
          isVirtKillInsn = true;
        continue;
      }
      for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
        Candidates.reset(*AI);
    }
    // If we're not in a virtual reg's live range, this is a valid
    // restore point.
    if (!inVirtLiveRange) RestorePointMI = MI;

    // Update whether we're in the live range of a virtual register
    if (isVirtKillInsn) inVirtLiveRange = false;
    if (isVirtDefInsn) inVirtLiveRange = true;

    // Was our survivor untouched by this instruction?
    if (Candidates.test(Survivor))
      continue;

    // All candidates gone?
    if (Candidates.none())
      break;

    Survivor = Candidates.find_first();
  }
  // If we ran off the end, that's where we want to restore.
  if (MI == ME) RestorePointMI = ME;
  assert(RestorePointMI != StartMI &&
         "No available scavenger restore location!");

  // We ran out of candidates, so stop the search.
  UseMI = RestorePointMI;
  return Survivor;
}