/// runOnMachineFunction - Per-block peephole driver: walks every instruction,
/// applying copy/bitcast, compare, select, extension, immediate-folding, and
/// load-folding peepholes. Returns true if any instruction was changed,
/// deleted, or replaced.
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (skipOptnoneFunction(*MF.getFunction()))
    return false;

  DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
  DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');

  if (DisablePeephole)
    return false;

  TM = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  // The dominator tree is only needed (and only computed) in aggressive mode.
  DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;

  bool Changed = false;

  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    // All of this bookkeeping is block-local; it is reset for every MBB.
    bool SeenMoveImm = false;
    SmallPtrSet<MachineInstr*, 8> LocalMIs;
    SmallSet<unsigned, 4> ImmDefRegs;
    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    SmallSet<unsigned, 16> FoldAsLoadDefCandidates;

    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      // We may be erasing MI below, increment MII now.
      ++MII;
      LocalMIs.insert(MI);

      // Skip debug values. They should not affect this peephole optimization.
      if (MI->isDebugValue())
          continue;

      // If there exists an instruction which belongs to the following
      // categories, we will discard the load candidates.
      if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() ||
          MI->hasUnmodeledSideEffects()) {
        FoldAsLoadDefCandidates.clear();
        continue;
      }
      // Stores and calls may clobber memory, so pending load-fold candidates
      // can no longer be moved across this point.
      if (MI->mayStore() || MI->isCall())
        FoldAsLoadDefCandidates.clear();

      if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) ||
          (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
          (MI->isSelect() && optimizeSelect(MI))) {
        // MI is deleted.
        LocalMIs.erase(MI);
        Changed = true;
        continue;
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        // optimizeExtInstr might have created new instructions after MI
        // and before the already incremented MII. Adjust MII so that the
        // next iteration sees the new instructions.
        MII = MI;
        ++MII;
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      // Check whether MI is a load candidate for folding into a later
      // instruction. If MI is not a candidate, check whether we can fold an
      // earlier load into MI.
      if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
          !FoldAsLoadDefCandidates.empty()) {
        const MCInstrDesc &MIDesc = MI->getDesc();
        // Only scan use operands (defs come first in the operand list).
        for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
             ++i) {
          const MachineOperand &MOp = MI->getOperand(i);
          if (!MOp.isReg())
            continue;
          unsigned FoldAsLoadDefReg = MOp.getReg();
          if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
            // We need to fold load after optimizeCmpInstr, since
            // optimizeCmpInstr can enable folding by converting SUB to CMP.
            // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
            // we need it for markUsesInDebugValueAsUndef().
            unsigned FoldedReg = FoldAsLoadDefReg;
            MachineInstr *DefMI = nullptr;
            MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
                                                          FoldAsLoadDefReg,
                                                          DefMI);
            if (FoldMI) {
              // Update LocalMIs since we replaced MI with FoldMI and deleted
              // DefMI.
              DEBUG(dbgs() << "Replacing: " << *MI);
              DEBUG(dbgs() << " With: " << *FoldMI);
              LocalMIs.erase(MI);
              LocalMIs.erase(DefMI);
              LocalMIs.insert(FoldMI);
              MI->eraseFromParent();
              DefMI->eraseFromParent();
              MRI->markUsesInDebugValueAsUndef(FoldedReg);
              FoldAsLoadDefCandidates.erase(FoldedReg);
              ++NumLoadFold;
              // MI is replaced with FoldMI. Stop scanning its operands.
              Changed = true;
              break;
            }
          }
        }
      }
    }
  }

  return Changed;
}
/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
/// with physical registers. Use the register scavenger to find an
/// appropriate register to use.
///
/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
/// iterate over the vreg use list, which at this point only contains machine
/// operands for which eliminateFrameIndex need a new scratch reg.
void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
  // Run through the instructions and find any virtual registers.
  for (MachineFunction::iterator BB = Fn.begin(),
       E = Fn.end(); BB != E; ++BB) {
    RS->enterBasicBlock(BB);

    int SPAdj = 0;

    // The instruction stream may change in the loop, so check BB->end()
    // directly.
    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
      // We might end up here again with a NULL iterator if we scavenged a
      // register for which we inserted spill code for definition by what was
      // originally the first instruction in BB.
      if (I == MachineBasicBlock::iterator(NULL))
        I = BB->begin();

      MachineInstr *MI = I;
      // J = next instruction, P = previous instruction (NULL sentinel if I is
      // first); both are needed later to detect and recover from spill code
      // the scavenger inserts around I.
      MachineBasicBlock::iterator J = llvm::next(I);
      MachineBasicBlock::iterator P = I == BB->begin() ?
        MachineBasicBlock::iterator(NULL) : llvm::prior(I);

      // RS should process this instruction before we might scavenge at this
      // location. This is because we might be replacing a virtual register
      // defined by this instruction, and if so, registers killed by this
      // instruction are available, and defined registers are not.
      RS->forward(I);

      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        if (MI->getOperand(i).isReg()) {
          MachineOperand &MO = MI->getOperand(i);
          unsigned Reg = MO.getReg();
          if (Reg == 0)
            continue;
          if (!TargetRegisterInfo::isVirtualRegister(Reg))
            continue;

          // When we first encounter a new virtual register, it
          // must be a definition.
          assert(MI->getOperand(i).isDef() &&
                 "frame index virtual missing def!");
          // Scavenge a new scratch register
          const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
          unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);

          ++NumScavengedRegs;

          // Replace this reference to the virtual register with the
          // scratch register.
          assert (ScratchReg && "Missing scratch register!");
          Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);

          // Because this instruction was processed by the RS before this
          // register was allocated, make sure that the RS now records the
          // register as being used.
          RS->setUsed(ScratchReg);
        }
      }

      // If the scavenger needed to use one of its spill slots, the
      // spill code will have been inserted in between I and J. This is a
      // problem because we need the spill code before I: Move I to just
      // prior to J.
      if (I != llvm::prior(J)) {
        BB->splice(J, BB, I);

        // Before we move I, we need to prepare the RS to visit I again.
        // Specifically, RS will assert if it sees uses of registers that
        // it believes are undefined. Because we have already processed
        // register kills in I, when it visits I again, it will believe that
        // those registers are undefined. To avoid this situation, unprocess
        // the instruction I.
        assert(RS->getCurrentPosition() == I &&
          "The register scavenger has an unexpected position");
        I = P;
        RS->unprocess(P);
      } else
        ++I;
    }
  }
}
/// runOnMachineFunction - Eliminate PHI nodes by coalescing their operands
/// into a single register where possible, inserting copies where interference
/// is detected, updating LiveIntervals along the way. Always returns true
/// (the function is modified whenever PHIs are present).
bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
  LiveIntervals& LI = getAnalysis<LiveIntervals>();

  // Compute DFS numbers of each block
  computeDFS(Fn);

  // Determine which phi node operands need copies
  for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
    if (!I->empty() && I->begin()->isPHI())
      processBlock(I);

  // Break interferences where two different phis want to coalesce
  // in the same register.
  std::set<unsigned> seen;
  typedef std::map<unsigned, std::map<unsigned, MachineBasicBlock*> >
          RenameSetType;
  for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
       I != E; ++I) {
    for (std::map<unsigned, MachineBasicBlock*>::iterator
           OI = I->second.begin(), OE = I->second.end(); OI != OE; ) {
      if (!seen.count(OI->first)) {
        seen.insert(OI->first);
        ++OI;
      } else {
        // A source register claimed by two PHIs: defer it to the copy-waiting
        // list for its block and drop it from this rename set.
        Waiting[OI->second].insert(std::make_pair(OI->first, I->first));
        unsigned reg = OI->first;
        ++OI;
        I->second.erase(reg);
        DEBUG(dbgs() << "Removing Renaming: " << reg << " -> " << I->first
                     << "\n");
      }
    }
  }

  // Insert copies
  // FIXME: This process should probably preserve LiveIntervals
  SmallPtrSet<MachineBasicBlock*, 16> visited;
  MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
  InsertCopies(MDT.getRootNode(), visited);

  // Perform renaming
  for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
       I != E; ++I)
    while (I->second.size()) {
      std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin();

      DEBUG(dbgs() << "Renaming: " << SI->first << " -> " << I->first << "\n");

      if (SI->first != I->first) {
        if (mergeLiveIntervals(I->first, SI->first)) {
          Fn.getRegInfo().replaceRegWith(SI->first, I->first);

          // The renamed register may itself be the destination of another
          // rename set; fold that set into this one.
          if (RenameSets.count(SI->first)) {
            I->second.insert(RenameSets[SI->first].begin(),
                             RenameSets[SI->first].end());
            RenameSets.erase(SI->first);
          }
        } else {
          // Insert a last-minute copy if a conflict was detected.
          const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
          const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(I->first);
          TII->copyRegToReg(*SI->second, SI->second->getFirstTerminator(),
                            I->first, SI->first, RC, RC, DebugLoc());

          LI.renumber();

          LiveInterval& Int = LI.getOrCreateInterval(I->first);
          SlotIndex instrIdx =
            LI.getInstructionIndex(--SI->second->getFirstTerminator());
          if (Int.liveAt(instrIdx.getDefIndex()))
            Int.removeRange(instrIdx.getDefIndex(),
                            LI.getMBBEndIdx(SI->second).getNextSlot(),
                            true);

          LiveRange R = LI.addLiveRangeToEndOfBlock(I->first,
                                        --SI->second->getFirstTerminator());
          R.valno->setCopy(--SI->second->getFirstTerminator());
          R.valno->def = instrIdx.getDefIndex();

          DEBUG(dbgs() << "Renaming failed: " << SI->first << " -> "
                       << I->first << "\n");
        }
      }

      // The value reaching the end of the source block now feeds a PHI.
      LiveInterval& Int = LI.getOrCreateInterval(I->first);
      const LiveRange* LR =
        Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second));
      LR->valno->setHasPHIKill(true);

      I->second.erase(SI->first);
    }

  // Remove PHIs
  std::vector<MachineInstr*> phis;
  for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
    for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end();
         BI != BE; ++BI)
      if (BI->isPHI())
        phis.push_back(BI);
  }

  for (std::vector<MachineInstr*>::iterator I = phis.begin(), E = phis.end();
       I != E; ) {
    MachineInstr* PInstr = *(I++);

    // If this is a dead PHI node, then remove it from LiveIntervals.
    unsigned DestReg = PInstr->getOperand(0).getReg();
    LiveInterval& PI = LI.getInterval(DestReg);
    if (PInstr->registerDefIsDead(DestReg)) {
      if (PI.containsOneValue()) {
        LI.removeInterval(DestReg);
      } else {
        SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex();
        PI.removeRange(*PI.getLiveRangeContaining(idx), true);
      }
    } else {
      // Trim live intervals of input registers. They are no longer live into
      // this block if they died after the PHI. If they lived after it, don't
      // trim them because they might have other legitimate uses.
      for (unsigned i = 1; i < PInstr->getNumOperands(); i += 2) {
        unsigned reg = PInstr->getOperand(i).getReg();
        MachineBasicBlock* MBB = PInstr->getOperand(i+1).getMBB();
        LiveInterval& InputI = LI.getInterval(reg);
        if (MBB != PInstr->getParent() &&
            InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) &&
            InputI.expiredAt(LI.getInstructionIndex(PInstr).getNextIndex()))
          InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()),
                             LI.getInstructionIndex(PInstr),
                             true);
      }

      // If the PHI is not dead, then the valno defined by the PHI
      // now has an unknown def.
      SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex();
      const LiveRange* PLR = PI.getLiveRangeContaining(idx);
      PLR->valno->setIsPHIDef(true);
      LiveRange R (LI.getMBBStartIdx(PInstr->getParent()),
                   PLR->start, PLR->valno);
      PI.addRange(R);
    }

    LI.RemoveMachineInstrFromMaps(PInstr);
    PInstr->eraseFromParent();
  }

  LI.renumber();

  return true;
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
///
void PEI::replaceFrameIndices(MachineFunction &Fn) {
  if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?

  const TargetMachine &TM = Fn.getTarget();
  assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
  const TargetFrameLowering *TFI = TM.getFrameLowering();
  bool StackGrowsDown =
    TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
  int FrameSetupOpcode   = TII.getCallFrameSetupOpcode();
  int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();

  for (MachineFunction::iterator BB = Fn.begin(),
         E = Fn.end(); BB != E; ++BB) {
#ifndef NDEBUG
    int SPAdjCount = 0; // frame setup / destroy count.
#endif
    int SPAdj = 0;  // SP offset due to call frame setup / destroy.
    if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);

    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {

      if (I->getOpcode() == FrameSetupOpcode ||
          I->getOpcode() == FrameDestroyOpcode) {
#ifndef NDEBUG
        // Track whether we see even pairs of them
        SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1;
#endif
        // Remember how much SP has been adjusted to create the call
        // frame.
        int Size = I->getOperand(0).getImm();

        // The sign of the adjustment depends on both the growth direction
        // and whether this is the setup or the destroy pseudo.
        if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
            (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
          Size = -Size;

        SPAdj += Size;

        // Remember the instruction before the pseudo so we can resume
        // iteration at the instructions the target emits in its place.
        MachineBasicBlock::iterator PrevI = BB->end();
        if (I != BB->begin()) PrevI = prior(I);
        TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);

        // Visit the instructions created by eliminateCallFramePseudoInstr().
        if (PrevI == BB->end())
          I = BB->begin();     // The replaced instr was the first in the block.
        else
          I = llvm::next(PrevI);
        continue;
      }

      MachineInstr *MI = I;
      bool DoIncr = true;
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
        if (MI->getOperand(i).isFI()) {
          // Some instructions (e.g. inline asm instructions) can have
          // multiple frame indices and/or cause eliminateFrameIndex
          // to insert more than one instruction. We need the register
          // scavenger to go through all of these instructions so that
          // it can update its register information. We keep the
          // iterator at the point before insertion so that we can
          // revisit them in full.
          bool AtBeginning = (I == BB->begin());
          if (!AtBeginning) --I;

          // If this instruction has a FrameIndex operand, we need to
          // use that target machine register info object to eliminate
          // it.
          TRI.eliminateFrameIndex(MI, SPAdj, i,
                                  FrameIndexVirtualScavenging ?  NULL : RS);

          // Reset the iterator if we were at the beginning of the BB.
          if (AtBeginning) {
            I = BB->begin();
            DoIncr = false;
          }

          MI = 0;
          break;
        }

      if (DoIncr && I != BB->end()) ++I;

      // Update register states.
      // (MI is null if eliminateFrameIndex already consumed it above.)
      if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
    }

    // If we have evenly matched pairs of frame setup / destroy instructions,
    // make sure the adjustments come out to zero. If we don't have matched
    // pairs, we can't be sure the missing bit isn't in another basic block
    // due to a custom inserter playing tricks, so just asserting SPAdj==0
    // isn't sufficient. See tMOVCC on Thumb1, for example.
    assert((SPAdjCount || SPAdj == 0) &&
           "Unbalanced call frame setup / destroy pairs?");
  }
}
/// runOnMachineFunction - Union-find based strong PHI elimination: build
/// PHI congruence classes, split interferences, insert copies, then rename
/// every class to one representative register and delete the PHIs,
/// patching up LiveIntervals as it goes. Returns true if any PHI was removed.
bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TII = MF.getTarget().getInstrInfo();
  DT = &getAnalysis<MachineDominatorTree>();
  LI = &getAnalysis<LiveIntervals>();

  // Pass 1: union every PHI destination with all of its sources, and record
  // the defining instructions of PHI sources per block.
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      unsigned DestReg = BBI->getOperand(0).getReg();
      addReg(DestReg);
      PHISrcDefs[I].push_back(BBI);

      // PHI operands come in (reg, pred-MBB) pairs starting at index 1.
      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
        MachineOperand &SrcMO = BBI->getOperand(i);
        unsigned SrcReg = SrcMO.getReg();
        addReg(SrcReg);
        unionRegs(DestReg, SrcReg);

        MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
        if (DefMI)
          PHISrcDefs[DefMI->getParent()].push_back(DefMI);
      }
    }
  }

  // Perform a depth-first traversal of the dominator tree, splitting
  // interferences amongst PHI-congruence classes.
  DenseMap<unsigned, unsigned> CurrentDominatingParent;
  DenseMap<unsigned, unsigned> ImmediateDominatingParent;
  for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
    SplitInterferencesForBasicBlock(*DI->getBlock(),
                                    CurrentDominatingParent,
                                    ImmediateDominatingParent);
  }

  // Insert copies for all PHI source and destination registers.
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      InsertCopiesForPHI(BBI, I);
    }
  }

  // FIXME: Preserve the equivalence classes during copy insertion and use
  // the preversed equivalence classes instead of recomputing them.
  RegNodeMap.clear();
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      unsigned DestReg = BBI->getOperand(0).getReg();
      addReg(DestReg);

      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
        unsigned SrcReg = BBI->getOperand(i).getReg();
        addReg(SrcReg);
        unionRegs(DestReg, SrcReg);
      }
    }
  }

  DenseMap<unsigned, unsigned> RegRenamingMap;
  bool Changed = false;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
    while (BBI != BBE && BBI->isPHI()) {
      MachineInstr *PHI = BBI;

      assert(PHI->getNumOperands() > 0);

      // Every register in the class is merged into a single representative,
      // chosen as the first register seen for this color.
      unsigned SrcReg = PHI->getOperand(1).getReg();
      unsigned SrcColor = getRegColor(SrcReg);
      unsigned NewReg = RegRenamingMap[SrcColor];
      if (!NewReg) {
        NewReg = SrcReg;
        RegRenamingMap[SrcColor] = SrcReg;
      }
      MergeLIsAndRename(SrcReg, NewReg);

      unsigned DestReg = PHI->getOperand(0).getReg();
      // Destinations with an inserted copy are handled separately below.
      if (!InsertedDestCopies.count(DestReg))
        MergeLIsAndRename(DestReg, NewReg);

      for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
        unsigned SrcReg = PHI->getOperand(i).getReg();
        MergeLIsAndRename(SrcReg, NewReg);
      }

      ++BBI;
      LI->RemoveMachineInstrFromMaps(PHI);
      PHI->eraseFromParent();
      Changed = true;
    }
  }

  // Due to the insertion of copies to split live ranges, the live intervals are
  // guaranteed to not overlap, except in one case: an original PHI source and a
  // PHI destination copy. In this case, they have the same value and thus don't
  // truly intersect, so we merge them into the value live at that point.
  // FIXME: Is there some better way we can handle this?
  for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
       E = InsertedDestCopies.end(); I != E; ++I) {
    unsigned DestReg = I->first;
    unsigned DestColor = getRegColor(DestReg);
    unsigned NewReg = RegRenamingMap[DestColor];

    LiveInterval &DestLI = LI->getInterval(DestReg);
    LiveInterval &NewLI = LI->getInterval(NewReg);

    assert(DestLI.ranges.size() == 1
           && "PHI destination copy's live interval should be a single live "
               "range from the beginning of the BB to the copy instruction.");
    LiveRange *DestLR = DestLI.begin();
    VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
    if (!NewVNI) {
      NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
      MachineInstr *CopyInstr = I->second;
      CopyInstr->getOperand(1).setIsKill(true);
    }

    LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
    NewLI.addRange(NewLR);

    LI->removeInterval(DestReg);
    MRI->replaceRegWith(DestReg, NewReg);
  }

  // Adjust the live intervals of all PHI source registers to handle the case
  // where the PHIs in successor blocks were the only later uses of the source
  // register.
  for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
       E = InsertedSrcCopySet.end(); I != E; ++I) {
    MachineBasicBlock *MBB = I->first;
    unsigned SrcReg = I->second;
    if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
      SrcReg = RenamedRegister;

    LiveInterval &SrcLI = LI->getInterval(SrcReg);

    bool isLiveOut = false;
    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
         SE = MBB->succ_end(); SI != SE; ++SI) {
      if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
        isLiveOut = true;
        break;
      }
    }

    if (isLiveOut)
      continue;

    // No successor uses the value any more: trim the interval back to the
    // last in-block use and mark that use as a kill.
    MachineOperand *LastUse = findLastUse(MBB, SrcReg);
    assert(LastUse);
    SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
    SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB));
    LastUse->setIsKill(true);
  }

  Allocator.Reset();
  RegNodeMap.clear();
  PHISrcDefs.clear();
  InsertedSrcCopySet.clear();
  InsertedSrcCopyMap.clear();
  InsertedDestCopies.clear();

  return Changed;
}
/// runOnMachineFunction - Per-block peephole driver (single load-fold
/// candidate variant): applies bitcast, compare, select, extension,
/// immediate-folding and load-folding peepholes. Returns true if anything
/// changed.
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (DisablePeephole)
    return false;

  TM  = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  // The dominator tree is only needed (and only computed) in aggressive mode.
  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;

  bool Changed = false;

  SmallPtrSet<MachineInstr*, 8> LocalMIs;
  SmallSet<unsigned, 4> ImmDefRegs;
  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
  // Single pending load that may be folded into a later user; 0 = none.
  unsigned FoldAsLoadDefReg;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;
    LocalMIs.clear();
    ImmDefRegs.clear();
    ImmDefMIs.clear();
    FoldAsLoadDefReg = 0;

    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      // We may be erasing MI below, increment MII now.
      ++MII;
      LocalMIs.insert(MI);

      // If there exists an instruction which belongs to the following
      // categories, we will discard the load candidate.
      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
          MI->hasUnmodeledSideEffects()) {
        FoldAsLoadDefReg = 0;
        continue;
      }
      // Stores and calls may clobber memory, so the pending load cannot be
      // moved across this point.
      if (MI->mayStore() || MI->isCall())
        FoldAsLoadDefReg = 0;

      if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) ||
          (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
          (MI->isSelect() && optimizeSelect(MI))) {
        // MI is deleted.
        LocalMIs.erase(MI);
        Changed = true;
        continue;
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        // optimizeExtInstr might have created new instructions after MI
        // and before the already incremented MII. Adjust MII so that the
        // next iteration sees the new instructions.
        MII = MI;
        ++MII;
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      // Check whether MI is a load candidate for folding into a later
      // instruction. If MI is not a candidate, check whether we can fold an
      // earlier load into MI.
      if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
        // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr
        // can enable folding by converting SUB to CMP.
        MachineInstr *DefMI = 0;
        MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
                                                      FoldAsLoadDefReg, DefMI);
        if (FoldMI) {
          // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
          LocalMIs.erase(MI);
          LocalMIs.erase(DefMI);
          LocalMIs.insert(FoldMI);
          MI->eraseFromParent();
          DefMI->eraseFromParent();
          ++NumLoadFold;

          // MI is replaced with FoldMI.
          Changed = true;
          continue;
        }
      }
    }
  }

  return Changed;
}
/// runOnMachineFunction - Per-block peephole driver (oldest variant):
/// applies bitcast, compare, extension, and immediate-folding peepholes.
/// When an instruction is deleted, iteration restarts just after the
/// previously visited instruction (tracked via First/PMII) rather than
/// relying on a pre-incremented iterator.
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (DisablePeephole)
    return false;

  TM  = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  // The dominator tree is only needed (and only computed) in aggressive mode.
  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;

  bool Changed = false;

  SmallPtrSet<MachineInstr*, 8> LocalMIs;
  SmallSet<unsigned, 4> ImmDefRegs;
  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;
    LocalMIs.clear();
    ImmDefRegs.clear();
    ImmDefMIs.clear();

    // First is true until we have visited one instruction; PMII always
    // points at the previously visited instruction once First is false.
    bool First = true;
    MachineBasicBlock::iterator PMII;
    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      LocalMIs.insert(MI);

      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
          MI->hasUnmodeledSideEffects()) {
        ++MII;
        continue;
      }

      if (MI->isBitcast()) {
        if (optimizeBitcastInstr(MI, MBB)) {
          // MI is deleted. Resume right after the previous instruction.
          LocalMIs.erase(MI);
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      } else if (MI->isCompare()) {
        if (optimizeCmpInstr(MI, MBB)) {
          // MI is deleted. Resume right after the previous instruction.
          LocalMIs.erase(MI);
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      First = false;
      PMII = MII;
      ++MII;
    }
  }

  return Changed;
}
/// insertFrameReferenceRegisters - For frame-index operands that refer to
/// pre-allocated objects in the local block, ask the target whether a virtual
/// base register would help, materialize (or reuse) such base registers, and
/// rewrite the operands to use them. Returns true if any base register was
/// used.
bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
  // Scan the function's instructions looking for frame index references.
  // For each, ask the target if it wants a virtual base register for it
  // based on what we can tell it about where the local will end up in the
  // stack frame. If it wants one, re-use a suitable one we've previously
  // allocated, or if there isn't one that fits the bill, allocate a new one
  // and ask the target to create a defining instruction for it.
  bool UsedBaseReg = false;

  MachineFrameInfo *MFI = Fn.getFrameInfo();
  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
  const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
  bool StackGrowsDown =
    TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;

  // Collect all of the instructions in the block that reference
  // a frame index. Also store the frame index referenced to ease later
  // lookup. (For any insn that has more than one FI reference, we arbitrarily
  // choose the first one).
  SmallVector<FrameRef, 64> FrameReferenceInsns;
  // A base register definition is a register + offset pair.
  SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;

  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
      MachineInstr *MI = I;

      // Debug value instructions can't be out of range, so they don't need
      // any updates.
      if (MI->isDebugValue())
        continue;

      // For now, allocate the base register(s) within the basic block
      // where they're used, and don't try to keep them around outside
      // of that. It may be beneficial to try sharing them more broadly
      // than that, but the increased register pressure makes that a
      // tricky thing to balance. Investigate if re-materializing these
      // becomes an issue.
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        // Consider replacing all frame index operands that reference
        // an object allocated in the local block.
        if (MI->getOperand(i).isFI()) {
          // Don't try this with values not in the local block.
          if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex()))
            break;
          FrameReferenceInsns.
            push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()]));
          break;
        }
      }
    }
  }

  // Sort the frame references by local offset
  array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());

  MachineBasicBlock *Entry = Fn.begin();

  // Loop through the frame references and allocate for them as necessary.
  for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
    MachineBasicBlock::iterator I =
      FrameReferenceInsns[ref].getMachineInstr();
    MachineInstr *MI = I;
    for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) {
      // Consider replacing all frame index operands that reference
      // an object allocated in the local block.
      if (MI->getOperand(idx).isFI()) {
        int FrameIdx = MI->getOperand(idx).getIndex();

        assert(MFI->isObjectPreAllocated(FrameIdx) &&
               "Only pre-allocated locals expected!");

        DEBUG(dbgs() << "Considering: " << *MI);
        if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) {
          unsigned BaseReg = 0;
          int64_t Offset = 0;
          int64_t FrameSizeAdjust =
            StackGrowsDown ? MFI->getLocalFrameSize() : 0;

          DEBUG(dbgs() << "  Replacing FI in: " << *MI);

          // If we have a suitable base register available, use it; otherwise
          // create a new one. Note that any offset encoded in the
          // instruction itself will be taken into account by the target,
          // so we don't have to adjust for it here when reusing a base
          // register.
          std::pair<unsigned, int64_t> RegOffset;
          if (lookupCandidateBaseReg(BaseRegisters, RegOffset,
                                     FrameSizeAdjust,
                                     LocalOffsets[FrameIdx],
                                     MI, TRI)) {
            DEBUG(dbgs() << "  Reusing base register " <<
                  RegOffset.first << "\n");
            // We found a register to reuse.
            BaseReg = RegOffset.first;
            Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] -
              RegOffset.second;
          } else {
            // No previously defined register was in range, so create a
            // new one.
            int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
            const MachineFunction *MF = MI->getParent()->getParent();
            const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
            BaseReg = Fn.getRegInfo().createVirtualRegister(RC);

            DEBUG(dbgs() << "  Materializing base register " << BaseReg <<
                  " at frame local offset " <<
                  LocalOffsets[FrameIdx] + InstrOffset << "\n");

            // Tell the target to insert the instruction to initialize
            // the base register.
            //            MachineBasicBlock::iterator InsertionPt = Entry->begin();
            TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
                                              InstrOffset);

            // The base register already includes any offset specified
            // by the instruction, so account for that so it doesn't get
            // applied twice.
            Offset = -InstrOffset;

            int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] +
              InstrOffset;
            BaseRegisters.push_back(
              std::pair<unsigned, int64_t>(BaseReg, BaseOffset));
            ++NumBaseRegisters;
            UsedBaseReg = true;
          }
          assert(BaseReg != 0 && "Unable to allocate virtual base register!");

          // Modify the instruction to use the new base register rather
          // than the frame index operand.
          TRI->resolveFrameIndex(I, BaseReg, Offset);
          DEBUG(dbgs() << "Resolved: " << *MI);

          ++NumReplacements;
        }
      }
    }
  }
  return UsedBaseReg;
}
/// runOnMachineFunction - Transfer the bit-width annotations carried by the
/// BitWidthAnnotator metadata onto the target flags of each machine operand,
/// propagating widths through register definitions, then discard the
/// temporary annotators.
///
/// Returns false: only operand flags / annotator metadata are rewritten, the
/// instruction stream itself is untouched.
bool BitLevelInfo::runOnMachineFunction(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  VFInfo *VFI = MF.getInfo<VFInfo>();

  // No need to run the pass if bitwidth information not available anymore.
  if (!VFI->isBitWidthAnnotated())
    return false;

  // Annotate the bit width information to target flag.
  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI)
    for (MachineBasicBlock::iterator I = BI->begin(), E = BI->end();
         I != E; ++I) {
      MachineInstr &Instr = *I;
      bool isShifts = false;
      switch (Instr.getOpcode()) {
      default: break;
      case VTM::IMPLICIT_DEF:
        continue;
      case VTM::VOpRet: {
        // Setup the bit width for predicate operand.
        MachineOperand &Op = Instr.getOperand(0);
        VInstrInfo::setBitWidth(Op, 1);
        continue;
      }
      case VTM::VOpToState:
      case VTM::VOpToStateb: {
        // Setup the bit width for predicate operand.
        MachineOperand &Op = Instr.getOperand(0);
        if (Op.isImm()) {
          assert(Op.getImm() && "Unexpected 'never' in unconditional branch!");
          Op.ChangeToRegister(0, false);
          VInstrInfo::setBitWidth(Op, 1);
        }

        MachineOperand &Pred = Instr.getOperand(2);
        if (Pred.isImm()) {
          assert(Pred.getImm() &&
                 "Unexpected 'never' in unconditional branch!");
          Pred.ChangeToRegister(0, false);
          // Fixed a copy-paste bug: the original code re-annotated Op here,
          // leaving the just-rewritten predicate operand without a bit width.
          VInstrInfo::setBitWidth(Pred, 1);
        }
        continue;
      }
      case VTM::COPY:
      case VTM::PHI:
        continue;
      case VTM::VOpSRA:
      case VTM::VOpSRA_c:
      case VTM::VOpSRL:
      case VTM::VOpSRL_c:
      case VTM::VOpSHL:
      case VTM::VOpSHL_c:
        isShifts = true;
        break;
      }

      BitWidthAnnotator Annotator(Instr);

      if (isShifts) {
        // Fix the RHS operand width: a shift amount only needs
        // ceil(log2(LHS width)) bits.
        Annotator.setBitWidth(Log2_32_Ceil(Annotator.getBitWidth(1)), 2);
        Annotator.updateBitWidth();
      }

      // The last two operands hold the annotator itself, skip them.
      for (unsigned i = 0, e = Instr.getNumOperands() - 2; i < e; ++i) {
        MachineOperand &MO = Instr.getOperand(i);
        if (!MO.isReg() && !MO.isImm() && !MO.isSymbol())
          continue;

        // Do not disturb the original target flags.
        if (MO.isSymbol() && MO.getTargetFlags() != 0)
          continue;

        unsigned BitWidth = Annotator.getBitWidthOrZero(i);
        if (BitWidth == 0) {
          // Already have bitwidth information.
          if (MO.getTargetFlags())
            continue;

          // Only internal-call immediates may legitimately lack a width;
          // assume the maximum width for them.
          assert(Instr.getOpcode() == VTM::VOpInternalCall && MO.isImm() &&
                 "Bitwidth info not available!");
          BitWidth = 64;
        }

        bool Changed = updateBitWidth(MO, BitWidth);
        // A widened definition must be pushed to all users of the register.
        if (MO.isReg() && MO.isDef() && Changed)
          propagateBitWidth(MO);
      }

      Annotator.changeToDefaultPred();
    }

  DEBUG(dbgs() << "---------- After bit width annotation.\n");
  DEBUG(MF.dump());

  // Tell the MachineFunctionInfo that we had changed all annotators to default
  // predicate operand.
  VFI->removeBitWidthAnnotators();
  return false;
}
/// runOnMachineFunction - Delete KILL pseudo-instructions, then packetize the
/// remaining instructions region by region with the DFA-driven packetizer.
bool MSPUPacketizer::runOnMachineFunction(MachineFunction &Fn) {
  const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();

  // Instantiate the packetizer.
  MSPUPacketizerList Packetizer(Fn, MLI, MDT);

  // DFA state table should not be empty.
  assert(Packetizer.getResourceTracker() && "Empty DFA table!");

  // First pass: strip every KILL pseudo-instruction. KILLs confuse the
  // dependence analysis. Consider:
  //   D0 = ...          (Insn 0)
  //   R0 = KILL R0, D0  (Insn 1)
  //   R0 = ...          (Insn 2)
  // With Insn 1 present the dependence graph omits the output dependence
  // between Insn 0 and Insn 2, which can lead to incorrect packetization.
  for (MachineFunction::iterator BB = Fn.begin(), BBEnd = Fn.end();
       BB != BBEnd; ++BB) {
    MachineBasicBlock::iterator Cur = BB->begin();
    while (Cur != BB->end()) {
      if (Cur->isKill())
        Cur = BB->erase(Cur);
      else
        ++Cur;
    }
  }

  // Second pass: walk each block backwards, carving it into scheduling
  // regions and packetizing each region.
  for (MachineFunction::iterator BB = Fn.begin(), BBEnd = Fn.end();
       BB != BBEnd; ++BB) {
    unsigned InstrsLeft = BB->size();
    MachineBasicBlock::iterator RegionEnd = BB->end();
    while (RegionEnd != BB->begin()) {
      // The next region starts above the previous region. Look backward in
      // the instruction stream until we find the nearest boundary.
      MachineBasicBlock::iterator RegionBegin = RegionEnd;
      while (RegionBegin != BB->begin()) {
        if (TII->isSchedulingBoundary(llvm::prior(RegionBegin), BB, Fn))
          break;
        --RegionBegin;
        --InstrsLeft;
      }
      // NOTE(review): the boundary found above is immediately discarded —
      // the region is forced to start at the block head, as in the upstream
      // code this mirrors. Confirm whether that is intentional.
      RegionBegin = BB->begin();

      // Skip empty scheduling regions.
      if (RegionBegin == RegionEnd) {
        RegionEnd = llvm::prior(RegionEnd);
        --InstrsLeft;
        continue;
      }
      // Skip regions with one instruction.
      if (RegionBegin == llvm::prior(RegionEnd)) {
        RegionEnd = llvm::prior(RegionEnd);
        continue;
      }

      // PacketizeMIs() does a VLIW scheduling on the MachineInstr list and
      // packetizes it.
      Packetizer.PacketizeMIs(BB, RegionBegin, RegionEnd);
      RegionEnd = RegionBegin;
    }
  }

  return true;
}
/// rewrite - Replace every virtual-register operand in the function with the
/// physical register assigned to it, fix up sub-register operands by adding
/// the required implicit super-register operands, and remove (or downgrade to
/// KILL) the identity copies that become no-ops after substitution.
/// Runs after register allocation: asserts if any virtual register is
/// unmapped.
void VirtRegMap::rewrite(SlotIndexes *Indexes) {
  DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
               << "********** Function: "
               << MF->getFunction()->getName() << '\n');
  DEBUG(dump());
  // Per-instruction scratch lists of super-registers that need implicit
  // operands added once all explicit operands have been rewritten.
  SmallVector<unsigned, 8> SuperDeads;
  SmallVector<unsigned, 8> SuperDefs;
  SmallVector<unsigned, 8> SuperKills;

  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
       MBBI != MBBE; ++MBBI) {
    DEBUG(MBBI->print(dbgs(), Indexes));
    for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end();
         MII != MIE;) {
      MachineInstr *MI = MII;
      // Advance MII now: MI may be erased below (identity-copy removal).
      ++MII;

      for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
           MOE = MI->operands_end(); MOI != MOE; ++MOI) {
        MachineOperand &MO = *MOI;
        if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
          continue;
        unsigned VirtReg = MO.getReg();
        unsigned PhysReg = getPhys(VirtReg);
        assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg");

        // Preserve semantics of sub-register operands.
        if (MO.getSubReg()) {
          // A virtual register kill refers to the whole register, so we may
          // have to add <imp-use,kill> operands for the super-register. A
          // partial redef always kills and redefines the super-register.
          if (MO.readsReg() && (MO.isDef() || MO.isKill()))
            SuperKills.push_back(PhysReg);

          if (MO.isDef()) {
            // The <def,undef> flag only makes sense for sub-register defs, and
            // we are substituting a full physreg. An <imp-use,kill> operand
            // from the SuperKills list will represent the partial read of the
            // super-register.
            MO.setIsUndef(false);

            // Also add implicit defs for the super-register.
            if (MO.isDead())
              SuperDeads.push_back(PhysReg);
            else
              SuperDefs.push_back(PhysReg);
          }

          // PhysReg operands cannot have subregister indexes.
          PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg());
          assert(PhysReg && "Invalid SubReg for physical register");
          MO.setSubReg(0);
        }
        // Rewrite. Note we could have used MachineOperand::substPhysReg(), but
        // we need the inlining here.
        MO.setReg(PhysReg);
      }

      // Add any missing super-register kills after rewriting the whole
      // instruction. (Deferred until here so the operand list is not mutated
      // while being iterated above.)
      while (!SuperKills.empty())
        MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);

      while (!SuperDeads.empty())
        MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true);

      while (!SuperDefs.empty())
        MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);

      DEBUG(dbgs() << "> " << *MI);

      // Finally, remove any identity copies.
      if (MI->isIdentityCopy()) {
        ++NumIdCopies;
        if (MI->getNumOperands() == 2) {
          // A plain two-operand identity copy is a true no-op: drop it from
          // the analysis maps and delete it.
          DEBUG(dbgs() << "Deleting identity copy.\n");
          RemoveMachineInstrFromMaps(MI);
          if (Indexes)
            Indexes->removeMachineInstrFromMaps(MI);
          // It's safe to erase MI because MII has already been incremented.
          MI->eraseFromParent();
        } else {
          // Transform identity copy to a KILL to deal with subregisters.
          MI->setDesc(TII->get(TargetOpcode::KILL));
          DEBUG(dbgs() << "Identity copy: " << *MI);
        }
      }
    }
  }

  // Tell MRI about physical registers in use.
  for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
    if (!MRI->reg_nodbg_empty(Reg))
      MRI->setPhysRegUsed(Reg);
}
/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
/// with physical registers. Use the register scavenger to find an
/// appropriate register to use.
///
/// Walks each block forward, allocating one scratch physreg per frame-index
/// virtual live range (only one such range may be live at a time), and — when
/// the target told us what constant value the register holds — re-uses the
/// previous scratch register and deletes the recomputation instructions.
void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
  // Run through the instructions and find any virtual registers.
  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
    RS->enterBasicBlock(BB);

    // FIXME: The logic flow in this function is still too convoluted.
    // It needs a cleanup refactoring. Do that in preparation for tracking
    // more than one scratch register value and using ranges to find
    // available scratch registers.
    unsigned CurrentVirtReg = 0;      // Virtual reg whose range we are in.
    unsigned CurrentScratchReg = 0;   // Physreg standing in for it.
    bool havePrevValue = false;       // Is PrevValue/scratch reusable?
    TargetRegisterInfo::FrameIndexValue PrevValue(0,0);
    TargetRegisterInfo::FrameIndexValue Value(0,0);
    MachineInstr *PrevLastUseMI = NULL;  // Last use of the previous scratch.
    unsigned PrevLastUseOp = 0;          // ...and its operand index.
    bool trackingCurrentValue = false;   // Target supplied a value for Reg.
    int SPAdj = 0;

    // The instruction stream may change in the loop, so check BB->end()
    // directly.
    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
      MachineInstr *MI = I;
      bool isDefInsn = false;
      bool isKillInsn = false;
      bool clobbersScratchReg = false;
      bool DoIncr = true;
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        if (MI->getOperand(i).isReg()) {
          MachineOperand &MO = MI->getOperand(i);
          unsigned Reg = MO.getReg();
          if (Reg == 0)
            continue;
          if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
            // If we have a previous scratch reg, check and see if anything
            // here kills whatever value is in there.
            if (Reg == CurrentScratchReg) {
              if (MO.isUse()) {
                // Two-address operands implicitly kill.
                if (MO.isKill() || MI->isRegTiedToDefOperand(i))
                  clobbersScratchReg = true;
              } else {
                assert (MO.isDef());
                clobbersScratchReg = true;
              }
            }
            continue;
          }
          // If this is a def, remember that this insn defines the value.
          // This lets us properly consider insns which re-use the scratch
          // register, such as r2 = sub r2, #imm, in the middle of the
          // scratch range.
          if (MO.isDef())
            isDefInsn = true;

          // Have we already allocated a scratch register for this virtual?
          if (Reg != CurrentVirtReg) {
            // When we first encounter a new virtual register, it
            // must be a definition.
            assert(MI->getOperand(i).isDef() &&
                   "frame index virtual missing def!");
            // We can't have nested virtual register live ranges because
            // there's only a guarantee of one scavenged register at a time.
            assert (CurrentVirtReg == 0 &&
                    "overlapping frame index virtual registers!");

            // If the target gave us information about what's in the register,
            // we can use that to re-use scratch regs.
            DenseMap<unsigned, FrameConstantEntry>::iterator Entry =
              FrameConstantRegMap.find(Reg);
            trackingCurrentValue = Entry != FrameConstantRegMap.end();
            if (trackingCurrentValue) {
              SPAdj = (*Entry).second.second;
              Value = (*Entry).second.first;
            } else {
              SPAdj = 0;
              Value.first = 0;
              Value.second = 0;
            }

            // If the scratch register from the last allocation is still
            // available, see if the value matches. If it does, just re-use it.
            if (trackingCurrentValue && havePrevValue && PrevValue == Value) {
              // FIXME: This assumes that the instructions in the live range
              // for the virtual register are exclusively for the purpose
              // of populating the value in the register. That's reasonable
              // for these frame index registers, but it's still a very, very
              // strong assumption. rdar://7322732. Better would be to
              // explicitly check each instruction in the range for references
              // to the virtual register. Only delete those insns that
              // touch the virtual register.

              // Find the last use of the new virtual register. Remove all
              // instruction between here and there, and update the current
              // instruction to reference the last use insn instead.
              MachineBasicBlock::iterator LastUseMI =
                findLastUseReg(I, BB->end(), Reg);

              // Remove all instructions up 'til the last use, since they're
              // just calculating the value we already have.
              BB->erase(I, LastUseMI);
              I = LastUseMI;

              // Extend the live range of the scratch register.
              PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false);
              RS->setUsed(CurrentScratchReg);
              CurrentVirtReg = Reg;

              // We deleted the instruction we were scanning the operands of.
              // Jump back to the instruction iterator loop. Don't increment
              // past this instruction since we updated the iterator already.
              DoIncr = false;
              break;
            }

            // Scavenge a new scratch register.
            CurrentVirtReg = Reg;
            const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
            CurrentScratchReg = RS->FindUnusedReg(RC);
            if (CurrentScratchReg == 0)
              // No register is "free". Scavenge a register.
              CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj);

            PrevValue = Value;
          }
          // Replace this reference to the virtual register with the
          // scratch register.
          assert (CurrentScratchReg && "Missing scratch register!");
          MI->getOperand(i).setReg(CurrentScratchReg);

          if (MI->getOperand(i).isKill()) {
            isKillInsn = true;
            PrevLastUseOp = i;
            PrevLastUseMI = MI;
          }
        }
      }
      // If this is the last use of the scratch, stop tracking it. The
      // last use will be a kill operand in an instruction that does
      // not also define the scratch register.
      if (isKillInsn && !isDefInsn) {
        CurrentVirtReg = 0;
        havePrevValue = trackingCurrentValue;
      }
      // Similarly, notice if instruction clobbered the value in the
      // register we're tracking for possible later reuse. This is noted
      // above, but enforced here since the value is still live while we
      // process the rest of the operands of the instruction.
      if (clobbersScratchReg) {
        havePrevValue = false;
        CurrentScratchReg = 0;
      }
      if (DoIncr) {
        RS->forward(I);
        ++I;
      }
    }
  }
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
///
/// Also lowers the call-frame setup/destroy pseudo-instructions, tracking the
/// running SP adjustment (SPAdj) so frame-index offsets within a call
/// sequence are computed correctly. Keeps the register scavenger in sync when
/// virtual-frame-index scavenging is disabled.
void PEI::replaceFrameIndices(MachineFunction &Fn) {
  if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?

  const TargetMachine &TM = Fn.getTarget();
  assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
  const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
  const TargetFrameInfo *TFI = TM.getFrameInfo();
  bool StackGrowsDown =
    TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
  int FrameSetupOpcode   = TRI.getCallFrameSetupOpcode();
  int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();

  for (MachineFunction::iterator BB = Fn.begin(),
         E = Fn.end(); BB != E; ++BB) {
    int SPAdj = 0;  // SP offset due to call frame setup / destroy.
    if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);

    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {

      if (I->getOpcode() == FrameSetupOpcode ||
          I->getOpcode() == FrameDestroyOpcode) {
        // Remember how much SP has been adjusted to create the call
        // frame.
        int Size = I->getOperand(0).getImm();

        // Sign depends on the stack growth direction: SPAdj tracks the net
        // adjustment so that it returns to zero at the matching destroy.
        if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
            (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
          Size = -Size;

        SPAdj += Size;

        // Remember our position relative to the pseudo being eliminated, so
        // we can resume iteration at whatever instructions replace it.
        MachineBasicBlock::iterator PrevI = BB->end();
        if (I != BB->begin()) PrevI = prior(I);
        TRI.eliminateCallFramePseudoInstr(Fn, *BB, I);

        // Visit the instructions created by eliminateCallFramePseudoInstr().
        if (PrevI == BB->end())
          I = BB->begin();     // The replaced instr was the first in the block.
        else
          I = llvm::next(PrevI);
        continue;
      }

      MachineInstr *MI = I;
      bool DoIncr = true;
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
        if (MI->getOperand(i).isFI()) {
          // Some instructions (e.g. inline asm instructions) can have
          // multiple frame indices and/or cause eliminateFrameIndex
          // to insert more than one instruction. We need the register
          // scavenger to go through all of these instructions so that
          // it can update its register information. We keep the
          // iterator at the point before insertion so that we can
          // revisit them in full.
          bool AtBeginning = (I == BB->begin());
          if (!AtBeginning) --I;

          // If this instruction has a FrameIndex operand, we need to
          // use that target machine register info object to eliminate
          // it.
          TargetRegisterInfo::FrameIndexValue Value;
          unsigned VReg =
            TRI.eliminateFrameIndex(MI, SPAdj, &Value,
                                    FrameIndexVirtualScavenging ?  NULL : RS);
          if (VReg) {
            // The target materialized the offset into a new virtual register;
            // record what it holds so scavengeFrameVirtualRegs can re-use
            // scratch registers later.
            assert (FrameIndexVirtualScavenging &&
                    "Not scavenging, but virtual returned from "
                    "eliminateFrameIndex()!");
            FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj);
          }

          // Reset the iterator if we were at the beginning of the BB.
          if (AtBeginning) {
            I = BB->begin();
            DoIncr = false;
          }

          // MI may have been replaced/deleted by eliminateFrameIndex; null it
          // so we do not feed a stale pointer to the scavenger below.
          MI = 0;
          break;
        }

      if (DoIncr && I != BB->end()) ++I;

      // Update register states.
      if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
    }

    assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
  }
}
/// runOnMachineFunction - For each GC_REG_ROOT pseudo with a register
/// operand, trace the root value backwards through copies, spills, and
/// reloads to where it lived at the preceding call site, then re-emit the
/// GC_REG_ROOT there (as a register root or, if the value was reloaded from a
/// stack slot, as a frame-index root) and delete the original.
bool GCMachineCodeFixup::runOnMachineFunction(MachineFunction &MF) {
  // Quick exit for functions that do not use GC.
  if (!MF.getFunction()->hasGC())
    return false;

  const TargetMachine &TM = MF.getTarget();
  const TargetInstrInfo *TII = TM.getInstrInfo();
  GCModuleInfo &GMI = getAnalysis<GCModuleInfo>();
  GCFunctionInfo &GCFI = GMI.getFunctionInfo(*MF.getFunction());

  for (MachineFunction::iterator MBBI = MF.begin(),
                                 MBBE = MF.end(); MBBI != MBBE; ++MBBI) {
    for (MachineBasicBlock::iterator MII = MBBI->begin(),
                                     MIE = MBBI->end(); MII != MIE;) {
      if (!MII->isGCRegRoot() || !MII->getOperand(0).isReg()) {
        ++MII;
        continue;
      }

      // Trace the register back to its location at the site of the call
      // (either a physical reg or a frame index).
      bool TracingReg = true;                       // false => tracing a slot.
      unsigned TracedReg = MII->getOperand(0).getReg();
      int FrameIndex;                               // valid when !TracingReg.

      // Walk backwards; the loop terminates at a call or at another
      // register-rooted GC_REG_ROOT (asserts on anything unexpected).
      MachineBasicBlock::iterator PrevII = MII;
      for (--PrevII;; --PrevII) {
        if (PrevII->isGCRegRoot() && PrevII->getOperand(0).isReg())
          break;
        if (PrevII->isCall())
          break;

        int FI;

        // Trace back through register reloads.
        unsigned Reg =
          TM.getInstrInfo()->isLoadFromStackSlotPostFE(&*PrevII, FI);
        if (Reg) {
          // This is a reload. If we're tracing this register, start tracing
          // the frame index instead.
          if (TracingReg && TracedReg == Reg) {
            TracingReg = false;
            FrameIndex = FI;
          }
          continue;
        }

        // Trace back through spills.
        if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&*PrevII, FI))
          continue;

        // Trace back through register-to-register copies.
        if (PrevII->isCopy()) {
          if (TracingReg && TracedReg == PrevII->getOperand(0).getReg())
            TracedReg = PrevII->getOperand(1).getReg();
          continue;
        }

        // Trace back through non-register GC_REG_ROOT instructions.
        if (PrevII->isGCRegRoot() && !PrevII->getOperand(0).isReg())
          continue;

        DEBUG(dbgs() << "Bad instruction: " << *PrevII);
        llvm_unreachable("GC_REG_ROOT found in an unexpected location!");
      }

      // Now we've reached either a call or another GC_REG_ROOT instruction.
      // Move the GC_REG_ROOT instruction we're considering to the right
      // place, and rewrite it if necessary.
      //
      // Also, tell the GCFunctionInfo about the frame index, since this is
      // our only chance -- the frame indices will be deleted by the time
      // GCMachineCodeAnalysis runs.
      ++PrevII;
      unsigned RootIndex = MII->getOperand(1).getImm();
      MachineInstr *NewMI;
      if (TracingReg) {
        MachineInstrBuilder MIB = BuildMI(MF, MII->getDebugLoc(),
                                          TII->get(TargetOpcode::GC_REG_ROOT));
        MIB.addReg(TracedReg).addImm(RootIndex);
        NewMI = MIB;
      } else {
        NewMI = TII->emitFrameIndexGCRegRoot(MF, FrameIndex, RootIndex,
                                             MII->getDebugLoc());
        GCFI.spillRegRoot(RootIndex, FrameIndex);
      }

      MBBI->insert(PrevII, NewMI);

      // Erase the old root; grab the successor first so iteration continues
      // safely past the deleted instruction.
      MachineBasicBlock::iterator NextII = MII;
      ++NextII;
      MII->eraseFromParent();
      MII = NextII;
    }
  }

  return true;
}
/// runOnMachineFunction - Reduce two-address instructions to two operands.
///
/// For every instruction with tied operands, either transform/commute it so
/// the tie disappears, or make the destination register hold the source's
/// value by inserting a copy (or rematerializing the source's def) just
/// before the instruction, then rewrite the tied use to the destination
/// register. Kill flags and LiveVariables are updated along the way, and
/// rematerialized defs that become dead are erased at the end.
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(errs() << "Machine Function\n");
  const TargetMachine &TM = MF.getTarget();
  MRI = &MF.getRegInfo();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  LV = getAnalysisIfAvailable<LiveVariables>();
  AA = &getAnalysis<AliasAnalysis>();

  bool MadeChange = false;

  DEBUG(errs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
  DEBUG(errs() << "********** Function: "
        << MF.getFunction()->getName() << '\n');

  // ReMatRegs - Keep track of the registers whose def's are remat'ed.
  BitVector ReMatRegs;
  ReMatRegs.resize(MRI->getLastVirtReg()+1);

  // Maps a tied source register to the (use-index, def-index) operand pairs
  // tied to it within the current instruction.
  typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
    TiedOperandMap;
  TiedOperandMap TiedOperands(4);

  SmallPtrSet<MachineInstr*, 8> Processed;
  for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
       mbbi != mbbe; ++mbbi) {
    unsigned Dist = 0;
    DistanceMap.clear();
    SrcRegMap.clear();
    DstRegMap.clear();
    Processed.clear();
    for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
         mi != me; ) {
      // Precompute the successor: mi may be transformed or erased below.
      MachineBasicBlock::iterator nmi = next(mi);
      const TargetInstrDesc &TID = mi->getDesc();
      bool FirstTied = true;

      DistanceMap.insert(std::make_pair(mi, ++Dist));

      ProcessCopy(&*mi, &*mbbi, Processed);

      // First scan through all the tied register uses in this instruction
      // and record a list of pairs of tied operands for each register.
      // Inline asm may have more operands than its descriptor declares.
      unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM)
        ? mi->getNumOperands() : TID.getNumOperands();
      for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
        unsigned DstIdx = 0;
        if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx))
          continue;

        if (FirstTied) {
          FirstTied = false;
          ++NumTwoAddressInstrs;
          DEBUG(errs() << '\t' << *mi);
        }

        assert(mi->getOperand(SrcIdx).isReg() &&
               mi->getOperand(SrcIdx).getReg() &&
               mi->getOperand(SrcIdx).isUse() &&
               "two address instruction invalid");

        unsigned regB = mi->getOperand(SrcIdx).getReg();
        TiedOperandMap::iterator OI = TiedOperands.find(regB);
        if (OI == TiedOperands.end()) {
          SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair;
          OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first;
        }
        OI->second.push_back(std::make_pair(SrcIdx, DstIdx));
      }

      // Now iterate over the information collected above.
      for (TiedOperandMap::iterator OI = TiedOperands.begin(),
             OE = TiedOperands.end(); OI != OE; ++OI) {
        SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second;

        // If the instruction has a single pair of tied operands, try some
        // transformations that may either eliminate the tied operands or
        // improve the opportunities for coalescing away the register copy.
        if (TiedOperands.size() == 1 && TiedPairs.size() == 1) {
          unsigned SrcIdx = TiedPairs[0].first;
          unsigned DstIdx = TiedPairs[0].second;

          // If the registers are already equal, nothing needs to be done.
          if (mi->getOperand(SrcIdx).getReg() ==
              mi->getOperand(DstIdx).getReg())
            break; // Done with this instruction.

          if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist))
            break; // The tied operands have been eliminated.
        }

        bool RemovedKillFlag = false;
        bool AllUsesCopied = true;
        unsigned LastCopiedReg = 0;
        unsigned regB = OI->first;
        for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
          unsigned SrcIdx = TiedPairs[tpi].first;
          unsigned DstIdx = TiedPairs[tpi].second;
          unsigned regA = mi->getOperand(DstIdx).getReg();
          // Grab regB from the instruction because it may have changed if the
          // instruction was commuted.
          regB = mi->getOperand(SrcIdx).getReg();

          if (regA == regB) {
            // The register is tied to multiple destinations (or else we would
            // not have continued this far), but this use of the register
            // already matches the tied destination. Leave it.
            AllUsesCopied = false;
            continue;
          }
          LastCopiedReg = regA;

          assert(TargetRegisterInfo::isVirtualRegister(regB) &&
                 "cannot make instruction into two-address form");

#ifndef NDEBUG
          // First, verify that we don't have a use of "a" in the instruction
          // (a = b + a for example) because our transformation will not
          // work. This should never occur because we are in SSA form.
          for (unsigned i = 0; i != mi->getNumOperands(); ++i)
            assert(i == DstIdx ||
                   !mi->getOperand(i).isReg() ||
                   mi->getOperand(i).getReg() != regA);
#endif

          // Emit a copy or rematerialize the definition.
          const TargetRegisterClass *rc = MRI->getRegClass(regB);
          MachineInstr *DefMI = MRI->getVRegDef(regB);
          // If it's safe and profitable, remat the definition instead of
          // copying it.
          if (DefMI &&
              DefMI->getDesc().isAsCheapAsAMove() &&
              DefMI->isSafeToReMat(TII, regB, AA) &&
              isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
            DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n");
            unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
            TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI);
            ReMatRegs.set(regB);
            ++NumReMats;
          } else {
            bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc);
            (void)Emitted;
            assert(Emitted && "Unable to issue a copy instruction!\n");
          }

          // The copy/remat was inserted immediately before mi.
          MachineBasicBlock::iterator prevMI = prior(mi);
          // Update DistanceMap.
          DistanceMap.insert(std::make_pair(prevMI, Dist));
          DistanceMap[mi] = ++Dist;

          DEBUG(errs() << "\t\tprepend:\t" << *prevMI);

          // Rewrite the tied use to regA; the kill (if any) moved to the
          // inserted copy above.
          MachineOperand &MO = mi->getOperand(SrcIdx);
          assert(MO.isReg() && MO.getReg() == regB && MO.isUse() &&
                 "inconsistent operand info for 2-reg pass");
          if (MO.isKill()) {
            MO.setIsKill(false);
            RemovedKillFlag = true;
          }
          MO.setReg(regA);
        }

        if (AllUsesCopied) {
          // Replace other (un-tied) uses of regB with LastCopiedReg.
          for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
            MachineOperand &MO = mi->getOperand(i);
            if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
              if (MO.isKill()) {
                MO.setIsKill(false);
                RemovedKillFlag = true;
              }
              MO.setReg(LastCopiedReg);
            }
          }

          // Update live variables for regB.
          if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi))
            LV->addVirtualRegisterKilled(regB, prior(mi));

        } else if (RemovedKillFlag) {
          // Some tied uses of regB matched their destination registers, so
          // regB is still used in this instruction, but a kill flag was
          // removed from a different tied use of regB, so now we need to add
          // a kill flag to one of the remaining uses of regB.
          for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
            MachineOperand &MO = mi->getOperand(i);
            if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
              MO.setIsKill(true);
              break;
            }
          }
        }

        MadeChange = true;

        DEBUG(errs() << "\t\trewrite to:\t" << *mi);
      }

      // Clear TiedOperands here instead of at the top of the loop
      // since most instructions do not have tied operands.
      TiedOperands.clear();
      mi = nmi;
    }
  }

  // Some remat'ed instructions are dead.
  int VReg = ReMatRegs.find_first();
  while (VReg != -1) {
    if (MRI->use_empty(VReg)) {
      MachineInstr *DefMI = MRI->getVRegDef(VReg);
      DefMI->eraseFromParent();
    }
    VReg = ReMatRegs.find_next(VReg);
  }

  return MadeChange;
}