void PPCCTRLoops::removeIfDead(MachineInstr *MI) {
  // This procedure was essentially copied from DeadMachineInstructionElim.

  SmallVector<MachineInstr *, 1> DeadPhis;
  if (isDead(MI, DeadPhis)) {
    DEBUG(dbgs() << "CTR looping will remove: " << *MI);

    // It is possible that some DBG_VALUE instructions refer to this
    // instruction. Examine each def operand for such references;
    // if found, mark the DBG_VALUE as undef (but don't delete it).
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      MachineRegisterInfo::use_iterator nextI;
      for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
           E = MRI->use_end(); I != E; I = nextI) {
        nextI = llvm::next(I);  // I is invalidated by the setReg
        MachineOperand &Use = I.getOperand();
        MachineInstr *UseMI = Use.getParent();
        if (UseMI == MI)
          continue;
        if (Use.isDebug()) // this might also be an instr -> phi -> instr case
                           // which can also be removed.
          UseMI->getOperand(0).setReg(0U);
      }
    }

    MI->eraseFromParent();
    for (unsigned i = 0; i < DeadPhis.size(); ++i) {
      DeadPhis[i]->eraseFromParent();
    }
  }
}
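// ---------------------------------------------------------------------------
// A minimal standalone sketch of the advance-before-mutate idiom above:
// setReg() unlinks an operand from the register's use list, invalidating any
// iterator pointing at it, so the next iterator must be captured first.
// std::list stands in for MachineRegisterInfo's intrusive use list here; this
// illustrates the pattern and is not LLVM API.
#include <iostream>
#include <iterator>
#include <list>

int main() {
  std::list<int> Uses = {1, 2, 3, 4, 5};
  for (std::list<int>::iterator I = Uses.begin(), E = Uses.end(); I != E;) {
    std::list<int>::iterator NextI = std::next(I); // capture before mutating
    if (*I % 2 == 0)
      Uses.erase(I); // analogous to setReg() unlinking the use
    I = NextI;
  }
  for (int U : Uses)
    std::cout << U << ' '; // prints: 1 3 5
  std::cout << '\n';
}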
/// isLiveInButUnusedBefore - Return true if register is live into the MBB and
/// not used before it reaches the MI that defines the register.
static bool isLiveInButUnusedBefore(unsigned Reg, MachineInstr *MI,
                                    MachineBasicBlock *MBB,
                                    const TargetRegisterInfo *TRI,
                                    MachineRegisterInfo *MRI) {
  // First check if register is livein.
  bool isLiveIn = false;
  for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
         E = MBB->livein_end(); I != E; ++I)
    if (Reg == *I || TRI->isSuperRegister(Reg, *I)) {
      isLiveIn = true;
      break;
    }
  if (!isLiveIn)
    return false;

  // Is there any use of it before the specified MI?
  SmallPtrSet<MachineInstr*, 4> UsesInMBB;
  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
         UE = MRI->use_end(); UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    if (UseMO.isReg() && UseMO.isUndef())
      continue;
    MachineInstr *UseMI = &*UI;
    if (UseMI->getParent() == MBB)
      UsesInMBB.insert(UseMI);
  }
  if (UsesInMBB.empty())
    return true;

  for (MachineBasicBlock::iterator I = MBB->begin(), E = MI; I != E; ++I)
    if (UsesInMBB.count(&*I))
      return false;
  return true;
}
/// isProfitableToReMat - Return true if the heuristic determines it is likely
/// to be profitable to re-materialize the definition of Reg rather than copy
/// the register.
bool
TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
                                               const TargetRegisterClass *RC,
                                               MachineInstr *MI,
                                               MachineInstr *DefMI,
                                               MachineBasicBlock *MBB,
                                               unsigned Loc) {
  bool OtherUse = false;
  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
         UE = MRI->use_end(); UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    MachineInstr *UseMI = UseMO.getParent();
    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
      if (DI != DistanceMap.end() && DI->second == Loc)
        continue;  // Current use.
      OtherUse = true;
      // There is at least one other use in the MBB that will clobber the
      // register.
      if (isTwoAddrUse(UseMI, Reg))
        return true;
    }
  }

  // If other uses in MBB are not two-address uses, then don't remat.
  if (OtherUse)
    return false;

  // No other uses in the same block, remat if it's defined in the same
  // block so it does not unnecessarily extend the live range.
  return MBB == DefMI->getParent();
}
bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
                                              const MachineInstr& MI) {
  unsigned DstReg = 0, ZeroReg = 0;

  // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
  if ((MI.getOpcode() == Mips::ADDiu) &&
      (MI.getOperand(1).getReg() == Mips::ZERO) &&
      (MI.getOperand(2).getImm() == 0)) {
    DstReg = MI.getOperand(0).getReg();
    ZeroReg = Mips::ZERO;
  } else if ((MI.getOpcode() == Mips::DADDiu) &&
             (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
             (MI.getOperand(2).getImm() == 0)) {
    DstReg = MI.getOperand(0).getReg();
    ZeroReg = Mips::ZERO_64;
  }

  if (!DstReg)
    return false;

  // Replace uses with ZeroReg. Advance the iterator before calling setReg,
  // which unlinks the operand from the use list and would invalidate U.
  for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
       E = MRI->use_end(); U != E;) {
    MachineOperand &MO = U.getOperand();
    unsigned OpNo = U.getOperandNo();
    MachineInstr *UseMI = MO.getParent();
    ++U;

    // Do not replace if it is a phi's operand or is tied to def operand.
    if (UseMI->isPHI() || UseMI->isRegTiedToDefOperand(OpNo))
      continue;

    MO.setReg(ZeroReg);
  }

  return true;
}
/// isDead returns true if the instruction is dead
/// (this was essentially copied from DeadMachineInstructionElim::isDead, but
/// with special cases for inline asm, physical registers and instructions with
/// side effects removed)
bool PPCCTRLoops::isDead(const MachineInstr *MI,
                         SmallVector<MachineInstr *, 1> &DeadPhis) const {
  // Examine each operand.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isDef()) {
      unsigned Reg = MO.getReg();
      if (!MRI->use_nodbg_empty(Reg)) {
        // This instruction has users, but if the only user is the phi node for
        // the parent block, and the only use of that phi node is this
        // instruction, then this instruction is dead: both it (and the phi
        // node) can be removed.
        MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
        if (llvm::next(I) == MRI->use_end() &&
            I.getOperand().getParent()->isPHI()) {
          MachineInstr *OnePhi = I.getOperand().getParent();

          for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
            const MachineOperand &OPO = OnePhi->getOperand(j);
            if (OPO.isReg() && OPO.isDef()) {
              unsigned OPReg = OPO.getReg();

              MachineRegisterInfo::use_iterator nextJ;
              for (MachineRegisterInfo::use_iterator J = MRI->use_begin(OPReg),
                   E = MRI->use_end(); J != E; J = nextJ) {
                nextJ = llvm::next(J);
                MachineOperand &Use = J.getOperand();
                MachineInstr *UseMI = Use.getParent();

                if (MI != UseMI) {
                  // The phi node has a user that is not MI, bail...
                  return false;
                }
              }
            }
          }

          DeadPhis.push_back(OnePhi);
        } else {
          // This def has a non-debug use. Don't delete the instruction!
          return false;
        }
      }
    }
  }

  // If there are no defs with uses, the instruction is dead.
  return true;
}
void BitLevelInfo::propagateBitWidth(MachineOperand &MO) {
  assert(MO.isReg() && "Wrong operand type!");

  unsigned RegNo = MO.getReg();
  unsigned char BitWidth = VInstrInfo::getBitWidth(MO);
  assert(BitWidth && "Bit width not available!");

  for (MachineRegisterInfo::use_iterator I = MRI->use_begin(RegNo),
       E = MRI->use_end(); I != E; ++I) {
    MachineOperand &UseMO = I.getOperand();
    // Propagate bit width information through the def-use chain.
    if (updateBitWidth(UseMO, BitWidth) && (I->isCopy() || I->isPHI()))
      computeBitWidth(&*I);
  }
}
bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
                                                const MachineInstr& MI) {
  unsigned DstReg = 0, ZeroReg = 0;

  // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
  if ((MI.getOpcode() == Mips::ADDiu) &&
      (MI.getOperand(1).getReg() == Mips::ZERO) &&
      (MI.getOperand(2).getImm() == 0)) {
    DstReg = MI.getOperand(0).getReg();
    ZeroReg = Mips::ZERO;
  } else if ((MI.getOpcode() == Mips::DADDiu) &&
             (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
             (MI.getOperand(2).getImm() == 0)) {
    DstReg = MI.getOperand(0).getReg();
    ZeroReg = Mips::ZERO_64;
  }

  if (!DstReg)
    return false;

  // Replace uses with ZeroReg.
  for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
       E = MRI->use_end(); U != E;) {
    MachineOperand &MO = *U;
    unsigned OpNo = U.getOperandNo();
    MachineInstr *UseMI = MO.getParent();
    ++U;

    // Do not replace if it is a phi's operand or is tied to def operand.
    if (UseMI->isPHI() || UseMI->isRegTiedToDefOperand(OpNo) ||
        UseMI->isPseudo())
      continue;

    // Also, we have to check that the register class of the operand
    // contains the zero register.
    if (!MRI->getRegClass(MO.getReg())->contains(ZeroReg))
      continue;

    MO.setReg(ZeroReg);
  }

  return true;
}
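// ---------------------------------------------------------------------------
// The loop above increments U before calling setReg; another idiom seen in
// this family of passes (e.g. the A15SDOptimizer below) is to defer mutation
// entirely: collect the interesting operands during the traversal, then
// rewrite them once the traversal is finished. A minimal standalone sketch,
// with a plain struct standing in for MachineOperand (not LLVM API):
#include <iostream>
#include <vector>

struct Operand { int Reg; };

int main() {
  std::vector<Operand> Ops = {{7}, {7}, {3}, {7}};
  std::vector<Operand *> ToRewrite;
  for (Operand &Op : Ops)       // traversal: no mutation yet
    if (Op.Reg == 7)
      ToRewrite.push_back(&Op);
  for (Operand *Op : ToRewrite) // mutation: traversal already finished
    Op->Reg = 0;                // analogous to MO.setReg(ZeroReg)
  for (const Operand &Op : Ops)
    std::cout << Op.Reg << ' '; // prints: 0 0 3 0
  std::cout << '\n';
}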
bool LazyLiveness::vregLiveIntoMBB(unsigned vreg, MachineBasicBlock *MBB) {
  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();

  MachineBasicBlock *DefMBB = MRI->def_begin(vreg)->getParent();
  unsigned def = preorder[DefMBB];
  unsigned max_dom = 0;
  for (df_iterator<MachineDomTreeNode*> DI = df_begin(MDT[DefMBB]),
       DE = df_end(MDT[DefMBB]); DI != DE; ++DI) {
    if (preorder[DI->getBlock()] > max_dom) {
      max_dom = preorder[(*DI)->getBlock()];
    }
  }

  if (preorder[MBB] <= def || max_dom < preorder[MBB])
    return false;

  SparseBitVector<128>::iterator I = tv[MBB].begin();
  while (I != tv[MBB].end() && *I <= def) ++I;
  while (I != tv[MBB].end() && *I < max_dom) {
    for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(vreg),
         UE = MRI->use_end(); UI != UE; ++UI) {
      MachineBasicBlock *UseMBB = UI->getParent();
      if (rv[rev_preorder[*I]].test(preorder[UseMBB]))
        return true;

      unsigned t_dom = 0;
      for (df_iterator<MachineDomTreeNode*> DI = df_begin(MDT[rev_preorder[*I]]),
           DE = df_end(MDT[rev_preorder[*I]]); DI != DE; ++DI)
        if (preorder[DI->getBlock()] > t_dom) {
          t_dom = preorder[(*DI)->getBlock()];
        }

      I = tv[MBB].begin();
      while (I != tv[MBB].end() && *I < t_dom) ++I;
    }
  }

  return false;
}
static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
                        unsigned UseOpIdx,
                        std::vector<FoldCandidate> &FoldList,
                        SmallVectorImpl<MachineInstr *> &CopiesToReplace,
                        const SIInstrInfo *TII, const SIRegisterInfo &TRI,
                        MachineRegisterInfo &MRI) {
  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);

  // FIXME: Fold operands with subregs.
  if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
      UseOp.isImplicit())) {
    return;
  }

  bool FoldingImm = OpToFold.isImm();
  APInt Imm;

  if (FoldingImm) {
    unsigned UseReg = UseOp.getReg();
    const TargetRegisterClass *UseRC
      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
      MRI.getRegClass(UseReg) :
      TRI.getPhysRegClass(UseReg);

    Imm = APInt(64, OpToFold.getImm());

    const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
    const TargetRegisterClass *FoldRC =
        TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);

    // Split 64-bit constants into 32-bits for folding.
    if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
      if (UseRC->getSize() != 8)
        return;

      if (UseOp.getSubReg() == AMDGPU::sub0) {
        Imm = Imm.getLoBits(32);
      } else {
        assert(UseOp.getSubReg() == AMDGPU::sub1);
        Imm = Imm.getHiBits(32);
      }
    }

    // In order to fold immediates into copies, we need to change the
    // copy to a MOV.
    if (UseMI->getOpcode() == AMDGPU::COPY) {
      unsigned DestReg = UseMI->getOperand(0).getReg();
      const TargetRegisterClass *DestRC
        = TargetRegisterInfo::isVirtualRegister(DestReg) ?
        MRI.getRegClass(DestReg) :
        TRI.getPhysRegClass(DestReg);

      unsigned MovOp = TII->getMovOpcode(DestRC);
      if (MovOp == AMDGPU::COPY)
        return;

      UseMI->setDesc(TII->get(MovOp));
      CopiesToReplace.push_back(UseMI);
    }
  }

  // Special case for REG_SEQUENCE: We can't fold literals into
  // REG_SEQUENCE instructions, so we have to fold them into the
  // uses of REG_SEQUENCE.
  if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
    unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
    unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();

    for (MachineRegisterInfo::use_iterator
         RSUse = MRI.use_begin(RegSeqDstReg),
         RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {

      MachineInstr *RSUseMI = RSUse->getParent();
      if (RSUse->getSubReg() != RegSeqDstSubReg)
        continue;

      foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
                  CopiesToReplace, TII, TRI, MRI);
    }
    return;
  }

  const MCInstrDesc &UseDesc = UseMI->getDesc();

  // Don't fold into target independent nodes. Target independent opcodes
  // don't have defined register classes.
  if (UseDesc.isVariadic() || UseDesc.OpInfo[UseOpIdx].RegClass == -1)
    return;

  if (FoldingImm) {
    MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
    return;
  }

  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);

  // FIXME: We could try to change the instruction from 64-bit to 32-bit
  // to enable more folding opportunities. The shrink operands pass
  // already does this.
}
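// ---------------------------------------------------------------------------
// The sub0/sub1 branch above selects the low or high 32 bits of a 64-bit
// immediate. A standalone sketch of the same arithmetic with plain integers;
// APInt::getLoBits/getHiBits generalize this to arbitrary bit widths:
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Imm = 0x3FF0000000000000ull;           // the f64 encoding of 1.0
  uint32_t Lo = static_cast<uint32_t>(Imm);       // sub0: low 32 bits
  uint32_t Hi = static_cast<uint32_t>(Imm >> 32); // sub1: high 32 bits
  std::printf("lo = 0x%08x, hi = 0x%08x\n", Lo, Hi);
  // prints: lo = 0x00000000, hi = 0x3ff00000
}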
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {
    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases. A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

      // We need to mutate the operands of new mov instructions to add implicit
      // uses of EXEC, but adding them invalidates the use_iterator, so defer
      // this.
      SmallVector<MachineInstr *, 4> CopiesToReplace;

      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {
        MachineInstr *UseMI = Use->getParent();
        foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
                    CopiesToReplace, TII, TRI, MRI);
      }

      // Make sure we add EXEC uses to any new v_mov instructions created.
      for (MachineInstr *Copy : CopiesToReplace)
        Copy->addImplicitDefUseOperands(MF);

      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return false;
}
void SIFoldOperands::foldOperand(
  MachineOperand &OpToFold,
  MachineInstr *UseMI,
  unsigned UseOpIdx,
  SmallVectorImpl<FoldCandidate> &FoldList,
  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);

  if (!isUseSafeToFold(TII, *UseMI, UseOp))
    return;

  // FIXME: Fold operands with subregs.
  if (UseOp.isReg() && OpToFold.isReg()) {
    if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
      return;

    // Don't fold subregister extracts into tied operands unless it is a full
    // copy, since a subregister use tied to a full register def doesn't really
    // make sense. e.g. don't fold:
    //
    // %vreg1 = COPY %vreg0:sub1
    // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg1<tied0>
    //
    // into
    // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg0:sub1<tied0>
    if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
      return;
  }

  // Special case for REG_SEQUENCE: We can't fold literals into
  // REG_SEQUENCE instructions, so we have to fold them into the
  // uses of REG_SEQUENCE.
  if (UseMI->isRegSequence()) {
    unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
    unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();

    for (MachineRegisterInfo::use_iterator
           RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
         RSUse != RSE; ++RSUse) {

      MachineInstr *RSUseMI = RSUse->getParent();
      if (RSUse->getSubReg() != RegSeqDstSubReg)
        continue;

      foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
                  CopiesToReplace);
    }

    return;
  }

  bool FoldingImm = OpToFold.isImm();

  // In order to fold immediates into copies, we need to change the
  // copy to a MOV.
  if (FoldingImm && UseMI->isCopy()) {
    unsigned DestReg = UseMI->getOperand(0).getReg();
    const TargetRegisterClass *DestRC
      = TargetRegisterInfo::isVirtualRegister(DestReg) ?
      MRI->getRegClass(DestReg) :
      TRI->getPhysRegClass(DestReg);

    unsigned MovOp = TII->getMovOpcode(DestRC);
    if (MovOp == AMDGPU::COPY)
      return;

    UseMI->setDesc(TII->get(MovOp));
    CopiesToReplace.push_back(UseMI);
  } else {
    const MCInstrDesc &UseDesc = UseMI->getDesc();

    // Don't fold into target independent nodes. Target independent opcodes
    // don't have defined register classes.
    if (UseDesc.isVariadic() ||
        UseDesc.OpInfo[UseOpIdx].RegClass == -1)
      return;
  }

  if (!FoldingImm) {
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);

    // FIXME: We could try to change the instruction from 64-bit to 32-bit
    // to enable more folding opportunities. The shrink operands pass
    // already does this.
    return;
  }

  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
  const TargetRegisterClass *FoldRC =
    TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);

  // Split 64-bit constants into 32-bits for folding.
  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
    unsigned UseReg = UseOp.getReg();
    const TargetRegisterClass *UseRC
      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
      MRI->getRegClass(UseReg) :
      TRI->getPhysRegClass(UseReg);

    if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
      return;

    APInt Imm(64, OpToFold.getImm());
    if (UseOp.getSubReg() == AMDGPU::sub0) {
      Imm = Imm.getLoBits(32);
    } else {
      assert(UseOp.getSubReg() == AMDGPU::sub1);
      Imm = Imm.getHiBits(32);
    }

    MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
    return;
  }

  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
}
void SIFoldOperands::foldInstOperand(MachineInstr &MI,
                                     MachineOperand &OpToFold) const {
  // We need to mutate the operands of new mov instructions to add implicit
  // uses of EXEC, but adding them invalidates the use_iterator, so defer
  // this.
  SmallVector<MachineInstr *, 4> CopiesToReplace;
  SmallVector<FoldCandidate, 4> FoldList;
  MachineOperand &Dst = MI.getOperand(0);

  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
  if (FoldingImm) {
    unsigned NumLiteralUses = 0;
    MachineOperand *NonInlineUse = nullptr;
    int NonInlineUseOpNo = -1;

    MachineRegisterInfo::use_iterator NextUse, NextInstUse;
    for (MachineRegisterInfo::use_iterator
           Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
         Use != E; Use = NextUse) {
      NextUse = std::next(Use);
      MachineInstr *UseMI = Use->getParent();
      unsigned OpNo = Use.getOperandNo();

      // Folding the immediate may reveal operations that can be constant
      // folded or replaced with a copy. This can happen for example after
      // frame indices are lowered to constants or from splitting 64-bit
      // constants.
      //
      // We may also encounter cases where one or both operands are
      // immediates materialized into a register, which would ordinarily not
      // be folded due to multiple uses or operand constraints.
      if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
        DEBUG(dbgs() << "Constant folded " << *UseMI << '\n');

        // Some constant folding cases change the same immediate's use to a new
        // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
        // again. The same constant folded instruction could also have a second
        // use operand.
        NextUse = MRI->use_begin(Dst.getReg());
        continue;
      }

      // Try to fold any inline immediate uses, and then only fold other
      // constants if they have one use.
      //
      // The legality of the inline immediate must be checked based on the use
      // operand, not the defining instruction, because 32-bit instructions
      // with 32-bit inline immediate sources may be used to materialize
      // constants used in 16-bit operands.
      //
      // e.g. it is unsafe to fold:
      //  s_mov_b32 s0, 1.0    // materializes 0x3f800000
      //  v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases. A better heuristic is needed.
      if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
        foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
      } else {
        if (++NumLiteralUses == 1) {
          NonInlineUse = &*Use;
          NonInlineUseOpNo = OpNo;
        }
      }
    }

    if (NumLiteralUses == 1) {
      MachineInstr *UseMI = NonInlineUse->getParent();
      foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
    }
  } else {
    // Folding register.
    for (MachineRegisterInfo::use_iterator
           Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
         Use != E; ++Use) {
      MachineInstr *UseMI = Use->getParent();

      foldOperand(OpToFold, UseMI, Use.getOperandNo(),
                  FoldList, CopiesToReplace);
    }
  }

  MachineFunction *MF = MI.getParent()->getParent();
  // Make sure we add EXEC uses to any new v_mov instructions created.
  for (MachineInstr *Copy : CopiesToReplace)
    Copy->addImplicitDefUseOperands(*MF);

  for (FoldCandidate &Fold : FoldList) {
    if (updateOperand(Fold, *TRI)) {
      // Clear kill flags.
      if (Fold.isReg()) {
        assert(Fold.OpToFold && Fold.OpToFold->isReg());
        // FIXME: Probably shouldn't bother trying to fold if not an
        // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
        // copies.
        MRI->clearKillFlags(Fold.OpToFold->getReg());
      }
      DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
            static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
      tryFoldInst(TII, Fold.UseMI);
    } else if (Fold.isCommuted()) {
      // Restoring instruction's original operand order if fold has failed.
      TII->commuteInstruction(*Fold.UseMI, false);
    }
  }
}
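// ---------------------------------------------------------------------------
// Note the NextUse = MRI->use_begin(...) above: when a rewrite can disturb
// other entries of the list being walked (constant folding may delete or
// rewrite further uses of the same register), restarting the scan is the
// simple safe choice. A minimal standalone sketch of the restart idiom (not
// LLVM API); it terminates because every restart follows a removal:
#include <iostream>
#include <iterator>
#include <list>

int main() {
  std::list<int> Uses = {4, 2, 8, 3};
  std::list<int>::iterator I = Uses.begin();
  while (I != Uses.end()) {
    std::list<int>::iterator Next = std::next(I);
    if (*I % 2 == 0) {
      Uses.erase(I);       // a rewrite that may disturb the rest of the list
      Next = Uses.begin(); // restart, mirroring NextUse = MRI->use_begin(...)
    }
    I = Next;
  }
  for (int U : Uses)
    std::cout << U << ' '; // prints: 3
  std::cout << '\n';
}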
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {
    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases. A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();
        const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());

        // FIXME: Fold operands with subregs.
        if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
            UseOp.isImplicit())) {
          continue;
        }

        APInt Imm;

        if (FoldingImm) {
          unsigned UseReg = UseOp.getReg();
          const TargetRegisterClass *UseRC
            = TargetRegisterInfo::isVirtualRegister(UseReg) ?
            MRI.getRegClass(UseReg) :
            TRI.getRegClass(UseReg);

          Imm = APInt(64, OpToFold.getImm());

          // Split 64-bit constants into 32-bits for folding.
          if (UseOp.getSubReg()) {
            if (UseRC->getSize() != 8)
              continue;

            if (UseOp.getSubReg() == AMDGPU::sub0) {
              Imm = Imm.getLoBits(32);
            } else {
              assert(UseOp.getSubReg() == AMDGPU::sub1);
              Imm = Imm.getHiBits(32);
            }
          }

          // In order to fold immediates into copies, we need to change the
          // copy to a MOV.
          if (UseMI->getOpcode() == AMDGPU::COPY) {
            unsigned DestReg = UseMI->getOperand(0).getReg();
            const TargetRegisterClass *DestRC
              = TargetRegisterInfo::isVirtualRegister(DestReg) ?
              MRI.getRegClass(DestReg) :
              TRI.getRegClass(DestReg);

            unsigned MovOp = TII->getMovOpcode(DestRC);
            if (MovOp == AMDGPU::COPY)
              continue;

            UseMI->setDesc(TII->get(MovOp));
          }
        }

        const MCInstrDesc &UseDesc = UseMI->getDesc();

        // Don't fold into target independent nodes. Target independent opcodes
        // don't have defined register classes.
        if (UseDesc.isVariadic() ||
            UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
          continue;

        if (FoldingImm) {
          MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
          tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
          continue;
        }

        tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);

        // FIXME: We could try to change the instruction from 64-bit to 32-bit
        // to enable more folding opportunities. The shrink operands pass
        // already does this.
      }

      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return false;
}
bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
  // We look for instructions that write S registers that are then read as
  // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
  // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
  // merge two SPR values to form a DPR register. In order to avoid false
  // positives we make sure that there is an SPR producer so we look past
  // COPY and PHI nodes to find it.
  //
  // The best code pattern for when an SPR producer is going to be used by a
  // DPR or QPR consumer depends on whether the other lanes of the
  // corresponding DPR/QPR are currently defined.
  //
  // We can handle these efficiently, depending on the type of
  // pseudo-instruction that is producing the pattern
  //
  //   * COPY:          * VDUP all lanes and merge the results together
  //                      using VEXTs.
  //
  //   * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
  //                      lane, and the other lane(s) of the DPR/QPR register
  //                      that we are inserting in are undefined, use the
  //                      original DPR/QPR value.
  //                    * Otherwise, fall back on the same strategy as COPY.
  //
  //   * REG_SEQUENCE:  * If all except one of the input operands are
  //                      IMPLICIT_DEFs, insert the VDUP pattern for just the
  //                      defined input operand
  //                    * Otherwise, fall back on the same strategy as COPY.
  //

  // First, get all the reads of D-registers done by this instruction.
  SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
  bool Modified = false;

  for (SmallVectorImpl<unsigned>::iterator I = Defs.begin(), E = Defs.end();
     I != E; ++I) {
    // Follow the def-use chain for this DPR through COPYs, and also through
    // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
    // we can end up with multiple defs of this DPR.

    SmallVector<MachineInstr *, 8> DefSrcs;
    if (!TRI->isVirtualRegister(*I))
      continue;
    MachineInstr *Def = MRI->getVRegDef(*I);
    if (!Def)
      continue;

    elideCopiesAndPHIs(Def, DefSrcs);

    for (SmallVectorImpl<MachineInstr *>::iterator II = DefSrcs.begin(),
      EE = DefSrcs.end(); II != EE; ++II) {
      MachineInstr *MI = *II;

      // If we've already analyzed and replaced this operand, don't do
      // anything.
      if (Replacements.find(MI) != Replacements.end())
        continue;

      // Now, work out if the instruction causes a SPR->DPR dependency.
      if (!hasPartialWrite(MI))
        continue;

      // Collect all the uses of this MI's DPR def for updating later.
      SmallVector<MachineOperand*, 8> Uses;
      unsigned DPRDefReg = MI->getOperand(0).getReg();
      for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
             E = MRI->use_end(); I != E; ++I)
        Uses.push_back(&I.getOperand());

      // We can optimize this.
      unsigned NewReg = optimizeSDPattern(MI);

      if (NewReg != 0) {
        Modified = true;
        for (SmallVectorImpl<MachineOperand *>::const_iterator I = Uses.begin(),
               E = Uses.end(); I != E; ++I) {
          // Make sure to constrain the register class of the new register to
          // match what we're replacing. Otherwise we can optimize a DPR_VFP2
          // reference into a plain DPR, and that will end poorly. NewReg is
          // always virtual here, so there will always be a matching subclass
          // to find.
          MRI->constrainRegClass(NewReg, MRI->getRegClass((*I)->getReg()));

          DEBUG(dbgs() << "Replacing operand "
                       << **I << " with "
                       << PrintReg(NewReg) << "\n");
          (*I)->substVirtReg(NewReg, 0, *TRI);
        }
      }
      Replacements[MI] = NewReg;
    }
  }
  return Modified;
}
/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
/// there is one implicit_def for each use. Add isUndef marker to
/// implicit_def defs and their uses.
bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {

  DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
               << "********** Function: "
               << ((Value*)fn.getFunction())->getName() << '\n');

  bool Changed = false;

  TII = fn.getTarget().getInstrInfo();
  TRI = fn.getTarget().getRegisterInfo();
  MRI = &fn.getRegInfo();
  LV = &getAnalysis<LiveVariables>();

  SmallSet<unsigned, 8> ImpDefRegs;
  SmallVector<MachineInstr*, 8> ImpDefMIs;
  SmallVector<MachineInstr*, 4> RUses;
  SmallPtrSet<MachineBasicBlock*,16> Visited;
  SmallPtrSet<MachineInstr*, 8> ModInsts;

  MachineBasicBlock *Entry = fn.begin();
  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
       DFI != E; ++DFI) {
    MachineBasicBlock *MBB = *DFI;
    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
         I != E; ) {
      MachineInstr *MI = &*I;
      ++I;
      if (MI->isImplicitDef()) {
        ImpDefMIs.push_back(MI);
        // Is this a sub-register read-modify-write?
        if (MI->getOperand(0).readsReg())
          continue;
        unsigned Reg = MI->getOperand(0).getReg();
        ImpDefRegs.insert(Reg);
        if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
          for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
            ImpDefRegs.insert(*SS);
        }
        continue;
      }

      // Eliminate %reg1032:sub<def> = COPY undef.
      if (MI->isCopy() && MI->getOperand(0).readsReg()) {
        MachineOperand &MO = MI->getOperand(1);
        if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
          if (MO.isKill()) {
            LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
            vi.removeKill(MI);
          }
          unsigned Reg = MI->getOperand(0).getReg();
          MI->eraseFromParent();
          Changed = true;

          // A REG_SEQUENCE may have been expanded into partial definitions.
          // If this was the last one, mark Reg as implicitly defined.
          if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->def_empty(Reg))
            ImpDefRegs.insert(Reg);
          continue;
        }
      }

      bool ChangedToImpDef = false;
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        MachineOperand& MO = MI->getOperand(i);
        if (!MO.isReg() || !MO.readsReg())
          continue;
        unsigned Reg = MO.getReg();
        if (!Reg)
          continue;
        if (!ImpDefRegs.count(Reg))
          continue;
        // Use is a copy, just turn it into an implicit_def.
        if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) {
          bool isKill = MO.isKill();
          MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
          for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
            MI->RemoveOperand(j);
          if (isKill) {
            ImpDefRegs.erase(Reg);
            LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
            vi.removeKill(MI);
          }
          ChangedToImpDef = true;
          Changed = true;
          break;
        }

        Changed = true;
        MO.setIsUndef();
        // This is a partial register redef of an implicit def.
        // Make sure the whole register is defined by the instruction.
        if (MO.isDef()) {
          MI->addRegisterDefined(Reg);
          continue;
        }
        if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
          // Make sure other reads of Reg are also marked <undef>.
          for (unsigned j = i+1; j != e; ++j) {
            MachineOperand &MOJ = MI->getOperand(j);
            if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg())
              MOJ.setIsUndef();
          }
          ImpDefRegs.erase(Reg);
        }
      }

      if (ChangedToImpDef) {
        // Backtrack to process this new implicit_def.
        --I;
      } else {
        for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
          MachineOperand& MO = MI->getOperand(i);
          if (!MO.isReg() || !MO.isDef())
            continue;
          ImpDefRegs.erase(MO.getReg());
        }
      }
    }

    // Any outstanding liveout implicit_def's?
    for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) {
      MachineInstr *MI = ImpDefMIs[i];
      unsigned Reg = MI->getOperand(0).getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
          !ImpDefRegs.count(Reg)) {
        // Delete all "local" implicit_def's. That includes those which define
        // physical registers, since they cannot be liveout.
        MI->eraseFromParent();
        Changed = true;
        continue;
      }

      // If there are multiple defs of the same register and at least one
      // is not an implicit_def, do not insert implicit_def's before the
      // uses.
      bool Skip = false;
      SmallVector<MachineInstr*, 4> DeadImpDefs;
      for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
             DE = MRI->def_end(); DI != DE; ++DI) {
        MachineInstr *DeadImpDef = &*DI;
        if (!DeadImpDef->isImplicitDef()) {
          Skip = true;
          break;
        }
        DeadImpDefs.push_back(DeadImpDef);
      }
      if (Skip)
        continue;

      // The only implicit_def which we want to keep are those that are live
      // out of its block.
      for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j)
        DeadImpDefs[j]->eraseFromParent();
      Changed = true;

      // Process each use instruction once.
      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
             UE = MRI->use_end(); UI != UE; ++UI) {
        if (UI.getOperand().isUndef())
          continue;
        MachineInstr *RMI = &*UI;
        if (ModInsts.insert(RMI))
          RUses.push_back(RMI);
      }

      for (unsigned i = 0, e = RUses.size(); i != e; ++i) {
        MachineInstr *RMI = RUses[i];

        // Turn a copy use into an implicit_def.
        if (isUndefCopy(RMI, Reg, ImpDefRegs)) {
          RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));

          bool isKill = false;
          SmallVector<unsigned, 4> Ops;
          for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
            MachineOperand &RRMO = RMI->getOperand(j);
            if (RRMO.isReg() && RRMO.getReg() == Reg) {
              Ops.push_back(j);
              if (RRMO.isKill())
                isKill = true;
            }
          }
          // Leave the other operands alone.
          for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) {
            unsigned OpIdx = Ops[j];
            RMI->RemoveOperand(OpIdx-j);
          }

          // Update LiveVariables varinfo if the instruction is a kill.
          if (isKill) {
            LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
            vi.removeKill(RMI);
          }
          continue;
        }

        // Replace Reg with a new vreg that's marked implicit.
        const TargetRegisterClass* RC = MRI->getRegClass(Reg);
        unsigned NewVReg = MRI->createVirtualRegister(RC);
        bool isKill = true;
        for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
          MachineOperand &RRMO = RMI->getOperand(j);
          if (RRMO.isReg() && RRMO.getReg() == Reg) {
            RRMO.setReg(NewVReg);
            RRMO.setIsUndef();
            if (isKill) {
              // Only the first operand of NewVReg is marked kill.
              RRMO.setIsKill();
              isKill = false;
            }
          }
        }
      }
      RUses.clear();
      ModInsts.clear();
    }
    ImpDefRegs.clear();
    ImpDefMIs.clear();
  }

  return Changed;
}
/// Sink3AddrInstruction - A two-address instruction has been converted to a
/// three-address instruction to avoid clobbering a register. Try to sink it
/// past the instruction that would kill the above mentioned register to reduce
/// register pressure.
bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
                                           MachineInstr *MI, unsigned SavedReg,
                                           MachineBasicBlock::iterator OldPos) {
  // Check if it's safe to move this instruction.
  bool SeenStore = true; // Be conservative.
  if (!MI->isSafeToMove(TII, SeenStore, AA))
    return false;

  unsigned DefReg = 0;
  SmallSet<unsigned, 4> UseRegs;

  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    unsigned MOReg = MO.getReg();
    if (!MOReg)
      continue;
    if (MO.isUse() && MOReg != SavedReg)
      UseRegs.insert(MO.getReg());
    if (!MO.isDef())
      continue;
    if (MO.isImplicit())
      // Don't try to move it if it implicitly defines a register.
      return false;
    if (DefReg)
      // For now, don't move any instructions that define multiple registers.
      return false;
    DefReg = MO.getReg();
  }

  // Find the instruction that kills SavedReg.
  MachineInstr *KillMI = NULL;
  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SavedReg),
         UE = MRI->use_end(); UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    if (!UseMO.isKill())
      continue;
    KillMI = UseMO.getParent();
    break;
  }

  if (!KillMI || KillMI->getParent() != MBB || KillMI == MI)
    return false;

  // If any of the definitions are used by another instruction between the
  // position and the kill use, then it's not safe to sink it.
  //
  // FIXME: This can be sped up if there is an easy way to query whether an
  // instruction is before or after another instruction. Then we can use
  // MachineRegisterInfo def / use instead.
  MachineOperand *KillMO = NULL;
  MachineBasicBlock::iterator KillPos = KillMI;
  ++KillPos;

  unsigned NumVisited = 0;
  for (MachineBasicBlock::iterator I = next(OldPos); I != KillPos; ++I) {
    MachineInstr *OtherMI = I;
    if (NumVisited > 30)  // FIXME: Arbitrary limit to reduce compile time cost.
      return false;
    ++NumVisited;
    for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = OtherMI->getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned MOReg = MO.getReg();
      if (!MOReg)
        continue;
      if (DefReg == MOReg)
        return false;

      if (MO.isKill()) {
        if (OtherMI == KillMI && MOReg == SavedReg)
          // Save the operand that kills the register. We want to unset the
          // kill marker if we can sink MI past it.
          KillMO = &MO;
        else if (UseRegs.count(MOReg))
          // One of the uses is killed before the destination.
          return false;
      }
    }
  }

  // Update kill and LV information.
  KillMO->setIsKill(false);
  KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
  KillMO->setIsKill(true);

  if (LV)
    LV->replaceKillInstruction(SavedReg, KillMI, MI);

  // Move instruction to its destination.
  MBB->remove(MI);
  MBB->insert(KillPos, MI);

  ++Num3AddrSunk;
  return true;
}
/// TailDuplicateBlocks - Look for small blocks that are unconditionally
/// branched to and do not fall through. Tail-duplicate their instructions
/// into their predecessors to eliminate (dynamic) branches.
bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
  bool MadeChange = false;

  if (PreRegAlloc && TailDupVerify) {
    DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
    VerifyPHIs(MF, true);
  }

  SmallVector<MachineInstr*, 8> NewPHIs;
  MachineSSAUpdater SSAUpdate(MF, &NewPHIs);

  for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
    MachineBasicBlock *MBB = I++;

    if (NumTails == TailDupLimit)
      break;

    // Save the successors list.
    SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
                                                MBB->succ_end());

    SmallVector<MachineBasicBlock*, 8> TDBBs;
    SmallVector<MachineInstr*, 16> Copies;
    if (TailDuplicate(MBB, MF, TDBBs, Copies)) {
      ++NumTails;

      // TailBB's immediate successors are now successors of those predecessors
      // which duplicated TailBB. Add the predecessors as sources to the PHI
      // instructions.
      bool isDead = MBB->pred_empty();
      if (PreRegAlloc)
        UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);

      // If it is dead, remove it.
      if (isDead) {
        NumInstrDups -= MBB->size();
        RemoveDeadBlock(MBB);
        ++NumDeadBlocks;
      }

      // Update SSA form.
      if (!SSAUpdateVRs.empty()) {
        for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
          unsigned VReg = SSAUpdateVRs[i];
          SSAUpdate.Initialize(VReg);

          // If the original definition is still around, add it as an available
          // value.
          MachineInstr *DefMI = MRI->getVRegDef(VReg);
          MachineBasicBlock *DefBB = 0;
          if (DefMI) {
            DefBB = DefMI->getParent();
            SSAUpdate.AddAvailableValue(DefBB, VReg);
          }

          // Add the new vregs as available values.
          DenseMap<unsigned, AvailableValsTy>::iterator LI =
            SSAUpdateVals.find(VReg);
          for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
            MachineBasicBlock *SrcBB = LI->second[j].first;
            unsigned SrcReg = LI->second[j].second;
            SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
          }

          // Rewrite uses that are outside of the original def's block.
          MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
          while (UI != MRI->use_end()) {
            MachineOperand &UseMO = UI.getOperand();
            MachineInstr *UseMI = &*UI;
            ++UI;
            if (UseMI->isDebugValue()) {
              // SSAUpdate can replace the use with an undef. That creates
              // a debug instruction that is a kill.
              // FIXME: Should it be SSAUpdate's job to delete debug
              // instructions instead of replacing the use with undef?
              UseMI->eraseFromParent();
              continue;
            }
            if (UseMI->getParent() == DefBB && !UseMI->isPHI())
              continue;
            SSAUpdate.RewriteUse(UseMO);
          }
        }

        SSAUpdateVRs.clear();
        SSAUpdateVals.clear();
      }

      // Eliminate some of the copies inserted by tail duplication to maintain
      // SSA form.
      for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
        MachineInstr *Copy = Copies[i];
        if (!Copy->isCopy())
          continue;
        unsigned Dst = Copy->getOperand(0).getReg();
        unsigned Src = Copy->getOperand(1).getReg();
        MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
        if (++UI == MRI->use_end()) {
          // Copy is the only use. Do trivial copy propagation here.
          MRI->replaceRegWith(Dst, Src);
          Copy->eraseFromParent();
        }
      }

      if (PreRegAlloc && TailDupVerify)
        VerifyPHIs(MF, false);
      MadeChange = true;
    }
  }
  NumAddedPHIs += NewPHIs.size();

  return MadeChange;
}
bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
  bool AnyChanges = false;
  MRI = &MF.getRegInfo();
  TRI = MF.getTarget().getRegisterInfo();
  TII = MF.getTarget().getInstrInfo();

  // Treat reserved registers as always live.
  BitVector ReservedRegs = TRI->getReservedRegs(MF);

  // Loop over all instructions in all blocks, from bottom to top, so that it's
  // more likely that chains of dependent but ultimately dead instructions will
  // be cleaned up.
  for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend();
       I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    // Start out assuming that reserved registers are live out of this block.
    LivePhysRegs = ReservedRegs;

    // Also add any explicit live-out physregs for this block.
    if (!MBB->empty() && MBB->back().getDesc().isReturn())
      for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
           LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
        unsigned Reg = *LOI;
        if (TargetRegisterInfo::isPhysicalRegister(Reg))
          LivePhysRegs.set(Reg);
      }

    // FIXME: Add live-ins from successors to LivePhysRegs. Normally, physregs
    // are not live across blocks, but some targets (x86) can have flags live
    // out of a block.

    // Now scan the instructions and delete dead ones, tracking physreg
    // liveness as we go.
    for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
         MIE = MBB->rend(); MII != MIE; ) {
      MachineInstr *MI = &*MII;

      // If the instruction is dead, delete it!
      if (isDead(MI)) {
        DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
        // It is possible that some DBG_VALUE instructions refer to this
        // instruction. Examine each def operand for such references;
        // if found, mark the DBG_VALUE as undef (but don't delete it).
        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
          const MachineOperand &MO = MI->getOperand(i);
          if (!MO.isReg() || !MO.isDef())
            continue;
          unsigned Reg = MO.getReg();
          if (!TargetRegisterInfo::isVirtualRegister(Reg))
            continue;
          MachineRegisterInfo::use_iterator nextI;
          for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
               E = MRI->use_end(); I != E; I = nextI) {
            nextI = llvm::next(I);  // I is invalidated by the setReg
            MachineOperand &Use = I.getOperand();
            MachineInstr *UseMI = Use.getParent();
            if (UseMI == MI)
              continue;
            assert(Use.isDebug());
            UseMI->getOperand(0).setReg(0U);
          }
        }
        AnyChanges = true;
        MI->eraseFromParent();
        ++NumDeletes;
        MIE = MBB->rend();
        // MII is now pointing to the next instruction to process,
        // so don't increment it.
        continue;
      }

      // Record the physreg defs.
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = MI->getOperand(i);
        if (MO.isReg() && MO.isDef()) {
          unsigned Reg = MO.getReg();
          if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
            LivePhysRegs.reset(Reg);
            // Check the subreg set, not the alias set, because a def
            // of a super-register may still be partially live after
            // this def.
            for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
                 *SubRegs; ++SubRegs)
              LivePhysRegs.reset(*SubRegs);
          }
        }
      }

      // Record the physreg uses, after the defs, in case a physreg is
      // both defined and used in the same instruction.
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = MI->getOperand(i);
        if (MO.isReg() && MO.isUse()) {
          unsigned Reg = MO.getReg();
          if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
            LivePhysRegs.set(Reg);
            for (const unsigned *AliasSet = TRI->getAliasSet(Reg);
                 *AliasSet; ++AliasSet)
              LivePhysRegs.set(*AliasSet);
          }
        }
      }

      // We didn't delete the current instruction, so increment MII to
      // the next one.
      ++MII;
    }
  }

  LivePhysRegs.clear();
  return AnyChanges;
}
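// ---------------------------------------------------------------------------
// The pass above computes deadness with a single bottom-up scan per block:
// walking backwards, a def of a register that is not currently live is dead,
// a def ends liveness (looking upward), and a use begins it. A minimal
// standalone sketch over a toy instruction form, where -1 means "no
// register"; this mirrors the scan's structure, not the LLVM API:
#include <iostream>
#include <set>
#include <vector>

struct Inst { int Def; int Use; };

int main() {
  // r0 = ...; r1 = r0; r2 = ... (never read); return r1
  std::vector<Inst> Block = {{0, -1}, {1, 0}, {2, -1}, {-1, 1}};
  std::set<int> Live; // live-out set of the block, empty here
  for (std::vector<Inst>::reverse_iterator I = Block.rbegin(),
       E = Block.rend(); I != E; ++I) {
    if (I->Def >= 0 && !Live.count(I->Def))
      std::cout << "dead def: r" << I->Def << '\n'; // prints: dead def: r2
    if (I->Def >= 0)
      Live.erase(I->Def);  // a def ends liveness when scanning upward
    if (I->Use >= 0)
      Live.insert(I->Use); // a use begins liveness when scanning upward
  }
}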
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(*MF.getFunction()))
    return false;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {
    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases. A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      if (OpToFold.isReg() &&
          !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
        continue;

      // Prevent folding operands backwards in the function. For example,
      // the COPY opcode must not be replaced by 1 in this example:
      //
      //    %vreg3<def> = COPY %VGPR0; VGPR_32:%vreg3
      //    ...
      //    %VGPR0<def> = V_MOV_B32_e32 1, %EXEC<imp-use>
      MachineOperand &Dst = MI.getOperand(0);
      if (Dst.isReg() &&
          !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
        continue;

      // We need to mutate the operands of new mov instructions to add implicit
      // uses of EXEC, but adding them invalidates the use_iterator, so defer
      // this.
      SmallVector<MachineInstr *, 4> CopiesToReplace;

      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {
        MachineInstr *UseMI = Use->getParent();
        foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
                    CopiesToReplace, TII, TRI, MRI);
      }

      // Make sure we add EXEC uses to any new v_mov instructions created.
      for (MachineInstr *Copy : CopiesToReplace)
        Copy->addImplicitDefUseOperands(MF);

      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.
          if (Fold.isReg()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            // FIXME: Probably shouldn't bother trying to fold if not an
            // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
            // copies.
            MRI.clearKillFlags(Fold.OpToFold->getReg());
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');

          // Folding the immediate may reveal operations that can be constant
          // folded or replaced with a copy. This can happen for example after
          // frame indices are lowered to constants or from splitting 64-bit
          // constants.
          tryConstantFoldOp(MRI, TII, Fold.UseMI);
        }
      }
    }
  }
  return false;
}
void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
  SmallVector<MachineInstr *, 128> Worklist;
  Worklist.push_back(&TopInst);

  while (!Worklist.empty()) {
    MachineInstr *Inst = Worklist.pop_back_val();
    unsigned NewOpcode = getVALUOp(*Inst);
    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
      continue;

    MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo();

    // Use the new VALU Opcode.
    const MCInstrDesc &NewDesc = get(NewOpcode);
    Inst->setDesc(NewDesc);

    // Remove any references to SCC. Vector instructions can't read from it,
    // and we're just about to add the implicit use / defs of VCC, and we
    // don't want both.
    for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
      MachineOperand &Op = Inst->getOperand(i);
      if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
        Inst->RemoveOperand(i);
    }

    // Add the implicit and explicit register definitions.
    if (NewDesc.ImplicitUses) {
      for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
        unsigned Reg = NewDesc.ImplicitUses[i];
        Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
      }
    }

    if (NewDesc.ImplicitDefs) {
      for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
        unsigned Reg = NewDesc.ImplicitDefs[i];
        Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    }

    legalizeOperands(Inst);

    // Update the destination register class.
    const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);

    switch (Inst->getOpcode()) {
      // For target instructions, getOpRegClass just returns the virtual
      // register class associated with the operand, so we need to find an
      // equivalent VGPR register class in order to move the instruction to
      // the VALU.
    case AMDGPU::COPY:
    case AMDGPU::PHI:
    case AMDGPU::REG_SEQUENCE:
      if (RI.hasVGPRs(NewDstRC))
        continue;
      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
      if (!NewDstRC)
        continue;
      break;
    default:
      break;
    }

    unsigned DstReg = Inst->getOperand(0).getReg();
    unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);

    for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
           E = MRI.use_end(); I != E; ++I) {
      MachineInstr &UseMI = *I;
      if (!canReadVGPR(UseMI, I.getOperandNo())) {
        Worklist.push_back(&UseMI);
      }
    }
  }
}
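// ---------------------------------------------------------------------------
// moveToVALU above is a classic worklist algorithm: converting one
// instruction may force its users to convert too, so users are pushed and
// drained until a fixed point. A minimal standalone sketch over an adjacency
// list standing in for the def-use graph (not LLVM API); the Moved set is an
// addition here to make revisits explicit:
#include <iostream>
#include <set>
#include <vector>

int main() {
  // Users[i] = instructions that read the value defined by instruction i.
  std::vector<std::vector<int>> Users = {{1, 2}, {3}, {}, {}};
  std::vector<int> Worklist = {0}; // seed with the instruction just moved
  std::set<int> Moved;
  while (!Worklist.empty()) {
    int Inst = Worklist.back();
    Worklist.pop_back();
    if (!Moved.insert(Inst).second)
      continue; // already processed
    std::cout << "moving " << Inst << '\n';
    for (int U : Users[Inst]) // users must follow their converted def
      Worklist.push_back(U);
  }
  // prints: moving 0, moving 2, moving 1, moving 3 (LIFO pop order)
}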
/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible,
/// and trim the live ranges after.
void InlineSpiller::reMaterializeAll() {
  // Do a quick scan of the interval values to find if any are remattable.
  reMattable_.clear();
  usedValues_.clear();
  for (LiveInterval::const_vni_iterator I = li_->vni_begin(),
       E = li_->vni_end(); I != E; ++I) {
    VNInfo *VNI = *I;
    if (VNI->isUnused() || !VNI->isDefAccurate())
      continue;
    MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
    if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI))
      continue;
    reMattable_.insert(VNI);
  }

  // Often, no defs are remattable.
  if (reMattable_.empty())
    return;

  // Try to remat before all uses of li_->reg.
  bool anyRemat = false;
  for (MachineRegisterInfo::use_nodbg_iterator
       RI = mri_.use_nodbg_begin(li_->reg);
       MachineInstr *MI = RI.skipInstruction();)
    anyRemat |= reMaterializeFor(MI);
  if (!anyRemat)
    return;

  // Remove any values that were completely rematted.
  bool anyRemoved = false;
  for (SmallPtrSet<VNInfo*, 8>::iterator I = reMattable_.begin(),
       E = reMattable_.end(); I != E; ++I) {
    VNInfo *VNI = *I;
    if (VNI->hasPHIKill() || usedValues_.count(VNI))
      continue;
    MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
    DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI);
    lis_.RemoveMachineInstrFromMaps(DefMI);
    vrm_.RemoveMachineInstrFromMaps(DefMI);
    DefMI->eraseFromParent();
    VNI->setIsDefAccurate(false);
    anyRemoved = true;
  }

  if (!anyRemoved)
    return;

  // Removing values may cause debug uses where li_ is not live.
  for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg);
       MachineInstr *MI = RI.skipInstruction();) {
    if (!MI->isDebugValue())
      continue;
    // Try to preserve the debug value if li_ is live immediately after it.
    MachineBasicBlock::iterator NextMI = MI;
    ++NextMI;
    if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) {
      VNInfo *VNI = li_->getVNInfoAt(lis_.getInstructionIndex(NextMI));
      if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI)))
        continue;
    }
    DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI);
    MI->eraseFromParent();
  }
}