bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
                                                MachineInstr &IntoMI) const {
  // Immediate neighbours are already folded.
  if (MI.getParent() == IntoMI.getParent() &&
      std::next(MI.getIterator()) == IntoMI.getIterator())
    return true;

  return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
         MI.implicit_operands().begin() == MI.implicit_operands().end();
}
void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) {
  Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI);
  if (mi2iItr == mi2iMap.end())
    return;

  SlotIndex MIIndex = mi2iItr->second;
  IndexListEntry &MIEntry = *MIIndex.listEntry();
  assert(MIEntry.getInstr() == &MI && "Instruction indexes broken.");
  mi2iMap.erase(mi2iItr);

  // When removing the first instruction of a bundle, update the mapping to
  // point to the next instruction.
  if (MI.isBundledWithSucc()) {
    // Only the first instruction of a bundle should have an index assigned.
    assert(!MI.isBundledWithPred() && "Should have first bundle instruction");

    MachineBasicBlock::instr_iterator Next = std::next(MI.getIterator());
    MachineInstr &NextMI = *Next;
    MIEntry.setInstr(&NextMI);
    mi2iMap.insert(std::make_pair(&NextMI, MIIndex));
    return;
  } else {
    // FIXME: Eventually we want to actually delete these indexes.
    MIEntry.setInstr(nullptr);
  }
}
bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction *MF = MBB.getParent();

  if (MF->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
      !shouldSkip(MBB, MBB.getParent()->back()))
    return false;

  MachineBasicBlock *SkipBB = insertSkipBlock(MBB, MI.getIterator());

  const DebugLoc &DL = MI.getDebugLoc();

  // If the exec mask is non-zero, skip the next two instructions.
  BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
    .addMBB(&NextBB);

  MachineBasicBlock::iterator Insert = SkipBB->begin();

  // Exec mask is zero: Export to NULL target...
  BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP))
    .addImm(0)
    .addImm(0x09)  // V_008DFC_SQ_EXP_NULL
    .addImm(0)
    .addImm(1)
    .addImm(1)
    .addReg(AMDGPU::VGPR0, RegState::Undef)
    .addReg(AMDGPU::VGPR0, RegState::Undef)
    .addReg(AMDGPU::VGPR0, RegState::Undef)
    .addReg(AMDGPU::VGPR0, RegState::Undef);

  // ... and terminate wavefront.
  BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));

  return true;
}
// Compute base address using Addr and return the final register.
unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
                                           const MemAddress &Addr) {
  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::iterator MBBI = MI.getIterator();
  DebugLoc DL = MI.getDebugLoc();

  assert((TRI->getRegSizeInBits(Addr.Base.LoReg, *MRI) == 32 ||
          Addr.Base.LoSubReg) &&
         "Expected 32-bit Base-Register-Low!!");

  assert((TRI->getRegSizeInBits(Addr.Base.HiReg, *MRI) == 32 ||
          Addr.Base.HiSubReg) &&
         "Expected 32-bit Base-Register-Hi!!");

  LLVM_DEBUG(dbgs() << " Re-Computed Anchor-Base:\n");
  MachineOperand OffsetLo =
      createRegOrImm(static_cast<int32_t>(Addr.Offset), MI);
  MachineOperand OffsetHi =
      createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);
  unsigned CarryReg =
      MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
  unsigned DeadCarryReg =
      MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

  unsigned DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *LoHalf =
      BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0)
          .addReg(CarryReg, RegState::Define)
          .addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
          .add(OffsetLo);
  (void)LoHalf;
  LLVM_DEBUG(dbgs() << " "; LoHalf->dump(););
// endPacket - End the current packet, bundle packet instructions and reset
// DFA state.
void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, MachineInstr *MI) {
  if (CurrentPacketMIs.size() > 1) {
    MachineInstr *MIFirst = CurrentPacketMIs.front();
    finalizeBundle(*MBB, MIFirst->getIterator(), MI->getIterator());
  }
  CurrentPacketMIs.clear();
  ResourceTracker->clearResources();
}
bool Mips16InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  MachineBasicBlock &MBB = *MI.getParent();
  switch (MI.getDesc().getOpcode()) {
  default:
    return false;
  case Mips::RetRA16:
    ExpandRetRA16(MBB, MI, Mips::JrcRa16);
    break;
  }

  MBB.erase(MI.getIterator());
  return true;
}
// Returns true if a branch over the block was inserted.
bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
                                   MachineBasicBlock &SrcMBB) {
  MachineBasicBlock *DestBB = MI.getOperand(0).getMBB();

  if (!shouldSkip(**SrcMBB.succ_begin(), *DestBB))
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock::iterator InsPt = std::next(MI.getIterator());

  BuildMI(SrcMBB, InsPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
    .addMBB(DestBB);

  return true;
}
MachineOperand
SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) {
  APInt V(32, Val, true);
  if (TII->isInlineConstant(V))
    return MachineOperand::CreateImm(Val);

  unsigned Reg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MachineInstr *Mov =
      BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
              TII->get(AMDGPU::S_MOV_B32), Reg)
          .addImm(Val);
  (void)Mov;
  LLVM_DEBUG(dbgs() << " "; Mov->dump());
  return MachineOperand::CreateReg(Reg, false);
}
bool ARMInstructionSelector::select(MachineInstr &I,
                                    CodeGenCoverage &CoverageInfo) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  auto &MBB = *I.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  using namespace TargetOpcode;

  if (selectImpl(I, CoverageInfo))
    return true;

  MachineInstrBuilder MIB{MF, I};
  bool isSExt = false;

  switch (I.getOpcode()) {
  case G_SEXT:
    isSExt = true;
    LLVM_FALLTHROUGH;
  case G_ZEXT: {
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    // FIXME: Smaller destination sizes coming soon!
    if (DstTy.getSizeInBits() != 32) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size for extension");
      return false;
    }

    LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
    unsigned SrcSize = SrcTy.getSizeInBits();
    switch (SrcSize) {
    case 1: {
      // ZExt boils down to & 0x1; for SExt we also subtract that from 0
      I.setDesc(TII.get(Opcodes.AND));
      MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp());

      if (isSExt) {
        unsigned SExtResult = I.getOperand(0).getReg();

        // Use a new virtual register for the result of the AND
        unsigned AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass);
        I.getOperand(0).setReg(AndResult);

        auto InsertBefore = std::next(I.getIterator());
        auto SubI =
            BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(Opcodes.RSB))
                .addDef(SExtResult)
                .addUse(AndResult)
                .addImm(0)
                .add(predOps(ARMCC::AL))
                .add(condCodeOp());
        if (!constrainSelectedInstRegOperands(*SubI, TII, TRI, RBI))
          return false;
      }
      break;
    }
    case 8:
    case 16: {
      unsigned NewOpc = selectSimpleExtOpc(I.getOpcode(), SrcSize);
      if (NewOpc == I.getOpcode())
        return false;
      I.setDesc(TII.get(NewOpc));
      MIB.addImm(0).add(predOps(ARMCC::AL));
      break;
    }
    default:
      LLVM_DEBUG(dbgs() << "Unsupported source size for extension");
      return false;
    }
    break;
  }
  case G_ANYEXT:
  case G_TRUNC: {
    // The high bits are undefined, so there's nothing special to do, just
    // treat it as a copy.
    auto SrcReg = I.getOperand(1).getReg();
    auto DstReg = I.getOperand(0).getReg();

    const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
    const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

    if (SrcRegBank.getID() == ARM::FPRRegBankID) {
      // This should only happen in the obscure case where we have put a 64-bit
      // integer into a D register. Get it out of there and keep only the
      // interesting part.
      assert(I.getOpcode() == G_TRUNC && "Unsupported operand for G_ANYEXT");
      assert(DstRegBank.getID() == ARM::GPRRegBankID &&
             "Unsupported combination of register banks");
      assert(MRI.getType(SrcReg).getSizeInBits() == 64 && "Unsupported size");
      assert(MRI.getType(DstReg).getSizeInBits() <= 32 && "Unsupported size");

      unsigned IgnoredBits = MRI.createVirtualRegister(&ARM::GPRRegClass);
      auto InsertBefore = std::next(I.getIterator());
      auto MovI =
          BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::VMOVRRD))
              .addDef(DstReg)
              .addDef(IgnoredBits)
              .addUse(SrcReg)
              .add(predOps(ARMCC::AL));
      if (!constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI))
        return false;

      MIB->eraseFromParent();
      return true;
    }

    if (SrcRegBank.getID() != DstRegBank.getID()) {
      LLVM_DEBUG(
          dbgs() << "G_TRUNC/G_ANYEXT operands on different register banks\n");
      return false;
    }

    if (SrcRegBank.getID() != ARM::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_TRUNC/G_ANYEXT on non-GPR not supported yet\n");
      return false;
    }

    I.setDesc(TII.get(COPY));
    return selectCopy(I, TII, MRI, TRI, RBI);
  }
  case G_CONSTANT: {
    if (!MRI.getType(I.getOperand(0).getReg()).isPointer()) {
      // Non-pointer constants should be handled by TableGen.
      LLVM_DEBUG(dbgs() << "Unsupported constant type\n");
      return false;
    }

    auto &Val = I.getOperand(1);
    if (Val.isCImm()) {
      if (!Val.getCImm()->isZero()) {
        LLVM_DEBUG(dbgs() << "Unsupported pointer constant value\n");
        return false;
      }
      Val.ChangeToImmediate(0);
    } else {
      assert(Val.isImm() && "Unexpected operand for G_CONSTANT");
      if (Val.getImm() != 0) {
        LLVM_DEBUG(dbgs() << "Unsupported pointer constant value\n");
        return false;
      }
    }

    I.setDesc(TII.get(ARM::MOVi));
    MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
    break;
  }
  case G_INTTOPTR:
  case G_PTRTOINT: {
    auto SrcReg = I.getOperand(1).getReg();
    auto DstReg = I.getOperand(0).getReg();

    const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
    const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

    if (SrcRegBank.getID() != DstRegBank.getID()) {
      LLVM_DEBUG(
          dbgs()
          << "G_INTTOPTR/G_PTRTOINT operands on different register banks\n");
      return false;
    }

    if (SrcRegBank.getID() != ARM::GPRRegBankID) {
      LLVM_DEBUG(
          dbgs() << "G_INTTOPTR/G_PTRTOINT on non-GPR not supported yet\n");
      return false;
    }

    I.setDesc(TII.get(COPY));
    return selectCopy(I, TII, MRI, TRI, RBI);
  }
  case G_SELECT:
    return selectSelect(MIB, MRI);
  case G_ICMP: {
    CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END,
                        ARM::GPRRegBankID, 32);
    return selectCmp(Helper, MIB, MRI);
  }
  case G_FCMP: {
    assert(STI.hasVFP2() && "Can't select fcmp without VFP");

    unsigned OpReg = I.getOperand(2).getReg();
    unsigned Size = MRI.getType(OpReg).getSizeInBits();

    if (Size == 64 && STI.isFPOnlySP()) {
      LLVM_DEBUG(dbgs() << "Subtarget only supports single precision");
      return false;
    }
    if (Size != 32 && Size != 64) {
      LLVM_DEBUG(dbgs() << "Unsupported size for G_FCMP operand");
      return false;
    }

    CmpConstants Helper(Size == 32 ? ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT,
                        ARM::FPRRegBankID, Size);
    return selectCmp(Helper, MIB, MRI);
  }
  case G_LSHR:
    return selectShift(ARM_AM::ShiftOpc::lsr, MIB);
  case G_ASHR:
    return selectShift(ARM_AM::ShiftOpc::asr, MIB);
  case G_SHL: {
    return selectShift(ARM_AM::ShiftOpc::lsl, MIB);
  }
  case G_GEP:
    I.setDesc(TII.get(ARM::ADDrr));
    MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
    break;
  case G_FRAME_INDEX:
    // Add 0 to the given frame index and hope it will eventually be folded
    // into the user(s).
    I.setDesc(TII.get(ARM::ADDri));
    MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp());
    break;
  case G_GLOBAL_VALUE:
    return selectGlobal(MIB, MRI);
  case G_STORE:
  case G_LOAD: {
    const auto &MemOp = **I.memoperands_begin();
    if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
      LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
      return false;
    }

    unsigned Reg = I.getOperand(0).getReg();
    unsigned RegBank = RBI.getRegBank(Reg, MRI, TRI)->getID();

    LLT ValTy = MRI.getType(Reg);
    const auto ValSize = ValTy.getSizeInBits();

    assert((ValSize != 64 || STI.hasVFP2()) &&
           "Don't know how to load/store 64-bit value without VFP");

    const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize);
    if (NewOpc == G_LOAD || NewOpc == G_STORE)
      return false;

    I.setDesc(TII.get(NewOpc));

    if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH)
      // LDRH has a funny addressing mode (there's already a FIXME for it).
      MIB.addReg(0);
    MIB.addImm(0).add(predOps(ARMCC::AL));
    break;
  }
  case G_MERGE_VALUES: {
    if (!selectMergeValues(MIB, TII, MRI, TRI, RBI))
      return false;
    break;
  }
  case G_UNMERGE_VALUES: {
    if (!selectUnmergeValues(MIB, TII, MRI, TRI, RBI))
      return false;
    break;
  }
  case G_BRCOND: {
    if (!validReg(MRI, I.getOperand(0).getReg(), 1, ARM::GPRRegBankID)) {
      LLVM_DEBUG(dbgs() << "Unsupported condition register for G_BRCOND");
      return false;
    }

    // Set the flags.
    auto Test = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::TSTri))
                    .addReg(I.getOperand(0).getReg())
                    .addImm(1)
                    .add(predOps(ARMCC::AL));
    if (!constrainSelectedInstRegOperands(*Test, TII, TRI, RBI))
      return false;

    // Branch conditionally.
    auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::Bcc))
                      .add(I.getOperand(1))
                      .add(predOps(ARMCC::NE, ARM::CPSR));
    if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI))
      return false;
    I.eraseFromParent();
    return true;
  }
  case G_PHI: {
    I.setDesc(TII.get(PHI));

    unsigned DstReg = I.getOperand(0).getReg();
    const TargetRegisterClass *RC = guessRegClass(DstReg, MRI, TRI, RBI);
    if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
      break;
    }

    return true;
  }
  default:
    return false;
  }

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
void HexagonCopyToCombine::combine(MachineInstr &I1, MachineInstr &I2,
                                   MachineBasicBlock::iterator &MI,
                                   bool DoInsertAtI1, bool OptForSize) {
  // We are going to delete I2. If MI points to I2 advance it to the next
  // instruction.
  if (MI == I2.getIterator())
    ++MI;

  // Figure out whether I1 or I2 goes into the lowreg part.
  unsigned I1DestReg = I1.getOperand(0).getReg();
  unsigned I2DestReg = I2.getOperand(0).getReg();
  bool IsI1Loreg = (I2DestReg - I1DestReg) == 1;
  unsigned LoRegDef = IsI1Loreg ? I1DestReg : I2DestReg;

  // Get the double word register.
  unsigned DoubleRegDest =
      TRI->getMatchingSuperReg(LoRegDef, Hexagon::subreg_loreg,
                               &Hexagon::DoubleRegsRegClass);
  assert(DoubleRegDest != 0 && "Expect a valid register");

  // Setup source operands.
  MachineOperand &LoOperand = IsI1Loreg ? I1.getOperand(1) : I2.getOperand(1);
  MachineOperand &HiOperand = IsI1Loreg ? I2.getOperand(1) : I1.getOperand(1);

  // Figure out which source is a register and which a constant.
  bool IsHiReg = HiOperand.isReg();
  bool IsLoReg = LoOperand.isReg();

  // There is a combine of two constant extended values into CONST64.
  bool IsC64 = OptForSize && LoOperand.isImm() && HiOperand.isImm() &&
               isGreaterThanNBitTFRI<16>(I1) && isGreaterThanNBitTFRI<16>(I2);

  MachineBasicBlock::iterator InsertPt(DoInsertAtI1 ? I1 : I2);
  // Emit combine.
  if (IsHiReg && IsLoReg)
    emitCombineRR(InsertPt, DoubleRegDest, HiOperand, LoOperand);
  else if (IsHiReg)
    emitCombineRI(InsertPt, DoubleRegDest, HiOperand, LoOperand);
  else if (IsLoReg)
    emitCombineIR(InsertPt, DoubleRegDest, HiOperand, LoOperand);
  else if (IsC64 && !IsConst64Disabled)
    emitConst64(InsertPt, DoubleRegDest, HiOperand, LoOperand);
  else
    emitCombineII(InsertPt, DoubleRegDest, HiOperand, LoOperand);

  // Move debug instructions along with I1 if it's being
  // moved towards I2.
  if (!DoInsertAtI1 && DbgMItoMove.size() != 0) {
    // Insert debug instructions at the new location before I2.
    MachineBasicBlock *BB = InsertPt->getParent();
    for (auto NewMI : DbgMItoMove) {
      // If iterator MI is pointing to DEBUG_VAL, make sure
      // MI now points to next relevant instruction.
      if (NewMI == (MachineInstr *)MI)
        ++MI;
      BB->splice(InsertPt, BB, NewMI);
    }
  }

  I1.eraseFromParent();
  I2.eraseFromParent();
}
bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // Optimize sequences emitted for control flow lowering. They are originally
  // emitted as the separate operations because spill code may need to be
  // inserted for the saved copy of exec.
  //
  //     x = copy exec
  //     z = s_<op>_b64 x, y
  //     exec = copy z
  // =>
  //     x = s_<op>_saveexec_b64 y
  //
  for (MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::reverse_iterator I = fixTerminators(*TII, MBB);
    MachineBasicBlock::reverse_iterator E = MBB.rend();
    if (I == E)
      continue;

    unsigned CopyToExec = isCopyToExec(*I);
    if (CopyToExec == AMDGPU::NoRegister)
      continue;

    // Scan backwards to find the def.
    auto CopyToExecInst = &*I;
    auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
    if (CopyFromExecInst == E) {
      auto PrepareExecInst = std::next(I);
      if (PrepareExecInst == E)
        continue;
      // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec
      if (CopyToExecInst->getOperand(1).isKill() &&
          isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
        DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);

        PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
        PrepareExecInst->getOperand(0).setIsRenamable(false);

        DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');

        CopyToExecInst->eraseFromParent();
      }

      continue;
    }

    if (isLiveOut(MBB, CopyToExec)) {
      // The copied register is live out and has a second use in another block.
      DEBUG(dbgs() << "Exec copy source register is live out\n");
      continue;
    }

    unsigned CopyFromExec = CopyFromExecInst->getOperand(0).getReg();
    MachineInstr *SaveExecInst = nullptr;
    SmallVector<MachineInstr *, 4> OtherUseInsts;

    for (MachineBasicBlock::iterator
             J = std::next(CopyFromExecInst->getIterator()),
             JE = I->getIterator();
         J != JE; ++J) {
      if (SaveExecInst && J->readsRegister(AMDGPU::EXEC, TRI)) {
        DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n');
        // Make sure this is inserted after any VALU ops that may have been
        // scheduled in between.
        SaveExecInst = nullptr;
        break;
      }

      bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI);

      if (J->modifiesRegister(CopyToExec, TRI)) {
        if (SaveExecInst) {
          DEBUG(dbgs() << "Multiple instructions modify "
                       << printReg(CopyToExec, TRI) << '\n');
          SaveExecInst = nullptr;
          break;
        }

        unsigned SaveExecOp = getSaveExecOp(J->getOpcode());
        if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END)
          break;

        if (ReadsCopyFromExec) {
          SaveExecInst = &*J;
          DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n');
          continue;
        } else {
          DEBUG(dbgs() << "Instruction does not read exec copy: " << *J
                       << '\n');
          break;
        }
      } else if (ReadsCopyFromExec && !SaveExecInst) {
        // Make sure no other instruction is trying to use this copy, before it
        // will be rewritten by the saveexec, i.e. hasOneUse. There may have
        // been another use, such as an inserted spill. For example:
        //
        //     %sgpr0_sgpr1 = COPY %exec
        //     spill %sgpr0_sgpr1
        //     %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1
        //
        DEBUG(dbgs() << "Found second use of save inst candidate: " << *J
                     << '\n');
        break;
      }

      if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) {
        assert(SaveExecInst != &*J);
        OtherUseInsts.push_back(&*J);
      }
    }

    if (!SaveExecInst)
      continue;

    DEBUG(dbgs() << "Insert save exec op: " << *SaveExecInst << '\n');

    MachineOperand &Src0 = SaveExecInst->getOperand(1);
    MachineOperand &Src1 = SaveExecInst->getOperand(2);

    MachineOperand *OtherOp = nullptr;

    if (Src0.isReg() && Src0.getReg() == CopyFromExec) {
      OtherOp = &Src1;
    } else if (Src1.isReg() && Src1.getReg() == CopyFromExec) {
      if (!SaveExecInst->isCommutable())
        break;

      OtherOp = &Src0;
    } else
      llvm_unreachable("unexpected");

    CopyFromExecInst->eraseFromParent();

    auto InsPt = SaveExecInst->getIterator();
    const DebugLoc &DL = SaveExecInst->getDebugLoc();

    BuildMI(MBB, InsPt, DL, TII->get(getSaveExecOp(SaveExecInst->getOpcode())),
            CopyFromExec)
        .addReg(OtherOp->getReg());
    SaveExecInst->eraseFromParent();

    CopyToExecInst->eraseFromParent();

    for (MachineInstr *OtherInst : OtherUseInsts) {
      OtherInst->substituteRegister(CopyToExec, AMDGPU::EXEC,
                                    AMDGPU::NoSubRegister, *TRI,
                                    /*ClearIsRenamable=*/true);
    }
  }

  return true;
}
bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
  HST = &MF.getSubtarget<HexagonSubtarget>();
  HII = HST->getInstrInfo();
  const auto &HRI = *HST->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  std::map<unsigned, SmallVector<MachineInstr*,4>> VExtractMap;
  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      unsigned Opc = MI.getOpcode();
      if (Opc != Hexagon::V6_extractw)
        continue;
      unsigned VecR = MI.getOperand(1).getReg();
      VExtractMap[VecR].push_back(&MI);
    }
  }

  for (auto &P : VExtractMap) {
    unsigned VecR = P.first;
    if (P.second.size() <= VExtractThreshold)
      continue;

    const auto &VecRC = *MRI.getRegClass(VecR);
    int FI = MFI.CreateSpillStackObject(HRI.getSpillSize(VecRC),
                                        HRI.getSpillAlignment(VecRC));
    MachineInstr *DefI = MRI.getVRegDef(VecR);
    MachineBasicBlock::iterator At = std::next(DefI->getIterator());
    MachineBasicBlock &DefB = *DefI->getParent();
    unsigned StoreOpc = VecRC.getID() == Hexagon::HvxVRRegClassID
                            ? Hexagon::V6_vS32b_ai
                            : Hexagon::PS_vstorerw_ai;
    BuildMI(DefB, At, DefI->getDebugLoc(), HII->get(StoreOpc))
        .addFrameIndex(FI)
        .addImm(0)
        .addReg(VecR);

    unsigned VecSize = HRI.getRegSizeInBits(VecRC) / 8;

    for (MachineInstr *ExtI : P.second) {
      assert(ExtI->getOpcode() == Hexagon::V6_extractw);
      unsigned SR = ExtI->getOperand(1).getSubReg();
      assert(ExtI->getOperand(1).getReg() == VecR);

      MachineBasicBlock &ExtB = *ExtI->getParent();
      DebugLoc DL = ExtI->getDebugLoc();
      unsigned BaseR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::PS_fi), BaseR)
          .addFrameIndex(FI)
          .addImm(SR == 0 ? 0 : VecSize / 2);

      unsigned ElemR = genElemLoad(ExtI, BaseR, MRI);
      unsigned ExtR = ExtI->getOperand(0).getReg();
      MRI.replaceRegWith(ExtR, ElemR);
      ExtB.erase(ExtI);
      Changed = true;
    }
  }

  return Changed;
}
/// fixupConditionalBranch - Fix up a conditional branch whose destination is
/// too far away to fit in its displacement field. It is converted to an
/// inverse conditional branch + an unconditional branch to the destination.
bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
  MachineBasicBlock *DestBB = getDestBlock(MI);

  // Add an unconditional branch to the destination and invert the branch
  // condition to jump over it:
  // tbz L1
  // =>
  // tbnz L2
  // b   L1
  // L2:

  // If the branch is at the end of its MBB and that has a fall-through block,
  // direct the updated conditional branch to the fall-through block. Otherwise,
  // split the MBB before the next instruction.
  MachineBasicBlock *MBB = MI.getParent();
  MachineInstr *BMI = &MBB->back();
  bool NeedSplit = (BMI != &MI) || !hasFallthrough(*MBB);

  if (BMI != &MI) {
    if (std::next(MachineBasicBlock::iterator(MI)) ==
            std::prev(MBB->getLastNonDebugInstr()) &&
        BMI->isUnconditionalBranch()) {
      // Last MI in the BB is an unconditional branch. We can simply invert the
      // condition and swap destinations:
      // beq L1
      // b   L2
      // =>
      // bne L2
      // b   L1
      MachineBasicBlock *NewDest = getDestBlock(*BMI);
      if (isBlockInRange(MI, *NewDest)) {
        DEBUG(dbgs() << " Invert condition and swap its destination with "
                     << *BMI);
        changeBranchDestBlock(*BMI, *DestBB);

        int NewSize = insertInvertedConditionalBranch(
            *MBB, MI.getIterator(), MI.getDebugLoc(), MI, *NewDest);
        int OldSize = TII->getInstSizeInBytes(MI);
        BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize);
        MI.eraseFromParent();
        return true;
      }
    }
  }

  if (NeedSplit) {
    // Analyze the branch so we know how to update the successor lists.
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    SmallVector<MachineOperand, 2> Cond;
    bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond, false);
    assert(!Fail && "branches to relax should be analyzable");
    (void)Fail;

    MachineBasicBlock *NewBB = splitBlockBeforeInstr(MI);
    // No need for the branch to the next block. We're adding an unconditional
    // branch to the destination.
    int delta = TII->getInstSizeInBytes(MBB->back());
    BlockInfo[MBB->getNumber()].Size -= delta;
    MBB->back().eraseFromParent();
    // BlockInfo[SplitBB].Offset is wrong temporarily, fixed below

    // Update the successor lists according to the transformation to follow.
    // Do it here since if there's no split, no update is needed.
    MBB->replaceSuccessor(FBB, NewBB);
    NewBB->addSuccessor(FBB);
  }

  MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB));

  DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
               << ", invert condition and change dest. to BB#"
               << NextBB.getNumber() << '\n');

  unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size;

  // Insert a new conditional branch and a new unconditional branch.
  MBBSize += insertInvertedConditionalBranch(*MBB, MBB->end(),
                                             MI.getDebugLoc(), MI, NextBB);

  MBBSize += insertUnconditionalBranch(*MBB, *DestBB, MI.getDebugLoc());

  // Remove the old conditional branch. It may or may not still be in MBB.
  MBBSize -= TII->getInstSizeInBytes(MI);
  MI.eraseFromParent();

  // Finally, keep the block offsets up to date.
  adjustBlockOffsets(*MBB);
  return true;
}
bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
  bool Modified = false;

  SmallSet<unsigned, 4> Defs;
  SmallSet<unsigned, 4> Uses;
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineInstr *MI = &*MBBI;
    DebugLoc dl = MI->getDebugLoc();
    unsigned PredReg = 0;
    ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg);
    if (CC == ARMCC::AL) {
      ++MBBI;
      continue;
    }

    Defs.clear();
    Uses.clear();
    TrackDefUses(MI, Defs, Uses, TRI);

    // Insert an IT instruction.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
      .addImm(CC);

    // Add implicit use of ITSTATE to IT block instructions.
    MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
                                             true/*isImp*/, false/*isKill*/));

    MachineInstr *LastITMI = MI;
    MachineBasicBlock::iterator InsertPos = MIB.getInstr();
    ++MBBI;

    // Form IT block.
    ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
    unsigned Mask = 0, Pos = 3;

    // v8 IT blocks are limited to one conditional op unless
    // -arm-no-restrict-it is set: skip the loop.
    if (!restrictIT) {
      // Branches, including tricky ones like LDM_RET, need to end an IT
      // block so check the instruction we just put in the block.
      for (; MBBI != E && Pos &&
             (!MI->isBranch() && !MI->isReturn()); ++MBBI) {
        if (MBBI->isDebugValue())
          continue;

        MachineInstr *NMI = &*MBBI;
        MI = NMI;

        unsigned NPredReg = 0;
        ARMCC::CondCodes NCC = getITInstrPredicate(*NMI, NPredReg);
        if (NCC == CC || NCC == OCC) {
          Mask |= (NCC & 1) << Pos;
          // Add implicit use of ITSTATE.
          NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
                                                    true/*isImp*/, false/*isKill*/));
          LastITMI = NMI;
        } else {
          if (NCC == ARMCC::AL &&
              MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
            --MBBI;
            MBB.remove(NMI);
            MBB.insert(InsertPos, NMI);
            ClearKillFlags(MI, Uses);
            ++NumMovedInsts;
            continue;
          }
          break;
        }
        TrackDefUses(NMI, Defs, Uses, TRI);
        --Pos;
      }
    }

    // Finalize IT mask.
    Mask |= (1 << Pos);
    // Tag along (firstcond[0] << 4) with the mask.
    Mask |= (CC & 1) << 4;
    MIB.addImm(Mask);

    // Last instruction in IT block kills ITSTATE.
    LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();

    // Finalize the bundle.
    finalizeBundle(MBB, InsertPos.getInstrIterator(),
                   ++LastITMI->getIterator());

    Modified = true;
    ++NumITs;
  }

  return Modified;
}
bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();

  LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');

  InstOrderMap IOM;
  // Map from register to instruction order (value of IOM) where the
  // register is used last. When moving instructions up, we need to
  // make sure all its defs (including dead def) will not cross its
  // last use when moving up.
  DenseMap<unsigned, std::pair<unsigned, MachineInstr *>> UseMap;

  for (MachineBasicBlock &MBB : MF) {
    if (MBB.empty())
      continue;
    bool SawStore = false;
    BuildInstOrderMap(MBB.begin(), IOM);
    UseMap.clear();

    for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) {
      MachineInstr &MI = *Next;
      ++Next;
      if (MI.isPHI() || MI.isDebugInstr())
        continue;
      if (MI.mayStore())
        SawStore = true;

      unsigned CurrentOrder = IOM[&MI];
      unsigned Barrier = 0;
      MachineInstr *BarrierMI = nullptr;
      for (const MachineOperand &MO : MI.operands()) {
        if (!MO.isReg() || MO.isDebug())
          continue;
        if (MO.isUse())
          UseMap[MO.getReg()] = std::make_pair(CurrentOrder, &MI);
        else if (MO.isDead() && UseMap.count(MO.getReg()))
          // Barrier is the last instruction where MO gets used. MI should not
          // be moved above Barrier.
          if (Barrier < UseMap[MO.getReg()].first) {
            Barrier = UseMap[MO.getReg()].first;
            BarrierMI = UseMap[MO.getReg()].second;
          }
      }

      if (!MI.isSafeToMove(nullptr, SawStore)) {
        // If MI has side effects, it should become a barrier for code motion.
        // IOM is rebuilt from the next instruction to prevent later
        // instructions from being moved before this MI.
        if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
          BuildInstOrderMap(Next, IOM);
          SawStore = false;
        }
        continue;
      }

      const MachineOperand *DefMO = nullptr;
      MachineInstr *Insert = nullptr;

      // Number of live-ranges that will be shortened. We do not count
      // live-ranges that are defined by a COPY as it could be coalesced later.
      unsigned NumEligibleUse = 0;

      for (const MachineOperand &MO : MI.operands()) {
        if (!MO.isReg() || MO.isDead() || MO.isDebug())
          continue;
        unsigned Reg = MO.getReg();
        // Do not move the instruction if it def/uses a physical register,
        // unless it is a constant physical register or a noreg.
        if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
          if (!Reg || MRI.isConstantPhysReg(Reg))
            continue;
          Insert = nullptr;
          break;
        }
        if (MO.isDef()) {
          // Do not move if there is more than one def.
          if (DefMO) {
            Insert = nullptr;
            break;
          }
          DefMO = &MO;
        } else if (MRI.hasOneNonDBGUse(Reg) && MRI.hasOneDef(Reg) && DefMO &&
                   MRI.getRegClass(DefMO->getReg()) ==
                       MRI.getRegClass(MO.getReg())) {
          // The heuristic does not handle different register classes yet
          // (registers of different sizes, looser/tighter constraints). This
          // is because it needs a more accurate model to handle register
          // pressure correctly.
          MachineInstr &DefInstr = *MRI.def_instr_begin(Reg);
          if (!DefInstr.isCopy())
            NumEligibleUse++;
          Insert = FindDominatedInstruction(DefInstr, Insert, IOM);
        } else {
          Insert = nullptr;
          break;
        }
      }

      // If Barrier equals IOM[I], traverse forward to find if BarrierMI is
      // after Insert, if yes, then we should not hoist.
      for (MachineInstr *I = Insert; I && IOM[I] == Barrier;
           I = I->getNextNode())
        if (I == BarrierMI) {
          Insert = nullptr;
          break;
        }

      // Move the instruction when # of shrunk live range > 1.
      if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) {
        MachineBasicBlock::iterator I = std::next(Insert->getIterator());
        // Skip all the PHI and debug instructions.
        while (I != MBB.end() && (I->isPHI() || I->isDebugInstr()))
          I = std::next(I);
        if (I == MI.getIterator())
          continue;

        // Update the dominator order to be the same as the insertion point.
        // We do this to maintain a non-decreasing order without need to update
        // all instruction orders after the insertion point.
        unsigned NewOrder = IOM[&*I];
        IOM[&MI] = NewOrder;
        NumInstrsHoistedToShrinkLiveRange++;

        // Find MI's debug value following MI.
        MachineBasicBlock::iterator EndIter = std::next(MI.getIterator());
        if (MI.getOperand(0).isReg())
          for (; EndIter != MBB.end() && EndIter->isDebugValue() &&
                 EndIter->getOperand(0).isReg() &&
                 EndIter->getOperand(0).getReg() == MI.getOperand(0).getReg();
               ++EndIter, ++Next)
            IOM[&*EndIter] = NewOrder;
        MBB.splice(I, &MBB, MI.getIterator(), EndIter);
      }
    }
  }
  return false;
}