/// AddSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch or when the liveout registers
/// are used by instructions in the fallthrough block.
void ScheduleDAGInstrs::AddSchedBarrierDeps() {
  MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
  ExitSU.setInstr(ExitMI);
  bool AllDepKnown = ExitMI &&
    (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier());
  if (ExitMI && AllDepKnown) {
    // If it's a call or a barrier, add dependencies on the defs and uses of
    // the instruction.
    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = ExitMI->getOperand(i);
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
      Uses[Reg].push_back(&ExitSU);
    }
  } else {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    SmallSet<unsigned, 8> Seen;
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (Seen.insert(Reg))
          Uses[Reg].push_back(&ExitSU);
      }
  }
}
std::tuple<LegalizerInfo::LegalizeAction, unsigned, LLT>
LegalizerInfo::getAction(const MachineInstr &MI,
                         const MachineRegisterInfo &MRI) const {
  SmallBitVector SeenTypes(8);
  const MCOperandInfo *OpInfo = MI.getDesc().OpInfo;
  // FIXME: probably we'll need to cache the results here somehow?
  for (unsigned i = 0; i < MI.getDesc().getNumOperands(); ++i) {
    if (!OpInfo[i].isGenericType())
      continue;

    // We must only record actions once for each TypeIdx; otherwise we'd
    // try to legalize operands multiple times down the line.
    unsigned TypeIdx = OpInfo[i].getGenericTypeIndex();
    if (SeenTypes[TypeIdx])
      continue;

    SeenTypes.set(TypeIdx);

    LLT Ty = getTypeFromTypeIdx(MI, MRI, i, TypeIdx);
    auto Action = getAction({MI.getOpcode(), TypeIdx, Ty});
    if (Action.first != Legal)
      return std::make_tuple(Action.first, TypeIdx, Action.second);
  }
  return std::make_tuple(Legal, 0, LLT{});
}
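// Illustrative sketch only (not taken from the snippet above): a caller of
// LegalizerInfo::getAction typically unpacks the returned tuple and dispatches
// on the action before mutating the instruction. The helper name
// `legalizeWithAction` is a hypothetical stand-in for whatever applies the
// requested transformation.
static bool checkAndLegalize(const LegalizerInfo &LInfo, MachineInstr &MI,
                             MachineRegisterInfo &MRI) {
  LegalizerInfo::LegalizeAction Action;
  unsigned TypeIdx;
  LLT Ty;
  std::tie(Action, TypeIdx, Ty) = LInfo.getAction(MI, MRI);
  if (Action == LegalizerInfo::Legal)
    return true;  // Nothing to do for this instruction.
  // Hypothetical helper that narrows/widens/lowers the offending type index.
  return legalizeWithAction(MI, Action, TypeIdx, Ty);
}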
/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
/// slot into the specified machine instruction for the specified operand(s).
/// If this is possible, a new instruction is returned with the specified
/// operand folded, otherwise NULL is returned. The client is responsible for
/// removing the old instruction and adding the new one in the instruction
/// stream.
MachineInstr*
TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, MachineInstr* MI,
                                   const SmallVectorImpl<unsigned> &Ops,
                                   int FrameIndex) const {
  unsigned Flags = 0;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    if (MI->getOperand(Ops[i]).isDef())
      Flags |= MachineMemOperand::MOStore;
    else
      Flags |= MachineMemOperand::MOLoad;

  // Ask the target to do the actual folding.
  MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
  if (!NewMI) return 0;

  assert((!(Flags & MachineMemOperand::MOStore) ||
          NewMI->getDesc().mayStore()) &&
         "Folded a def to a non-store!");
  assert((!(Flags & MachineMemOperand::MOLoad) ||
          NewMI->getDesc().mayLoad()) &&
         "Folded a use to a non-load!");
  const MachineFrameInfo &MFI = *MF.getFrameInfo();
  assert(MFI.getObjectOffset(FrameIndex) != -1);
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIndex),
                            Flags, /*Offset=*/0,
                            MFI.getObjectSize(FrameIndex),
                            MFI.getObjectAlignment(FrameIndex));
  NewMI->addMemOperand(MF, MMO);

  return NewMI;
}
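// Illustrative sketch only: the client-side protocol spelled out in the doc
// comment above. The caller owns inserting the folded instruction and erasing
// the original; `TII`, `Ops`, and `FI` are assumed to come from the
// surrounding spiller/allocator code, not from a specific in-tree caller.
if (MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, MI, Ops, FI)) {
  MachineBasicBlock &MBB = *MI->getParent();
  MBB.insert(MI, FoldedMI);   // Add the new instruction to the stream...
  MI->eraseFromParent();      // ...and remove the instruction it replaces.
  MI = FoldedMI;
}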
/// \brief Get the index of the definition and source for \p Copy
/// instruction.
/// \pre Copy.isCopy() or Copy.isBitcast().
/// \return True if the Copy instruction has only one register source
/// and one register definition. Otherwise, \p DefIdx and \p SrcIdx
/// are invalid.
static bool getCopyOrBitcastDefUseIdx(const MachineInstr &Copy,
                                      unsigned &DefIdx, unsigned &SrcIdx) {
  assert((Copy.isCopy() || Copy.isBitcast()) && "Wrong operation type.");
  if (Copy.isCopy()) {
    // Copy instructions are supposed to be: Def = Src.
    if (Copy.getDesc().getNumOperands() != 2)
      return false;
    DefIdx = 0;
    SrcIdx = 1;
    assert(Copy.getOperand(DefIdx).isDef() && "Use comes before def!");
    return true;
  }

  // Bitcast case.
  // Bitcasts with more than one def are not supported.
  if (Copy.getDesc().getNumDefs() != 1)
    return false;
  // Initialize SrcIdx to an undefined operand.
  SrcIdx = Copy.getDesc().getNumOperands();
  for (unsigned OpIdx = 0, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; ++OpIdx) {
    const MachineOperand &MO = Copy.getOperand(OpIdx);
    if (!MO.isReg() || !MO.getReg())
      continue;
    if (MO.isDef()) {
      DefIdx = OpIdx;
      continue;
    }
    // Multiple sources?
    if (SrcIdx != EndOpIdx)
      return false;
    SrcIdx = OpIdx;
  }
  return true;
}
MachineInstr *
LanaiInstrInfo::optimizeSelect(MachineInstr &MI,
                               SmallPtrSetImpl<MachineInstr *> &SeenMIs,
                               bool PreferFalse) const {
  assert(MI.getOpcode() == Lanai::SELECT && "unknown select instruction");
  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  MachineInstr *DefMI = canFoldIntoSelect(MI.getOperand(1).getReg(), MRI, this);
  bool Invert = !DefMI;
  if (!DefMI)
    DefMI = canFoldIntoSelect(MI.getOperand(2).getReg(), MRI, this);
  if (!DefMI)
    return nullptr;

  // Find new register class to use.
  MachineOperand FalseReg = MI.getOperand(Invert ? 1 : 2);
  unsigned DestReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
  if (!MRI.constrainRegClass(DestReg, PreviousClass))
    return nullptr;

  // Create a new predicated version of DefMI.
  MachineInstrBuilder NewMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);

  // Copy all the DefMI operands, excluding its (null) predicate.
  const MCInstrDesc &DefDesc = DefMI->getDesc();
  for (unsigned i = 1, e = DefDesc.getNumOperands();
       i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
    NewMI.addOperand(DefMI->getOperand(i));

  unsigned CondCode = MI.getOperand(3).getImm();
  if (Invert)
    NewMI.addImm(getOppositeCondition(LPCC::CondCode(CondCode)));
  else
    NewMI.addImm(CondCode);
  NewMI.copyImplicitOps(MI);

  // The output register value when the predicate is false is an implicit
  // register operand tied to the first def. The tie makes the register
  // allocator ensure the FalseReg is allocated the same register as operand 0.
  FalseReg.setImplicit();
  NewMI.addOperand(FalseReg);
  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);

  // Update SeenMIs set: register newly created MI and erase removed DefMI.
  SeenMIs.insert(NewMI);
  SeenMIs.erase(DefMI);

  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
  // DefMI would be invalid when transferred inside the loop. Checking for a
  // loop is expensive, but at least remove kill flags if they are in different
  // BBs.
  if (DefMI->getParent() != MI.getParent())
    NewMI->clearKillInfo();

  // The caller will erase MI, but not DefMI.
  DefMI->eraseFromParent();
  return NewMI;
}
void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  if (!InstrItins || InstrItins->isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    const MachineOperand &MO = DefMI->getOperand(DefIdx);
    if (MO.isReg() && MO.isImplicit() &&
        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
      // This is an implicit def, getOperandLatency() won't return the correct
      // latency. e.g.
      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
      // What we want is to compute latency between def of %D6/%D7 and use of
      // %Q3 instead.
      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
      if (DefMI->getOperand(Op2).isReg())
        DefIdx = Op2;
    }

    MachineInstr *UseMI = Use->getInstr();
    // For all uses of the register, calculate the maximum latency.
    int Latency = -1;
    if (UseMI) {
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
                                              UseMI, i);
        Latency = std::max(Latency, UseCycle);
      }
    } else {
      // If UseMI is null, then it must be a scheduling barrier.
      if (!InstrItins || InstrItins->isEmpty())
        return;
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
    }

    // If we found a latency, then replace the existing dependence latency.
    if (Latency >= 0)
      dep.setLatency(Latency);
  }
}
/// Both DefMI and UseMI must be valid. By default, call directly to the
/// itinerary. This may be overridden by the target.
int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                       const MachineInstr &DefMI,
                                       unsigned DefIdx,
                                       const MachineInstr &UseMI,
                                       unsigned UseIdx) const {
  unsigned DefClass = DefMI.getDesc().getSchedClass();
  unsigned UseClass = UseMI.getDesc().getSchedClass();
  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}
/// Copy implicit register operands from specified instruction to this
/// instruction that are not part of the instruction definition.
static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
                                 const MachineInstr &MI) {
  for (unsigned i = MI.getDesc().getNumOperands() +
                    MI.getDesc().getNumImplicitUses() +
                    MI.getDesc().getNumImplicitDefs(),
                e = MI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
      NewMI.addOperand(MF, MO);
  }
}
unsigned TargetInstrInfo::computeOperandLatency(
    const InstrItineraryData *ItinData, const MachineInstr &DefMI,
    unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx) const {
  int DefLatency = computeDefOperandLatency(ItinData, DefMI);
  if (DefLatency >= 0)
    return DefLatency;

  assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");

  int OperLatency = 0;
  if (UseMI)
    OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, *UseMI, UseIdx);
  else {
    unsigned DefClass = DefMI.getDesc().getSchedClass();
    OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
  }
  if (OperLatency >= 0)
    return OperLatency;

  // No operand latency was found.
  unsigned InstrLatency = getInstrLatency(ItinData, DefMI);

  // Expected latency is the max of the stage latency and itinerary props.
  InstrLatency = std::max(InstrLatency,
                          defaultDefLatency(ItinData->SchedModel, DefMI));
  return InstrLatency;
}
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MCInstrDesc &MCID = MI.getDesc();
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction &MF = *MBB.getParent();
  const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();

  if (MCID.getSize())
    return MCID.getSize();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);

  if (MI.isLabel())
    return 0;

  switch (MI.getOpcode()) {
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::PROLOG_LABEL:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::DBG_VALUE:
    return 0;
  case AArch64::TLSDESCCALL:
    return 0;
  default:
    llvm_unreachable("Unknown instruction class");
  }
}
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
                                           const MachineOperand &MO) {
  if (MO.isReg())
    return ARMRegisterInfo::getRegisterNumbering(MO.getReg());
  else if (MO.isImm())
    return static_cast<unsigned>(MO.getImm());
  else if (MO.isGlobal())
    emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false);
  else if (MO.isSymbol())
    emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
  else if (MO.isCPI()) {
    const TargetInstrDesc &TID = MI.getDesc();
    // For VFP load, the immediate offset is multiplied by 4.
    unsigned Reloc = ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
      ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry;
    emitConstPoolAddress(MO.getIndex(), Reloc);
  } else if (MO.isJTI())
    emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative);
  else if (MO.isMBB())
    emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
  else {
#ifndef NDEBUG
    errs() << MO;
#endif
    llvm_unreachable(0);
  }
  return 0;
}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
unsigned MSP430InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MCInstrDesc &Desc = MI.getDesc();

  switch (Desc.TSFlags & MSP430II::SizeMask) {
  default:
    switch (Desc.getOpcode()) {
    default: llvm_unreachable("Unknown instruction size!");
    case TargetOpcode::CFI_INSTRUCTION:
    case TargetOpcode::EH_LABEL:
    case TargetOpcode::IMPLICIT_DEF:
    case TargetOpcode::KILL:
    case TargetOpcode::DBG_VALUE:
      return 0;
    case TargetOpcode::INLINEASM: {
      const MachineFunction *MF = MI.getParent()->getParent();
      const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
      return TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
                                    *MF->getTarget().getMCAsmInfo());
    }
    }
  case MSP430II::SizeSpecial:
    switch (MI.getOpcode()) {
    default: llvm_unreachable("Unknown instruction size!");
    case MSP430::SAR8r1c:
    case MSP430::SAR16r1c:
      return 4;
    }
  case MSP430II::Size2Bytes:
    return 2;
  case MSP430II::Size4Bytes:
    return 4;
  case MSP430II::Size6Bytes:
    return 6;
  }
}
bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  const TargetLowering *TLI = MF.getTarget().getTargetLowering();

  // Iterate through each instruction in the function, looking for pseudos.
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = I;
    for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
         MBBI != MBBE; ) {
      MachineInstr *MI = MBBI++;

      // If MI is a pseudo, expand it.
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.usesCustomInsertionHook()) {
        Changed = true;
        MachineBasicBlock *NewMBB =
          TLI->EmitInstrWithCustomInserter(MI, MBB);
        // The expansion may involve new basic blocks.
        if (NewMBB != MBB) {
          MBB = NewMBB;
          I = NewMBB;
          MBBI = NewMBB->begin();
          MBBE = NewMBB->end();
        }
      }
    }
  }

  return Changed;
}
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *CPSRDef = 0;

  MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
  MachineBasicBlock::iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = llvm::next(MII);

    MachineInstr *MI = &*MII;
    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    unsigned Opcode = MI->getOpcode();
    DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
    if (OPI != ReduceOpcodeMap.end()) {
      const ReduceEntry &Entry = ReduceTable[OPI->second];
      // Ignore "special" cases for now.
      if (Entry.Special) {
        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
          Modified = true;
          MachineBasicBlock::iterator I = prior(NextMII);
          MI = &*I;
        }
        goto ProcessNext;
      }

      // Try to transform to a 16-bit two-address instruction.
      if (Entry.NarrowOpc2 &&
          ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
        Modified = true;
        MachineBasicBlock::iterator I = prior(NextMII);
        MI = &*I;
        goto ProcessNext;
      }

      // Try to transform to a 16-bit non-two-address instruction.
      if (Entry.NarrowOpc1 &&
          ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
        Modified = true;
        MachineBasicBlock::iterator I = prior(NextMII);
        MI = &*I;
      }
    }

  ProcessNext:
    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->getDesc().isCall())
      // Calls don't really set CPSR.
      CPSRDef = 0;
    else if (DefCPSR)
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
  }

  return Modified;
}
void RegDefsUses::init(const MachineInstr &MI) {
  // Add all register operands which are explicit and non-variadic.
  update(MI, 0, MI.getDesc().getNumOperands());

  // If MI is a call, add RA to Defs to prevent users of RA from going into
  // delay slot.
  if (MI.isCall())
    Defs.set(Mips::RA);

  // Add all implicit register operands of branch instructions except
  // register AT.
  if (MI.isBranch()) {
    update(MI, MI.getDesc().getNumOperands(), MI.getNumOperands());
    Defs.reset(Mips::AT);
  }
}
/// Predicate for distinguishing between control transfer instructions and all
/// other instructions for handling forbidden slots. Consider inline assembly
/// as unsafe as well.
bool MipsInstrInfo::SafeInForbiddenSlot(const MachineInstr &MI) const {
  if (MI.isInlineAsm())
    return false;

  return (MI.getDesc().TSFlags & MipsII::IsCTI) == 0;
}
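// Illustrative sketch only (not the in-tree pass): how a forbidden-slot filler
// might consume SafeInForbiddenSlot. The `hasForbiddenSlot` query is a
// hypothetical helper standing in for whatever identifies compact branches;
// padding with Mips::NOP is the usual choice but is an assumption here.
static void padForbiddenSlots(MachineBasicBlock &MBB,
                              const MipsInstrInfo *TII) {
  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
    if (!hasForbiddenSlot(*I))  // Hypothetical: does *I have a forbidden slot?
      continue;
    MachineBasicBlock::iterator Next = std::next(I);
    // If the following instruction is missing or unsafe, pad with a NOP.
    if (Next == MBB.end() || !TII->SafeInForbiddenSlot(*Next))
      BuildMI(MBB, Next, I->getDebugLoc(), TII->get(Mips::NOP));
  }
}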
Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
  uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
  Counters Result;

  Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);

  // Only consider stores or EXP for EXP_CNT
  Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
      (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));

  // LGKM may use larger values
  if (TSFlags & SIInstrFlags::LGKM_CNT) {
    MachineOperand &Op = MI.getOperand(0);
    assert(Op.isReg() && "First LGKM operand must be a register!");

    unsigned Reg = Op.getReg();
    unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize();
    Result.Named.LGKM = Size > 4 ? 2 : 1;
  } else {
    Result.Named.LGKM = 0;
  }

  return Result;
}
bool MipsInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
                                          unsigned &SrcOpIdx2) const {
  assert(!MI.isBundle() &&
         "TargetInstrInfo::findCommutedOpIndices() can't handle bundles");

  const MCInstrDesc &MCID = MI.getDesc();
  if (!MCID.isCommutable())
    return false;

  switch (MI.getOpcode()) {
  case Mips::DPADD_U_H:
  case Mips::DPADD_U_W:
  case Mips::DPADD_U_D:
  case Mips::DPADD_S_H:
  case Mips::DPADD_S_W:
  case Mips::DPADD_S_D: {
    // The first operand is both input and output, so it should not commute
    if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3))
      return false;

    if (!MI.getOperand(SrcOpIdx1).isReg() || !MI.getOperand(SrcOpIdx2).isReg())
      return false;
    return true;
  }
  }
  return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
}
bool TargetInstrInfo::PredicateInstruction(
    MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
  bool MadeChange = false;

  assert(!MI.isBundle() &&
         "TargetInstrInfo::PredicateInstruction() can't handle bundles");

  const MCInstrDesc &MCID = MI.getDesc();
  if (!MI.isPredicable())
    return false;

  for (unsigned j = 0, i = 0, e = MI.getNumOperands(); i != e; ++i) {
    if (MCID.OpInfo[i].isPredicate()) {
      MachineOperand &MO = MI.getOperand(i);
      if (MO.isReg()) {
        MO.setReg(Pred[j].getReg());
        MadeChange = true;
      } else if (MO.isImm()) {
        MO.setImm(Pred[j].getImm());
        MadeChange = true;
      } else if (MO.isMBB()) {
        MO.setMBB(Pred[j].getMBB());
        MadeChange = true;
      }
      ++j;
    }
  }
  return MadeChange;
}
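// Illustrative sketch only: building the predicate operand list a caller would
// hand to PredicateInstruction. The two-entry shape (condition-code immediate
// plus a flags register) mirrors ARM-style predication and is an assumption,
// not something the hook itself requires.
static bool predicateWithCC(const TargetInstrInfo &TII, MachineInstr &MI,
                            int64_t CondCode, unsigned FlagsReg) {
  SmallVector<MachineOperand, 4> Pred;
  Pred.push_back(MachineOperand::CreateImm(CondCode));
  Pred.push_back(MachineOperand::CreateReg(FlagsReg, /*isDef=*/false));
  // Rewrites MI's predicate operand slots in place; returns true only if at
  // least one predicate operand was actually changed.
  return TII.PredicateInstruction(MI, Pred);
}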
void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) {
  DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);

  MCE.processDebugLoc(MI.getDebugLoc(), true);

  // Skip pseudo instructions.
  if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo)
    return;

  switch (MI.getOpcode()) {
  case Mips::USW:
    NumEmitted += emitUSW(MI);
    break;
  case Mips::ULW:
    NumEmitted += emitULW(MI);
    break;
  case Mips::ULH:
    NumEmitted += emitULH(MI);
    break;
  case Mips::ULHu:
    NumEmitted += emitULHu(MI);
    break;
  case Mips::USH:
    NumEmitted += emitUSH(MI);
    break;
  default:
    emitWordLE(getBinaryCodeForInstr(MI));
    ++NumEmitted;  // Keep track of the # of mi's emitted
    break;
  }

  MCE.processDebugLoc(MI.getDebugLoc(), false);
}
bool llvm::constrainSelectedInstRegOperands(MachineInstr &I,
                                            const TargetInstrInfo &TII,
                                            const TargetRegisterInfo &TRI,
                                            const RegisterBankInfo &RBI) {
  assert(!isPreISelGenericOpcode(I.getOpcode()) &&
         "A selected instruction is expected");
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) {
    MachineOperand &MO = I.getOperand(OpI);

    // There's nothing to be done on non-register operands.
    if (!MO.isReg())
      continue;

    LLVM_DEBUG(dbgs() << "Converting operand: " << MO << '\n');
    assert(MO.isReg() && "Unsupported non-reg operand");

    unsigned Reg = MO.getReg();
    // Physical registers don't need to be constrained.
    if (TRI.isPhysicalRegister(Reg))
      continue;

    // Register operands with a value of 0 (e.g. predicate operands) don't need
    // to be constrained.
    if (Reg == 0)
      continue;

    // If the operand is a vreg, we should constrain its regclass, and only
    // insert COPYs if that's impossible.
    // constrainOperandRegClass does that for us.
    MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(),
                                       MO, OpI));

    // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been
    // done.
    if (MO.isUse()) {
      int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO);
      if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx))
        I.tieOperands(DefIdx, OpI);
    }
  }
  return true;
}
unsigned ARCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  if (MI.getOpcode() == TargetOpcode::INLINEASM) {
    const MachineFunction *MF = MI.getParent()->getParent();
    const char *AsmStr = MI.getOperand(0).getSymbolName();
    return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
  }
  return MI.getDesc().getSize();
}
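// Illustrative sketch only: the typical consumer of getInstSizeInBytes sums it
// over a block, e.g. when deciding whether a branch target is in range. Only
// standard MachineBasicBlock iteration is used here; the function name is ours.
static unsigned computeBlockSizeInBytes(const TargetInstrInfo &TII,
                                        const MachineBasicBlock &MBB) {
  unsigned Size = 0;
  for (const MachineInstr &MI : MBB)
    Size += TII.getInstSizeInBytes(MI);  // Per-instruction upper bound.
  return Size;
}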
/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
  unsigned DataLatency = SU->Latency;

  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    std::vector<PhysRegSUOper> &UseList = Uses[*Alias];
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i].SU;
      if (UseSU == SU)
        continue;
      MachineInstr *UseMI = UseSU->getInstr();
      int UseOp = UseList[i].OpIdx;
      unsigned LDataLatency = DataLatency;
      // Optionally add in a special extra latency for nodes that
      // feed addresses.
      // TODO: Perhaps we should get rid of
      // SpecialAddressLatency and just move this into
      // adjustSchedDependency for the targets that care about it.
      if (SpecialAddressLatency != 0 && !UnitLatencies && UseSU != &ExitSU) {
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
        if (RegUseIndex >= 0 &&
            (UseMI->mayLoad() || UseMI->mayStore()) &&
            (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
            UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
          LDataLatency += SpecialAddressLatency;
      }
      // Adjust the dependence latency using operand def/use
      // information (if any), and then allow the target to
      // perform its own adjustments.
      SDep dep(SU, SDep::Data, LDataLatency, *Alias);
      if (!UnitLatencies) {
        unsigned Latency =
          TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx,
                                     (UseOp < 0 ? 0 : UseMI), UseOp);
        dep.setLatency(Latency);
        unsigned MinLatency =
          TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx,
                                     (UseOp < 0 ? 0 : UseMI), UseOp,
                                     /*FindMin=*/true);
        dep.setMinLatency(MinLatency);

        ST.adjustSchedDependency(SU, UseSU, dep);
      }
      UseSU->addPred(dep);
    }
  }
}
/// Remove any leftover implicit operands from mutating the instruction. e.g.
/// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
/// anymore.
static void stripExtraCopyOperands(MachineInstr &MI) {
  const MCInstrDesc &Desc = MI.getDesc();
  unsigned NumOps = Desc.getNumOperands() +
                    Desc.getNumImplicitUses() +
                    Desc.getNumImplicitDefs();

  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.RemoveOperand(I);
}
// Find the best LEA instruction in the List to replace address recalculation in
// MI. Such LEA must meet these requirements:
// 1) The address calculated by the LEA differs only by the displacement from
//    the address used in MI.
// 2) The register class of the definition of the LEA is compatible with the
//    register class of the address base register of MI.
// 3) Displacement of the new memory operand should fit in 1 byte if possible.
// 4) The LEA should be as close to MI as possible, and prior to it if
//    possible.
bool OptimizeLEAPass::chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List,
                                    const MachineInstr &MI, MachineInstr *&LEA,
                                    int64_t &AddrDispShift, int &Dist) {
  const MachineFunction *MF = MI.getParent()->getParent();
  const MCInstrDesc &Desc = MI.getDesc();
  int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) +
                X86II::getOperandBias(Desc);

  LEA = nullptr;

  // Loop over all LEA instructions.
  for (auto DefMI : List) {
    int64_t AddrDispShiftTemp = 0;

    // Compare instructions memory operands.
    if (!isSimilarMemOp(MI, MemOpNo, *DefMI, 1, AddrDispShiftTemp))
      continue;

    // Make sure address displacement fits 4 bytes.
    if (!isInt<32>(AddrDispShiftTemp))
      continue;

    // Check that LEA def register can be used as MI address base. Some
    // instructions can use a limited set of registers as address base, for
    // example MOV8mr_NOREX. We could constrain the register class of the LEA
    // def to suit MI, however since this case is very rare and hard to
    // reproduce in a test it's just more reliable to skip the LEA.
    if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI, *MF) !=
        MRI->getRegClass(DefMI->getOperand(0).getReg()))
      continue;

    // Choose the closest LEA instruction from the list, prior to MI if
    // possible. Note that we took into account resulting address displacement
    // as well. Also note that the list is sorted by the order in which the
    // LEAs occur, so the break condition is pretty simple.
    int DistTemp = calcInstrDist(*DefMI, MI);
    assert(DistTemp != 0 &&
           "The distance between two different instructions cannot be zero");
    if (DistTemp > 0 || LEA == nullptr) {
      // Do not update return LEA, if the current one provides a displacement
      // which fits in 1 byte, while the new candidate does not.
      if (LEA != nullptr && !isInt<8>(AddrDispShiftTemp) &&
          isInt<8>(AddrDispShift))
        continue;

      LEA = DefMI;
      AddrDispShift = AddrDispShiftTemp;
      Dist = DistTemp;
    }

    // FIXME: Maybe we should not always stop at the first LEA after MI.
    if (DistTemp < 0)
      break;
  }

  return LEA != nullptr;
}
void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
  if (InstrItins.isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(),
                                              DefIdx);
    if (DefCycle >= 0) {
      MachineInstr *UseMI = Use->getInstr();
      const unsigned UseClass = UseMI->getDesc().getSchedClass();

      // For all uses of the register, calculate the maximum latency.
      int Latency = -1;
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = InstrItins.getOperandCycle(UseClass, i);
        if (UseCycle >= 0)
          Latency = std::max(Latency, DefCycle - UseCycle + 1);
      }

      // If we found a latency, then replace the existing dependence latency.
      if (Latency >= 0)
        dep.setLatency(Latency);
    }
  }
}
unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                          const MachineInstr &MI,
                                          unsigned *PredCost) const {
  // Default to one cycle for no itinerary. However, an "empty" itinerary may
  // still have a MinLatency property, which getStageLatency checks.
  if (!ItinData)
    return MI.mayLoad() ? 2 : 1;

  return ItinData->getStageLatency(MI.getDesc().getSchedClass());
}
bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI,
                                  unsigned &AddrSpace) const {
  bool isStore = false;
  unsigned TSFlags =
      (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> NVPTX::isStoreShift;
  isStore = (TSFlags == 1);
  if (isStore)
    AddrSpace = getLdStCodeAddrSpace(MI);
  return isStore;
}
static bool isSafeToFold(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO: {
    // If there are additional implicit register operands, this may be used for
    // register indexing so the source register operand isn't simply copied.
    unsigned NumOps = MI.getDesc().getNumOperands() +
                      MI.getDesc().getNumImplicitUses();
    return MI.getNumOperands() == NumOps;
  }
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::COPY:
    return true;
  default:
    return false;
  }
}
bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
                                       const MachineInstr &DefMI,
                                       unsigned DefIdx) const {
  const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
  if (!ItinData || ItinData->isEmpty())
    return false;

  unsigned DefClass = DefMI.getDesc().getSchedClass();
  int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
  return (DefCycle != -1 && DefCycle <= 1);
}