/// computeOperandLatency - Ask the target for the latency of the data edge
/// from result DefIdx of Def to operand OpIdx of Use and, when the answer is
/// non-negative, store it on dep.  Does nothing when unit latencies are forced
/// or when dep is not a data dependence.
void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
                                               unsigned OpIdx,
                                               SDep& dep) const{
  // Bail out if the scheduler does not care about latencies, or if this is
  // not a data edge.
  if (forceUnitLatencies() || dep.getKind() != SDep::Data)
    return;

  // Result number of Def that is consumed by this operand of Use.
  const unsigned DefIdx = Use->getOperand(OpIdx).getResNo();

  // For machine nodes, adjust the use operand index by the number of defs.
  if (Use->isMachineOpcode())
    OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();

  int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);

  const bool CopyOutOfBlock = Latency > 1 &&
                              Use->getOpcode() == ISD::CopyToReg &&
                              !BB->succ_empty();
  if (CopyOutOfBlock) {
    const unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
    // This copy is a liveout value. It is likely coalesced, so reduce the
    // latency so as not to penalize the def.  Latency is known to be > 1
    // here, so the result stays >= 1.
    // FIXME: need target specific adjustment here?
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      --Latency;
  }

  if (Latency >= 0)
    dep.setLatency(Latency);
}
/// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. void SUnit::addPred(const SDep &D) { // If this node already has this depenence, don't add a redundant one. for (unsigned i = 0, e = (unsigned)Preds.size(); i != e; ++i) if (Preds[i] == D) return; // Now add a corresponding succ to N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); // Update the bookkeeping. if (D.getKind() == SDep::Data) { ++NumPreds; ++N->NumSuccs; } if (!N->isScheduled) ++NumPredsLeft; if (!isScheduled) ++N->NumSuccsLeft; Preds.push_back(D); N->Succs.push_back(P); if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); } }
/// ComputeOperandLatency - Derive the latency of the data edge from Def to
/// operand OpIdx of Use from the instruction itineraries and, when a
/// non-negative value is obtained, store it on dep.
///
/// Nothing is changed when unit latencies are forced, when no itinerary data
/// is available, when dep is not a data dependence, when Def is not a machine
/// node, or when the referenced result is not one of Def's declared defs.
void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
                                               unsigned OpIdx,
                                               SDep& dep) const{
  // Check to see if the scheduler cares about latencies.
  if (ForceUnitLatencies())
    return;

  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
  if (InstrItins.isEmpty())
    return;

  if (dep.getKind() != SDep::Data)
    return;

  // Result number of Def that is consumed by this operand of Use.
  const unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
  if (!Def->isMachineOpcode())
    return;

  const TargetInstrDesc &DefDesc = TII->get(Def->getMachineOpcode());
  if (DefIdx >= DefDesc.getNumDefs())
    return;

  const int DefCycle =
      InstrItins.getOperandCycle(DefDesc.getSchedClass(), DefIdx);
  if (DefCycle < 0)
    return;

  // A use that is not a machine node is assumed to read in cycle 1.
  int UseCycle = 1;
  if (Use->isMachineOpcode()) {
    const TargetInstrDesc &UseDesc = TII->get(Use->getMachineOpcode());
    // OpIdx counts only the node's input operands, whereas itinerary operand
    // cycles are indexed by machine operand position, where the defs come
    // first.  Adjust the use operand index by the number of defs.
    UseCycle = InstrItins.getOperandCycle(UseDesc.getSchedClass(),
                                          OpIdx + UseDesc.getNumDefs());
  }
  if (UseCycle < 0)
    return;

  const int Latency = DefCycle - UseCycle + 1;
  if (Latency >= 0)
    dep.setLatency(Latency);
}
/// removePred - This removes the specified edge as a pred of the current /// node if it exists. It also removes the current node as a successor of /// the specified node. void SUnit::removePred(const SDep &D) { // Find the matching predecessor. for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) if (*I == D) { bool FoundSucc = false; // Find the corresponding successor in N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(), EE = N->Succs.end(); II != EE; ++II) if (*II == P) { FoundSucc = true; N->Succs.erase(II); break; } assert(FoundSucc && "Mismatching preds / succs lists!"); Preds.erase(I); // Update the bookkeeping. if (P.getKind() == SDep::Data) { --NumPreds; --N->NumSuccs; } if (!N->isScheduled) --NumPredsLeft; if (!isScheduled) --N->NumSuccsLeft; if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); } return; } }
/// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. bool SUnit::addPred(const SDep &D) { // If this node already has this depenence, don't add a redundant one. for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) if (*I == D) return false; // Now add a corresponding succ to N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); // Update the bookkeeping. if (D.getKind() == SDep::Data) { assert(NumPreds < UINT_MAX && "NumPreds will overflow!"); assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!"); ++NumPreds; ++N->NumSuccs; } if (!N->isScheduled) { assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!"); ++NumPredsLeft; } if (!isScheduled) { assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); ++N->NumSuccsLeft; } Preds.push_back(D); N->Succs.push_back(P); if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); } return true; }
void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { if (!InstrItins || InstrItins->isEmpty()) return; // For a data dependency with a known register... if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) return; const unsigned Reg = dep.getReg(); // ... find the definition of the register in the defining // instruction MachineInstr *DefMI = Def->getInstr(); int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); if (DefIdx != -1) { const MachineOperand &MO = DefMI->getOperand(DefIdx); if (MO.isReg() && MO.isImplicit() && DefIdx >= (int)DefMI->getDesc().getNumOperands()) { // This is an implicit def, getOperandLatency() won't return the correct // latency. e.g. // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def> // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ... // What we want is to compute latency between def of %D6/%D7 and use of // %Q3 instead. unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); if (DefMI->getOperand(Op2).isReg()) DefIdx = Op2; } MachineInstr *UseMI = Use->getInstr(); // For all uses of the register, calculate the maxmimum latency int Latency = -1; if (UseMI) { for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = UseMI->getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; unsigned MOReg = MO.getReg(); if (MOReg != Reg) continue; int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx, UseMI, i); Latency = std::max(Latency, UseCycle); } } else { // UseMI is null, then it must be a scheduling barrier. if (!InstrItins || InstrItins->isEmpty()) return; unsigned DefClass = DefMI->getDesc().getSchedClass(); Latency = InstrItins->getOperandCycle(DefClass, DefIdx); } // If we found a latency, then replace the existing dependence latency. if (Latency >= 0) dep.setLatency(Latency); } }
/// \brief Perform target specific adjustments to the latency of a schedule /// dependency. void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, SDep &Dep) const { MachineInstr *SrcInst = Src->getInstr(); MachineInstr *DstInst = Dst->getInstr(); if (!Src->isInstr() || !Dst->isInstr()) return; const HexagonInstrInfo *QII = static_cast<const HexagonInstrInfo *>(getInstrInfo()); // Instructions with .new operands have zero latency. if (QII->canExecuteInBundle(*SrcInst, *DstInst) && isBestZeroLatency(Src, Dst, QII)) { Dep.setLatency(0); return; } if (!hasV60TOps()) return; // Don't adjust the latency of post-increment part of the instruction. if (QII->isPostIncrement(*SrcInst) && Dep.isAssignedRegDep()) { if (SrcInst->mayStore()) return; if (Dep.getReg() != SrcInst->getOperand(0).getReg()) return; } else if (QII->isPostIncrement(*DstInst) && Dep.getKind() == SDep::Anti) { if (DstInst->mayStore()) return; if (Dep.getReg() != DstInst->getOperand(0).getReg()) return; } else if (QII->isPostIncrement(*DstInst) && DstInst->mayStore() && Dep.isAssignedRegDep()) { MachineOperand &Op = DstInst->getOperand(DstInst->getNumOperands() - 1); if (Op.isReg() && Dep.getReg() != Op.getReg()) return; } // Check if we need to change any the latency values when Phis are added. if (useBSBScheduling() && SrcInst->isPHI()) { changePhiLatency(*SrcInst, Dst, Dep); return; } // If it's a REG_SEQUENCE, use its destination instruction to determine // the correct latency. if (DstInst->isRegSequence() && Dst->NumSuccs == 1) DstInst = Dst->Succs[0].getSUnit()->getInstr(); // Try to schedule uses near definitions to generate .cur. if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) && isBestZeroLatency(Src, Dst, QII)) { Dep.setLatency(0); return; } updateLatency(*SrcInst, *DstInst, Dep); }
/// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. bool SUnit::addPred(const SDep &D) { // If this node already has this depenence, don't add a redundant one. for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { if (I->overlaps(D)) { // Extend the latency if needed. Equivalent to removePred(I) + addPred(D). if (I->getLatency() < D.getLatency()) { SUnit *PredSU = I->getSUnit(); // Find the corresponding successor in N. SDep ForwardD = *I; ForwardD.setSUnit(this); for (SmallVector<SDep, 4>::iterator II = PredSU->Succs.begin(), EE = PredSU->Succs.end(); II != EE; ++II) { if (*II == ForwardD) { II->setLatency(D.getLatency()); break; } } I->setLatency(D.getLatency()); } return false; } } // Now add a corresponding succ to N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); // Update the bookkeeping. if (D.getKind() == SDep::Data) { assert(NumPreds < UINT_MAX && "NumPreds will overflow!"); assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!"); ++NumPreds; ++N->NumSuccs; } if (!N->isScheduled) { assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!"); ++NumPredsLeft; } if (!isScheduled) { assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); ++N->NumSuccsLeft; } Preds.push_back(D); N->Succs.push_back(P); if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); } return true; }
/// Determine whether the DFS cross edge should be considered a subtree edge /// or a connection between subtrees. void visitCross(const SDep &PredDep, const SUnit *Succ) { if (PredDep.getKind() == SDep::Data) { // If this is a cross edge to a root, join the subtrees. This happens when // the root was first reached by a non-data dependence. unsigned NodeNum = PredDep.getSUnit()->NodeNum; unsigned PredCnt = R.DFSData[NodeNum].InstrCount; if (R.DFSData[NodeNum].SubtreeID == NodeNum && PredCnt < R.SubtreeLimit) { R.DFSData[NodeNum].SubtreeID = Succ->NodeNum; R.DFSData[Succ->NodeNum].InstrCount += PredCnt; SubtreeClasses.join(Succ->NodeNum, NodeNum); return; } } ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ)); }
void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); if (InstrItins.isEmpty()) return; // For a data dependency with a known register... if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) return; const unsigned Reg = dep.getReg(); // ... find the definition of the register in the defining // instruction MachineInstr *DefMI = Def->getInstr(); int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); if (DefIdx != -1) { int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), DefIdx); if (DefCycle >= 0) { MachineInstr *UseMI = Use->getInstr(); const unsigned UseClass = UseMI->getDesc().getSchedClass(); // For all uses of the register, calculate the maxmimum latency int Latency = -1; for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = UseMI->getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; unsigned MOReg = MO.getReg(); if (MOReg != Reg) continue; int UseCycle = InstrItins.getOperandCycle(UseClass, i); if (UseCycle >= 0) Latency = std::max(Latency, DefCycle - UseCycle + 1); } // If we found a latency, then replace the existing dependence latency. if (Latency >= 0) dep.setLatency(Latency); } } }
/// removePred - This removes the specified edge as a pred of the current /// node if it exists. It also removes the current node as a successor of /// the specified node. void SUnit::removePred(const SDep &D) { // Find the matching predecessor. for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) if (*I == D) { // Find the corresponding successor in N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); SmallVectorImpl<SDep>::iterator Succ = std::find(N->Succs.begin(), N->Succs.end(), P); assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!"); N->Succs.erase(Succ); Preds.erase(I); // Update the bookkeeping. if (P.getKind() == SDep::Data) { assert(NumPreds > 0 && "NumPreds will underflow!"); assert(N->NumSuccs > 0 && "NumSuccs will underflow!"); --NumPreds; --N->NumSuccs; } if (!N->isScheduled) { if (D.isWeak()) --WeakPredsLeft; else { assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!"); --NumPredsLeft; } } if (!isScheduled) { if (D.isWeak()) --N->WeakSuccsLeft; else { assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!"); --N->NumSuccsLeft; } } if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); } return; } }
/// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. bool SUnit::addPred(const SDep &D, bool Required) { // If this node already has this dependence, don't add a redundant one. for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { // Zero-latency weak edges may be added purely for heuristic ordering. Don't // add them if another kind of edge already exists. if (!Required && I->getSUnit() == D.getSUnit()) return false; if (I->overlaps(D)) { // Extend the latency if needed. Equivalent to removePred(I) + addPred(D). if (I->getLatency() < D.getLatency()) { SUnit *PredSU = I->getSUnit(); // Find the corresponding successor in N. SDep ForwardD = *I; ForwardD.setSUnit(this); for (SmallVectorImpl<SDep>::iterator II = PredSU->Succs.begin(), EE = PredSU->Succs.end(); II != EE; ++II) { if (*II == ForwardD) { II->setLatency(D.getLatency()); break; } } I->setLatency(D.getLatency()); } return false; } } // Now add a corresponding succ to N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); // Update the bookkeeping. if (D.getKind() == SDep::Data) { assert(NumPreds < UINT_MAX && "NumPreds will overflow!"); assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!"); ++NumPreds; ++N->NumSuccs; } if (!N->isScheduled) { if (D.isWeak()) { ++WeakPredsLeft; } else { assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!"); ++NumPredsLeft; } } if (!isScheduled) { if (D.isWeak()) { ++N->WeakSuccsLeft; } else { assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); ++N->NumSuccsLeft; } } Preds.push_back(D); N->Succs.push_back(P); if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); } return true; }