/// Refine the latency stored on \p dep for the edge feeding operand \p OpIdx
/// of \p Use from \p Def, using the target's getOperandLatency() query.
/// Leaves \p dep untouched when unit latencies are forced, when the edge is
/// not a data edge, or when the resulting latency is negative.
void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
                                               unsigned OpIdx, SDep& dep) const{
  // Nothing to do if the scheduler ignores latencies or for non-data edges.
  if (forceUnitLatencies() || dep.getKind() != SDep::Data)
    return;

  // Result number of Def consumed by this operand (taken before OpIdx is
  // adjusted below).
  const unsigned DefIdx = Use->getOperand(OpIdx).getResNo();

  // Adjust the use operand index by num of defs.
  if (Use->isMachineOpcode())
    OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();

  int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);

  const bool IsCopyToRegWithSuccs =
      Latency > 1 && Use->getOpcode() == ISD::CopyToReg && !BB->succ_empty();
  if (IsCopyToRegWithSuccs) {
    const unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
    // This copy is a liveout value. It is likely coalesced, so reduce the
    // latency so not to penalize the def.
    // FIXME: need target specific adjustment here?
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      Latency -= 1; // Latency > 1 was checked above, so this stays >= 1.
  }

  if (Latency >= 0)
    dep.setLatency(Latency);
}
/// removePred - This removes the specified edge as a pred of the current /// node if it exists. It also removes the current node as a successor of /// the specified node. void SUnit::removePred(const SDep &D) { // Find the matching predecessor. for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) if (*I == D) { bool FoundSucc = false; // Find the corresponding successor in N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(), EE = N->Succs.end(); II != EE; ++II) if (*II == P) { FoundSucc = true; N->Succs.erase(II); break; } assert(FoundSucc && "Mismatching preds / succs lists!"); Preds.erase(I); // Update the bookkeeping. if (P.getKind() == SDep::Data) { --NumPreds; --N->NumSuccs; } if (!N->isScheduled) --NumPredsLeft; if (!isScheduled) --N->NumSuccsLeft; if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); } return; } }
/// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. void SUnit::addPred(const SDep &D) { // If this node already has this depenence, don't add a redundant one. for (unsigned i = 0, e = (unsigned)Preds.size(); i != e; ++i) if (Preds[i] == D) return; // Now add a corresponding succ to N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); // Update the bookkeeping. if (D.getKind() == SDep::Data) { ++NumPreds; ++N->NumSuccs; } if (!N->isScheduled) ++NumPredsLeft; if (!isScheduled) ++N->NumSuccsLeft; Preds.push_back(D); N->Succs.push_back(P); if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); } }
/// Refine the latency on \p dep for the data edge from \p Def to operand
/// \p OpIdx of \p Use from itinerary operand cycles, computed as
/// DefCycle - UseCycle + 1. \p dep is left unchanged when no itinerary data
/// is available or any of the cycle lookups yields a negative value.
void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
                                               unsigned OpIdx, SDep& dep) const{
  // Check to see if the scheduler cares about latencies.
  if (ForceUnitLatencies())
    return;

  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
  if (InstrItins.isEmpty() || dep.getKind() != SDep::Data)
    return;

  const unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
  if (!Def->isMachineOpcode())
    return;

  const TargetInstrDesc &DefDesc = TII->get(Def->getMachineOpcode());
  if (DefIdx >= DefDesc.getNumDefs())
    return;

  const int DefCycle =
      InstrItins.getOperandCycle(DefDesc.getSchedClass(), DefIdx);
  if (DefCycle < 0)
    return;

  // Uses that are not machine opcodes are given a use cycle of 1.
  int UseCycle = 1;
  if (Use->isMachineOpcode()) {
    const unsigned UseClass =
        TII->get(Use->getMachineOpcode()).getSchedClass();
    // NOTE(review): OpIdx is passed as-is here, without adding the number of
    // defs of Use -- confirm this matches the itinerary's operand numbering.
    UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
  }
  if (UseCycle < 0)
    return;

  const int Latency = DefCycle - UseCycle + 1;
  if (Latency >= 0)
    dep.setLatency(Latency);
}
/// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. bool SUnit::addPred(const SDep &D) { // If this node already has this depenence, don't add a redundant one. for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) if (*I == D) return false; // Now add a corresponding succ to N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); // Update the bookkeeping. if (D.getKind() == SDep::Data) { assert(NumPreds < UINT_MAX && "NumPreds will overflow!"); assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!"); ++NumPreds; ++N->NumSuccs; } if (!N->isScheduled) { assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!"); ++NumPredsLeft; } if (!isScheduled) { assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); ++N->NumSuccsLeft; } Preds.push_back(D); N->Succs.push_back(P); if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); } return true; }
void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { if (!InstrItins || InstrItins->isEmpty()) return; // For a data dependency with a known register... if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) return; const unsigned Reg = dep.getReg(); // ... find the definition of the register in the defining // instruction MachineInstr *DefMI = Def->getInstr(); int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); if (DefIdx != -1) { const MachineOperand &MO = DefMI->getOperand(DefIdx); if (MO.isReg() && MO.isImplicit() && DefIdx >= (int)DefMI->getDesc().getNumOperands()) { // This is an implicit def, getOperandLatency() won't return the correct // latency. e.g. // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def> // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ... // What we want is to compute latency between def of %D6/%D7 and use of // %Q3 instead. unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); if (DefMI->getOperand(Op2).isReg()) DefIdx = Op2; } MachineInstr *UseMI = Use->getInstr(); // For all uses of the register, calculate the maxmimum latency int Latency = -1; if (UseMI) { for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = UseMI->getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; unsigned MOReg = MO.getReg(); if (MOReg != Reg) continue; int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx, UseMI, i); Latency = std::max(Latency, UseCycle); } } else { // UseMI is null, then it must be a scheduling barrier. if (!InstrItins || InstrItins->isEmpty()) return; unsigned DefClass = DefMI->getDesc().getSchedClass(); Latency = InstrItins->getOperandCycle(DefClass, DefIdx); } // If we found a latency, then replace the existing dependence latency. if (Latency >= 0) dep.setLatency(Latency); } }
// Update the latency of a Phi when the Phi bridges two instructions that // require a multi-cycle latency. void HexagonSubtarget::changePhiLatency(MachineInstr &SrcInst, SUnit *Dst, SDep &Dep) const { if (!SrcInst.isPHI() || Dst->NumPreds == 0 || Dep.getLatency() != 0) return; for (const SDep &PI : Dst->Preds) { if (PI.getLatency() != 0) continue; Dep.setLatency(2); break; } }
/// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. bool SUnit::addPred(const SDep &D) { // If this node already has this depenence, don't add a redundant one. for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { if (I->overlaps(D)) { // Extend the latency if needed. Equivalent to removePred(I) + addPred(D). if (I->getLatency() < D.getLatency()) { SUnit *PredSU = I->getSUnit(); // Find the corresponding successor in N. SDep ForwardD = *I; ForwardD.setSUnit(this); for (SmallVector<SDep, 4>::iterator II = PredSU->Succs.begin(), EE = PredSU->Succs.end(); II != EE; ++II) { if (*II == ForwardD) { II->setLatency(D.getLatency()); break; } } I->setLatency(D.getLatency()); } return false; } } // Now add a corresponding succ to N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); // Update the bookkeeping. if (D.getKind() == SDep::Data) { assert(NumPreds < UINT_MAX && "NumPreds will overflow!"); assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!"); ++NumPreds; ++N->NumSuccs; } if (!N->isScheduled) { assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!"); ++NumPredsLeft; } if (!isScheduled) { assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); ++N->NumSuccsLeft; } Preds.push_back(D); N->Succs.push_back(P); if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); } return true; }
/// Determine whether the DFS cross edge should be considered a subtree edge /// or a connection between subtrees. void visitCross(const SDep &PredDep, const SUnit *Succ) { if (PredDep.getKind() == SDep::Data) { // If this is a cross edge to a root, join the subtrees. This happens when // the root was first reached by a non-data dependence. unsigned NodeNum = PredDep.getSUnit()->NodeNum; unsigned PredCnt = R.DFSData[NodeNum].InstrCount; if (R.DFSData[NodeNum].SubtreeID == NodeNum && PredCnt < R.SubtreeLimit) { R.DFSData[NodeNum].SubtreeID = Succ->NodeNum; R.DFSData[Succ->NodeNum].InstrCount += PredCnt; SubtreeClasses.join(Succ->NodeNum, NodeNum); return; } } ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ)); }
/// \brief Perform target specific adjustments to the latency of a schedule /// dependency. void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, SDep &Dep) const { MachineInstr *SrcInst = Src->getInstr(); MachineInstr *DstInst = Dst->getInstr(); if (!Src->isInstr() || !Dst->isInstr()) return; const HexagonInstrInfo *QII = static_cast<const HexagonInstrInfo *>(getInstrInfo()); // Instructions with .new operands have zero latency. if (QII->canExecuteInBundle(*SrcInst, *DstInst) && isBestZeroLatency(Src, Dst, QII)) { Dep.setLatency(0); return; } if (!hasV60TOps()) return; // Don't adjust the latency of post-increment part of the instruction. if (QII->isPostIncrement(*SrcInst) && Dep.isAssignedRegDep()) { if (SrcInst->mayStore()) return; if (Dep.getReg() != SrcInst->getOperand(0).getReg()) return; } else if (QII->isPostIncrement(*DstInst) && Dep.getKind() == SDep::Anti) { if (DstInst->mayStore()) return; if (Dep.getReg() != DstInst->getOperand(0).getReg()) return; } else if (QII->isPostIncrement(*DstInst) && DstInst->mayStore() && Dep.isAssignedRegDep()) { MachineOperand &Op = DstInst->getOperand(DstInst->getNumOperands() - 1); if (Op.isReg() && Dep.getReg() != Op.getReg()) return; } // Check if we need to change any the latency values when Phis are added. if (useBSBScheduling() && SrcInst->isPHI()) { changePhiLatency(*SrcInst, Dst, Dep); return; } // If it's a REG_SEQUENCE, use its destination instruction to determine // the correct latency. if (DstInst->isRegSequence() && Dst->NumSuccs == 1) DstInst = Dst->Succs[0].getSUnit()->getInstr(); // Try to schedule uses near definitions to generate .cur. if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) && isBestZeroLatency(Src, Dst, QII)) { Dep.setLatency(0); return; } updateLatency(*SrcInst, *DstInst, Dep); }
void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); if (InstrItins.isEmpty()) return; // For a data dependency with a known register... if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) return; const unsigned Reg = dep.getReg(); // ... find the definition of the register in the defining // instruction MachineInstr *DefMI = Def->getInstr(); int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); if (DefIdx != -1) { int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), DefIdx); if (DefCycle >= 0) { MachineInstr *UseMI = Use->getInstr(); const unsigned UseClass = UseMI->getDesc().getSchedClass(); // For all uses of the register, calculate the maxmimum latency int Latency = -1; for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = UseMI->getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; unsigned MOReg = MO.getReg(); if (MOReg != Reg) continue; int UseCycle = InstrItins.getOperandCycle(UseClass, i); if (UseCycle >= 0) Latency = std::max(Latency, DefCycle - UseCycle + 1); } // If we found a latency, then replace the existing dependence latency. if (Latency >= 0) dep.setLatency(Latency); } } }
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to /// the PendingQueue if the count reaches zero. Also update its cycle bound. void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { SUnit *SuccSU = D.getSUnit(); #ifndef NDEBUG if (SuccSU->NumPredsLeft == 0) { errs() << "*** Scheduling failed! ***\n"; SuccSU->dump(this); errs() << " has been released too many times!\n"; llvm_unreachable(0); } #endif --SuccSU->NumPredsLeft; SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) PendingQueue.push_back(SuccSU); }
/// InsertCopiesAndMoveSuccs - Insert register copies and move all /// scheduled successors of the given SUnit to the last copy. void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVectorImpl<SUnit*> &Copies) { SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(nullptr)); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(nullptr)); CopyToSU->CopySrcRC = DestRC; CopyToSU->CopyDstRC = SrcRC; // Only copy scheduled successors. Cut them from old node's successor // list and move them over. SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; for (SDep &Succ : SU->Succs) { if (Succ.isArtificial()) continue; SUnit *SuccSU = Succ.getSUnit(); if (SuccSU->isScheduled) { SDep D = Succ; D.setSUnit(CopyToSU); AddPred(SuccSU, D); DelDeps.push_back(std::make_pair(SuccSU, Succ)); } } for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { RemovePred(DelDeps[i].first, DelDeps[i].second); } SDep FromDep(SU, SDep::Data, Reg); FromDep.setLatency(SU->Latency); AddPred(CopyFromSU, FromDep); SDep ToDep(CopyFromSU, SDep::Data, 0); ToDep.setLatency(CopyFromSU->Latency); AddPred(CopyToSU, ToDep); Copies.push_back(CopyFromSU); Copies.push_back(CopyToSU); ++NumPRCopies; }
/// MO is an operand of SU's instruction that defines a physical register. Add /// data dependencies from SU to any uses of the physical register. void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx); assert(MO.isDef() && "expect physreg def"); // Ask the target if address-backscheduling is desirable, and if so how much. const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); Alias.isValid(); ++Alias) { if (!Uses.contains(*Alias)) continue; std::vector<PhysRegSUOper> &UseList = Uses[*Alias]; for (unsigned i = 0, e = UseList.size(); i != e; ++i) { SUnit *UseSU = UseList[i].SU; if (UseSU == SU) continue; // Adjust the dependence latency using operand def/use information, // then allow the target to perform its own adjustments. int UseOp = UseList[i].OpIdx; MachineInstr *RegUse = 0; SDep Dep; if (UseOp < 0) Dep = SDep(SU, SDep::Artificial); else { Dep = SDep(SU, SDep::Data, *Alias); RegUse = UseSU->getInstr(); Dep.setMinLatency( SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, UseOp, /*FindMin=*/true)); } Dep.setLatency( SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, UseOp, /*FindMin=*/false)); ST.adjustSchedDependency(SU, UseSU, Dep); UseSU->addPred(Dep); } } }
/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to /// the PendingQueue if the count reaches zero. Also update its cycle bound. void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { SUnit *SuccSU = D.getSUnit(); #ifndef NDEBUG if (SuccSU->NumPredsLeft == 0) { dbgs() << "*** Scheduling failed! ***\n"; SuccSU->dump(this); dbgs() << " has been released too many times!\n"; llvm_unreachable(nullptr); } #endif assert(!D.isWeak() && "unexpected artificial DAG edge"); --SuccSU->NumPredsLeft; SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { PendingQueue.push_back(SuccSU); } }
/// removePred - This removes the specified edge as a pred of the current /// node if it exists. It also removes the current node as a successor of /// the specified node. void SUnit::removePred(const SDep &D) { // Find the matching predecessor. for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) if (*I == D) { // Find the corresponding successor in N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); SmallVectorImpl<SDep>::iterator Succ = std::find(N->Succs.begin(), N->Succs.end(), P); assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!"); N->Succs.erase(Succ); Preds.erase(I); // Update the bookkeeping. if (P.getKind() == SDep::Data) { assert(NumPreds > 0 && "NumPreds will underflow!"); assert(N->NumSuccs > 0 && "NumSuccs will underflow!"); --NumPreds; --N->NumSuccs; } if (!N->isScheduled) { if (D.isWeak()) --WeakPredsLeft; else { assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!"); --NumPredsLeft; } } if (!isScheduled) { if (D.isWeak()) --N->WeakSuccsLeft; else { assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!"); --N->NumSuccsLeft; } } if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); } return; } }
// This helper function is responsible for increasing the latency only. void HexagonSubtarget::updateLatency(MachineInstr &SrcInst, MachineInstr &DstInst, SDep &Dep) const { if (!hasV60TOps()) return; auto &QII = static_cast<const HexagonInstrInfo&>(*getInstrInfo()); if (EnableVecFrwdSched && QII.addLatencyToSchedule(SrcInst, DstInst)) { // Vec frwd scheduling. Dep.setLatency(Dep.getLatency() + 1); } else if (useBSBScheduling() && QII.isLateInstrFeedsEarlyInstr(SrcInst, DstInst)) { // BSB scheduling. Dep.setLatency(Dep.getLatency() + 1); } else if (EnableTCLatencySched) { // TClass latency scheduling. // Check if SrcInst produces in 2C an operand of DstInst taken in stage 2B. if (QII.isTC1(SrcInst) || QII.isTC2(SrcInst)) if (!QII.isTC1(DstInst) && !QII.isTC2(DstInst)) Dep.setLatency(Dep.getLatency() + 1); } }
/// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. bool SUnit::addPred(const SDep &D, bool Required) { // If this node already has this dependence, don't add a redundant one. for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { // Zero-latency weak edges may be added purely for heuristic ordering. Don't // add them if another kind of edge already exists. if (!Required && I->getSUnit() == D.getSUnit()) return false; if (I->overlaps(D)) { // Extend the latency if needed. Equivalent to removePred(I) + addPred(D). if (I->getLatency() < D.getLatency()) { SUnit *PredSU = I->getSUnit(); // Find the corresponding successor in N. SDep ForwardD = *I; ForwardD.setSUnit(this); for (SmallVectorImpl<SDep>::iterator II = PredSU->Succs.begin(), EE = PredSU->Succs.end(); II != EE; ++II) { if (*II == ForwardD) { II->setLatency(D.getLatency()); break; } } I->setLatency(D.getLatency()); } return false; } } // Now add a corresponding succ to N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); // Update the bookkeeping. if (D.getKind() == SDep::Data) { assert(NumPreds < UINT_MAX && "NumPreds will overflow!"); assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!"); ++NumPreds; ++N->NumSuccs; } if (!N->isScheduled) { if (D.isWeak()) { ++WeakPredsLeft; } else { assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!"); ++NumPredsLeft; } } if (!isScheduled) { if (D.isWeak()) { ++N->WeakSuccsLeft; } else { assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); ++N->NumSuccsLeft; } } Preds.push_back(D); N->Succs.push_back(P); if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); } return true; }
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
///
/// If the node produces a chain (MVT::Other) value, the function first asks
/// the target to unfold it via unfoldMemoryOperand(); the dependence edges of
/// \p SU are then redistributed between the unfolded load's SUnit and the
/// remaining operation's SUnit, and duplication continues from the latter.
///
/// \returns the new SUnit, or nullptr when \p SU has no node, is glued to
/// another node, produces or consumes a glue value, or cannot be unfolded.
SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
  // Check for a missing node before dereferencing it.
  SDNode *N = SU->getNode();
  if (!N)
    return nullptr;

  if (N->getGluedNode())
    return nullptr;

  // Glue results or operands make the node ineligible; a chain result means
  // unfolding has to be attempted first.
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getSimpleValueType(i);
    if (VT == MVT::Glue)
      return nullptr;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (const SDValue &Op : N->op_values()) {
    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return nullptr;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return nullptr;

    LLVM_DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // SUnit for the operation left over after unfolding. Named distinctly so
    // it does not shadow the clone created at the end of the function.
    SUnit *UnfoldedSU = newSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(UnfoldedSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        UnfoldedSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      UnfoldedSU->isCommutable = true;

    // LoadNode may already exist. This can happen when there is another
    // load from the same location and producing the same type of value
    // but it has different alignment or volatileness.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = newSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
    }

    // Sort SU's edges: control edges, predecessors that are operands of the
    // load, and everything else.
    SDep ChainPred;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SDep &Pred : SU->Preds) {
      if (Pred.isCtrl())
        ChainPred = Pred;
      else if (Pred.getSUnit()->getNode() &&
               Pred.getSUnit()->getNode()->isOperandOf(LoadNode))
        LoadPreds.push_back(Pred);
      else
        NodePreds.push_back(Pred);
    }
    for (SDep &Succ : SU->Succs) {
      if (Succ.isCtrl())
        ChainSuccs.push_back(Succ);
      else
        NodeSuccs.push_back(Succ);
    }

    // Edges are only attached to LoadSU when it was created here; a
    // pre-existing load SUnit keeps the edges it already has.
    if (ChainPred.getSUnit()) {
      RemovePred(SU, ChainPred);
      if (isNewLoad)
        AddPred(LoadSU, ChainPred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad) {
        AddPred(LoadSU, Pred);
      }
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(UnfoldedSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      // Re-target the edge at SU so it matches the entry in SuccDep's
      // predecessor list.
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(UnfoldedSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }
    if (isNewLoad) {
      SDep D(LoadSU, SDep::Barrier);
      D.setLatency(LoadSU->Latency);
      AddPred(UnfoldedSU, D);
    }

    ++NumUnfolds;

    if (UnfoldedSU->NumSuccsLeft == 0) {
      UnfoldedSU->isAvailable = true;
      return UnfoldedSU;
    }
    SU = UnfoldedSU;
  }

  LLVM_DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
  SUnit *NewSU = Clone(SU);

  // New SUnit has the exact same predecessors.
  for (SDep &Pred : SU->Preds)
    if (!Pred.isArtificial())
      AddPred(NewSU, Pred);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      // Queue the old edge as it appears in SuccSU's predecessor list.
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  // Deletion is deferred so SU->Succs is not modified while iterating it.
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  ++NumDups;
  return NewSU;
}
void ScheduleDAGSDNodes::AddSchedEdges() { const TargetSubtargetInfo &ST = MF.getSubtarget(); // Check to see if the scheduler cares about latencies. bool UnitLatencies = forceUnitLatencies(); // Pass 2: add the preds, succs, etc. for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { SUnit *SU = &SUnits[su]; SDNode *MainNode = SU->getNode(); if (MainNode->isMachineOpcode()) { unsigned Opc = MainNode->getMachineOpcode(); const MCInstrDesc &MCID = TII->get(Opc); for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { SU->isTwoAddress = true; break; } } if (MCID.isCommutable()) SU->isCommutable = true; } // Find all predecessors and successors of the group. for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).getImplicitDefs()) { SU->hasPhysRegClobbers = true; unsigned NumUsed = InstrEmitter::CountResults(N); while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) --NumUsed; // Skip over unused values at the end. if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) SU->hasPhysRegDefs = true; } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDNode *OpN = N->getOperand(i).getNode(); if (isPassiveNode(OpN)) continue; // Not scheduled. SUnit *OpSU = &SUnits[OpN->getNodeId()]; assert(OpSU && "Node has no SUnit!"); if (OpSU == SU) continue; // In the same group. EVT OpVT = N->getOperand(i).getValueType(); assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!"); bool isChain = OpVT == MVT::Other; unsigned PhysReg = 0; int Cost = 1; // Determine if this is a physical register dependency. CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); assert((PhysReg == 0 || !isChain) && "Chain dependence via physreg data?"); // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler // emits a copy from the physical register to a virtual register unless // it requires a cross class copy (cost < 0). 
That means we are only // treating "expensive to copy" register dependency as physical register // dependency. This may change in the future though. if (Cost >= 0 && !StressSched) PhysReg = 0; // If this is a ctrl dep, latency is 1. unsigned OpLatency = isChain ? 1 : OpSU->Latency; // Special-case TokenFactor chains as zero-latency. if(isChain && OpN->getOpcode() == ISD::TokenFactor) OpLatency = 0; SDep Dep = isChain ? SDep(OpSU, SDep::Barrier) : SDep(OpSU, SDep::Data, PhysReg); Dep.setLatency(OpLatency); if (!isChain && !UnitLatencies) { computeOperandLatency(OpN, N, i, Dep); ST.adjustSchedDependency(OpSU, SU, Dep); } if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { // Multiple register uses are combined in the same SUnit. For example, // we could have a set of glued nodes with all their defs consumed by // another set of glued nodes. Register pressure tracking sees this as // a single use, so to keep pressure balanced we reduce the defs. // // We can't tell (without more book-keeping) if this results from // glued nodes or duplicate operands. As long as we don't reduce // NumRegDefsLeft to zero, we handle the common cases well. --OpSU->NumRegDefsLeft; } } } } }