/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();

  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    std::vector<PhysRegSUOper> &UseList = Uses[*Alias];
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i].SU;
      if (UseSU == SU)
        continue;

      SDep dep(SU, SDep::Data, 1, *Alias);

      // Adjust the dependence latency using operand def/use information,
      // then allow the target to perform its own adjustments.
      int UseOp = UseList[i].OpIdx;
      MachineInstr *RegUse = UseOp < 0 ? 0 : UseSU->getInstr();
      dep.setLatency(
        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
                                         UseOp, /*FindMin=*/false));
      dep.setMinLatency(
        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
                                         UseOp, /*FindMin=*/true));

      ST.adjustSchedDependency(SU, UseSU, dep);
      UseSU->addPred(dep);
    }
  }
}
/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Singly defined vregs do not have output/anti dependencies.
  // The current operand is a def, so we have at least one.
  // Check here if there are any others...
  if (MRI.hasOneDef(Reg))
    return;

  // Add output dependence to the next nearest def of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
  if (DefI == VRegDefs.end())
    VRegDefs.insert(VReg2SUnit(Reg, SU));
  else {
    SUnit *DefSU = DefI->SU;
    if (DefSU != SU && DefSU != &ExitSU) {
      unsigned OutLatency =
        TII->getOutputLatency(InstrItins, MI, OperIdx, DefSU->getInstr());
      DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
    }
    DefI->SU = SU;
  }
}
/// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
  DenseMap<SDValue, unsigned> VRBaseMap;
  DenseMap<SUnit*, unsigned> CopyVRBaseMap;
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    SUnit *SU = Sequence[i];
    if (!SU) {
      // Null SUnit* is a noop.
      EmitNoop();
      continue;
    }

    // For pre-regalloc scheduling, create instructions corresponding to the
    // SDNode and any flagged SDNodes and append them to the block.
    if (!SU->getNode()) {
      // Emit a copy.
      EmitPhysRegCopy(SU, CopyVRBaseMap);
      continue;
    }

    SmallVector<SDNode *, 4> FlaggedNodes;
    for (SDNode *N = SU->getNode()->getFlaggedNode(); N;
         N = N->getFlaggedNode())
      FlaggedNodes.push_back(N);
    while (!FlaggedNodes.empty()) {
      EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
               VRBaseMap);
      FlaggedNodes.pop_back();
    }
    EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap);
  }

  return BB;
}
/// ComputeHeight - Calculate the maximal path from the node to the exit.
///
void SUnit::ComputeHeight() {
  SmallVector<SUnit*, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *Cur = WorkList.back();

    bool Done = true;
    unsigned MaxSuccHeight = 0;
    for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
         E = Cur->Succs.end(); I != E; ++I) {
      SUnit *SuccSU = I->getSUnit();
      if (SuccSU->isHeightCurrent)
        MaxSuccHeight = std::max(MaxSuccHeight,
                                 SuccSU->Height + I->getLatency());
      else {
        Done = false;
        WorkList.push_back(SuccSU);
      }
    }

    if (Done) {
      WorkList.pop_back();
      if (MaxSuccHeight != Cur->Height) {
        Cur->setHeightDirty();
        Cur->Height = MaxSuccHeight;
      }
      Cur->isHeightCurrent = true;
    }
  } while (!WorkList.empty());
}
/// ComputeDepth - Calculate the maximal path from the node to the entry.
///
void SUnit::ComputeDepth() {
  SmallVector<SUnit*, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *Cur = WorkList.back();

    bool Done = true;
    unsigned MaxPredDepth = 0;
    for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
         E = Cur->Preds.end(); I != E; ++I) {
      SUnit *PredSU = I->getSUnit();
      if (PredSU->isDepthCurrent)
        MaxPredDepth = std::max(MaxPredDepth,
                                PredSU->Depth + I->getLatency());
      else {
        Done = false;
        WorkList.push_back(PredSU);
      }
    }

    if (Done) {
      WorkList.pop_back();
      if (MaxPredDepth != Cur->Depth) {
        Cur->setDepthDirty();
        Cur->Depth = MaxPredDepth;
      }
      Cur->isDepthCurrent = true;
    }
  } while (!WorkList.empty());
}
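// A minimal standalone sketch (not LLVM code) of the worklist scheme used by
// ComputeHeight/ComputeDepth above: instead of recursing, a node stays on the
// stack until all of its neighbors have current values, so deep DAGs cannot
// overflow the call stack. Node, Succs, and the latency pair are hypothetical
// stand-ins for SUnit, SUnit::Succs, and SDep::getLatency; a cycle-free graph
// is assumed.
#include <algorithm>
#include <utility>
#include <vector>

struct Node {
  std::vector<std::pair<Node*, unsigned>> Succs; // (successor, edge latency)
  unsigned Height = 0;
  bool HeightCurrent = false;
};

void computeHeight(Node *Root) {
  std::vector<Node*> WorkList{Root};
  do {
    Node *Cur = WorkList.back();
    bool Done = true;
    unsigned MaxSuccHeight = 0;
    for (auto &S : Cur->Succs) {
      if (S.first->HeightCurrent)
        MaxSuccHeight = std::max(MaxSuccHeight, S.first->Height + S.second);
      else {
        Done = false;                // Revisit Cur after its successors.
        WorkList.push_back(S.first);
      }
    }
    if (Done) {
      WorkList.pop_back();
      Cur->Height = MaxSuccHeight;
      Cur->HeightCurrent = true;
    }
  } while (!WorkList.empty());
}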
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
void SUnit::addPred(const SDep &D) {
  // If this node already has this dependence, don't add a redundant one.
  for (unsigned i = 0, e = (unsigned)Preds.size(); i != e; ++i)
    if (Preds[i] == D)
      return;
  // Now add a corresponding succ to N.
  SDep P = D;
  P.setSUnit(this);
  SUnit *N = D.getSUnit();
  // Update the bookkeeping.
  if (D.getKind() == SDep::Data) {
    ++NumPreds;
    ++N->NumSuccs;
  }
  if (!N->isScheduled)
    ++NumPredsLeft;
  if (!isScheduled)
    ++N->NumSuccsLeft;
  Preds.push_back(D);
  N->Succs.push_back(P);
  if (P.getLatency() != 0) {
    this->setDepthDirty();
    N->setHeightDirty();
  }
}
/// removePred - This removes the specified edge as a pred of the current
/// node if it exists. It also removes the current node as a successor of
/// the specified node.
void SUnit::removePred(const SDep &D) {
  // Find the matching predecessor.
  for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
       I != E; ++I)
    if (*I == D) {
      bool FoundSucc = false;
      // Find the corresponding successor in N.
      SDep P = D;
      P.setSUnit(this);
      SUnit *N = D.getSUnit();
      for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(),
             EE = N->Succs.end(); II != EE; ++II)
        if (*II == P) {
          FoundSucc = true;
          N->Succs.erase(II);
          break;
        }
      assert(FoundSucc && "Mismatching preds / succs lists!");
      Preds.erase(I);
      // Update the bookkeeping.
      if (P.getKind() == SDep::Data) {
        --NumPreds;
        --N->NumSuccs;
      }
      if (!N->isScheduled)
        --NumPredsLeft;
      if (!isScheduled)
        --N->NumSuccsLeft;
      if (P.getLatency() != 0) {
        this->setDepthDirty();
        N->setHeightDirty();
      }
      return;
    }
}
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
bool SUnit::addPred(const SDep &D) {
  // If this node already has this dependence, don't add a redundant one.
  for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
       I != E; ++I)
    if (*I == D)
      return false;
  // Now add a corresponding succ to N.
  SDep P = D;
  P.setSUnit(this);
  SUnit *N = D.getSUnit();
  // Update the bookkeeping.
  if (D.getKind() == SDep::Data) {
    assert(NumPreds < UINT_MAX && "NumPreds will overflow!");
    assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!");
    ++NumPreds;
    ++N->NumSuccs;
  }
  if (!N->isScheduled) {
    assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
    ++NumPredsLeft;
  }
  if (!isScheduled) {
    assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
    ++N->NumSuccsLeft;
  }
  Preds.push_back(D);
  N->Succs.push_back(P);
  if (P.getLatency() != 0) {
    this->setDepthDirty();
    N->setHeightDirty();
  }
  return true;
}
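// A standalone toy (not LLVM code) illustrating the invariant addPred
// maintains above: every predecessor edge stored on one node has a mirrored
// successor edge on the other node, and the "left" counters track how many
// unscheduled neighbors remain. ToyUnit and its members are hypothetical
// simplifications of SUnit.
#include <vector>

struct ToyUnit {
  std::vector<ToyUnit*> Preds, Succs;
  unsigned NumPredsLeft = 0, NumSuccsLeft = 0;
  bool Scheduled = false;

  // Returns false if the edge already exists, like SUnit::addPred above.
  bool addPred(ToyUnit *N) {
    for (ToyUnit *P : Preds)
      if (P == N)
        return false;
    Preds.push_back(N);
    N->Succs.push_back(this);            // mirror edge on the predecessor
    if (!N->Scheduled) ++NumPredsLeft;   // one more pred to wait for
    if (!Scheduled)    ++N->NumSuccsLeft;
    return true;
  }
};

int main() {
  ToyUnit A, B;
  bool First = B.addPred(&A);   // B depends on A
  bool Second = B.addPred(&A);  // redundant edge is rejected
  return (First && !Second &&
          A.Succs.size() == 1 && B.NumPredsLeft == 1) ? 0 : 1;
}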
/// Select a bundle for the current cycle. The selected instructions are
/// put into the bundle in the correct issue order. If no instruction can be
/// issued, false is returned.
bool PatmosLatencyQueue::selectBundle(std::vector<SUnit*> &Bundle)
{
  if (AvailableQueue.empty()) return false;

  // Find the best bundle:
  // - Ensure that instructions that MUST be scheduled go into the bundle.
  // - Find the best pair of available instructions, e.g. two stores with
  //   exclusive predicates and highest ILP/.., but only if at least one of
  //   those instructions has high priority.
  // - Find the best instructions that fit into the bundle with highest ILP/..
  //
  // Instructions are built up into a bundle in Bundle. Instructions are
  // removed from AvailableQueue in scheduled() once the instruction is
  // actually picked.

  unsigned CurrWidth = 0;
  // If the bundle were not empty, we would need to calculate the initial
  // width here; for now, callers must pass an empty bundle.
  assert(Bundle.empty());

  std::vector<bool> Selected;
  Selected.resize(AvailableQueue.size());

  // Make sure that all instructions with the ScheduleLow flag go into the
  // bundle.
  for (unsigned i = 0; i < AvailableQueue.size() && CurrWidth < IssueWidth;
       i++) {
    SUnit *SU = AvailableQueue[i];
    if (!SU->isScheduleLow) break;

    if (addToBundle(Bundle, SU, CurrWidth)) {
      Selected[i] = true;
    }
  }

  // Check if any of the highest <IssueWidth> instructions can be scheduled
  // only with a single other instruction in this queue, or if there is any
  // instruction in the queue that can only be scheduled with the highest
  // ones. Pick them in any case.
  // TODO magic goes here..

  // Try to fill up the bundle with instructions from the queue by best effort.
  for (unsigned i = 0; i < AvailableQueue.size() && CurrWidth < IssueWidth;
       i++) {
    if (Selected[i]) continue;
    SUnit *SU = AvailableQueue[i];

    // Check the width. Ignore the width for the first instruction to allow
    // ALUl even when bundling is disabled.
    unsigned width = PII.getIssueWidth(SU->getInstr());
    if (!Bundle.empty() && CurrWidth + width > IssueWidth) continue;

    addToBundle(Bundle, SU, CurrWidth);
  }

  return true;
}
void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
  LatencyPriorityQueue q = *this;
  while (!q.empty()) {
    SUnit *su = q.pop();
    dbgs() << "Height " << su->getHeight() << ": ";
    su->dump(DAG);
  }
}
/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
  unsigned DataLatency = SU->Latency;

  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    std::vector<PhysRegSUOper> &UseList = Uses[*Alias];
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i].SU;
      if (UseSU == SU)
        continue;
      MachineInstr *UseMI = UseSU->getInstr();
      int UseOp = UseList[i].OpIdx;
      unsigned LDataLatency = DataLatency;
      // Optionally add in a special extra latency for nodes that
      // feed addresses.
      // TODO: Perhaps we should get rid of
      // SpecialAddressLatency and just move this into
      // adjustSchedDependency for the targets that care about it.
      if (SpecialAddressLatency != 0 && !UnitLatencies && UseSU != &ExitSU) {
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
        if (RegUseIndex >= 0 &&
            (UseMI->mayLoad() || UseMI->mayStore()) &&
            (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
            UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
          LDataLatency += SpecialAddressLatency;
      }
      // Adjust the dependence latency using operand def/use
      // information (if any), and then allow the target to
      // perform its own adjustments.
      SDep dep(SU, SDep::Data, LDataLatency, *Alias);
      if (!UnitLatencies) {
        unsigned Latency =
          TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx,
                                     (UseOp < 0 ? 0 : UseMI), UseOp);
        dep.setLatency(Latency);
        unsigned MinLatency =
          TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx,
                                     (UseOp < 0 ? 0 : UseMI), UseOp,
                                     /*FindMin=*/true);
        dep.setMinLatency(MinLatency);

        ST.adjustSchedDependency(SU, UseSU, dep);
      }
      UseSU->addPred(dep);
    }
  }
}
// Check if a call and subsequent A2_tfrpi instructions should maintain
// scheduling affinity. We are looking for the TFRI to be consumed in
// the next instruction. This should help reduce the instances of
// double register pairs being allocated and scheduled before a call
// when not used until after the call. This situation is exacerbated
// by the fact that we allocate the pair from the callee saves list,
// leading to excess spills and restores.
bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII,
      const SUnit &Inst1, const SUnit &Inst2) const {
  if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
    return false;

  // TypeXTYPE are 64 bit operations.
  if (HII.getType(Inst2.getInstr()) == HexagonII::TypeXTYPE)
    return true;
  return false;
}
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = 0;
  IsTopNode = true;
  NextInstKind = IDOther;

  // Check if we might want to switch the current clause type.
  bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
      (CurEmitted > InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind]->empty());
  bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch]->empty() || !Available[IDOther]->empty());

  if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
    // Try to pick ALU.
    SU = pickAlu();
    if (SU) {
      if (CurEmitted > InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // Try to pick FETCH.
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // Try to pick other.
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  DEBUG(
      if (SU) {
        dbgs() << "picked node: ";
        SU->dump(DAG);
      } else {
        dbgs() << "NO NODE ";
        for (int i = 0; i < IDLast; ++i) {
          Available[i]->dump();
          Pending[i]->dump();
        }
        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
          const SUnit &S = DAG->SUnits[i];
          if (!S.isScheduled)
            S.dump(DAG);
        }
      }
  );

  return SU;
}
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
bool SUnit::addPred(const SDep &D) {
  // If this node already has this dependence, don't add a redundant one.
  for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
       I != E; ++I) {
    if (I->overlaps(D)) {
      // Extend the latency if needed. Equivalent to removePred(I) + addPred(D).
      if (I->getLatency() < D.getLatency()) {
        SUnit *PredSU = I->getSUnit();
        // Find the corresponding successor in N.
        SDep ForwardD = *I;
        ForwardD.setSUnit(this);
        for (SmallVector<SDep, 4>::iterator II = PredSU->Succs.begin(),
               EE = PredSU->Succs.end(); II != EE; ++II) {
          if (*II == ForwardD) {
            II->setLatency(D.getLatency());
            break;
          }
        }
        I->setLatency(D.getLatency());
      }
      return false;
    }
  }
  // Now add a corresponding succ to N.
  SDep P = D;
  P.setSUnit(this);
  SUnit *N = D.getSUnit();
  // Update the bookkeeping.
  if (D.getKind() == SDep::Data) {
    assert(NumPreds < UINT_MAX && "NumPreds will overflow!");
    assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!");
    ++NumPreds;
    ++N->NumSuccs;
  }
  if (!N->isScheduled) {
    assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
    ++NumPredsLeft;
  }
  if (!isScheduled) {
    assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
    ++N->NumSuccsLeft;
  }
  Preds.push_back(D);
  N->Succs.push_back(P);
  if (P.getLatency() != 0) {
    this->setDepthDirty();
    N->setHeightDirty();
  }
  return true;
}
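// Standalone sketch (not LLVM code) of the merge behavior in the variant
// above: adding an edge that overlaps an existing one does not create a
// duplicate; it only raises the recorded latency. MiniUnit and Edge are
// hypothetical simplifications, with "same source node" standing in for
// SDep::overlaps.
#include <algorithm>
#include <vector>

struct Edge { int From; unsigned Latency; };

struct MiniUnit {
  std::vector<Edge> Preds;
  // Returns true only when a genuinely new edge is inserted.
  bool addPred(Edge E) {
    for (Edge &P : Preds)
      if (P.From == E.From) {            // "overlaps": same source node
        P.Latency = std::max(P.Latency, E.Latency);
        return false;
      }
    Preds.push_back(E);
    return true;
  }
};

int main() {
  MiniUnit U;
  bool New = U.addPred({0, 2});
  bool Merged = !U.addPred({0, 5});      // merged, latency extended to 5
  return (New && Merged && U.Preds[0].Latency == 5) ? 0 : 1;
}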
/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
                                           const MachineOperand &MO) {
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
  unsigned DataLatency = SU->Latency;

  for (const unsigned *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    std::vector<SUnit*> &UseList = Uses[*Alias];
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i];
      if (UseSU == SU)
        continue;
      unsigned LDataLatency = DataLatency;
      // Optionally add in a special extra latency for nodes that
      // feed addresses.
      // TODO: Perhaps we should get rid of
      // SpecialAddressLatency and just move this into
      // adjustSchedDependency for the targets that care about it.
      if (SpecialAddressLatency != 0 && !UnitLatencies && UseSU != &ExitSU) {
        MachineInstr *UseMI = UseSU->getInstr();
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
        if (RegUseIndex >= 0 &&
            (UseMI->mayLoad() || UseMI->mayStore()) &&
            (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
            UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
          LDataLatency += SpecialAddressLatency;
      }
      // Adjust the dependence latency using operand def/use
      // information (if any), and then allow the target to
      // perform its own adjustments.
      const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
      if (!UnitLatencies) {
        ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
        ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
      }
      UseSU->addPred(dep);
    }
  }
}
uint32_t CommandCargoTransMsg::execute(){
  cerr << "execute CommandCargoTransMsg" << endl;
  Processable *tproc = _processor->getLocalProcssable(_toShipId);
  if (!tproc)
    return COMMAND_FINAL;
  SUnit* unit = tproc->isUnit();
  if (!unit)
    return COMMAND_FINAL;
  if (!itemlist[_itemType])
    return COMMAND_FINAL;
  unit->getsubable()->getCargoBay()->AddReturn(itemlist[_itemType], _quan);
  return COMMAND_FINAL;
}
// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
  // For MachineInstr-based scheduling, we're rescheduling the instructions in
  // the block, so start by removing them from the block.
  while (Begin != InsertPos) {
    MachineBasicBlock::iterator I = Begin;
    ++Begin;
    BB->remove(I);
  }

  // First reinsert any remaining debug_values; these are either constants,
  // or refer to live-in registers. The beginning of the block is the right
  // place for the latter. The former might reasonably be placed elsewhere
  // using some kind of ordering algorithm, but right now it doesn't matter.
  for (int i = DbgValueVec.size()-1; i >= 0; --i)
    if (DbgValueVec[i])
      BB->insert(InsertPos, DbgValueVec[i]);

  // Then re-insert them according to the given schedule.
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    SUnit *SU = Sequence[i];
    if (!SU) {
      // Null SUnit* is a noop.
      EmitNoop();
      continue;
    }

    BB->insert(InsertPos, SU->getInstr());
    for (unsigned i = 0, e = SU->DbgInstrList.size(); i < e; ++i)
      BB->insert(InsertPos, SU->DbgInstrList[i]);
  }

  // Update the Begin iterator, as the first instruction in the block
  // may have been scheduled later.
  if (!DbgValueVec.empty()) {
    for (int i = DbgValueVec.size()-1; i >= 0; --i)
      if (DbgValueVec[i] != 0) {
        Begin = DbgValueVec[DbgValueVec.size()-1];
        break;
      }
  } else if (!Sequence.empty())
    Begin = Sequence[0]->getInstr();

  DbgValueVec.clear();
  return BB;
}
/// Change the latency between the two SUnits.
void HexagonSubtarget::changeLatency(SUnit *Src, SmallVector<SDep, 4> &Deps,
      SUnit *Dst, unsigned Lat) const {
  MachineInstr &SrcI = *Src->getInstr();
  for (auto &I : Deps) {
    if (I.getSUnit() != Dst)
      continue;
    I.setLatency(Lat);
    SUnit *UpdateDst = I.getSUnit();
    updateLatency(SrcI, *UpdateDst->getInstr(), I);
    // Update the latency of the opposite edge too.
    for (auto &PI : UpdateDst->Preds) {
      if (PI.getSUnit() != Src || !PI.isAssignedRegDep())
        continue;
      PI.setLatency(Lat);
      updateLatency(SrcI, *UpdateDst->getInstr(), PI);
    }
  }
}
/// removePred - This removes the specified edge as a pred of the current
/// node if it exists. It also removes the current node as a successor of
/// the specified node.
void SUnit::removePred(const SDep &D) {
  // Find the matching predecessor.
  for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
       I != E; ++I)
    if (*I == D) {
      // Find the corresponding successor in N.
      SDep P = D;
      P.setSUnit(this);
      SUnit *N = D.getSUnit();
      SmallVectorImpl<SDep>::iterator Succ = std::find(N->Succs.begin(),
                                                       N->Succs.end(), P);
      assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
      N->Succs.erase(Succ);
      Preds.erase(I);
      // Update the bookkeeping.
      if (P.getKind() == SDep::Data) {
        assert(NumPreds > 0 && "NumPreds will underflow!");
        assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
        --NumPreds;
        --N->NumSuccs;
      }
      if (!N->isScheduled) {
        if (D.isWeak())
          --WeakPredsLeft;
        else {
          assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
          --NumPredsLeft;
        }
      }
      if (!isScheduled) {
        if (D.isWeak())
          --N->WeakSuccsLeft;
        else {
          assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
          --N->NumSuccsLeft;
        }
      }
      if (P.getLatency() != 0) {
        this->setDepthDirty();
        N->setHeightDirty();
      }
      return;
    }
}
/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
static SDep *CriticalPathStep(SUnit *SU) {
  SDep *Next = 0;
  unsigned NextDepth = 0;
  // Find the predecessor edge with the greatest depth.
  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
       P != PE; ++P) {
    SUnit *PredSU = P->getSUnit();
    unsigned PredLatency = P->getLatency();
    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
    // In the case of a latency tie, prefer an anti-dependency edge over
    // other types of edges.
    if (NextDepth < PredTotalLatency ||
        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
      NextDepth = PredTotalLatency;
      Next = &*P;
    }
  }
  return Next;
}
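// Usage sketch (an assumed caller, kept in comments since it is not
// self-contained; MaxDepthSU is a hypothetical name for the SUnit with the
// greatest depth): starting from the deepest node, repeatedly step along the
// deepest predecessor edge to enumerate the bottom-up critical path.
//
//   for (SUnit *SU = MaxDepthSU; SU; ) {
//     SDep *Edge = CriticalPathStep(SU);
//     SU = Edge ? Edge->getSUnit() : 0;   // null terminates at the entry
//   }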
unsigned ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
  unsigned NumberDeps = 0;
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;

    SUnit *PredSU = I->getSUnit();
    const SDNode *ScegN = PredSU->getNode();
    if (!ScegN)
      continue;

    // If value is passed to CopyToReg, it is probably
    // live outside BB.
    switch (ScegN->getOpcode()) {
      default:                break;
      case ISD::TokenFactor:  break;
      case ISD::CopyFromReg:  NumberDeps++;  break;
      case ISD::CopyToReg:    break;
      case ISD::INLINEASM:    break;
    }

    if (!ScegN->isMachineOpcode())
      continue;

    for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
      MVT VT = ScegN->getSimpleValueType(i);
      if (TLI->isTypeLegal(VT) &&
          (TLI->getRegClassFor(VT)->getID() == RCId)) {
        NumberDeps++;
        break;
      }
    }
  }
  return NumberDeps;
}
void apply(ScheduleDAGInstrs *DAGInstrs) override {
  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
  SUnit *SUa = nullptr;
  // Search for two consecutive memory operations and link them
  // to prevent the scheduler from moving them apart.
  // In DAG pre-processing, SUnits are in the original order of
  // the instructions before scheduling.
  for (SUnit &SU : DAG->SUnits) {
    MachineInstr &MI2 = *SU.getInstr();
    if (!MI2.mayLoad() && !MI2.mayStore()) {
      SUa = nullptr;
      continue;
    }
    if (!SUa) {
      SUa = &SU;
      continue;
    }

    MachineInstr &MI1 = *SUa->getInstr();
    if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) ||
        (TII->isFLAT(MI1) && TII->isFLAT(MI2)) ||
        (TII->isSMRD(MI1) && TII->isSMRD(MI2)) ||
        (TII->isDS(MI1)   && TII->isDS(MI2))) {
      SU.addPredBarrier(SUa);

      for (const SDep &SI : SU.Preds) {
        if (SI.getSUnit() != SUa)
          SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial));
      }

      if (&SU != &DAG->ExitSU) {
        for (const SDep &SI : SUa->Succs) {
          if (SI.getSUnit() != &SU)
            SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial));
        }
      }
    }

    SUa = &SU;
  }
}
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
  SUnit *SuccSU = D.getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    errs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    errs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    PendingQueue.push_back(SuccSU);
}
/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
/// instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(BB->size());

  for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
    MachineInstr *MI = I;
    if (MI->isDebugValue())
      continue;

    SUnit *SU = newSUnit(MI);
    MISUnitMap[MI] = SU;

    SU->isCall = MI->isCall();
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
  }
}
/// Go back one cycle and update the availability queue.
void PatmosLatencyQueue::recedeCycle(unsigned CurrCycle)
{
  unsigned avail = 0;
  for (unsigned i = 0; i < PendingQueue.size() - avail; i++) {
    SUnit *SU = PendingQueue[i];

    if (SU->getHeight() <= CurrCycle) {
      // Remove the instruction from pending.
      avail++;
      PendingQueue[i] = *(PendingQueue.end() - avail);
      // Revisit the moved instruction.
      i--;

      // Make the instruction available.
      AvailableQueue.push_back(SU);
      std::inplace_merge(AvailableQueue.begin(), AvailableQueue.end() - 1,
                         AvailableQueue.end(), Cmp);
    }
  }
  PendingQueue.resize(PendingQueue.size() - avail);
}
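// Standalone sketch (not Patmos code) of the removal idiom used above:
// elements to be dropped are swapped with the not-yet-inspected tail and the
// vector is truncated once at the end, avoiding O(n) erases from the middle.
// removeIf and Keep are hypothetical names for illustration.
#include <vector>

template <typename Pred>
void removeIf(std::vector<int> &V, Pred Keep) {
  unsigned Removed = 0;
  for (unsigned i = 0; i < V.size() - Removed; i++) {
    if (!Keep(V[i])) {
      Removed++;
      V[i] = *(V.end() - Removed); // pull an element in from the tail
      i--;                         // revisit the moved element
    }
  }
  V.resize(V.size() - Removed);
}

int main() {
  std::vector<int> V{1, 2, 3, 4, 5};
  removeIf(V, [](int x) { return x % 2; }); // keep odd values
  return V.size() == 3 ? 0 : 1;             // {1, 5, 3} remain
}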
void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dumpNodeName(SU);
  dbgs() << ": ";

  if (!SU.getNode()) {
    dbgs() << "PHYS REG COPY\n";
    return;
  }

  SU.getNode()->dump(DAG);
  dbgs() << "\n";
  SmallVector<SDNode *, 4> GluedNodes;
  for (SDNode *N = SU.getNode()->getGluedNode(); N; N = N->getGluedNode())
    GluedNodes.push_back(N);
  while (!GluedNodes.empty()) {
    dbgs() << "    ";
    GluedNodes.back()->dump(DAG);
    dbgs() << "\n";
    GluedNodes.pop_back();
  }
#endif
}
/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
  SUnit *SuccSU = D.getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  assert(!D.isWeak() && "unexpected artificial DAG edge");

  --SuccSU->NumPredsLeft;

  SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    PendingQueue.push_back(SuccSU);
  }
}
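// Usage sketch (an assumed scheduling loop, kept in comments since it is not
// self-contained): after an SUnit is issued, every successor edge is
// released; a successor whose last predecessor was just scheduled migrates
// from PendingQueue to the ready list once its cycle bound is reached.
//
//   void releaseSuccessors(SUnit *SU) {
//     for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
//          I != E; ++I)
//       releaseSucc(SU, *I);
//   }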
unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, unsigned RCId) {
  unsigned NumberDeps = 0;
  for (const SDep &Succ : SU->Succs) {
    if (Succ.isCtrl())
      continue;

    SUnit *SuccSU = Succ.getSUnit();
    const SDNode *ScegN = SuccSU->getNode();
    if (!ScegN)
      continue;

    // If value is passed to CopyToReg, it is probably
    // live outside BB.
    switch (ScegN->getOpcode()) {
      default:                break;
      case ISD::TokenFactor:  break;
      case ISD::CopyFromReg:  break;
      case ISD::CopyToReg:    NumberDeps++;  break;
      case ISD::INLINEASM:    break;
    }

    if (!ScegN->isMachineOpcode())
      continue;

    for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
      const SDValue &Op = ScegN->getOperand(i);
      MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
      if (TLI->isTypeLegal(VT) &&
          (TLI->getRegClassFor(VT)->getID() == RCId)) {
        NumberDeps++;
        break;
      }
    }
  }
  return NumberDeps;
}
void PatmosLatencyQueue::dump()
{
  dbgs() << "PendingQueue:";
  for (unsigned i = 0; i < PendingQueue.size(); i++) {
    SUnit *SU = PendingQueue[i];
    if (i > 0) dbgs() << ",";
    dbgs() << " SU(" << SU->NodeNum << "): Height " << SU->getHeight()
           << " Depth " << SU->getDepth()
           << " Tree: " << Cmp.DFSResult->getSubtreeID(SU) << " @"
           << Cmp.DFSResult->getSubtreeLevel(Cmp.DFSResult->getSubtreeID(SU));
    if (SU->isScheduleLow) dbgs() << " low ";
  }
  dbgs() << "\nAvailableQueue:";
  for (unsigned i = 0; i < AvailableQueue.size(); i++) {
    SUnit *SU = AvailableQueue[i];
    if (i > 0) dbgs() << ",";
    dbgs() << " SU(" << SU->NodeNum << ") Height " << SU->getHeight()
           << " Depth " << SU->getDepth()
           << " ILP: " << Cmp.DFSResult->getILP(SU);
    if (SU->isScheduleLow) dbgs() << " low ";
  }
  dbgs() << "\n";
}
void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
  // We'll be allocating one SUnit for each instruction, plus one for
  // the region exit node.
  SUnits.reserve(BB->size());

  // We build scheduling units by walking a block's instruction list from
  // bottom to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = 0, *AliasChain = 0;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;

  // Keep track of dangling debug references to registers.
  std::vector<std::pair<MachineInstr*, unsigned> >
    DanglingDebugValue(TRI->getNumRegs(),
                       std::make_pair(static_cast<MachineInstr*>(0), 0));

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = ForceUnitLatencies();

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValueVec.clear();

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  AddSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    // DBG_VALUE does not have SUnits built, so just remember these for later
    // reinsertion.
    if (MI->isDebugValue()) {
      if (MI->getNumOperands() == 3 && MI->getOperand(0).isReg() &&
          MI->getOperand(0).getReg())
        DanglingDebugValue[MI->getOperand(0).getReg()] =
          std::make_pair(MI, DbgValueVec.size());
      DbgValueVec.push_back(MI);
      continue;
    }
    const TargetInstrDesc &TID = MI->getDesc();
    assert(!TID.isTerminator() && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");
    // Create the SUnit for this MI.
    SUnit *SU = NewSUnit(MI);
    SU->isCall = TID.isCall();
    SU->isCommutable = TID.isCommutable();

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      ComputeLatency(SU);

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");

      if (MO.isDef() && DanglingDebugValue[Reg].first != 0) {
        SU->DbgInstrList.push_back(DanglingDebugValue[Reg].first);
        DbgValueVec[DanglingDebugValue[Reg].second] = 0;
        DanglingDebugValue[Reg] = std::make_pair((MachineInstr*)0, 0);
      }

      std::vector<SUnit *> &UseList = Uses[Reg];
      std::vector<SUnit *> &DefList = Defs[Reg];
      // Optionally add output and anti dependencies. For anti
      // dependencies we use a latency of 0 because for a multi-issue
      // target we want to allow the defining instruction to issue
      // in the same cycle as the using instruction.
      // TODO: Using a latency of 1 here for output dependencies assumes
      //       there's no cost for reusing registers.
      SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
      unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
      for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
        SUnit *DefSU = DefList[i];
        if (DefSU == &ExitSU)
          continue;
        if (DefSU != SU &&
            (Kind != SDep::Output || !MO.isDead() ||
             !DefSU->getInstr()->registerDefIsDead(Reg)))
          DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
      }
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        std::vector<SUnit *> &DefList = Defs[*Alias];
        for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
          SUnit *DefSU = DefList[i];
          if (DefSU == &ExitSU)
            continue;
          if (DefSU != SU &&
              (Kind != SDep::Output || !MO.isDead() ||
               !DefSU->getInstr()->registerDefIsDead(*Alias)))
            DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
        }
      }

      if (MO.isDef()) {
        // Add any data dependencies.
        unsigned DataLatency = SU->Latency;
        for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
          SUnit *UseSU = UseList[i];
          if (UseSU == SU)
            continue;
          unsigned LDataLatency = DataLatency;
          // Optionally add in a special extra latency for nodes that
          // feed addresses.
          // TODO: Do this for register aliases too.
          // TODO: Perhaps we should get rid of
          // SpecialAddressLatency and just move this into
          // adjustSchedDependency for the targets that care about it.
          if (SpecialAddressLatency != 0 && !UnitLatencies &&
              UseSU != &ExitSU) {
            MachineInstr *UseMI = UseSU->getInstr();
            const TargetInstrDesc &UseTID = UseMI->getDesc();
            int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
            assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
            if (RegUseIndex >= 0 &&
                (UseTID.mayLoad() || UseTID.mayStore()) &&
                (unsigned)RegUseIndex < UseTID.getNumOperands() &&
                UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
              LDataLatency += SpecialAddressLatency;
          }
          // Adjust the dependence latency using operand def/use
          // information (if any), and then allow the target to
          // perform its own adjustments.
          const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
          if (!UnitLatencies) {
            ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
            ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
          }
          UseSU->addPred(dep);
        }
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          std::vector<SUnit *> &UseList = Uses[*Alias];
          for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
            SUnit *UseSU = UseList[i];
            if (UseSU == SU)
              continue;
            const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
            if (!UnitLatencies) {
              ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
              ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
            }
            UseSU->addPred(dep);
          }
        }

        // If a def is going to wrap back around to the top of the loop,
        // backschedule it.
        if (!UnitLatencies && DefList.empty()) {
          LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
          if (I != LoopRegs.Deps.end()) {
            const MachineOperand *UseMO = I->second.first;
            unsigned Count = I->second.second;
            const MachineInstr *UseMI = UseMO->getParent();
            unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
            const TargetInstrDesc &UseTID = UseMI->getDesc();
            // TODO: If we knew the total depth of the region here, we could
            // handle the case where the whole loop is inside the region but
            // is large enough that the isScheduleHigh trick isn't needed.
            if (UseMOIdx < UseTID.getNumOperands()) {
              // Currently, we only support scheduling regions consisting of
              // single basic blocks. Check to see if the instruction is in
              // the same region by checking to see if it has the same parent.
              if (UseMI->getParent() != MI->getParent()) {
                unsigned Latency = SU->Latency;
                if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
                  Latency += SpecialAddressLatency;
                // This is a wild guess as to the portion of the latency which
                // will be overlapped by work done outside the current
                // scheduling region.
                Latency -= std::min(Latency, Count);
                // Add the artificial edge.
                ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                                    /*Reg=*/0, /*isNormalMemory=*/false,
                                    /*isMustAlias=*/false,
                                    /*isArtificial=*/true));
              } else if (SpecialAddressLatency > 0 &&
                         UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
                // The entire loop body is within the current scheduling region
                // and the latency of this operation is assumed to be greater
                // than the latency of the loop.
                // TODO: Recursively mark data-edge predecessors as
                //       isScheduleHigh too.
                SU->isScheduleHigh = true;
              }
            }
            LoopRegs.Deps.erase(I);
          }
        }

        UseList.clear();
        if (!MO.isDead())
          DefList.clear();
        DefList.push_back(SU);
      } else {
        UseList.push_back(SU);
      }
    }

    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for true dependency of Store followed by
    // aliased Load... we estimate that with a single cycle of latency
    // assuming the hardware will bypass)
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
    unsigned TrueMemOrderLatency = 0;
    if (TID.isCall() || MI->hasUnmodeledSideEffects() ||
        (MI->hasVolatileMemoryRef() &&
         (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      if (AliasChain)
        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
           E = AliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
           AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (TID.mayStore()) {
      bool MayAlias = true;
      TrueMemOrderLatency = STORE_LOAD_LATENCY;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
                                       /*Reg=*/0, /*isNormalMemory=*/true));
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            PendingLoads[k]->addPred(SDep(SU, SDep::Order,
                                          TrueMemOrderLatency));
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        }
        // Add dependence on barrier chain, if needed.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
        goto new_alias_chain;
      }

      if (!ExitSU.isPred(SU))
        // Push stores up a bit to avoid them getting in between cmp
        // and branches.
        ExitSU.addPred(SDep(SU, SDep::Order, 0,
                            /*Reg=*/0, /*isNormalMemory=*/false,
                            /*isMustAlias=*/false,
                            /*isArtificial=*/true));
    } else if (TID.mayLoad()) {
      bool MayAlias = true;
      TrueMemOrderLatency = 0;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                    /*isNormalMemory=*/true));
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));

          PendingLoads.push_back(SU);
          MayAlias = true;
        }

        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
  }

  for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
    Defs[i].clear();
    Uses[i].clear();
  }
  PendingLoads.clear();
}
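// A standalone sketch (not LLVM code) of the chain bookkeeping in
// BuildSchedGraph above, heavily simplified: stores to a known location
// replace the last def for that location, loads attach to it, and anything
// with unknown side effects becomes a barrier that later memory operations
// must order against. ChainBuilder, MemOp, and the visit* names are
// hypothetical; the alias/non-alias split and per-location use lists are
// omitted.
#include <map>
#include <string>
#include <vector>

struct MemOp { int Id; std::vector<int> DependsOn; };

struct ChainBuilder {
  std::map<std::string, int> LastDef; // location -> id of last store
  int BarrierId = -1;                 // last side-effecting op, if any

  void visitStore(MemOp &Op, const std::string &Loc) {
    auto I = LastDef.find(Loc);
    if (I != LastDef.end()) Op.DependsOn.push_back(I->second);
    if (BarrierId >= 0) Op.DependsOn.push_back(BarrierId);
    LastDef[Loc] = Op.Id;              // this store is now the live def
  }

  void visitLoad(MemOp &Op, const std::string &Loc) {
    auto I = LastDef.find(Loc);
    if (I != LastDef.end()) Op.DependsOn.push_back(I->second);
    if (BarrierId >= 0) Op.DependsOn.push_back(BarrierId);
  }

  void visitBarrier(MemOp &Op) {
    // Conservative: later code must order against this op alone.
    LastDef.clear();
    BarrierId = Op.Id;
  }
};

int main() {
  ChainBuilder CB;
  MemOp S0{0, {}}, L1{1, {}}, B2{2, {}}, L3{3, {}};
  CB.visitStore(S0, "a");
  CB.visitLoad(L1, "a");     // depends on S0
  CB.visitBarrier(B2);
  CB.visitLoad(L3, "a");     // depends on the barrier only
  return (L1.DependsOn[0] == 0 && L3.DependsOn[0] == 2) ? 0 : 1;
}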