void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
  // Dump by popping from a copy so the real queue's contents and
  // ordering are left untouched.
  LatencyPriorityQueue q = *this;
  while (!q.empty()) {
    SUnit *su = q.pop();
    dbgs() << "Height " << su->getHeight() << ": ";
    su->dump(DAG);
  }
}
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = 0;
  IsTopNode = true;
  NextInstKind = IDOther;

  // Check if we might want to switch the current clause type.
  bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
      (CurEmitted > InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind]->empty());
  bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch]->empty() || !Available[IDOther]->empty());

  if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
    // Try to pick an ALU instruction.
    SU = pickAlu();
    if (SU) {
      if (CurEmitted > InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // Try to pick a FETCH instruction.
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // Try to pick any other instruction.
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  DEBUG(
      if (SU) {
        dbgs() << "picked node: ";
        SU->dump(DAG);
      } else {
        dbgs() << "NO NODE ";
        for (int i = 0; i < IDLast; ++i) {
          Available[i]->dump();
          Pending[i]->dump();
        }
        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
          const SUnit &S = DAG->SUnits[i];
          if (!S.isScheduled)
            S.dump(DAG);
        }
      }
  );

  return SU;
}
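The picker above walks a fixed priority chain: stay on ALU while the clause-switch conditions forbid leaving it, then fall back to FETCH, then to anything else. Below is a minimal, self-contained sketch of just that fallback-chain shape, with the clause-switching hysteresis omitted; `Node`, `pickInOrder`, and the queue layout are hypothetical stand-ins for illustration, not the R600 scheduler's real API.

#include <array>
#include <deque>
#include <initializer_list>

// Hypothetical stand-in types; the real strategy operates on llvm::SUnit
// ready queues indexed by instruction kind.
enum InstKind { IDAlu, IDFetch, IDOther, IDLast };

struct Node { int Id; };

// Try each kind in priority order and record which kind the chosen
// node belongs to, mirroring the ALU -> FETCH -> Other fallbacks above.
Node *pickInOrder(std::array<std::deque<Node *>, IDLast> &Available,
                  InstKind &NextKind) {
  for (InstKind K : {IDAlu, IDFetch, IDOther}) {
    if (!Available[K].empty()) {
      Node *N = Available[K].front();
      Available[K].pop_front();
      NextKind = K;
      return N;
    }
  }
  NextKind = IDOther;
  return nullptr; // Nothing ready; the caller retries after releasing nodes.
}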
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
  SUnit *SuccSU = D.getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    errs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    errs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    PendingQueue.push_back(SuccSU);
}
/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
  SUnit *SuccSU = D.getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  assert(!D.isWeak() && "unexpected artificial DAG edge");

  --SuccSU->NumPredsLeft;

  SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    PendingQueue.push_back(SuccSU);
  }
}
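Both releaseSucc variants above are the core step of Kahn-style list scheduling: when a node is scheduled, each successor loses one unscheduled predecessor, and a successor whose count reaches zero becomes ready. Here is a minimal sketch of that counting pattern under simplified assumptions, with a hypothetical `Unit` type standing in for llvm::SUnit:

#include <cassert>
#include <vector>

// Hypothetical stand-in for llvm::SUnit: a node with a remaining-predecessor
// count and edges to its successors.
struct Unit {
  unsigned NumPredsLeft = 0;
  std::vector<Unit *> Succs;
};

// Core of both releaseSucc variants above: when SU is scheduled, each
// successor loses one unscheduled predecessor; a successor whose count
// reaches zero becomes ready and is moved to the pending queue.
void releaseSuccessors(Unit *SU, std::vector<Unit *> &PendingQueue) {
  for (Unit *Succ : SU->Succs) {
    assert(Succ->NumPredsLeft > 0 && "successor released too many times");
    if (--Succ->NumPredsLeft == 0)
      PendingQueue.push_back(Succ);
  }
}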
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = 0;
  NextInstKind = IDOther;
  IsTopNode = false;

  // Check if we might want to switch the current clause type.
  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind].empty());
  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch].empty() || !Available[IDOther].empty());

  if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
    // We use the heuristic provided by the AMD Accelerated Parallel Processing
    // OpenCL Programming Guide:
    // The approx. number of WF that allows TEX inst to hide ALU inst is:
    // 500 (cycles for TEX) / (AluFetchRatio * 8 (cycles for ALU))
    // Cast to float so the ratio keeps its fractional part instead of being
    // truncated by integer division.
    float ALUFetchRatioEstimate =
        static_cast<float>(AluInstCount + AvailablesAluCount() +
                           Pending[IDAlu].size()) /
        (FetchInstCount + Available[IDFetch].size());
    unsigned NeededWF = 62.5f / ALUFetchRatioEstimate;
    DEBUG( dbgs() << NeededWF << " approx. Wavefronts Required\n" );
    // We assume the local GPR requirements to be "dominated" by the
    // requirement of the TEX clause (which consumes 128-bit regs); ALU
    // instructions before and after TEX are indeed likely to consume or
    // generate values from/for the TEX clause.
    // Available[IDFetch].size() * 2: GPRs required in the Fetch clause.
    // We assume that fetch instructions are either TnXYZW = TEX TnXYZW (need
    // one GPR) or TmXYZW = TnXYZW (need 2 GPRs).
    // (TODO: use RegisterPressure.)
    // If we are going to use too many GPRs, we flush Fetch instructions to
    // lower register pressure on 128-bit regs.
    unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
    if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
      AllowSwitchFromAlu = true;
  }

  // We want to schedule AR defs as soon as possible to make sure they aren't
  // put in a different ALU clause from their uses.
  if (!SU && !UnscheduledARDefs.empty()) {
    SU = UnscheduledARDefs[0];
    UnscheduledARDefs.erase(UnscheduledARDefs.begin());
    NextInstKind = IDAlu;
  }

  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
    // Try to pick an ALU instruction.
    SU = pickAlu();
    if (!SU && !PhysicalRegCopy.empty()) {
      SU = PhysicalRegCopy.front();
      PhysicalRegCopy.erase(PhysicalRegCopy.begin());
    }
    if (SU) {
      if (CurEmitted >= InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // Try to pick a FETCH instruction.
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // Try to pick any other instruction.
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  // We want to schedule the AR uses as late as possible to make sure that
  // the AR defs have been released.
  if (!SU && !UnscheduledARUses.empty()) {
    SU = UnscheduledARUses[0];
    UnscheduledARUses.erase(UnscheduledARUses.begin());
    NextInstKind = IDAlu;
  }

  DEBUG(
      if (SU) {
        dbgs() << " ** Pick node **\n";
        SU->dump(DAG);
      } else {
        dbgs() << "NO NODE \n";
        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
          const SUnit &S = DAG->SUnits[i];
          if (!S.isScheduled)
            S.dump(DAG);
        }
      }
  );

  return SU;
}
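The magic constant 62.5f above is just the comment's formula folded down: NeededWF = 500 / (AluFetchRatio * 8) = 62.5 / AluFetchRatio. A tiny worked example showing the numbers it produces; `neededWavefronts` is a hypothetical helper for illustration, not part of the scheduler:

#include <cstdio>

// Worked example of the latency-hiding heuristic: a TEX instruction takes
// ~500 cycles and an ALU instruction ~8, so the wavefront count needed to
// hide TEX latency behind ALU work is 500 / (ratio * 8) = 62.5 / ratio.
unsigned neededWavefronts(float AluFetchRatio) {
  return static_cast<unsigned>(62.5f / AluFetchRatio);
}

int main() {
  // 10 ALU instructions per fetch -> ~6 wavefronts suffice;
  // only 2 per fetch -> ~31 wavefronts are needed.
  std::printf("%u\n", neededWavefronts(10.0f)); // prints 6
  std::printf("%u\n", neededWavefronts(2.0f));  // prints 31
  return 0;
}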