/// This function assumes that "downward" from SU there exist /// tail/leaf of already constructed DAG. It iterates downward and /// checks whether SU can be aliasing any node dominated /// by it. static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList, unsigned LatencyToLoad) { if (!SU) return; SmallPtrSet<const SUnit*, 16> Visited; unsigned Depth = 0; for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end(); I != IE; ++I) { if (SU == *I) continue; if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) { unsigned Latency = ((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0; (*I)->addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0, /*isNormalMemory=*/true)); } // Now go through all the chain successors and iterate from them. // Keep track of visited nodes. for (SUnit::const_succ_iterator J = (*I)->Succs.begin(), JE = (*I)->Succs.end(); J != JE; ++J) if (J->isCtrl()) iterateChainSucc (AA, MFI, SU, J->getSUnit(), ExitSU, &Depth, Visited); } }
/// EmitPhysRegCopy - Emit the COPY instruction for a physical-register
/// copy node SU. Exactly one non-chain predecessor edge determines the
/// direction of the copy:
///   - predecessor has CopyDstRC set  -> copy virtual reg to a physical reg;
///     the destination physreg is found on SU's register successor edges.
///   - otherwise -> copy from the physical register carried on the pred
///     edge into a fresh virtual register recorded in VRBaseMap.
void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
                                  DenseMap<SUnit*, unsigned> &VRBaseMap) {
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    if (I->getSUnit()->CopyDstRC) {
      // Copy to physical register.
      DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
      assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
      // Find the destination physical register.
      unsigned Reg = 0;
      for (SUnit::const_succ_iterator II = SU->Succs.begin(),
             EE = SU->Succs.end(); II != EE; ++II) {
        // Skip chain successors: SDep::getReg() is only valid on
        // register dependence edges and asserts on Order edges.
        if (II->isCtrl()) continue;  // ignore chain succs
        if (II->getReg()) {
          Reg = II->getReg();
          break;
        }
      }
      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
        .addReg(VRI->second);
    } else {
      // Copy from physical register.
      assert(I->getReg() && "Unknown physical register!");
      unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
      bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
      (void)isNew; // Silence compiler warning.
      assert(isNew && "Node emitted out of order - early");
      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
        .addReg(I->getReg());
    }
    // Only the single register predecessor matters.
    break;
  }
}
/// ComputeHeight - Calculate the maximal path from the node to the entry. /// void SUnit::ComputeHeight() { SmallVector<SUnit*, 8> WorkList; WorkList.push_back(this); do { SUnit *Cur = WorkList.back(); bool Done = true; unsigned MaxSuccHeight = 0; for (SUnit::const_succ_iterator I = Cur->Succs.begin(), E = Cur->Succs.end(); I != E; ++I) { SUnit *SuccSU = I->getSUnit(); if (SuccSU->isHeightCurrent) MaxSuccHeight = std::max(MaxSuccHeight, SuccSU->Height + I->getLatency()); else { Done = false; WorkList.push_back(SuccSU); } } if (Done) { WorkList.pop_back(); if (MaxSuccHeight != Cur->Height) { Cur->setHeightDirty(); Cur->Height = MaxSuccHeight; } Cur->isHeightCurrent = true; } } while (!WorkList.empty()); }
/// Count the control (chain) successor edges hanging off \p SU.
static unsigned numberCtrlDepsInSU(SUnit *SU) {
  unsigned Count = 0;
  for (const SDep &Succ : SU->Succs)
    if (Succ.isCtrl())
      ++Count;
  return Count;
}
// ScheduledNode - As nodes are scheduled, we look to see if there are any // successor nodes that have a single unscheduled predecessor. If so, that // single predecessor has a higher priority, since scheduling it will make // the node available. void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { if (IgnoreAntiDep && ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) continue; AdjustPriorityOfUnscheduledPreds(I->getSUnit()); } }
void LatencyPriorityQueue::push_impl(SUnit *SU) {
  // Count the successors for which SU is the one-and-only unscheduled
  // predecessor; those nodes become available the moment SU is scheduled.
  unsigned Blocked = 0;
  for (const SDep &Succ : SU->Succs)
    if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
      ++Blocked;
  NumNodesSolelyBlocking[SU->NodeNum] = Blocked;

  Queue.push(SU);
}
void SUnit::setDepthDirty() { if (!isDepthCurrent) return; SmallVector<SUnit*, 8> WorkList; WorkList.push_back(this); do { SUnit *SU = WorkList.pop_back_val(); SU->isDepthCurrent = false; for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { SUnit *SuccSU = I->getSUnit(); if (SuccSU->isDepthCurrent) WorkList.push_back(SuccSU); } } while (!WorkList.empty()); }
/// dumpAll - Print this SUnit followed by its bookkeeping counters and a
/// line per predecessor/successor edge (kind, target SUnit pointer and
/// number, '*' for artificial edges). Debug-output only; no state changes.
void SUnit::dumpAll(const ScheduleDAG *G) const {
  dump(G);

  cerr << "  # preds left       : " << NumPredsLeft << "\n";
  cerr << "  # succs left       : " << NumSuccsLeft << "\n";
  cerr << "  Latency            : " << Latency << "\n";
  cerr << "  Depth              : " << Depth << "\n";
  cerr << "  Height             : " << Height << "\n";

  if (Preds.size() != 0) {
    cerr << "  Predecessors:\n";
    // NOTE(review): const_succ_iterator is used for Preds too - presumably
    // pred and succ iterators alias the same SDep iterator type here;
    // confirm against the SUnit declaration.
    for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
         I != E; ++I) {
      cerr << "   ";
      switch (I->getKind()) {
      case SDep::Data:   cerr << "val "; break;
      case SDep::Anti:   cerr << "anti"; break;
      case SDep::Output: cerr << "out "; break;
      case SDep::Order:  cerr << "ch  "; break;
      }
      cerr << "#";
      cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
      if (I->isArtificial())
        cerr << " *";
      cerr << "\n";
    }
  }
  if (Succs.size() != 0) {
    cerr << "  Successors:\n";
    for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
         I != E; ++I) {
      cerr << "   ";
      switch (I->getKind()) {
      case SDep::Data:   cerr << "val "; break;
      case SDep::Anti:   cerr << "anti"; break;
      case SDep::Output: cerr << "out "; break;
      case SDep::Order:  cerr << "ch  "; break;
      }
      cerr << "#";
      cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
      if (I->isArtificial())
        cerr << " *";
      cerr << "\n";
    }
  }
  cerr << "\n";
}
/// Check if scheduling of this SU is possible /// in the current packet. /// It is _not_ precise (statefull), it is more like /// another heuristic. Many corner cases are figured /// empirically. bool VLIWResourceModel::isResourceAvailable(SUnit *SU) { if (!SU || !SU->getInstr()) return false; // First see if the pipeline could receive this instruction // in the current cycle. switch (SU->getInstr()->getOpcode()) { default: if (!ResourcesModel->canReserveResources(*SU->getInstr())) return false; case TargetOpcode::EXTRACT_SUBREG: case TargetOpcode::INSERT_SUBREG: case TargetOpcode::SUBREG_TO_REG: case TargetOpcode::REG_SEQUENCE: case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::COPY: case TargetOpcode::INLINEASM: break; } MachineFunction &MF = *SU->getInstr()->getParent()->getParent(); auto &QII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); // Now see if there are no other dependencies to instructions already // in the packet. for (unsigned i = 0, e = Packet.size(); i != e; ++i) { if (Packet[i]->Succs.size() == 0) continue; // Enable .cur formation. if (QII.mayBeCurLoad(*Packet[i]->getInstr())) continue; for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), E = Packet[i]->Succs.end(); I != E; ++I) { // Since we do not add pseudos to packets, might as well // ignore order dependencies. if (I->isCtrl()) continue; if (I->getSUnit() == SU) return false; } } return true; }
/// numberRCValSuccInSU - Count data successors of \p SU that produce or
/// consume a value in register class \p RCId. CopyToReg successors are
/// counted unconditionally since the value is probably live out of the
/// block.
unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
                                                    unsigned RCId) {
  unsigned NumberDeps = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(),
       E = SU->Succs.end(); I != E; ++I) {
    // Chain edges carry no register value.
    if (I->isCtrl())
      continue;

    SUnit *SuccSU = I->getSUnit();
    const SDNode *ScegN = SuccSU->getNode();
    if (!ScegN)
      continue;

    // If value is passed to CopyToReg, it is probably
    // live outside BB.
    switch (ScegN->getOpcode()) {
      default:                 break;
      case ISD::TokenFactor:   break;
      case ISD::CopyFromReg:   break;
      case ISD::CopyToReg:     NumberDeps++;  break;
      case ISD::INLINEASM:     break;
    }
    // Non-machine nodes (including the CopyToReg counted above) have no
    // operand register classes to inspect.
    if (!ScegN->isMachineOpcode())
      continue;

    // Count the successor at most once if any of its operands is of a
    // legal type belonging to the requested register class.
    for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
      const SDValue &Op = ScegN->getOperand(i);
      MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
      if (TLI->isTypeLegal(VT)
          && (TLI->getRegClassFor(VT)->getID() == RCId)) {
        NumberDeps++;
        break;
      }
    }
  }
  return NumberDeps;
}
/// This recursive function iterates over chain deps of SUb looking for /// "latest" node that needs a chain edge to SUa. static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth, SmallPtrSet<const SUnit*, 16> &Visited) { if (!SUa || !SUb || SUb == ExitSU) return *Depth; // Remember visited nodes. if (!Visited.insert(SUb)) return *Depth; // If there is _some_ dependency already in place, do not // descend any further. // TODO: Need to make sure that if that dependency got eliminated or ignored // for any reason in the future, we would not violate DAG topology. // Currently it does not happen, but makes an implicit assumption about // future implementation. // // Independently, if we encounter node that is some sort of global // object (like a call) we already have full set of dependencies to it // and we can stop descending. if (SUa->isSucc(SUb) || isGlobalMemoryObject(AA, SUb->getInstr())) return *Depth; // If we do need an edge, or we have exceeded depth budget, // add that edge to the predecessors chain of SUb, // and stop descending. if (*Depth > 200 || MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { SUb->addPred(SDep(SUa, SDep::Order, /*Latency=*/0, /*Reg=*/0, /*isNormalMemory=*/true)); return *Depth; } // Track current depth. (*Depth)++; // Iterate over chain dependencies only. for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end(); I != E; ++I) if (I->isCtrl()) iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited); return *Depth; }
/// Check if scheduling of this SU is possible /// in the current packet. bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { if (!SU || !SU->getNode()) return false; // If this is a compound instruction, // it is likely to be a call. Do not delay it. if (SU->getNode()->getGluedNode()) return true; // First see if the pipeline could receive this instruction // in the current cycle. if (SU->getNode()->isMachineOpcode()) switch (SU->getNode()->getMachineOpcode()) { default: if (!ResourcesModel->canReserveResources(&TII->get( SU->getNode()->getMachineOpcode()))) return false; case TargetOpcode::EXTRACT_SUBREG: case TargetOpcode::INSERT_SUBREG: case TargetOpcode::SUBREG_TO_REG: case TargetOpcode::REG_SEQUENCE: case TargetOpcode::IMPLICIT_DEF: break; } // Now see if there are no other dependencies // to instructions alredy in the packet. for (unsigned i = 0, e = Packet.size(); i != e; ++i) for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), E = Packet[i]->Succs.end(); I != E; ++I) { // Since we do not add pseudos to packets, might as well // ignor order deps. if (I->isCtrl()) continue; if (I->getSUnit() == SU) return false; } return true; }
// ScheduledNode - As nodes are scheduled, we look to see if there are any // successor nodes that have a single unscheduled predecessor. If so, that // single predecessor has a higher priority, since scheduling it will make // the node available. void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) AdjustPriorityOfUnscheduledPreds(I->getSUnit()); }
/// dumpAll - Print this SUnit, its bookkeeping counters, and one line per
/// predecessor/successor edge: kind, target SU number, '*' for artificial
/// edges, edge latency, and (for assigned reg deps) the register.
/// Debug-output only; no state changes.
void SUnit::dumpAll(const ScheduleDAG *G) const {
  dump(G);

  dbgs() << "  # preds left       : " << NumPredsLeft << "\n";
  dbgs() << "  # succs left       : " << NumSuccsLeft << "\n";
  dbgs() << "  # rdefs left       : " << NumRegDefsLeft << "\n";
  dbgs() << "  Latency            : " << Latency << "\n";
  dbgs() << "  Depth              : " << Depth << "\n";
  dbgs() << "  Height             : " << Height << "\n";

  if (Preds.size() != 0) {
    dbgs() << "  Predecessors:\n";
    // NOTE(review): const_succ_iterator is used for Preds too - presumably
    // pred and succ iterators alias the same SDep iterator type here;
    // confirm against the SUnit declaration.
    for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
         I != E; ++I) {
      dbgs() << "   ";
      switch (I->getKind()) {
      case SDep::Data:   dbgs() << "val "; break;
      case SDep::Anti:   dbgs() << "anti"; break;
      case SDep::Output: dbgs() << "out "; break;
      case SDep::Order:  dbgs() << "ch  "; break;
      }
      dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
      if (I->isArtificial())
        dbgs() << " *";
      dbgs() << ": Latency=" << I->getLatency();
      if (I->isAssignedRegDep())
        dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
      dbgs() << "\n";
    }
  }
  if (Succs.size() != 0) {
    dbgs() << "  Successors:\n";
    for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
         I != E; ++I) {
      dbgs() << "   ";
      switch (I->getKind()) {
      case SDep::Data:   dbgs() << "val "; break;
      case SDep::Anti:   dbgs() << "anti"; break;
      case SDep::Output: dbgs() << "out "; break;
      case SDep::Order:  dbgs() << "ch  "; break;
      }
      dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
      if (I->isArtificial())
        dbgs() << " *";
      dbgs() << ": Latency=" << I->getLatency();
      dbgs() << "\n";
    }
  }
  dbgs() << "\n";
}
/// Main resource tracking point.
///
/// Called with each scheduled SU: updates the register-pressure estimate,
/// reserves DFA resources, and maintains the parallel-live-range and
/// horizontal/vertical balance heuristics. A NULL SU is an event marker
/// that resets the packet/DFA state.
void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
  // Use NULL entry as an event marker to reset
  // the DFA state.
  if (!SU) {
    ResourcesModel->clearResources();
    Packet.clear();
    return;
  }

  const SDNode *ScegN = SU->getNode();
  // Update reg pressure tracking.
  // First update current node.
  if (ScegN->isMachineOpcode()) {
    // Estimate generated regs: each legal result value adds pressure in
    // its register class, weighted by the number of value successors.
    for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
      MVT VT = ScegN->getSimpleValueType(i);

      if (TLI->isTypeLegal(VT)) {
        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
        if (RC)
          RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
      }
    }
    // Estimate killed regs: consumed operands reduce pressure, clamped
    // at zero to keep the unsigned counters from wrapping.
    for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
      const SDValue &Op = ScegN->getOperand(i);
      MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());

      if (TLI->isTypeLegal(VT)) {
        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
        if (RC) {
          if (RegPressure[RC->getID()] >
            (numberRCValPredInSU(SU, RC->getID())))
            RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
          else RegPressure[RC->getID()] = 0;
        }
      }
    }
    // One register def of each data predecessor is now consumed.
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
                              I != E; ++I) {
      if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
        continue;
      --I->getSUnit()->NumRegDefsLeft;
    }
  }

  // Reserve resources for this SU.
  reserveResources(SU);

  // Adjust number of parallel live ranges.
  // Heuristic is simple - node with no data successors reduces
  // number of live ranges. All others, increase it.
  unsigned NumberNonControlDeps = 0;

  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
                                  I != E; ++I) {
    adjustPriorityOfUnscheduledPreds(I->getSUnit());
    if (!I->isCtrl())
      NumberNonControlDeps++;
  }

  if (!NumberNonControlDeps) {
    // No data successors: this node only consumes values - live ranges
    // shrink (clamped at zero).
    if (ParallelLiveRanges >= SU->NumPreds)
      ParallelLiveRanges -= SU->NumPreds;
    else
      ParallelLiveRanges = 0;
  }
  else
    ParallelLiveRanges += SU->NumRegDefsLeft;

  // Track parallel live chains.
  HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
  HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
}