void PatmosLatencyQueue::dump() { dbgs() << "PendingQueue:"; for (unsigned i = 0; i < PendingQueue.size(); i++) { SUnit *SU = PendingQueue[i]; if (i > 0) dbgs() << ","; dbgs() << " SU(" << SU->NodeNum << "): Height " << SU->getHeight() << " Depth " << SU->getDepth() << " Tree: " << Cmp.DFSResult->getSubtreeID(SU) << " @" << Cmp.DFSResult->getSubtreeLevel(Cmp.DFSResult->getSubtreeID(SU)); if (SU->isScheduleLow) dbgs() << " low "; } dbgs() << "\nAvailableQueue:"; for (unsigned i = 0; i < AvailableQueue.size(); i++) { SUnit *SU = AvailableQueue[i]; if (i > 0) dbgs() << ","; dbgs() << " SU(" << SU->NodeNum << ") Height " << SU->getHeight() << " Depth " << SU->getDepth() << " ILP: " << Cmp.DFSResult->getILP(SU); if (SU->isScheduleLow) dbgs() << " low "; } dbgs() << "\n"; }
/// CriticalPathStep - Return the next SUnit after SU on the bottom-up /// critical path. static SDep *CriticalPathStep(SUnit *SU) { SDep *Next = 0; unsigned NextDepth = 0; // Find the predecessor edge with the greatest depth. for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { SUnit *PredSU = P->getSUnit(); unsigned PredLatency = P->getLatency(); unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; // In the case of a latency tie, prefer an anti-dependency edge over // other types of edges. if (NextDepth < PredTotalLatency || (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { NextDepth = PredTotalLatency; Next = &*P; } } return Next; }
unsigned CriticalAntiDepBreaker:: BreakAntiDependencies(std::vector<SUnit>& SUnits, MachineBasicBlock::iterator& Begin, MachineBasicBlock::iterator& End, unsigned InsertPosIndex) { // The code below assumes that there is at least one instruction, // so just duck out immediately if the block is empty. if (SUnits.empty()) return 0; // Find the node at the bottom of the critical path. SUnit *Max = 0; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { SUnit *SU = &SUnits[i]; if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } #ifndef NDEBUG { DEBUG(errs() << "Critical path has total latency " << (Max->getDepth() + Max->Latency) << "\n"); DEBUG(errs() << "Available regs:"); for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { if (KillIndices[Reg] == ~0u) DEBUG(errs() << " " << TRI->getName(Reg)); } DEBUG(errs() << '\n'); } #endif // Track progress along the critical path through the SUnit graph as we walk // the instructions. SUnit *CriticalPathSU = Max; MachineInstr *CriticalPathMI = CriticalPathSU->getInstr(); // Consider this pattern: // A = ... // ... = A // A = ... // ... = A // A = ... // ... = A // A = ... // ... = A // There are three anti-dependencies here, and without special care, // we'd break all of them using the same register: // A = ... // ... = A // B = ... // ... = B // B = ... // ... = B // B = ... // ... = B // because at each anti-dependence, B is the first register that // isn't A which is free. This re-introduces anti-dependencies // at all but one of the original anti-dependencies that we were // trying to break. To avoid this, keep track of the most recent // register that each register was replaced with, avoid // using it to repair an anti-dependence on the same register. // This lets us produce this: // A = ... // ... = A // B = ... // ... = B // C = ... // ... = C // B = ... // ... = B // This still has an anti-dependence on B, but at least it isn't on the // original critical path. // // TODO: If we tracked more than one register here, we could potentially // fix that remaining critical edge too. This is a little more involved, // because unlike the most recent register, less recent registers should // still be considered, though only if no other registers are available. unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {}; // Attempt to break anti-dependence edges on the critical path. Walk the // instructions from the bottom up, tracking information about liveness // as we go to help determine which registers are available. unsigned Broken = 0; unsigned Count = InsertPosIndex - 1; for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) { MachineInstr *MI = --I; // Check if this instruction has a dependence on the critical path that // is an anti-dependence that we may be able to break. If it is, set // AntiDepReg to the non-zero register associated with the anti-dependence. // // We limit our attention to the critical path as a heuristic to avoid // breaking anti-dependence edges that aren't going to significantly // impact the overall schedule. There are a limited number of registers // and we want to save them for the important edges. // // TODO: Instructions with multiple defs could have multiple // anti-dependencies. The current code here only knows how to break one // edge per instruction. Note that we'd have to be able to break all of // the anti-dependencies in an instruction in order to be effective. unsigned AntiDepReg = 0; if (MI == CriticalPathMI) { if (SDep *Edge = CriticalPathStep(CriticalPathSU)) { SUnit *NextSU = Edge->getSUnit(); // Only consider anti-dependence edges. if (Edge->getKind() == SDep::Anti) { AntiDepReg = Edge->getReg(); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); if (!AllocatableSet.test(AntiDepReg)) // Don't break anti-dependencies on non-allocatable registers. AntiDepReg = 0; else if (KeepRegs.count(AntiDepReg)) // Don't break anti-dependencies if an use down below requires // this exact register. AntiDepReg = 0; else { // If the SUnit has other dependencies on the SUnit that it // anti-depends on, don't bother breaking the anti-dependency // since those edges would prevent such units from being // scheduled past each other regardless. // // Also, if there are dependencies on other SUnits with the // same register as the anti-dependency, don't attempt to // break it. for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(), PE = CriticalPathSU->Preds.end(); P != PE; ++P) if (P->getSUnit() == NextSU ? (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { AntiDepReg = 0; break; } } } CriticalPathSU = NextSU; CriticalPathMI = CriticalPathSU->getInstr(); } else { // We've reached the end of the critical path. CriticalPathSU = 0; CriticalPathMI = 0; } } PrescanInstruction(MI); if (MI->getDesc().hasExtraDefRegAllocReq()) // If this instruction's defs have special allocation requirement, don't // break this anti-dependency. AntiDepReg = 0; else if (AntiDepReg) { // If this instruction has a use of AntiDepReg, breaking it // is invalid. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; if (MO.isUse() && AntiDepReg == Reg) { AntiDepReg = 0; break; } } } // Determine AntiDepReg's register class, if it is live and is // consistently used within a single class. const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0; assert((AntiDepReg == 0 || RC != NULL) && "Register should be live if it's causing an anti-dependence!"); if (RC == reinterpret_cast<TargetRegisterClass *>(-1)) AntiDepReg = 0; // Look for a suitable register to use to break the anti-depenence. // // TODO: Instead of picking the first free register, consider which might // be the best. if (AntiDepReg != 0) { if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg, LastNewReg[AntiDepReg], RC)) { DEBUG(errs() << "Breaking anti-dependence edge on " << TRI->getName(AntiDepReg) << " with " << RegRefs.count(AntiDepReg) << " references" << " using " << TRI->getName(NewReg) << "!\n"); // Update the references to the old register to refer to the new // register. std::pair<std::multimap<unsigned, MachineOperand *>::iterator, std::multimap<unsigned, MachineOperand *>::iterator> Range = RegRefs.equal_range(AntiDepReg); for (std::multimap<unsigned, MachineOperand *>::iterator Q = Range.first, QE = Range.second; Q != QE; ++Q) Q->second->setReg(NewReg); // We just went back in time and modified history; the // liveness information for the anti-depenence reg is now // inconsistent. Set the state as if it were dead. Classes[NewReg] = Classes[AntiDepReg]; DefIndices[NewReg] = DefIndices[AntiDepReg]; KillIndices[NewReg] = KillIndices[AntiDepReg]; assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) && "Kill and Def maps aren't consistent for NewReg!"); Classes[AntiDepReg] = 0; DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; KillIndices[AntiDepReg] = ~0u; assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) && "Kill and Def maps aren't consistent for AntiDepReg!"); RegRefs.erase(AntiDepReg); LastNewReg[AntiDepReg] = NewReg; ++Broken; } } ScanInstruction(MI, Count); } return Broken; }