void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap) { for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) continue; // ignore chain preds if (I->getSUnit()->CopyDstRC) { // Copy to physical register. DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit()); assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); // Find the destination physical register. unsigned Reg = 0; for (SUnit::const_succ_iterator II = SU->Succs.begin(), EE = SU->Succs.end(); II != EE; ++II) { if (II->getReg()) { Reg = II->getReg(); break; } } BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) .addReg(VRI->second); } else { // Copy from physical register. assert(I->getReg() && "Unknown physical register!"); unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) .addReg(I->getReg()); } break; } }
unsigned CriticalAntiDepBreaker:: BreakAntiDependencies(const std::vector<SUnit>& SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex, DbgValueVector &DbgValues) { // The code below assumes that there is at least one instruction, // so just duck out immediately if the block is empty. if (SUnits.empty()) return 0; // Keep a map of the MachineInstr*'s back to the SUnit representing them. // This is used for updating debug information. DenseMap<MachineInstr*,const SUnit*> MISUnitMap; // Find the node at the bottom of the critical path. const SUnit *Max = 0; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { const SUnit *SU = &SUnits[i]; MISUnitMap[SU->getInstr()] = SU; if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } #ifndef NDEBUG { DEBUG(dbgs() << "Critical path has total latency " << (Max->getDepth() + Max->Latency) << "\n"); DEBUG(dbgs() << "Available regs:"); for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { if (KillIndices[Reg] == ~0u) DEBUG(dbgs() << " " << TRI->getName(Reg)); } DEBUG(dbgs() << '\n'); } #endif // Track progress along the critical path through the SUnit graph as we walk // the instructions. const SUnit *CriticalPathSU = Max; MachineInstr *CriticalPathMI = CriticalPathSU->getInstr(); // Consider this pattern: // A = ... // ... = A // A = ... // ... = A // A = ... // ... = A // A = ... // ... = A // There are three anti-dependencies here, and without special care, // we'd break all of them using the same register: // A = ... // ... = A // B = ... // ... = B // B = ... // ... = B // B = ... // ... = B // because at each anti-dependence, B is the first register that // isn't A which is free. This re-introduces anti-dependencies // at all but one of the original anti-dependencies that we were // trying to break. To avoid this, keep track of the most recent // register that each register was replaced with, avoid // using it to repair an anti-dependence on the same register. // This lets us produce this: // A = ... // ... = A // B = ... // ... = B // C = ... // ... = C // B = ... // ... = B // This still has an anti-dependence on B, but at least it isn't on the // original critical path. // // TODO: If we tracked more than one register here, we could potentially // fix that remaining critical edge too. This is a little more involved, // because unlike the most recent register, less recent registers should // still be considered, though only if no other registers are available. std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0); // Attempt to break anti-dependence edges on the critical path. Walk the // instructions from the bottom up, tracking information about liveness // as we go to help determine which registers are available. unsigned Broken = 0; unsigned Count = InsertPosIndex - 1; for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) { MachineInstr *MI = --I; if (MI->isDebugValue()) continue; // Check if this instruction has a dependence on the critical path that // is an anti-dependence that we may be able to break. If it is, set // AntiDepReg to the non-zero register associated with the anti-dependence. // // We limit our attention to the critical path as a heuristic to avoid // breaking anti-dependence edges that aren't going to significantly // impact the overall schedule. There are a limited number of registers // and we want to save them for the important edges. // // TODO: Instructions with multiple defs could have multiple // anti-dependencies. The current code here only knows how to break one // edge per instruction. Note that we'd have to be able to break all of // the anti-dependencies in an instruction in order to be effective. unsigned AntiDepReg = 0; if (MI == CriticalPathMI) { if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) { const SUnit *NextSU = Edge->getSUnit(); // Only consider anti-dependence edges. if (Edge->getKind() == SDep::Anti) { AntiDepReg = Edge->getReg(); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); if (!RegClassInfo.isAllocatable(AntiDepReg)) // Don't break anti-dependencies on non-allocatable registers. AntiDepReg = 0; else if (KeepRegs.count(AntiDepReg)) // Don't break anti-dependencies if an use down below requires // this exact register. AntiDepReg = 0; else { // If the SUnit has other dependencies on the SUnit that it // anti-depends on, don't bother breaking the anti-dependency // since those edges would prevent such units from being // scheduled past each other regardless. // // Also, if there are dependencies on other SUnits with the // same register as the anti-dependency, don't attempt to // break it. for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(), PE = CriticalPathSU->Preds.end(); P != PE; ++P) if (P->getSUnit() == NextSU ? (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { AntiDepReg = 0; break; } } } CriticalPathSU = NextSU; CriticalPathMI = CriticalPathSU->getInstr(); } else { // We've reached the end of the critical path. CriticalPathSU = 0; CriticalPathMI = 0; } } PrescanInstruction(MI); // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || TII->isPredicated(MI)) // If this instruction's defs have special allocation requirement, don't // break this anti-dependency. AntiDepReg = 0; else if (AntiDepReg) { // If this instruction has a use of AntiDepReg, breaking it // is invalid. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) { AntiDepReg = 0; break; } } } // Determine AntiDepReg's register class, if it is live and is // consistently used within a single class. const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0; assert((AntiDepReg == 0 || RC != NULL) && "Register should be live if it's causing an anti-dependence!"); if (RC == reinterpret_cast<TargetRegisterClass *>(-1)) AntiDepReg = 0; // Look for a suitable register to use to break the anti-depenence. // // TODO: Instead of picking the first free register, consider which might // be the best. if (AntiDepReg != 0) { std::pair<std::multimap<unsigned, MachineOperand *>::iterator, std::multimap<unsigned, MachineOperand *>::iterator> Range = RegRefs.equal_range(AntiDepReg); if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second, AntiDepReg, LastNewReg[AntiDepReg], RC)) { DEBUG(dbgs() << "Breaking anti-dependence edge on " << TRI->getName(AntiDepReg) << " with " << RegRefs.count(AntiDepReg) << " references" << " using " << TRI->getName(NewReg) << "!\n"); // Update the references to the old register to refer to the new // register. for (std::multimap<unsigned, MachineOperand *>::iterator Q = Range.first, QE = Range.second; Q != QE; ++Q) { Q->second->setReg(NewReg); // If the SU for the instruction being updated has debug information // related to the anti-dependency register, make sure to update that // as well. const SUnit *SU = MISUnitMap[Q->second->getParent()]; if (!SU) continue; for (DbgValueVector::iterator DVI = DbgValues.begin(), DVE = DbgValues.end(); DVI != DVE; ++DVI) if (DVI->second == Q->second->getParent()) UpdateDbgValue(DVI->first, AntiDepReg, NewReg); } // We just went back in time and modified history; the // liveness information for the anti-dependence reg is now // inconsistent. Set the state as if it were dead. Classes[NewReg] = Classes[AntiDepReg]; DefIndices[NewReg] = DefIndices[AntiDepReg]; KillIndices[NewReg] = KillIndices[AntiDepReg]; assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) && "Kill and Def maps aren't consistent for NewReg!"); Classes[AntiDepReg] = 0; DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; KillIndices[AntiDepReg] = ~0u; assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) && "Kill and Def maps aren't consistent for AntiDepReg!"); RegRefs.erase(AntiDepReg); LastNewReg[AntiDepReg] = NewReg; ++Broken; } } ScanInstruction(MI, Count); } return Broken; }