/// ComputeDepth - Calculate the maximal path from the node to the exit.
///
void SUnit::ComputeDepth() {
  SmallVector<SUnit*, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *Cur = WorkList.back();

    bool Done = true;
    unsigned MaxPredDepth = 0;
    for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
         E = Cur->Preds.end(); I != E; ++I) {
      SUnit *PredSU = I->getSUnit();
      if (PredSU->isDepthCurrent)
        MaxPredDepth = std::max(MaxPredDepth,
                                PredSU->Depth + I->getLatency());
      else {
        Done = false;
        WorkList.push_back(PredSU);
      }
    }

    if (Done) {
      WorkList.pop_back();
      if (MaxPredDepth != Cur->Depth) {
        Cur->setDepthDirty();
        Cur->Depth = MaxPredDepth;
      }
      Cur->isDepthCurrent = true;
    }
  } while (!WorkList.empty());
}
Exemple #2
0
/// InitDAGTopologicalSorting - create the initial topological
/// ordering from the DAG to be scheduled.
///
/// The idea of the algorithm is taken from
/// "Online algorithms for managing the topological order of
/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
/// This is the MNR algorithm, which was first introduced by
/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
/// "Maintaining a topological order under edge insertions".
///
/// Short description of the algorithm:
///
/// Topological ordering, ord, of a DAG maps each node to a topological
/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
///
/// This means that if there is a path from the node X to the node Z,
/// then ord(X) < ord(Z).
///
/// This property can be used to check for reachability of nodes:
/// if Z is reachable from X, then an insertion of the edge Z->X would
/// create a cycle.
///
/// The algorithm first computes a topological ordering for the DAG by
/// initializing the Index2Node and Node2Index arrays and then tries to keep
/// the ordering up-to-date after edge insertions by reordering the DAG.
///
/// On insertion of the edge X->Y, the algorithm first marks by calling DFS
/// the nodes reachable from Y, and then shifts them using Shift to lie
/// immediately after X in Index2Node.
void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
  unsigned DAGSize = SUnits.size();
  std::vector<SUnit*> WorkList;
  WorkList.reserve(DAGSize);

  Index2Node.resize(DAGSize);
  Node2Index.resize(DAGSize);

  // Initialize the data structures.
  if (ExitSU)
    WorkList.push_back(ExitSU);
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SUnit *SU = &SUnits[i];
    int NodeNum = SU->NodeNum;
    unsigned Degree = SU->Succs.size();
    // Temporarily use the Node2Index array as scratch space for degree counts.
    Node2Index[NodeNum] = Degree;

    // Is it a node without dependencies?
    if (Degree == 0) {
      assert(SU->Succs.empty() && "SUnit should have no successors");
      // Collect leaf nodes.
      WorkList.push_back(SU);
    }
  }

  int Id = DAGSize;
  while (!WorkList.empty()) {
    SUnit *SU = WorkList.back();
    WorkList.pop_back();
    if (SU->NodeNum < DAGSize)
      Allocate(SU->NodeNum, --Id);
    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      SUnit *SU = I->getSUnit();
      if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum])
        // If all dependencies of the node are processed already,
        // then the node can be computed now.
        WorkList.push_back(SU);
    }
  }

  Visited.resize(DAGSize);

#ifndef NDEBUG
  // Check correctness of the ordering
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SUnit *SU = &SUnits[i];
    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
      "Wrong topological sorting");
    }
  }
#endif
}
void SUnit::setHeightDirty() {
  if (!isHeightCurrent) return;
  SmallVector<SUnit*, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *SU = WorkList.pop_back_val();
    SU->isHeightCurrent = false;
    for (SUnit::const_pred_iterator I = SU->Preds.begin(),
         E = SU->Preds.end(); I != E; ++I) {
      SUnit *PredSU = I->getSUnit();
      if (PredSU->isHeightCurrent)
        WorkList.push_back(PredSU);
    }
  } while (!WorkList.empty());
}
/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
/// of SU, return it, otherwise return null.
SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
  SUnit *OnlyAvailablePred = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    SUnit &Pred = *I->getSUnit();
    if (!Pred.isScheduled) {
      // We found an available, but not scheduled, predecessor.  If it's the
      // only one we have found, keep track of it... otherwise give up.
      if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
        return 0;
      OnlyAvailablePred = &Pred;
    }
  }
  return OnlyAvailablePred;
}
/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
static const SDep *CriticalPathStep(const SUnit *SU) {
  const SDep *Next = 0;
  unsigned NextDepth = 0;
  // Find the predecessor edge with the greatest depth.
  for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
       P != PE; ++P) {
    const SUnit *PredSU = P->getSUnit();
    unsigned PredLatency = P->getLatency();
    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
    // In the case of a latency tie, prefer an anti-dependency edge over
    // other types of edges.
    if (NextDepth < PredTotalLatency ||
        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
      NextDepth = PredTotalLatency;
      Next = &*P;
    }
  }
  return Next;
}
Exemple #6
0
/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
/// of SU, return it, otherwise return null.
SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
  SUnit *OnlyAvailablePred = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (IgnoreAntiDep && 
        ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) 
      continue;

    SUnit &Pred = *I->getSUnit();
    if (!Pred.isScheduled) {
      // We found an available, but not scheduled, predecessor.  If it's the
      // only one we have found, keep track of it... otherwise give up.
      if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
        return 0;
      OnlyAvailablePred = &Pred;
    }
  }
      
  return OnlyAvailablePred;
}
void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
                                  DenseMap<SUnit*, unsigned> &VRBaseMap) {
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    if (I->getSUnit()->CopyDstRC) {
      // Copy to physical register.
      DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
      assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
      // Find the destination physical register.
      unsigned Reg = 0;
      for (SUnit::const_succ_iterator II = SU->Succs.begin(),
             EE = SU->Succs.end(); II != EE; ++II) {
        if (II->getReg()) {
          Reg = II->getReg();
          break;
        }
      }
      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
        .addReg(VRI->second);
    } else {
      // Copy from physical register.
      assert(I->getReg() && "Unknown physical register!");
      unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
      bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
      (void)isNew; // Silence compiler warning.
      assert(isNew && "Node emitted out of order - early");
      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
        .addReg(I->getReg());
    }
    break;
  }
}
unsigned CriticalAntiDepBreaker::
BreakAntiDependencies(const std::vector<SUnit>& SUnits,
                      MachineBasicBlock::iterator Begin,
                      MachineBasicBlock::iterator End,
                      unsigned InsertPosIndex,
                      DbgValueVector &DbgValues) {
  // The code below assumes that there is at least one instruction,
  // so just duck out immediately if the block is empty.
  if (SUnits.empty()) return 0;

  // Keep a map of the MachineInstr*'s back to the SUnit representing them.
  // This is used for updating debug information.
  DenseMap<MachineInstr*,const SUnit*> MISUnitMap;

  // Find the node at the bottom of the critical path.
  const SUnit *Max = 0;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    const SUnit *SU = &SUnits[i];
    MISUnitMap[SU->getInstr()] = SU;
    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
      Max = SU;
  }

#ifndef NDEBUG
  {
    DEBUG(dbgs() << "Critical path has total latency "
          << (Max->getDepth() + Max->Latency) << "\n");
    DEBUG(dbgs() << "Available regs:");
    for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
      if (KillIndices[Reg] == ~0u)
        DEBUG(dbgs() << " " << TRI->getName(Reg));
    }
    DEBUG(dbgs() << '\n');
  }
#endif

  // Track progress along the critical path through the SUnit graph as we walk
  // the instructions.
  const SUnit *CriticalPathSU = Max;
  MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();

  // Consider this pattern:
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  // There are three anti-dependencies here, and without special care,
  // we'd break all of them using the same register:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  // because at each anti-dependence, B is the first register that
  // isn't A which is free.  This re-introduces anti-dependencies
  // at all but one of the original anti-dependencies that we were
  // trying to break.  To avoid this, keep track of the most recent
  // register that each register was replaced with, avoid
  // using it to repair an anti-dependence on the same register.
  // This lets us produce this:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   C = ...
  //   ... = C
  //   B = ...
  //   ... = B
  // This still has an anti-dependence on B, but at least it isn't on the
  // original critical path.
  //
  // TODO: If we tracked more than one register here, we could potentially
  // fix that remaining critical edge too. This is a little more involved,
  // because unlike the most recent register, less recent registers should
  // still be considered, though only if no other registers are available.
  std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0);

  // Attempt to break anti-dependence edges on the critical path. Walk the
  // instructions from the bottom up, tracking information about liveness
  // as we go to help determine which registers are available.
  unsigned Broken = 0;
  unsigned Count = InsertPosIndex - 1;
  for (MachineBasicBlock::iterator I = End, E = Begin;
       I != E; --Count) {
    MachineInstr *MI = --I;
    if (MI->isDebugValue())
      continue;

    // Check if this instruction has a dependence on the critical path that
    // is an anti-dependence that we may be able to break. If it is, set
    // AntiDepReg to the non-zero register associated with the anti-dependence.
    //
    // We limit our attention to the critical path as a heuristic to avoid
    // breaking anti-dependence edges that aren't going to significantly
    // impact the overall schedule. There are a limited number of registers
    // and we want to save them for the important edges.
    //
    // TODO: Instructions with multiple defs could have multiple
    // anti-dependencies. The current code here only knows how to break one
    // edge per instruction. Note that we'd have to be able to break all of
    // the anti-dependencies in an instruction in order to be effective.
    unsigned AntiDepReg = 0;
    if (MI == CriticalPathMI) {
      if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) {
        const SUnit *NextSU = Edge->getSUnit();

        // Only consider anti-dependence edges.
        if (Edge->getKind() == SDep::Anti) {
          AntiDepReg = Edge->getReg();
          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
          if (!RegClassInfo.isAllocatable(AntiDepReg))
            // Don't break anti-dependencies on non-allocatable registers.
            AntiDepReg = 0;
          else if (KeepRegs.count(AntiDepReg))
            // Don't break anti-dependencies if an use down below requires
            // this exact register.
            AntiDepReg = 0;
          else {
            // If the SUnit has other dependencies on the SUnit that it
            // anti-depends on, don't bother breaking the anti-dependency
            // since those edges would prevent such units from being
            // scheduled past each other regardless.
            //
            // Also, if there are dependencies on other SUnits with the
            // same register as the anti-dependency, don't attempt to
            // break it.
            for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(),
                 PE = CriticalPathSU->Preds.end(); P != PE; ++P)
              if (P->getSUnit() == NextSU ?
                    (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
                    (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
                AntiDepReg = 0;
                break;
              }
          }
        }
        CriticalPathSU = NextSU;
        CriticalPathMI = CriticalPathSU->getInstr();
      } else {
        // We've reached the end of the critical path.
        CriticalPathSU = 0;
        CriticalPathMI = 0;
      }
    }

    PrescanInstruction(MI);

    // If MI's defs have a special allocation requirement, don't allow
    // any def registers to be changed. Also assume all registers
    // defined in a call must not be changed (ABI).
    if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
        TII->isPredicated(MI))
      // If this instruction's defs have special allocation requirement, don't
      // break this anti-dependency.
      AntiDepReg = 0;
    else if (AntiDepReg) {
      // If this instruction has a use of AntiDepReg, breaking it
      // is invalid.
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        MachineOperand &MO = MI->getOperand(i);
        if (!MO.isReg()) continue;
        unsigned Reg = MO.getReg();
        if (Reg == 0) continue;
        if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
          AntiDepReg = 0;
          break;
        }
      }
    }

    // Determine AntiDepReg's register class, if it is live and is
    // consistently used within a single class.
    const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
    assert((AntiDepReg == 0 || RC != NULL) &&
           "Register should be live if it's causing an anti-dependence!");
    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
      AntiDepReg = 0;

    // Look for a suitable register to use to break the anti-depenence.
    //
    // TODO: Instead of picking the first free register, consider which might
    // be the best.
    if (AntiDepReg != 0) {
      std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
                std::multimap<unsigned, MachineOperand *>::iterator>
        Range = RegRefs.equal_range(AntiDepReg);
      if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
                                                     AntiDepReg,
                                                     LastNewReg[AntiDepReg],
                                                     RC)) {
        DEBUG(dbgs() << "Breaking anti-dependence edge on "
              << TRI->getName(AntiDepReg)
              << " with " << RegRefs.count(AntiDepReg) << " references"
              << " using " << TRI->getName(NewReg) << "!\n");

        // Update the references to the old register to refer to the new
        // register.
        for (std::multimap<unsigned, MachineOperand *>::iterator
             Q = Range.first, QE = Range.second; Q != QE; ++Q) {
          Q->second->setReg(NewReg);
          // If the SU for the instruction being updated has debug information
          // related to the anti-dependency register, make sure to update that
          // as well.
          const SUnit *SU = MISUnitMap[Q->second->getParent()];
          if (!SU) continue;
          for (DbgValueVector::iterator DVI = DbgValues.begin(),
                 DVE = DbgValues.end(); DVI != DVE; ++DVI)
            if (DVI->second == Q->second->getParent())
              UpdateDbgValue(DVI->first, AntiDepReg, NewReg);
        }

        // We just went back in time and modified history; the
        // liveness information for the anti-dependence reg is now
        // inconsistent. Set the state as if it were dead.
        Classes[NewReg] = Classes[AntiDepReg];
        DefIndices[NewReg] = DefIndices[AntiDepReg];
        KillIndices[NewReg] = KillIndices[AntiDepReg];
        assert(((KillIndices[NewReg] == ~0u) !=
                (DefIndices[NewReg] == ~0u)) &&
             "Kill and Def maps aren't consistent for NewReg!");

        Classes[AntiDepReg] = 0;
        DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
        KillIndices[AntiDepReg] = ~0u;
        assert(((KillIndices[AntiDepReg] == ~0u) !=
                (DefIndices[AntiDepReg] == ~0u)) &&
             "Kill and Def maps aren't consistent for AntiDepReg!");

        RegRefs.erase(AntiDepReg);
        LastNewReg[AntiDepReg] = NewReg;
        ++Broken;
      }
    }

    ScanInstruction(MI, Count);
  }

  return Broken;
}
Exemple #9
0
/// InitDAGTopologicalSorting - create the initial topological
/// ordering from the DAG to be scheduled.
///
/// The idea of the algorithm is taken from
/// "Online algorithms for managing the topological order of
/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
/// This is the MNR algorithm, which was first introduced by
/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
/// "Maintaining a topological order under edge insertions".
///
/// Short description of the algorithm:
///
/// Topological ordering, ord, of a DAG maps each node to a topological
/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
///
/// This means that if there is a path from the node X to the node Z,
/// then ord(X) < ord(Z).
///
/// This property can be used to check for reachability of nodes:
/// if Z is reachable from X, then an insertion of the edge Z->X would
/// create a cycle.
///
/// The algorithm first computes a topological ordering for the DAG by
/// initializing the Index2Node and Node2Index arrays and then tries to keep
/// the ordering up-to-date after edge insertions by reordering the DAG.
///
/// On insertion of the edge X->Y, the algorithm first marks by calling DFS
/// the nodes reachable from Y, and then shifts them using Shift to lie
/// immediately after X in Index2Node.
void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {

    /* MISSING :
       - Implement topological sorting over the nodes in 'SUnits' acording 
       to the least latency heuristic, i.e. at each iteration, 
       the instruction with the least latency is selected.

       1) Use the 'Allocate' function in this class to assign the topological ordering.
       2) Iteratively select a node without predecessors, assign a topological index,
       remove it , and process the successors without any incoming edges.
     */

    unsigned DAGSize = SUnits.size();
    std::vector<SUnit*> WorkList;
    WorkList.reserve(DAGSize);

    Index2Node.resize(DAGSize);
    Node2Index.resize(DAGSize);

    /*Initialize the WorkList with all leaf nodes
     Also the map which contains the NumPreds
     */
    std::map<int, int> succ_degree;
    for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
		SUnit *current = &SUnits[i];
        if (current->Succs.size() == 0) {
            WorkList.push_back(current);
        }
        succ_degree[current->NodeNum] = current->Succs.size();
    }
	
	int order = DAGSize;
	// int min_latency;
	// SUnit *selected;
    while (!WorkList.empty()){
        SUnit *current = WorkList.back();
        WorkList.pop_back();
        Allocate(current->NodeNum, order);
        order--;
		// min_latency = 999;
		// selected = NULL;
        for (SUnit::const_pred_iterator i = current->Preds.begin(), e = current->Preds.end(); i != e; ++i){
            SUnit *pred = i->getSUnit();
			// int curr_latency = i->getLatency();
			// if (curr_latency < min_latency){
			// 	min_latency = curr_latency;
			// 	selected = pred;
			// }
			
            succ_degree[pred->NodeNum]--;
            if (succ_degree[pred->NodeNum] == 0){
                WorkList.push_back(pred);
            }

        }
		// if (selected){
		// 	WorkList.push_back(selected);
		// }
    }

}