Example no. 1
/// numberCtrlPredInSU - Count the predecessors of SU that are reached via
/// control (non-data) edges.
static unsigned numberCtrlPredInSU(SUnit *SU) {
  unsigned NumberDeps = 0;
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (I->isCtrl())
      NumberDeps++;

  return NumberDeps;
}
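
The same counting pattern, reduced to a self-contained sketch with toy stand-ins (ToySUnit and ToySDep are illustrative, not the LLVM types):

#include <cstdio>
#include <vector>

struct ToySDep {
  bool Ctrl;                            // true for a control (non-data) edge
  bool isCtrl() const { return Ctrl; }
};

struct ToySUnit {
  std::vector<ToySDep> Preds;           // incoming edges
};

static unsigned countCtrlPreds(const ToySUnit &SU) {
  unsigned NumberDeps = 0;
  for (const ToySDep &D : SU.Preds)
    if (D.isCtrl())
      ++NumberDeps;
  return NumberDeps;
}

int main() {
  ToySUnit SU{{{true}, {false}, {true}}};
  std::printf("%u control preds\n", countCtrlPreds(SU)); // prints: 2 control preds
  return 0;
}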
Example no. 2
/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
/// create a cycle.
bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
  if (IsReachable(TargetSU, SU))
    return true;
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (I->isAssignedRegDep() &&
        IsReachable(TargetSU, I->getSUnit()))
      return true;
  return false;
}
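
A minimal sketch of the underlying reachability test, on a plain adjacency list rather than the ScheduleDAG types: adding an edge From -> To closes a cycle exactly when From is already reachable from To.

#include <cstdio>
#include <functional>
#include <vector>

// DFS over successor lists: is Target reachable from Start?
static bool isReachable(const std::vector<std::vector<int>> &Succs,
                        int Target, int Start) {
  std::vector<bool> Seen(Succs.size(), false);
  std::function<bool(int)> dfs = [&](int N) {
    if (N == Target)
      return true;
    if (Seen[N])
      return false;
    Seen[N] = true;
    for (int S : Succs[N])
      if (dfs(S))
        return true;
    return false;
  };
  return dfs(Start);
}

// Adding the edge From -> To closes a cycle iff From is already
// reachable from To along existing edges.
static bool willCreateCycle(const std::vector<std::vector<int>> &Succs,
                            int From, int To) {
  return isReachable(Succs, /*Target=*/From, /*Start=*/To);
}

int main() {
  std::vector<std::vector<int>> Succs = {{1}, {2}, {}}; // 0 -> 1 -> 2
  std::printf("%d\n", willCreateCycle(Succs, 2, 0));    // 1: edge 2->0 closes 0->1->2->0
  return 0;
}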
Example no. 3
/// biasCriticalPath - Order this node's predecessor edges such that the
/// critical path edge occurs first.
void SUnit::biasCriticalPath() {
  if (NumPreds < 2)
    return;

  SUnit::pred_iterator BestI = Preds.begin();
  unsigned MaxDepth = BestI->getSUnit()->getDepth();
  for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E;
       ++I) {
    if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth) {
      MaxDepth = I->getSUnit()->getDepth();
      BestI = I;
    }
  }
  if (BestI != Preds.begin())
    std::swap(*Preds.begin(), *BestI);
}
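
The same selection as a standalone sketch on toy data (ToyPred is illustrative): keeping MaxDepth updated ensures the deepest data predecessor, not merely the last one deeper than the first, ends up in front.

#include <cstdio>
#include <iterator>
#include <utility>
#include <vector>

struct ToyPred {
  bool IsData;      // stands in for getKind() == SDep::Data
  unsigned Depth;   // stands in for getSUnit()->getDepth()
};

// Move the deepest data predecessor to the front so that schedulers
// scanning Preds in order see the critical edge first.
static void biasCriticalPath(std::vector<ToyPred> &Preds) {
  if (Preds.size() < 2)
    return;
  auto BestI = Preds.begin();
  unsigned MaxDepth = BestI->Depth;
  for (auto I = std::next(BestI), E = Preds.end(); I != E; ++I) {
    if (I->IsData && I->Depth > MaxDepth) {
      MaxDepth = I->Depth;
      BestI = I;
    }
  }
  if (BestI != Preds.begin())
    std::swap(*Preds.begin(), *BestI);
}

int main() {
  std::vector<ToyPred> Preds = {{true, 3}, {false, 9}, {true, 7}, {true, 5}};
  biasCriticalPath(Preds);
  std::printf("front depth = %u\n", Preds.front().Depth); // 7: deepest *data* pred
  return 0;
}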
Example no. 4
/// numberRCValPredInSU - Count the number of data dependencies that SU has
/// on values in register class RCId.
unsigned
ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
  unsigned NumberDeps = 0;
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;

    SUnit *PredSU = I->getSUnit();
    const SDNode *ScegN = PredSU->getNode();

    if (!ScegN)
      continue;

    // If the value is passed to CopyToReg, it is probably
    // live outside the BB.
    switch (ScegN->getOpcode()) {
    default:
      break;
    case ISD::TokenFactor:
      break;
    case ISD::CopyFromReg:
      NumberDeps++;
      break;
    case ISD::CopyToReg:
      break;
    case ISD::INLINEASM:
      break;
    }
    if (!ScegN->isMachineOpcode())
      continue;

    for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
      MVT VT = ScegN->getSimpleValueType(i);
      if (TLI->isTypeLegal(VT) &&
          (TLI->getRegClassFor(VT)->getID() == RCId)) {
        NumberDeps++;
        break;
      }
    }
  }
  return NumberDeps;
}
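
A toy model of the counting above, with each predecessor reduced to a control flag plus the register-class IDs of the values it produces (ToyPredNode is illustrative, not the LLVM API):

#include <cstdio>
#include <vector>

struct ToyPredNode {
  bool Ctrl;
  std::vector<unsigned> ValueRCs;
};

static unsigned countRCValPreds(const std::vector<ToyPredNode> &Preds,
                                unsigned RCId) {
  unsigned NumberDeps = 0;
  for (const ToyPredNode &P : Preds) {
    if (P.Ctrl)
      continue;                   // ignore control edges, as above
    for (unsigned RC : P.ValueRCs)
      if (RC == RCId) {           // count each predecessor at most once
        ++NumberDeps;
        break;
      }
  }
  return NumberDeps;
}

int main() {
  std::vector<ToyPredNode> Preds = {{false, {0, 1}}, {true, {1}}, {false, {1}}};
  std::printf("%u preds feed RC 1\n", countRCValPreds(Preds, 1)); // prints: 2
  return 0;
}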
Example no. 5
/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
static SDep *CriticalPathStep(SUnit *SU) {
  SDep *Next = 0;
  unsigned NextDepth = 0;
  // Find the predecessor edge with the greatest depth.
  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
       P != PE; ++P) {
    SUnit *PredSU = P->getSUnit();
    unsigned PredLatency = P->getLatency();
    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
    // In the case of a latency tie, prefer an anti-dependency edge over
    // other types of edges.
    if (NextDepth < PredTotalLatency ||
        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
      NextDepth = PredTotalLatency;
      Next = &*P;
    }
  }
  return Next;
}
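
The edge-selection rule in isolation, as a runnable sketch with toy edges (ToyEdge and ToyKind are stand-ins): greatest depth-plus-latency wins, and an anti-dependence edge wins ties.

#include <cstdio>
#include <vector>

enum class ToyKind { Data, Anti, Output };

struct ToyEdge {
  ToyKind Kind;
  unsigned PredDepth;   // stands in for getSUnit()->getDepth()
  unsigned Latency;     // stands in for getLatency()
};

// Greatest PredDepth + Latency wins; an anti-dependence edge wins ties,
// since those are the edges the breaker may later remove.
static const ToyEdge *criticalPathStep(const std::vector<ToyEdge> &Preds) {
  const ToyEdge *Next = nullptr;
  unsigned NextDepth = 0;
  for (const ToyEdge &P : Preds) {
    unsigned Total = P.PredDepth + P.Latency;
    if (NextDepth < Total ||
        (NextDepth == Total && P.Kind == ToyKind::Anti)) {
      NextDepth = Total;
      Next = &P;
    }
  }
  return Next;
}

int main() {
  std::vector<ToyEdge> Preds = {{ToyKind::Data, 4, 1}, {ToyKind::Anti, 3, 2}};
  const ToyEdge *E = criticalPathStep(Preds);
  std::printf("total %u, anti? %d\n", E->PredDepth + E->Latency,
              E->Kind == ToyKind::Anti); // total 5, anti? 1 (tie goes to Anti)
  return 0;
}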
Example no. 6
/// Main resource tracking point.
void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
  // Use a NULL entry as an event marker to reset
  // the DFA state.
  if (!SU) {
    ResourcesModel->clearResources();
    Packet.clear();
    return;
  }

  const SDNode *ScegN = SU->getNode();
  // Update reg pressure tracking.
  // First update current node.
  if (ScegN->isMachineOpcode()) {
    // Estimate generated regs.
    for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
      MVT VT = ScegN->getSimpleValueType(i);

      if (TLI->isTypeLegal(VT)) {
        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
        if (RC)
          RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
      }
    }
    // Estimate killed regs.
    for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
      const SDValue &Op = ScegN->getOperand(i);
      MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());

      if (TLI->isTypeLegal(VT)) {
        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
        if (RC) {
          if (RegPressure[RC->getID()] >
              numberRCValPredInSU(SU, RC->getID()))
            RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
          else
            RegPressure[RC->getID()] = 0;
        }
      }
    }
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
                              I != E; ++I) {
      if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
        continue;
      --I->getSUnit()->NumRegDefsLeft;
    }
  }

  // Reserve resources for this SU.
  reserveResources(SU);

  // Adjust the number of parallel live ranges.
  // The heuristic is simple: a node with no data successors reduces the
  // number of live ranges; all others increase it.
  unsigned NumberNonControlDeps = 0;

  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
                                  I != E; ++I) {
    adjustPriorityOfUnscheduledPreds(I->getSUnit());
    if (!I->isCtrl())
      NumberNonControlDeps++;
  }

  if (!NumberNonControlDeps) {
    if (ParallelLiveRanges >= SU->NumPreds)
      ParallelLiveRanges -= SU->NumPreds;
    else
      ParallelLiveRanges = 0;
  } else
    ParallelLiveRanges += SU->NumRegDefsLeft;

  // Track parallel live chains.
  HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
  HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
}
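
The killed-register bookkeeping above clamps at zero rather than underflowing; a minimal model of that saturating update:

#include <cstdio>

// Subtract killed registers with a floor of zero, exactly as the
// RegPressure update above does.
static void releasePressure(unsigned &Pressure, unsigned Killed) {
  Pressure = (Pressure > Killed) ? Pressure - Killed : 0;
}

int main() {
  unsigned RP = 3;
  releasePressure(RP, 2); // 3 -> 1
  releasePressure(RP, 5); // would underflow, so clamp to 0
  std::printf("%u\n", RP); // prints: 0
  return 0;
}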
Example no. 7
void PatmosPostRASchedStrategy::postprocessDAG(ScheduleDAGPostRA *dag) {
  DAG = dag;

  // Find the control-flow instruction (CFL) and the inline asm statement,
  // if any. Note that inline asm is a barrier, therefore there is at most
  // one CFL or one inline asm.
  SUnit *CFL = NULL;
  SUnit *Asm = NULL;

  // Push up loads to ensure load delay slot across BBs
  // TODO For some reason, loads do not always have exit edges and a latency
  //      of 1; find out why. Happens e.g. in coremark with the 16k methods
  //      setup.
  for (std::vector<SUnit>::reverse_iterator it = DAG->SUnits.rbegin(),
         ie = DAG->SUnits.rend(); it != ie; it++) {
    MachineInstr *MI = it->getInstr();
    if (!MI) continue;

    if (MI->mayLoad()) {
      SDep Dep(&*it, SDep::Artificial);
      Dep.setLatency(computeExitLatency(*it));
      DAG->ExitSU.addPred(Dep);
    }
  }

  // Find the branch/call/ret instruction if available
  for (std::vector<SUnit>::reverse_iterator it = DAG->SUnits.rbegin(),
       ie = DAG->SUnits.rend(); it != ie; it++)
  {
    MachineInstr *MI = it->getInstr();
    if (!MI) continue;
    if (isPatmosCFL(MI->getOpcode(), MI->getDesc().TSFlags)) {
      CFL = &*it;
      break;
    }
    if (MI->isInlineAsm()) {
      Asm = &*it;
      break;
    }
  }

  const PatmosSubtarget *PST = PTM.getSubtargetImpl();

  unsigned DelaySlot = CFL ? PST->getDelaySlotCycles(CFL->getInstr()) : 0;

  if (CFL) {
    // RET and CALL have implicit deps on the return values and call
    // arguments. Remove all those edges so that the defining instructions
    // can be scheduled into the delay slot if the registers are not
    // actually used by CALL and RET.
    if (CFL->getInstr()->isReturn() || CFL->getInstr()->isCall())
      removeImplicitCFLDeps(*CFL);

    // Add an artificial dep from CFL to exit for the delay slot
    SDep DelayDep(CFL, SDep::Artificial);
    DelayDep.setLatency(DelaySlot + 1);
    DAG->ExitSU.addPred(DelayDep);

    CFL->isScheduleLow = true;

    if (PTM.getSubtargetImpl()->getCFLType() != PatmosSubtarget::CFL_DELAYED) {
      // Push up single instructions that can be scheduled in the same
      // cycle as the branch
      unsigned LowCount = 0;
      SUnit *LowSU = NULL;
      for (std::vector<SUnit>::reverse_iterator it = DAG->SUnits.rbegin(),
             ie = DAG->SUnits.rend(); it != ie; it++) {
        if (&*it == CFL) continue;
        
        MachineInstr *MI = it->getInstr();
        if (!MI) continue;
        
        if (it->getHeight() <= DelaySlot) {
          LowCount++;
          if (PII.canIssueInSlot(MI, LowCount)) {
            LowSU = &*it;
          }
        }
      }

      if (LowSU && LowCount == 1) {
        SDep Dep(LowSU, SDep::Artificial);
        Dep.setLatency(DelaySlot + 1);
        DAG->ExitSU.addPred(Dep);
      }
    }

    if (PTM.getSubtargetImpl()->getCFLType() == PatmosSubtarget::CFL_NON_DELAYED) {
      // Add dependencies from all other instructions to exit
      for (std::vector<SUnit>::reverse_iterator it = DAG->SUnits.rbegin(),
             ie = DAG->SUnits.rend(); it != ie; it++) {
        if (&*it == CFL) continue;

        MachineInstr *MI = it->getInstr();
        if (!MI) continue;

        SDep Dep(&*it, SDep::Artificial);
        Dep.setLatency(DelaySlot + 1);
        DAG->ExitSU.addPred(Dep);
      }
    }
  }

  // Add an exit delay between loads and inline asm, in case the asm is empty
  if (Asm) {
    std::vector<SUnit*> PredLoads;
    for (SUnit::pred_iterator it = Asm->Preds.begin(), ie = Asm->Preds.end();
         it != ie; it++)
    {
      if (!it->getSUnit()) continue;
      MachineInstr *MI = it->getSUnit()->getInstr();
      // Check for loads
      if (!MI || !MI->mayLoad()) continue;
      PredLoads.push_back(it->getSUnit());
    }
    for (std::vector<SUnit*>::iterator it = PredLoads.begin(),
         ie = PredLoads.end(); it != ie; it++)
    {
      // Add a delay between loads and inline-asm, even if the operand is not
      // used.
      SDep Dep(*it, SDep::Artificial);
      Dep.setLatency(computeExitLatency(**it));

      Asm->addPred(Dep);
    }
  }

  // Remove barriers between loads/stores with different memory types.
  removeTypedMemBarriers();

  // Remove any dependencies between instructions with mutually exclusive
  // predicates.
  removeExclusivePredDeps();

  // TODO SWS and LWS do not have ST as implicit def edges
  // TODO CALL has chain edges to all SWS/.. instructions, remove

  // TODO remove edges from MUL to other MULs to overlap MUL and MFS for
  //      pipelined muls.
}
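
A toy model of the delay-slot trick used above: giving the control-flow instruction an artificial edge of latency DelaySlot + 1 to a virtual exit node keeps the block open for the delay slots (the node indices and cycle numbers here are made up for illustration).

#include <cstdio>
#include <vector>

// An artificial edge from a node to the virtual exit, with some latency.
struct ToyExitDep {
  unsigned Node;     // index of the predecessor node
  unsigned Latency;
};

// The exit cannot be scheduled before IssueCycle[pred] + Latency for every
// exit predecessor, so the branch stays DelaySlot cycles from the block end.
static unsigned earliestExitCycle(const std::vector<ToyExitDep> &ExitPreds,
                                  const std::vector<unsigned> &IssueCycle) {
  unsigned Exit = 0;
  for (const ToyExitDep &D : ExitPreds)
    if (IssueCycle[D.Node] + D.Latency > Exit)
      Exit = IssueCycle[D.Node] + D.Latency;
  return Exit;
}

int main() {
  unsigned DelaySlot = 2;
  std::vector<ToyExitDep> ExitPreds = {{/*CFL node*/ 0, DelaySlot + 1}};
  std::vector<unsigned> IssueCycle = {5};  // branch issued at cycle 5
  std::printf("exit at cycle %u\n",
              earliestExitCycle(ExitPreds, IssueCycle)); // cycle 8: two delay slots
  return 0;
}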
Example no. 8
unsigned CriticalAntiDepBreaker::
BreakAntiDependencies(std::vector<SUnit>& SUnits,
                      MachineBasicBlock::iterator& Begin,
                      MachineBasicBlock::iterator& End,
                      unsigned InsertPosIndex) {
  // The code below assumes that there is at least one instruction,
  // so just duck out immediately if the block is empty.
  if (SUnits.empty()) return 0;

  // Find the node at the bottom of the critical path.
  SUnit *Max = 0;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[i];
    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
      Max = SU;
  }

#ifndef NDEBUG
  {
    DEBUG(errs() << "Critical path has total latency "
          << (Max->getDepth() + Max->Latency) << "\n");
    DEBUG(errs() << "Available regs:");
    for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
      if (KillIndices[Reg] == ~0u)
        DEBUG(errs() << " " << TRI->getName(Reg));
    }
    DEBUG(errs() << '\n');
  }
#endif

  // Track progress along the critical path through the SUnit graph as we walk
  // the instructions.
  SUnit *CriticalPathSU = Max;
  MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();

  // Consider this pattern:
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  // There are three anti-dependencies here, and without special care,
  // we'd break all of them using the same register:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  // because at each anti-dependence, B is the first register that
  // isn't A which is free.  This re-introduces anti-dependencies
  // at all but one of the original anti-dependencies that we were
  // trying to break.  To avoid this, keep track of the most recent
  // register that each register was replaced with, avoid
  // using it to repair an anti-dependence on the same register.
  // This lets us produce this:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   C = ...
  //   ... = C
  //   B = ...
  //   ... = B
  // This still has an anti-dependence on B, but at least it isn't on the
  // original critical path.
  //
  // TODO: If we tracked more than one register here, we could potentially
  // fix that remaining critical edge too. This is a little more involved,
  // because unlike the most recent register, less recent registers should
  // still be considered, though only if no other registers are available.
  unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Attempt to break anti-dependence edges on the critical path. Walk the
  // instructions from the bottom up, tracking information about liveness
  // as we go to help determine which registers are available.
  unsigned Broken = 0;
  unsigned Count = InsertPosIndex - 1;
  for (MachineBasicBlock::iterator I = End, E = Begin;
       I != E; --Count) {
    MachineInstr *MI = --I;

    // Check if this instruction has a dependence on the critical path that
    // is an anti-dependence that we may be able to break. If it is, set
    // AntiDepReg to the non-zero register associated with the anti-dependence.
    //
    // We limit our attention to the critical path as a heuristic to avoid
    // breaking anti-dependence edges that aren't going to significantly
    // impact the overall schedule. There are a limited number of registers
    // and we want to save them for the important edges.
    // 
    // TODO: Instructions with multiple defs could have multiple
    // anti-dependencies. The current code here only knows how to break one
    // edge per instruction. Note that we'd have to be able to break all of
    // the anti-dependencies in an instruction in order to be effective.
    unsigned AntiDepReg = 0;
    if (MI == CriticalPathMI) {
      if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
        SUnit *NextSU = Edge->getSUnit();

        // Only consider anti-dependence edges.
        if (Edge->getKind() == SDep::Anti) {
          AntiDepReg = Edge->getReg();
          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
          if (!AllocatableSet.test(AntiDepReg))
            // Don't break anti-dependencies on non-allocatable registers.
            AntiDepReg = 0;
          else if (KeepRegs.count(AntiDepReg))
            // Don't break anti-dependencies if a use down below requires
            // this exact register.
            AntiDepReg = 0;
          else {
            // If the SUnit has other dependencies on the SUnit that it
            // anti-depends on, don't bother breaking the anti-dependency
            // since those edges would prevent such units from being
            // scheduled past each other regardless.
            //
            // Also, if there are dependencies on other SUnits with the
            // same register as the anti-dependency, don't attempt to
            // break it.
            for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
                 PE = CriticalPathSU->Preds.end(); P != PE; ++P)
              if (P->getSUnit() == NextSU ?
                    (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
                    (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
                AntiDepReg = 0;
                break;
              }
          }
        }
        CriticalPathSU = NextSU;
        CriticalPathMI = CriticalPathSU->getInstr();
      } else {
        // We've reached the end of the critical path.
        CriticalPathSU = 0;
        CriticalPathMI = 0;
      }
    }

    PrescanInstruction(MI);

    if (MI->getDesc().hasExtraDefRegAllocReq())
      // If this instruction's defs have special allocation requirement, don't
      // break this anti-dependency.
      AntiDepReg = 0;
    else if (AntiDepReg) {
      // If this instruction has a use of AntiDepReg, breaking it
      // is invalid.
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        MachineOperand &MO = MI->getOperand(i);
        if (!MO.isReg()) continue;
        unsigned Reg = MO.getReg();
        if (Reg == 0) continue;
        if (MO.isUse() && AntiDepReg == Reg) {
          AntiDepReg = 0;
          break;
        }
      }
    }

    // Determine AntiDepReg's register class, if it is live and is
    // consistently used within a single class.
    const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
    assert((AntiDepReg == 0 || RC != NULL) &&
           "Register should be live if it's causing an anti-dependence!");
    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
      AntiDepReg = 0;

    // Look for a suitable register to use to break the anti-dependence.
    //
    // TODO: Instead of picking the first free register, consider which might
    // be the best.
    if (AntiDepReg != 0) {
      if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg,
                                                     LastNewReg[AntiDepReg],
                                                     RC)) {
        DEBUG(errs() << "Breaking anti-dependence edge on "
              << TRI->getName(AntiDepReg)
              << " with " << RegRefs.count(AntiDepReg) << " references"
              << " using " << TRI->getName(NewReg) << "!\n");

        // Update the references to the old register to refer to the new
        // register.
        std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
                  std::multimap<unsigned, MachineOperand *>::iterator>
           Range = RegRefs.equal_range(AntiDepReg);
        for (std::multimap<unsigned, MachineOperand *>::iterator
             Q = Range.first, QE = Range.second; Q != QE; ++Q)
          Q->second->setReg(NewReg);

        // We just went back in time and modified history; the
        // liveness information for the anti-dependence reg is now
        // inconsistent. Set the state as if it were dead.
        Classes[NewReg] = Classes[AntiDepReg];
        DefIndices[NewReg] = DefIndices[AntiDepReg];
        KillIndices[NewReg] = KillIndices[AntiDepReg];
        assert(((KillIndices[NewReg] == ~0u) !=
                (DefIndices[NewReg] == ~0u)) &&
             "Kill and Def maps aren't consistent for NewReg!");

        Classes[AntiDepReg] = 0;
        DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
        KillIndices[AntiDepReg] = ~0u;
        assert(((KillIndices[AntiDepReg] == ~0u) !=
                (DefIndices[AntiDepReg] == ~0u)) &&
             "Kill and Def maps aren't consistent for AntiDepReg!");

        RegRefs.erase(AntiDepReg);
        LastNewReg[AntiDepReg] = NewReg;
        ++Broken;
      }
    }

    ScanInstruction(MI, Count);
  }

  return Broken;
}
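
Once findSuitableFreeRegister succeeds, breaking the edge is just rewriting the queued references; a stripped-down sketch of that rename step (ToyOperand is a stand-in for MachineOperand):

#include <cstdio>
#include <vector>

struct ToyOperand {
  unsigned Reg;
};

// Rewrite every queued reference to the anti-dependent register, which is
// all that "breaking" the edge amounts to once a free register is found.
static void breakAntiDep(std::vector<ToyOperand *> &RegRefs, unsigned NewReg) {
  for (ToyOperand *MO : RegRefs)
    MO->Reg = NewReg;
}

int main() {
  ToyOperand Def{1}, Use{1};                      // def and use of register 1
  std::vector<ToyOperand *> Refs = {&Def, &Use};  // the RegRefs range
  breakAntiDep(Refs, /*NewReg=*/2);
  std::printf("def=%u use=%u\n", Def.Reg, Use.Reg); // prints: def=2 use=2
  return 0;
}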