/// \brief Perform target specific adjustments to the latency of a schedule
/// dependency.
void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
                                             SDep &Dep) const {
  // Bail out before touching getInstr(): for SUnits that wrap SDNodes or
  // boundary nodes there is no MachineInstr to inspect.
  if (!Src->isInstr() || !Dst->isInstr())
    return;

  MachineInstr *SrcInst = Src->getInstr();
  MachineInstr *DstInst = Dst->getInstr();

  const HexagonInstrInfo *QII =
      static_cast<const HexagonInstrInfo *>(getInstrInfo());

  // Instructions with .new operands have zero latency.
  if (QII->canExecuteInBundle(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII)) {
    Dep.setLatency(0);
    return;
  }

  if (!hasV60TOps())
    return;

  // Don't adjust the latency of the post-increment part of an instruction.
  if (QII->isPostIncrement(*SrcInst) && Dep.isAssignedRegDep()) {
    if (SrcInst->mayStore())
      return;
    if (Dep.getReg() != SrcInst->getOperand(0).getReg())
      return;
  } else if (QII->isPostIncrement(*DstInst) && Dep.getKind() == SDep::Anti) {
    if (DstInst->mayStore())
      return;
    if (Dep.getReg() != DstInst->getOperand(0).getReg())
      return;
  } else if (QII->isPostIncrement(*DstInst) && DstInst->mayStore() &&
             Dep.isAssignedRegDep()) {
    MachineOperand &Op = DstInst->getOperand(DstInst->getNumOperands() - 1);
    if (Op.isReg() && Dep.getReg() != Op.getReg())
      return;
  }

  // Check if we need to change any of the latency values when Phis are added.
  if (useBSBScheduling() && SrcInst->isPHI()) {
    changePhiLatency(*SrcInst, Dst, Dep);
    return;
  }

  // If it's a REG_SEQUENCE, use its destination instruction to determine
  // the correct latency.
  if (DstInst->isRegSequence() && Dst->NumSuccs == 1)
    DstInst = Dst->Succs[0].getSUnit()->getInstr();

  // Try to schedule uses near definitions to generate .cur.
  if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII)) {
    Dep.setLatency(0);
    return;
  }

  updateLatency(*SrcInst, *DstInst, Dep);
}
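The shape of the hook is easier to see in isolation: the scheduler computes a default latency for each edge, then the target may zero it (a .new forwarding pair), raise it, or leave it alone. Below is a minimal standalone sketch of that decision chain; ToyEdge and adjustToyDependency are invented stand-ins for canExecuteInBundle, changePhiLatency, and updateLatency, not LLVM code.

#include <cstdio>

// Toy model (not LLVM): an edge starts with the machine model's default
// latency and the "target" gets the last word.
struct ToyEdge {
  unsigned Latency = 1;   // default from the machine model
  bool CanBundle = false; // pair could execute as a .new bundle
  bool PhiBridge = false; // PHI bridging two multi-cycle instructions
  bool SlowFeed = false;  // late producer feeding an early consumer
};

// Mirrors the priority order above: zero-latency cases win outright, and
// afterwards the latency is only ever raised, never lowered.
void adjustToyDependency(ToyEdge &E) {
  if (E.CanBundle) { E.Latency = 0; return; }
  if (E.PhiBridge) { E.Latency = 2; return; }
  if (E.SlowFeed)  { E.Latency += 1; }
}

int main() {
  ToyEdge A; A.CanBundle = true; adjustToyDependency(A);
  ToyEdge B; B.SlowFeed = true;  adjustToyDependency(B);
  std::printf("bundled=%u slow-feed=%u\n", A.Latency, B.Latency); // 0 2
}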
Example No. 2
void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
                                               unsigned OpIdx, SDep& dep) const{
  // Check to see if the scheduler cares about latencies.
  if (forceUnitLatencies())
    return;

  if (dep.getKind() != SDep::Data)
    return;

  unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
  if (Use->isMachineOpcode())
    // Adjust the use operand index by the number of defs.
    OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
  int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
  if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
      !BB->succ_empty()) {
    unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      // This copy is a liveout value. It is likely coalesced, so reduce the
      // latency so as not to penalize the def.
      // FIXME: need target specific adjustment here?
      Latency = (Latency > 1) ? Latency - 1 : 1;
  }
  if (Latency >= 0)
    dep.setLatency(Latency);
}
Example No. 3
void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
                                               unsigned OpIdx, SDep& dep) const{
  // Check to see if the scheduler cares about latencies.
  if (ForceUnitLatencies())
    return;

  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
  if (InstrItins.isEmpty())
    return;
  
  if (dep.getKind() != SDep::Data)
    return;

  unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
  if (Def->isMachineOpcode()) {
    const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
    if (DefIdx >= II.getNumDefs())
      return;
    int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
    if (DefCycle < 0)
      return;
    int UseCycle = 1;
    if (Use->isMachineOpcode()) {
      const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
      UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
    }
    if (UseCycle >= 0) {
      int Latency = DefCycle - UseCycle + 1;
      if (Latency >= 0)
        dep.setLatency(Latency);
    }
  }
}
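The core arithmetic in the itinerary-based examples is Latency = DefCycle - UseCycle + 1: if a producer writes its result in cycle 3 and the consumer reads the operand in cycle 2, the consumer must issue 2 cycles later. Here is a self-contained sketch with an invented two-class operand-cycle table; the real InstrItineraryData is generated per target by TableGen.

#include <algorithm>
#include <cstdio>

// Invented operand-cycle table: row = scheduling class, column = operand
// index (operand 0 is the def).
struct ToyItinerary {
  int OperandCycle[2][4] = {
      {3, 1, 1, 1},  // class 0 writes its result in cycle 3
      {1, 1, 2, 2},  // class 1 reads operands 2 and 3 in cycle 2
  };
  int getOperandCycle(unsigned Class, unsigned OpIdx) const {
    return OpIdx < 4 ? OperandCycle[Class][OpIdx] : -1;
  }
};

int main() {
  ToyItinerary Itins;
  int DefCycle = Itins.getOperandCycle(0, /*DefIdx=*/0); // 3
  int UseCycle = Itins.getOperandCycle(1, /*OpIdx=*/2);  // 2
  // The same formula the examples above compute.
  int Latency = std::max(0, DefCycle - UseCycle + 1);
  std::printf("latency = %d\n", Latency);                // 2
}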
Example No. 4
void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  if (!InstrItins || InstrItins->isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    const MachineOperand &MO = DefMI->getOperand(DefIdx);
    if (MO.isReg() && MO.isImplicit() &&
        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
      // This is an implicit def, getOperandLatency() won't return the correct
      // latency. e.g.
      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
      // What we want is to compute latency between def of %D6/%D7 and use of
      // %Q3 instead.
      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
      if (DefMI->getOperand(Op2).isReg())
        DefIdx = Op2;
    }
    MachineInstr *UseMI = Use->getInstr();
    // For all uses of the register, calculate the maximum latency.
    int Latency = -1;
    if (UseMI) {
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
                                              UseMI, i);
        Latency = std::max(Latency, UseCycle);
      }
    } else {
      // If UseMI is null, the use must be a scheduling barrier; fall back
      // to the def's operand cycle. (InstrItins was already checked above.)
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
    }

    // If we found a latency, then replace the existing dependence latency.
    if (Latency >= 0)
      dep.setLatency(Latency);
  }
}
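When the consumer reads the same register through several operands, the loop above keeps the worst case. Below is a minimal model of that max-over-uses scan, with made-up per-operand latencies standing in for TII->getOperandLatency.

#include <algorithm>
#include <cstdio>
#include <vector>

struct ToyUseOperand { unsigned Reg; int Latency; }; // invented

// Mirrors the loop above: visit every use of Reg, keep the maximum.
// A result of -1 means "no use found; leave the edge latency alone".
int maxUseLatency(const std::vector<ToyUseOperand> &Ops, unsigned Reg) {
  int Latency = -1;
  for (const ToyUseOperand &Op : Ops)
    if (Op.Reg == Reg)
      Latency = std::max(Latency, Op.Latency);
  return Latency;
}

int main() {
  std::vector<ToyUseOperand> Ops = {{5, 2}, {7, 1}, {5, 4}};
  std::printf("latency = %d\n", maxUseLatency(Ops, 5)); // 4
}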
Example No. 5
// Update the latency of a Phi when the Phi bridges two instructions that
// require a multi-cycle latency.
void HexagonSubtarget::changePhiLatency(MachineInstr &SrcInst, SUnit *Dst,
      SDep &Dep) const {
  if (!SrcInst.isPHI() || Dst->NumPreds == 0 || Dep.getLatency() != 0)
    return;

  for (const SDep &PI : Dst->Preds) {
    if (PI.getLatency() != 0)
      continue;
    Dep.setLatency(2);
    break;
  }
}
Example No. 6
// This helper function only ever increases the latency; it never lowers it.
void HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
      MachineInstr &DstInst, SDep &Dep) const {
  if (!hasV60TOps())
    return;

  auto &QII = static_cast<const HexagonInstrInfo&>(*getInstrInfo());

  if (EnableVecFrwdSched && QII.addLatencyToSchedule(SrcInst, DstInst)) {
    // Vec frwd scheduling.
    Dep.setLatency(Dep.getLatency() + 1);
  } else if (useBSBScheduling() &&
             QII.isLateInstrFeedsEarlyInstr(SrcInst, DstInst)) {
    // BSB scheduling.
    Dep.setLatency(Dep.getLatency() + 1);
  } else if (EnableTCLatencySched) {
    // TClass latency scheduling.
    // Check if SrcInst produces in 2C an operand of DstInst taken in stage 2B.
    if (QII.isTC1(SrcInst) || QII.isTC2(SrcInst))
      if (!QII.isTC1(DstInst) && !QII.isTC2(DstInst))
        Dep.setLatency(Dep.getLatency() + 1);
  }
}
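The EnableTCLatencySched branch reduces to one rule: a TC1/TC2 (fast-class) producer feeding a consumer outside those classes costs one extra cycle, and nothing in updateLatency ever shrinks an edge. A standalone restatement of just that rule follows, with an invented ToyTC enum in place of the real isTC1/isTC2 queries.

#include <cstdio>

enum class ToyTC { TC1, TC2, Other }; // invented stand-in for the tiers

// Fast producer into a slower consumer costs one extra cycle; the
// latency never decreases, matching the "increase only" contract above.
unsigned updateToyLatency(ToyTC Src, ToyTC Dst, unsigned Latency) {
  bool FastSrc = Src == ToyTC::TC1 || Src == ToyTC::TC2;
  bool FastDst = Dst == ToyTC::TC1 || Dst == ToyTC::TC2;
  return (FastSrc && !FastDst) ? Latency + 1 : Latency;
}

int main() {
  std::printf("%u %u\n",
              updateToyLatency(ToyTC::TC1, ToyTC::Other, 1),  // 2
              updateToyLatency(ToyTC::Other, ToyTC::TC1, 1)); // 1
}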
Example No. 7
void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
  if (InstrItins.isEmpty())
    return;
  
  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(),
                                              DefIdx);
    if (DefCycle >= 0) {
      MachineInstr *UseMI = Use->getInstr();
      const unsigned UseClass = UseMI->getDesc().getSchedClass();

      // For all uses of the register, calculate the maximum latency.
      int Latency = -1;
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = InstrItins.getOperandCycle(UseClass, i);
        if (UseCycle >= 0)
          Latency = std::max(Latency, DefCycle - UseCycle + 1);
      }

      // If we found a latency, then replace the existing dependence latency.
      if (Latency >= 0)
        dep.setLatency(Latency);
    }
  }
}
Example No. 8
/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();

  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    std::vector<PhysRegSUOper> &UseList = Uses[*Alias];
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i].SU;
      if (UseSU == SU)
        continue;

      // Adjust the dependence latency using operand def/use information,
      // then allow the target to perform its own adjustments.
      int UseOp = UseList[i].OpIdx;
      MachineInstr *RegUse = 0;
      SDep Dep;
      if (UseOp < 0)
        Dep = SDep(SU, SDep::Artificial);
      else {
        Dep = SDep(SU, SDep::Data, *Alias);
        RegUse = UseSU->getInstr();
        Dep.setMinLatency(
          SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
                                           RegUse, UseOp, /*FindMin=*/true));
      }
      Dep.setLatency(
        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
                                         RegUse, UseOp, /*FindMin=*/false));

      ST.adjustSchedDependency(SU, UseSU, Dep);
      UseSU->addPred(Dep);
    }
  }
}
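The data structure behind addPhysRegDataDeps is a table from each physical register to its pending uses. Because physical registers overlap (a register pair aliases both halves), a def must walk the use list of every alias, which is what MCRegAliasIterator provides. Here is a minimal model with a hand-written alias table; the register numbers and names are invented.

#include <cstdio>
#include <map>
#include <vector>

// Invented alias table: r3 is a pair overlapping r1 and r2.
std::map<unsigned, std::vector<unsigned>> Aliases = {
    {1, {1, 3}}, {2, {2, 3}}, {3, {3, 1, 2}},
};
std::map<unsigned, std::vector<const char *>> Uses; // reg -> pending users

// Mirrors the shape of addPhysRegDataDeps: a def of Reg adds a data
// edge to every recorded use of every aliasing register.
void addDefEdges(unsigned Reg) {
  for (unsigned A : Aliases[Reg])
    for (const char *User : Uses[A])
      std::printf("edge: def r%u -> %s (via alias r%u)\n", Reg, User, A);
}

int main() {
  Uses[3].push_back("use-of-pair");
  addDefEdges(1); // r1 overlaps r3, so the pair's use gets an edge
}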
Example No. 9
void ScheduleDAGSDNodes::AddSchedEdges() {
  const TargetSubtargetInfo &ST = MF.getSubtarget();

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = forceUnitLatencies();

  // Pass 2: add the preds, succs, etc.
  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
    SUnit *SU = &SUnits[su];
    SDNode *MainNode = SU->getNode();

    if (MainNode->isMachineOpcode()) {
      unsigned Opc = MainNode->getMachineOpcode();
      const MCInstrDesc &MCID = TII->get(Opc);
      for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
        if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
          SU->isTwoAddress = true;
          break;
        }
      }
      if (MCID.isCommutable())
        SU->isCommutable = true;
    }

    // Find all predecessors and successors of the group.
    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
      if (N->isMachineOpcode() &&
          TII->get(N->getMachineOpcode()).getImplicitDefs()) {
        SU->hasPhysRegClobbers = true;
        unsigned NumUsed = InstrEmitter::CountResults(N);
        while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
          --NumUsed;    // Skip over unused values at the end.
        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
          SU->hasPhysRegDefs = true;
      }

      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        SDNode *OpN = N->getOperand(i).getNode();
        if (isPassiveNode(OpN)) continue;   // Not scheduled.
        SUnit *OpSU = &SUnits[OpN->getNodeId()];
        assert(OpSU && "Node has no SUnit!");
        if (OpSU == SU) continue;           // In the same group.

        EVT OpVT = N->getOperand(i).getValueType();
        assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
        bool isChain = OpVT == MVT::Other;

        unsigned PhysReg = 0;
        int Cost = 1;
        // Determine if this is a physical register dependency.
        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
        assert((PhysReg == 0 || !isChain) &&
               "Chain dependence via physreg data?");
        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, the
        // scheduler emits a copy from the physical register to a virtual
        // register unless it requires a cross-class copy (cost < 0). That
        // means we only treat an "expensive to copy" register dependency as
        // a physical register dependency. This may change in the future.
        if (Cost >= 0 && !StressSched)
          PhysReg = 0;

        // If this is a ctrl dep, latency is 1.
        unsigned OpLatency = isChain ? 1 : OpSU->Latency;
        // Special-case TokenFactor chains as zero-latency.
        if (isChain && OpN->getOpcode() == ISD::TokenFactor)
          OpLatency = 0;

        SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
          : SDep(OpSU, SDep::Data, PhysReg);
        Dep.setLatency(OpLatency);
        if (!isChain && !UnitLatencies) {
          computeOperandLatency(OpN, N, i, Dep);
          ST.adjustSchedDependency(OpSU, SU, Dep);
        }

        if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
          // Multiple register uses are combined in the same SUnit. For example,
          // we could have a set of glued nodes with all their defs consumed by
          // another set of glued nodes. Register pressure tracking sees this as
          // a single use, so to keep pressure balanced we reduce the defs.
          //
          // We can't tell (without more book-keeping) if this results from
          // glued nodes or duplicate operands. As long as we don't reduce
          // NumRegDefsLeft to zero, we handle the common cases well.
          --OpSU->NumRegDefsLeft;
        }
      }
    }
  }
}
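The edge classification in AddSchedEdges comes down to three cases: a chain (control) edge costs one cycle, a TokenFactor chain edge costs zero, and a data edge starts from the producing SUnit's latency before computeOperandLatency and adjustSchedDependency refine it. Below is a compact standalone restatement of just that policy; ToyNode and its fields are invented.

#include <cstdio>

struct ToyNode { bool IsChain; bool IsTokenFactor; unsigned Latency; };

// Same policy as the loop above: chain deps cost 1, TokenFactor chains
// cost 0, and data deps inherit the producer's latency as a baseline.
unsigned edgeLatency(const ToyNode &Op) {
  if (Op.IsChain)
    return Op.IsTokenFactor ? 0 : 1;
  return Op.Latency;
}

int main() {
  std::printf("%u %u %u\n",
              edgeLatency({true, false, 9}),   // 1: plain chain
              edgeLatency({true, true, 9}),    // 0: TokenFactor chain
              edgeLatency({false, false, 3})); // 3: data
}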