Code Example #1
void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
  unsigned NodeNumDefs = 0;
  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
    if (N->isMachineOpcode()) {
      const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
      // No register need be allocated for this.
      if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
        NodeNumDefs = 0;
        break;
      }
      NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
    }
    else
      switch(N->getOpcode()) {
        default:     break;
        case ISD::CopyFromReg:
          NodeNumDefs++;
          break;
        case ISD::INLINEASM:
          NodeNumDefs++;
          break;
      }

  SU->NumRegDefsLeft = NodeNumDefs;
}
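
A recurring idiom in these examples is walking the chain of glued nodes that make up one SUnit and testing each node with isMachineOpcode(). The following standalone sketch is not taken from any of the projects above; the helper name countMachineNodes is invented, and it assumes the usual LLVM CodeGen headers (llvm/CodeGen/ScheduleDAG.h, llvm/CodeGen/SelectionDAGNodes.h).

// Hypothetical helper: count how many of the nodes glued into SU are target
// (machine) instructions rather than generic ISD nodes. Uses only accessors
// that already appear in the examples in this listing.
static unsigned countMachineNodes(const SUnit *SU) {
  unsigned Count = 0;
  for (const SDNode *N = SU->getNode(); N; N = N->getGluedNode())
    if (N->isMachineOpcode())
      ++Count;
  return Count;
}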
Code Example #2
void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
  SDNode *N = SU->getNode();

  // TokenFactor operands are considered zero latency, and some schedulers
  // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
  // whenever node latency is nonzero.
  if (N && N->getOpcode() == ISD::TokenFactor) {
    SU->Latency = 0;
    return;
  }

  // Check to see if the scheduler cares about latencies.
  if (forceUnitLatencies()) {
    SU->Latency = 1;
    return;
  }

  if (!InstrItins || InstrItins->isEmpty()) {
    if (N && N->isMachineOpcode() &&
        TII->isHighLatencyDef(N->getMachineOpcode()))
      SU->Latency = HighLatencyCycles;
    else
      SU->Latency = 1;
    return;
  }

  // Compute the latency for the node.  We use the sum of the latencies for
  // all nodes glued together into this SUnit.
  SU->Latency = 0;
  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
    if (N->isMachineOpcode())
      SU->Latency += TII->getInstrLatency(InstrItins, N);
}
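
For the fallback branch above (no instruction itineraries), here is a hedged standalone sketch. In the real scheduler HighLatencyCycles is a member variable, so it is passed in explicitly here, and the helper name fallbackLatency is invented.

// Sketch of the itinerary-less latency fallback mirrored from computeLatency():
// high-latency machine nodes get HighLatencyCycles, everything else gets 1.
static unsigned fallbackLatency(const SDNode *N, const TargetInstrInfo *TII,
                                unsigned HighLatencyCycles) {
  if (N && N->isMachineOpcode() &&
      TII->isHighLatencyDef(N->getMachineOpcode()))
    return HighLatencyCycles;
  return 1;
}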
Code Example #3
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
  
  // Compute the latency for the node.  We use the sum of the latencies for
  // all nodes flagged together into this SUnit.
  SU->Latency = 0;
  for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
    if (N->isMachineOpcode()) {
      SU->Latency += InstrItins.
        getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
    }
}
Code Example #4
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
                                              SmallVectorImpl<unsigned> &LRegs){
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SDep &Pred : SU->Preds) {
    if (Pred.isAssignedRegDep()) {
      CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
    }
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }
    if (!Node->isMachineOpcode())
      continue;
    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (!MCID.ImplicitDefs)
      continue;
    for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
    }
  }
  return !LRegs.empty();
}
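
The inline-asm branch above depends on the operand layout of ISD::INLINEASM nodes: each operand group starts with a constant flag word from which the group kind and the number of following register operands are decoded. Below is a minimal hedged sketch of just that walk; the helper name walkInlineAsmGroups is invented and the body omits the def/clobber checks shown in the example.

// Sketch: iterate the operand groups of an ISD::INLINEASM node. Each group
// begins with a ConstantSDNode flag word; InlineAsm::getNumOperandRegisters()
// tells how many register operands follow it.
static void walkInlineAsmGroups(const SDNode *Node) {
  unsigned NumOps = Node->getNumOperands();
  if (Node->getOperand(NumOps - 1).getValueType() == MVT::Glue)
    --NumOps; // The trailing glue operand is not an asm operand.
  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
    unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
    ++i;          // Skip the flag word itself.
    i += NumVals; // Skip the register operands of this group.
  }
}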
Code Example #5
File: ScheduleDAGSDNodes.cpp  Project: happz/llvm
/// ClusterNodes - Cluster certain nodes which should be scheduled together.
///
void ScheduleDAGSDNodes::ClusterNodes() {
  for (SDNode &NI : DAG->allnodes()) {
    SDNode *Node = &NI;
    if (!Node || !Node->isMachineOpcode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    if (MCID.mayLoad())
      // Cluster loads from "near" addresses into combined SUnits.
      ClusterNeighboringLoads(Node);
  }
}
Code Example #6
File: VISelDAGToDAG.cpp  Project: b14ckkn19ht/Shang
SDNode *VDAGToDAGISel::SelectBitSlice(SDNode *N) {
  SDNode *Op = N->getOperand(0).getNode();
  // Emit the constant bit slice to constant directly if possible.
  if (ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Op))
    return SelectConstBitSlice(CSD, N);

  assert(Op->getOpcode() != VTMISD::BitSlice
         && (!Op->isMachineOpcode()
             || Op->getMachineOpcode() != VTM::VOpBitSlice)
         && "DAGCombine should handle this!");

  return SelectSimpleNode(N, VTM::VOpBitSlice);
}
Code Example #7
/// ClusterNodes - Cluster certain nodes which should be scheduled together.
///
void ScheduleDAGSDNodes::ClusterNodes() {
  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    SDNode *Node = &*NI;
    if (!Node || !Node->isMachineOpcode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    if (MCID.mayLoad())
      // Cluster loads from "near" addresses into combined SUnits.
      ClusterNeighboringLoads(Node);
  }
}
Code Example #8
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
  // Check to see if the scheduler cares about latencies.
  if (ForceUnitLatencies()) {
    SU->Latency = 1;
    return;
  }

  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
  if (InstrItins.isEmpty()) {
    SU->Latency = 1;
    return;
  }
  
  // Compute the latency for the node.  We use the sum of the latencies for
  // all nodes flagged together into this SUnit.
  SU->Latency = 0;
  for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
    if (N->isMachineOpcode()) {
      SU->Latency += InstrItins.
        getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
    }
}
Code Example #9
void ScheduleDAGSDNodes::AddSchedEdges() {
  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = ForceUnitLatencies();

  // Pass 2: add the preds, succs, etc.
  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
    SUnit *SU = &SUnits[su];
    SDNode *MainNode = SU->getNode();
    
    if (MainNode->isMachineOpcode()) {
      unsigned Opc = MainNode->getMachineOpcode();
      const TargetInstrDesc &TID = TII->get(Opc);
      for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
        if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
          SU->isTwoAddress = true;
          break;
        }
      }
      if (TID.isCommutable())
        SU->isCommutable = true;
    }
    
    // Find all predecessors and successors of the group.
    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
      if (N->isMachineOpcode() &&
          TII->get(N->getMachineOpcode()).getImplicitDefs()) {
        SU->hasPhysRegClobbers = true;
        unsigned NumUsed = InstrEmitter::CountResults(N);
        while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
          --NumUsed;    // Skip over unused values at the end.
        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
          SU->hasPhysRegDefs = true;
      }
      
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        SDNode *OpN = N->getOperand(i).getNode();
        if (isPassiveNode(OpN)) continue;   // Not scheduled.
        SUnit *OpSU = &SUnits[OpN->getNodeId()];
        assert(OpSU && "Node has no SUnit!");
        if (OpSU == SU) continue;           // In the same group.

        EVT OpVT = N->getOperand(i).getValueType();
        assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
        bool isChain = OpVT == MVT::Other;

        unsigned PhysReg = 0;
        int Cost = 1;
        // Determine if this is a physical register dependency.
        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
        assert((PhysReg == 0 || !isChain) &&
               "Chain dependence via physreg data?");
        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
        // emits a copy from the physical register to a virtual register unless
        // it requires a cross class copy (cost < 0). That means we are only
        // treating "expensive to copy" register dependency as physical register
        // dependency. This may change in the future though.
        if (Cost >= 0)
          PhysReg = 0;

        // If this is a ctrl dep, latency is 1.
        unsigned OpLatency = isChain ? 1 : OpSU->Latency;
        const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                               OpLatency, PhysReg);
        if (!isChain && !UnitLatencies) {
          ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
          ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
        }

        SU->addPred(dep);
      }
    }
  }
}
Code Example #10
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
  if (SU->getNode()->getGluedNode())
    return nullptr;

  SDNode *N = SU->getNode();
  if (!N)
    return nullptr;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getSimpleValueType(i);
    if (VT == MVT::Glue)
      return nullptr;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (const SDValue &Op : N->op_values()) {
    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return nullptr;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return nullptr;

    LLVM_DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    SUnit *NewSU = newSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    // LoadNode may already exist. This can happen when there is another
    // load from the same location and producing the same type of value
    // but it has different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = newSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
    }

    SDep ChainPred;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SDep &Pred : SU->Preds) {
      if (Pred.isCtrl())
        ChainPred = Pred;
      else if (Pred.getSUnit()->getNode() &&
               Pred.getSUnit()->getNode()->isOperandOf(LoadNode))
        LoadPreds.push_back(Pred);
      else
        NodePreds.push_back(Pred);
    }
    for (SDep &Succ : SU->Succs) {
      if (Succ.isCtrl())
        ChainSuccs.push_back(Succ);
      else
        NodeSuccs.push_back(Succ);
    }

    if (ChainPred.getSUnit()) {
      RemovePred(SU, ChainPred);
      if (isNewLoad)
        AddPred(LoadSU, ChainPred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad) {
        AddPred(LoadSU, Pred);
      }
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }
    if (isNewLoad) {
      SDep D(LoadSU, SDep::Barrier);
      D.setLatency(LoadSU->Latency);
      AddPred(NewSU, D);
    }

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  LLVM_DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
  NewSU = Clone(SU);

  // New SUnit has the exact same predecessors.
  for (SDep &Pred : SU->Preds)
    if (!Pred.isArtificial())
      AddPred(NewSU, Pred);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  ++NumDups;
  return NewSU;
}
Code Example #11
File: InstrEmitter.cpp  Project: 5432935/crossbridge
/// EmitMachineNode - Generate machine code for a target-specific node and
/// needed dependencies.
///
void InstrEmitter::
EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
                DenseMap<SDValue, unsigned> &VRBaseMap) {
  unsigned Opc = Node->getMachineOpcode();
  
  // Handle subreg insert/extract specially
  if (Opc == TargetOpcode::EXTRACT_SUBREG || 
      Opc == TargetOpcode::INSERT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG) {
    EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
    return;
  }

  // Handle COPY_TO_REGCLASS specially.
  if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
    EmitCopyToRegClassNode(Node, VRBaseMap);
    return;
  }

  // Handle REG_SEQUENCE specially.
  if (Opc == TargetOpcode::REG_SEQUENCE) {
    EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
    return;
  }

  if (Opc == TargetOpcode::IMPLICIT_DEF)
    // We want a unique VR for each IMPLICIT_DEF use.
    return;
  
  const TargetInstrDesc &II = TII->get(Opc);
  unsigned NumResults = CountResults(Node);
  unsigned NodeOperands = CountOperands(Node);
  bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
#ifndef NDEBUG
  unsigned NumMIOperands = NodeOperands + NumResults;
  if (II.isVariadic())
    assert(NumMIOperands >= II.getNumOperands() &&
           "Too few operands for a variadic node!");
  else
    assert(NumMIOperands >= II.getNumOperands() &&
           NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() &&
           "#operands for dag node doesn't match .td file!");
#endif

  // Create the new machine instruction.
  MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);

  // The MachineInstr constructor adds implicit-def operands. Scan through
  // these to determine which are dead.
  if (MI->getNumOperands() != 0 &&
      Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
    // First, collect all used registers.
    SmallVector<unsigned, 8> UsedRegs;
    for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
      if (F->getOpcode() == ISD::CopyFromReg)
        UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
      else {
        // Collect declared implicit uses.
        const TargetInstrDesc &TID = TII->get(F->getMachineOpcode());
        UsedRegs.append(TID.getImplicitUses(),
                        TID.getImplicitUses() + TID.getNumImplicitUses());
        // In addition to declared implicit uses, we must also check for
        // direct RegisterSDNode operands.
        for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
          if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
            unsigned Reg = R->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              UsedRegs.push_back(Reg);
          }
      }
    // Then mark unused registers as dead.
    MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
  }
  
  // Add result register values for things that are defined by this
  // instruction.
  if (NumResults)
    CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
  
  // Emit all of the actual operands of this instruction, adding them to the
  // instruction as appropriate.
  bool HasOptPRefs = II.getNumDefs() > NumResults;
  assert((!HasOptPRefs || !HasPhysRegOuts) &&
         "Unable to cope with optional defs and phys regs defs!");
  unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
  for (unsigned i = NumSkip; i != NodeOperands; ++i)
    AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
               VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);

  // Transfer all of the memory reference descriptions of this instruction.
  MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
                 cast<MachineSDNode>(Node)->memoperands_end());

  // Insert the instruction into position in the block. This needs to
  // happen before any custom inserter hook is called so that the
  // hook knows where in the block to insert the replacement code.
  MBB->insert(InsertPos, MI);

  // Additional results must be physical register defs.
  if (HasPhysRegOuts) {
    for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
      unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
      if (Node->hasAnyUseOfValue(i))
        EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
      // If there are no uses, mark the register as dead now, so that
      // MachineLICM/Sink can see that it's dead. Don't do this if the
      // node has a Glue value, for the benefit of targets still using
      // Glue for values in physregs.
      else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
        MI->addRegisterDead(Reg, TRI);
    }
  }
  
  // If the instruction has implicit defs and the node doesn't, mark the
  // implicit def as dead.  If the node has any glue outputs, we don't do this
  // because we don't know what implicit defs are being used by glued nodes.
  if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
    if (const unsigned *IDList = II.getImplicitDefs()) {
      for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
           i != e; ++i)
        MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI);
    }
}
Code Example #12
void ScheduleDAGSDNodes::AddSchedEdges() {
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = forceUnitLatencies();

  // Pass 2: add the preds, succs, etc.
  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
    SUnit *SU = &SUnits[su];
    SDNode *MainNode = SU->getNode();

    if (MainNode->isMachineOpcode()) {
      unsigned Opc = MainNode->getMachineOpcode();
      const MCInstrDesc &MCID = TII->get(Opc);
      for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
        if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
          SU->isTwoAddress = true;
          break;
        }
      }
      if (MCID.isCommutable())
        SU->isCommutable = true;
    }

    // Find all predecessors and successors of the group.
    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
      if (N->isMachineOpcode() &&
          TII->get(N->getMachineOpcode()).getImplicitDefs()) {
        SU->hasPhysRegClobbers = true;
        unsigned NumUsed = InstrEmitter::CountResults(N);
        while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
          --NumUsed;    // Skip over unused values at the end.
        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
          SU->hasPhysRegDefs = true;
      }

      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        SDNode *OpN = N->getOperand(i).getNode();
        if (isPassiveNode(OpN)) continue;   // Not scheduled.
        SUnit *OpSU = &SUnits[OpN->getNodeId()];
        assert(OpSU && "Node has no SUnit!");
        if (OpSU == SU) continue;           // In the same group.

        EVT OpVT = N->getOperand(i).getValueType();
        assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
        bool isChain = OpVT == MVT::Other;

        unsigned PhysReg = 0;
        int Cost = 1;
        // Determine if this is a physical register dependency.
        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
        assert((PhysReg == 0 || !isChain) &&
               "Chain dependence via physreg data?");
        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
        // emits a copy from the physical register to a virtual register unless
        // it requires a cross class copy (cost < 0). That means we are only
        // treating "expensive to copy" register dependency as physical register
        // dependency. This may change in the future though.
        if (Cost >= 0 && !StressSched)
          PhysReg = 0;

        // If this is a ctrl dep, latency is 1.
        unsigned OpLatency = isChain ? 1 : OpSU->Latency;
        // Special-case TokenFactor chains as zero-latency.
        if(isChain && OpN->getOpcode() == ISD::TokenFactor)
          OpLatency = 0;

        const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                               OpLatency, PhysReg);
        if (!isChain && !UnitLatencies) {
          computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
          ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
        }

        if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
          // Multiple register uses are combined in the same SUnit. For example,
          // we could have a set of glued nodes with all their defs consumed by
          // another set of glued nodes. Register pressure tracking sees this as
          // a single use, so to keep pressure balanced we reduce the defs.
          //
          // We can't tell (without more book-keeping) if this results from
          // glued nodes or duplicate operands. As long as we don't reduce
          // NumRegDefsLeft to zero, we handle the common cases well.
          --OpSU->NumRegDefsLeft;
        }
      }
    }
  }
}
Code Example #13
void ScheduleDAGSDNodes::BuildSchedUnits() {
  // During scheduling, the NodeId field of SDNode is used to map SDNodes
  // to their associated SUnits by holding SUnits table indices. A value
  // of -1 means the SDNode does not yet have an associated SUnit.
  unsigned NumNodes = 0;
  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    NI->setNodeId(-1);
    ++NumNodes;
  }

  // Reserve entries in the vector for each of the SUnits we are creating.  This
  // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
  // invalidated.
  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
  // This is a temporary workaround.
  SUnits.reserve(NumNodes * 2);

  // Add all nodes in depth first order.
  SmallVector<SDNode*, 64> Worklist;
  SmallPtrSet<SDNode*, 64> Visited;
  Worklist.push_back(DAG->getRoot().getNode());
  Visited.insert(DAG->getRoot().getNode());

  SmallVector<SUnit*, 8> CallSUnits;
  while (!Worklist.empty()) {
    SDNode *NI = Worklist.pop_back_val();

    // Add all operands to the worklist unless they've already been added.
    for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
      if (Visited.insert(NI->getOperand(i).getNode()))
        Worklist.push_back(NI->getOperand(i).getNode());

    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
      continue;

    // If this node has already been processed, stop now.
    if (NI->getNodeId() != -1) continue;

    SUnit *NodeSUnit = newSUnit(NI);

    // See if anything is glued to this node, if so, add them to glued
    // nodes.  Nodes can have at most one glue input and one glue output.  Glue
    // is required to be the last operand and result of a node.

    // Scan up to find glued preds.
    SDNode *N = NI;
    while (N->getNumOperands() &&
           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
      N = N->getOperand(N->getNumOperands()-1).getNode();
      assert(N->getNodeId() == -1 && "Node already inserted!");
      N->setNodeId(NodeSUnit->NodeNum);
      if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
        NodeSUnit->isCall = true;
    }

    // Scan down to find any glued succs.
    N = NI;
    while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
      SDValue GlueVal(N, N->getNumValues()-1);

      // There are either zero or one users of the Glue result.
      bool HasGlueUse = false;
      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
           UI != E; ++UI)
        if (GlueVal.isOperandOf(*UI)) {
          HasGlueUse = true;
          assert(N->getNodeId() == -1 && "Node already inserted!");
          N->setNodeId(NodeSUnit->NodeNum);
          N = *UI;
          if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
            NodeSUnit->isCall = true;
          break;
        }
      if (!HasGlueUse) break;
    }

    if (NodeSUnit->isCall)
      CallSUnits.push_back(NodeSUnit);

    // Schedule zero-latency TokenFactor below any nodes that may increase the
    // schedule height. Otherwise, ancestors of the TokenFactor may appear to
    // have false stalls.
    if (NI->getOpcode() == ISD::TokenFactor)
      NodeSUnit->isScheduleLow = true;

    // If there are glue operands involved, N is now the bottom-most node
    // of the sequence of nodes that are glued together.
    // Update the SUnit.
    NodeSUnit->setNode(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NodeSUnit->NodeNum);

    // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
    InitNumRegDefsLeft(NodeSUnit);

    // Assign the Latency field of NodeSUnit using target-provided information.
    computeLatency(NodeSUnit);
  }

  // Find all call operands.
  while (!CallSUnits.empty()) {
    SUnit *SU = CallSUnits.pop_back_val();
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->getOpcode() != ISD::CopyToReg)
        continue;
      SDNode *SrcN = SUNode->getOperand(2).getNode();
      if (isPassiveNode(SrcN)) continue;   // Not scheduled.
      SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
      SrcSU->isCallOp = true;
    }
  }
}
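
After BuildSchedUnits() has run, the NodeId field of every scheduled SDNode holds its index into the SUnits vector (or -1 for nodes without an SUnit); the other examples rely on that convention when they write SUnits[OpN->getNodeId()]. A minimal hedged sketch of the lookup, with an invented helper name and assuming <vector> plus the usual LLVM CodeGen headers:

// Sketch: map an SDNode back to the SUnit that owns it, using the NodeId
// convention set up by BuildSchedUnits() above.
static SUnit *lookupSUnit(std::vector<SUnit> &SUnits, const SDNode *N) {
  int Id = N->getNodeId();
  return Id == -1 ? nullptr : &SUnits[Id];
}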
Code Example #14
File: InstrEmitter.cpp  Project: crabtw/llvm
/// EmitMachineNode - Generate machine code for a target-specific node and
/// needed dependencies.
///
void InstrEmitter::
EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
                DenseMap<SDValue, unsigned> &VRBaseMap) {
  unsigned Opc = Node->getMachineOpcode();

  // Handle subreg insert/extract specially
  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::INSERT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG) {
    EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
    return;
  }

  // Handle COPY_TO_REGCLASS specially.
  if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
    EmitCopyToRegClassNode(Node, VRBaseMap);
    return;
  }

  // Handle REG_SEQUENCE specially.
  if (Opc == TargetOpcode::REG_SEQUENCE) {
    EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
    return;
  }

  if (Opc == TargetOpcode::IMPLICIT_DEF)
    // We want a unique VR for each IMPLICIT_DEF use.
    return;

  const MCInstrDesc &II = TII->get(Opc);
  unsigned NumResults = CountResults(Node);
  unsigned NumDefs = II.getNumDefs();
  const MCPhysReg *ScratchRegs = nullptr;

  // Handle STACKMAP and PATCHPOINT specially and then use the generic code.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    // Stackmaps do not have arguments and do not preserve their calling
    // convention. However, to simplify runtime support, they clobber the same
    // scratch registers as AnyRegCC.
    unsigned CC = CallingConv::AnyReg;
    if (Opc == TargetOpcode::PATCHPOINT) {
      CC = Node->getConstantOperandVal(PatchPointOpers::CCPos);
      NumDefs = NumResults;
    }
    ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC);
  }

  unsigned NumImpUses = 0;
  unsigned NodeOperands =
    countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses);
  bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr;
#ifndef NDEBUG
  unsigned NumMIOperands = NodeOperands + NumResults;
  if (II.isVariadic())
    assert(NumMIOperands >= II.getNumOperands() &&
           "Too few operands for a variadic node!");
  else
    assert(NumMIOperands >= II.getNumOperands() &&
           NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() +
                            NumImpUses &&
           "#operands for dag node doesn't match .td file!");
#endif

  // Create the new machine instruction.
  MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II);

  // Add result register values for things that are defined by this
  // instruction.
  if (NumResults) {
    CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap);

    // Transfer any IR flags from the SDNode to the MachineInstr
    MachineInstr *MI = MIB.getInstr();
    const SDNodeFlags Flags = Node->getFlags();
    if (Flags.hasNoSignedZeros())
      MI->setFlag(MachineInstr::MIFlag::FmNsz);

    if (Flags.hasAllowReciprocal())
      MI->setFlag(MachineInstr::MIFlag::FmArcp);

    if (Flags.hasNoNaNs())
      MI->setFlag(MachineInstr::MIFlag::FmNoNans);

    if (Flags.hasNoInfs())
      MI->setFlag(MachineInstr::MIFlag::FmNoInfs);

    if (Flags.hasAllowContract())
      MI->setFlag(MachineInstr::MIFlag::FmContract);

    if (Flags.hasApproximateFuncs())
      MI->setFlag(MachineInstr::MIFlag::FmAfn);

    if (Flags.hasAllowReassociation())
      MI->setFlag(MachineInstr::MIFlag::FmReassoc);
  }

  // Emit all of the actual operands of this instruction, adding them to the
  // instruction as appropriate.
  bool HasOptPRefs = NumDefs > NumResults;
  assert((!HasOptPRefs || !HasPhysRegOuts) &&
         "Unable to cope with optional defs and phys regs defs!");
  unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0;
  for (unsigned i = NumSkip; i != NodeOperands; ++i)
    AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II,
               VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);

  // Add scratch registers as implicit def and early clobber
  if (ScratchRegs)
    for (unsigned i = 0; ScratchRegs[i]; ++i)
      MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine |
                                 RegState::EarlyClobber);

  // Transfer all of the memory reference descriptions of this instruction.
  MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
                 cast<MachineSDNode>(Node)->memoperands_end());

  // Insert the instruction into position in the block. This needs to
  // happen before any custom inserter hook is called so that the
  // hook knows where in the block to insert the replacement code.
  MBB->insert(InsertPos, MIB);

  // The MachineInstr may also define physregs instead of virtregs.  These
  // physreg values can reach other instructions in different ways:
  //
  // 1. When there is a use of a Node value beyond the explicitly defined
  //    virtual registers, we emit a CopyFromReg for one of the implicitly
  //    defined physregs.  This only happens when HasPhysRegOuts is true.
  //
  // 2. A CopyFromReg reading a physreg may be glued to this instruction.
  //
  // 3. A glued instruction may implicitly use a physreg.
  //
  // 4. A glued instruction may use a RegisterSDNode operand.
  //
  // Collect all the used physreg defs, and make sure that any unused physreg
  // defs are marked as dead.
  SmallVector<unsigned, 8> UsedRegs;

  // Additional results must be physical register defs.
  if (HasPhysRegOuts) {
    for (unsigned i = NumDefs; i < NumResults; ++i) {
      unsigned Reg = II.getImplicitDefs()[i - NumDefs];
      if (!Node->hasAnyUseOfValue(i))
        continue;
      // This implicitly defined physreg has a use.
      UsedRegs.push_back(Reg);
      EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
    }
  }

  // Scan the glue chain for any used physregs.
  if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
    for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
      if (F->getOpcode() == ISD::CopyFromReg) {
        UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
        continue;
      } else if (F->getOpcode() == ISD::CopyToReg) {
        // Skip CopyToReg nodes that are internal to the glue chain.
        continue;
      }
      // Collect declared implicit uses.
      const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
      UsedRegs.append(MCID.getImplicitUses(),
                      MCID.getImplicitUses() + MCID.getNumImplicitUses());
      // In addition to declared implicit uses, we must also check for
      // direct RegisterSDNode operands.
      for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
        if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
          unsigned Reg = R->getReg();
          if (TargetRegisterInfo::isPhysicalRegister(Reg))
            UsedRegs.push_back(Reg);
        }
    }
  }

  // Finally mark unused registers as dead.
  if (!UsedRegs.empty() || II.getImplicitDefs())
    MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);

  // Run post-isel target hook to adjust this instruction if needed.
  if (II.hasPostISelHook())
    TLI->AdjustInstrPostInstrSelection(*MIB, Node);
}
Code Example #15
/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
/// implicit physical register output.
void ScheduleDAGSDNodes::EmitCopyFromReg(SDNode *Node, unsigned ResNo,
                                         bool IsClone, bool IsCloned,
                                         unsigned SrcReg,
                                         DenseMap<SDValue, unsigned> &VRBaseMap) {
  unsigned VRBase = 0;
  if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
    // Just use the input register directly!
    SDValue Op(Node, ResNo);
    if (IsClone)
      VRBaseMap.erase(Op);
    bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
    isNew = isNew; // Silence compiler warning.
    assert(isNew && "Node emitted out of order - early");
    return;
  }

  // If the node is only used by a CopyToReg and the dest reg is a vreg, use
  // the CopyToReg'd destination register instead of creating a new vreg.
  bool MatchReg = true;
  const TargetRegisterClass *UseRC = NULL;
  if (!IsClone && !IsCloned)
    for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
         UI != E; ++UI) {
      SDNode *User = *UI;
      bool Match = true;
      if (User->getOpcode() == ISD::CopyToReg && 
          User->getOperand(2).getNode() == Node &&
          User->getOperand(2).getResNo() == ResNo) {
        unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
        if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
          VRBase = DestReg;
          Match = false;
        } else if (DestReg != SrcReg)
          Match = false;
      } else {
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          SDValue Op = User->getOperand(i);
          if (Op.getNode() != Node || Op.getResNo() != ResNo)
            continue;
          MVT VT = Node->getValueType(Op.getResNo());
          if (VT == MVT::Other || VT == MVT::Flag)
            continue;
          Match = false;
          if (User->isMachineOpcode()) {
            const TargetInstrDesc &II = TII->get(User->getMachineOpcode());
            const TargetRegisterClass *RC =
              getInstrOperandRegClass(TRI, II, i+II.getNumDefs());
            if (!UseRC)
              UseRC = RC;
            else if (RC) {
              if (UseRC->hasSuperClass(RC))
                UseRC = RC;
              else
                assert((UseRC == RC || RC->hasSuperClass(UseRC)) &&
                       "Multiple uses expecting different register classes!");
            }
          }
        }
      }
      MatchReg &= Match;
      if (VRBase)
        break;
    }

  MVT VT = Node->getValueType(ResNo);
  const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
  SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT);
  
  // Figure out the register class to create for the destreg.
  if (VRBase) {
    DstRC = MRI.getRegClass(VRBase);
  } else if (UseRC) {
    assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
    DstRC = UseRC;
  } else {
    DstRC = TLI->getRegClassFor(VT);
  }
    
  // If all uses are reading from the src physical register and copying the
  // register is either impossible or very expensive, then don't create a copy.
  if (MatchReg && SrcRC->getCopyCost() < 0) {
    VRBase = SrcReg;
  } else {
    // Create the reg, emit the copy.
    VRBase = MRI.createVirtualRegister(DstRC);
    bool Emitted = TII->copyRegToReg(*BB, InsertPos, VRBase, SrcReg,
                                     DstRC, SrcRC);
    // If the target didn't handle the copy with different register
    // classes and the destination is a subset of the source,
    // try a normal same-RC copy.
    if (!Emitted && DstRC->hasSuperClass(SrcRC))
      Emitted = TII->copyRegToReg(*BB, InsertPos, VRBase, SrcReg,
                                  SrcRC, SrcRC);

    assert(Emitted && "Unable to issue a copy instruction!\n");
  }

  SDValue Op(Node, ResNo);
  if (IsClone)
    VRBaseMap.erase(Op);
  bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
  isNew = isNew; // Silence compiler warning.
  assert(isNew && "Node emitted out of order - early");
}
Code Example #16
/// Returns single number reflecting benefit of scheduling SU
/// in the current cycle.
signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
  // Initial trivial priority.
  signed ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (SU->isScheduled)
    return ResCount;

  // Forced priority is high.
  if (SU->isScheduleHigh)
    ResCount += PriorityOne;

  // Adaptable scheduling
  // A small, but very parallel
  // region, where reg pressure is an issue.
  if (HorizontalVerticalBalance > RegPressureThreshold) {
    // Critical path first
    ResCount += (SU->getHeight() * ScaleTwo);
    // If resources are available for it, multiply the
    // chance of scheduling.
    if (isResourceAvailable(SU))
      ResCount <<= FactorOne;

    // Consider change to reg pressure from scheduling
    // this SU.
    ResCount -= (regPressureDelta(SU,true) * ScaleOne);
  }
  // Default heuristic, greedy and
  // critical path driven.
  else {
    // Critical path first.
    ResCount += (SU->getHeight() * ScaleTwo);
    // Now see how many instructions are blocked by this SU.
    ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
    // If resources are available for it, multiply the
    // chance of scheduling.
    if (isResourceAvailable(SU))
      ResCount <<= FactorOne;

    ResCount -= (regPressureDelta(SU) * ScaleTwo);
  }

  // These are platform specific things.
  // Will need to go into the back end
  // and accessed from here via a hook.
  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
    if (N->isMachineOpcode()) {
      const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
      if (TID.isCall())
        ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
    }
    else
      switch (N->getOpcode()) {
      default:  break;
      case ISD::TokenFactor:
      case ISD::CopyFromReg:
      case ISD::CopyToReg:
        ResCount += PriorityFive;
        break;

      case ISD::INLINEASM:
        ResCount += PriorityFour;
        break;
      }
  }
  return ResCount;
}
Code Example #17
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case ISD::BUILD_VECTOR: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128 bits reg copy when going through TwoAddressInstructions
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    SDValue RegSeqArgs[9] = {
      CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
    };
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[2 * i + 1] = N->getOperand(i);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
        RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet.  Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());

    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
                              Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
          if (ImmReg == AMDGPU::ALU_LITERAL_X) {
            // We can only use literal constants (e.g. AMDGPU::ZERO,
            // AMDGPU::ONE, etc) in machine opcodes.
            continue;
          }
      } else {
        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
            (TII->get(Use->getMachineOpcode()).TSFlags &
            R600_InstFlag::VECTOR)) {
          continue;
        }

        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM);
        assert(ImmIdx != -1);

        // subtract one from ImmIdx, because the DST operand is usually index
        // 0 for MachineInstrs, but we have no DST in the Ops vector.
        ImmIdx--;

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
          assert(C);

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }
  SDNode *Result = SelectCode(N);

  // Fold operands of selected node

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
        && TII->isALUInstr(Result->getMachineOpcode())) {
      // Fold FNEG/FABS/CONST_ADDRESS
      // TODO: Isel can generate multiple MachineInst, we need to recursively
      // parse Result
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If node has a single use which is CLAMP_R600, folds it
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
            TII->getOperandIdx(Result->getMachineOpcode(), R600Operands::CLAMP);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
              Result->getMachineOpcode(), PotentialClamp->getVTList(),
              Ops.data(), NumOp);
        }
      }
    }
  }

  return Result;
}
Code Example #18
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  const R600InstrInfo *TII =
                      static_cast<const R600InstrInfo*>(TM.getInstrInfo());
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case AMDGPUISD::CONST_ADDRESS: {
    for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
                              I != SDNode::use_end(); I = Next) {
      Next = llvm::next(I);
      if (!I->isMachineOpcode()) {
        continue;
      }
      unsigned Opcode = I->getMachineOpcode();
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SrcIdx = I.getOperandNo();
      int SelIdx;
      // Unlike MachineInstrs, SDNodes do not have results in their operand
      // list, so we need to increment the SrcIdx, since
      // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
      if (HasDst) {
        SrcIdx++;
      }

      SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
      if (SelIdx < 0) {
        continue;
      }

      SDValue CstOffset;
      if (N->getValueType(0).isVector() ||
          !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
        continue;

      // Gather constants values
      int SrcIndices[] = {
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
      };
      std::vector<unsigned> Consts;
      for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
        int OtherSrcIdx = SrcIndices[i];
        int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
        if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
          continue;
        }
        if (HasDst) {
          OtherSrcIdx--;
          OtherSelIdx--;
        }
        if (RegisterSDNode *Reg =
                         dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
          if (Reg->getReg() == AMDGPU::ALU_CONST) {
            ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
            Consts.push_back(Cst->getZExtValue());
          }
        }
      }

      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
      Consts.push_back(Cst->getZExtValue());
      if (!TII->fitsConstReadLimitations(Consts))
        continue;

      // Convert back to SDNode indices
      if (HasDst) {
        SrcIdx--;
        SelIdx--;
      }
      std::vector<SDValue> Ops;
      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
        if (i == SrcIdx) {
          Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
        } else if (i == SelIdx) {
          Ops.push_back(CstOffset);
        } else {
          Ops.push_back(I->getOperand(i));
        }
      }
      CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }

    unsigned RegClassID;
    switch(N->getValueType(0).getVectorNumElements()) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128 bits reg copy when going through TwoAddressInstructions
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    SDValue RegSeqArgs[9] = {
      CurDAG->getTargetConstant(RegClassID, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
    };
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[2 * i + 1] = N->getOperand(i);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
        RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  SDLoc(N), N->getValueType(0), Ops);
  }

  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet.  Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }

    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
                              Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
          if (ImmReg == AMDGPU::ALU_LITERAL_X) {
            // We can only use literal constants (e.g. AMDGPU::ZERO,
            // AMDGPU::ONE, etc) in machine opcodes.
            continue;
          }
      } else {
        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
            (TII->get(Use->getMachineOpcode()).TSFlags &
            R600_InstFlag::VECTOR)) {
          continue;
        }

        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
                                        AMDGPU::OpName::literal);
        if (ImmIdx == -1) {
          continue;
        }

        if (TII->getOperandIdx(Use->getMachineOpcode(),
                               AMDGPU::OpName::dst) != -1) {
          // subtract one from ImmIdx, because the DST operand is usually index
          // 0 for MachineInstrs, but we have no DST in the Ops vector.
          ImmIdx--;
        }

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
          assert(C);

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }
  SDNode *Result = SelectCode(N);

  // Fold operands of selected node

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
    if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

    }
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
        && TII->hasInstrModifiers(Result->getMachineOpcode())) {
      // Fold FNEG/FABS
      // TODO: Isel can generate multiple MachineInstrs; we need to recursively
      // parse Result
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If the node has a single use which is CLAMP_R600, fold it
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
            TII->getOperandIdx(Result->getMachineOpcode(), AMDGPU::OpName::clamp);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
              Result->getMachineOpcode(), PotentialClamp->getVTList(),
              Ops.data(), NumOp);
        }
      }
    }
  }

  return Result;
}
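
The literal-folding loop above copies each user's operand list, adjusts the literal index when a DST operand is present, and then rewrites two slots: the literal value and the operand that referenced the constant. The standalone sketch below models only that index bookkeeping with plain STL types; FakeUse, kLiteralReg, and foldImmediate are hypothetical names invented for this illustration and are not part of the LLVM or AMDGPU APIs.

// A minimal sketch of the literal-folding bookkeeping, assuming a simplified
// operand model instead of SDNode/MachineInstr.
#include <cstdint>
#include <optional>
#include <vector>

struct FakeUse {
  std::vector<int64_t> Operands; // operand values, including a literal slot
  int LiteralIdx;                // index of the literal slot, -1 if none
  bool HasDstOperand;            // MachineInstr form carries a dst at index 0
};

constexpr int64_t kLiteralReg = -1; // stand-in for AMDGPU::ALU_LITERAL_X

// Returns the rewritten operand list, or nothing if folding is not possible.
std::optional<std::vector<int64_t>>
foldImmediate(const FakeUse &Use, unsigned OperandNo, int64_t ImmValue) {
  if (Use.LiteralIdx < 0)
    return std::nullopt;            // instruction has no literal slot
  int ImmIdx = Use.LiteralIdx;
  if (Use.HasDstOperand)
    --ImmIdx;                       // the dst is absent from the operand vector
  std::vector<int64_t> Ops = Use.Operands;
  if (Ops[ImmIdx] != 0)
    return std::nullopt;            // literal slot is already occupied
  Ops[ImmIdx] = ImmValue;           // set the immediate value
  Ops[OperandNo] = kLiteralReg;     // point the use at the literal register
  return Ops;
}

The real code additionally skips vector instructions and non-ALU opcodes before attempting the fold; those checks are target queries and are omitted from the sketch.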
Code example #19
/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
/// This function finds loads of the same base and different offsets. If the
/// offsets are not far apart (target specific), it adds MVT::Flag inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
void ScheduleDAGSDNodes::ClusterNeighboringLoads() {
  SmallPtrSet<SDNode*, 16> Visited;
  SmallVector<int64_t, 4> Offsets;
  DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    SDNode *Node = &*NI;
    if (!Node || !Node->isMachineOpcode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    if (!TID.mayLoad())
      continue;

    SDNode *Chain = 0;
    unsigned NumOps = Node->getNumOperands();
    if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
      Chain = Node->getOperand(NumOps-1).getNode();
    if (!Chain)
      continue;

    // Look for other loads of the same chain. Find loads that are loading from
    // the same base pointer and different offsets.
    Visited.clear();
    Offsets.clear();
    O2SMap.clear();
    bool Cluster = false;
    SDNode *Base = Node;
    int64_t BaseOffset;
    for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
         I != E; ++I) {
      SDNode *User = *I;
      if (User == Node || !Visited.insert(User))
        continue;
      int64_t Offset1, Offset2;
      if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
          Offset1 == Offset2)
        // FIXME: Should be ok if the addresses are identical. But earlier
        // optimizations really should have eliminated one of the loads.
        continue;
      if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
        Offsets.push_back(Offset1);
      O2SMap.insert(std::make_pair(Offset2, User));
      Offsets.push_back(Offset2);
      if (Offset2 < Offset1) {
        Base = User;
        BaseOffset = Offset2;
      } else {
        BaseOffset = Offset1;
      }
      Cluster = true;
    }

    if (!Cluster)
      continue;

    // Sort them in increasing order.
    std::sort(Offsets.begin(), Offsets.end());

    // Check if the loads are close enough.
    SmallVector<SDNode*, 4> Loads;
    unsigned NumLoads = 0;
    int64_t BaseOff = Offsets[0];
    SDNode *BaseLoad = O2SMap[BaseOff];
    Loads.push_back(BaseLoad);
    for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
      int64_t Offset = Offsets[i];
      SDNode *Load = O2SMap[Offset];
      if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,
                                        NumLoads))
        break; // Stop right here. Ignore loads that are further away.
      Loads.push_back(Load);
      ++NumLoads;
    }

    if (NumLoads == 0)
      continue;

    // Cluster loads by adding MVT::Flag outputs and inputs. This also
    // ensures they are scheduled in order of increasing addresses.
    SDNode *Lead = Loads[0];
    AddFlags(Lead, SDValue(0,0), true, DAG);
    SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1);
    for (unsigned i = 1, e = Loads.size(); i != e; ++i) {
      bool OutFlag = i < e-1;
      SDNode *Load = Loads[i];
      AddFlags(Load, InFlag, OutFlag, DAG);
      if (OutFlag)
        InFlag = SDValue(Load, Load->getNumValues()-1);
      ++LoadsClustered;
    }
  }
}
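
The clustering decision above boils down to: collect the offsets of loads that share a base pointer, sort them, and keep the prefix that stays close enough to the lowest offset. Below is a minimal STL-only sketch of that idea; clusterLoads, LoadId, and the MaxDistance cutoff are hypothetical simplifications of what shouldScheduleLoadsNear decides in a target-specific way.

// A standalone sketch of the offset-clustering idea, assuming a plain
// offset-to-load map instead of the scheduler's O2SMap of SDNodes.
#include <algorithm>
#include <cstdint>
#include <map>
#include <vector>

using LoadId = unsigned;

std::vector<LoadId> clusterLoads(const std::map<int64_t, LoadId> &OffsetToLoad,
                                 int64_t MaxDistance) {
  std::vector<int64_t> Offsets;
  for (const auto &Entry : OffsetToLoad)
    Offsets.push_back(Entry.first);
  std::sort(Offsets.begin(), Offsets.end()); // increasing addresses

  std::vector<LoadId> Cluster;
  if (Offsets.empty())
    return Cluster;
  int64_t BaseOff = Offsets.front();
  Cluster.push_back(OffsetToLoad.at(BaseOff));
  for (size_t i = 1; i < Offsets.size(); ++i) {
    if (Offsets[i] - BaseOff > MaxDistance)
      break;                                 // ignore loads that are further away
    Cluster.push_back(OffsetToLoad.at(Offsets[i]));
  }
  return Cluster;
}

In the real pass the surviving loads are then chained together with glue edges (AddFlags), which is what forces the scheduler to keep them adjacent and in address order.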
Code example #20
File: InstrEmitter.cpp  Project: jizhonghao/freebsd
/// EmitMachineNode - Generate machine code for a target-specific node and
/// needed dependencies.
///
void InstrEmitter::
EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
                DenseMap<SDValue, unsigned> &VRBaseMap) {
    unsigned Opc = Node->getMachineOpcode();

    // Handle subreg insert/extract specially
    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
            Opc == TargetOpcode::INSERT_SUBREG ||
            Opc == TargetOpcode::SUBREG_TO_REG) {
        EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
        return;
    }

    // Handle COPY_TO_REGCLASS specially.
    if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
        EmitCopyToRegClassNode(Node, VRBaseMap);
        return;
    }

    // Handle REG_SEQUENCE specially.
    if (Opc == TargetOpcode::REG_SEQUENCE) {
        EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
        return;
    }

    if (Opc == TargetOpcode::IMPLICIT_DEF)
        // We want a unique VR for each IMPLICIT_DEF use.
        return;

    const MCInstrDesc &II = TII->get(Opc);
    unsigned NumResults = CountResults(Node);
    unsigned NodeOperands = CountOperands(Node);
    bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
#ifndef NDEBUG
    unsigned NumMIOperands = NodeOperands + NumResults;
    if (II.isVariadic())
        assert(NumMIOperands >= II.getNumOperands() &&
               "Too few operands for a variadic node!");
    else
        assert(NumMIOperands >= II.getNumOperands() &&
               NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() &&
               "#operands for dag node doesn't match .td file!");
#endif

    // Create the new machine instruction.
    MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);

    // Add result register values for things that are defined by this
    // instruction.
    if (NumResults)
        CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);

    // Emit all of the actual operands of this instruction, adding them to the
    // instruction as appropriate.
    bool HasOptPRefs = II.getNumDefs() > NumResults;
    assert((!HasOptPRefs || !HasPhysRegOuts) &&
           "Unable to cope with optional defs and phys regs defs!");
    unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
    for (unsigned i = NumSkip; i != NodeOperands; ++i)
        AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
                   VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);

    // Transfer all of the memory reference descriptions of this instruction.
    MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
                   cast<MachineSDNode>(Node)->memoperands_end());

    // Insert the instruction into position in the block. This needs to
    // happen before any custom inserter hook is called so that the
    // hook knows where in the block to insert the replacement code.
    MBB->insert(InsertPos, MI);

    // The MachineInstr may also define physregs instead of virtregs.  These
    // physreg values can reach other instructions in different ways:
    //
    // 1. When there is a use of a Node value beyond the explicitly defined
    //    virtual registers, we emit a CopyFromReg for one of the implicitly
    //    defined physregs.  This only happens when HasPhysRegOuts is true.
    //
    // 2. A CopyFromReg reading a physreg may be glued to this instruction.
    //
    // 3. A glued instruction may implicitly use a physreg.
    //
    // 4. A glued instruction may use a RegisterSDNode operand.
    //
    // Collect all the used physreg defs, and make sure that any unused physreg
    // defs are marked as dead.
    SmallVector<unsigned, 8> UsedRegs;

    // Additional results must be physical register defs.
    if (HasPhysRegOuts) {
        for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
            unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
            if (!Node->hasAnyUseOfValue(i))
                continue;
            // This implicitly defined physreg has a use.
            UsedRegs.push_back(Reg);
            EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
        }
    }

    // Scan the glue chain for any used physregs.
    if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
        for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
            if (F->getOpcode() == ISD::CopyFromReg) {
                UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
                continue;
            } else if (F->getOpcode() == ISD::CopyToReg) {
                // Skip CopyToReg nodes that are internal to the glue chain.
                continue;
            }
            // Collect declared implicit uses.
            const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
            UsedRegs.append(MCID.getImplicitUses(),
                            MCID.getImplicitUses() + MCID.getNumImplicitUses());
            // In addition to declared implicit uses, we must also check for
            // direct RegisterSDNode operands.
            for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
                if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
                    unsigned Reg = R->getReg();
                    if (TargetRegisterInfo::isPhysicalRegister(Reg))
                        UsedRegs.push_back(Reg);
                }
        }
    }

    // Finally mark unused registers as dead.
    if (!UsedRegs.empty() || II.getImplicitDefs())
        MI->setPhysRegsDeadExcept(UsedRegs, *TRI);

    // Run post-isel target hook to adjust this instruction if needed.
#ifdef NDEBUG
    if (II.hasPostISelHook())
#endif
        TLI->AdjustInstrPostInstrSelection(MI, Node);
}
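
One easy-to-miss detail in EmitMachineNode is the operand index arithmetic around optional defs: when the instruction description declares more defs than the node produces results, the first NumSkip node operands are skipped and the remaining ones are shifted past the defs. The sketch below isolates just that calculation; OperandLayout and miOperandIndex are hypothetical names used only for this illustration.

// A small sketch of the operand bookkeeping, assuming only the two counts
// that EmitMachineNode reads from the MCInstrDesc and the node.
#include <cassert>

struct OperandLayout {
  unsigned NumDefsInDesc;  // II.getNumDefs()
  unsigned NumNodeResults; // CountResults(Node)
};

// Maps a node operand index to its position in the emitted MachineInstr,
// mirroring the expression i - NumSkip + II.getNumDefs() above.
unsigned miOperandIndex(const OperandLayout &L, unsigned NodeOpIdx) {
  unsigned NumSkip =
      L.NumDefsInDesc > L.NumNodeResults ? L.NumDefsInDesc - L.NumNodeResults : 0;
  assert(NodeOpIdx >= NumSkip && "operand is one of the skipped optional defs");
  return NodeOpIdx - NumSkip + L.NumDefsInDesc;
}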
Code example #21
File: InstrEmitter.cpp  Project: jizhonghao/freebsd
/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
/// implicit physical register output.
void InstrEmitter::
EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
                unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
    unsigned VRBase = 0;
    if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
        // Just use the input register directly!
        SDValue Op(Node, ResNo);
        if (IsClone)
            VRBaseMap.erase(Op);
        bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
        (void)isNew; // Silence compiler warning.
        assert(isNew && "Node emitted out of order - early");
        return;
    }

    // If the node is only used by a CopyToReg and the dest reg is a vreg, use
    // the CopyToReg'd destination register instead of creating a new vreg.
    bool MatchReg = true;
    const TargetRegisterClass *UseRC = NULL;
    EVT VT = Node->getValueType(ResNo);

    // Stick to the preferred register classes for legal types.
    if (TLI->isTypeLegal(VT))
        UseRC = TLI->getRegClassFor(VT);

    if (!IsClone && !IsCloned)
        for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
                UI != E; ++UI) {
            SDNode *User = *UI;
            bool Match = true;
            if (User->getOpcode() == ISD::CopyToReg &&
                    User->getOperand(2).getNode() == Node &&
                    User->getOperand(2).getResNo() == ResNo) {
                unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
                if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
                    VRBase = DestReg;
                    Match = false;
                } else if (DestReg != SrcReg)
                    Match = false;
            } else {
                for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
                    SDValue Op = User->getOperand(i);
                    if (Op.getNode() != Node || Op.getResNo() != ResNo)
                        continue;
                    EVT VT = Node->getValueType(Op.getResNo());
                    if (VT == MVT::Other || VT == MVT::Glue)
                        continue;
                    Match = false;
                    if (User->isMachineOpcode()) {
                        const MCInstrDesc &II = TII->get(User->getMachineOpcode());
                        const TargetRegisterClass *RC = 0;
                        if (i+II.getNumDefs() < II.getNumOperands())
                            RC = TII->getRegClass(II, i+II.getNumDefs(), TRI);
                        if (!UseRC)
                            UseRC = RC;
                        else if (RC) {
                            const TargetRegisterClass *ComRC =
                                TRI->getCommonSubClass(UseRC, RC);
                            // If multiple uses expect disjoint register classes, we emit
                            // copies in AddRegisterOperand.
                            if (ComRC)
                                UseRC = ComRC;
                        }
                    }
                }
            }
            MatchReg &= Match;
            if (VRBase)
                break;
        }

    const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
    SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);

    // Figure out the register class to create for the destreg.
    if (VRBase) {
        DstRC = MRI->getRegClass(VRBase);
    } else if (UseRC) {
        assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
        DstRC = UseRC;
    } else {
        DstRC = TLI->getRegClassFor(VT);
    }

    // If all uses are reading from the src physical register and copying the
    // register is either impossible or very expensive, then don't create a copy.
    if (MatchReg && SrcRC->getCopyCost() < 0) {
        VRBase = SrcReg;
    } else {
        // Create the reg, emit the copy.
        VRBase = MRI->createVirtualRegister(DstRC);
        BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
                VRBase).addReg(SrcReg);
    }

    SDValue Op(Node, ResNo);
    if (IsClone)
        VRBaseMap.erase(Op);
    bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
    (void)isNew; // Silence compiler warning.
    assert(isNew && "Node emitted out of order - early");
}
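
EmitCopyFromReg narrows the destination register class by intersecting the type's preferred class with the constraints of each use, keeping the current class when the intersection would be empty and letting later copy insertion resolve any remaining mismatch. The fragment below is a deliberately simplified model of that narrowing, using string sets instead of TargetRegisterClass; narrowUseClass and RegClass are hypothetical and only illustrate the getCommonSubClass-style intersection.

// A simplified sketch of register-class narrowing, assuming classes are
// modeled as sets of register names rather than TargetRegisterClass objects.
#include <set>
#include <string>
#include <vector>

using RegClass = std::set<std::string>; // illustrative stand-in only

RegClass narrowUseClass(RegClass UseRC,
                        const std::vector<RegClass> &UseConstraints) {
  for (const RegClass &RC : UseConstraints) {
    RegClass Common;
    for (const std::string &Reg : RC)
      if (UseRC.count(Reg))
        Common.insert(Reg);
    if (!Common.empty())
      UseRC = Common; // analogous to TRI->getCommonSubClass(UseRC, RC)
  }
  return UseRC;
}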