Code example #1
void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
  SDNode *N = SU->getNode();

  // TokenFactor operands are considered zero latency, and some schedulers
  // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
  // whenever node latency is nonzero.
  if (N && N->getOpcode() == ISD::TokenFactor) {
    SU->Latency = 0;
    return;
  }

  // Check to see if the scheduler cares about latencies.
  if (forceUnitLatencies()) {
    SU->Latency = 1;
    return;
  }

  if (!InstrItins || InstrItins->isEmpty()) {
    if (N && N->isMachineOpcode() &&
        TII->isHighLatencyDef(N->getMachineOpcode()))
      SU->Latency = HighLatencyCycles;
    else
      SU->Latency = 1;
    return;
  }

  // Compute the latency for the node.  We use the sum of the latencies for
  // all nodes glued together into this SUnit.
  SU->Latency = 0;
  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
    if (N->isMachineOpcode())
      SU->Latency += TII->getInstrLatency(InstrItins, N);
}
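
The loop at the end of computeLatency() above is the traversal pattern most of the examples on this page share: one SUnit covers a whole chain of SDNodes linked through glue, and per-node properties are accumulated by walking getGluedNode(). The following is a minimal, self-contained sketch of that accumulation; the Node struct and its fields are illustrative stand-ins, not LLVM API.

#include <cstdio>

// Illustrative stand-in for a glued SDNode chain. In real LLVM code the
// three fields correspond to SDNode::isMachineOpcode(),
// TII->getInstrLatency(InstrItins, N), and SDNode::getGluedNode().
struct Node {
  bool isMachine;
  unsigned latency;
  Node *glued;
};

// Sum latencies over all nodes glued into one scheduling unit, mirroring
// the final loop of computeLatency() above.
unsigned sumGluedLatency(Node *head) {
  unsigned total = 0;
  for (Node *n = head; n; n = n->glued)
    if (n->isMachine)
      total += n->latency;
  return total;
}

int main() {
  Node c{true, 2, nullptr}, b{false, 0, &c}, a{true, 3, &b};
  std::printf("%u\n", sumGluedLatency(&a)); // prints 5: 3 + (skipped) + 2
  return 0;
}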
Code example #2
void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
  unsigned NodeNumDefs = 0;
  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
    if (N->isMachineOpcode()) {
      const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
      // No register need be allocated for this.
      if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
        NodeNumDefs = 0;
        break;
      }
      NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
    }
    else
      switch(N->getOpcode()) {
        default:     break;
        case ISD::CopyFromReg:
          NodeNumDefs++;
          break;
        case ISD::INLINEASM:
          NodeNumDefs++;
          break;
      }

  SU->NumRegDefsLeft = NodeNumDefs;
}
Code example #3
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
  
  // Compute the latency for the node.  We use the sum of the latencies for
  // all nodes flagged together into this SUnit.
  SU->Latency = 0;
  for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
    if (N->isMachineOpcode()) {
      SU->Latency += InstrItins.
        getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
    }
}
Code example #4
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
                                              SmallVectorImpl<unsigned> &LRegs){
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SDep &Pred : SU->Preds) {
    if (Pred.isAssignedRegDep()) {
      CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
    }
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }
    if (!Node->isMachineOpcode())
      continue;
    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (!MCID.ImplicitDefs)
      continue;
    for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
    }
  }
  return !LRegs.empty();
}
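
The INLINEASM branch above steps through variable-length operand groups: each group begins with a flag word encoding its kind and register count, so the cursor advances by 1 + NumVals per group. Here is a self-contained sketch of that skip-by-group iteration over a flat operand array; the flag encoding below is deliberately simplified and is not the real InlineAsm bit layout.

#include <cstddef>
#include <cstdio>
#include <vector>

// Simplified stand-in for the inline-asm operand stream: each group is a
// flag word (kind in the low bits, register count above) followed by
// NumVals register entries. The real encoding lives in llvm/IR/InlineAsm.h.
enum Kind { RegUse = 1, RegDef = 2, Clobber = 3 };

static unsigned getKind(unsigned flags) { return flags & 0x7; }
static unsigned getNumVals(unsigned flags) { return flags >> 3; }

void visitDefs(const std::vector<unsigned> &ops) {
  for (std::size_t i = 0; i < ops.size();) {
    unsigned flags = ops[i++];          // consume the flag/ID word
    unsigned numVals = getNumVals(flags);
    if (getKind(flags) == RegDef || getKind(flags) == Clobber) {
      for (; numVals; --numVals, ++i)   // consume the group's registers
        std::printf("def/clobber reg %u\n", ops[i]);
    } else {
      i += numVals;                     // skip a group we don't care about
    }
  }
}

int main() {
  // One RegDef group {r7}, one RegUse group {r1, r2}, one Clobber {r9}.
  std::vector<unsigned> ops = {RegDef | (1u << 3), 7,
                               RegUse | (2u << 3), 1, 2,
                               Clobber | (1u << 3), 9};
  visitDefs(ops);
  return 0;
}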
Code example #5
File: ScheduleDAGSDNodes.cpp (project: happz/llvm)
/// ClusterNodes - Cluster certain nodes which should be scheduled together.
///
void ScheduleDAGSDNodes::ClusterNodes() {
  for (SDNode &NI : DAG->allnodes()) {
    SDNode *Node = &NI;
    if (!Node || !Node->isMachineOpcode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    if (MCID.mayLoad())
      // Cluster loads from "near" addresses into combined SUnits.
      ClusterNeighboringLoads(Node);
  }
}
Code example #6
File: VISelDAGToDAG.cpp (project: b14ckkn19ht/Shang)
SDNode *VDAGToDAGISel::SelectBitSlice(SDNode *N) {
  SDNode *Op = N->getOperand(0).getNode();
  // Emit the constant bit slice to constant directly if possible.
  if (ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Op))
    return SelectConstBitSlice(CSD, N);

  assert(Op->getOpcode() != VTMISD::BitSlice
         && (!Op->isMachineOpcode()
             || Op->getMachineOpcode() != VTM::VOpBitSlice)
         && "DAGCombine should handle this!");

  return SelectSimpleNode(N, VTM::VOpBitSlice);
}
Code example #7
/// ClusterNodes - Cluster certain nodes which should be scheduled together.
///
void ScheduleDAGSDNodes::ClusterNodes() {
  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    SDNode *Node = &*NI;
    if (!Node || !Node->isMachineOpcode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    if (MCID.mayLoad())
      // Cluster loads from "near" addresses into combined SUnits.
      ClusterNeighboringLoads(Node);
  }
}
Code example #8
File: SparcISelDAGToDAG.cpp (project: aaasz/SHP)
SDNode *SparcDAGToDAGISel::Select(SDValue Op) {
  SDNode *N = Op.getNode();
  DebugLoc dl = N->getDebugLoc();
  if (N->isMachineOpcode())
    return NULL;   // Already selected.

  switch (N->getOpcode()) {
  default: break;
  case SPISD::GLOBAL_BASE_REG:
    return getGlobalBaseReg();

  case ISD::SDIV:
  case ISD::UDIV: {
    // FIXME: should use a custom expander to expose the SRA to the dag.
    SDValue DivLHS = N->getOperand(0);
    SDValue DivRHS = N->getOperand(1);

    // Set the Y register to the high-part.
    SDValue TopPart;
    if (N->getOpcode() == ISD::SDIV) {
      TopPart = SDValue(CurDAG->getMachineNode(SP::SRAri, dl, MVT::i32, DivLHS,
                                   CurDAG->getTargetConstant(31, MVT::i32)), 0);
    } else {
      TopPart = CurDAG->getRegister(SP::G0, MVT::i32);
    }
    TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Flag, TopPart,
                                     CurDAG->getRegister(SP::G0, MVT::i32)), 0);

    // FIXME: Handle div by immediate.
    unsigned Opcode = N->getOpcode() == ISD::SDIV ? SP::SDIVrr : SP::UDIVrr;
    return CurDAG->SelectNodeTo(N, Opcode, MVT::i32, DivLHS, DivRHS,
                                TopPart);
  }
  case ISD::MULHU:
  case ISD::MULHS: {
    // FIXME: Handle mul by immediate.
    SDValue MulLHS = N->getOperand(0);
    SDValue MulRHS = N->getOperand(1);
    unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr;
    SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Flag,
                                         MulLHS, MulRHS);
    // The high part is in the Y register.
    return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1));
  }
  }

  return SelectCode(Op);
}
Code example #9
void ScheduleDAGLinearize::Schedule() {
  LLVM_DEBUG(dbgs() << "********** DAG Linearization **********\n");

  SmallVector<SDNode*, 8> Glues;
  unsigned DAGSize = 0;
  for (SDNode &Node : DAG->allnodes()) {
    SDNode *N = &Node;

    // Use node id to record degree.
    unsigned Degree = N->use_size();
    N->setNodeId(Degree);
    unsigned NumVals = N->getNumValues();
    if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
        N->hasAnyUseOfValue(NumVals-1)) {
      SDNode *User = findGluedUser(N);
      if (User) {
        Glues.push_back(N);
        GluedMap.insert(std::make_pair(N, User));
      }
    }

    if (N->isMachineOpcode() ||
        (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
      ++DAGSize;
  }

  for (unsigned i = 0, e = Glues.size(); i != e; ++i) {
    SDNode *Glue = Glues[i];
    SDNode *GUser = GluedMap[Glue];
    unsigned Degree = Glue->getNodeId();
    unsigned UDegree = GUser->getNodeId();

    // Glue user must be scheduled together with the glue operand. So other
    // users of the glue operand must be treated as its users.
    SDNode *ImmGUser = Glue->getGluedUser();
    for (const SDNode *U : Glue->uses())
      if (U == ImmGUser)
        --Degree;
    GUser->setNodeId(UDegree + Degree);
    Glue->setNodeId(1);
  }

  Sequence.reserve(DAGSize);
  ScheduleNode(DAG->getRoot().getNode());
}
Code example #10
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
  // Check to see if the scheduler cares about latencies.
  if (ForceUnitLatencies()) {
    SU->Latency = 1;
    return;
  }

  if (!InstrItins || InstrItins->isEmpty()) {
    SU->Latency = 1;
    return;
  }
  
  // Compute the latency for the node.  We use the sum of the latencies for
  // all nodes glued together into this SUnit.
  SU->Latency = 0;
  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
    if (N->isMachineOpcode())
      SU->Latency += TII->getInstrLatency(InstrItins, N);
}
Code example #11
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
  // Check to see if the scheduler cares about latencies.
  if (ForceUnitLatencies()) {
    SU->Latency = 1;
    return;
  }

  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
  if (InstrItins.isEmpty()) {
    SU->Latency = 1;
    return;
  }
  
  // Compute the latency for the node.  We use the sum of the latencies for
  // all nodes flagged together into this SUnit.
  SU->Latency = 0;
  for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
    if (N->isMachineOpcode()) {
      SU->Latency += InstrItins.
        getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
    }
}
Code example #12
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  const R600InstrInfo *TII =
                      static_cast<const R600InstrInfo*>(TM.getInstrInfo());
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case AMDGPUISD::CONST_ADDRESS: {
    for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
                              I != SDNode::use_end(); I = Next) {
      Next = llvm::next(I);
      if (!I->isMachineOpcode()) {
        continue;
      }
      unsigned Opcode = I->getMachineOpcode();
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SrcIdx = I.getOperandNo();
      int SelIdx;
      // Unlike MachineInstrs, SDNodes do not have results in their operand
      // list, so we need to increment the SrcIdx, since
      // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
      if (HasDst) {
        SrcIdx++;
      }

      SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
      if (SelIdx < 0) {
        continue;
      }

      SDValue CstOffset;
      if (N->getValueType(0).isVector() ||
          !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
        continue;

      // Gather constant values
      int SrcIndices[] = {
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
      };
      std::vector<unsigned> Consts;
      for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
        int OtherSrcIdx = SrcIndices[i];
        int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
        if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
          continue;
        }
        if (HasDst) {
          OtherSrcIdx--;
          OtherSelIdx--;
        }
        if (RegisterSDNode *Reg =
                         dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
          if (Reg->getReg() == AMDGPU::ALU_CONST) {
            ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
            Consts.push_back(Cst->getZExtValue());
          }
        }
      }

      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
      Consts.push_back(Cst->getZExtValue());
      if (!TII->fitsConstReadLimitations(Consts))
        continue;

      // Convert back to SDNode indices
      if (HasDst) {
        SrcIdx--;
        SelIdx--;
      }
      std::vector<SDValue> Ops;
      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
        if (i == SrcIdx) {
          Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
        } else if (i == SelIdx) {
          Ops.push_back(CstOffset);
        } else {
          Ops.push_back(I->getOperand(i));
        }
      }
      CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }

    unsigned RegClassID;
    switch(N->getValueType(0).getVectorNumElements()) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128 bits reg copy when going through TwoAddressInstructions
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    SDValue RegSeqArgs[9] = {
      CurDAG->getTargetConstant(RegClassID, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
    };
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[2 * i + 1] = N->getOperand(i);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
        RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  SDLoc(N), N->getValueType(0), Ops);
  }

  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet.  Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }

    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
                              Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
          if (ImmReg == AMDGPU::ALU_LITERAL_X) {
            // We can only use literal constants (e.g. AMDGPU::ZERO,
            // AMDGPU::ONE, etc) in machine opcodes.
            continue;
          }
      } else {
        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
            (TII->get(Use->getMachineOpcode()).TSFlags &
            R600_InstFlag::VECTOR)) {
          continue;
        }

        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
                                        AMDGPU::OpName::literal);
        if (ImmIdx == -1) {
          continue;
        }

        if (TII->getOperandIdx(Use->getMachineOpcode(),
                               AMDGPU::OpName::dst) != -1) {
          // subtract one from ImmIdx, because the DST operand is usually index
          // 0 for MachineInstrs, but we have no DST in the Ops vector.
          ImmIdx--;
        }

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
          assert(C);

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }
  SDNode *Result = SelectCode(N);

  // Fold operands of selected node

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
    if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

    }
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
        && TII->hasInstrModifiers(Result->getMachineOpcode())) {
      // Fold FNEG/FABS
      // TODO: Isel can generate multiple MachineInst, we need to recursively
      // parse Result
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If node has a single use which is CLAMP_R600, fold it
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
            TII->getOperandIdx(Result->getMachineOpcode(), AMDGPU::OpName::clamp);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
              Result->getMachineOpcode(), PotentialClamp->getVTList(),
              Ops.data(), NumOp);
        }
      }
    }
  }

  return Result;
}
Code example #13
void ScheduleDAGSDNodes::BuildSchedUnits() {
  // During scheduling, the NodeId field of SDNode is used to map SDNodes
  // to their associated SUnits by holding SUnits table indices. A value
  // of -1 means the SDNode does not yet have an associated SUnit.
  unsigned NumNodes = 0;
  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    NI->setNodeId(-1);
    ++NumNodes;
  }

  // Reserve entries in the vector for each of the SUnits we are creating.  This
  // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
  // invalidated.
  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
  // This is a temporary workaround.
  SUnits.reserve(NumNodes * 2);

  // Add all nodes in depth first order.
  SmallVector<SDNode*, 64> Worklist;
  SmallPtrSet<SDNode*, 64> Visited;
  Worklist.push_back(DAG->getRoot().getNode());
  Visited.insert(DAG->getRoot().getNode());

  SmallVector<SUnit*, 8> CallSUnits;
  while (!Worklist.empty()) {
    SDNode *NI = Worklist.pop_back_val();

    // Add all operands to the worklist unless they've already been added.
    for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
      if (Visited.insert(NI->getOperand(i).getNode()))
        Worklist.push_back(NI->getOperand(i).getNode());

    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
      continue;

    // If this node has already been processed, stop now.
    if (NI->getNodeId() != -1) continue;

    SUnit *NodeSUnit = newSUnit(NI);

    // See if anything is glued to this node, if so, add them to glued
    // nodes.  Nodes can have at most one glue input and one glue output.  Glue
    // is required to be the last operand and result of a node.

    // Scan up to find glued preds.
    SDNode *N = NI;
    while (N->getNumOperands() &&
           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
      N = N->getOperand(N->getNumOperands()-1).getNode();
      assert(N->getNodeId() == -1 && "Node already inserted!");
      N->setNodeId(NodeSUnit->NodeNum);
      if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
        NodeSUnit->isCall = true;
    }

    // Scan down to find any glued succs.
    N = NI;
    while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
      SDValue GlueVal(N, N->getNumValues()-1);

      // There are either zero or one users of the Glue result.
      bool HasGlueUse = false;
      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
           UI != E; ++UI)
        if (GlueVal.isOperandOf(*UI)) {
          HasGlueUse = true;
          assert(N->getNodeId() == -1 && "Node already inserted!");
          N->setNodeId(NodeSUnit->NodeNum);
          N = *UI;
          if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
            NodeSUnit->isCall = true;
          break;
        }
      if (!HasGlueUse) break;
    }

    if (NodeSUnit->isCall)
      CallSUnits.push_back(NodeSUnit);

    // Schedule zero-latency TokenFactor below any nodes that may increase the
    // schedule height. Otherwise, ancestors of the TokenFactor may appear to
    // have false stalls.
    if (NI->getOpcode() == ISD::TokenFactor)
      NodeSUnit->isScheduleLow = true;

    // If there are glue operands involved, N is now the bottom-most node
    // of the sequence of nodes that are glued together.
    // Update the SUnit.
    NodeSUnit->setNode(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NodeSUnit->NodeNum);

    // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
    InitNumRegDefsLeft(NodeSUnit);

    // Assign the Latency field of NodeSUnit using target-provided information.
    computeLatency(NodeSUnit);
  }

  // Find all call operands.
  while (!CallSUnits.empty()) {
    SUnit *SU = CallSUnits.pop_back_val();
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->getOpcode() != ISD::CopyToReg)
        continue;
      SDNode *SrcN = SUNode->getOperand(2).getNode();
      if (isPassiveNode(SrcN)) continue;   // Not scheduled.
      SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
      SrcSU->isCallOp = true;
    }
  }
}
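
BuildSchedUnits discovers nodes with an explicit worklist rather than recursion: pop a node, push any operands not yet visited, then process it. Stripped of the glue and SUnit bookkeeping, and with plain STL containers standing in for SmallVector/SmallPtrSet, the traversal skeleton looks roughly like this.

#include <cstdio>
#include <unordered_set>
#include <vector>

struct Node {                         // illustrative stand-in for SDNode
  int id;
  std::vector<Node *> operands;
};

// Visit every node reachable from the root through operand edges,
// mirroring the worklist loop in BuildSchedUnits above.
void visitAll(Node *root) {
  std::vector<Node *> worklist{root};
  std::unordered_set<Node *> visited{root};
  while (!worklist.empty()) {
    Node *n = worklist.back();
    worklist.pop_back();
    for (Node *op : n->operands)
      if (visited.insert(op).second)  // add each operand exactly once
        worklist.push_back(op);
    std::printf("process node %d\n", n->id);
  }
}

int main() {
  Node a{0, {}}, b{1, {&a}}, c{2, {&a, &b}};  // c -> {a, b}, b -> {a}
  visitAll(&c);                               // processes 2, then 1, then 0
  return 0;
}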
Code example #14
void ScheduleDAGSDNodes::AddSchedEdges() {
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = forceUnitLatencies();

  // Pass 2: add the preds, succs, etc.
  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
    SUnit *SU = &SUnits[su];
    SDNode *MainNode = SU->getNode();

    if (MainNode->isMachineOpcode()) {
      unsigned Opc = MainNode->getMachineOpcode();
      const MCInstrDesc &MCID = TII->get(Opc);
      for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
        if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
          SU->isTwoAddress = true;
          break;
        }
      }
      if (MCID.isCommutable())
        SU->isCommutable = true;
    }

    // Find all predecessors and successors of the group.
    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
      if (N->isMachineOpcode() &&
          TII->get(N->getMachineOpcode()).getImplicitDefs()) {
        SU->hasPhysRegClobbers = true;
        unsigned NumUsed = InstrEmitter::CountResults(N);
        while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
          --NumUsed;    // Skip over unused values at the end.
        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
          SU->hasPhysRegDefs = true;
      }

      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        SDNode *OpN = N->getOperand(i).getNode();
        if (isPassiveNode(OpN)) continue;   // Not scheduled.
        SUnit *OpSU = &SUnits[OpN->getNodeId()];
        assert(OpSU && "Node has no SUnit!");
        if (OpSU == SU) continue;           // In the same group.

        EVT OpVT = N->getOperand(i).getValueType();
        assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
        bool isChain = OpVT == MVT::Other;

        unsigned PhysReg = 0;
        int Cost = 1;
        // Determine if this is a physical register dependency.
        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
        assert((PhysReg == 0 || !isChain) &&
               "Chain dependence via physreg data?");
        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
        // emits a copy from the physical register to a virtual register unless
        // it requires a cross class copy (cost < 0). That means we are only
        // treating "expensive to copy" register dependency as physical register
        // dependency. This may change in the future though.
        if (Cost >= 0 && !StressSched)
          PhysReg = 0;

        // If this is a ctrl dep, latency is 1.
        unsigned OpLatency = isChain ? 1 : OpSU->Latency;
        // Special-case TokenFactor chains as zero-latency.
        if (isChain && OpN->getOpcode() == ISD::TokenFactor)
          OpLatency = 0;

        const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                               OpLatency, PhysReg);
        if (!isChain && !UnitLatencies) {
          computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
          ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
        }

        if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
          // Multiple register uses are combined in the same SUnit. For example,
          // we could have a set of glued nodes with all their defs consumed by
          // another set of glued nodes. Register pressure tracking sees this as
          // a single use, so to keep pressure balanced we reduce the defs.
          //
          // We can't tell (without more book-keeping) if this results from
          // glued nodes or duplicate operands. As long as we don't reduce
          // NumRegDefsLeft to zero, we handle the common cases well.
          --OpSU->NumRegDefsLeft;
        }
      }
    }
  }
}
Code example #15
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
SDNode *IA64DAGToDAGISel::Select(SDValue Op) {
    SDNode *N = Op.getNode();
    if (N->isMachineOpcode())
        return NULL;   // Already selected.
    DebugLoc dl = Op.getDebugLoc();

    switch (N->getOpcode()) {
    default:
        break;

    case IA64ISD::BRCALL: { // XXX: this is also a hack!
        SDValue Chain = N->getOperand(0);
        SDValue InFlag;  // Null incoming flag value.

        if(N->getNumOperands()==3) { // we have an incoming chain, callee and flag
            InFlag = N->getOperand(2);
        }

        unsigned CallOpcode;
        SDValue CallOperand;

        // if we can call directly, do so
        if (GlobalAddressSDNode *GASD =
                    dyn_cast<GlobalAddressSDNode>(N->getOperand(1))) {
            CallOpcode = IA64::BRCALL_IPREL_GA;
            CallOperand = CurDAG->getTargetGlobalAddress(GASD->getGlobal(), MVT::i64);
        } else if (isa<ExternalSymbolSDNode>(N->getOperand(1))) {
            // FIXME: we currently NEED this case for correctness, to avoid
            // "non-pic code with imm reloc.n against dynamic symbol" errors
            CallOpcode = IA64::BRCALL_IPREL_ES;
            CallOperand = N->getOperand(1);
        } else {
            // otherwise we need to load the function descriptor,
            // load the branch target (function)'s entry point and GP,
            // branch (call) then restore the GP
            SDValue FnDescriptor = N->getOperand(1);

            // load the branch target's entry point [mem] and
            // GP value [mem+8]
            SDValue targetEntryPoint=
                SDValue(CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, MVT::Other,
                                              FnDescriptor, CurDAG->getEntryNode()), 0);
            Chain = targetEntryPoint.getValue(1);
            SDValue targetGPAddr=
                SDValue(CurDAG->getTargetNode(IA64::ADDS, dl, MVT::i64,
                                              FnDescriptor,
                                              CurDAG->getConstant(8, MVT::i64)), 0);
            Chain = targetGPAddr.getValue(1);
            SDValue targetGP =
                SDValue(CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64,MVT::Other,
                                              targetGPAddr, CurDAG->getEntryNode()), 0);
            Chain = targetGP.getValue(1);

            Chain = CurDAG->getCopyToReg(Chain, dl, IA64::r1, targetGP, InFlag);
            InFlag = Chain.getValue(1);
            Chain = CurDAG->getCopyToReg(Chain, dl, IA64::B6,
                                         targetEntryPoint, InFlag); // FLAG these?
            InFlag = Chain.getValue(1);

            CallOperand = CurDAG->getRegister(IA64::B6, MVT::i64);
            CallOpcode = IA64::BRCALL_INDIRECT;
        }

        // Finally, once everything is setup, emit the call itself
        if (InFlag.getNode())
            Chain = SDValue(CurDAG->getTargetNode(CallOpcode, dl, MVT::Other,
                                                  MVT::Flag, CallOperand, InFlag), 0);
        else // there might be no arguments
            Chain = SDValue(CurDAG->getTargetNode(CallOpcode, dl, MVT::Other,
                                                  MVT::Flag, CallOperand, Chain), 0);
        InFlag = Chain.getValue(1);

        std::vector<SDValue> CallResults;

        CallResults.push_back(Chain);
        CallResults.push_back(InFlag);

        for (unsigned i = 0, e = CallResults.size(); i != e; ++i)
            ReplaceUses(Op.getValue(i), CallResults[i]);
        return NULL;
    }

    case IA64ISD::GETFD: {
        SDValue Input = N->getOperand(0);
        return CurDAG->getTargetNode(IA64::GETFD, dl, MVT::i64, Input);
    }

    case ISD::FDIV:
    case ISD::SDIV:
    case ISD::UDIV:
    case ISD::SREM:
    case ISD::UREM:
        return SelectDIV(Op);

    case ISD::TargetConstantFP: {
        SDValue Chain = CurDAG->getEntryNode(); // this is a constant, so..

        SDValue V;
        ConstantFPSDNode* N2 = cast<ConstantFPSDNode>(N);
        if (N2->getValueAPF().isPosZero()) {
            V = CurDAG->getCopyFromReg(Chain, dl, IA64::F0, MVT::f64);
        } else if (N2->isExactlyValue(N2->getValueType(0) == MVT::f32 ?
                                      APFloat(+1.0f) : APFloat(+1.0))) {
            V = CurDAG->getCopyFromReg(Chain, dl, IA64::F1, MVT::f64);
        } else
            assert(0 && "Unexpected FP constant!");

        ReplaceUses(SDValue(N, 0), V);
        return 0;
    }

    case ISD::FrameIndex: { // TODO: reduce creepyness
        int FI = cast<FrameIndexSDNode>(N)->getIndex();
        if (N->hasOneUse())
            return CurDAG->SelectNodeTo(N, IA64::MOV, MVT::i64,
                                        CurDAG->getTargetFrameIndex(FI, MVT::i64));
        else
            return CurDAG->getTargetNode(IA64::MOV, dl, MVT::i64,
                                         CurDAG->getTargetFrameIndex(FI, MVT::i64));
    }

    case ISD::ConstantPool: { // TODO: nuke the constant pool
        // (ia64 doesn't need one)
        ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
        Constant *C = CP->getConstVal();
        SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64,
                      CP->getAlignment());
        return CurDAG->getTargetNode(IA64::ADDL_GA, dl, MVT::i64, // ?
                                     CurDAG->getRegister(IA64::r1, MVT::i64), CPI);
    }

    case ISD::GlobalAddress: {
        GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
        SDValue GA = CurDAG->getTargetGlobalAddress(GV, MVT::i64);
        SDValue Tmp =
            SDValue(CurDAG->getTargetNode(IA64::ADDL_GA, dl, MVT::i64,
                                          CurDAG->getRegister(IA64::r1,
                                                  MVT::i64), GA), 0);
        return CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, MVT::Other, Tmp,
                                     CurDAG->getEntryNode());
    }

    /* XXX
       case ISD::ExternalSymbol: {
         SDValue EA = CurDAG->getTargetExternalSymbol(
           cast<ExternalSymbolSDNode>(N)->getSymbol(),
           MVT::i64);
         SDValue Tmp = CurDAG->getTargetNode(IA64::ADDL_EA, dl, MVT::i64,
                                               CurDAG->getRegister(IA64::r1,
                                                                   MVT::i64),
                                               EA);
         return CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, Tmp);
       }
    */

    case ISD::LOAD: { // FIXME: load -1, not 1, for bools?
        LoadSDNode *LD = cast<LoadSDNode>(N);
        SDValue Chain = LD->getChain();
        SDValue Address = LD->getBasePtr();

        MVT TypeBeingLoaded = LD->getMemoryVT();
        unsigned Opc;
        switch (TypeBeingLoaded.getSimpleVT()) {
        default:
#ifndef NDEBUG
            N->dump(CurDAG);
#endif
            assert(0 && "Cannot load this type!");
        case MVT::i1: { // this is a bool
            Opc = IA64::LD1; // first we load a byte, then compare for != 0
            if(N->getValueType(0) == MVT::i1) { // XXX: early exit!
                return CurDAG->SelectNodeTo(N, IA64::CMPNE, MVT::i1, MVT::Other,
                                            SDValue(CurDAG->getTargetNode(Opc, dl,
                                                    MVT::i64,
                                                    Address), 0),
                                            CurDAG->getRegister(IA64::r0, MVT::i64),
                                            Chain);
            }
            /* otherwise, we want to load a bool into something bigger: LD1
               will do that for us, so we just fall through */
        }
        case MVT::i8:
            Opc = IA64::LD1;
            break;
        case MVT::i16:
            Opc = IA64::LD2;
            break;
        case MVT::i32:
            Opc = IA64::LD4;
            break;
        case MVT::i64:
            Opc = IA64::LD8;
            break;

        case MVT::f32:
            Opc = IA64::LDF4;
            break;
        case MVT::f64:
            Opc = IA64::LDF8;
            break;
        }

        // TODO: comment this
        return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), MVT::Other,
                                    Address, Chain);
    }

    case ISD::STORE: {
        StoreSDNode *ST = cast<StoreSDNode>(N);
        SDValue Address = ST->getBasePtr();
        SDValue Chain = ST->getChain();

        unsigned Opc;
        if (ISD::isNON_TRUNCStore(N)) {
            switch (N->getOperand(1).getValueType().getSimpleVT()) {
            default:
                assert(0 && "unknown type in store");
            case MVT::i1: { // this is a bool
                Opc = IA64::ST1; // we store either 0 or 1 as a byte
                // first load zero!
                SDValue Initial = CurDAG->getCopyFromReg(Chain, dl, IA64::r0, MVT::i64);
                Chain = Initial.getValue(1);
                // then load 1 into the same reg iff the predicate to store is 1
                SDValue Tmp = ST->getValue();
                Tmp =
                    SDValue(CurDAG->getTargetNode(IA64::TPCADDS, dl, MVT::i64, Initial,
                                                  CurDAG->getTargetConstant(1,
                                                          MVT::i64),
                                                  Tmp), 0);
                return CurDAG->SelectNodeTo(N, Opc, MVT::Other, Address, Tmp, Chain);
            }
            case MVT::i64:
                Opc = IA64::ST8;
                break;
            case MVT::f64:
                Opc = IA64::STF8;
                break;
            }
        } else { // Truncating store
            switch(ST->getMemoryVT().getSimpleVT()) {
            default:
                assert(0 && "unknown type in truncstore");
            case MVT::i8:
                Opc = IA64::ST1;
                break;
            case MVT::i16:
                Opc = IA64::ST2;
                break;
            case MVT::i32:
                Opc = IA64::ST4;
                break;
            case MVT::f32:
                Opc = IA64::STF4;
                break;
            }
        }

        SDValue N1 = N->getOperand(1);
        SDValue N2 = N->getOperand(2);
        return CurDAG->SelectNodeTo(N, Opc, MVT::Other, N2, N1, Chain);
    }

    case ISD::BRCOND: {
        SDValue Chain = N->getOperand(0);
        SDValue CC = N->getOperand(1);
        MachineBasicBlock *Dest =
            cast<BasicBlockSDNode>(N->getOperand(2))->getBasicBlock();
        //FIXME - we do NOT need long branches all the time
        return CurDAG->SelectNodeTo(N, IA64::BRLCOND_NOTCALL, MVT::Other, CC,
                                    CurDAG->getBasicBlock(Dest), Chain);
    }

    case ISD::CALLSEQ_START:
    case ISD::CALLSEQ_END: {
        int64_t Amt = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
        unsigned Opc = N->getOpcode() == ISD::CALLSEQ_START ?
                       IA64::ADJUSTCALLSTACKDOWN : IA64::ADJUSTCALLSTACKUP;
        SDValue N0 = N->getOperand(0);
        return CurDAG->SelectNodeTo(N, Opc, MVT::Other, getI64Imm(Amt), N0);
    }

    case ISD::BR:
        // FIXME: we don't need long branches all the time!
        SDValue N0 = N->getOperand(0);
        return CurDAG->SelectNodeTo(N, IA64::BRL_NOTCALL, MVT::Other,
                                    N->getOperand(1), N0);
    }

    return SelectCode(Op);
}
Code example #16
File: InstrEmitter.cpp (project: jizhonghao/freebsd)
/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
/// implicit physical register output.
void InstrEmitter::
EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
                unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
    unsigned VRBase = 0;
    if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
        // Just use the input register directly!
        SDValue Op(Node, ResNo);
        if (IsClone)
            VRBaseMap.erase(Op);
        bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
        (void)isNew; // Silence compiler warning.
        assert(isNew && "Node emitted out of order - early");
        return;
    }

    // If the node is only used by a CopyToReg and the dest reg is a vreg, use
    // the CopyToReg'd destination register instead of creating a new vreg.
    bool MatchReg = true;
    const TargetRegisterClass *UseRC = NULL;
    EVT VT = Node->getValueType(ResNo);

    // Stick to the preferred register classes for legal types.
    if (TLI->isTypeLegal(VT))
        UseRC = TLI->getRegClassFor(VT);

    if (!IsClone && !IsCloned)
        for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
                UI != E; ++UI) {
            SDNode *User = *UI;
            bool Match = true;
            if (User->getOpcode() == ISD::CopyToReg &&
                    User->getOperand(2).getNode() == Node &&
                    User->getOperand(2).getResNo() == ResNo) {
                unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
                if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
                    VRBase = DestReg;
                    Match = false;
                } else if (DestReg != SrcReg)
                    Match = false;
            } else {
                for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
                    SDValue Op = User->getOperand(i);
                    if (Op.getNode() != Node || Op.getResNo() != ResNo)
                        continue;
                    EVT VT = Node->getValueType(Op.getResNo());
                    if (VT == MVT::Other || VT == MVT::Glue)
                        continue;
                    Match = false;
                    if (User->isMachineOpcode()) {
                        const MCInstrDesc &II = TII->get(User->getMachineOpcode());
                        const TargetRegisterClass *RC = 0;
                        if (i+II.getNumDefs() < II.getNumOperands())
                            RC = TII->getRegClass(II, i+II.getNumDefs(), TRI);
                        if (!UseRC)
                            UseRC = RC;
                        else if (RC) {
                            const TargetRegisterClass *ComRC =
                                TRI->getCommonSubClass(UseRC, RC);
                            // If multiple uses expect disjoint register classes, we emit
                            // copies in AddRegisterOperand.
                            if (ComRC)
                                UseRC = ComRC;
                        }
                    }
                }
            }
            MatchReg &= Match;
            if (VRBase)
                break;
        }

    const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
    SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);

    // Figure out the register class to create for the destreg.
    if (VRBase) {
        DstRC = MRI->getRegClass(VRBase);
    } else if (UseRC) {
        assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
        DstRC = UseRC;
    } else {
        DstRC = TLI->getRegClassFor(VT);
    }

    // If all uses are reading from the src physical register and copying the
    // register is either impossible or very expensive, then don't create a copy.
    if (MatchReg && SrcRC->getCopyCost() < 0) {
        VRBase = SrcReg;
    } else {
        // Create the reg, emit the copy.
        VRBase = MRI->createVirtualRegister(DstRC);
        BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
                VRBase).addReg(SrcReg);
    }

    SDValue Op(Node, ResNo);
    if (IsClone)
        VRBaseMap.erase(Op);
    bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
    (void)isNew; // Silence compiler warning.
    assert(isNew && "Node emitted out of order - early");
}
Code example #17
/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
/// This function finds loads of the same base and different offsets. If the
/// offsets are not far apart (target specific), it adds MVT::Flag inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
void ScheduleDAGSDNodes::ClusterNeighboringLoads() {
  SmallPtrSet<SDNode*, 16> Visited;
  SmallVector<int64_t, 4> Offsets;
  DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    SDNode *Node = &*NI;
    if (!Node || !Node->isMachineOpcode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    if (!TID.mayLoad())
      continue;

    SDNode *Chain = 0;
    unsigned NumOps = Node->getNumOperands();
    if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
      Chain = Node->getOperand(NumOps-1).getNode();
    if (!Chain)
      continue;

    // Look for other loads of the same chain. Find loads that are loading from
    // the same base pointer and different offsets.
    Visited.clear();
    Offsets.clear();
    O2SMap.clear();
    bool Cluster = false;
    SDNode *Base = Node;
    int64_t BaseOffset;
    for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
         I != E; ++I) {
      SDNode *User = *I;
      if (User == Node || !Visited.insert(User))
        continue;
      int64_t Offset1, Offset2;
      if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
          Offset1 == Offset2)
        // FIXME: Should be ok if the addresses are identical. But earlier
        // optimizations really should have eliminated one of the loads.
        continue;
      if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
        Offsets.push_back(Offset1);
      O2SMap.insert(std::make_pair(Offset2, User));
      Offsets.push_back(Offset2);
      if (Offset2 < Offset1) {
        Base = User;
        BaseOffset = Offset2;
      } else {
        BaseOffset = Offset1;
      }
      Cluster = true;
    }

    if (!Cluster)
      continue;

    // Sort them in increasing order.
    std::sort(Offsets.begin(), Offsets.end());

    // Check if the loads are close enough.
    SmallVector<SDNode*, 4> Loads;
    unsigned NumLoads = 0;
    int64_t BaseOff = Offsets[0];
    SDNode *BaseLoad = O2SMap[BaseOff];
    Loads.push_back(BaseLoad);
    for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
      int64_t Offset = Offsets[i];
      SDNode *Load = O2SMap[Offset];
      if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,
                                        NumLoads))
        break; // Stop right here. Ignore loads that are further away.
      Loads.push_back(Load);
      ++NumLoads;
    }

    if (NumLoads == 0)
      continue;

    // Cluster loads by adding MVT::Flag outputs and inputs. This also
    // ensures they are scheduled in order of increasing addresses.
    SDNode *Lead = Loads[0];
    AddFlags(Lead, SDValue(0,0), true, DAG);
    SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1);
    for (unsigned i = 1, e = Loads.size(); i != e; ++i) {
      bool OutFlag = i < e-1;
      SDNode *Load = Loads[i];
      AddFlags(Load, InFlag, OutFlag, DAG);
      if (OutFlag)
        InFlag = SDValue(Load, Load->getNumValues()-1);
      ++LoadsClustered;
    }
  }
}
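
ClusterNeighboringLoads builds an offset-to-load map, sorts the offsets, and then glues consecutive loads together until the target hook says they are too far apart. The grouping step in isolation looks roughly like the sketch below; closeEnough() is a made-up stand-in for TII->shouldScheduleLoadsNear, and its limits are invented for the demo.

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

// Stand-in for TII->shouldScheduleLoadsNear: accept loads within 64 bytes
// of the base, at most 4 extra loads per cluster. Both limits are made up.
static bool closeEnough(long base, long off, unsigned numSoFar) {
  return numSoFar < 4 && off - base <= 64;
}

// Given the offsets of loads that share a base pointer, pick the prefix
// (after sorting) that should be clustered, mirroring the loop that fills
// `Loads` in ClusterNeighboringLoads above.
std::vector<long> clusterPrefix(std::vector<long> offsets) {
  if (offsets.empty())
    return {};
  std::sort(offsets.begin(), offsets.end());
  std::vector<long> cluster{offsets[0]};
  unsigned numLoads = 0;
  for (std::size_t i = 1; i < offsets.size(); ++i) {
    if (!closeEnough(offsets[0], offsets[i], numLoads))
      break;                     // stop here; ignore loads further away
    cluster.push_back(offsets[i]);
    ++numLoads;
  }
  return cluster;
}

int main() {
  for (long off : clusterPrefix({32, 0, 8, 512, 16}))
    std::printf("%ld ", off);    // prints: 0 8 16 32
  std::printf("\n");
  return 0;
}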
Code example #18
/// Returns single number reflecting benefit of scheduling SU
/// in the current cycle.
signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
  // Initial trivial priority.
  signed ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (SU->isScheduled)
    return ResCount;

  // Forced priority is high.
  if (SU->isScheduleHigh)
    ResCount += PriorityOne;

  // Adaptable scheduling
  // A small, but very parallel
  // region, where reg pressure is an issue.
  if (HorizontalVerticalBalance > RegPressureThreshold) {
    // Critical path first
    ResCount += (SU->getHeight() * ScaleTwo);
    // If resources are available for it, multiply the
    // chance of scheduling.
    if (isResourceAvailable(SU))
      ResCount <<= FactorOne;

    // Consider change to reg pressure from scheduling
    // this SU.
    ResCount -= (regPressureDelta(SU,true) * ScaleOne);
  }
  // Default heuristic, greedy and
  // critical path driven.
  else {
    // Critical path first.
    ResCount += (SU->getHeight() * ScaleTwo);
    // Now see how many instructions are blocked by this SU.
    ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
    // If resources are available for it, multiply the
    // chance of scheduling.
    if (isResourceAvailable(SU))
      ResCount <<= FactorOne;

    ResCount -= (regPressureDelta(SU) * ScaleTwo);
  }

  // These are platform specific things.
  // Will need to go into the back end
  // and accessed from here via a hook.
  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
    if (N->isMachineOpcode()) {
      const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
      if (TID.isCall())
        ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
    }
    else
      switch (N->getOpcode()) {
      default:  break;
      case ISD::TokenFactor:
      case ISD::CopyFromReg:
      case ISD::CopyToReg:
        ResCount += PriorityFive;
        break;

      case ISD::INLINEASM:
        ResCount += PriorityFour;
        break;
      }
  }
  return ResCount;
}
Code example #19
/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
/// implicit physical register output.
void ScheduleDAGSDNodes::EmitCopyFromReg(SDNode *Node, unsigned ResNo,
                                         bool IsClone, bool IsCloned,
                                         unsigned SrcReg,
                                         DenseMap<SDValue, unsigned> &VRBaseMap) {
  unsigned VRBase = 0;
  if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
    // Just use the input register directly!
    SDValue Op(Node, ResNo);
    if (IsClone)
      VRBaseMap.erase(Op);
    bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
  (void)isNew; // Silence compiler warning.
    assert(isNew && "Node emitted out of order - early");
    return;
  }

  // If the node is only used by a CopyToReg and the dest reg is a vreg, use
  // the CopyToReg'd destination register instead of creating a new vreg.
  bool MatchReg = true;
  const TargetRegisterClass *UseRC = NULL;
  if (!IsClone && !IsCloned)
    for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
         UI != E; ++UI) {
      SDNode *User = *UI;
      bool Match = true;
      if (User->getOpcode() == ISD::CopyToReg && 
          User->getOperand(2).getNode() == Node &&
          User->getOperand(2).getResNo() == ResNo) {
        unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
        if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
          VRBase = DestReg;
          Match = false;
        } else if (DestReg != SrcReg)
          Match = false;
      } else {
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          SDValue Op = User->getOperand(i);
          if (Op.getNode() != Node || Op.getResNo() != ResNo)
            continue;
          MVT VT = Node->getValueType(Op.getResNo());
          if (VT == MVT::Other || VT == MVT::Flag)
            continue;
          Match = false;
          if (User->isMachineOpcode()) {
            const TargetInstrDesc &II = TII->get(User->getMachineOpcode());
            const TargetRegisterClass *RC =
              getInstrOperandRegClass(TRI, II, i+II.getNumDefs());
            if (!UseRC)
              UseRC = RC;
            else if (RC) {
              if (UseRC->hasSuperClass(RC))
                UseRC = RC;
              else
                assert((UseRC == RC || RC->hasSuperClass(UseRC)) &&
                       "Multiple uses expecting different register classes!");
            }
          }
        }
      }
      MatchReg &= Match;
      if (VRBase)
        break;
    }

  MVT VT = Node->getValueType(ResNo);
  const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
  SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT);
  
  // Figure out the register class to create for the destreg.
  if (VRBase) {
    DstRC = MRI.getRegClass(VRBase);
  } else if (UseRC) {
    assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
    DstRC = UseRC;
  } else {
    DstRC = TLI->getRegClassFor(VT);
  }
    
  // If all uses are reading from the src physical register and copying the
  // register is either impossible or very expensive, then don't create a copy.
  if (MatchReg && SrcRC->getCopyCost() < 0) {
    VRBase = SrcReg;
  } else {
    // Create the reg, emit the copy.
    VRBase = MRI.createVirtualRegister(DstRC);
    bool Emitted = TII->copyRegToReg(*BB, InsertPos, VRBase, SrcReg,
                                     DstRC, SrcRC);
    // If the target didn't handle the copy with different register
    // classes and the destination is a subset of the source,
    // try a normal same-RC copy.
    if (!Emitted && DstRC->hasSuperClass(SrcRC))
      Emitted = TII->copyRegToReg(*BB, InsertPos, VRBase, SrcReg,
                                  SrcRC, SrcRC);

    assert(Emitted && "Unable to issue a copy instruction!\n");
  }

  SDValue Op(Node, ResNo);
  if (IsClone)
    VRBaseMap.erase(Op);
  bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
  (void)isNew; // Silence compiler warning.
  assert(isNew && "Node emitted out of order - early");
}
Code example #20
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case ISD::BUILD_VECTOR: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // sequence, which adds a 128-bit register copy when going through the
    // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
    // possible because they can't be bundled by our scheduler.
    SDValue RegSeqArgs[9] = {
      CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
    };
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[2 * i + 1] = N->getOperand(i);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
        RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet.  Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());

    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      ConstantFPSDNode *C = cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
                              Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          // We can only use literal constants (e.g. AMDGPU::ZERO,
          // AMDGPU::ONE, etc) in machine opcodes.
          continue;
        }
      } else {
        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
            (TII->get(Use->getMachineOpcode()).TSFlags &
            R600_InstFlag::VECTOR)) {
          continue;
        }

        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM);
        assert(ImmIdx != -1);

        // Subtract one from ImmIdx because the DST operand is usually index 0
        // for MachineInstrs, but we have no DST in the Ops vector (a small
        // sketch of this index translation follows this example).
        ImmIdx--;

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
          assert(C);

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }
  SDNode *Result = SelectCode(N);

  // Fold the operands of the selected node.

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
        && TII->isALUInstr(Result->getMachineOpcode())) {
      // Fold FNEG/FABS/CONST_ADDRESS
      // TODO: ISel can generate multiple MachineInstrs; we need to parse
      // Result recursively.
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If the node has a single use that is CLAMP_R600, fold it.
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
            TII->getOperandIdx(Result->getMachineOpcode(), R600Operands::CLAMP);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
              Result->getMachineOpcode(), PotentialClamp->getVTList(),
              Ops.data(), NumOp);
        }
      }
    }
  }

  return Result;
}
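
One detail worth noting in code example #20 is the off-by-one between MachineInstr operand indices, where the destination is operand 0, and an SDNode's operand list, which carries no destination slot. Below is a minimal sketch of that index translation; the container setup and translateImmIdx are illustrative assumptions, not LLVM APIs.

#include <cassert>
#include <vector>

// Drop the DST slot that an SDNode operand vector does not carry;
// translateImmIdx is a hypothetical helper, not LLVM API.
int translateImmIdx(int MachineInstrIdx) {
  return MachineInstrIdx - 1;
}

int main() {
  // MachineInstr-style operand table: slot 0 is the destination.
  std::vector<int> MIOps = {/*DST*/ 99, /*src0*/ 1, /*imm*/ 42};
  // SDNode-style operand vector: no destination slot.
  std::vector<int> SDOps = {/*src0*/ 1, /*imm*/ 42};
  int ImmIdxInMI = 2; // As getOperandIdx() would report it.
  assert(MIOps[ImmIdxInMI] == SDOps[translateImmIdx(ImmIdxInMI)]);
  return 0;
}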
Code example #21
void ScheduleDAGSDNodes::AddSchedEdges() {
  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = ForceUnitLatencies();

  // Pass 2: add the preds, succs, etc.
  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
    SUnit *SU = &SUnits[su];
    SDNode *MainNode = SU->getNode();
    
    if (MainNode->isMachineOpcode()) {
      unsigned Opc = MainNode->getMachineOpcode();
      const TargetInstrDesc &TID = TII->get(Opc);
      for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
        if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
          SU->isTwoAddress = true;
          break;
        }
      }
      if (TID.isCommutable())
        SU->isCommutable = true;
    }
    
    // Find all predecessors and successors of the group.
    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
      if (N->isMachineOpcode() &&
          TII->get(N->getMachineOpcode()).getImplicitDefs()) {
        SU->hasPhysRegClobbers = true;
        unsigned NumUsed = InstrEmitter::CountResults(N);
        while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
          --NumUsed;    // Skip over unused values at the end.
        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
          SU->hasPhysRegDefs = true;
      }
      
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        SDNode *OpN = N->getOperand(i).getNode();
        if (isPassiveNode(OpN)) continue;   // Not scheduled.
        SUnit *OpSU = &SUnits[OpN->getNodeId()];
        assert(OpSU && "Node has no SUnit!");
        if (OpSU == SU) continue;           // In the same group.

        EVT OpVT = N->getOperand(i).getValueType();
        assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
        bool isChain = OpVT == MVT::Other;

        unsigned PhysReg = 0;
        int Cost = 1;
        // Determine if this is a physical register dependency.
        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
        assert((PhysReg == 0 || !isChain) &&
               "Chain dependence via physreg data?");
        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, the
        // scheduler emits a copy from the physical register to a virtual
        // register unless it requires a cross-class copy (cost < 0). That
        // means we only treat an "expensive to copy" register dependency as
        // a physical register dependency. This may change in the future.
        if (Cost >= 0)
          PhysReg = 0;

        // If this is a ctrl dep, latency is 1.
        unsigned OpLatency = isChain ? 1 : OpSU->Latency;
        SDep dep(OpSU, isChain ? SDep::Order : SDep::Data,
                 OpLatency, PhysReg);
        if (!isChain && !UnitLatencies) {
          ComputeOperandLatency(OpN, N, i, dep);
          ST.adjustSchedDependency(OpSU, SU, dep);
        }

        SU->addPred(dep);
      }
    }
  }
}
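
The edge construction in code example #21 keeps a physical-register annotation only when copying that register would be expensive (negative copy cost), and it forces unit latency on chain (control) edges. Here is a small self-contained sketch of those two rules, using simplified stand-in types rather than the real SDep:

#include <cassert>

// Simplified stand-in for an SDep edge; illustrative only.
struct Dep {
  bool IsChain;
  unsigned Latency;
  unsigned PhysReg; // 0 means "no physreg dependency recorded".
};

// Mirrors the filter above: only an expensive-to-copy register (Cost < 0)
// is kept as a physical-register dependency; chain edges get latency 1.
Dep makeDep(bool IsChain, unsigned OpLatency, unsigned PhysReg, int Cost) {
  if (Cost >= 0)
    PhysReg = 0; // Cheap copy: treat as an ordinary data dependency.
  return Dep{IsChain, IsChain ? 1u : OpLatency, PhysReg};
}

int main() {
  // A cheap-to-copy physreg is dropped from the edge annotation.
  assert(makeDep(false, 3, /*PhysReg=*/7, /*Cost=*/1).PhysReg == 0);
  // An expensive one (negative cost) is preserved.
  assert(makeDep(false, 3, /*PhysReg=*/7, /*Cost=*/-1).PhysReg == 7);
  // Chain edges always carry unit latency.
  assert(makeDep(true, 3, 0, 1).Latency == 1);
  return 0;
}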