コード例 #1
0
void SelectionDAGBuilder::LowerStatepoint(
    ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) {
  // Strategy: the statepoint call instruction describes both the wrapped call
  // and the safepoint.  We lower a temporary call first, then pick its
  // operands apart to assemble the real STATEPOINT node (optionally bracketed
  // by GC_TRANSITION_START / GC_TRANSITION_END) and finally swap it in for
  // the temporary call.

  NumOfStatepoints++;
  // Reset the per-statepoint lowering state.
  StatepointLowering.startNewStatepoint(*this);

  ImmutableCallSite CS(ISP.getCallSite());

#ifndef NDEBUG
  // Consistency check: remember every gc.relocate hanging off this
  // statepoint.  Skipped for invokes, where preserving this information
  // across basic blocks would be too expensive.
  if (!CS.isInvoke()) {
    for (const User *U : CS->users()) {
      const CallInst *Call = cast<CallInst>(U);
      if (isGCRelocate(Call))
        StatepointLowering.scheduleRelocCall(*Call);
    }
  }

  // Report malformed statepoints as early as possible; this should catch any
  // IR-level mistake made while constructing or transforming statepoints.
  ISP.verify();

  // The associated GCStrategy must actually expect statepoints.
  assert(GFI->getStrategy().useStatepoints() &&
         "GCStrategy does not expect to encounter statepoints");
#endif

  // Lower the vmstate and gcstate arguments of the statepoint.
  SmallVector<SDValue, 10> LoweredMetaArgs;
  lowerStatepointMetaArgs(LoweredMetaArgs, ISP, *this);

  // Lower the wrapped call; this node is replaced by the statepoint below.
  SDNode *CallNode =
      lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports);

  // Call node operand layout: Chain, Target, {Args}, RegMask, [Glue]
  SDValue Chain = CallNode->getOperand(0);

  const bool CallHasIncomingGlue = CallNode->getGluedNode() != nullptr;
  SDValue Glue;
  if (CallHasIncomingGlue)
    Glue = CallNode->getOperand(CallNode->getNumOperands() - 1); // always last

  // GC_TRANSITION_{START,END} operands follow the order in which they appear
  // in the statepoint intrinsic call.  Every pointer-typed operand is
  // immediately followed by a SRCVALUE for that pointer, which later lowering
  // may use (e.g. to form MachinePointerInfo values for loads/stores).
  auto AppendGCTransitionArgs = [&](SmallVectorImpl<SDValue> &Dest) {
    for (const Value *V : ISP.gc_transition_args()) {
      Dest.push_back(getValue(V));
      if (V->getType()->isPointerTy())
        Dest.push_back(DAG.getSrcValue(V));
    }
  };

  const uint64_t GCTransitionBit =
      static_cast<uint64_t>(StatepointFlags::GCTransition);
  const bool IsGCTransition =
      (ISP.getFlags() & GCTransitionBit) == GCTransitionBit;

  // Emit GC_TRANSITION_START when the statepoint requests a GC transition.
  if (IsGCTransition) {
    SmallVector<SDValue, 8> StartOps;
    StartOps.push_back(Chain);
    AppendGCTransitionArgs(StartOps);
    if (CallHasIncomingGlue)
      StartOps.push_back(Glue);

    SDVTList StartTys = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue TransitionStart = DAG.getNode(ISD::GC_TRANSITION_START,
                                          getCurSDLoc(), StartTys, StartOps);

    // Everything that follows hangs off the transition-start node.
    Chain = TransitionStart.getValue(0);
    Glue = TransitionStart.getValue(1);
  }

  // TODO: Currently, all of these operands are being marked as read/write in
  // PrologEpilogInserter.cpp, we should special case the VMState arguments
  // and flags to be read-only.
  SmallVector<SDValue, 40> Ops;

  // <id> and <numBytes> constants.
  Ops.push_back(DAG.getTargetConstant(ISP.getID(), getCurSDLoc(), MVT::i64));
  Ops.push_back(
      DAG.getTargetConstant(ISP.getNumPatchBytes(), getCurSDLoc(), MVT::i32));

  // Number of arguments passed directly into the call node; this marks where
  // the vmstate arguments start.  Subtract chain, target, regmask and, when
  // present, glue.
  unsigned NumCallRegArgs =
      CallNode->getNumOperands() - (CallHasIncomingGlue ? 4 : 3);
  Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, getCurSDLoc(), MVT::i32));

  // Call target.
  Ops.push_back(SDValue(CallNode->getOperand(1).getNode(), 0));

  // Call arguments: everything between the target and the register mask.  The
  // register mask is the last operand, or second to last when glue follows.
  SDNode::op_iterator RegMaskIt =
      CallNode->op_end() - (CallHasIncomingGlue ? 2 : 1);
  Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt);

  // Calling convention constant.
  pushStackMapConstant(Ops, *this, CS.getCallingConv());

  // Flags constant.
  uint64_t Flags = ISP.getFlags();
  assert(
      ((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0)
          && "unknown flag used");
  pushStackMapConstant(Ops, *this, Flags);

  // All vmstate and gcstate arguments.
  Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end());

  // Register mask taken from the call node.
  Ops.push_back(*RegMaskIt);

  // Chain, then glue if we have one (from the call or from the
  // GC_TRANSITION_START node built above).
  Ops.push_back(Chain);
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Results: chain plus a glue output, since we consume glue as an input.
  // This lets later nodes chain off the statepoint when needed.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  SDNode *StatepointMCNode =
      DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);

  // The node that ultimately takes over the uses of the temporary call.
  SDNode *SinkNode = StatepointMCNode;

  // Emit GC_TRANSITION_END when needed; the operand layout mirrors that of
  // GC_TRANSITION_START.
  if (IsGCTransition) {
    SmallVector<SDValue, 8> EndOps;
    EndOps.push_back(SDValue(StatepointMCNode, 0)); // chain
    AppendGCTransitionArgs(EndOps);
    EndOps.push_back(SDValue(StatepointMCNode, 1)); // glue

    SDVTList EndTys = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue TransitionEnd =
        DAG.getNode(ISD::GC_TRANSITION_END, getCurSDLoc(), EndTys, EndOps);

    SinkNode = TransitionEnd.getNode();
  }

  // Redirect all users of the temporary call to the new node (this may update
  // Root), then delete the temporary call.
  DAG.ReplaceAllUsesWith(CallNode, SinkNode);
  DAG.DeleteNode(CallNode);

  // DON'T set the root - under the assumption that it's already set past the
  // inserted node we created.

  // TODO: A better future implementation would be to emit a single variable
  // argument, variable return value STATEPOINT node here and then hookup the
  // return value of each gc.relocate to the respective output of the
  // previously emitted STATEPOINT value.  Unfortunately, this doesn't appear
  // to actually be possible today.
}
/// Select - Instruction-select node \p N.
///
/// A few opcodes get hand-written treatment before falling through to the
/// generated matcher (SelectCode):
///  - AMDGPUISD::CONST_ADDRESS: rewrites already-selected users to read the
///    constant through an ALU_CONST register operand plus a sel offset.
///  - ISD::BUILD_VECTOR (generations up to NORTHERN_ISLANDS): turned into a
///    REG_SEQUENCE.
///  - ISD::BUILD_PAIR (generations after NORTHERN_ISLANDS): turned into a
///    REG_SEQUENCE.
///  - ISD::Constant / ISD::ConstantFP (generations up to NORTHERN_ISLANDS):
///    users are rewritten to use an inline constant register or a literal.
///
/// After SelectCode, on generations up to NORTHERN_ISLANDS, operands of the
/// selected node are folded (FoldDotOperands / FoldOperands) and a single
/// CLAMP_R600 user is merged into the node's clamp operand.
///
/// \returns NULL if \p N is already a machine node, otherwise the selected
/// node.
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  const R600InstrInfo *TII =
                      static_cast<const R600InstrInfo*>(TM.getInstrInfo());
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case AMDGPUISD::CONST_ADDRESS: {
    // Next is advanced at the top of every iteration (before the use list can
    // be modified by UpdateNodeOperands), so it only needs a valid initial
    // value here; do not increment a possible end iterator.
    for (SDNode::use_iterator I = N->use_begin(), Next = I;
                              I != SDNode::use_end(); I = Next) {
      Next = llvm::next(I);
      if (!I->isMachineOpcode()) {
        continue;
      }
      unsigned Opcode = I->getMachineOpcode();
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SrcIdx = I.getOperandNo();
      int SelIdx;
      // Unlike MachineInstrs, SDNodes do not have results in their operand
      // list, so we need to increment the SrcIdx, since
      // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
      if (HasDst) {
        SrcIdx++;
      }

      SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
      if (SelIdx < 0) {
        continue;
      }

      SDValue CstOffset;
      if (N->getValueType(0).isVector() ||
          !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
        continue;

      // Gather constants values
      int SrcIndices[] = {
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
      };
      // Constants already read by this user, plus the one we want to add.
      std::vector<unsigned> Consts;
      for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
        int OtherSrcIdx = SrcIndices[i];
        int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
        if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
          continue;
        }
        // Convert MachineInstr indices to SDNode operand indices.
        if (HasDst) {
          OtherSrcIdx--;
          OtherSelIdx--;
        }
        if (RegisterSDNode *Reg =
                         dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
          if (Reg->getReg() == AMDGPU::ALU_CONST) {
            // The sel operand is dereferenced unconditionally, so it must be
            // a constant: use cast<> (asserts) rather than an unchecked
            // dyn_cast<>.
            ConstantSDNode *Cst =
                cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
            Consts.push_back(Cst->getZExtValue());
          }
        }
      }

      ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
      Consts.push_back(Cst->getZExtValue());
      if (!TII->fitsConstReadLimitations(Consts))
        continue;

      // Convert back to SDNode indices
      if (HasDst) {
        SrcIdx--;
        SelIdx--;
      }
      // Rebuild the user's operand list with the source replaced by ALU_CONST
      // and its sel operand replaced by the constant offset.
      std::vector<SDValue> Ops;
      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
        if (i == SrcIdx) {
          Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
        } else if (i == SelIdx) {
          Ops.push_back(CstOffset);
        } else {
          Ops.push_back(I->getOperand(i));
        }
      }
      CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }

    unsigned RegClassID;
    switch(N->getValueType(0).getVectorNumElements()) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128 bits reg copy when going through TwoAddressInstructions
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    //
    // Layout: register class id, then (value, subreg index) pairs; the value
    // slots are filled in below.
    SDValue RegSeqArgs[9] = {
      CurDAG->getTargetConstant(RegClassID, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
    };
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      // Give up on the REG_SEQUENCE form if any element is a register node.
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[2 * i + 1] = N->getOperand(i);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
        RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  SDLoc(N), N->getValueType(0), Ops);
  }

  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet.  Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }

    // Either an inline constant register (ZERO, HALF, ONE, ONE_INT) or
    // ALU_LITERAL_X together with the literal bits in ImmValue.
    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      // The opcode was checked just above, so this cast cannot fail.
      ConstantFPSDNode *C = cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      // NOTE(review): this comparison is also true for -0.0, which would then
      // be selected as AMDGPU::ZERO - confirm that dropping the sign is
      // intended.
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    // As above: Next is recomputed at the top of each iteration, so start it
    // at Use instead of incrementing a possible end iterator.
    for (SDNode::use_iterator Use = N->use_begin(), Next = Use;
                              Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
          if (ImmReg == AMDGPU::ALU_LITERAL_X) {
            // We can only use literal constants (e.g. AMDGPU::ZERO,
            // AMDGPU::ONE, etc) in machine opcodes.
            continue;
          }
      } else {
        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
            (TII->get(Use->getMachineOpcode()).TSFlags &
            R600_InstFlag::VECTOR)) {
          continue;
        }

        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
                                        AMDGPU::OpName::literal);
        if (ImmIdx == -1) {
          continue;
        }

        if (TII->getOperandIdx(Use->getMachineOpcode(),
                               AMDGPU::OpName::dst) != -1) {
          // subtract one from ImmIdx, because the DST operand is usually index
          // 0 for MachineInstrs, but we have no DST in the Ops vector.
          ImmIdx--;
        }

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          // cast<> asserts that the literal operand really is a constant.
          ConstantSDNode *C = cast<ConstantSDNode>(Use->getOperand(ImmIdx));

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }
  SDNode *Result = SelectCode(N);

  // Fold operands of selected node

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
    if (Result && Result->isMachineOpcode() &&
        Result->getMachineOpcode() == AMDGPU::DOT_4) {
      // Keep folding until FoldDotOperands reports no further change.
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

    }
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
        && TII->hasInstrModifiers(Result->getMachineOpcode())) {
      // Fold FNEG/FABS
      // TODO: Isel can generate multiple MachineInst, we need to recursively
      // parse Result
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If node has a single use which is CLAMP_R600, folds it
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
            TII->getOperandIdx(Result->getMachineOpcode(), AMDGPU::OpName::clamp);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          // ClampIdx is a MachineInstr index; the operand vector is indexed
          // one lower.  Set the clamp bit and morph the CLAMP_R600 user into
          // the (now clamping) instruction.
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
              Result->getMachineOpcode(), PotentialClamp->getVTList(),
              Ops.data(), NumOp);
        }
      }
    }
  }

  return Result;
}
コード例 #3
0
void ScheduleDAGSDNodes::BuildSchedUnits() {
  // During scheduling, the NodeId field of SDNode is used to map SDNodes
  // to their associated SUnits by holding SUnits table indices. A value
  // of -1 means the SDNode does not yet have an associated SUnit.
  unsigned NumNodes = 0;
  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    NI->setNodeId(-1);
    ++NumNodes;
  }

  // Reserve entries in the vector for each of the SUnits we are creating.  This
  // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
  // invalidated.
  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
  // This is a temporary workaround.
  SUnits.reserve(NumNodes * 2);
  
  // Add all nodes in depth first order.
  SmallVector<SDNode*, 64> Worklist;
  SmallPtrSet<SDNode*, 64> Visited;
  Worklist.push_back(DAG->getRoot().getNode());
  Visited.insert(DAG->getRoot().getNode());
  
  while (!Worklist.empty()) {
    SDNode *NI = Worklist.pop_back_val();
    
    // Add all operands to the worklist unless they've already been added.
    for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
      if (Visited.insert(NI->getOperand(i).getNode()))
        Worklist.push_back(NI->getOperand(i).getNode());
  
    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
      continue;
    
    // If this node has already been processed, stop now.
    if (NI->getNodeId() != -1) continue;
    
    SUnit *NodeSUnit = NewSUnit(NI);
    
    // See if anything is flagged to this node, if so, add them to flagged
    // nodes.  Nodes can have at most one flag input and one flag output.  Flags
    // are required to be the last operand and result of a node.
    
    // Scan up to find flagged preds.
    SDNode *N = NI;
    while (N->getNumOperands() &&
           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
      N = N->getOperand(N->getNumOperands()-1).getNode();
      assert(N->getNodeId() == -1 && "Node already inserted!");
      N->setNodeId(NodeSUnit->NodeNum);
    }
    
    // Scan down to find any flagged succs.
    N = NI;
    while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
      SDValue FlagVal(N, N->getNumValues()-1);
      
      // There are either zero or one users of the Flag result.
      bool HasFlagUse = false;
      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); 
           UI != E; ++UI)
        if (FlagVal.isOperandOf(*UI)) {
          HasFlagUse = true;
          assert(N->getNodeId() == -1 && "Node already inserted!");
          N->setNodeId(NodeSUnit->NodeNum);
          N = *UI;
          break;
        }
      if (!HasFlagUse) break;
    }
    
    // If there are flag operands involved, N is now the bottom-most node
    // of the sequence of nodes that are flagged together.
    // Update the SUnit.
    NodeSUnit->setNode(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NodeSUnit->NodeNum);

    // Assign the Latency field of NodeSUnit using target-provided information.
    ComputeLatency(NodeSUnit);
  }
}
コード例 #4
0
void ScheduleDAGSDNodes::AddSchedEdges() {
  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = ForceUnitLatencies();

  // Pass 2: add the preds, succs, etc.
  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
    SUnit *SU = &SUnits[su];
    SDNode *MainNode = SU->getNode();
    
    if (MainNode->isMachineOpcode()) {
      unsigned Opc = MainNode->getMachineOpcode();
      const TargetInstrDesc &TID = TII->get(Opc);
      for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
        if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
          SU->isTwoAddress = true;
          break;
        }
      }
      if (TID.isCommutable())
        SU->isCommutable = true;
    }
    
    // Find all predecessors and successors of the group.
    for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
      if (N->isMachineOpcode() &&
          TII->get(N->getMachineOpcode()).getImplicitDefs()) {
        SU->hasPhysRegClobbers = true;
        unsigned NumUsed = InstrEmitter::CountResults(N);
        while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
          --NumUsed;    // Skip over unused values at the end.
        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
          SU->hasPhysRegDefs = true;
      }
      
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        SDNode *OpN = N->getOperand(i).getNode();
        if (isPassiveNode(OpN)) continue;   // Not scheduled.
        SUnit *OpSU = &SUnits[OpN->getNodeId()];
        assert(OpSU && "Node has no SUnit!");
        if (OpSU == SU) continue;           // In the same group.

        EVT OpVT = N->getOperand(i).getValueType();
        assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
        bool isChain = OpVT == MVT::Other;

        unsigned PhysReg = 0;
        int Cost = 1;
        // Determine if this is a physical register dependency.
        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
        assert((PhysReg == 0 || !isChain) &&
               "Chain dependence via physreg data?");
        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
        // emits a copy from the physical register to a virtual register unless
        // it requires a cross class copy (cost < 0). That means we are only
        // treating "expensive to copy" register dependency as physical register
        // dependency. This may change in the future though.
        if (Cost >= 0)
          PhysReg = 0;

        // If this is a ctrl dep, latency is 1.
        unsigned OpLatency = isChain ? 1 : OpSU->Latency;
        const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                               OpLatency, PhysReg);
        if (!isChain && !UnitLatencies) {
          ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
          ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
        }

        SU->addPred(dep);
      }
    }
  }
}
コード例 #5
0
ファイル: InstrEmitter.cpp プロジェクト: 5432935/crossbridge
/// EmitCopyFromReg - Emit machine code for a CopyFromReg node or for an
/// implicit physical register output, and record in VRBaseMap the register
/// that holds result ResNo of Node.
void InstrEmitter::
EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
                unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
  SDValue Result(Node, ResNo);

  // A virtual source register can be used directly; no copy is needed.
  if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
    if (IsClone)
      VRBaseMap.erase(Result);
    bool Inserted = VRBaseMap.insert(std::make_pair(Result, SrcReg)).second;
    (void)Inserted; // Silence compiler warning.
    assert(Inserted && "Node emitted out of order - early");
    return;
  }

  // Inspect the users of this result.  If it is only used by a CopyToReg
  // whose destination is a vreg, reuse that destination instead of creating a
  // new vreg.  Along the way, track whether every use reads SrcReg itself and
  // which register class the machine-opcode users expect.
  unsigned VRBase = 0;
  bool AllUsesMatch = true;
  const TargetRegisterClass *UseRC = NULL;
  if (!IsClone && !IsCloned) {
    for (SDNode::use_iterator UI = Node->use_begin(), UE = Node->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      bool UseMatches = true;

      const bool IsCopyOfResult =
          User->getOpcode() == ISD::CopyToReg &&
          User->getOperand(2).getNode() == Node &&
          User->getOperand(2).getResNo() == ResNo;

      if (IsCopyOfResult) {
        unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
        const bool DestIsVirtual =
            TargetRegisterInfo::isVirtualRegister(DestReg);
        if (DestIsVirtual)
          VRBase = DestReg;
        // Only a copy back into the very same physical register matches.
        UseMatches = !DestIsVirtual && DestReg == SrcReg;
      } else {
        for (unsigned OpIdx = 0, NumOps = User->getNumOperands();
             OpIdx != NumOps; ++OpIdx) {
          SDValue Op = User->getOperand(OpIdx);
          if (Op.getNode() != Node || Op.getResNo() != ResNo)
            continue;
          EVT OpVT = Node->getValueType(Op.getResNo());
          if (OpVT == MVT::Other || OpVT == MVT::Glue)
            continue;

          UseMatches = false;
          if (!User->isMachineOpcode())
            continue;

          // Look up the register class this operand slot expects, if the
          // instruction description covers it.
          const TargetInstrDesc &Desc = TII->get(User->getMachineOpcode());
          const unsigned MIOpIdx = OpIdx + Desc.getNumDefs();
          const TargetRegisterClass *RC = 0;
          if (MIOpIdx < Desc.getNumOperands())
            RC = Desc.OpInfo[MIOpIdx].getRegClass(TRI);

          if (!UseRC) {
            UseRC = RC;
            continue;
          }
          if (!RC)
            continue;
          // If multiple uses expect disjoint register classes, we emit
          // copies in AddRegisterOperand.
          if (const TargetRegisterClass *ComRC = getCommonSubClass(UseRC, RC))
            UseRC = ComRC;
        }
      }

      AllUsesMatch &= UseMatches;
      if (VRBase)
        break;
    }
  }

  EVT VT = Node->getValueType(ResNo);
  const TargetRegisterClass *SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);

  // Choose the register class of the destination register.
  const TargetRegisterClass *DstRC = 0;
  if (VRBase) {
    DstRC = MRI->getRegClass(VRBase);
  } else if (UseRC) {
    assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
    DstRC = UseRC;
  } else {
    DstRC = TLI->getRegClassFor(VT);
  }

  // When every use reads the source physical register and copying it is
  // impossible or very expensive (negative copy cost), use it in place;
  // otherwise create a vreg and emit the copy.
  if (AllUsesMatch && SrcRC->getCopyCost() < 0) {
    VRBase = SrcReg;
  } else {
    VRBase = MRI->createVirtualRegister(DstRC);
    BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
            VRBase).addReg(SrcReg);
  }

  if (IsClone)
    VRBaseMap.erase(Result);
  bool Inserted = VRBaseMap.insert(std::make_pair(Result, VRBase)).second;
  (void)Inserted; // Silence compiler warning.
  assert(Inserted && "Node emitted out of order - early");
}
コード例 #6
0
ファイル: InstrEmitter.cpp プロジェクト: 5432935/crossbridge
/// EmitMachineNode - Generate machine code for a target-specific node and
/// needed dependencies.
///
/// EXTRACT_SUBREG / INSERT_SUBREG / SUBREG_TO_REG, COPY_TO_REGCLASS and
/// REG_SEQUENCE are forwarded to their dedicated emitters, and IMPLICIT_DEF
/// emits nothing.  Every other opcode gets a MachineInstr built from its
/// instruction description, which is inserted at InsertPos in MBB.
///
/// \param Node      The node to emit; its machine opcode selects the
///                  instruction description.
/// \param IsClone   Passed through unchanged to the helper emitters.
/// \param IsCloned  Passed through unchanged to the helper emitters.
/// \param VRBaseMap Map keyed by SDValue that the helper emitters read and
///                  update with register numbers.
///
void InstrEmitter::
EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
                DenseMap<SDValue, unsigned> &VRBaseMap) {
  unsigned Opc = Node->getMachineOpcode();

  // Handle subreg insert/extract specially
  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::INSERT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG) {
    EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
    return;
  }

  // Handle COPY_TO_REGCLASS specially.
  if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
    EmitCopyToRegClassNode(Node, VRBaseMap);
    return;
  }

  // Handle REG_SEQUENCE specially.
  if (Opc == TargetOpcode::REG_SEQUENCE) {
    EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
    return;
  }

  if (Opc == TargetOpcode::IMPLICIT_DEF)
    // We want a unique VR for each IMPLICIT_DEF use.
    return;

  const TargetInstrDesc &II = TII->get(Opc);
  unsigned NumResults = CountResults(Node);
  unsigned NodeOperands = CountOperands(Node);
  // The node has more results than the instruction has explicit defs, so the
  // extra results must come from implicitly defined physical registers.
  bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
#ifndef NDEBUG
  unsigned NumMIOperands = NodeOperands + NumResults;
  if (II.isVariadic())
    assert(NumMIOperands >= II.getNumOperands() &&
           "Too few operands for a variadic node!");
  else
    assert(NumMIOperands >= II.getNumOperands() &&
           NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() &&
           "#operands for dag node doesn't match .td file!");
#endif

  // Create the new machine instruction.
  MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);

  // The MachineInstr constructor adds implicit-def operands. Scan through
  // these to determine which are dead.
  if (MI->getNumOperands() != 0 &&
      Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
    // First, collect all used registers.
    SmallVector<unsigned, 8> UsedRegs;
    for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
      if (F->getOpcode() == ISD::CopyFromReg)
        UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
      else {
        // Collect declared implicit uses.
        const TargetInstrDesc &TID = TII->get(F->getMachineOpcode());
        UsedRegs.append(TID.getImplicitUses(),
                        TID.getImplicitUses() + TID.getNumImplicitUses());
        // In addition to declared implicit uses, we must also check for
        // direct RegisterSDNode operands.
        for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
          if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
            unsigned Reg = R->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              UsedRegs.push_back(Reg);
          }
      }
    // Then mark unused registers as dead.
    MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
  }

  // Add result register values for things that are defined by this
  // instruction.
  if (NumResults)
    CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);

  // Emit all of the actual operands of this instruction, adding them to the
  // instruction as appropriate.
  bool HasOptPRefs = II.getNumDefs() > NumResults;
  assert((!HasOptPRefs || !HasPhysRegOuts) &&
         "Unable to cope with optional defs and phys regs defs!");
  unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
  for (unsigned i = NumSkip; i != NodeOperands; ++i)
    AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
               VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);

  // Transfer all of the memory reference descriptions of this instruction.
  MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
                 cast<MachineSDNode>(Node)->memoperands_end());

  // Insert the instruction into position in the block. This needs to
  // happen before any custom inserter hook is called so that the
  // hook knows where in the block to insert the replacement code.
  MBB->insert(InsertPos, MI);

  // Additional results must be physical register defs.
  if (HasPhysRegOuts) {
    for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
      unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
      if (Node->hasAnyUseOfValue(i))
        EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
      // If there are no uses, mark the register as dead now, so that
      // MachineLICM/Sink can see that it's dead. Don't do this if the
      // node has a Glue value, for the benefit of targets still using
      // Glue for values in physregs.
      else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
        MI->addRegisterDead(Reg, TRI);
    }
  }

  // If the instruction has implicit defs and the node doesn't, mark the
  // implicit def as dead.  If the node has any glue outputs, we don't do this
  // because we don't know what implicit defs are being used by glued nodes.
  if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
    if (const unsigned *IDList = II.getImplicitDefs()) {
      // Result index i maps to implicit def number i - NumDefs.  Never start
      // below NumDefs: when the node omits optional defs (NumResults <
      // NumDefs) the unsigned subtraction would wrap around and index far
      // outside IDList.  '<' rather than '!=' keeps the loop bounded even if
      // NumResults is already past the last implicit def.
      const unsigned NumDefs = II.getNumDefs();
      const unsigned FirstDead = NumResults > NumDefs ? NumResults : NumDefs;
      for (unsigned i = FirstDead, e = NumDefs + II.getNumImplicitDefs();
           i < e; ++i)
        MI->addRegisterDead(IDList[i - NumDefs], TRI);
    }
}
コード例 #7
0
ファイル: InstrEmitter.cpp プロジェクト: cyrilmagsuci/freebsd
/// EmitMachineNode - Turn one node carrying a machine opcode into a
/// MachineInstr and insert it at InsertPos in the current block, together
/// with the copies needed for any used physical-register results.
///
void InstrEmitter::
EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
                DenseMap<SDValue, unsigned> &VRBaseMap) {
    const unsigned Opc = Node->getMachineOpcode();

    // A few target-independent opcodes have their own emitters (or need no
    // instruction at all); dispatch those before the generic path.
    switch (Opc) {
    case TargetOpcode::EXTRACT_SUBREG:
    case TargetOpcode::INSERT_SUBREG:
    case TargetOpcode::SUBREG_TO_REG:
        EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
        return;
    case TargetOpcode::COPY_TO_REGCLASS:
        EmitCopyToRegClassNode(Node, VRBaseMap);
        return;
    case TargetOpcode::REG_SEQUENCE:
        EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
        return;
    case TargetOpcode::IMPLICIT_DEF:
        // We want a unique VR for each IMPLICIT_DEF use.
        return;
    default:
        break;
    }

    const MCInstrDesc &Desc = TII->get(Opc);
    const unsigned NumResults = CountResults(Node);
    unsigned NumDefs = Desc.getNumDefs();
    const MCPhysReg *ScratchRegs = nullptr;

    // STACKMAP and PATCHPOINT get a little preprocessing and then share the
    // generic code below.
    const bool IsPatchPoint = Opc == TargetOpcode::PATCHPOINT;
    if (IsPatchPoint || Opc == TargetOpcode::STACKMAP) {
        // Stackmaps have no arguments and do not preserve their calling
        // convention, but to keep runtime support simple they clobber the
        // same scratch registers as AnyRegCC.
        unsigned ConvID = CallingConv::AnyReg;
        if (IsPatchPoint) {
            ConvID = Node->getConstantOperandVal(PatchPointOpers::CCPos);
            NumDefs = NumResults;
        }
        ScratchRegs =
            TLI->getScratchRegisters(static_cast<CallingConv::ID>(ConvID));
    }

    unsigned NumImpUses = 0;
    const unsigned NodeOperands =
        countOperands(Node, Desc.getNumOperands() - NumDefs, NumImpUses);
    const bool HasPhysRegOuts =
        Desc.getImplicitDefs() != nullptr && NumResults > NumDefs;
#ifndef NDEBUG
    {
        const unsigned NumMIOperands = NodeOperands + NumResults;
        const unsigned MinOperands = Desc.getNumOperands();
        if (Desc.isVariadic()) {
            assert(NumMIOperands >= MinOperands &&
                   "Too few operands for a variadic node!");
        } else {
            const unsigned MaxOperands =
                MinOperands + Desc.getNumImplicitDefs() + NumImpUses;
            assert(NumMIOperands >= MinOperands &&
                   NumMIOperands <= MaxOperands &&
                   "#operands for dag node doesn't match .td file!");
        }
    }
#endif

    // Build the instruction itself.
    MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), Desc);

    // Give every value the instruction defines a result register.
    if (NumResults != 0)
        CreateVirtualRegisters(Node, MIB, Desc, IsClone, IsCloned, VRBaseMap);

    // Append the node's real operands.  When the instruction has more defs
    // than the node has results, the leading node operands are skipped.
    const bool HasOptPRefs = NumDefs > NumResults;
    assert((!HasOptPRefs || !HasPhysRegOuts) &&
           "Unable to cope with optional defs and phys regs defs!");
    unsigned DescOpNo = NumDefs;
    for (unsigned OpNo = HasOptPRefs ? NumDefs - NumResults : 0;
         OpNo != NodeOperands; ++OpNo, ++DescOpNo)
        AddOperand(MIB, Node->getOperand(OpNo), DescOpNo, &Desc,
                   VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);

    // Scratch registers become implicit, early-clobber defs.  The list is
    // walked until its zero terminator.
    if (ScratchRegs)
        for (const MCPhysReg *Reg = ScratchRegs; *Reg; ++Reg)
            MIB.addReg(*Reg, RegState::ImplicitDefine |
                       RegState::EarlyClobber);

    // Carry over the node's memory reference descriptions.
    MachineSDNode *MachineNode = cast<MachineSDNode>(Node);
    MIB.setMemRefs(MachineNode->memoperands_begin(),
                   MachineNode->memoperands_end());

    // Place the instruction in the block now: a custom inserter hook, if any,
    // needs to know where the replacement code goes.
    MBB->insert(InsertPos, MIB);

    // Physregs defined by the instruction can be consumed in several ways:
    //
    // 1. A node value beyond the explicit virtual-register defs has a use, in
    //    which case a CopyFromReg of the implicitly defined physreg is
    //    emitted (only possible when HasPhysRegOuts is true).
    // 2. A glued CopyFromReg reads the physreg.
    // 3. A glued instruction uses it implicitly.
    // 4. A glued instruction names it through a RegisterSDNode operand.
    //
    // Gather every physreg that is used so the remaining ones can be marked
    // dead afterwards.
    SmallVector<unsigned, 8> UsedRegs;

    // Results past the explicit defs are physical register defs.
    if (HasPhysRegOuts) {
        for (unsigned ResNo = NumDefs; ResNo < NumResults; ++ResNo) {
            unsigned Reg = Desc.getImplicitDefs()[ResNo - NumDefs];
            if (Node->hasAnyUseOfValue(ResNo)) {
                // This implicitly defined physreg has a use.
                UsedRegs.push_back(Reg);
                EmitCopyFromReg(Node, ResNo, IsClone, IsCloned, Reg,
                                VRBaseMap);
            }
        }
    }

    // Walk the glue chain looking for further physreg uses.
    if (Node->getValueType(Node->getNumValues() - 1) == MVT::Glue) {
        for (SDNode *User = Node->getGluedUser(); User;
             User = User->getGluedUser()) {
            const unsigned UserOpc = User->getOpcode();

            // CopyToReg nodes are internal to the glue chain; skip them.
            if (UserOpc == ISD::CopyToReg)
                continue;

            if (UserOpc == ISD::CopyFromReg) {
                UsedRegs.push_back(
                    cast<RegisterSDNode>(User->getOperand(1))->getReg());
                continue;
            }

            // Declared implicit uses of the glued instruction.
            const MCInstrDesc &UserDesc = TII->get(User->getMachineOpcode());
            UsedRegs.append(UserDesc.getImplicitUses(),
                            UserDesc.getImplicitUses() +
                            UserDesc.getNumImplicitUses());

            // Plus any physical register named directly as an operand.
            for (unsigned OpNo = 0, NumOps = User->getNumOperands();
                 OpNo != NumOps; ++OpNo) {
                RegisterSDNode *R =
                    dyn_cast<RegisterSDNode>(User->getOperand(OpNo));
                if (!R)
                    continue;
                unsigned Reg = R->getReg();
                if (TargetRegisterInfo::isPhysicalRegister(Reg))
                    UsedRegs.push_back(Reg);
            }
        }
    }

    // Whatever was not collected above is dead.
    if (!UsedRegs.empty() || Desc.getImplicitDefs())
        MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);

    // Let the target adjust the instruction after selection if it asked to.
    if (Desc.hasPostISelHook())
        TLI->AdjustInstrPostInstrSelection(MIB, Node);
}
コード例 #8
0
/// run - This is the main entry point for the type legalizer.  This does a
/// top-down traversal of the dag, legalizing types as it goes.  Returns "true"
/// if it made any changes.
///
/// Nodes are processed from a worklist.  A node's NodeId tracks its state:
/// ReadyToProcess, Unanalyzed, NewNode, Processed, or (when positive) the
/// number of operands that are not yet ready.
///
/// NOTE(review): this excerpt stops right after the worklist loop; the rest of
/// the function, including its return statement, is not shown here.
bool DAGTypeLegalizer::run() {
  bool Changed = false;

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());
  Dummy.setNodeId(Unanalyzed);

  // The root of the dag may dangle to deleted nodes until the type legalizer is
  // done.  Set it to null to avoid confusion.
  DAG.setRoot(SDValue());

  // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
  // (and remembering them) if they are leaves and assigning 'Unanalyzed' if
  // non-leaves.
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = DAG.allnodes_end(); I != E; ++I) {
    if (I->getNumOperands() == 0) {
      I->setNodeId(ReadyToProcess);
      Worklist.push_back(I);
    } else {
      I->setNodeId(Unanalyzed);
    }
  }

  // Now that we have a set of nodes to process, handle them all.
  while (!Worklist.empty()) {
    // With XDEBUG defined the checks run on every iteration; otherwise they
    // run only when EnableExpensiveChecks is set.
#ifndef XDEBUG
    if (EnableExpensiveChecks)
#endif
      PerformExpensiveChecks();

    SDNode *N = Worklist.back();
    Worklist.pop_back();
    assert(N->getNodeId() == ReadyToProcess &&
           "Node should be ready if on worklist!");

    // Nodes whose results are ignored skip result legalization and go
    // straight to the operand scan.
    if (IgnoreNodeResults(N))
      goto ScanOperands;

    // Scan the values produced by the node, checking to see if any result
    // types are illegal.
    for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
      EVT ResultVT = N->getValueType(i);
      switch (getTypeAction(ResultVT)) {
      case TargetLowering::TypeLegal:
        break;
      // The following calls must take care of *all* of the node's results,
      // not just the illegal result they were passed (this includes results
      // with a legal type).  Results can be remapped using ReplaceValueWith,
      // or their promoted/expanded/etc values registered in PromotedIntegers,
      // ExpandedIntegers etc.
      case TargetLowering::TypePromoteInteger:
        PromoteIntegerResult(N, i);
        Changed = true;
        goto NodeDone;
      case TargetLowering::TypeExpandInteger:
        ExpandIntegerResult(N, i);
        Changed = true;
        goto NodeDone;
      case TargetLowering::TypeSoftenFloat:
        SoftenFloatResult(N, i);
        Changed = true;
        goto NodeDone;
      case TargetLowering::TypeExpandFloat:
        ExpandFloatResult(N, i);
        Changed = true;
        goto NodeDone;
      case TargetLowering::TypeScalarizeVector:
        ScalarizeVectorResult(N, i);
        Changed = true;
        goto NodeDone;
      case TargetLowering::TypeSplitVector:
        SplitVectorResult(N, i);
        Changed = true;
        goto NodeDone;
      case TargetLowering::TypeWidenVector:
        WidenVectorResult(N, i);
        Changed = true;
        goto NodeDone;
      }
    }

ScanOperands:
    // Scan the operand list for the node, handling any nodes with operands that
    // are illegal.
    {
    unsigned NumOperands = N->getNumOperands();
    bool NeedsReanalyzing = false;
    unsigned i;
    for (i = 0; i != NumOperands; ++i) {
      if (IgnoreNodeResults(N->getOperand(i).getNode()))
        continue;

      EVT OpVT = N->getOperand(i).getValueType();
      switch (getTypeAction(OpVT)) {
      case TargetLowering::TypeLegal:
        continue;
      // The following calls must either replace all of the node's results
      // using ReplaceValueWith, and return "false"; or update the node's
      // operands in place, and return "true".
      case TargetLowering::TypePromoteInteger:
        NeedsReanalyzing = PromoteIntegerOperand(N, i);
        Changed = true;
        break;
      case TargetLowering::TypeExpandInteger:
        NeedsReanalyzing = ExpandIntegerOperand(N, i);
        Changed = true;
        break;
      case TargetLowering::TypeSoftenFloat:
        NeedsReanalyzing = SoftenFloatOperand(N, i);
        Changed = true;
        break;
      case TargetLowering::TypeExpandFloat:
        NeedsReanalyzing = ExpandFloatOperand(N, i);
        Changed = true;
        break;
      case TargetLowering::TypeScalarizeVector:
        NeedsReanalyzing = ScalarizeVectorOperand(N, i);
        Changed = true;
        break;
      case TargetLowering::TypeSplitVector:
        NeedsReanalyzing = SplitVectorOperand(N, i);
        Changed = true;
        break;
      case TargetLowering::TypeWidenVector:
        NeedsReanalyzing = WidenVectorOperand(N, i);
        Changed = true;
        break;
      }
      // Only reached when an operand with a non-legal type was just handled
      // (legal operands 'continue' above): stop scanning after the first one,
      // leaving i < NumOperands.
      break;
    }

    // The sub-method updated N in place.  Check to see if any operands are new,
    // and if so, mark them.  If the node needs revisiting, don't add all users
    // to the worklist etc.
    if (NeedsReanalyzing) {
      assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
      N->setNodeId(NewNode);
      // Recompute the NodeId and correct processed operands, adding the node to
      // the worklist if ready.
      SDNode *M = AnalyzeNewNode(N);
      if (M == N)
        // The node didn't morph - nothing special to do, it will be revisited.
        continue;

      // The node morphed - this is equivalent to legalizing by replacing every
      // value of N with the corresponding value of M.  So do that now.
      assert(N->getNumValues() == M->getNumValues() &&
             "Node morphing changed the number of results!");
      for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
        // Replacing the value takes care of remapping the new value.
        ReplaceValueWith(SDValue(N, i), SDValue(M, i));
      assert(N->getNodeId() == NewNode && "Unexpected node state!");
      // The node continues to live on as part of the NewNode fungus that
      // grows on top of the useful nodes.  Nothing more needs to be done
      // with it - move on to the next node.
      continue;
    }

    // The operand loop ran to completion, i.e. no operand needed handling.
    if (i == NumOperands) {
      DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n");
    }
    }
NodeDone:

    // If we reach here, the node was processed, potentially creating new nodes.
    // Mark it as processed and add its users to the worklist as appropriate.
    assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
    N->setNodeId(Processed);

    for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
         UI != E; ++UI) {
      SDNode *User = *UI;
      int NodeId = User->getNodeId();

      // This node has two options: it can either be a new node or its Node ID
      // may be a count of the number of operands it has that are not ready.
      if (NodeId > 0) {
        User->setNodeId(NodeId-1);

        // If this was the last use it was waiting on, add it to the ready list.
        if (NodeId-1 == ReadyToProcess)
          Worklist.push_back(User);
        continue;
      }

      // If this is an unreachable new node, then ignore it.  If it ever becomes
      // reachable by being used by a newly created node then it will be handled
      // by AnalyzeNewNode.
      if (NodeId == NewNode)
        continue;

      // Otherwise, this node is new: this is the first operand of it that
      // became ready.  Its new NodeId is the number of operands it has minus 1
      // (as this node is now processed).
      assert(NodeId == Unanalyzed && "Unknown node ID!");
      User->setNodeId(User->getNumOperands() - 1);

      // If the node only has a single operand, it is now ready.
      if (User->getNumOperands() == 1)
        Worklist.push_back(User);
    }
  }
コード例 #9
0
ファイル: ScheduleDAGSDNodes.cpp プロジェクト: happz/llvm
void ScheduleDAGSDNodes::AddSchedEdges() {
  const TargetSubtargetInfo &ST = MF.getSubtarget();

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = forceUnitLatencies();

  // Pass 2: add the preds, succs, etc.
  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
    SUnit *SU = &SUnits[su];
    SDNode *MainNode = SU->getNode();

    if (MainNode->isMachineOpcode()) {
      unsigned Opc = MainNode->getMachineOpcode();
      const MCInstrDesc &MCID = TII->get(Opc);
      for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
        if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
          SU->isTwoAddress = true;
          break;
        }
      }
      if (MCID.isCommutable())
        SU->isCommutable = true;
    }

    // Find all predecessors and successors of the group.
    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
      if (N->isMachineOpcode() &&
          TII->get(N->getMachineOpcode()).getImplicitDefs()) {
        SU->hasPhysRegClobbers = true;
        unsigned NumUsed = InstrEmitter::CountResults(N);
        while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
          --NumUsed;    // Skip over unused values at the end.
        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
          SU->hasPhysRegDefs = true;
      }

      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        SDNode *OpN = N->getOperand(i).getNode();
        if (isPassiveNode(OpN)) continue;   // Not scheduled.
        SUnit *OpSU = &SUnits[OpN->getNodeId()];
        assert(OpSU && "Node has no SUnit!");
        if (OpSU == SU) continue;           // In the same group.

        EVT OpVT = N->getOperand(i).getValueType();
        assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
        bool isChain = OpVT == MVT::Other;

        unsigned PhysReg = 0;
        int Cost = 1;
        // Determine if this is a physical register dependency.
        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
        assert((PhysReg == 0 || !isChain) &&
               "Chain dependence via physreg data?");
        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
        // emits a copy from the physical register to a virtual register unless
        // it requires a cross class copy (cost < 0). That means we are only
        // treating "expensive to copy" register dependency as physical register
        // dependency. This may change in the future though.
        if (Cost >= 0 && !StressSched)
          PhysReg = 0;

        // If this is a ctrl dep, latency is 1.
        unsigned OpLatency = isChain ? 1 : OpSU->Latency;
        // Special-case TokenFactor chains as zero-latency.
        if(isChain && OpN->getOpcode() == ISD::TokenFactor)
          OpLatency = 0;

        SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
          : SDep(OpSU, SDep::Data, PhysReg);
        Dep.setLatency(OpLatency);
        if (!isChain && !UnitLatencies) {
          computeOperandLatency(OpN, N, i, Dep);
          ST.adjustSchedDependency(OpSU, SU, Dep);
        }

        if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
          // Multiple register uses are combined in the same SUnit. For example,
          // we could have a set of glued nodes with all their defs consumed by
          // another set of glued nodes. Register pressure tracking sees this as
          // a single use, so to keep pressure balanced we reduce the defs.
          //
          // We can't tell (without more book-keeping) if this results from
          // glued nodes or duplicate operands. As long as we don't reduce
          // NumRegDefsLeft to zero, we handle the common cases well.
          --OpSU->NumRegDefsLeft;
        }
      }
    }
  }
}
コード例 #10
0
ファイル: ScheduleDAGSDNodes.cpp プロジェクト: happz/llvm
void ScheduleDAGSDNodes::BuildSchedUnits() {
  // During scheduling, the NodeId field of SDNode is used to map SDNodes
  // to their associated SUnits by holding SUnits table indices. A value
  // of -1 means the SDNode does not yet have an associated SUnit.
  unsigned NumNodes = 0;
  for (SDNode &NI : DAG->allnodes()) {
    NI.setNodeId(-1);
    ++NumNodes;
  }

  // Reserve entries in the vector for each of the SUnits we are creating.  This
  // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
  // invalidated.
  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
  // This is a temporary workaround.
  SUnits.reserve(NumNodes * 2);

  // Add all nodes in depth first order.
  SmallVector<SDNode*, 64> Worklist;
  SmallPtrSet<SDNode*, 32> Visited;
  Worklist.push_back(DAG->getRoot().getNode());
  Visited.insert(DAG->getRoot().getNode());

  SmallVector<SUnit*, 8> CallSUnits;
  while (!Worklist.empty()) {
    SDNode *NI = Worklist.pop_back_val();

    // Add all operands to the worklist unless they've already been added.
    for (const SDValue &Op : NI->op_values())
      if (Visited.insert(Op.getNode()).second)
        Worklist.push_back(Op.getNode());

    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
      continue;

    // If this node has already been processed, stop now.
    if (NI->getNodeId() != -1) continue;

    SUnit *NodeSUnit = newSUnit(NI);

    // See if anything is glued to this node, if so, add them to glued
    // nodes.  Nodes can have at most one glue input and one glue output.  Glue
    // is required to be the last operand and result of a node.

    // Scan up to find glued preds.
    SDNode *N = NI;
    while (N->getNumOperands() &&
           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
      N = N->getOperand(N->getNumOperands()-1).getNode();
      assert(N->getNodeId() == -1 && "Node already inserted!");
      N->setNodeId(NodeSUnit->NodeNum);
      if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
        NodeSUnit->isCall = true;
    }

    // Scan down to find any glued succs.
    N = NI;
    while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
      SDValue GlueVal(N, N->getNumValues()-1);

      // There are either zero or one users of the Glue result.
      bool HasGlueUse = false;
      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
           UI != E; ++UI)
        if (GlueVal.isOperandOf(*UI)) {
          HasGlueUse = true;
          assert(N->getNodeId() == -1 && "Node already inserted!");
          N->setNodeId(NodeSUnit->NodeNum);
          N = *UI;
          if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
            NodeSUnit->isCall = true;
          break;
        }
      if (!HasGlueUse) break;
    }

    if (NodeSUnit->isCall)
      CallSUnits.push_back(NodeSUnit);

    // Schedule zero-latency TokenFactor below any nodes that may increase the
    // schedule height. Otherwise, ancestors of the TokenFactor may appear to
    // have false stalls.
    if (NI->getOpcode() == ISD::TokenFactor)
      NodeSUnit->isScheduleLow = true;

    // If there are glue operands involved, N is now the bottom-most node
    // of the sequence of nodes that are glued together.
    // Update the SUnit.
    NodeSUnit->setNode(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NodeSUnit->NodeNum);

    // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
    InitNumRegDefsLeft(NodeSUnit);

    // Assign the Latency field of NodeSUnit using target-provided information.
    computeLatency(NodeSUnit);
  }

  // Find all call operands.
  while (!CallSUnits.empty()) {
    SUnit *SU = CallSUnits.pop_back_val();
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->getOpcode() != ISD::CopyToReg)
        continue;
      SDNode *SrcN = SUNode->getOperand(2).getNode();
      if (isPassiveNode(SrcN)) continue;   // Not scheduled.
      SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
      SrcSU->isCallOp = true;
    }
  }
}
コード例 #11
0
/// Select - Custom selection hook run before the generated matcher.
///
/// Returns NULL for nodes that already carry a machine opcode.  Otherwise:
///  - On generations up to HD6XXX, a BUILD_VECTOR whose operands are not
///    RegisterSDNodes is turned directly into a REG_SEQUENCE.
///  - On generations up to HD6XXX, a Constant / ConstantFP node is folded into
///    its users, either as one of the inline constant registers (ZERO, HALF,
///    ONE, ONE_INT) or as an ALU_LITERAL_X register plus an immediate operand.
///  - Everything else (and constants after folding) goes through SelectCode().
/// On generations up to HD6XXX the selected node additionally has its operands
/// folded via FoldOperands(), and a single CLAMP_R600 user is merged into it.
///
/// \param N  The node to select.
/// \returns  NULL if N was already selected, the node produced by the
///           BUILD_VECTOR or CLAMP rewrite when one applies, otherwise the
///           result of SelectCode() (possibly with updated operands).
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case ISD::BUILD_VECTOR: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128 bits reg copy when going through TwoAddressInstructions
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    SDValue RegSeqArgs[9] = {
      CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
    };
    // RegSeqArgs holds the register class followed by (value, subreg index)
    // pairs.  A vector with more elements than there are pairs would be
    // written past the end of the array below, so leave it to SelectCode().
    const unsigned MaxElts =
        (sizeof(RegSeqArgs) / sizeof(RegSeqArgs[0]) - 1) / 2;
    if (N->getNumOperands() > MaxElts)
      break;
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[2 * i + 1] = N->getOperand(i);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
        RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet.  Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());

    // ImmReg stays ALU_LITERAL_X when the value needs a literal; ImmValue is
    // then the literal's bit pattern.
    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      // The case label guarantees the node kind, so use the checked cast
      // instead of dereferencing an unchecked dyn_cast result.
      ConstantFPSDNode *C = cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      // NOTE(review): this comparison is also true for -0.0f, so a negative
      // zero is mapped to the ZERO register as well - confirm that is
      // intended.
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    // Rewrite each user of the constant.  Next is only computed once Use is
    // known not to be the end iterator (the end iterator must never be
    // incremented), and before the body replaces the operand that refers to
    // N, after which the current use is no longer valid.
    for (SDNode::use_iterator Use = N->use_begin(), Next = Use;
                              Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
          if (ImmReg == AMDGPU::ALU_LITERAL_X) {
            // We can only use literal constants (e.g. AMDGPU::ZERO,
            // AMDGPU::ONE, etc) in machine opcodes.
            continue;
          }
      } else {
        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
            (TII->get(Use->getMachineOpcode()).TSFlags &
            R600_InstFlag::VECTOR)) {
          continue;
        }

        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM);
        assert(ImmIdx != -1);

        // subtract one from ImmIdx, because the DST operand is usually index
        // 0 for MachineInstrs, but we have no DST in the Ops vector.
        ImmIdx--;

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
          assert(C);

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }
  SDNode *Result = SelectCode(N);

  // Fold operands of selected node

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
        && TII->isALUInstr(Result->getMachineOpcode())) {
      // Fold FNEG/FABS/CONST_ADDRESS
      // TODO: Isel can generate multiple MachineInst, we need to recursively
      // parse Result
      // Keep folding until FoldOperands() reports no further change.
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If node has a single use which is CLAMP_R600, folds it
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
            TII->getOperandIdx(Result->getMachineOpcode(), R600Operands::CLAMP);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          // As with the immediate index above, the operand list has no DST
          // entry, hence the -1.
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
              Result->getMachineOpcode(), PotentialClamp->getVTList(),
              Ops.data(), NumOp);
        }
      }
    }
  }

  return Result;
}
コード例 #12
0
SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
    SelectionDAGBuilder::StatepointLoweringInfo &SI) {
  // Overview: SI describes both the wrapped call and the safepoint.  A
  // temporary call node is lowered first; its operands are then taken apart
  // and re-assembled into a STATEPOINT machine node, optionally bracketed by
  // GC_TRANSITION_START / GC_TRANSITION_END.  The temporary call node is
  // replaced and deleted at the end; the call's return value is returned.

  NumOfStatepoints++;
  // Reset the per-statepoint lowering state.
  StatepointLowering.startNewStatepoint(*this);

#ifndef NDEBUG
  // gc relocates are scheduled ahead of removeDuplicateGCPtrs because the
  // duplicate relocates elided there _will_ still be encountered.  Only
  // relocates in the same block as the statepoint are scheduled here.
  for (auto *Reloc : SI.GCRelocates) {
    if (Reloc->getParent() == SI.StatepointInstr->getParent())
      StatepointLowering.scheduleRelocCall(*Reloc);
  }
#endif

  // Drop llvm::Values that map to the same SDValue as another input; this
  // also removes plain duplicates from the original llvm::Value input list.
  // It is purely an optimization that shrinks the StackMap section.
  removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this,
                        FuncInfo.StatepointSpillMaps[SI.StatepointInstr]);
  assert(SI.Bases.size() == SI.Ptrs.size() &&
         SI.Ptrs.size() == SI.GCRelocates.size());

  // Lower the statepoint vmstate and gcstate arguments.
  SmallVector<SDValue, 10> MetaArgs;
  lowerStatepointMetaArgs(MetaArgs, SI, *this);

  // The spills have been emitted at this point, so refresh the chain from the
  // root to keep the call sequence ordered correctly.
  SI.CLI.setChain(getRoot());

  // Lower the temporary call; it is swapped for the statepoint further down.
  SDValue ReturnVal;
  SDNode *CallNode;
  std::tie(ReturnVal, CallNode) =
      lowerCallFromStatepointLoweringInfo(SI, *this, PendingExports);

  // Operand layout of the call node: Chain, Target, {Args}, RegMask, [Glue]
  const unsigned NumCallOperands = CallNode->getNumOperands();
  SDValue Chain = CallNode->getOperand(0);

  // When the call carries glue, it is always the final operand.
  const bool CallHasIncomingGlue = CallNode->getGluedNode() != nullptr;
  SDValue Glue;
  if (CallHasIncomingGlue)
    Glue = CallNode->getOperand(NumCallOperands - 1);

  // Shared by GC_TRANSITION_START and GC_TRANSITION_END: the transition
  // operands are laid out in the order in which they appear in the call to
  // the statepoint intrinsic.  Every pointer-typed operand is immediately
  // followed by a SRCVALUE for that pointer, which may be used during
  // lowering (e.g. to form MachinePointerInfo values for loads/stores).
  auto AppendTransitionArgs = [&](SmallVector<SDValue, 8> &Dest) {
    for (const Value *Arg : SI.GCTransitionArgs) {
      Dest.push_back(getValue(Arg));
      if (Arg->getType()->isPointerTy())
        Dest.push_back(DAG.getSrcValue(Arg));
    }
  };

  const uint64_t TransitionBit =
      static_cast<uint64_t>(StatepointFlags::GCTransition);
  const bool IsGCTransition =
      (SI.StatepointFlags & TransitionBit) == TransitionBit;

  // Build the GC_TRANSITION_START node if necessary:
  //   Chain, {transition args}, [Glue]
  if (IsGCTransition) {
    SmallVector<SDValue, 8> StartOps;
    StartOps.push_back(Chain);
    AppendTransitionArgs(StartOps);
    if (CallHasIncomingGlue)
      StartOps.push_back(Glue);

    SDVTList StartTys = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue TransitionStart =
        DAG.getNode(ISD::GC_TRANSITION_START, getCurSDLoc(), StartTys, StartOps);

    // From here on the statepoint hangs off the transition-start node.
    Chain = TransitionStart.getValue(0);
    Glue = TransitionStart.getValue(1);
  }

  // TODO: Currently, all of these operands are being marked as read/write in
  // PrologEpilogInserter.cpp, we should special case the VMState arguments
  // and flags to be read-only.
  SmallVector<SDValue, 40> StatepointOps;

  // The <id> and <numBytes> constants come first.
  StatepointOps.push_back(
      DAG.getTargetConstant(SI.ID, getCurSDLoc(), MVT::i64));
  StatepointOps.push_back(
      DAG.getTargetConstant(SI.NumPatchBytes, getCurSDLoc(), MVT::i32));

  // Number of arguments that go directly into the call node, i.e. every
  // operand other than Chain, Target, RegMask and (when present) Glue.  This
  // marks the starting position of the vmstate arguments.
  const unsigned NonArgOperands = CallHasIncomingGlue ? 4 : 3;
  const unsigned NumCallRegArgs = NumCallOperands - NonArgOperands;
  StatepointOps.push_back(
      DAG.getTargetConstant(NumCallRegArgs, getCurSDLoc(), MVT::i32));

  // Call target.
  StatepointOps.push_back(SDValue(CallNode->getOperand(1).getNode(), 0));

  // Call arguments: everything from operand 2 up to (excluding) the register
  // mask, which is the last operand or the one just before the glue.
  SDNode::op_iterator RegMaskIt =
      CallNode->op_end() - (CallHasIncomingGlue ? 2 : 1);
  StatepointOps.insert(StatepointOps.end(), CallNode->op_begin() + 2,
                       RegMaskIt);

  // Constant argument for the calling convention.
  pushStackMapConstant(StatepointOps, *this, SI.CLI.CallConv);

  // Constant argument for the flags.
  const uint64_t Flags = SI.StatepointFlags;
  assert(((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) &&
         "Unknown flag used");
  pushStackMapConstant(StatepointOps, *this, Flags);

  // All vmstate and gcstate arguments.
  StatepointOps.insert(StatepointOps.end(), MetaArgs.begin(), MetaArgs.end());

  // Register mask taken from the call node.
  StatepointOps.push_back(*RegMaskIt);

  // Chain.
  StatepointOps.push_back(Chain);

  // Glue is appended whenever one is available: either the call's incoming
  // glue or the glue produced by GC_TRANSITION_START above.
  if (Glue.getNode())
    StatepointOps.push_back(Glue);

  // Result types.  A glue output is provided since one is consumed as input,
  // which lets someone else chain off this node as needed.
  SDVTList StatepointTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDNode *StatepointMCNode = DAG.getMachineNode(
      TargetOpcode::STATEPOINT, getCurSDLoc(), StatepointTys, StatepointOps);

  // Node that will take over the uses of the temporary call.
  SDNode *SinkNode = StatepointMCNode;

  // Build the GC_TRANSITION_END node if necessary:
  //   Chain (from statepoint), {transition args}, Glue (from statepoint)
  if (IsGCTransition) {
    SmallVector<SDValue, 8> EndOps;
    EndOps.push_back(SDValue(StatepointMCNode, 0));
    AppendTransitionArgs(EndOps);
    EndOps.push_back(SDValue(StatepointMCNode, 1));

    SDVTList EndTys = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue TransitionEnd =
        DAG.getNode(ISD::GC_TRANSITION_END, getCurSDLoc(), EndTys, EndOps);

    SinkNode = TransitionEnd.getNode();
  }

  // Redirect all uses of the temporary call to the sink node (this may update
  // Root), then remove the temporary call node.
  DAG.ReplaceAllUsesWith(CallNode, SinkNode);
  DAG.DeleteNode(CallNode);

  // DON'T set the root - under the assumption that it's already set past the
  // inserted node we created.

  // TODO: A better future implementation would be to emit a single variable
  // argument, variable return value STATEPOINT node here and then hookup the
  // return value of each gc.relocate to the respective output of the
  // previously emitted STATEPOINT value.  Unfortunately, this doesn't appear
  // to actually be possible today.

  return ReturnVal;
}
コード例 #13
0
ファイル: SelectionDAGDumper.cpp プロジェクト: bugsnag/llvm
/// Decide whether \p Node is trivial enough to be printed inline wherever it
/// shows up as an operand: it must have no operands of its own and must not
/// be the EntryToken.
static bool shouldPrintInline(const SDNode &Node) {
  const bool IsEntryToken = Node.getOpcode() == ISD::EntryToken;
  return !IsEntryToken && Node.getNumOperands() == 0;
}