Example #1
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                           SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

      Base = Addr.getOperand(0);
      Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
      return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
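
These address selectors share one basic move: if the address is an ADD whose right operand is a constant that fits the instruction's immediate field, split it into a (base, offset) pair; otherwise fall back to (Addr, 0). Below is a minimal standalone model of that decision with no LLVM dependencies; AddrExpr, fitsInt16, and selectAddr are invented stand-ins for illustration, with fitsInt16 mirroring llvm::isInt<16>.

#include <cstdint>
#include <cstdio>
#include <optional>

// Stand-in for the (add base, const) SDNode; not an LLVM type.
struct AddrExpr {
  int BaseReg;                   // stand-in for the base operand
  std::optional<int64_t> Const;  // stand-in for a ConstantSDNode RHS
};

// Mirrors llvm::isInt<16>: true if V fits a signed 16-bit immediate.
static bool fitsInt16(int64_t V) { return V >= -32768 && V <= 32767; }

struct BaseOffset { int BaseReg; int64_t Offset; };

static BaseOffset selectAddr(const AddrExpr &Addr) {
  // Fold the constant into the offset field only when it fits.
  if (Addr.Const && fitsInt16(*Addr.Const))
    return {Addr.BaseReg, *Addr.Const};
  // Default case, no offset (the fall-through above).
  return {Addr.BaseReg, 0};
}

int main() {
  BaseOffset A = selectAddr({/*BaseReg=*/1, 100});      // folds: (r1, 100)
  BaseOffset B = selectAddr({/*BaseReg=*/1, 1 << 20});  // too wide: (r1, 0)
  std::printf("(r%d,%lld) (r%d,%lld)\n", A.BaseReg, (long long)A.Offset,
              B.BaseReg, (long long)B.Offset);
}
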
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
                                      SDValue &Offset) {
  //errs() << "SelectADDRlocal: ";
  //Addr.getNode()->dumpr();
  if (isa<FrameIndexSDNode>(Addr)) {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
    //errs() << "Success\n";
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    Base = Addr.getOperand(0);
    if (!isa<FrameIndexSDNode>(Base)) {
      //errs() << "Failure\n";
      return false;
    }
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
    //errs() << "Offset: ";
    //Offset.getNode()->dumpr();
    //errs() << "Success\n";
    return true;
  }

  //errs() << "Failure\n";
  return false;
}
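
A side note on CurDAG->isBaseWithConstantOffset, used above and in most selectors below: it accepts not only (add x, c) but also (or x, c) when known-bits analysis proves x and c share no set bits, because a disjoint OR cannot carry and therefore equals an ADD. A small standalone check of that equivalence (plain C++, no LLVM):

#include <cassert>
#include <cstdint>

// When base and c share no set bits, or-ing and adding coincide; this is
// the property that lets isBaseWithConstantOffset treat OR like ADD.
int main() {
  uint32_t base = 0xFFFFFF00u;  // low 8 bits known zero (aligned pointer)
  for (uint32_t c = 0; c < 0x100; ++c) {
    assert((base & c) == 0);           // disjoint bits: no carry possible
    assert((base | c) == (base + c));  // so OR == ADD for this pair
  }
  return 0;
}
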
SDValue ARM64SelectionDAGInfo::EmitTargetCodeForMemset(
    SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile,
    MachinePointerInfo DstPtrInfo) const {
  // Check to see if there is a specialized entry-point for memory zeroing.
  ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
  ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
  const char *bzeroEntry =
      (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : 0;
  // For small size (< 256), it is not beneficial to use bzero
  // instead of memset.
  if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
    const ARM64TargetLowering &TLI = *static_cast<const ARM64TargetLowering *>(
                                          DAG.getTarget().getTargetLowering());

    EVT IntPtr = TLI.getPointerTy();
    Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    Entry.Node = Dst;
    Entry.Ty = IntPtrTy;
    Args.push_back(Entry);
    Entry.Node = Size;
    Args.push_back(Entry);
    TargetLowering::CallLoweringInfo CLI(
        Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false,
        0, CallingConv::C, /*isTailCall=*/false,
        /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
        DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
    return CallResult.second;
  }
  return SDValue();
}
Example #4
/// ComplexPattern used on Cpu0InstrInfo
/// Used on Cpu0 Load/Store instructions
bool Cpu0DAGToDAGISel::
SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
  EVT ValTy = Addr.getValueType();

  // If Parent is an unaligned f32 load or store, select a (base + index)
  // floating point load/store instruction (luxc1 or suxc1).
  const LSBaseSDNode* LS = 0;

  if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) {
    EVT VT = LS->getMemoryVT();

    if (VT.getSizeInBits() / 8 > LS->getAlignment()) {
      assert(TLI.allowsUnalignedMemoryAccesses(VT) &&
             "Unaligned loads/stores not supported for this type.");
      if (VT == MVT::f32)
        return false;
    }
  }

  // If Address is FI, get the TargetFrameIndex.
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base   = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
    Offset = CurDAG->getTargetConstant(0, ValTy);
    return true;
  }

  // On PIC code, load the global address (GA).
  if (Addr.getOpcode() == Cpu0ISD::Wrapper) {
    Base   = Addr.getOperand(0);
    Offset = Addr.getOperand(1);
    return true;
  }

  if (TM.getRelocationModel() != Reloc::PIC_) {
    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
        Addr.getOpcode() == ISD::TargetGlobalAddress))
      return false;
  }
  
  // Addresses of the form FI+const or FI|const
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isInt<16>(CN->getSExtValue())) {

      // If the first operand is a FI, get the TargetFI Node
      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                  (Addr.getOperand(0)))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
      else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
      return true;
    }
  }

  Base   = Addr;
  Offset = CurDAG->getTargetConstant(0, ValTy);
  return true;
}
Example #5
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
            (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
  }

  return true;
}
Example #6
// Return true if N is an undef or a constant.
// If N was undef, return an (i8imm 0) in Retval.
// If N was an imm, convert it to i8imm and return it in Retval.
// Note: the conversion to i8imm is required; otherwise the
// pattern matcher inserts a bunch of IMOVi8rr to convert
// the imm to i8imm, and this causes instruction selection
// to fail.
bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
  if (N.getOpcode() != ISD::UNDEF && N.getOpcode() != ISD::Constant)
    return false;

  if (N.getOpcode() == ISD::UNDEF)
    Retval = CurDAG->getTargetConstant(0, MVT::i8);
  else {
    ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
    unsigned retval = cn->getZExtValue();
    Retval = CurDAG->getTargetConstant(retval, MVT::i8);
  }
  return true;
}
bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
  uint32_t Bits;
  uint32_t RegWidth = N.getValueType().getSizeInBits();

  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  if (!CN) return false;

  if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
    return false;

  Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
  return true;
}
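
A64Imms::isLogicalImm accepts exactly the AArch64 "logical immediate" shapes: an element of 2, 4, 8, 16, 32, or 64 bits that is a rotated run of contiguous ones (neither all-zero nor all-one), replicated across the register. The sketch below is a brute-force standalone model of that predicate for illustration only; the real routine additionally computes the N:immr:imms encoding returned in Bits.

#include <cstdint>
#include <cstdio>

// Rotate a Width-bit value right by R (0 <= R < Width <= 64).
static uint64_t rotr(uint64_t V, unsigned R, unsigned Width) {
  uint64_t Mask = (Width == 64) ? ~0ULL : ((1ULL << Width) - 1);
  V &= Mask;
  return ((V >> R) | (V << ((Width - R) % Width))) & Mask;
}

// Brute-force model: Imm is a logical immediate iff some element size E
// replicates across RegWidth and the element is a rotated run of K ones,
// 0 < K < E (which excludes the all-zero and all-one patterns).
static bool isLogicalImmModel(unsigned RegWidth, uint64_t Imm) {
  for (unsigned E = 2; E <= RegWidth; E *= 2) {
    uint64_t EMask = (E == 64) ? ~0ULL : ((1ULL << E) - 1);
    uint64_t Elt = Imm & EMask;
    bool Replicates = true;
    for (unsigned Pos = 0; Pos < RegWidth; Pos += E)
      if (((Imm >> Pos) & EMask) != Elt) { Replicates = false; break; }
    if (!Replicates)
      continue;
    for (unsigned K = 1; K < E; ++K)
      for (unsigned R = 0; R < E; ++R)
        if (rotr(Elt, R, E) == (1ULL << K) - 1)
          return true;
  }
  return false;
}

int main() {
  std::printf("%d %d %d\n",
              isLogicalImmModel(32, 0x00FF00FFu),   // 1: 0x00FF repeated
              isLogicalImmModel(32, 0xFFFFFFFFu),   // 0: all ones excluded
              isLogicalImmModel(32, 0x00120034u));  // 0: not a rotated run
  return 0;
}
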
Example #8
void MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
                                        SDValue CmpLHS, const SDLoc &DL,
                                        SDNode *Node) const {
    unsigned Opc = InFlag.getOpcode();
    (void)Opc;

    assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
            (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
           "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");

    unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu;
    if (Subtarget->isGP64bit()) {
        SLTuOp = Mips::SLTu64;
        ADDuOp = Mips::DADDu;
    }

    SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
    SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
    EVT VT = LHS.getValueType();

    SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops);

    if (Subtarget->isGP64bit()) {
        // On 64-bit targets, sltu produces an i64 but our backend currently says
        // that SLTu64 produces an i32. We need to fix this in the long run but for
        // now, just make the DAG type-correct by asserting the upper bits are zero.
        Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT,
                                       CurDAG->getTargetConstant(0, DL, VT),
                                       SDValue(Carry, 0),
                                       CurDAG->getTargetConstant(Mips::sub_32, DL,
                                               VT));
    }

    // Generate a second addition only if we know that RHS is not a
    // constant-zero node.
    SDNode *AddCarry = Carry;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
    if (!C || C->getZExtValue())
        AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS);

    CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0));
}
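
The SLTu trick above works because of a simple unsigned-arithmetic identity: after the wrapped addition s = a + b (mod 2^32), a carry-out occurred exactly when s < a (equivalently, s < b). The standalone sketch below demonstrates the identity and how the extra ADDu folds the carry into a high word; it models the logic only, not the MIPS node machinery.

#include <cassert>
#include <cstdint>

// sltu Sum, A computes exactly this: the carry-out of A + B.
static uint32_t addCarry(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;      // wraps mod 2^32
  return Sum < A ? 1u : 0u;  // unsigned compare == carry flag
}

int main() {
  assert(addCarry(0xFFFFFFFFu, 1u) == 1);  // wraps: carry set
  assert(addCarry(0x7FFFFFFFu, 1u) == 0);  // no unsigned overflow
  // A 64-bit add built from two 32-bit adds: the low-word carry feeds
  // the high word, which is what SLTu + ADDu implement above.
  uint32_t ALo = 0xFFFFFFFFu, AHi = 0, BLo = 1, BHi = 0;
  uint32_t Lo = ALo + BLo;
  uint32_t Hi = AHi + BHi + addCarry(ALo, BLo);
  assert(Lo == 0 && Hi == 1);
  return 0;
}
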
SDValue HexagonSelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize)
    return SDValue();

  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (SizeVal < 32 || (SizeVal % 8) != 0)
    return SDValue();

  // Special case aligned memcpys with size >= 32 bytes and a multiple of 8.
  //
  const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering();
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
  Entry.Node = Dst;
  Args.push_back(Entry);
  Entry.Node = Src;
  Args.push_back(Entry);
  Entry.Node = Size;
  Args.push_back(Entry);

  const char *SpecialMemcpyName =
      "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(Chain)
      .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
                 Type::getVoidTy(*DAG.getContext()),
                 DAG.getTargetExternalSymbol(
                     SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())),
                 std::move(Args))
      .setDiscardResult();

  std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
  return CallResult.second;
}
Example #10
/// Match frameindex+offset and frameindex|offset
bool MipsSEDAGToDAGISel::selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base,
                                                    SDValue &Offset,
                                                    unsigned OffsetBits) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isIntN(OffsetBits, CN->getSExtValue())) {
      EVT ValTy = Addr.getValueType();

      // If the first operand is a FI, get the TargetFI Node
      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                  (Addr.getOperand(0)))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
      else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
      return true;
    }
  }
  return false;
}
/// Used on microMIPS Load/Store unaligned instructions (12-bit offset)
bool MipsSEDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) const {
  EVT ValTy = Addr.getValueType();

  // Addresses of the form FI+const or FI|const
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isInt<12>(CN->getSExtValue())) {

      // If the first operand is a FI then get the TargetFI Node
      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                  (Addr.getOperand(0)))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
      else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
      return true;
    }
  }

  return false;
}
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  const ARMSubtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
  // Do repeated 4-byte loads and stores. To be improved.
  // This requires 4-byte alignment.
  if ((Align & 3) != 0)
    return SDValue();
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
                                  RTLIB::MEMCPY);
  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
                                  RTLIB::MEMCPY);

  unsigned BytesLeft = SizeVal & 3;
  unsigned NumMemOps = SizeVal >> 2;
  unsigned EmittedNumMemOps = 0;
  EVT VT = MVT::i32;
  unsigned VTSize = 4;
  unsigned i = 0;
  // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
  const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
  SDValue TFOps[6];
  SDValue Loads[6];
  uint64_t SrcOff = 0, DstOff = 0;

  // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
  // VLDM/VSTM and make this code emit it when appropriate. This would reduce
  // pressure on the general purpose registers. However this seems harder to map
  // onto the register allocator's view of the world.

  // The number of MEMCPY pseudo-instructions to emit. We use up to
  // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
  // later on. This is a lower bound on the number of MEMCPY operations we must
  // emit.
  unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;

  // Code size optimisation: do not inline memcpy if expansion results in
  // more instructions than the library call.
  if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) {
    return SDValue();
  }

  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);

  for (unsigned I = 0; I != NumMEMCPYs; ++I) {
    // Evenly distribute registers among MEMCPY operations to reduce register
    // pressure.
    unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
    unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;

    Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
                      DAG.getConstant(NumRegs, dl, MVT::i32));
    Src = Dst.getValue(1);
    Chain = Dst.getValue(2);

    DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
    SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);

    EmittedNumMemOps = NextEmittedNumMemOps;
  }

  if (BytesLeft == 0)
    return Chain;

  // Issue loads / stores for the trailing (1 - 3) bytes.
  auto getRemainingValueType = [](unsigned BytesLeft) {
    return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
  };
  auto getRemainingSize = [](unsigned BytesLeft) {
    return (BytesLeft >= 2) ? 2 : 1;
  };

  unsigned BytesLeftSave = BytesLeft;
  i = 0;
  while (BytesLeft) {
    VT = getRemainingValueType(BytesLeft);
    VTSize = getRemainingSize(BytesLeft);
    Loads[i] = DAG.getLoad(VT, dl, Chain,
                           DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
                                       DAG.getConstant(SrcOff, dl, MVT::i32)),
                           SrcPtrInfo.getWithOffset(SrcOff));
    TFOps[i] = Loads[i].getValue(1);
    ++i;
    SrcOff += VTSize;
    BytesLeft -= VTSize;
  }
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      makeArrayRef(TFOps, i));

  i = 0;
  BytesLeft = BytesLeftSave;
  while (BytesLeft) {
    VT = getRemainingValueType(BytesLeft);
    VTSize = getRemainingSize(BytesLeft);
    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                            DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
                                        DAG.getConstant(DstOff, dl, MVT::i32)),
                            DstPtrInfo.getWithOffset(DstOff));
    ++i;
    DstOff += VTSize;
    BytesLeft -= VTSize;
  }
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                     makeArrayRef(TFOps, i));
}
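
The NumMemOps * (I + 1) / NumMEMCPYs formula above spreads the 4-byte moves as evenly as possible over the MEMCPY pseudos, so batch sizes differ by at most one (13 words over 3 ops become 4+4+5 rather than 6+6+1). A standalone check of that distribution, modelling only the arithmetic:

#include <cassert>
#include <cstdio>

int main() {
  const unsigned MaxLoads = 6;  // MaxLoadsInLDM for non-Thumb1
  unsigned NumMemOps = 13;      // 4-byte moves to distribute
  unsigned NumOps = (NumMemOps + MaxLoads - 1) / MaxLoads;  // ceil = 3
  unsigned Emitted = 0;
  for (unsigned I = 0; I != NumOps; ++I) {
    unsigned Next = NumMemOps * (I + 1) / NumOps;
    unsigned NumRegs = Next - Emitted;  // registers for this MEMCPY
    std::printf("MEMCPY #%u copies %u words\n", I, NumRegs);
    assert(NumRegs >= NumMemOps / NumOps && NumRegs <= MaxLoads);
    Emitted = Next;
  }
  assert(Emitted == NumMemOps);  // every word covered exactly once
  return 0;
}
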
SDValue
X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
                                        SDValue Chain, SDValue Dst, SDValue Src,
                                        SDValue Size, unsigned Align,
                                        bool isVolatile, bool AlwaysInline,
                                         MachinePointerInfo DstPtrInfo,
                                         MachinePointerInfo SrcPtrInfo) const {
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
    return SDValue();

  /// If not DWORD aligned, it is more efficient to call the library.  However
  /// if calling the library is not allowed (AlwaysInline), then soldier on as
  /// the code generated here is better than the long load-store sequence we
  /// would otherwise get.
  if (!AlwaysInline && (Align & 3) != 0)
    return SDValue();

  // If copying to or from a segment-relative address space, use the
  // default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256 ||
      SrcPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // ESI might be used as a base pointer, in that case we can't simply overwrite
  // the register.  Fall back to generic code.
  const X86RegisterInfo *TRI =
      static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
  if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
      TRI->getBaseRegister() == X86::ESI)
    return SDValue();

  MVT AVT;
  if (Align & 1)
    AVT = MVT::i8;
  else if (Align & 2)
    AVT = MVT::i16;
  else if (Align & 4)
    // DWORD aligned
    AVT = MVT::i32;
  else
    // QWORD aligned
    AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;

  unsigned UBytes = AVT.getSizeInBits() / 8;
  unsigned CountVal = SizeVal / UBytes;
  SDValue Count = DAG.getIntPtrConstant(CountVal);
  unsigned BytesLeft = SizeVal % UBytes;

  SDValue InFlag;
  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
                                                              X86::ECX,
                            Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
                                                              X86::EDI,
                            Dst, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
                                                              X86::ESI,
                            Src, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);

  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT DstVT = Dst.getValueType();
    EVT SrcVT = Src.getValueType();
    EVT SizeVT = Size.getValueType();
    Results.push_back(DAG.getMemcpy(Chain, dl,
                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
                                                DAG.getConstant(Offset, DstVT)),
                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset, SrcVT)),
                                    DAG.getConstant(BytesLeft, SizeVT),
                                    Align, isVolatile, AlwaysInline,
                                    DstPtrInfo.getWithOffset(Offset),
                                    SrcPtrInfo.getWithOffset(Offset)));
  }

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}
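
The sizing logic above is worth isolating: the alignment picks the widest REP MOVS unit, the quotient becomes the count in RCX/ECX, and the remainder (at most UBytes - 1 trailing bytes) is finished by an ordinary small memcpy. A standalone model of just that arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t SizeVal = 1003;  // constant copy size
  unsigned Align = 8;       // QWORD aligned on a 64-bit target
  unsigned UBytes;
  if (Align & 1)      UBytes = 1;  // byte aligned  -> REP MOVSB
  else if (Align & 2) UBytes = 2;  // WORD aligned  -> REP MOVSW
  else if (Align & 4) UBytes = 4;  // DWORD aligned -> REP MOVSD
  else                UBytes = 8;  // QWORD aligned -> REP MOVSQ
  uint64_t Count = SizeVal / UBytes;      // value loaded into RCX
  unsigned BytesLeft = SizeVal % UBytes;  // trailing bytes, here 3
  assert(UBytes == 8 && Count == 125 && BytesLeft == 3);
  assert(Count * UBytes + BytesLeft == SizeVal);  // nothing lost
  return 0;
}
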
SDValue
X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
                                             SDValue Chain,
                                             SDValue Dst, SDValue Src,
                                             SDValue Size, unsigned Align,
                                             bool isVolatile,
                                         MachinePointerInfo DstPtrInfo) const {
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);

  // If writing to a segment-relative address space, use the default
  // lowering.
  if (DstPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If not DWORD aligned or size is more than the threshold, call the library.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
  if ((Align & 3) != 0 ||
      !ConstantSize ||
      ConstantSize->getZExtValue() >
        Subtarget->getMaxInlineSizeThreshold()) {
    // Check to see if there is a specialized entry-point for memory zeroing.
    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);

    if (const char *bzeroEntry =
            (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : nullptr) {
      EVT IntPtr = TLI.getPointerTy();
      Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
      TargetLowering::ArgListTy Args;
      TargetLowering::ArgListEntry Entry;
      Entry.Node = Dst;
      Entry.Ty = IntPtrTy;
      Args.push_back(Entry);
      Entry.Node = Size;
      Args.push_back(Entry);

      TargetLowering::CallLoweringInfo CLI(DAG);
      CLI.setDebugLoc(dl).setChain(Chain)
        .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
                   DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0)
        .setDiscardResult();

      std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
      return CallResult.second;
    }

    // Otherwise have the target-independent code call memset.
    return SDValue();
  }

  uint64_t SizeVal = ConstantSize->getZExtValue();
  SDValue InFlag;
  EVT AVT;
  SDValue Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
  unsigned BytesLeft = 0;
  bool TwoRepStos = false;
  if (ValC) {
    unsigned ValReg;
    uint64_t Val = ValC->getZExtValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    switch (Align & 3) {
    case 2:   // WORD aligned
      AVT = MVT::i16;
      ValReg = X86::AX;
      Val = (Val << 8) | Val;
      break;
    case 0:  // DWORD aligned
      AVT = MVT::i32;
      ValReg = X86::EAX;
      Val = (Val << 8)  | Val;
      Val = (Val << 16) | Val;
      if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) {  // QWORD aligned
        AVT = MVT::i64;
        ValReg = X86::RAX;
        Val = (Val << 32) | Val;
      }
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      ValReg = X86::AL;
      Count = DAG.getIntPtrConstant(SizeVal);
      break;
    }

    if (AVT.bitsGT(MVT::i8)) {
      unsigned UBytes = AVT.getSizeInBits() / 8;
      Count = DAG.getIntPtrConstant(SizeVal / UBytes);
      BytesLeft = SizeVal % UBytes;
    }

    Chain  = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
                              InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count  = DAG.getIntPtrConstant(SizeVal);
    Chain  = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
    InFlag = Chain.getValue(1);
  }

  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
                                                              X86::ECX,
                            Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
                                                              X86::EDI,
                            Dst, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);

  if (TwoRepStos) {
    InFlag = Chain.getValue(1);
    Count  = Size;
    EVT CVT = Count.getValueType();
    SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
    Chain  = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
                                                             X86::ECX,
                              Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
    Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
  } else if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT AddrVT = Dst.getValueType();
    EVT SizeVT = Size.getValueType();

    Chain = DAG.getMemset(Chain, dl,
                          DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                      DAG.getConstant(Offset, AddrVT)),
                          Src,
                          DAG.getConstant(BytesLeft, SizeVT),
                          Align, isVolatile, DstPtrInfo.getWithOffset(Offset));
  }

  // TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
  return Chain;
}
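
The widening of the constant fill byte above (Val = (Val << 8) | Val, then doubling again) is the standard byte-splat: one REP STOS store then writes two, four, or eight copies of the byte per iteration. A standalone model of the splat:

#include <cassert>
#include <cstdint>

// Replicate the low byte of Val across a 16/32/64-bit store unit by
// repeated doubling, as the memset lowering above does for AX/EAX/RAX.
static uint64_t splatByte(uint64_t Val, unsigned Bits) {
  Val &= 255;
  if (Bits >= 16) Val = (Val << 8)  | Val;
  if (Bits >= 32) Val = (Val << 16) | Val;
  if (Bits >= 64) Val = (Val << 32) | Val;
  return Val;
}

int main() {
  assert(splatByte(0xAB, 16) == 0xABABULL);              // AX
  assert(splatByte(0xAB, 32) == 0xABABABABULL);          // EAX
  assert(splatByte(0xAB, 64) == 0xABABABABABABABABULL);  // RAX
  return 0;
}
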
Example #15
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case ISD::BUILD_VECTOR: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG,
    // which adds a 128-bit reg copy when going through the
    // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
    // possible because they can't be bundled by our scheduler.
    SDValue RegSeqArgs[9] = {
      CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
    };
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[2 * i + 1] = N->getOperand(i);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
        RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet.  Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());

    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
                              Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
          if (ImmReg == AMDGPU::ALU_LITERAL_X) {
            // We can only use literal constants (e.g. AMDGPU::ZERO,
            // AMDGPU::ONE, etc) in machine opcodes.
            continue;
          }
      } else {
        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
            (TII->get(Use->getMachineOpcode()).TSFlags &
            R600_InstFlag::VECTOR)) {
          continue;
        }

        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM);
        assert(ImmIdx != -1);

        // Subtract one from ImmIdx, because the DST operand is usually index
        // 0 for MachineInstrs, but we have no DST in the Ops vector.
        ImmIdx--;

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
          assert(C);

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }
  SDNode *Result = SelectCode(N);

  // Fold operands of selected node

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
        && TII->isALUInstr(Result->getMachineOpcode())) {
      // Fold FNEG/FABS/CONST_ADDRESS
      // TODO: ISel can generate multiple MachineInsts; we need to
      // recursively parse Result.
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for (SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If the node has a single use which is CLAMP_R600, fold it.
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
            TII->getOperandIdx(Result->getMachineOpcode(), R600Operands::CLAMP);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
              Result->getMachineOpcode(), PotentialClamp->getVTList(),
              Ops.data(), NumOp);
        }
      }
    }
  }

  return Result;
}
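
The constant selection above keeps the frequent R600 constants 0.0, 0.5, and 1.0 in dedicated registers (ZERO, HALF, ONE) and only falls back to ALU_LITERAL_X with the raw IEEE-754 bits, obtained via bitcastToAPInt, for anything else. Below is a standalone model of that classification; the enum and the memcpy bit-cast are stand-ins for the LLVM machinery:

#include <cassert>
#include <cstdint>
#include <cstring>

enum ImmReg { ZERO, HALF, ONE, ALU_LITERAL_X };

static ImmReg classifyFP(float F, uint32_t &ImmValue) {
  if (F == 0.0f) return ZERO;
  if (F == 0.5f) return HALF;
  if (F == 1.0f) return ONE;
  std::memcpy(&ImmValue, &F, sizeof F);  // bitcastToAPInt equivalent
  return ALU_LITERAL_X;
}

int main() {
  uint32_t Bits = 0;
  assert(classifyFP(0.0f, Bits) == ZERO);
  assert(classifyFP(0.5f, Bits) == HALF);
  assert(classifyFP(1.0f, Bits) == ONE);
  assert(classifyFP(2.5f, Bits) == ALU_LITERAL_X && Bits == 0x40200000u);
  return 0;
}
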
Example #16
/// ComplexPattern used on OR1KInstrInfo
/// Used on OR1K Load/Store instructions
bool OR1KDAGToDAGISel::
SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
  // If Address is FI, get the TargetFrameIndex.
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base   = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
    return true;
  }

  // On PIC code, load the global address (GA).
  if (TM.getRelocationModel() == Reloc::PIC_) {
    if ((Addr.getOpcode() == ISD::TargetGlobalAddress) ||
        (Addr.getOpcode() == ISD::TargetConstantPool) ||
        (Addr.getOpcode() == ISD::TargetJumpTable)){
      OR1KMachineFunctionInfo *MFI =
        CurDAG->getMachineFunction().getInfo<OR1KMachineFunctionInfo>();
      Base   = CurDAG->getRegister(MFI->getGlobalBaseReg(), MVT::i32);
      Offset = Addr;
      return true;
    }
  } else {
    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
        Addr.getOpcode() == ISD::TargetGlobalAddress))
      return false;
  }

  // Addresses of the form FI+const or FI|const
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isInt<16>(CN->getSExtValue())) {

      // If the first operand is a FI, get the TargetFI Node
      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                  (Addr.getOperand(0)))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
      return true;
    }
  }

  // Operand is a result from an ADD.
  if (Addr.getOpcode() == ISD::ADD) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      if (isUInt<16>(CN->getZExtValue())) {

        // If the first operand is a FI, get the TargetFI Node
        if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                    (Addr.getOperand(0))) {
          Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
        } else {
          Base = Addr.getOperand(0);
        }

        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
        return true;
      }
    }
  }

  Base   = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
Example #17
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
bool MipsDAGToDAGISel::
SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
  // If Address is FI, get the TargetFrameIndex.
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base   = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
    return true;
  }

  // On PIC code, load the global address (GA).
  if (TM.getRelocationModel() == Reloc::PIC_) {
    if (Addr.getOpcode() == MipsISD::WrapperPIC) {
      Base   = CurDAG->getRegister(Mips::GP, MVT::i32);
      Offset = Addr.getOperand(0);
      return true;
    }
  } else {
    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
        Addr.getOpcode() == ISD::TargetGlobalAddress))
      return false;
    else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) {
      Base   = CurDAG->getRegister(Mips::GP, MVT::i32);
      Offset = Addr;
      return true;
    }
  }

  // Addresses of the form FI+const or FI|const
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isInt<16>(CN->getSExtValue())) {

      // If the first operand is a FI, get the TargetFI Node
      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                  (Addr.getOperand(0)))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
      return true;
    }
  }

  // Operand is a result from an ADD.
  if (Addr.getOpcode() == ISD::ADD) {
    // When loading from constant pools, load the lower address part in
    // the instruction itself. For example, instead of:
    //  lui $2, %hi($CPI1_0)
    //  addiu $2, $2, %lo($CPI1_0)
    //  lwc1 $f0, 0($2)
    // Generate:
    //  lui $2, %hi($CPI1_0)
    //  lwc1 $f0, %lo($CPI1_0)($2)
    if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi ||
         Addr.getOperand(0).getOpcode() == ISD::LOAD) &&
        Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
      SDValue LoVal = Addr.getOperand(1);
      if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) || 
          isa<GlobalAddressSDNode>(LoVal.getOperand(0))) {
        Base = Addr.getOperand(0);
        Offset = LoVal.getOperand(0);
        return true;
      }
    }
  }

  Base   = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
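
The ADD case above folds a MipsISD::Lo operand straight into the load's 16-bit offset field (lwc1 $f0, %lo($CPI1_0)($2)), saving the addiu. This is sound because of the %hi/%lo decomposition: addr == (hi16 << 16) + signext(lo16), where hi16 is rounded up whenever the low half is negative as a signed 16-bit value. A standalone check of the identity:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t Addr : {0x00001234u, 0x0000F234u, 0x1234ABCDu, 0xFFFF8000u}) {
    uint32_t Hi = (Addr + 0x8000u) >> 16;    // %hi(Addr), rounded
    int32_t Lo = (int16_t)(Addr & 0xFFFFu);  // %lo(Addr), sign-extended
    assert((uint32_t)((Hi << 16) + Lo) == Addr);  // lui + offset rebuilds it
  }
  return 0;
}
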
Example #18
bool Mips16DAGToDAGISel::selectAddr16(
  SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
  SDValue &Alias) {
  EVT ValTy = Addr.getValueType();

  Alias = CurDAG->getTargetConstant(0, ValTy);

  // If Address is FI, get the TargetFrameIndex.
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base   = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
    Offset = CurDAG->getTargetConstant(0, ValTy);
    getMips16SPRefReg(Parent, Alias);
    return true;
  }
  // On PIC code, load the global address (GA).
  if (Addr.getOpcode() == MipsISD::Wrapper) {
    Base   = Addr.getOperand(0);
    Offset = Addr.getOperand(1);
    return true;
  }
  if (TM.getRelocationModel() != Reloc::PIC_) {
    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
        Addr.getOpcode() == ISD::TargetGlobalAddress))
      return false;
  }
  // Addresses of the form FI+const or FI|const
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isInt<16>(CN->getSExtValue())) {

      // If the first operand is a FI, get the TargetFI Node
      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                  (Addr.getOperand(0))) {
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
        getMips16SPRefReg(Parent, Alias);
      }
      else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
      return true;
    }
  }
  // Operand is a result from an ADD.
  if (Addr.getOpcode() == ISD::ADD) {
    // When loading from constant pools, load the lower address part in
    // the instruction itself. For example, instead of:
    //  lui $2, %hi($CPI1_0)
    //  addiu $2, $2, %lo($CPI1_0)
    //  lwc1 $f0, 0($2)
    // Generate:
    //  lui $2, %hi($CPI1_0)
    //  lwc1 $f0, %lo($CPI1_0)($2)
    if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
        Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
      SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
      if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
          isa<JumpTableSDNode>(Opnd0)) {
        Base = Addr.getOperand(0);
        Offset = Opnd0;
        return true;
      }
    }

    // If an indexed floating point load/store can be emitted, return false.
    const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);

    if (LS &&
        (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
        Subtarget.hasFPIdx())
      return false;
  }
  Base   = Addr;
  Offset = CurDAG->getTargetConstant(0, ValTy);
  return true;
}
Example #19
SDValue
ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
                                             SDValue Chain,
                                             SDValue Dst, SDValue Src,
                                             SDValue Size, unsigned Align,
                                             bool isVolatile, bool AlwaysInline,
                                             MachinePointerInfo DstPtrInfo,
                                          MachinePointerInfo SrcPtrInfo) const {
  // Do repeated 4-byte loads and stores. To be improved.
  // This requires 4-byte alignment.
  if ((Align & 3) != 0)
    return SDValue();
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
    return SDValue();

  unsigned BytesLeft = SizeVal & 3;
  unsigned NumMemOps = SizeVal >> 2;
  unsigned EmittedNumMemOps = 0;
  EVT VT = MVT::i32;
  unsigned VTSize = 4;
  unsigned i = 0;
  const unsigned MAX_LOADS_IN_LDM = 6;
  SDValue TFOps[MAX_LOADS_IN_LDM];
  SDValue Loads[MAX_LOADS_IN_LDM];
  uint64_t SrcOff = 0, DstOff = 0;

  // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
  // same number of stores.  The loads and stores will get combined into
  // ldm/stm later on.
  while (EmittedNumMemOps < NumMemOps) {
    for (i = 0;
         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
      Loads[i] = DAG.getLoad(VT, dl, Chain,
                             DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
                                         DAG.getConstant(SrcOff, MVT::i32)),
                             SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
                             false, false, 0);
      TFOps[i] = Loads[i].getValue(1);
      SrcOff += VTSize;
    }
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);

    for (i = 0;
         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
      TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                              DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
                                          DAG.getConstant(DstOff, MVT::i32)),
                              DstPtrInfo.getWithOffset(DstOff),
                              isVolatile, false, 0);
      DstOff += VTSize;
    }
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);

    EmittedNumMemOps += i;
  }

  if (BytesLeft == 0)
    return Chain;

  // Issue loads / stores for the trailing (1 - 3) bytes.
  unsigned BytesLeftSave = BytesLeft;
  i = 0;
  while (BytesLeft) {
    if (BytesLeft >= 2) {
      VT = MVT::i16;
      VTSize = 2;
    } else {
      VT = MVT::i8;
      VTSize = 1;
    }

    Loads[i] = DAG.getLoad(VT, dl, Chain,
                           DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
                                       DAG.getConstant(SrcOff, MVT::i32)),
                           SrcPtrInfo.getWithOffset(SrcOff),
                           false, false, false, 0);
    TFOps[i] = Loads[i].getValue(1);
    ++i;
    SrcOff += VTSize;
    BytesLeft -= VTSize;
  }
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);

  i = 0;
  BytesLeft = BytesLeftSave;
  while (BytesLeft) {
    if (BytesLeft >= 2) {
      VT = MVT::i16;
      VTSize = 2;
    } else {
      VT = MVT::i8;
      VTSize = 1;
    }

    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                            DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
                                        DAG.getConstant(DstOff, MVT::i32)),
                            DstPtrInfo.getWithOffset(DstOff), false, false, 0);
    ++i;
    DstOff += VTSize;
    BytesLeft -= VTSize;
  }
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
}
SDValue
X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
        SDValue Chain, SDValue Dst, SDValue Src,
        SDValue Size, unsigned Align,
        bool isVolatile, bool AlwaysInline,
        MachinePointerInfo DstPtrInfo,
        MachinePointerInfo SrcPtrInfo) const {
    // This requires the copy size to be a constant, preferably
    // within a subtarget-specific limit.
    ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
    if (!ConstantSize)
        return SDValue();
    uint64_t SizeVal = ConstantSize->getZExtValue();
    if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
        return SDValue();

    /// If not DWORD aligned, it is more efficient to call the library.  However
    /// if calling the library is not allowed (AlwaysInline), then soldier on as
    /// the code generated here is better than the long load-store sequence we
    /// would otherwise get.
    if (!AlwaysInline && (Align & 3) != 0)
        return SDValue();

    // If copying to or from a segment-relative address space, use the
    // default lowering.
    if (DstPtrInfo.getAddrSpace() >= 256 ||
            SrcPtrInfo.getAddrSpace() >= 256)
        return SDValue();

    // If ESI is used as a base pointer, we must preserve it when doing rep movs.
    const X86RegisterInfo *TRI =
        static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
    bool PreserveESI = TRI->hasBasePointer(DAG.getMachineFunction()) &&
                       TRI->getBaseRegister() == X86::ESI;

    MVT AVT;
    if (Align & 1)
        AVT = MVT::i8;
    else if (Align & 2)
        AVT = MVT::i16;
    else if (Align & 4)
        // DWORD aligned
        AVT = MVT::i32;
    else
        // QWORD aligned
        AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;

    unsigned UBytes = AVT.getSizeInBits() / 8;
    unsigned CountVal = SizeVal / UBytes;
    SDValue Count = DAG.getIntPtrConstant(CountVal);
    unsigned BytesLeft = SizeVal % UBytes;


    if (PreserveESI) {
        // Save ESI to a physical register. (We cannot use a virtual register
        // because if it is spilled we wouldn't be able to reload it.)
        // We don't glue this because the register dependencies are explicit.
        Chain = DAG.getCopyToReg(Chain, dl, X86::EDX,
                                 DAG.getRegister(X86::ESI, MVT::i32));
    }

    SDValue InGlue(0, 0);
    Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
                              X86::ECX,
                              Count, InGlue);
    InGlue = Chain.getValue(1);
    Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
                              X86::EDI,
                              Dst, InGlue);
    InGlue = Chain.getValue(1);
    Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
                              X86::ESI,
                              Src, InGlue);
    InGlue = Chain.getValue(1);

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, DAG.getValueType(AVT), InGlue };
    // FIXME: Make X86rep_movs explicitly use RCX, RDI, RSI instead of glue.
    SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
                                  array_lengthof(Ops));

    if (PreserveESI) {
        InGlue = RepMovs.getValue(1);
        RepMovs = DAG.getCopyToReg(RepMovs, dl, X86::ESI,
                                   DAG.getRegister(X86::EDX, MVT::i32), InGlue);
    }

    SmallVector<SDValue, 4> Results;
    Results.push_back(RepMovs);


    if (BytesLeft) {
        // Handle the last 1 - 7 bytes.
        unsigned Offset = SizeVal - BytesLeft;
        EVT DstVT = Dst.getValueType();
        EVT SrcVT = Src.getValueType();
        EVT SizeVT = Size.getValueType();
        Results.push_back(DAG.getMemcpy(Chain, dl,
                                        DAG.getNode(ISD::ADD, dl, DstVT, Dst,
                                                DAG.getConstant(Offset, DstVT)),
                                        DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset, SrcVT)),
                                        DAG.getConstant(BytesLeft, SizeVT),
                                        Align, isVolatile, AlwaysInline,
                                        DstPtrInfo.getWithOffset(Offset),
                                        SrcPtrInfo.getWithOffset(Offset)));
    }

    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                       &Results[0], Results.size());
}
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();
  if (!ConstantSize)
    return SDValue();
  RepMovsRepeats Repeats(ConstantSize->getZExtValue());
  if (!AlwaysInline && Repeats.Size > Subtarget.getMaxInlineSizeThreshold())
    return SDValue();

  /// If not DWORD aligned, it is more efficient to call the library.  However
  /// if calling the library is not allowed (AlwaysInline), then soldier on as
  /// the code generated here is better than the long load-store sequence we
  /// would otherwise get.
  if (!AlwaysInline && (Align & 3) != 0)
    return SDValue();

  // If copying to or from a segment-relative address space, use the
  // default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256 ||
      SrcPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If the base register might conflict with our physical registers, bail out.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
                                  X86::ECX, X86::ESI, X86::EDI};
  if (isBaseRegConflictPossible(DAG, ClobberSet))
    return SDValue();

  // If the target has enhanced REPMOVSB, then it's at least as fast to use
  // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle
  // BytesLeft.
  if (!Subtarget.hasERMSB() && !(Align & 1)) {
    if (Align & 2)
      // WORD aligned
      Repeats.AVT = MVT::i16;
    else if (Align & 4)
      // DWORD aligned
      Repeats.AVT = MVT::i32;
    else
      // QWORD aligned
      Repeats.AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32;

    if (Repeats.BytesLeft() > 0 &&
        DAG.getMachineFunction().getFunction().optForMinSize()) {
      // When aggressively optimizing for size, avoid generating the code to
      // handle BytesLeft.
      Repeats.AVT = MVT::i8;
    }
  }

  SDValue InFlag;
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
                           DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI,
                           Src, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(Repeats.AVT), InFlag };
  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);

  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  if (Repeats.BytesLeft()) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = Repeats.Size - Repeats.BytesLeft();
    EVT DstVT = Dst.getValueType();
    EVT SrcVT = Src.getValueType();
    EVT SizeVT = Size.getValueType();
    Results.push_back(DAG.getMemcpy(Chain, dl,
                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
                                                DAG.getConstant(Offset, dl,
                                                                DstVT)),
                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset, dl,
                                                                SrcVT)),
                                    DAG.getConstant(Repeats.BytesLeft(), dl,
                                                    SizeVT),
                                    Align, isVolatile, AlwaysInline, false,
                                    DstPtrInfo.getWithOffset(Offset),
                                    SrcPtrInfo.getWithOffset(Offset)));
  }

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}
SDValue
X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                        SDValue Chain, SDValue Dst, SDValue Src,
                                        SDValue Size, unsigned Align,
                                        bool isVolatile, bool AlwaysInline,
                                        const Value *DstSV,
                                        uint64_t DstSVOff,
                                        const Value *SrcSV,
                                        uint64_t SrcSVOff) const {
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
    return SDValue();

  /// If not DWORD aligned, call the library.
  if ((Align & 3) != 0)
    return SDValue();

  // DWORD aligned
  EVT AVT = MVT::i32;
  if (Subtarget->is64Bit() && ((Align & 0x7) == 0))  // QWORD aligned
    AVT = MVT::i64;

  unsigned UBytes = AVT.getSizeInBits() / 8;
  unsigned CountVal = SizeVal / UBytes;
  SDValue Count = DAG.getIntPtrConstant(CountVal);
  unsigned BytesLeft = SizeVal % UBytes;

  SDValue InFlag(0, 0);
  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
                                                              X86::ECX,
                            Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
                                                              X86::EDI,
                            Dst, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
                                                              X86::ESI,
                            Src, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
                                array_lengthof(Ops));

  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT DstVT = Dst.getValueType();
    EVT SrcVT = Src.getValueType();
    EVT SizeVT = Size.getValueType();
    Results.push_back(DAG.getMemcpy(Chain, dl,
                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
                                                DAG.getConstant(Offset, DstVT)),
                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset, SrcVT)),
                                    DAG.getConstant(BytesLeft, SizeVT),
                                    Align, isVolatile, AlwaysInline,
                                    DstSV, DstSVOff + Offset,
                                    SrcSV, SrcSVOff + Offset));
  }

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                     &Results[0], Results.size());
}
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
                                   SDValue &Offset) {
  // FrameIndex addresses are handled separately
  //errs() << "SelectADDRri: ";
  //Addr.getNode()->dumpr();
  if (isa<FrameIndexSDNode>(Addr)) {
    //errs() << "Failure\n";
    return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    Base = Addr.getOperand(0);
    if (isa<FrameIndexSDNode>(Base)) {
      //errs() << "Failure\n";
      return false;
    }
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
    //errs() << "Success\n";
    return true;
  }

  /*if (Addr.getNumOperands() == 1) {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
    errs() << "Success\n";
    return true;
  }*/

  //errs() << "SelectADDRri fails on: ";
  //Addr.getNode()->dumpr();

  if (isImm(Addr)) {
    //errs() << "Failure\n";
    return false;
  }

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());

  //errs() << "Success\n";
  return true;

  /*if (Addr.getOpcode() != ISD::ADD) {
    // let SelectADDRii handle the [imm] case
    if (isImm(Addr))
      return false;
    // it is [reg]

    assert(Addr.getValueType().isSimple() && "Type must be simple");
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());

    return true;
  }

  if (Addr.getNumOperands() < 2)
    return false;

  // let SelectADDRii handle the [imm+imm] case
  if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1)))
    return false;

  // try [reg+imm] and [imm+reg]
  for (int i = 0; i < 2; i ++)
    if (SelectImm(Addr.getOperand(1-i), Offset)) {
      Base = Addr.getOperand(i);
      return true;
    }

  // neither [reg+imm] nor [imm+reg]
  return false;*/
}
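// A hypothetical standalone sketch (not the PTX backend API) of the matching
// order SelectADDRri implements above: frame indices and bare immediates are
// rejected (they belong to SelectADDRlocal/SelectADDRii), reg+imm is folded,
// and anything else falls through to [reg] with a zero offset.
enum class AddrKind { FrameIndex, RegPlusImm, Imm, Reg };

struct AddrMatch { bool Matched; long Offset; };

static AddrMatch classifyADDRri(AddrKind Kind, long Imm = 0) {
  switch (Kind) {
  case AddrKind::FrameIndex: return {false, 0};  // SelectADDRlocal's job
  case AddrKind::Imm:        return {false, 0};  // SelectADDRii's job
  case AddrKind::RegPlusImm: return {true, Imm}; // fold the constant offset
  case AddrKind::Reg:        return {true, 0};   // default: [reg], offset 0
  }
  return {false, 0};
}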
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
bool MipsDAGToDAGISel::
SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
  EVT ValTy = Addr.getValueType();

  // If Parent is an unaligned f32 load or store, select a (base + index)
  // floating point load/store instruction (luxc1 or suxc1).
  const LSBaseSDNode* LS = 0;

  if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) {
    EVT VT = LS->getMemoryVT();

    if (VT.getSizeInBits() / 8 > LS->getAlignment()) {
      assert(TLI.allowsUnalignedMemoryAccesses(VT) &&
             "Unaligned loads/stores not supported for this type.");
      if (VT == MVT::f32)
        return false;
    }
  }

  // If the address is a FrameIndex, get the TargetFrameIndex.
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base   = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
    Offset = CurDAG->getTargetConstant(0, ValTy);
    return true;
  }

  // On PIC code, load the GlobalAddress.
  if (Addr.getOpcode() == MipsISD::Wrapper) {
    Base   = Addr.getOperand(0);
    Offset = Addr.getOperand(1);
    return true;
  }

  if (TM.getRelocationModel() != Reloc::PIC_) {
    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
        Addr.getOpcode() == ISD::TargetGlobalAddress))
      return false;
  }

  // Addresses of the form FI+const or FI|const
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isInt<16>(CN->getSExtValue())) {

      // If the first operand is a FI, get the TargetFI Node
      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                  (Addr.getOperand(0)))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
      else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
      return true;
    }
  }

  // Operand is a result from an ADD.
  if (Addr.getOpcode() == ISD::ADD) {
    // When loading from constant pools, load the lower address part in
    // the instruction itself. For example, instead of:
    //  lui $2, %hi($CPI1_0)
    //  addiu $2, $2, %lo($CPI1_0)
    //  lwc1 $f0, 0($2)
    // Generate:
    //  lui $2, %hi($CPI1_0)
    //  lwc1 $f0, %lo($CPI1_0)($2)
    if (Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
      SDValue LoVal = Addr.getOperand(1);
      if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) ||
          isa<GlobalAddressSDNode>(LoVal.getOperand(0))) {
        Base = Addr.getOperand(0);
        Offset = LoVal.getOperand(0);
        return true;
      }
    }

    // If an indexed floating point load/store can be emitted, return false.
    if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
        Subtarget.hasMips32r2Or64())
      return false;
  }

  Base   = Addr;
  Offset = CurDAG->getTargetConstant(0, ValTy);
  return true;
}
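// Standalone sketch of the signed 16-bit range check (isInt<16>) that gates
// folding a constant into the Mips load/store offset field above.
#include <cstdint>

static bool fitsSImm16(int64_t V) {
  return V >= -32768 && V <= 32767; // what isInt<16>(V) computes
}
// fitsSImm16(0x7fff) is true; fitsSImm16(0x8000) is false, so such an offset
// is not folded and the address is materialized with a separate add instead.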
Example #25
SDNode *
SPUDAGToDAGISel::Select(SDNode *N) {
  unsigned Opc = N->getOpcode();
  int n_ops = -1;
  unsigned NewOpc = 0;
  EVT OpVT = N->getValueType(0);
  SDValue Ops[8];
  DebugLoc dl = N->getDebugLoc();

  if (N->isMachineOpcode())
    return NULL;   // Already selected.

  if (Opc == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
    SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0));

    if (FI < 128) {
      NewOpc = SPU::AIr32;
      Ops[0] = TFI;
      Ops[1] = Imm0;
      n_ops = 2;
    } else {
      NewOpc = SPU::Ar32;
      Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0));
      Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl,
                                              N->getValueType(0), TFI),
                       0);
      n_ops = 2;
    }
  } else if (Opc == ISD::Constant && OpVT == MVT::i64) {
    // Catch the i64 constants that end up here. Note: The backend doesn't
    // attempt to legalize the constant (it's useless because DAGCombiner
    // will insert 64-bit constants and we can't stop it).
    return SelectI64Constant(N, OpVT, N->getDebugLoc());
  } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
             && OpVT == MVT::i64) {
    SDValue Op0 = N->getOperand(0);
    EVT Op0VT = Op0.getValueType();
    EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(),
                                    Op0VT, (128 / Op0VT.getSizeInBits()));
    EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(),
                                   OpVT, (128 / OpVT.getSizeInBits()));
    SDValue shufMask;

    switch (Op0VT.getSimpleVT().SimpleTy) {
    default:
      report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT");
      /*NOTREACHED*/
    case MVT::i32:
      shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                 CurDAG->getConstant(0x80808080, MVT::i32),
                                 CurDAG->getConstant(0x00010203, MVT::i32),
                                 CurDAG->getConstant(0x80808080, MVT::i32),
                                 CurDAG->getConstant(0x08090a0b, MVT::i32));
      break;

    case MVT::i16:
      shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                 CurDAG->getConstant(0x80808080, MVT::i32),
                                 CurDAG->getConstant(0x80800203, MVT::i32),
                                 CurDAG->getConstant(0x80808080, MVT::i32),
                                 CurDAG->getConstant(0x80800a0b, MVT::i32));
      break;

    case MVT::i8:
      shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                 CurDAG->getConstant(0x80808080, MVT::i32),
                                 CurDAG->getConstant(0x80808003, MVT::i32),
                                 CurDAG->getConstant(0x80808080, MVT::i32),
                                 CurDAG->getConstant(0x8080800b, MVT::i32));
      break;
    }

    SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode());

    HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
                                               Op0VecVT, Op0));

    SDValue PromScalar;
    if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode()))
      PromScalar = SDValue(N, 0);
    else
      PromScalar = PromoteScalar.getValue();

    SDValue zextShuffle =
            CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
                            PromScalar, PromScalar,
                            SDValue(shufMaskLoad, 0));

    HandleSDNode Dummy2(zextShuffle);
    if (SDNode *N = SelectCode(Dummy2.getValue().getNode()))
      zextShuffle = SDValue(N, 0);
    else
      zextShuffle = Dummy2.getValue();
    HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
                                       zextShuffle));

    CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
    SelectCode(Dummy.getValue().getNode());
    return Dummy.getValue().getNode();
  } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
    SDNode *CGLoad =
            emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());

    HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
                                       N->getOperand(0), N->getOperand(1),
                                       SDValue(CGLoad, 0)));

    CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
    if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
      return N;
    return Dummy.getValue().getNode();
  } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
    SDNode *CGLoad =
            emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode());

    HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
                                       N->getOperand(0), N->getOperand(1),
                                       SDValue(CGLoad, 0)));

    CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
    if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
      return N;
    return Dummy.getValue().getNode();
  } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
    SDNode *CGLoad =
            emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());

    HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
                                       N->getOperand(0), N->getOperand(1),
                                       SDValue(CGLoad, 0)));
    CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
    if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
      return N;
    return Dummy.getValue().getNode();
  } else if (Opc == ISD::TRUNCATE) {
    SDValue Op0 = N->getOperand(0);
    if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
        && OpVT == MVT::i32
        && Op0.getValueType() == MVT::i64) {
      // Catch (truncate:i32 ([sra|srl]:i64 arg, c)), where c >= 32.
      //
      // Take advantage of the fact that the upper 32 bits are in the
      // i32 preferred slot and avoid shuffle gymnastics:
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
      if (CN != 0) {
        unsigned shift_amt = unsigned(CN->getZExtValue());

        if (shift_amt >= 32) {
          SDNode *hi32 =
                  CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
                                         Op0.getOperand(0), getRC(MVT::i32));

          shift_amt -= 32;
          if (shift_amt > 0) {
            // Take care of the additional shift, if present:
            SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
            unsigned Opc = SPU::ROTMAIr32_i32;

            if (Op0.getOpcode() == ISD::SRL)
              Opc = SPU::ROTMr32;

            hi32 = CurDAG->getMachineNode(Opc, dl, OpVT, SDValue(hi32, 0),
                                          shift);
          }

          return hi32;
        }
      }
    }
  } else if (Opc == ISD::SHL) {
    if (OpVT == MVT::i64)
      return SelectSHLi64(N, OpVT);
  } else if (Opc == ISD::SRL) {
    if (OpVT == MVT::i64)
      return SelectSRLi64(N, OpVT);
  } else if (Opc == ISD::SRA) {
    if (OpVT == MVT::i64)
      return SelectSRAi64(N, OpVT);
  } else if (Opc == ISD::FNEG
             && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
    DebugLoc dl = N->getDebugLoc();
    // Check if the pattern is a special form of DFNMS:
    // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
    SDValue Op0 = N->getOperand(0);
    if (Op0.getOpcode() == ISD::FSUB) {
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == ISD::FMUL) {
        unsigned Opc = SPU::DFNMSf64;
        if (OpVT == MVT::v2f64)
          Opc = SPU::DFNMSv2f64;

        return CurDAG->getMachineNode(Opc, dl, OpVT,
                                      Op00.getOperand(0),
                                      Op00.getOperand(1),
                                      Op0.getOperand(1));
      }
    }

    SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64);
    SDNode *signMask = 0;
    unsigned Opc = SPU::XORfneg64;

    if (OpVT == MVT::f64) {
      signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl);
    } else if (OpVT == MVT::v2f64) {
      Opc = SPU::XORfnegvec;
      signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
                                                 MVT::v2i64,
                                                 negConst, negConst).getNode());
    }

    return CurDAG->getMachineNode(Opc, dl, OpVT,
                                  N->getOperand(0), SDValue(signMask, 0));
  } else if (Opc == ISD::FABS) {
    if (OpVT == MVT::f64) {
      SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
      return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT,
                                    N->getOperand(0), SDValue(signMask, 0));
    } else if (OpVT == MVT::v2f64) {
      SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
      SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
                                       absConst, absConst);
      SDNode *signMask = emitBuildVector(absVec.getNode());
      return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT,
                                    N->getOperand(0), SDValue(signMask, 0));
    }
  } else if (Opc == SPUISD::LDRESULT) {
    // Custom select instructions for LDRESULT
    EVT VT = N->getValueType(0);
    SDValue Arg = N->getOperand(0);
    SDValue Chain = N->getOperand(1);
    SDNode *Result;

    Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT,
                                    MVT::Other, Arg,
                                    getRC(VT.getSimpleVT()), Chain);
    return Result;

  } else if (Opc == SPUISD::IndirectAddr) {
    // Look at the operands: SelectCode() will catch the cases that aren't
    // specifically handled here.
    //
    // SPUInstrInfo catches the following patterns:
    // (SPUindirect (SPUhi ...), (SPUlo ...))
    // (SPUindirect $sp, imm)
    EVT VT = N->getValueType(0);
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    RegisterSDNode *RN;

    if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
        || (Op0.getOpcode() == ISD::Register
            && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
                && RN->getReg() != SPU::R1))) {
      NewOpc = SPU::Ar32;
      Ops[1] = Op1;
      if (Op1.getOpcode() == ISD::Constant) {
        ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
        Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
        if (isInt<10>(CN->getSExtValue())) {
          NewOpc = SPU::AIr32;
          Ops[1] = Op1;
        } else {
          Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl,
                                                  N->getValueType(0),
                                                  Op1),
                           0);
        }
      }
      Ops[0] = Op0;
      n_ops = 2;
    }
  }

  if (n_ops > 0) {
    if (N->hasOneUse())
      return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
    else
      return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops);
  } else
    return SelectCode(N);
}
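// A rough standalone model (simplified from the SPU ISA) of what the SHUFB
// control bytes built above encode: 10xxxxxx selects zero, 110xxxxx all ones,
// 111xxxxx the byte 0x80, and anything else picks a byte from the 32-byte
// concatenation of the two source vectors. The 0x80808080 words therefore
// insert zero bytes, which is how the zero/any-extend shuffles work.
#include <cstdint>

static uint8_t shufbByte(const uint8_t Src[32], uint8_t Sel) {
  if ((Sel & 0xC0) == 0x80) return 0x00; // 10xxxxxx -> zero
  if ((Sel & 0xE0) == 0xC0) return 0xFF; // 110xxxxx -> ones
  if ((Sel & 0xE0) == 0xE0) return 0x80; // 111xxxxx -> 0x80
  return Src[Sel & 0x1F];                // select from a||b
}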
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) const {
  EVT ValTy = Addr.getValueType();

  // If the address is a FrameIndex, get the TargetFrameIndex.
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base   = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
    Offset = CurDAG->getTargetConstant(0, ValTy);
    return true;
  }

  // On PIC code, load the GlobalAddress.
  if (Addr.getOpcode() == MipsISD::Wrapper) {
    Base   = Addr.getOperand(0);
    Offset = Addr.getOperand(1);
    return true;
  }

  if (TM.getRelocationModel() != Reloc::PIC_) {
    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
        Addr.getOpcode() == ISD::TargetGlobalAddress))
      return false;
  }

  // Addresses of the form FI+const or FI|const
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isInt<16>(CN->getSExtValue())) {

      // If the first operand is a FI, get the TargetFI Node
      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                  (Addr.getOperand(0)))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
      else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
      return true;
    }
  }

  // Operand is a result from an ADD.
  if (Addr.getOpcode() == ISD::ADD) {
    // When loading from constant pools, load the lower address part in
    // the instruction itself. For example, instead of:
    //  lui $2, %hi($CPI1_0)
    //  addiu $2, $2, %lo($CPI1_0)
    //  lwc1 $f0, 0($2)
    // Generate:
    //  lui $2, %hi($CPI1_0)
    //  lwc1 $f0, %lo($CPI1_0)($2)
    if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
        Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
      SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
      if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
          isa<JumpTableSDNode>(Opnd0)) {
        Base = Addr.getOperand(0);
        Offset = Opnd0;
        return true;
      }
    }
  }

  return false;
}
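// Standalone sketch of the %hi/%lo split referred to in the comment above:
// %hi is the upper half adjusted for the sign of the low half, so that
// (hi << 16) + (int16_t)lo reconstructs the full 32-bit address.
#include <cstdint>

static uint32_t hiPart(uint32_t Addr) { return (Addr + 0x8000) >> 16; }
static int16_t  loPart(uint32_t Addr) { return (int16_t)(Addr & 0xFFFF); }
// Invariant: (hiPart(A) << 16) + loPart(A) == A, which is why lwc1 can apply
// %lo($CPI1_0) as its offset once lui has materialized %hi($CPI1_0).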
Example #27
bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
    const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
  int OperandIdx[] = {
    TII->getOperandIdx(Opcode, R600Operands::SRC0),
    TII->getOperandIdx(Opcode, R600Operands::SRC1),
    TII->getOperandIdx(Opcode, R600Operands::SRC2)
  };
  int SelIdx[] = {
    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL),
    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL),
    TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL)
  };
  int NegIdx[] = {
    TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG),
    TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG),
    TII->getOperandIdx(Opcode, R600Operands::SRC2_NEG)
  };
  int AbsIdx[] = {
    TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS),
    TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS),
    -1
  };

  for (unsigned i = 0; i < 3; i++) {
    if (OperandIdx[i] < 0)
      return false;
    SDValue Operand = Ops[OperandIdx[i] - 1];
    switch (Operand.getOpcode()) {
    case AMDGPUISD::CONST_ADDRESS: {
      SDValue CstOffset;
      if (Operand.getValueType().isVector() ||
          !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
        break;

      // Gather the other constant values.
      std::vector<unsigned> Consts;
      for (unsigned j = 0; j < 3; j++) {
        int SrcIdx = OperandIdx[j];
        if (SrcIdx < 0)
          break;
        if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
          if (Reg->getReg() == AMDGPU::ALU_CONST) {
            ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
            Consts.push_back(Cst->getZExtValue());
          }
        }
      }

      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
      Consts.push_back(Cst->getZExtValue());
      if (!TII->fitsConstReadLimitations(Consts))
        break;

      Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
      Ops[SelIdx[i] - 1] = CstOffset;
      return true;
      }
    case ISD::FNEG:
      if (NegIdx[i] < 0)
        break;
      Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
      Ops[NegIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
      return true;
    case ISD::FABS:
      if (AbsIdx[i] < 0)
        break;
      Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
      Ops[AbsIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
      return true;
    case ISD::BITCAST:
      Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
      return true;
    default:
      break;
    }
  }
  return false;
}
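// A toy model (not the R600 operand encoding) of the FNEG/FABS folds above:
// instead of selecting a separate negate instruction, drop the FNEG node and
// set the matching source-modifier operand to 1.
struct SrcMod { int SrcValue; int Neg; int Abs; };

static bool foldFNeg(SrcMod &Op, bool OperandIsFNeg, int FNegInput) {
  if (!OperandIsFNeg)
    return false;
  Op.SrcValue = FNegInput; // use the FNEG's input operand directly
  Op.Neg = 1;              // record the negation as a modifier bit
  return true;
}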
Example #28
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
    SDValue Size, unsigned Align, bool isVolatile,
    MachinePointerInfo DstPtrInfo) const {
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();

#ifndef NDEBUG
  // If the base register might conflict with our physical registers, bail out.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
                                  X86::ECX, X86::EAX, X86::EDI};
  assert(!isBaseRegConflictPossible(DAG, ClobberSet));
#endif

  // If the destination is in a segment-relative address space, use the
  // default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If not DWORD aligned or size is more than the threshold, call the library.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
  if ((Align & 3) != 0 || !ConstantSize ||
      ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
    // Check to see if there is a specialized entry-point for memory zeroing.
    ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);

    if (const char *bzeroName = (ValC && ValC->isNullValue())
        ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
        : nullptr) {
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
      Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
      TargetLowering::ArgListTy Args;
      TargetLowering::ArgListEntry Entry;
      Entry.Node = Dst;
      Entry.Ty = IntPtrTy;
      Args.push_back(Entry);
      Entry.Node = Size;
      Args.push_back(Entry);

      TargetLowering::CallLoweringInfo CLI(DAG);
      CLI.setDebugLoc(dl)
          .setChain(Chain)
          .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
                        DAG.getExternalSymbol(bzeroName, IntPtr),
                        std::move(Args))
          .setDiscardResult();

      std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
      return CallResult.second;
    }

    // Otherwise have the target-independent code call memset.
    return SDValue();
  }

  uint64_t SizeVal = ConstantSize->getZExtValue();
  SDValue InFlag;
  EVT AVT;
  SDValue Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
  unsigned BytesLeft = 0;
  if (ValC) {
    unsigned ValReg;
    uint64_t Val = ValC->getZExtValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    switch (Align & 3) {
    case 2:   // WORD aligned
      AVT = MVT::i16;
      ValReg = X86::AX;
      Val = (Val << 8) | Val;
      break;
    case 0:  // DWORD aligned
      AVT = MVT::i32;
      ValReg = X86::EAX;
      Val = (Val << 8)  | Val;
      Val = (Val << 16) | Val;
      if (Subtarget.is64Bit() && ((Align & 0x7) == 0)) {  // QWORD aligned
        AVT = MVT::i64;
        ValReg = X86::RAX;
        Val = (Val << 32) | Val;
      }
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      ValReg = X86::AL;
      Count = DAG.getIntPtrConstant(SizeVal, dl);
      break;
    }

    if (AVT.bitsGT(MVT::i8)) {
      unsigned UBytes = AVT.getSizeInBits() / 8;
      Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl);
      BytesLeft = SizeVal % UBytes;
    }

    Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
                             InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count  = DAG.getIntPtrConstant(SizeVal, dl);
    Chain  = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag);
    InFlag = Chain.getValue(1);
  }

  bool Use64BitRegs = Subtarget.isTarget64BitLP64();
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
                           Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);

  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT AddrVT = Dst.getValueType();
    EVT SizeVT = Size.getValueType();

    Chain = DAG.getMemset(Chain, dl,
                          DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                      DAG.getConstant(Offset, dl, AddrVT)),
                          Val,
                          DAG.getConstant(BytesLeft, dl, SizeVT),
                          Align, isVolatile, false,
                          DstPtrInfo.getWithOffset(Offset));
  }

  // TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
  return Chain;
}
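// Standalone sketch of the splat computation in the switch above: replicate
// the low byte of the memset value across the store element, doubling the
// replicated width at each step.
#include <cstdint>

static uint64_t splatByte(uint64_t Val, unsigned ElementBits) {
  Val &= 0xFF;
  for (unsigned Bits = 8; Bits < ElementBits; Bits *= 2)
    Val |= Val << Bits;
  return Val;
}
// splatByte(0xAB, 32) == 0xABABABAB, matching the DWORD-aligned case; with
// ElementBits == 64 it also reproduces the extra QWORD step.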
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  const R600InstrInfo *TII =
                      static_cast<const R600InstrInfo*>(TM.getInstrInfo());
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case AMDGPUISD::CONST_ADDRESS: {
    for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
                              I != SDNode::use_end(); I = Next) {
      Next = llvm::next(I);
      if (!I->isMachineOpcode()) {
        continue;
      }
      unsigned Opcode = I->getMachineOpcode();
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SrcIdx = I.getOperandNo();
      int SelIdx;
      // Unlike MachineInstrs, SDNodes do not have results in their operand
      // list, so we need to increment the SrcIdx, since
      // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
      if (HasDst) {
        SrcIdx++;
      }

      SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
      if (SelIdx < 0) {
        continue;
      }

      SDValue CstOffset;
      if (N->getValueType(0).isVector() ||
          !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
        continue;

      // Gather constant values.
      int SrcIndices[] = {
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
      };
      std::vector<unsigned> Consts;
      for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
        int OtherSrcIdx = SrcIndices[i];
        int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
        if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
          continue;
        }
        if (HasDst) {
          OtherSrcIdx--;
          OtherSelIdx--;
        }
        if (RegisterSDNode *Reg =
                         dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
          if (Reg->getReg() == AMDGPU::ALU_CONST) {
            ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
            Consts.push_back(Cst->getZExtValue());
          }
        }
      }

      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
      Consts.push_back(Cst->getZExtValue());
      if (!TII->fitsConstReadLimitations(Consts))
        continue;

      // Convert back to SDNode indices
      if (HasDst) {
        SrcIdx--;
        SelIdx--;
      }
      std::vector<SDValue> Ops;
      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
        if (i == SrcIdx) {
          Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
        } else if (i == SelIdx) {
          Ops.push_back(CstOffset);
        } else {
          Ops.push_back(I->getOperand(i));
        }
      }
      CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }

    unsigned RegClassID;
    switch (N->getValueType(0).getVectorNumElements()) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF plus 4
    // INSERT_SUBREGs, which adds a 128-bit register copy when going through
    // the TwoAddressInstruction pass. We want to avoid 128-bit copies as much
    // as possible because they can't be bundled by our scheduler.
    SDValue RegSeqArgs[9] = {
      CurDAG->getTargetConstant(RegClassID, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
    };
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[2 * i + 1] = N->getOperand(i);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
        RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  SDLoc(N), N->getValueType(0), Ops);
  }

  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet.  Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }

    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
                              Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          // We can only use literal constants (e.g. AMDGPU::ZERO,
          // AMDGPU::ONE, etc.) in machine opcodes.
          continue;
        }
      } else {
        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
            (TII->get(Use->getMachineOpcode()).TSFlags &
            R600_InstFlag::VECTOR)) {
          continue;
        }

        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
                                        AMDGPU::OpName::literal);
        if (ImmIdx == -1) {
          continue;
        }

        if (TII->getOperandIdx(Use->getMachineOpcode(),
                               AMDGPU::OpName::dst) != -1) {
          // Subtract one from ImmIdx because the DST operand is usually
          // index 0 for MachineInstrs, but there is no DST in the Ops vector.
          ImmIdx--;
        }

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
          assert(C);

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }
  SDNode *Result = SelectCode(N);

  // Fold the operands of the selected node.

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
    if (Result && Result->isMachineOpcode() &&
        Result->getMachineOpcode() == AMDGPU::DOT_4) {
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for (SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

    }
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
        && TII->hasInstrModifiers(Result->getMachineOpcode())) {
      // Fold FNEG/FABS.
      // TODO: ISel can generate multiple MachineInstrs; we need to parse
      // Result recursively.
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for (SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If the node has a single use which is CLAMP_R600, fold it.
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
            TII->getOperandIdx(Result->getMachineOpcode(), AMDGPU::OpName::clamp);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
              Result->getMachineOpcode(), PotentialClamp->getVTList(),
              Ops.data(), NumOp);
        }
      }
    }
  }

  return Result;
}
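// The generic shape of the two fold loops above, shown standalone: keep
// re-running a folding step until it no longer changes the operand list (a
// simple fixpoint iteration). FoldFn stands in for FoldOperands or
// FoldDotOperands and returns true when it modified Ops; the element type is
// a placeholder for SDValue.
#include <vector>

template <typename FoldFn>
static void foldToFixpoint(std::vector<int> &Ops, FoldFn Step) {
  bool Modified;
  do {
    Modified = Step(Ops);
  } while (Modified);
}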