bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
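// A minimal, self-contained sketch of the signed-range check that the
// selectors in this file gate offset folding on. fitsSignedImm is a
// hypothetical stand-in for llvm::isInt<N>, shown here so the folding rule
// can be exercised outside the backend:
#include <cassert>
#include <cstdint>

template <unsigned N> bool fitsSignedImm(int64_t x) {
  return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
}

int main() {
  assert(fitsSignedImm<16>(32767));   // largest signed 16-bit immediate
  assert(fitsSignedImm<16>(-32768));  // smallest
  assert(!fitsSignedImm<16>(32768));  // one past the top does not fold
  // Note: SelectADDRVTX_READ above feeds getZExtValue() into the check,
  // while the frame-index selectors below use getSExtValue(); for wide
  // constants the zero-extended form can land outside this signed range.
  return 0;
}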
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
                                      SDValue &Offset) {
  //errs() << "SelectADDRlocal: ";
  //Addr.getNode()->dumpr();
  if (isa<FrameIndexSDNode>(Addr)) {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
    //errs() << "Success\n";
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    Base = Addr.getOperand(0);
    if (!isa<FrameIndexSDNode>(Base)) {
      //errs() << "Failure\n";
      return false;
    }
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
    //errs() << "Offset: ";
    //Offset.getNode()->dumpr();
    //errs() << "Success\n";
    return true;
  }

  //errs() << "Failure\n";
  return false;
}
SDValue ARM64SelectionDAGInfo::EmitTargetCodeForMemset(
    SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile,
    MachinePointerInfo DstPtrInfo) const {
  // Check to see if there is a specialized entry-point for memory zeroing.
  ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
  ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
  const char *bzeroEntry =
      (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : 0;
  // For small size (< 256), it is not beneficial to use bzero
  // instead of memset.
  if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
    const ARM64TargetLowering &TLI = *static_cast<const ARM64TargetLowering *>(
        DAG.getTarget().getTargetLowering());

    EVT IntPtr = TLI.getPointerTy();
    Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    Entry.Node = Dst;
    Entry.Ty = IntPtrTy;
    Args.push_back(Entry);
    Entry.Node = Size;
    Args.push_back(Entry);
    TargetLowering::CallLoweringInfo CLI(
        Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false,
        0, CallingConv::C, /*isTailCall=*/false,
        /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
        DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
    return CallResult.second;
  }
  return SDValue();
}
/// ComplexPattern used on Cpu0InstrInfo
/// Used on Cpu0 Load/Store instructions
bool Cpu0DAGToDAGISel::
SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
  EVT ValTy = Addr.getValueType();

  // If Parent is an unaligned f32 load or store, select a (base + index)
  // floating point load/store instruction (luxc1 or suxc1).
  const LSBaseSDNode* LS = 0;

  if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) {
    EVT VT = LS->getMemoryVT();

    if (VT.getSizeInBits() / 8 > LS->getAlignment()) {
      assert(TLI.allowsUnalignedMemoryAccesses(VT) &&
             "Unaligned loads/stores not supported for this type.");
      if (VT == MVT::f32)
        return false;
    }
  }

  // If the address is a frame index, get the TargetFrameIndex.
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
    Offset = CurDAG->getTargetConstant(0, ValTy);
    return true;
  }

  // On PIC code, load the global address through the wrapper node.
  if (Addr.getOpcode() == Cpu0ISD::Wrapper) {
    Base = Addr.getOperand(0);
    Offset = Addr.getOperand(1);
    return true;
  }

  if (TM.getRelocationModel() != Reloc::PIC_) {
    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
         Addr.getOpcode() == ISD::TargetGlobalAddress))
      return false;
  }

  // Addresses of the form FI+const or FI|const
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isInt<16>(CN->getSExtValue())) {
      // If the first operand is a FI, get the TargetFI Node
      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                  (Addr.getOperand(0)))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
      else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
      return true;
    }
  }

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, ValTy);
  return true;
}
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
  }

  return true;
}
// Return true if N is an undef or a constant.
// If N was undef, return an (i8imm 0) in Retval.
// If N was an immediate, convert it to i8imm and return it in Retval.
// Note: the conversion to i8imm is required; otherwise the pattern matcher
// inserts a bunch of IMOVi8rr to convert the imm to i8imm, and this causes
// instruction selection to fail.
bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
  if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant))
    return false;

  if (N.getOpcode() == ISD::UNDEF)
    Retval = CurDAG->getTargetConstant(0, MVT::i8);
  else {
    ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
    unsigned retval = cn->getZExtValue();
    Retval = CurDAG->getTargetConstant(retval, MVT::i8);
  }
  return true;
}
bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
  uint32_t Bits;
  uint32_t RegWidth = N.getValueType().getSizeInBits();

  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  if (!CN) return false;

  if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
    return false;

  Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
  return true;
}
void MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
                                        SDValue CmpLHS, const SDLoc &DL,
                                        SDNode *Node) const {
  unsigned Opc = InFlag.getOpcode();
  (void)Opc;

  assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
          (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
         "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");

  unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu;
  if (Subtarget->isGP64bit()) {
    SLTuOp = Mips::SLTu64;
    ADDuOp = Mips::DADDu;
  }

  SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
  SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();

  SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops);

  if (Subtarget->isGP64bit()) {
    // On 64-bit targets, sltu produces an i64 but our backend currently says
    // that SLTu64 produces an i32. We need to fix this in the long run but
    // for now, just make the DAG type-correct by asserting the upper bits
    // are zero.
    Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT,
                                   CurDAG->getTargetConstant(0, DL, VT),
                                   SDValue(Carry, 0),
                                   CurDAG->getTargetConstant(Mips::sub_32, DL,
                                                             VT));
  }

  // Generate a second addition only if we know that RHS is not a
  // constant-zero node.
  SDNode *AddCarry = Carry;
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
  if (!C || C->getZExtValue())
    AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT,
                                      SDValue(Carry, 0), RHS);

  CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0));
}
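// A standalone model of why SLTu recovers the carry bit that selectAddESubE
// materializes: when an unsigned add wraps, the truncated sum is strictly
// smaller than either addend. Illustrative only; not backend code.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t a = 0xFFFFFFFFu, b = 2;
  uint32_t sum = a + b;               // the ADDC result, wraps to 1
  uint32_t carry = (sum < b) ? 1 : 0; // the SLTu(sum, addend) pattern
  assert(sum == 1 && carry == 1);

  a = 3; b = 4;
  sum = a + b;                        // no wrap
  assert(((sum < b) ? 1 : 0) == 0);   // carry is 0
  return 0;
}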
SDValue HexagonSelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst,
    SDValue Src, SDValue Size, unsigned Align, bool isVolatile,
    bool AlwaysInline, MachinePointerInfo DstPtrInfo,
    MachinePointerInfo SrcPtrInfo) const {
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize)
    return SDValue();

  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (SizeVal < 32 || (SizeVal % 8) != 0)
    return SDValue();

  // Special case aligned memcpys with size >= 32 bytes and a multiple of 8.
  //
  const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering();
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
  Entry.Node = Dst;
  Args.push_back(Entry);
  Entry.Node = Src;
  Args.push_back(Entry);
  Entry.Node = Size;
  Args.push_back(Entry);

  const char *SpecialMemcpyName =
      "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(Chain)
      .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
                 Type::getVoidTy(*DAG.getContext()),
                 DAG.getTargetExternalSymbol(
                     SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())),
                 std::move(Args))
      .setDiscardResult();

  std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
  return CallResult.second;
}
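// A self-contained sketch of the gate above: the specialized Hexagon routine
// is only chosen for 4-byte-aligned, constant-size copies of at least 32
// bytes whose length is a multiple of 8. The helper name is hypothetical.
#include <cassert>
#include <cstdint>

bool useSpecialHexagonMemcpy(bool AlwaysInline, unsigned Align,
                             bool SizeIsConstant, uint64_t Size) {
  if (AlwaysInline || (Align & 0x3) != 0 || !SizeIsConstant)
    return false; // leave it to the generic lowering
  return Size >= 32 && (Size % 8) == 0;
}

int main() {
  assert(useSpecialHexagonMemcpy(false, 4, true, 64));
  assert(!useSpecialHexagonMemcpy(false, 4, true, 24)); // too small
  assert(!useSpecialHexagonMemcpy(false, 4, true, 36)); // not a multiple of 8
  assert(!useSpecialHexagonMemcpy(false, 2, true, 64)); // under-aligned
  return 0;
}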
/// Match frameindex+offset and frameindex|offset
bool MipsSEDAGToDAGISel::selectAddrFrameIndexOffset(
    SDValue Addr, SDValue &Base, SDValue &Offset,
    unsigned OffsetBits) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isIntN(OffsetBits, CN->getSExtValue())) {
      EVT ValTy = Addr.getValueType();

      // If the first operand is a FI, get the TargetFI Node
      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                  (Addr.getOperand(0)))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
      else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
      return true;
    }
  }
  return false;
}
/// Used on microMIPS Load/Store unaligned instructions (12-bit offset)
bool MipsSEDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) const {
  EVT ValTy = Addr.getValueType();

  // Addresses of the form FI+const or FI|const
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isInt<12>(CN->getSExtValue())) {
      // If the first operand is a FI then get the TargetFI Node
      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                  (Addr.getOperand(0)))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
      else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
      return true;
    }
  }
  return false;
}
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst,
    SDValue Src, SDValue Size, unsigned Align, bool isVolatile,
    bool AlwaysInline, MachinePointerInfo DstPtrInfo,
    MachinePointerInfo SrcPtrInfo) const {
  const ARMSubtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
  // Do repeated 4-byte loads and stores. To be improved.
  // This requires 4-byte alignment.
  if ((Align & 3) != 0)
    return SDValue();
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
                                  RTLIB::MEMCPY);
  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
                                  RTLIB::MEMCPY);

  unsigned BytesLeft = SizeVal & 3;
  unsigned NumMemOps = SizeVal >> 2;
  unsigned EmittedNumMemOps = 0;
  EVT VT = MVT::i32;
  unsigned VTSize = 4;
  unsigned i = 0;
  // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
  const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
  SDValue TFOps[6];
  SDValue Loads[6];
  uint64_t SrcOff = 0, DstOff = 0;

  // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
  // VLDM/VSTM and make this code emit it when appropriate. This would reduce
  // pressure on the general purpose registers. However this seems harder to
  // map onto the register allocator's view of the world.

  // The number of MEMCPY pseudo-instructions to emit. We use up to
  // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
  // later on. This is a lower bound on the number of MEMCPY operations we
  // must emit.
  unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;

  // Code size optimisation: do not inline memcpy if expansion results in
  // more instructions than the library call.
  if (NumMEMCPYs > 1 &&
      DAG.getMachineFunction().getFunction().optForMinSize()) {
    return SDValue();
  }

  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);

  for (unsigned I = 0; I != NumMEMCPYs; ++I) {
    // Evenly distribute registers among MEMCPY operations to reduce register
    // pressure.
    unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
    unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;

    Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
                      DAG.getConstant(NumRegs, dl, MVT::i32));
    Src = Dst.getValue(1);
    Chain = Dst.getValue(2);

    DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
    SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);

    EmittedNumMemOps = NextEmittedNumMemOps;
  }

  if (BytesLeft == 0)
    return Chain;

  // Issue loads / stores for the trailing (1 - 3) bytes.
  auto getRemainingValueType = [](unsigned BytesLeft) {
    return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
  };
  auto getRemainingSize = [](unsigned BytesLeft) {
    return (BytesLeft >= 2) ? 2 : 1;
  };

  unsigned BytesLeftSave = BytesLeft;
  i = 0;
  while (BytesLeft) {
    VT = getRemainingValueType(BytesLeft);
    VTSize = getRemainingSize(BytesLeft);
    Loads[i] = DAG.getLoad(VT, dl, Chain,
                           DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
                                       DAG.getConstant(SrcOff, dl, MVT::i32)),
                           SrcPtrInfo.getWithOffset(SrcOff));
    TFOps[i] = Loads[i].getValue(1);
    ++i;
    SrcOff += VTSize;
    BytesLeft -= VTSize;
  }
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      makeArrayRef(TFOps, i));

  i = 0;
  BytesLeft = BytesLeftSave;
  while (BytesLeft) {
    VT = getRemainingValueType(BytesLeft);
    VTSize = getRemainingSize(BytesLeft);
    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                            DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
                                        DAG.getConstant(DstOff, dl, MVT::i32)),
                            DstPtrInfo.getWithOffset(DstOff));
    ++i;
    DstOff += VTSize;
    BytesLeft -= VTSize;
  }
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                     makeArrayRef(TFOps, i));
}
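// A standalone sketch of the "evenly distribute registers" arithmetic in the
// MEMCPY loop above: NumMemOps 4-byte operations are split across NumMEMCPYs
// pseudo-instructions so the per-op register counts differ by at most one.
// Names mirror the function above but this is illustrative code only.
#include <cassert>

int main() {
  unsigned NumMemOps = 10, MaxLoadsInLDM = 4; // e.g. the Thumb1 limit
  unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM; // 3
  unsigned Emitted = 0, Sum = 0;
  for (unsigned I = 0; I != NumMEMCPYs; ++I) {
    unsigned Next = NumMemOps * (I + 1) / NumMEMCPYs; // 3, 6, 10
    unsigned NumRegs = Next - Emitted;                // 3, 3, 4
    assert(NumRegs <= MaxLoadsInLDM);
    Sum += NumRegs;
    Emitted = Next;
  }
  assert(Sum == NumMemOps); // every 4-byte chunk is covered exactly once
  return 0;
}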
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
    return SDValue();

  /// If not DWORD aligned, it is more efficient to call the library. However
  /// if calling the library is not allowed (AlwaysInline), then soldier on as
  /// the code generated here is better than the long load-store sequence we
  /// would otherwise get.
  if (!AlwaysInline && (Align & 3) != 0)
    return SDValue();

  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // ESI might be used as a base pointer; in that case we can't simply
  // overwrite the register. Fall back to generic code.
  const X86RegisterInfo *TRI =
      static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
  if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
      TRI->getBaseRegister() == X86::ESI)
    return SDValue();

  MVT AVT;
  if (Align & 1)
    AVT = MVT::i8;
  else if (Align & 2)
    AVT = MVT::i16;
  else if (Align & 4)
    // DWORD aligned
    AVT = MVT::i32;
  else
    // QWORD aligned
    AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;

  unsigned UBytes = AVT.getSizeInBits() / 8;
  unsigned CountVal = SizeVal / UBytes;
  SDValue Count = DAG.getIntPtrConstant(CountVal);
  unsigned BytesLeft = SizeVal % UBytes;

  SDValue InFlag;
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX
                                                           : X86::ECX,
                           Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI
                                                           : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI
                                                           : X86::ESI,
                           Src, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);

  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT DstVT = Dst.getValueType();
    EVT SrcVT = Src.getValueType();
    EVT SizeVT = Size.getValueType();
    Results.push_back(DAG.getMemcpy(Chain, dl,
                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
                                                DAG.getConstant(Offset,
                                                                DstVT)),
                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset,
                                                                SrcVT)),
                                    DAG.getConstant(BytesLeft, SizeVT),
                                    Align, isVolatile, AlwaysInline,
                                    DstPtrInfo.getWithOffset(Offset),
                                    SrcPtrInfo.getWithOffset(Offset)));
  }

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
    SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile,
    MachinePointerInfo DstPtrInfo) const {
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);

  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If not DWORD aligned or size is more than the threshold, call the
  // library. The libc version is likely to be faster for these cases. It can
  // use the address value and run time information about the CPU.
  if ((Align & 3) != 0 || !ConstantSize ||
      ConstantSize->getZExtValue() > Subtarget->getMaxInlineSizeThreshold()) {
    // Check to see if there is a specialized entry-point for memory zeroing.
    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);

    if (const char *bzeroEntry = V &&
        V->isNullValue() ? Subtarget->getBZeroEntry() : nullptr) {
      EVT IntPtr = TLI.getPointerTy();
      Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
      TargetLowering::ArgListTy Args;
      TargetLowering::ArgListEntry Entry;
      Entry.Node = Dst;
      Entry.Ty = IntPtrTy;
      Args.push_back(Entry);
      Entry.Node = Size;
      Args.push_back(Entry);

      TargetLowering::CallLoweringInfo CLI(DAG);
      CLI.setDebugLoc(dl).setChain(Chain)
          .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
                     DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0)
          .setDiscardResult();

      std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
      return CallResult.second;
    }

    // Otherwise have the target-independent code call memset.
    return SDValue();
  }

  uint64_t SizeVal = ConstantSize->getZExtValue();
  SDValue InFlag;
  EVT AVT;
  SDValue Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
  unsigned BytesLeft = 0;
  bool TwoRepStos = false;
  if (ValC) {
    unsigned ValReg;
    uint64_t Val = ValC->getZExtValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    switch (Align & 3) {
    case 2:   // WORD aligned
      AVT = MVT::i16;
      ValReg = X86::AX;
      Val = (Val << 8) | Val;
      break;
    case 0:   // DWORD aligned
      AVT = MVT::i32;
      ValReg = X86::EAX;
      Val = (Val << 8)  | Val;
      Val = (Val << 16) | Val;
      if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) {  // QWORD aligned
        AVT = MVT::i64;
        ValReg = X86::RAX;
        Val = (Val << 32) | Val;
      }
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      ValReg = X86::AL;
      Count = DAG.getIntPtrConstant(SizeVal);
      break;
    }

    if (AVT.bitsGT(MVT::i8)) {
      unsigned UBytes = AVT.getSizeInBits() / 8;
      Count = DAG.getIntPtrConstant(SizeVal / UBytes);
      BytesLeft = SizeVal % UBytes;
    }

    Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
                             InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count  = DAG.getIntPtrConstant(SizeVal);
    Chain  = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
    InFlag = Chain.getValue(1);
  }

  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX
                                                           : X86::ECX,
                           Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI
                                                           : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);

  if (TwoRepStos) {
    InFlag = Chain.getValue(1);
    Count  = Size;
    EVT CVT = Count.getValueType();
    SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3,
                                               CVT));
    Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX
                                                          : X86::ECX,
                             Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
    Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
  } else if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT AddrVT = Dst.getValueType();
    EVT SizeVT = Size.getValueType();

    Chain = DAG.getMemset(Chain, dl,
                          DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                      DAG.getConstant(Offset, AddrVT)),
                          Src,
                          DAG.getConstant(BytesLeft, SizeVT),
                          Align, isVolatile,
                          DstPtrInfo.getWithOffset(Offset));
  }

  // TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
  return Chain;
}
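// A standalone illustration of the byte-splat performed on `Val` in the
// memset lowering above: an 8-bit fill value is replicated across the
// 16/32/64-bit store unit by repeated shift-or. Hypothetical helper name.
#include <cassert>
#include <cstdint>

uint64_t splatByte(uint64_t Val, unsigned Bytes) {
  Val &= 0xff;
  if (Bytes >= 2) Val = (Val << 8)  | Val; // WORD
  if (Bytes >= 4) Val = (Val << 16) | Val; // DWORD
  if (Bytes >= 8) Val = (Val << 32) | Val; // QWORD
  return Val;
}

int main() {
  assert(splatByte(0xAB, 2) == 0xABABu);
  assert(splatByte(0xAB, 4) == 0xABABABABu);
  assert(splatByte(0xAB, 8) == 0xABABABABABABABABull);
  return 0;
}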
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case ISD::BUILD_VECTOR: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128-bit register copy when going through the
    // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
    // as possible because they can't be bundled by our scheduler.
    SDValue RegSeqArgs[9] = {
      CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
    };
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[2 * i + 1] = N->getOperand(i);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet. Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo *>(TM.getInstrInfo());

    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
         Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          // We can only use literal constants (e.g. AMDGPU::ZERO,
          // AMDGPU::ONE, etc) in machine opcodes.
          continue;
        }
      } else {
        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
            (TII->get(Use->getMachineOpcode()).TSFlags &
             R600_InstFlag::VECTOR)) {
          continue;
        }

        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
                                        R600Operands::IMM);
        assert(ImmIdx != -1);

        // subtract one from ImmIdx, because the DST operand is usually index
        // 0 for MachineInstrs, but we have no DST in the Ops vector.
        ImmIdx--;

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          ConstantSDNode *C =
              dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
          assert(C);

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }
  SDNode *Result = SelectCode(N);

  // Fold operands of selected node
  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo *>(TM.getInstrInfo());
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags &
          R600_InstFlag::VECTOR) &&
        TII->isALUInstr(Result->getMachineOpcode())) {
      // Fold FNEG/FABS/CONST_ADDRESS
      // TODO: Isel can generate multiple MachineInst, we need to recursively
      // parse Result
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for (SDNode::op_iterator I = Result->op_begin(),
             E = Result->op_end(); I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If the node has a single use which is CLAMP_R600, fold it.
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
              TII->getOperandIdx(Result->getMachineOpcode(),
                                 R600Operands::CLAMP);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
                                        Result->getMachineOpcode(),
                                        PotentialClamp->getVTList(),
                                        Ops.data(), NumOp);
        }
      }
    }
  }
  return Result;
}
/// ComplexPattern used on OR1KInstrInfo
/// Used on OR1K Load/Store instructions
bool OR1KDAGToDAGISel::
SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
  // If the address is a frame index, get the TargetFrameIndex.
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
    return true;
  }

  // On PIC code, load global addresses (and constant pool / jump table
  // entries) relative to the global base register.
  if (TM.getRelocationModel() == Reloc::PIC_) {
    if ((Addr.getOpcode() == ISD::TargetGlobalAddress) ||
        (Addr.getOpcode() == ISD::TargetConstantPool) ||
        (Addr.getOpcode() == ISD::TargetJumpTable)) {
      OR1KMachineFunctionInfo *MFI =
          CurDAG->getMachineFunction().getInfo<OR1KMachineFunctionInfo>();
      Base = CurDAG->getRegister(MFI->getGlobalBaseReg(), MVT::i32);
      Offset = Addr;
      return true;
    }
  } else {
    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
         Addr.getOpcode() == ISD::TargetGlobalAddress))
      return false;
  }

  // Addresses of the form FI+const or FI|const
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isInt<16>(CN->getSExtValue())) {
      // If the first operand is a FI, get the TargetFI Node
      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                  (Addr.getOperand(0)))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
      return true;
    }
  }

  // Operand is a result from an ADD.
  if (Addr.getOpcode() == ISD::ADD) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      if (isUInt<16>(CN->getZExtValue())) {
        // If the first operand is a FI, get the TargetFI Node
        if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                    (Addr.getOperand(0))) {
          Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
        } else {
          Base = Addr.getOperand(0);
        }
        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
        return true;
      }
    }
  }

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
bool MipsDAGToDAGISel::
SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
  // If the address is a frame index, get the TargetFrameIndex.
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
    return true;
  }

  // On PIC code, load global addresses relative to $gp.
  if (TM.getRelocationModel() == Reloc::PIC_) {
    if (Addr.getOpcode() == MipsISD::WrapperPIC) {
      Base = CurDAG->getRegister(Mips::GP, MVT::i32);
      Offset = Addr.getOperand(0);
      return true;
    }
  } else {
    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
         Addr.getOpcode() == ISD::TargetGlobalAddress))
      return false;
    else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) {
      Base = CurDAG->getRegister(Mips::GP, MVT::i32);
      Offset = Addr;
      return true;
    }
  }

  // Addresses of the form FI+const or FI|const
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isInt<16>(CN->getSExtValue())) {
      // If the first operand is a FI, get the TargetFI Node
      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                  (Addr.getOperand(0)))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
      return true;
    }
  }

  // Operand is a result from an ADD.
  if (Addr.getOpcode() == ISD::ADD) {
    // When loading from constant pools, load the lower address part in
    // the instruction itself. Example, instead of:
    //  lui $2, %hi($CPI1_0)
    //  addiu $2, $2, %lo($CPI1_0)
    //  lwc1 $f0, 0($2)
    // Generate:
    //  lui $2, %hi($CPI1_0)
    //  lwc1 $f0, %lo($CPI1_0)($2)
    if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi ||
         Addr.getOperand(0).getOpcode() == ISD::LOAD) &&
        Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
      SDValue LoVal = Addr.getOperand(1);
      if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) ||
          isa<GlobalAddressSDNode>(LoVal.getOperand(0))) {
        Base = Addr.getOperand(0);
        Offset = LoVal.getOperand(0);
        return true;
      }
    }
  }

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
bool Mips16DAGToDAGISel::selectAddr16(
    SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
    SDValue &Alias) {
  EVT ValTy = Addr.getValueType();

  Alias = CurDAG->getTargetConstant(0, ValTy);

  // If the address is a frame index, get the TargetFrameIndex.
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
    Offset = CurDAG->getTargetConstant(0, ValTy);
    getMips16SPRefReg(Parent, Alias);
    return true;
  }

  // On PIC code, load the global address through the wrapper node.
  if (Addr.getOpcode() == MipsISD::Wrapper) {
    Base = Addr.getOperand(0);
    Offset = Addr.getOperand(1);
    return true;
  }

  if (TM.getRelocationModel() != Reloc::PIC_) {
    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
         Addr.getOpcode() == ISD::TargetGlobalAddress))
      return false;
  }

  // Addresses of the form FI+const or FI|const
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isInt<16>(CN->getSExtValue())) {
      // If the first operand is a FI, get the TargetFI Node
      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                  (Addr.getOperand(0))) {
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
        getMips16SPRefReg(Parent, Alias);
      } else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
      return true;
    }
  }

  // Operand is a result from an ADD.
  if (Addr.getOpcode() == ISD::ADD) {
    // When loading from constant pools, load the lower address part in
    // the instruction itself. Example, instead of:
    //  lui $2, %hi($CPI1_0)
    //  addiu $2, $2, %lo($CPI1_0)
    //  lwc1 $f0, 0($2)
    // Generate:
    //  lui $2, %hi($CPI1_0)
    //  lwc1 $f0, %lo($CPI1_0)($2)
    if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
        Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
      SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
      if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
          isa<JumpTableSDNode>(Opnd0)) {
        Base = Addr.getOperand(0);
        Offset = Opnd0;
        return true;
      }
    }

    // If an indexed floating point load/store can be emitted, return false.
    const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);

    if (LS &&
        (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
        Subtarget.hasFPIdx())
      return false;
  }
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, ValTy);
  return true;
}
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  // Do repeated 4-byte loads and stores. To be improved.
  // This requires 4-byte alignment.
  if ((Align & 3) != 0)
    return SDValue();
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
    return SDValue();

  unsigned BytesLeft = SizeVal & 3;
  unsigned NumMemOps = SizeVal >> 2;
  unsigned EmittedNumMemOps = 0;
  EVT VT = MVT::i32;
  unsigned VTSize = 4;
  unsigned i = 0;
  const unsigned MAX_LOADS_IN_LDM = 6;
  SDValue TFOps[MAX_LOADS_IN_LDM];
  SDValue Loads[MAX_LOADS_IN_LDM];
  uint64_t SrcOff = 0, DstOff = 0;

  // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
  // same number of stores. The loads and stores will get combined into
  // ldm/stm later on.
  while (EmittedNumMemOps < NumMemOps) {
    for (i = 0;
         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
      Loads[i] = DAG.getLoad(VT, dl, Chain,
                             DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
                                         DAG.getConstant(SrcOff, MVT::i32)),
                             SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
                             false, false, 0);
      TFOps[i] = Loads[i].getValue(1);
      SrcOff += VTSize;
    }
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);

    for (i = 0;
         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
      TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                              DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
                                          DAG.getConstant(DstOff, MVT::i32)),
                              DstPtrInfo.getWithOffset(DstOff),
                              isVolatile, false, 0);
      DstOff += VTSize;
    }
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);

    EmittedNumMemOps += i;
  }

  if (BytesLeft == 0)
    return Chain;

  // Issue loads / stores for the trailing (1 - 3) bytes.
  unsigned BytesLeftSave = BytesLeft;
  i = 0;
  while (BytesLeft) {
    if (BytesLeft >= 2) {
      VT = MVT::i16;
      VTSize = 2;
    } else {
      VT = MVT::i8;
      VTSize = 1;
    }

    Loads[i] = DAG.getLoad(VT, dl, Chain,
                           DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
                                       DAG.getConstant(SrcOff, MVT::i32)),
                           SrcPtrInfo.getWithOffset(SrcOff),
                           false, false, false, 0);
    TFOps[i] = Loads[i].getValue(1);
    ++i;
    SrcOff += VTSize;
    BytesLeft -= VTSize;
  }
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);

  i = 0;
  BytesLeft = BytesLeftSave;
  while (BytesLeft) {
    if (BytesLeft >= 2) {
      VT = MVT::i16;
      VTSize = 2;
    } else {
      VT = MVT::i8;
      VTSize = 1;
    }

    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                            DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
                                        DAG.getConstant(DstOff, MVT::i32)),
                            DstPtrInfo.getWithOffset(DstOff),
                            false, false, 0);
    ++i;
    DstOff += VTSize;
    BytesLeft -= VTSize;
  }
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
}
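// A self-contained sketch of the trailing-byte schedule used above: after
// the 4-byte chunks, the remaining 1-3 bytes need at most one i16 plus one
// i8 access. Illustrative only; names mirror the function above.
#include <cassert>

int main() {
  for (unsigned SizeVal = 1; SizeVal <= 16; ++SizeVal) {
    unsigned NumMemOps = SizeVal >> 2; // 4-byte loads/stores
    unsigned BytesLeft = SizeVal & 3;  // trailing 0-3 bytes
    unsigned Tail = 0, TailOps = 0;
    while (BytesLeft) {
      unsigned VTSize = (BytesLeft >= 2) ? 2 : 1; // i16 first, then i8
      Tail += VTSize;
      ++TailOps;
      BytesLeft -= VTSize;
    }
    assert(NumMemOps * 4 + Tail == SizeVal);
    assert(TailOps <= 2); // never more than one i16 and one i8
  }
  return 0;
}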
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
    return SDValue();

  /// If not DWORD aligned, it is more efficient to call the library. However
  /// if calling the library is not allowed (AlwaysInline), then soldier on as
  /// the code generated here is better than the long load-store sequence we
  /// would otherwise get.
  if (!AlwaysInline && (Align & 3) != 0)
    return SDValue();

  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If ESI is used as a base pointer, we must preserve it when doing rep
  // movs.
  const X86RegisterInfo *TRI =
      static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
  bool PreserveESI = TRI->hasBasePointer(DAG.getMachineFunction()) &&
                     TRI->getBaseRegister() == X86::ESI;

  MVT AVT;
  if (Align & 1)
    AVT = MVT::i8;
  else if (Align & 2)
    AVT = MVT::i16;
  else if (Align & 4)
    // DWORD aligned
    AVT = MVT::i32;
  else
    // QWORD aligned
    AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;

  unsigned UBytes = AVT.getSizeInBits() / 8;
  unsigned CountVal = SizeVal / UBytes;
  SDValue Count = DAG.getIntPtrConstant(CountVal);
  unsigned BytesLeft = SizeVal % UBytes;

  if (PreserveESI) {
    // Save ESI to a physical register. (We cannot use a virtual register
    // because if it is spilled we wouldn't be able to reload it.)
    // We don't glue this because the register dependencies are explicit.
    Chain = DAG.getCopyToReg(Chain, dl, X86::EDX,
                             DAG.getRegister(X86::ESI, MVT::i32));
  }

  SDValue InGlue(0, 0);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX
                                                           : X86::ECX,
                           Count, InGlue);
  InGlue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI
                                                           : X86::EDI,
                           Dst, InGlue);
  InGlue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI
                                                           : X86::ESI,
                           Src, InGlue);
  InGlue = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InGlue };
  // FIXME: Make X86rep_movs explicitly use RCX, RDI, RSI instead of glue.
  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
                                array_lengthof(Ops));

  if (PreserveESI) {
    InGlue = RepMovs.getValue(1);
    RepMovs = DAG.getCopyToReg(RepMovs, dl, X86::ESI,
                               DAG.getRegister(X86::EDX, MVT::i32), InGlue);
  }

  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT DstVT = Dst.getValueType();
    EVT SrcVT = Src.getValueType();
    EVT SizeVT = Size.getValueType();
    Results.push_back(DAG.getMemcpy(Chain, dl,
                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
                                                DAG.getConstant(Offset,
                                                                DstVT)),
                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset,
                                                                SrcVT)),
                                    DAG.getConstant(BytesLeft, SizeVT),
                                    Align, isVolatile, AlwaysInline,
                                    DstPtrInfo.getWithOffset(Offset),
                                    SrcPtrInfo.getWithOffset(Offset)));
  }

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                     &Results[0], Results.size());
}
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst,
    SDValue Src, SDValue Size, unsigned Align, bool isVolatile,
    bool AlwaysInline, MachinePointerInfo DstPtrInfo,
    MachinePointerInfo SrcPtrInfo) const {
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();
  if (!ConstantSize)
    return SDValue();
  RepMovsRepeats Repeats(ConstantSize->getZExtValue());
  if (!AlwaysInline && Repeats.Size > Subtarget.getMaxInlineSizeThreshold())
    return SDValue();

  /// If not DWORD aligned, it is more efficient to call the library. However
  /// if calling the library is not allowed (AlwaysInline), then soldier on as
  /// the code generated here is better than the long load-store sequence we
  /// would otherwise get.
  if (!AlwaysInline && (Align & 3) != 0)
    return SDValue();

  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If the base register might conflict with our physical registers, bail
  // out.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
                                  X86::ECX, X86::ESI, X86::EDI};
  if (isBaseRegConflictPossible(DAG, ClobberSet))
    return SDValue();

  // If the target has enhanced REPMOVSB, then it's at least as fast to use
  // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle
  // BytesLeft.
  if (!Subtarget.hasERMSB() && !(Align & 1)) {
    if (Align & 2)
      // WORD aligned
      Repeats.AVT = MVT::i16;
    else if (Align & 4)
      // DWORD aligned
      Repeats.AVT = MVT::i32;
    else
      // QWORD aligned
      Repeats.AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32;

    if (Repeats.BytesLeft() > 0 &&
        DAG.getMachineFunction().getFunction().optForMinSize()) {
      // When aggressively optimizing for size, avoid generating the code to
      // handle BytesLeft.
      Repeats.AVT = MVT::i8;
    }
  }

  SDValue InFlag;
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX
                                                          : X86::ECX,
                           DAG.getIntPtrConstant(Repeats.Count(), dl),
                           InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI
                                                          : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI
                                                          : X86::ESI,
                           Src, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(Repeats.AVT), InFlag };
  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);

  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  if (Repeats.BytesLeft()) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = Repeats.Size - Repeats.BytesLeft();
    EVT DstVT = Dst.getValueType();
    EVT SrcVT = Src.getValueType();
    EVT SizeVT = Size.getValueType();
    Results.push_back(DAG.getMemcpy(Chain, dl,
                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
                                                DAG.getConstant(Offset, dl,
                                                                DstVT)),
                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset, dl,
                                                                SrcVT)),
                                    DAG.getConstant(Repeats.BytesLeft(), dl,
                                                    SizeVT),
                                    Align, isVolatile, AlwaysInline, false,
                                    DstPtrInfo.getWithOffset(Offset),
                                    SrcPtrInfo.getWithOffset(Offset)));
  }

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}
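// A standalone sketch of the rep-movs sizing arithmetic used by the x86
// memcpy lowerings above: the alignment picks a store unit, and the copy
// splits into Count repetitions plus BytesLeft trailing bytes handled by a
// tail memcpy. Names are illustrative.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t SizeVal = 30;
  unsigned UBytes = 8;                     // e.g. QWORD-aligned on x86-64
  uint64_t Count = SizeVal / UBytes;       // 3 REP iterations
  unsigned BytesLeft = SizeVal % UBytes;   // 6 trailing bytes
  assert(Count * UBytes + BytesLeft == SizeVal);
  assert(BytesLeft < UBytes);              // the tail is always under 8 bytes
  return 0;
}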
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV,
    uint64_t SrcSVOff) const {
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
    return SDValue();

  /// If not DWORD aligned, call the library.
  if ((Align & 3) != 0)
    return SDValue();

  // DWORD aligned
  EVT AVT = MVT::i32;
  if (Subtarget->is64Bit() && ((Align & 0x7) == 0))  // QWORD aligned
    AVT = MVT::i64;

  unsigned UBytes = AVT.getSizeInBits() / 8;
  unsigned CountVal = SizeVal / UBytes;
  SDValue Count = DAG.getIntPtrConstant(CountVal);
  unsigned BytesLeft = SizeVal % UBytes;

  SDValue InFlag(0, 0);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX
                                                           : X86::ECX,
                           Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI
                                                           : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI
                                                           : X86::ESI,
                           Src, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
                                array_lengthof(Ops));

  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT DstVT = Dst.getValueType();
    EVT SrcVT = Src.getValueType();
    EVT SizeVT = Size.getValueType();
    Results.push_back(DAG.getMemcpy(Chain, dl,
                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
                                                DAG.getConstant(Offset,
                                                                DstVT)),
                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset,
                                                                SrcVT)),
                                    DAG.getConstant(BytesLeft, SizeVT),
                                    Align, isVolatile, AlwaysInline,
                                    DstSV, DstSVOff + Offset,
                                    SrcSV, SrcSVOff + Offset));
  }

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                     &Results[0], Results.size());
}
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
                                   SDValue &Offset) {
  // FrameIndex addresses are handled separately
  //errs() << "SelectADDRri: ";
  //Addr.getNode()->dumpr();
  if (isa<FrameIndexSDNode>(Addr)) {
    //errs() << "Failure\n";
    return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    Base = Addr.getOperand(0);
    if (isa<FrameIndexSDNode>(Base)) {
      //errs() << "Failure\n";
      return false;
    }
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
    //errs() << "Success\n";
    return true;
  }

  /*if (Addr.getNumOperands() == 1) {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
    errs() << "Success\n";
    return true;
  }*/

  //errs() << "SelectADDRri fails on: ";
  //Addr.getNode()->dumpr();
  if (isImm(Addr)) {
    //errs() << "Failure\n";
    return false;
  }

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
  //errs() << "Success\n";
  return true;

  /*if (Addr.getOpcode() != ISD::ADD) {
    // let SelectADDRii handle the [imm] case
    if (isImm(Addr))
      return false;
    // it is [reg]
    assert(Addr.getValueType().isSimple() && "Type must be simple");
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
    return true;
  }

  if (Addr.getNumOperands() < 2)
    return false;

  // let SelectADDRii handle the [imm+imm] case
  if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1)))
    return false;

  // try [reg+imm] and [imm+reg]
  for (int i = 0; i < 2; i ++)
    if (SelectImm(Addr.getOperand(1-i), Offset)) {
      Base = Addr.getOperand(i);
      return true;
    }

  // neither [reg+imm] nor [imm+reg]
  return false;*/
}
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
bool MipsDAGToDAGISel::
SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
  EVT ValTy = Addr.getValueType();

  // If Parent is an unaligned f32 load or store, select a (base + index)
  // floating point load/store instruction (luxc1 or suxc1).
  const LSBaseSDNode* LS = 0;

  if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) {
    EVT VT = LS->getMemoryVT();

    if (VT.getSizeInBits() / 8 > LS->getAlignment()) {
      assert(TLI.allowsUnalignedMemoryAccesses(VT) &&
             "Unaligned loads/stores not supported for this type.");
      if (VT == MVT::f32)
        return false;
    }
  }

  // If the address is a frame index, get the TargetFrameIndex.
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
    Offset = CurDAG->getTargetConstant(0, ValTy);
    return true;
  }

  // On PIC code, load the global address through the wrapper node.
  if (Addr.getOpcode() == MipsISD::Wrapper) {
    Base = Addr.getOperand(0);
    Offset = Addr.getOperand(1);
    return true;
  }

  if (TM.getRelocationModel() != Reloc::PIC_) {
    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
         Addr.getOpcode() == ISD::TargetGlobalAddress))
      return false;
  }

  // Addresses of the form FI+const or FI|const
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isInt<16>(CN->getSExtValue())) {
      // If the first operand is a FI, get the TargetFI Node
      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
                                  (Addr.getOperand(0)))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
      else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
      return true;
    }
  }

  // Operand is a result from an ADD.
  if (Addr.getOpcode() == ISD::ADD) {
    // When loading from constant pools, load the lower address part in
    // the instruction itself. Example, instead of:
    //  lui $2, %hi($CPI1_0)
    //  addiu $2, $2, %lo($CPI1_0)
    //  lwc1 $f0, 0($2)
    // Generate:
    //  lui $2, %hi($CPI1_0)
    //  lwc1 $f0, %lo($CPI1_0)($2)
    if (Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
      SDValue LoVal = Addr.getOperand(1);
      if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) ||
          isa<GlobalAddressSDNode>(LoVal.getOperand(0))) {
        Base = Addr.getOperand(0);
        Offset = LoVal.getOperand(0);
        return true;
      }
    }

    // If an indexed floating point load/store can be emitted, return false.
    if (LS &&
        (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
        Subtarget.hasMips32r2Or64())
      return false;
  }

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, ValTy);
  return true;
}
//! Main instruction-selection hook: handle the nodes that need custom
//! CellSPU lowering, then hand everything else to the generated matcher.
SDNode *
SPUDAGToDAGISel::Select(SDNode *N) {
  unsigned Opc = N->getOpcode();
  int n_ops = -1;
  unsigned NewOpc = 0;
  EVT OpVT = N->getValueType(0);
  SDValue Ops[8];
  DebugLoc dl = N->getDebugLoc();

  if (N->isMachineOpcode())
    return NULL;   // Already selected.

  if (Opc == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
    SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0));

    if (FI < 128) {
      NewOpc = SPU::AIr32;
      Ops[0] = TFI;
      Ops[1] = Imm0;
      n_ops = 2;
    } else {
      NewOpc = SPU::Ar32;
      Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0));
      Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl,
                                              N->getValueType(0), TFI),
                       0);
      n_ops = 2;
    }
  } else if (Opc == ISD::Constant && OpVT == MVT::i64) {
    // Catch the i64 constants that end up here. Note: The backend doesn't
    // attempt to legalize the constant (it's useless because DAGCombiner
    // will insert 64-bit constants and we can't stop it).
    return SelectI64Constant(N, OpVT, N->getDebugLoc());
  } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
             && OpVT == MVT::i64) {
    SDValue Op0 = N->getOperand(0);
    EVT Op0VT = Op0.getValueType();
    EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(),
                                    Op0VT, (128 / Op0VT.getSizeInBits()));
    EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(),
                                   OpVT, (128 / OpVT.getSizeInBits()));
    SDValue shufMask;

    switch (Op0VT.getSimpleVT().SimpleTy) {
    default:
      report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT");
      /*NOTREACHED*/
    case MVT::i32:
      shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                 CurDAG->getConstant(0x80808080, MVT::i32),
                                 CurDAG->getConstant(0x00010203, MVT::i32),
                                 CurDAG->getConstant(0x80808080, MVT::i32),
                                 CurDAG->getConstant(0x08090a0b, MVT::i32));
      break;

    case MVT::i16:
      shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                 CurDAG->getConstant(0x80808080, MVT::i32),
                                 CurDAG->getConstant(0x80800203, MVT::i32),
                                 CurDAG->getConstant(0x80808080, MVT::i32),
                                 CurDAG->getConstant(0x80800a0b, MVT::i32));
      break;

    case MVT::i8:
      shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                 CurDAG->getConstant(0x80808080, MVT::i32),
                                 CurDAG->getConstant(0x80808003, MVT::i32),
                                 CurDAG->getConstant(0x80808080, MVT::i32),
                                 CurDAG->getConstant(0x8080800b, MVT::i32));
      break;
    }

    SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode());

    HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
                                               Op0VecVT, Op0));

    SDValue PromScalar;
    if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode()))
      PromScalar = SDValue(N, 0);
    else
      PromScalar = PromoteScalar.getValue();

    SDValue zextShuffle =
            CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
                            PromScalar, PromScalar,
                            SDValue(shufMaskLoad, 0));

    HandleSDNode Dummy2(zextShuffle);
    if (SDNode *N = SelectCode(Dummy2.getValue().getNode()))
      zextShuffle = SDValue(N, 0);
    else
      zextShuffle = Dummy2.getValue();

    HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
                                       zextShuffle));

    CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
    SelectCode(Dummy.getValue().getNode());
    return Dummy.getValue().getNode();
  } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
    SDNode *CGLoad =
            emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());

    HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
                                       N->getOperand(0), N->getOperand(1),
                                       SDValue(CGLoad, 0)));

    CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
    if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
      return N;
    return Dummy.getValue().getNode();
  } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
    SDNode *CGLoad =
            emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode());

    HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
                                       N->getOperand(0), N->getOperand(1),
                                       SDValue(CGLoad, 0)));

    CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
    if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
      return N;
    return Dummy.getValue().getNode();
  } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
    SDNode *CGLoad =
            emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());

    HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
                                       N->getOperand(0), N->getOperand(1),
                                       SDValue(CGLoad, 0)));

    CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
    if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
      return N;
    return Dummy.getValue().getNode();
  } else if (Opc == ISD::TRUNCATE) {
    SDValue Op0 = N->getOperand(0);
    if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
        && OpVT == MVT::i32
        && Op0.getValueType() == MVT::i64) {
      // Catch (truncate:i32 ([sra|srl]:i64 arg, c)), where c >= 32
      //
      // Take advantage of the fact that the upper 32 bits are in the
      // i32 preferred slot and avoid shuffle gymnastics:
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
      if (CN != 0) {
        unsigned shift_amt = unsigned(CN->getZExtValue());

        if (shift_amt >= 32) {
          SDNode *hi32 =
                  CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
                                         OpVT, Op0.getOperand(0),
                                         getRC(MVT::i32));

          shift_amt -= 32;
          if (shift_amt > 0) {
            // Take care of the additional shift, if present:
            SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
            unsigned Opc = SPU::ROTMAIr32_i32;

            if (Op0.getOpcode() == ISD::SRL)
              Opc = SPU::ROTMr32;

            hi32 = CurDAG->getMachineNode(Opc, dl, OpVT, SDValue(hi32, 0),
                                          shift);
          }

          return hi32;
        }
      }
    }
  } else if (Opc == ISD::SHL) {
    if (OpVT == MVT::i64)
      return SelectSHLi64(N, OpVT);
  } else if (Opc == ISD::SRL) {
    if (OpVT == MVT::i64)
      return SelectSRLi64(N, OpVT);
  } else if (Opc == ISD::SRA) {
    if (OpVT == MVT::i64)
      return SelectSRAi64(N, OpVT);
  } else if (Opc == ISD::FNEG
             && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
    DebugLoc dl = N->getDebugLoc();
    // Check if the pattern is a special form of DFNMS:
    // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
    SDValue Op0 = N->getOperand(0);
    if (Op0.getOpcode() == ISD::FSUB) {
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == ISD::FMUL) {
        unsigned Opc = SPU::DFNMSf64;
        if (OpVT == MVT::v2f64)
          Opc = SPU::DFNMSv2f64;

        return CurDAG->getMachineNode(Opc, dl, OpVT,
                                      Op00.getOperand(0),
                                      Op00.getOperand(1),
                                      Op0.getOperand(1));
      }
    }

    SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64);
    SDNode *signMask = 0;
    unsigned Opc = SPU::XORfneg64;

    if (OpVT == MVT::f64) {
      signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl);
    } else if (OpVT == MVT::v2f64) {
      Opc = SPU::XORfnegvec;
      signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
                                                 MVT::v2i64,
                                                 negConst, negConst).getNode());
    }

    return CurDAG->getMachineNode(Opc, dl, OpVT,
                                  N->getOperand(0), SDValue(signMask, 0));
  } else if (Opc == ISD::FABS) {
    if (OpVT == MVT::f64) {
      SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);

      return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT,
                                    N->getOperand(0), SDValue(signMask, 0));
    } else if (OpVT == MVT::v2f64) {
      SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
      SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
                                       absConst, absConst);
      SDNode *signMask = emitBuildVector(absVec.getNode());

      return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT,
                                    N->getOperand(0), SDValue(signMask, 0));
    }
  } else if (Opc == SPUISD::LDRESULT) {
    // Custom select instructions for LDRESULT
    EVT VT = N->getValueType(0);
    SDValue Arg = N->getOperand(0);
    SDValue Chain = N->getOperand(1);
    SDNode *Result;

    Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT,
                                    MVT::Other, Arg,
                                    getRC(VT.getSimpleVT()), Chain);
    return Result;
  } else if (Opc == SPUISD::IndirectAddr) {
    // Look at the operands: SelectCode() will catch the cases that aren't
    // specifically handled here.
    //
    // SPUInstrInfo catches the following patterns:
    // (SPUindirect (SPUhi ...), (SPUlo ...))
    // (SPUindirect $sp, imm)
    EVT VT = N->getValueType(0);
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    RegisterSDNode *RN;

    if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
        || (Op0.getOpcode() == ISD::Register
            && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
                && RN->getReg() != SPU::R1))) {
      NewOpc = SPU::Ar32;
      Ops[1] = Op1;
      if (Op1.getOpcode() == ISD::Constant) {
        ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
        Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
        if (isInt<10>(CN->getSExtValue())) {
          NewOpc = SPU::AIr32;
          Ops[1] = Op1;
        } else {
          Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl,
                                                  N->getValueType(0),
                                                  Op1),
                           0);
        }
      }
      Ops[0] = Op0;
      n_ops = 2;
    }
  }

  if (n_ops > 0) {
    if (N->hasOneUse())
      return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
    else
      return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops);
  } else
    return SelectCode(N);
}
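// The magic words in the zero/any-extend masks above follow the SHUFB byte
// convention: a control byte with the high bit set (0x80) is assumed to emit
// a zero byte, while 0x00-0x1f selects that byte from the 32-byte
// concatenation of the two source registers.  Below is a standalone sketch
// (not backend code) of the MVT::i32 mask under that assumption; the payload
// bytes are made up for display.
#include <cstdint>
#include <cstdio>

int main() {
  // Both SHUFB inputs are the same register here, as in the lowering above.
  uint8_t src[32];
  for (int i = 0; i < 32; ++i)
    src[i] = uint8_t(0xA0 + i);

  // {0x80808080, 0x00010203, 0x80808080, 0x08090a0b} spelled out per byte:
  // each 64-bit lane becomes {zero word, selected source word}.
  const uint8_t mask[16] = {0x80, 0x80, 0x80, 0x80, 0x00, 0x01, 0x02, 0x03,
                            0x80, 0x80, 0x80, 0x80, 0x08, 0x09, 0x0a, 0x0b};

  uint8_t dst[16];
  for (int i = 0; i < 16; ++i)
    dst[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 0x1f];

  for (int i = 0; i < 16; ++i)
    printf("%02x ", dst[i]);  // 00 00 00 00 a0 a1 a2 a3 00 00 00 00 a8 a9 aa ab
  printf("\n");
  return 0;
}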
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) const {
  EVT ValTy = Addr.getValueType();

  // If the address is a FrameIndex, get the TargetFrameIndex.
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base   = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
    Offset = CurDAG->getTargetConstant(0, ValTy);
    return true;
  }

  // On PIC code, a global address arrives wrapped in MipsISD::Wrapper;
  // split it into its base and offset operands.
  if (Addr.getOpcode() == MipsISD::Wrapper) {
    Base   = Addr.getOperand(0);
    Offset = Addr.getOperand(1);
    return true;
  }

  if (TM.getRelocationModel() != Reloc::PIC_) {
    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
         Addr.getOpcode() == ISD::TargetGlobalAddress))
      return false;
  }

  // Addresses of the form FI+const or FI|const
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (isInt<16>(CN->getSExtValue())) {

      // If the first operand is a FI, get the TargetFI Node
      if (FrameIndexSDNode *FIN =
              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
      else
        Base = Addr.getOperand(0);

      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
      return true;
    }
  }

  // Operand is a result from an ADD.
  if (Addr.getOpcode() == ISD::ADD) {
    // When loading from constant pools, load the lower address part in
    // the instruction itself. For example, instead of:
    //  lui $2, %hi($CPI1_0)
    //  addiu $2, $2, %lo($CPI1_0)
    //  lwc1 $f0, 0($2)
    // Generate:
    //  lui $2, %hi($CPI1_0)
    //  lwc1 $f0, %lo($CPI1_0)($2)
    if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
        Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
      SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
      if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
          isa<JumpTableSDNode>(Opnd0)) {
        Base = Addr.getOperand(0);
        Offset = Opnd0;
        return true;
      }
    }
  }

  return false;
}
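// The isInt<16> check above is the gate that decides whether the folded
// constant can live in the load/store's 16-bit immediate field.  A
// standalone sketch of the same signed-range predicate follows; it mirrors
// what llvm::isInt is understood to compute, but "fitsSignedBits" is a
// made-up name here, and backend code should use the real MathExtras.h
// helper.
#include <cstdint>

template <unsigned N> bool fitsSignedBits(int64_t V) {
  return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
}

int main() {
  bool a = fitsSignedBits<16>(32767);    // true: largest encodable offset
  bool b = fitsSignedBits<16>(-32768);   // true: smallest encodable offset
  bool c = fitsSignedBits<16>(40000);    // false: must materialize the base
  return (a && b && !c) ? 0 : 1;         // exits 0: all three hold
}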
bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
    const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
  int OperandIdx[] = {
    TII->getOperandIdx(Opcode, R600Operands::SRC0),
    TII->getOperandIdx(Opcode, R600Operands::SRC1),
    TII->getOperandIdx(Opcode, R600Operands::SRC2)
  };
  int SelIdx[] = {
    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL),
    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL),
    TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL)
  };
  int NegIdx[] = {
    TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG),
    TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG),
    TII->getOperandIdx(Opcode, R600Operands::SRC2_NEG)
  };
  int AbsIdx[] = {
    TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS),
    TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS),
    -1  // SRC2 has no abs modifier
  };

  for (unsigned i = 0; i < 3; i++) {
    if (OperandIdx[i] < 0)
      return false;
    // Indices are MachineInstr positions (slot 0 = dst); the Ops vector has
    // no dst slot, hence the recurring "- 1".
    SDValue Operand = Ops[OperandIdx[i] - 1];
    switch (Operand.getOpcode()) {
    case AMDGPUISD::CONST_ADDRESS: {
      SDValue CstOffset;
      if (Operand.getValueType().isVector() ||
          !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
        break;

      // Gather the other constant values already read by this instruction.
      std::vector<unsigned> Consts;
      for (unsigned j = 0; j < 3; j++) {
        int SrcIdx = OperandIdx[j];
        if (SrcIdx < 0)
          break;
        if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
          if (Reg->getReg() == AMDGPU::ALU_CONST) {
            ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
            Consts.push_back(Cst->getZExtValue());
          }
        }
      }

      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
      Consts.push_back(Cst->getZExtValue());
      if (!TII->fitsConstReadLimitations(Consts))
        break;

      Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
      Ops[SelIdx[i] - 1] = CstOffset;
      return true;
    }
    case ISD::FNEG:
      if (NegIdx[i] < 0)
        break;
      Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
      Ops[NegIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
      return true;
    case ISD::FABS:
      if (AbsIdx[i] < 0)
        break;
      Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
      Ops[AbsIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
      return true;
    case ISD::BITCAST:
      Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
      return true;
    default:
      break;
    }
  }
  return false;
}
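// The "- 1" adjustments above encode a convention that is easy to trip over:
// getOperandIdx returns MachineInstr operand positions, where slot 0 is the
// destination, while the Ops vector holds only the SDNode's inputs.  A
// standalone miniature of that shift (the operand layout is hypothetical):
#include <cassert>
#include <cstring>

int main() {
  // MachineInstr view of a hypothetical ALU op: slot 0 is the result.
  const char *MIOps[]   = {"dst", "src0", "src0_sel"};
  // SDNode operand list for the same op: the result is not an operand.
  const char *NodeOps[] = {"src0", "src0_sel"};

  int Src0IdxMI = 1;  // what a getOperandIdx-style query would return
  // Shift down by one to index the SDNode-style operand vector.
  assert(std::strcmp(NodeOps[Src0IdxMI - 1], MIOps[Src0IdxMI]) == 0);
  return 0;
}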
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst,
    SDValue Val, SDValue Size, unsigned Align, bool isVolatile,
    MachinePointerInfo DstPtrInfo) const {
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();

#ifndef NDEBUG
  // If the base register might conflict with our physical registers, bail out.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
                                  X86::ECX, X86::EAX, X86::EDI};
  assert(!isBaseRegConflictPossible(DAG, ClobberSet));
#endif

  // If the destination is in a segment-relative address space, use the
  // default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If not DWORD aligned or size is more than the threshold, call the library.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
  if ((Align & 3) != 0 || !ConstantSize ||
      ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
    // Check to see if there is a specialized entry-point for memory zeroing.
    ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);

    if (const char *bzeroName =
            (ValC && ValC->isNullValue())
                ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
                : nullptr) {
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
      Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
      TargetLowering::ArgListTy Args;
      TargetLowering::ArgListEntry Entry;
      Entry.Node = Dst;
      Entry.Ty = IntPtrTy;
      Args.push_back(Entry);
      Entry.Node = Size;
      Args.push_back(Entry);

      TargetLowering::CallLoweringInfo CLI(DAG);
      CLI.setDebugLoc(dl)
          .setChain(Chain)
          .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
                        DAG.getExternalSymbol(bzeroName, IntPtr),
                        std::move(Args))
          .setDiscardResult();

      std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
      return CallResult.second;
    }

    // Otherwise have the target-independent code call memset.
    return SDValue();
  }

  uint64_t SizeVal = ConstantSize->getZExtValue();
  SDValue InFlag;
  EVT AVT;
  SDValue Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
  unsigned BytesLeft = 0;

  if (ValC) {
    unsigned ValReg;
    uint64_t Val = ValC->getZExtValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    switch (Align & 3) {
    case 2:   // WORD aligned
      AVT = MVT::i16;
      ValReg = X86::AX;
      Val = (Val << 8) | Val;
      break;
    case 0:   // DWORD aligned
      AVT = MVT::i32;
      ValReg = X86::EAX;
      Val = (Val << 8)  | Val;
      Val = (Val << 16) | Val;
      if (Subtarget.is64Bit() && ((Align & 0x7) == 0)) {  // QWORD aligned
        AVT = MVT::i64;
        ValReg = X86::RAX;
        Val = (Val << 32) | Val;
      }
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      ValReg = X86::AL;
      Count = DAG.getIntPtrConstant(SizeVal, dl);
      break;
    }

    if (AVT.bitsGT(MVT::i8)) {
      unsigned UBytes = AVT.getSizeInBits() / 8;
      Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl);
      BytesLeft = SizeVal % UBytes;
    }

    Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
                             InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count = DAG.getIntPtrConstant(SizeVal, dl);
    Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag);
    InFlag = Chain.getValue(1);
  }

  bool Use64BitRegs = Subtarget.isTarget64BitLP64();
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
                           Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);

  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT AddrVT = Dst.getValueType();
    EVT SizeVT = Size.getValueType();

    Chain =
        DAG.getMemset(Chain, dl,
                      DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                  DAG.getConstant(Offset, dl, AddrVT)),
                      Val, DAG.getConstant(BytesLeft, dl, SizeVT), Align,
                      isVolatile, false, DstPtrInfo.getWithOffset(Offset));
  }

  // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
  return Chain;
}
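// The constant-splat arithmetic above is easiest to verify with concrete
// numbers.  A standalone check for a hypothetical QWORD-aligned
// memset(dst, 0x41, 30) on a 64-bit target (the values are chosen only for
// illustration):
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Val = 0x41;       // fill byte, already masked to 8 bits
  Val = (Val << 8)  | Val;   // 0x4141               (WORD widening)
  Val = (Val << 16) | Val;   // 0x41414141           (DWORD widening)
  Val = (Val << 32) | Val;   // 0x4141414141414141   (QWORD widening)
  assert(Val == UINT64_C(0x4141414141414141));

  uint64_t SizeVal = 30, UBytes = 8;      // AVT == MVT::i64
  uint64_t Count     = SizeVal / UBytes;  // 3 iterations of rep;stosq
  unsigned BytesLeft = SizeVal % UBytes;  // trailing 6 bytes via getMemset
  assert(Count == 3 && BytesLeft == 6);
  return 0;
}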
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo*>(TM.getInstrInfo());
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case AMDGPUISD::CONST_ADDRESS: {
    for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
         I != SDNode::use_end(); I = Next) {
      Next = llvm::next(I);
      if (!I->isMachineOpcode()) {
        continue;
      }
      unsigned Opcode = I->getMachineOpcode();
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SrcIdx = I.getOperandNo();
      int SelIdx;
      // Unlike MachineInstrs, SDNodes do not have results in their operand
      // list, so we need to increment the SrcIdx, since
      // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
      if (HasDst) {
        SrcIdx++;
      }

      SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
      if (SelIdx < 0) {
        continue;
      }

      SDValue CstOffset;
      if (N->getValueType(0).isVector() ||
          !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
        continue;

      // Gather the constant values already read by this instruction.
      int SrcIndices[] = {
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
      };
      std::vector<unsigned> Consts;
      for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
        int OtherSrcIdx = SrcIndices[i];
        int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
        if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
          continue;
        }
        if (HasDst) {
          OtherSrcIdx--;
          OtherSelIdx--;
        }
        if (RegisterSDNode *Reg =
                dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
          if (Reg->getReg() == AMDGPU::ALU_CONST) {
            ConstantSDNode *Cst =
                dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
            Consts.push_back(Cst->getZExtValue());
          }
        }
      }

      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
      Consts.push_back(Cst->getZExtValue());
      if (!TII->fitsConstReadLimitations(Consts))
        continue;

      // Convert back to SDNode indices.
      if (HasDst) {
        SrcIdx--;
        SelIdx--;
      }
      std::vector<SDValue> Ops;
      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
        if (i == SrcIdx) {
          Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
        } else if (i == SelIdx) {
          Ops.push_back(CstOffset);
        } else {
          Ops.push_back(I->getOperand(i));
        }
      }
      CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }

    unsigned RegClassID;
    switch(N->getValueType(0).getVectorNumElements()) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF plus four
    // INSERT_SUBREGs, which adds a 128-bit register copy when going through
    // the TwoAddressInstructions pass. We want to avoid 128-bit copies as
    // much as possible because they can't be bundled by our scheduler.
    SDValue RegSeqArgs[9] = {
      CurDAG->getTargetConstant(RegClassID, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
    };
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[2 * i + 1] = N->getOperand(i);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
                                  N->getValueType(0), Ops);
  }

  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet.  Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }

    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
         Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          // We can only use literal constants (e.g. AMDGPU::ZERO,
          // AMDGPU::ONE, etc) in machine opcodes.
          continue;
        }
      } else {
        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
            (TII->get(Use->getMachineOpcode()).TSFlags &
             R600_InstFlag::VECTOR)) {
          continue;
        }

        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
                                        AMDGPU::OpName::literal);
        if (ImmIdx == -1) {
          continue;
        }

        if (TII->getOperandIdx(Use->getMachineOpcode(),
                               AMDGPU::OpName::dst) != -1) {
          // Subtract one from ImmIdx, because the DST operand is usually
          // index 0 for MachineInstrs, but we have no DST in the Ops vector.
          ImmIdx--;
        }

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          ConstantSDNode *C =
              dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
          assert(C);

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }

  SDNode *Result = SelectCode(N);

  // Fold operands of the selected node.
  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
    if (Result && Result->isMachineOpcode() &&
        Result->getMachineOpcode() == AMDGPU::DOT_4) {
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for (SDNode::op_iterator I = Result->op_begin(),
             E = Result->op_end(); I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);
    }
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags &
          R600_InstFlag::VECTOR) &&
        TII->hasInstrModifiers(Result->getMachineOpcode())) {
      // Fold FNEG/FABS.
      // TODO: Isel can generate multiple MachineInsts; we need to recursively
      // parse Result.
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for (SDNode::op_iterator I = Result->op_begin(),
             E = Result->op_end(); I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If the node has a single use which is CLAMP_R600, fold it into the
      // selected instruction's clamp modifier.
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
              TII->getOperandIdx(Result->getMachineOpcode(),
                                 AMDGPU::OpName::clamp);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
                                        Result->getMachineOpcode(),
                                        PotentialClamp->getVTList(),
                                        Ops.data(), NumOp);
        }
      }
    }
  }

  return Result;
}
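// Both use-iterator loops in Select() above capture Next before mutating the
// current use, because UpdateNodeOperands can CSE the use-node out from
// under the iterator.  The same discipline in miniature on a std::list (a
// standalone sketch, not DAG code):
#include <cstdio>
#include <iterator>
#include <list>

int main() {
  std::list<int> uses = {1, 2, 3, 4};  // non-empty, so std::next(begin()) is safe
  for (auto I = uses.begin(), Next = std::next(I); I != uses.end(); I = Next) {
    Next = std::next(I);     // grab the successor while I is still valid
    if (*I % 2 == 0)
      uses.erase(I);         // invalidates only I; Next stays usable
  }
  for (int u : uses)
    printf("%d ", u);        // prints: 1 3
  printf("\n");
  return 0;
}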