bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset) { ConstantSDNode * IMMOffset; if (Addr.getOpcode() == ISD::ADD && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) && isInt<16>(IMMOffset->getZExtValue())) { Base = Addr.getOperand(0); Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32); return true; // If the pointer address is constant, we can move it to the offset field. } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) && isInt<16>(IMMOffset->getZExtValue())) { Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(CurDAG->getEntryNode()), AMDGPU::ZERO, MVT::i32); Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32); return true; } // Default case, no offset Base = Addr; Offset = CurDAG->getTargetConstant(0, MVT::i32); return true; }
// ComplexPattern used on BPF Load/Store instructions bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { // if Address is FI, get the TargetFrameIndex. SDLoc DL(Addr); if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); Offset = CurDAG->getTargetConstant(0, DL, MVT::i64); return true; } if (Addr.getOpcode() == ISD::TargetExternalSymbol || Addr.getOpcode() == ISD::TargetGlobalAddress) return false; // Addresses of the form Addr+const or Addr|const if (CurDAG->isBaseWithConstantOffset(Addr)) { ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); if (isInt<16>(CN->getSExtValue())) { // If the first operand is a FI, get the TargetFI Node if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); else Base = Addr.getOperand(0); Offset = CurDAG->getTargetConstant(CN->getSExtValue(), DL, MVT::i64); return true; } } Base = Addr; Offset = CurDAG->getTargetConstant(0, DL, MVT::i64); return true; }
/// ComplexPattern used on Cpu0InstrInfo /// Used on Cpu0 Load/Store instructions bool Cpu0DAGToDAGISel:: SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { EVT ValTy = Addr.getValueType(); // If Parent is an unaligned f32 load or store, select a (base + index) // floating point load/store instruction (luxc1 or suxc1). const LSBaseSDNode* LS = 0; if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) { EVT VT = LS->getMemoryVT(); if (VT.getSizeInBits() / 8 > LS->getAlignment()) { assert(TLI.allowsUnalignedMemoryAccesses(VT) && "Unaligned loads/stores not supported for this type."); if (VT == MVT::f32) return false; } } // if Address is FI, get the TargetFrameIndex. if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); Offset = CurDAG->getTargetConstant(0, ValTy); return true; } // on PIC code Load GA if (Addr.getOpcode() == Cpu0ISD::Wrapper) { Base = Addr.getOperand(0); Offset = Addr.getOperand(1); return true; } if (TM.getRelocationModel() != Reloc::PIC_) { if ((Addr.getOpcode() == ISD::TargetExternalSymbol || Addr.getOpcode() == ISD::TargetGlobalAddress)) return false; } // Addresses of the form FI+const or FI|const if (CurDAG->isBaseWithConstantOffset(Addr)) { ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); if (isInt<16>(CN->getSExtValue())) { // If the first operand is a FI, get the TargetFI Node if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> (Addr.getOperand(0))) Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); else Base = Addr.getOperand(0); Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); return true; } } Base = Addr; Offset = CurDAG->getTargetConstant(0, ValTy); return true; }
SDValue ARM64SelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size); const char *bzeroEntry = (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : 0; // For small size (< 256), it is not beneficial to use bzero // instead of memset. if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) { const ARM64TargetLowering &TLI = *static_cast<const ARM64TargetLowering *>( DAG.getTarget().getTargetLowering()); EVT IntPtr = TLI.getPointerTy(); Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI( Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/false, DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; } return SDValue(); }
// Match memory operand of the form [reg], [imm+reg], and [reg+imm] bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset) { //errs() << "SelectADDRlocal: "; //Addr.getNode()->dumpr(); if (isa<FrameIndexSDNode>(Addr)) { Base = Addr; Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); //errs() << "Success\n"; return true; } if (CurDAG->isBaseWithConstantOffset(Addr)) { Base = Addr.getOperand(0); if (!isa<FrameIndexSDNode>(Base)) { //errs() << "Failure\n"; return false; } ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); //errs() << "Offset: "; //Offset.getNode()->dumpr(); //errs() << "Success\n"; return true; } //errs() << "Failure\n"; return false; }
bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) { SDNode *node = operand.getNode(); if (!ConstantSDNode::classof(node)) return false; ConstantSDNode *CN = cast<ConstantSDNode>(node); imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(), MVT::i32); return true; }
// Return true if N is an undef or a constant. // If N was undef, return a (i8imm 0) in Retval // If N was an imm, convert it to i8imm and return it in Retval // Note: The conversion to i8imm is required; otherwise the // pattern matcher inserts a bunch of IMOVi8rr instructions to convert // the imm to i8imm, and this causes instruction selection // to fail. bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) { if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant)) return false; if (N.getOpcode() == ISD::UNDEF) Retval = CurDAG->getTargetConstant(0, MVT::i8); else { ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode()); unsigned retval = cn->getZExtValue(); Retval = CurDAG->getTargetConstant(retval, MVT::i8); } return true; }
bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) { uint32_t Bits; uint32_t RegWidth = N.getValueType().getSizeInBits(); ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); if (!CN) return false; if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits)) return false; Imm = CurDAG->getTargetConstant(Bits, MVT::i32); return true; }
SDValue VectorProcTargetLowering::LowerConstant(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); EVT PtrVT = Op.getValueType(); ConstantSDNode *C = cast<ConstantSDNode>(Op); if (C->getAPIntValue().abs().ult(0x4000)) { // Don't need to convert to constant pool reference. This will fit in // the immediate field of a single instruction, sign extended (15 bits). return SDValue(); } SDValue CPIdx = DAG.getConstantPool(C->getConstantIntValue(), MVT::i32); return DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, false, 4); }
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset) { ConstantSDNode *C; if ((C = dyn_cast<ConstantSDNode>(Addr))) { Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32); } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { Base = Addr.getOperand(0); Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32); } else { Base = Addr; Offset = CurDAG->getTargetConstant(0, MVT::i32); } return true; }
/// Select multiply instructions. SDNode* CoffeeDAGToDAGISel::SelectMULT(SDNode *N, DebugLoc dl) { ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)); unsigned Opc = 0; if (CN) { // signed 15-bit immediate if (isInt<15>(CN->getSExtValue())) Opc = Coffee::MULTI; else Opc = Coffee::MULTR; } else { // register Opc = Coffee::MULTR; } if (Opc == 0) llvm_unreachable("coffee: selectMULT"); SDNode *Mul = CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Glue, N->getOperand(0), N->getOperand(1)); // Take the second output, which is the glue. SDValue glue = SDValue(Mul, 1); /* def MULHI : InstCoffee<(outs GPRC:$rd), (ins), "mulhi\t$rd", [], IIAlu, FrmJ> { }*/ // The MULHI instruction only indicates which register receives the upper half of the // multiplication result, so it needs no inputs here. // We do take the glue output of MULTI/MULTR, however, to guarantee that MULHI is emitted // immediately after them. return CurDAG->getMachineNode(Coffee::MULHI, dl, MVT::i32, glue); }
void MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS, const SDLoc &DL, SDNode *Node) const { unsigned Opc = InFlag.getOpcode(); (void)Opc; assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || (Opc == ISD::SUBC || Opc == ISD::SUBE)) && "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu; if (Subtarget->isGP64bit()) { SLTuOp = Mips::SLTu64; ADDuOp = Mips::DADDu; } SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops); if (Subtarget->isGP64bit()) { // On 64-bit targets, sltu produces an i64 but our backend currently says // that SLTu64 produces an i32. We need to fix this in the long run but for // now, just make the DAG type-correct by asserting the upper bits are zero. Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT, CurDAG->getTargetConstant(0, DL, VT), SDValue(Carry, 0), CurDAG->getTargetConstant(Mips::sub_32, DL, VT)); } // Generate a second addition only if we know that RHS is not a // constant-zero node. SDNode *AddCarry = Carry; ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS); if (!C || C->getZExtValue()) AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS); CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0)); }
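The SLTu/ADDu sequence selected above implements the usual carry propagation for a double-word add; a minimal standalone sketch (example operand values only, not taken from the backend) of the same arithmetic:

#include <cstdint>
#include <cstdio>

// Standalone sketch of the carry computation behind the ADDE lowering above:
// the carry out of the low word is recovered with an unsigned "set less than"
// (the SLTu above), then folded into the high word with a second ADDu.
int main() {
  uint32_t ALo = 0xFFFFFFFFu, AHi = 0x1;   // example 64-bit value 0x1FFFFFFFF
  uint32_t BLo = 0x00000002u, BHi = 0x5;   // example 64-bit value 0x500000002
  uint32_t Lo = ALo + BLo;                 // ADDu (the low half)
  uint32_t Carry = Lo < BLo;               // SLTu: 1 iff the low add wrapped
  uint32_t Hi = AHi + BHi + Carry;         // ADDu + ADDu (the ADDE half)
  std::printf("hi=0x%x lo=0x%x\n", Hi, Lo); // expect hi=0x7 lo=0x1
}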
bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Op, SDValue Addr, SDValue &Base, SDValue &Offset) { if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) { Base = Addr.getOperand(0); Offset = CurDAG->getTargetConstant(0, MVT::i32); return true; } if (Addr.getOpcode() == ISD::ADD) { ConstantSDNode *CN = 0; if ((Addr.getOperand(0).getOpcode() == XCoreISD::CPRelativeWrapper) && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) && (CN->getSExtValue() % 4 == 0)) { // Constant word offset from an object in the data region Base = Addr.getOperand(0).getOperand(0); Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32); return true; } } return false; }
SDValue HexagonSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize) return SDValue(); uint64_t SizeVal = ConstantSize->getZExtValue(); if (SizeVal < 32 || (SizeVal % 8) != 0) return SDValue(); // Special case aligned memcpys with size >= 32 bytes and a multiple of 8. const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); const char *SpecialMemcpyName = "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes"; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(Chain) .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), Type::getVoidTy(*DAG.getContext()), DAG.getTargetExternalSymbol( SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())), std::move(Args)) .setDiscardResult(); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; }
bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset) { FrameIndexSDNode *FIN = 0; if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); Offset = CurDAG->getTargetConstant(0, MVT::i32); return true; } if (Addr.getOpcode() == ISD::ADD) { ConstantSDNode *CN = 0; if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) { // Constant positive word offset from frame index Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32); return true; } } return false; }
/// Match frameindex+offset and frameindex|offset bool MipsSEDAGToDAGISel::selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset, unsigned OffsetBits) const { if (CurDAG->isBaseWithConstantOffset(Addr)) { ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); if (isIntN(OffsetBits, CN->getSExtValue())) { EVT ValTy = Addr.getValueType(); // If the first operand is a FI, get the TargetFI Node if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> (Addr.getOperand(0))) Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); else Base = Addr.getOperand(0); Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); return true; } } return false; }
/// Used on microMIPS Load/Store unaligned instructions (12-bit offset) bool MipsSEDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base, SDValue &Offset) const { EVT ValTy = Addr.getValueType(); // Addresses of the form FI+const or FI|const if (CurDAG->isBaseWithConstantOffset(Addr)) { ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); if (isInt<12>(CN->getSExtValue())) { // If the first operand is a FI then get the TargetFI Node if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> (Addr.getOperand(0))) Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); else Base = Addr.getOperand(0); Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); return true; } } return false; }
// ComplexPattern used on BPF FI instruction bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { SDLoc DL(Addr); if (!CurDAG->isBaseWithConstantOffset(Addr)) return false; // Addresses of the form Addr+const or Addr|const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); if (isInt<16>(CN->getSExtValue())) { // If the first operand is a FI, get the TargetFI Node if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); else return false; Offset = CurDAG->getTargetConstant(CN->getSExtValue(), DL, MVT::i64); return true; } return false; }
/// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const { EVT ValTy = Addr.getValueType(); // if Address is FI, get the TargetFrameIndex. if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); Offset = CurDAG->getTargetConstant(0, ValTy); return true; } // on PIC code Load GA if (Addr.getOpcode() == MipsISD::Wrapper) { Base = Addr.getOperand(0); Offset = Addr.getOperand(1); return true; } if (TM.getRelocationModel() != Reloc::PIC_) { if ((Addr.getOpcode() == ISD::TargetExternalSymbol || Addr.getOpcode() == ISD::TargetGlobalAddress)) return false; } // Addresses of the form FI+const or FI|const if (CurDAG->isBaseWithConstantOffset(Addr)) { ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); if (isInt<16>(CN->getSExtValue())) { // If the first operand is a FI, get the TargetFI Node if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> (Addr.getOperand(0))) Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); else Base = Addr.getOperand(0); Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); return true; } } // Operand is a result from an ADD. if (Addr.getOpcode() == ISD::ADD) { // When loading from constant pools, load the lower address part in // the instruction itself. Example, instead of: // lui $2, %hi($CPI1_0) // addiu $2, $2, %lo($CPI1_0) // lwc1 $f0, 0($2) // Generate: // lui $2, %hi($CPI1_0) // lwc1 $f0, %lo($CPI1_0)($2) if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { SDValue Opnd0 = Addr.getOperand(1).getOperand(0); if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || isa<JumpTableSDNode>(Opnd0)) { Base = Addr.getOperand(0); Offset = Opnd0; return true; } } } return false; }
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { const ARMSubtarget &Subtarget = DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); // Do repeated 4-byte loads and stores. To be improved. // This requires 4-byte alignment. if ((Align & 3) != 0) return SDValue(); // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (!ConstantSize) return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, RTLIB::MEMCPY); uint64_t SizeVal = ConstantSize->getZExtValue(); if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, RTLIB::MEMCPY); unsigned BytesLeft = SizeVal & 3; unsigned NumMemOps = SizeVal >> 2; unsigned EmittedNumMemOps = 0; EVT VT = MVT::i32; unsigned VTSize = 4; unsigned i = 0; // Emit a maximum of 4 loads in Thumb1 since we have fewer registers const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6; SDValue TFOps[6]; SDValue Loads[6]; uint64_t SrcOff = 0, DstOff = 0; // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to // VLDM/VSTM and make this code emit it when appropriate. This would reduce // pressure on the general purpose registers. However, this seems harder to map // onto the register allocator's view of the world. // The number of MEMCPY pseudo-instructions to emit. We use up to // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm // later on. This is a lower bound on the number of MEMCPY operations we must // emit. unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM; // Code size optimisation: do not inline memcpy if expansion results in // more instructions than the library call. if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) { return SDValue(); } SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue); for (unsigned I = 0; I != NumMEMCPYs; ++I) { // Evenly distribute registers among MEMCPY operations to reduce register // pressure. unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs; unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps; Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src, DAG.getConstant(NumRegs, dl, MVT::i32)); Src = Dst.getValue(1); Chain = Dst.getValue(2); DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize); SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize); EmittedNumMemOps = NextEmittedNumMemOps; } if (BytesLeft == 0) return Chain; // Issue loads / stores for the trailing (1 - 3) bytes. auto getRemainingValueType = [](unsigned BytesLeft) { return (BytesLeft >= 2) ? MVT::i16 : MVT::i8; }; auto getRemainingSize = [](unsigned BytesLeft) { return (BytesLeft >= 2) ? 2 : 1; }; unsigned BytesLeftSave = BytesLeft; i = 0; while (BytesLeft) { VT = getRemainingValueType(BytesLeft); VTSize = getRemainingSize(BytesLeft); Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, dl, MVT::i32)), SrcPtrInfo.getWithOffset(SrcOff)); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; BytesLeft -= VTSize; } Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(TFOps, i)); i = 0; BytesLeft = BytesLeftSave; while (BytesLeft) { VT = getRemainingValueType(BytesLeft); VTSize = getRemainingSize(BytesLeft); TFOps[i] = DAG.getStore(Chain, dl, Loads[i], DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, DAG.getConstant(DstOff, dl, MVT::i32)), DstPtrInfo.getWithOffset(DstOff)); ++i; DstOff += VTSize; BytesLeft -= VTSize; } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(TFOps, i)); }
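The even-distribution loop above is easy to check in isolation. A minimal standalone sketch (MaxLoads = 6 matches the non-Thumb1 limit above; the 17-word copy is an arbitrary example) of how NumMemOps words are spread over the MEMCPY pseudo-instructions:

#include <cassert>
#include <cstdio>

// Standalone sketch of the NumMEMCPYs / NextEmittedNumMemOps arithmetic above:
// ceil(NumMemOps / MaxLoads) pseudos, each handed at most MaxLoads registers.
int main() {
  const unsigned MaxLoads = 6;     // ldm/stm register budget (non-Thumb1 case above)
  const unsigned NumMemOps = 17;   // example: a 68-byte, 4-byte-aligned copy
  const unsigned NumMEMCPYs = (NumMemOps + MaxLoads - 1) / MaxLoads;
  unsigned Emitted = 0;
  for (unsigned I = 0; I != NumMEMCPYs; ++I) {
    unsigned Next = NumMemOps * (I + 1) / NumMEMCPYs;
    unsigned NumRegs = Next - Emitted;          // registers used by this pseudo
    assert(NumRegs <= MaxLoads && "each pseudo must fit in one ldm/stm");
    std::printf("MEMCPY %u copies %u words\n", I, NumRegs);
    Emitted = Next;
  }
  assert(Emitted == NumMemOps && "all words accounted for");
}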
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256) return SDValue(); // If not DWORD aligned or size is more than the threshold, call the library. // The libc version is likely to be faster for these cases. It can use the // address value and run time information about the CPU. if ((Align & 3) != 0 || !ConstantSize || ConstantSize->getZExtValue() > Subtarget->getMaxInlineSizeThreshold()) { // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); if (const char *bzeroEntry = V && V->isNullValue() ? Subtarget->getBZeroEntry() : nullptr) { EVT IntPtr = TLI.getPointerTy(); Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; } // Otherwise have the target-independent code call memset. return SDValue(); } uint64_t SizeVal = ConstantSize->getZExtValue(); SDValue InFlag; EVT AVT; SDValue Count; ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); unsigned BytesLeft = 0; bool TwoRepStos = false; if (ValC) { unsigned ValReg; uint64_t Val = ValC->getZExtValue() & 255; // If the value is a constant, then we can potentially use larger sets. switch (Align & 3) { case 2: // WORD aligned AVT = MVT::i16; ValReg = X86::AX; Val = (Val << 8) | Val; break; case 0: // DWORD aligned AVT = MVT::i32; ValReg = X86::EAX; Val = (Val << 8) | Val; Val = (Val << 16) | Val; if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned AVT = MVT::i64; ValReg = X86::RAX; Val = (Val << 32) | Val; } break; default: // Byte aligned AVT = MVT::i8; ValReg = X86::AL; Count = DAG.getIntPtrConstant(SizeVal); break; } if (AVT.bitsGT(MVT::i8)) { unsigned UBytes = AVT.getSizeInBits() / 8; Count = DAG.getIntPtrConstant(SizeVal / UBytes); BytesLeft = SizeVal % UBytes; } Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT), InFlag); InFlag = Chain.getValue(1); } else { AVT = MVT::i8; Count = DAG.getIntPtrConstant(SizeVal); Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); InFlag = Chain.getValue(1); } Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : X86::ECX, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); if (TwoRepStos) { InFlag = Chain.getValue(1); Count = Size; EVT CVT = Count.getValueType(); SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : X86::ECX, Left, InFlag); InFlag = Chain.getValue(1); Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); } else if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; EVT AddrVT = Dst.getValueType(); EVT SizeVT = Size.getValueType(); Chain = DAG.getMemset(Chain, dl, DAG.getNode(ISD::ADD, dl, AddrVT, Dst, DAG.getConstant(Offset, AddrVT)), Src, DAG.getConstant(BytesLeft, SizeVT), Align, isVolatile, DstPtrInfo.getWithOffset(Offset)); } // TODO: Use a TokenFactor, as in memcpy, instead of a single chain. return Chain; }
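The Val-splat in the switch above widens the single memset byte to the register written by REP_STOS. A minimal standalone sketch (0xAB is an arbitrary example byte) of the same shifts:

#include <cstdint>
#include <cstdio>

// Standalone sketch of the byte replication used before X86ISD::REP_STOS above:
// the memset byte is splatted to 2, 4 or 8 bytes depending on alignment.
int main() {
  uint64_t Val = 0xAB;                       // memset value truncated to a byte
  uint64_t Val16 = (Val << 8) | Val;         // WORD aligned  -> 0xABAB (AX)
  uint64_t Val32 = (Val16 << 16) | Val16;    // DWORD aligned -> 0xABABABAB (EAX)
  uint64_t Val64 = (Val32 << 32) | Val32;    // QWORD aligned -> 0xABAB...AB (RAX)
  std::printf("%llx %llx %llx\n", (unsigned long long)Val16,
              (unsigned long long)Val32, (unsigned long long)Val64);
}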
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (!ConstantSize) return SDValue(); uint64_t SizeVal = ConstantSize->getZExtValue(); if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) return SDValue(); /// If not DWORD aligned, it is more efficient to call the library. However /// if calling the library is not allowed (AlwaysInline), then soldier on as /// the code generated here is better than the long load-store sequence we /// would otherwise get. if (!AlwaysInline && (Align & 3) != 0) return SDValue(); // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256) return SDValue(); // ESI might be used as a base pointer, in that case we can't simply overwrite // the register. Fall back to generic code. const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo()); if (TRI->hasBasePointer(DAG.getMachineFunction()) && TRI->getBaseRegister() == X86::ESI) return SDValue(); MVT AVT; if (Align & 1) AVT = MVT::i8; else if (Align & 2) AVT = MVT::i16; else if (Align & 4) // DWORD aligned AVT = MVT::i32; else // QWORD aligned AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; unsigned UBytes = AVT.getSizeInBits() / 8; unsigned CountVal = SizeVal / UBytes; SDValue Count = DAG.getIntPtrConstant(CountVal); unsigned BytesLeft = SizeVal % UBytes; SDValue InFlag; Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : X86::ECX, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : X86::ESI, Src, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops); SmallVector<SDValue, 4> Results; Results.push_back(RepMovs); if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; EVT DstVT = Dst.getValueType(); EVT SrcVT = Src.getValueType(); EVT SizeVT = Size.getValueType(); Results.push_back(DAG.getMemcpy(Chain, dl, DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, DstVT)), DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, SrcVT)), DAG.getConstant(BytesLeft, SizeVT), Align, isVolatile, AlwaysInline, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results); }
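The count/remainder split above is the core of the REP MOVS lowering; a minimal standalone sketch (the 23-byte size is an arbitrary example) of how a copy is divided between the string instruction and the trailing memcpy:

#include <cstdint>
#include <cstdio>

// Standalone sketch of the SizeVal / UBytes split performed above: the bulk is
// copied by rep movs with element size UBytes, the remainder by a small memcpy.
int main() {
  uint64_t SizeVal = 23;                     // example copy size in bytes
  unsigned UBytes = 8;                       // QWORD elements on x86-64
  uint64_t CountVal = SizeVal / UBytes;      // value loaded into RCX -> 2
  unsigned BytesLeft = SizeVal % UBytes;     // trailing bytes        -> 7
  uint64_t Offset = SizeVal - BytesLeft;     // where the trailing copy starts
  std::printf("rep movsq count=%llu, then %u bytes at offset %llu\n",
              (unsigned long long)CountVal, BytesLeft,
              (unsigned long long)Offset);
}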
bool SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index, int minOffset, int maxOffset) { unsigned Opc = N.getOpcode(); EVT PtrTy = SPUtli.getPointerTy(); if (Opc == ISD::FrameIndex) { // Stack frame index must be less than 512 (divided by 16): FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(N); int FI = int(FIN->getIndex()); DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = " << FI << "\n"); if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) { Base = CurDAG->getTargetConstant(0, PtrTy); Index = CurDAG->getTargetFrameIndex(FI, PtrTy); return true; } } else if (Opc == ISD::ADD) { // Generated by getelementptr const SDValue Op0 = N.getOperand(0); const SDValue Op1 = N.getOperand(1); if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo) || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) { Base = CurDAG->getTargetConstant(0, PtrTy); Index = N; return true; } else if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) { ConstantSDNode *CN = cast<ConstantSDNode>(Op1); int32_t offset = int32_t(CN->getSExtValue()); if (Op0.getOpcode() == ISD::FrameIndex) { FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op0); int FI = int(FIN->getIndex()); DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset << " frame index = " << FI << "\n"); if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) { Base = CurDAG->getTargetConstant(offset, PtrTy); Index = CurDAG->getTargetFrameIndex(FI, PtrTy); return true; } } else if (offset > minOffset && offset < maxOffset) { Base = CurDAG->getTargetConstant(offset, PtrTy); Index = Op0; return true; } } else if (Op0.getOpcode() == ISD::Constant || Op0.getOpcode() == ISD::TargetConstant) { ConstantSDNode *CN = cast<ConstantSDNode>(Op0); int32_t offset = int32_t(CN->getSExtValue()); if (Op1.getOpcode() == ISD::FrameIndex) { FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op1); int FI = int(FIN->getIndex()); DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset << " frame index = " << FI << "\n"); if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) { Base = CurDAG->getTargetConstant(offset, PtrTy); Index = CurDAG->getTargetFrameIndex(FI, PtrTy); return true; } } else if (offset > minOffset && offset < maxOffset) { Base = CurDAG->getTargetConstant(offset, PtrTy); Index = Op1; return true; } } } else if (Opc == SPUISD::IndirectAddr) { // Indirect with constant offset -> D-Form address const SDValue Op0 = N.getOperand(0); const SDValue Op1 = N.getOperand(1); if (Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo) { // (SPUindirect (SPUhi <arg>, 0), (SPUlo <arg>, 0)) Base = CurDAG->getTargetConstant(0, PtrTy); Index = N; return true; } else if (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1)) { int32_t offset = 0; SDValue idxOp; if (isa<ConstantSDNode>(Op1)) { ConstantSDNode *CN = cast<ConstantSDNode>(Op1); offset = int32_t(CN->getSExtValue()); idxOp = Op0; } else if (isa<ConstantSDNode>(Op0)) { ConstantSDNode *CN = cast<ConstantSDNode>(Op0); offset = int32_t(CN->getSExtValue()); idxOp = Op1; } if (offset >= minOffset && offset <= maxOffset) { Base = CurDAG->getTargetConstant(offset, PtrTy); Index = idxOp; return true; } } } else if (Opc == SPUISD::AFormAddr) { Base = CurDAG->getTargetConstant(0, N.getValueType()); Index = N; return true; } else if (Opc == SPUISD::LDRESULT) { Base = CurDAG->getTargetConstant(0, N.getValueType()); Index = N; return true; } else if (Opc == ISD::Register || Opc == ISD::CopyFromReg || Opc == ISD::UNDEF || Opc == ISD::Constant) { unsigned OpOpc = Op->getOpcode(); if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) { // Direct load/store without getelementptr SDValue Offs; Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2)); if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) { if (Offs.getOpcode() == ISD::UNDEF) Offs = CurDAG->getTargetConstant(0, Offs.getValueType()); Base = Offs; Index = N; return true; } } else { /* If otherwise unadorned, default to D-form address with 0 offset: */ if (Opc == ISD::CopyFromReg) { Index = N.getOperand(1); } else { Index = N; } Base = CurDAG->getTargetConstant(0, Index.getValueType()); return true; } } return false; }
SDNode *SPUDAGToDAGISel::Select(SDNode *N) { unsigned Opc = N->getOpcode(); int n_ops = -1; unsigned NewOpc = 0; EVT OpVT = N->getValueType(0); SDValue Ops[8]; DebugLoc dl = N->getDebugLoc(); if (N->isMachineOpcode()) return NULL; // Already selected. if (Opc == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0)); if (FI < 128) { NewOpc = SPU::AIr32; Ops[0] = TFI; Ops[1] = Imm0; n_ops = 2; } else { NewOpc = SPU::Ar32; Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0)); Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl, N->getValueType(0), TFI), 0); n_ops = 2; } } else if (Opc == ISD::Constant && OpVT == MVT::i64) { // Catch the i64 constants that end up here. Note: The backend doesn't // attempt to legalize the constant (it's useless because DAGCombiner // will insert 64-bit constants and we can't stop it). return SelectI64Constant(N, OpVT, N->getDebugLoc()); } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) && OpVT == MVT::i64) { SDValue Op0 = N->getOperand(0); EVT Op0VT = Op0.getValueType(); EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(), Op0VT, (128 / Op0VT.getSizeInBits())); EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, (128 / OpVT.getSizeInBits())); SDValue shufMask; switch (Op0VT.getSimpleVT().SimpleTy) { default: report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT"); /*NOTREACHED*/ case MVT::i32: shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, CurDAG->getConstant(0x80808080, MVT::i32), CurDAG->getConstant(0x00010203, MVT::i32), CurDAG->getConstant(0x80808080, MVT::i32), CurDAG->getConstant(0x08090a0b, MVT::i32)); break; case MVT::i16: shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, CurDAG->getConstant(0x80808080, MVT::i32), CurDAG->getConstant(0x80800203, MVT::i32), CurDAG->getConstant(0x80808080, MVT::i32), CurDAG->getConstant(0x80800a0b, MVT::i32)); break; case MVT::i8: shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, CurDAG->getConstant(0x80808080, MVT::i32), CurDAG->getConstant(0x80808003, MVT::i32), CurDAG->getConstant(0x80808080, MVT::i32), CurDAG->getConstant(0x8080800b, MVT::i32)); break; } SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode()); HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, Op0VecVT, Op0)); SDValue PromScalar; if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode())) PromScalar = SDValue(N, 0); else PromScalar = PromoteScalar.getValue(); SDValue zextShuffle = CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, PromScalar, PromScalar, SDValue(shufMaskLoad, 0)); HandleSDNode Dummy2(zextShuffle); if (SDNode *N = SelectCode(Dummy2.getValue().getNode())) zextShuffle = SDValue(N, 0); else zextShuffle = Dummy2.getValue(); HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT, zextShuffle)); CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); SelectCode(Dummy.getValue().getNode()); return Dummy.getValue().getNode(); } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT, N->getOperand(0), N->getOperand(1), SDValue(CGLoad, 0))); CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); if (SDNode *N = SelectCode(Dummy.getValue().getNode())) return N; return Dummy.getValue().getNode(); } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode()); HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT, N->getOperand(0), N->getOperand(1), SDValue(CGLoad, 0))); CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); if (SDNode *N = SelectCode(Dummy.getValue().getNode())) return N; return Dummy.getValue().getNode(); } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT, N->getOperand(0), N->getOperand(1), SDValue(CGLoad, 0))); CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); if (SDNode *N = SelectCode(Dummy.getValue().getNode())) return N; return Dummy.getValue().getNode(); } else if (Opc == ISD::TRUNCATE) { SDValue Op0 = N->getOperand(0); if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL) && OpVT == MVT::i32 && Op0.getValueType() == MVT::i64) { // Catch (truncate:i32 ([sra|srl]:i64 arg, c)), where c >= 32 // // Take advantage of the fact that the upper 32 bits are in the // i32 preferred slot and avoid shuffle gymnastics: ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1)); if (CN != 0) { unsigned shift_amt = unsigned(CN->getZExtValue()); if (shift_amt >= 32) { SDNode *hi32 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, Op0.getOperand(0), getRC(MVT::i32)); shift_amt -= 32; if (shift_amt > 0) { // Take care of the additional shift, if present: SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32); unsigned Opc = SPU::ROTMAIr32_i32; if (Op0.getOpcode() == ISD::SRL) Opc = SPU::ROTMr32; hi32 = CurDAG->getMachineNode(Opc, dl, OpVT, SDValue(hi32, 0), shift); } return hi32; } } } } else if (Opc == ISD::SHL) { if (OpVT == MVT::i64) return SelectSHLi64(N, OpVT); } else if (Opc == ISD::SRL) { if (OpVT == MVT::i64) return SelectSRLi64(N, OpVT); } else if (Opc == ISD::SRA) { if (OpVT == MVT::i64) return SelectSRAi64(N, OpVT); } else if (Opc == ISD::FNEG && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) { DebugLoc dl = N->getDebugLoc(); // Check if the pattern is a special form of DFNMS: // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC)) SDValue Op0 = N->getOperand(0); if (Op0.getOpcode() == ISD::FSUB) { SDValue Op00 = Op0.getOperand(0); if (Op00.getOpcode() == ISD::FMUL) { unsigned Opc = SPU::DFNMSf64; if (OpVT == MVT::v2f64) Opc = SPU::DFNMSv2f64; return CurDAG->getMachineNode(Opc, dl, OpVT, Op00.getOperand(0), Op00.getOperand(1), Op0.getOperand(1)); } } SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64); SDNode *signMask = 0; unsigned Opc = SPU::XORfneg64; if (OpVT == MVT::f64) { signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl); } else if (OpVT == MVT::v2f64) { Opc = SPU::XORfnegvec; signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, negConst, negConst).getNode()); } return CurDAG->getMachineNode(Opc, dl, OpVT, N->getOperand(0), SDValue(signMask, 0)); } else if (Opc == ISD::FABS) { if (OpVT == MVT::f64) { SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl); return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT, N->getOperand(0), SDValue(signMask, 0)); } else if (OpVT == MVT::v2f64) { SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64); SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, absConst, absConst); SDNode *signMask = emitBuildVector(absVec.getNode()); return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT, N->getOperand(0), SDValue(signMask, 0)); } } else if (Opc == SPUISD::LDRESULT) { // Custom select instructions for LDRESULT EVT VT = N->getValueType(0); SDValue Arg = N->getOperand(0); SDValue Chain = N->getOperand(1); SDNode *Result; Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT, MVT::Other, Arg, getRC( VT.getSimpleVT()), Chain); return Result; } else if (Opc == SPUISD::IndirectAddr) { // Look at the operands: SelectCode() will catch the cases that aren't // specifically handled here. // // SPUInstrInfo catches the following patterns: // (SPUindirect (SPUhi ...), (SPUlo ...)) // (SPUindirect $sp, imm) EVT VT = N->getValueType(0); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); RegisterSDNode *RN; if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo) || (Op0.getOpcode() == ISD::Register && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0 && RN->getReg() != SPU::R1))) { NewOpc = SPU::Ar32; Ops[1] = Op1; if (Op1.getOpcode() == ISD::Constant) { ConstantSDNode *CN = cast<ConstantSDNode>(Op1); Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT); if (isInt<10>(CN->getSExtValue())) { NewOpc = SPU::AIr32; Ops[1] = Op1; } else { Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl, N->getValueType(0), Op1), 0); } } Ops[0] = Op0; n_ops = 2; } } if (n_ops > 0) { if (N->hasOneUse()) return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops); else return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops); } else return SelectCode(N); }
bool Mips16DAGToDAGISel::selectAddr16( SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset, SDValue &Alias) { EVT ValTy = Addr.getValueType(); Alias = CurDAG->getTargetConstant(0, ValTy); // if Address is FI, get the TargetFrameIndex. if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); Offset = CurDAG->getTargetConstant(0, ValTy); getMips16SPRefReg(Parent, Alias); return true; } // on PIC code Load GA if (Addr.getOpcode() == MipsISD::Wrapper) { Base = Addr.getOperand(0); Offset = Addr.getOperand(1); return true; } if (TM.getRelocationModel() != Reloc::PIC_) { if ((Addr.getOpcode() == ISD::TargetExternalSymbol || Addr.getOpcode() == ISD::TargetGlobalAddress)) return false; } // Addresses of the form FI+const or FI|const if (CurDAG->isBaseWithConstantOffset(Addr)) { ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); if (isInt<16>(CN->getSExtValue())) { // If the first operand is a FI, get the TargetFI Node if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> (Addr.getOperand(0))) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); getMips16SPRefReg(Parent, Alias); } else Base = Addr.getOperand(0); Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); return true; } } // Operand is a result from an ADD. if (Addr.getOpcode() == ISD::ADD) { // When loading from constant pools, load the lower address part in // the instruction itself. Example, instead of: // lui $2, %hi($CPI1_0) // addiu $2, $2, %lo($CPI1_0) // lwc1 $f0, 0($2) // Generate: // lui $2, %hi($CPI1_0) // lwc1 $f0, %lo($CPI1_0)($2) if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { SDValue Opnd0 = Addr.getOperand(1).getOperand(0); if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || isa<JumpTableSDNode>(Opnd0)) { Base = Addr.getOperand(0); Offset = Opnd0; return true; } } // If an indexed floating point load/store can be emitted, return false. const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && Subtarget.hasFPIdx()) return false; } Base = Addr; Offset = CurDAG->getTargetConstant(0, ValTy); return true; }
SDValue LanaiTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op->getValueType(0); if (VT != MVT::i32) return SDValue(); ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op->getOperand(1)); if (!C) return SDValue(); int64_t MulAmt = C->getSExtValue(); int32_t HighestOne = -1; uint32_t NonzeroEntries = 0; int SignedDigit[32] = {0}; // Convert to non-adjacent form (NAF) signed-digit representation. // NAF is a signed-digit form where no adjacent digits are non-zero. It is the // minimal Hamming weight representation of a number (on average 1/3 of the // digits will be non-zero vs 1/2 for regular binary representation). And as // the non-zero digits will be the only digits contributing to the instruction // count, this is desirable. The next loop converts it to NAF (following the // approach in 'Guide to Elliptic Curve Cryptography' [ISBN: 038795273X]) by // choosing the non-zero coefficients such that the resulting quotient is // divisible by 2 which will cause the next coefficient to be zero. int64_t E = std::abs(MulAmt); int S = (MulAmt < 0 ? -1 : 1); int I = 0; while (E > 0) { int ZI = 0; if (E % 2 == 1) { ZI = 2 - (E % 4); if (ZI != 0) ++NonzeroEntries; } SignedDigit[I] = S * ZI; if (SignedDigit[I] == 1) HighestOne = I; E = (E - ZI) / 2; ++I; } // Compute number of instructions required. Due to differences in lowering // between the different processors this count is not exact. // Start by assuming a shift and a add/sub for every non-zero entry (hence // every non-zero entry requires 1 shift and 1 add/sub except for the first // entry). int32_t InstrRequired = 2 * NonzeroEntries - 1; // Correct possible over-adding due to shift by 0 (which is not emitted). if (std::abs(MulAmt) % 2 == 1) --InstrRequired; // Return if the form generated would exceed the instruction threshold. if (InstrRequired > LanaiLowerConstantMulThreshold) return SDValue(); SDValue Res; SDLoc DL(Op); SDValue V = Op->getOperand(0); // Initialize the running sum. Set the running sum to the maximal shifted // positive value (i.e., largest i such that zi == 1 and MulAmt has V<<i as a // term NAF). if (HighestOne == -1) Res = DAG.getConstant(0, DL, MVT::i32); else { Res = DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(HighestOne, DL, MVT::i32)); SignedDigit[HighestOne] = 0; } // Assemble multiplication from shift, add, sub using NAF form and running // sum. for (unsigned int I = 0; I < sizeof(SignedDigit) / sizeof(SignedDigit[0]); ++I) { if (SignedDigit[I] == 0) continue; // Shifted multiplicand (v<<i). SDValue Op = DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(I, DL, MVT::i32)); if (SignedDigit[I] == 1) Res = DAG.getNode(ISD::ADD, DL, VT, Res, Op); else if (SignedDigit[I] == -1) Res = DAG.getNode(ISD::SUB, DL, VT, Res, Op); } return Res; }
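The NAF digit extraction that drives this lowering is self-contained and can be verified independently. A minimal standalone sketch (the multiplier 29 is an arbitrary example) that mirrors the conversion loop above:

#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Standalone sketch of the non-adjacent-form (NAF) conversion used in LowerMUL
// above: every digit is in {-1, 0, +1} and no two adjacent digits are non-zero,
// so x*C becomes a short chain of shifts, adds and subs.
int main() {
  int64_t MulAmt = 29;                       // example constant: 29 = 32 - 4 + 1
  int64_t E = std::llabs(MulAmt);
  int S = (MulAmt < 0 ? -1 : 1);
  int Digit[32] = {0};
  for (int I = 0; I < 32 && E > 0; ++I) {
    int ZI = 0;
    if (E % 2 == 1)
      ZI = 2 - (E % 4);                      // pick +/-1 so E - ZI is divisible by 4,
                                             // forcing the next digit to be zero
    Digit[I] = S * ZI;
    E = (E - ZI) / 2;
  }
  // Expect: +1*2^0, -1*2^2, +1*2^5, i.e. x*29 = (x<<5) - (x<<2) + x.
  for (int I = 0; I < 32; ++I)
    if (Digit[I])
      std::printf("%+d * 2^%d\n", Digit[I], I);
}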
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { // The 'm' constraint is hardcoded here because it is also hardcoded in // SelectionDAGISel, which calls this method. assert(ConstraintCode == 'm' && "Unexpected asm memory constraint"); MachineRegisterInfo &RI = MF->getRegInfo(); const TargetLowering* TL = MF->getTarget().getTargetLowering(); const RegisterSDNode *RegNode = dyn_cast<RegisterSDNode>(Op); // If the address operand is already in the PTRDISPREGS class, we are done. if (RegNode && RI.getRegClass(RegNode->getReg()) == &AVR::PTRDISPREGSRegClass) { OutOps.push_back(Op); return false; } if (Op->getOpcode() == ISD::FrameIndex) { SDValue Base, Disp; if (SelectAddr(Op.getNode(), Op, Base, Disp)) { OutOps.push_back(Base); OutOps.push_back(Disp); return false; } return true; } // If Op is (add reg, imm) and reg is either a virtual register or // a register of PTRDISPREGSRegClass: if (Op->getOpcode() == ISD::ADD || Op->getOpcode() == ISD::SUB) { SDValue CopyFromRegOp = Op->getOperand(0); SDValue ImmOp = Op->getOperand(1); ConstantSDNode *ImmNode = dyn_cast<ConstantSDNode>(ImmOp); unsigned Reg; bool CanHandleRegImmOpt = ImmNode && ImmNode->getAPIntValue().getZExtValue() < 64; if (CopyFromRegOp->getOpcode() == ISD::CopyFromReg) { RegisterSDNode *RegNode = cast<RegisterSDNode>(CopyFromRegOp->getOperand(1)); Reg = RegNode->getReg(); CanHandleRegImmOpt &= (TargetRegisterInfo::isVirtualRegister(Reg) || AVR::PTRDISPREGSRegClass.contains(Reg)); } else { CanHandleRegImmOpt = false; } if (CanHandleRegImmOpt) { // We have the proper case: correct the virtual register class // if needed and move on to the next inline asm operand. SDValue Base, Disp; if (RI.getRegClass(Reg) != &AVR::PTRDISPREGSRegClass) { SDLoc dl(CopyFromRegOp); unsigned VReg = RI.createVirtualRegister( &AVR::PTRDISPREGSRegClass); SDValue CopyToReg = CurDAG->getCopyToReg(CopyFromRegOp, dl, VReg, CopyFromRegOp); SDValue NewCopyFromRegOp = CurDAG->getCopyFromReg(CopyToReg, dl, VReg, TL->getPointerTy()); Base = NewCopyFromRegOp; } else { Base = CopyFromRegOp; } if (ImmNode->getValueType(0) != MVT::i8) { Disp = CurDAG->getTargetConstant( ImmNode->getAPIntValue().getZExtValue(), MVT::i8); } else { Disp = ImmOp; } OutOps.push_back(Base); OutOps.push_back(Disp); return false; } } // More generic case: create a chain that copies Op into a pointer register // and return that register. SDLoc dl(Op); unsigned VReg = RI.createVirtualRegister(&AVR::PTRDISPREGSRegClass); SDValue CopyToReg = CurDAG->getCopyToReg(Op, dl, VReg, Op); SDValue CopyFromReg = CurDAG->getCopyFromReg(CopyToReg, dl, VReg, TL->getPointerTy()); OutOps.push_back(CopyFromReg); return false; }
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); const X86Subtarget &Subtarget = DAG.getMachineFunction().getSubtarget<X86Subtarget>(); if (!ConstantSize) return SDValue(); RepMovsRepeats Repeats(ConstantSize->getZExtValue()); if (!AlwaysInline && Repeats.Size > Subtarget.getMaxInlineSizeThreshold()) return SDValue(); /// If not DWORD aligned, it is more efficient to call the library. However /// if calling the library is not allowed (AlwaysInline), then soldier on as /// the code generated here is better than the long load-store sequence we /// would otherwise get. if (!AlwaysInline && (Align & 3) != 0) return SDValue(); // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256) return SDValue(); // If the base register might conflict with our physical registers, bail out. const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI, X86::ECX, X86::ESI, X86::EDI}; if (isBaseRegConflictPossible(DAG, ClobberSet)) return SDValue(); // If the target has enhanced REPMOVSB, then it's at least as fast to use // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle // BytesLeft. if (!Subtarget.hasERMSB() && !(Align & 1)) { if (Align & 2) // WORD aligned Repeats.AVT = MVT::i16; else if (Align & 4) // DWORD aligned Repeats.AVT = MVT::i32; else // QWORD aligned Repeats.AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; if (Repeats.BytesLeft() > 0 && DAG.getMachineFunction().getFunction().optForMinSize()) { // When aggressively optimizing for size, avoid generating the code to // handle BytesLeft. Repeats.AVT = MVT::i8; } } SDValue InFlag; Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI, Src, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(Repeats.AVT), InFlag }; SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops); SmallVector<SDValue, 4> Results; Results.push_back(RepMovs); if (Repeats.BytesLeft()) { // Handle the last 1 - 7 bytes. unsigned Offset = Repeats.Size - Repeats.BytesLeft(); EVT DstVT = Dst.getValueType(); EVT SrcVT = Src.getValueType(); EVT SizeVT = Size.getValueType(); Results.push_back(DAG.getMemcpy(Chain, dl, DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)), DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)), DAG.getConstant(Repeats.BytesLeft(), dl, SizeVT), Align, isVolatile, AlwaysInline, false, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results); }
bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintCode, std::vector<SDValue> &OutOps) { assert((ConstraintCode == InlineAsm::Constraint_m || ConstraintCode == InlineAsm::Constraint_Q) && "Unexpected asm memory constraint"); MachineRegisterInfo &RI = MF->getRegInfo(); const AVRSubtarget &STI = MF->getSubtarget<AVRSubtarget>(); const TargetLowering &TL = *STI.getTargetLowering(); SDLoc dl(Op); auto DL = CurDAG->getDataLayout(); const RegisterSDNode *RegNode = dyn_cast<RegisterSDNode>(Op); // If the address operand is already in the PTRDISPREGS class, we are done. if (RegNode && RI.getRegClass(RegNode->getReg()) == &AVR::PTRDISPREGSRegClass) { OutOps.push_back(Op); return false; } if (Op->getOpcode() == ISD::FrameIndex) { SDValue Base, Disp; if (SelectAddr(Op.getNode(), Op, Base, Disp)) { OutOps.push_back(Base); OutOps.push_back(Disp); return false; } return true; } // If Op is (add register, immediate) and the register is either a virtual // register or a register of PTRDISPREGSRegClass: if (Op->getOpcode() == ISD::ADD || Op->getOpcode() == ISD::SUB) { SDValue CopyFromRegOp = Op->getOperand(0); SDValue ImmOp = Op->getOperand(1); ConstantSDNode *ImmNode = dyn_cast<ConstantSDNode>(ImmOp); unsigned Reg; bool CanHandleRegImmOpt = ImmNode && ImmNode->getAPIntValue().getZExtValue() < 64; if (CopyFromRegOp->getOpcode() == ISD::CopyFromReg) { RegisterSDNode *RegNode = cast<RegisterSDNode>(CopyFromRegOp->getOperand(1)); Reg = RegNode->getReg(); CanHandleRegImmOpt &= (TargetRegisterInfo::isVirtualRegister(Reg) || AVR::PTRDISPREGSRegClass.contains(Reg)); } else { CanHandleRegImmOpt = false; } // We have the proper case: correct the virtual register class // if needed and move on to the next inline asm operand. if (CanHandleRegImmOpt) { SDValue Base, Disp; if (RI.getRegClass(Reg) != &AVR::PTRDISPREGSRegClass) { SDLoc dl(CopyFromRegOp); unsigned VReg = RI.createVirtualRegister(&AVR::PTRDISPREGSRegClass); SDValue CopyToReg = CurDAG->getCopyToReg(CopyFromRegOp, dl, VReg, CopyFromRegOp); SDValue NewCopyFromRegOp = CurDAG->getCopyFromReg(CopyToReg, dl, VReg, TL.getPointerTy(DL)); Base = NewCopyFromRegOp; } else { Base = CopyFromRegOp; } if (ImmNode->getValueType(0) != MVT::i8) { Disp = CurDAG->getTargetConstant(ImmNode->getAPIntValue().getZExtValue(), dl, MVT::i8); } else { Disp = ImmOp; } OutOps.push_back(Base); OutOps.push_back(Disp); return false; } } // More generic case: create a chain that copies Op into a pointer register // and return that register. unsigned VReg = RI.createVirtualRegister(&AVR::PTRDISPREGSRegClass); SDValue CopyToReg = CurDAG->getCopyToReg(Op, dl, VReg, Op); SDValue CopyFromReg = CurDAG->getCopyFromReg(CopyToReg, dl, VReg, TL.getPointerTy(DL)); OutOps.push_back(CopyFromReg); return false; }
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { // Do repeated 4-byte loads and stores. To be improved. // This requires 4-byte alignment. if ((Align & 3) != 0) return SDValue(); // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (!ConstantSize) return SDValue(); uint64_t SizeVal = ConstantSize->getZExtValue(); if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) return SDValue(); unsigned BytesLeft = SizeVal & 3; unsigned NumMemOps = SizeVal >> 2; unsigned EmittedNumMemOps = 0; EVT VT = MVT::i32; unsigned VTSize = 4; unsigned i = 0; const unsigned MAX_LOADS_IN_LDM = 6; SDValue TFOps[MAX_LOADS_IN_LDM]; SDValue Loads[MAX_LOADS_IN_LDM]; uint64_t SrcOff = 0, DstOff = 0; // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the // same number of stores. The loads and stores will get combined into // ldm/stm later on. while (EmittedNumMemOps < NumMemOps) { for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), SrcPtrInfo.getWithOffset(SrcOff), isVolatile, false, false, 0); TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { TFOps[i] = DAG.getStore(Chain, dl, Loads[i], DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, DAG.getConstant(DstOff, MVT::i32)), DstPtrInfo.getWithOffset(DstOff), isVolatile, false, 0); DstOff += VTSize; } Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); EmittedNumMemOps += i; } if (BytesLeft == 0) return Chain; // Issue loads / stores for the trailing (1 - 3) bytes. unsigned BytesLeftSave = BytesLeft; i = 0; while (BytesLeft) { if (BytesLeft >= 2) { VT = MVT::i16; VTSize = 2; } else { VT = MVT::i8; VTSize = 1; } Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), SrcPtrInfo.getWithOffset(SrcOff), false, false, false, 0); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; BytesLeft -= VTSize; } Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); i = 0; BytesLeft = BytesLeftSave; while (BytesLeft) { if (BytesLeft >= 2) { VT = MVT::i16; VTSize = 2; } else { VT = MVT::i8; VTSize = 1; } TFOps[i] = DAG.getStore(Chain, dl, Loads[i], DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, DAG.getConstant(DstOff, MVT::i32)), DstPtrInfo.getWithOffset(DstOff), false, false, 0); ++i; DstOff += VTSize; BytesLeft -= VTSize; } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); }