/// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. /// SDValue AlphaTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_Alpha); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), InFlag).getValue(1); SDValue RetValue = Chain.getValue(0); InFlag = Chain.getValue(2); // If this is an 8/16/32-bit value, it is really passed promoted to 64 // bits. Insert an assert[sz]ext to capture this, then truncate to the // right size. if (VA.getLocInfo() == CCValAssign::SExt) RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::ZExt) RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue, DAG.getValueType(VA.getValVT())); if (VA.getLocInfo() != CCValAssign::Full) RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue); InVals.push_back(RetValue); } return Chain; }
// There is no native floating point division, but we can convert this to a // reciprocal/multiply operation. If the first parameter is constant 1.0, then // just a reciprocal will suffice. SDValue VectorProcTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); EVT type = Op.getOperand(1).getValueType(); SDValue two = DAG.getConstantFP(2.0, type); SDValue denom = Op.getOperand(1); SDValue estimate = DAG.getNode(VectorProcISD::RECIPROCAL_EST, dl, type, denom); // Perform a series of newton/raphson refinements. Each iteration doubles // the precision. The initial estimate has 6 bits of precision, so two iteration // results in 24 bits, which is larger than the significand. for (int i = 0; i < 2; i++) { // trial = x * estimate (ideally, x * 1/x should be 1.0) // error = 2.0 - trial // estimate = estimate * error SDValue trial = DAG.getNode(ISD::FMUL, dl, type, estimate, denom); SDValue error = DAG.getNode(ISD::FSUB, dl, type, two, trial); estimate = DAG.getNode(ISD::FMUL, dl, type, estimate, error); } // Check if the first parameter is constant 1.0. If so, we don't need // to multiply. bool isOne = false; if (type.isVector()) { if (isSplatVector(Op.getOperand(0).getNode())) { ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op.getOperand(0).getOperand(0)); isOne = C && C->isExactlyValue(1.0); } } else { ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)); isOne = C && C->isExactlyValue(1.0); } if (!isOne) estimate = DAG.getNode(ISD::FMUL, dl, type, Op.getOperand(0), estimate); return estimate; }
// After instruction selection, insert COPY_TO_REGCLASS nodes to help in // choosing the proper register classes. void BlackfinDAGToDAGISel::FixRegisterClasses(SelectionDAG &DAG) { const BlackfinInstrInfo &TII = getInstrInfo(); const BlackfinRegisterInfo *TRI = getRegisterInfo(); DAG.AssignTopologicalOrder(); HandleSDNode Dummy(DAG.getRoot()); for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin(); NI != DAG.allnodes_end(); ++NI) { if (NI->use_empty() || !NI->isMachineOpcode()) continue; const TargetInstrDesc &DefTID = TII.get(NI->getMachineOpcode()); for (SDNode::use_iterator UI = NI->use_begin(); !UI.atEnd(); ++UI) { if (!UI->isMachineOpcode()) continue; if (UI.getUse().getResNo() >= DefTID.getNumDefs()) continue; const TargetRegisterClass *DefRC = DefTID.OpInfo[UI.getUse().getResNo()].getRegClass(TRI); const TargetInstrDesc &UseTID = TII.get(UI->getMachineOpcode()); if (UseTID.getNumDefs()+UI.getOperandNo() >= UseTID.getNumOperands()) continue; const TargetRegisterClass *UseRC = UseTID.OpInfo[UseTID.getNumDefs()+UI.getOperandNo()].getRegClass(TRI); if (!DefRC || !UseRC) continue; // We cannot copy CC <-> !(CC/D) if ((isCC(DefRC) && !isDCC(UseRC)) || (isCC(UseRC) && !isDCC(DefRC))) { SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, NI->getDebugLoc(), MVT::i32, UI.getUse().get(), DAG.getTargetConstant(BF::DRegClassID, MVT::i32)); UpdateNodeOperand(DAG, *UI, UI.getOperandNo(), SDValue(Copy, 0)); } } } DAG.setRoot(Dummy.getValue()); }
static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { SDValue Chain = Op.getOperand(0); // Legalize the chain. SDValue Size = Op.getOperand(1); // Legalize the size. DebugLoc dl = Op.getDebugLoc(); unsigned SPReg = SP::O6; SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32); SDValue NewSP = DAG.getNode(ISD::SUB, dl, MVT::i32, SP, Size); // Value Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain // The resultant pointer is actually 16 words from the bottom of the stack, // to provide a register spill area. SDValue NewVal = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP, DAG.getConstant(96, MVT::i32)); SDValue Ops[2] = { NewVal, Chain }; return DAG.getMergeValues(Ops, 2, dl); }
unsigned SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const { const Function *CalleeFn = 0; if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { CalleeFn = dyn_cast<Function>(G->getGlobal()); } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { const Function *Fn = DAG.getMachineFunction().getFunction(); const Module *M = Fn->getParent(); CalleeFn = M->getFunction(E->getSymbol()); } if (!CalleeFn) return 0; assert(CalleeFn->hasStructRetAttr() && "Callee does not have the StructRet attribute."); const PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType()); const Type *ElementTy = Ty->getElementType(); return getTargetData()->getTypeAllocSize(ElementTy); }
// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size), // deciding whether to use a loop or straight-line code. static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Src1, SDValue Src2, uint64_t Size) { SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); EVT PtrVT = Src1.getValueType(); // A two-CLC sequence is a clear win over a loop, not least because it // needs only one branch. A three-CLC sequence needs the same number // of branches as a loop (i.e. 2), but is shorter. That brings us to // lengths greater than 768 bytes. It seems relatively likely that // a difference will be found within the first 768 bytes, so we just // optimize for the smallest number of branch instructions, in order // to avoid polluting the prediction buffer too much. A loop only ever // needs 2 branches, whereas a straight-line sequence would need 3 or more. if (Size > 3 * 256) return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2, DAG.getConstant(Size, PtrVT), DAG.getConstant(Size / 256, PtrVT)); return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2, DAG.getConstant(Size, PtrVT)); }
SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setReturnAddressIsTaken(true); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(TD->getPointerSize(), MVT::i16); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), MachinePointerInfo(), false, false, 0); } // Just load the return address. SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), RetAddrFI, MachinePointerInfo(), false, false, 0); }
SDValue LanaiTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setReturnAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc DL(Op); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); const unsigned Offset = -4; SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, DAG.getIntPtrConstant(Offset, DL)); return DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(), false, false, false, 0); } // Return the link register, which contains the return address. // Mark it an implicit live-in. unsigned Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32)); return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT); }
SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_; bool ExtraLoadRequired = Subtarget.GVRequiresExtraLoad(GV, getTargetMachine(), false); SDValue Result; if (!IsPic && !ExtraLoadRequired) { Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset); Offset = 0; } else { unsigned char OpFlags = 0; if (ExtraLoadRequired) OpFlags = SystemZII::MO_GOTENT; Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags); } Result = DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result); if (ExtraLoadRequired) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, PseudoSourceValue::getGOT(), 0, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. if (Offset != 0) Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result, DAG.getConstant(Offset, getPointerTy())); return Result; }
SDValue XCoreSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { unsigned SizeBitWidth = Size.getValueType().getSizeInBits(); // Call __memcpy_4 if the src, dst and size are all 4 byte aligned. if (!AlwaysInline && (Align & 3) == 0 && DAG.MaskedValueIsZero(Size, APInt(SizeBitWidth, 3))) { const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(Chain) .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("__memcpy_4", TLI.getPointerTy(DAG.getDataLayout())), std::move(Args)) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; } // Otherwise have the target-independent code call memcpy. return SDValue(); }
SDValue HexagonSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize) return SDValue(); uint64_t SizeVal = ConstantSize->getZExtValue(); if (SizeVal < 32 || (SizeVal % 8) != 0) return SDValue(); // Special case aligned memcpys with size >= 32 bytes and a multiple of 8. // const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); const char *SpecialMemcpyName = "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes"; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(Chain) .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), Type::getVoidTy(*DAG.getContext()), DAG.getTargetExternalSymbol( SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())), std::move(Args)) .setDiscardResult(); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; }
SDValue MipsTargetLowering:: LowerJumpTable(SDValue Op, SelectionDAG &DAG) { SDValue ResNode; SDValue HiPart; // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { SDVTList VTs = DAG.getVTList(MVT::i32); SDValue Ops[] = { JTI }; HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, Ops, 1); } else // Emit Load from Global Pointer HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); return ResNode; }
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi, SelectionDAG &DAG) const { EVT Ty = Op.getOperand(0).getValueType(); DebugLoc DL = Op.getDebugLoc(); SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped, Op.getOperand(0), Op.getOperand(1)); SDValue Lo, Hi; if (HasLo) Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult, DAG.getConstant(Mips::sub_lo, MVT::i32)); if (HasHi) Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult, DAG.getConstant(Mips::sub_hi, MVT::i32)); if (!HasLo || !HasHi) return HasLo ? Lo : Hi; SDValue Vals[] = { Lo, Hi }; return DAG.getMergeValues(Vals, 2, DL); }
static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, TargetLowering::DAGCombinerInfo &DCI, const Cpu0Subtarget* Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); EVT Ty = N->getValueType(0); unsigned LO = Cpu0::LO; unsigned HI = Cpu0::HI; unsigned opc = N->getOpcode() == ISD::SDIVREM ? Cpu0ISD::DivRem : Cpu0ISD::DivRemU; SDLoc DL(N); SDValue DivRem = DAG.getNode(opc, DL, MVT::Glue, N->getOperand(0), N->getOperand(1)); SDValue InChain = DAG.getEntryNode(); SDValue InGlue = DivRem; // insert MFLO if (N->hasAnyUseOfValue(0)) { SDValue CopyFromLo = DAG.getCopyFromReg(InChain, DL, LO, Ty, InGlue); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo); InChain = CopyFromLo.getValue(1); InGlue = CopyFromLo.getValue(2); } // insert MFHI if (N->hasAnyUseOfValue(1)) { SDValue CopyFromHi = DAG.getCopyFromReg(InChain, DL, HI, Ty, InGlue); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi); } return SDValue(); }
SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &SystemZCC, SelectionDAG &DAG) { // FIXME: Emit a test if RHS is zero bool isUnsigned = false; SystemZCC::CondCodes TCC; switch (CC) { default: llvm_unreachable("Invalid integer condition!"); case ISD::SETEQ: case ISD::SETOEQ: TCC = SystemZCC::E; break; case ISD::SETUEQ: TCC = SystemZCC::NLH; break; case ISD::SETNE: case ISD::SETONE: TCC = SystemZCC::NE; break; case ISD::SETUNE: TCC = SystemZCC::LH; break; case ISD::SETO: TCC = SystemZCC::O; break; case ISD::SETUO: TCC = SystemZCC::NO; break; case ISD::SETULE: if (LHS.getValueType().isFloatingPoint()) { TCC = SystemZCC::NH; break; } isUnsigned = true; // FALLTHROUGH case ISD::SETLE: case ISD::SETOLE: TCC = SystemZCC::LE; break; case ISD::SETUGE: if (LHS.getValueType().isFloatingPoint()) { TCC = SystemZCC::NL; break; } isUnsigned = true; // FALLTHROUGH case ISD::SETGE: case ISD::SETOGE: TCC = SystemZCC::HE; break; case ISD::SETUGT: if (LHS.getValueType().isFloatingPoint()) { TCC = SystemZCC::NLE; break; } isUnsigned = true; // FALLTHROUGH case ISD::SETGT: case ISD::SETOGT: TCC = SystemZCC::H; break; case ISD::SETULT: if (LHS.getValueType().isFloatingPoint()) { TCC = SystemZCC::NHE; break; } isUnsigned = true; // FALLTHROUGH case ISD::SETLT: case ISD::SETOLT: TCC = SystemZCC::L; break; } SystemZCC = DAG.getConstant(TCC, MVT::i32); DebugLoc dl = LHS.getDebugLoc(); return DAG.getNode((isUnsigned ? SystemZISD::UCMP : SystemZISD::CMP), dl, MVT::i64, LHS, RHS); }
/// LowerCCCArguments - transform physical registers into virtual registers and /// generate load operations for arguments places on the stack. // FIXME: struct return stuff // FIXME: varargs SDValue SystemZTargetLowering::LowerCCCArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); if (isVarArg) report_fatal_error("Varargs not supported yet"); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { SDValue ArgValue; CCValAssign &VA = ArgLocs[i]; EVT LocVT = VA.getLocVT(); if (VA.isRegLoc()) { // Arguments passed in registers TargetRegisterClass *RC; switch (LocVT.getSimpleVT().SimpleTy) { default: #ifndef NDEBUG errs() << "LowerFormalArguments Unhandled argument type: " << LocVT.getSimpleVT().SimpleTy << "\n"; #endif llvm_unreachable(0); case MVT::i64: RC = SystemZ::GR64RegisterClass; break; case MVT::f32: RC = SystemZ::FP32RegisterClass; break; case MVT::f64: RC = SystemZ::FP64RegisterClass; break; } unsigned VReg = RegInfo.createVirtualRegister(RC); RegInfo.addLiveIn(VA.getLocReg(), VReg); ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT); } else { // Sanity check assert(VA.isMemLoc()); // Create the nodes corresponding to a load from this parameter slot. // Create the frame index object for this incoming parameter... int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8, VA.getLocMemOffset(), true, false); // Create the SelectionDAG nodes corresponding to a load // from this parameter SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN, PseudoSourceValue::getFixedStack(FI), 0, false, false, 0); } // If this is an 8/16/32-bit value, it is really passed promoted to 64 // bits. Insert an assert[sz]ext to capture this, then truncate to the // right size. if (VA.getLocInfo() == CCValAssign::SExt) ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::ZExt) ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue, DAG.getValueType(VA.getValVT())); if (VA.getLocInfo() != CCValAssign::Full) ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); InVals.push_back(ArgValue); } return Chain; }
SDValue BPFTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { switch (CallConv) { default: llvm_unreachable("Unsupported calling convention"); case CallingConv::C: case CallingConv::Fast: break; } MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_BPF64); for (auto &VA : ArgLocs) { if (VA.isRegLoc()) { // Arguments passed in registers EVT RegVT = VA.getLocVT(); switch (RegVT.getSimpleVT().SimpleTy) { default: { errs() << "LowerFormalArguments Unhandled argument type: " << RegVT.getSimpleVT().SimpleTy << '\n'; llvm_unreachable(0); } case MVT::i64: unsigned VReg = RegInfo.createVirtualRegister(&BPF::GPRRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT); // If this is an 8/16/32-bit value, it is really passed promoted to 64 // bits. Insert an assert[sz]ext to capture this, then truncate to the // right size. if (VA.getLocInfo() == CCValAssign::SExt) ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::ZExt) ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); if (VA.getLocInfo() != CCValAssign::Full) ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue); InVals.push_back(ArgValue); } } else { DiagnosticInfoUnsupported Err(DL, *MF.getFunction(), "defined with too many args", SDValue()); DAG.getContext()->diagnose(Err); } } if (IsVarArg || MF.getFunction()->hasStructRetAttr()) { DiagnosticInfoUnsupported Err( DL, *MF.getFunction(), "functions with VarArgs or StructRet are not supported", SDValue()); DAG.getContext()->diagnose(Err); } return Chain; }
// Emit, if possible, a specialized version of the given Libcall. Typically this // means selecting the appropriately aligned version, but we also convert memset // of 0 into memclr. SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, RTLIB::Libcall LC) const { const ARMSubtarget &Subtarget = DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); const ARMTargetLowering *TLI = Subtarget.getTargetLowering(); // Only use a specialized AEABI function if the default version of this // Libcall is an AEABI function. if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0) return SDValue(); // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be // able to translate memset to memclr and use the value to index the function // name array. enum { AEABI_MEMCPY = 0, AEABI_MEMMOVE, AEABI_MEMSET, AEABI_MEMCLR } AEABILibcall; switch (LC) { case RTLIB::MEMCPY: AEABILibcall = AEABI_MEMCPY; break; case RTLIB::MEMMOVE: AEABILibcall = AEABI_MEMMOVE; break; case RTLIB::MEMSET: AEABILibcall = AEABI_MEMSET; if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src)) if (ConstantSrc->getZExtValue() == 0) AEABILibcall = AEABI_MEMCLR; break; default: return SDValue(); } // Choose the most-aligned libcall variant that we can enum { ALIGN1 = 0, ALIGN4, ALIGN8 } AlignVariant; if ((Align & 7) == 0) AlignVariant = ALIGN8; else if ((Align & 3) == 0) AlignVariant = ALIGN4; else AlignVariant = ALIGN1; TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); Entry.Node = Dst; Args.push_back(Entry); if (AEABILibcall == AEABI_MEMCLR) { Entry.Node = Size; Args.push_back(Entry); } else if (AEABILibcall == AEABI_MEMSET) { // Adjust parameters for memset, EABI uses format (ptr, size, value), // GNU library uses (ptr, value, size) // See RTABI section 4.3.4 Entry.Node = Size; Args.push_back(Entry); // Extend or truncate the argument to be an i32 value for the call. if (Src.getValueType().bitsGT(MVT::i32)) Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); else if (Src.getValueType().bitsLT(MVT::i32)) Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); Entry.Node = Src; Entry.Ty = Type::getInt32Ty(*DAG.getContext()); Entry.IsSExt = false; Args.push_back(Entry); } else { Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); } char const *FunctionNames[4][3] = { { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" }, { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" }, { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" }, { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" } }; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(Chain) .setLibCallee( TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], TLI->getPointerTy(DAG.getDataLayout())), std::move(Args)) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; }
SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); DebugLoc dl = Op.getDebugLoc(); // If we are doing an AND and testing against zero, then the CMP // will not be generated. The AND (or BIT) will generate the condition codes, // but they are different from CMP. // FIXME: since we're doing a post-processing, use a pseudoinstr here, so // lowering & isel wouldn't diverge. bool andCC = false; if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { if (RHSC->isNullValue() && LHS.hasOneUse() && (LHS.getOpcode() == ISD::AND || (LHS.getOpcode() == ISD::TRUNCATE && LHS.getOperand(0).getOpcode() == ISD::AND))) { andCC = true; } } ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); SDValue TargetCC; SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG); // Get the condition codes directly from the status register, if its easy. // Otherwise a branch will be generated. Note that the AND and BIT // instructions generate different flags than CMP, the carry bit can be used // for NE/EQ. bool Invert = false; bool Shift = false; bool Convert = true; switch (cast<ConstantSDNode>(TargetCC)->getZExtValue()) { default: Convert = false; break; case MSP430CC::COND_HS: // Res = SRW & 1, no processing is required break; case MSP430CC::COND_LO: // Res = ~(SRW & 1) Invert = true; break; case MSP430CC::COND_NE: if (andCC) { // C = ~Z, thus Res = SRW & 1, no processing is required } else { // Res = ~((SRW >> 1) & 1) Shift = true; Invert = true; } break; case MSP430CC::COND_E: Shift = true; // C = ~Z for AND instruction, thus we can put Res = ~(SRW & 1), however, // Res = (SRW >> 1) & 1 is 1 word shorter. break; } EVT VT = Op.getValueType(); SDValue One = DAG.getConstant(1, VT); if (Convert) { SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SRW, MVT::i16, Flag); if (Shift) // FIXME: somewhere this is turned into a SRL, lower it MSP specific? SR = DAG.getNode(ISD::SRA, dl, MVT::i16, SR, One); SR = DAG.getNode(ISD::AND, dl, MVT::i16, SR, One); if (Invert) SR = DAG.getNode(ISD::XOR, dl, MVT::i16, SR, One); return SR; } else { SDValue Zero = DAG.getConstant(0, VT); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); SmallVector<SDValue, 4> Ops; Ops.push_back(One); Ops.push_back(Zero); Ops.push_back(TargetCC); Ops.push_back(Flag); return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size()); } }
/// LowerCCCCallTo - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. /// TODO: sret. SDValue MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CC_MSP430); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); Chain = DAG.getCALLSEQ_START(Chain ,DAG.getConstant(NumBytes, getPointerTy(), true)); SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass; SmallVector<SDValue, 12> MemOpChains; SDValue StackPtr; // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; } // Arguments that can be passed on register must be kept at RegsToPass // vector if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { assert(VA.isMemLoc()); if (StackPtr.getNode() == 0) StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SPW, getPointerTy()); SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, DAG.getIntPtrConstant(VA.getLocMemOffset())); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(),false, false, 0)); } } // Transform all store nodes into one single node because all store nodes are // independent of each other. if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); // Build a sequence of copy-to-reg nodes chained together with token chain and // flag operands which copy the outgoing args into registers. The InFlag in // necessary since all emitted instructions must be stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } // If the callee is a GlobalAddress node (quite common, every direct call is) // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i16); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16); // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); Ops.push_back(Callee); // Add argument registers to the end of the list so that they are // known live into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); if (InFlag.getNode()) Ops.push_back(InFlag); Chain = DAG.getNode(MSP430ISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, getPointerTy(), true), DAG.getConstant(0, getPointerTy(), true), InFlag); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, InVals); }
SDValue NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); const DataLayout *TD = getDataLayout(); const Function *F = MF.getFunction(); const AttrListPtr &PAL = F->getAttributes(); SDValue Root = DAG.getRoot(); std::vector<SDValue> OutChains; bool isKernel = llvm::isKernelFunction(*F); bool isABI = (nvptxSubtarget.getSmVersion() >= 20); std::vector<Type *> argTypes; std::vector<const Argument *> theArgs; for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { theArgs.push_back(I); argTypes.push_back(I->getType()); } assert(argTypes.size() == Ins.size() && "Ins types and function types did not match"); int idx = 0; for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) { Type *Ty = argTypes[i]; EVT ObjectVT = getValueType(Ty); assert(ObjectVT == Ins[i].VT && "Ins type did not match function type"); // If the kernel argument is image*_t or sampler_t, convert it to // a i32 constant holding the parameter position. This can later // matched in the AsmPrinter to output the correct mangled name. if (isImageOrSamplerVal(theArgs[i], (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() : 0))) { assert(isKernel && "Only kernels can have image/sampler params"); InVals.push_back(DAG.getConstant(i+1, MVT::i32)); continue; } if (theArgs[i]->use_empty()) { // argument is dead InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT)); continue; } // In the following cases, assign a node order of "idx+1" // to newly created nodes. The SDNOdes for params have to // appear in the same order as their order of appearance // in the original function. "idx+1" holds that order. if (PAL.getParamAttributes(i+1).hasAttribute(Attributes::ByVal) == false) { // A plain scalar. if (isABI || isKernel) { // If ABI, load from the param symbol SDValue Arg = getParamSymbol(DAG, idx); Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT( F->getContext()), llvm::ADDRESS_SPACE_PARAM)); SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false, false, TD->getABITypeAlignment(ObjectVT.getTypeForEVT( F->getContext()))); if (p.getNode()) DAG.AssignOrdering(p.getNode(), idx+1); InVals.push_back(p); } else { // If no ABI, just move the param symbol SDValue Arg = getParamSymbol(DAG, idx, ObjectVT); SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); if (p.getNode()) DAG.AssignOrdering(p.getNode(), idx+1); InVals.push_back(p); } continue; } // Param has ByVal attribute if (isABI || isKernel) { // Return MoveParam(param symbol). // Ideally, the param symbol can be returned directly, // but when SDNode builder decides to use it in a CopyToReg(), // machine instruction fails because TargetExternalSymbol // (not lowered) is target dependent, and CopyToReg assumes // the source is lowered. SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); if (p.getNode()) DAG.AssignOrdering(p.getNode(), idx+1); if (isKernel) InVals.push_back(p); else { SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p); InVals.push_back(p2); } } else { // Have to move a set of param symbols to registers and // store them locally and return the local pointer in InVals const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]); assert(elemPtrType && "Byval parameter should be a pointer type"); Type *elemType = elemPtrType->getElementType(); // Compute the constituent parts SmallVector<EVT, 16> vtparts; SmallVector<uint64_t, 16> offsets; ComputeValueVTs(*this, elemType, vtparts, &offsets, 0); unsigned totalsize = 0; for (unsigned j=0, je=vtparts.size(); j!=je; ++j) totalsize += vtparts[j].getStoreSizeInBits(); SDValue localcopy = DAG.getFrameIndex(MF.getFrameInfo()-> CreateStackObject(totalsize/8, 16, false), getPointerTy()); unsigned sizesofar = 0; std::vector<SDValue> theChains; for (unsigned j=0, je=vtparts.size(); j!=je; ++j) { unsigned numElems = 1; if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements(); for (unsigned k=0, ke=numElems; k!=ke; ++k) { EVT tmpvt = vtparts[j]; if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType(); SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt, getParamSymbol(DAG, idx, tmpvt)); SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy, DAG.getConstant(sizesofar, getPointerTy())); theChains.push_back(DAG.getStore(Chain, dl, arg, addr, MachinePointerInfo(), false, false, 0)); sizesofar += tmpvt.getStoreSizeInBits()/8; ++idx; } } --idx; Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0], theChains.size()); InVals.push_back(localcopy); } } // Clang will check explicit VarArg and issue error if any. However, Clang // will let code with // implicit var arg like f() pass. // We treat this case as if the arg list is empty. //if (F.isVarArg()) { // assert(0 && "VarArg not supported yet!"); //} if (!OutChains.empty()) DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0], OutChains.size())); return Chain; }
SDValue BlackfinTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. SmallVector<CCValAssign, 16> RVLocs; // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), DAG.getTarget(), RVLocs, *DAG.getContext()); // Analize return values. CCInfo.AnalyzeReturn(Outs, RetCC_Blackfin); // If this is the first return lowered for this function, add the regs to the // liveout set for the function. if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { for (unsigned i = 0; i != RVLocs.size(); ++i) DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); } SDValue Flag; // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue Opi = OutVals[i]; // Expand to i32 if necessary switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Opi = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Opi); break; case CCValAssign::ZExt: Opi = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Opi); break; case CCValAssign::AExt: Opi = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Opi); break; } Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Opi, SDValue()); // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); } if (Flag.getNode()) { return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain, Flag); } else { return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain); } }
SDValue BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space CCInfo.AnalyzeFormalArguments(Ins, CC_Blackfin); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); TargetRegisterClass *RC = VA.getLocReg() == BF::P0 ? BF::PRegisterClass : BF::DRegisterClass; assert(RC->contains(VA.getLocReg()) && "Unexpected regclass in CCState"); assert(RC->hasType(RegVT) && "Unexpected regclass in CCState"); unsigned Reg = MF.getRegInfo().createVirtualRegister(RC); MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it is really passed promoted to 32 // bits. Insert an assert[sz]ext to capture this, then truncate to the // right size. if (VA.getLocInfo() == CCValAssign::SExt) ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::ZExt) ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); if (VA.getLocInfo() != CCValAssign::Full) ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); InVals.push_back(ArgValue); } else { assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc"); unsigned ObjSize = VA.getLocVT().getStoreSize(); int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo(), false, false, 0)); } } return Chain; }
static void fail(SDLoc DL, SelectionDAG &DAG, const char *msg) { MachineFunction &MF = DAG.getMachineFunction(); DAG.getContext()->diagnose( DiagnosticInfoUnsupported(DL, *MF.getFunction(), msg, SDValue())); }
/// LowerCCCArguments - transform physical registers into virtual registers and /// generate load operations for arguments places on the stack. // FIXME: struct return stuff // FIXME: varargs SDValue MSP430TargetLowering::LowerCCCArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_MSP430); assert(!isVarArg && "Varargs not supported yet"); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (VA.isRegLoc()) { // Arguments passed in registers EVT RegVT = VA.getLocVT(); switch (RegVT.getSimpleVT().SimpleTy) { default: { #ifndef NDEBUG errs() << "LowerFormalArguments Unhandled argument type: " << RegVT.getSimpleVT().SimpleTy << "\n"; #endif llvm_unreachable(0); } case MVT::i16: unsigned VReg = RegInfo.createVirtualRegister(MSP430::GR16RegisterClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, RegVT); // If this is an 8-bit value, it is really passed promoted to 16 // bits. Insert an assert[sz]ext to capture this, then truncate to the // right size. if (VA.getLocInfo() == CCValAssign::SExt) ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::ZExt) ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); if (VA.getLocInfo() != CCValAssign::Full) ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); InVals.push_back(ArgValue); } } else { // Sanity check assert(VA.isMemLoc()); // Load the argument to a virtual register unsigned ObjSize = VA.getLocVT().getSizeInBits()/8; if (ObjSize > 2) { errs() << "LowerFormalArguments Unhandled argument type: " << EVT(VA.getLocVT()).getEVTString() << "\n"; } // Create the frame index object for this incoming parameter... int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true); // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i16); InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, 0)); } } return Chain; }
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (!ConstantSize) return SDValue(); uint64_t SizeVal = ConstantSize->getZExtValue(); if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) return SDValue(); /// If not DWORD aligned, it is more efficient to call the library. However /// if calling the library is not allowed (AlwaysInline), then soldier on as /// the code generated here is better than the long load-store sequence we /// would otherwise get. if (!AlwaysInline && (Align & 3) != 0) return SDValue(); // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256) return SDValue(); // ESI might be used as a base pointer, in that case we can't simply overwrite // the register. Fall back to generic code. const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo()); if (TRI->hasBasePointer(DAG.getMachineFunction()) && TRI->getBaseRegister() == X86::ESI) return SDValue(); MVT AVT; if (Align & 1) AVT = MVT::i8; else if (Align & 2) AVT = MVT::i16; else if (Align & 4) // DWORD aligned AVT = MVT::i32; else // QWORD aligned AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; unsigned UBytes = AVT.getSizeInBits() / 8; unsigned CountVal = SizeVal / UBytes; SDValue Count = DAG.getIntPtrConstant(CountVal); unsigned BytesLeft = SizeVal % UBytes; SDValue InFlag; Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : X86::ECX, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : X86::ESI, Src, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops); SmallVector<SDValue, 4> Results; Results.push_back(RepMovs); if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; EVT DstVT = Dst.getValueType(); EVT SrcVT = Src.getValueType(); EVT SizeVT = Size.getValueType(); Results.push_back(DAG.getMemcpy(Chain, dl, DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, DstVT)), DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, SrcVT)), DAG.getConstant(BytesLeft, SizeVT), Align, isVolatile, AlwaysInline, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results); }
static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) { // FIXME: Handle bittests someday assert(!LHS.getValueType().isFloatingPoint() && "We don't handle FP yet"); // FIXME: Handle jump negative someday MSP430CC::CondCodes TCC = MSP430CC::COND_INVALID; switch (CC) { default: llvm_unreachable("Invalid integer condition!"); case ISD::SETEQ: TCC = MSP430CC::COND_E; // aka COND_Z // Minor optimization: if LHS is a constant, swap operands, then the // constant can be folded into comparison. if (LHS.getOpcode() == ISD::Constant) std::swap(LHS, RHS); break; case ISD::SETNE: TCC = MSP430CC::COND_NE; // aka COND_NZ // Minor optimization: if LHS is a constant, swap operands, then the // constant can be folded into comparison. if (LHS.getOpcode() == ISD::Constant) std::swap(LHS, RHS); break; case ISD::SETULE: std::swap(LHS, RHS); // FALLTHROUGH case ISD::SETUGE: // Turn lhs u>= rhs with lhs constant into rhs u< lhs+1, this allows us to // fold constant into instruction. if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) { LHS = RHS; RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0)); TCC = MSP430CC::COND_LO; break; } TCC = MSP430CC::COND_HS; // aka COND_C break; case ISD::SETUGT: std::swap(LHS, RHS); // FALLTHROUGH case ISD::SETULT: // Turn lhs u< rhs with lhs constant into rhs u>= lhs+1, this allows us to // fold constant into instruction. if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) { LHS = RHS; RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0)); TCC = MSP430CC::COND_HS; break; } TCC = MSP430CC::COND_LO; // aka COND_NC break; case ISD::SETLE: std::swap(LHS, RHS); // FALLTHROUGH case ISD::SETGE: // Turn lhs >= rhs with lhs constant into rhs < lhs+1, this allows us to // fold constant into instruction. if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) { LHS = RHS; RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0)); TCC = MSP430CC::COND_L; break; } TCC = MSP430CC::COND_GE; break; case ISD::SETGT: std::swap(LHS, RHS); // FALLTHROUGH case ISD::SETLT: // Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows us to // fold constant into instruction. if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) { LHS = RHS; RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0)); TCC = MSP430CC::COND_GE; break; } TCC = MSP430CC::COND_L; break; } TargetCC = DAG.getConstant(TCC, MVT::i8); return DAG.getNode(MSP430ISD::CMP, dl, MVT::Glue, LHS, RHS); }
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256) return SDValue(); // If not DWORD aligned or size is more than the threshold, call the library. // The libc version is likely to be faster for these cases. It can use the // address value and run time information about the CPU. if ((Align & 3) != 0 || !ConstantSize || ConstantSize->getZExtValue() > Subtarget->getMaxInlineSizeThreshold()) { // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); if (const char *bzeroEntry = V && V->isNullValue() ? Subtarget->getBZeroEntry() : nullptr) { EVT IntPtr = TLI.getPointerTy(); Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; } // Otherwise have the target-independent code call memset. return SDValue(); } uint64_t SizeVal = ConstantSize->getZExtValue(); SDValue InFlag; EVT AVT; SDValue Count; ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); unsigned BytesLeft = 0; bool TwoRepStos = false; if (ValC) { unsigned ValReg; uint64_t Val = ValC->getZExtValue() & 255; // If the value is a constant, then we can potentially use larger sets. switch (Align & 3) { case 2: // WORD aligned AVT = MVT::i16; ValReg = X86::AX; Val = (Val << 8) | Val; break; case 0: // DWORD aligned AVT = MVT::i32; ValReg = X86::EAX; Val = (Val << 8) | Val; Val = (Val << 16) | Val; if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned AVT = MVT::i64; ValReg = X86::RAX; Val = (Val << 32) | Val; } break; default: // Byte aligned AVT = MVT::i8; ValReg = X86::AL; Count = DAG.getIntPtrConstant(SizeVal); break; } if (AVT.bitsGT(MVT::i8)) { unsigned UBytes = AVT.getSizeInBits() / 8; Count = DAG.getIntPtrConstant(SizeVal / UBytes); BytesLeft = SizeVal % UBytes; } Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT), InFlag); InFlag = Chain.getValue(1); } else { AVT = MVT::i8; Count = DAG.getIntPtrConstant(SizeVal); Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); InFlag = Chain.getValue(1); } Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : X86::ECX, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); if (TwoRepStos) { InFlag = Chain.getValue(1); Count = Size; EVT CVT = Count.getValueType(); SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : X86::ECX, Left, InFlag); InFlag = Chain.getValue(1); Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); } else if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; EVT AddrVT = Dst.getValueType(); EVT SizeVT = Size.getValueType(); Chain = DAG.getMemset(Chain, dl, DAG.getNode(ISD::ADD, dl, AddrVT, Dst, DAG.getConstant(Offset, AddrVT)), Src, DAG.getConstant(BytesLeft, SizeVT), Align, isVolatile, DstPtrInfo.getWithOffset(Offset)); } // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. return Chain; }
SDValue BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { // Blackfin target does not yet support tail call optimization. isTailCall = false; // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), DAG.getTarget(), ArgLocs, *DAG.getContext()); CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space CCInfo.AnalyzeCallOperands(Outs, CC_Blackfin); // Get the size of the outgoing arguments stack space requirement. unsigned ArgsSize = CCInfo.getNextStackOffset(); Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true)); SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<SDValue, 8> MemOpChains; // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; } // Arguments that can be passed on register must be kept at // RegsToPass vector if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc"); int Offset = VA.getLocMemOffset(); assert(Offset%4 == 0 && "Unaligned LocMemOffset"); assert(VA.getLocVT()==MVT::i32 && "Illegal CCValAssign type"); SDValue SPN = DAG.getCopyFromReg(Chain, dl, BF::SP, MVT::i32); SDValue OffsetN = DAG.getIntPtrConstant(Offset); OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN, MachinePointerInfo(),false, false, 0)); } } // Transform all store nodes into one single node because // all store nodes are independent of each other. if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. // The InFlag in necessary since all emitted instructions must be // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } // If the callee is a GlobalAddress node (quite common, every direct call is) // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); std::vector<EVT> NodeTys; NodeTys.push_back(MVT::Other); // Returns a chain NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. SDValue Ops[] = { Chain, Callee, InFlag }; Chain = DAG.getNode(BFISD::CALL, dl, NodeTys, Ops, InFlag.getNode() ? 3 : 2); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true), DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(), DAG.getTarget(), RVLocs, *DAG.getContext()); RVInfo.AnalyzeCallResult(Ins, RetCC_Blackfin); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &RV = RVLocs[i]; unsigned Reg = RV.getLocReg(); Chain = DAG.getCopyFromReg(Chain, dl, Reg, RVLocs[i].getLocVT(), InFlag); SDValue Val = Chain.getValue(0); InFlag = Chain.getValue(2); Chain = Chain.getValue(1); // Callee is responsible for extending any i16 return values. switch (RV.getLocInfo()) { case CCValAssign::SExt: Val = DAG.getNode(ISD::AssertSext, dl, RV.getLocVT(), Val, DAG.getValueType(RV.getValVT())); break; case CCValAssign::ZExt: Val = DAG.getNode(ISD::AssertZext, dl, RV.getLocVT(), Val, DAG.getValueType(RV.getValVT())); break; default: break; } // Truncate to valtype if (RV.getLocInfo() != CCValAssign::Full) Val = DAG.getNode(ISD::TRUNCATE, dl, RV.getValVT(), Val); InVals.push_back(Val); } return Chain; }
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { const ARMSubtarget &Subtarget = DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); // Do repeated 4-byte loads and stores. To be improved. // This requires 4-byte alignment. if ((Align & 3) != 0) return SDValue(); // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (!ConstantSize) return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, RTLIB::MEMCPY); uint64_t SizeVal = ConstantSize->getZExtValue(); if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, RTLIB::MEMCPY); unsigned BytesLeft = SizeVal & 3; unsigned NumMemOps = SizeVal >> 2; unsigned EmittedNumMemOps = 0; EVT VT = MVT::i32; unsigned VTSize = 4; unsigned i = 0; // Emit a maximum of 4 loads in Thumb1 since we have fewer registers const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6; SDValue TFOps[6]; SDValue Loads[6]; uint64_t SrcOff = 0, DstOff = 0; // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to // VLDM/VSTM and make this code emit it when appropriate. This would reduce // pressure on the general purpose registers. However this seems harder to map // onto the register allocator's view of the world. // The number of MEMCPY pseudo-instructions to emit. We use up to // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm // later on. This is a lower bound on the number of MEMCPY operations we must // emit. unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM; // Code size optimisation: do not inline memcpy if expansion results in // more instructions than the libary call. if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) { return SDValue(); } SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue); for (unsigned I = 0; I != NumMEMCPYs; ++I) { // Evenly distribute registers among MEMCPY operations to reduce register // pressure. unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs; unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps; Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src, DAG.getConstant(NumRegs, dl, MVT::i32)); Src = Dst.getValue(1); Chain = Dst.getValue(2); DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize); SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize); EmittedNumMemOps = NextEmittedNumMemOps; } if (BytesLeft == 0) return Chain; // Issue loads / stores for the trailing (1 - 3) bytes. auto getRemainingValueType = [](unsigned BytesLeft) { return (BytesLeft >= 2) ? MVT::i16 : MVT::i8; }; auto getRemainingSize = [](unsigned BytesLeft) { return (BytesLeft >= 2) ? 2 : 1; }; unsigned BytesLeftSave = BytesLeft; i = 0; while (BytesLeft) { VT = getRemainingValueType(BytesLeft); VTSize = getRemainingSize(BytesLeft); Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, dl, MVT::i32)), SrcPtrInfo.getWithOffset(SrcOff)); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; BytesLeft -= VTSize; } Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(TFOps, i)); i = 0; BytesLeft = BytesLeftSave; while (BytesLeft) { VT = getRemainingValueType(BytesLeft); VTSize = getRemainingSize(BytesLeft); TFOps[i] = DAG.getStore(Chain, dl, Loads[i], DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, DAG.getConstant(DstOff, dl, MVT::i32)), DstPtrInfo.getWithOffset(DstOff)); ++i; DstOff += VTSize; BytesLeft -= VTSize; } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(TFOps, i)); }