// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
// (see LegalizeDAG.cpp). This is slow and uses local memory.
// We use extract/build vector, just as LegalizeOp() did in LLVM 2.5.
SDValue NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  DebugLoc dl = Node->getDebugLoc();
  SmallVector<SDValue, 8> Ops;
  unsigned NumOperands = Node->getNumOperands();
  for (unsigned i = 0; i < NumOperands; ++i) {
    SDValue SubOp = Node->getOperand(i);
    EVT VVT = SubOp.getNode()->getValueType(0);
    EVT EltVT = VVT.getVectorElementType();
    unsigned NumSubElem = VVT.getVectorNumElements();
    for (unsigned j = 0; j < NumSubElem; ++j) {
      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
                                DAG.getIntPtrConstant(j)));
    }
  }
  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
                     &Ops[0], Ops.size());
}
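// --- Hedged illustration (not part of the backend) -------------------------
// A minimal scalar model of the two strategies, using hypothetical
// float2/float4 structs: the default expansion round-trips every element
// through a stack temporary (local memory on NVPTX, hence slow), while the
// lowering above builds the wide vector directly from extracted elements.
struct float2 { float x[2]; };
struct float4 { float x[4]; };

// Shape of the default ExpandVectorBuildThroughStack lowering: spill each
// element to a memory temporary, then reload the whole vector.
static float4 concatViaStack(float2 a, float2 b) {
  float tmp[4];
  tmp[0] = a.x[0]; tmp[1] = a.x[1];
  tmp[2] = b.x[0]; tmp[3] = b.x[1];
  float4 r;
  for (int i = 0; i < 4; ++i)
    r.x[i] = tmp[i];
  return r;
}

// Shape of the extract/build lowering used by LowerCONCAT_VECTORS above:
// the elements stay in registers, with no local-memory traffic.
static float4 concatViaExtract(float2 a, float2 b) {
  float4 r = {{a.x[0], a.x[1], b.x[0], b.x[1]}};
  return r;
}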
SDValue LanaiTargetLowering::LowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setReturnAddressIsTaken(true);

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    const unsigned Offset = -4;
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    return DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
                       false, false, false, 0);
  }

  // Return the link register, which contains the return address.
  // Mark it an implicit live-in.
  unsigned Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
}
SDValue
NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 DebugLoc dl, SelectionDAG &DAG) const {
  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  unsigned sizesofar = 0;
  unsigned idx = 0;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    SDValue theVal = OutVals[i];
    EVT theValType = theVal.getValueType();
    unsigned numElems = 1;
    if (theValType.isVector())
      numElems = theValType.getVectorNumElements();
    for (unsigned j = 0, je = numElems; j != je; ++j) {
      SDValue tmpval = theVal;
      if (theValType.isVector())
        tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                             theValType.getVectorElementType(), tmpval,
                             DAG.getIntPtrConstant(j));
      Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval
                                : NVPTXISD::MoveToRetval,
                          dl, MVT::Other, Chain,
                          DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
                          tmpval);
      if (theValType.isVector())
        sizesofar +=
            theValType.getVectorElementType().getStoreSizeInBits() / 8;
      else
        sizesofar += theValType.getStoreSizeInBits() / 8;
      ++idx;
    }
  }
  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
}
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
    return SDValue();

  /// If not DWORD aligned, it is more efficient to call the library. However
  /// if calling the library is not allowed (AlwaysInline), then soldier on as
  /// the code generated here is better than the long load-store sequence we
  /// would otherwise get.
  if (!AlwaysInline && (Align & 3) != 0)
    return SDValue();

  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // ESI might be used as a base pointer; in that case we can't simply
  // overwrite the register. Fall back to generic code.
  const X86RegisterInfo *TRI =
      static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
  if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
      TRI->getBaseRegister() == X86::ESI)
    return SDValue();

  MVT AVT;
  if (Align & 1)
    AVT = MVT::i8;
  else if (Align & 2)
    AVT = MVT::i16;
  else if (Align & 4)
    AVT = MVT::i32;  // DWORD aligned
  else
    AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;  // QWORD aligned

  unsigned UBytes = AVT.getSizeInBits() / 8;
  unsigned CountVal = SizeVal / UBytes;
  SDValue Count = DAG.getIntPtrConstant(CountVal);
  unsigned BytesLeft = SizeVal % UBytes;

  SDValue InFlag;
  Chain = DAG.getCopyToReg(Chain, dl,
                           Subtarget->is64Bit() ? X86::RCX : X86::ECX,
                           Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl,
                           Subtarget->is64Bit() ? X86::RDI : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl,
                           Subtarget->is64Bit() ? X86::RSI : X86::ESI,
                           Src, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);

  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT DstVT = Dst.getValueType();
    EVT SrcVT = Src.getValueType();
    EVT SizeVT = Size.getValueType();
    Results.push_back(DAG.getMemcpy(Chain, dl,
                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
                                                DAG.getConstant(Offset, DstVT)),
                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset, SrcVT)),
                                    DAG.getConstant(BytesLeft, SizeVT),
                                    Align, isVolatile, AlwaysInline,
                                    DstPtrInfo.getWithOffset(Offset),
                                    SrcPtrInfo.getWithOffset(Offset)));
  }

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}
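// --- Hedged illustration (not part of the backend) -------------------------
// A standalone model, with hypothetical names, of how the code above splits
// a constant-size copy between the REP_MOVS loop and the residual memcpy:
// the bulk moves in AVT-sized units, the remainder is handled separately.
#include <cassert>
#include <cstdint>

struct RepMovsSplit {
  uint64_t Count;      // number of AVT-sized elements for REP_MOVS
  uint64_t BytesLeft;  // residual bytes handled by a recursive memcpy
};

static RepMovsSplit splitCopy(uint64_t SizeVal, unsigned EltBytes) {
  assert(EltBytes != 0 && "element size must be nonzero");
  return { SizeVal / EltBytes, SizeVal % EltBytes };
}

// Example: splitCopy(13, 4) yields {3, 1} -- three 4-byte moves via
// rep movsd, then one trailing byte via the "last 1 - 7 bytes" path.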
SDValue
BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                                  CallingConv::ID CallConv, bool isVarArg,
                                  bool &isTailCall,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  const SmallVectorImpl<SDValue> &OutVals,
                                  const SmallVectorImpl<ISD::InputArg> &Ins,
                                  DebugLoc dl, SelectionDAG &DAG,
                                  SmallVectorImpl<SDValue> &InVals) const {
  // Blackfin target does not yet support tail call optimization.
  isTailCall = false;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 DAG.getTarget(), ArgLocs, *DAG.getContext());
  CCInfo.AllocateStack(12, 4);  // ABI requires 12 bytes stack space
  CCInfo.AnalyzeCallOperands(Outs, CC_Blackfin);

  // Get the size of the outgoing arguments stack space requirement.
  unsigned ArgsSize = CCInfo.getNextStackOffset();

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    // Arguments that are passed in registers must be kept in the
    // RegsToPass vector.
    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
      int Offset = VA.getLocMemOffset();
      assert(Offset % 4 == 0 && "Unaligned LocMemOffset");
      assert(VA.getLocVT() == MVT::i32 && "Illegal CCValAssign type");
      SDValue SPN = DAG.getCopyFromReg(Chain, dl, BF::SP, MVT::i32);
      SDValue OffsetN = DAG.getIntPtrConstant(Offset);
      OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN);
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN,
                                         MachinePointerInfo(),
                                         false, false, 0));
    }
  }

  // Transform all store nodes into one single node because
  // all store nodes are independent of each other.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token
  // chain and flag operands which copy the outgoing args into registers.
  // The InFlag is necessary since all emitted instructions must be
  // stuck together.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it. Likewise ExternalSymbol -> TargetExternalSymbol.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
  else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);

  std::vector<EVT> NodeTys;
  NodeTys.push_back(MVT::Other);  // Returns a chain
  NodeTys.push_back(MVT::Glue);   // Returns a flag for retval copy to use.
  SDValue Ops[] = { Chain, Callee, InFlag };
  Chain = DAG.getNode(BFISD::CALL, dl, NodeTys, Ops,
                      InFlag.getNode() ? 3 : 2);
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  InFlag = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 DAG.getTarget(), RVLocs, *DAG.getContext());
  RVInfo.AnalyzeCallResult(Ins, RetCC_Blackfin);

  // Copy all of the result registers out of their specified physregs.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &RV = RVLocs[i];
    unsigned Reg = RV.getLocReg();
    Chain = DAG.getCopyFromReg(Chain, dl, Reg, RVLocs[i].getLocVT(), InFlag);
    SDValue Val = Chain.getValue(0);
    InFlag = Chain.getValue(2);
    Chain = Chain.getValue(1);

    // Callee is responsible for extending any i16 return values.
    switch (RV.getLocInfo()) {
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, RV.getLocVT(), Val,
                        DAG.getValueType(RV.getValVT()));
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, RV.getLocVT(), Val,
                        DAG.getValueType(RV.getValVT()));
      break;
    default:
      break;
    }

    // Truncate to valtype.
    if (RV.getLocInfo() != CCValAssign::Full)
      Val = DAG.getNode(ISD::TRUNCATE, dl, RV.getValVT(), Val);
    InVals.push_back(Val);
  }

  return Chain;
}
/// LowerCall - functions arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall.
SDValue MBlazeTargetLowering::
LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
          bool isVarArg, bool &isTailCall,
          const SmallVectorImpl<ISD::OutputArg> &Outs,
          const SmallVectorImpl<SDValue> &OutVals,
          const SmallVectorImpl<ISD::InputArg> &Ins,
          DebugLoc dl, SelectionDAG &DAG,
          SmallVectorImpl<SDValue> &InVals) const {
  // MBlaze does not yet support tail call optimization.
  isTailCall = false;

  // The MBlaze requires stack slots for arguments passed to var arg
  // functions even if they are passed in registers.
  bool needsRegArgSlots = isVarArg;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // Variable-argument function calls require a minimum of 24 bytes of stack.
  if (isVarArg && NumBytes < 24)
    NumBytes = 24;

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    MVT RegVT = VA.getLocVT();
    SDValue Arg = OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
      break;
    }

    // Arguments that are passed in registers must be kept in the
    // RegsToPass vector.
    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Register can't get to this point...
      assert(VA.isMemLoc());

      // Since we are already passing values on the stack we don't
      // need to worry about creating additional slots for the
      // values passed via registers.
      needsRegArgSlots = false;

      // Create the frame index object for this incoming parameter.
      unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
      unsigned StackLoc = VA.getLocMemOffset() + 4;
      int FI = MFI->CreateFixedObject(ArgSize, StackLoc, true);

      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());

      // Emit an ISD::STORE which stores the
      // parameter value to a stack location.
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                         MachinePointerInfo(),
                                         false, false, 0));
    }
  }

  // If we need to reserve stack space for the arguments passed via registers
  // then create a fixed stack object at the beginning of the stack.
  if (needsRegArgSlots && TFI.hasReservedCallFrame(MF))
    MFI->CreateFixedObject(28, 0, true);

  // Transform all store nodes into one single node because all store
  // nodes are independent of each other.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token
  // chain and flag operands which copy the outgoing args into registers.
  // The InFlag is necessary since all emitted instructions must be
  // stuck together.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                        getPointerTy(), 0, 0);
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), 0);

  // MBlazeJmpLink = #chain, #target_address, #opt_in_flags...
  //               = Chain, Callee, Reg#1, Reg#2, ...
  //
  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));
  }

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  Chain = DAG.getNode(MBlazeISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
                         Ins, dl, DAG, InVals);
}
// TODO refactor?
SDValue AVM2TargetLowering::
LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
          bool isVarArg, bool &isTailCall,
          const SmallVectorImpl<ISD::OutputArg> &Outs,
          const SmallVectorImpl<SDValue> &OutVals,
          const SmallVectorImpl<ISD::InputArg> &Ins,
          DebugLoc DL, SelectionDAG &DAG,
          SmallVectorImpl<SDValue> &InVals) const {
  // AVM2 target does not yet support tail call optimization.
  isTailCall = false;

  // Count the size of the outgoing arguments.
  unsigned ArgsSize = 0;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    switch (OutVals[i].getValueType().getSimpleVT().SimpleTy) {
    default: assert(0 && "Unknown value type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::f32:
      ArgsSize += 4;
      break;
    case MVT::i64:
    case MVT::f64:
      ArgsSize += 8;
      break;
    }
  }

  // Round the argument area up to the stack alignment.
  unsigned Align = getTargetMachine().getFrameLowering()->getStackAlignment();
  unsigned AlignAdjust = (Align - (ArgsSize % Align)) % Align;
  ArgsSize += AlignAdjust;

  unsigned ArgOffset = 0;

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs, CC_AVM2_32);

  SDValue StackPtr;
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    SDValue Val = OutVals[i];
    EVT ObjectVT = Val.getValueType();
    SDValue ValToStore(0, 0);
    unsigned ObjSize;
    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16: {
      CCValAssign &VA = ArgLocs[i];
      // Promote the integer to 32 bits. If the input type is signed, use a
      // sign extend, otherwise use a zero extend.
      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
      if (VA.getLocInfo() == CCValAssign::SExt) {
        ExtendKind = ISD::SIGN_EXTEND;
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ExtendKind = ISD::ZERO_EXTEND;
      } else if (VA.getLocInfo() == CCValAssign::AExt) {
        ExtendKind = ISD::ANY_EXTEND;
      }
      Val = DAG.getNode(ExtendKind, DL, MVT::i32, Val);
      // FALL THROUGH
    }
    case MVT::i32:  // TODO unify
      ObjSize = 4;
      ValToStore = Val;
      break;
    case MVT::f32:
      ObjSize = 4;
      ValToStore = Val;
      break;
    case MVT::f64:
      ObjSize = 8;
      ValToStore = Val;
      break;
    case MVT::i64:
      ObjSize = 8;
      ValToStore = Val;  // Whole thing is passed in memory.
      break;
    }

    if (ValToStore.getNode()) {
      SDValue StackPtr = DAG.getRegister(AVM2::ESP, MVT::i32);
      SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, DL, MVT::i32, StackPtr, PtrOff);
      if (ObjectVT == MVT::i64) {
        // Two stores for 64 bit.
        SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                                 ValToStore, DAG.getConstant(0, MVT::i32));
        SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                                 ValToStore, DAG.getConstant(1, MVT::i32));
        SDValue PtrOff4 = DAG.getNode(ISD::ADD, DL, MVT::i32, PtrOff,
                                      DAG.getConstant(4, MVT::i32));
        MemOpChains.push_back(DAG.getStore(Chain, DL, Lo, PtrOff,
                                           MachinePointerInfo(),
                                           false, false, 0));
        MemOpChains.push_back(DAG.getStore(Chain, DL, Hi, PtrOff4,
                                           MachinePointerInfo(),
                                           false, false, 0));
      } else
        MemOpChains.push_back(DAG.getStore(Chain, DL, ValToStore, PtrOff,
                                           MachinePointerInfo(),
                                           false, false, 0));
    }
    ArgOffset += ObjSize;
  }

  // Emit all stores; make sure they occur before any copies into physregs.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  std::vector<EVT> NodeTys;
  NodeTys.push_back(MVT::Other);  // Returns a chain
  NodeTys.push_back(MVT::Glue);   // Returns a flag for retval copy to use.
  SDValue Ops[] = { Chain, Callee };
  Chain = DAG.getNode(AVM2ISD::CALL, DL, NodeTys, Ops, 2);
  SDValue InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
                             DAG.getIntPtrConstant(0, true), InFlag);

  // If the function returns void, just return the chain.
  if (Ins.empty()) {
    return Chain;
  }
  InFlag = Chain.getValue(1);

  /*
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CallConv, isVarArg, DAG.getTarget(), RVLocs,
                 *DAG.getContext());
  RVInfo.AnalyzeCallResult(Ins, RetCC_AVM2_32);
  const MVT::SimpleValueType RetTyVT =
      RVLocs.size() == 0 ? MVT::isVoid : RVLocs[0].getValVT().SimpleTy;

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    unsigned Reg = RVLocs[i].getLocReg();
    Chain = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getValVT(),
                               InFlag).getValue(1);
    InFlag = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }
  */

  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CallConv, isVarArg, DAG.getTarget(), RVLocs,
                 *DAG.getContext());
  if (!RVInfo.CheckReturn(Ins, RetCC_AVM2_32)) {
    report_fatal_error("Flascc does not yet support LLVM SIMD intrinsics.\n");
  }

  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    const MVT::SimpleValueType RetTyVT = Ins[i].VT.SimpleTy;
    SDValue RetVal;
    if (RetTyVT != MVT::isVoid) {
      switch (RetTyVT) {
      default: assert(0 && "Unknown value type to return!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16: {
        assert(i <= 1 && "More than 2 return values for i1/i8/i16.");
        if (i == 0) {
          RetVal = DAG.getCopyFromReg(Chain, DL, AVM2::EAX, MVT::i32, InFlag);
        } else {
          RetVal = DAG.getCopyFromReg(Chain, DL, AVM2::EDX, MVT::i32, InFlag);
        }
        Chain = RetVal.getValue(1);
        InFlag = Chain.getValue(2);

        // Add a note to keep track of whether it is sign or zero extended.
        CCValAssign &VA = RVLocs[i];
        ISD::NodeType AssertKind = ISD::AssertZext;
        if (VA.getLocInfo() == CCValAssign::SExt) {
          AssertKind = ISD::AssertSext;
        }
        RetVal = DAG.getNode(AssertKind, DL, MVT::i32, RetVal,
                             DAG.getValueType(RetTyVT));
        RetVal = DAG.getNode(ISD::TRUNCATE, DL, RetTyVT, RetVal);
        break;
      }
      case MVT::i32:
        assert(i <= 1 && "More than 2 return values for i32.");
        if (i == 0) {
          RetVal = DAG.getCopyFromReg(Chain, DL, AVM2::EAX, MVT::i32, InFlag);
        } else {
          RetVal = DAG.getCopyFromReg(Chain, DL, AVM2::EDX, MVT::i32, InFlag);
        }
        Chain = RetVal.getValue(1);
        InFlag = Chain.getValue(2);
        break;
      case MVT::f32:
        assert(i == 0 && "More than 1 return value for f32.");
        RetVal = DAG.getCopyFromReg(Chain, DL, AVM2::SST0, MVT::f32, InFlag);
        Chain = RetVal.getValue(1);
        InFlag = Chain.getValue(2);
        break;
      case MVT::f64:
        assert(i == 0 && "More than 1 return value for f64.");
        RetVal = DAG.getCopyFromReg(Chain, DL, AVM2::ST0, MVT::f64, InFlag);
        Chain = RetVal.getValue(1);
        InFlag = Chain.getValue(2);
        break;
      case MVT::i64: {
        assert(i == 0 && "More than 1 return value for i64.");
        SDValue Lo = DAG.getCopyFromReg(Chain, DL, AVM2::EAX, MVT::i32,
                                        InFlag);
        SDValue Hi = DAG.getCopyFromReg(Lo.getValue(1), DL, AVM2::EDX,
                                        MVT::i32, Lo.getValue(2));
        RetVal = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
        Chain = Hi.getValue(1);
        InFlag = Chain.getValue(2);
      } break;
      }
    }
    InVals.push_back(Chain.getValue(0));
  }

#ifdef _DEBUG
  int InVals_size = InVals.size();
  int Ins_size = Ins.size();
  const char *err;
  // Pre-check conditions that will assert in TargetLowering::LowerCallTo().
  if (InVals_size != Ins_size) {
    // "LowerCall didn't emit the correct number of values!"
    Ins_size = InVals_size;
  }
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (!InVals[i].getNode()) {
      err = "LowerCall emitted a null value!";
    }
    EVT vt = Ins[i].VT;
    if (vt != InVals[i].getValueType()) {
      err = "LowerCall emitted a value with the wrong type!";
      EVT vt1 = Ins[i].VT;
      EVT vt2 = InVals[i].getValueType();
      vt1 = vt2;
    }
  }
#endif

  return Chain;
}
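// --- Hedged illustration (not part of the backend) -------------------------
// A minimal model of the ArgsSize padding near the top of LowerCall above,
// assuming a power-of-two stack alignment (helper name is hypothetical).
#include <cassert>
#include <cstdint>

static uint64_t alignArgsSize(uint64_t ArgsSize, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power-of-two align");
  return (ArgsSize + Align - 1) & ~(Align - 1);
}

// For Align == 16: 0 -> 0, 1 -> 16, 20 -> 32. This matches
// ArgsSize += (Align - (ArgsSize % Align)) % Align in the code above.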
SDValue AVM2TargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  switch (Op.getOpcode()) {
  default: {
    assert(0 && "Shouldn't custom lower this");
  }
  // XXX TODO isn't this just Promote?
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FPOW:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM: {
    SDValue L = Op.getOperand(0);
    SDValue R = Op.getOperand(1);
    assert(L.getValueType() == R.getValueType() &&
           L.getValueType() == MVT::f32);
    L = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, L);
    R = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, R);
    SDValue result = DAG.getNode(Op.getOpcode(), DL, MVT::f64, L, R);
    return DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, result,
                       DAG.getIntPtrConstant(0));
  }
  // XXX TODO isn't this just Promote?
  case ISD::FSIN:
  case ISD::FCOS:
  case ISD::FSQRT:
  case ISD::FNEG:
  case ISD::FABS: {
    SDValue L = Op.getOperand(0);
    assert(L.getValueType() == MVT::f32);
    L = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, L);
    SDValue result = DAG.getNode(Op.getOpcode(), DL, MVT::f64, L);
    return DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, result,
                       DAG.getIntPtrConstant(0));
  }
  case ISD::SELECT_CC: {
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
    SDValue L = Op.getOperand(0);
    SDValue R = Op.getOperand(1);
    SDValue A = Op.getOperand(2);
    SDValue B = Op.getOperand(3);

    // Promote everything that is an f32.
    if (L.getValueType() == MVT::f32) {
      L = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, L);
      R = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, R);
    }
    if (A.getValueType() == MVT::f32) {
      A = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, A);
      B = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, B);
    }

    bool IsFloat = L.getValueType() == MVT::f64;
    bool RIsFloat = A.getValueType() == MVT::f64;
    SDValue result =
        DAG.getNode(RIsFloat ? AVM2ISD::FSL : AVM2ISD::SL, DL,
                    RIsFloat ? MVT::f64 : MVT::i32,
                    DAG.getNode(IsFloat ? AVM2ISD::FCNOP : AVM2ISD::CNOP,
                                DL, MVT::i32,
                                DAG.getTargetConstant((int)CC, MVT::i32),
                                L, R),
                    A, B);
    if (Op.getValueType() == MVT::f32) {
      result = DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, result,
                           DAG.getIntPtrConstant(0));
    }
    return result;
  }
  // case ISD::BR_CC: { return LowerBR_CC(Op, DAG); }
  case ISD::BR_CC: {
    SDValue Chain = Op.getOperand(0);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
    SDValue L = Op.getOperand(2);
    SDValue R = Op.getOperand(3);
    SDValue Dest = Op.getOperand(4);

    // If this is a br_cc of a "setcc", and if the setcc got lowered into
    // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
    if (L.getValueType() == MVT::f32) {
      L = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, L);
      R = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, R);
    } else if (L.getValueType() == MVT::i32
               /*|| L.getValueType() == MVT::i64*/) {
      if (ConstantSDNode *node = dyn_cast<ConstantSDNode>(L)) {
        L = DAG.getTargetConstant((int)node->getSExtValue(), MVT::i32);
      }
      if (ConstantSDNode *node = dyn_cast<ConstantSDNode>(R)) {
        R = DAG.getTargetConstant((int)node->getSExtValue(), MVT::i32);
      }
    }

    int Opc;
    int Cmp;
    bool IsFloat = L.getValueType() == MVT::f64;
    if (IsFloat) {
      Opc = AVM2ISD::FCBR;
      Cmp = AVM2ISD::FCNOP;
    } else {
      Opc = AVM2ISD::CBR;
      Cmp = AVM2ISD::CNOP;
    }
    SDValue NOP = DAG.getNode(Cmp, DL, MVT::i32,
                              DAG.getTargetConstant(CC, MVT::i32), L, R);
    return DAG.getNode(Opc, DL, MVT::Other, Chain, NOP, Dest);
  }
  // XXX TODO can't this just be Legal?
  case ISD::ConstantFP: {
    union {
      double d;
      struct { unsigned a, b; } i;
    } u;
    APFloat apf(cast<ConstantFPSDNode>(Op)->getValueAPF());
    bool losesInfo = false;
    apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
                &losesInfo);
    u.d = apf.convertToDouble();
    SDValue result =
        DAG.getNode(AVM2ISD::FNOP, DL, MVT::f64,
                    DAG.getNode(AVM2ISD::F64, DL, MVT::f64,
                                DAG.getTargetConstant(u.i.a, MVT::i32),
                                DAG.getTargetConstant(u.i.b, MVT::i32)));
    if (Op.getValueType() == MVT::f32) {
      result = DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, result,
                           DAG.getIntPtrConstant(0));
    }
    return result;
  }
  case ISD::ConstantPool: {
    const Constant *C = cast<ConstantPoolSDNode>(Op)->getConstVal();
    SDValue CP = DAG.getTargetConstantPool(
        C, MVT::i32, cast<ConstantPoolSDNode>(Op)->getAlignment());
    return CP;
  }
  case ISD::BRIND: { return LowerBRIND(Op, DAG); }
  case ISD::BlockAddress: { return LowerBlockAddress(Op, DAG); }
  case ISD::TRAMPOLINE: { return LowerTRAMPOLINE(Op, DAG); }
  case ISD::GlobalAddress: {
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
    if (GV->isDeclaration()) {
      // TODO ok? probably a leak
      std::string Str = "\2" + GV->getNameStr();
      const char *N = strdup(Str.c_str());
      return DAG.getNode(AVM2ISD::INOP, DL, MVT::i32,
                         DAG.getTargetExternalSymbol(N, MVT::i32));
    }
    EVT VT(MVT::i32);
    return DAG.getNode(AVM2ISD::INOP, DL, MVT::i32,
                       DAG.getTargetGlobalAddress(GV, DL, VT));
  }
  case ISD::ExternalSymbol: {
    const char *N = cast<ExternalSymbolSDNode>(Op)->getSymbol();
    const Module *M = DAG.getMachineFunction().getFunction()->getParent();
    const GlobalValue *GV = M->getGlobalVariable(N);
    if (GV && !GV->isDeclaration()) {
      EVT VT(MVT::i32);
      return DAG.getNode(AVM2ISD::INOP, DL, MVT::i32,
                         DAG.getTargetGlobalAddress(GV, DL, VT));
    }
    return DAG.getNode(AVM2ISD::INOP, DL, MVT::i32,
                       DAG.getTargetExternalSymbol(N, MVT::i32));
  }
  case ISD::VASTART: {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue Offset =
        DAG.getNode(ISD::ADD, DL, MVT::i32,
                    DAG.getRegister(AVM2::EBP, MVT::i32),
                    DAG.getConstant(DAG.getMachineFunction()
                                        .getInfo<AVM2MachineFunctionInfo>()
                                        ->getVarArgsFrameOffset(),
                                    MVT::i32));
    /*
    http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h?revision=46585&view=markup
    Create a new class, MemOperand, for describing memory references in the
    backend. Introduce a new SDNode type, MemOperandSDNode, for holding a
    MemOperand in the SelectionDAG IR, and add a MemOperand list to
    MachineInstr, and code to manage them. Remove the offset field from
    SrcValueSDNode; uses of SrcValueSDNode that were using it are all using
    MemOperandSDNode now. Also, begin updating some getLoad and getStore
    calls to use the PseudoSourceValue objects. Most of this was written by
    Florian Brander; some reorganization and updating to TOT by me.
    */
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
                        MachinePointerInfo(SV), false, false, 0);
  }
  case ISD::VAARG: {
    SDNode *Node = Op.getNode();
    MVT::SimpleValueType VT = Node->getValueType(0).getSimpleVT().SimpleTy;
    SDValue InChain = Node->getOperand(0);
    SDValue VAListPtr = Node->getOperand(1);
    const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
    SDValue VAList = DAG.getLoad(getPointerTy(), DL, InChain, VAListPtr,
                                 MachinePointerInfo(SV), false, false, 0);
    // Increment the pointer, VAList, to the next vaarg.
    SDValue NextPtr =
        DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
                    DAG.getConstant(Node->getValueType(0).getSizeInBits() / 8,
                                    getPointerTy()));
    // Store the incremented VAList to the legalized pointer.
    InChain = DAG.getStore(VAList.getValue(1), DL, NextPtr, VAListPtr,
                           MachinePointerInfo(SV), false, false, 0);
    // Load the actual argument out of the pointer VAList.
    return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
                       false, false, 0);
  }
  case ISD::DYNAMIC_STACKALLOC: {
    SDValue Chain = Op.getOperand(0);  // Legalize the chain.
    SDValue Size = Op.getOperand(1);   // Legalize the size.

    unsigned SPReg = AVM2::ESP;
    SDValue SP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i32);
    SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);  // Value

    unsigned Align =
        getTargetMachine().getFrameLowering()->getStackAlignment();
    assert(!(Align & (Align - 1)));  // must be power of 2
    SDValue NewSPAligned = DAG.getNode(ISD::AND, DL, MVT::i32, NewSP,
                                       DAG.getIntPtrConstant(~(Align - 1)));
    Chain = DAG.getCopyToReg(SP.getValue(1), DL, SPReg,
                             NewSPAligned);  // Output chain

    std::vector<EVT> Tys;
    Tys.push_back(MVT::i32);
    Tys.push_back(MVT::Other);
    SDValue Ops[2] = { NewSPAligned, Chain };
    return DAG.getNode(ISD::MERGE_VALUES, DL, Tys, Ops, 2);
  }
  /*
  http://llvm.org/viewvc/llvm-project?view=rev&revision=78142
  Major calling convention code refactoring. Instead of awkwardly encoding
  calling-convention information with ISD::CALL, ISD::FORMAL_ARGUMENTS,
  ISD::RET, and ISD::ARG_FLAGS nodes, TargetLowering provides three virtual
  functions for targets to override: LowerFormalArguments, LowerCall, and
  LowerRet, which replace the custom lowering done on the special nodes.
  They provide the same information, but in a more immediately usable
  format. This also reworks much of the target-independent tail call logic.
  The decision of whether or not to perform a tail call is now cleanly split
  between target-independent portions, and the target dependent portion in
  IsEligibleForTailCallOptimization. This also synchronizes all in-tree
  targets, to help enable future refactoring and feature work.

  setOperationAction(ISD::RET, MVT::Other, Custom);

  case ISD::RET: {
    SDValue Copy;
    switch (Op.getNumOperands()) {
    default:
      assert(0 && "Do not know how to return this many arguments!");
      abort();
    case 1:
      return DAG.getNode(AVM2ISD::RET_FLAG, DL, MVT::Other, Op.getOperand(0),
                         DAG.getConstant(0, MVT::i32));
    case 3: {
      unsigned ArgReg;
      switch (Op.getOperand(1).getValueType()) {
      default: assert(0 && "Unknown type to return!");
      case MVT::i32: ArgReg = AVM2::EAX; break;
      case MVT::f32: ArgReg = AVM2::SST0; break;
      case MVT::f64: ArgReg = AVM2::ST0; break;
      }
      Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1),
                              SDValue());
      break;
    }
    case 5:
      Copy = DAG.getCopyToReg(Op.getOperand(0), AVM2::EDX, Op.getOperand(3),
                              SDValue());
      Copy = DAG.getCopyToReg(Copy, AVM2::EAX, Op.getOperand(1),
                              Copy.getValue(1));
      break;
    }
    return DAG.getNode(AVM2ISD::RET_FLAG, DL, MVT::Other, Copy,
                       Copy.getValue(1));
  }
  */
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::EH_SJLJ_SETJMP:
    return LowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:
    return LowerEH_SJLJ_LONGJMP(Op, DAG);
  case ISD::EH_SJLJ_DISPATCHSETUP:
    return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG, SubTarget);
  // Return address. Currently unimplemented.
  case ISD::RETURNADDR:
    break;
  }
  return SDValue();
}
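// --- Hedged illustration (not part of the backend) -------------------------
// The "isn't this just Promote?" TODOs in LowerOperation above ask whether
// the f32 cases could be expressed declaratively instead. A sketch, assuming
// the legalizer of this LLVM vintage can promote these FP opcodes (it may
// not), of what that registration might look like in the AVM2TargetLowering
// constructor; the custom lowering above performs the same
// FP_EXTEND/FP_ROUND dance by hand:
//
//   setOperationAction(ISD::FADD, MVT::f32, Promote);
//   setOperationAction(ISD::FSUB, MVT::f32, Promote);
//   setOperationAction(ISD::FMUL, MVT::f32, Promote);
//   setOperationAction(ISD::FDIV, MVT::f32, Promote);
//   setOperationAction(ISD::FSIN, MVT::f32, Promote);
//   setOperationAction(ISD::FCOS, MVT::f32, Promote);
//   AddPromotedToType(ISD::FADD, MVT::f32, MVT::f64);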
std::pair<SDValue, SDValue>
IA64TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
                                bool RetSExt, bool RetZExt, bool isVarArg,
                                bool isInreg, unsigned CallingConv,
                                bool isTailCall, SDValue Callee,
                                ArgListTy &Args, SelectionDAG &DAG,
                                DebugLoc dl) {
  MachineFunction &MF = DAG.getMachineFunction();

  unsigned NumBytes = 16;
  unsigned outRegsUsed = 0;

  if (Args.size() > 8) {
    NumBytes += (Args.size() - 8) * 8;
    outRegsUsed = 8;
  } else {
    outRegsUsed = Args.size();
  }

  // FIXME? this WILL fail if we ever try to pass around an arg that
  // consumes more than a single output slot (a 'real' double, int128,
  // some sort of aggregate etc.), as we'll underestimate how many 'outX'
  // registers we use. Hopefully, the assembler will notice.
  MF.getInfo<IA64FunctionInfo>()->outRegsUsed =
      std::max(outRegsUsed, MF.getInfo<IA64FunctionInfo>()->outRegsUsed);

  // Keep stack frame 16-byte aligned.
  // assert(NumBytes==((NumBytes+15) & ~15) &&
  //        "stack frame not 16-byte aligned!");
  NumBytes = (NumBytes + 15) & ~15;

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr;
  std::vector<SDValue> Stores;
  std::vector<SDValue> Converts;
  std::vector<SDValue> RegValuesToPass;
  unsigned ArgOffset = 16;

  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    SDValue Val = Args[i].Node;
    MVT ObjectVT = Val.getValueType();
    SDValue ValToStore(0, 0), ValToConvert(0, 0);
    unsigned ObjSize = 8;
    switch (ObjectVT.getSimpleVT()) {
    default: assert(0 && "unexpected argument type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32: {
      // Promote to 64 bits, sign/zero extending based on type
      // of the argument.
      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
      if (Args[i].isSExt)
        ExtendKind = ISD::SIGN_EXTEND;
      else if (Args[i].isZExt)
        ExtendKind = ISD::ZERO_EXTEND;
      Val = DAG.getNode(ExtendKind, dl, MVT::i64, Val);
      // XXX: fall through
    }
    case MVT::i64:
      // ObjSize = 8;
      if (RegValuesToPass.size() >= 8) {
        ValToStore = Val;
      } else {
        RegValuesToPass.push_back(Val);
      }
      break;
    case MVT::f32:
      // Promote to 64 bits.
      Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
      // XXX: fall through
    case MVT::f64:
      if (RegValuesToPass.size() >= 8) {
        ValToStore = Val;
      } else {
        RegValuesToPass.push_back(Val);
        if (1 /* TODO: if(calling external or variadic function) */) {
          // Additionally pass this FP value as an int.
          ValToConvert = Val;
        }
      }
      break;
    }

    if (ValToStore.getNode()) {
      if (!StackPtr.getNode()) {
        StackPtr = DAG.getRegister(IA64::r12, MVT::i64);
      }
      SDValue PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, PtrOff);
      Stores.push_back(DAG.getStore(Chain, dl, ValToStore, PtrOff, NULL, 0));
      ArgOffset += ObjSize;
    }
    if (ValToConvert.getNode()) {
      Converts.push_back(DAG.getNode(IA64ISD::GETFD, dl, MVT::i64,
                                     ValToConvert));
    }
  }

  // Emit all stores, make sure they occur before any copies into physregs.
  if (!Stores.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &Stores[0], Stores.size());

  static const unsigned IntArgRegs[] = {
    IA64::out0, IA64::out1, IA64::out2, IA64::out3,
    IA64::out4, IA64::out5, IA64::out6, IA64::out7
  };

  static const unsigned FPArgRegs[] = {
    IA64::F8, IA64::F9, IA64::F10, IA64::F11,
    IA64::F12, IA64::F13, IA64::F14, IA64::F15
  };

  SDValue InFlag;

  // Save the current GP, SP and RP. FIXME: do we need to do all 3 always?
  SDValue GPBeforeCall = DAG.getCopyFromReg(Chain, IA64::r1, MVT::i64,
                                            InFlag);
  Chain = GPBeforeCall.getValue(1);
  InFlag = Chain.getValue(2);
  SDValue SPBeforeCall = DAG.getCopyFromReg(Chain, IA64::r12, MVT::i64,
                                            InFlag);
  Chain = SPBeforeCall.getValue(1);
  InFlag = Chain.getValue(2);
  SDValue RPBeforeCall = DAG.getCopyFromReg(Chain, IA64::rp, MVT::i64,
                                            InFlag);
  Chain = RPBeforeCall.getValue(1);
  InFlag = Chain.getValue(2);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing integer args into regs out[0-7]
  // mapped 1:1 and the FP args into regs F8-F15 "lazily".
  // TODO: for performance, we should only copy FP args into int regs when we
  // know this is required (i.e. for variadic or external (unknown) functions).

  // First do the FP->(integer representation) conversions; these are
  // flagged for now, but shouldn't have to be (TODO).
  unsigned seenConverts = 0;
  for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
    if (RegValuesToPass[i].getValueType().isFloatingPoint()) {
      Chain = DAG.getCopyToReg(Chain, IntArgRegs[i],
                               Converts[seenConverts++], InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // Next copy args into the usual places; these are flagged.
  unsigned usedFPArgs = 0;
  for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain,
                             RegValuesToPass[i].getValueType().isInteger()
                                 ? IntArgRegs[i]
                                 : FPArgRegs[usedFPArgs++],
                             RegValuesToPass[i], InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is) turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  /*
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i64);
  }
  */

  std::vector<MVT> NodeTys;
  std::vector<SDValue> CallOperands;
  NodeTys.push_back(MVT::Other);  // Returns a chain
  NodeTys.push_back(MVT::Flag);   // Returns a flag for retval copy to use.
  CallOperands.push_back(Chain);
  CallOperands.push_back(Callee);

  // Emit the call itself.
  if (InFlag.getNode())
    CallOperands.push_back(InFlag);
  else
    assert(0 && "this should never happen!\n");

  // To make way for a hack:
  Chain = DAG.getNode(IA64ISD::BRCALL, dl, NodeTys,
                      &CallOperands[0], CallOperands.size());
  InFlag = Chain.getValue(1);

  // Restore the GP, SP and RP after the call.
  Chain = DAG.getCopyToReg(Chain, IA64::r1, GPBeforeCall, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, IA64::r12, SPBeforeCall, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, IA64::rp, RPBeforeCall, InFlag);
  InFlag = Chain.getValue(1);

  std::vector<MVT> RetVals;
  RetVals.push_back(MVT::Other);
  RetVals.push_back(MVT::Flag);

  MVT RetTyVT = getValueType(RetTy);
  SDValue RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT.getSimpleVT()) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1: {
      // Bools are just like other integers (returned in r8).
      // We *could* fall through to the truncate below, but this saves a
      // few redundant predicate ops.
      SDValue boolInR8 = DAG.getCopyFromReg(Chain, IA64::r8, MVT::i64,
                                            InFlag);
      InFlag = boolInR8.getValue(2);
      Chain = boolInR8.getValue(1);
      SDValue zeroReg = DAG.getCopyFromReg(Chain, IA64::r0, MVT::i64,
                                           InFlag);
      InFlag = zeroReg.getValue(2);
      Chain = zeroReg.getValue(1);
      RetVal = DAG.getSetCC(dl, MVT::i1, boolInR8, zeroReg, ISD::SETNE);
      break;
    }
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, IA64::r8, MVT::i64, InFlag);
      Chain = RetVal.getValue(1);

      // Keep track of whether it is sign or zero extended. (todo: bools?)
      /* XXX
      RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext
                                             : ISD::AssertZext,
                           dl, MVT::i64, RetVal, DAG.getValueType(RetTyVT));
      */
      RetVal = DAG.getNode(ISD::TRUNCATE, dl, RetTyVT, RetVal);
      break;
    case MVT::i64:
      RetVal = DAG.getCopyFromReg(Chain, IA64::r8, MVT::i64, InFlag);
      Chain = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);  // XXX dead
      break;
    case MVT::f32:
      RetVal = DAG.getCopyFromReg(Chain, IA64::F8, MVT::f64, InFlag);
      Chain = RetVal.getValue(1);
      RetVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, RetVal,
                           DAG.getIntPtrConstant(0));
      break;
    case MVT::f64:
      RetVal = DAG.getCopyFromReg(Chain, IA64::F8, MVT::f64, InFlag);
      Chain = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);  // XXX dead
      break;
    }
  }

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), SDValue());

  return std::make_pair(RetVal, Chain);
}
// LowerCCCCallTo - functions arguments are copied from virtual regs to
// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
SDValue LanaiTargetLowering::LowerCCCCallTo(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool IsVarArg,
    bool IsTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();

  NumFixedArgs = 0;
  if (IsVarArg && G) {
    const Function *CalleeFn = dyn_cast<Function>(G->getGlobal());
    if (CalleeFn)
      NumFixedArgs = CalleeFn->getFunctionType()->getNumParams();
  }
  if (NumFixedArgs)
    CCInfo.AnalyzeCallOperands(Outs, CC_Lanai32_VarArg);
  else {
    if (CallConv == CallingConv::Fast)
      CCInfo.AnalyzeCallOperands(Outs, CC_Lanai32_Fast);
    else
      CCInfo.AnalyzeCallOperands(Outs, CC_Lanai32);
  }

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // Create local copies for byval args.
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned I = 0, E = Outs.size(); I != E; ++I) {
    ISD::ArgFlagsTy Flags = Outs[I].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[I];
    unsigned Size = Flags.getByValSize();
    unsigned Align = Flags.getByValAlign();

    int FI = MFI->CreateStackObject(Size, Align, false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, MVT::i32);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false,
                          /*IsTailCall=*/false, MachinePointerInfo(),
                          MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  Chain = DAG.getCALLSEQ_START(
      Chain,
      DAG.getConstant(NumBytes, DL, getPointerTy(DAG.getDataLayout()), true),
      DL);

  SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
  SmallVector<SDValue, 12> MemOpChains;
  SDValue StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned I = 0, J = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue Arg = OutVals[I];
    ISD::ArgFlagsTy Flags = Outs[I].Flags;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    default:
      llvm_unreachable("Unknown loc info!");
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      Arg = ByValArgs[J++];

    // Arguments that are passed in registers must be kept in the RegsToPass
    // vector.
    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());

      if (StackPtr.getNode() == 0)
        StackPtr = DAG.getCopyFromReg(Chain, DL, Lanai::SP,
                                      getPointerTy(DAG.getDataLayout()));

      SDValue PtrOff =
          DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()),
                      StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      MemOpChains.push_back(DAG.getStore(Chain, DL, Arg, PtrOff,
                                         MachinePointerInfo(),
                                         false, false, 0));
    }
  }

  // Transform all store nodes into one single node because all store nodes
  // are independent of each other.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                        ArrayRef<SDValue>(&MemOpChains[0],
                                          MemOpChains.size()));

  SDValue InFlag;

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers. The
  // InFlag is necessary since all emitted instructions must be stuck
  // together.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is) turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it. Likewise ExternalSymbol -> TargetExternalSymbol.
  uint8_t OpFlag = LanaiII::MO_NO_FLAG;
  if (G) {
    Callee = DAG.getTargetGlobalAddress(
        G->getGlobal(), DL, getPointerTy(DAG.getDataLayout()), 0, OpFlag);
  } else if (ExternalSymbolSDNode *E =
                 dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(
        E->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlag);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add a register mask operand representing the call-preserved registers.
  // TODO: Should return-twice functions be handled?
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  Chain = DAG.getNode(LanaiISD::CALL, DL, NodeTys,
                      ArrayRef<SDValue>(&Ops[0], Ops.size()));
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getConstant(NumBytes, DL, getPointerTy(DAG.getDataLayout()), true),
      DAG.getConstant(0, DL, getPointerTy(DAG.getDataLayout()), true),
      InFlag, DL);
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
                         InVals);
}
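// --- Hedged illustration (not part of the backend) -------------------------
// A source-level model, with hypothetical types, of the byval handling in
// LowerCCCCallTo above: the caller materializes a private copy of the
// aggregate (CreateStackObject + getMemcpy) and passes the copy's address
// (FIPtr) in place of the original pointer.
struct Big { int data[16]; };

void callee(Big *byvalCopy);

void caller(Big &arg) {
  Big copy = arg;  // corresponds to the stack object + memcpy
  callee(&copy);   // FIPtr replaces Arg as the outgoing value
}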
SDValue
SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                               CallingConv::ID CallConv, bool isVarArg,
                               bool &isTailCall,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               DebugLoc dl, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const {
  // Sparc target does not yet support tail call optimization.
  isTailCall = false;

#if 0
  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);

  // Get the size of the outgoing arguments stack space requirement.
  unsigned ArgsSize = CCInfo.getNextStackOffset();
  // FIXME: We can't use this until f64 is known to take two GPRs.
#else
  (void)CC_Sparc32;

  // Count the size of the outgoing arguments.
  unsigned ArgsSize = 0;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    switch (Outs[i].Val.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unknown value type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::f32:
      ArgsSize += 4;
      break;
    case MVT::i64:
    case MVT::f64:
      ArgsSize += 8;
      break;
    }
  }
  if (ArgsSize > 4*6)
    ArgsSize -= 4*6;    // Space for first 6 arguments is prereserved.
  else
    ArgsSize = 0;
#endif

  // Keep stack frames 8-byte aligned.
  ArgsSize = (ArgsSize+7) & ~7;

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

#if 0
  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = Outs[i].Val;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    // Arguments that are passed in registers must be kept in the
    // RegsToPass vector.
    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      continue;
    }

    assert(VA.isMemLoc());

    // Create a store off the stack pointer for this argument.
    SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
    // FIXME: VERIFY THAT 68 IS RIGHT.
    SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+68);
    PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
    MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0,
                                       false, false, 0));
  }
#else
  static const unsigned ArgRegs[] = {
    SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
  };
  unsigned ArgOffset = 68;

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    SDValue Val = Outs[i].Val;
    EVT ObjectVT = Val.getValueType();
    SDValue ValToStore(0, 0);
    unsigned ObjSize;
    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i32:
      ObjSize = 4;
      if (RegsToPass.size() >= 6) {
        ValToStore = Val;
      } else {
        RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()],
                                            Val));
      }
      break;
    case MVT::f32:
      ObjSize = 4;
      if (RegsToPass.size() >= 6) {
        ValToStore = Val;
      } else {
        // Convert this to a FP value in an int reg.
        Val = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Val);
        RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()],
                                            Val));
      }
      break;
    case MVT::f64: {
      ObjSize = 8;
      if (RegsToPass.size() >= 6) {
        ValToStore = Val;    // Whole thing is passed in memory.
        break;
      }

      // Break into top and bottom parts by storing to the stack and loading
      // out the parts as integers. Top part goes in a reg.
      SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
      SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
                                   Val, StackPtr, NULL, 0,
                                   false, false, 0);
      // Sparc is big-endian, so the high part comes first.
      SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0,
                               false, false, 0);
      // Increment the pointer to the other half.
      StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(),
                             StackPtr, DAG.getIntPtrConstant(4));
      // Load the low part.
      SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0,
                               false, false, 0);

      RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi));

      if (RegsToPass.size() >= 6) {
        ValToStore = Lo;
        ArgOffset += 4;
        ObjSize = 4;
      } else {
        RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo));
      }
      break;
    }
    case MVT::i64: {
      ObjSize = 8;
      if (RegsToPass.size() >= 6) {
        ValToStore = Val;    // Whole thing is passed in memory.
        break;
      }

      // Split the value into top and bottom part. Top part goes in a reg.
      SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val,
                               DAG.getConstant(1, MVT::i32));
      SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val,
                               DAG.getConstant(0, MVT::i32));
      RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi));

      if (RegsToPass.size() >= 6) {
        ValToStore = Lo;
        ArgOffset += 4;
        ObjSize = 4;
      } else {
        RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo));
      }
      break;
    }
    }

    if (ValToStore.getNode()) {
      SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
      SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, dl, ValToStore, PtrOff,
                                         NULL, 0, false, false, 0));
    }
    ArgOffset += ObjSize;
  }
#endif

  // Emit all stores, make sure they occur before any copies into physregs.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token
  // chain and flag operands which copy the outgoing args into registers.
  // The InFlag is necessary since all emitted instructions must be
  // stuck together.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    unsigned Reg = RegsToPass[i].first;

    // Remap I0->I7 -> O0->O7.
    if (Reg >= SP::I0 && Reg <= SP::I7)
      Reg = Reg-SP::I0+SP::O0;

    Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is) turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it. Likewise ExternalSymbol -> TargetExternalSymbol.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
  else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);

  std::vector<EVT> NodeTys;
  NodeTys.push_back(MVT::Other);  // Returns a chain
  NodeTys.push_back(MVT::Flag);   // Returns a flag for retval copy to use.
  SDValue Ops[] = { Chain, Callee, InFlag };
  Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops,
                      InFlag.getNode() ? 3 : 2);
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  InFlag = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CallConv, isVarArg, DAG.getTarget(), RVLocs,
                 *DAG.getContext());

  RVInfo.AnalyzeCallResult(Ins, RetCC_Sparc32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    unsigned Reg = RVLocs[i].getLocReg();

    // Remap I0->I7 -> O0->O7.
    if (Reg >= SP::I0 && Reg <= SP::I7)
      Reg = Reg-SP::I0+SP::O0;

    Chain = DAG.getCopyFromReg(Chain, dl, Reg,
                               RVLocs[i].getValVT(), InFlag).getValue(1);
    InFlag = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }

  return Chain;
}
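// --- Hedged illustration (not part of the backend) -------------------------
// The I0->I7 to O0->O7 remap in LowerCall above reflects SPARC register
// windows: the callee's %i registers are the caller's %o registers, and the
// arithmetic assumes the register enums are contiguous. A standalone model
// with stand-in enums (not the real SP:: values):
enum SparcRegModel { O0, O1, O2, O3, O4, O5, O6, O7,
                     I0, I1, I2, I3, I4, I5, I6, I7 };

static unsigned remapInToOut(unsigned Reg) {
  // Same arithmetic as Reg - SP::I0 + SP::O0 in the code above.
  if (Reg >= I0 && Reg <= I7)
    Reg = Reg - I0 + O0;
  return Reg;
}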
SDValue
X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                             SDValue Chain, SDValue Dst,
                                             SDValue Src, SDValue Size,
                                             unsigned Align, bool isVolatile,
                                             bool AlwaysInline,
                                             const Value *DstSV,
                                             uint64_t DstSVOff,
                                             const Value *SrcSV,
                                             uint64_t SrcSVOff) const {
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
    return SDValue();

  /// If not DWORD aligned, call the library.
  if ((Align & 3) != 0)
    return SDValue();

  // DWORD aligned
  EVT AVT = MVT::i32;
  if (Subtarget->is64Bit() && ((Align & 0x7) == 0))  // QWORD aligned
    AVT = MVT::i64;

  unsigned UBytes = AVT.getSizeInBits() / 8;
  unsigned CountVal = SizeVal / UBytes;
  SDValue Count = DAG.getIntPtrConstant(CountVal);
  unsigned BytesLeft = SizeVal % UBytes;

  SDValue InFlag(0, 0);
  Chain = DAG.getCopyToReg(Chain, dl,
                           Subtarget->is64Bit() ? X86::RCX : X86::ECX,
                           Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl,
                           Subtarget->is64Bit() ? X86::RDI : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl,
                           Subtarget->is64Bit() ? X86::RSI : X86::ESI,
                           Src, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
                                array_lengthof(Ops));

  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT DstVT = Dst.getValueType();
    EVT SrcVT = Src.getValueType();
    EVT SizeVT = Size.getValueType();
    Results.push_back(DAG.getMemcpy(Chain, dl,
                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
                                                DAG.getConstant(Offset, DstVT)),
                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset, SrcVT)),
                                    DAG.getConstant(BytesLeft, SizeVT),
                                    Align, isVolatile, AlwaysInline,
                                    DstSV, DstSVOff + Offset,
                                    SrcSV, SrcSVOff + Offset));
  }

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                     &Results[0], Results.size());
}
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (!ConstantSize) return SDValue(); uint64_t SizeVal = ConstantSize->getZExtValue(); if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) return SDValue(); /// If not DWORD aligned, it is more efficient to call the library. However /// if calling the library is not allowed (AlwaysInline), then soldier on as /// the code generated here is better than the long load-store sequence we /// would otherwise get. if (!AlwaysInline && (Align & 3) != 0) return SDValue(); // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256) return SDValue(); // If ESI is used as a base pointer, we must preserve it when doing rep movs. const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo()); bool PreserveESI = TRI->hasBasePointer(DAG.getMachineFunction()) && TRI->getBaseRegister() == X86::ESI; MVT AVT; if (Align & 1) AVT = MVT::i8; else if (Align & 2) AVT = MVT::i16; else if (Align & 4) // DWORD aligned AVT = MVT::i32; else // QWORD aligned AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; unsigned UBytes = AVT.getSizeInBits() / 8; unsigned CountVal = SizeVal / UBytes; SDValue Count = DAG.getIntPtrConstant(CountVal); unsigned BytesLeft = SizeVal % UBytes; if (PreserveESI) { // Save ESI to a physical register. (We cannot use a virtual register // because if it is spilled we wouldn't be able to reload it.) // We don't glue this because the register dependencies are explicit. Chain = DAG.getCopyToReg(Chain, dl, X86::EDX, DAG.getRegister(X86::ESI, MVT::i32)); } SDValue InGlue(0, 0); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : X86::ECX, Count, InGlue); InGlue = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : X86::EDI, Dst, InGlue); InGlue = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : X86::ESI, Src, InGlue); InGlue = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(AVT), InGlue }; // FIXME: Make X86rep_movs explicitly use RCX, RDI, RSI instead of glue. SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops, array_lengthof(Ops)); if (PreserveESI) { InGlue = RepMovs.getValue(1); RepMovs = DAG.getCopyToReg(RepMovs, dl, X86::ESI, DAG.getRegister(X86::EDX, MVT::i32), InGlue); } SmallVector<SDValue, 4> Results; Results.push_back(RepMovs); if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; EVT DstVT = Dst.getValueType(); EVT SrcVT = Src.getValueType(); EVT SizeVT = Size.getValueType(); Results.push_back(DAG.getMemcpy(Chain, dl, DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, DstVT)), DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, SrcVT)), DAG.getConstant(BytesLeft, SizeVT), Align, isVolatile, AlwaysInline, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Results[0], Results.size()); }
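// A minimal standalone sketch (not LLVM code) of the element-type cascade in
// the version above, which probes alignment bits from narrowest to widest:
// an odd address forces byte moves, (Align & 2) forces word moves, and so on
// up to qword moves on 64-bit targets. The enum is a stand-in for MVT.
#include <cassert>

enum class MoveWidth { I8 = 1, I16 = 2, I32 = 4, I64 = 8 };

static MoveWidth chooseAVT(unsigned Align, bool Is64Bit) {
  if (Align & 1) return MoveWidth::I8;              // byte aligned
  if (Align & 2) return MoveWidth::I16;             // WORD aligned
  if (Align & 4) return MoveWidth::I32;             // DWORD aligned
  return Is64Bit ? MoveWidth::I64 : MoveWidth::I32; // QWORD aligned
}

int main() {
  assert(chooseAVT(1, true) == MoveWidth::I8);
  assert(chooseAVT(6, true) == MoveWidth::I16); // 2-aligned but not 4-aligned
  assert(chooseAVT(8, true) == MoveWidth::I64);
  assert(chooseAVT(8, false) == MoveWidth::I32);
  return 0;
}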
SDValue SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { // Sparc target does not yet support tail call optimization. isTailCall = false; // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32); // Get the size of the outgoing arguments stack space requirement. unsigned ArgsSize = CCInfo.getNextStackOffset(); // Keep stack frames 8-byte aligned. ArgsSize = (ArgsSize+7) & ~7; MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); //Create local copies for byval args. SmallVector<SDValue, 8> ByValArgs; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { ISD::ArgFlagsTy Flags = Outs[i].Flags; if (!Flags.isByVal()) continue; SDValue Arg = OutVals[i]; unsigned Size = Flags.getByValSize(); unsigned Align = Flags.getByValAlign(); int FI = MFI->CreateStackObject(Size, Align, false); SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); SDValue SizeNode = DAG.getConstant(Size, MVT::i32); Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align, false, //isVolatile, (Size <= 32), //AlwaysInline if size <= 32 MachinePointerInfo(), MachinePointerInfo()); ByValArgs.push_back(FIPtr); } Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true)); SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<SDValue, 8> MemOpChains; const unsigned StackOffset = 92; bool hasStructRetAttr = false; // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[realArgIdx]; ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; //Use local copy if it is a byval arg. if (Flags.isByVal()) Arg = ByValArgs[byvalArgIdx++]; // Promote the value if needed. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); break; } if (Flags.isSRet()) { assert(VA.needsCustom()); // store SRet argument in %sp+64 SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(64); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); hasStructRetAttr = true; continue; } if (VA.needsCustom()) { assert(VA.getLocVT() == MVT::f64); if (VA.isMemLoc()) { unsigned Offset = VA.getLocMemOffset() + StackOffset; //if it is double-word aligned, just store. 
if (Offset % 8 == 0) { SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(Offset); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); continue; } } SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Arg, StackPtr, MachinePointerInfo(), false, false, 0); // Sparc is big-endian, so the high part comes first. SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, MachinePointerInfo(), false, false, 0); // Increment the pointer to the other half. StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getIntPtrConstant(4)); // Load the low part. SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, MachinePointerInfo(), false, false, 0); if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi)); assert(i+1 != e); CCValAssign &NextVA = ArgLocs[++i]; if (NextVA.isRegLoc()) { RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo)); } else { // Store the low part in stack. unsigned Offset = NextVA.getLocMemOffset() + StackOffset; SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(Offset); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff, MachinePointerInfo(), false, false, 0)); } } else { unsigned Offset = VA.getLocMemOffset() + StackOffset; // Store the high part. SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(Offset); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff, MachinePointerInfo(), false, false, 0)); // Store the low part. PtrOff = DAG.getIntPtrConstant(Offset+4); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff, MachinePointerInfo(), false, false, 0)); } continue; } // Arguments that can be passed on register must be kept at // RegsToPass vector if (VA.isRegLoc()) { if (VA.getLocVT() != MVT::f32) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); continue; } Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); continue; } assert(VA.isMemLoc()); // Create a store off the stack pointer for this argument. SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+StackOffset); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); } // Emit all stores, make sure they occur before any copies into physregs. if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. // The InFlag is necessary since all emitted instructions must be // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { unsigned Reg = RegsToPass[i].first; // Remap I0->I7 -> O0->O7. if (Reg >= SP::I0 && Reg <= SP::I7) Reg = Reg-SP::I0+SP::O0; Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } unsigned SRetArgSize = (hasStructRetAttr)?
getSRetArgSize(DAG, Callee):0; // If the callee is a GlobalAddress node (quite common, every direct call is) // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); // Returns a chain & a flag for retval copy to use SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); Ops.push_back(Callee); if (hasStructRetAttr) Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32)); for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { unsigned Reg = RegsToPass[i].first; if (Reg >= SP::I0 && Reg <= SP::I7) Reg = Reg-SP::I0+SP::O0; Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType())); } if (InFlag.getNode()) Ops.push_back(InFlag); Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true), DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; CCState RVInfo(CallConv, isVarArg, DAG.getTarget(), RVLocs, *DAG.getContext()); RVInfo.AnalyzeCallResult(Ins, RetCC_Sparc32); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { unsigned Reg = RVLocs[i].getLocReg(); // Remap I0->I7 -> O0->O7. if (Reg >= SP::I0 && Reg <= SP::I7) Reg = Reg-SP::I0+SP::O0; Chain = DAG.getCopyFromReg(Chain, dl, Reg, RVLocs[i].getValVT(), InFlag).getValue(1); InFlag = Chain.getValue(2); InVals.push_back(Chain.getValue(0)); } return Chain; }
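// A minimal standalone sketch (not LLVM code) of the frame-size rounding in
// the SPARC lowering above: (N + 7) & ~7 rounds N up to the next multiple of
// eight, keeping the outgoing-argument area 8-byte aligned.
#include <cassert>

static unsigned roundUpTo8(unsigned N) { return (N + 7) & ~7u; }

int main() {
  assert(roundUpTo8(0) == 0);
  assert(roundUpTo8(1) == 8);
  assert(roundUpTo8(92) == 96);
  assert(roundUpTo8(96) == 96); // already aligned sizes are unchanged
  return 0;
}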
std::vector<SDOperand> IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { std::vector<SDOperand> ArgValues; // // add beautiful description of IA64 stack frame format // here (from intel 24535803.pdf most likely) // MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); GP = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); SP = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); RP = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); MachineBasicBlock& BB = MF.front(); unsigned args_int[] = {IA64::r32, IA64::r33, IA64::r34, IA64::r35, IA64::r36, IA64::r37, IA64::r38, IA64::r39}; unsigned args_FP[] = {IA64::F8, IA64::F9, IA64::F10, IA64::F11, IA64::F12,IA64::F13,IA64::F14, IA64::F15}; unsigned argVreg[8]; unsigned argPreg[8]; unsigned argOpc[8]; unsigned used_FPArgs = 0; // how many FP args have been used so far? unsigned ArgOffset = 0; int count = 0; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { SDOperand newroot, argt; if(count < 8) { // need to fix this logic? maybe. switch (getValueType(I->getType())) { default: assert(0 && "ERROR in LowerArgs: can't lower this type of arg.\n"); case MVT::f32: // fixme? (well, will need to for weird FP structy stuff, // see intel ABI docs) case MVT::f64: //XXX BuildMI(&BB, IA64::IDEF, 0, args_FP[used_FPArgs]); MF.getRegInfo().addLiveIn(args_FP[used_FPArgs]); // mark this reg as liveIn // floating point args go into f8..f15 as-needed, the increment argVreg[count] = // is below..: MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::f64)); // FP args go into f8..f15 as needed: (hence the ++) argPreg[count] = args_FP[used_FPArgs++]; argOpc[count] = IA64::FMOV; argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), argVreg[count], MVT::f64); if (I->getType() == Type::FloatTy) argt = DAG.getNode(ISD::FP_ROUND, MVT::f32, argt, DAG.getIntPtrConstant(0)); break; case MVT::i1: // NOTE: as far as C abi stuff goes, // bools are just boring old ints case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: //XXX BuildMI(&BB, IA64::IDEF, 0, args_int[count]); MF.getRegInfo().addLiveIn(args_int[count]); // mark this register as liveIn argVreg[count] = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); argPreg[count] = args_int[count]; argOpc[count] = IA64::MOV; argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), argVreg[count], MVT::i64); if ( getValueType(I->getType()) != MVT::i64) argt = DAG.getNode(ISD::TRUNCATE, getValueType(I->getType()), newroot); break; } } else { // more than 8 args go into the frame // Create the frame index object for this incoming parameter... ArgOffset = 16 + 8 * (count - 8); int FI = MFI->CreateFixedObject(8, ArgOffset); // Create the SelectionDAG nodes corresponding to a load //from this parameter SDOperand FIN = DAG.getFrameIndex(FI, MVT::i64); argt = newroot = DAG.getLoad(getValueType(I->getType()), DAG.getEntryNode(), FIN, NULL, 0); } ++count; DAG.setRoot(newroot.getValue(1)); ArgValues.push_back(argt); } // Create a vreg to hold the output of (what will become) // the "alloc" instruction VirtGPR = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); BuildMI(&BB, TII->get(IA64::PSEUDO_ALLOC), VirtGPR); // we create a PSEUDO_ALLOC (pseudo)instruction for now /* BuildMI(&BB, IA64::IDEF, 0, IA64::r1); // hmm: BuildMI(&BB, IA64::IDEF, 0, IA64::r12); BuildMI(&BB, IA64::IDEF, 0, IA64::rp); // ..hmm. 
BuildMI(&BB, IA64::MOV, 1, GP).addReg(IA64::r1); // hmm: BuildMI(&BB, IA64::MOV, 1, SP).addReg(IA64::r12); BuildMI(&BB, IA64::MOV, 1, RP).addReg(IA64::rp); // ..hmm. */ unsigned tempOffset=0; // if this is a varargs function, we simply lower llvm.va_start by // pointing to the first entry if(F.isVarArg()) { tempOffset=0; VarArgsFrameIndex = MFI->CreateFixedObject(8, tempOffset); } // here we actually do the moving of args, and store them to the stack // too if this is a varargs function: for (int i = 0; i < count && i < 8; ++i) { BuildMI(&BB, TII->get(argOpc[i]), argVreg[i]).addReg(argPreg[i]); if(F.isVarArg()) { // if this is a varargs function, we copy the input registers to the stack int FI = MFI->CreateFixedObject(8, tempOffset); tempOffset+=8; //XXX: is it safe to use r22 like this? BuildMI(&BB, TII->get(IA64::MOV), IA64::r22).addFrameIndex(FI); // FIXME: we should use st8.spill here, one day BuildMI(&BB, TII->get(IA64::ST8), IA64::r22).addReg(argPreg[i]); } } // Finally, inform the code generator which regs we return values in. // (see the ISD::RET: case in the instruction selector) switch (getValueType(F.getReturnType())) { default: assert(0 && "i have no idea where to return this type!"); case MVT::isVoid: break; case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: MF.getRegInfo().addLiveOut(IA64::r8); break; case MVT::f32: case MVT::f64: MF.getRegInfo().addLiveOut(IA64::F8); break; } return ArgValues; }
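// A minimal standalone sketch (not LLVM code) of the incoming-argument
// placement in the IA64 lowering above: the first eight arguments arrive in
// registers (r32-r39 or f8-f15), and argument i >= 8 lives in the caller's
// frame at offset 16 + 8 * (i - 8).
#include <cassert>

static int ia64ArgFrameOffset(int ArgIndex) {
  assert(ArgIndex >= 8 && "first eight arguments are passed in registers");
  return 16 + 8 * (ArgIndex - 8);
}

int main() {
  assert(ia64ArgFrameOffset(8) == 16);  // ninth argument, first one in memory
  assert(ia64ArgFrameOffset(11) == 40);
  return 0;
}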
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256) return SDValue(); // If not DWORD aligned or size is more than the threshold, call the library. // The libc version is likely to be faster for these cases. It can use the // address value and run time information about the CPU. if ((Align & 3) != 0 || !ConstantSize || ConstantSize->getZExtValue() > Subtarget->getMaxInlineSizeThreshold()) { // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); if (const char *bzeroEntry = V && V->isNullValue() ? Subtarget->getBZeroEntry() : nullptr) { EVT IntPtr = TLI.getPointerTy(); Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; } // Otherwise have the target-independent code call memset. return SDValue(); } uint64_t SizeVal = ConstantSize->getZExtValue(); SDValue InFlag; EVT AVT; SDValue Count; ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); unsigned BytesLeft = 0; bool TwoRepStos = false; if (ValC) { unsigned ValReg; uint64_t Val = ValC->getZExtValue() & 255; // If the value is a constant, then we can potentially use larger sets. switch (Align & 3) { case 2: // WORD aligned AVT = MVT::i16; ValReg = X86::AX; Val = (Val << 8) | Val; break; case 0: // DWORD aligned AVT = MVT::i32; ValReg = X86::EAX; Val = (Val << 8) | Val; Val = (Val << 16) | Val; if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned AVT = MVT::i64; ValReg = X86::RAX; Val = (Val << 32) | Val; } break; default: // Byte aligned AVT = MVT::i8; ValReg = X86::AL; Count = DAG.getIntPtrConstant(SizeVal); break; } if (AVT.bitsGT(MVT::i8)) { unsigned UBytes = AVT.getSizeInBits() / 8; Count = DAG.getIntPtrConstant(SizeVal / UBytes); BytesLeft = SizeVal % UBytes; } Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT), InFlag); InFlag = Chain.getValue(1); } else { AVT = MVT::i8; Count = DAG.getIntPtrConstant(SizeVal); Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); InFlag = Chain.getValue(1); } Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : X86::ECX, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); if (TwoRepStos) { InFlag = Chain.getValue(1); Count = Size; EVT CVT = Count.getValueType(); SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? 
X86::RCX : X86::ECX, Left, InFlag); InFlag = Chain.getValue(1); Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); } else if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; EVT AddrVT = Dst.getValueType(); EVT SizeVT = Size.getValueType(); Chain = DAG.getMemset(Chain, dl, DAG.getNode(ISD::ADD, dl, AddrVT, Dst, DAG.getConstant(Offset, AddrVT)), Src, DAG.getConstant(BytesLeft, SizeVT), Align, isVolatile, DstPtrInfo.getWithOffset(Offset)); } // TODO: Use a TokenFactor, as in memcpy, instead of a single chain. return Chain; }
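// A minimal standalone sketch (not LLVM code) of the constant splat used for
// REP STOS above: the low byte is replicated across the store width so that
// every wide store writes the same byte pattern.
#include <cassert>
#include <cstdint>

static uint64_t splatByte(uint64_t Val, unsigned EltBytes) {
  Val &= 255;
  if (EltBytes >= 2) Val = (Val << 8) | Val;  // 0xAB -> 0xABAB
  if (EltBytes >= 4) Val = (Val << 16) | Val; // -> 0xABABABAB
  if (EltBytes >= 8) Val = (Val << 32) | Val; // -> 0xABABABABABABABAB
  return Val;
}

int main() {
  assert(splatByte(0xAB, 2) == 0xABABu);
  assert(splatByte(0xAB, 4) == 0xABABABABu);
  assert(splatByte(0xAB, 8) == 0xABABABABABABABABull);
  return 0;
}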
SDValue Cpu0TargetLowering::LowerCall(SDValue InChain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { #if 1 // Cpu0 target does not yet support tail call optimization. isTailCall = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering(); bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; Cpu0FunctionInfo *Cpu0FI = MF.getInfo<Cpu0FunctionInfo>(); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CC_Cpu0); // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); // Chain is the output chain of the last Load/Store or CopyToReg node. // ByValChain is the output chain of the last Memcpy node created for copying // byval arguments to the stack. SDValue Chain, CallSeqStart, ByValChain; SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal); ByValChain = InChain; #if 0 // If this is the first call, create a stack frame object that points to // a location to which .cprestore saves $gp. if (IsO32 && IsPIC && Cpu0FI->globalBaseRegFixed() && !Cpu0FI->getGPFI()) Cpu0FI->setGPFI(MFI->CreateFixedObject(4, 0, true)); #endif // Get the frame index of the stack frame object that points to the location // of dynamically allocated area on the stack. int DynAllocFI = Cpu0FI->getDynAllocFI(); #if 0 // Update size of the maximum argument space. // For O32, a minimum of four words (16 bytes) of argument space is // allocated. if (IsO32) NextStackOffset = std::max(NextStackOffset, (unsigned)16); #endif unsigned MaxCallFrameSize = Cpu0FI->getMaxCallFrameSize(); if (MaxCallFrameSize < NextStackOffset) { Cpu0FI->setMaxCallFrameSize(NextStackOffset); // Set the offsets relative to $sp of the $gp restore slot and dynamically // allocated stack space. These offsets must be aligned to a boundary // determined by the stack alignment of the ABI. unsigned StackAlignment = TFL->getStackAlignment(); NextStackOffset = (NextStackOffset + StackAlignment - 1) / StackAlignment * StackAlignment; if (Cpu0FI->needGPSaveRestore()) MFI->setObjectOffset(Cpu0FI->getGPFI(), NextStackOffset); MFI->setObjectOffset(DynAllocFI, NextStackOffset); } // With EABI it is possible to have 16 args on registers. SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; SmallVector<SDValue, 8> MemOpChains; int FirstFI = -MFI->getNumFixedObjects() - 1, LastFI = 0; // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { SDValue Arg = OutVals[i]; CCValAssign &VA = ArgLocs[i]; MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT(); ISD::ArgFlagsTy Flags = Outs[i].Flags; // ByVal Arg.
if (Flags.isByVal()) { assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); #if 0 if (IsO32) WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); #endif #if 0 else PassByValArg64(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); #endif continue; } // Promote the value if needed. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: #if 0 if (VA.isRegLoc()) { if ((ValVT == MVT::f32 && LocVT == MVT::i32) || (ValVT == MVT::f64 && LocVT == MVT::i64)) Arg = DAG.getNode(ISD::BITCAST, dl, LocVT, Arg); else if (ValVT == MVT::f64 && LocVT == MVT::i32) { SDValue Lo = DAG.getNode(Cpu0ISD::ExtractElementF64, dl, MVT::i32, Arg, DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(Cpu0ISD::ExtractElementF64, dl, MVT::i32, Arg, DAG.getConstant(1, MVT::i32)); if (!Subtarget->isLittle()) std::swap(Lo, Hi); unsigned LocRegLo = VA.getLocReg(); unsigned LocRegHigh = getNextIntArgReg(LocRegLo); RegsToPass.push_back(std::make_pair(LocRegLo, Lo)); RegsToPass.push_back(std::make_pair(LocRegHigh, Hi)); continue; } } #else assert("CCValAssign::Full:"); // Gamma debug #endif break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, LocVT, Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, LocVT, Arg); break; case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, dl, LocVT, Arg); break; } // Arguments that can be passed on register must be kept at // RegsToPass vector if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); continue; } // Register can't get to this point... assert(VA.isMemLoc()); // Create the frame index object for this incoming parameter LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), true); SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy()); // emit ISD::STORE which stores the // parameter value to a stack location MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); } // Extend range of indices of frame objects for outgoing arguments that were // created during this function call. Skip this step if no such objects were // created. if (LastFI) Cpu0FI->extendOutArgFIRange(FirstFI, LastFI); // If a memcpy has been created to copy a byval arg to a stack, replace the // chain input of CallSeqStart with ByValChain. if (InChain != ByValChain) DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain, NextStackOffsetVal); // Transform all store nodes into one single node because all store // nodes are independent of each other. if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. unsigned char OpFlag; #if 0 // cpu0 int 32 only bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25 #else bool IsPICCall = IsPIC; // true if calls are translated to jalr $25 #endif bool GlobalOrExternal = false; SDValue CalleeLo; if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { if (IsPICCall && G->getGlobal()->hasInternalLinkage()) { OpFlag = Cpu0II::MO_GOT; #if 0 unsigned char LoFlag = IsO32 ?
Cpu0II::MO_ABS_LO : Cpu0II::MO_GOT_OFST; #else unsigned char LoFlag = Cpu0II::MO_ABS_LO; #endif Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0, OpFlag); CalleeLo = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0, LoFlag); } else { OpFlag = IsPICCall ? Cpu0II::MO_GOT_CALL : Cpu0II::MO_NO_FLAG; Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0, OpFlag); } GlobalOrExternal = true; } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { if (!IsPIC) // static OpFlag = Cpu0II::MO_NO_FLAG; else // O32 & PIC OpFlag = Cpu0II::MO_GOT_CALL; Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), OpFlag); GlobalOrExternal = true; } SDValue InFlag; // Create nodes that load address of callee and copy it to T9 if (IsPICCall) { if (GlobalOrExternal) { // Load callee address Callee = DAG.getNode(Cpu0ISD::Wrapper, dl, getPointerTy(), GetGlobalReg(DAG, getPointerTy()), Callee); SDValue LoadValue = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(), false, false, false, 0); // Use GOT+LO if callee has internal linkage. if (CalleeLo.getNode()) { SDValue Lo = DAG.getNode(Cpu0ISD::Lo, dl, getPointerTy(), CalleeLo); Callee = DAG.getNode(ISD::ADD, dl, getPointerTy(), LoadValue, Lo); } else Callee = LoadValue; } } // T9 should contain the address of the callee function if // -relocation-model=pic or it is an indirect call. if (IsPICCall || !GlobalOrExternal) { // copy to T9 unsigned T9Reg = Cpu0::T9; Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0)); InFlag = Chain.getValue(1); Callee = DAG.getRegister(T9Reg, getPointerTy()); } // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. // The InFlag is necessary since all emitted instructions must be // stuck together. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } // Cpu0JmpLink = #chain, #target_address, #opt_in_flags... // = Chain, Callee, Reg#1, Reg#2, ... // // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); Ops.push_back(Callee); // Add argument registers to the end of the list so that they are // known live into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); if (InFlag.getNode()) Ops.push_back(InFlag); Chain = DAG.getNode(Cpu0ISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NextStackOffset, true), DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, InVals); #else return InChain; #endif }
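// A minimal standalone sketch (not LLVM code) of the round-up used for the
// Cpu0 call frame above. The divide-then-multiply form works for any
// alignment value, not just powers of two.
#include <cassert>

static unsigned alignTo(unsigned Offset, unsigned Alignment) {
  return (Offset + Alignment - 1) / Alignment * Alignment;
}

int main() {
  assert(alignTo(20, 8) == 24);
  assert(alignTo(24, 8) == 24);
  assert(alignTo(10, 12) == 12); // non-power-of-two alignments also work
  return 0;
}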
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); const X86Subtarget &Subtarget = DAG.getMachineFunction().getSubtarget<X86Subtarget>(); if (!ConstantSize) return SDValue(); RepMovsRepeats Repeats(ConstantSize->getZExtValue()); if (!AlwaysInline && Repeats.Size > Subtarget.getMaxInlineSizeThreshold()) return SDValue(); /// If not DWORD aligned, it is more efficient to call the library. However /// if calling the library is not allowed (AlwaysInline), then soldier on as /// the code generated here is better than the long load-store sequence we /// would otherwise get. if (!AlwaysInline && (Align & 3) != 0) return SDValue(); // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256) return SDValue(); // If the base register might conflict with our physical registers, bail out. const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI, X86::ECX, X86::ESI, X86::EDI}; if (isBaseRegConflictPossible(DAG, ClobberSet)) return SDValue(); // If the target has enhanced REPMOVSB, then it's at least as fast to use // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle // BytesLeft. if (!Subtarget.hasERMSB() && !(Align & 1)) { if (Align & 2) // WORD aligned Repeats.AVT = MVT::i16; else if (Align & 4) // DWORD aligned Repeats.AVT = MVT::i32; else // QWORD aligned Repeats.AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; if (Repeats.BytesLeft() > 0 && DAG.getMachineFunction().getFunction().optForMinSize()) { // When aggressively optimizing for size, avoid generating the code to // handle BytesLeft. Repeats.AVT = MVT::i8; } } SDValue InFlag; Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI, Src, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(Repeats.AVT), InFlag }; SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops); SmallVector<SDValue, 4> Results; Results.push_back(RepMovs); if (Repeats.BytesLeft()) { // Handle the last 1 - 7 bytes. unsigned Offset = Repeats.Size - Repeats.BytesLeft(); EVT DstVT = Dst.getValueType(); EVT SrcVT = Src.getValueType(); EVT SizeVT = Size.getValueType(); Results.push_back(DAG.getMemcpy(Chain, dl, DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)), DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)), DAG.getConstant(Repeats.BytesLeft(), dl, SizeVT), Align, isVolatile, AlwaysInline, false, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results); }
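// A minimal standalone sketch (not LLVM code) of the RepMovsRepeats
// bookkeeping referenced above. The struct body shown here is a guess based
// on how Count() and BytesLeft() are used in the snippet, not the actual
// definition; EltBytes stands in for the MVT stored in Repeats.AVT.
#include <cassert>
#include <cstdint>

struct RepMovsRepeats {
  explicit RepMovsRepeats(uint64_t Size) : Size(Size) {}
  uint64_t Count() const { return Size / EltBytes; }
  uint64_t BytesLeft() const { return Size % EltBytes; }
  uint64_t Size;
  unsigned EltBytes = 1; // starts as i8, matching the ERMSB/minsize default
};

int main() {
  RepMovsRepeats Repeats(37);
  Repeats.EltBytes = 8; // QWORD-aligned copy on a 64-bit target
  assert(Repeats.Count() == 4 && Repeats.BytesLeft() == 5);
  Repeats.EltBytes = 1; // the minsize fallback leaves no tail to handle
  assert(Repeats.BytesLeft() == 0);
  return 0;
}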
/// LowerCCCCallTo - the function's arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. /// TODO: sret. SDValue MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CC_MSP430); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy(), true)); SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass; SmallVector<SDValue, 12> MemOpChains; SDValue StackPtr; // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; } // Arguments that can be passed on register must be kept at RegsToPass // vector if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { assert(VA.isMemLoc()); if (StackPtr.getNode() == 0) StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SPW, getPointerTy()); SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, DAG.getIntPtrConstant(VA.getLocMemOffset())); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); } } // Transform all store nodes into one single node because all store nodes are // independent of each other. if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); // Build a sequence of copy-to-reg nodes chained together with token chain and // flag operands which copy the outgoing args into registers. The InFlag is // necessary since all emitted instructions must be stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } // If the callee is a GlobalAddress node (quite common, every direct call is) // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i16); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16); // Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); Ops.push_back(Callee); // Add argument registers to the end of the list so that they are // known live into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); if (InFlag.getNode()) Ops.push_back(InFlag); Chain = DAG.getNode(MSP430ISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, getPointerTy(), true), DAG.getConstant(0, getPointerTy(), true), InFlag); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, InVals); }
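// A minimal standalone sketch (not LLVM code) of the three integer
// promotions handled in the switch above, shown for an 8-bit value widened
// to MSP430's 16-bit word. AExt leaves the upper bits unspecified, so any
// widening is acceptable for it.
#include <cassert>
#include <cstdint>

static int16_t signExt(int8_t V) { return static_cast<int16_t>(V); }    // SExt
static uint16_t zeroExt(uint8_t V) { return static_cast<uint16_t>(V); } // ZExt

int main() {
  assert(signExt(static_cast<int8_t>(-1)) == -1);        // 0xFF -> 0xFFFF
  assert(zeroExt(static_cast<uint8_t>(0xFF)) == 0x00FF); // 0xFF -> 0x00FF
  return 0;
}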
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); const X86Subtarget &Subtarget = DAG.getMachineFunction().getSubtarget<X86Subtarget>(); #ifndef NDEBUG // If the base register might conflict with our physical registers, bail out. const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI, X86::ECX, X86::EAX, X86::EDI}; assert(!isBaseRegConflictPossible(DAG, ClobberSet)); #endif // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256) return SDValue(); // If not DWORD aligned or size is more than the threshold, call the library. // The libc version is likely to be faster for these cases. It can use the // address value and run time information about the CPU. if ((Align & 3) != 0 || !ConstantSize || ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) { // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val); if (const char *bzeroName = (ValC && ValC->isNullValue()) ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO) : nullptr) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(Chain) .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol(bzeroName, IntPtr), std::move(Args)) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; } // Otherwise have the target-independent code call memset. return SDValue(); } uint64_t SizeVal = ConstantSize->getZExtValue(); SDValue InFlag; EVT AVT; SDValue Count; ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val); unsigned BytesLeft = 0; if (ValC) { unsigned ValReg; uint64_t Val = ValC->getZExtValue() & 255; // If the value is a constant, then we can potentially use larger sets. switch (Align & 3) { case 2: // WORD aligned AVT = MVT::i16; ValReg = X86::AX; Val = (Val << 8) | Val; break; case 0: // DWORD aligned AVT = MVT::i32; ValReg = X86::EAX; Val = (Val << 8) | Val; Val = (Val << 16) | Val; if (Subtarget.is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned AVT = MVT::i64; ValReg = X86::RAX; Val = (Val << 32) | Val; } break; default: // Byte aligned AVT = MVT::i8; ValReg = X86::AL; Count = DAG.getIntPtrConstant(SizeVal, dl); break; } if (AVT.bitsGT(MVT::i8)) { unsigned UBytes = AVT.getSizeInBits() / 8; Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl); BytesLeft = SizeVal % UBytes; } Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT), InFlag); InFlag = Chain.getValue(1); } else { AVT = MVT::i8; Count = DAG.getIntPtrConstant(SizeVal, dl); Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag); InFlag = Chain.getValue(1); } bool Use64BitRegs = Subtarget.isTarget64BitLP64(); Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? 
X86::RDI : X86::EDI, Dst, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; EVT AddrVT = Dst.getValueType(); EVT SizeVT = Size.getValueType(); Chain = DAG.getMemset(Chain, dl, DAG.getNode(ISD::ADD, dl, AddrVT, Dst, DAG.getConstant(Offset, dl, AddrVT)), Val, DAG.getConstant(BytesLeft, dl, SizeVT), Align, isVolatile, false, DstPtrInfo.getWithOffset(Offset)); } // TODO: Use a TokenFactor, as in memcpy, instead of a single chain. return Chain; }
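// A minimal standalone sketch (not LLVM code) of the tail handling after
// REP STOS above: the wide stores cover SizeVal - BytesLeft bytes, and a
// narrow memset finishes the last 1-7 bytes at that offset.
#include <cassert>
#include <cstdint>
#include <cstring>

static void memsetWithTail(uint8_t *Dst, uint8_t Val, uint64_t SizeVal,
                           unsigned EltBytes) {
  uint64_t BytesLeft = SizeVal % EltBytes;
  uint64_t Covered = SizeVal - BytesLeft;       // bytes written by REP STOS
  std::memset(Dst, Val, Covered);               // stands in for the wide stores
  if (BytesLeft)
    std::memset(Dst + Covered, Val, BytesLeft); // the trailing bytes
}

int main() {
  uint8_t Buf[11];
  memsetWithTail(Buf, 0xAB, sizeof(Buf), 4);
  for (uint8_t B : Buf)
    assert(B == 0xAB);
  return 0;
}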