std::pair<unsigned, const TargetRegisterClass *> WebAssemblyTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { // First, see if this is a constraint that directly corresponds to a // WebAssembly register class. if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': assert(VT != MVT::iPTR && "Pointer MVT not expected here"); if (Subtarget->hasSIMD128() && VT.isVector()) { if (VT.getSizeInBits() == 128) return std::make_pair(0U, &WebAssembly::V128RegClass); } if (VT.isInteger() && !VT.isVector()) { if (VT.getSizeInBits() <= 32) return std::make_pair(0U, &WebAssembly::I32RegClass); if (VT.getSizeInBits() <= 64) return std::make_pair(0U, &WebAssembly::I64RegClass); } break; default: break; } } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); }
/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries /// to emit an instruction with an immediate operand using FastEmit_ri. /// If that fails, it materializes the immediate into a register and try /// FastEmit_rr instead. unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm, MVT ImmType) { // If this is a multiply by a power of two, emit this as a shift left. if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) { Opcode = ISD::SHL; Imm = Log2_64(Imm); } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) { // div x, 8 -> srl x, 3 Opcode = ISD::SRL; Imm = Log2_64(Imm); } // Horrible hack (to be removed), check to make sure shift amounts are // in-range. if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) && Imm >= VT.getSizeInBits()) return 0; // First check if immediate type is legal. If not, we can't use the ri form. unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); if (ResultReg != 0) return ResultReg; unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm); if (MaterialReg == 0) { // This is a bit ugly/slow, but failing here means falling out of // fast-isel, which would be very slow. const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits()); MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); } return FastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, /*Kill=*/true); }
static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { SDNode *Node = Op.getNode(); MVT VT = Node->getValueType(0); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); DebugLoc dl = Node->getDebugLoc(); SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0); // Increment the pointer, VAList, to the next vaarg SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList, DAG.getConstant(VT.getSizeInBits()/8, MVT::i32)); // Store the incremented VAList to the legalized pointer InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr, VAListPtr, SV, 0); // Load the actual argument out of the pointer VAList, unless this is an // f64 load. if (VT != MVT::f64) return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0); // Otherwise, load it as i64, then do a bitconvert. SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0); // Bit-Convert the value to f64. SDValue Ops[2] = { DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, V), V.getValue(1) }; return DAG.getMergeValues(Ops, 2, dl); }
SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { assert(ISD::isNormalStore(N) && "This routine only for normal stores!"); assert(OpNo == 1 && "Can only expand the stored value so far"); DebugLoc dl = N->getDebugLoc(); StoreSDNode *St = cast<StoreSDNode>(N); MVT NVT = TLI.getTypeToTransformTo(St->getValue().getValueType()); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); int SVOffset = St->getSrcValueOffset(); unsigned Alignment = St->getAlignment(); bool isVolatile = St->isVolatile(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); unsigned IncrementSize = NVT.getSizeInBits() / 8; SDValue Lo, Hi; GetExpandedOp(St->getValue(), Lo, Hi); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset, isVolatile, Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(), SVOffset + IncrementSize, isVolatile, MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); }
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT, TargetLoweringBase *TLI) { // Figure out the right, legal destination reg to copy into. unsigned NumElts = VT.getVectorNumElements(); MVT EltTy = VT.getVectorElementType(); unsigned NumVectorRegs = 1; // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we // could break down into LHS/RHS like LegalizeDAG does. if (!isPowerOf2_32(NumElts)) { NumVectorRegs = NumElts; NumElts = 1; } // Divide the input until we get to a supported size. This will always // end with a scalar if the target doesn't support vectors. while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { NumElts >>= 1; NumVectorRegs <<= 1; } NumIntermediates = NumVectorRegs; MVT NewVT = MVT::getVectorVT(EltTy, NumElts); if (!TLI->isTypeLegal(NewVT)) NewVT = EltTy; IntermediateVT = NewVT; unsigned NewVTSize = NewVT.getSizeInBits(); // Convert sizes such as i33 to i64. if (!isPowerOf2_32(NewVTSize)) NewVTSize = NextPowerOf2(NewVTSize); MVT DestVT = TLI->getRegisterType(NewVT); RegisterVT = DestVT; if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. return NumVectorRegs; }
// setGroupSize sets 'SizeInfo' to the size(number of elements) of group // inside mask a shuffleMask. A mask contains exactly 3 groups, where // each group is a monotonically increasing sequence with stride 3. // For example shuffleMask {0,3,6,1,4,7,2,5} => {3,3,2} static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) { int VectorSize = VT.getSizeInBits(); int VF = VT.getVectorNumElements() / std::max(VectorSize / 128, 1); for (int i = 0, FirstGroupElement = 0; i < 3; i++) { int GroupSize = std::ceil((VF - FirstGroupElement) / 3.0); SizeInfo.push_back(GroupSize); FirstGroupElement = ((GroupSize)*3 + FirstGroupElement) % VF; } }
// createShuffleStride returns shuffle mask of size N. // The shuffle pattern is as following : // {0, Stride%(VF/Lane), (2*Stride%(VF/Lane))...(VF*Stride/Lane)%(VF/Lane), // (VF/ Lane) ,(VF / Lane)+Stride%(VF/Lane),..., // (VF / Lane)+(VF*Stride/Lane)%(VF/Lane)} // Where Lane is the # of lanes in a register: // VectorSize = 128 => Lane = 1 // VectorSize = 256 => Lane = 2 // For example shuffle pattern for VF 16 register size 256 -> lanes = 2 // {<[0|3|6|1|4|7|2|5]-[8|11|14|9|12|15|10|13]>} static void createShuffleStride(MVT VT, int Stride, SmallVectorImpl<uint32_t> &Mask) { int VectorSize = VT.getSizeInBits(); int VF = VT.getVectorNumElements(); int LaneCount = std::max(VectorSize / 128, 1); for (int Lane = 0; Lane < LaneCount; Lane++) for (int i = 0, LaneSize = VF / LaneCount; i != LaneSize; ++i) Mask.push_back((i * Stride) % LaneSize + LaneSize * Lane); }
// genShuffleBland - Creates shuffle according to two vectors.This function is // only works on instructions with lane inside 256 registers. According to // the mask 'Mask' creates a new Mask 'Out' by the offset of the mask. The // offset amount depends on the two integer, 'LowOffset' and 'HighOffset'. // Where the 'LowOffset' refers to the first vector and the highOffset refers to // the second vector. // |a0....a5,b0....b4,c0....c4|a16..a21,b16..b20,c16..c20| // |c5...c10,a5....a9,b5....b9|c21..c26,a22..a26,b21..b25| // |b10..b15,c11..c15,a10..a15|b26..b31,c27..c31,a27..a31| // For the sequence to work as a mirror to the load. // We must consider the elements order as above. // In this function we are combining two types of shuffles. // The first one is vpshufed and the second is a type of "blend" shuffle. // By computing the shuffle on a sequence of 16 elements(one lane) and add the // correct offset. We are creating a vpsuffed + blend sequence between two // shuffles. static void genShuffleBland(MVT VT, ArrayRef<uint32_t> Mask, SmallVectorImpl<uint32_t> &Out, int LowOffset, int HighOffset) { assert(VT.getSizeInBits() >= 256 && "This function doesn't accept width smaller then 256"); unsigned NumOfElm = VT.getVectorNumElements(); for (unsigned i = 0; i < Mask.size(); i++) Out.push_back(Mask[i] + LowOffset); for (unsigned i = 0; i < Mask.size(); i++) Out.push_back(Mask[i] + HighOffset + NumOfElm); }
bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { // On the second pass, go through the HVAs only. if (ArgFlags.isSecArgPass()) { if (ArgFlags.isHva()) return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State); return true; } // Process only vector types as defined by vectorcall spec: // "A vector type is either a floating-point type, for example, // a float or double, or an SIMD vector type, for example, __m128 or __m256". if (!(ValVT.isFloatingPoint() || (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) { // If R9 was already assigned it means that we are after the fourth element // and because this is not an HVA / Vector type, we need to allocate // shadow XMM register. if (State.isAllocated(X86::R9)) { // Assign shadow XMM register. (void)State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT)); } return false; } if (!ArgFlags.isHva() || ArgFlags.isHvaStart()) { // Assign shadow GPR register. (void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs()); // Assign XMM register - (shadow for HVA and non-shadow for non HVA). if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) { // In Vectorcall Calling convention, additional shadow stack can be // created on top of the basic 32 bytes of win64. // It can happen if the fifth or sixth argument is vector type or HVA. // At that case for each argument a shadow stack of 8 bytes is allocated. if (Reg == X86::XMM4 || Reg == X86::XMM5) State.AllocateStack(8, 8); if (!ArgFlags.isHva()) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return true; // Allocated a register - Stop the search. } } } // If this is an HVA - Stop the search, // otherwise continue the search. return ArgFlags.isHva(); }
LLT::LLT(MVT VT) { if (VT.isVector()) { SizeInBits = VT.getVectorElementType().getSizeInBits(); ElementsOrAddrSpace = VT.getVectorNumElements(); Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector; } else if (VT.isValid()) { // Aggregates are no different from real scalars as far as GlobalISel is // concerned. Kind = Scalar; SizeInBits = VT.getSizeInBits(); ElementsOrAddrSpace = 1; assert(SizeInBits != 0 && "invalid zero-sized type"); } else { Kind = Invalid; SizeInBits = ElementsOrAddrSpace = 0; } }
// group2Shuffle reorder the shuffle stride back into continuous order. // For example For VF16 with Mask1 = {0,3,6,9,12,15,2,5,8,11,14,1,4,7,10,13} => // MaskResult = {0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5}. static void group2Shuffle(MVT VT, SmallVectorImpl<uint32_t> &Mask, SmallVectorImpl<uint32_t> &Output) { int IndexGroup[3] = {0, 0, 0}; int Index = 0; int VectorWidth = VT.getSizeInBits(); int VF = VT.getVectorNumElements(); // Find the index of the different groups. int Lane = (VectorWidth / 128 > 0) ? VectorWidth / 128 : 1; for (int i = 0; i < 3; i++) { IndexGroup[(Index * 3) % (VF / Lane)] = Index; Index += Mask[i]; } // According to the index compute the convert mask. for (int i = 0; i < VF / Lane; i++) { Output.push_back(IndexGroup[i % 3]); IndexGroup[i % 3]++; } }
static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { static const unsigned ArgRegs[] = { MBlaze::R5, MBlaze::R6, MBlaze::R7, MBlaze::R8, MBlaze::R9, MBlaze::R10 }; const unsigned NumArgRegs = array_lengthof(ArgRegs); unsigned Reg = State.AllocateReg(ArgRegs, NumArgRegs); if (!Reg) return false; unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; State.AllocateStack(SizeInBytes, SizeInBytes); State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return true; }
// DecodePALIGNRMask returns the shuffle mask of vpalign instruction. // vpalign works according to lanes // Where Lane is the # of lanes in a register: // VectorWide = 128 => Lane = 1 // VectorWide = 256 => Lane = 2 // For Lane = 1 shuffle pattern is: {DiffToJump,...,DiffToJump+VF-1}. // For Lane = 2 shuffle pattern is: // {DiffToJump,...,VF/2-1,VF,...,DiffToJump+VF-1}. // Imm variable sets the offset amount. The result of the // function is stored inside ShuffleMask vector and it built as described in // the begin of the description. AlignDirection is a boolean that indecat the // direction of the alignment. (false - align to the "right" side while true - // align to the "left" side) static void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<uint32_t> &ShuffleMask, bool AlignDirection = true, bool Unary = false) { unsigned NumElts = VT.getVectorNumElements(); unsigned NumLanes = std::max((int)VT.getSizeInBits() / 128, 1); unsigned NumLaneElts = NumElts / NumLanes; Imm = AlignDirection ? Imm : (NumLaneElts - Imm); unsigned Offset = Imm * (VT.getScalarSizeInBits() / 8); for (unsigned l = 0; l != NumElts; l += NumLaneElts) { for (unsigned i = 0; i != NumLaneElts; ++i) { unsigned Base = i + Offset; // if i+offset is out of this lane then we actually need the other source // If Unary the other source is the first source. if (Base >= NumLaneElts) Base = Unary ? Base % NumLaneElts : Base + NumElts - NumLaneElts; ShuffleMask.push_back(Base + l); } } }
bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { // On the second pass, go through the HVAs only. if (ArgFlags.isSecArgPass()) { if (ArgFlags.isHva()) return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State); return true; } // Process only vector types as defined by vectorcall spec: // "A vector type is either a floating point type, for example, // a float or double, or an SIMD vector type, for example, __m128 or __m256". if (!(ValVT.isFloatingPoint() || (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) { return false; } if (ArgFlags.isHva()) return true; // If this is an HVA - Stop the search. // Assign XMM register. if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return true; } // In case we did not find an available XMM register for a vector - // pass it indirectly. // It is similar to CCPassIndirect, with the addition of inreg. if (!ValVT.isFloatingPoint()) { LocVT = MVT::i32; LocInfo = CCValAssign::Indirect; ArgFlags.setInReg(); } return false; // No register was assigned - Continue the search. }
void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(ISD::isNormalLoad(N) && "This routine only for normal loads!"); DebugLoc dl = N->getDebugLoc(); LoadSDNode *LD = cast<LoadSDNode>(N); MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0)); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); int SVOffset = LD->getSrcValueOffset(); unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, isVolatile, Alignment); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset+IncrementSize, isVolatile, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the // other one. Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); // Handle endianness of the load. if (TLI.isBigEndian()) std::swap(Lo, Hi); // Modified the chain - switch anything that used the old chain to use // the new one. ReplaceValueWith(SDValue(N, 1), Chain); }
SDValue LanaiTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); unsigned VTBits = VT.getSizeInBits(); SDLoc dl(Op); SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); // Performs the following for a >> b: // unsigned r_high = a_high >> b; // r_high = (32 - b <= 0) ? 0 : r_high; // // unsigned r_low = a_low >> b; // r_low = (32 - b <= 0) ? r_high : r_low; // r_low = (b == 0) ? r_low : r_low | (a_high << (32 - b)); // return (unsigned long long)r_high << 32 | r_low; // Note: This takes advantage of Lanai's shift behavior to avoid needing to // mask the shift amount. SDValue Zero = DAG.getConstant(0, dl, MVT::i32); SDValue NegatedPlus32 = DAG.getNode( ISD::SUB, dl, MVT::i32, DAG.getConstant(VTBits, dl, MVT::i32), ShAmt); SDValue SetCC = DAG.getSetCC(dl, MVT::i32, NegatedPlus32, Zero, ISD::SETLE); SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i32, ShOpHi, ShAmt); Hi = DAG.getSelect(dl, MVT::i32, SetCC, Zero, Hi); SDValue Lo = DAG.getNode(ISD::SRL, dl, MVT::i32, ShOpLo, ShAmt); Lo = DAG.getSelect(dl, MVT::i32, SetCC, Hi, Lo); SDValue CarryBits = DAG.getNode(ISD::SHL, dl, MVT::i32, ShOpHi, NegatedPlus32); SDValue ShiftIsZero = DAG.getSetCC(dl, MVT::i32, ShAmt, Zero, ISD::SETEQ); Lo = DAG.getSelect(dl, MVT::i32, ShiftIsZero, Lo, DAG.getNode(ISD::OR, dl, MVT::i32, Lo, CarryBits)); SDValue Ops[2] = {Lo, Hi}; return DAG.getMergeValues(Ops, dl); }
bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { CallingConv::ID CC = CLI.CallConv; bool IsTailCall = CLI.IsTailCall; bool IsVarArg = CLI.IsVarArg; const Value *Callee = CLI.Callee; // const char *SymName = CLI.SymName; // Allow SelectionDAG isel to handle tail calls. if (IsTailCall) return false; // Let SDISel handle vararg functions. if (IsVarArg) return false; // FIXME: Only handle *simple* calls for now. MVT RetVT; if (CLI.RetTy->isVoidTy()) RetVT = MVT::isVoid; else if (!isTypeLegal(CLI.RetTy, RetVT)) return false; for (auto Flag : CLI.OutFlags) if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal()) return false; // Set up the argument vectors. SmallVector<MVT, 16> OutVTs; OutVTs.reserve(CLI.OutVals.size()); for (auto *Val : CLI.OutVals) { MVT VT; if (!isTypeLegal(Val->getType(), VT) && !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) return false; // We don't handle vector parameters yet. if (VT.isVector() || VT.getSizeInBits() > 64) return false; OutVTs.push_back(VT); } Address Addr; if (!computeCallAddress(Callee, Addr)) return false; // Handle the arguments now that we've gotten them. unsigned NumBytes; if (!processCallArgs(CLI, OutVTs, NumBytes)) return false; // Issue the call. unsigned DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32); emitInst(TargetOpcode::COPY, Mips::T9).addReg(DestAddress); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::JALR), Mips::RA).addReg(Mips::T9); // Add implicit physical register uses to the call. for (auto Reg : CLI.OutRegs) MIB.addReg(Reg, RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). MIB.addRegMask(TRI.getCallPreservedMask(CC)); CLI.Call = MIB; // Add implicit physical register uses to the call. for (auto Reg : CLI.OutRegs) MIB.addReg(Reg, RegState::Implicit); // Add a register mask with the call-preserved registers. Proper // defs for return values will be added by setPhysRegsDeadExcept(). MIB.addRegMask(TRI.getCallPreservedMask(CC)); CLI.Call = MIB; // Finish off the call including any return values. return finishCall(CLI, RetVT, NumBytes); }
bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &OutVTs, unsigned &NumBytes) { CallingConv::ID CC = CLI.CallConv; SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); // Get a count of how many bytes are to be pushed on the stack. NumBytes = CCInfo.getNextStackOffset(); // This is the minimum argument area used for A0-A3. if (NumBytes < 16) NumBytes = 16; emitInst(Mips::ADJCALLSTACKDOWN).addImm(16); // Process the args. MVT firstMVT; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; const Value *ArgVal = CLI.OutVals[VA.getValNo()]; MVT ArgVT = OutVTs[VA.getValNo()]; if (i == 0) { firstMVT = ArgVT; if (ArgVT == MVT::f32) { VA.convertToReg(Mips::F12); } else if (ArgVT == MVT::f64) { VA.convertToReg(Mips::D6); } } else if (i == 1) { if ((firstMVT == MVT::f32) || (firstMVT == MVT::f64)) { if (ArgVT == MVT::f32) { VA.convertToReg(Mips::F14); } else if (ArgVT == MVT::f64) { VA.convertToReg(Mips::D7); } } } if (((ArgVT == MVT::i32) || (ArgVT == MVT::f32)) && VA.isMemLoc()) { switch (VA.getLocMemOffset()) { case 0: VA.convertToReg(Mips::A0); break; case 4: VA.convertToReg(Mips::A1); break; case 8: VA.convertToReg(Mips::A2); break; case 12: VA.convertToReg(Mips::A3); break; default: break; } } unsigned ArgReg = getRegForValue(ArgVal); if (!ArgReg) return false; // Handle arg promotion: SExt, ZExt, AExt. switch (VA.getLocInfo()) { case CCValAssign::Full: break; case CCValAssign::AExt: case CCValAssign::SExt: { MVT DestVT = VA.getLocVT(); MVT SrcVT = ArgVT; ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); if (!ArgReg) return false; break; } case CCValAssign::ZExt: { MVT DestVT = VA.getLocVT(); MVT SrcVT = ArgVT; ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); if (!ArgReg) return false; break; } default: llvm_unreachable("Unknown arg promotion!"); } // Now copy/store arg to correct locations. if (VA.isRegLoc() && !VA.needsCustom()) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); CLI.OutRegs.push_back(VA.getLocReg()); } else if (VA.needsCustom()) { llvm_unreachable("Mips does not use custom args."); return false; } else { // // FIXME: This path will currently return false. It was copied // from the AArch64 port and should be essentially fine for Mips too. // The work to finish up this path will be done in a follow-on patch. // assert(VA.isMemLoc() && "Assuming store on stack."); // Don't emit stores for undef values. if (isa<UndefValue>(ArgVal)) continue; // Need to store on the stack. // FIXME: This alignment is incorrect but this path is disabled // for now (will return false). We need to determine the right alignment // based on the normal alignment for the underlying machine type. // unsigned ArgSize = RoundUpToAlignment(ArgVT.getSizeInBits(), 4); unsigned BEAlign = 0; if (ArgSize < 8 && !Subtarget->isLittle()) BEAlign = 8 - ArgSize; Address Addr; Addr.setKind(Address::RegBase); Addr.setReg(Mips::SP); Addr.setOffset(VA.getLocMemOffset() + BEAlign); unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( MachinePointerInfo::getStack(Addr.getOffset()), MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); (void)(MMO); // if (!emitStore(ArgVT, ArgReg, Addr, MMO)) return false; // can't store on the stack yet. } } return true; }
SDValue Cpu0TargetLowering::LowerCall(SDValue InChain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { #if 1 // Cpu0 target does not yet support tail call optimization. isTailCall = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering(); bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; Cpu0FunctionInfo *Cpu0FI = MF.getInfo<Cpu0FunctionInfo>(); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CC_Cpu0); // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); // Chain is the output chain of the last Load/Store or CopyToReg node. // ByValChain is the output chain of the last Memcpy node created for copying // byval arguments to the stack. SDValue Chain, CallSeqStart, ByValChain; SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal); ByValChain = InChain; #if 0 // If this is the first call, create a stack frame object that points to // a location to which .cprestore saves $gp. if (IsO32 && IsPIC && Cpu0FI->globalBaseRegFixed() && !Cpu0FI->getGPFI()) Cpu0FI->setGPFI(MFI->CreateFixedObject(4, 0, true)); #endif // Get the frame index of the stack frame object that points to the location // of dynamically allocated area on the stack. int DynAllocFI = Cpu0FI->getDynAllocFI(); #if 0 // Update size of the maximum argument space. // For O32, a minimum of four words (16 bytes) of argument space is // allocated. if (IsO32) NextStackOffset = std::max(NextStackOffset, (unsigned)16); #endif unsigned MaxCallFrameSize = Cpu0FI->getMaxCallFrameSize(); if (MaxCallFrameSize < NextStackOffset) { Cpu0FI->setMaxCallFrameSize(NextStackOffset); // Set the offsets relative to $sp of the $gp restore slot and dynamically // allocated stack space. These offsets must be aligned to a boundary // determined by the stack alignment of the ABI. unsigned StackAlignment = TFL->getStackAlignment(); NextStackOffset = (NextStackOffset + StackAlignment - 1) / StackAlignment * StackAlignment; if (Cpu0FI->needGPSaveRestore()) MFI->setObjectOffset(Cpu0FI->getGPFI(), NextStackOffset); MFI->setObjectOffset(DynAllocFI, NextStackOffset); } // With EABI is it possible to have 16 args on registers. SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; SmallVector<SDValue, 8> MemOpChains; int FirstFI = -MFI->getNumFixedObjects() - 1, LastFI = 0; // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { SDValue Arg = OutVals[i]; CCValAssign &VA = ArgLocs[i]; MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT(); ISD::ArgFlagsTy Flags = Outs[i].Flags; // ByVal Arg. if (Flags.isByVal()) { assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); #if 0 if (IsO32) WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); #endif #if 0 else PassByValArg64(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); #endif continue; } // Promote the value if needed. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: #if 0 if (VA.isRegLoc()) { if ((ValVT == MVT::f32 && LocVT == MVT::i32) || (ValVT == MVT::f64 && LocVT == MVT::i64)) Arg = DAG.getNode(ISD::BITCAST, dl, LocVT, Arg); else if (ValVT == MVT::f64 && LocVT == MVT::i32) { SDValue Lo = DAG.getNode(Cpu0ISD::ExtractElementF64, dl, MVT::i32, Arg, DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(Cpu0ISD::ExtractElementF64, dl, MVT::i32, Arg, DAG.getConstant(1, MVT::i32)); if (!Subtarget->isLittle()) std::swap(Lo, Hi); unsigned LocRegLo = VA.getLocReg(); unsigned LocRegHigh = getNextIntArgReg(LocRegLo); RegsToPass.push_back(std::make_pair(LocRegLo, Lo)); RegsToPass.push_back(std::make_pair(LocRegHigh, Hi)); continue; } } #else assert("CCValAssign::Full:"); // Gamma debug #endif break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, LocVT, Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, LocVT, Arg); break; case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, dl, LocVT, Arg); break; } // Arguments that can be passed on register must be kept at // RegsToPass vector if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); continue; } // Register can't get to this point... assert(VA.isMemLoc()); // Create the frame index object for this incoming parameter LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), true); SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy()); // emit ISD::STORE whichs stores the // parameter value to a stack Location MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); } // Extend range of indices of frame objects for outgoing arguments that were // created during this function call. Skip this step if no such objects were // created. if (LastFI) Cpu0FI->extendOutArgFIRange(FirstFI, LastFI); // If a memcpy has been created to copy a byval arg to a stack, replace the // chain input of CallSeqStart with ByValChain. if (InChain != ByValChain) DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain, NextStackOffsetVal); // Transform all store nodes into one single node because all store // nodes are independent of each other. if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. unsigned char OpFlag; #if 0 // cpu0 int 32 only bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25 #else bool IsPICCall = IsPIC; // true if calls are translated to jalr $25 #endif bool GlobalOrExternal = false; SDValue CalleeLo; if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { if (IsPICCall && G->getGlobal()->hasInternalLinkage()) { OpFlag = Cpu0II::MO_GOT; #if 0 unsigned char LoFlag = IsO32 ? Cpu0II::MO_ABS_LO : Cpu0II::MO_GOT_OFST; #else unsigned char LoFlag = Cpu0II::MO_ABS_LO; #endif Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0, OpFlag); CalleeLo = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0, LoFlag); } else { OpFlag = IsPICCall ? Cpu0II::MO_GOT_CALL : Cpu0II::MO_NO_FLAG; Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0, OpFlag); } GlobalOrExternal = true; } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { if (!IsPIC) // static OpFlag = Cpu0II::MO_NO_FLAG; else // O32 & PIC OpFlag = Cpu0II::MO_GOT_CALL; Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), OpFlag); GlobalOrExternal = true; } SDValue InFlag; // Create nodes that load address of callee and copy it to T9 if (IsPICCall) { if (GlobalOrExternal) { // Load callee address Callee = DAG.getNode(Cpu0ISD::Wrapper, dl, getPointerTy(), GetGlobalReg(DAG, getPointerTy()), Callee); SDValue LoadValue = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(), false, false, false, 0); // Use GOT+LO if callee has internal linkage. if (CalleeLo.getNode()) { SDValue Lo = DAG.getNode(Cpu0ISD::Lo, dl, getPointerTy(), CalleeLo); Callee = DAG.getNode(ISD::ADD, dl, getPointerTy(), LoadValue, Lo); } else Callee = LoadValue; } } // T9 should contain the address of the callee function if // -reloction-model=pic or it is an indirect call. if (IsPICCall || !GlobalOrExternal) { // copy to T9 unsigned T9Reg = Cpu0::T9; Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0)); InFlag = Chain.getValue(1); Callee = DAG.getRegister(T9Reg, getPointerTy()); } // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. // The InFlag in necessary since all emitted instructions must be // stuck together. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } // Cpu0JmpLink = #chain, #target_address, #opt_in_flags... // = Chain, Callee, Reg#1, Reg#2, ... // // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); Ops.push_back(Callee); // Add argument registers to the end of the list so that they are // known live into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); if (InFlag.getNode()) Ops.push_back(InFlag); Chain = DAG.getNode(Cpu0ISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NextStackOffset, true), DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, InVals); #else return InChain; #endif }
SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { DebugLoc dl = N->getDebugLoc(); StoreSDNode *ST = cast<StoreSDNode>(N); EVT StoreVT = ST->getMemoryVT(); SDNode *NVPTXST = NULL; // do not support pre/post inc/dec if (ST->isIndexed()) return NULL; if (!StoreVT.isSimple()) return NULL; // Address Space Setting unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget); // Volatile Setting // - .volatile is only availalble for .global and .shared bool isVolatile = ST->isVolatile(); if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) isVolatile = false; // Vector Setting MVT SimpleVT = StoreVT.getSimpleVT(); unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; if (SimpleVT.isVector()) { unsigned num = SimpleVT.getVectorNumElements(); if (num == 2) vecType = NVPTX::PTXLdStInstCode::V2; else if (num == 4) vecType = NVPTX::PTXLdStInstCode::V4; else return NULL; } // Type Setting: toType + toTypeWidth // - for integer type, always use 'u' // MVT ScalarVT = SimpleVT.getScalarType(); unsigned toTypeWidth = ScalarVT.getSizeInBits(); unsigned int toType; if (ScalarVT.isFloatingPoint()) toType = NVPTX::PTXLdStInstCode::Float; else toType = NVPTX::PTXLdStInstCode::Unsigned; // Create the machine instruction DAG SDValue Chain = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); SDValue Addr; SDValue Offset, Base; unsigned Opcode; MVT::SimpleValueType SourceVT = N1.getNode()->getValueType(0).getSimpleVT().SimpleTy; if (SelectDirectAddr(N2, Addr)) { switch (SourceVT) { case MVT::i8: Opcode = NVPTX::ST_i8_avar; break; case MVT::i16: Opcode = NVPTX::ST_i16_avar; break; case MVT::i32: Opcode = NVPTX::ST_i32_avar; break; case MVT::i64: Opcode = NVPTX::ST_i64_avar; break; case MVT::f32: Opcode = NVPTX::ST_f32_avar; break; case MVT::f64: Opcode = NVPTX::ST_f64_avar; break; default: return NULL; } SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(toType), getI32Imm(toTypeWidth), Addr, Chain }; NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8); } else if (Subtarget.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { switch (SourceVT) { case MVT::i8: Opcode = NVPTX::ST_i8_asi; break; case MVT::i16: Opcode = NVPTX::ST_i16_asi; break; case MVT::i32: Opcode = NVPTX::ST_i32_asi; break; case MVT::i64: Opcode = NVPTX::ST_i64_asi; break; case MVT::f32: Opcode = NVPTX::ST_f32_asi; break; case MVT::f64: Opcode = NVPTX::ST_f64_asi; break; default: return NULL; } SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(toType), getI32Imm(toTypeWidth), Base, Offset, Chain }; NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); } else if (Subtarget.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset) : SelectADDRri(N2.getNode(), N2, Base, Offset)) { if (Subtarget.is64Bit()) { switch (SourceVT) { case MVT::i8: Opcode = NVPTX::ST_i8_ari_64; break; case MVT::i16: Opcode = NVPTX::ST_i16_ari_64; break; case MVT::i32: Opcode = NVPTX::ST_i32_ari_64; break; case MVT::i64: Opcode = NVPTX::ST_i64_ari_64; break; case MVT::f32: Opcode = NVPTX::ST_f32_ari_64; break; case MVT::f64: Opcode = NVPTX::ST_f64_ari_64; break; default: return NULL; } } else { switch (SourceVT) { case MVT::i8: Opcode = NVPTX::ST_i8_ari; break; case MVT::i16: Opcode = NVPTX::ST_i16_ari; break; case MVT::i32: Opcode = NVPTX::ST_i32_ari; break; case MVT::i64: Opcode = NVPTX::ST_i64_ari; break; case MVT::f32: Opcode = NVPTX::ST_f32_ari; break; case MVT::f64: Opcode = NVPTX::ST_f64_ari; break; default: return NULL; } } SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(toType), getI32Imm(toTypeWidth), Base, Offset, Chain }; NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); } else { if (Subtarget.is64Bit()) { switch (SourceVT) { case MVT::i8: Opcode = NVPTX::ST_i8_areg_64; break; case MVT::i16: Opcode = NVPTX::ST_i16_areg_64; break; case MVT::i32: Opcode = NVPTX::ST_i32_areg_64; break; case MVT::i64: Opcode = NVPTX::ST_i64_areg_64; break; case MVT::f32: Opcode = NVPTX::ST_f32_areg_64; break; case MVT::f64: Opcode = NVPTX::ST_f64_areg_64; break; default: return NULL; } } else { switch (SourceVT) { case MVT::i8: Opcode = NVPTX::ST_i8_areg; break; case MVT::i16: Opcode = NVPTX::ST_i16_areg; break; case MVT::i32: Opcode = NVPTX::ST_i32_areg; break; case MVT::i64: Opcode = NVPTX::ST_i64_areg; break; case MVT::f32: Opcode = NVPTX::ST_f32_areg; break; case MVT::f64: Opcode = NVPTX::ST_f64_areg; break; default: return NULL; } } SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(toType), getI32Imm(toTypeWidth), N2, Chain }; NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8); } if (NVPTXST != NULL) { MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1); } return NVPTXST; }
SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDValue Addr, Offset, Base; unsigned Opcode; DebugLoc DL = N->getDebugLoc(); SDNode *LD; MemSDNode *MemSD = cast<MemSDNode>(N); EVT LoadedVT = MemSD->getMemoryVT(); if (!LoadedVT.isSimple()) return NULL; // Address Space Setting unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); // Volatile Setting // - .volatile is only availalble for .global and .shared bool IsVolatile = MemSD->isVolatile(); if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) IsVolatile = false; // Vector Setting MVT SimpleVT = LoadedVT.getSimpleVT(); // Type Setting: fromType + fromTypeWidth // // Sign : ISD::SEXTLOAD // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the // type is integer // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float MVT ScalarVT = SimpleVT.getScalarType(); unsigned FromTypeWidth = ScalarVT.getSizeInBits(); unsigned int FromType; // The last operand holds the original LoadSDNode::getExtensionType() value unsigned ExtensionType = cast<ConstantSDNode>( N->getOperand(N->getNumOperands() - 1))->getZExtValue(); if (ExtensionType == ISD::SEXTLOAD) FromType = NVPTX::PTXLdStInstCode::Signed; else if (ScalarVT.isFloatingPoint()) FromType = NVPTX::PTXLdStInstCode::Float; else FromType = NVPTX::PTXLdStInstCode::Unsigned; unsigned VecType; switch (N->getOpcode()) { case NVPTXISD::LoadV2: VecType = NVPTX::PTXLdStInstCode::V2; break; case NVPTXISD::LoadV4: VecType = NVPTX::PTXLdStInstCode::V4; break; default: return NULL; } EVT EltVT = N->getValueType(0); if (SelectDirectAddr(Op1, Addr)) { switch (N->getOpcode()) { default: return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::LDV_i8_v2_avar; break; case MVT::i16: Opcode = NVPTX::LDV_i16_v2_avar; break; case MVT::i32: Opcode = NVPTX::LDV_i32_v2_avar; break; case MVT::i64: Opcode = NVPTX::LDV_i64_v2_avar; break; case MVT::f32: Opcode = NVPTX::LDV_f32_v2_avar; break; case MVT::f64: Opcode = NVPTX::LDV_f64_v2_avar; break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::LDV_i8_v4_avar; break; case MVT::i16: Opcode = NVPTX::LDV_i16_v4_avar; break; case MVT::i32: Opcode = NVPTX::LDV_i32_v4_avar; break; case MVT::f32: Opcode = NVPTX::LDV_f32_v4_avar; break; } break; } SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), getI32Imm(VecType), getI32Imm(FromType), getI32Imm(FromTypeWidth), Addr, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); } else if (Subtarget.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { switch (N->getOpcode()) { default: return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::LDV_i8_v2_asi; break; case MVT::i16: Opcode = NVPTX::LDV_i16_v2_asi; break; case MVT::i32: Opcode = NVPTX::LDV_i32_v2_asi; break; case MVT::i64: Opcode = NVPTX::LDV_i64_v2_asi; break; case MVT::f32: Opcode = NVPTX::LDV_f32_v2_asi; break; case MVT::f64: Opcode = NVPTX::LDV_f64_v2_asi; break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::LDV_i8_v4_asi; break; case MVT::i16: Opcode = NVPTX::LDV_i16_v4_asi; break; case MVT::i32: Opcode = NVPTX::LDV_i32_v4_asi; break; case MVT::f32: Opcode = NVPTX::LDV_f32_v4_asi; break; } break; } SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), getI32Imm(VecType), getI32Imm(FromType), getI32Imm(FromTypeWidth), Base, Offset, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); } else if (Subtarget.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { default: return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari_64; break; case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari_64; break; case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari_64; break; case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari_64; break; case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari_64; break; case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari_64; break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari_64; break; case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari_64; break; case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari_64; break; case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari_64; break; } break; } } else { switch (N->getOpcode()) { default: return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari; break; case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari; break; case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari; break; case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari; break; case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari; break; case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari; break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari; break; case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari; break; case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari; break; case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari; break; } break; } } SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), getI32Imm(VecType), getI32Imm(FromType), getI32Imm(FromTypeWidth), Base, Offset, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); } else { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { default: return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg_64; break; case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg_64; break; case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg_64; break; case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg_64; break; case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg_64; break; case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg_64; break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg_64; break; case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg_64; break; case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg_64; break; case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg_64; break; } break; } } else { switch (N->getOpcode()) { default: return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg; break; case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg; break; case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg; break; case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg; break; case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg; break; case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg; break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg; break; case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg; break; case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg; break; case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg; break; } break; } } SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), getI32Imm(VecType), getI32Imm(FromType), getI32Imm(FromTypeWidth), Op1, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); } MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); return LD; }
SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { DebugLoc dl = N->getDebugLoc(); LoadSDNode *LD = cast<LoadSDNode>(N); EVT LoadedVT = LD->getMemoryVT(); SDNode *NVPTXLD = NULL; // do not support pre/post inc/dec if (LD->isIndexed()) return NULL; if (!LoadedVT.isSimple()) return NULL; // Address Space Setting unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget); // Volatile Setting // - .volatile is only availalble for .global and .shared bool isVolatile = LD->isVolatile(); if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) isVolatile = false; // Vector Setting MVT SimpleVT = LoadedVT.getSimpleVT(); unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; if (SimpleVT.isVector()) { unsigned num = SimpleVT.getVectorNumElements(); if (num == 2) vecType = NVPTX::PTXLdStInstCode::V2; else if (num == 4) vecType = NVPTX::PTXLdStInstCode::V4; else return NULL; } // Type Setting: fromType + fromTypeWidth // // Sign : ISD::SEXTLOAD // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the // type is integer // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float MVT ScalarVT = SimpleVT.getScalarType(); unsigned fromTypeWidth = ScalarVT.getSizeInBits(); unsigned int fromType; if ((LD->getExtensionType() == ISD::SEXTLOAD)) fromType = NVPTX::PTXLdStInstCode::Signed; else if (ScalarVT.isFloatingPoint()) fromType = NVPTX::PTXLdStInstCode::Float; else fromType = NVPTX::PTXLdStInstCode::Unsigned; // Create the machine instruction DAG SDValue Chain = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue Addr; SDValue Offset, Base; unsigned Opcode; MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy; if (SelectDirectAddr(N1, Addr)) { switch (TargetVT) { case MVT::i8: Opcode = NVPTX::LD_i8_avar; break; case MVT::i16: Opcode = NVPTX::LD_i16_avar; break; case MVT::i32: Opcode = NVPTX::LD_i32_avar; break; case MVT::i64: Opcode = NVPTX::LD_i64_avar; break; case MVT::f32: Opcode = NVPTX::LD_f32_avar; break; case MVT::f64: Opcode = NVPTX::LD_f64_avar; break; default: return NULL; } SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(fromType), getI32Imm(fromTypeWidth), Addr, Chain }; NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7); } else if (Subtarget.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset) : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { switch (TargetVT) { case MVT::i8: Opcode = NVPTX::LD_i8_asi; break; case MVT::i16: Opcode = NVPTX::LD_i16_asi; break; case MVT::i32: Opcode = NVPTX::LD_i32_asi; break; case MVT::i64: Opcode = NVPTX::LD_i64_asi; break; case MVT::f32: Opcode = NVPTX::LD_f32_asi; break; case MVT::f64: Opcode = NVPTX::LD_f64_asi; break; default: return NULL; } SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(fromType), getI32Imm(fromTypeWidth), Base, Offset, Chain }; NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8); } else if (Subtarget.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset) : SelectADDRri(N1.getNode(), N1, Base, Offset)) { if (Subtarget.is64Bit()) { switch (TargetVT) { case MVT::i8: Opcode = NVPTX::LD_i8_ari_64; break; case MVT::i16: Opcode = NVPTX::LD_i16_ari_64; break; case MVT::i32: Opcode = NVPTX::LD_i32_ari_64; break; case MVT::i64: Opcode = NVPTX::LD_i64_ari_64; break; case MVT::f32: Opcode = NVPTX::LD_f32_ari_64; break; case MVT::f64: Opcode = NVPTX::LD_f64_ari_64; break; default: return NULL; } } else { switch (TargetVT) { case MVT::i8: Opcode = NVPTX::LD_i8_ari; break; case MVT::i16: Opcode = NVPTX::LD_i16_ari; break; case MVT::i32: Opcode = NVPTX::LD_i32_ari; break; case MVT::i64: Opcode = NVPTX::LD_i64_ari; break; case MVT::f32: Opcode = NVPTX::LD_f32_ari; break; case MVT::f64: Opcode = NVPTX::LD_f64_ari; break; default: return NULL; } } SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(fromType), getI32Imm(fromTypeWidth), Base, Offset, Chain }; NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8); } else { if (Subtarget.is64Bit()) { switch (TargetVT) { case MVT::i8: Opcode = NVPTX::LD_i8_areg_64; break; case MVT::i16: Opcode = NVPTX::LD_i16_areg_64; break; case MVT::i32: Opcode = NVPTX::LD_i32_areg_64; break; case MVT::i64: Opcode = NVPTX::LD_i64_areg_64; break; case MVT::f32: Opcode = NVPTX::LD_f32_areg_64; break; case MVT::f64: Opcode = NVPTX::LD_f64_areg_64; break; default: return NULL; } } else { switch (TargetVT) { case MVT::i8: Opcode = NVPTX::LD_i8_areg; break; case MVT::i16: Opcode = NVPTX::LD_i16_areg; break; case MVT::i32: Opcode = NVPTX::LD_i32_areg; break; case MVT::i64: Opcode = NVPTX::LD_i64_areg; break; case MVT::f32: Opcode = NVPTX::LD_f32_areg; break; case MVT::f64: Opcode = NVPTX::LD_f64_areg; break; default: return NULL; } } SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(fromType), getI32Imm(fromTypeWidth), N1, Chain }; NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7); } if (NVPTXLD != NULL) { MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1); } return NVPTXLD; }
SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDValue Addr, Offset, Base; unsigned Opcode; DebugLoc DL = N->getDebugLoc(); SDNode *ST; EVT EltVT = Op1.getValueType(); MemSDNode *MemSD = cast<MemSDNode>(N); EVT StoreVT = MemSD->getMemoryVT(); // Address Space Setting unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) { report_fatal_error("Cannot store to pointer that points to constant " "memory space"); } // Volatile Setting // - .volatile is only availalble for .global and .shared bool IsVolatile = MemSD->isVolatile(); if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) IsVolatile = false; // Type Setting: toType + toTypeWidth // - for integer type, always use 'u' assert(StoreVT.isSimple() && "Store value is not simple"); MVT ScalarVT = StoreVT.getSimpleVT().getScalarType(); unsigned ToTypeWidth = ScalarVT.getSizeInBits(); unsigned ToType; if (ScalarVT.isFloatingPoint()) ToType = NVPTX::PTXLdStInstCode::Float; else ToType = NVPTX::PTXLdStInstCode::Unsigned; SmallVector<SDValue, 12> StOps; SDValue N2; unsigned VecType; switch (N->getOpcode()) { case NVPTXISD::StoreV2: VecType = NVPTX::PTXLdStInstCode::V2; StOps.push_back(N->getOperand(1)); StOps.push_back(N->getOperand(2)); N2 = N->getOperand(3); break; case NVPTXISD::StoreV4: VecType = NVPTX::PTXLdStInstCode::V4; StOps.push_back(N->getOperand(1)); StOps.push_back(N->getOperand(2)); StOps.push_back(N->getOperand(3)); StOps.push_back(N->getOperand(4)); N2 = N->getOperand(5); break; default: return NULL; } StOps.push_back(getI32Imm(IsVolatile)); StOps.push_back(getI32Imm(CodeAddrSpace)); StOps.push_back(getI32Imm(VecType)); StOps.push_back(getI32Imm(ToType)); StOps.push_back(getI32Imm(ToTypeWidth)); if (SelectDirectAddr(N2, Addr)) { switch (N->getOpcode()) { default: return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::STV_i8_v2_avar; break; case MVT::i16: Opcode = NVPTX::STV_i16_v2_avar; break; case MVT::i32: Opcode = NVPTX::STV_i32_v2_avar; break; case MVT::i64: Opcode = NVPTX::STV_i64_v2_avar; break; case MVT::f32: Opcode = NVPTX::STV_f32_v2_avar; break; case MVT::f64: Opcode = NVPTX::STV_f64_v2_avar; break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::STV_i8_v4_avar; break; case MVT::i16: Opcode = NVPTX::STV_i16_v4_avar; break; case MVT::i32: Opcode = NVPTX::STV_i32_v4_avar; break; case MVT::f32: Opcode = NVPTX::STV_f32_v4_avar; break; } break; } StOps.push_back(Addr); } else if (Subtarget.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { switch (N->getOpcode()) { default: return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::STV_i8_v2_asi; break; case MVT::i16: Opcode = NVPTX::STV_i16_v2_asi; break; case MVT::i32: Opcode = NVPTX::STV_i32_v2_asi; break; case MVT::i64: Opcode = NVPTX::STV_i64_v2_asi; break; case MVT::f32: Opcode = NVPTX::STV_f32_v2_asi; break; case MVT::f64: Opcode = NVPTX::STV_f64_v2_asi; break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::STV_i8_v4_asi; break; case MVT::i16: Opcode = NVPTX::STV_i16_v4_asi; break; case MVT::i32: Opcode = NVPTX::STV_i32_v4_asi; break; case MVT::f32: Opcode = NVPTX::STV_f32_v4_asi; break; } break; } StOps.push_back(Base); StOps.push_back(Offset); } else if (Subtarget.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset) : SelectADDRri(N2.getNode(), N2, Base, Offset)) { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { default: return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari_64; break; case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari_64; break; case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari_64; break; case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari_64; break; case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari_64; break; case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari_64; break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari_64; break; case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari_64; break; case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari_64; break; case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari_64; break; } break; } } else { switch (N->getOpcode()) { default: return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari; break; case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari; break; case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari; break; case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari; break; case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari; break; case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari; break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari; break; case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari; break; case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari; break; case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari; break; } break; } } StOps.push_back(Base); StOps.push_back(Offset); } else { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { default: return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg_64; break; case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg_64; break; case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg_64; break; case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg_64; break; case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg_64; break; case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg_64; break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg_64; break; case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg_64; break; case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg_64; break; case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg_64; break; } break; } } else { switch (N->getOpcode()) { default: return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg; break; case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg; break; case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg; break; case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg; break; case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg; break; case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg; break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg; break; case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg; break; case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg; break; case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg; break; } break; } } StOps.push_back(N2); } StOps.push_back(Chain); ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size()); MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1); return ST; }
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (!ConstantSize) return SDValue(); uint64_t SizeVal = ConstantSize->getZExtValue(); if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) return SDValue(); /// If not DWORD aligned, it is more efficient to call the library. However /// if calling the library is not allowed (AlwaysInline), then soldier on as /// the code generated here is better than the long load-store sequence we /// would otherwise get. if (!AlwaysInline && (Align & 3) != 0) return SDValue(); // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256) return SDValue(); // ESI might be used as a base pointer, in that case we can't simply overwrite // the register. Fall back to generic code. const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo()); if (TRI->hasBasePointer(DAG.getMachineFunction()) && TRI->getBaseRegister() == X86::ESI) return SDValue(); MVT AVT; if (Align & 1) AVT = MVT::i8; else if (Align & 2) AVT = MVT::i16; else if (Align & 4) // DWORD aligned AVT = MVT::i32; else // QWORD aligned AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; unsigned UBytes = AVT.getSizeInBits() / 8; unsigned CountVal = SizeVal / UBytes; SDValue Count = DAG.getIntPtrConstant(CountVal); unsigned BytesLeft = SizeVal % UBytes; SDValue InFlag; Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : X86::ECX, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : X86::ESI, Src, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops); SmallVector<SDValue, 4> Results; Results.push_back(RepMovs); if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; EVT DstVT = Dst.getValueType(); EVT SrcVT = Src.getValueType(); EVT SizeVT = Size.getValueType(); Results.push_back(DAG.getMemcpy(Chain, dl, DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, DstVT)), DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, SrcVT)), DAG.getConstant(BytesLeft, SizeVT), Align, isVolatile, AlwaysInline, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results); }
void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi) { MVT OutVT = N->getValueType(0); MVT NOutVT = TLI.getTypeToTransformTo(OutVT); SDValue InOp = N->getOperand(0); MVT InVT = InOp.getValueType(); DebugLoc dl = N->getDebugLoc(); // Handle some special cases efficiently. switch (getTypeAction(InVT)) { default: assert(false && "Unknown type action!"); case Legal: case PromoteInteger: break; case SoftenFloat: // Convert the integer operand instead. SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); return; case ExpandInteger: case ExpandFloat: // Convert the expanded pieces of the input. GetExpandedOp(InOp, Lo, Hi); Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); return; case SplitVector: // Convert the split parts of the input if it was split in two. GetSplitVector(InOp, Lo, Hi); if (Lo.getValueType() == Hi.getValueType()) { if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); return; } break; case ScalarizeVector: // Convert the element instead. SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); return; case WidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT"); InOp = GetWidenedVector(InOp); MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), InVT.getVectorNumElements()/2); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, DAG.getIntPtrConstant(0)); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, DAG.getIntPtrConstant(InNVT.getVectorNumElements())); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); return; } } // Lower the bit-convert to a store/load from the stack. assert(NOutVT.isByteSized() && "Expanded type not byte sized!"); // Create the stack frame object. Make sure it is aligned for both // the source and expanded destination types. unsigned Alignment = TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForMVT()); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); const Value *SV = PseudoSourceValue::getFixedStack(SPFI); // Emit a store to the stack slot. SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0); // Load the first half from the stack slot. Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getIntPtrConstant(IncrementSize)); // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. if (TLI.isBigEndian()) std::swap(Lo, Hi); }
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (!ConstantSize) return SDValue(); uint64_t SizeVal = ConstantSize->getZExtValue(); if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) return SDValue(); /// If not DWORD aligned, it is more efficient to call the library. However /// if calling the library is not allowed (AlwaysInline), then soldier on as /// the code generated here is better than the long load-store sequence we /// would otherwise get. if (!AlwaysInline && (Align & 3) != 0) return SDValue(); // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256) return SDValue(); // If ESI is used as a base pointer, we must preserve it when doing rep movs. const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo()); bool PreserveESI = TRI->hasBasePointer(DAG.getMachineFunction()) && TRI->getBaseRegister() == X86::ESI; MVT AVT; if (Align & 1) AVT = MVT::i8; else if (Align & 2) AVT = MVT::i16; else if (Align & 4) // DWORD aligned AVT = MVT::i32; else // QWORD aligned AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; unsigned UBytes = AVT.getSizeInBits() / 8; unsigned CountVal = SizeVal / UBytes; SDValue Count = DAG.getIntPtrConstant(CountVal); unsigned BytesLeft = SizeVal % UBytes; if (PreserveESI) { // Save ESI to a physical register. (We cannot use a virtual register // because if it is spilled we wouldn't be able to reload it.) // We don't glue this because the register dependencies are explicit. Chain = DAG.getCopyToReg(Chain, dl, X86::EDX, DAG.getRegister(X86::ESI, MVT::i32)); } SDValue InGlue(0, 0); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : X86::ECX, Count, InGlue); InGlue = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : X86::EDI, Dst, InGlue); InGlue = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : X86::ESI, Src, InGlue); InGlue = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(AVT), InGlue }; // FIXME: Make X86rep_movs explicitly use FCX, RDI, RSI instead of glue. SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops, array_lengthof(Ops)); if (PreserveESI) { InGlue = RepMovs.getValue(1); RepMovs = DAG.getCopyToReg(RepMovs, dl, X86::ESI, DAG.getRegister(X86::EDX, MVT::i32), InGlue); } SmallVector<SDValue, 4> Results; Results.push_back(RepMovs); if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; EVT DstVT = Dst.getValueType(); EVT SrcVT = Src.getValueType(); EVT SizeVT = Size.getValueType(); Results.push_back(DAG.getMemcpy(Chain, dl, DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, DstVT)), DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, SrcVT)), DAG.getConstant(BytesLeft, SizeVT), Align, isVolatile, AlwaysInline, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Results[0], Results.size()); }
/// computeRegisterProperties - Once all of the register classes are added, /// this allows us to compute derived properties we expose. void TargetLoweringBase::computeRegisterProperties() { assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && "Too many value types for ValueTypeActions to hold!"); // Everything defaults to needing one register. for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { NumRegistersForVT[i] = 1; RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; } // ...except isVoid, which doesn't need any registers. NumRegistersForVT[MVT::isVoid] = 0; // Find the largest integer register class. unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); // Every integer value type larger than this largest register takes twice as // many registers to represent as the previous ValueType. for (unsigned ExpandedReg = LargestIntReg + 1; ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) { NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg, TypeExpandInteger); } // Inspect all of the ValueType's smaller than the largest integer // register to see which ones need promotion. unsigned LegalIntReg = LargestIntReg; for (unsigned IntReg = LargestIntReg - 1; IntReg >= (unsigned)MVT::i1; --IntReg) { MVT IVT = (MVT::SimpleValueType)IntReg; if (isTypeLegal(IVT)) { LegalIntReg = IntReg; } else { RegisterTypeForVT[IntReg] = TransformToType[IntReg] = (const MVT::SimpleValueType)LegalIntReg; ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); } } // ppcf128 type is really two f64's. if (!isTypeLegal(MVT::ppcf128)) { NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; RegisterTypeForVT[MVT::ppcf128] = MVT::f64; TransformToType[MVT::ppcf128] = MVT::f64; ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat); } // Decide how to handle f128. If the target does not have native f128 support, // expand it to i128 and we will be generating soft float library calls. if (!isTypeLegal(MVT::f128)) { NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128]; RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128]; TransformToType[MVT::f128] = MVT::i128; ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); } // Decide how to handle f64. If the target does not have native f64 support, // expand it to i64 and we will be generating soft float library calls. if (!isTypeLegal(MVT::f64)) { NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; TransformToType[MVT::f64] = MVT::i64; ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); } // Decide how to handle f32. If the target does not have native support for // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. if (!isTypeLegal(MVT::f32)) { if (isTypeLegal(MVT::f64)) { NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; TransformToType[MVT::f32] = MVT::f64; ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger); } else { NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; TransformToType[MVT::f32] = MVT::i32; ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); } } // Loop over all of the vector value types to see which need transformations. for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT VT = (MVT::SimpleValueType)i; if (isTypeLegal(VT)) continue; // Determine if there is a legal wider type. If so, we should promote to // that wider vector type. MVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) { bool IsLegalWiderType = false; // First try to promote the elements of integer vectors. If no legal // promotion was found, fallback to the widen-vector method. for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { MVT SVT = (MVT::SimpleValueType)nVT; // Promote vectors of integers to vectors with the same number // of elements, with a wider element type. if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT) && SVT.getScalarType().isInteger()) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; ValueTypeActions.setTypeAction(VT, TypePromoteInteger); IsLegalWiderType = true; break; } } if (IsLegalWiderType) continue; // Try to widen the vector. for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { MVT SVT = (MVT::SimpleValueType)nVT; if (SVT.getVectorElementType() == EltVT && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; ValueTypeActions.setTypeAction(VT, TypeWidenVector); IsLegalWiderType = true; break; } } if (IsLegalWiderType) continue; } MVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, RegisterVT, this); RegisterTypeForVT[i] = RegisterVT; MVT NVT = VT.getPow2VectorType(); if (NVT == VT) { // Type is already a power of 2. The default action is to split. TransformToType[i] = MVT::Other; unsigned NumElts = VT.getVectorNumElements(); ValueTypeActions.setTypeAction(VT, NumElts > 1 ? TypeSplitVector : TypeScalarizeVector); } else { TransformToType[i] = NVT; ValueTypeActions.setTypeAction(VT, TypeWidenVector); } } // Determine the 'representative' register class for each value type. // An representative register class is the largest (meaning one which is // not a sub-register class / subreg register class) legal register class for // a group of value types. For example, on i386, i8, i16, and i32 // representative would be GR32; while on x86_64 it's GR64. for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { const TargetRegisterClass* RRC; uint8_t Cost; tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i); RepRegClassForVT[i] = RRC; RepRegClassCostForVT[i] = Cost; } }
/// LowerArguments - V8 uses a very simple ABI, where all values are passed in /// either one or two GPRs, including FP values. TODO: we should pass FP values /// in FP registers for fastcc functions. void SparcTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, SmallVectorImpl<SDValue> &ArgValues, DebugLoc dl) { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); static const unsigned ArgRegs[] = { SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 }; const unsigned *CurArgReg = ArgRegs, *ArgRegEnd = ArgRegs+6; unsigned ArgOffset = 68; SDValue Root = DAG.getRoot(); std::vector<SDValue> OutChains; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { MVT ObjectVT = getValueType(I->getType()); switch (ObjectVT.getSimpleVT()) { default: assert(0 && "Unhandled argument type!"); case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: if (I->use_empty()) { // Argument is dead. if (CurArgReg < ArgRegEnd) ++CurArgReg; ArgValues.push_back(DAG.getUNDEF(ObjectVT)); } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(*CurArgReg++, VReg); SDValue Arg = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); if (ObjectVT != MVT::i32) { unsigned AssertOp = ISD::AssertSext; Arg = DAG.getNode(AssertOp, dl, MVT::i32, Arg, DAG.getValueType(ObjectVT)); Arg = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Arg); } ArgValues.push_back(Arg); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load; if (ObjectVT == MVT::i32) { Load = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0); } else { ISD::LoadExtType LoadOp = ISD::SEXTLOAD; // Sparc is big endian, so add an offset based on the ObjectVT. unsigned Offset = 4-std::max(1U, ObjectVT.getSizeInBits()/8); FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr, DAG.getConstant(Offset, MVT::i32)); Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Root, FIPtr, NULL, 0, ObjectVT); Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load); } ArgValues.push_back(Load); } ArgOffset += 4; break; case MVT::f32: if (I->use_empty()) { // Argument is dead. if (CurArgReg < ArgRegEnd) ++CurArgReg; ArgValues.push_back(DAG.getUNDEF(ObjectVT)); } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR // FP value is passed in an integer register. unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(*CurArgReg++, VReg); SDValue Arg = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Arg); ArgValues.push_back(Arg); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load = DAG.getLoad(MVT::f32, dl, Root, FIPtr, NULL, 0); ArgValues.push_back(Load); } ArgOffset += 4; break; case MVT::i64: case MVT::f64: if (I->use_empty()) { // Argument is dead. if (CurArgReg < ArgRegEnd) ++CurArgReg; if (CurArgReg < ArgRegEnd) ++CurArgReg; ArgValues.push_back(DAG.getUNDEF(ObjectVT)); } else { SDValue HiVal; if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(*CurArgReg++, VRegHi); HiVal = DAG.getCopyFromReg(Root, dl, VRegHi, MVT::i32); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); HiVal = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0); } SDValue LoVal; if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR unsigned VRegLo = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(*CurArgReg++, VRegLo); LoVal = DAG.getCopyFromReg(Root, dl, VRegLo, MVT::i32); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); LoVal = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0); } // Compose the two halves together into an i64 unit. SDValue WholeValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal); // If we want a double, do a bit convert. if (ObjectVT == MVT::f64) WholeValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, WholeValue); ArgValues.push_back(WholeValue); } ArgOffset += 8; break; } } // Store remaining ArgRegs to the stack if this is a varargs function. if (F.isVarArg()) { // Remember the vararg offset for the va_start implementation. VarArgsFrameOffset = ArgOffset; for (; CurArgReg != ArgRegEnd; ++CurArgReg) { unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(*CurArgReg, VReg); SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32); int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0)); ArgOffset += 4; } } if (!OutChains.empty()) DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0], OutChains.size())); }
std::string NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const { bool isABI = (nvptxSubtarget.getSmVersion() >= 20); std::stringstream O; O << "prototype_" << uniqueCallSite << " : .callprototype "; if (retTy->getTypeID() == Type::VoidTyID) O << "()"; else { O << "("; if (isABI) { if (retTy->isPrimitiveType() || retTy->isIntegerTy()) { unsigned size = 0; if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) { size = ITy->getBitWidth(); if (size < 32) size = 32; } else { assert(retTy->isFloatingPointTy() && "Floating point type expected here"); size = retTy->getPrimitiveSizeInBits(); } O << ".param .b" << size << " _"; } else if (isa<PointerType>(retTy)) O << ".param .b" << getPointerTy().getSizeInBits() << " _"; else { if ((retTy->getTypeID() == Type::StructTyID) || isa<VectorType>(retTy)) { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*this, retTy, vtparts); unsigned totalsz = 0; for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { elems = vtparts[i].getVectorNumElements(); elemtype = vtparts[i].getVectorElementType(); } for (unsigned j=0, je=elems; j!=je; ++j) { unsigned sz = elemtype.getSizeInBits(); if (elemtype.isInteger() && (sz < 8)) sz = 8; totalsz += sz/8; } } O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]"; } else { assert(false && "Unknown return type"); } } } else { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*this, retTy, vtparts); unsigned idx = 0; for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { elems = vtparts[i].getVectorNumElements(); elemtype = vtparts[i].getVectorElementType(); } for (unsigned j=0, je=elems; j!=je; ++j) { unsigned sz = elemtype.getSizeInBits(); if (elemtype.isInteger() && (sz < 32)) sz = 32; O << ".reg .b" << sz << " _"; if (j<je-1) O << ", "; ++idx; } if (i < e-1) O << ", "; } } O << ") "; } O << "_ ("; bool first = true; MVT thePointerTy = getPointerTy(); for (unsigned i=0,e=Args.size(); i!=e; ++i) { const Type *Ty = Args[i].Ty; if (!first) { O << ", "; } first = false; if (Outs[i].Flags.isByVal() == false) { unsigned sz = 0; if (isa<IntegerType>(Ty)) { sz = cast<IntegerType>(Ty)->getBitWidth(); if (sz < 32) sz = 32; } else if (isa<PointerType>(Ty)) sz = thePointerTy.getSizeInBits(); else sz = Ty->getPrimitiveSizeInBits(); if (isABI) O << ".param .b" << sz << " "; else O << ".reg .b" << sz << " "; O << "_"; continue; } const PointerType *PTy = dyn_cast<PointerType>(Ty); assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); if (isABI) { unsigned align = Outs[i].Flags.getByValAlign(); unsigned sz = getDataLayout()->getTypeAllocSize(ETy); O << ".param .align " << align << " .b8 "; O << "_"; O << "[" << sz << "]"; continue; } else { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*this, ETy, vtparts); for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { elems = vtparts[i].getVectorNumElements(); elemtype = vtparts[i].getVectorElementType(); } for (unsigned j=0,je=elems; j!=je; ++j) { unsigned sz = elemtype.getSizeInBits(); if (elemtype.isInteger() && (sz < 32)) sz = 32; O << ".reg .b" << sz << " "; O << "_"; if (j<je-1) O << ", "; } if (i<e-1) O << ", "; } continue; } } O << ");"; return O.str(); }
/// getVectorTypeBreakdown - Vector types are broken down into some number of /// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. /// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. /// /// This method returns the number of registers needed, and the VT for each /// register. It also returns the VT and quantity of the intermediate values /// before they are promoted/expanded. /// unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const { unsigned NumElts = VT.getVectorNumElements(); // If there is a wider vector type with the same element type as this one, // or a promoted vector type that has the same number of elements which // are wider, then we should convert to that legal vector type. // This handles things like <2 x float> -> <4 x float> and // <4 x i1> -> <4 x i32>. LegalizeTypeAction TA = getTypeAction(Context, VT); if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) { EVT RegisterEVT = getTypeToTransformTo(Context, VT); if (isTypeLegal(RegisterEVT)) { IntermediateVT = RegisterEVT; RegisterVT = RegisterEVT.getSimpleVT(); NumIntermediates = 1; return 1; } } // Figure out the right, legal destination reg to copy into. EVT EltTy = VT.getVectorElementType(); unsigned NumVectorRegs = 1; // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we // could break down into LHS/RHS like LegalizeDAG does. if (!isPowerOf2_32(NumElts)) { NumVectorRegs = NumElts; NumElts = 1; } // Divide the input until we get to a supported size. This will always // end with a scalar if the target doesn't support vectors. while (NumElts > 1 && !isTypeLegal( EVT::getVectorVT(Context, EltTy, NumElts))) { NumElts >>= 1; NumVectorRegs <<= 1; } NumIntermediates = NumVectorRegs; EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); if (!isTypeLegal(NewVT)) NewVT = EltTy; IntermediateVT = NewVT; MVT DestVT = getRegisterType(Context, NewVT); RegisterVT = DestVT; unsigned NewVTSize = NewVT.getSizeInBits(); // Convert sizes such as i33 to i64. if (!isPowerOf2_32(NewVTSize)) NewVTSize = NextPowerOf2(NewVTSize); if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. return NumVectorRegs; }