SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
  assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
  assert(OpNo == 1 && "Can only expand the stored value so far");
  DebugLoc dl = N->getDebugLoc();

  StoreSDNode *St = cast<StoreSDNode>(N);
  MVT NVT = TLI.getTypeToTransformTo(St->getValue().getValueType());
  SDValue Chain = St->getChain();
  SDValue Ptr = St->getBasePtr();
  int SVOffset = St->getSrcValueOffset();
  unsigned Alignment = St->getAlignment();
  bool isVolatile = St->isVolatile();

  assert(NVT.isByteSized() && "Expanded type not byte sized!");
  unsigned IncrementSize = NVT.getSizeInBits() / 8;

  SDValue Lo, Hi;
  GetExpandedOp(St->getValue(), Lo, Hi);

  if (TLI.isBigEndian())
    std::swap(Lo, Hi);

  Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset,
                    isVolatile, Alignment);

  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                    DAG.getIntPtrConstant(IncrementSize));
  assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
  Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(),
                    SVOffset + IncrementSize, isVolatile,
                    MinAlign(Alignment, IncrementSize));

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries
/// to emit an instruction with an immediate operand using FastEmit_ri.
/// If that fails, it materializes the immediate into a register and tries
/// FastEmit_rr instead.
unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
                                unsigned Op0, bool Op0IsKill,
                                uint64_t Imm, MVT ImmType) {
  // If this is a multiply by a power of two, emit this as a shift left.
  if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
    Opcode = ISD::SHL;
    Imm = Log2_64(Imm);
  } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
    // div x, 8 -> srl x, 3
    Opcode = ISD::SRL;
    Imm = Log2_64(Imm);
  }

  // Horrible hack (to be removed), check to make sure shift amounts are
  // in-range.
  if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
      Imm >= VT.getSizeInBits())
    return 0;

  // First check if immediate type is legal. If not, we can't use the ri form.
  unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
  if (ResultReg != 0)
    return ResultReg;
  unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
  if (MaterialReg == 0) {
    // This is a bit ugly/slow, but failing here means falling out of
    // fast-isel, which would be very slow.
    const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
                                              VT.getSizeInBits());
    MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
  }
  return FastEmit_rr(VT, VT, Opcode,
                     Op0, Op0IsKill,
                     MaterialReg, /*Kill=*/true);
}
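// Illustrative aside (not part of the original source): the strength
// reduction above relies on isPowerOf2_64/Log2_64. A minimal standalone
// check of that arithmetic in plain C++, assuming a GCC/Clang builtin:
#include <cassert>
#include <cstdint>
static bool isPow2(uint64_t V) { return V && (V & (V - 1)) == 0; }
static unsigned log2u64(uint64_t V) { return 63 - __builtin_clzll(V); }
int main() {
  assert(isPow2(16) && log2u64(16) == 4); // mul x, 16  ==>  shl x, 4
  assert(isPow2(8) && log2u64(8) == 3);   // udiv x, 8  ==>  srl x, 3
  assert(!isPow2(12));                    // 12 stays a real mul/udiv
}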
SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
  // The vector type is legal but the element type needs expansion.
  MVT VecVT = N->getValueType(0);
  unsigned NumElts = VecVT.getVectorNumElements();
  MVT OldVT = N->getOperand(0).getValueType();
  MVT NewVT = TLI.getTypeToTransformTo(OldVT);
  DebugLoc dl = N->getDebugLoc();

  assert(OldVT == VecVT.getVectorElementType() &&
         "BUILD_VECTOR operand type doesn't match vector element type!");

  // Build a vector of twice the length out of the expanded elements.
  // For example <3 x i64> -> <6 x i32>.
  std::vector<SDValue> NewElts;
  NewElts.reserve(NumElts*2);

  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Lo, Hi;
    GetExpandedOp(N->getOperand(i), Lo, Hi);
    if (TLI.isBigEndian())
      std::swap(Lo, Hi);
    NewElts.push_back(Lo);
    NewElts.push_back(Hi);
  }

  SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
                               MVT::getVectorVT(NewVT, NewElts.size()),
                               &NewElts[0], NewElts.size());

  // Convert the new vector to the old vector type.
  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
}
void X86InterleavedAccessGroup::interleave8bitStride3(
    ArrayRef<Instruction *> InVec, SmallVectorImpl<Value *> &TransposedMatrix,
    unsigned VecElems) {
  // Example: Assuming we start from the following vectors:
  // Matrix[0]= a0 a1 a2 a3 a4 a5 a6 a7
  // Matrix[1]= b0 b1 b2 b3 b4 b5 b6 b7
  // Matrix[2]= c0 c1 c2 c3 c4 c5 c6 c7

  TransposedMatrix.resize(3);
  SmallVector<uint32_t, 3> GroupSize;
  SmallVector<uint32_t, 32> VPShuf;
  SmallVector<uint32_t, 32> VPAlign[3];
  SmallVector<uint32_t, 32> VPAlign2;
  SmallVector<uint32_t, 32> VPAlign3;

  Value *Vec[3], *TempVector[3];
  MVT VT = MVT::getVectorVT(MVT::i8, VecElems);

  setGroupSize(VT, GroupSize);

  for (int i = 0; i < 3; i++)
    DecodePALIGNRMask(VT, GroupSize[i], VPAlign[i]);

  DecodePALIGNRMask(VT, GroupSize[1] + GroupSize[2], VPAlign2, false, true);
  DecodePALIGNRMask(VT, GroupSize[1], VPAlign3, false, true);

  // Vec[0]= a3 a4 a5 a6 a7 a0 a1 a2
  // Vec[1]= c5 c6 c7 c0 c1 c2 c3 c4
  // Vec[2]= b0 b1 b2 b3 b4 b5 b6 b7

  Vec[0] = Builder.CreateShuffleVector(
      InVec[0], UndefValue::get(InVec[0]->getType()), VPAlign2);
  Vec[1] = Builder.CreateShuffleVector(
      InVec[1], UndefValue::get(InVec[1]->getType()), VPAlign3);
  Vec[2] = InVec[2];

  // Vec[0]= a6 a7 a0 a1 a2 b0 b1 b2
  // Vec[1]= c0 c1 c2 c3 c4 a3 a4 a5
  // Vec[2]= b3 b4 b5 b6 b7 c5 c6 c7

  for (int i = 0; i < 3; i++)
    TempVector[i] =
        Builder.CreateShuffleVector(Vec[i], Vec[(i + 2) % 3], VPAlign[1]);

  // Vec[0]= a0 a1 a2 b0 b1 b2 c0 c1
  // Vec[1]= c2 c3 c4 a3 a4 a5 b3 b4
  // Vec[2]= b5 b6 b7 c5 c6 c7 a6 a7

  for (int i = 0; i < 3; i++)
    Vec[i] = Builder.CreateShuffleVector(TempVector[i], TempVector[(i + 1) % 3],
                                         VPAlign[2]);

  // TransposedMatrix[0] = a0 b0 c0 a1 b1 c1 a2 b2
  // TransposedMatrix[1] = c2 a3 b3 c3 a4 b4 c4 a5
  // TransposedMatrix[2] = b5 c5 a6 b6 c6 a7 b7 c7

  unsigned NumOfElm = VT.getVectorNumElements();
  group2Shuffle(VT, GroupSize, VPShuf);
  reorderSubVector(VT, TransposedMatrix, Vec, VPShuf, NumOfElm, 3, Builder);
}
static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
  SDNode *Node = Op.getNode();
  MVT VT = Node->getValueType(0);
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  DebugLoc dl = Node->getDebugLoc();
  SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0);
  // Increment the pointer, VAList, to the next vaarg.
  SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
                                DAG.getConstant(VT.getSizeInBits()/8,
                                                MVT::i32));
  // Store the incremented VAList to the legalized pointer.
  InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr, VAListPtr, SV, 0);
  // Load the actual argument out of the pointer VAList, unless this is an
  // f64 load.
  if (VT != MVT::f64)
    return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0);

  // Otherwise, load it as i64, then do a bitconvert.
  SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0);

  // Bit-Convert the value to f64.
  SDValue Ops[2] = {
    DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, V),
    V.getValue(1)
  };
  return DAG.getMergeValues(Ops, 2, dl);
}
SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
  // The vector type is legal but the element type needs expansion.
  MVT VecVT = N->getValueType(0);
  unsigned NumElts = VecVT.getVectorNumElements();
  DebugLoc dl = N->getDebugLoc();

  SDValue Val = N->getOperand(1);
  MVT OldEVT = Val.getValueType();
  MVT NewEVT = TLI.getTypeToTransformTo(OldEVT);

  assert(OldEVT == VecVT.getVectorElementType() &&
         "Inserted element type doesn't match vector element type!");

  // Bitconvert to a vector of twice the length with elements of the expanded
  // type, insert the expanded vector elements, and then convert back.
  MVT NewVecVT = MVT::getVectorVT(NewEVT, NumElts*2);
  SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
                               NewVecVT, N->getOperand(0));

  SDValue Lo, Hi;
  GetExpandedOp(Val, Lo, Hi);
  if (TLI.isBigEndian())
    std::swap(Lo, Hi);

  SDValue Idx = N->getOperand(2);
  Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
  NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
  Idx = DAG.getNode(ISD::ADD, dl,
                    Idx.getValueType(), Idx, DAG.getIntPtrConstant(1));
  NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);

  // Convert the new vector to the old vector type.
  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
}
MVT MVT::getExtendedIntegerVT(unsigned BitWidth) {
  const Type *&ET = ExtendedIntegerTypeMap[BitWidth];
  if (!ET) ET = new ExtendedIntegerType(BitWidth);
  MVT VT;
  VT.LLVMTy = ET;
  assert(VT.isExtended() && "Type is not extended!");
  return VT;
}
MVT MVT::getExtendedVectorVT(MVT VT, unsigned NumElements) {
  const Type *&ET = ExtendedVectorTypeMap[std::make_pair(VT.getRawBits(),
                                                         NumElements)];
  if (!ET) ET = new ExtendedVectorType(VT, NumElements);
  MVT ResultVT;
  ResultVT.LLVMTy = ET;
  assert(ResultVT.isExtended() && "Type is not extended!");
  return ResultVT;
}
// createShuffleStride returns a shuffle mask of size VF.
// The shuffle pattern is as follows:
// {0, Stride%(VF/Lane), (2*Stride)%(VF/Lane), ..., (VF*Stride/Lane)%(VF/Lane),
//  (VF/Lane), (VF/Lane)+Stride%(VF/Lane), ...,
//  (VF/Lane)+(VF*Stride/Lane)%(VF/Lane)}
// Where Lane is the # of lanes in a register:
// VectorSize = 128 => Lane = 1
// VectorSize = 256 => Lane = 2
// For example, the shuffle pattern for VF 16 with register size 256
// (lanes = 2) is {<[0|3|6|1|4|7|2|5]-[8|11|14|9|12|15|10|13]>}.
static void createShuffleStride(MVT VT, int Stride,
                                SmallVectorImpl<uint32_t> &Mask) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements();
  int LaneCount = std::max(VectorSize / 128, 1);
  for (int Lane = 0; Lane < LaneCount; Lane++)
    for (int i = 0, LaneSize = VF / LaneCount; i != LaneSize; ++i)
      Mask.push_back((i * Stride) % LaneSize + LaneSize * Lane);
}
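// Standalone sketch (plain C++, not from the original source) reproducing the
// mask arithmetic above for VF = 16 in a 256-bit register with Stride = 3;
// it prints the two-lane mask quoted in the header comment.
#include <cstdio>
#include <vector>
int main() {
  const int VF = 16, VectorSize = 256, Stride = 3;
  const int LaneCount = VectorSize / 128, LaneSize = VF / LaneCount;
  std::vector<int> Mask;
  for (int Lane = 0; Lane < LaneCount; ++Lane)
    for (int i = 0; i != LaneSize; ++i)
      Mask.push_back((i * Stride) % LaneSize + LaneSize * Lane);
  for (int M : Mask)
    std::printf("%d ", M); // 0 3 6 1 4 7 2 5 8 11 14 9 12 15 10 13
  std::printf("\n");
}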
// setGroupSize sets 'SizeInfo' to the size (number of elements) of each group
// inside a shuffle mask. A mask contains exactly 3 groups, where
// each group is a monotonically increasing sequence with stride 3.
// For example, shuffle mask {0,3,6,1,4,7,2,5} => {3,3,2}.
static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements() / std::max(VectorSize / 128, 1);
  for (int i = 0, FirstGroupElement = 0; i < 3; i++) {
    int GroupSize = std::ceil((VF - FirstGroupElement) / 3.0);
    SizeInfo.push_back(GroupSize);
    FirstGroupElement = ((GroupSize)*3 + FirstGroupElement) % VF;
  }
}
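// Standalone sketch (plain C++, not from the original source) of the group
// size computation for one 128-bit lane of a v16i8 in a 256-bit register:
// the per-lane VF is 16 / 2 = 8, which yields the {3,3,2} of the comment.
#include <cmath>
#include <cstdio>
int main() {
  const int VF = 8;
  for (int i = 0, First = 0; i < 3; ++i) {
    int GroupSize = (int)std::ceil((VF - First) / 3.0);
    std::printf("%d ", GroupSize); // prints: 3 3 2
    First = (GroupSize * 3 + First) % VF;
  }
  std::printf("\n");
}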
// genShuffleBland - Creates a shuffle mask that blends two vectors. This
// function only works on instructions with lanes inside 256-bit registers.
// From the mask 'Mask' it creates a new mask 'Out' by adding an offset to
// each element. The offset amount depends on the two integers 'LowOffset' and
// 'HighOffset', where 'LowOffset' is applied to elements taken from the first
// vector and 'HighOffset' to elements taken from the second vector.
// |a0....a5,b0....b4,c0....c4|a16..a21,b16..b20,c16..c20|
// |c5...c10,a5....a9,b5....b9|c21..c26,a22..a26,b21..b25|
// |b10..b15,c11..c15,a10..a15|b26..b31,c27..c31,a27..a31|
// For the sequence to work as a mirror to the load, we must consider the
// element order as above. This function combines two kinds of shuffles: the
// first is a vpshufd-style shuffle and the second is a "blend"-style shuffle.
// By computing the shuffle on a sequence of 16 elements (one lane) and adding
// the correct offset, we create a vpshufd + blend sequence between two
// shuffles.
static void genShuffleBland(MVT VT, ArrayRef<uint32_t> Mask,
                            SmallVectorImpl<uint32_t> &Out, int LowOffset,
                            int HighOffset) {
  assert(VT.getSizeInBits() >= 256 &&
         "This function doesn't accept width smaller than 256");
  unsigned NumOfElm = VT.getVectorNumElements();
  for (unsigned i = 0; i < Mask.size(); i++)
    Out.push_back(Mask[i] + LowOffset);
  for (unsigned i = 0; i < Mask.size(); i++)
    Out.push_back(Mask[i] + HighOffset + NumOfElm);
}
SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
  DebugLoc dl = N->getDebugLoc();
  MVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SmallVector<SDValue, 16> Ops(NumElts);
  Ops[0] = N->getOperand(0);
  SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
  for (unsigned i = 1; i < NumElts; ++i)
    Ops[i] = UndefVal;
  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
}
// NVPTX supports vectors of legal types of any length in intrinsics because
// the NVPTX-specific type legalizer will legalize them to the PTX-supported
// length.
bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
  if (isTypeLegal(VT))
    return true;
  if (VT.isVector()) {
    MVT eVT = VT.getVectorElementType();
    if (isTypeLegal(eVT))
      return true;
  }
  return false;
}
bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                          CCValAssign::LocInfo &LocInfo,
                          ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // On the second pass, go through the HVAs only.
  if (ArgFlags.isSecArgPass()) {
    if (ArgFlags.isHva())
      return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
                                             ArgFlags, State);
    return true;
  }

  // Process only vector types as defined by vectorcall spec:
  // "A vector type is either a floating-point type, for example,
  // a float or double, or an SIMD vector type, for example, __m128 or __m256".
  if (!(ValVT.isFloatingPoint() ||
        (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
    // If R9 was already assigned, it means that we are after the fourth
    // element and, because this is not an HVA / vector type, we need to
    // allocate a shadow XMM register.
    if (State.isAllocated(X86::R9))
      // Assign shadow XMM register.
      (void)State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT));

    return false;
  }

  if (!ArgFlags.isHva() || ArgFlags.isHvaStart()) {
    // Assign shadow GPR register.
    (void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs());

    // Assign XMM register - (shadow for HVA and non-shadow for non HVA).
    if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
      // In the vectorcall calling convention, additional shadow stack can be
      // created on top of the basic 32 bytes of win64. This can happen if the
      // fifth or sixth argument is a vector type or HVA; in that case a
      // shadow stack slot of 8 bytes is allocated for each such argument.
      if (Reg == X86::XMM4 || Reg == X86::XMM5)
        State.AllocateStack(8, 8);

      if (!ArgFlags.isHva()) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
        return true; // Allocated a register - Stop the search.
      }
    }
  }

  // If this is an HVA - Stop the search,
  // otherwise continue the search.
  return ArgFlags.isHva();
}
static void EmitTypeGenerate(std::ostream &OS, const Record *ArgType,
                             unsigned &ArgNo) {
  MVT::SimpleValueType VT = getValueType(ArgType->getValueAsDef("VT"));

  if (ArgType->isSubClassOf("LLVMMatchType")) {
    unsigned Number = ArgType->getValueAsInt("Number");
    assert(Number < ArgNo && "Invalid matching number!");
    if (ArgType->isSubClassOf("LLVMExtendedElementVectorType"))
      OS << "VectorType::getExtendedElementVectorType"
         << "(dyn_cast<VectorType>(Tys[" << Number << "]))";
    else if (ArgType->isSubClassOf("LLVMTruncatedElementVectorType"))
      OS << "VectorType::getTruncatedElementVectorType"
         << "(dyn_cast<VectorType>(Tys[" << Number << "]))";
    else
      OS << "Tys[" << Number << "]";
  } else if (VT == MVT::iAny || VT == MVT::fAny) {
    // NOTE: The ArgNo variable here is not the absolute argument number, it is
    // the index of the "arbitrary" type in the Tys array passed to the
    // Intrinsic::getDeclaration function. Consequently, we only want to
    // increment it when we actually hit an overloaded type. Getting this wrong
    // leads to very subtle bugs!
    OS << "Tys[" << ArgNo++ << "]";
  } else if (MVT(VT).isVector()) {
    MVT VVT = VT;
    OS << "VectorType::get(";
    EmitTypeForValueType(OS, VVT.getVectorElementType().getSimpleVT());
    OS << ", " << VVT.getVectorNumElements() << ")";
  } else if (VT == MVT::iPTR) {
    OS << "PointerType::getUnqual(";
    EmitTypeGenerate(OS, ArgType->getValueAsDef("ElTy"), ArgNo);
    OS << ")";
  } else if (VT == MVT::iPTRAny) {
    // Make sure the user has passed us an argument type to overload. If not,
    // treat it as an ordinary (not overloaded) intrinsic.
    OS << "(" << ArgNo << " < numTys) ? Tys[" << ArgNo
       << "] : PointerType::getUnqual(";
    EmitTypeGenerate(OS, ArgType->getValueAsDef("ElTy"), ArgNo);
    OS << ")";
    ++ArgNo;
  } else if (VT == MVT::isVoid) {
    if (ArgNo == 0)
      OS << "Type::VoidTy";
    else
      // MVT::isVoid is used to mean varargs here.
      OS << "...";
  } else {
    EmitTypeForValueType(OS, VT);
  }
}
static ArrayRef<MCPhysReg> CC_X86_VectorCallGetSSEs(const MVT &ValVT) {
  if (ValVT.is512BitVector()) {
    static const MCPhysReg RegListZMM[] = {X86::ZMM0, X86::ZMM1, X86::ZMM2,
                                           X86::ZMM3, X86::ZMM4, X86::ZMM5};
    return makeArrayRef(std::begin(RegListZMM), std::end(RegListZMM));
  }

  if (ValVT.is256BitVector()) {
    static const MCPhysReg RegListYMM[] = {X86::YMM0, X86::YMM1, X86::YMM2,
                                           X86::YMM3, X86::YMM4, X86::YMM5};
    return makeArrayRef(std::begin(RegListYMM), std::end(RegListYMM));
  }

  static const MCPhysReg RegListXMM[] = {X86::XMM0, X86::XMM1, X86::XMM2,
                                         X86::XMM3, X86::XMM4, X86::XMM5};
  return makeArrayRef(std::begin(RegListXMM), std::end(RegListXMM));
}
LLT::LLT(MVT VT) {
  if (VT.isVector()) {
    SizeInBits = VT.getVectorElementType().getSizeInBits();
    ElementsOrAddrSpace = VT.getVectorNumElements();
    Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
  } else if (VT.isValid()) {
    // Aggregates are no different from real scalars as far as GlobalISel is
    // concerned.
    Kind = Scalar;
    SizeInBits = VT.getSizeInBits();
    ElementsOrAddrSpace = 1;
    assert(SizeInBits != 0 && "invalid zero-sized type");
  } else {
    Kind = Invalid;
    SizeInBits = ElementsOrAddrSpace = 0;
  }
}
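// Illustrative note (not from the original source): MVT::i64 maps to a 64-bit
// scalar LLT, MVT::v4i32 maps to a vector LLT of 4 x 32-bit elements, and a
// single-element vector such as MVT::v1i64 collapses to a 64-bit scalar LLT
// because of the ElementsOrAddrSpace == 1 check above.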
// group2Shuffle reorders the shuffle stride back into continuous order.
// For example, for VF16 with Mask1 = {0,3,6,9,12,15,2,5,8,11,14,1,4,7,10,13}
// => MaskResult = {0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5}.
static void group2Shuffle(MVT VT, SmallVectorImpl<uint32_t> &Mask,
                          SmallVectorImpl<uint32_t> &Output) {
  int IndexGroup[3] = {0, 0, 0};
  int Index = 0;
  int VectorWidth = VT.getSizeInBits();
  int VF = VT.getVectorNumElements();
  // Find the index of the different groups.
  int Lane = (VectorWidth / 128 > 0) ? VectorWidth / 128 : 1;
  for (int i = 0; i < 3; i++) {
    IndexGroup[(Index * 3) % (VF / Lane)] = Index;
    Index += Mask[i];
  }
  // According to the index compute the convert mask.
  for (int i = 0; i < VF / Lane; i++) {
    Output.push_back(IndexGroup[i % 3]);
    IndexGroup[i % 3]++;
  }
}
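// Worked example (illustrative): 'Mask' here receives the three group sizes,
// e.g. {6,5,5} for VF 16 in a single 128-bit lane. The first loop turns the
// running sums {0,6,11} into starting indices IndexGroup = {0,11,6} (placed
// at positions 0, 2, 1 by the (Index * 3) % 16 computation), and the second
// loop emits them round-robin, yielding {0,11,6,1,12,7,...,4,15,10,5} as in
// the header comment.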
// DecodePALIGNRMask returns the shuffle mask of the vpalignr instruction.
// vpalignr works per lane, where Lane is the # of lanes in a register:
// VectorWide = 128 => Lane = 1
// VectorWide = 256 => Lane = 2
// For Lane = 1 the shuffle pattern is: {DiffToJump,...,DiffToJump+VF-1}.
// For Lane = 2 the shuffle pattern is:
// {DiffToJump,...,VF/2-1,VF,...,DiffToJump+VF-1}.
// The Imm variable sets the offset amount. The result of the function is
// stored inside the ShuffleMask vector and is built as described above.
// AlignDirection is a boolean that indicates the direction of the alignment
// (false - align to the "right" side while true - align to the "left" side).
static void DecodePALIGNRMask(MVT VT, unsigned Imm,
                              SmallVectorImpl<uint32_t> &ShuffleMask,
                              bool AlignDirection = true, bool Unary = false) {
  unsigned NumElts = VT.getVectorNumElements();
  unsigned NumLanes = std::max((int)VT.getSizeInBits() / 128, 1);
  unsigned NumLaneElts = NumElts / NumLanes;

  Imm = AlignDirection ? Imm : (NumLaneElts - Imm);
  unsigned Offset = Imm * (VT.getScalarSizeInBits() / 8);

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Base = i + Offset;
      // If i+Offset is out of this lane then we actually need the other
      // source. If Unary, the other source is the first source.
      if (Base >= NumLaneElts)
        Base = Unary ? Base % NumLaneElts : Base + NumElts - NumLaneElts;
      ShuffleMask.push_back(Base + l);
    }
  }
}
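// Worked example (illustrative): for VT = v16i8 (one 128-bit lane), Imm = 5,
// AlignDirection = true, and Unary = false: Offset = 5 bytes, so the mask is
// {5,6,...,15,16,17,18,19,20}. The last five indices point into the second
// source, matching PALIGNR's concatenate-then-shift-right-by-Imm-bytes
// semantics.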
std::pair<unsigned, const TargetRegisterClass *>
WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // WebAssembly register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      assert(VT != MVT::iPTR && "Pointer MVT not expected here");
      if (Subtarget->hasSIMD128() && VT.isVector()) {
        if (VT.getSizeInBits() == 128)
          return std::make_pair(0U, &WebAssembly::V128RegClass);
      }
      if (VT.isInteger() && !VT.isVector()) {
        if (VT.getSizeInBits() <= 32)
          return std::make_pair(0U, &WebAssembly::I32RegClass);
        if (VT.getSizeInBits() <= 64)
          return std::make_pair(0U, &WebAssembly::I64RegClass);
      }
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
SDValue LanaiTargetLowering::LowerSRL_PARTS(SDValue Op,
                                            SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  unsigned VTBits = VT.getSizeInBits();
  SDLoc dl(Op);
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);

  // Performs the following for a >> b:
  //   unsigned r_high = a_high >> b;
  //   r_high = (32 - b <= 0) ? 0 : r_high;
  //
  //   unsigned r_low = a_low >> b;
  //   r_low = (32 - b <= 0) ? r_high : r_low;
  //   r_low = (b == 0) ? r_low : r_low | (a_high << (32 - b));
  //   return (unsigned long long)r_high << 32 | r_low;
  // Note: This takes advantage of Lanai's shift behavior to avoid needing to
  // mask the shift amount.
  SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
  SDValue NegatedPlus32 = DAG.getNode(
      ISD::SUB, dl, MVT::i32, DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
  SDValue SetCC = DAG.getSetCC(dl, MVT::i32, NegatedPlus32, Zero, ISD::SETLE);

  SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i32, ShOpHi, ShAmt);
  Hi = DAG.getSelect(dl, MVT::i32, SetCC, Zero, Hi);

  SDValue Lo = DAG.getNode(ISD::SRL, dl, MVT::i32, ShOpLo, ShAmt);
  Lo = DAG.getSelect(dl, MVT::i32, SetCC, Hi, Lo);
  SDValue CarryBits =
      DAG.getNode(ISD::SHL, dl, MVT::i32, ShOpHi, NegatedPlus32);
  SDValue ShiftIsZero = DAG.getSetCC(dl, MVT::i32, ShAmt, Zero, ISD::SETEQ);
  Lo = DAG.getSelect(dl, MVT::i32, ShiftIsZero, Lo,
                     DAG.getNode(ISD::OR, dl, MVT::i32, Lo, CarryBits));

  SDValue Ops[2] = {Lo, Hi};
  return DAG.getMergeValues(Ops, dl);
}
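// Standalone sketch (plain C++, not from the original source) checking the
// pseudocode above. Lanai's SHL accepts a negative amount and shifts right
// instead, which is what makes the (32 - b) trick cover b > 32; lanaiShl
// below emulates that behavior explicitly so the check stays portable.
#include <cassert>
#include <cstdint>
static uint32_t lanaiShl(uint32_t V, int Amt) {
  if (Amt >= 32 || Amt <= -32)
    return 0;
  return Amt >= 0 ? V << Amt : V >> -Amt;
}
static uint64_t srl64(uint32_t AHi, uint32_t ALo, int B) {
  uint32_t RHi = (B < 32) ? AHi >> B : 0;        // the SetCC/select pair
  uint32_t RLo = (32 - B <= 0) ? RHi : ALo >> B; // RHi is already zeroed here
  RLo = (B == 0) ? RLo : RLo | lanaiShl(AHi, 32 - B);
  return (uint64_t)RHi << 32 | RLo;
}
int main() {
  const uint64_t X = 0x123456789ABCDEF0ULL;
  for (int B = 0; B < 64; ++B)
    assert(srl64((uint32_t)(X >> 32), (uint32_t)X, B) == X >> B);
}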
bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                          CCValAssign::LocInfo &LocInfo,
                          ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // On the second pass, go through the HVAs only.
  if (ArgFlags.isSecArgPass()) {
    if (ArgFlags.isHva())
      return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
                                             ArgFlags, State);
    return true;
  }

  // Process only vector types as defined by vectorcall spec:
  // "A vector type is either a floating point type, for example,
  // a float or double, or an SIMD vector type, for example, __m128 or __m256".
  if (!(ValVT.isFloatingPoint() ||
        (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
    return false;
  }

  if (ArgFlags.isHva())
    return true; // If this is an HVA - Stop the search.

  // Assign XMM register.
  if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return true;
  }

  // In case we did not find an available XMM register for a vector -
  // pass it indirectly.
  // It is similar to CCPassIndirect, with the addition of inreg.
  if (!ValVT.isFloatingPoint()) {
    LocVT = MVT::i32;
    LocInfo = CCValAssign::Indirect;
    ArgFlags.setInReg();
  }

  return false; // No register was assigned - Continue the search.
}
void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
                                            SDValue &Hi) {
  assert(ISD::isNormalLoad(N) && "This routine only for normal loads!");
  DebugLoc dl = N->getDebugLoc();

  LoadSDNode *LD = cast<LoadSDNode>(N);
  MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0));
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  int SVOffset = LD->getSrcValueOffset();
  unsigned Alignment = LD->getAlignment();
  bool isVolatile = LD->isVolatile();

  assert(NVT.isByteSized() && "Expanded type not byte sized!");

  Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
                   isVolatile, Alignment);

  // Increment the pointer to the other half.
  unsigned IncrementSize = NVT.getSizeInBits() / 8;
  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                    DAG.getIntPtrConstant(IncrementSize));
  Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
                   SVOffset+IncrementSize, isVolatile,
                   MinAlign(Alignment, IncrementSize));

  // Build a factor node to remember that this load is independent of the
  // other one.
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                      Hi.getValue(1));

  // Handle endianness of the load.
  if (TLI.isBigEndian())
    std::swap(Lo, Hi);

  // Modified the chain - switch anything that used the old chain to use
  // the new one.
  ReplaceValueWith(SDValue(N, 1), Chain);
}
static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                CCValAssign::LocInfo &LocInfo,
                                ISD::ArgFlagsTy &ArgFlags,
                                CCState &State) {
  static const unsigned ArgRegs[] = {
    MBlaze::R5, MBlaze::R6, MBlaze::R7,
    MBlaze::R8, MBlaze::R9, MBlaze::R10
  };

  const unsigned NumArgRegs = array_lengthof(ArgRegs);
  unsigned Reg = State.AllocateReg(ArgRegs, NumArgRegs);
  if (!Reg) return false;

  unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
  State.AllocateStack(SizeInBytes, SizeInBytes);
  State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));

  return true;
}
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
                                          unsigned &NumIntermediates,
                                          MVT &RegisterVT,
                                          TargetLoweringBase *TLI) {
  // Figure out the right, legal destination reg to copy into.
  unsigned NumElts = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType();

  unsigned NumVectorRegs = 1;

  // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
  // could break down into LHS/RHS like LegalizeDAG does.
  if (!isPowerOf2_32(NumElts)) {
    NumVectorRegs = NumElts;
    NumElts = 1;
  }

  // Divide the input until we get to a supported size. This will always
  // end with a scalar if the target doesn't support vectors.
  while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
    NumElts >>= 1;
    NumVectorRegs <<= 1;
  }

  NumIntermediates = NumVectorRegs;

  MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
  if (!TLI->isTypeLegal(NewVT))
    NewVT = EltTy;
  IntermediateVT = NewVT;

  unsigned NewVTSize = NewVT.getSizeInBits();

  // Convert sizes such as i33 to i64.
  if (!isPowerOf2_32(NewVTSize))
    NewVTSize = NextPowerOf2(NewVTSize);

  MVT DestVT = TLI->getRegisterType(NewVT);
  RegisterVT = DestVT;
  if (EVT(DestVT).bitsLT(NewVT))    // Value is expanded, e.g. i64 -> i16.
    return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());

  // Otherwise, promotion or legal types use the same number of registers as
  // the vector decimated to the appropriate level.
  return NumVectorRegs;
}
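// Worked example (illustrative, assuming a hypothetical target whose widest
// legal vector is v4i32): breaking down v8i32 halves NumElts once, giving
// NumIntermediates = 2, IntermediateVT = RegisterVT = v4i32, and a return
// value of 2. For a non-power-of-2 type like v3i32, the FIXME path instead
// yields 3 intermediates of the scalar type i32.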
std::string
NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  unsigned retAlignment) const {
  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";

  if (retTy->getTypeID() == Type::VoidTyID)
    O << "()";
  else {
    O << "(";
    if (isABI) {
      if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
        unsigned size = 0;
        if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
          size = ITy->getBitWidth();
          if (size < 32) size = 32;
        } else {
          assert(retTy->isFloatingPointTy() &&
                 "Floating point type expected here");
          size = retTy->getPrimitiveSizeInBits();
        }

        O << ".param .b" << size << " _";
      } else if (isa<PointerType>(retTy))
        O << ".param .b" << getPointerTy().getSizeInBits() << " _";
      else {
        if ((retTy->getTypeID() == Type::StructTyID) ||
            isa<VectorType>(retTy)) {
          SmallVector<EVT, 16> vtparts;
          ComputeValueVTs(*this, retTy, vtparts);
          unsigned totalsz = 0;
          for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
            unsigned elems = 1;
            EVT elemtype = vtparts[i];
            if (vtparts[i].isVector()) {
              elems = vtparts[i].getVectorNumElements();
              elemtype = vtparts[i].getVectorElementType();
            }
            for (unsigned j = 0, je = elems; j != je; ++j) {
              unsigned sz = elemtype.getSizeInBits();
              if (elemtype.isInteger() && (sz < 8)) sz = 8;
              totalsz += sz / 8;
            }
          }
          O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]";
        } else {
          assert(false && "Unknown return type");
        }
      }
    } else {
      SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*this, retTy, vtparts);
      unsigned idx = 0;
      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }
        for (unsigned j = 0, je = elems; j != je; ++j) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
          O << ".reg .b" << sz << " _";
          if (j < je - 1) O << ", ";
          ++idx;
        }
        if (i < e - 1) O << ", ";
      }
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;
  MVT thePointerTy = getPointerTy();

  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    const Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (Outs[i].Flags.isByVal() == false) {
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32) sz = 32;
      } else if (isa<PointerType>(Ty))
        sz = thePointerTy.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      if (isABI)
        O << ".param .b" << sz << " ";
      else
        O << ".reg .b" << sz << " ";
      O << "_";
      continue;
    }
    const PointerType *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    if (isABI) {
      unsigned align = Outs[i].Flags.getByValAlign();
      unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
      O << ".param .align " << align << " .b8 ";
      O << "_";
      O << "[" << sz << "]";
      continue;
    } else {
      SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*this, ETy, vtparts);
      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }
        for (unsigned j = 0, je = elems; j != je; ++j) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
          O << ".reg .b" << sz << " ";
          O << "_";
          if (j < je - 1) O << ", ";
        }
        if (i < e - 1) O << ", ";
      }
      continue;
    }
  }
  O << ");";
  return O.str();
}
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
    return SDValue();

  /// If not DWORD aligned, it is more efficient to call the library. However
  /// if calling the library is not allowed (AlwaysInline), then soldier on as
  /// the code generated here is better than the long load-store sequence we
  /// would otherwise get.
  if (!AlwaysInline && (Align & 3) != 0)
    return SDValue();

  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // ESI might be used as a base pointer, in that case we can't simply
  // overwrite the register. Fall back to generic code.
  const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
      DAG.getTarget().getRegisterInfo());
  if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
      TRI->getBaseRegister() == X86::ESI)
    return SDValue();

  MVT AVT;
  if (Align & 1)
    AVT = MVT::i8;
  else if (Align & 2)
    AVT = MVT::i16;
  else if (Align & 4)
    // DWORD aligned
    AVT = MVT::i32;
  else
    // QWORD aligned
    AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;

  unsigned UBytes = AVT.getSizeInBits() / 8;
  unsigned CountVal = SizeVal / UBytes;
  SDValue Count = DAG.getIntPtrConstant(CountVal);
  unsigned BytesLeft = SizeVal % UBytes;

  SDValue InFlag;
  Chain = DAG.getCopyToReg(Chain, dl,
                           Subtarget->is64Bit() ? X86::RCX : X86::ECX,
                           Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl,
                           Subtarget->is64Bit() ? X86::RDI : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl,
                           Subtarget->is64Bit() ? X86::RSI : X86::ESI,
                           Src, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);

  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT DstVT = Dst.getValueType();
    EVT SrcVT = Src.getValueType();
    EVT SizeVT = Size.getValueType();
    Results.push_back(DAG.getMemcpy(Chain, dl,
                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
                                                DAG.getConstant(Offset,
                                                                DstVT)),
                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset,
                                                                SrcVT)),
                                    DAG.getConstant(BytesLeft, SizeVT),
                                    Align, isVolatile, AlwaysInline,
                                    DstPtrInfo.getWithOffset(Offset),
                                    SrcPtrInfo.getWithOffset(Offset)));
  }

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}
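// Worked example (illustrative): a 25-byte copy with Align = 4 on a 32-bit
// subtarget picks AVT = MVT::i32, so UBytes = 4, CountVal = 25 / 4 = 6 words
// for the REP_MOVS, and BytesLeft = 25 % 4 = 1 byte handled by the trailing
// getMemcpy at Offset = 24.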
// Change the scale of the vector type: halve the number of elements and
// double the scalar size.
static MVT scaleVectorType(MVT VT) {
  unsigned ScalarSize = VT.getVectorElementType().getScalarSizeInBits() * 2;
  return MVT::getVectorVT(MVT::getIntegerVT(ScalarSize),
                          VT.getVectorNumElements() / 2);
}
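// For illustration (not from the original source): scaleVectorType maps
// v16i8 -> v8i16 and v8i16 -> v4i32, keeping the total bit width constant.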
bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC = CLI.CallConv;
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  // const char *SymName = CLI.SymName;

  // Allow SelectionDAG isel to handle tail calls.
  if (IsTailCall)
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // FIXME: Only handle *simple* calls for now.
  MVT RetVT;
  if (CLI.RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(CLI.RetTy, RetVT))
    return false;

  for (auto Flag : CLI.OutFlags)
    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
      return false;

  // Set up the argument vectors.
  SmallVector<MVT, 16> OutVTs;
  OutVTs.reserve(CLI.OutVals.size());

  for (auto *Val : CLI.OutVals) {
    MVT VT;
    if (!isTypeLegal(Val->getType(), VT) &&
        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
      return false;

    // We don't handle vector parameters yet.
    if (VT.isVector() || VT.getSizeInBits() > 64)
      return false;

    OutVTs.push_back(VT);
  }

  Address Addr;
  if (!computeCallAddress(Callee, Addr))
    return false;

  // Handle the arguments now that we've gotten them.
  unsigned NumBytes;
  if (!processCallArgs(CLI, OutVTs, NumBytes))
    return false;

  // Issue the call.
  unsigned DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32);
  emitInst(TargetOpcode::COPY, Mips::T9).addReg(DestAddress);
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::JALR),
              Mips::RA).addReg(Mips::T9);

  // Add implicit physical register uses to the call.
  for (auto Reg : CLI.OutRegs)
    MIB.addReg(Reg, RegState::Implicit);

  // Add a register mask with the call-preserved registers. Proper
  // defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return finishCall(CLI, RetVT, NumBytes);
}
bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI,
                                   SmallVectorImpl<MVT> &OutVTs,
                                   unsigned &NumBytes) {
  CallingConv::ID CC = CLI.CallConv;
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();
  // This is the minimum argument area used for A0-A3.
  if (NumBytes < 16)
    NumBytes = 16;

  emitInst(Mips::ADJCALLSTACKDOWN).addImm(16);
  // Process the args.
  MVT firstMVT;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
    MVT ArgVT = OutVTs[VA.getValNo()];

    if (i == 0) {
      firstMVT = ArgVT;
      if (ArgVT == MVT::f32) {
        VA.convertToReg(Mips::F12);
      } else if (ArgVT == MVT::f64) {
        VA.convertToReg(Mips::D6);
      }
    } else if (i == 1) {
      if ((firstMVT == MVT::f32) || (firstMVT == MVT::f64)) {
        if (ArgVT == MVT::f32) {
          VA.convertToReg(Mips::F14);
        } else if (ArgVT == MVT::f64) {
          VA.convertToReg(Mips::D7);
        }
      }
    }
    if (((ArgVT == MVT::i32) || (ArgVT == MVT::f32)) && VA.isMemLoc()) {
      switch (VA.getLocMemOffset()) {
      case 0:
        VA.convertToReg(Mips::A0);
        break;
      case 4:
        VA.convertToReg(Mips::A1);
        break;
      case 8:
        VA.convertToReg(Mips::A2);
        break;
      case 12:
        VA.convertToReg(Mips::A3);
        break;
      default:
        break;
      }
    }
    unsigned ArgReg = getRegForValue(ArgVal);
    if (!ArgReg)
      return false;

    // Handle arg promotion: SExt, ZExt, AExt.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::AExt:
    case CCValAssign::SExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
      if (!ArgReg)
        return false;
      break;
    }
    case CCValAssign::ZExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
      if (!ArgReg)
        return false;
      break;
    }
    default:
      llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
      CLI.OutRegs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      llvm_unreachable("Mips does not use custom args.");
      return false;
    } else {
      //
      // FIXME: This path will currently return false. It was copied
      // from the AArch64 port and should be essentially fine for Mips too.
      // The work to finish up this path will be done in a follow-on patch.
      //
      assert(VA.isMemLoc() && "Assuming store on stack.");
      // Don't emit stores for undef values.
      if (isa<UndefValue>(ArgVal))
        continue;

      // Need to store on the stack.
      // FIXME: This alignment is incorrect but this path is disabled
      // for now (will return false). We need to determine the right alignment
      // based on the normal alignment for the underlying machine type.
      //
      unsigned ArgSize = RoundUpToAlignment(ArgVT.getSizeInBits(), 4);

      unsigned BEAlign = 0;
      if (ArgSize < 8 && !Subtarget->isLittle())
        BEAlign = 8 - ArgSize;

      Address Addr;
      Addr.setKind(Address::RegBase);
      Addr.setReg(Mips::SP);
      Addr.setOffset(VA.getLocMemOffset() + BEAlign);

      unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
      MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
          MachinePointerInfo::getStack(Addr.getOffset()),
          MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
      (void)(MMO);
      // if (!emitStore(ArgVT, ArgReg, Addr, MMO))
      return false; // can't store on the stack yet.
    }
  }

  return true;
}