SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
  // The vector type is legal but the element type needs expansion.
  MVT VecVT = N->getValueType(0);
  unsigned NumElts = VecVT.getVectorNumElements();
  MVT OldVT = N->getOperand(0).getValueType();
  MVT NewVT = TLI.getTypeToTransformTo(OldVT);
  DebugLoc dl = N->getDebugLoc();

  assert(OldVT == VecVT.getVectorElementType() &&
         "BUILD_VECTOR operand type doesn't match vector element type!");

  // Build a vector of twice the length out of the expanded elements.
  // For example <3 x i64> -> <6 x i32>.
  std::vector<SDValue> NewElts;
  NewElts.reserve(NumElts*2);

  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Lo, Hi;
    GetExpandedOp(N->getOperand(i), Lo, Hi);
    if (TLI.isBigEndian())
      std::swap(Lo, Hi);
    NewElts.push_back(Lo);
    NewElts.push_back(Hi);
  }

  SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
                               MVT::getVectorVT(NewVT, NewElts.size()),
                               &NewElts[0], NewElts.size());

  // Convert the new vector to the old vector type.
  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
}
SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
  // The vector type is legal but the element type needs expansion.
  MVT VecVT = N->getValueType(0);
  unsigned NumElts = VecVT.getVectorNumElements();
  DebugLoc dl = N->getDebugLoc();

  SDValue Val = N->getOperand(1);
  MVT OldEVT = Val.getValueType();
  MVT NewEVT = TLI.getTypeToTransformTo(OldEVT);

  assert(OldEVT == VecVT.getVectorElementType() &&
         "Inserted element type doesn't match vector element type!");

  // Bitconvert to a vector of twice the length with elements of the expanded
  // type, insert the expanded vector elements, and then convert back.
  MVT NewVecVT = MVT::getVectorVT(NewEVT, NumElts*2);
  SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT,
                               N->getOperand(0));

  SDValue Lo, Hi;
  GetExpandedOp(Val, Lo, Hi);
  if (TLI.isBigEndian())
    std::swap(Lo, Hi);

  SDValue Idx = N->getOperand(2);
  Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
  NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
  Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
                    DAG.getIntPtrConstant(1));
  NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);

  // Convert the new vector to the old vector type.
  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
}
void X86InterleavedAccessGroup::interleave8bitStride3(
    ArrayRef<Instruction *> InVec, SmallVectorImpl<Value *> &TransposedMatrix,
    unsigned VecElems) {
  // Example: Assuming we start from the following vectors:
  // Matrix[0]= a0 a1 a2 a3 a4 a5 a6 a7
  // Matrix[1]= b0 b1 b2 b3 b4 b5 b6 b7
  // Matrix[2]= c0 c1 c2 c3 c4 c5 c6 c7

  TransposedMatrix.resize(3);
  SmallVector<uint32_t, 3> GroupSize;
  SmallVector<uint32_t, 32> VPShuf;
  SmallVector<uint32_t, 32> VPAlign[3];
  SmallVector<uint32_t, 32> VPAlign2;
  SmallVector<uint32_t, 32> VPAlign3;

  Value *Vec[3], *TempVector[3];
  MVT VT = MVT::getVectorVT(MVT::i8, VecElems);

  setGroupSize(VT, GroupSize);

  for (int i = 0; i < 3; i++)
    DecodePALIGNRMask(VT, GroupSize[i], VPAlign[i]);

  DecodePALIGNRMask(VT, GroupSize[1] + GroupSize[2], VPAlign2, false, true);
  DecodePALIGNRMask(VT, GroupSize[1], VPAlign3, false, true);

  // Vec[0]= a3 a4 a5 a6 a7 a0 a1 a2
  // Vec[1]= c5 c6 c7 c0 c1 c2 c3 c4
  // Vec[2]= b0 b1 b2 b3 b4 b5 b6 b7

  Vec[0] = Builder.CreateShuffleVector(
      InVec[0], UndefValue::get(InVec[0]->getType()), VPAlign2);
  Vec[1] = Builder.CreateShuffleVector(
      InVec[1], UndefValue::get(InVec[1]->getType()), VPAlign3);
  Vec[2] = InVec[2];

  // Vec[0]= a6 a7 a0 a1 a2 b0 b1 b2
  // Vec[1]= c0 c1 c2 c3 c4 a3 a4 a5
  // Vec[2]= b3 b4 b5 b6 b7 c5 c6 c7

  for (int i = 0; i < 3; i++)
    TempVector[i] =
        Builder.CreateShuffleVector(Vec[i], Vec[(i + 2) % 3], VPAlign[1]);

  // Vec[0]= a0 a1 a2 b0 b1 b2 c0 c1
  // Vec[1]= c2 c3 c4 a3 a4 a5 b3 b4
  // Vec[2]= b5 b6 b7 c5 c6 c7 a6 a7

  for (int i = 0; i < 3; i++)
    Vec[i] = Builder.CreateShuffleVector(TempVector[i], TempVector[(i + 1) % 3],
                                         VPAlign[2]);

  // TransposedMatrix[0] = a0 b0 c0 a1 b1 c1 a2 b2
  // TransposedMatrix[1] = c2 a3 b3 c3 a4 b4 c4 a5
  // TransposedMatrix[2] = b5 c5 a6 b6 c6 a7 b7 c7

  unsigned NumOfElm = VT.getVectorNumElements();
  group2Shuffle(VT, GroupSize, VPShuf);
  reorderSubVector(VT, TransposedMatrix, Vec, VPShuf, NumOfElm, 3, Builder);
}
// createShuffleStride returns a shuffle mask of size N.
// The shuffle pattern is as follows:
// {0, Stride%(VF/Lane), (2*Stride%(VF/Lane))...(VF*Stride/Lane)%(VF/Lane),
//  (VF/Lane), (VF/Lane)+Stride%(VF/Lane), ...,
//  (VF/Lane)+(VF*Stride/Lane)%(VF/Lane)}
// where Lane is the # of lanes in a register:
// VectorSize = 128 => Lane = 1
// VectorSize = 256 => Lane = 2
// For example, the shuffle pattern for VF 16 with register size 256
// (lanes = 2) is {<[0|3|6|1|4|7|2|5]-[8|11|14|9|12|15|10|13]>}.
static void createShuffleStride(MVT VT, int Stride,
                                SmallVectorImpl<uint32_t> &Mask) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements();
  int LaneCount = std::max(VectorSize / 128, 1);
  for (int Lane = 0; Lane < LaneCount; Lane++)
    for (int i = 0, LaneSize = VF / LaneCount; i != LaneSize; ++i)
      Mask.push_back((i * Stride) % LaneSize + LaneSize * Lane);
}
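// Illustrative sketch, not part of the original source: it assumes the static
// helper createShuffleStride above is visible in this translation unit, and
// the example function name is hypothetical. It reproduces the mask from the
// comment above for a 256-bit vector with VF = 16 (two 128-bit lanes,
// stride 3).
static void exampleCreateShuffleStride() {
  SmallVector<uint32_t, 32> Mask;
  createShuffleStride(MVT::getVectorVT(MVT::i16, 16), 3, Mask);
  // Mask is now {0,3,6,1,4,7,2,5, 8,11,14,9,12,15,10,13}: the stride-3
  // pattern is computed per 8-element lane and offset by the lane base.
}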
// setGroupSize sets 'SizeInfo' to the size (number of elements) of each group
// inside a shuffle mask. A mask contains exactly 3 groups, where each group
// is a monotonically increasing sequence with stride 3.
// For example, shuffle mask {0,3,6,1,4,7,2,5} => {3,3,2}.
static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements() / std::max(VectorSize / 128, 1);
  for (int i = 0, FirstGroupElement = 0; i < 3; i++) {
    int GroupSize = std::ceil((VF - FirstGroupElement) / 3.0);
    SizeInfo.push_back(GroupSize);
    FirstGroupElement = (GroupSize * 3 + FirstGroupElement) % VF;
  }
}
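// Illustrative sketch (assumption: the static setGroupSize above is in scope;
// the example function name is hypothetical). For a single 128-bit lane of 16
// bytes the three stride-3 groups cover 6 + 5 + 5 elements, matching the
// ceil((VF - FirstGroupElement) / 3) computation above.
static void exampleSetGroupSize() {
  SmallVector<uint32_t, 3> SizeInfo;
  setGroupSize(MVT::getVectorVT(MVT::i8, 16), SizeInfo);
  // SizeInfo is now {6, 5, 5}.
}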
// genShuffleBland - Creates a shuffle mask over two vectors. This function
// only works on instructions whose lanes fit inside 256-bit registers. From
// the input mask 'Mask' it creates a new mask 'Out' by applying an offset to
// each element. The offset amount depends on the two integers 'LowOffset' and
// 'HighOffset', where 'LowOffset' applies to indices taken from the first
// vector and 'HighOffset' to indices taken from the second vector.
// |a0....a5,b0....b4,c0....c4|a16..a21,b16..b20,c16..c20|
// |c5...c10,a5....a9,b5....b9|c21..c26,a22..a26,b21..b25|
// |b10..b15,c11..c15,a10..a15|b26..b31,c27..c31,a27..a31|
// For the sequence to work as a mirror of the load, we must consider the
// element order as above. This function combines two kinds of shuffles: the
// first is a per-lane shuffle and the second is a "blend"-style shuffle. By
// computing the shuffle on a sequence of 16 elements (one lane) and adding
// the correct offset, we build a shuffle + blend sequence between two
// shuffles.
static void genShuffleBland(MVT VT, ArrayRef<uint32_t> Mask,
                            SmallVectorImpl<uint32_t> &Out, int LowOffset,
                            int HighOffset) {
  assert(VT.getSizeInBits() >= 256 &&
         "This function doesn't accept width smaller than 256");
  unsigned NumOfElm = VT.getVectorNumElements();
  for (unsigned i = 0; i < Mask.size(); i++)
    Out.push_back(Mask[i] + LowOffset);
  for (unsigned i = 0; i < Mask.size(); i++)
    Out.push_back(Mask[i] + HighOffset + NumOfElm);
}
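// Illustrative sketch (assumption: genShuffleBland above is in scope; the
// example function name and the concrete offsets are hypothetical). With a
// 256-bit <32 x i8> type and an 8-element per-lane mask, the output replays
// the mask twice: once shifted by LowOffset for the first source, and once
// shifted by HighOffset plus the element count for the second source.
static void exampleGenShuffleBland() {
  SmallVector<uint32_t, 8> Mask;
  for (uint32_t M : {0u, 3u, 6u, 1u, 4u, 7u, 2u, 5u})
    Mask.push_back(M);
  SmallVector<uint32_t, 16> Out;
  genShuffleBland(MVT::getVectorVT(MVT::i8, 32), Mask, Out, /*LowOffset=*/0,
                  /*HighOffset=*/16);
  // Out is now {0,3,6,1,4,7,2,5, 48,51,54,49,52,55,50,53}: the second half
  // is Mask[i] + 16 + 32.
}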
SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
  DebugLoc dl = N->getDebugLoc();
  MVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SmallVector<SDValue, 16> Ops(NumElts);
  Ops[0] = N->getOperand(0);
  SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
  for (unsigned i = 1; i < NumElts; ++i)
    Ops[i] = UndefVal;
  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
}
static void EmitTypeGenerate(std::ostream &OS, const Record *ArgType,
                             unsigned &ArgNo) {
  MVT::SimpleValueType VT = getValueType(ArgType->getValueAsDef("VT"));

  if (ArgType->isSubClassOf("LLVMMatchType")) {
    unsigned Number = ArgType->getValueAsInt("Number");
    assert(Number < ArgNo && "Invalid matching number!");
    if (ArgType->isSubClassOf("LLVMExtendedElementVectorType"))
      OS << "VectorType::getExtendedElementVectorType"
         << "(dyn_cast<VectorType>(Tys[" << Number << "]))";
    else if (ArgType->isSubClassOf("LLVMTruncatedElementVectorType"))
      OS << "VectorType::getTruncatedElementVectorType"
         << "(dyn_cast<VectorType>(Tys[" << Number << "]))";
    else
      OS << "Tys[" << Number << "]";
  } else if (VT == MVT::iAny || VT == MVT::fAny) {
    // NOTE: The ArgNo variable here is not the absolute argument number, it is
    // the index of the "arbitrary" type in the Tys array passed to the
    // Intrinsic::getDeclaration function. Consequently, we only want to
    // increment it when we actually hit an overloaded type. Getting this wrong
    // leads to very subtle bugs!
    OS << "Tys[" << ArgNo++ << "]";
  } else if (MVT(VT).isVector()) {
    MVT VVT = VT;
    OS << "VectorType::get(";
    EmitTypeForValueType(OS, VVT.getVectorElementType().getSimpleVT());
    OS << ", " << VVT.getVectorNumElements() << ")";
  } else if (VT == MVT::iPTR) {
    OS << "PointerType::getUnqual(";
    EmitTypeGenerate(OS, ArgType->getValueAsDef("ElTy"), ArgNo);
    OS << ")";
  } else if (VT == MVT::iPTRAny) {
    // Make sure the user has passed us an argument type to overload. If not,
    // treat it as an ordinary (not overloaded) intrinsic.
    OS << "(" << ArgNo << " < numTys) ? Tys[" << ArgNo
       << "] : PointerType::getUnqual(";
    EmitTypeGenerate(OS, ArgType->getValueAsDef("ElTy"), ArgNo);
    OS << ")";
    ++ArgNo;
  } else if (VT == MVT::isVoid) {
    if (ArgNo == 0)
      OS << "Type::VoidTy";
    else
      // MVT::isVoid is used to mean varargs here.
      OS << "...";
  } else {
    EmitTypeForValueType(OS, VT);
  }
}
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
                                          unsigned &NumIntermediates,
                                          MVT &RegisterVT,
                                          TargetLoweringBase *TLI) {
  // Figure out the right, legal destination reg to copy into.
  unsigned NumElts = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType();

  unsigned NumVectorRegs = 1;

  // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
  // could break down into LHS/RHS like LegalizeDAG does.
  if (!isPowerOf2_32(NumElts)) {
    NumVectorRegs = NumElts;
    NumElts = 1;
  }

  // Divide the input until we get to a supported size. This will always
  // end with a scalar if the target doesn't support vectors.
  while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
    NumElts >>= 1;
    NumVectorRegs <<= 1;
  }

  NumIntermediates = NumVectorRegs;

  MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
  if (!TLI->isTypeLegal(NewVT))
    NewVT = EltTy;
  IntermediateVT = NewVT;

  unsigned NewVTSize = NewVT.getSizeInBits();

  // Convert sizes such as i33 to i64.
  if (!isPowerOf2_32(NewVTSize))
    NewVTSize = NextPowerOf2(NewVTSize);

  MVT DestVT = TLI->getRegisterType(NewVT);
  RegisterVT = DestVT;
  if (EVT(DestVT).bitsLT(NewVT))    // Value is expanded, e.g. i64 -> i16.
    return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());

  // Otherwise, promotion or legal types use the same number of registers as
  // the vector decimated to the appropriate level.
  return NumVectorRegs;
}
LLT::LLT(MVT VT) {
  if (VT.isVector()) {
    SizeInBits = VT.getVectorElementType().getSizeInBits();
    ElementsOrAddrSpace = VT.getVectorNumElements();
    Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
  } else if (VT.isValid()) {
    // Aggregates are no different from real scalars as far as GlobalISel is
    // concerned.
    Kind = Scalar;
    SizeInBits = VT.getSizeInBits();
    ElementsOrAddrSpace = 1;
    assert(SizeInBits != 0 && "invalid zero-sized type");
  } else {
    Kind = Invalid;
    SizeInBits = ElementsOrAddrSpace = 0;
  }
}
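// Illustrative sketch, not part of the original source (the example function
// name is hypothetical): a couple of conversions that exercise the branches
// of the constructor above.
static void exampleLLTFromMVT() {
  LLT ScalarTy(MVT::i64);    // Scalar branch: a 64-bit scalar LLT.
  LLT VectorTy(MVT::v4i32);  // Vector branch: an LLT with 4 x 32-bit elements.
  // An invalid MVT, e.g. MVT(), takes the final branch and yields an
  // invalid LLT.
  (void)ScalarTy;
  (void)VectorTy;
}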
// group2Shuffle reorders the shuffle stride back into contiguous order.
// For example, for VF 16 with Mask1 = {0,3,6,9,12,15,2,5,8,11,14,1,4,7,10,13}
// => MaskResult = {0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5}.
static void group2Shuffle(MVT VT, SmallVectorImpl<uint32_t> &Mask,
                          SmallVectorImpl<uint32_t> &Output) {
  int IndexGroup[3] = {0, 0, 0};
  int Index = 0;
  int VectorWidth = VT.getSizeInBits();
  int VF = VT.getVectorNumElements();
  // Find the index of the different groups.
  int Lane = (VectorWidth / 128 > 0) ? VectorWidth / 128 : 1;
  for (int i = 0; i < 3; i++) {
    IndexGroup[(Index * 3) % (VF / Lane)] = Index;
    Index += Mask[i];
  }
  // According to the index, compute the conversion mask.
  for (int i = 0; i < VF / Lane; i++) {
    Output.push_back(IndexGroup[i % 3]);
    IndexGroup[i % 3]++;
  }
}
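// Illustrative sketch (assumption: the static helpers setGroupSize and
// group2Shuffle above are in scope; the example function name is
// hypothetical). Feeding the group sizes of a single 128-bit lane of
// <16 x i8> reproduces the MaskResult from the comment above.
static void exampleGroup2Shuffle() {
  MVT VT = MVT::getVectorVT(MVT::i8, 16);
  SmallVector<uint32_t, 3> GroupSize;
  setGroupSize(VT, GroupSize);          // {6, 5, 5} for one 128-bit lane.
  SmallVector<uint32_t, 16> Output;
  group2Shuffle(VT, GroupSize, Output);
  // Output is now {0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5}.
}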
// DecodePALIGNRMask returns the shuffle mask of the vpalignr instruction.
// vpalignr operates per lane, where Lane is the # of lanes in a register:
// VectorWide = 128 => Lane = 1
// VectorWide = 256 => Lane = 2
// For Lane = 1 the shuffle pattern is: {DiffToJump,...,DiffToJump+VF-1}.
// For Lane = 2 the shuffle pattern is:
// {DiffToJump,...,VF/2-1,VF,...,DiffToJump+VF-1}.
// The Imm variable sets the offset amount. The resulting mask is stored in
// the ShuffleMask vector and is built as described above. AlignDirection is a
// boolean that indicates the direction of the alignment (false - align to the
// "right" side, true - align to the "left" side).
static void DecodePALIGNRMask(MVT VT, unsigned Imm,
                              SmallVectorImpl<uint32_t> &ShuffleMask,
                              bool AlignDirection = true, bool Unary = false) {
  unsigned NumElts = VT.getVectorNumElements();
  unsigned NumLanes = std::max((int)VT.getSizeInBits() / 128, 1);
  unsigned NumLaneElts = NumElts / NumLanes;

  Imm = AlignDirection ? Imm : (NumLaneElts - Imm);
  unsigned Offset = Imm * (VT.getScalarSizeInBits() / 8);

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Base = i + Offset;
      // If i+Offset is out of this lane then we actually need the other
      // source. If Unary, the other source is the first source.
      if (Base >= NumLaneElts)
        Base = Unary ? Base % NumLaneElts : Base + NumElts - NumLaneElts;
      ShuffleMask.push_back(Base + l);
    }
  }
}
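// Illustrative sketch (assumption: DecodePALIGNRMask above is in scope; the
// example function name is hypothetical). For a single-lane <16 x i8> vector
// with Imm = 5 and the default AlignDirection/Unary arguments, the mask
// selects elements 5..15 of the first source followed by elements 0..4 of the
// second source (indices 16..20).
static void exampleDecodePALIGNRMask() {
  SmallVector<uint32_t, 16> Mask;
  DecodePALIGNRMask(MVT::getVectorVT(MVT::i8, 16), 5, Mask);
  // Mask is now {5,6,...,15, 16,17,18,19,20}.
}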
// Change the scale of the vector type by halving the number of elements and
// doubling the scalar size.
static MVT scaleVectorType(MVT VT) {
  unsigned ScalarSize = VT.getVectorElementType().getScalarSizeInBits() * 2;
  return MVT::getVectorVT(MVT::getIntegerVT(ScalarSize),
                          VT.getVectorNumElements() / 2);
}
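// Illustrative sketch, not part of the original source (the example function
// name is hypothetical): the helper above maps, for example,
// <16 x i8> -> <8 x i16> and <8 x i16> -> <4 x i32>.
static void exampleScaleVectorType() {
  MVT Widened = scaleVectorType(MVT::getVectorVT(MVT::i8, 16));
  // Widened is now MVT::v8i16: half the elements, twice the scalar width.
  (void)Widened;
}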
SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
  DebugLoc dl = N->getDebugLoc();
  StoreSDNode *ST = cast<StoreSDNode>(N);
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = NULL;

  // do not support pre/post inc/dec
  if (ST->isIndexed())
    return NULL;

  if (!StoreVT.isSimple())
    return NULL;

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = ST->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    unsigned num = SimpleVT.getVectorNumElements();
    if (num == 2)
      vecType = NVPTX::PTXLdStInstCode::V2;
    else if (num == 4)
      vecType = NVPTX::PTXLdStInstCode::V4;
    else
      return NULL;
  }

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  unsigned int toType;
  if (ScalarVT.isFloatingPoint())
    toType = NVPTX::PTXLdStInstCode::Float;
  else
    toType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue Addr;
  SDValue Offset, Base;
  unsigned Opcode;
  MVT::SimpleValueType SourceVT =
      N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;

  if (SelectDirectAddr(N2, Addr)) {
    switch (SourceVT) {
    case MVT::i8:  Opcode = NVPTX::ST_i8_avar;  break;
    case MVT::i16: Opcode = NVPTX::ST_i16_avar; break;
    case MVT::i32: Opcode = NVPTX::ST_i32_avar; break;
    case MVT::i64: Opcode = NVPTX::ST_i64_avar; break;
    case MVT::f32: Opcode = NVPTX::ST_f32_avar; break;
    case MVT::f64: Opcode = NVPTX::ST_f64_avar; break;
    default: return NULL;
    }
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Addr, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    switch (SourceVT) {
    case MVT::i8:  Opcode = NVPTX::ST_i8_asi;  break;
    case MVT::i16: Opcode = NVPTX::ST_i16_asi; break;
    case MVT::i32: Opcode = NVPTX::ST_i32_asi; break;
    case MVT::i64: Opcode = NVPTX::ST_i64_asi; break;
    case MVT::f32: Opcode = NVPTX::ST_f32_asi; break;
    case MVT::f64: Opcode = NVPTX::ST_f64_asi; break;
    default: return NULL;
    }
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (SourceVT) {
      case MVT::i8:  Opcode = NVPTX::ST_i8_ari_64;  break;
      case MVT::i16: Opcode = NVPTX::ST_i16_ari_64; break;
      case MVT::i32: Opcode = NVPTX::ST_i32_ari_64; break;
      case MVT::i64: Opcode = NVPTX::ST_i64_ari_64; break;
      case MVT::f32: Opcode = NVPTX::ST_f32_ari_64; break;
      case MVT::f64: Opcode = NVPTX::ST_f64_ari_64; break;
      default: return NULL;
      }
    } else {
      switch (SourceVT) {
      case MVT::i8:  Opcode = NVPTX::ST_i8_ari;  break;
      case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
      case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
      case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
      case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
      case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
      default: return NULL;
      }
    }
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
  } else {
    if (Subtarget.is64Bit()) {
      switch (SourceVT) {
      case MVT::i8:  Opcode = NVPTX::ST_i8_areg_64;  break;
      case MVT::i16: Opcode = NVPTX::ST_i16_areg_64; break;
      case MVT::i32: Opcode = NVPTX::ST_i32_areg_64; break;
      case MVT::i64: Opcode = NVPTX::ST_i64_areg_64; break;
      case MVT::f32: Opcode = NVPTX::ST_f32_areg_64; break;
      case MVT::f64: Opcode = NVPTX::ST_f64_areg_64; break;
      default: return NULL;
      }
    } else {
      switch (SourceVT) {
      case MVT::i8:  Opcode = NVPTX::ST_i8_areg;  break;
      case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
      case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
      case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
      case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
      case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
      default: return NULL;
      }
    }
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), N2, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
  }

  if (NVPTXST != NULL) {
    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
  }

  return NVPTXST;
}
SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
  DebugLoc dl = N->getDebugLoc();
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD = NULL;

  // do not support pre/post inc/dec
  if (LD->isIndexed())
    return NULL;

  if (!LoadedVT.isSimple())
    return NULL;

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = LD->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = LoadedVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    unsigned num = SimpleVT.getVectorNumElements();
    if (num == 2)
      vecType = NVPTX::PTXLdStInstCode::V2;
    else if (num == 4)
      vecType = NVPTX::PTXLdStInstCode::V4;
    else
      return NULL;
  }

  // Type Setting: fromType + fromTypeWidth
  //
  // Signed   : ISD::SEXTLOAD
  // Unsigned : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //            type is integer
  // Float    : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned fromTypeWidth = ScalarVT.getSizeInBits();
  unsigned int fromType;
  if ((LD->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    fromType = NVPTX::PTXLdStInstCode::Float;
  else
    fromType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Addr;
  SDValue Offset, Base;
  unsigned Opcode;
  MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;

  if (SelectDirectAddr(N1, Addr)) {
    switch (TargetVT) {
    case MVT::i8:  Opcode = NVPTX::LD_i8_avar;  break;
    case MVT::i16: Opcode = NVPTX::LD_i16_avar; break;
    case MVT::i32: Opcode = NVPTX::LD_i32_avar; break;
    case MVT::i64: Opcode = NVPTX::LD_i64_avar; break;
    case MVT::f32: Opcode = NVPTX::LD_f32_avar; break;
    case MVT::f64: Opcode = NVPTX::LD_f64_avar; break;
    default: return NULL;
    }
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    switch (TargetVT) {
    case MVT::i8:  Opcode = NVPTX::LD_i8_asi;  break;
    case MVT::i16: Opcode = NVPTX::LD_i16_asi; break;
    case MVT::i32: Opcode = NVPTX::LD_i32_asi; break;
    case MVT::i64: Opcode = NVPTX::LD_i64_asi; break;
    case MVT::f32: Opcode = NVPTX::LD_f32_asi; break;
    case MVT::f64: Opcode = NVPTX::LD_f64_asi; break;
    default: return NULL;
    }
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (TargetVT) {
      case MVT::i8:  Opcode = NVPTX::LD_i8_ari_64;  break;
      case MVT::i16: Opcode = NVPTX::LD_i16_ari_64; break;
      case MVT::i32: Opcode = NVPTX::LD_i32_ari_64; break;
      case MVT::i64: Opcode = NVPTX::LD_i64_ari_64; break;
      case MVT::f32: Opcode = NVPTX::LD_f32_ari_64; break;
      case MVT::f64: Opcode = NVPTX::LD_f64_ari_64; break;
      default: return NULL;
      }
    } else {
      switch (TargetVT) {
      case MVT::i8:  Opcode = NVPTX::LD_i8_ari;  break;
      case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
      case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
      case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
      case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
      case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
      default: return NULL;
      }
    }
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
  } else {
    if (Subtarget.is64Bit()) {
      switch (TargetVT) {
      case MVT::i8:  Opcode = NVPTX::LD_i8_areg_64;  break;
      case MVT::i16: Opcode = NVPTX::LD_i16_areg_64; break;
      case MVT::i32: Opcode = NVPTX::LD_i32_areg_64; break;
      case MVT::i64: Opcode = NVPTX::LD_i64_areg_64; break;
      case MVT::f32: Opcode = NVPTX::LD_f32_areg_64; break;
      case MVT::f64: Opcode = NVPTX::LD_f64_areg_64; break;
      default: return NULL;
      }
    } else {
      switch (TargetVT) {
      case MVT::i8:  Opcode = NVPTX::LD_i8_areg;  break;
      case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
      case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
      case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
      case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
      case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
      default: return NULL;
      }
    }
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
  }

  if (NVPTXLD != NULL) {
    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
  }

  return NVPTXLD;
}
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLoweringBase::computeRegisterProperties() {
  assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
         "Too many value types for ValueTypeActions to hold!");

  // Everything defaults to needing one register.
  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
    NumRegistersForVT[i] = 1;
    RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
  }
  // ...except isVoid, which doesn't need any registers.
  NumRegistersForVT[MVT::isVoid] = 0;

  // Find the largest integer register class.
  unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
  for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
    assert(LargestIntReg != MVT::i1 && "No integer registers defined!");

  // Every integer value type larger than this largest register takes twice as
  // many registers to represent as the previous ValueType.
  for (unsigned ExpandedReg = LargestIntReg + 1;
       ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
    NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
    RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
    TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
    ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
                                   TypeExpandInteger);
  }

  // Inspect all of the ValueType's smaller than the largest integer
  // register to see which ones need promotion.
  unsigned LegalIntReg = LargestIntReg;
  for (unsigned IntReg = LargestIntReg - 1;
       IntReg >= (unsigned)MVT::i1; --IntReg) {
    MVT IVT = (MVT::SimpleValueType)IntReg;
    if (isTypeLegal(IVT)) {
      LegalIntReg = IntReg;
    } else {
      RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
        (const MVT::SimpleValueType)LegalIntReg;
      ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
    }
  }

  // ppcf128 type is really two f64's.
  if (!isTypeLegal(MVT::ppcf128)) {
    NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
    RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
    TransformToType[MVT::ppcf128] = MVT::f64;
    ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
  }

  // Decide how to handle f128. If the target does not have native f128
  // support, expand it to i128 and we will be generating soft float library
  // calls.
  if (!isTypeLegal(MVT::f128)) {
    NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
    RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
    TransformToType[MVT::f128] = MVT::i128;
    ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
  }

  // Decide how to handle f64. If the target does not have native f64 support,
  // expand it to i64 and we will be generating soft float library calls.
  if (!isTypeLegal(MVT::f64)) {
    NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
    RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
    TransformToType[MVT::f64] = MVT::i64;
    ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
  }

  // Decide how to handle f32. If the target does not have native support for
  // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
  if (!isTypeLegal(MVT::f32)) {
    if (isTypeLegal(MVT::f64)) {
      NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
      RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
      TransformToType[MVT::f32] = MVT::f64;
      ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger);
    } else {
      NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
      RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
      TransformToType[MVT::f32] = MVT::i32;
      ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
    }
  }

  // Loop over all of the vector value types to see which need transformations.
  for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;
    if (isTypeLegal(VT))
      continue;

    // Determine if there is a legal wider type. If so, we should promote to
    // that wider vector type.
    MVT EltVT = VT.getVectorElementType();
    unsigned NElts = VT.getVectorNumElements();
    if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) {
      bool IsLegalWiderType = false;
      // First try to promote the elements of integer vectors. If no legal
      // promotion was found, fall back to the widen-vector method.
      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
        MVT SVT = (MVT::SimpleValueType)nVT;
        // Promote vectors of integers to vectors with the same number
        // of elements, with a wider element type.
        if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
            && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)
            && SVT.getScalarType().isInteger()) {
          TransformToType[i] = SVT;
          RegisterTypeForVT[i] = SVT;
          NumRegistersForVT[i] = 1;
          ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
          IsLegalWiderType = true;
          break;
        }
      }

      if (IsLegalWiderType)
        continue;

      // Try to widen the vector.
      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
        MVT SVT = (MVT::SimpleValueType)nVT;
        if (SVT.getVectorElementType() == EltVT &&
            SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
          TransformToType[i] = SVT;
          RegisterTypeForVT[i] = SVT;
          NumRegistersForVT[i] = 1;
          ValueTypeActions.setTypeAction(VT, TypeWidenVector);
          IsLegalWiderType = true;
          break;
        }
      }
      if (IsLegalWiderType)
        continue;
    }

    MVT IntermediateVT;
    MVT RegisterVT;
    unsigned NumIntermediates;
    NumRegistersForVT[i] =
      getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
                                RegisterVT, this);
    RegisterTypeForVT[i] = RegisterVT;

    MVT NVT = VT.getPow2VectorType();
    if (NVT == VT) {
      // Type is already a power of 2. The default action is to split.
      TransformToType[i] = MVT::Other;
      unsigned NumElts = VT.getVectorNumElements();
      ValueTypeActions.setTypeAction(VT,
          NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
    } else {
      TransformToType[i] = NVT;
      ValueTypeActions.setTypeAction(VT, TypeWidenVector);
    }
  }

  // Determine the 'representative' register class for each value type.
  // A representative register class is the largest legal register class for
  // a group of value types (meaning one which is not a sub-register class /
  // subreg register class). For example, on i386 the representative class
  // for i8, i16, and i32 would be GR32; on x86_64 it's GR64.
  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
    const TargetRegisterClass* RRC;
    uint8_t Cost;
    tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
    RepRegClassForVT[i] = RRC;
    RepRegClassCostForVT[i] = Cost;
  }
}
void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
                                             SDValue &Hi) {
  MVT OutVT = N->getValueType(0);
  MVT NOutVT = TLI.getTypeToTransformTo(OutVT);
  SDValue InOp = N->getOperand(0);
  MVT InVT = InOp.getValueType();
  DebugLoc dl = N->getDebugLoc();

  // Handle some special cases efficiently.
  switch (getTypeAction(InVT)) {
    default:
      assert(false && "Unknown type action!");
    case Legal:
    case PromoteInteger:
      break;
    case SoftenFloat:
      // Convert the integer operand instead.
      SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
      return;
    case ExpandInteger:
    case ExpandFloat:
      // Convert the expanded pieces of the input.
      GetExpandedOp(InOp, Lo, Hi);
      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
      return;
    case SplitVector:
      // Convert the split parts of the input if it was split in two.
      GetSplitVector(InOp, Lo, Hi);
      if (Lo.getValueType() == Hi.getValueType()) {
        if (TLI.isBigEndian())
          std::swap(Lo, Hi);
        Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
        Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
        return;
      }
      break;
    case ScalarizeVector:
      // Convert the element instead.
      SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
      return;
    case WidenVector: {
      assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT");
      InOp = GetWidenedVector(InOp);
      MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
                                   InVT.getVectorNumElements()/2);
      Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
                       DAG.getIntPtrConstant(0));
      Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
                       DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
      if (TLI.isBigEndian())
        std::swap(Lo, Hi);
      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
      return;
    }
  }

  // Lower the bit-convert to a store/load from the stack.
  assert(NOutVT.isByteSized() && "Expanded type not byte sized!");

  // Create the stack frame object. Make sure it is aligned for both
  // the source and expanded destination types.
  unsigned Alignment =
    TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForMVT());
  SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  const Value *SV = PseudoSourceValue::getFixedStack(SPFI);

  // Emit a store to the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0);

  // Load the first half from the stack slot.
  Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0);

  // Increment the pointer to the other half.
  unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
  StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
                         DAG.getIntPtrConstant(IncrementSize));

  // Load the second half from the stack slot.
  Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false,
                   MinAlign(Alignment, IncrementSize));

  // Handle endianness of the load.
  if (TLI.isBigEndian())
    std::swap(Lo, Hi);
}
bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
                                     ArrayRef<ArgInfo> Args,
                                     ValueHandler &Handler) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  unsigned NumArgs = Args.size();
  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT CurVT = MVT::getVT(Args[i].Ty);
    if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i],
                          CCInfo)) {
      // Try to use the register type if we couldn't assign the VT.
      if (!Handler.isArgumentHandler() || !CurVT.isValid())
        return false;
      CurVT = TLI->getRegisterTypeForCallingConv(
          F.getContext(), F.getCallingConv(), EVT(CurVT));
      if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i],
                            CCInfo))
        return false;
    }
  }

  for (unsigned i = 0, e = Args.size(), j = 0; i != e; ++i, ++j) {
    assert(j < ArgLocs.size() && "Skipped too many arg locs");

    CCValAssign &VA = ArgLocs[j];
    assert(VA.getValNo() == i && "Location doesn't correspond to current arg");

    if (VA.needsCustom()) {
      j += Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
      continue;
    }

    if (VA.isRegLoc()) {
      MVT OrigVT = MVT::getVT(Args[i].Ty);
      MVT VAVT = VA.getValVT();
      if (Handler.isArgumentHandler() && VAVT != OrigVT) {
        if (VAVT.getSizeInBits() < OrigVT.getSizeInBits())
          return false; // Can't handle this type of arg yet.
        const LLT VATy(VAVT);
        unsigned NewReg =
            MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
        Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
        // If it's a vector type, we either need to truncate the elements
        // or do an unmerge to get the lower block of elements.
        if (VATy.isVector() &&
            VATy.getNumElements() > OrigVT.getVectorNumElements()) {
          const LLT OrigTy(OrigVT);
          // Just handle the case where the VA type is 2 * original type.
          if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) {
            LLVM_DEBUG(dbgs()
                       << "Incoming promoted vector arg has too many elts");
            return false;
          }
          auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg});
          MIRBuilder.buildCopy(Args[i].Reg, Unmerge.getReg(0));
        } else {
          MIRBuilder.buildTrunc(Args[i].Reg, {NewReg}).getReg(0);
        }
      } else {
        Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA);
      }
    } else if (VA.isMemLoc()) {
      MVT VT = MVT::getVT(Args[i].Ty);
      unsigned Size = VT == MVT::iPTR ? DL.getPointerSize()
                                      : alignTo(VT.getSizeInBits(), 8) / 8;
      unsigned Offset = VA.getLocMemOffset();
      MachinePointerInfo MPO;
      unsigned StackAddr = Handler.getStackAddress(Size, Offset, MPO);
      Handler.assignValueToAddress(Args[i].Reg, StackAddr, Size, MPO, VA);
    } else {
      // FIXME: Support byvals and other weirdness
      return false;
    }
  }
  return true;
}