SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { // The vector type is legal but the element type needs expansion. EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); EVT OldVT = N->getOperand(0).getValueType(); EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); SDLoc dl(N); assert(OldVT == VecVT.getVectorElementType() && "BUILD_VECTOR operand type doesn't match vector element type!"); // Build a vector of twice the length out of the expanded elements. // For example <3 x i64> -> <6 x i32>. std::vector<SDValue> NewElts; NewElts.reserve(NumElts*2); for (unsigned i = 0; i < NumElts; ++i) { SDValue Lo, Hi; GetExpandedOp(N->getOperand(i), Lo, Hi); if (TLI.isBigEndian()) std::swap(Lo, Hi); NewElts.push_back(Lo); NewElts.push_back(Hi); } SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()), &NewElts[0], NewElts.size()); // Convert the new vector to the old vector type. return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); }
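// A minimal standalone sketch (plain C++, no LLVM types; names are illustrative only) of the
// element expansion performed by ExpandOp_BUILD_VECTOR above: each 64-bit element is split
// into two 32-bit halves, and the halves of element i land at indices 2*i and 2*i+1 of the
// twice-as-long vector, with the most-significant half first on big-endian targets. The same
// 2*i / 2*i+1 layout is what the INSERT_VECTOR_ELT expansion below relies on when it doubles
// the insertion index.
#include <cstdint>
#include <utility>
#include <vector>

std::vector<uint32_t> expandElements(const std::vector<uint64_t> &Elts,
                                     bool BigEndian) {
  std::vector<uint32_t> NewElts;
  NewElts.reserve(Elts.size() * 2);
  for (uint64_t E : Elts) {
    uint32_t Lo = static_cast<uint32_t>(E);       // low 32 bits of element i
    uint32_t Hi = static_cast<uint32_t>(E >> 32); // high 32 bits of element i
    if (BigEndian)
      std::swap(Lo, Hi);     // most-significant half goes first on big-endian
    NewElts.push_back(Lo);   // ends up at index 2*i
    NewElts.push_back(Hi);   // ends up at index 2*i + 1
  }
  return NewElts;
}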
SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { // The vector type is legal but the element type needs expansion. EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); SDLoc dl(N); SDValue Val = N->getOperand(1); EVT OldEVT = Val.getValueType(); EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT); assert(OldEVT == VecVT.getVectorElementType() && "Inserted element type doesn't match vector element type!"); // Bitconvert to a vector of twice the length with elements of the expanded // type, insert the expanded vector elements, and then convert back. EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2); SDValue NewVec = DAG.getNode(ISD::BITCAST, dl, NewVecVT, N->getOperand(0)); SDValue Lo, Hi; GetExpandedOp(Val, Lo, Hi); if (TLI.isBigEndian()) std::swap(Lo, Hi); SDValue Idx = N->getOperand(2); Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, DAG.getConstant(1, Idx.getValueType())); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); // Convert the new vector to the old vector type. return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); }
SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { // The scalar operand's type needs expansion: build a BUILD_VECTOR with the scalar in lane 0 and undef in the remaining lanes, which ExpandOp_BUILD_VECTOR above then legalizes element by element. SDLoc dl(N); EVT VT = N->getValueType(0); assert(VT.getVectorElementType() == N->getOperand(0).getValueType() && "SCALAR_TO_VECTOR operand type doesn't match vector element type!"); unsigned NumElts = VT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(NumElts); Ops[0] = N->getOperand(0); SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType()); for (unsigned i = 1; i < NumElts; ++i) Ops[i] = UndefVal; return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); }
SDValue NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { bool isABI = (nvptxSubtarget.getSmVersion() >= 20); unsigned sizesofar = 0; unsigned idx = 0; for (unsigned i=0, e=Outs.size(); i!=e; ++i) { SDValue theVal = OutVals[i]; EVT theValType = theVal.getValueType(); unsigned numElems = 1; if (theValType.isVector()) numElems = theValType.getVectorNumElements(); for (unsigned j=0,je=numElems; j!=je; ++j) { SDValue tmpval = theVal; if (theValType.isVector()) tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, theValType.getVectorElementType(), tmpval, DAG.getIntPtrConstant(j)); Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval, dl, MVT::Other, Chain, DAG.getConstant(isABI ? sizesofar : idx, MVT::i32), tmpval); if (theValType.isVector()) sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8; else sizesofar += theValType.getStoreSizeInBits()/8; ++idx; } } return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); }
static void EmitTypeGenerate(raw_ostream &OS, const Record *ArgType, unsigned &ArgNo) { MVT::SimpleValueType VT = getValueType(ArgType->getValueAsDef("VT")); if (ArgType->isSubClassOf("LLVMMatchType")) { unsigned Number = ArgType->getValueAsInt("Number"); assert(Number < ArgNo && "Invalid matching number!"); if (ArgType->isSubClassOf("LLVMExtendedElementVectorType")) OS << "VectorType::getExtendedElementVectorType" << "(dyn_cast<VectorType>(Tys[" << Number << "]))"; else if (ArgType->isSubClassOf("LLVMTruncatedElementVectorType")) OS << "VectorType::getTruncatedElementVectorType" << "(dyn_cast<VectorType>(Tys[" << Number << "]))"; else OS << "Tys[" << Number << "]"; } else if (VT == MVT::iAny || VT == MVT::fAny || VT == MVT::vAny) { // NOTE: The ArgNo variable here is not the absolute argument number, it is // the index of the "arbitrary" type in the Tys array passed to the // Intrinsic::getDeclaration function. Consequently, we only want to // increment it when we actually hit an overloaded type. Getting this wrong // leads to very subtle bugs! OS << "Tys[" << ArgNo++ << "]"; } else if (EVT(VT).isVector()) { EVT VVT = VT; OS << "VectorType::get("; EmitTypeForValueType(OS, VVT.getVectorElementType().getSimpleVT().SimpleTy); OS << ", " << VVT.getVectorNumElements() << ")"; } else if (VT == MVT::iPTR) { OS << "PointerType::getUnqual("; EmitTypeGenerate(OS, ArgType->getValueAsDef("ElTy"), ArgNo); OS << ")"; } else if (VT == MVT::iPTRAny) { // Make sure the user has passed us an argument type to overload. If not, // treat it as an ordinary (not overloaded) intrinsic. OS << "(" << ArgNo << " < numTys) ? Tys[" << ArgNo << "] : PointerType::getUnqual("; EmitTypeGenerate(OS, ArgType->getValueAsDef("ElTy"), ArgNo); OS << ")"; ++ArgNo; } else if (VT == MVT::isVoid) { if (ArgNo == 0) OS << "Type::getVoidTy(Context)"; else // MVT::isVoid is used to mean varargs here. OS << "..."; } else { EmitTypeForValueType(OS, VT); } }
// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() // (see LegalizeDAG.cpp). This is slow and uses local memory. // We use extract/insert/build vector instead, just as LegalizeOp() did in LLVM 2.5. SDValue NVPTXTargetLowering:: LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); SmallVector<SDValue, 8> Ops; unsigned NumOperands = Node->getNumOperands(); for (unsigned i=0; i < NumOperands; ++i) { SDValue SubOp = Node->getOperand(i); EVT VVT = SubOp.getNode()->getValueType(0); EVT EltVT = VVT.getVectorElementType(); unsigned NumSubElem = VVT.getVectorNumElements(); for (unsigned j=0; j < NumSubElem; ++j) { Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, DAG.getIntPtrConstant(j))); } } return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0], Ops.size()); }
static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, SelectionDAG &DAG, const MipsSubtarget *Subtarget) { // See if this is a vector splat immediate node. APInt SplatValue, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; unsigned EltSize = Ty.getVectorElementType().getSizeInBits(); BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); if (!BV || !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, EltSize, !Subtarget->isLittle()) || (SplatBitSize != EltSize) || (SplatValue.getZExtValue() >= EltSize)) return SDValue(); return DAG.getNode(Opc, N->getDebugLoc(), Ty, N->getOperand(0), DAG.getConstant(SplatValue.getZExtValue(), MVT::i32)); }
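// A minimal standalone sketch of the legality test in performDSPShiftCombine above: the
// combine only fires when the shift amount is a constant splat whose width matches the
// vector element width and whose value is a valid shift amount, i.e. strictly less than the
// element size in bits. The helper name is hypothetical, not MIPS backend API.
#include <cstdint>

bool isValidDSPShiftSplat(uint64_t SplatValue, unsigned SplatBitSize,
                          unsigned EltSizeInBits) {
  // e.g. for v8i16 elements, only splat amounts 0..15 are accepted.
  return SplatBitSize == EltSizeInBits && SplatValue < EltSizeInBits;
}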
SDValue NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); const DataLayout *TD = getDataLayout(); const Function *F = MF.getFunction(); const AttrListPtr &PAL = F->getAttributes(); SDValue Root = DAG.getRoot(); std::vector<SDValue> OutChains; bool isKernel = llvm::isKernelFunction(*F); bool isABI = (nvptxSubtarget.getSmVersion() >= 20); std::vector<Type *> argTypes; std::vector<const Argument *> theArgs; for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { theArgs.push_back(I); argTypes.push_back(I->getType()); } assert(argTypes.size() == Ins.size() && "Ins types and function types did not match"); int idx = 0; for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) { Type *Ty = argTypes[i]; EVT ObjectVT = getValueType(Ty); assert(ObjectVT == Ins[i].VT && "Ins type did not match function type"); // If the kernel argument is image*_t or sampler_t, convert it to // a i32 constant holding the parameter position. This can later // matched in the AsmPrinter to output the correct mangled name. if (isImageOrSamplerVal(theArgs[i], (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() : 0))) { assert(isKernel && "Only kernels can have image/sampler params"); InVals.push_back(DAG.getConstant(i+1, MVT::i32)); continue; } if (theArgs[i]->use_empty()) { // argument is dead InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT)); continue; } // In the following cases, assign a node order of "idx+1" // to newly created nodes. The SDNOdes for params have to // appear in the same order as their order of appearance // in the original function. "idx+1" holds that order. if (PAL.getParamAttributes(i+1).hasAttribute(Attributes::ByVal) == false) { // A plain scalar. if (isABI || isKernel) { // If ABI, load from the param symbol SDValue Arg = getParamSymbol(DAG, idx); Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT( F->getContext()), llvm::ADDRESS_SPACE_PARAM)); SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false, false, TD->getABITypeAlignment(ObjectVT.getTypeForEVT( F->getContext()))); if (p.getNode()) DAG.AssignOrdering(p.getNode(), idx+1); InVals.push_back(p); } else { // If no ABI, just move the param symbol SDValue Arg = getParamSymbol(DAG, idx, ObjectVT); SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); if (p.getNode()) DAG.AssignOrdering(p.getNode(), idx+1); InVals.push_back(p); } continue; } // Param has ByVal attribute if (isABI || isKernel) { // Return MoveParam(param symbol). // Ideally, the param symbol can be returned directly, // but when SDNode builder decides to use it in a CopyToReg(), // machine instruction fails because TargetExternalSymbol // (not lowered) is target dependent, and CopyToReg assumes // the source is lowered. 
SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); if (p.getNode()) DAG.AssignOrdering(p.getNode(), idx+1); if (isKernel) InVals.push_back(p); else { SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p); InVals.push_back(p2); } } else { // Have to move a set of param symbols to registers and // store them locally and return the local pointer in InVals const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]); assert(elemPtrType && "Byval parameter should be a pointer type"); Type *elemType = elemPtrType->getElementType(); // Compute the constituent parts SmallVector<EVT, 16> vtparts; SmallVector<uint64_t, 16> offsets; ComputeValueVTs(*this, elemType, vtparts, &offsets, 0); unsigned totalsize = 0; for (unsigned j=0, je=vtparts.size(); j!=je; ++j) totalsize += vtparts[j].getStoreSizeInBits(); SDValue localcopy = DAG.getFrameIndex(MF.getFrameInfo()-> CreateStackObject(totalsize/8, 16, false), getPointerTy()); unsigned sizesofar = 0; std::vector<SDValue> theChains; for (unsigned j=0, je=vtparts.size(); j!=je; ++j) { unsigned numElems = 1; if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements(); for (unsigned k=0, ke=numElems; k!=ke; ++k) { EVT tmpvt = vtparts[j]; if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType(); SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt, getParamSymbol(DAG, idx, tmpvt)); SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy, DAG.getConstant(sizesofar, getPointerTy())); theChains.push_back(DAG.getStore(Chain, dl, arg, addr, MachinePointerInfo(), false, false, 0)); sizesofar += tmpvt.getStoreSizeInBits()/8; ++idx; } } --idx; Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0], theChains.size()); InVals.push_back(localcopy); } } // Clang will check explicit VarArg and issue error if any. However, Clang // will let code with // implicit var arg like f() pass. // We treat this case as if the arg list is empty. //if (F.isVarArg()) { // assert(0 && "VarArg not supported yet!"); //} if (!OutChains.empty()) DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0], OutChains.size())); return Chain; }
std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { unsigned Opcode = Node->getOpcode(); SDLoc DL(Node); /// // Instruction Selection not handled by the auto-generated // tablegen selection should be handled here. /// SDNode *Result; switch(Opcode) { default: break; case ISD::SUBE: { SDValue InFlag = Node->getOperand(2); Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node); return std::make_pair(true, Result); } case ISD::ADDE: { if (Subtarget.hasDSP()) // Select DSP instructions, ADDSC and ADDWC. break; SDValue InFlag = Node->getOperand(2); Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node); return std::make_pair(true, Result); } case ISD::ConstantFP: { ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { if (Subtarget.hasMips64()) { SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, Mips::ZERO_64, MVT::i64); Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero); } else { SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, Mips::ZERO, MVT::i32); Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero, Zero); } return std::make_pair(true, Result); } break; } case ISD::Constant: { const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node); unsigned Size = CN->getValueSizeInBits(0); if (Size == 32) break; MipsAnalyzeImmediate AnalyzeImm; int64_t Imm = CN->getSExtValue(); const MipsAnalyzeImmediate::InstSeq &Seq = AnalyzeImm.Analyze(Imm, Size, false); MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); SDLoc DL(CN); SDNode *RegOpnd; SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), MVT::i64); // The first instruction can be a LUi which is different from other // instructions (ADDiu, ORI and SLL) in that it does not have a register // operand. if (Inst->Opc == Mips::LUi64) RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd); else RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, CurDAG->getRegister(Mips::ZERO_64, MVT::i64), ImmOpnd); // The remaining instructions in the sequence are handled here. for (++Inst; Inst != Seq.end(); ++Inst) { ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), MVT::i64); RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, SDValue(RegOpnd, 0), ImmOpnd); } return std::make_pair(true, RegOpnd); } case ISD::INTRINSIC_W_CHAIN: { switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { default: break; case Intrinsic::mips_cfcmsa: { SDValue ChainIn = Node->getOperand(0); SDValue RegIdx = Node->getOperand(2); SDValue Reg = CurDAG->getCopyFromReg(ChainIn, DL, getMSACtrlReg(RegIdx), MVT::i32); return std::make_pair(true, Reg.getNode()); } } break; } case ISD::INTRINSIC_WO_CHAIN: { switch (cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue()) { default: break; case Intrinsic::mips_move_v: // Like an assignment but will always produce a move.v even if // unnecessary. 
return std::make_pair(true, CurDAG->getMachineNode(Mips::MOVE_V, DL, Node->getValueType(0), Node->getOperand(1))); } break; } case ISD::INTRINSIC_VOID: { switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { default: break; case Intrinsic::mips_ctcmsa: { SDValue ChainIn = Node->getOperand(0); SDValue RegIdx = Node->getOperand(2); SDValue Value = Node->getOperand(3); SDValue ChainOut = CurDAG->getCopyToReg(ChainIn, DL, getMSACtrlReg(RegIdx), Value); return std::make_pair(true, ChainOut.getNode()); } } break; } case MipsISD::ThreadPointer: { EVT PtrVT = getTargetLowering()->getPointerTy(); unsigned RdhwrOpc, DestReg; if (PtrVT == MVT::i32) { RdhwrOpc = Mips::RDHWR; DestReg = Mips::V1; } else { RdhwrOpc = Mips::RDHWR64; DestReg = Mips::V1_64; } SDNode *Rdhwr = CurDAG->getMachineNode(RdhwrOpc, SDLoc(Node), Node->getValueType(0), CurDAG->getRegister(Mips::HWR29, MVT::i32)); SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg, SDValue(Rdhwr, 0)); SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT); ReplaceUses(SDValue(Node, 0), ResNode); return std::make_pair(true, ResNode.getNode()); } case ISD::BUILD_VECTOR: { // Select appropriate ldi.[bhwd] instructions for constant splats of // 128-bit when MSA is enabled. Fixup any register class mismatches that // occur as a result. // // This allows the compiler to use a wider range of immediates than would // otherwise be allowed. If, for example, v4i32 could only use ldi.h then // it would not be possible to load { 0x01010101, 0x01010101, 0x01010101, // 0x01010101 } without using a constant pool. This would be sub-optimal // when // 'ldi.b wd, 1' is capable of producing that bit-pattern in the // same set/ of registers. Similarly, ldi.h isn't capable of producing { // 0x00000000, 0x00000001, 0x00000000, 0x00000001 } but 'ldi.d wd, 1' can. BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node); APInt SplatValue, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; unsigned LdiOp; EVT ResVecTy = BVN->getValueType(0); EVT ViaVecTy; if (!Subtarget.hasMSA() || !BVN->getValueType(0).is128BitVector()) return std::make_pair(false, (SDNode*)NULL); if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 8, !Subtarget.isLittle())) return std::make_pair(false, (SDNode*)NULL); switch (SplatBitSize) { default: return std::make_pair(false, (SDNode*)NULL); case 8: LdiOp = Mips::LDI_B; ViaVecTy = MVT::v16i8; break; case 16: LdiOp = Mips::LDI_H; ViaVecTy = MVT::v8i16; break; case 32: LdiOp = Mips::LDI_W; ViaVecTy = MVT::v4i32; break; case 64: LdiOp = Mips::LDI_D; ViaVecTy = MVT::v2i64; break; } if (!SplatValue.isSignedIntN(10)) return std::make_pair(false, (SDNode*)NULL); SDValue Imm = CurDAG->getTargetConstant(SplatValue, ViaVecTy.getVectorElementType()); SDNode *Res = CurDAG->getMachineNode(LdiOp, SDLoc(Node), ViaVecTy, Imm); if (ResVecTy != ViaVecTy) { // If LdiOp is writing to a different register class to ResVecTy, then // fix it up here. This COPY_TO_REGCLASS should never cause a move.v // since the source and destination register sets contain the same // registers. const TargetLowering *TLI = getTargetLowering(); MVT ResVecTySimple = ResVecTy.getSimpleVT(); const TargetRegisterClass *RC = TLI->getRegClassFor(ResVecTySimple); Res = CurDAG->getMachineNode(Mips::COPY_TO_REGCLASS, SDLoc(Node), ResVecTy, SDValue(Res, 0), CurDAG->getTargetConstant(RC->getID(), MVT::i32)); } return std::make_pair(true, Res); } } return std::make_pair(false, (SDNode*)NULL); }
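// A minimal standalone sketch of the immediate-range check used by the BUILD_VECTOR case
// above: the MSA ldi.[bhwd] instructions take a signed 10-bit immediate, which is what
// SplatValue.isSignedIntN(10) tests. Hypothetical helper, shown only to make the accepted
// range explicit.
#include <cstdint>

bool fitsInSignedImm10(int64_t V) {
  return V >= -512 && V <= 511; // the signed 10-bit range ldi accepts
}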
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { unsigned int Opc = N->getOpcode(); if (N->isMachineOpcode()) { N->setNodeId(-1); return NULL; // Already selected. } switch (Opc) { default: break; // We are selecting i64 ADD here instead of custom lower it during // DAG legalization, so we can fold some i64 ADDs used for address // calculation into the LOAD and STORE instructions. case ISD::ADD: { const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); if (N->getValueType(0) != MVT::i64 || ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) break; SDLoc DL(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32); SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32); SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i32, LHS, Sub0); SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i32, LHS, Sub1); SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i32, RHS, Sub0); SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i32, RHS, Sub1); SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); SmallVector<SDValue, 8> AddLoArgs; AddLoArgs.push_back(SDValue(Lo0, 0)); AddLoArgs.push_back(SDValue(Lo1, 0)); SDNode *AddLo = CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, VTList, AddLoArgs); SDValue Carry = SDValue(AddLo, 1); SDNode *AddHi = CurDAG->getMachineNode(AMDGPU::S_ADDC_U32, DL, MVT::i32, SDValue(Hi0, 0), SDValue(Hi1, 0), Carry); SDValue Args[5] = { CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32), SDValue(AddLo,0), Sub0, SDValue(AddHi,0), Sub1, }; return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args, 5); } case ISD::BUILD_VECTOR: { unsigned RegClassID; const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); const AMDGPURegisterInfo *TRI = static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo()); const SIRegisterInfo *SIRI = static_cast<const SIRegisterInfo*>(TM.getRegisterInfo()); EVT VT = N->getValueType(0); unsigned NumVectorElts = VT.getVectorNumElements(); assert(VT.getVectorElementType().bitsEq(MVT::i32)); if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { bool UseVReg = true; for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); U != E; ++U) { if (!U->isMachineOpcode()) { continue; } const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo()); if (!RC) { continue; } if (SIRI->isSGPRClass(RC)) { UseVReg = false; } } switch(NumVectorElts) { case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID : AMDGPU::SReg_32RegClassID; break; case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID : AMDGPU::SReg_64RegClassID; break; case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID : AMDGPU::SReg_128RegClassID; break; case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID : AMDGPU::SReg_256RegClassID; break; case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID : AMDGPU::SReg_512RegClassID; break; default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); } } else { // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG // that adds a 128 bits reg copy when going through TwoAddressInstructions // pass. We want to avoid 128 bits copies as much as possible because they // can't be bundled by our scheduler. 
switch(NumVectorElts) { case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break; default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); } } SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32); if (NumVectorElts == 1) { return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, VT.getVectorElementType(), N->getOperand(0), RegClass); } assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " "supported yet"); // 16 = Max Num Vector Elements // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) // 1 = Vector Register Class SDValue RegSeqArgs[16 * 2 + 1]; RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32); bool IsRegSeq = true; for (unsigned i = 0; i < N->getNumOperands(); i++) { // XXX: Why is this here? if (dyn_cast<RegisterSDNode>(N->getOperand(i))) { IsRegSeq = false; break; } RegSeqArgs[1 + (2 * i)] = N->getOperand(i); RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32); } if (!IsRegSeq) break; return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs, 2 * N->getNumOperands() + 1); } case ISD::BUILD_PAIR: { SDValue RC, SubReg0, SubReg1; const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { break; } if (N->getValueType(0) == MVT::i128) { RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32); SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32); SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32); } else if (N->getValueType(0) == MVT::i64) { RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32); SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32); SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32); } else { llvm_unreachable("Unhandled value type for BUILD_PAIR"); } const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, N->getOperand(1), SubReg1 }; return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N), N->getValueType(0), Ops); } case AMDGPUISD::REGISTER_LOAD: { const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) break; SDValue Addr, Offset; SelectADDRIndirect(N->getOperand(1), Addr, Offset); const SDValue Ops[] = { Addr, Offset, CurDAG->getTargetConstant(0, MVT::i32), N->getOperand(0), }; return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N), CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other), Ops); } case AMDGPUISD::REGISTER_STORE: { const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) break; SDValue Addr, Offset; SelectADDRIndirect(N->getOperand(2), Addr, Offset); const SDValue Ops[] = { N->getOperand(1), Addr, Offset, CurDAG->getTargetConstant(0, MVT::i32), N->getOperand(0), }; return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N), CurDAG->getVTList(MVT::Other), Ops); } } return SelectCode(N); }
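// A minimal standalone sketch (plain C++, not backend API) of the i64 ADD expansion in the
// ISD::ADD case of the Select function above: the 64-bit add is decomposed into a 32-bit add
// of the low halves that produces a carry, followed by an add-with-carry of the high halves,
// the roles played by S_ADD_I32 and S_ADDC_U32.
#include <cstdint>

uint64_t add64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t ALo = static_cast<uint32_t>(A), AHi = static_cast<uint32_t>(A >> 32);
  uint32_t BLo = static_cast<uint32_t>(B), BHi = static_cast<uint32_t>(B >> 32);
  uint32_t Lo = ALo + BLo;            // S_ADD_I32: add the low halves
  uint32_t Carry = Lo < ALo ? 1 : 0;  // carry out of the low add
  uint32_t Hi = AHi + BHi + Carry;    // S_ADDC_U32: high halves plus carry in
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}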
void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); SDLoc dl(N); // Handle some special cases efficiently. switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: break; case TargetLowering::TypeSoftenFloat: // Convert the integer operand instead. SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: // Convert the expanded pieces of the input. GetExpandedOp(InOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case TargetLowering::TypeSplitVector: GetSplitVector(InOp, Lo, Hi); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case TargetLowering::TypeScalarizeVector: // Convert the element instead. SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case TargetLowering::TypeWidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), InVT.getVectorNumElements()/2); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, DAG.getConstant(0, TLI.getVectorIdxTy())); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, DAG.getConstant(InNVT.getVectorNumElements(), TLI.getVectorIdxTy())); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; } } if (InVT.isVector() && OutVT.isInteger()) { // Handle cases like i64 = BITCAST v1i64 on x86, where the operand // is legal but the result is not. unsigned NumElems = 2; EVT ElemVT = NOutVT; EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems); // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>. while (!isTypeLegal(NVT)) { unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2; // If the element size is smaller than byte, bail. if (NewSizeInBits < 8) break; NumElems *= 2; ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits); NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems); } if (isTypeLegal(NVT)) { SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp); SmallVector<SDValue, 8> Vals; for (unsigned i = 0; i < NumElems; ++i) Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, CastInOp, DAG.getConstant(i, TLI.getVectorIdxTy()))); // Build Lo, Hi pair by pairing extracted elements if needed. unsigned Slot = 0; for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) { // Each iteration will BUILD_PAIR two nodes and append the result until // there are only two nodes left, i.e. Lo and Hi. SDValue LHS = Vals[Slot]; SDValue RHS = Vals[Slot + 1]; if (TLI.isBigEndian()) std::swap(LHS, RHS); Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, EVT::getIntegerVT( *DAG.getContext(), LHS.getValueType().getSizeInBits() << 1), LHS, RHS)); } Lo = Vals[Slot++]; Hi = Vals[Slot++]; if (TLI.isBigEndian()) std::swap(Lo, Hi); return; } } // Lower the bit-convert to a store/load from the stack. 
assert(NOutVT.isByteSized() && "Expanded type not byte sized!"); // Create the stack frame object. Make sure it is aligned for both // the source and expanded destination types. unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(NOutVT. getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); // Emit a store to the stack slot. SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo, false, false, 0); // Load the first half from the stack slot. Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getIntPtrConstant(IncrementSize)); // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo.getWithOffset(IncrementSize), false, false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. if (TLI.isBigEndian()) std::swap(Lo, Hi); }
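// A minimal standalone sketch of the pairwise reduction loop in ExpandRes_BITCAST above:
// extracted pieces are consumed two at a time and each pair is appended to the same
// worklist, collapsing the values into a balanced tree of BUILD_PAIR nodes until exactly two
// (Lo and Hi) remain. Strings stand in for SDValues purely to show the loop shape and
// operand ordering; on big-endian targets the operands of each pair, and the final Lo/Hi,
// are swapped. Assumes Vals holds at least two entries.
#include <string>
#include <utility>
#include <vector>

void pairUntilTwo(std::vector<std::string> Vals, bool BigEndian,
                  std::string &Lo, std::string &Hi) {
  unsigned Slot = 0;
  for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) {
    std::string LHS = Vals[Slot], RHS = Vals[Slot + 1];
    if (BigEndian)
      std::swap(LHS, RHS);
    Vals.push_back("(" + LHS + "," + RHS + ")"); // the BUILD_PAIR of two halves
  }
  Lo = Vals[Slot];
  Hi = Vals[Slot + 1];
  if (BigEndian)
    std::swap(Lo, Hi);
}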
void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); DebugLoc dl = N->getDebugLoc(); // Handle some special cases efficiently. switch (getTypeAction(InVT)) { default: assert(false && "Unknown type action!"); case Legal: case PromoteInteger: break; case SoftenFloat: // Convert the integer operand instead. SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); return; case ExpandInteger: case ExpandFloat: // Convert the expanded pieces of the input. GetExpandedOp(InOp, Lo, Hi); Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); return; case SplitVector: GetSplitVector(InOp, Lo, Hi); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); return; case ScalarizeVector: // Convert the element instead. SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); return; case WidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT"); InOp = GetWidenedVector(InOp); EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), InVT.getVectorNumElements()/2); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, DAG.getIntPtrConstant(0)); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, DAG.getIntPtrConstant(InNVT.getVectorNumElements())); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); return; } } if (InVT.isVector() && OutVT.isInteger()) { // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand // is legal but the result is not. EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2); if (isTypeLegal(NVT)) { SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp); Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, DAG.getIntPtrConstant(0)); Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, DAG.getIntPtrConstant(1)); if (TLI.isBigEndian()) std::swap(Lo, Hi); return; } } // Lower the bit-convert to a store/load from the stack. assert(NOutVT.isByteSized() && "Expanded type not byte sized!"); // Create the stack frame object. Make sure it is aligned for both // the source and expanded destination types. unsigned Alignment = TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); const Value *SV = PseudoSourceValue::getFixedStack(SPFI); // Emit a store to the stack slot. SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0); // Load the first half from the stack slot. Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getIntPtrConstant(IncrementSize)); // Load the second half from the stack slot. 
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. if (TLI.isBigEndian()) std::swap(Lo, Hi); }
/// getVectorTypeBreakdown - Vector types are broken down into some number of /// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. /// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. /// /// This method returns the number of registers needed, and the VT for each /// register. It also returns the VT and quantity of the intermediate values /// before they are promoted/expanded. /// unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const { unsigned NumElts = VT.getVectorNumElements(); // If there is a wider vector type with the same element type as this one, // or a promoted vector type that has the same number of elements which // are wider, then we should convert to that legal vector type. // This handles things like <2 x float> -> <4 x float> and // <4 x i1> -> <4 x i32>. LegalizeTypeAction TA = getTypeAction(Context, VT); if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) { EVT RegisterEVT = getTypeToTransformTo(Context, VT); if (isTypeLegal(RegisterEVT)) { IntermediateVT = RegisterEVT; RegisterVT = RegisterEVT.getSimpleVT(); NumIntermediates = 1; return 1; } } // Figure out the right, legal destination reg to copy into. EVT EltTy = VT.getVectorElementType(); unsigned NumVectorRegs = 1; // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we // could break down into LHS/RHS like LegalizeDAG does. if (!isPowerOf2_32(NumElts)) { NumVectorRegs = NumElts; NumElts = 1; } // Divide the input until we get to a supported size. This will always // end with a scalar if the target doesn't support vectors. while (NumElts > 1 && !isTypeLegal( EVT::getVectorVT(Context, EltTy, NumElts))) { NumElts >>= 1; NumVectorRegs <<= 1; } NumIntermediates = NumVectorRegs; EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); if (!isTypeLegal(NewVT)) NewVT = EltTy; IntermediateVT = NewVT; MVT DestVT = getRegisterType(Context, NewVT); RegisterVT = DestVT; unsigned NewVTSize = NewVT.getSizeInBits(); // Convert sizes such as i33 to i64. if (!isPowerOf2_32(NewVTSize)) NewVTSize = NextPowerOf2(NewVTSize); if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. return NumVectorRegs; }
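// A minimal standalone sketch of the breakdown arithmetic in getVectorTypeBreakdown above,
// with a caller-supplied legality predicate standing in for the TargetLowering queries.
// Non-power-of-2 vectors are scalarized outright; otherwise the element count is halved
// until a legal vector (or a scalar) is reached, and the number of intermediate pieces is
// returned. Illustrative only: it omits the widen/promote fast path and the register-count
// adjustment for expanded scalar types.
#include <functional>

unsigned vectorBreakdown(unsigned NumElts,
                         const std::function<bool(unsigned)> &IsLegalNumElts,
                         unsigned &EltsPerPiece) {
  unsigned NumVectorRegs = 1;
  if (NumElts & (NumElts - 1)) { // not a power of 2: one piece per element
    NumVectorRegs = NumElts;
    NumElts = 1;
  }
  while (NumElts > 1 && !IsLegalNumElts(NumElts)) {
    NumElts >>= 1;       // try the half-sized vector...
    NumVectorRegs <<= 1; // ...which doubles the number of pieces
  }
  EltsPerPiece = NumElts;
  return NumVectorRegs;
}
// For example, with 8 x f32 and only 4 x f32 legal, this yields two intermediate
// 4 x f32 pieces, matching the v8f32 example in the comment above.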
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { unsigned int Opc = N->getOpcode(); if (N->isMachineOpcode()) { return NULL; // Already selected. } switch (Opc) { default: break; case ISD::BUILD_VECTOR: { unsigned RegClassID; const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); const AMDGPURegisterInfo *TRI = static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo()); const SIRegisterInfo *SIRI = static_cast<const SIRegisterInfo*>(TM.getRegisterInfo()); EVT VT = N->getValueType(0); unsigned NumVectorElts = VT.getVectorNumElements(); assert(VT.getVectorElementType().bitsEq(MVT::i32)); if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { bool UseVReg = true; for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); U != E; ++U) { if (!U->isMachineOpcode()) { continue; } const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo()); if (!RC) { continue; } if (SIRI->isSGPRClass(RC)) { UseVReg = false; } } switch(NumVectorElts) { case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID : AMDGPU::SReg_32RegClassID; break; case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID : AMDGPU::SReg_64RegClassID; break; case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID : AMDGPU::SReg_128RegClassID; break; case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID : AMDGPU::SReg_256RegClassID; break; case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID : AMDGPU::SReg_512RegClassID; break; default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); } } else { // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG // that adds a 128 bits reg copy when going through TwoAddressInstructions // pass. We want to avoid 128 bits copies as much as possible because they // can't be bundled by our scheduler. switch(NumVectorElts) { case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break; default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); } } SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32); if (NumVectorElts == 1) { return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, VT.getVectorElementType(), N->getOperand(0), RegClass); } assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " "supported yet"); // 16 = Max Num Vector Elements // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) // 1 = Vector Register Class SDValue RegSeqArgs[16 * 2 + 1]; RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32); bool IsRegSeq = true; for (unsigned i = 0; i < N->getNumOperands(); i++) { // XXX: Why is this here? 
if (dyn_cast<RegisterSDNode>(N->getOperand(i))) { IsRegSeq = false; break; } RegSeqArgs[1 + (2 * i)] = N->getOperand(i); RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32); } if (!IsRegSeq) break; return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs, 2 * N->getNumOperands() + 1); } case ISD::BUILD_PAIR: { SDValue RC, SubReg0, SubReg1; const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { break; } if (N->getValueType(0) == MVT::i128) { RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32); SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32); SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32); } else if (N->getValueType(0) == MVT::i64) { RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32); SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32); SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32); } else { llvm_unreachable("Unhandled value type for BUILD_PAIR"); } const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, N->getOperand(1), SubReg1 }; return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N), N->getValueType(0), Ops); } } return SelectCode(N); }
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo()); unsigned int Opc = N->getOpcode(); if (N->isMachineOpcode()) { return NULL; // Already selected. } switch (Opc) { default: break; case AMDGPUISD::CONST_ADDRESS: { for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I); I != SDNode::use_end(); I = Next) { Next = llvm::next(I); if (!I->isMachineOpcode()) { continue; } unsigned Opcode = I->getMachineOpcode(); bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; int SrcIdx = I.getOperandNo(); int SelIdx; // Unlike MachineInstrs, SDNodes do not have results in their operand // list, so we need to increment the SrcIdx, since // R600InstrInfo::getOperandIdx is based on the MachineInstr indices. if (HasDst) { SrcIdx++; } SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx); if (SelIdx < 0) { continue; } SDValue CstOffset; if (N->getValueType(0).isVector() || !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset)) continue; // Gather constants values int SrcIndices[] = { TII->getOperandIdx(Opcode, AMDGPU::OpName::src0), TII->getOperandIdx(Opcode, AMDGPU::OpName::src1), TII->getOperandIdx(Opcode, AMDGPU::OpName::src2), TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z), TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W), TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X), TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y), TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z), TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) }; std::vector<unsigned> Consts; for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) { int OtherSrcIdx = SrcIndices[i]; int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx); if (OtherSrcIdx < 0 || OtherSelIdx < 0) { continue; } if (HasDst) { OtherSrcIdx--; OtherSelIdx--; } if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) { if (Reg->getReg() == AMDGPU::ALU_CONST) { ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx)); Consts.push_back(Cst->getZExtValue()); } } } ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset); Consts.push_back(Cst->getZExtValue()); if (!TII->fitsConstReadLimitations(Consts)) continue; // Convert back to SDNode indices if (HasDst) { SrcIdx--; SelIdx--; } std::vector<SDValue> Ops; for (int i = 0, e = I->getNumOperands(); i != e; ++i) { if (i == SrcIdx) { Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32)); } else if (i == SelIdx) { Ops.push_back(CstOffset); } else { Ops.push_back(I->getOperand(i)); } } CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size()); } break; } case ISD::BUILD_VECTOR: { unsigned RegClassID; const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); const AMDGPURegisterInfo *TRI = static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo()); const SIRegisterInfo *SIRI = static_cast<const SIRegisterInfo*>(TM.getRegisterInfo()); EVT VT = N->getValueType(0); unsigned NumVectorElts = VT.getVectorNumElements(); assert(VT.getVectorElementType().bitsEq(MVT::i32)); if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { bool UseVReg = true; for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); U != E; ++U) { if (!U->isMachineOpcode()) { continue; } const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo()); if (!RC) { continue; } if (SIRI->isSGPRClass(RC)) { UseVReg = false; } } switch(NumVectorElts) { case 1: 
RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID : AMDGPU::SReg_32RegClassID; break; case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID : AMDGPU::SReg_64RegClassID; break; case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID : AMDGPU::SReg_128RegClassID; break; case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID : AMDGPU::SReg_256RegClassID; break; case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID : AMDGPU::SReg_512RegClassID; break; default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); } } else { // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG // that adds a 128 bits reg copy when going through TwoAddressInstructions // pass. We want to avoid 128 bits copies as much as possible because they // can't be bundled by our scheduler. switch(NumVectorElts) { case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break; default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); } } SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32); if (NumVectorElts == 1) { return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, VT.getVectorElementType(), N->getOperand(0), RegClass); } assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " "supported yet"); // 16 = Max Num Vector Elements // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) // 1 = Vector Register Class SDValue RegSeqArgs[16 * 2 + 1]; RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32); bool IsRegSeq = true; for (unsigned i = 0; i < N->getNumOperands(); i++) { // XXX: Why is this here? if (dyn_cast<RegisterSDNode>(N->getOperand(i))) { IsRegSeq = false; break; } RegSeqArgs[1 + (2 * i)] = N->getOperand(i); RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32); } if (!IsRegSeq) break; return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs, 2 * N->getNumOperands() + 1); } case ISD::BUILD_PAIR: { SDValue RC, SubReg0, SubReg1; const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { break; } if (N->getValueType(0) == MVT::i128) { RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32); SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32); SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32); } else if (N->getValueType(0) == MVT::i64) { RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32); SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32); SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32); } else { llvm_unreachable("Unhandled value type for BUILD_PAIR"); } const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, N->getOperand(1), SubReg1 }; return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N), N->getValueType(0), Ops); } case ISD::ConstantFP: case ISD::Constant: { const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); // XXX: Custom immediate lowering not implemented yet. 
Instead we use // pseudo instructions defined in SIInstructions.td if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { break; } uint64_t ImmValue = 0; unsigned ImmReg = AMDGPU::ALU_LITERAL_X; if (N->getOpcode() == ISD::ConstantFP) { // XXX: 64-bit Immediates not supported yet assert(N->getValueType(0) != MVT::f64); ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N); APFloat Value = C->getValueAPF(); float FloatValue = Value.convertToFloat(); if (FloatValue == 0.0) { ImmReg = AMDGPU::ZERO; } else if (FloatValue == 0.5) { ImmReg = AMDGPU::HALF; } else if (FloatValue == 1.0) { ImmReg = AMDGPU::ONE; } else { ImmValue = Value.bitcastToAPInt().getZExtValue(); } } else { // XXX: 64-bit Immediates not supported yet assert(N->getValueType(0) != MVT::i64); ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); if (C->getZExtValue() == 0) { ImmReg = AMDGPU::ZERO; } else if (C->getZExtValue() == 1) { ImmReg = AMDGPU::ONE_INT; } else { ImmValue = C->getZExtValue(); } } for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use); Use != SDNode::use_end(); Use = Next) { Next = llvm::next(Use); std::vector<SDValue> Ops; for (unsigned i = 0; i < Use->getNumOperands(); ++i) { Ops.push_back(Use->getOperand(i)); } if (!Use->isMachineOpcode()) { if (ImmReg == AMDGPU::ALU_LITERAL_X) { // We can only use literal constants (e.g. AMDGPU::ZERO, // AMDGPU::ONE, etc) in machine opcodes. continue; } } else { switch(Use->getMachineOpcode()) { case AMDGPU::REG_SEQUENCE: break; default: if (!TII->isALUInstr(Use->getMachineOpcode()) || (TII->get(Use->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)) { continue; } } // Check that we aren't already using an immediate. // XXX: It's possible for an instruction to have more than one // immediate operand, but this is not supported yet. if (ImmReg == AMDGPU::ALU_LITERAL_X) { int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), AMDGPU::OpName::literal); if (ImmIdx == -1) { continue; } if (TII->getOperandIdx(Use->getMachineOpcode(), AMDGPU::OpName::dst) != -1) { // subtract one from ImmIdx, because the DST operand is usually index // 0 for MachineInstrs, but we have no DST in the Ops vector. ImmIdx--; } ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx)); assert(C); if (C->getZExtValue() != 0) { // This instruction is already using an immediate. 
continue; } // Set the immediate value Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32); } } // Set the immediate register Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32); CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands()); } break; } } SDNode *Result = SelectCode(N); // Fold operands of selected node const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo()); if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) { bool IsModified = false; do { std::vector<SDValue> Ops; for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end(); I != E; ++I) Ops.push_back(*I); IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops); if (IsModified) { Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size()); } } while (IsModified); } if (Result && Result->isMachineOpcode() && !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR) && TII->hasInstrModifiers(Result->getMachineOpcode())) { // Fold FNEG/FABS // TODO: Isel can generate multiple MachineInst, we need to recursively // parse Result bool IsModified = false; do { std::vector<SDValue> Ops; for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end(); I != E; ++I) Ops.push_back(*I); IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops); if (IsModified) { Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size()); } } while (IsModified); // If node has a single use which is CLAMP_R600, folds it if (Result->hasOneUse() && Result->isMachineOpcode()) { SDNode *PotentialClamp = *Result->use_begin(); if (PotentialClamp->isMachineOpcode() && PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) { unsigned ClampIdx = TII->getOperandIdx(Result->getMachineOpcode(), AMDGPU::OpName::clamp); std::vector<SDValue> Ops; unsigned NumOp = Result->getNumOperands(); for (unsigned i = 0; i < NumOp; ++i) { Ops.push_back(Result->getOperand(i)); } Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32); Result = CurDAG->SelectNodeTo(PotentialClamp, Result->getMachineOpcode(), PotentialClamp->getVTList(), Ops.data(), NumOp); } } } } return Result; }