bool AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
  ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
  if (!Imm || !Imm->getValueAPF().isPosZero())
    return false;

  // Doesn't actually carry any information, but keeps TableGen quiet.
  Dummy = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
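// Illustrative sketch (not part of the backend): the operand above only
// matches +0.0; -0.0 has a different IEEE-754 bit pattern and fails the
// isPosZero() test. A minimal, self-contained model of that check, using a
// hypothetical helper name:
#include <cstdint>
#include <cstring>

static bool isPositiveZero(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  return Bits == 0;  // +0.0f is all-zero bits; -0.0f is 0x80000000.
}
// isPositiveZero(0.0f) == true, isPositiveZero(-0.0f) == false.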
// There is no native floating point division, but we can convert this to a
// reciprocal/multiply operation. If the first parameter is constant 1.0, then
// just a reciprocal will suffice.
SDValue VectorProcTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();

  EVT type = Op.getOperand(1).getValueType();
  SDValue two = DAG.getConstantFP(2.0, type);
  SDValue denom = Op.getOperand(1);
  SDValue estimate = DAG.getNode(VectorProcISD::RECIPROCAL_EST, dl, type, denom);

  // Perform a series of Newton-Raphson refinements. Each iteration doubles
  // the precision. The initial estimate has 6 bits of precision, so two
  // iterations result in 24 bits, which is larger than the significand.
  for (int i = 0; i < 2; i++) {
    // trial = x * estimate (ideally, x * 1/x should be 1.0)
    // error = 2.0 - trial
    // estimate = estimate * error
    SDValue trial = DAG.getNode(ISD::FMUL, dl, type, estimate, denom);
    SDValue error = DAG.getNode(ISD::FSUB, dl, type, two, trial);
    estimate = DAG.getNode(ISD::FMUL, dl, type, estimate, error);
  }

  // Check if the first parameter is constant 1.0. If so, we don't need
  // to multiply by the numerator.
  bool isOne = false;
  if (type.isVector()) {
    if (isSplatVector(Op.getOperand(0).getNode())) {
      ConstantFPSDNode *C =
          dyn_cast<ConstantFPSDNode>(Op.getOperand(0).getOperand(0));
      isOne = C && C->isExactlyValue(1.0);
    }
  } else {
    ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op.getOperand(0));
    isOne = C && C->isExactlyValue(1.0);
  }

  if (!isOne)
    estimate = DAG.getNode(ISD::FMUL, dl, type, Op.getOperand(0), estimate);

  return estimate;
}
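// Illustrative sketch (not part of the backend): the same Newton-Raphson
// recurrence in scalar C++. The hardware estimate is modeled here by a
// hand-picked rough value for 1/3; each step computes e = e * (2 - d * e),
// roughly doubling the number of correct bits, so two steps from ~6 bits
// cover the 24-bit float significand.
#include <cstdio>

static float refineReciprocal(float denom, float estimate) {
  for (int i = 0; i < 2; i++)
    estimate = estimate * (2.0f - denom * estimate);  // Newton-Raphson step
  return estimate;
}

int main() {
  float d = 3.0f;
  float rough = 0.328125f;  // stand-in for a ~6-bit hardware estimate of 1/3
  std::printf("%.9f\n", refineReciprocal(d, rough));  // ~0.333333
  return 0;
}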
std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  ///
  // Instruction Selection not handled by the auto-generated
  // tablegen selection should be handled here.
  ///
  SDNode *Result;

  switch(Opcode) {
  default: break;

  case ISD::SUBE: {
    SDValue InFlag = Node->getOperand(2);
    Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node);
    return std::make_pair(true, Result);
  }

  case ISD::ADDE: {
    if (Subtarget.hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
      break;
    SDValue InFlag = Node->getOperand(2);
    Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
    return std::make_pair(true, Result);
  }

  case ISD::ConstantFP: {
    ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
    if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
      if (Subtarget.hasMips64()) {
        SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
                                              Mips::ZERO_64, MVT::i64);
        Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero);
      } else {
        SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
                                              Mips::ZERO, MVT::i32);
        Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero,
                                        Zero);
      }
      return std::make_pair(true, Result);
    }
    break;
  }

  case ISD::Constant: {
    const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
    unsigned Size = CN->getValueSizeInBits(0);

    if (Size == 32)
      break;

    MipsAnalyzeImmediate AnalyzeImm;
    int64_t Imm = CN->getSExtValue();

    const MipsAnalyzeImmediate::InstSeq &Seq =
      AnalyzeImm.Analyze(Imm, Size, false);

    MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
    SDLoc DL(CN);
    SDNode *RegOpnd;
    SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
                                                MVT::i64);

    // The first instruction can be a LUi which is different from other
    // instructions (ADDiu, ORI and SLL) in that it does not have a register
    // operand.
    if (Inst->Opc == Mips::LUi64)
      RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
    else
      RegOpnd =
        CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
                               CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
                               ImmOpnd);

    // The remaining instructions in the sequence are handled here.
    for (++Inst; Inst != Seq.end(); ++Inst) {
      ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
                                          MVT::i64);
      RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
                                       SDValue(RegOpnd, 0), ImmOpnd);
    }

    return std::make_pair(true, RegOpnd);
  }

  case ISD::INTRINSIC_W_CHAIN: {
    switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
    default:
      break;

    case Intrinsic::mips_cfcmsa: {
      SDValue ChainIn = Node->getOperand(0);
      SDValue RegIdx = Node->getOperand(2);
      SDValue Reg = CurDAG->getCopyFromReg(ChainIn, DL,
                                           getMSACtrlReg(RegIdx), MVT::i32);
      return std::make_pair(true, Reg.getNode());
    }
    }
    break;
  }

  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue()) {
    default:
      break;

    case Intrinsic::mips_move_v:
      // Like an assignment but will always produce a move.v even if
      // unnecessary.
      return std::make_pair(true, CurDAG->getMachineNode(Mips::MOVE_V, DL,
                                                         Node->getValueType(0),
                                                         Node->getOperand(1)));
    }
    break;
  }

  case ISD::INTRINSIC_VOID: {
    switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
    default:
      break;

    case Intrinsic::mips_ctcmsa: {
      SDValue ChainIn = Node->getOperand(0);
      SDValue RegIdx = Node->getOperand(2);
      SDValue Value = Node->getOperand(3);
      SDValue ChainOut = CurDAG->getCopyToReg(ChainIn, DL,
                                              getMSACtrlReg(RegIdx), Value);
      return std::make_pair(true, ChainOut.getNode());
    }
    }
    break;
  }

  case MipsISD::ThreadPointer: {
    EVT PtrVT = getTargetLowering()->getPointerTy();
    unsigned RdhwrOpc, DestReg;

    if (PtrVT == MVT::i32) {
      RdhwrOpc = Mips::RDHWR;
      DestReg = Mips::V1;
    } else {
      RdhwrOpc = Mips::RDHWR64;
      DestReg = Mips::V1_64;
    }

    SDNode *Rdhwr =
      CurDAG->getMachineNode(RdhwrOpc, SDLoc(Node),
                             Node->getValueType(0),
                             CurDAG->getRegister(Mips::HWR29, MVT::i32));
    SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
                                         SDValue(Rdhwr, 0));
    SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT);
    ReplaceUses(SDValue(Node, 0), ResNode);
    return std::make_pair(true, ResNode.getNode());
  }

  case ISD::BUILD_VECTOR: {
    // Select appropriate ldi.[bhwd] instructions for constant splats of
    // 128-bit when MSA is enabled. Fixup any register class mismatches that
    // occur as a result.
    //
    // This allows the compiler to use a wider range of immediates than would
    // otherwise be allowed. If, for example, v4i32 could only use ldi.h then
    // it would not be possible to load { 0x01010101, 0x01010101, 0x01010101,
    // 0x01010101 } without using a constant pool. This would be sub-optimal
    // when 'ldi.b wd, 1' is capable of producing that bit-pattern in the
    // same set of registers. Similarly, ldi.h isn't capable of producing {
    // 0x00000000, 0x00000001, 0x00000000, 0x00000001 } but 'ldi.d wd, 1' can.
    BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node);
    APInt SplatValue, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    unsigned LdiOp;
    EVT ResVecTy = BVN->getValueType(0);
    EVT ViaVecTy;

    if (!Subtarget.hasMSA() || !BVN->getValueType(0).is128BitVector())
      return std::make_pair(false, (SDNode*)NULL);

    if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                              HasAnyUndefs, 8,
                              !Subtarget.isLittle()))
      return std::make_pair(false, (SDNode*)NULL);

    switch (SplatBitSize) {
    default:
      return std::make_pair(false, (SDNode*)NULL);
    case 8:
      LdiOp = Mips::LDI_B;
      ViaVecTy = MVT::v16i8;
      break;
    case 16:
      LdiOp = Mips::LDI_H;
      ViaVecTy = MVT::v8i16;
      break;
    case 32:
      LdiOp = Mips::LDI_W;
      ViaVecTy = MVT::v4i32;
      break;
    case 64:
      LdiOp = Mips::LDI_D;
      ViaVecTy = MVT::v2i64;
      break;
    }

    if (!SplatValue.isSignedIntN(10))
      return std::make_pair(false, (SDNode*)NULL);

    SDValue Imm = CurDAG->getTargetConstant(SplatValue,
                                            ViaVecTy.getVectorElementType());

    SDNode *Res = CurDAG->getMachineNode(LdiOp, SDLoc(Node), ViaVecTy, Imm);

    if (ResVecTy != ViaVecTy) {
      // If LdiOp is writing to a different register class to ResVecTy, then
      // fix it up here. This COPY_TO_REGCLASS should never cause a move.v
      // since the source and destination register sets contain the same
      // registers.
      const TargetLowering *TLI = getTargetLowering();
      MVT ResVecTySimple = ResVecTy.getSimpleVT();
      const TargetRegisterClass *RC = TLI->getRegClassFor(ResVecTySimple);
      Res = CurDAG->getMachineNode(Mips::COPY_TO_REGCLASS, SDLoc(Node),
                                   ResVecTy, SDValue(Res, 0),
                                   CurDAG->getTargetConstant(RC->getID(),
                                                             MVT::i32));
    }

    return std::make_pair(true, Res);
  }
  }

  return std::make_pair(false, (SDNode*)NULL);
}
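// Illustrative sketch (not part of the backend): the BUILD_VECTOR case above
// keys the choice of ldi.b/h/w/d off the narrowest element width that still
// describes the splat, and only accepts values that fit ldi's signed 10-bit
// immediate. A minimal model of that decision, assuming the splat has already
// been reduced to (SplatBitSize, SplatValue); pickLdi is a hypothetical name.
#include <cstdint>
#include <string>

static std::string pickLdi(unsigned SplatBitSize, int64_t SplatValue) {
  // ldi immediates are signed 10-bit values: [-512, 511].
  if (SplatValue < -512 || SplatValue > 511)
    return "needs a constant pool or a synthesised sequence";
  switch (SplatBitSize) {
  case 8:  return "ldi.b";
  case 16: return "ldi.h";
  case 32: return "ldi.w";
  case 64: return "ldi.d";
  default: return "not a recognised splat width";
  }
}
// pickLdi(8, 1)  -> "ldi.b"  (covers the repeated 0x01010101 pattern above)
// pickLdi(64, 1) -> "ldi.d"  (covers { 0, 1, 0, 1 } as two 64-bit ones)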
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  const R600InstrInfo *TII =
                      static_cast<const R600InstrInfo*>(TM.getInstrInfo());
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case AMDGPUISD::CONST_ADDRESS: {
    for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
                              I != SDNode::use_end(); I = Next) {
      Next = llvm::next(I);
      if (!I->isMachineOpcode()) {
        continue;
      }
      unsigned Opcode = I->getMachineOpcode();
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SrcIdx = I.getOperandNo();
      int SelIdx;
      // Unlike MachineInstrs, SDNodes do not have results in their operand
      // list, so we need to increment the SrcIdx, since
      // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
      if (HasDst) {
        SrcIdx++;
      }

      SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
      if (SelIdx < 0) {
        continue;
      }

      SDValue CstOffset;
      if (N->getValueType(0).isVector() ||
          !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
        continue;

      // Gather constant values.
      int SrcIndices[] = {
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
      };
      std::vector<unsigned> Consts;
      for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
        int OtherSrcIdx = SrcIndices[i];
        int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
        if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
          continue;
        }
        if (HasDst) {
          OtherSrcIdx--;
          OtherSelIdx--;
        }
        if (RegisterSDNode *Reg =
                        dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
          if (Reg->getReg() == AMDGPU::ALU_CONST) {
            ConstantSDNode *Cst =
                          dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
            Consts.push_back(Cst->getZExtValue());
          }
        }
      }

      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
      Consts.push_back(Cst->getZExtValue());
      if (!TII->fitsConstReadLimitations(Consts))
        continue;

      // Convert back to SDNode indices.
      if (HasDst) {
        SrcIdx--;
        SelIdx--;
      }
      std::vector<SDValue> Ops;
      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
        if (i == SrcIdx) {
          Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
        } else if (i == SelIdx) {
          Ops.push_back(CstOffset);
        } else {
          Ops.push_back(I->getOperand(i));
        }
      }
      CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }

    unsigned RegClassID;
    switch(N->getValueType(0).getVectorNumElements()) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128 bits reg copy when going through TwoAddressInstructions
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    SDValue RegSeqArgs[9] = {
      CurDAG->getTargetConstant(RegClassID, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
    };
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[2 * i + 1] = N->getOperand(i);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
                                  N->getValueType(0), Ops);
  }

  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet. Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }

    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
                              Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          // We can only use literal constants (e.g. AMDGPU::ZERO,
          // AMDGPU::ONE, etc) in machine opcodes.
          continue;
        }
      } else {
        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
            (TII->get(Use->getMachineOpcode()).TSFlags &
            R600_InstFlag::VECTOR)) {
          continue;
        }

        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
                                        AMDGPU::OpName::literal);
        if (ImmIdx == -1) {
          continue;
        }

        if (TII->getOperandIdx(Use->getMachineOpcode(),
                               AMDGPU::OpName::dst) != -1) {
          // subtract one from ImmIdx, because the DST operand is usually index
          // 0 for MachineInstrs, but we have no DST in the Ops vector.
          ImmIdx--;
        }

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
          assert(C);

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }
  SDNode *Result = SelectCode(N);

  // Fold operands of selected node
  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
    if (Result && Result->isMachineOpcode() &&
        Result->getMachineOpcode() == AMDGPU::DOT_4) {
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);
    }
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
        && TII->hasInstrModifiers(Result->getMachineOpcode())) {
      // Fold FNEG/FABS
      // TODO: Isel can generate multiple MachineInst, we need to recursively
      // parse Result
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If the node has a single use which is CLAMP_R600, fold it
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
            TII->getOperandIdx(Result->getMachineOpcode(),
                               AMDGPU::OpName::clamp);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
                                        Result->getMachineOpcode(),
                                        PotentialClamp->getVTList(),
                                        Ops.data(), NumOp);
        }
      }
    }
  }
  return Result;
}
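// Illustrative sketch (not part of the backend): the Constant/ConstantFP case
// above prefers the dedicated hardware constant sources ZERO/HALF/ONE(_INT)
// over ALU_LITERAL_X, presumably to avoid spending a literal encoding slot. A
// minimal model of that preference for floats, with register names reduced to
// strings and pickImmediateSource being a hypothetical name:
#include <cstdint>
#include <cstring>
#include <string>

static std::string pickImmediateSource(float F, uint32_t &LiteralOut) {
  if (F == 0.0f) return "ZERO";
  if (F == 0.5f) return "HALF";
  if (F == 1.0f) return "ONE";
  std::memcpy(&LiteralOut, &F, sizeof(F));  // fall back to a 32-bit literal
  return "ALU_LITERAL_X";
}
// pickImmediateSource(0.5f, L)  -> "HALF" (no literal needed)
// pickImmediateSource(3.25f, L) -> "ALU_LITERAL_X", L holds the IEEE-754 bits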
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case ISD::BUILD_VECTOR: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128 bits reg copy when going through TwoAddressInstructions
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    SDValue RegSeqArgs[9] = {
      CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
    };
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[2 * i + 1] = N->getOperand(i);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet. Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo*>(TM.getInstrInfo());

    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
                              Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          // We can only use literal constants (e.g. AMDGPU::ZERO,
          // AMDGPU::ONE, etc) in machine opcodes.
          continue;
        }
      } else {
        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
            (TII->get(Use->getMachineOpcode()).TSFlags &
            R600_InstFlag::VECTOR)) {
          continue;
        }

        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
                                        R600Operands::IMM);
        assert(ImmIdx != -1);

        // subtract one from ImmIdx, because the DST operand is usually index
        // 0 for MachineInstrs, but we have no DST in the Ops vector.
        ImmIdx--;

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
          assert(C);

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }
  SDNode *Result = SelectCode(N);

  // Fold operands of selected node
  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
        && TII->isALUInstr(Result->getMachineOpcode())) {
      // Fold FNEG/FABS/CONST_ADDRESS
      // TODO: Isel can generate multiple MachineInst, we need to recursively
      // parse Result
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If the node has a single use which is CLAMP_R600, fold it
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
            TII->getOperandIdx(Result->getMachineOpcode(), R600Operands::CLAMP);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
                                        Result->getMachineOpcode(),
                                        PotentialClamp->getVTList(),
                                        Ops.data(), NumOp);
        }
      }
    }
  }
  return Result;
}
std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  ///
  // Instruction Selection not handled by the auto-generated
  // tablegen selection should be handled here.
  ///
  SDNode *Result;

  switch(Opcode) {
  default: break;

  case ISD::SUBE: {
    SDValue InFlag = Node->getOperand(2);
    Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node);
    return std::make_pair(true, Result);
  }

  case ISD::ADDE: {
    if (Subtarget.hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
      break;
    SDValue InFlag = Node->getOperand(2);
    Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
    return std::make_pair(true, Result);
  }

  case ISD::ConstantFP: {
    ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
    if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
      if (Subtarget.hasMips64()) {
        SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
                                              Mips::ZERO_64, MVT::i64);
        Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero);
      } else {
        SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
                                              Mips::ZERO, MVT::i32);
        Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero,
                                        Zero);
      }
      return std::make_pair(true, Result);
    }
    break;
  }

  case ISD::Constant: {
    const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
    unsigned Size = CN->getValueSizeInBits(0);

    if (Size == 32)
      break;

    MipsAnalyzeImmediate AnalyzeImm;
    int64_t Imm = CN->getSExtValue();

    const MipsAnalyzeImmediate::InstSeq &Seq =
      AnalyzeImm.Analyze(Imm, Size, false);

    MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
    SDLoc DL(CN);
    SDNode *RegOpnd;
    SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
                                                MVT::i64);

    // The first instruction can be a LUi which is different from other
    // instructions (ADDiu, ORI and SLL) in that it does not have a register
    // operand.
    if (Inst->Opc == Mips::LUi64)
      RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
    else
      RegOpnd =
        CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
                               CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
                               ImmOpnd);

    // The remaining instructions in the sequence are handled here.
    for (++Inst; Inst != Seq.end(); ++Inst) {
      ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
                                          MVT::i64);
      RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
                                       SDValue(RegOpnd, 0), ImmOpnd);
    }

    return std::make_pair(true, RegOpnd);
  }

  case MipsISD::ThreadPointer: {
    EVT PtrVT = getTargetLowering()->getPointerTy();
    unsigned RdhwrOpc, SrcReg, DestReg;

    if (PtrVT == MVT::i32) {
      RdhwrOpc = Mips::RDHWR;
      SrcReg = Mips::HWR29;
      DestReg = Mips::V1;
    } else {
      RdhwrOpc = Mips::RDHWR64;
      SrcReg = Mips::HWR29_64;
      DestReg = Mips::V1_64;
    }

    SDNode *Rdhwr =
      CurDAG->getMachineNode(RdhwrOpc, SDLoc(Node),
                             Node->getValueType(0),
                             CurDAG->getRegister(SrcReg, PtrVT));
    SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
                                         SDValue(Rdhwr, 0));
    SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT);
    ReplaceUses(SDValue(Node, 0), ResNode);
    return std::make_pair(true, ResNode.getNode());
  }

  case MipsISD::InsertLOHI: {
    unsigned RCID = Subtarget.hasDSP() ? Mips::ACRegsDSPRegClassID :
                                         Mips::ACRegsRegClassID;
    SDValue RegClass = CurDAG->getTargetConstant(RCID, MVT::i32);
    SDValue LoIdx = CurDAG->getTargetConstant(Mips::sub_lo, MVT::i32);
    SDValue HiIdx = CurDAG->getTargetConstant(Mips::sub_hi, MVT::i32);
    const SDValue Ops[] = { RegClass, Node->getOperand(0), LoIdx,
                            Node->getOperand(1), HiIdx };
    SDNode *Res = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                         MVT::Untyped, Ops);
    return std::make_pair(true, Res);
  }
  }

  return std::make_pair(false, (SDNode*)NULL);
}
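// Illustrative sketch (not part of the backend): the ISD::Constant case asks
// MipsAnalyzeImmediate for a short LUi/ORi/SLL-style sequence. The naive
// worst-case sequence for an arbitrary 64-bit value is modeled below; the
// real analysis trims it when 16-bit chunks are zero or sign-extendable.
// This is an assumption-laden model, not MipsAnalyzeImmediate itself.
#include <cassert>
#include <cstdint>

static uint64_t materialize64(uint64_t Imm) {
  uint64_t Chunk3 = (Imm >> 48) & 0xffff, Chunk2 = (Imm >> 32) & 0xffff,
           Chunk1 = (Imm >> 16) & 0xffff, Chunk0 = Imm & 0xffff;
  uint64_t R = Chunk3 << 16;                 // lui  r, Chunk3
  if (R & 0x80000000ull)
    R |= 0xffffffff00000000ull;              // lui sign-extends its result
  R |= Chunk2;                               // ori  r, r, Chunk2
  R <<= 16;                                  // dsll r, r, 16
  R |= Chunk1;                               // ori  r, r, Chunk1
  R <<= 16;                                  // dsll r, r, 16
  R |= Chunk0;                               // ori  r, r, Chunk0
  return R;
}

int main() {
  assert(materialize64(0x123456789abcdef0ull) == 0x123456789abcdef0ull);
  return 0;
}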
std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
  unsigned Opcode = Node->getOpcode();
  DebugLoc DL = Node->getDebugLoc();

  ///
  // Instruction Selection not handled by the auto-generated
  // tablegen selection should be handled here.
  ///
  EVT NodeTy = Node->getValueType(0);
  SDNode *Result;
  unsigned MultOpc;

  switch(Opcode) {
  default: break;

  case ISD::SUBE: {
    SDValue InFlag = Node->getOperand(2);
    Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node);
    return std::make_pair(true, Result);
  }

  case ISD::ADDE: {
    SDValue InFlag = Node->getOperand(2);
    Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
    return std::make_pair(true, Result);
  }

  /// Mul with two results
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI: {
    if (NodeTy == MVT::i32)
      MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
    else
      MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);

    std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
                                                  true, true);

    if (!SDValue(Node, 0).use_empty())
      ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));

    if (!SDValue(Node, 1).use_empty())
      ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));

    return std::make_pair(true, (SDNode*)NULL);
  }

  /// Special Muls
  case ISD::MUL: {
    // Mips32 has a 32-bit three operand mul instruction.
    if (Subtarget.hasMips32() && NodeTy == MVT::i32)
      break;
    MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT;
    Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first;
    return std::make_pair(true, Result);
  }
  case ISD::MULHS:
  case ISD::MULHU: {
    if (NodeTy == MVT::i32)
      MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
    else
      MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);

    Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
    return std::make_pair(true, Result);
  }

  case ISD::ConstantFP: {
    ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
    if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
      if (Subtarget.hasMips64()) {
        SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
                                              Mips::ZERO_64, MVT::i64);
        Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero);
      } else {
        SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
                                              Mips::ZERO, MVT::i32);
        Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero,
                                        Zero);
      }
      return std::make_pair(true, Result);
    }
    break;
  }

  case ISD::Constant: {
    const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
    unsigned Size = CN->getValueSizeInBits(0);

    if (Size == 32)
      break;

    MipsAnalyzeImmediate AnalyzeImm;
    int64_t Imm = CN->getSExtValue();

    const MipsAnalyzeImmediate::InstSeq &Seq =
      AnalyzeImm.Analyze(Imm, Size, false);

    MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
    DebugLoc DL = CN->getDebugLoc();
    SDNode *RegOpnd;
    SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
                                                MVT::i64);

    // The first instruction can be a LUi which is different from other
    // instructions (ADDiu, ORI and SLL) in that it does not have a register
    // operand.
    if (Inst->Opc == Mips::LUi64)
      RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
    else
      RegOpnd =
        CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
                               CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
                               ImmOpnd);

    // The remaining instructions in the sequence are handled here.
    for (++Inst; Inst != Seq.end(); ++Inst) {
      ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
                                          MVT::i64);
      RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
                                       SDValue(RegOpnd, 0), ImmOpnd);
    }

    return std::make_pair(true, RegOpnd);
  }

  case MipsISD::ThreadPointer: {
    EVT PtrVT = TLI.getPointerTy();
    unsigned RdhwrOpc, SrcReg, DestReg;

    if (PtrVT == MVT::i32) {
      RdhwrOpc = Mips::RDHWR;
      SrcReg = Mips::HWR29;
      DestReg = Mips::V1;
    } else {
      RdhwrOpc = Mips::RDHWR64;
      SrcReg = Mips::HWR29_64;
      DestReg = Mips::V1_64;
    }

    SDNode *Rdhwr =
      CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
                             Node->getValueType(0),
                             CurDAG->getRegister(SrcReg, PtrVT));
    SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
                                         SDValue(Rdhwr, 0));
    SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT);
    ReplaceUses(SDValue(Node, 0), ResNode);
    return std::make_pair(true, ResNode.getNode());
  }
  }

  return std::make_pair(false, (SDNode*)NULL);
}
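// Illustrative sketch (not part of the backend): the *MUL_LOHI and MULH*
// cases above all funnel into a single MULT/MULTu whose double-width product
// lands in the LO/HI register pair, and selectMULT (not shown) is asked for
// only the halves that have uses. The arithmetic those halves recover:
#include <cassert>
#include <cstdint>

static void mulLoHiU32(uint32_t A, uint32_t B, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Product = (uint64_t)A * (uint64_t)B;  // what MULTu computes
  Lo = (uint32_t)Product;                        // what an MFLO read yields
  Hi = (uint32_t)(Product >> 32);                // what an MFHI read yields
}

int main() {
  uint32_t Lo, Hi;
  mulLoHiU32(0xffffffffu, 2u, Lo, Hi);
  assert(Lo == 0xfffffffeu && Hi == 1u);
  return 0;
}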