SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  const R600InstrInfo *TII =
                      static_cast<const R600InstrInfo*>(TM.getInstrInfo());
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case AMDGPUISD::CONST_ADDRESS: {
    for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
                              I != SDNode::use_end(); I = Next) {
      Next = llvm::next(I);
      if (!I->isMachineOpcode()) {
        continue;
      }
      unsigned Opcode = I->getMachineOpcode();
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SrcIdx = I.getOperandNo();
      int SelIdx;
      // Unlike MachineInstrs, SDNodes do not have results in their operand
      // list, so we need to increment the SrcIdx, since
      // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
      if (HasDst) {
        SrcIdx++;
      }

      SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
      if (SelIdx < 0) {
        continue;
      }

      SDValue CstOffset;
      if (N->getValueType(0).isVector() ||
          !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
        continue;

      // Gather constants values
      int SrcIndices[] = {
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
      };
      std::vector<unsigned> Consts;
      for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
        int OtherSrcIdx = SrcIndices[i];
        int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
        if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
          continue;
        }
        if (HasDst) {
          OtherSrcIdx--;
          OtherSelIdx--;
        }
        if (RegisterSDNode *Reg =
                         dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
          if (Reg->getReg() == AMDGPU::ALU_CONST) {
            ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
            Consts.push_back(Cst->getZExtValue());
          }
        }
      }

      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
      Consts.push_back(Cst->getZExtValue());
      if (!TII->fitsConstReadLimitations(Consts))
        continue;

      // Convert back to SDNode indices
      if (HasDst) {
        SrcIdx--;
        SelIdx--;
      }
      std::vector<SDValue> Ops;
      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
        if (i == SrcIdx) {
          Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
        } else if (i == SelIdx) {
          Ops.push_back(CstOffset);
        } else {
          Ops.push_back(I->getOperand(i));
        }
      }
      CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }

    unsigned RegClassID;
    switch(N->getValueType(0).getVectorNumElements()) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128 bits reg copy when going through TwoAddressInstructions
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    SDValue RegSeqArgs[9] = {
      CurDAG->getTargetConstant(RegClassID, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
    };
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[2 * i + 1] = N->getOperand(i);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
        RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  SDLoc(N), N->getValueType(0), Ops);
  }

  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet.  Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }

    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
                              Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
          if (ImmReg == AMDGPU::ALU_LITERAL_X) {
            // We can only use literal constants (e.g. AMDGPU::ZERO,
            // AMDGPU::ONE, etc) in machine opcodes.
            continue;
          }
      } else {
        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
            (TII->get(Use->getMachineOpcode()).TSFlags &
            R600_InstFlag::VECTOR)) {
          continue;
        }

        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
                                        AMDGPU::OpName::literal);
        if (ImmIdx == -1) {
          continue;
        }

        if (TII->getOperandIdx(Use->getMachineOpcode(),
                               AMDGPU::OpName::dst) != -1) {
          // subtract one from ImmIdx, because the DST operand is usually index
          // 0 for MachineInstrs, but we have no DST in the Ops vector.
          ImmIdx--;
        }

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
          assert(C);

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }
  SDNode *Result = SelectCode(N);

  // Fold operands of selected node

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
    if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

    }
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
        && TII->hasInstrModifiers(Result->getMachineOpcode())) {
      // Fold FNEG/FABS
      // TODO: Isel can generate multiple MachineInst, we need to recursively
      // parse Result
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If node has a single use which is CLAMP_R600, folds it
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
            TII->getOperandIdx(Result->getMachineOpcode(), AMDGPU::OpName::clamp);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
              Result->getMachineOpcode(), PotentialClamp->getVTList(),
              Ops.data(), NumOp);
        }
      }
    }
  }

  return Result;
}
Exemplo n.º 2
0
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
SDNode *IA64DAGToDAGISel::Select(SDValue Op) {
    SDNode *N = Op.getNode();
    if (N->isMachineOpcode())
        return NULL;   // Already selected.
    DebugLoc dl = Op.getDebugLoc();

    switch (N->getOpcode()) {
    default:
        break;

    case IA64ISD::BRCALL: { // XXX: this is also a hack!
        SDValue Chain = N->getOperand(0);
        SDValue InFlag;  // Null incoming flag value.

        if(N->getNumOperands()==3) { // we have an incoming chain, callee and flag
            InFlag = N->getOperand(2);
        }

        unsigned CallOpcode;
        SDValue CallOperand;

        // if we can call directly, do so
        if (GlobalAddressSDNode *GASD =
                    dyn_cast<GlobalAddressSDNode>(N->getOperand(1))) {
            CallOpcode = IA64::BRCALL_IPREL_GA;
            CallOperand = CurDAG->getTargetGlobalAddress(GASD->getGlobal(), MVT::i64);
        } else if (isa<ExternalSymbolSDNode>(N->getOperand(1))) {
            // FIXME: we currently NEED this case for correctness, to avoid
            // "non-pic code with imm reloc.n against dynamic symbol" errors
            CallOpcode = IA64::BRCALL_IPREL_ES;
            CallOperand = N->getOperand(1);
        } else {
            // otherwise we need to load the function descriptor,
            // load the branch target (function)'s entry point and GP,
            // branch (call) then restore the GP
            SDValue FnDescriptor = N->getOperand(1);

            // load the branch target's entry point [mem] and
            // GP value [mem+8]
            SDValue targetEntryPoint=
                SDValue(CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, MVT::Other,
                                              FnDescriptor, CurDAG->getEntryNode()), 0);
            Chain = targetEntryPoint.getValue(1);
            SDValue targetGPAddr=
                SDValue(CurDAG->getTargetNode(IA64::ADDS, dl, MVT::i64,
                                              FnDescriptor,
                                              CurDAG->getConstant(8, MVT::i64)), 0);
            Chain = targetGPAddr.getValue(1);
            SDValue targetGP =
                SDValue(CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64,MVT::Other,
                                              targetGPAddr, CurDAG->getEntryNode()), 0);
            Chain = targetGP.getValue(1);

            Chain = CurDAG->getCopyToReg(Chain, dl, IA64::r1, targetGP, InFlag);
            InFlag = Chain.getValue(1);
            Chain = CurDAG->getCopyToReg(Chain, dl, IA64::B6,
                                         targetEntryPoint, InFlag); // FLAG these?
            InFlag = Chain.getValue(1);

            CallOperand = CurDAG->getRegister(IA64::B6, MVT::i64);
            CallOpcode = IA64::BRCALL_INDIRECT;
        }

        // Finally, once everything is setup, emit the call itself
        if (InFlag.getNode())
            Chain = SDValue(CurDAG->getTargetNode(CallOpcode, dl, MVT::Other,
                                                  MVT::Flag, CallOperand, InFlag), 0);
        else // there might be no arguments
            Chain = SDValue(CurDAG->getTargetNode(CallOpcode, dl, MVT::Other,
                                                  MVT::Flag, CallOperand, Chain), 0);
        InFlag = Chain.getValue(1);

        std::vector<SDValue> CallResults;

        CallResults.push_back(Chain);
        CallResults.push_back(InFlag);

        for (unsigned i = 0, e = CallResults.size(); i != e; ++i)
            ReplaceUses(Op.getValue(i), CallResults[i]);
        return NULL;
    }

    case IA64ISD::GETFD: {
        SDValue Input = N->getOperand(0);
        return CurDAG->getTargetNode(IA64::GETFD, dl, MVT::i64, Input);
    }

    case ISD::FDIV:
    case ISD::SDIV:
    case ISD::UDIV:
    case ISD::SREM:
    case ISD::UREM:
        return SelectDIV(Op);

    case ISD::TargetConstantFP: {
        SDValue Chain = CurDAG->getEntryNode(); // this is a constant, so..

        SDValue V;
        ConstantFPSDNode* N2 = cast<ConstantFPSDNode>(N);
        if (N2->getValueAPF().isPosZero()) {
            V = CurDAG->getCopyFromReg(Chain, dl, IA64::F0, MVT::f64);
        } else if (N2->isExactlyValue(N2->getValueType(0) == MVT::f32 ?
                                      APFloat(+1.0f) : APFloat(+1.0))) {
            V = CurDAG->getCopyFromReg(Chain, dl, IA64::F1, MVT::f64);
        } else
            assert(0 && "Unexpected FP constant!");

        ReplaceUses(SDValue(N, 0), V);
        return 0;
    }

    case ISD::FrameIndex: { // TODO: reduce creepyness
        int FI = cast<FrameIndexSDNode>(N)->getIndex();
        if (N->hasOneUse())
            return CurDAG->SelectNodeTo(N, IA64::MOV, MVT::i64,
                                        CurDAG->getTargetFrameIndex(FI, MVT::i64));
        else
            return CurDAG->getTargetNode(IA64::MOV, dl, MVT::i64,
                                         CurDAG->getTargetFrameIndex(FI, MVT::i64));
    }

    case ISD::ConstantPool: { // TODO: nuke the constant pool
        // (ia64 doesn't need one)
        ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
        Constant *C = CP->getConstVal();
        SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64,
                      CP->getAlignment());
        return CurDAG->getTargetNode(IA64::ADDL_GA, dl, MVT::i64, // ?
                                     CurDAG->getRegister(IA64::r1, MVT::i64), CPI);
    }

    case ISD::GlobalAddress: {
        GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
        SDValue GA = CurDAG->getTargetGlobalAddress(GV, MVT::i64);
        SDValue Tmp =
            SDValue(CurDAG->getTargetNode(IA64::ADDL_GA, dl, MVT::i64,
                                          CurDAG->getRegister(IA64::r1,
                                                  MVT::i64), GA), 0);
        return CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, MVT::Other, Tmp,
                                     CurDAG->getEntryNode());
    }

    /* XXX
       case ISD::ExternalSymbol: {
         SDValue EA = CurDAG->getTargetExternalSymbol(
           cast<ExternalSymbolSDNode>(N)->getSymbol(),
           MVT::i64);
         SDValue Tmp = CurDAG->getTargetNode(IA64::ADDL_EA, dl, MVT::i64,
                                               CurDAG->getRegister(IA64::r1,
                                                                   MVT::i64),
                                               EA);
         return CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, Tmp);
       }
    */

    case ISD::LOAD: { // FIXME: load -1, not 1, for bools?
        LoadSDNode *LD = cast<LoadSDNode>(N);
        SDValue Chain = LD->getChain();
        SDValue Address = LD->getBasePtr();

        MVT TypeBeingLoaded = LD->getMemoryVT();
        unsigned Opc;
        switch (TypeBeingLoaded.getSimpleVT()) {
        default:
#ifndef NDEBUG
            N->dump(CurDAG);
#endif
            assert(0 && "Cannot load this type!");
        case MVT::i1: { // this is a bool
            Opc = IA64::LD1; // first we load a byte, then compare for != 0
            if(N->getValueType(0) == MVT::i1) { // XXX: early exit!
                return CurDAG->SelectNodeTo(N, IA64::CMPNE, MVT::i1, MVT::Other,
                                            SDValue(CurDAG->getTargetNode(Opc, dl,
                                                    MVT::i64,
                                                    Address), 0),
                                            CurDAG->getRegister(IA64::r0, MVT::i64),
                                            Chain);
            }
            /* otherwise, we want to load a bool into something bigger: LD1
               will do that for us, so we just fall through */
        }
        case MVT::i8:
            Opc = IA64::LD1;
            break;
        case MVT::i16:
            Opc = IA64::LD2;
            break;
        case MVT::i32:
            Opc = IA64::LD4;
            break;
        case MVT::i64:
            Opc = IA64::LD8;
            break;

        case MVT::f32:
            Opc = IA64::LDF4;
            break;
        case MVT::f64:
            Opc = IA64::LDF8;
            break;
        }

        // TODO: comment this
        return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), MVT::Other,
                                    Address, Chain);
    }

    case ISD::STORE: {
        StoreSDNode *ST = cast<StoreSDNode>(N);
        SDValue Address = ST->getBasePtr();
        SDValue Chain = ST->getChain();

        unsigned Opc;
        if (ISD::isNON_TRUNCStore(N)) {
            switch (N->getOperand(1).getValueType().getSimpleVT()) {
            default:
                assert(0 && "unknown type in store");
            case MVT::i1: { // this is a bool
                Opc = IA64::ST1; // we store either 0 or 1 as a byte
                // first load zero!
                SDValue Initial = CurDAG->getCopyFromReg(Chain, dl, IA64::r0, MVT::i64);
                Chain = Initial.getValue(1);
                // then load 1 into the same reg iff the predicate to store is 1
                SDValue Tmp = ST->getValue();
                Tmp =
                    SDValue(CurDAG->getTargetNode(IA64::TPCADDS, dl, MVT::i64, Initial,
                                                  CurDAG->getTargetConstant(1,
                                                          MVT::i64),
                                                  Tmp), 0);
                return CurDAG->SelectNodeTo(N, Opc, MVT::Other, Address, Tmp, Chain);
            }
            case MVT::i64:
                Opc = IA64::ST8;
                break;
            case MVT::f64:
                Opc = IA64::STF8;
                break;
            }
        } else { // Truncating store
            switch(ST->getMemoryVT().getSimpleVT()) {
            default:
                assert(0 && "unknown type in truncstore");
            case MVT::i8:
                Opc = IA64::ST1;
                break;
            case MVT::i16:
                Opc = IA64::ST2;
                break;
            case MVT::i32:
                Opc = IA64::ST4;
                break;
            case MVT::f32:
                Opc = IA64::STF4;
                break;
            }
        }

        SDValue N1 = N->getOperand(1);
        SDValue N2 = N->getOperand(2);
        return CurDAG->SelectNodeTo(N, Opc, MVT::Other, N2, N1, Chain);
    }

    case ISD::BRCOND: {
        SDValue Chain = N->getOperand(0);
        SDValue CC = N->getOperand(1);
        MachineBasicBlock *Dest =
            cast<BasicBlockSDNode>(N->getOperand(2))->getBasicBlock();
        //FIXME - we do NOT need long branches all the time
        return CurDAG->SelectNodeTo(N, IA64::BRLCOND_NOTCALL, MVT::Other, CC,
                                    CurDAG->getBasicBlock(Dest), Chain);
    }

    case ISD::CALLSEQ_START:
    case ISD::CALLSEQ_END: {
        int64_t Amt = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
        unsigned Opc = N->getOpcode() == ISD::CALLSEQ_START ?
                       IA64::ADJUSTCALLSTACKDOWN : IA64::ADJUSTCALLSTACKUP;
        SDValue N0 = N->getOperand(0);
        return CurDAG->SelectNodeTo(N, Opc, MVT::Other, getI64Imm(Amt), N0);
    }

    case ISD::BR:
        // FIXME: we don't need long branches all the time!
        SDValue N0 = N->getOperand(0);
        return CurDAG->SelectNodeTo(N, IA64::BRL_NOTCALL, MVT::Other,
                                    N->getOperand(1), N0);
    }

    return SelectCode(Op);
}
Exemplo n.º 3
0
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case ISD::BUILD_VECTOR: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128 bits reg copy when going through TwoAddressInstructions
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    SDValue RegSeqArgs[9] = {
      CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
    };
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[2 * i + 1] = N->getOperand(i);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
        RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet.  Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());

    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
                              Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
          if (ImmReg == AMDGPU::ALU_LITERAL_X) {
            // We can only use literal constants (e.g. AMDGPU::ZERO,
            // AMDGPU::ONE, etc) in machine opcodes.
            continue;
          }
      } else {
        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
            (TII->get(Use->getMachineOpcode()).TSFlags &
            R600_InstFlag::VECTOR)) {
          continue;
        }

        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM);
        assert(ImmIdx != -1);

        // subtract one from ImmIdx, because the DST operand is usually index
        // 0 for MachineInstrs, but we have no DST in the Ops vector.
        ImmIdx--;

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
          assert(C);

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }
  SDNode *Result = SelectCode(N);

  // Fold operands of selected node

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
        && TII->isALUInstr(Result->getMachineOpcode())) {
      // Fold FNEG/FABS/CONST_ADDRESS
      // TODO: Isel can generate multiple MachineInst, we need to recursively
      // parse Result
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If node has a single use which is CLAMP_R600, folds it
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
            TII->getOperandIdx(Result->getMachineOpcode(), R600Operands::CLAMP);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
              Result->getMachineOpcode(), PotentialClamp->getVTList(),
              Ops.data(), NumOp);
        }
      }
    }
  }

  return Result;
}