Example #1
void SystemZTDCPass::convertFCmp(CmpInst &I) {
  Value *Op0 = I.getOperand(0);
  auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
  auto Pred = I.getPredicate();
  // Only comparisons with consts are interesting.
  if (!Const)
    return;
  // Compute the smallest normal number (and its negation).
  auto &Sem = Op0->getType()->getFltSemantics();
  APFloat Smallest = APFloat::getSmallestNormalized(Sem);
  APFloat NegSmallest = Smallest;
  NegSmallest.changeSign();
  // Check if Const is one of our recognized consts.
  int WhichConst;
  if (Const->isZero()) {
    // All comparisons with 0 can be converted.
    WhichConst = 0;
  } else if (Const->isInfinity()) {
    // Likewise for infinities.
    WhichConst = Const->isNegative() ? 2 : 1;
  } else if (Const->isExactlyValue(Smallest)) {
    // For Smallest, we cannot do EQ separately from GT.
    if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
        (Pred & CmpInst::FCMP_OGE) != 0)
      return;
    WhichConst = 3;
  } else if (Const->isExactlyValue(NegSmallest)) {
    // Likewise for NegSmallest, we cannot do EQ separately from LT.
    if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
        (Pred & CmpInst::FCMP_OLE) != 0)
      return;
    WhichConst = 4;
  } else {
    // Not one of our special constants.
    return;
  }
  // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
  static const int Masks[][4] = {
    { // 0
      SystemZ::TDCMASK_ZERO,              // eq
      SystemZ::TDCMASK_POSITIVE,          // gt
      SystemZ::TDCMASK_NEGATIVE,          // lt
      SystemZ::TDCMASK_NAN,               // un
    },
    { // inf
      SystemZ::TDCMASK_INFINITY_PLUS,     // eq
      0,                                  // gt
      (SystemZ::TDCMASK_ZERO |
       SystemZ::TDCMASK_NEGATIVE |
       SystemZ::TDCMASK_NORMAL_PLUS |
       SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
      SystemZ::TDCMASK_NAN,               // un
    },
    { // -inf
      SystemZ::TDCMASK_INFINITY_MINUS,    // eq
      (SystemZ::TDCMASK_ZERO |
       SystemZ::TDCMASK_POSITIVE |
       SystemZ::TDCMASK_NORMAL_MINUS |
       SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
      0,                                  // lt
      SystemZ::TDCMASK_NAN,               // un
    },
    { // minnorm
      0,                                  // eq (unsupported)
      (SystemZ::TDCMASK_NORMAL_PLUS |
       SystemZ::TDCMASK_INFINITY_PLUS),   // gt (actually ge)
      (SystemZ::TDCMASK_ZERO |
       SystemZ::TDCMASK_NEGATIVE |
       SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
      SystemZ::TDCMASK_NAN,               // un
    },
    { // -minnorm
      0,                                  // eq (unsupported)
      (SystemZ::TDCMASK_ZERO |
       SystemZ::TDCMASK_POSITIVE |
       SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
      (SystemZ::TDCMASK_NORMAL_MINUS |
       SystemZ::TDCMASK_INFINITY_MINUS),  // lt (actually le)
      SystemZ::TDCMASK_NAN,               // un
    }
  };
  // Construct the mask as a combination of the partial masks.
  int Mask = 0;
  if (Pred & CmpInst::FCMP_OEQ)
    Mask |= Masks[WhichConst][0];
  if (Pred & CmpInst::FCMP_OGT)
    Mask |= Masks[WhichConst][1];
  if (Pred & CmpInst::FCMP_OLT)
    Mask |= Masks[WhichConst][2];
  if (Pred & CmpInst::FCMP_UNO)
    Mask |= Masks[WhichConst][3];
  // A lone fcmp is unworthy of tdc conversion on its own, but may become
  // worthy if combined with fabs.
  bool Worthy = false;
  if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
    Function *F = CI->getCalledFunction();
    if (F && F->getIntrinsicID() == Intrinsic::fabs) {
      // Fold with fabs - adjust the mask appropriately.
      Mask &= SystemZ::TDCMASK_PLUS;
      Mask |= Mask >> 1;
      Op0 = CI->getArgOperand(0);
      // A combination of fcmp with fabs is a win, unless the constant
      // involved is 0 (which is handled by later passes).
      Worthy = WhichConst != 0;
      PossibleJunk.insert(CI);
    }
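
A note on the mask construction above: LLVM encodes an fcmp predicate in four bits, one each for the ordered-equal, ordered-greater, ordered-less, and unordered outcomes, so AND-ing the predicate against FCMP_OEQ/FCMP_OGT/FCMP_OLT/FCMP_UNO selects which partial masks to OR together. Below is a minimal standalone sketch of that decomposition; the mask values are illustrative placeholders, not the real SystemZ::TDCMASK_* constants.

// Standalone sketch: composing a test-data-class mask from the bits of an
// fcmp predicate. The predicate bit layout mirrors LLVM's CmpInst::Predicate
// (OEQ = 1, OGT = 2, OLT = 4, UNO = 8); the mask values are invented for the
// example and are NOT SystemZ's TDCMASK_* constants.
#include <cstdio>

enum FCmpBits { OEQ = 1, OGT = 2, OLT = 4, UNO = 8 };

// Hypothetical per-outcome masks for a comparison against 0.0:
// eq -> "zero", gt -> "positive", lt -> "negative", un -> "nan".
static const int MaskZero = 0x1, MaskPos = 0x2, MaskNeg = 0x4, MaskNan = 0x8;

static int buildMask(int Pred) {
  int Mask = 0;
  if (Pred & OEQ) Mask |= MaskZero;
  if (Pred & OGT) Mask |= MaskPos;
  if (Pred & OLT) Mask |= MaskNeg;
  if (Pred & UNO) Mask |= MaskNan;
  return Mask;
}

int main() {
  // FCMP_UGE = "unordered, greater, or equal" = UNO | OGT | OEQ.
  int Pred = UNO | OGT | OEQ;
  std::printf("mask for uge vs 0.0: 0x%x\n", buildMask(Pred)); // prints 0xb
  return 0;
}
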
Example #2
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case ISD::BUILD_VECTOR: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREGs,
    // which adds a 128-bit reg copy when going through the TwoAddressInstructions
    // pass. We want to avoid 128-bit copies as much as possible because they
    // can't be bundled by our scheduler.
    SDValue RegSeqArgs[9] = {
      CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
      SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
    };
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[2 * i + 1] = N->getOperand(i);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
        RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet.  Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
      break;
    }
    const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());

    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      ConstantFPSDNode *C = cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
                              Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
          if (ImmReg == AMDGPU::ALU_LITERAL_X) {
            // We can only use literal constants (e.g. AMDGPU::ZERO,
            // AMDGPU::ONE, etc) in machine opcodes.
            continue;
          }
      } else {
        if (!TII->isALUInstr(Use->getMachineOpcode()) ||
            (TII->get(Use->getMachineOpcode()).TSFlags &
            R600_InstFlag::VECTOR)) {
          continue;
        }

        int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM);
        assert(ImmIdx != -1);

        // subtract one from ImmIdx, because the DST operand is usually index
        // 0 for MachineInstrs, but we have no DST in the Ops vector.
        ImmIdx--;

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
          assert(C);

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }
  SDNode *Result = SelectCode(N);

  // Fold operands of selected node

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
        && TII->isALUInstr(Result->getMachineOpcode())) {
      // Fold FNEG/FABS/CONST_ADDRESS
      // TODO: Isel can generate multiple MachineInstrs; we need to recursively
      // parse Result
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If the node has a single use which is CLAMP_R600, fold it.
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
            TII->getOperandIdx(Result->getMachineOpcode(), R600Operands::CLAMP);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
              Result->getMachineOpcode(), PotentialClamp->getVTList(),
              Ops.data(), NumOp);
        }
      }
    }
  }

  return Result;
}
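
The first BUILD_VECTOR case above emits a REG_SEQUENCE whose operand list is the register-class ID followed by one (value, subregister index) pair per element, which is why RegSeqArgs holds 9 entries for four elements and the node is created with 2 * N->getNumOperands() + 1 operands. A standalone sketch of that layout, with plain ints standing in for SDValues and invented IDs:

// Standalone sketch of the REG_SEQUENCE operand layout: operand 0 is the
// register-class ID, followed by (value, subreg index) pairs, so an
// N-element vector needs 2*N + 1 operands. The numeric IDs are placeholders.
#include <cstdio>
#include <vector>

int main() {
  const int RegClassID = 128;            // stand-in for R600_Reg128RegClassID
  const int SubReg[4] = {0, 1, 2, 3};    // stand-ins for sub0..sub3
  const int Elems[4] = {10, 11, 12, 13}; // stand-ins for the vector elements

  std::vector<int> Ops;
  Ops.push_back(RegClassID);
  for (unsigned i = 0; i < 4; ++i) {
    Ops.push_back(Elems[i]);             // RegSeqArgs[2*i + 1]
    Ops.push_back(SubReg[i]);            // RegSeqArgs[2*i + 2]
  }
  std::printf("operand count: %zu (2*4 + 1 = 9)\n", Ops.size());
  return 0;
}
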
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  const R600InstrInfo *TII =
                      static_cast<const R600InstrInfo*>(TM.getInstrInfo());
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    return NULL;   // Already selected.
  }
  switch (Opc) {
  default: break;
  case AMDGPUISD::CONST_ADDRESS: {
    for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
                              I != SDNode::use_end(); I = Next) {
      Next = llvm::next(I);
      if (!I->isMachineOpcode()) {
        continue;
      }
      unsigned Opcode = I->getMachineOpcode();
      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
      int SrcIdx = I.getOperandNo();
      int SelIdx;
      // Unlike MachineInstrs, SDNodes do not have results in their operand
      // list, so we need to increment the SrcIdx, since
      // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
      if (HasDst) {
        SrcIdx++;
      }

      SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
      if (SelIdx < 0) {
        continue;
      }

      SDValue CstOffset;
      if (N->getValueType(0).isVector() ||
          !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
        continue;

      // Gather constant values
      int SrcIndices[] = {
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
      };
      std::vector<unsigned> Consts;
      for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
        int OtherSrcIdx = SrcIndices[i];
        int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
        if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
          continue;
        }
        if (HasDst) {
          OtherSrcIdx--;
          OtherSelIdx--;
        }
        if (RegisterSDNode *Reg =
                         dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
          if (Reg->getReg() == AMDGPU::ALU_CONST) {
            ConstantSDNode *Cst = cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
            Consts.push_back(Cst->getZExtValue());
          }
        }
      }

      ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
      Consts.push_back(Cst->getZExtValue());
      if (!TII->fitsConstReadLimitations(Consts))
        continue;

      // Convert back to SDNode indices
      if (HasDst) {
        SrcIdx--;
        SelIdx--;
      }
      std::vector<SDValue> Ops;
      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
        if (i == SrcIdx) {
          Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
        } else if (i == SelIdx) {
          Ops.push_back(CstOffset);
        } else {
          Ops.push_back(I->getOperand(i));
        }
      }
      CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    const AMDGPURegisterInfo *TRI =
                   static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo());
    const SIRegisterInfo *SIRI =
                   static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      bool UseVReg = true;
      for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
                                                    U != E; ++U) {
        if (!U->isMachineOpcode()) {
          continue;
        }
        const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
        if (!RC) {
          continue;
        }
        if (SIRI->isSGPRClass(RC)) {
          UseVReg = false;
        }
      }
      switch(NumVectorElts) {
      case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
                                     AMDGPU::SReg_32RegClassID;
        break;
      case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
                                     AMDGPU::SReg_64RegClassID;
        break;
      case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
                                     AMDGPU::SReg_128RegClassID;
        break;
      case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
                                     AMDGPU::SReg_256RegClassID;
        break;
      case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
                                      AMDGPU::SReg_512RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREGs,
      // which adds a 128-bit reg copy when going through the TwoAddressInstructions
      // pass. We want to avoid 128-bit copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);

    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS,
                                  VT.getVectorElementType(),
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SDValue RegSeqArgs[16 * 2 + 1];

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
        RegSeqArgs, 2 * N->getNumOperands() + 1);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  SDLoc(N), N->getValueType(0), Ops);
  }

  case ISD::ConstantFP:
  case ISD::Constant: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    // XXX: Custom immediate lowering not implemented yet.  Instead we use
    // pseudo instructions defined in SIInstructions.td
    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }

    uint64_t ImmValue = 0;
    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;

    if (N->getOpcode() == ISD::ConstantFP) {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::f64);

      ConstantFPSDNode *C = cast<ConstantFPSDNode>(N);
      APFloat Value = C->getValueAPF();
      float FloatValue = Value.convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = AMDGPU::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = AMDGPU::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = AMDGPU::ONE;
      } else {
        ImmValue = Value.bitcastToAPInt().getZExtValue();
      }
    } else {
      // XXX: 64-bit Immediates not supported yet
      assert(N->getValueType(0) != MVT::i64);

      ConstantSDNode *C = cast<ConstantSDNode>(N);
      if (C->getZExtValue() == 0) {
        ImmReg = AMDGPU::ZERO;
      } else if (C->getZExtValue() == 1) {
        ImmReg = AMDGPU::ONE_INT;
      } else {
        ImmValue = C->getZExtValue();
      }
    }

    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
                              Use != SDNode::use_end(); Use = Next) {
      Next = llvm::next(Use);
      std::vector<SDValue> Ops;
      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
        Ops.push_back(Use->getOperand(i));
      }

      if (!Use->isMachineOpcode()) {
          if (ImmReg == AMDGPU::ALU_LITERAL_X) {
            // We can only use literal constants (e.g. AMDGPU::ZERO,
            // AMDGPU::ONE, etc) in machine opcodes.
            continue;
          }
      } else {
        switch(Use->getMachineOpcode()) {
        case AMDGPU::REG_SEQUENCE: break;
        default:
          if (!TII->isALUInstr(Use->getMachineOpcode()) ||
              (TII->get(Use->getMachineOpcode()).TSFlags &
               R600_InstFlag::VECTOR)) {
            continue;
          }
        }

        // Check that we aren't already using an immediate.
        // XXX: It's possible for an instruction to have more than one
        // immediate operand, but this is not supported yet.
        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
          int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
                                          AMDGPU::OpName::literal);
          if (ImmIdx == -1) {
            continue;
          }

          if (TII->getOperandIdx(Use->getMachineOpcode(),
                                 AMDGPU::OpName::dst) != -1) {
            // subtract one from ImmIdx, because the DST operand is usually index
            // 0 for MachineInstrs, but we have no DST in the Ops vector.
            ImmIdx--;
          }
          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
          assert(C);

          if (C->getZExtValue() != 0) {
            // This instruction is already using an immediate.
            continue;
          }

          // Set the immediate value
          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
        }
      }
      // Set the immediate register
      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);

      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
    }
    break;
  }
  }
  SDNode *Result = SelectCode(N);

  // Fold operands of selected node

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
    const R600InstrInfo *TII =
        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
    if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

    }
    if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
        && TII->hasInstrModifiers(Result->getMachineOpcode())) {
      // Fold FNEG/FABS
      // TODO: Isel can generate multiple MachineInstrs; we need to recursively
      // parse Result
      bool IsModified = false;
      do {
        std::vector<SDValue> Ops;
        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
            I != E; ++I)
          Ops.push_back(*I);
        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
        if (IsModified) {
          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
        }
      } while (IsModified);

      // If the node has a single use which is CLAMP_R600, fold it.
      if (Result->hasOneUse() && Result->isMachineOpcode()) {
        SDNode *PotentialClamp = *Result->use_begin();
        if (PotentialClamp->isMachineOpcode() &&
            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
          unsigned ClampIdx =
            TII->getOperandIdx(Result->getMachineOpcode(), AMDGPU::OpName::clamp);
          std::vector<SDValue> Ops;
          unsigned NumOp = Result->getNumOperands();
          for (unsigned i = 0; i < NumOp; ++i) {
            Ops.push_back(Result->getOperand(i));
          }
          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
          Result = CurDAG->SelectNodeTo(PotentialClamp,
              Result->getMachineOpcode(), PotentialClamp->getVTList(),
              Ops.data(), NumOp);
        }
      }
    }
  }

  return Result;
}
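
Both versions of Select above repeatedly shift an operand index by one whenever the instruction has a dst operand, because R600InstrInfo::getOperandIdx returns MachineInstr operand indices (result at index 0) while SDNode operand lists carry no result. A small standalone sketch of that mapping, under the same assumption:

// Standalone sketch of the MachineInstr <-> SDNode operand index shift used
// above: when the instruction defines a dst, the MachineInstr index is one
// higher than the corresponding SDNode operand index.
#include <cassert>
#include <cstdio>

static int sdnodeToMIIndex(int SDIdx, bool HasDst) { return HasDst ? SDIdx + 1 : SDIdx; }
static int miToSDNodeIndex(int MIIdx, bool HasDst) { return HasDst ? MIIdx - 1 : MIIdx; }

int main() {
  // src0 is MachineInstr operand 1 when a dst exists, but SDNode operand 0.
  assert(sdnodeToMIIndex(0, /*HasDst=*/true) == 1);
  assert(miToSDNodeIndex(1, /*HasDst=*/true) == 0);
  std::printf("index mapping checks passed\n");
  return 0;
}
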
// Create an MCInst from a MachineInstr
void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB,
                            HexagonAsmPrinter& AP) {
  if (MI->getOpcode() == Hexagon::ENDLOOP0) {
    HexagonMCInstrInfo::setInnerLoop(MCB);
    return;
  }
  if (MI->getOpcode() == Hexagon::ENDLOOP1) {
    HexagonMCInstrInfo::setOuterLoop(MCB);
    return;
  }
  MCInst* MCI = new (AP.OutContext) MCInst;
  MCI->setOpcode(MI->getOpcode());
  assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) &&
         "MCI opcode should have been set on construction");

  for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) {
    const MachineOperand &MO = MI->getOperand(i);
    MCOperand MCO;

    switch (MO.getType()) {
    default:
      MI->dump();
      llvm_unreachable("unknown operand type");
    case MachineOperand::MO_Register:
      // Ignore all implicit register operands.
      if (MO.isImplicit()) continue;
      MCO = MCOperand::createReg(MO.getReg());
      break;
    case MachineOperand::MO_FPImmediate: {
      APFloat Val = MO.getFPImm()->getValueAPF();
      // FP immediates are used only when setting GPRs, so they may be dealt
      // with like regular immediates from this point on.
      MCO = MCOperand::createImm(*Val.bitcastToAPInt().getRawData());
      break;
    }
    case MachineOperand::MO_Immediate:
      MCO = MCOperand::createImm(MO.getImm());
      break;
    case MachineOperand::MO_MachineBasicBlock:
      MCO = MCOperand::createExpr
              (MCSymbolRefExpr::create(MO.getMBB()->getSymbol(),
               AP.OutContext));
      break;
    case MachineOperand::MO_GlobalAddress:
      MCO = GetSymbolRef(MO, AP.getSymbol(MO.getGlobal()), AP);
      break;
    case MachineOperand::MO_ExternalSymbol:
      MCO = GetSymbolRef(MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()),
                         AP);
      break;
    case MachineOperand::MO_JumpTableIndex:
      MCO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP);
      break;
    case MachineOperand::MO_ConstantPoolIndex:
      MCO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP);
      break;
    case MachineOperand::MO_BlockAddress:
      MCO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP);
      break;
    }

    MCI->addOperand(MCO);
  }
  MCB.addOperand(MCOperand::createInst(MCI));
}
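
The MO_FPImmediate case above reinterprets the floating-point payload as raw integer bits before emitting it as an ordinary immediate. A standalone sketch of the same bitcast, using memcpy in place of APFloat::bitcastToAPInt():

// Standalone sketch: reinterpret a double's bit pattern as a 64-bit integer,
// the same idea as the MO_FPImmediate handling above.
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint64_t bitcastToBits(double D) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  return Bits;
}

int main() {
  std::printf("1.0 as raw bits: 0x%llx\n",
              (unsigned long long)bitcastToBits(1.0)); // 0x3ff0000000000000
  return 0;
}
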
Example #5
// Walk forwards down the list of seen instructions, so we visit defs before
// uses.
void Float2IntPass::walkForwards() {
  for (auto &It : reverse(SeenInsts)) {
    if (It.second != unknownRange())
      continue;

    Instruction *I = It.first;
    std::function<ConstantRange(ArrayRef<ConstantRange>)> Op;
    switch (I->getOpcode()) {
      // FIXME: Handle select and phi nodes.
    default:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
      llvm_unreachable("Should have been handled in walkForwards!");

    case Instruction::FAdd:
    case Instruction::FSub:
    case Instruction::FMul:
      Op = [I](ArrayRef<ConstantRange> Ops) {
        assert(Ops.size() == 2 && "it's a binary operator!");
        auto BinOp = (Instruction::BinaryOps) I->getOpcode();
        return Ops[0].binaryOp(BinOp, Ops[1]);
      };
      break;

    //
    // Root-only instructions - we'll only see these if they're the
    //                          first node in a walk.
    //
    case Instruction::FPToUI:
    case Instruction::FPToSI:
      Op = [I](ArrayRef<ConstantRange> Ops) {
        assert(Ops.size() == 1 && "FPTo[US]I is a unary operator!");
        // Note: We're ignoring the cast's output size here as that's what the
        // caller expects.
        auto CastOp = (Instruction::CastOps)I->getOpcode();
        return Ops[0].castOp(CastOp, MaxIntegerBW+1);
      };
      break;

    case Instruction::FCmp:
      Op = [](ArrayRef<ConstantRange> Ops) {
        assert(Ops.size() == 2 && "FCmp is a binary operator!");
        return Ops[0].unionWith(Ops[1]);
      };
      break;
    }

    bool Abort = false;
    SmallVector<ConstantRange,4> OpRanges;
    for (Value *O : I->operands()) {
      if (Instruction *OI = dyn_cast<Instruction>(O)) {
        assert(SeenInsts.find(OI) != SeenInsts.end() &&
               "def not seen before use!");
        OpRanges.push_back(SeenInsts.find(OI)->second);
      } else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) {
        // Work out if the floating point number can be losslessly represented
        // as an integer.
        // APFloat::convertToInteger(&Exact) purports to do what we want, but
        // the exactness can be too precise. For example, negative zero can
        // never be exactly converted to an integer.
        //
        // Instead, we ask APFloat to round itself to an integral value - this
        // preserves sign-of-zero - then compare the result with the original.
        //
        const APFloat &F = CF->getValueAPF();

        // First, weed out obviously incorrect values. Non-finite numbers
        // can't be represented and neither can negative zero, unless
        // we're in fast math mode.
        if (!F.isFinite() ||
            (F.isZero() && F.isNegative() && isa<FPMathOperator>(I) &&
             !I->hasNoSignedZeros())) {
          seen(I, badRange());
          Abort = true;
          break;
        }

        APFloat NewF = F;
        auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven);
        if (Res != APFloat::opOK || NewF.compare(F) != APFloat::cmpEqual) {
          seen(I, badRange());
          Abort = true;
          break;
        }
        // OK, it's representable. Now get it.
        APSInt Int(MaxIntegerBW+1, false);
        bool Exact;
        CF->getValueAPF().convertToInteger(Int,
                                           APFloat::rmNearestTiesToEven,
                                           &Exact);
        OpRanges.push_back(ConstantRange(Int));
      } else {
        llvm_unreachable("Should have already marked this as badRange!");
      }
    }

    // Reduce the operands' ranges to a single range and return.
    if (!Abort)
      seen(I, Op(OpRanges));
  }
}
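
The ConstantFP handling above decides whether a floating-point operand is losslessly representable as an integer by rounding it to an integral value and comparing with the original, after first rejecting non-finite values and (outside fast-math) negative zero. A standalone sketch of the same idea with plain doubles; nearbyint under the default rounding mode stands in for APFloat::roundToIntegral, so ties-to-even is assumed rather than guaranteed:

// Standalone sketch of the lossless-integer check: round to an integral
// value and compare with the original. Note that -0.0 passes this test,
// which is why the pass weeds out negative zero separately beforehand.
#include <cmath>
#include <cstdio>

static bool isExactInteger(double D) {
  if (!std::isfinite(D))
    return false;
  return std::nearbyint(D) == D;
}

int main() {
  std::printf("42.0 -> %d\n", isExactInteger(42.0)); // 1
  std::printf("0.5  -> %d\n", isExactInteger(0.5));  // 0
  std::printf("-0.0 -> %d\n", isExactInteger(-0.0)); // 1 (handled separately above)
  return 0;
}
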
static SILInstruction *constantFoldBuiltin(BuiltinInst *BI,
                                           Optional<bool> &ResultsInError) {
  const IntrinsicInfo &Intrinsic = BI->getIntrinsicInfo();
  SILModule &M = BI->getModule();

  // If it's an llvm intrinsic, fold the intrinsic.
  if (Intrinsic.ID != llvm::Intrinsic::not_intrinsic)
    return constantFoldIntrinsic(BI, Intrinsic.ID, ResultsInError);

  // Otherwise, it should be one of the builtin functions.
  OperandValueArrayRef Args = BI->getArguments();
  const BuiltinInfo &Builtin = BI->getBuiltinInfo();

  switch (Builtin.ID) {
  default: break;

// Check and fold binary arithmetic with overflow.
#define BUILTIN(id, name, Attrs)
#define BUILTIN_BINARY_OPERATION_WITH_OVERFLOW(id, name, _, attrs, overload) \
  case BuiltinValueKind::id:
#include "swift/AST/Builtins.def"
    return constantFoldBinaryWithOverflow(BI, Builtin.ID, ResultsInError);

#define BUILTIN(id, name, Attrs)
#define BUILTIN_BINARY_OPERATION(id, name, attrs, overload) \
  case BuiltinValueKind::id:
#include "swift/AST/Builtins.def"
    return constantFoldBinary(BI, Builtin.ID, ResultsInError);

// Fold comparison predicates.
#define BUILTIN(id, name, Attrs)
#define BUILTIN_BINARY_PREDICATE(id, name, attrs, overload) \
  case BuiltinValueKind::id:
#include "swift/AST/Builtins.def"
    return constantFoldCompare(BI, Builtin.ID);

  case BuiltinValueKind::Trunc:
  case BuiltinValueKind::ZExt:
  case BuiltinValueKind::SExt:
  case BuiltinValueKind::TruncOrBitCast:
  case BuiltinValueKind::ZExtOrBitCast:
  case BuiltinValueKind::SExtOrBitCast: {

    // We can fold if the value being cast is a constant.
    auto *V = dyn_cast<IntegerLiteralInst>(Args[0]);
    if (!V)
      return nullptr;

    APInt CastResV = constantFoldCast(V->getValue(), Builtin);

    // Add the literal instruction to represent the result of the cast.
    SILBuilderWithScope B(BI);
    return B.createIntegerLiteral(BI->getLoc(), BI->getType(), CastResV);
  }

  // Process special builtins that are designed to check for overflows in
  // integer conversions.
  case BuiltinValueKind::SToSCheckedTrunc:
  case BuiltinValueKind::UToUCheckedTrunc:
  case BuiltinValueKind::SToUCheckedTrunc:
  case BuiltinValueKind::UToSCheckedTrunc:
  case BuiltinValueKind::SUCheckedConversion:
  case BuiltinValueKind::USCheckedConversion: {
    return constantFoldAndCheckIntegerConversions(BI, Builtin, ResultsInError);
  }

  case BuiltinValueKind::IntToFPWithOverflow: {
    // Get the value. It should be a constant in most cases.
    // Note, this will not always be a constant, for example, when analyzing
    // the _convertFromBuiltinIntegerLiteral function itself.
    auto *V = dyn_cast<IntegerLiteralInst>(Args[0]);
    if (!V)
      return nullptr;
    APInt SrcVal = V->getValue();
    Type DestTy = Builtin.Types[1];

    APFloat TruncVal(
        DestTy->castTo<BuiltinFloatType>()->getAPFloatSemantics());
    APFloat::opStatus ConversionStatus = TruncVal.convertFromAPInt(
        SrcVal, /*isSigned=*/true, APFloat::rmNearestTiesToEven);

    SILLocation Loc = BI->getLoc();
    const ApplyExpr *CE = Loc.getAsASTNode<ApplyExpr>();

    // Check for overflow.
    if (ConversionStatus & APFloat::opOverflow) {
      // If we overflow and are not asked for diagnostics, just return nullptr.
      if (!ResultsInError.hasValue())
        return nullptr;

      SmallString<10> SrcAsString;
      SrcVal.toString(SrcAsString, /*radix*/10, true /*isSigned*/);
      
      // Otherwise emit our diagnostics and then return nullptr.
      diagnose(M.getASTContext(), Loc.getSourceLoc(),
               diag::integer_literal_overflow,
               CE ? CE->getType() : DestTy, SrcAsString);
      ResultsInError = Optional<bool>(true);
      return nullptr;
    }

    // The call to the builtin should be replaced with the constant value.
    SILBuilderWithScope B(BI);
    return B.createFloatLiteral(Loc, BI->getType(), TruncVal);
  }

  case BuiltinValueKind::FPTrunc: {
    // Get the value. It should be a constant in most cases.
    auto *V = dyn_cast<FloatLiteralInst>(Args[0]);
    if (!V)
      return nullptr;
    APFloat TruncVal = V->getValue();
    Type DestTy = Builtin.Types[1];
    bool losesInfo;
    APFloat::opStatus ConversionStatus = TruncVal.convert(
        DestTy->castTo<BuiltinFloatType>()->getAPFloatSemantics(),
        APFloat::rmNearestTiesToEven, &losesInfo);
    SILLocation Loc = BI->getLoc();

    // Check if conversion was successful.
    if (ConversionStatus != APFloat::opStatus::opOK &&
        ConversionStatus != APFloat::opStatus::opInexact) {
      return nullptr;
    }

    // The call to the builtin should be replaced with the constant value.
    SILBuilderWithScope B(BI);
    return B.createFloatLiteral(Loc, BI->getType(), TruncVal);
  }

  case BuiltinValueKind::AssumeNonNegative: {
    auto *V = dyn_cast<IntegerLiteralInst>(Args[0]);
    if (!V)
      return nullptr;

    APInt VInt = V->getValue();
    if (VInt.isNegative() && ResultsInError.hasValue()) {
      diagnose(M.getASTContext(), BI->getLoc().getSourceLoc(),
               diag::wrong_non_negative_assumption,
               VInt.toString(/*Radix*/ 10, /*Signed*/ true));
      ResultsInError = Optional<bool>(true);
    }
    return V;
  }
  }
  return nullptr;
}
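
The IntToFPWithOverflow case above abandons the fold (and, when diagnostics are requested, reports integer_literal_overflow) if convertFromAPInt sets the opOverflow flag. A standalone sketch of the same overflow test, using a double-to-float narrowing in place of the APInt-to-APFloat conversion; the helper name is invented for the example and the check ignores the rounding boundary right at the top of the float range:

// Standalone sketch: a source value overflows the destination format when
// its magnitude exceeds the largest finite value of that format.
#include <cmath>
#include <cstdio>
#include <limits>

static bool convertOverflows(double Src) {
  return std::isfinite(Src) &&
         std::fabs(Src) > static_cast<double>(std::numeric_limits<float>::max());
}

int main() {
  std::printf("1e30  overflows float? %d\n", convertOverflows(1e30));  // 0
  std::printf("1e300 overflows float? %d\n", convertOverflows(1e300)); // 1
  return 0;
}
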
/// \brief Fold binary operations.
///
/// The list of operations we constant fold might not be complete. Start with
/// folding the operations used by the standard library.
static SILInstruction *constantFoldBinary(BuiltinInst *BI,
                                          BuiltinValueKind ID,
                                          Optional<bool> &ResultsInError) {
  switch (ID) {
  default:
    llvm_unreachable("Not all BUILTIN_BINARY_OPERATIONs are covered!");

  // Not supported yet (not easily computable for APInt).
  case BuiltinValueKind::ExactSDiv:
  case BuiltinValueKind::ExactUDiv:
    return nullptr;

  // Not supported now.
  case BuiltinValueKind::FRem:
    return nullptr;

  // Fold constant division operations and report div by zero.
  case BuiltinValueKind::SDiv:
  case BuiltinValueKind::SRem:
  case BuiltinValueKind::UDiv:
  case BuiltinValueKind::URem: {
    return constantFoldAndCheckDivision(BI, ID, ResultsInError);
  }

  // Are there valid uses for these in stdlib?
  case BuiltinValueKind::Add:
  case BuiltinValueKind::Mul:
  case BuiltinValueKind::Sub:
    return nullptr;

  case BuiltinValueKind::And:
  case BuiltinValueKind::AShr:
  case BuiltinValueKind::LShr:
  case BuiltinValueKind::Or:
  case BuiltinValueKind::Shl:
  case BuiltinValueKind::Xor: {
    OperandValueArrayRef Args = BI->getArguments();
    auto *LHS = dyn_cast<IntegerLiteralInst>(Args[0]);
    auto *RHS = dyn_cast<IntegerLiteralInst>(Args[1]);
    if (!RHS || !LHS)
      return nullptr;
    APInt LHSI = LHS->getValue();
    APInt RHSI = RHS->getValue();

    bool IsShift = ID == BuiltinValueKind::AShr ||
                   ID == BuiltinValueKind::LShr ||
                   ID == BuiltinValueKind::Shl;

    // Reject shifting all significant bits
    if (IsShift && RHSI.getZExtValue() >= LHSI.getBitWidth()) {
      diagnose(BI->getModule().getASTContext(),
               RHS->getLoc().getSourceLoc(),
               diag::shifting_all_significant_bits);

      ResultsInError = Optional<bool>(true);
      return nullptr;
    }

    APInt ResI = constantFoldBitOperation(LHSI, RHSI, ID);
    // Add the literal instruction to represent the result.
    SILBuilderWithScope B(BI);
    return B.createIntegerLiteral(BI->getLoc(), BI->getType(), ResI);
  }
  case BuiltinValueKind::FAdd:
  case BuiltinValueKind::FDiv:
  case BuiltinValueKind::FMul:
  case BuiltinValueKind::FSub: {
    OperandValueArrayRef Args = BI->getArguments();
    auto *LHS = dyn_cast<FloatLiteralInst>(Args[0]);
    auto *RHS = dyn_cast<FloatLiteralInst>(Args[1]);
    if (!RHS || !LHS)
      return nullptr;
    APFloat LHSF = LHS->getValue();
    APFloat RHSF = RHS->getValue();
    switch (ID) {
    default: llvm_unreachable("Not all cases are covered!");
    case BuiltinValueKind::FAdd:
      LHSF.add(RHSF, APFloat::rmNearestTiesToEven);
      break;
    case BuiltinValueKind::FDiv:
      LHSF.divide(RHSF, APFloat::rmNearestTiesToEven);
      break;
    case BuiltinValueKind::FMul:
      LHSF.multiply(RHSF, APFloat::rmNearestTiesToEven);
      break;
    case BuiltinValueKind::FSub:
      LHSF.subtract(RHSF, APFloat::rmNearestTiesToEven);
      break;
    }

    // Add the literal instruction to represent the result.
    SILBuilderWithScope B(BI);
    return B.createFloatLiteral(BI->getLoc(), BI->getType(), LHSF);
  }
  }
}
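
The shift cases in constantFoldBinary above refuse to fold when the shift amount is at least the operand's bit width, since such a shift would discard every significant bit, and emit the shifting_all_significant_bits diagnostic instead. A standalone sketch of that guard; the helper name is invented for the example:

// Standalone sketch of the shift-amount guard: only fold shifts whose
// amount is strictly less than the operand's bit width.
#include <cstdint>
#include <cstdio>

static bool isFoldableShiftAmount(uint64_t ShiftAmt, unsigned BitWidth) {
  return ShiftAmt < BitWidth;
}

int main() {
  std::printf("shl by 31 on i32: %s\n", isFoldableShiftAmount(31, 32) ? "fold" : "reject");
  std::printf("shl by 32 on i32: %s\n", isFoldableShiftAmount(32, 32) ? "fold" : "reject");
  return 0;
}
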