Example #1
SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
  // The vector type is legal but the element type needs expansion.
  MVT VecVT = N->getValueType(0);
  unsigned NumElts = VecVT.getVectorNumElements();
  MVT OldVT = N->getOperand(0).getValueType();
  MVT NewVT = TLI.getTypeToTransformTo(OldVT);
  DebugLoc dl = N->getDebugLoc();

  assert(OldVT == VecVT.getVectorElementType() &&
         "BUILD_VECTOR operand type doesn't match vector element type!");

  // Build a vector of twice the length out of the expanded elements.
  // For example <3 x i64> -> <6 x i32>.
  std::vector<SDValue> NewElts;
  NewElts.reserve(NumElts*2);

  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Lo, Hi;
    GetExpandedOp(N->getOperand(i), Lo, Hi);
    if (TLI.isBigEndian())
      std::swap(Lo, Hi);
    NewElts.push_back(Lo);
    NewElts.push_back(Hi);
  }

  SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
                               MVT::getVectorVT(NewVT, NewElts.size()),
                               &NewElts[0], NewElts.size());

  // Convert the new vector to the old vector type.
  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
}
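// A standalone sketch (not SelectionDAG code) of the expansion above: each
// illegal 64-bit element becomes a Lo/Hi pair of legal 32-bit elements, and
// the pair is swapped on big-endian targets so the bit-converted result has
// the same in-memory layout as the original vector.
#include <algorithm>
#include <cstdint>
#include <vector>

static std::vector<uint32_t> expandElements(const std::vector<uint64_t> &Elts,
                                            bool BigEndian) {
  std::vector<uint32_t> NewElts;
  NewElts.reserve(Elts.size() * 2);
  for (uint64_t E : Elts) {
    uint32_t Lo = static_cast<uint32_t>(E);       // low 32 bits
    uint32_t Hi = static_cast<uint32_t>(E >> 32); // high 32 bits
    if (BigEndian)
      std::swap(Lo, Hi);
    NewElts.push_back(Lo);
    NewElts.push_back(Hi);
  }
  return NewElts; // e.g. <3 x i64> -> <6 x i32>
}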
Example #2
SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
  // The vector type is legal but the element type needs expansion.
  MVT VecVT = N->getValueType(0);
  unsigned NumElts = VecVT.getVectorNumElements();
  DebugLoc dl = N->getDebugLoc();

  SDValue Val = N->getOperand(1);
  MVT OldEVT = Val.getValueType();
  MVT NewEVT = TLI.getTypeToTransformTo(OldEVT);

  assert(OldEVT == VecVT.getVectorElementType() &&
         "Inserted element type doesn't match vector element type!");

  // Bitconvert to a vector of twice the length with elements of the expanded
  // type, insert the expanded vector elements, and then convert back.
  MVT NewVecVT = MVT::getVectorVT(NewEVT, NumElts*2);
  SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
                               NewVecVT, N->getOperand(0));

  SDValue Lo, Hi;
  GetExpandedOp(Val, Lo, Hi);
  if (TLI.isBigEndian())
    std::swap(Lo, Hi);

  SDValue Idx = N->getOperand(2);
  Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
  NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
  Idx = DAG.getNode(ISD::ADD, dl,
                    Idx.getValueType(), Idx, DAG.getIntPtrConstant(1));
  NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);

  // Convert the new vector to the old vector type.
  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
}
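// The index arithmetic above in isolation: inserting the expanded element at
// index Idx of the old vector writes the doubled vector at 2*Idx (Lo) and
// 2*Idx + 1 (Hi). The first ISD::ADD computes Idx + Idx; the second adds 1.
#include <cstdint>
#include <vector>

static void insertExpanded(std::vector<uint32_t> &NewVec, unsigned Idx,
                           uint32_t Lo, uint32_t Hi) {
  unsigned LoIdx = Idx + Idx; // Idx * 2, as in the DAG above
  NewVec[LoIdx] = Lo;         // first INSERT_VECTOR_ELT
  NewVec[LoIdx + 1] = Hi;     // second INSERT_VECTOR_ELT at Idx*2 + 1
}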
Example #3
void X86InterleavedAccessGroup::interleave8bitStride3(
    ArrayRef<Instruction *> InVec, SmallVectorImpl<Value *> &TransposedMatrix,
    unsigned VecElems) {
  // Example: Assuming we start from the following vectors:
  // Matrix[0]= a0 a1 a2 a3 a4 a5 a6 a7
  // Matrix[1]= b0 b1 b2 b3 b4 b5 b6 b7
  // Matrix[2]= c0 c1 c2 c3 c4 c5 c6 c7

  TransposedMatrix.resize(3);
  SmallVector<uint32_t, 3> GroupSize;
  SmallVector<uint32_t, 32> VPShuf;
  SmallVector<uint32_t, 32> VPAlign[3];
  SmallVector<uint32_t, 32> VPAlign2;
  SmallVector<uint32_t, 32> VPAlign3;

  Value *Vec[3], *TempVector[3];
  MVT VT = MVT::getVectorVT(MVT::i8, VecElems);

  setGroupSize(VT, GroupSize);

  for (int i = 0; i < 3; i++)
    DecodePALIGNRMask(VT, GroupSize[i], VPAlign[i]);

  DecodePALIGNRMask(VT, GroupSize[1] + GroupSize[2], VPAlign2, false, true);
  DecodePALIGNRMask(VT, GroupSize[1], VPAlign3, false, true);

  // Vec[0]= a3 a4 a5 a6 a7 a0 a1 a2
  // Vec[1]= c5 c6 c7 c0 c1 c2 c3 c4
  // Vec[2]= b0 b1 b2 b3 b4 b5 b6 b7

  Vec[0] = Builder.CreateShuffleVector(
      InVec[0], UndefValue::get(InVec[0]->getType()), VPAlign2);
  Vec[1] = Builder.CreateShuffleVector(
      InVec[1], UndefValue::get(InVec[1]->getType()), VPAlign3);
  Vec[2] = InVec[2];

  // Vec[0]= a6 a7 a0 a1 a2 b0 b1 b2
  // Vec[1]= c0 c1 c2 c3 c4 a3 a4 a5
  // Vec[2]= b3 b4 b5 b6 b7 c5 c6 c7

  for (int i = 0; i < 3; i++)
    TempVector[i] =
        Builder.CreateShuffleVector(Vec[i], Vec[(i + 2) % 3], VPAlign[1]);

  // Vec[0]= a0 a1 a2 b0 b1 b2 c0 c1
  // Vec[1]= c2 c3 c4 a3 a4 a5 b3 b4
  // Vec[2]= b5 b6 b7 c5 c6 c7 a6 a7

  for (int i = 0; i < 3; i++)
    Vec[i] = Builder.CreateShuffleVector(TempVector[i], TempVector[(i + 1) % 3],
                                         VPAlign[2]);

  // TransposedMatrix[0] = a0 b0 c0 a1 b1 c1 a2 b2
  // TransposedMatrix[1] = c2 a3 b3 c3 a4 b4 c4 a5
  // TransposedMatrix[2] = b5 c5 a6 b6 c6 a7 b7 c7

  unsigned NumOfElm = VT.getVectorNumElements();
  group2Shuffle(VT, GroupSize, VPShuf);
  reorderSubVector(VT, TransposedMatrix, Vec, VPShuf, NumOfElm, 3, Builder);
}
Example #4
//  createShuffleStride returns a shuffle mask of size N.
//  The shuffle pattern is as follows:
//  {0, Stride%(VF/Lane), (2*Stride)%(VF/Lane), ..., (VF*Stride/Lane)%(VF/Lane),
//  (VF/Lane), (VF/Lane)+Stride%(VF/Lane), ...,
//  (VF/Lane)+(VF*Stride/Lane)%(VF/Lane)}
//  where Lane is the # of lanes in a register:
//  VectorSize = 128 => Lane = 1
//  VectorSize = 256 => Lane = 2
//  For example, the shuffle pattern for VF 16 and register size 256
//  (-> lanes = 2) is:
//  {<[0|3|6|1|4|7|2|5]-[8|11|14|9|12|15|10|13]>}
static void createShuffleStride(MVT VT, int Stride,
                                SmallVectorImpl<uint32_t> &Mask) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements();
  int LaneCount = std::max(VectorSize / 128, 1);
  for (int Lane = 0; Lane < LaneCount; Lane++)
    for (int i = 0, LaneSize = VF / LaneCount; i != LaneSize; ++i)
      Mask.push_back((i * Stride) % LaneSize + LaneSize * Lane);
}
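// The mask formula above evaluated standalone (no LLVM required): with
// VF = 16, a 256-bit register (two 128-bit lanes), and Stride = 3 it
// reproduces the documented mask {0,3,6,1,4,7,2,5,8,11,14,9,12,15,10,13}.
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  int VectorSize = 256, VF = 16, Stride = 3;
  int LaneCount = std::max(VectorSize / 128, 1);
  std::vector<int> Mask;
  for (int Lane = 0; Lane < LaneCount; ++Lane)
    for (int i = 0, LaneSize = VF / LaneCount; i != LaneSize; ++i)
      Mask.push_back((i * Stride) % LaneSize + LaneSize * Lane);
  for (int M : Mask)
    std::printf("%d ", M); // 0 3 6 1 4 7 2 5 8 11 14 9 12 15 10 13
  std::printf("\n");
  return 0;
}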
Example #5
//  setGroupSize sets 'SizeInfo' to the size (number of elements) of each
//  group in the given shuffle mask. A mask contains exactly 3 groups, where
//  each group is a monotonically increasing sequence with stride 3.
//  For example, shuffleMask {0,3,6,1,4,7,2,5} => {3,3,2}
static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements() / std::max(VectorSize / 128, 1);
  for (int i = 0, FirstGroupElement = 0; i < 3; i++) {
    int GroupSize = std::ceil((VF - FirstGroupElement) / 3.0);
    SizeInfo.push_back(GroupSize);
    FirstGroupElement = ((GroupSize)*3 + FirstGroupElement) % VF;
  }
}
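// The same arithmetic as setGroupSize with the MVT plumbing stripped out.
// For a single 128-bit lane of 8 elements it prints the documented {3, 3, 2}:
// the three groups start at values 0, 1, and 2 and step by 3 until the lane
// ends.
#include <cmath>
#include <cstdio>

int main() {
  const int VF = 8; // elements per lane
  for (int i = 0, FirstGroupElement = 0; i < 3; ++i) {
    int GroupSize = (int)std::ceil((VF - FirstGroupElement) / 3.0);
    std::printf("%d ", GroupSize); // prints 3 3 2
    FirstGroupElement = (GroupSize * 3 + FirstGroupElement) % VF;
  }
  std::printf("\n");
  return 0;
}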
Example #6
// genShuffleBland - Creates a shuffle mask spanning two vectors. This
// function only works on instructions whose lanes fit inside 256-bit
// registers. From the mask 'Mask' it creates a new mask 'Out' by adding an
// offset to each element. The offset amount depends on the two integers
// 'LowOffset' and 'HighOffset', where 'LowOffset' applies to elements taken
// from the first vector and 'HighOffset' to elements taken from the second
// vector.
// |a0....a5,b0....b4,c0....c4|a16..a21,b16..b20,c16..c20|
// |c5...c10,a5....a9,b5....b9|c21..c26,a22..a26,b21..b25|
// |b10..b15,c11..c15,a10..a15|b26..b31,c27..c31,a27..a31|
// For the sequence to work as a mirror of the load, we must consider the
// element order as above.
// This function combines two kinds of shuffles: a vpshufed-style per-lane
// shuffle and a "blend"-style shuffle. By computing the shuffle on a
// sequence of 16 elements (one lane) and adding the correct offset, we
// create a vpshufed + blend sequence between two shuffles.
static void genShuffleBland(MVT VT, ArrayRef<uint32_t> Mask,
                            SmallVectorImpl<uint32_t> &Out, int LowOffset,
                            int HighOffset) {
  assert(VT.getSizeInBits() >= 256 &&
         "This function doesn't accept width smaller than 256");
  unsigned NumOfElm = VT.getVectorNumElements();
  for (unsigned i = 0; i < Mask.size(); i++)
    Out.push_back(Mask[i] + LowOffset);
  for (unsigned i = 0; i < Mask.size(); i++)
    Out.push_back(Mask[i] + HighOffset + NumOfElm);
}
Example #7
SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
  DebugLoc dl = N->getDebugLoc();
  MVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SmallVector<SDValue, 16> Ops(NumElts);
  Ops[0] = N->getOperand(0);
  SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
  for (unsigned i = 1; i < NumElts; ++i)
    Ops[i] = UndefVal;
  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
}
Example #8
static void EmitTypeGenerate(std::ostream &OS, const Record *ArgType,
                             unsigned &ArgNo) {
  MVT::SimpleValueType VT = getValueType(ArgType->getValueAsDef("VT"));

  if (ArgType->isSubClassOf("LLVMMatchType")) {
    unsigned Number = ArgType->getValueAsInt("Number");
    assert(Number < ArgNo && "Invalid matching number!");
    if (ArgType->isSubClassOf("LLVMExtendedElementVectorType"))
      OS << "VectorType::getExtendedElementVectorType"
         << "(dyn_cast<VectorType>(Tys[" << Number << "]))";
    else if (ArgType->isSubClassOf("LLVMTruncatedElementVectorType"))
      OS << "VectorType::getTruncatedElementVectorType"
         << "(dyn_cast<VectorType>(Tys[" << Number << "]))";
    else
      OS << "Tys[" << Number << "]";
  } else if (VT == MVT::iAny || VT == MVT::fAny) {
    // NOTE: The ArgNo variable here is not the absolute argument number, it is
    // the index of the "arbitrary" type in the Tys array passed to the
    // Intrinsic::getDeclaration function. Consequently, we only want to
    // increment it when we actually hit an overloaded type. Getting this wrong
    // leads to very subtle bugs!
    OS << "Tys[" << ArgNo++ << "]";
  } else if (MVT(VT).isVector()) {
    MVT VVT = VT;
    OS << "VectorType::get(";
    EmitTypeForValueType(OS, VVT.getVectorElementType().getSimpleVT());
    OS << ", " << VVT.getVectorNumElements() << ")";
  } else if (VT == MVT::iPTR) {
    OS << "PointerType::getUnqual(";
    EmitTypeGenerate(OS, ArgType->getValueAsDef("ElTy"), ArgNo);
    OS << ")";
  } else if (VT == MVT::iPTRAny) {
    // Make sure the user has passed us an argument type to overload. If not,
    // treat it as an ordinary (not overloaded) intrinsic.
    OS << "(" << ArgNo << " < numTys) ? Tys[" << ArgNo 
    << "] : PointerType::getUnqual(";
    EmitTypeGenerate(OS, ArgType->getValueAsDef("ElTy"), ArgNo);
    OS << ")";
    ++ArgNo;
  } else if (VT == MVT::isVoid) {
    if (ArgNo == 0)
      OS << "Type::VoidTy";
    else
      // MVT::isVoid is used to mean varargs here.
      OS << "...";
  } else {
    EmitTypeForValueType(OS, VT);
  }
}
Example #9
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
                                          unsigned &NumIntermediates,
                                          MVT &RegisterVT,
                                          TargetLoweringBase *TLI) {
  // Figure out the right, legal destination reg to copy into.
  unsigned NumElts = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType();

  unsigned NumVectorRegs = 1;

  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we
  // could break down into LHS/RHS like LegalizeDAG does.
  if (!isPowerOf2_32(NumElts)) {
    NumVectorRegs = NumElts;
    NumElts = 1;
  }

  // Divide the input until we get to a supported size.  This will always
  // end with a scalar if the target doesn't support vectors.
  while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
    NumElts >>= 1;
    NumVectorRegs <<= 1;
  }

  NumIntermediates = NumVectorRegs;

  MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
  if (!TLI->isTypeLegal(NewVT))
    NewVT = EltTy;
  IntermediateVT = NewVT;

  unsigned NewVTSize = NewVT.getSizeInBits();

  // Convert sizes such as i33 to i64.
  if (!isPowerOf2_32(NewVTSize))
    NewVTSize = NextPowerOf2(NewVTSize);

  MVT DestVT = TLI->getRegisterType(NewVT);
  RegisterVT = DestVT;
  if (EVT(DestVT).bitsLT(NewVT))    // Value is expanded, e.g. i64 -> i16.
    return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());

  // Otherwise, promotion or legal types use the same number of registers as
  // the vector decimated to the appropriate level.
  return NumVectorRegs;
}
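// A standalone model of the halving loop above, assuming a hypothetical
// target whose widest legal vector holds 2 elements: a v8 input becomes 4
// intermediate v2 registers. NumElts halves while NumVectorRegs doubles, so
// their product (the total element count) is preserved.
#include <cstdio>

int main() {
  unsigned NumElts = 8, NumVectorRegs = 1;
  const unsigned WidestLegalElts = 2; // stand-in for the TLI->isTypeLegal check
  while (NumElts > WidestLegalElts) {
    NumElts >>= 1;
    NumVectorRegs <<= 1;
  }
  std::printf("%u registers of %u elements each\n", NumVectorRegs, NumElts);
  return 0;
}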
Example #10
LLT::LLT(MVT VT) {
  if (VT.isVector()) {
    SizeInBits = VT.getVectorElementType().getSizeInBits();
    ElementsOrAddrSpace = VT.getVectorNumElements();
    Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
  } else if (VT.isValid()) {
    // Aggregates are no different from real scalars as far as GlobalISel is
    // concerned.
    Kind = Scalar;
    SizeInBits = VT.getSizeInBits();
    ElementsOrAddrSpace = 1;
    assert(SizeInBits != 0 && "invalid zero-sized type");
  } else {
    Kind = Invalid;
    SizeInBits = ElementsOrAddrSpace = 0;
  }
}
Example #11
// group2Shuffle reorders the shuffle stride back into continuous order.
// For example, for VF16 with Mask1 = {0,3,6,9,12,15,2,5,8,11,14,1,4,7,10,13}
// => MaskResult = {0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5}.
static void group2Shuffle(MVT VT, SmallVectorImpl<uint32_t> &Mask,
                          SmallVectorImpl<uint32_t> &Output) {
  int IndexGroup[3] = {0, 0, 0};
  int Index = 0;
  int VectorWidth = VT.getSizeInBits();
  int VF = VT.getVectorNumElements();
  // Find the index of the different groups.
  int Lane = (VectorWidth / 128 > 0) ? VectorWidth / 128 : 1;
  for (int i = 0; i < 3; i++) {
    IndexGroup[(Index * 3) % (VF / Lane)] = Index;
    Index += Mask[i];
  }
  // According to the index compute the convert mask.
  for (int i = 0; i < VF / Lane; i++) {
    Output.push_back(IndexGroup[i % 3]);
    IndexGroup[i % 3]++;
  }
}
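// A hedged walk-through of group2Shuffle for one 128-bit lane with VF = 16.
// Note that the Mask parameter holds the three group sizes computed by
// setGroupSize ({6, 5, 5} here), not shuffle indices; the loop reconstructs
// the documented MaskResult {0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5}.
#include <cstdio>

int main() {
  const int VF = 16, Lane = 1;
  int Mask[3] = {6, 5, 5}; // group sizes from setGroupSize
  int IndexGroup[3] = {0, 0, 0};
  int Index = 0;
  for (int i = 0; i < 3; ++i) {
    IndexGroup[(Index * 3) % (VF / Lane)] = Index; // start index of each group
    Index += Mask[i];
  }
  for (int i = 0; i < VF / Lane; ++i)
    std::printf("%d ", IndexGroup[i % 3]++); // 0 11 6 1 12 7 2 13 8 ...
  std::printf("\n");
  return 0;
}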
Example #12
//  DecodePALIGNRMask returns the shuffle mask of the vpalignr instruction.
//  vpalignr works per lane, where Lane is the # of lanes in a register:
//  VectorWide = 128 => Lane = 1
//  VectorWide = 256 => Lane = 2
//  For Lane = 1 the shuffle pattern is: {DiffToJump,...,DiffToJump+VF-1}.
//  For Lane = 2 the shuffle pattern is:
//  {DiffToJump,...,VF/2-1,VF,...,DiffToJump+VF-1}.
//  The Imm variable sets the offset amount. The resulting mask is stored in
//  the ShuffleMask vector and is built as described above. AlignDirection is
//  a boolean that indicates the direction of the alignment (false - align to
//  the "right" side, true - align to the "left" side).
static void DecodePALIGNRMask(MVT VT, unsigned Imm,
                              SmallVectorImpl<uint32_t> &ShuffleMask,
                              bool AlignDirection = true, bool Unary = false) {
  unsigned NumElts = VT.getVectorNumElements();
  unsigned NumLanes = std::max((int)VT.getSizeInBits() / 128, 1);
  unsigned NumLaneElts = NumElts / NumLanes;

  Imm = AlignDirection ? Imm : (NumLaneElts - Imm);
  unsigned Offset = Imm * (VT.getScalarSizeInBits() / 8);

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Base = i + Offset;
      // If i+Offset is out of this lane then we actually need the other
      // source. If Unary, the other source is the first source.
      if (Base >= NumLaneElts)
        Base = Unary ? Base % NumLaneElts : Base + NumElts - NumLaneElts;
      ShuffleMask.push_back(Base + l);
    }
  }
}
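// The mask construction above evaluated for a single-lane v16i8 with Imm = 3
// (AlignDirection = true, Unary = false): elements 3..15 come from the first
// source and the wrap-around continues into the second source at 16..18.
#include <cstdio>

int main() {
  const unsigned NumElts = 16, NumLanes = 1;
  const unsigned NumLaneElts = NumElts / NumLanes;
  const unsigned Imm = 3, Offset = Imm * 1; // i8 scalar => 1 byte per element
  const bool Unary = false;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Base = i + Offset;
      if (Base >= NumLaneElts) // spilled out of the lane: use the other source
        Base = Unary ? Base % NumLaneElts : Base + NumElts - NumLaneElts;
      std::printf("%u ", Base + l); // 3 4 5 ... 15 16 17 18
    }
  std::printf("\n");
  return 0;
}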
Example #13
// Change the scale of the vector type: halve the number of elements and
// double the scalar size.
static MVT scaleVectorType(MVT VT) {
  unsigned ScalarSize = VT.getVectorElementType().getScalarSizeInBits() * 2;
  return MVT::getVectorVT(MVT::getIntegerVT(ScalarSize),
                          VT.getVectorNumElements() / 2);
}
Example #14
SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
  DebugLoc dl = N->getDebugLoc();
  StoreSDNode *ST = cast<StoreSDNode>(N);
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = NULL;

  // do not support pre/post inc/dec
  if (ST->isIndexed())
    return NULL;

  if (!StoreVT.isSimple())
    return NULL;

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = ST->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    unsigned num = SimpleVT.getVectorNumElements();
    if (num == 2)
      vecType = NVPTX::PTXLdStInstCode::V2;
    else if (num == 4)
      vecType = NVPTX::PTXLdStInstCode::V4;
    else
      return NULL;
  }

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  //
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  unsigned int toType;
  if (ScalarVT.isFloatingPoint())
    toType = NVPTX::PTXLdStInstCode::Float;
  else
    toType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue Addr;
  SDValue Offset, Base;
  unsigned Opcode;
  MVT::SimpleValueType SourceVT =
      N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;

  if (SelectDirectAddr(N2, Addr)) {
    switch (SourceVT) {
    case MVT::i8:
      Opcode = NVPTX::ST_i8_avar;
      break;
    case MVT::i16:
      Opcode = NVPTX::ST_i16_avar;
      break;
    case MVT::i32:
      Opcode = NVPTX::ST_i32_avar;
      break;
    case MVT::i64:
      Opcode = NVPTX::ST_i64_avar;
      break;
    case MVT::f32:
      Opcode = NVPTX::ST_f32_avar;
      break;
    case MVT::f64:
      Opcode = NVPTX::ST_f64_avar;
      break;
    default:
      return NULL;
    }
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Addr, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    switch (SourceVT) {
    case MVT::i8:
      Opcode = NVPTX::ST_i8_asi;
      break;
    case MVT::i16:
      Opcode = NVPTX::ST_i16_asi;
      break;
    case MVT::i32:
      Opcode = NVPTX::ST_i32_asi;
      break;
    case MVT::i64:
      Opcode = NVPTX::ST_i64_asi;
      break;
    case MVT::f32:
      Opcode = NVPTX::ST_f32_asi;
      break;
    case MVT::f64:
      Opcode = NVPTX::ST_f64_asi;
      break;
    default:
      return NULL;
    }
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (SourceVT) {
      case MVT::i8:
        Opcode = NVPTX::ST_i8_ari_64;
        break;
      case MVT::i16:
        Opcode = NVPTX::ST_i16_ari_64;
        break;
      case MVT::i32:
        Opcode = NVPTX::ST_i32_ari_64;
        break;
      case MVT::i64:
        Opcode = NVPTX::ST_i64_ari_64;
        break;
      case MVT::f32:
        Opcode = NVPTX::ST_f32_ari_64;
        break;
      case MVT::f64:
        Opcode = NVPTX::ST_f64_ari_64;
        break;
      default:
        return NULL;
      }
    } else {
      switch (SourceVT) {
      case MVT::i8:
        Opcode = NVPTX::ST_i8_ari;
        break;
      case MVT::i16:
        Opcode = NVPTX::ST_i16_ari;
        break;
      case MVT::i32:
        Opcode = NVPTX::ST_i32_ari;
        break;
      case MVT::i64:
        Opcode = NVPTX::ST_i64_ari;
        break;
      case MVT::f32:
        Opcode = NVPTX::ST_f32_ari;
        break;
      case MVT::f64:
        Opcode = NVPTX::ST_f64_ari;
        break;
      default:
        return NULL;
      }
    }
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
  } else {
    if (Subtarget.is64Bit()) {
      switch (SourceVT) {
      case MVT::i8:
        Opcode = NVPTX::ST_i8_areg_64;
        break;
      case MVT::i16:
        Opcode = NVPTX::ST_i16_areg_64;
        break;
      case MVT::i32:
        Opcode = NVPTX::ST_i32_areg_64;
        break;
      case MVT::i64:
        Opcode = NVPTX::ST_i64_areg_64;
        break;
      case MVT::f32:
        Opcode = NVPTX::ST_f32_areg_64;
        break;
      case MVT::f64:
        Opcode = NVPTX::ST_f64_areg_64;
        break;
      default:
        return NULL;
      }
    } else {
      switch (SourceVT) {
      case MVT::i8:
        Opcode = NVPTX::ST_i8_areg;
        break;
      case MVT::i16:
        Opcode = NVPTX::ST_i16_areg;
        break;
      case MVT::i32:
        Opcode = NVPTX::ST_i32_areg;
        break;
      case MVT::i64:
        Opcode = NVPTX::ST_i64_areg;
        break;
      case MVT::f32:
        Opcode = NVPTX::ST_f32_areg;
        break;
      case MVT::f64:
        Opcode = NVPTX::ST_f64_areg;
        break;
      default:
        return NULL;
      }
    }
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), N2, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
  }

  if (NVPTXST != NULL) {
    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
  }

  return NVPTXST;
}
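// The opcode selection above, summarized: the suffix encodes the addressing
// mode that matched (_avar = direct address, _asi = symbol + immediate,
// _ari = register + immediate, _areg = plain register), and the register
// based forms get a _64 variant on 64-bit subtargets. The helper below is
// hypothetical (not part of the NVPTX backend) and just makes the pattern
// explicit; SelectLoad in the next example follows the same convention.
#include <string>

static std::string ldStOpcodeSuffix(bool DirectAddr, bool SymImm, bool RegImm,
                                    bool Is64Bit) {
  if (DirectAddr)
    return "_avar";
  if (SymImm)
    return "_asi";
  if (RegImm)
    return Is64Bit ? "_ari_64" : "_ari";
  return Is64Bit ? "_areg_64" : "_areg";
}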
Example #15
SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
  DebugLoc dl = N->getDebugLoc();
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD = NULL;

  // do not support pre/post inc/dec
  if (LD->isIndexed())
    return NULL;

  if (!LoadedVT.isSimple())
    return NULL;

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = LD->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = LoadedVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    unsigned num = SimpleVT.getVectorNumElements();
    if (num == 2)
      vecType = NVPTX::PTXLdStInstCode::V2;
    else if (num == 4)
      vecType = NVPTX::PTXLdStInstCode::V4;
    else
      return NULL;
  }

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //          type is integer
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned fromTypeWidth = ScalarVT.getSizeInBits();
  unsigned int fromType;
  if ((LD->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    fromType = NVPTX::PTXLdStInstCode::Float;
  else
    fromType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Addr;
  SDValue Offset, Base;
  unsigned Opcode;
  MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;

  if (SelectDirectAddr(N1, Addr)) {
    switch (TargetVT) {
    case MVT::i8:
      Opcode = NVPTX::LD_i8_avar;
      break;
    case MVT::i16:
      Opcode = NVPTX::LD_i16_avar;
      break;
    case MVT::i32:
      Opcode = NVPTX::LD_i32_avar;
      break;
    case MVT::i64:
      Opcode = NVPTX::LD_i64_avar;
      break;
    case MVT::f32:
      Opcode = NVPTX::LD_f32_avar;
      break;
    case MVT::f64:
      Opcode = NVPTX::LD_f64_avar;
      break;
    default:
      return NULL;
    }
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    switch (TargetVT) {
    case MVT::i8:
      Opcode = NVPTX::LD_i8_asi;
      break;
    case MVT::i16:
      Opcode = NVPTX::LD_i16_asi;
      break;
    case MVT::i32:
      Opcode = NVPTX::LD_i32_asi;
      break;
    case MVT::i64:
      Opcode = NVPTX::LD_i64_asi;
      break;
    case MVT::f32:
      Opcode = NVPTX::LD_f32_asi;
      break;
    case MVT::f64:
      Opcode = NVPTX::LD_f64_asi;
      break;
    default:
      return NULL;
    }
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (TargetVT) {
      case MVT::i8:
        Opcode = NVPTX::LD_i8_ari_64;
        break;
      case MVT::i16:
        Opcode = NVPTX::LD_i16_ari_64;
        break;
      case MVT::i32:
        Opcode = NVPTX::LD_i32_ari_64;
        break;
      case MVT::i64:
        Opcode = NVPTX::LD_i64_ari_64;
        break;
      case MVT::f32:
        Opcode = NVPTX::LD_f32_ari_64;
        break;
      case MVT::f64:
        Opcode = NVPTX::LD_f64_ari_64;
        break;
      default:
        return NULL;
      }
    } else {
      switch (TargetVT) {
      case MVT::i8:
        Opcode = NVPTX::LD_i8_ari;
        break;
      case MVT::i16:
        Opcode = NVPTX::LD_i16_ari;
        break;
      case MVT::i32:
        Opcode = NVPTX::LD_i32_ari;
        break;
      case MVT::i64:
        Opcode = NVPTX::LD_i64_ari;
        break;
      case MVT::f32:
        Opcode = NVPTX::LD_f32_ari;
        break;
      case MVT::f64:
        Opcode = NVPTX::LD_f64_ari;
        break;
      default:
        return NULL;
      }
    }
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
  } else {
    if (Subtarget.is64Bit()) {
      switch (TargetVT) {
      case MVT::i8:
        Opcode = NVPTX::LD_i8_areg_64;
        break;
      case MVT::i16:
        Opcode = NVPTX::LD_i16_areg_64;
        break;
      case MVT::i32:
        Opcode = NVPTX::LD_i32_areg_64;
        break;
      case MVT::i64:
        Opcode = NVPTX::LD_i64_areg_64;
        break;
      case MVT::f32:
        Opcode = NVPTX::LD_f32_areg_64;
        break;
      case MVT::f64:
        Opcode = NVPTX::LD_f64_areg_64;
        break;
      default:
        return NULL;
      }
    } else {
      switch (TargetVT) {
      case MVT::i8:
        Opcode = NVPTX::LD_i8_areg;
        break;
      case MVT::i16:
        Opcode = NVPTX::LD_i16_areg;
        break;
      case MVT::i32:
        Opcode = NVPTX::LD_i32_areg;
        break;
      case MVT::i64:
        Opcode = NVPTX::LD_i64_areg;
        break;
      case MVT::f32:
        Opcode = NVPTX::LD_f32_areg;
        break;
      case MVT::f64:
        Opcode = NVPTX::LD_f64_areg;
        break;
      default:
        return NULL;
      }
    }
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
  }

  if (NVPTXLD != NULL) {
    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
  }

  return NVPTXLD;
}
Example #16
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLoweringBase::computeRegisterProperties() {
  assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
         "Too many value types for ValueTypeActions to hold!");

  // Everything defaults to needing one register.
  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
    NumRegistersForVT[i] = 1;
    RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
  }
  // ...except isVoid, which doesn't need any registers.
  NumRegistersForVT[MVT::isVoid] = 0;

  // Find the largest integer register class.
  unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
  for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
    assert(LargestIntReg != MVT::i1 && "No integer registers defined!");

  // Every integer value type larger than this largest register takes twice as
  // many registers to represent as the previous ValueType.
  for (unsigned ExpandedReg = LargestIntReg + 1;
       ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
    NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
    RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
    TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
    ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
                                   TypeExpandInteger);
  }

  // Inspect all of the ValueType's smaller than the largest integer
  // register to see which ones need promotion.
  unsigned LegalIntReg = LargestIntReg;
  for (unsigned IntReg = LargestIntReg - 1;
       IntReg >= (unsigned)MVT::i1; --IntReg) {
    MVT IVT = (MVT::SimpleValueType)IntReg;
    if (isTypeLegal(IVT)) {
      LegalIntReg = IntReg;
    } else {
      RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
        (const MVT::SimpleValueType)LegalIntReg;
      ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
    }
  }

  // ppcf128 type is really two f64's.
  if (!isTypeLegal(MVT::ppcf128)) {
    NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
    RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
    TransformToType[MVT::ppcf128] = MVT::f64;
    ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
  }

  // Decide how to handle f128. If the target does not have native f128 support,
  // expand it to i128 and we will be generating soft float library calls.
  if (!isTypeLegal(MVT::f128)) {
    NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
    RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
    TransformToType[MVT::f128] = MVT::i128;
    ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
  }

  // Decide how to handle f64. If the target does not have native f64 support,
  // expand it to i64 and we will be generating soft float library calls.
  if (!isTypeLegal(MVT::f64)) {
    NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
    RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
    TransformToType[MVT::f64] = MVT::i64;
    ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
  }

  // Decide how to handle f32. If the target does not have native support for
  // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
  if (!isTypeLegal(MVT::f32)) {
    if (isTypeLegal(MVT::f64)) {
      NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
      RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
      TransformToType[MVT::f32] = MVT::f64;
      ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger);
    } else {
      NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
      RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
      TransformToType[MVT::f32] = MVT::i32;
      ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
    }
  }

  // Loop over all of the vector value types to see which need transformations.
  for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;
    if (isTypeLegal(VT)) continue;

    // Determine if there is a legal wider type.  If so, we should promote to
    // that wider vector type.
    MVT EltVT = VT.getVectorElementType();
    unsigned NElts = VT.getVectorNumElements();
    if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) {
      bool IsLegalWiderType = false;
      // First try to promote the elements of integer vectors. If no legal
      // promotion was found, fallback to the widen-vector method.
      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
        MVT SVT = (MVT::SimpleValueType)nVT;
        // Promote vectors of integers to vectors with the same number
        // of elements, with a wider element type.
        if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
            && SVT.getVectorNumElements() == NElts &&
            isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
          TransformToType[i] = SVT;
          RegisterTypeForVT[i] = SVT;
          NumRegistersForVT[i] = 1;
          ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
          IsLegalWiderType = true;
          break;
        }
      }

      if (IsLegalWiderType) continue;

      // Try to widen the vector.
      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
        MVT SVT = (MVT::SimpleValueType)nVT;
        if (SVT.getVectorElementType() == EltVT &&
            SVT.getVectorNumElements() > NElts &&
            isTypeLegal(SVT)) {
          TransformToType[i] = SVT;
          RegisterTypeForVT[i] = SVT;
          NumRegistersForVT[i] = 1;
          ValueTypeActions.setTypeAction(VT, TypeWidenVector);
          IsLegalWiderType = true;
          break;
        }
      }
      if (IsLegalWiderType) continue;
    }

    MVT IntermediateVT;
    MVT RegisterVT;
    unsigned NumIntermediates;
    NumRegistersForVT[i] =
      getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
                                RegisterVT, this);
    RegisterTypeForVT[i] = RegisterVT;

    MVT NVT = VT.getPow2VectorType();
    if (NVT == VT) {
      // Type is already a power of 2.  The default action is to split.
      TransformToType[i] = MVT::Other;
      unsigned NumElts = VT.getVectorNumElements();
      ValueTypeActions.setTypeAction(VT,
            NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
    } else {
      TransformToType[i] = NVT;
      ValueTypeActions.setTypeAction(VT, TypeWidenVector);
    }
  }

  // Determine the 'representative' register class for each value type.
  // A representative register class is the largest (meaning one which is
  // not a sub-register class / subreg register class) legal register class
  // for a group of value types. For example, on i386 the representative
  // class for i8, i16, and i32 would be GR32; on x86_64 it would be GR64.
  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
    const TargetRegisterClass* RRC;
    uint8_t Cost;
    tie(RRC, Cost) =  findRepresentativeClass((MVT::SimpleValueType)i);
    RepRegClassForVT[i] = RRC;
    RepRegClassCostForVT[i] = Cost;
  }
}
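// A toy model (not TargetLowering code) of the integer-expansion loop above:
// assuming the largest legal integer register is i32, i64 needs 2 registers
// and i128 needs 4, doubling at each step; the real code also records that
// each such type transforms to the next-smaller one (i128 -> i64 -> i32).
#include <cstdio>

int main() {
  const char *Names[] = {"i8", "i16", "i32", "i64", "i128"};
  unsigned NumRegs[5];
  const int LargestLegal = 2; // index of i32, the largest legal type here
  for (int i = 0; i <= LargestLegal; ++i)
    NumRegs[i] = 1; // legal types take one register
  for (int i = LargestLegal + 1; i < 5; ++i)
    NumRegs[i] = 2 * NumRegs[i - 1]; // doubles past the largest register
  for (int i = 0; i < 5; ++i)
    std::printf("%s -> %u regs\n", Names[i], NumRegs[i]);
  return 0;
}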
Example #17
void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
                                             SDValue &Hi) {
  MVT OutVT = N->getValueType(0);
  MVT NOutVT = TLI.getTypeToTransformTo(OutVT);
  SDValue InOp = N->getOperand(0);
  MVT InVT = InOp.getValueType();
  DebugLoc dl = N->getDebugLoc();

  // Handle some special cases efficiently.
  switch (getTypeAction(InVT)) {
    default:
      assert(false && "Unknown type action!");
    case Legal:
    case PromoteInteger:
      break;
    case SoftenFloat:
      // Convert the integer operand instead.
      SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
      return;
    case ExpandInteger:
    case ExpandFloat:
      // Convert the expanded pieces of the input.
      GetExpandedOp(InOp, Lo, Hi);
      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
      return;
    case SplitVector:
      // Convert the split parts of the input if it was split in two.
      GetSplitVector(InOp, Lo, Hi);
      if (Lo.getValueType() == Hi.getValueType()) {
        if (TLI.isBigEndian())
          std::swap(Lo, Hi);
        Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
        Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
        return;
      }
      break;
    case ScalarizeVector:
      // Convert the element instead.
      SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
      return;
    case WidenVector: {
      assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT");
      InOp = GetWidenedVector(InOp);
      MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
                                   InVT.getVectorNumElements()/2);
      Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
                       DAG.getIntPtrConstant(0));
      Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
                       DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
      if (TLI.isBigEndian())
        std::swap(Lo, Hi);
      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
      return;
    }
  }

  // Lower the bit-convert to a store/load from the stack.
  assert(NOutVT.isByteSized() && "Expanded type not byte sized!");

  // Create the stack frame object.  Make sure it is aligned for both
  // the source and expanded destination types.
  unsigned Alignment =
    TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForMVT());
  SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  const Value *SV = PseudoSourceValue::getFixedStack(SPFI);

  // Emit a store to the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0);

  // Load the first half from the stack slot.
  Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0);

  // Increment the pointer to the other half.
  unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
  StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
                         DAG.getIntPtrConstant(IncrementSize));

  // Load the second half from the stack slot.
  Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false,
                   MinAlign(Alignment, IncrementSize));

  // Handle endianness of the load.
  if (TLI.isBigEndian())
    std::swap(Lo, Hi);
}
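// A memcpy-level sketch of the stack-slot fallback above: store the wide
// operand, then reload it as two NOutVT-sized halves. The half at the lower
// address (offset 0) is Lo on little-endian targets and Hi on big-endian
// ones, which is exactly why the code ends with the conditional swap.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint64_t In = 0x1122334455667788ULL; // stand-in for the input operand
  unsigned char Slot[8];
  std::memcpy(Slot, &In, sizeof(In)); // the "store to the stack slot"

  uint32_t Lo, Hi;
  std::memcpy(&Lo, Slot, 4);     // load the first half at offset 0
  std::memcpy(&Hi, Slot + 4, 4); // load the second half at IncrementSize = 4
  // On a big-endian host the printed values trade places, matching the swap.
  std::printf("Lo=0x%08x Hi=0x%08x\n", (unsigned)Lo, (unsigned)Hi);
  return 0;
}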
Example #18
bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
                                     ArrayRef<ArgInfo> Args,
                                     ValueHandler &Handler) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  unsigned NumArgs = Args.size();
  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT CurVT = MVT::getVT(Args[i].Ty);
    if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) {
      // Try to use the register type if we couldn't assign the VT.
      if (!Handler.isArgumentHandler() || !CurVT.isValid())
        return false; 
      CurVT = TLI->getRegisterTypeForCallingConv(
          F.getContext(), F.getCallingConv(), EVT(CurVT));
      if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo))
        return false;
    }
  }

  for (unsigned i = 0, e = Args.size(), j = 0; i != e; ++i, ++j) {
    assert(j < ArgLocs.size() && "Skipped too many arg locs");

    CCValAssign &VA = ArgLocs[j];
    assert(VA.getValNo() == i && "Location doesn't correspond to current arg");

    if (VA.needsCustom()) {
      j += Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
      continue;
    }

    if (VA.isRegLoc()) {
      MVT OrigVT = MVT::getVT(Args[i].Ty);
      MVT VAVT = VA.getValVT();
      if (Handler.isArgumentHandler() && VAVT != OrigVT) {
        if (VAVT.getSizeInBits() < OrigVT.getSizeInBits())
          return false; // Can't handle this type of arg yet.
        const LLT VATy(VAVT);
        unsigned NewReg =
            MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
        Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
        // If it's a vector type, we either need to truncate the elements
        // or do an unmerge to get the lower block of elements.
        if (VATy.isVector() &&
            VATy.getNumElements() > OrigVT.getVectorNumElements()) {
          const LLT OrigTy(OrigVT);
          // Just handle the case where the VA type is 2 * original type.
          if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) {
            LLVM_DEBUG(dbgs()
                       << "Incoming promoted vector arg has too many elts");
            return false;
          }
          auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg});
          MIRBuilder.buildCopy(Args[i].Reg, Unmerge.getReg(0));
        } else {
          MIRBuilder.buildTrunc(Args[i].Reg, {NewReg});
        }
      } else {
        Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA);
      }
    } else if (VA.isMemLoc()) {
      MVT VT = MVT::getVT(Args[i].Ty);
      unsigned Size = VT == MVT::iPTR ? DL.getPointerSize()
                                      : alignTo(VT.getSizeInBits(), 8) / 8;
      unsigned Offset = VA.getLocMemOffset();
      MachinePointerInfo MPO;
      unsigned StackAddr = Handler.getStackAddress(Size, Offset, MPO);
      Handler.assignValueToAddress(Args[i].Reg, StackAddr, Size, MPO, VA);
    } else {
      // FIXME: Support byvals and other weirdness
      return false;
    }
  }
  return true;
}
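// The memory-argument size computation above, in isolation: a non-pointer
// type occupies alignTo(bits, 8) / 8 bytes on the stack (iPTR uses the
// DataLayout pointer size instead), so i1 still takes a full byte and i33
// takes five.
#include <cstdio>

static unsigned byteSize(unsigned Bits) {
  return (Bits + 7) / 8; // same value as alignTo(Bits, 8) / 8
}

int main() {
  std::printf("%u %u %u\n", byteSize(1), byteSize(33), byteSize(64)); // 1 5 8
  return 0;
}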