Example #1
SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
  assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
  assert(OpNo == 1 && "Can only expand the stored value so far");
  DebugLoc dl = N->getDebugLoc();

  StoreSDNode *St = cast<StoreSDNode>(N);
  MVT NVT = TLI.getTypeToTransformTo(St->getValue().getValueType());
  SDValue Chain = St->getChain();
  SDValue Ptr = St->getBasePtr();
  int SVOffset = St->getSrcValueOffset();
  unsigned Alignment = St->getAlignment();
  bool isVolatile = St->isVolatile();

  assert(NVT.isByteSized() && "Expanded type not byte sized!");
  unsigned IncrementSize = NVT.getSizeInBits() / 8;

  SDValue Lo, Hi;
  GetExpandedOp(St->getValue(), Lo, Hi);

  if (TLI.isBigEndian())
    std::swap(Lo, Hi);

  Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset,
                    isVolatile, Alignment);

  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                    DAG.getIntPtrConstant(IncrementSize));
  assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
  Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(),
                    SVOffset + IncrementSize,
                    isVolatile, MinAlign(Alignment, IncrementSize));

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
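Roughly, this expansion turns one illegal-typed store into two legal stores at Ptr and Ptr + IncrementSize. A minimal standalone sketch of the same idea (not LLVM code; storeI64Expanded is a made-up name, and the byte order within each half follows the host here):
#include <cstdint>
#include <cstring>
#include <utility>

static void storeI64Expanded(uint8_t *Ptr, uint64_t Val, bool TargetIsBigEndian) {
  uint32_t Lo = static_cast<uint32_t>(Val);        // low half of the value
  uint32_t Hi = static_cast<uint32_t>(Val >> 32);  // high half of the value
  if (TargetIsBigEndian)
    std::swap(Lo, Hi);           // mirrors the std::swap(Lo, Hi) above
  std::memcpy(Ptr, &Lo, 4);      // first store at Ptr
  std::memcpy(Ptr + 4, &Hi, 4);  // second store at Ptr + IncrementSize (4)
}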
Example #2
/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries
/// to emit an instruction with an immediate operand using FastEmit_ri.
/// If that fails, it materializes the immediate into a register and tries
/// FastEmit_rr instead.
unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
                                unsigned Op0, bool Op0IsKill,
                                uint64_t Imm, MVT ImmType) {
  // If this is a multiply by a power of two, emit this as a shift left.
  if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
    Opcode = ISD::SHL;
    Imm = Log2_64(Imm);
  } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
    // div x, 8 -> srl x, 3
    Opcode = ISD::SRL;
    Imm = Log2_64(Imm);
  }

  // Horrible hack (to be removed), check to make sure shift amounts are
  // in-range.
  if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
      Imm >= VT.getSizeInBits())
    return 0;

  // First check if immediate type is legal. If not, we can't use the ri form.
  unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
  if (ResultReg != 0)
    return ResultReg;
  unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
  if (MaterialReg == 0) {
    // This is a bit ugly/slow, but failing here means falling out of
    // fast-isel, which would be very slow.
    const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
                                              VT.getSizeInBits());
    MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
  }
  return FastEmit_rr(VT, VT, Opcode,
                     Op0, Op0IsKill,
                     MaterialReg, /*Kill=*/true);
}
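The strength reduction at the top of the function is easy to check in isolation. A minimal standalone sketch, with stand-ins for isPowerOf2_64 and Log2_64 (not the LLVM helpers themselves):
#include <cassert>
#include <cstdint>

static bool isPow2(uint64_t X) { return X && (X & (X - 1)) == 0; }

static unsigned logBase2(uint64_t X) {
  unsigned N = 0;
  while (X >>= 1)
    ++N;
  return N;
}

static uint64_t mulByImm(uint64_t X, uint64_t Imm) {
  if (isPow2(Imm))
    return X << logBase2(Imm);  // mul x, 8  ->  shl x, 3
  return X * Imm;
}

static uint64_t udivByImm(uint64_t X, uint64_t Imm) {
  assert(Imm != 0 && "division by zero");
  if (isPow2(Imm))
    return X >> logBase2(Imm);  // udiv x, 8 ->  srl x, 3
  return X / Imm;
}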
Example #3
SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
  // The vector type is legal but the element type needs expansion.
  MVT VecVT = N->getValueType(0);
  unsigned NumElts = VecVT.getVectorNumElements();
  MVT OldVT = N->getOperand(0).getValueType();
  MVT NewVT = TLI.getTypeToTransformTo(OldVT);
  DebugLoc dl = N->getDebugLoc();

  assert(OldVT == VecVT.getVectorElementType() &&
         "BUILD_VECTOR operand type doesn't match vector element type!");

  // Build a vector of twice the length out of the expanded elements.
  // For example <3 x i64> -> <6 x i32>.
  std::vector<SDValue> NewElts;
  NewElts.reserve(NumElts*2);

  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Lo, Hi;
    GetExpandedOp(N->getOperand(i), Lo, Hi);
    if (TLI.isBigEndian())
      std::swap(Lo, Hi);
    NewElts.push_back(Lo);
    NewElts.push_back(Hi);
  }

  SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
                                 MVT::getVectorVT(NewVT, NewElts.size()),
                                 &NewElts[0], NewElts.size());

  // Convert the new vector to the old vector type.
  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
}
void X86InterleavedAccessGroup::interleave8bitStride3(
    ArrayRef<Instruction *> InVec, SmallVectorImpl<Value *> &TransposedMatrix,
    unsigned VecElems) {
  // Example: Assuming we start from the following vectors:
  // Matrix[0]= a0 a1 a2 a3 a4 a5 a6 a7
  // Matrix[1]= b0 b1 b2 b3 b4 b5 b6 b7
  // Matrix[2]= c0 c1 c2 c3 c4 c5 c6 c7

  TransposedMatrix.resize(3);
  SmallVector<uint32_t, 3> GroupSize;
  SmallVector<uint32_t, 32> VPShuf;
  SmallVector<uint32_t, 32> VPAlign[3];
  SmallVector<uint32_t, 32> VPAlign2;
  SmallVector<uint32_t, 32> VPAlign3;

  Value *Vec[3], *TempVector[3];
  MVT VT = MVT::getVectorVT(MVT::i8, VecElems);

  setGroupSize(VT, GroupSize);

  for (int i = 0; i < 3; i++)
    DecodePALIGNRMask(VT, GroupSize[i], VPAlign[i]);

  DecodePALIGNRMask(VT, GroupSize[1] + GroupSize[2], VPAlign2, false, true);
  DecodePALIGNRMask(VT, GroupSize[1], VPAlign3, false, true);

  // Vec[0]= a3 a4 a5 a6 a7 a0 a1 a2
  // Vec[1]= c5 c6 c7 c0 c1 c2 c3 c4
  // Vec[2]= b0 b1 b2 b3 b4 b5 b6 b7

  Vec[0] = Builder.CreateShuffleVector(
      InVec[0], UndefValue::get(InVec[0]->getType()), VPAlign2);
  Vec[1] = Builder.CreateShuffleVector(
      InVec[1], UndefValue::get(InVec[1]->getType()), VPAlign3);
  Vec[2] = InVec[2];

  // Vec[0]= a6 a7 a0 a1 a2 b0 b1 b2
  // Vec[1]= c0 c1 c2 c3 c4 a3 a4 a5
  // Vec[2]= b3 b4 b5 b6 b7 c5 c6 c7

  for (int i = 0; i < 3; i++)
    TempVector[i] =
        Builder.CreateShuffleVector(Vec[i], Vec[(i + 2) % 3], VPAlign[1]);

  // Vec[0]= a0 a1 a2 b0 b1 b2 c0 c1
  // Vec[1]= c2 c3 c4 a3 a4 a5 b3 b4
  // Vec[2]= b5 b6 b7 c5 c6 c7 a6 a7

  for (int i = 0; i < 3; i++)
    Vec[i] = Builder.CreateShuffleVector(TempVector[i], TempVector[(i + 1) % 3],
                                         VPAlign[2]);

  // TransposedMatrix[0] = a0 b0 c0 a1 b1 c1 a2 b2
  // TransposedMatrix[1] = c2 a3 b3 c3 a4 b4 c4 a5
  // TransposedMatrix[2] = b5 c5 a6 b6 c6 a7 b7 c7

  unsigned NumOfElm = VT.getVectorNumElements();
  group2Shuffle(VT, GroupSize, VPShuf);
  reorderSubVector(VT, TransposedMatrix, Vec, VPShuf, NumOfElm, 3, Builder);
}
Example #5
static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
  SDNode *Node = Op.getNode();
  MVT VT = Node->getValueType(0);
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  DebugLoc dl = Node->getDebugLoc();
  SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0);
  // Increment the pointer, VAList, to the next vaarg
  SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
                                  DAG.getConstant(VT.getSizeInBits()/8,
                                                  MVT::i32));
  // Store the incremented VAList to the legalized pointer
  InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
                         VAListPtr, SV, 0);
  // Load the actual argument out of the pointer VAList, unless this is an
  // f64 load.
  if (VT != MVT::f64)
    return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0);

  // Otherwise, load it as i64, then do a bitconvert.
  SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0);

  // Bit-Convert the value to f64.
  SDValue Ops[2] = {
    DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, V),
    V.getValue(1)
  };
  return DAG.getMergeValues(Ops, 2, dl);
}
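The first half of this lowering is the usual va_arg pointer bump: load the current pointer, store it back incremented, then load the argument from the old address. A hedged C-style sketch of those steps (illustrative only, not target-specific):
static void *vaArgSlot(char **VAListPtr, unsigned ArgSizeInBytes) {
  char *Addr = *VAListPtr;             // load the current va_list pointer
  *VAListPtr = Addr + ArgSizeInBytes;  // store the incremented pointer back
  return Addr;                         // the caller loads the argument from Addr
}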
Example #6
SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
  // The vector type is legal but the element type needs expansion.
  MVT VecVT = N->getValueType(0);
  unsigned NumElts = VecVT.getVectorNumElements();
  DebugLoc dl = N->getDebugLoc();

  SDValue Val = N->getOperand(1);
  MVT OldEVT = Val.getValueType();
  MVT NewEVT = TLI.getTypeToTransformTo(OldEVT);

  assert(OldEVT == VecVT.getVectorElementType() &&
         "Inserted element type doesn't match vector element type!");

  // Bitconvert to a vector of twice the length with elements of the expanded
  // type, insert the expanded vector elements, and then convert back.
  MVT NewVecVT = MVT::getVectorVT(NewEVT, NumElts*2);
  SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
                               NewVecVT, N->getOperand(0));

  SDValue Lo, Hi;
  GetExpandedOp(Val, Lo, Hi);
  if (TLI.isBigEndian())
    std::swap(Lo, Hi);

  SDValue Idx = N->getOperand(2);
  Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
  NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
  Idx = DAG.getNode(ISD::ADD, dl,
                    Idx.getValueType(), Idx, DAG.getIntPtrConstant(1));
  NewVec =  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);

  // Convert the new vector to the old vector type.
  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
}
Example #7
MVT MVT::getExtendedIntegerVT(unsigned BitWidth) {
  const Type *&ET = ExtendedIntegerTypeMap[BitWidth];
  if (!ET) ET = new ExtendedIntegerType(BitWidth);
  MVT VT;
  VT.LLVMTy = ET;
  assert(VT.isExtended() && "Type is not extended!");
  return VT;
}
Example #8
MVT MVT::getExtendedVectorVT(MVT VT, unsigned NumElements) {
  const Type *&ET = ExtendedVectorTypeMap[std::make_pair(VT.getRawBits(),
                                                         NumElements)];
  if (!ET) ET = new ExtendedVectorType(VT, NumElements);
  MVT ResultVT;
  ResultVT.LLVMTy = ET;
  assert(ResultVT.isExtended() && "Type is not extended!");
  return ResultVT;
}
//  createShuffleStride returns a shuffle mask of size N.
//  The shuffle pattern is as follows:
//  {0, Stride%(VF/Lane), (2*Stride)%(VF/Lane), ..., (VF*Stride/Lane)%(VF/Lane),
//   (VF/Lane), (VF/Lane)+Stride%(VF/Lane), ...,
//   (VF/Lane)+(VF*Stride/Lane)%(VF/Lane)}
//  where Lane is the # of 128-bit lanes in a register:
//  VectorSize = 128 => Lane = 1
//  VectorSize = 256 => Lane = 2
//  For example, the shuffle pattern for VF = 16 and register size 256
//  (lanes = 2) is {<[0|3|6|1|4|7|2|5]-[8|11|14|9|12|15|10|13]>}.
static void createShuffleStride(MVT VT, int Stride,
                                SmallVectorImpl<uint32_t> &Mask) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements();
  int LaneCount = std::max(VectorSize / 128, 1);
  for (int Lane = 0; Lane < LaneCount; Lane++)
    for (int i = 0, LaneSize = VF / LaneCount; i != LaneSize; ++i)
      Mask.push_back((i * Stride) % LaneSize + LaneSize * Lane);
}
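For the case quoted in the comment (VF = 16 in a 256-bit register, Stride = 3, so two lanes of eight elements), the mask can be reproduced with a small standalone program (illustrative only, not LLVM code):
#include <cstdio>
#include <vector>

int main() {
  const int VF = 16, Stride = 3, VectorSizeBits = 256;
  const int LaneCount = VectorSizeBits / 128;  // 2 lanes
  const int LaneSize = VF / LaneCount;         // 8 elements per lane
  std::vector<unsigned> Mask;
  for (int Lane = 0; Lane < LaneCount; ++Lane)
    for (int i = 0; i != LaneSize; ++i)
      Mask.push_back((i * Stride) % LaneSize + LaneSize * Lane);
  for (unsigned M : Mask)
    std::printf("%u ", M);  // 0 3 6 1 4 7 2 5 8 11 14 9 12 15 10 13
  std::printf("\n");
  return 0;
}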
Example #10
//  setGroupSize sets 'SizeInfo' to the size (number of elements) of each
//  group inside a shuffle mask. A mask contains exactly 3 groups, where
//  each group is a monotonically increasing sequence with stride 3.
//  For example, the shuffle mask {0,3,6,1,4,7,2,5} => {3,3,2}.
static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements() / std::max(VectorSize / 128, 1);
  for (int i = 0, FirstGroupElement = 0; i < 3; i++) {
    int GroupSize = std::ceil((VF - FirstGroupElement) / 3.0);
    SizeInfo.push_back(GroupSize);
    FirstGroupElement = ((GroupSize)*3 + FirstGroupElement) % VF;
  }
}
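The {3,3,2} example can be traced with the same arithmetic. A standalone sketch for one lane of eight elements (illustrative only):
#include <cmath>
#include <cstdio>

int main() {
  const int VF = 8;  // elements per lane, matching the {0,3,6,1,4,7,2,5} example
  int FirstGroupElement = 0;
  for (int i = 0; i < 3; i++) {
    int GroupSize = (int)std::ceil((VF - FirstGroupElement) / 3.0);
    std::printf("%d ", GroupSize);  // prints: 3 3 2
    FirstGroupElement = (GroupSize * 3 + FirstGroupElement) % VF;
  }
  std::printf("\n");
  return 0;
}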
// genShuffleBland - Creates a shuffle mask from two vectors. This function
// only works on instructions whose lanes fit inside 256-bit registers. From
// the input mask 'Mask' it creates a new mask 'Out' by adding an offset to
// each element. The offset amount depends on the two integers 'LowOffset' and
// 'HighOffset', where 'LowOffset' applies to the first vector and 'HighOffset'
// applies to the second vector.
// |a0....a5,b0....b4,c0....c4|a16..a21,b16..b20,c16..c20|
// |c5...c10,a5....a9,b5....b9|c21..c26,a22..a26,b21..b25|
// |b10..b15,c11..c15,a10..a15|b26..b31,c27..c31,a27..a31|
// For the sequence to work as a mirror of the load, we must consider the
// element order as shown above.
// This function combines two kinds of shuffles: the first is a vpshuf-style
// per-lane shuffle and the second is a "blend"-style shuffle. By computing
// the shuffle on a sequence of 16 elements (one lane) and adding the correct
// offset, we create a vpshuf + blend sequence between two shuffles.
static void genShuffleBland(MVT VT, ArrayRef<uint32_t> Mask,
                            SmallVectorImpl<uint32_t> &Out, int LowOffset,
                            int HighOffset) {
  assert(VT.getSizeInBits() >= 256 &&
         "This function doesn't accept width smaller than 256");
  unsigned NumOfElm = VT.getVectorNumElements();
  for (unsigned i = 0; i < Mask.size(); i++)
    Out.push_back(Mask[i] + LowOffset);
  for (unsigned i = 0; i < Mask.size(); i++)
    Out.push_back(Mask[i] + HighOffset + NumOfElm);
}
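Mechanically, the output is the input mask shifted twice: by LowOffset for the first source and by HighOffset plus the element count for the second source. A standalone sketch with made-up offset values (illustrative only):
#include <cstdio>
#include <vector>

int main() {
  const unsigned Mask[] = {0, 3, 6, 1, 4, 7, 2, 5};  // one-lane stride-3 mask
  const unsigned NumOfElm = 16, LowOffset = 0, HighOffset = 8;
  std::vector<unsigned> Out;
  for (unsigned M : Mask)
    Out.push_back(M + LowOffset);              // picks from the first source
  for (unsigned M : Mask)
    Out.push_back(M + HighOffset + NumOfElm);  // picks from the second source
  for (unsigned O : Out)
    std::printf("%u ", O);  // 0 3 6 1 4 7 2 5 24 27 30 25 28 31 26 29
  std::printf("\n");
  return 0;
}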
Example #12
SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
  DebugLoc dl = N->getDebugLoc();
  MVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SmallVector<SDValue, 16> Ops(NumElts);
  Ops[0] = N->getOperand(0);
  SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
  for (unsigned i = 1; i < NumElts; ++i)
    Ops[i] = UndefVal;
  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
}
Example #13
// NVPTX supports vectors of legal element types of any length in intrinsics,
// because the NVPTX-specific type legalizer will legalize them to a
// PTX-supported length.
bool
NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
  if (isTypeLegal(VT))
    return true;
  if (VT.isVector()) {
    MVT eVT = VT.getVectorElementType();
    if (isTypeLegal(eVT))
      return true;
  }
  return false;
}
Example #14
bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                          CCValAssign::LocInfo &LocInfo,
                          ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // On the second pass, go through the HVAs only.
  if (ArgFlags.isSecArgPass()) {
    if (ArgFlags.isHva())
      return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
                                             ArgFlags, State);
    return true;
  }

  // Process only vector types as defined by vectorcall spec:
  // "A vector type is either a floating-point type, for example,
  //  a float or double, or an SIMD vector type, for example, __m128 or __m256".
  if (!(ValVT.isFloatingPoint() ||
        (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
    // If R9 was already assigned it means that we are after the fourth element
    // and because this is not an HVA / Vector type, we need to allocate
    // shadow XMM register.
    if (State.isAllocated(X86::R9)) {
      // Assign shadow XMM register.
      (void)State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT));
    }

    return false;
  }

  if (!ArgFlags.isHva() || ArgFlags.isHvaStart()) {
    // Assign shadow GPR register.
    (void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs());

    // Assign XMM register - (shadow for HVA and non-shadow for non HVA).
    if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
      // In Vectorcall Calling convention, additional shadow stack can be
      // created on top of the basic 32 bytes of win64.
      // It can happen if the fifth or sixth argument is vector type or HVA.
      // At that case for each argument a shadow stack of 8 bytes is allocated.
      if (Reg == X86::XMM4 || Reg == X86::XMM5)
        State.AllocateStack(8, 8);

      if (!ArgFlags.isHva()) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
        return true; // Allocated a register - Stop the search.
      }
    }
  }

  // If this is an HVA - Stop the search,
  // otherwise continue the search.
  return ArgFlags.isHva();
}
static void EmitTypeGenerate(std::ostream &OS, const Record *ArgType,
                             unsigned &ArgNo) {
  MVT::SimpleValueType VT = getValueType(ArgType->getValueAsDef("VT"));

  if (ArgType->isSubClassOf("LLVMMatchType")) {
    unsigned Number = ArgType->getValueAsInt("Number");
    assert(Number < ArgNo && "Invalid matching number!");
    if (ArgType->isSubClassOf("LLVMExtendedElementVectorType"))
      OS << "VectorType::getExtendedElementVectorType"
         << "(dyn_cast<VectorType>(Tys[" << Number << "]))";
    else if (ArgType->isSubClassOf("LLVMTruncatedElementVectorType"))
      OS << "VectorType::getTruncatedElementVectorType"
         << "(dyn_cast<VectorType>(Tys[" << Number << "]))";
    else
      OS << "Tys[" << Number << "]";
  } else if (VT == MVT::iAny || VT == MVT::fAny) {
    // NOTE: The ArgNo variable here is not the absolute argument number, it is
    // the index of the "arbitrary" type in the Tys array passed to the
    // Intrinsic::getDeclaration function. Consequently, we only want to
    // increment it when we actually hit an overloaded type. Getting this wrong
    // leads to very subtle bugs!
    OS << "Tys[" << ArgNo++ << "]";
  } else if (MVT(VT).isVector()) {
    MVT VVT = VT;
    OS << "VectorType::get(";
    EmitTypeForValueType(OS, VVT.getVectorElementType().getSimpleVT());
    OS << ", " << VVT.getVectorNumElements() << ")";
  } else if (VT == MVT::iPTR) {
    OS << "PointerType::getUnqual(";
    EmitTypeGenerate(OS, ArgType->getValueAsDef("ElTy"), ArgNo);
    OS << ")";
  } else if (VT == MVT::iPTRAny) {
    // Make sure the user has passed us an argument type to overload. If not,
    // treat it as an ordinary (not overloaded) intrinsic.
    OS << "(" << ArgNo << " < numTys) ? Tys[" << ArgNo 
    << "] : PointerType::getUnqual(";
    EmitTypeGenerate(OS, ArgType->getValueAsDef("ElTy"), ArgNo);
    OS << ")";
    ++ArgNo;
  } else if (VT == MVT::isVoid) {
    if (ArgNo == 0)
      OS << "Type::VoidTy";
    else
      // MVT::isVoid is used to mean varargs here.
      OS << "...";
  } else {
    EmitTypeForValueType(OS, VT);
  }
}
Example #16
static ArrayRef<MCPhysReg> CC_X86_VectorCallGetSSEs(const MVT &ValVT) {
  if (ValVT.is512BitVector()) {
    static const MCPhysReg RegListZMM[] = {X86::ZMM0, X86::ZMM1, X86::ZMM2,
                                           X86::ZMM3, X86::ZMM4, X86::ZMM5};
    return makeArrayRef(std::begin(RegListZMM), std::end(RegListZMM));
  }

  if (ValVT.is256BitVector()) {
    static const MCPhysReg RegListYMM[] = {X86::YMM0, X86::YMM1, X86::YMM2,
                                           X86::YMM3, X86::YMM4, X86::YMM5};
    return makeArrayRef(std::begin(RegListYMM), std::end(RegListYMM));
  }

  static const MCPhysReg RegListXMM[] = {X86::XMM0, X86::XMM1, X86::XMM2,
                                         X86::XMM3, X86::XMM4, X86::XMM5};
  return makeArrayRef(std::begin(RegListXMM), std::end(RegListXMM));
}
Example #17
LLT::LLT(MVT VT) {
  if (VT.isVector()) {
    SizeInBits = VT.getVectorElementType().getSizeInBits();
    ElementsOrAddrSpace = VT.getVectorNumElements();
    Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
  } else if (VT.isValid()) {
    // Aggregates are no different from real scalars as far as GlobalISel is
    // concerned.
    Kind = Scalar;
    SizeInBits = VT.getSizeInBits();
    ElementsOrAddrSpace = 1;
    assert(SizeInBits != 0 && "invalid zero-sized type");
  } else {
    Kind = Invalid;
    SizeInBits = ElementsOrAddrSpace = 0;
  }
}
// group2Shuffle reorders the shuffle stride back into continuous order.
// For example, for VF16 with Mask1 = {0,3,6,9,12,15,2,5,8,11,14,1,4,7,10,13} =>
// MaskResult = {0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5}.
static void group2Shuffle(MVT VT, SmallVectorImpl<uint32_t> &Mask,
                          SmallVectorImpl<uint32_t> &Output) {
  int IndexGroup[3] = {0, 0, 0};
  int Index = 0;
  int VectorWidth = VT.getSizeInBits();
  int VF = VT.getVectorNumElements();
  // Find the index of the different groups.
  int Lane = (VectorWidth / 128 > 0) ? VectorWidth / 128 : 1;
  for (int i = 0; i < 3; i++) {
    IndexGroup[(Index * 3) % (VF / Lane)] = Index;
    Index += Mask[i];
  }
  // According to the index compute the convert mask.
  for (int i = 0; i < VF / Lane; i++) {
    Output.push_back(IndexGroup[i % 3]);
    IndexGroup[i % 3]++;
  }
}
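Tracing the code for the VF16 case in the comment: as used by the caller above, the Mask argument holds the three group sizes from setGroupSize ({6,5,5} for VF = 16), and the reordered mask comes out as quoted. A standalone trace (illustrative only, not LLVM code):
#include <cstdio>

int main() {
  const int VF = 16, Lane = 1;
  const int GroupSize[3] = {6, 5, 5};  // setGroupSize result for VF = 16
  int IndexGroup[3] = {0, 0, 0};
  int Index = 0;
  for (int i = 0; i < 3; i++) {
    IndexGroup[(Index * 3) % (VF / Lane)] = Index;  // start index of each group
    Index += GroupSize[i];
  }
  for (int i = 0; i < VF / Lane; i++)
    std::printf("%d ", IndexGroup[i % 3]++);
  // prints: 0 11 6 1 12 7 2 13 8 3 14 9 4 15 10 5
  std::printf("\n");
  return 0;
}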
//  DecodePALIGNRMask returns the shuffle mask of the vpalignr instruction.
//  vpalignr works per lane, where Lane is the # of 128-bit lanes in a
//  register:
//  VectorWidth = 128 => Lane = 1
//  VectorWidth = 256 => Lane = 2
//  For Lane = 1 the shuffle pattern is: {DiffToJump,...,DiffToJump+VF-1}.
//  For Lane = 2 the shuffle pattern is:
//  {DiffToJump,...,VF/2-1,VF,...,DiffToJump+VF-1}.
//  The Imm variable sets the offset amount. The result of the function is
//  stored in the ShuffleMask vector and is built as described above.
//  AlignDirection is a boolean that indicates the direction of the alignment
//  (false - align to the "right" side, true - align to the "left" side).
static void DecodePALIGNRMask(MVT VT, unsigned Imm,
                              SmallVectorImpl<uint32_t> &ShuffleMask,
                              bool AlignDirection = true, bool Unary = false) {
  unsigned NumElts = VT.getVectorNumElements();
  unsigned NumLanes = std::max((int)VT.getSizeInBits() / 128, 1);
  unsigned NumLaneElts = NumElts / NumLanes;

  Imm = AlignDirection ? Imm : (NumLaneElts - Imm);
  unsigned Offset = Imm * (VT.getScalarSizeInBits() / 8);

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Base = i + Offset;
      // if i+offset is out of this lane then we actually need the other source
      // If Unary the other source is the first source.
      if (Base >= NumLaneElts)
        Base = Unary ? Base % NumLaneElts : Base + NumElts - NumLaneElts;
      ShuffleMask.push_back(Base + l);
    }
  }
}
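As a concrete instance of the mask construction (illustrative only, not LLVM code): one 128-bit lane of sixteen i8 elements, Imm = 3, default alignment direction, binary (two-source) shuffle:
#include <cstdio>
#include <vector>

int main() {
  const unsigned NumElts = 16, NumLanes = 1, NumLaneElts = NumElts / NumLanes;
  const unsigned Offset = 3 * 1;  // Imm = 3, i8 elements are 1 byte wide
  const bool Unary = false;
  std::vector<unsigned> Mask;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Base = i + Offset;
      if (Base >= NumLaneElts)  // past the lane: take from the other source
        Base = Unary ? Base % NumLaneElts : Base + NumElts - NumLaneElts;
      Mask.push_back(Base + l);
    }
  for (unsigned M : Mask)
    std::printf("%u ", M);  // 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
  std::printf("\n");
  return 0;
}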
std::pair<unsigned, const TargetRegisterClass *>
WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // WebAssembly register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
      case 'r':
        assert(VT != MVT::iPTR && "Pointer MVT not expected here");
        if (Subtarget->hasSIMD128() && VT.isVector()) {
          if (VT.getSizeInBits() == 128)
            return std::make_pair(0U, &WebAssembly::V128RegClass);
        }
        if (VT.isInteger() && !VT.isVector()) {
          if (VT.getSizeInBits() <= 32)
            return std::make_pair(0U, &WebAssembly::I32RegClass);
          if (VT.getSizeInBits() <= 64)
            return std::make_pair(0U, &WebAssembly::I64RegClass);
        }
        break;
      default:
        break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
SDValue LanaiTargetLowering::LowerSRL_PARTS(SDValue Op,
                                            SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  unsigned VTBits = VT.getSizeInBits();
  SDLoc dl(Op);
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);

  // Performs the following for a >> b:
  //   unsigned r_high = a_high >> b;
  //   r_high = (32 - b <= 0) ? 0 : r_high;
  //
  //   unsigned r_low = a_low >> b;
  //   r_low = (32 - b <= 0) ? r_high : r_low;
  //   r_low = (b == 0) ? r_low : r_low | (a_high << (32 - b));
  //   return (unsigned long long)r_high << 32 | r_low;
  // Note: This takes advantage of Lanai's shift behavior to avoid needing to
  // mask the shift amount.

  SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
  SDValue NegatedPlus32 = DAG.getNode(
      ISD::SUB, dl, MVT::i32, DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
  SDValue SetCC = DAG.getSetCC(dl, MVT::i32, NegatedPlus32, Zero, ISD::SETLE);

  SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i32, ShOpHi, ShAmt);
  Hi = DAG.getSelect(dl, MVT::i32, SetCC, Zero, Hi);

  SDValue Lo = DAG.getNode(ISD::SRL, dl, MVT::i32, ShOpLo, ShAmt);
  Lo = DAG.getSelect(dl, MVT::i32, SetCC, Hi, Lo);
  SDValue CarryBits =
      DAG.getNode(ISD::SHL, dl, MVT::i32, ShOpHi, NegatedPlus32);
  SDValue ShiftIsZero = DAG.getSetCC(dl, MVT::i32, ShAmt, Zero, ISD::SETEQ);
  Lo = DAG.getSelect(dl, MVT::i32, ShiftIsZero, Lo,
                     DAG.getNode(ISD::OR, dl, MVT::i32, Lo, CarryBits));

  SDValue Ops[2] = {Lo, Hi};
  return DAG.getMergeValues(Ops, dl);
}
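A standalone C++ reference for the pseudocode in the comment (a sketch only; the explicit branches below stand in for the clamping the Lanai lowering gets from its unmasked shift-amount behavior):
#include <cstdint>

static uint64_t srl64Reference(uint32_t LoIn, uint32_t HiIn, unsigned Amt) {
  // Assumes 0 <= Amt <= 63.
  uint32_t Hi, Lo;
  if (Amt == 0) {
    Hi = HiIn;
    Lo = LoIn;
  } else if (Amt < 32) {
    Hi = HiIn >> Amt;
    Lo = (LoIn >> Amt) | (HiIn << (32 - Amt));  // OR in the carry bits
  } else {  // 32 <= Amt <= 63: the high word is fully shifted out
    Hi = 0;
    Lo = HiIn >> (Amt - 32);
  }
  return (uint64_t)Hi << 32 | Lo;
}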
Example #22
bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                          CCValAssign::LocInfo &LocInfo,
                          ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // On the second pass, go through the HVAs only.
  if (ArgFlags.isSecArgPass()) {
    if (ArgFlags.isHva())
      return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
                                             ArgFlags, State);
    return true;
  }

  // Process only vector types as defined by vectorcall spec:
  // "A vector type is either a floating point type, for example,
  //  a float or double, or an SIMD vector type, for example, __m128 or __m256".
  if (!(ValVT.isFloatingPoint() ||
        (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
    return false;
  }

  if (ArgFlags.isHva())
    return true; // If this is an HVA - Stop the search.

  // Assign XMM register.
  if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return true;
  }

  // In case we did not find an available XMM register for a vector -
  // pass it indirectly.
  // It is similar to CCPassIndirect, with the addition of inreg.
  if (!ValVT.isFloatingPoint()) {
    LocVT = MVT::i32;
    LocInfo = CCValAssign::Indirect;
    ArgFlags.setInReg();
  }

  return false; // No register was assigned - Continue the search.
}
Example #23
void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
                                            SDValue &Hi) {
  assert(ISD::isNormalLoad(N) && "This routine only for normal loads!");
  DebugLoc dl = N->getDebugLoc();

  LoadSDNode *LD = cast<LoadSDNode>(N);
  MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0));
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  int SVOffset = LD->getSrcValueOffset();
  unsigned Alignment = LD->getAlignment();
  bool isVolatile = LD->isVolatile();

  assert(NVT.isByteSized() && "Expanded type not byte sized!");

  Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
                   isVolatile, Alignment);

  // Increment the pointer to the other half.
  unsigned IncrementSize = NVT.getSizeInBits() / 8;
  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                    DAG.getIntPtrConstant(IncrementSize));
  Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
                   SVOffset+IncrementSize,
                   isVolatile, MinAlign(Alignment, IncrementSize));

  // Build a factor node to remember that this load is independent of the
  // other one.
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                      Hi.getValue(1));

  // Handle endianness of the load.
  if (TLI.isBigEndian())
    std::swap(Lo, Hi);

  // Modified the chain - switch anything that used the old chain to use
  // the new one.
  ReplaceValueWith(SDValue(N, 1), Chain);
}
Example #24
static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                CCValAssign::LocInfo &LocInfo,
                                ISD::ArgFlagsTy &ArgFlags,
                                CCState &State) {
    static const unsigned ArgRegs[] = {
        MBlaze::R5, MBlaze::R6, MBlaze::R7,
        MBlaze::R8, MBlaze::R9, MBlaze::R10
    };

    const unsigned NumArgRegs = array_lengthof(ArgRegs);
    unsigned Reg = State.AllocateReg(ArgRegs, NumArgRegs);
    if (!Reg) return false;

    unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
    State.AllocateStack(SizeInBytes, SizeInBytes);
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));

    return true;
}
Example #25
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
                                          unsigned &NumIntermediates,
                                          MVT &RegisterVT,
                                          TargetLoweringBase *TLI) {
  // Figure out the right, legal destination reg to copy into.
  unsigned NumElts = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType();

  unsigned NumVectorRegs = 1;

  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we
  // could break down into LHS/RHS like LegalizeDAG does.
  if (!isPowerOf2_32(NumElts)) {
    NumVectorRegs = NumElts;
    NumElts = 1;
  }

  // Divide the input until we get to a supported size.  This will always
  // end with a scalar if the target doesn't support vectors.
  while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
    NumElts >>= 1;
    NumVectorRegs <<= 1;
  }

  NumIntermediates = NumVectorRegs;

  MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
  if (!TLI->isTypeLegal(NewVT))
    NewVT = EltTy;
  IntermediateVT = NewVT;

  unsigned NewVTSize = NewVT.getSizeInBits();

  // Convert sizes such as i33 to i64.
  if (!isPowerOf2_32(NewVTSize))
    NewVTSize = NextPowerOf2(NewVTSize);

  MVT DestVT = TLI->getRegisterType(NewVT);
  RegisterVT = DestVT;
  if (EVT(DestVT).bitsLT(NewVT))    // Value is expanded, e.g. i64 -> i16.
    return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());

  // Otherwise, promotion or legal types use the same number of registers as
  // the vector decimated to the appropriate level.
  return NumVectorRegs;
}
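The halving loop in the middle is the core of the breakdown. A standalone sketch for a hypothetical target whose widest legal vector has four elements (all values here are assumptions for illustration):
#include <cstdio>

int main() {
  unsigned NumElts = 8;                // e.g. a v8i32 input
  unsigned NumVectorRegs = 1;
  const unsigned WidestLegalElts = 4;  // assumption: v4i32 is the widest legal type
  while (NumElts > 1 && NumElts > WidestLegalElts) {
    NumElts >>= 1;
    NumVectorRegs <<= 1;
  }
  // prints: 2 parts x 4 elements
  std::printf("%u parts x %u elements\n", NumVectorRegs, NumElts);
  return 0;
}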
Example #26
std::string NVPTXTargetLowering::getPrototype(Type *retTy,
                                              const ArgListTy &Args,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                              unsigned retAlignment) const {

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";

  if (retTy->getTypeID() == Type::VoidTyID)
    O << "()";
  else {
    O << "(";
    if (isABI) {
      if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
        unsigned size = 0;
        if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
          size = ITy->getBitWidth();
          if (size < 32) size = 32;
        }
        else {
          assert(retTy->isFloatingPointTy() &&
                 "Floating point type expected here");
          size = retTy->getPrimitiveSizeInBits();
        }

        O << ".param .b" << size << " _";
      }
      else if (isa<PointerType>(retTy))
        O << ".param .b" << getPointerTy().getSizeInBits()
        << " _";
      else {
        if ((retTy->getTypeID() == Type::StructTyID) ||
            isa<VectorType>(retTy)) {
          SmallVector<EVT, 16> vtparts;
          ComputeValueVTs(*this, retTy, vtparts);
          unsigned totalsz = 0;
          for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
            unsigned elems = 1;
            EVT elemtype = vtparts[i];
            if (vtparts[i].isVector()) {
              elems = vtparts[i].getVectorNumElements();
              elemtype = vtparts[i].getVectorElementType();
            }
            for (unsigned j=0, je=elems; j!=je; ++j) {
              unsigned sz = elemtype.getSizeInBits();
              if (elemtype.isInteger() && (sz < 8)) sz = 8;
              totalsz += sz/8;
            }
          }
          O << ".param .align "
              << retAlignment
              << " .b8 _["
              << totalsz << "]";
        }
        else {
          assert(false &&
                 "Unknown return type");
        }
      }
    }
    else {
      SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*this, retTy, vtparts);
      unsigned idx = 0;
      for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }

        for (unsigned j=0, je=elems; j!=je; ++j) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
          O << ".reg .b" << sz << " _";
          if (j<je-1) O << ", ";
          ++idx;
        }
        if (i < e-1)
          O << ", ";
      }
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;
  MVT thePointerTy = getPointerTy();

  for (unsigned i=0,e=Args.size(); i!=e; ++i) {
    const Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (Outs[i].Flags.isByVal() == false) {
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32) sz = 32;
      }
      else if (isa<PointerType>(Ty))
        sz = thePointerTy.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      if (isABI)
        O << ".param .b" << sz << " ";
      else
        O << ".reg .b" << sz << " ";
      O << "_";
      continue;
    }
    const PointerType *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy &&
           "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    if (isABI) {
      unsigned align = Outs[i].Flags.getByValAlign();
      unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
      O << ".param .align " << align
          << " .b8 ";
      O << "_";
      O << "[" << sz << "]";
      continue;
    }
    else {
      SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*this, ETy, vtparts);
      for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }

        for (unsigned j=0,je=elems; j!=je; ++j) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
          O << ".reg .b" << sz << " ";
          O << "_";
          if (j<je-1) O << ", ";
        }
        if (i<e-1)
          O << ", ";
      }
      continue;
    }
  }
  O << ");";
  return O.str();
}
Example #27
SDValue
X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
                                        SDValue Chain, SDValue Dst, SDValue Src,
                                        SDValue Size, unsigned Align,
                                        bool isVolatile, bool AlwaysInline,
                                         MachinePointerInfo DstPtrInfo,
                                         MachinePointerInfo SrcPtrInfo) const {
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
    return SDValue();

  // If not DWORD aligned, it is more efficient to call the library.  However
  // if calling the library is not allowed (AlwaysInline), then soldier on as
  // the code generated here is better than the long load-store sequence we
  // would otherwise get.
  if (!AlwaysInline && (Align & 3) != 0)
    return SDValue();

  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256 ||
      SrcPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // ESI might be used as a base pointer, in that case we can't simply overwrite
  // the register.  Fall back to generic code.
  const X86RegisterInfo *TRI =
      static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
  if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
      TRI->getBaseRegister() == X86::ESI)
    return SDValue();

  MVT AVT;
  if (Align & 1)
    AVT = MVT::i8;
  else if (Align & 2)
    AVT = MVT::i16;
  else if (Align & 4)
    // DWORD aligned
    AVT = MVT::i32;
  else
    // QWORD aligned
    AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;

  unsigned UBytes = AVT.getSizeInBits() / 8;
  unsigned CountVal = SizeVal / UBytes;
  SDValue Count = DAG.getIntPtrConstant(CountVal);
  unsigned BytesLeft = SizeVal % UBytes;

  SDValue InFlag;
  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
                                                              X86::ECX,
                            Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
                                                              X86::EDI,
                            Dst, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
                                                              X86::ESI,
                            Src, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);

  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT DstVT = Dst.getValueType();
    EVT SrcVT = Src.getValueType();
    EVT SizeVT = Size.getValueType();
    Results.push_back(DAG.getMemcpy(Chain, dl,
                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
                                                DAG.getConstant(Offset, DstVT)),
                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset, SrcVT)),
                                    DAG.getConstant(BytesLeft, SizeVT),
                                    Align, isVolatile, AlwaysInline,
                                    DstPtrInfo.getWithOffset(Offset),
                                    SrcPtrInfo.getWithOffset(Offset)));
  }

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}
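The AVT selection and the count/remainder split are plain arithmetic. A standalone worked example with made-up values (an 8-byte-aligned 100-byte copy on a 64-bit subtarget), mirroring the selection logic above:
#include <cstdio>

int main() {
  const unsigned SizeVal = 100, Align = 8;
  const bool Is64Bit = true;
  unsigned UBytes;  // width of each REP MOVS element, chosen from the alignment
  if (Align & 1)
    UBytes = 1;
  else if (Align & 2)
    UBytes = 2;
  else if (Align & 4)
    UBytes = 4;
  else
    UBytes = Is64Bit ? 8 : 4;
  // prints: UBytes=8 Count=12 BytesLeft=4
  std::printf("UBytes=%u Count=%u BytesLeft=%u\n",
              UBytes, SizeVal / UBytes, SizeVal % UBytes);
  return 0;
}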
Example #28
// Change the scale of the vector type: halve the number of elements and
// double the scalar size.
static MVT scaleVectorType(MVT VT) {
  unsigned ScalarSize = VT.getVectorElementType().getScalarSizeInBits() * 2;
  return MVT::getVectorVT(MVT::getIntegerVT(ScalarSize),
                          VT.getVectorNumElements() / 2);
}
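The scaling preserves the overall bit width: the element count halves while the element size doubles. A tiny standalone check of that invariant (illustrative only):
#include <cassert>
#include <cstdio>

int main() {
  // v16i8: 16 elements of 8 bits.
  unsigned EltBits = 8, NumElts = 16;
  unsigned NewEltBits = EltBits * 2;  // i8 -> i16
  unsigned NewNumElts = NumElts / 2;  // 16 -> 8 elements, i.e. v8i16
  assert(EltBits * NumElts == NewEltBits * NewNumElts);  // total width preserved
  std::printf("v%ui%u -> v%ui%u\n", NumElts, EltBits, NewNumElts, NewEltBits);
  return 0;
}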
Example #29
bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC = CLI.CallConv;
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  // const char *SymName = CLI.SymName;

  // Allow SelectionDAG isel to handle tail calls.
  if (IsTailCall)
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // FIXME: Only handle *simple* calls for now.
  MVT RetVT;
  if (CLI.RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(CLI.RetTy, RetVT))
    return false;

  for (auto Flag : CLI.OutFlags)
    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
      return false;

  // Set up the argument vectors.
  SmallVector<MVT, 16> OutVTs;
  OutVTs.reserve(CLI.OutVals.size());

  for (auto *Val : CLI.OutVals) {
    MVT VT;
    if (!isTypeLegal(Val->getType(), VT) &&
        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
      return false;

    // We don't handle vector parameters yet.
    if (VT.isVector() || VT.getSizeInBits() > 64)
      return false;

    OutVTs.push_back(VT);
  }

  Address Addr;
  if (!computeCallAddress(Callee, Addr))
    return false;

  // Handle the arguments now that we've gotten them.
  unsigned NumBytes;
  if (!processCallArgs(CLI, OutVTs, NumBytes))
    return false;

  // Issue the call.
  unsigned DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32);
  emitInst(TargetOpcode::COPY, Mips::T9).addReg(DestAddress);
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::JALR),
              Mips::RA).addReg(Mips::T9);

  // Add implicit physical register uses to the call.
  for (auto Reg : CLI.OutRegs)
    MIB.addReg(Reg, RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return finishCall(CLI, RetVT, NumBytes);
}
Example #30
bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI,
                                   SmallVectorImpl<MVT> &OutVTs,
                                   unsigned &NumBytes) {
  CallingConv::ID CC = CLI.CallConv;
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();
  // This is the minimum argument area used for A0-A3.
  if (NumBytes < 16)
    NumBytes = 16;

  emitInst(Mips::ADJCALLSTACKDOWN).addImm(16);
  // Process the args.
  MVT firstMVT;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
    MVT ArgVT = OutVTs[VA.getValNo()];

    if (i == 0) {
      firstMVT = ArgVT;
      if (ArgVT == MVT::f32) {
        VA.convertToReg(Mips::F12);
      } else if (ArgVT == MVT::f64) {
        VA.convertToReg(Mips::D6);
      }
    } else if (i == 1) {
      if ((firstMVT == MVT::f32) || (firstMVT == MVT::f64)) {
        if (ArgVT == MVT::f32) {
          VA.convertToReg(Mips::F14);
        } else if (ArgVT == MVT::f64) {
          VA.convertToReg(Mips::D7);
        }
      }
    }
    if (((ArgVT == MVT::i32) || (ArgVT == MVT::f32)) && VA.isMemLoc()) {
      switch (VA.getLocMemOffset()) {
      case 0:
        VA.convertToReg(Mips::A0);
        break;
      case 4:
        VA.convertToReg(Mips::A1);
        break;
      case 8:
        VA.convertToReg(Mips::A2);
        break;
      case 12:
        VA.convertToReg(Mips::A3);
        break;
      default:
        break;
      }
    }
    unsigned ArgReg = getRegForValue(ArgVal);
    if (!ArgReg)
      return false;

    // Handle arg promotion: SExt, ZExt, AExt.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::AExt:
    case CCValAssign::SExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
      if (!ArgReg)
        return false;
      break;
    }
    case CCValAssign::ZExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
      if (!ArgReg)
        return false;
      break;
    }
    default:
      llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
      CLI.OutRegs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      llvm_unreachable("Mips does not use custom args.");
      return false;
    } else {
      //
      // FIXME: This path will currently return false. It was copied
      // from the AArch64 port and should be essentially fine for Mips too.
      // The work to finish up this path will be done in a follow-on patch.
      //
      assert(VA.isMemLoc() && "Assuming store on stack.");
      // Don't emit stores for undef values.
      if (isa<UndefValue>(ArgVal))
        continue;

      // Need to store on the stack.
      // FIXME: This alignment is incorrect but this path is disabled
      // for now (will return false). We need to determine the right alignment
      // based on the normal alignment for the underlying machine type.
      //
      unsigned ArgSize = RoundUpToAlignment(ArgVT.getSizeInBits(), 4);

      unsigned BEAlign = 0;
      if (ArgSize < 8 && !Subtarget->isLittle())
        BEAlign = 8 - ArgSize;

      Address Addr;
      Addr.setKind(Address::RegBase);
      Addr.setReg(Mips::SP);
      Addr.setOffset(VA.getLocMemOffset() + BEAlign);

      unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
      MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
          MachinePointerInfo::getStack(Addr.getOffset()),
          MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
      (void)(MMO);
      // if (!emitStore(ArgVT, ArgReg, Addr, MMO))
      return false; // can't store on the stack yet.
    }
  }

  return true;
}