SDValue
NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                          DebugLoc dl, SelectionDAG &DAG,
                                       SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const DataLayout *TD = getDataLayout();

  const Function *F = MF.getFunction();
  const AttrListPtr &PAL = F->getAttributes();

  SDValue Root = DAG.getRoot();
  std::vector<SDValue> OutChains;

  bool isKernel = llvm::isKernelFunction(*F);
  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  std::vector<Type *> argTypes;
  std::vector<const Argument *> theArgs;
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
      I != E; ++I) {
    theArgs.push_back(I);
    argTypes.push_back(I->getType());
  }
  assert(argTypes.size() == Ins.size() &&
         "Ins types and function types did not match");

  int idx = 0;
  for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) {
    Type *Ty = argTypes[i];
    EVT ObjectVT = getValueType(Ty);
    assert(ObjectVT == Ins[i].VT &&
           "Ins type did not match function type");

    // If the kernel argument is image*_t or sampler_t, convert it to
    // a i32 constant holding the parameter position. This can later
    // matched in the AsmPrinter to output the correct mangled name.
    if (isImageOrSamplerVal(theArgs[i],
                           (theArgs[i]->getParent() ?
                               theArgs[i]->getParent()->getParent() : 0))) {
      assert(isKernel && "Only kernels can have image/sampler params");
      InVals.push_back(DAG.getConstant(i+1, MVT::i32));
      continue;
    }

    if (theArgs[i]->use_empty()) {
      // argument is dead
      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
      continue;
    }

    // In the following cases, assign a node order of "idx+1"
    // to newly created nodes. The SDNOdes for params have to
    // appear in the same order as their order of appearance
    // in the original function. "idx+1" holds that order.
    if (PAL.getParamAttributes(i+1).hasAttribute(Attributes::ByVal) == false) {
      // A plain scalar.
      if (isABI || isKernel) {
        // If ABI, load from the param symbol
        SDValue Arg = getParamSymbol(DAG, idx);
        Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT(
            F->getContext()),
            llvm::ADDRESS_SPACE_PARAM));
        SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
                                MachinePointerInfo(srcValue), false, false,
                                false,
                                TD->getABITypeAlignment(ObjectVT.getTypeForEVT(
                                  F->getContext())));
        if (p.getNode())
          DAG.AssignOrdering(p.getNode(), idx+1);
        InVals.push_back(p);
      }
      else {
        // If no ABI, just move the param symbol
        SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
        SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
        if (p.getNode())
          DAG.AssignOrdering(p.getNode(), idx+1);
        InVals.push_back(p);
      }
      continue;
    }

    // Param has ByVal attribute
    if (isABI || isKernel) {
      // Return MoveParam(param symbol).
      // Ideally, the param symbol can be returned directly,
      // but when SDNode builder decides to use it in a CopyToReg(),
      // machine instruction fails because TargetExternalSymbol
      // (not lowered) is target dependent, and CopyToReg assumes
      // the source is lowered.
      SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
      SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
      if (p.getNode())
        DAG.AssignOrdering(p.getNode(), idx+1);
      if (isKernel)
        InVals.push_back(p);
      else {
        SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
                    DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32),
                                 p);
        InVals.push_back(p2);
      }
    } else {
      // Have to move a set of param symbols to registers and
      // store them locally and return the local pointer in InVals
      const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
      assert(elemPtrType &&
             "Byval parameter should be a pointer type");
      Type *elemType = elemPtrType->getElementType();
      // Compute the constituent parts
      SmallVector<EVT, 16> vtparts;
      SmallVector<uint64_t, 16> offsets;
      ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
      unsigned totalsize = 0;
      for (unsigned j=0, je=vtparts.size(); j!=je; ++j)
        totalsize += vtparts[j].getStoreSizeInBits();
      SDValue localcopy =  DAG.getFrameIndex(MF.getFrameInfo()->
                                      CreateStackObject(totalsize/8, 16, false),
                                             getPointerTy());
      unsigned sizesofar = 0;
      std::vector<SDValue> theChains;
      for (unsigned j=0, je=vtparts.size(); j!=je; ++j) {
        unsigned numElems = 1;
        if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements();
        for (unsigned k=0, ke=numElems; k!=ke; ++k) {
          EVT tmpvt = vtparts[j];
          if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType();
          SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
                                    getParamSymbol(DAG, idx, tmpvt));
          SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
                                    DAG.getConstant(sizesofar, getPointerTy()));
          theChains.push_back(DAG.getStore(Chain, dl, arg, addr,
                                        MachinePointerInfo(), false, false, 0));
          sizesofar += tmpvt.getStoreSizeInBits()/8;
          ++idx;
        }
      }
      --idx;
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0],
                          theChains.size());
      InVals.push_back(localcopy);
    }
  }

  // Clang will check explicit VarArg and issue error if any. However, Clang
  // will let code with
  // implicit var arg like f() pass.
  // We treat this case as if the arg list is empty.
  //if (F.isVarArg()) {
  // assert(0 && "VarArg not supported yet!");
  //}

  if (!OutChains.empty())
    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                            &OutChains[0], OutChains.size()));

  return Chain;
}