/// CustomPromoteOperation - Custom promotion hook.  The only node accepted
/// here is a VAARG producing an i32; it is forwarded to LowerOperation.
SDOperand AlphaTargetLowering::CustomPromoteOperation(SDOperand Op,
                                                      SelectionDAG &DAG) {
  assert(Op.getOpcode() == ISD::VAARG &&
         Op.getValueType() == MVT::i32 &&
         "Unknown node to custom promote!");

  // The VAARG case of LowerOperation has an explicit i32 path, so the node
  // can simply be handed over.
  SDOperand Promoted = LowerOperation(Op, DAG);
  return Promoted;
}
 static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
   SDOperand Op = EI.getNode()->getOperand(EI.getOperand());
   MVT::ValueType VT = Op.getValueType();
   if (VT == MVT::Flag)
     return "color=red,style=bold";
   else if (VT == MVT::Other)
     return "style=dashed";
   return "";
 }
/// LowerRET - Lower an ISD::RET node.  The return address node is first
/// copied into R26.  When a return value is present (three operands) it is
/// then copied into R0 for integers or F0 for floating point, and finally
/// the RET_FLAG node is built from the resulting chain and flag.
static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand RetAddr = DAG.getNode(AlphaISD::GlobalRetAddr, MVT::i64);
  SDOperand Copy = DAG.getCopyToReg(Op.getOperand(0), Alpha::R26, RetAddr,
                                    SDOperand());

  const unsigned NumOps = Op.getNumOperands();
  if (NumOps == 3) {
    SDOperand RetVal = Op.getOperand(1);
    MVT::ValueType RetVT = RetVal.getValueType();

    // Integers are returned in R0, floating point values in F0.
    unsigned RetReg = Alpha::R0;
    if (!MVT::isInteger(RetVT)) {
      assert(MVT::isFloatingPoint(RetVT));
      RetReg = Alpha::F0;
    }

    Copy = DAG.getCopyToReg(Copy, RetReg, RetVal, Copy.getValue(1));
    // Only record the live-out register if none has been recorded yet.
    if (DAG.getMachineFunction().liveout_empty())
      DAG.getMachineFunction().addLiveOut(RetReg);
  } else if (NumOps != 1) {
    // One operand is "ret void", three operands is a single return value;
    // nothing else is handled.
    assert(0 && "Do not know how to return this many arguments!");
    abort();
  }

  return DAG.getNode(AlphaISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
}
/// LowerJumpTable - Lower an ISD::JumpTable node.  The jump table index is
/// turned into a target jump table node, and its address is formed as a
/// GPRelHi / GPRelLo pair, with GPRelHi taking the GLOBAL_OFFSET_TABLE node
/// as its second operand.  Returns the GPRelLo node.
static SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);

  SDOperand Hi = DAG.getNode(AlphaISD::GPRelHi,  MVT::i64, JTI,
                             DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, MVT::i64));
  SDOperand Lo = DAG.getNode(AlphaISD::GPRelLo, MVT::i64, JTI, Hi);
  return Lo;
}
/// LowerOperation - Provide custom lowering hooks for some operations.
///
/// Dispatches on the opcode of Op and returns the replacement node.  A null
/// SDOperand is returned for the cases that break out of the switch:
/// FRAMEADDR, and division/remainder nodes that reach the integer check with
/// a non-integer type.  An opcode that has no case here is a programming
/// error and terminates the process.
SDOperand AlphaTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default:
    assert(0 && "Wasn't expecting to be able to lower this!");
    // Without this, a build with asserts disabled would fall through into
    // the FORMAL_ARGUMENTS case below with an unrelated node.
    abort();
  case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG,
                                                           VarArgsBase,
                                                           VarArgsOffset);

  case ISD::RET: return LowerRET(Op,DAG);
  case ISD::JumpTable: return LowerJumpTable(Op, DAG);

  case ISD::SINT_TO_FP: {
    assert(MVT::i64 == Op.getOperand(0).getValueType() &&
           "Unhandled SINT_TO_FP type in custom expander!");
    bool isDouble = MVT::f64 == Op.getValueType();
    // Reinterpret the i64 bits as f64, then convert with CVTQT_ (f64 result)
    // or CVTQS_ (f32 result).
    SDOperand LD = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
    SDOperand FP = DAG.getNode(isDouble?AlphaISD::CVTQT_:AlphaISD::CVTQS_,
                               isDouble?MVT::f64:MVT::f32, LD);
    return FP;
  }
  case ISD::FP_TO_SINT: {
    bool isDouble = MVT::f64 == Op.getOperand(0).getValueType();
    SDOperand src = Op.getOperand(0);

    if (!isDouble) //Promote
      src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, src);

    // CVTTQ_ yields its result typed as f64; reinterpret those bits as i64.
    src = DAG.getNode(AlphaISD::CVTTQ_, MVT::f64, src);

    return DAG.getNode(ISD::BIT_CONVERT, MVT::i64, src);
  }
  case ISD::ConstantPool: {
    ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
    Constant *C = CP->getConstVal();
    SDOperand CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment());

    // Address is formed as a GPRelHi / GPRelLo pair.
    SDOperand Hi = DAG.getNode(AlphaISD::GPRelHi,  MVT::i64, CPI,
                               DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, MVT::i64));
    SDOperand Lo = DAG.getNode(AlphaISD::GPRelLo, MVT::i64, CPI, Hi);
    return Lo;
  }
  case ISD::GlobalAddress: {
    GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
    GlobalValue *GV = GSDN->getGlobal();
    SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i64, GSDN->getOffset());

    //    if (!GV->hasWeakLinkage() && !GV->isDeclaration() && !GV->hasLinkOnceLinkage()) {
    // Globals with internal linkage use the GPRelHi / GPRelLo pair; all
    // others go through a RelLit node.
    if (GV->hasInternalLinkage()) {
      SDOperand Hi = DAG.getNode(AlphaISD::GPRelHi,  MVT::i64, GA,
                                DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, MVT::i64));
      SDOperand Lo = DAG.getNode(AlphaISD::GPRelLo, MVT::i64, GA, Hi);
      return Lo;
    } else
      return DAG.getNode(AlphaISD::RelLit, MVT::i64, GA,
                         DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, MVT::i64));
  }
  case ISD::ExternalSymbol: {
    return DAG.getNode(AlphaISD::RelLit, MVT::i64,
                       DAG.getTargetExternalSymbol(cast<ExternalSymbolSDNode>(Op)
                                                   ->getSymbol(), MVT::i64),
                       DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, MVT::i64));
  }

  case ISD::UREM:
  case ISD::SREM:
    //Expand only on constant case
    if (Op.getOperand(1).getOpcode() == ISD::Constant) {
      // x % c  ==>  x - (x / c) * c, with the division built by
      // BuildUDIV / BuildSDIV.
      MVT::ValueType VT = Op.Val->getValueType(0);
      SDOperand Tmp1 = Op.Val->getOpcode() == ISD::UREM ?
        BuildUDIV(Op.Val, DAG, NULL) :
        BuildSDIV(Op.Val, DAG, NULL);
      Tmp1 = DAG.getNode(ISD::MUL, VT, Tmp1, Op.getOperand(1));
      Tmp1 = DAG.getNode(ISD::SUB, VT, Op.getOperand(0), Tmp1);
      return Tmp1;
    }
    //fall through
  case ISD::SDIV:
  case ISD::UDIV:
    if (MVT::isInteger(Op.getValueType())) {
      // Remainders with a constant divisor have already returned above, so
      // only SDIV/UDIV can take this constant path.
      if (Op.getOperand(1).getOpcode() == ISD::Constant)
        return Op.getOpcode() == ISD::SDIV ? BuildSDIV(Op.Val, DAG, NULL)
          : BuildUDIV(Op.Val, DAG, NULL);
      // Otherwise emit a DivCall to the external symbol named for the
      // operation.
      const char* opstr = 0;
      switch (Op.getOpcode()) {
      case ISD::UREM: opstr = "__remqu"; break;
      case ISD::SREM: opstr = "__remq";  break;
      case ISD::UDIV: opstr = "__divqu"; break;
      case ISD::SDIV: opstr = "__divq";  break;
      }
      SDOperand Tmp1 = Op.getOperand(0),
        Tmp2 = Op.getOperand(1),
        Addr = DAG.getExternalSymbol(opstr, MVT::i64);
      return DAG.getNode(AlphaISD::DivCall, MVT::i64, Addr, Tmp1, Tmp2);
    }
    break;

  case ISD::VAARG: {
    SDOperand Chain = Op.getOperand(0);
    SDOperand VAListP = Op.getOperand(1);
    SrcValueSDNode *VAListS = cast<SrcValueSDNode>(Op.getOperand(2));

    // The va_list is read as an i64 base pointer at offset 0 followed by an
    // i32 offset at offset 8.
    SDOperand Base = DAG.getLoad(MVT::i64, Chain, VAListP, VAListS->getValue(),
                                 VAListS->getOffset());
    SDOperand Tmp = DAG.getNode(ISD::ADD, MVT::i64, VAListP,
                                DAG.getConstant(8, MVT::i64));
    SDOperand Offset = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, Base.getValue(1),
                                      Tmp, NULL, 0, MVT::i32);
    SDOperand DataPtr = DAG.getNode(ISD::ADD, MVT::i64, Base, Offset);
    if (MVT::isFloatingPoint(Op.getValueType()))
    {
      //if fp && Offset < 6*8, then subtract 6*8 from DataPtr
      SDOperand FPDataPtr = DAG.getNode(ISD::SUB, MVT::i64, DataPtr,
                                        DAG.getConstant(8*6, MVT::i64));
      SDOperand CC = DAG.getSetCC(MVT::i64, Offset,
                                  DAG.getConstant(8*6, MVT::i64), ISD::SETLT);
      DataPtr = DAG.getNode(ISD::SELECT, MVT::i64, CC, FPDataPtr, DataPtr);
    }

    // Advance the stored offset by one 8-byte slot.
    SDOperand NewOffset = DAG.getNode(ISD::ADD, MVT::i64, Offset,
                                      DAG.getConstant(8, MVT::i64));
    SDOperand Update = DAG.getTruncStore(Offset.getValue(1), NewOffset,
                                         Tmp, NULL, 0, MVT::i32);

    // An i32 result is produced as a sign-extending load to i64; every other
    // type is loaded directly.
    SDOperand Result;
    if (Op.getValueType() == MVT::i32)
      Result = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, Update, DataPtr,
                              NULL, 0, MVT::i32);
    else
      Result = DAG.getLoad(Op.getValueType(), Update, DataPtr, NULL, 0);
    return Result;
  }
  case ISD::VACOPY: {
    SDOperand Chain = Op.getOperand(0);
    SDOperand DestP = Op.getOperand(1);
    SDOperand SrcP = Op.getOperand(2);
    SrcValueSDNode *DestS = cast<SrcValueSDNode>(Op.getOperand(3));
    SrcValueSDNode *SrcS = cast<SrcValueSDNode>(Op.getOperand(4));

    // Copy the pointer-sized field at offset 0 ...
    SDOperand Val = DAG.getLoad(getPointerTy(), Chain, SrcP,
                                SrcS->getValue(), SrcS->getOffset());
    SDOperand Result = DAG.getStore(Val.getValue(1), Val, DestP, DestS->getValue(),
                                    DestS->getOffset());
    // ... then the i32 field at offset 8.
    SDOperand NP = DAG.getNode(ISD::ADD, MVT::i64, SrcP,
                               DAG.getConstant(8, MVT::i64));
    Val = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, Result, NP, NULL,0, MVT::i32);
    SDOperand NPD = DAG.getNode(ISD::ADD, MVT::i64, DestP,
                                DAG.getConstant(8, MVT::i64));
    return DAG.getTruncStore(Val.getValue(1), Val, NPD, NULL, 0, MVT::i32);
  }
  case ISD::VASTART: {
    SDOperand Chain = Op.getOperand(0);
    SDOperand VAListP = Op.getOperand(1);
    SrcValueSDNode *VAListS = cast<SrcValueSDNode>(Op.getOperand(2));

    // vastart stores the address of the VarArgsBase and VarArgsOffset
    SDOperand FR  = DAG.getFrameIndex(VarArgsBase, MVT::i64);
    SDOperand S1  = DAG.getStore(Chain, FR, VAListP, VAListS->getValue(),
                                 VAListS->getOffset());
    SDOperand SA2 = DAG.getNode(ISD::ADD, MVT::i64, VAListP,
                                DAG.getConstant(8, MVT::i64));
    return DAG.getTruncStore(S1, DAG.getConstant(VarArgsOffset, MVT::i64),
                             SA2, NULL, 0, MVT::i32);
  }
  case ISD::RETURNADDR:
    return DAG.getNode(AlphaISD::GlobalRetAddr, MVT::i64);
      //FIXME: implement
  case ISD::FRAMEADDR:          break;
  }

  return SDOperand();
}
/// LowerCallTo - Build the DAG for a call.  Integer arguments narrower than
/// 64 bits are extended to i64 in place (the entries of Args are updated),
/// the call is bracketed by CALLSEQ_START / CALLSEQ_END, and a result that
/// was widened to i64 is truncated back to the declared return type.
/// Returns the (result value, output chain) pair.
std::pair<SDOperand, SDOperand>
AlphaTargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
                                 bool RetTyIsSigned, bool isVarArg,
                                 unsigned CallingConv, bool isTailCall,
                                 SDOperand Callee, ArgListTy &Args,
                                 SelectionDAG &DAG) {
  // Every argument past the sixth takes 8 bytes in the call sequence size.
  const int NumBytes = Args.size() > 6 ? (Args.size() - 6) * 8 : 0;

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, getPointerTy()));

  // Operands of the call node: chain, callee, then the arguments in order.
  std::vector<SDOperand> CallOps;
  CallOps.push_back(Chain);
  CallOps.push_back(Callee);

  for (unsigned ArgIdx = 0, NumArgs = Args.size(); ArgIdx != NumArgs;
       ++ArgIdx) {
    switch (getValueType(Args[ArgIdx].Ty)) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32: {
      // Widen to 64 bits: sign extend when marked signed, zero extend when
      // marked unsigned, otherwise any-extend.
      unsigned ExtOpc = ISD::ANY_EXTEND;
      if (Args[ArgIdx].isSExt)
        ExtOpc = ISD::SIGN_EXTEND;
      else if (Args[ArgIdx].isZExt)
        ExtOpc = ISD::ZERO_EXTEND;
      Args[ArgIdx].Node = DAG.getNode(ExtOpc, MVT::i64, Args[ArgIdx].Node);
      break;
    }
    case MVT::i64:
    case MVT::f64:
    case MVT::f32:
      // Passed through unchanged.
      break;
    }
    CallOps.push_back(Args[ArgIdx].Node);
  }

  // Integer results from i1 through i32 come back as i64.
  MVT::ValueType RetTyVT = getValueType(RetTy);
  MVT::ValueType ActualRetTyVT = RetTyVT;
  if (RetTyVT >= MVT::i1 && RetTyVT <= MVT::i32)
    ActualRetTyVT = MVT::i64;

  // Result list: the optional value followed by the chain.
  const bool HasResult = RetTyVT != MVT::isVoid;
  std::vector<MVT::ValueType> ResultTys;
  if (HasResult)
    ResultTys.push_back(ActualRetTyVT);
  ResultTys.push_back(MVT::Other);

  SDOperand TheCall = DAG.getNode(AlphaISD::CALL, ResultTys,
                                  &CallOps[0], CallOps.size());
  // The chain is result 1 when a value is produced, result 0 otherwise.
  Chain = TheCall.getValue(HasResult);
  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumBytes, getPointerTy()));

  if (RetTyVT == ActualRetTyVT)
    return std::make_pair(TheCall, Chain);

  // Record how the upper bits were extended, then narrow to the declared
  // return type.
  const unsigned AssertOpc = RetTyIsSigned ? ISD::AssertSext : ISD::AssertZext;
  SDOperand Widened = DAG.getNode(AssertOpc, MVT::i64, TheCall,
                                  DAG.getValueType(RetTyVT));
  SDOperand Narrowed = DAG.getNode(ISD::TRUNCATE, RetTyVT, Widened);
  return std::make_pair(Narrowed, Chain);
}
/// LowerFORMAL_ARGUMENTS - Lower an ISD::FORMAL_ARGUMENTS node.
///
/// The first six arguments are read from R16-R21 (i64) or F16-F21 (f32/f64);
/// later arguments are loaded from fixed 8-byte stack objects.  For vararg
/// functions all twelve argument registers are additionally stored to fixed
/// stack objects.
///
/// \param Op            the FORMAL_ARGUMENTS node; operand 0 is the incoming
///                      chain and operand 2 is the vararg flag.
/// \param DAG           the DAG being built.
/// \param VarArgsBase   [out] for vararg functions, the frame index of the
///                      save slot of the first integer argument register.
/// \param VarArgsOffset [out] for vararg functions, 8 times the number of
///                      arguments.
/// \returns a MERGE_VALUES node with one value per argument plus the chain.
static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
                                       int &VarArgsBase,
                                       int &VarArgsOffset) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  std::vector<SDOperand> ArgValues;
  SDOperand Root = Op.getOperand(0);

  AddLiveIn(MF, Alpha::R29, &Alpha::GPRCRegClass); //GP
  AddLiveIn(MF, Alpha::R26, &Alpha::GPRCRegClass); //RA

  // Argument registers by position.  Entries are overwritten below with the
  // value returned by AddLiveIn once a register has been used.
  unsigned args_int[] = {
    Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21};
  unsigned args_float[] = {
    Alpha::F16, Alpha::F17, Alpha::F18, Alpha::F19, Alpha::F20, Alpha::F21};

  // The node's last value is the chain, so it is excluded from the count.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    SDOperand ArgVal;

    if (ArgNo  < 6) {
      switch (ObjectVT) {
      default:
        cerr << "Unknown Type " << ObjectVT << "\n";
        abort();
      case MVT::f64:
        args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
                                      &Alpha::F8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Root, args_float[ArgNo], ObjectVT);
        break;
      case MVT::f32:
        args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
                                      &Alpha::F4RCRegClass);
        ArgVal = DAG.getCopyFromReg(Root, args_float[ArgNo], ObjectVT);
        break;
      case MVT::i64:
        args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo],
                                    &Alpha::GPRCRegClass);
        ArgVal = DAG.getCopyFromReg(Root, args_int[ArgNo], MVT::i64);
        break;
      }
    } else { //more args
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6));

      // Create the SelectionDAG nodes corresponding to a load
      //from this parameter
      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i64);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
    }
    ArgValues.push_back(ArgVal);
  }

  // If the functions takes variable number of arguments, copy all regs to stack
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg) {
    VarArgsOffset = (Op.Val->getNumValues()-1) * 8;
    std::vector<SDOperand> LS;
    for (int i = 0; i < 6; ++i) {
      // Registers not consumed by a fixed argument are still physical and
      // must be added as live-ins before they can be read.
      if (MRegisterInfo::isPhysicalRegister(args_int[i]))
        args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
      SDOperand argt = DAG.getCopyFromReg(Root, args_int[i], MVT::i64);
      // Integer registers are saved at offsets -48 .. -8.
      int FI = MFI->CreateFixedObject(8, -8 * (6 - i));
      if (i == 0) VarArgsBase = FI;
      SDOperand SDFI = DAG.getFrameIndex(FI, MVT::i64);
      LS.push_back(DAG.getStore(Root, argt, SDFI, NULL, 0));

      if (MRegisterInfo::isPhysicalRegister(args_float[i]))
        args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
      argt = DAG.getCopyFromReg(Root, args_float[i], MVT::f64);
      // Floating point registers are saved at offsets -96 .. -56, i.e. 48
      // bytes below the matching integer slot.
      FI = MFI->CreateFixedObject(8, - 8 * (12 - i));
      SDFI = DAG.getFrameIndex(FI, MVT::i64);
      LS.push_back(DAG.getStore(Root, argt, SDFI, NULL, 0));
    }

    //Set up a token factor with all the stack traffic
    Root = DAG.getNode(ISD::TokenFactor, MVT::Other, &LS[0], LS.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
//
// Returns the selected node, or NULL when the node was already selected or
// when its uses were rewritten in place with ReplaceUses.  Opcodes without a
// case below are handed to SelectCode.
SDNode *IA64DAGToDAGISel::Select(SDOperand Op) {
  SDNode *N = Op.Val;
  // Opcodes in [BUILTIN_OP_END, IA64ISD::FIRST_NUMBER) are treated as already
  // selected.
  if (N->getOpcode() >= ISD::BUILTIN_OP_END &&
      N->getOpcode() < IA64ISD::FIRST_NUMBER)
    return NULL;   // Already selected.

  switch (N->getOpcode()) {
  default: break;

  // Operands: 0 = chain, 1 = callee, optional 2 = incoming flag.
  case IA64ISD::BRCALL: { // XXX: this is also a hack!
    SDOperand Chain = N->getOperand(0);
    SDOperand InFlag;  // Null incoming flag value.

    AddToISelQueue(Chain);
    if(N->getNumOperands()==3) { // we have an incoming chain, callee and flag
      InFlag = N->getOperand(2);
      AddToISelQueue(InFlag);
    }

    // Filled in by exactly one of the three callee kinds below.
    unsigned CallOpcode;
    SDOperand CallOperand;
    
    // if we can call directly, do so
    if (GlobalAddressSDNode *GASD =
      dyn_cast<GlobalAddressSDNode>(N->getOperand(1))) {
      CallOpcode = IA64::BRCALL_IPREL_GA;
      CallOperand = CurDAG->getTargetGlobalAddress(GASD->getGlobal(), MVT::i64);
    } else if (isa<ExternalSymbolSDNode>(N->getOperand(1))) {
      // FIXME: we currently NEED this case for correctness, to avoid
      // "non-pic code with imm reloc.n against dynamic symbol" errors
    CallOpcode = IA64::BRCALL_IPREL_ES;
    CallOperand = N->getOperand(1);
  } else {
    // otherwise we need to load the function descriptor,
    // load the branch target (function)'s entry point and GP,
    // branch (call) then restore the GP
    SDOperand FnDescriptor = N->getOperand(1);
    AddToISelQueue(FnDescriptor);
   
    // load the branch target's entry point [mem] and 
    // GP value [mem+8]
    SDOperand targetEntryPoint=
      SDOperand(CurDAG->getTargetNode(IA64::LD8, MVT::i64, FnDescriptor), 0);
    Chain = targetEntryPoint.getValue(1);
    SDOperand targetGPAddr=
      SDOperand(CurDAG->getTargetNode(IA64::ADDS, MVT::i64, 
                                      FnDescriptor,
                                      CurDAG->getConstant(8, MVT::i64)), 0);
    Chain = targetGPAddr.getValue(1);
    SDOperand targetGP =
      SDOperand(CurDAG->getTargetNode(IA64::LD8, MVT::i64, targetGPAddr), 0);
    Chain = targetGP.getValue(1);

    // Copy the loaded GP into r1 and the entry point into B6, threading the
    // flag from one copy to the next.
    Chain = CurDAG->getCopyToReg(Chain, IA64::r1, targetGP, InFlag);
    InFlag = Chain.getValue(1);
    Chain = CurDAG->getCopyToReg(Chain, IA64::B6, targetEntryPoint, InFlag); // FLAG these?
    InFlag = Chain.getValue(1);
    
    CallOperand = CurDAG->getRegister(IA64::B6, MVT::i64);
    CallOpcode = IA64::BRCALL_INDIRECT;
  }
 
   // Finally, once everything is setup, emit the call itself
   if(InFlag.Val)
     Chain = SDOperand(CurDAG->getTargetNode(CallOpcode, MVT::Other, MVT::Flag,
                                             CallOperand, InFlag), 0);
   else // there might be no arguments
     Chain = SDOperand(CurDAG->getTargetNode(CallOpcode, MVT::Other, MVT::Flag,
                                             CallOperand, Chain), 0);
   InFlag = Chain.getValue(1);

   // Redirect the users of the original node's values 0 and 1 to the new
   // call's chain and flag.
   std::vector<SDOperand> CallResults;

   CallResults.push_back(Chain);
   CallResults.push_back(InFlag);

   for (unsigned i = 0, e = CallResults.size(); i != e; ++i)
     ReplaceUses(Op.getValue(i), CallResults[i]);
   return NULL;
  }
  
  case IA64ISD::GETFD: {
    SDOperand Input = N->getOperand(0);
    AddToISelQueue(Input);
    return CurDAG->getTargetNode(IA64::GETFD, MVT::i64, Input);
  } 
  
  // All division and remainder opcodes listed here are handled by SelectDIV.
  case ISD::FDIV:
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:
    return SelectDIV(Op);
 
  // Only +0.0 and +1.0 are accepted; they are read from F0 and F1.
  case ISD::TargetConstantFP: {
    SDOperand Chain = CurDAG->getEntryNode(); // this is a constant, so..

    SDOperand V;
    if (cast<ConstantFPSDNode>(N)->isExactlyValue(+0.0)) {
      V = CurDAG->getCopyFromReg(Chain, IA64::F0, MVT::f64);
    } else if (cast<ConstantFPSDNode>(N)->isExactlyValue(+1.0)) {
      V = CurDAG->getCopyFromReg(Chain, IA64::F1, MVT::f64);
    } else
      assert(0 && "Unexpected FP constant!");
    
    ReplaceUses(SDOperand(N, 0), V);
    return 0;
  }

  // A frame index becomes a MOV of the target frame index: the node is
  // rewritten in place when it has a single use, otherwise a new node is
  // created.
  case ISD::FrameIndex: { // TODO: reduce creepyness
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    if (N->hasOneUse())
      return CurDAG->SelectNodeTo(N, IA64::MOV, MVT::i64,
                                  CurDAG->getTargetFrameIndex(FI, MVT::i64));
    else
      return CurDAG->getTargetNode(IA64::MOV, MVT::i64,
                                   CurDAG->getTargetFrameIndex(FI, MVT::i64));
  }

  case ISD::ConstantPool: { // TODO: nuke the constant pool
    // (ia64 doesn't need one)
    ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
    Constant *C = CP->getConstVal();
    SDOperand CPI = CurDAG->getTargetConstantPool(C, MVT::i64,
                                                  CP->getAlignment());
    return CurDAG->getTargetNode(IA64::ADDL_GA, MVT::i64, // ?
                                 CurDAG->getRegister(IA64::r1, MVT::i64), CPI);
  }

  // A global address is selected as ADDL_GA on r1 and the target global
  // address, followed by an LD8 of that result.
  case ISD::GlobalAddress: {
    GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
    SDOperand GA = CurDAG->getTargetGlobalAddress(GV, MVT::i64);
    SDOperand Tmp =
      SDOperand(CurDAG->getTargetNode(IA64::ADDL_GA, MVT::i64, 
                                      CurDAG->getRegister(IA64::r1,
                                                          MVT::i64), GA), 0);
    return CurDAG->getTargetNode(IA64::LD8, MVT::i64, Tmp);
  }
  
/* XXX
   case ISD::ExternalSymbol: {
     SDOperand EA = CurDAG->getTargetExternalSymbol(
       cast<ExternalSymbolSDNode>(N)->getSymbol(),
       MVT::i64);
     SDOperand Tmp = CurDAG->getTargetNode(IA64::ADDL_EA, MVT::i64, 
                                           CurDAG->getRegister(IA64::r1,
                                                               MVT::i64),
                                           EA);
     return CurDAG->getTargetNode(IA64::LD8, MVT::i64, Tmp);
   }
*/

  // The load opcode is chosen from the type being loaded.
  case ISD::LOAD: { // FIXME: load -1, not 1, for bools?
    LoadSDNode *LD = cast<LoadSDNode>(N);
    SDOperand Chain = LD->getChain();
    SDOperand Address = LD->getBasePtr();
    AddToISelQueue(Chain);
    AddToISelQueue(Address);

    MVT::ValueType TypeBeingLoaded = LD->getLoadedVT();
    unsigned Opc;
    switch (TypeBeingLoaded) {
    default:
#ifndef NDEBUG
      N->dump();
#endif
      assert(0 && "Cannot load this type!");
    case MVT::i1: { // this is a bool
      Opc = IA64::LD1; // first we load a byte, then compare for != 0
      if(N->getValueType(0) == MVT::i1) { // XXX: early exit!
        return CurDAG->SelectNodeTo(N, IA64::CMPNE, MVT::i1, MVT::Other, 
                    SDOperand(CurDAG->getTargetNode(Opc, MVT::i64, Address), 0),
                                    CurDAG->getRegister(IA64::r0, MVT::i64), 
                                    Chain);
      }
      /* otherwise, we want to load a bool into something bigger: LD1
         will do that for us, so we just fall through */
    }
    case MVT::i8:  Opc = IA64::LD1; break;
    case MVT::i16: Opc = IA64::LD2; break;
    case MVT::i32: Opc = IA64::LD4; break;
    case MVT::i64: Opc = IA64::LD8; break;
    
    case MVT::f32: Opc = IA64::LDF4; break;
    case MVT::f64: Opc = IA64::LDF8; break;
    }

    // TODO: comment this
    return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), MVT::Other,
                                Address, Chain);
  }
  
  // Non-truncating stores pick the opcode from the type of operand 1;
  // truncating stores pick it from the stored type.
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(N);
    SDOperand Address = ST->getBasePtr();
    SDOperand Chain = ST->getChain();
    AddToISelQueue(Address);
    AddToISelQueue(Chain);
   
    unsigned Opc;
    if (ISD::isNON_TRUNCStore(N)) {
      switch (N->getOperand(1).getValueType()) {
      default: assert(0 && "unknown type in store");
      case MVT::i1: { // this is a bool
        Opc = IA64::ST1; // we store either 0 or 1 as a byte 
        // first load zero!
        SDOperand Initial = CurDAG->getCopyFromReg(Chain, IA64::r0, MVT::i64);
        Chain = Initial.getValue(1);
        // then load 1 into the same reg iff the predicate to store is 1
        SDOperand Tmp = ST->getValue();
        AddToISelQueue(Tmp);
        Tmp =
          SDOperand(CurDAG->getTargetNode(IA64::TPCADDS, MVT::i64, Initial,
                                          CurDAG->getTargetConstant(1, MVT::i64),
                                          Tmp), 0);
        return CurDAG->SelectNodeTo(N, Opc, MVT::Other, Address, Tmp, Chain);
      }
      case MVT::i64: Opc = IA64::ST8;  break;
      case MVT::f64: Opc = IA64::STF8; break;
      }
    } else { // Truncating store
      switch(ST->getStoredVT()) {
      default: assert(0 && "unknown type in truncstore");
      case MVT::i8:  Opc = IA64::ST1;  break;
      case MVT::i16: Opc = IA64::ST2;  break;
      case MVT::i32: Opc = IA64::ST4;  break;
      case MVT::f32: Opc = IA64::STF4; break;
      }
    }
    
    // The selected node takes operand 2, then operand 1, then the chain.
    SDOperand N1 = N->getOperand(1);
    SDOperand N2 = N->getOperand(2);
    AddToISelQueue(N1);
    AddToISelQueue(N2);
    return CurDAG->SelectNodeTo(N, Opc, MVT::Other, N2, N1, Chain);
  }

  // Operands: 0 = chain, 1 = condition, 2 = destination basic block.
  case ISD::BRCOND: {
    SDOperand Chain = N->getOperand(0);
    SDOperand CC = N->getOperand(1);
    AddToISelQueue(Chain);
    AddToISelQueue(CC);
    MachineBasicBlock *Dest =
      cast<BasicBlockSDNode>(N->getOperand(2))->getBasicBlock();
    //FIXME - we do NOT need long branches all the time
    return CurDAG->SelectNodeTo(N, IA64::BRLCOND_NOTCALL, MVT::Other, CC, 
                                CurDAG->getBasicBlock(Dest), Chain);
  }

  // Both call-sequence markers become a stack adjustment pseudo carrying the
  // constant amount from operand 1.
  case ISD::CALLSEQ_START:
  case ISD::CALLSEQ_END: {
    int64_t Amt = cast<ConstantSDNode>(N->getOperand(1))->getValue();
    unsigned Opc = N->getOpcode() == ISD::CALLSEQ_START ?
      IA64::ADJUSTCALLSTACKDOWN : IA64::ADJUSTCALLSTACKUP;
    SDOperand N0 = N->getOperand(0);
    AddToISelQueue(N0);
    return CurDAG->SelectNodeTo(N, Opc, MVT::Other, getI64Imm(Amt), N0);
  }

  // This is the last case of the switch, so the unbraced declaration of N0
  // is not crossed by any later case label.
  case ISD::BR:
    // FIXME: we don't need long branches all the time!
    SDOperand N0 = N->getOperand(0);
    AddToISelQueue(N0);
    return CurDAG->SelectNodeTo(N, IA64::BRL_NOTCALL, MVT::Other, 
                                N->getOperand(1), N0);
  }
  
  return SelectCode(Op);
}
// SelectDIV - Select a division or remainder node by emitting an explicit
// sequence of target nodes: an approximate reciprocal (FRCPAS0 / FRCPAS1)
// followed by a chain of predicated multiply-add nodes.  Integer inputs are
// first moved into FP registers and converted, and the integer result is
// moved back with GETFSIG.  Returns the node producing the final result.
//
// NOTE(review): Chain is initialised from operand 0 (the same value as Tmp1)
// and reassigned after nearly every node created below, but it is never read
// again after the AddToISelQueue(Chain) call - confirm whether those
// assignments are intentional.
SDNode *IA64DAGToDAGISel::SelectDIV(SDOperand Op) {
  SDNode *N = Op.Val;
  SDOperand Chain = N->getOperand(0);
  SDOperand Tmp1 = N->getOperand(0);
  SDOperand Tmp2 = N->getOperand(1);
  AddToISelQueue(Chain);

  AddToISelQueue(Tmp1);
  AddToISelQueue(Tmp2);

  // The operation is treated as floating point when the first operand is.
  bool isFP=false;

  if(MVT::isFloatingPoint(Tmp1.getValueType()))
    isFP=true;
    
  bool isModulus=false; // is it a division or a modulus?
  bool isSigned=false;

  // FDIV shares the signed-division settings and FREM the signed-remainder
  // settings; any other opcode leaves both flags false.
  switch(N->getOpcode()) {
    case ISD::FDIV:
    case ISD::SDIV:  isModulus=false; isSigned=true;  break;
    case ISD::UDIV:  isModulus=false; isSigned=false; break;
    case ISD::FREM:
    case ISD::SREM:  isModulus=true;  isSigned=true;  break;
    case ISD::UREM:  isModulus=true;  isSigned=false; break;
  }

  // TODO: check for integer divides by powers of 2 (or other simple patterns?)

    SDOperand TmpPR, TmpPR2;
    SDOperand TmpF1, TmpF2, TmpF3, TmpF4, TmpF5, TmpF6, TmpF7, TmpF8;
    SDOperand TmpF9, TmpF10,TmpF11,TmpF12,TmpF13,TmpF14,TmpF15;
    SDNode *Result;

    // we'll need copies of F0 and F1
    SDOperand F0 = CurDAG->getRegister(IA64::F0, MVT::f64);
    SDOperand F1 = CurDAG->getRegister(IA64::F1, MVT::f64);
    
    // OK, emit some code:

    if(!isFP) {
      // first, load the inputs into FP regs.
      TmpF1 =
        SDOperand(CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64, Tmp1), 0);
      Chain = TmpF1.getValue(1);
      TmpF2 =
        SDOperand(CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64, Tmp2), 0);
      Chain = TmpF2.getValue(1);
      
      // next, convert the inputs to FP
      if(isSigned) {
        TmpF3 =
          SDOperand(CurDAG->getTargetNode(IA64::FCVTXF, MVT::f64, TmpF1), 0);
        Chain = TmpF3.getValue(1);
        TmpF4 =
          SDOperand(CurDAG->getTargetNode(IA64::FCVTXF, MVT::f64, TmpF2), 0);
        Chain = TmpF4.getValue(1);
      } else { // is unsigned
        TmpF3 =
          SDOperand(CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF1), 0);
        Chain = TmpF3.getValue(1);
        TmpF4 =
          SDOperand(CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF2), 0);
        Chain = TmpF4.getValue(1);
      }

    } else { // this is an FP divide/remainder, so we 'leak' some temp
             // regs and assign TmpF3=Tmp1, TmpF4=Tmp2
      TmpF3=Tmp1;
      TmpF4=Tmp2;
    }

    // From here on TmpF3 holds the dividend and TmpF4 the divisor, both as
    // f64-typed values.

    // we start by computing an approximate reciprocal (good to 9 bits?)
    // note, this instruction writes _both_ TmpF5 (answer) and TmpPR (predicate)
    if(isFP)
      TmpF5 = SDOperand(CurDAG->getTargetNode(IA64::FRCPAS0, MVT::f64, MVT::i1,
                                              TmpF3, TmpF4), 0);
    else
      TmpF5 = SDOperand(CurDAG->getTargetNode(IA64::FRCPAS1, MVT::f64, MVT::i1,
                                              TmpF3, TmpF4), 0);
                                  
    TmpPR = TmpF5.getValue(1);
    Chain = TmpF5.getValue(2);

    SDOperand minusB;
    if(isModulus) { // for remainders, it'll be handy to have
                             // copies of -input_b
      minusB = SDOperand(CurDAG->getTargetNode(IA64::SUB, MVT::i64,
                  CurDAG->getRegister(IA64::r0, MVT::i64), Tmp2), 0);
      Chain = minusB.getValue(1);
    }
    
    // Multiply-add steps shared by the FP and integer paths; every node takes
    // TmpPR as its last operand.
    SDOperand TmpE0, TmpY1, TmpE1, TmpY2;

    SDOperand OpsE0[] = { TmpF4, TmpF5, F1, TmpPR };
    TmpE0 = SDOperand(CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64,
                                            OpsE0, 4), 0);
    Chain = TmpE0.getValue(1);
    SDOperand OpsY1[] = { TmpF5, TmpE0, TmpF5, TmpPR };
    TmpY1 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
                                            OpsY1, 4), 0);
    Chain = TmpY1.getValue(1);
    SDOperand OpsE1[] = { TmpE0, TmpE0, F0, TmpPR };
    TmpE1 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
                                            OpsE1, 4), 0);
    Chain = TmpE1.getValue(1);
    SDOperand OpsY2[] = { TmpY1, TmpE1, TmpY1, TmpPR };
    TmpY2 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
                                            OpsY2, 4), 0);
    Chain = TmpY2.getValue(1);
    
    if(isFP) { // if this is an FP divide, we finish up here and exit early
      // FP remainder is not implemented.
      if(isModulus)
        assert(0 && "Sorry, try another FORTRAN compiler.");
 
      SDOperand TmpE2, TmpY3, TmpQ0, TmpR0;

      SDOperand OpsE2[] = { TmpE1, TmpE1, F0, TmpPR };
      TmpE2 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
                                              OpsE2, 4), 0);
      Chain = TmpE2.getValue(1);
      SDOperand OpsY3[] = { TmpY2, TmpE2, TmpY2, TmpPR };
      TmpY3 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
                                              OpsY3, 4), 0);
      Chain = TmpY3.getValue(1);
      SDOperand OpsQ0[] = { Tmp1, TmpY3, F0, TmpPR };
      TmpQ0 =
        SDOperand(CurDAG->getTargetNode(IA64::CFMADS1, MVT::f64, // double prec!
                                        OpsQ0, 4), 0);
      Chain = TmpQ0.getValue(1);
      SDOperand OpsR0[] = { Tmp2, TmpQ0, Tmp1, TmpPR };
      TmpR0 =
        SDOperand(CurDAG->getTargetNode(IA64::CFNMADS1, MVT::f64, // double prec!
                                        OpsR0, 4), 0);
      Chain = TmpR0.getValue(1);

// we want Result to have the same target register as the frcpa, so
// we two-address hack it. See the comment "for this to work..." on
// page 48 of Intel application note #245415
      SDOperand Ops[] = { TmpF5, TmpY3, TmpR0, TmpQ0, TmpPR };
      Result = CurDAG->getTargetNode(IA64::TCFMADS0, MVT::f64, // d.p. s0 rndg!
                                     Ops, 5);
      Chain = SDOperand(Result, 1);
      return Result; // XXX: early exit!
    } else { // this is *not* an FP divide, so there's a bit left to do:
    
      SDOperand TmpQ2, TmpR2, TmpQ3, TmpQ;

      SDOperand OpsQ2[] = { TmpF3, TmpY2, F0, TmpPR };
      TmpQ2 = SDOperand(CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
                                              OpsQ2, 4), 0);
      Chain = TmpQ2.getValue(1);
      SDOperand OpsR2[] = { TmpF4, TmpQ2, TmpF3, TmpPR };
      TmpR2 = SDOperand(CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64,
                                              OpsR2, 4), 0);
      Chain = TmpR2.getValue(1);
      
// we want TmpQ3 to have the same target register as the frcpa? maybe we
// should two-address hack it. See the comment "for this to work..." on page
// 48 of Intel application note #245415
      SDOperand OpsQ3[] = { TmpF5, TmpR2, TmpY2, TmpQ2, TmpPR };
      TmpQ3 = SDOperand(CurDAG->getTargetNode(IA64::TCFMAS1, MVT::f64,
                                         OpsQ3, 5), 0);
      Chain = TmpQ3.getValue(1);

      // STORY: without these two-address instructions (TCFMAS1 and TCFMADS0)
      // the FPSWA won't be able to help out in the case of large/tiny
      // arguments. Other fun bugs may also appear, e.g. 0/x = x, not 0.
      
      // Convert the quotient with the signed or unsigned truncating
      // conversion node.
      if(isSigned)
        TmpQ = SDOperand(CurDAG->getTargetNode(IA64::FCVTFXTRUNCS1,
                                               MVT::f64, TmpQ3), 0);
      else
        TmpQ = SDOperand(CurDAG->getTargetNode(IA64::FCVTFXUTRUNCS1,
                                               MVT::f64, TmpQ3), 0);
      
      Chain = TmpQ.getValue(1);

      if(isModulus) {
        // The remainder is built by an XMAL node over the quotient, -b and
        // the original dividend bits (TmpF1), then moved back with GETFSIG.
        SDOperand FPminusB =
          SDOperand(CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64, minusB), 0);
        Chain = FPminusB.getValue(1);
        SDOperand Remainder =
          SDOperand(CurDAG->getTargetNode(IA64::XMAL, MVT::f64,
                                          TmpQ, FPminusB, TmpF1), 0);
        Chain = Remainder.getValue(1);
        Result = CurDAG->getTargetNode(IA64::GETFSIG, MVT::i64, Remainder);
        Chain = SDOperand(Result, 1);
      } else { // just an integer divide
        Result = CurDAG->getTargetNode(IA64::GETFSIG, MVT::i64, TmpQ);
        Chain = SDOperand(Result, 1);
      }

      return Result;
    } // wasn't an FP divide
}
// Example no. 10
// 0
/// LowerOperation - Custom-lower the node Op for IA64.
///
/// Handled opcodes:
///   - ISD::RET with one operand (chain only) or three operands (chain plus a
///     single return value).
///   - ISD::VAARG and ISD::VASTART.
///   - ISD::RETURNADDR and ISD::FRAMEADDR are currently unimplemented and
///     yield a null SDOperand.
///
/// Any other opcode, including ISD::GlobalTLSAddress, is a fatal error.
SDOperand IA64TargetLowering::
LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default:
    assert(0 && "Should not custom lower this!");
    // With asserts compiled out we must not fall through into the cases
    // below and lower an arbitrary node as if it were something else.
    abort();
  case ISD::GlobalTLSAddress:
    assert(0 && "TLS not implemented for IA64.");
    // Same reasoning: never fall through into the RET lowering.
    abort();
  case ISD::RET: {
    SDOperand AR_PFSVal, Copy;
    
    switch(Op.getNumOperands()) {
     default:
      assert(0 && "Do not know how to return this many arguments!");
      abort();
    case 1: 
      // No return value: read the value kept in VirtGPR and copy it into
      // AR_PFS, then emit the return on the resulting chain.
      AR_PFSVal = DAG.getCopyFromReg(Op.getOperand(0), VirtGPR, MVT::i64);
      AR_PFSVal = DAG.getCopyToReg(AR_PFSVal.getValue(1), IA64::AR_PFS, 
                                   AR_PFSVal);
      return DAG.getNode(IA64ISD::RET_FLAG, MVT::Other, AR_PFSVal);
    case 3: {
      // Copy the result into the output register & restore ar.pfs
      MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
      // Integer values are returned in r8, everything else in F8.
      unsigned ArgReg = MVT::isInteger(ArgVT) ? IA64::r8 : IA64::F8;

      AR_PFSVal = DAG.getCopyFromReg(Op.getOperand(0), VirtGPR, MVT::i64);
      // The two copies are tied together through the flag produced by the
      // first one, and the flag of the second one is handed to RET_FLAG.
      Copy = DAG.getCopyToReg(AR_PFSVal.getValue(1), ArgReg, Op.getOperand(1),
                              SDOperand());
      AR_PFSVal = DAG.getCopyToReg(Copy.getValue(0), IA64::AR_PFS, AR_PFSVal,
                                   Copy.getValue(1));
      return DAG.getNode(IA64ISD::RET_FLAG, MVT::Other,
                         AR_PFSVal, AR_PFSVal.getValue(1));
    }
    }
    // Not reached: every case of the inner switch returns or aborts.
    return SDOperand();
  }
  case ISD::VAARG: {
    MVT::ValueType VT = getPointerTy();
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    // Load the current va_list pointer from the location in operand 1.
    SDOperand VAList = DAG.getLoad(VT, Op.getOperand(0), Op.getOperand(1), 
                                   SV, 0);
    // Increment the pointer, VAList, to the next vaarg (one pointer-sized
    // slot further on).
    SDOperand VAIncr = DAG.getNode(ISD::ADD, VT, VAList, 
                                   DAG.getConstant(MVT::getSizeInBits(VT)/8, 
                                                   VT));
    // Store the incremented VAList to the legalized pointer
    VAIncr = DAG.getStore(VAList.getValue(1), VAIncr,
                          Op.getOperand(1), SV, 0);
    // Load the actual argument out of the pointer VAList; the load is chained
    // after the store above.
    return DAG.getLoad(Op.getValueType(), VAIncr, VAList, NULL, 0);
  }
  case ISD::VASTART: {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i64);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV, 0);
  }
  // Frame & Return address.  Currently unimplemented
  case ISD::RETURNADDR:         break;
  case ISD::FRAMEADDR:          break;
  }
  return SDOperand();
}
// Example no. 11
// 0
/// LowerCallTo - Build the DAG nodes for a call to Callee.
///
/// Every argument is widened to 64 bits: small integers are extended to i64
/// (sign, zero or any-extend according to the argument's isSExt / isZExt
/// flags) and f32 is extended to f64.  The first eight arguments are passed
/// in registers; later ones are stored to r12-relative stack slots starting
/// at offset 16.  r1, r12 and rp are read before the call and written back
/// after it.
///
/// \returns the pair (return value, output chain).  The return value is a
/// null SDOperand when RetTy is void.
///
/// NOTE: RetSExt, RetZExt, isVarArg, CallingConv and isTailCall are not
/// consulted by this implementation.
std::pair<SDOperand, SDOperand>
IA64TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
                                bool RetSExt, bool RetZExt,
                                bool isVarArg, unsigned CallingConv, 
                                bool isTailCall, SDOperand Callee, 
                                ArgListTy &Args, SelectionDAG &DAG) {

  MachineFunction &MF = DAG.getMachineFunction();

  // 16 bytes are always reserved; each argument beyond the eighth adds one
  // 8-byte stack slot.
  unsigned NumBytes = 16;
  unsigned outRegsUsed = 0;

  if (Args.size() > 8) {
    NumBytes += (Args.size() - 8) * 8;
    outRegsUsed = 8;
  } else {
    outRegsUsed = Args.size();
  }

  // FIXME? this WILL fail if we ever try to pass around an arg that
  // consumes more than a single output slot (a 'real' double, int128
  // some sort of aggregate etc.), as we'll underestimate how many 'outX'
  // registers we use. Hopefully, the assembler will notice.
  MF.getInfo<IA64FunctionInfo>()->outRegsUsed=
    std::max(outRegsUsed, MF.getInfo<IA64FunctionInfo>()->outRegsUsed);

  // keep stack frame 16-byte aligned
  // assert(NumBytes==((NumBytes+15) & ~15) && 
  //        "stack frame not 16-byte aligned!");
  NumBytes = (NumBytes+15) & ~15;
  
  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  SDOperand StackPtr;                      // created lazily on first stack arg
  std::vector<SDOperand> Stores;           // stores of stack-passed arguments
  std::vector<SDOperand> Converts;         // GETFD copies of FP register args
  std::vector<SDOperand> RegValuesToPass;  // register-passed args, in order
  unsigned ArgOffset = 16;
  
  for (unsigned i = 0, e = Args.size(); i != e; ++i)
    {
      SDOperand Val = Args[i].Node;
      MVT::ValueType ObjectVT = Val.getValueType();
      SDOperand ValToStore(0, 0), ValToConvert(0, 0);
      unsigned ObjSize=8;
      switch (ObjectVT) {
      default:
        assert(0 && "unexpected argument type!");
        // With asserts compiled out, do not fall through and treat an
        // unsupported type as a small integer.
        abort();
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32: {
        //promote to 64-bits, sign/zero extending based on type
        //of the argument
        ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
        if (Args[i].isSExt)
          ExtendKind = ISD::SIGN_EXTEND;
        else if (Args[i].isZExt)
          ExtendKind = ISD::ZERO_EXTEND;
        Val = DAG.getNode(ExtendKind, MVT::i64, Val);
        // XXX: fall through
      }
      case MVT::i64:
        //ObjSize = 8;
        if(RegValuesToPass.size() >= 8) {
          ValToStore = Val;
        } else {
          RegValuesToPass.push_back(Val);
        }
        break;
      case MVT::f32:
        //promote to 64-bits
        Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
        // XXX: fall through
      case MVT::f64:
        if(RegValuesToPass.size() >= 8) {
          ValToStore = Val;
        } else {
          RegValuesToPass.push_back(Val);
          if(1 /* TODO: if(calling external or variadic function)*/ ) {
            ValToConvert = Val; // additionally pass this FP value as an int
          }
        }
        break;
      }
      
      if(ValToStore.Val) {
        if(!StackPtr.Val) {
          StackPtr = DAG.getRegister(IA64::r12, MVT::i64);
        }
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, MVT::i64, StackPtr, PtrOff);
        Stores.push_back(DAG.getStore(Chain, ValToStore, PtrOff, NULL, 0));
        ArgOffset += ObjSize;
      }

      if(ValToConvert.Val) {
        Converts.push_back(DAG.getNode(IA64ISD::GETFD, MVT::i64, ValToConvert)); 
      }
    }

  // Emit all stores, make sure they occur before any copies into physregs.
  if (!Stores.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Stores[0],Stores.size());

  static const unsigned IntArgRegs[] = {
    IA64::out0, IA64::out1, IA64::out2, IA64::out3, 
    IA64::out4, IA64::out5, IA64::out6, IA64::out7
  };

  static const unsigned FPArgRegs[] = {
    IA64::F8,  IA64::F9,  IA64::F10, IA64::F11, 
    IA64::F12, IA64::F13, IA64::F14, IA64::F15
  };

  SDOperand InFlag;
  
  // save the current GP, SP and RP : FIXME: do we need to do all 3 always?
  // Each CopyFromReg yields (value, chain, flag); the chain and flag are
  // threaded into the next copy.
  SDOperand GPBeforeCall = DAG.getCopyFromReg(Chain, IA64::r1, MVT::i64, InFlag);
  Chain = GPBeforeCall.getValue(1);
  InFlag = Chain.getValue(2);
  SDOperand SPBeforeCall = DAG.getCopyFromReg(Chain, IA64::r12, MVT::i64, InFlag);
  Chain = SPBeforeCall.getValue(1);
  InFlag = Chain.getValue(2);
  SDOperand RPBeforeCall = DAG.getCopyFromReg(Chain, IA64::rp, MVT::i64, InFlag);
  Chain = RPBeforeCall.getValue(1);
  InFlag = Chain.getValue(2);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing integer args into regs out[0-7]
  // mapped 1:1 and the FP args into regs F8-F15 "lazily"
  // TODO: for performance, we should only copy FP args into int regs when we
  // know this is required (i.e. for variadic or external (unknown) functions)

  // first do the FP->(integer representation) conversions, these are
  // flagged for now, but shouldn't have to be (TODO)
  // The integer register used is the one at the argument's own position i;
  // Converts holds one entry per FP register argument, in the same order.
  unsigned seenConverts = 0;
  for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
    if(MVT::isFloatingPoint(RegValuesToPass[i].getValueType())) {
      Chain = DAG.getCopyToReg(Chain, IntArgRegs[i], Converts[seenConverts++], 
                               InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // next copy args into the usual places, these are flagged
  // Integer args go to the out register at position i; FP args take the next
  // unused register from FPArgRegs.
  unsigned usedFPArgs = 0;
  for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain,
      MVT::isInteger(RegValuesToPass[i].getValueType()) ?
        IntArgRegs[i] : FPArgRegs[usedFPArgs++], RegValuesToPass[i], InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
/*
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i64);
  }
*/

  std::vector<MVT::ValueType> NodeTys;
  std::vector<SDOperand> CallOperands;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  CallOperands.push_back(Chain);
  CallOperands.push_back(Callee);

  // emit the call itself
  // InFlag was set by the register saves above, so it is expected to be
  // non-null here.
  if (InFlag.Val)
    CallOperands.push_back(InFlag);
  else
    assert(0 && "this should never happen!\n");

  // to make way for a hack:
  Chain = DAG.getNode(IA64ISD::BRCALL, NodeTys,
                      &CallOperands[0], CallOperands.size());
  InFlag = Chain.getValue(1);

  // restore the GP, SP and RP after the call  
  Chain = DAG.getCopyToReg(Chain, IA64::r1, GPBeforeCall, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, IA64::r12, SPBeforeCall, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, IA64::rp, RPBeforeCall, InFlag);
  InFlag = Chain.getValue(1);
 
  // Fetch the return value, if any: integers come back in r8 (as i64) and
  // floating point values in F8 (as f64), then are narrowed to RetTyVT.
  MVT::ValueType RetTyVT = getValueType(RetTy);
  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default:
      assert(0 && "Unknown value type to return!");
      // With asserts compiled out, do not fall through and treat an
      // unsupported return type as a bool.
      abort();
    case MVT::i1: { // bools are just like other integers (returned in r8)
      // we *could* fall through to the truncate below, but this saves a
      // few redundant predicate ops
      SDOperand boolInR8 = DAG.getCopyFromReg(Chain, IA64::r8, MVT::i64,InFlag);
      InFlag = boolInR8.getValue(2);
      Chain = boolInR8.getValue(1);
      SDOperand zeroReg = DAG.getCopyFromReg(Chain, IA64::r0, MVT::i64, InFlag);
      InFlag = zeroReg.getValue(2);
      Chain = zeroReg.getValue(1);
      
      // The bool is "r8 != r0".
      RetVal = DAG.getSetCC(MVT::i1, boolInR8, zeroReg, ISD::SETNE);
      break;
    }
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, IA64::r8, MVT::i64, InFlag);
      Chain = RetVal.getValue(1);
      
      // keep track of whether it is sign or zero extended (todo: bools?)
/* XXX
      RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext :ISD::AssertZext,
                           MVT::i64, RetVal, DAG.getValueType(RetTyVT));
*/
      RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal);
      break;
    case MVT::i64:
      RetVal = DAG.getCopyFromReg(Chain, IA64::r8, MVT::i64, InFlag);
      Chain = RetVal.getValue(1);
      InFlag = RetVal.getValue(2); // XXX dead
      break;
    case MVT::f32:
      RetVal = DAG.getCopyFromReg(Chain, IA64::F8, MVT::f64, InFlag);
      Chain = RetVal.getValue(1);
      // NOTE(review): ISD::TRUNCATE is applied to an f64 value here; a
      // floating-point rounding node looks like what is intended -- confirm
      // against the ISD node definitions before changing.
      RetVal = DAG.getNode(ISD::TRUNCATE, MVT::f32, RetVal);
      break;
    case MVT::f64:
      RetVal = DAG.getCopyFromReg(Chain, IA64::F8, MVT::f64, InFlag);
      Chain = RetVal.getValue(1);
      InFlag = RetVal.getValue(2); // XXX dead
      break;
    }
  }
  
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, getPointerTy()),
                             DAG.getConstant(0, getPointerTy()),
                             SDOperand());
  return std::make_pair(RetVal, Chain);
}