void XCoreFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                     RegScavenger *RS) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR);
  const TargetRegisterClass *RC = &XCore::GRRegsRegClass;
  XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
  if (LRUsed) {
    MF.getRegInfo().setPhysRegUnused(XCore::LR);

    bool isVarArg = MF.getFunction()->isVarArg();
    int FrameIdx;
    if (! isVarArg) {
      // A fixed offset of 0 allows us to save/restore LR using entsp/retsp.
      FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true);
    } else {
      FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(),
                                        false);
    }
    XFI->setUsesLR(FrameIdx);
    XFI->setLRSpillSlot(FrameIdx);
  }

  // A callee save register is used to hold the FP.
  // This needs saving / restoring in the epilogue / prologue.
  if (hasFP(MF))
    XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(),
                                               RC->getAlignment(),
                                               false));
}
示例#2
0
bool MIRParserImpl::initializeFrameInfo(
    const Function &F, MachineFrameInfo &MFI,
    const yaml::MachineFunction &YamlMF,
    DenseMap<unsigned, int> &StackObjectSlots,
    DenseMap<unsigned, int> &FixedStackObjectSlots) {
  const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo;
  MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken);
  MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken);
  MFI.setHasStackMap(YamlMFI.HasStackMap);
  MFI.setHasPatchPoint(YamlMFI.HasPatchPoint);
  MFI.setStackSize(YamlMFI.StackSize);
  MFI.setOffsetAdjustment(YamlMFI.OffsetAdjustment);
  if (YamlMFI.MaxAlignment)
    MFI.ensureMaxAlignment(YamlMFI.MaxAlignment);
  MFI.setAdjustsStack(YamlMFI.AdjustsStack);
  MFI.setHasCalls(YamlMFI.HasCalls);
  MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize);
  MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment);
  MFI.setHasVAStart(YamlMFI.HasVAStart);
  MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);

  // Initialize the fixed frame objects.
  for (const auto &Object : YamlMF.FixedStackObjects) {
    int ObjectIdx;
    if (Object.Type != yaml::FixedMachineStackObject::SpillSlot)
      ObjectIdx = MFI.CreateFixedObject(Object.Size, Object.Offset,
                                        Object.IsImmutable, Object.IsAliased);
    else
      ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset);
    MFI.setObjectAlignment(ObjectIdx, Object.Alignment);
    // TODO: Report an error when objects are redefined.
    FixedStackObjectSlots.insert(std::make_pair(Object.ID, ObjectIdx));
  }

  // Initialize the ordinary frame objects.
  for (const auto &Object : YamlMF.StackObjects) {
    int ObjectIdx;
    const AllocaInst *Alloca = nullptr;
    const yaml::StringValue &Name = Object.Name;
    if (!Name.Value.empty()) {
      Alloca = dyn_cast_or_null<AllocaInst>(
          F.getValueSymbolTable().lookup(Name.Value));
      if (!Alloca)
        return error(Name.SourceRange.Start,
                     "alloca instruction named '" + Name.Value +
                         "' isn't defined in the function '" + F.getName() +
                         "'");
    }
    if (Object.Type == yaml::MachineStackObject::VariableSized)
      ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca);
    else
      ObjectIdx = MFI.CreateStackObject(
          Object.Size, Object.Alignment,
          Object.Type == yaml::MachineStackObject::SpillSlot, Alloca);
    MFI.setObjectOffset(ObjectIdx, Object.Offset);
    // TODO: Report an error when objects are redefined.
    StackObjectSlots.insert(std::make_pair(Object.ID, ObjectIdx));
  }
  return false;
}
int XCoreFunctionInfo::createFPSpillSlot(MachineFunction &MF) {
  if (FPSpillSlotSet) {
    return FPSpillSlot;
  }
  const TargetRegisterClass *RC = &XCore::GRRegsRegClass;
  MachineFrameInfo *MFI = MF.getFrameInfo();
  FPSpillSlot = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), true);
  FPSpillSlotSet = true;
  return FPSpillSlot;
}
bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI,
    unsigned &MinCSFrameIndex, unsigned &MaxCSFrameIndex) const {
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());

  if (!MF.getFunction()->getAttributes().hasAttrSomewhere(
            Attribute::SwiftError))
    return false;

  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Simplify the logic here since AArch64 does not have fixed or reserved
  // spill slots.

  // Now that we know which registers need to be saved and restored, allocate
  // stack slots according to getCalleeSavedRegsForLayout.
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegsForLayout(&MF);

  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    // Assign a slot for functions which call __builtin_unwind_init.
    if (!MF.getRegInfo().isPhysRegUsed(Reg) && !MF.getMMI().callsUnwindInit())
      continue;

    DEBUG(dbgs() << "try to allocate stack slot for reg " << Reg << '\n');
    const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);

    // Just spill it anywhere convenient.
    unsigned Align = RC->getAlignment();
    unsigned StackAlign = getStackAlignment();

    // We may not be able to satisfy the desired alignment specification of
    // the TargetRegisterClass if the stack alignment is smaller. Use the
    // min.
    Align = std::min(Align, StackAlign);
    int FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
    if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
    if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;

    // Find the corresponding entry in CSI for Reg.
    for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end();
         I != E; ++I)
      if (I->getReg() == Reg) {
        I->setFrameIdx(FrameIdx);
        break;
      }
  }

  return true;
}
示例#5
0
void XCoreFrameLowering::
processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                    RegScavenger *RS) const {
  assert(RS && "requiresRegisterScavenging failed");
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const TargetRegisterClass *RC = &XCore::GRRegsRegClass;
  XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
  // Reserve slots close to SP or frame pointer for Scavenging spills.
  // When using SP for small frames, we don't need any scratch registers.
  // When using SP for large frames, we may need 2 scratch registers.
  // When using FP, for large or small frames, we may need 1 scratch register.
  if (XFI->isLargeFrame(MF) || hasFP(MF))
    RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                       RC->getAlignment(),
                                                       false));
  if (XFI->isLargeFrame(MF) && !hasFP(MF))
    RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                       RC->getAlignment(),
                                                       false));
}
示例#6
0
void
AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                       RegScavenger *RS) const {
  const AArch64RegisterInfo *RegInfo =
    static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo());
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64InstrInfo &TII =
    *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());

  if (hasFP(MF)) {
    MF.getRegInfo().setPhysRegUsed(AArch64::X29);
    MF.getRegInfo().setPhysRegUsed(AArch64::X30);
  }

  // If addressing of local variables is going to be more complicated than
  // shoving a base register and an offset into the instruction then we may well
  // need to scavenge registers. We should either specifically add an
  // callee-save register for this purpose or allocate an extra spill slot.
  bool BigStack =
    MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)
    || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
    || (MFI->adjustsStack() && !hasReservedCallFrame(MF));

  if (!BigStack)
    return;

  // We certainly need some slack space for the scavenger, preferably an extra
  // register.
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs();
  MCPhysReg ExtraReg = AArch64::NoRegister;

  for (unsigned i = 0; CSRegs[i]; ++i) {
    if (AArch64::GPR64RegClass.contains(CSRegs[i]) &&
        !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) {
      ExtraReg = CSRegs[i];
      break;
    }
  }

  if (ExtraReg != 0) {
    MF.getRegInfo().setPhysRegUsed(ExtraReg);
  } else {
    assert(RS && "Expect register scavenger to be available");

    // Create a stack slot for scavenging purposes. PrologEpilogInserter
    // helpfully places it near either SP or FP for us to avoid
    // infinitely-regression during scavenging.
    const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
    RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                       RC->getAlignment(),
                                                       false));
  }
}
int XCoreFunctionInfo::createLRSpillSlot(MachineFunction &MF) {
  if (LRSpillSlotSet) {
    return LRSpillSlot;
  }
  const TargetRegisterClass *RC = &XCore::GRRegsRegClass;
  MachineFrameInfo *MFI = MF.getFrameInfo();
  if (! MF.getFunction()->isVarArg()) {
    // A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
    LRSpillSlot = MFI->CreateFixedObject(RC->getSize(), 0, true);
  } else {
    LRSpillSlot = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), true);
  }
  LRSpillSlotSet = true;
  return LRSpillSlot;
}
示例#8
0
void SystemZFrameLowering::
processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                    RegScavenger *RS) const {
  MachineFrameInfo *MFFrame = MF.getFrameInfo();
  uint64_t MaxReach = (MFFrame->estimateStackSize(MF) +
                       SystemZMC::CallFrameSize * 2);
  if (!isUInt<12>(MaxReach)) {
    // We may need register scavenging slots if some parts of the frame
    // are outside the reach of an unsigned 12-bit displacement.
    // Create 2 for the case where both addresses in an MVC are
    // out of range.
    RS->addScavengingFrameIndex(MFFrame->CreateStackObject(8, 8, false));
    RS->addScavengingFrameIndex(MFFrame->CreateStackObject(8, 8, false));
  }
}
void TMS320C64XFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                     RegScavenger *RS) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const TMS320C64XRegisterInfo *RegInfo =
    static_cast<const TMS320C64XRegisterInfo*>(MF.getTarget().getRegisterInfo());
  const TargetRegisterClass *RC = TMS320C64X::GPRegsRegisterClass;

  if (RegInfo->requiresRegisterScavenging(MF)) {
    // Reserve a slot for emergency spilling.
    // XXX It is essential that the slot can be addressed with a small offset!
    RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                       RC->getAlignment(),
                                                       false));
  }
}
示例#10
0
void SPUFrameInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                        RegScavenger *RS) const{
  // Mark LR and SP unused, since the prolog spills them to stack and
  // we don't want anyone else to spill them for us.
  //
  // Also, unless R2 is really used someday, don't spill it automatically.
  MF.getRegInfo().setPhysRegUnused(SPU::R0);
  MF.getRegInfo().setPhysRegUnused(SPU::R1);
  MF.getRegInfo().setPhysRegUnused(SPU::R2);

  MachineFrameInfo *MFI = MF.getFrameInfo();
  const TargetRegisterClass *RC = &SPU::R32CRegClass;
  RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                     RC->getAlignment(),
                                                     false));
}
// Generate code to call a function
SDValue
VectorProcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
	SmallVectorImpl<SDValue> &InVals) const 
{
	SelectionDAG &DAG = CLI.DAG;
	DebugLoc &dl = CLI.DL;
	SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
	SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
	SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
	SDValue Chain = CLI.Chain;
	SDValue Callee = CLI.Callee;
	CallingConv::ID CallConv = CLI.CallConv;
	bool isVarArg = CLI.IsVarArg;
	
	// We do not support tail calls. This flag must be cleared in order
	// to indicate that to subsequent passes.
	CLI.IsTailCall = false;

	MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();

	// Analyze operands of the call, assigning locations to each operand.
	// VectorProcCallingConv.td will auto-generate CC_VectorProc32, which 
	// knows how to handle operands (what go in registers vs. stack, etc).
	SmallVector<CCValAssign, 16> ArgLocs;
	CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
		DAG.getTarget(), ArgLocs, *DAG.getContext());
	CCInfo.AnalyzeCallOperands(Outs, CC_VectorProc32);

	// Get the size of the outgoing arguments stack space requirement.
	unsigned ArgsSize = CCInfo.getNextStackOffset();

	// We always keep the stack pointer 64 byte aligned so we can use block
	// loads/stores for vector arguments
	ArgsSize = (ArgsSize + 63) & ~63;

	// Create local copies for all arguments that are passed by value
	SmallVector<SDValue, 8> ByValArgs;
	for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
		ISD::ArgFlagsTy Flags = Outs[i].Flags;
		if (!Flags.isByVal())
			continue;

		SDValue Arg = OutVals[i];
		unsigned Size = Flags.getByValSize();
		unsigned Align = Flags.getByValAlign();

		int FI = MFI->CreateStackObject(Size, Align, false);
		SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
		SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
		Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align,
			false,        //isVolatile,
			(Size <= 32), //AlwaysInline if size <= 32
			MachinePointerInfo(), MachinePointerInfo());

		ByValArgs.push_back(FIPtr);
	}

	// CALLSEQ_START will decrement the stack to reserve space
	Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));

	SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
	SmallVector<SDValue, 8> MemOpChains;

	// Walk through arguments, storing each one to the proper palce
	bool hasStructRetAttr = false;
	for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
		i != e; ++i, ++realArgIdx) {

		CCValAssign &VA = ArgLocs[i];
		SDValue Arg = OutVals[realArgIdx];

		ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;

		// Use the local copy we created above if this is passed by value
		if (Flags.isByVal())
			Arg = ByValArgs[byvalArgIdx++];

		// Promote the value if needed.
		switch (VA.getLocInfo()) {
			case CCValAssign::Full: 
				break;
				
			case CCValAssign::SExt:
				Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
				break;

			case CCValAssign::ZExt:
				Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
				break;

			case CCValAssign::AExt:
				Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
				break;

			case CCValAssign::BCvt:
				Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
				break;

			default: 
				llvm_unreachable("Unknown loc info!");
		}

		if (Flags.isSRet()) {
			// Structure return
			assert(VA.needsCustom());
			SDValue StackPtr = DAG.getRegister(VectorProc::SP_REG, MVT::i32);
			SDValue PtrOff = DAG.getIntPtrConstant(64);
			PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
			MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
				MachinePointerInfo(), false, false, 0));
			hasStructRetAttr = true;
			continue;
		}

		// Arguments that can be passed on register must be kept at
		// RegsToPass vector
		if (VA.isRegLoc()) {
			RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
			continue;
		}

		// This needs to be pushed on the stack
		assert(VA.isMemLoc());

		// Create a store off the stack pointer for this argument.
		SDValue StackPtr = DAG.getRegister(VectorProc::SP_REG, MVT::i32);
		SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset());
		PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
		MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
			MachinePointerInfo(), false, false, 0));
	}

	// Emit all stores, make sure the occur before any copies into physregs.
	if (!MemOpChains.empty())
	{
		Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
			&MemOpChains[0], MemOpChains.size());
	}

	// Build a sequence of copy-to-reg nodes chained together with token
	// chain and flag operands which copy the outgoing args into registers.
	// The InFlag in necessary since all emitted instructions must be
	// stuck together.
	SDValue InFlag;
	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
		Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag);
		InFlag = Chain.getValue(1);
	}

	unsigned SRetArgSize = hasStructRetAttr ? getSRetArgSize(DAG, Callee) : 0;

	// Get the function address.
	// If the callee is a GlobalAddress node (quite common, every direct call is)
	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
	// Likewise ExternalSymbol -> TargetExternalSymbol.
	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
		Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
	else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
		Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);

	// Returns a chain & a flag for retval copy to use
	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
	SmallVector<SDValue, 8> Ops;
	Ops.push_back(Chain);
	Ops.push_back(Callee);
	if (hasStructRetAttr)
		Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32));

	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
	{
		Ops.push_back(DAG.getRegister(RegsToPass[i].first,
			RegsToPass[i].second.getValueType()));
	}
	
	// Add a register mask operand representing the call-preserved registers.
	const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
	const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv);
	assert(Mask && "Missing call preserved mask for calling convention");
	Ops.push_back(CLI.DAG.getRegisterMask(Mask));

	if (InFlag.getNode())
		Ops.push_back(InFlag);

	Chain = DAG.getNode(VectorProcISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
	InFlag = Chain.getValue(1);

	Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
		DAG.getIntPtrConstant(0, true), InFlag);
	InFlag = Chain.getValue(1);

	// The call has returned, handle return values
	SmallVector<CCValAssign, 16> RVLocs;
	CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(),
		DAG.getTarget(), RVLocs, *DAG.getContext());

	RVInfo.AnalyzeCallResult(Ins, RetCC_VectorProc32);

	// Copy all of the result registers out of their specified physreg.
	for (unsigned i = 0; i != RVLocs.size(); ++i) {
		Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
			RVLocs[i].getValVT(), InFlag).getValue(1);
		InFlag = Chain.getValue(2);
		InVals.push_back(Chain.getValue(0));
	}

	return Chain;
}
/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
/// registers.
void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
  const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
  MachineFrameInfo *MFI = Fn.getFrameInfo();

  // Get the callee saved register list...
  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);

  // These are used to keep track the callee-save area. Initialize them.
  MinCSFrameIndex = INT_MAX;
  MaxCSFrameIndex = 0;

  // Early exit for targets which have no callee saved registers.
  if (CSRegs == 0 || CSRegs[0] == 0)
    return;

  // In Naked functions we aren't going to save any registers.
  if (Fn.getFunction()->hasFnAttr(Attribute::Naked))
    return;

  std::vector<CalleeSavedInfo> CSI;
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
      // If the reg is modified, save it!
      CSI.push_back(CalleeSavedInfo(Reg));
    } else {
      for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
           *AliasSet; ++AliasSet) {  // Check alias registers too.
        if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
          CSI.push_back(CalleeSavedInfo(Reg));
          break;
        }
      }
    }
  }

  if (CSI.empty())
    return;   // Early exit if no callee saved registers are modified!

  unsigned NumFixedSpillSlots;
  const TargetFrameLowering::SpillSlot *FixedSpillSlots =
    TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);

  // Now that we know which registers need to be saved and restored, allocate
  // stack slots for them.
  for (std::vector<CalleeSavedInfo>::iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    unsigned Reg = I->getReg();
    const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);

    int FrameIdx;
    if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) {
      I->setFrameIdx(FrameIdx);
      continue;
    }

    // Check to see if this physreg must be spilled to a particular stack slot
    // on this target.
    const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
    while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
           FixedSlot->Reg != Reg)
      ++FixedSlot;

    if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
      // Nope, just spill it anywhere convenient.
      unsigned Align = RC->getAlignment();
      unsigned StackAlign = TFI->getStackAlignment();

      // We may not be able to satisfy the desired alignment specification of
      // the TargetRegisterClass if the stack alignment is smaller. Use the
      // min.
      Align = std::min(Align, StackAlign);
      FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
      if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
      if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
    } else {
      // Spill it to the stack where we must.
      FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
    }

    I->setFrameIdx(FrameIdx);
  }

  MFI->setCalleeSavedInfo(CSI);
}
示例#13
0
void Nios2RegisterInfo::adjustNios2StackFrame(MachineFunction &MF) const
{
    MachineFrameInfo *MFI = MF.getFrameInfo();
    Nios2FunctionInfo *Nios2FI = MF.getInfo<Nios2FunctionInfo>();
    const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
    unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
    unsigned RegSize = 4;
    bool HasGP = Nios2FI->needGPSaveRestore();

    // Min and Max CSI FrameIndex.
    int MinCSFI = -1, MaxCSFI = -1; 

    // See the description at Nios2MachineFunction.h
    int TopCPUSavedRegOff = -1;

    // It happens that the default stack frame allocation order does not directly 
    // map to the convention used for nios2. So we must fix it. We move the callee 
    // save register slots after the local variables area, as described in the
    // stack frame above.
    unsigned CalleeSavedAreaSize = 0;
    if (!CSI.empty()) {
        MinCSFI = CSI[0].getFrameIdx();
        MaxCSFI = CSI[CSI.size()-1].getFrameIdx();
    }
    for (unsigned i = 0, e = CSI.size(); i != e; ++i)
        CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx());

  unsigned StackOffset = HasGP ? (Nios2FI->getGPStackOffset()+RegSize) : 16;

    // Adjust local variables. They should come on the stack right
    // after the arguments.
    int LastOffsetFI = -1;
    for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
        if (i >= MinCSFI && i <= MaxCSFI)
            continue;
        if (MFI->isDeadObjectIndex(i))
            continue;
        unsigned Offset =
            StackOffset + MFI->getObjectOffset(i) - CalleeSavedAreaSize;
        if (LastOffsetFI == -1)
            LastOffsetFI = i;
        if (Offset > MFI->getObjectOffset(LastOffsetFI))
            LastOffsetFI = i;
        MFI->setObjectOffset(i, Offset);
    }

    // Adjust CPU Callee Saved Registers Area. Registers RA and FP must
    // be saved in this CPU Area. This whole Area must be aligned to the
    // default Stack Alignment requirements.

    if (LastOffsetFI >= 0)
        StackOffset = MFI->getObjectOffset(LastOffsetFI)+ 
                      MFI->getObjectSize(LastOffsetFI);
    StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);

    int stackSize  = MF.getFrameInfo()->getStackSize();
    if (MFI->hasCalls()) {
        stackSize += RegSize;
    }

    if (hasFP(MF)) {
        stackSize += RegSize;
    }
    
    for (unsigned i = 0, e = CSI.size(); i != e ; ++i) {
        if (CSI[i].getRegClass() != Nios2::CPURegsRegisterClass)
            break;
        stackSize += RegSize;
    }

    if (MFI->hasCalls()) {
        MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
                             hasFP(MF) ? 4 : 0);
        Nios2FI->setRAStackOffset(hasFP(MF) ? 4 : 0);
        TopCPUSavedRegOff = hasFP(MF) ? 4 : 0;
        StackOffset += RegSize;
    }

    if (hasFP(MF)) {
        MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), 0);
        Nios2FI->setFPStackOffset(0);
        TopCPUSavedRegOff = 0;
        StackOffset += RegSize;
    }

    for (unsigned i = 0, e = CSI.size(); i != e ; ++i) {
        if (CSI[i].getRegClass() != Nios2::CPURegsRegisterClass)
            break;
        MFI->setObjectOffset(CSI[i].getFrameIdx(), stackSize + (-(StackOffset + 4)));
        TopCPUSavedRegOff = stackSize + (-(StackOffset + 4));
        StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
    }

    // Update frame info
    MFI->setStackSize(stackSize);
}
void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                BitVector &SavedRegs,
                                                RegScavenger *RS) const {
  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  SmallVector<unsigned, 4> UnspilledCSGPRs;
  SmallVector<unsigned, 4> UnspilledCSFPRs;

  // The frame record needs to be created by saving the appropriate registers
  if (hasFP(MF)) {
    SavedRegs.set(AArch64::FP);
    SavedRegs.set(AArch64::LR);
  }

  // Spill the BasePtr if it's used. Do this first thing so that the
  // getCalleeSavedRegs() below will get the right answer.
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.set(RegInfo->getBaseRegister());

  if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
    SavedRegs.set(AArch64::X9);

  // If any callee-saved registers are used, the frame cannot be eliminated.
  unsigned NumGPRSpilled = 0;
  unsigned NumFPRSpilled = 0;
  bool ExtraCSSpill = false;
  bool CanEliminateFrame = true;
  DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:");
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);

  // Check pairs of consecutive callee-saved registers.
  for (unsigned i = 0; CSRegs[i]; i += 2) {
    assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");

    const unsigned OddReg = CSRegs[i];
    const unsigned EvenReg = CSRegs[i + 1];
    assert((AArch64::GPR64RegClass.contains(OddReg) &&
            AArch64::GPR64RegClass.contains(EvenReg)) ^
               (AArch64::FPR64RegClass.contains(OddReg) &&
                AArch64::FPR64RegClass.contains(EvenReg)) &&
           "Register class mismatch!");

    const bool OddRegUsed = SavedRegs.test(OddReg);
    const bool EvenRegUsed = SavedRegs.test(EvenReg);

    // Early exit if none of the registers in the register pair is actually
    // used.
    if (!OddRegUsed && !EvenRegUsed) {
      if (AArch64::GPR64RegClass.contains(OddReg)) {
        UnspilledCSGPRs.push_back(OddReg);
        UnspilledCSGPRs.push_back(EvenReg);
      } else {
        UnspilledCSFPRs.push_back(OddReg);
        UnspilledCSFPRs.push_back(EvenReg);
      }
      continue;
    }

    unsigned Reg = AArch64::NoRegister;
    // If only one of the registers of the register pair is used, make sure to
    // mark the other one as used as well.
    if (OddRegUsed ^ EvenRegUsed) {
      // Find out which register is the additional spill.
      Reg = OddRegUsed ? EvenReg : OddReg;
      SavedRegs.set(Reg);
    }

    DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
    DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));

    assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
            (RegInfo->getEncodingValue(OddReg) + 1 ==
             RegInfo->getEncodingValue(EvenReg))) &&
           "Register pair of non-adjacent registers!");
    if (AArch64::GPR64RegClass.contains(OddReg)) {
      NumGPRSpilled += 2;
      // If it's not a reserved register, we can use it in lieu of an
      // emergency spill slot for the register scavenger.
      // FIXME: It would be better to instead keep looking and choose another
      // unspilled register that isn't reserved, if there is one.
      if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
        ExtraCSSpill = true;
    } else
      NumFPRSpilled += 2;

    CanEliminateFrame = false;
  }

  // FIXME: Set BigStack if any stack slot references may be out of range.
  // For now, just conservatively guestimate based on unscaled indexing
  // range. We'll end up allocating an unnecessary spill slot a lot, but
  // realistically that's not a big deal at this stage of the game.
  // The CSR spill slots have not been allocated yet, so estimateStackSize
  // won't include them.
  MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned CFSize =
      MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
  DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
  bool BigStack = (CFSize >= 256);
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    AFI->setHasStackFrame(true);

  // Estimate if we might need to scavenge a register at some point in order
  // to materialize a stack offset. If so, either spill one additional
  // callee-saved register or reserve a special spill slot to facilitate
  // register scavenging. If we already spilled an extra callee-saved register
  // above to keep the number of spills even, we don't need to do anything else
  // here.
  if (BigStack && !ExtraCSSpill) {

    // If we're adding a register to spill here, we have to add two of them
    // to keep the number of regs to spill even.
    assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
    unsigned Count = 0;
    while (!UnspilledCSGPRs.empty() && Count < 2) {
      unsigned Reg = UnspilledCSGPRs.back();
      UnspilledCSGPRs.pop_back();
      DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
                   << " to get a scratch register.\n");
      SavedRegs.set(Reg);
      ExtraCSSpill = true;
      ++Count;
    }

    // If we didn't find an extra callee-saved register to spill, create
    // an emergency spill slot.
    if (!ExtraCSSpill) {
      const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
      int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
      RS->addScavengingFrameIndex(FI);
      DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                   << " as the emergency spill slot.\n");
    }
  }
}
static void interruptFrameLayout(MachineFunction &MF) {
  const Function *F = MF.getFunction();
  llvm::CallingConv::ID CallConv = F->getCallingConv();

  // If this function is not using either the interrupt_handler
  // calling convention or the save_volatiles calling convention
  // then we don't need to do any additional frame layout.
  if (CallConv != llvm::CallingConv::MBLAZE_INTR &&
      CallConv != llvm::CallingConv::MBLAZE_SVOL)
      return;

  MachineFrameInfo *MFI = MF.getFrameInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MBlazeInstrInfo &TII =
    *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());

  // Determine if the calling convention is the interrupt_handler
  // calling convention. Some pieces of the prologue and epilogue
  // only need to be emitted if we are lowering and interrupt handler.
  bool isIntr = CallConv == llvm::CallingConv::MBLAZE_INTR;

  // Determine where to put prologue and epilogue additions
  MachineBasicBlock &MENT   = MF.front();
  MachineBasicBlock &MEXT   = MF.back();

  MachineBasicBlock::iterator MENTI = MENT.begin();
  MachineBasicBlock::iterator MEXTI = prior(MEXT.end());

  DebugLoc ENTDL = MENTI != MENT.end() ? MENTI->getDebugLoc() : DebugLoc();
  DebugLoc EXTDL = MEXTI != MEXT.end() ? MEXTI->getDebugLoc() : DebugLoc();

  // Store the frame indexes generated during prologue additions for use
  // when we are generating the epilogue additions.
  SmallVector<int, 10> VFI;

  // Build the prologue SWI for R3 - R12 if needed. Note that R11 must
  // always have a SWI because it is used when processing RMSR.
  for (unsigned r = MBlaze::R3; r <= MBlaze::R12; ++r) {
    if (!MRI.isPhysRegUsed(r) && !(isIntr && r == MBlaze::R11)) continue;
    
    int FI = MFI->CreateStackObject(4,4,false,false);
    VFI.push_back(FI);

    BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), r)
      .addFrameIndex(FI).addImm(0);
  }
    
  // Build the prologue SWI for R17, R18
  int R17FI = MFI->CreateStackObject(4,4,false,false);
  int R18FI = MFI->CreateStackObject(4,4,false,false);

  BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R17)
    .addFrameIndex(R17FI).addImm(0);
    
  BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R18)
    .addFrameIndex(R18FI).addImm(0);

  // Buid the prologue SWI and the epilogue LWI for RMSR if needed
  if (isIntr) {
    int MSRFI = MFI->CreateStackObject(4,4,false,false);
    BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::MFS), MBlaze::R11)
      .addReg(MBlaze::RMSR);
    BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R11)
      .addFrameIndex(MSRFI).addImm(0);

    BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R11)
      .addFrameIndex(MSRFI).addImm(0);
    BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::MTS), MBlaze::RMSR)
      .addReg(MBlaze::R11);
  }

  // Build the epilogue LWI for R17, R18
  BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R18)
    .addFrameIndex(R18FI).addImm(0);

  BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R17)
    .addFrameIndex(R17FI).addImm(0);

  // Build the epilogue LWI for R3 - R12 if needed
  for (unsigned r = MBlaze::R12, i = VFI.size(); r >= MBlaze::R3; --r) {
    if (!MRI.isPhysRegUsed(r)) continue;
    BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), r)
      .addFrameIndex(VFI[--i]).addImm(0);
  }
}
示例#16
0
void PEI::assignCalleeSavedSpillSlots(MachineFunction &F,
                                      const BitVector &SavedRegs) {
  // These are used to keep track the callee-save area. Initialize them.
  MinCSFrameIndex = INT_MAX;
  MaxCSFrameIndex = 0;

  if (SavedRegs.empty())
    return;

  const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F);

  std::vector<CalleeSavedInfo> CSI;
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    if (SavedRegs.test(Reg))
      CSI.push_back(CalleeSavedInfo(Reg));
  }

  const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
  MachineFrameInfo *MFI = F.getFrameInfo();
  if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
    // If target doesn't implement this, use generic code.

    if (CSI.empty())
      return; // Early exit if no callee saved registers are modified!

    unsigned NumFixedSpillSlots;
    const TargetFrameLowering::SpillSlot *FixedSpillSlots =
        TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);

    // Now that we know which registers need to be saved and restored, allocate
    // stack slots for them.
    for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end();
         I != E; ++I) {
      unsigned Reg = I->getReg();
      const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);

      int FrameIdx;
      if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
        I->setFrameIdx(FrameIdx);
        continue;
      }

      // Check to see if this physreg must be spilled to a particular stack slot
      // on this target.
      const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
      while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
             FixedSlot->Reg != Reg)
        ++FixedSlot;

      if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
        // Nope, just spill it anywhere convenient.
        unsigned Align = RC->getAlignment();
        unsigned StackAlign = TFI->getStackAlignment();

        // We may not be able to satisfy the desired alignment specification of
        // the TargetRegisterClass if the stack alignment is smaller. Use the
        // min.
        Align = std::min(Align, StackAlign);
        FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
        if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
        if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
      } else {
        // Spill it to the stack where we must.
        FrameIdx =
            MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
      }

      I->setFrameIdx(FrameIdx);
    }
  }

  MFI->setCalleeSavedInfo(CSI);
}
SDValue
SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                               CallingConv::ID CallConv, bool isVarArg,
                               bool &isTailCall,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               DebugLoc dl, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const {
  // Sparc target does not yet support tail call optimization.
  isTailCall = false;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);

  // Get the size of the outgoing arguments stack space requirement.
  unsigned ArgsSize = CCInfo.getNextStackOffset();

  // Keep stack frames 8-byte aligned.
  ArgsSize = (ArgsSize+7) & ~7;

  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();

  //Create local copies for byval args.
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0,  e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    unsigned Align = Flags.getByValAlign();

    int FI = MFI->CreateStackObject(Size, Align, false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
    SDValue SizeNode = DAG.getConstant(Size, MVT::i32);

    Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align,
                          false,        //isVolatile,
                          (Size <= 32), //AlwaysInline if size <= 32
                          MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  const unsigned StackOffset = 92;
  bool hasStructRetAttr = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[realArgIdx];

    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;

    //Use local copy if it is a byval arg.
    if (Flags.isByVal())
      Arg = ByValArgs[byvalArgIdx++];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    if (Flags.isSRet()) {
      assert(VA.needsCustom());
      // store SRet argument in %sp+64
      SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
      SDValue PtrOff = DAG.getIntPtrConstant(64);
      PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                         MachinePointerInfo(),
                                         false, false, 0));
      hasStructRetAttr = true;
      continue;
    }

    if (VA.needsCustom()) {
      assert(VA.getLocVT() == MVT::f64);

      if (VA.isMemLoc()) {
        unsigned Offset = VA.getLocMemOffset() + StackOffset;
        //if it is double-word aligned, just store.
        if (Offset % 8 == 0) {
          SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
          SDValue PtrOff = DAG.getIntPtrConstant(Offset);
          PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
          MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                             MachinePointerInfo(),
                                             false, false, 0));
          continue;
        }
      }

      SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
      SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
                                   Arg, StackPtr, MachinePointerInfo(),
                                   false, false, 0);
      // Sparc is big-endian, so the high part comes first.
      SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
                               MachinePointerInfo(), false, false, 0);
      // Increment the pointer to the other half.
      StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
                             DAG.getIntPtrConstant(4));
      // Load the low part.
      SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
                               MachinePointerInfo(), false, false, 0);

      if (VA.isRegLoc()) {
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi));
        assert(i+1 != e);
        CCValAssign &NextVA = ArgLocs[++i];
        if (NextVA.isRegLoc()) {
          RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo));
        } else {
          //Store the low part in stack.
          unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
          SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
          SDValue PtrOff = DAG.getIntPtrConstant(Offset);
          PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
          MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
                                             MachinePointerInfo(),
                                             false, false, 0));
        }
      } else {
        unsigned Offset = VA.getLocMemOffset() + StackOffset;
        // Store the high part.
        SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
        SDValue PtrOff = DAG.getIntPtrConstant(Offset);
        PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff,
                                           MachinePointerInfo(),
                                           false, false, 0));
        // Store the low part.
        PtrOff = DAG.getIntPtrConstant(Offset+4);
        PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
                                           MachinePointerInfo(),
                                           false, false, 0));
      }
      continue;
    }

    // Arguments that can be passed on register must be kept at
    // RegsToPass vector
    if (VA.isRegLoc()) {
      if (VA.getLocVT() != MVT::f32) {
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
        continue;
      }
      Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      continue;
    }

    assert(VA.isMemLoc());

    // Create a store off the stack pointer for this argument.
    SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
    SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+StackOffset);
    PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                       MachinePointerInfo(),
                                       false, false, 0));
  }


  // Emit all stores, make sure the occur before any copies into physregs.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token
  // chain and flag operands which copy the outgoing args into registers.
  // The InFlag in necessary since all emitted instructions must be
  // stuck together.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    unsigned Reg = RegsToPass[i].first;
    // Remap I0->I7 -> O0->O7.
    if (Reg >= SP::I0 && Reg <= SP::I7)
      Reg = Reg-SP::I0+SP::O0;

    Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  unsigned SRetArgSize = (hasStructRetAttr)? getSRetArgSize(DAG, Callee):0;

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  // Likewise ExternalSymbol -> TargetExternalSymbol.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
  else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);

  // Returns a chain & a flag for retval copy to use
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  if (hasStructRetAttr)
    Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32));
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    unsigned Reg = RegsToPass[i].first;
    if (Reg >= SP::I0 && Reg <= SP::I7)
      Reg = Reg-SP::I0+SP::O0;

    Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode())
    Ops.push_back(InFlag);

  Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  InFlag = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CallConv, isVarArg, DAG.getTarget(),
                 RVLocs, *DAG.getContext());

  RVInfo.AnalyzeCallResult(Ins, RetCC_Sparc32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    unsigned Reg = RVLocs[i].getLocReg();

    // Remap I0->I7 -> O0->O7.
    if (Reg >= SP::I0 && Reg <= SP::I7)
      Reg = Reg-SP::I0+SP::O0;

    Chain = DAG.getCopyFromReg(Chain, dl, Reg,
                               RVLocs[i].getValVT(), InFlag).getValue(1);
    InFlag = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }

  return Chain;
}
示例#18
0
void MipsFrameInfo::adjustMipsStackFrame(MachineFunction &MF) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
  unsigned RegSize = STI.isGP32bit() ? 4 : 8;
  bool HasGP = MipsFI->needGPSaveRestore();

  // Min and Max CSI FrameIndex.
  int MinCSFI = -1, MaxCSFI = -1;

  // See the description at MipsMachineFunction.h
  int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1;

  // Replace the dummy '0' SPOffset by the negative offsets, as explained on
  // LowerFormalArguments. Leaving '0' for while is necessary to avoid the
  // approach done by calculateFrameObjectOffsets to the stack frame.
  MipsFI->adjustLoadArgsFI(MFI);
  MipsFI->adjustStoreVarArgsFI(MFI);

  // It happens that the default stack frame allocation order does not directly
  // map to the convention used for mips. So we must fix it. We move the callee
  // save register slots after the local variables area, as described in the
  // stack frame above.
  unsigned CalleeSavedAreaSize = 0;
  if (!CSI.empty()) {
    MinCSFI = CSI[0].getFrameIdx();
    MaxCSFI = CSI[CSI.size()-1].getFrameIdx();
  }
  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
    CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx());

  unsigned StackOffset = HasGP ? (MipsFI->getGPStackOffset()+RegSize)
                : (STI.isABI_O32() ? 16 : 0);

  // Adjust local variables. They should come on the stack right
  // after the arguments.
  int LastOffsetFI = -1;
  for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
    if (i >= MinCSFI && i <= MaxCSFI)
      continue;
    if (MFI->isDeadObjectIndex(i))
      continue;
    unsigned Offset =
      StackOffset + MFI->getObjectOffset(i) - CalleeSavedAreaSize;
    if (LastOffsetFI == -1)
      LastOffsetFI = i;
    if (Offset > MFI->getObjectOffset(LastOffsetFI))
      LastOffsetFI = i;
    MFI->setObjectOffset(i, Offset);
  }

  // Adjust CPU Callee Saved Registers Area. Registers RA and FP must
  // be saved in this CPU Area. This whole area must be aligned to the
  // default Stack Alignment requirements.
  if (LastOffsetFI >= 0)
    StackOffset = MFI->getObjectOffset(LastOffsetFI)+
                  MFI->getObjectSize(LastOffsetFI);
  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);

  for (unsigned i = 0, e = CSI.size(); i != e ; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (!Mips::CPURegsRegisterClass->contains(Reg))
      break;
    MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
    TopCPUSavedRegOff = StackOffset;
    StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
  }

  // Stack locations for FP and RA. If only one of them is used,
  // the space must be allocated for both, otherwise no space at all.
  if (hasFP(MF) || MFI->adjustsStack()) {
    // FP stack location
    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
                         StackOffset);
    MipsFI->setFPStackOffset(StackOffset);
    TopCPUSavedRegOff = StackOffset;
    StackOffset += RegSize;

    // SP stack location
    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
                         StackOffset);
    MipsFI->setRAStackOffset(StackOffset);
    StackOffset += RegSize;

    if (MFI->adjustsStack())
      TopCPUSavedRegOff += RegSize;
  }

  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);

  // Adjust FPU Callee Saved Registers Area. This Area must be
  // aligned to the default Stack Alignment requirements.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (Mips::CPURegsRegisterClass->contains(Reg))
      continue;
    MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
    TopFPUSavedRegOff = StackOffset;
    StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
  }
  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);

  // Update frame info
  MFI->setStackSize(StackOffset);

  // Recalculate the final tops offset. The final values must be '0'
  // if there isn't a callee saved register for CPU or FPU, otherwise
  // a negative offset is needed.
  if (TopCPUSavedRegOff >= 0)
    MipsFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);

  if (TopFPUSavedRegOff >= 0)
    MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset);
}
void
ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                       RegScavenger *RS) const {
  // This tells PEI to spill the FP as if it is any other callee-save register
  // to take advantage the eliminateFrameIndex machinery. This also ensures it
  // is spilled in the order specified by getCalleeSavedRegs() to make it easier
  // to combine multiple loads / stores.
  bool CanEliminateFrame = true;
  bool CS1Spilled = false;
  bool LRSpilled = false;
  unsigned NumGPRSpills = 0;
  SmallVector<unsigned, 4> UnspilledCS1GPRs;
  SmallVector<unsigned, 4> UnspilledCS2GPRs;
  const ARMBaseRegisterInfo *RegInfo =
    static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
  const ARMBaseInstrInfo &TII =
    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
  // since it's not always possible to restore sp from fp in a single
  // instruction.
  // FIXME: It will be better just to find spare register here.
  if (AFI->isThumb2Function() &&
      (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
    MF.getRegInfo().setPhysRegUsed(ARM::R4);

  if (AFI->isThumb1OnlyFunction()) {
    // Spill LR if Thumb1 function uses variable length argument lists.
    if (AFI->getVarArgsRegSaveSize() > 0)
      MF.getRegInfo().setPhysRegUsed(ARM::LR);

    // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
    // for sure what the stack size will be, but for this, an estimate is good
    // enough. If there anything changes it, it'll be a spill, which implies
    // we've used all the registers and so R4 is already used, so not marking
    // it here will be OK.  Also spill R4 if Thumb1 function requires stack
    // realignment.
    // FIXME: It will be better just to find spare register here.
    unsigned StackSize = estimateStackSize(MF);
    if (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) ||
        StackSize > 508)
      MF.getRegInfo().setPhysRegUsed(ARM::R4);
  }

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());

  // Don't spill FP if the frame can be eliminated. This is determined
  // by scanning the callee-save registers to see if any is used.
  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    bool Spilled = false;
    if (MF.getRegInfo().isPhysRegUsed(Reg)) {
      Spilled = true;
      CanEliminateFrame = false;
    } else {
      // Check alias registers too.
      for (const unsigned *Aliases =
             RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) {
        if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
          Spilled = true;
          CanEliminateFrame = false;
        }
      }
    }

    if (!ARM::GPRRegisterClass->contains(Reg))
      continue;

    if (Spilled) {
      NumGPRSpills++;

      if (!STI.isTargetDarwin()) {
        if (Reg == ARM::LR)
          LRSpilled = true;
        CS1Spilled = true;
        continue;
      }

      // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
      switch (Reg) {
      case ARM::LR:
        LRSpilled = true;
        // Fallthrough
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
        CS1Spilled = true;
        break;
      default:
        break;
      }
    } else {
      if (!STI.isTargetDarwin()) {
        UnspilledCS1GPRs.push_back(Reg);
        continue;
      }

      switch (Reg) {
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
      case ARM::LR:
        UnspilledCS1GPRs.push_back(Reg);
        break;
      default:
        UnspilledCS2GPRs.push_back(Reg);
        break;
      }
    }
  }

  bool ForceLRSpill = false;
  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
    unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
    // Force LR to be spilled if the Thumb function size is > 2048. This enables
    // use of BL to implement far jump. If it turns out that it's not needed
    // then the branch fix up path will undo it.
    if (FnSize >= (1 << 11)) {
      CanEliminateFrame = false;
      ForceLRSpill = true;
    }
  }

  // If any of the stack slot references may be out of range of an immediate
  // offset, make sure a register (or a spill slot) is available for the
  // register scavenger. Note that if we're indexing off the frame pointer, the
  // effective stack size is 4 bytes larger since the FP points to the stack
  // slot of the previous FP. Also, if we have variable sized objects in the
  // function, stack slot references will often be negative, and some of
  // our instructions are positive-offset only, so conservatively consider
  // that case to want a spill slot (or register) as well. Similarly, if
  // the function adjusts the stack pointer during execution and the
  // adjustments aren't already part of our stack size estimate, our offset
  // calculations may be off, so be conservative.
  // FIXME: We could add logic to be more precise about negative offsets
  //        and which instructions will need a scratch register for them. Is it
  //        worth the effort and added fragility?
  bool BigStack =
    (RS &&
     (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
      estimateRSStackSizeLimit(MF, this)))
    || MFI->hasVarSizedObjects()
    || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));

  bool ExtraCSSpill = false;
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
    AFI->setHasStackFrame(true);

    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
    // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
    if (!LRSpilled && CS1Spilled) {
      MF.getRegInfo().setPhysRegUsed(ARM::LR);
      NumGPRSpills++;
      UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
                                    UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
      ForceLRSpill = false;
      ExtraCSSpill = true;
    }

    if (hasFP(MF)) {
      MF.getRegInfo().setPhysRegUsed(FramePtr);
      NumGPRSpills++;
    }

    // If stack and double are 8-byte aligned and we are spilling an odd number
    // of GPRs, spill one extra callee save GPR so we won't have to pad between
    // the integer and double callee save areas.
    unsigned TargetAlign = getStackAlignment();
    if (TargetAlign == 8 && (NumGPRSpills & 1)) {
      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
        for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
          unsigned Reg = UnspilledCS1GPRs[i];
          // Don't spill high register if the function is thumb1
          if (!AFI->isThumb1OnlyFunction() ||
              isARMLowRegister(Reg) || Reg == ARM::LR) {
            MF.getRegInfo().setPhysRegUsed(Reg);
            if (!RegInfo->isReservedReg(MF, Reg))
              ExtraCSSpill = true;
            break;
          }
        }
      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
        unsigned Reg = UnspilledCS2GPRs.front();
        MF.getRegInfo().setPhysRegUsed(Reg);
        if (!RegInfo->isReservedReg(MF, Reg))
          ExtraCSSpill = true;
      }
    }

    // Estimate if we might need to scavenge a register at some point in order
    // to materialize a stack offset. If so, either spill one additional
    // callee-saved register or reserve a special spill slot to facilitate
    // register scavenging. Thumb1 needs a spill slot for stack pointer
    // adjustments also, even when the frame itself is small.
    if (BigStack && !ExtraCSSpill) {
      // If any non-reserved CS register isn't spilled, just spill one or two
      // extra. That should take care of it!
      unsigned NumExtras = TargetAlign / 4;
      SmallVector<unsigned, 2> Extras;
      while (NumExtras && !UnspilledCS1GPRs.empty()) {
        unsigned Reg = UnspilledCS1GPRs.back();
        UnspilledCS1GPRs.pop_back();
        if (!RegInfo->isReservedReg(MF, Reg) &&
            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
             Reg == ARM::LR)) {
          Extras.push_back(Reg);
          NumExtras--;
        }
      }
      // For non-Thumb1 functions, also check for hi-reg CS registers
      if (!AFI->isThumb1OnlyFunction()) {
        while (NumExtras && !UnspilledCS2GPRs.empty()) {
          unsigned Reg = UnspilledCS2GPRs.back();
          UnspilledCS2GPRs.pop_back();
          if (!RegInfo->isReservedReg(MF, Reg)) {
            Extras.push_back(Reg);
            NumExtras--;
          }
        }
      }
      if (Extras.size() && NumExtras == 0) {
        for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
          MF.getRegInfo().setPhysRegUsed(Extras[i]);
        }
      } else if (!AFI->isThumb1OnlyFunction()) {
        // note: Thumb1 functions spill to R12, not the stack.  Reserve a slot
        // closest to SP or frame pointer.
        const TargetRegisterClass *RC = ARM::GPRRegisterClass;
        RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                           RC->getAlignment(),
                                                           false));
      }
    }
  }

  if (ForceLRSpill) {
    MF.getRegInfo().setPhysRegUsed(ARM::LR);
    AFI->setLRIsSpilledForFarJump(true);
  }
}
示例#20
0
// LowerCCCCallTo - functions arguments are copied from virtual regs to
// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
SDValue LanaiTargetLowering::LowerCCCCallTo(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool IsVarArg,
    bool IsTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();

  NumFixedArgs = 0;
  if (IsVarArg && G) {
    const Function *CalleeFn = dyn_cast<Function>(G->getGlobal());
    if (CalleeFn)
      NumFixedArgs = CalleeFn->getFunctionType()->getNumParams();
  }
  if (NumFixedArgs)
    CCInfo.AnalyzeCallOperands(Outs, CC_Lanai32_VarArg);
  else {
    if (CallConv == CallingConv::Fast)
      CCInfo.AnalyzeCallOperands(Outs, CC_Lanai32_Fast);
    else
      CCInfo.AnalyzeCallOperands(Outs, CC_Lanai32);
  }

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // Create local copies for byval args.
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned I = 0, E = Outs.size(); I != E; ++I) {
    ISD::ArgFlagsTy Flags = Outs[I].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[I];
    unsigned Size = Flags.getByValSize();
    unsigned Align = Flags.getByValAlign();

    int FI = MFI->CreateStackObject(Size, Align, false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, MVT::i32);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false,
                          /*IsTailCall=*/false, MachinePointerInfo(),
                          MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  Chain = DAG.getCALLSEQ_START(
      Chain,
      DAG.getConstant(NumBytes, DL, getPointerTy(DAG.getDataLayout()), true),
      DL);

  SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
  SmallVector<SDValue, 12> MemOpChains;
  SDValue StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned I = 0, J = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue Arg = OutVals[I];
    ISD::ArgFlagsTy Flags = Outs[I].Flags;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    default:
      llvm_unreachable("Unknown loc info!");
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      Arg = ByValArgs[J++];

    // Arguments that can be passed on register must be kept at RegsToPass
    // vector
    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());

      if (StackPtr.getNode() == 0)
        StackPtr = DAG.getCopyFromReg(Chain, DL, Lanai::SP,
                                      getPointerTy(DAG.getDataLayout()));

      SDValue PtrOff =
          DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()), StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      MemOpChains.push_back(DAG.getStore(
          Chain, DL, Arg, PtrOff, MachinePointerInfo(), false, false, 0));
    }
  }

  // Transform all store nodes into one single node because all store nodes are
  // independent of each other.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                        ArrayRef<SDValue>(&MemOpChains[0], MemOpChains.size()));

  SDValue InFlag;

  // Build a sequence of copy-to-reg nodes chained together with token chain and
  // flag operands which copy the outgoing args into registers.  The InFlag in
  // necessary since all emitted instructions must be stuck together.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  // Likewise ExternalSymbol -> TargetExternalSymbol.
  uint8_t OpFlag = LanaiII::MO_NO_FLAG;
  if (G) {
    Callee = DAG.getTargetGlobalAddress(
        G->getGlobal(), DL, getPointerTy(DAG.getDataLayout()), 0, OpFlag);
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(
        E->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlag);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add a register mask operand representing the call-preserved registers.
  // TODO: Should return-twice functions be handled?
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  Chain = DAG.getNode(LanaiISD::CALL, DL, NodeTys,
                      ArrayRef<SDValue>(&Ops[0], Ops.size()));
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getConstant(NumBytes, DL, getPointerTy(DAG.getDataLayout()), true),
      DAG.getConstant(0, DL, getPointerTy(DAG.getDataLayout()), true), InFlag,
      DL);
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
                         InVals);
}