Пример #1
0
void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator I,
                                               CallContext &Context) {
  // Check that this particular call sequence is amenable to the
  // transformation.
  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());

  // We expect to enter this at the beginning of a call sequence
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;
  Context.FrameSetup = FrameSetup;

  // How much do we adjust the stack? This puts an upper bound on
  // the number of parameters actually passed on it.
  unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;

  // A zero adjustment means no stack parameters
  if (!MaxAdjust) {
    Context.NoStackParams = true;
    return;
  }

  // Skip over DEBUG_VALUE.
  // For globals in PIC mode, we can have some LEAs here. Skip them as well.
  // TODO: Extend this to something that covers more cases.
  while (I->getOpcode() == X86::LEA32r || I->isDebugInstr())
    ++I;

  unsigned StackPtr = RegInfo.getStackRegister();
  auto StackPtrCopyInst = MBB.end();
  // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual
  // register.  If it's there, use that virtual register as stack pointer
  // instead. Also, we need to locate this instruction so that we can later
  // safely ignore it while doing the conservative processing of the call chain.
  // The COPY can be located anywhere between the call-frame setup
  // instruction and its first use. We use the call instruction as a boundary
  // because it is usually cheaper to check if an instruction is a call than
  // checking if an instruction uses a register.
  for (auto J = I; !J->isCall(); ++J)
    if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
        J->getOperand(1).getReg() == StackPtr) {
      StackPtrCopyInst = J;
      Context.SPCopy = &*J++;
      StackPtr = Context.SPCopy->getOperand(0).getReg();
      break;
    }

  // Scan the call setup sequence for the pattern we're looking for.
  // We only handle a simple case - a sequence of store instructions that
  // push a sequence of stack-slot-aligned values onto the stack, with
  // no gaps between them.
  if (MaxAdjust > 4)
    Context.ArgStoreVector.resize(MaxAdjust, nullptr);

  DenseSet<unsigned int> UsedRegs;

  for (InstClassification Classification = Skip; Classification != Exit; ++I) {
    // If this is the COPY of the stack pointer, it's ok to ignore.
    if (I == StackPtrCopyInst)
      continue;
    Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);
    if (Classification != Convert)
      continue;
    // We know the instruction has a supported store opcode.
    // We only want movs of the form:
    // mov imm/reg, k(%StackPtr)
    // If we run into something else, bail.
    // Note that AddrBaseReg may, counter to its name, not be a register,
    // but rather a frame index.
    // TODO: Support the fi case. This should probably work now that we
    // have the infrastructure to track the stack pointer within a call
    // sequence.
    if (!I->getOperand(X86::AddrBaseReg).isReg() ||
        (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
        !I->getOperand(X86::AddrScaleAmt).isImm() ||
        (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
        (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
        (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
        !I->getOperand(X86::AddrDisp).isImm())
      return;

    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
    assert(StackDisp >= 0 &&
           "Negative stack displacement when passing parameters");

    // We really don't want to consider the unaligned case.
    if (StackDisp & (SlotSize - 1))
      return;
    StackDisp >>= Log2SlotSize;

    assert((size_t)StackDisp < Context.ArgStoreVector.size() &&
           "Function call has more parameters than the stack is adjusted for.");

    // If the same stack slot is being filled twice, something's fishy.
    if (Context.ArgStoreVector[StackDisp] != nullptr)
      return;
    Context.ArgStoreVector[StackDisp] = &*I;

    for (const MachineOperand &MO : I->uses()) {
      if (!MO.isReg())
        continue;
      unsigned int Reg = MO.getReg();
      if (RegInfo.isPhysicalRegister(Reg))
        UsedRegs.insert(Reg);
    }
  }

  --I;

  // We now expect the end of the sequence. If we stopped early,
  // or reached the end of the block without finding a call, bail.
  if (I == MBB.end() || !I->isCall())
    return;

  Context.Call = &*I;
  if ((++I)->getOpcode() != TII->getCallFrameDestroyOpcode())
    return;

  // Now, go through the vector, and see that we don't have any gaps,
  // but only a series of storing instructions.
  auto MMI = Context.ArgStoreVector.begin(), MME = Context.ArgStoreVector.end();
  for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)
    if (*MMI == nullptr)
      break;

  // If the call had no parameters, do nothing
  if (MMI == Context.ArgStoreVector.begin())
    return;

  // We are either at the last parameter, or a gap.
  // Make sure it's not a gap
  for (; MMI != MME; ++MMI)
    if (*MMI != nullptr)
      return;

  Context.UsePush = true;
}
Пример #2
0
void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator I,
                                               CallContext &Context) {
  // Check that this particular call sequence is amenable to the
  // transformation.
  const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
                                       STI->getRegisterInfo());
  unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();

  // We expect to enter this at the beginning of a call sequence
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;
  Context.FrameSetup = FrameSetup;

  // How much do we adjust the stack? This puts an upper bound on
  // the number of parameters actually passed on it.
  unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;

  // A zero adjustment means no stack parameters
  if (!MaxAdjust) {
    Context.NoStackParams = true;
    return;
  }

  // For globals in PIC mode, we can have some LEAs here.
  // Ignore them, they don't bother us.
  // TODO: Extend this to something that covers more cases.
  while (I->getOpcode() == X86::LEA32r)
    ++I;

  // We expect a copy instruction here.
  // TODO: The copy instruction is a lowering artifact.
  //       We should also support a copy-less version, where the stack
  //       pointer is used directly.
  if (!I->isCopy() || !I->getOperand(0).isReg())
    return;
  Context.SPCopy = I++;

  unsigned StackPtr = Context.SPCopy->getOperand(0).getReg();

  // Scan the call setup sequence for the pattern we're looking for.
  // We only handle a simple case - a sequence of MOV32mi or MOV32mr
  // instructions, that push a sequence of 32-bit values onto the stack, with
  // no gaps between them.
  if (MaxAdjust > 4)
    Context.MovVector.resize(MaxAdjust, nullptr);

  InstClassification Classification;
  DenseSet<unsigned int> UsedRegs;

  while ((Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs)) !=
         Exit) {
    if (Classification == Skip) {
      ++I;
      continue;
    }

    // We know the instruction is a MOV32mi/MOV32mr.
    // We only want movs of the form:
    // movl imm/r32, k(%esp)
    // If we run into something else, bail.
    // Note that AddrBaseReg may, counter to its name, not be a register,
    // but rather a frame index.
    // TODO: Support the fi case. This should probably work now that we
    // have the infrastructure to track the stack pointer within a call
    // sequence.
    if (!I->getOperand(X86::AddrBaseReg).isReg() ||
        (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
        !I->getOperand(X86::AddrScaleAmt).isImm() ||
        (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
        (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
        (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
        !I->getOperand(X86::AddrDisp).isImm())
      return;

    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
    assert(StackDisp >= 0 &&
           "Negative stack displacement when passing parameters");

    // We really don't want to consider the unaligned case.
    if (StackDisp % 4)
      return;
    StackDisp /= 4;

    assert((size_t)StackDisp < Context.MovVector.size() &&
           "Function call has more parameters than the stack is adjusted for.");

    // If the same stack slot is being filled twice, something's fishy.
    if (Context.MovVector[StackDisp] != nullptr)
      return;
    Context.MovVector[StackDisp] = I;

    for (const MachineOperand &MO : I->uses()) {
      if (!MO.isReg())
        continue;
      unsigned int Reg = MO.getReg();
      if (RegInfo.isPhysicalRegister(Reg))
        UsedRegs.insert(Reg);
    }

    ++I;
  }

  // We now expect the end of the sequence. If we stopped early,
  // or reached the end of the block without finding a call, bail.
  if (I == MBB.end() || !I->isCall())
    return;

  Context.Call = I;
  if ((++I)->getOpcode() != FrameDestroyOpcode)
    return;

  // Now, go through the vector, and see that we don't have any gaps,
  // but only a series of 32-bit MOVs.
  auto MMI = Context.MovVector.begin(), MME = Context.MovVector.end();
  for (; MMI != MME; ++MMI, Context.ExpectedDist += 4)
    if (*MMI == nullptr)
      break;

  // If the call had no parameters, do nothing
  if (MMI == Context.MovVector.begin())
    return;

  // We are either at the last parameter, or a gap.
  // Make sure it's not a gap
  for (; MMI != MME; ++MMI)
    if (*MMI != nullptr)
      return;

  Context.UsePush = true;
}