Example #1
// ins, ext, dext*, dins have the following constraints:
// X <= pos      <  Y
// X <  size     <= Y
// X <  pos+size <= Y
//
// dinsm and dinsu have the following constraints:
// X <= pos      <  Y
// X <= size     <= Y
// X <  pos+size <= Y
//
// The caller of verifyInsExtInstruction, however, gives the bounds of
// dins[um] like the other (d)ins (d)ext(um) instructions, so that this
// function doesn't have to vary its behaviour based on the instruction
// being checked.
static bool verifyInsExtInstruction(const MachineInstr &MI, StringRef &ErrInfo,
                                    const int64_t PosLow, const int64_t PosHigh,
                                    const int64_t SizeLow,
                                    const int64_t SizeHigh,
                                    const int64_t BothLow,
                                    const int64_t BothHigh) {
  MachineOperand MOPos = MI.getOperand(2);
  if (!MOPos.isImm()) {
    ErrInfo = "Position is not an immediate!";
    return false;
  }
  int64_t Pos = MOPos.getImm();
  if (!((PosLow <= Pos) && (Pos < PosHigh))) {
    ErrInfo = "Position operand is out of range!";
    return false;
  }

  MachineOperand MOSize = MI.getOperand(3);
  if (!MOSize.isImm()) {
    ErrInfo = "Size operand is not an immediate!";
    return false;
  }
  int64_t Size = MOSize.getImm();
  if (!((SizeLow < Size) && (Size <= SizeHigh))) {
    ErrInfo = "Size operand is out of range!";
    return false;
  }

  if (!((BothLow < (Pos + Size)) && ((Pos + Size) <= BothHigh))) {
    ErrInfo = "Position + Size is out of range!";
    return false;
  }

  return true;
}
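
For context, here is a minimal sketch of how a target's verifyInstruction hook could supply the bounds to verifyInsExtInstruction. It is illustrative only: the opcodes and the concrete limits are assumptions that mirror the 32-bit MIPS EXT/INS encoding (pos in [0, 32), size in (0, 32], pos + size in (0, 32]), not code taken from the example above.

// Hypothetical caller -- opcodes and bounds are illustrative assumptions.
bool MipsInstrInfo::verifyInstruction(const MachineInstr &MI,
                                      StringRef &ErrInfo) const {
  switch (MI.getOpcode()) {
  case Mips::EXT:
  case Mips::INS:
    // 0 <= pos < 32, 0 < size <= 32, 0 < pos + size <= 32.
    return verifyInsExtInstruction(MI, ErrInfo, 0, 32, 0, 32, 0, 32);
  default:
    return true;
  }
}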
Example #2
void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) {
  assert (&Dst != &Src && "Cannot duplicate into itself");
  switch (Dst.getType()) {
    case MachineOperand::MO_Register:
      if (Src.isReg()) {
        Dst.setReg(Src.getReg());
      } else if (Src.isImm()) {
        Dst.ChangeToImmediate(Src.getImm());
      } else {
        llvm_unreachable("Unexpected src operand type");
      }
      break;

    case MachineOperand::MO_Immediate:
      if (Src.isImm()) {
        Dst.setImm(Src.getImm());
      } else if (Src.isReg()) {
        Dst.ChangeToRegister(Src.getReg(), Src.isDef(), Src.isImplicit(),
                             Src.isKill(), Src.isDead(), Src.isUndef(),
                             Src.isDebug());
      } else {
        llvm_unreachable("Unexpected src operand type");
      }
      break;

    default:
      llvm_unreachable("Unexpected dst operand type");
      break;
  }
}
Example #3
void HexagonCopyToCombine::emitConst64(MachineBasicBlock::iterator &InsertPt,
                                       unsigned DoubleDestReg,
                                       MachineOperand &HiOperand,
                                       MachineOperand &LoOperand) {
  DEBUG(dbgs() << "Found a CONST64\n");

  DebugLoc DL = InsertPt->getDebugLoc();
  MachineBasicBlock *BB = InsertPt->getParent();
  assert(LoOperand.isImm() && HiOperand.isImm() &&
         "Both operands must be immediate");

  int64_t V = HiOperand.getImm();
  V = (V << 32) | (0x0ffffffffLL & LoOperand.getImm());
  BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::CONST64), DoubleDestReg)
    .addImm(V);
}
/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR
/// with the obvious type and an immediate 0 as either wzr or xzr.
static bool printModifiedGPRAsmOperand(const MachineOperand &MO,
                                       const TargetRegisterInfo *TRI,
                                       const TargetRegisterClass &RegClass,
                                       raw_ostream &O) {
  char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x';

  if (MO.isImm() && MO.getImm() == 0) {
    O << Prefix << "zr";
    return false;
  } else if (MO.isReg()) {
    if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) {
      O << (Prefix == 'x' ? "sp" : "wsp");
      return false;
    }

    for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
      if (RegClass.contains(*AR)) {
        O << AArch64InstPrinter::getRegisterName(*AR);
        return false;
      }
    }
  }

  return true;
}
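
As a usage sketch (not part of the original example), the backend's PrintAsmOperand override might route the 'w' and 'x' modifiers to this helper roughly as follows; the register classes and the TM.getRegisterInfo() accessor are assumptions appropriate to this older AArch64 backend.

// Hedged sketch of an inline-asm modifier dispatch; names are assumptions.
bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
                                        unsigned AsmVariant,
                                        const char *ExtraCode, raw_ostream &O) {
  const MachineOperand &MO = MI->getOperand(OpNum);
  if (ExtraCode && ExtraCode[0] && !ExtraCode[1]) {
    switch (ExtraCode[0]) {
    case 'w': // Print a GPR operand (or immediate 0) as a 32-bit register.
      return printModifiedGPRAsmOperand(MO, TM.getRegisterInfo(),
                                        AArch64::GPR32RegClass, O);
    case 'x': // Same, but as a 64-bit register.
      return printModifiedGPRAsmOperand(MO, TM.getRegisterInfo(),
                                        AArch64::GPR64RegClass, O);
    }
  }
  // Unhandled modifiers fall back to the generic implementation.
  return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
}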
/// Get the opcode for a conditional transfer of the value in SO (source
/// operand). The condition (true/false) is given in Cond.
unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
      bool Cond) {
  using namespace Hexagon;
  if (SO.isReg()) {
    unsigned PhysR;
    RegisterRef RS = SO;
    if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) {
      const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg);
      assert(VC->begin() != VC->end() && "Empty register class");
      PhysR = *VC->begin();
    } else {
      assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg));
      PhysR = RS.Reg;
    }
    unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub);
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS);
    switch (RC->getSize()) {
      case 4:
        return Cond ? A2_tfrt : A2_tfrf;
      case 8:
        return Cond ? A2_tfrpt : A2_tfrpf;
    }
    llvm_unreachable("Invalid register operand");
  }
  if (SO.isImm() || SO.isFPImm())
    return Cond ? C2_cmoveit : C2_cmoveif;
  llvm_unreachable("Unexpected source operand");
}
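
A hedged sketch of how the chosen opcode might be consumed when emitting the conditional transfer. The helper name, the HII instruction-info member, and the operand order are assumptions for illustration; only getCondTfrOpcode itself comes from the example above.

// Hypothetical helper: emit SO into DstR, guarded by predicate register PredR.
void HexagonExpandCondsets::genCondTfr(MachineBasicBlock &B,
                                       MachineBasicBlock::iterator At,
                                       unsigned DstR, unsigned PredR,
                                       const MachineOperand &SO, bool Cond) {
  DebugLoc DL = At != B.end() ? At->getDebugLoc() : DebugLoc();
  unsigned Opc = getCondTfrOpcode(SO, Cond);
  BuildMI(B, At, DL, HII->get(Opc), DstR)
      .addReg(PredR)      // predicate controlling the transfer
      .addOperand(SO);    // register or immediate source
}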
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}
Example #7
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
                                            const MachineOperand &MO) const {
  if (MO.isReg())
    return MipsRegisterInfo::getRegisterNumbering(MO.getReg());
  else if (MO.isImm())
    return static_cast<unsigned>(MO.getImm());
  else if (MO.isGlobal()) {
    if (MI.getOpcode() == Mips::ULW || MI.getOpcode() == Mips::USW ||
          MI.getOpcode() == Mips::ULH || MI.getOpcode() == Mips::ULHu)
      emitGlobalAddressUnaligned(MO.getGlobal(), getRelocation(MI, MO), 4);
    else if (MI.getOpcode() == Mips::USH)
      emitGlobalAddressUnaligned(MO.getGlobal(), getRelocation(MI, MO), 8);
    else
      emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
  } else if (MO.isSymbol())
    emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
  else if (MO.isCPI())
    emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO));
  else if (MO.isJTI())
    emitJumpTableAddress(MO.getIndex(), getRelocation(MI, MO));
  else if (MO.isMBB())
    emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
  else
    llvm_unreachable("Unable to encode MachineOperand!");
  return 0;
}
Example #8
unsigned
SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                 int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case SPU::STQDv16i8:
  case SPU::STQDv8i16:
  case SPU::STQDv4i32:
  case SPU::STQDv4f32:
  case SPU::STQDv2f64:
  case SPU::STQDr128:
  case SPU::STQDr64:
  case SPU::STQDr32:
  case SPU::STQDr16:
  case SPU::STQDr8: {
    const MachineOperand MOp1 = MI->getOperand(1);
    const MachineOperand MOp2 = MI->getOperand(2);
    if (MOp1.isImm() && MOp2.isFI()) {
      FrameIndex = MOp2.getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }
  }
  return 0;
}
Example #9
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
                                           const MachineOperand &MO) {
  if (MO.isReg())
    return ARMRegisterInfo::getRegisterNumbering(MO.getReg());
  else if (MO.isImm())
    return static_cast<unsigned>(MO.getImm());
  else if (MO.isGlobal())
    emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false);
  else if (MO.isSymbol())
    emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
  else if (MO.isCPI()) {
    const TargetInstrDesc &TID = MI.getDesc();
    // For VFP load, the immediate offset is multiplied by 4.
    unsigned Reloc =  ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
      ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry;
    emitConstPoolAddress(MO.getIndex(), Reloc);
  } else if (MO.isJTI())
    emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative);
  else if (MO.isMBB())
    emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
  else {
#ifndef NDEBUG
    errs() << MO;
#endif
    llvm_unreachable(0);
  }
  return 0;
}
static bool isSimilarDispOp(const MachineOperand &MO1,
                            const MachineOperand &MO2) {
  assert(isValidDispOp(MO1) && isValidDispOp(MO2) &&
         "Address displacement operand is not valid");
  return (MO1.isImm() && MO2.isImm()) ||
         (MO1.isCPI() && MO2.isCPI() && MO1.getIndex() == MO2.getIndex()) ||
         (MO1.isJTI() && MO2.isJTI() && MO1.getIndex() == MO2.getIndex()) ||
         (MO1.isSymbol() && MO2.isSymbol() &&
          MO1.getSymbolName() == MO2.getSymbolName()) ||
         (MO1.isGlobal() && MO2.isGlobal() &&
          MO1.getGlobal() == MO2.getGlobal()) ||
         (MO1.isBlockAddress() && MO2.isBlockAddress() &&
          MO1.getBlockAddress() == MO2.getBlockAddress()) ||
         (MO1.isMCSymbol() && MO2.isMCSymbol() &&
          MO1.getMCSymbol() == MO2.getMCSymbol()) ||
         (MO1.isMBB() && MO2.isMBB() && MO1.getMBB() == MO2.getMBB());
}
Example #11
unsigned HexagonGenMux::getMuxOpcode(const MachineOperand &Src1,
                                     const MachineOperand &Src2) const {
  bool IsReg1 = Src1.isReg(), IsReg2 = Src2.isReg();
  if (IsReg1)
    return IsReg2 ? Hexagon::C2_mux : Hexagon::C2_muxir;
  if (IsReg2)
    return Hexagon::C2_muxri;

  // Neither is a register. The first source is extendable, but the second
  // is not (s8).
  if (Src2.isImm() && isInt<8>(Src2.getImm()))
    return Hexagon::C2_muxii;

  return 0;
}
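
For illustration only, a sketch of how the result might be used when forming the mux; MX (with At, DefR, PredR, SrcT, SrcF fields), B, DL, and HII are assumed names, not taken from the example.

// Illustrative fragment: bail out when no C2_mux* form fits the operands.
unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF);
if (MxOpc == 0)
  return false;
BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR)
    .addReg(MX.PredR)        // predicate selecting between the two sources
    .addOperand(*MX.SrcT)    // value used when the predicate is true
    .addOperand(*MX.SrcF);   // value used when the predicate is false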
Example #12
unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
                                           const MachineOperand &MO) const {

  if (MO.isReg()) {
    // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
    // The GPR operand should come through here though.
    assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 &&
             MI.getOpcode() != PPC::MFOCRF) ||
           MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
    return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
  }
  
  assert(MO.isImm() &&
         "Relocation required in an instruction that we cannot encode!");
  return MO.getImm();
}
unsigned CoffeeCodeEmitter::getMachineOpValue(const MachineInstr &MI,
                                           const MachineOperand &MO) const {

  if (MO.isReg()) {
    // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
    // The GPR operand should come through here though.
    /*assert((MI.getOpcode() != Coffee::MTCRF && MI.getOpcode() != Coffee::MTCRF8 &&
             MI.getOpcode() != Coffee::MFOCRF) ||
           MO.getReg() < Coffee::CR0 || MO.getReg() > Coffee::CR7);*/
    return getCoffeeRegisterNumbering(MO.getReg());
  }
  
  assert(MO.isImm() &&
         "Relocation required in an instruction that we cannot encode!");
  return MO.getImm();
}
Example #14
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
  if (MO.isImm()) {
    return MO.getImm() >= -16 && MO.getImm() <= 64;
  }
  if (MO.isFPImm()) {
    return MO.getFPImm()->isExactlyValue(0.0)  ||
           MO.getFPImm()->isExactlyValue(0.5)  ||
           MO.getFPImm()->isExactlyValue(-0.5) ||
           MO.getFPImm()->isExactlyValue(1.0)  ||
           MO.getFPImm()->isExactlyValue(-1.0) ||
           MO.getFPImm()->isExactlyValue(2.0)  ||
           MO.getFPImm()->isExactlyValue(-2.0) ||
           MO.getFPImm()->isExactlyValue(4.0)  ||
           MO.getFPImm()->isExactlyValue(-4.0);
  }
  return false;
}
Example #15
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
                                            const MachineOperand &MO) const {
  if (MO.isReg())
    return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
  else if (MO.isImm())
    return static_cast<unsigned>(MO.getImm());
  else if (MO.isGlobal())
    emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
  else if (MO.isSymbol())
    emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
  else if (MO.isCPI())
    emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO));
  else if (MO.isJTI())
    emitJumpTableAddress(MO.getIndex(), getRelocation(MI, MO));
  else if (MO.isMBB())
    emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
  else
    llvm_unreachable("Unable to encode MachineOperand!");
  return 0;
}
Example #16
/// addConstantValue - Add constant value entry in variable DIE.
bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
                                   DIType Ty) {
  assert (MO.isImm() && "Invalid machine operand!");
  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
  unsigned form = dwarf::DW_FORM_udata;
  switch (Ty.getSizeInBits()) {
    case 8: form = dwarf::DW_FORM_data1; break;
    case 16: form = dwarf::DW_FORM_data2; break;
    case 32: form = dwarf::DW_FORM_data4; break;
    case 64: form = dwarf::DW_FORM_data8; break;
    default: break;
  }

  DIBasicType BTy(Ty);
  if (BTy.Verify() &&
      (BTy.getEncoding()  == dwarf::DW_ATE_signed 
       || BTy.getEncoding() == dwarf::DW_ATE_signed_char))
    addSInt(Block, 0, form, MO.getImm());
  else
    addUInt(Block, 0, form, MO.getImm());

  addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
  return true;
}
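
A possible call site, sketched under the assumption that the variable's DBG_VALUE location is a plain immediate; VariableDie, DVInsn, and DV are hypothetical names, not part of the example.

// Hypothetical caller: emit a constant-valued variable as DW_AT_const_value.
if (DVInsn->getOperand(0).isImm())
  addConstantValue(VariableDie, DVInsn->getOperand(0), DV.getType());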
void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  // Ok, we can in fact do the transformation for this call.
  // Do not remove the FrameSetup instruction, but adjust the parameters.
  // PEI will end up finalizing the handling of this.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);

  DebugLoc DL = FrameSetup->getDebugLoc();
  bool Is64Bit = STI->is64Bit();
  // Now, iterate through the vector in reverse order, and replace the store to
  // stack with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
  // replace uses.
  for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator Store = *Context.ArgStoreVector[Idx];
    MachineOperand PushOp = Store->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    unsigned PushOpcode;
    switch (Store->getOpcode()) {
    default:
      llvm_unreachable("Unexpected Opcode!");
    case X86::AND16mi8:
    case X86::AND32mi8:
    case X86::AND64mi8:
    case X86::OR16mi8:
    case X86::OR32mi8:
    case X86::OR64mi8:
    case X86::MOV32mi:
    case X86::MOV64mi32:
      PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSHi32;
      // If the operand is a small (8-bit) immediate, we can use a
      // PUSH instruction with a shorter encoding.
      // Note that isImm() may fail even though this is a MOVmi, because
      // the operand can also be a symbol.
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8;
      }
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).add(PushOp);
      break;
    case X86::MOV32mr:
    case X86::MOV64mr: {
      unsigned int Reg = PushOp.getReg();

      // If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg
      // in preparation for the PUSH64. The upper 32 bits can be undef.
      if (Is64Bit && Store->getOpcode() == X86::MOV32mr) {
        unsigned UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);
        Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg)
            .addReg(UndefReg)
            .add(PushOp)
            .addImm(X86::sub_32bit);
      }

      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction.
      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();

      // Check that this is legal to fold. Right now, we're extremely
      // conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode));

        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));

        DefMov->eraseFromParent();
      } else {
        PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
                   .addReg(Reg)
                   .getInstr();
      }
      break;
    }
    }

    // For debugging, when using SP-based CFA, we need to adjust the CFA
    // offset after each push.
    // TODO: This is needed only if we require precise CFA.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(
          MBB, std::next(Push), DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));

    MBB.erase(Store);
  }

  // The stack-pointer copy is no longer used in the call sequences.
  // There should not be any other users, but we can't commit to that, so:
  if (Context.SPCopy && MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);
}
Example #18
static bool isZeroImm(const MachineOperand &op) {
  return op.isImm() && op.getImm() == 0;
}
bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator I) {

  // Check that this particular call sequence is amenable to the
  // transformation.
  const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
                                       MF.getSubtarget().getRegisterInfo());
  unsigned StackPtr = RegInfo.getStackRegister();
  int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();

  // We expect to enter this at the beginning of a call sequence
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;

  
  // For globals in PIC mode, we can have some LEAs here.
  // Ignore them, they don't bother us.
  // TODO: Extend this to something that covers more cases.
  while (I->getOpcode() == X86::LEA32r)
    ++I;
  
  // We expect a copy instruction here.
  // TODO: The copy instruction is a lowering artifact.
  //       We should also support a copy-less version, where the stack
  //       pointer is used directly.
  if (!I->isCopy() || !I->getOperand(0).isReg())
    return false;
  MachineBasicBlock::iterator SPCopy = I++;
  StackPtr = SPCopy->getOperand(0).getReg();

  // Scan the call setup sequence for the pattern we're looking for.
  // We only handle a simple case - a sequence of MOV32mi or MOV32mr
  // instructions, that push a sequence of 32-bit values onto the stack, with
  // no gaps between them.
  SmallVector<MachineInstr*, 4> MovVector(4, nullptr);
  unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;
  if (MaxAdjust > 4)
    MovVector.resize(MaxAdjust, nullptr);

  do {
    int Opcode = I->getOpcode();
    if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
      break;

    // We only want movs of the form:
    // movl imm/r32, k(%esp)
    // If we run into something else, bail.
    // Note that AddrBaseReg may, counter to its name, not be a register,
    // but rather a frame index.
    // TODO: Support the fi case. This should probably work now that we
    // have the infrastructure to track the stack pointer within a call
    // sequence.
    if (!I->getOperand(X86::AddrBaseReg).isReg() ||
        (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
        !I->getOperand(X86::AddrScaleAmt).isImm() ||
        (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
        (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
        (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
        !I->getOperand(X86::AddrDisp).isImm())
      return false;

    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
    assert(StackDisp >= 0 && "Negative stack displacement when passing parameters");

    // We really don't want to consider the unaligned case.
    if (StackDisp % 4)
      return false;
    StackDisp /= 4;

    assert((size_t)StackDisp < MovVector.size() &&
      "Function call has more parameters than the stack is adjusted for.");

    // If the same stack slot is being filled twice, something's fishy.
    if (MovVector[StackDisp] != nullptr)
      return false;
    MovVector[StackDisp] = I;

    ++I;
  } while (I != MBB.end());

  // We now expect the end of the sequence - a call and a stack adjust.
  if (I == MBB.end())
    return false;

  // For PCrel calls, we expect an additional COPY of the basereg.
  // If we find one, skip it.
  if (I->isCopy()) {
    if (I->getOperand(1).getReg() ==
      MF.getInfo<X86MachineFunctionInfo>()->getGlobalBaseReg())
      ++I;
    else
      return false;
  }

  if (!I->isCall())
    return false;
  MachineBasicBlock::iterator Call = I;
  if ((++I)->getOpcode() != FrameDestroyOpcode)
    return false;

  // Now, go through the vector, and see that we don't have any gaps,
  // but only a series of 32-bit MOVs.
  
  int64_t ExpectedDist = 0;
  auto MMI = MovVector.begin(), MME = MovVector.end();
  for (; MMI != MME; ++MMI, ExpectedDist += 4)
    if (*MMI == nullptr)
      break;
  
  // If the call had no parameters, do nothing
  if (!ExpectedDist)
    return false;

  // We are either at the last parameter, or a gap.
  // Make sure it's not a gap.
  for (; MMI != MME; ++MMI)
    if (*MMI != nullptr)
      return false;

  // Ok, we can in fact do the transformation for this call.
  // Do not remove the FrameSetup instruction, but adjust the parameters.
  // PEI will end up finalizing the handling of this.
  FrameSetup->getOperand(1).setImm(ExpectedDist);

  DebugLoc DL = I->getDebugLoc();
  // Now, iterate through the vector in reverse order, and replace the movs
  // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to 
  // replace uses.
  for (int Idx = (ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator MOV = *MovVector[Idx];
    MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
    if (MOV->getOpcode() == X86::MOV32mi) {
      unsigned PushOpcode = X86::PUSHi32;
      // If the operand is a small (8-bit) immediate, we can use a
      // PUSH instruction with a shorter encoding.
      // Note that isImm() may fail even though this is a MOVmi, because
      // the operand can also be a symbol.
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = X86::PUSH32i8;
      }
      BuildMI(MBB, Call, DL, TII->get(PushOpcode)).addOperand(PushOp);
    } else {
      unsigned int Reg = PushOp.getReg();

      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction.
      const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
      bool SlowPUSHrmm = ST.isAtom() || ST.isSLM();

      // Check that this is legal to fold. Right now, we're extremely
      // conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        MachineInstr *Push = BuildMI(MBB, Call, DL, TII->get(X86::PUSH32rmm));

        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));

        DefMov->eraseFromParent();
      } else {
        BuildMI(MBB, Call, DL, TII->get(X86::PUSH32r)).addReg(Reg).getInstr();
      }
    }

    MBB.erase(MOV);
  }

  // The stack-pointer copy is no longer used in the call sequences.
  // There should not be any other users, but we can't commit to that, so:
  if (MRI->use_empty(SPCopy->getOperand(0).getReg()))
    SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);

  return true;
}
Example #20
static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
                        unsigned UseOpIdx,
                        std::vector<FoldCandidate> &FoldList,
                        SmallVectorImpl<MachineInstr *> &CopiesToReplace,
                        const SIInstrInfo *TII, const SIRegisterInfo &TRI,
                        MachineRegisterInfo &MRI) {
  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);

  // FIXME: Fold operands with subregs.
  if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
      UseOp.isImplicit())) {
    return;
  }

  bool FoldingImm = OpToFold.isImm();
  APInt Imm;

  if (FoldingImm) {
    unsigned UseReg = UseOp.getReg();
    const TargetRegisterClass *UseRC
      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
      MRI.getRegClass(UseReg) :
      TRI.getPhysRegClass(UseReg);

    Imm = APInt(64, OpToFold.getImm());

    const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
    const TargetRegisterClass *FoldRC =
        TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);

    // Split 64-bit constants into 32-bits for folding.
    if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
      if (UseRC->getSize() != 8)
        return;

      if (UseOp.getSubReg() == AMDGPU::sub0) {
        Imm = Imm.getLoBits(32);
      } else {
        assert(UseOp.getSubReg() == AMDGPU::sub1);
        Imm = Imm.getHiBits(32);
      }
    }

    // In order to fold immediates into copies, we need to change the
    // copy to a MOV.
    if (UseMI->getOpcode() == AMDGPU::COPY) {
      unsigned DestReg = UseMI->getOperand(0).getReg();
      const TargetRegisterClass *DestRC
        = TargetRegisterInfo::isVirtualRegister(DestReg) ?
        MRI.getRegClass(DestReg) :
        TRI.getPhysRegClass(DestReg);

      unsigned MovOp = TII->getMovOpcode(DestRC);
      if (MovOp == AMDGPU::COPY)
        return;

      UseMI->setDesc(TII->get(MovOp));
      CopiesToReplace.push_back(UseMI);
    }
  }

  // Special case for REG_SEQUENCE: We can't fold literals into
  // REG_SEQUENCE instructions, so we have to fold them into the
  // uses of REG_SEQUENCE.
  if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
    unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
    unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();

    for (MachineRegisterInfo::use_iterator
         RSUse = MRI.use_begin(RegSeqDstReg),
         RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {

      MachineInstr *RSUseMI = RSUse->getParent();
      if (RSUse->getSubReg() != RegSeqDstSubReg)
        continue;

      foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
                  CopiesToReplace, TII, TRI, MRI);
    }
    return;
  }

  const MCInstrDesc &UseDesc = UseMI->getDesc();

  // Don't fold into target independent nodes.  Target independent opcodes
  // don't have defined register classes.
  if (UseDesc.isVariadic() ||
      UseDesc.OpInfo[UseOpIdx].RegClass == -1)
    return;

  if (FoldingImm) {
    MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
    return;
  }

  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);

  // FIXME: We could try to change the instruction from 64-bit to 32-bit
  // to enable more folding opportunities. The shrink operands pass
  // already does this.
  return;
}
Example #21
void SIFoldOperands::foldInstOperand(MachineInstr &MI,
                                     MachineOperand &OpToFold) const {
  // We need to mutate the operands of new mov instructions to add implicit
  // uses of EXEC, but adding them invalidates the use_iterator, so defer
  // this.
  SmallVector<MachineInstr *, 4> CopiesToReplace;
  SmallVector<FoldCandidate, 4> FoldList;
  MachineOperand &Dst = MI.getOperand(0);

  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
  if (FoldingImm) {
    unsigned NumLiteralUses = 0;
    MachineOperand *NonInlineUse = nullptr;
    int NonInlineUseOpNo = -1;

    MachineRegisterInfo::use_iterator NextUse, NextInstUse;
    for (MachineRegisterInfo::use_iterator
           Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
         Use != E; Use = NextUse) {
      NextUse = std::next(Use);
      MachineInstr *UseMI = Use->getParent();
      unsigned OpNo = Use.getOperandNo();

      // Folding the immediate may reveal operations that can be constant
      // folded or replaced with a copy. This can happen for example after
      // frame indices are lowered to constants or from splitting 64-bit
      // constants.
      //
      // We may also encounter cases where one or both operands are
      // immediates materialized into a register, which would ordinarily not
      // be folded due to multiple uses or operand constraints.

      if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
        DEBUG(dbgs() << "Constant folded " << *UseMI <<'\n');

        // Some constant folding cases change the same immediate's use to a new
        // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
        // again. The same constant folded instruction could also have a second
        // use operand.
        NextUse = MRI->use_begin(Dst.getReg());
        continue;
      }

      // Try to fold any inline immediate uses, and then only fold other
      // constants if they have one use.
      //
      // The legality of the inline immediate must be checked based on the use
      // operand, not the defining instruction, because 32-bit instructions
      // with 32-bit inline immediate sources may be used to materialize
      // constants used in 16-bit operands.
      //
      // e.g. it is unsafe to fold:
      //  s_mov_b32 s0, 1.0    // materializes 0x3f800000
      //  v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases. A better heuristic is needed.
      if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
        foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
      } else {
        if (++NumLiteralUses == 1) {
          NonInlineUse = &*Use;
          NonInlineUseOpNo = OpNo;
        }
      }
    }

    if (NumLiteralUses == 1) {
      MachineInstr *UseMI = NonInlineUse->getParent();
      foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
    }
  } else {
    // Folding register.
    for (MachineRegisterInfo::use_iterator
           Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
         Use != E; ++Use) {
      MachineInstr *UseMI = Use->getParent();

      foldOperand(OpToFold, UseMI, Use.getOperandNo(),
                  FoldList, CopiesToReplace);
    }
  }

  MachineFunction *MF = MI.getParent()->getParent();
  // Make sure we add EXEC uses to any new v_mov instructions created.
  for (MachineInstr *Copy : CopiesToReplace)
    Copy->addImplicitDefUseOperands(*MF);

  for (FoldCandidate &Fold : FoldList) {
    if (updateOperand(Fold, *TRI)) {
      // Clear kill flags.
      if (Fold.isReg()) {
        assert(Fold.OpToFold && Fold.OpToFold->isReg());
        // FIXME: Probably shouldn't bother trying to fold if not an
        // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
        // copies.
        MRI->clearKillFlags(Fold.OpToFold->getReg());
      }
      DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
            static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
      tryFoldInst(TII, Fold.UseMI);
    } else if (Fold.isCommuted()) {
      // Restoring instruction's original operand order if fold has failed.
      TII->commuteInstruction(*Fold.UseMI, false);
    }
  }
}
Example #22
void SIFoldOperands::foldOperand(
  MachineOperand &OpToFold,
  MachineInstr *UseMI,
  unsigned UseOpIdx,
  SmallVectorImpl<FoldCandidate> &FoldList,
  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);

  if (!isUseSafeToFold(TII, *UseMI, UseOp))
    return;

  // FIXME: Fold operands with subregs.
  if (UseOp.isReg() && OpToFold.isReg()) {
    if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
      return;

    // Don't fold subregister extracts into tied operands; only fold a full
    // copy, since a subregister use tied to a full register def doesn't
    // really make sense. e.g. don't fold:
    //
    // %vreg1 = COPY %vreg0:sub1
    // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg1<tied0>
    //
    //  into
    // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg0:sub1<tied0>
    if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
      return;
  }

  // Special case for REG_SEQUENCE: We can't fold literals into
  // REG_SEQUENCE instructions, so we have to fold them into the
  // uses of REG_SEQUENCE.
  if (UseMI->isRegSequence()) {
    unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
    unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();

    for (MachineRegisterInfo::use_iterator
           RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
         RSUse != RSE; ++RSUse) {

      MachineInstr *RSUseMI = RSUse->getParent();
      if (RSUse->getSubReg() != RegSeqDstSubReg)
        continue;

      foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
                  CopiesToReplace);
    }

    return;
  }


  bool FoldingImm = OpToFold.isImm();

  // In order to fold immediates into copies, we need to change the
  // copy to a MOV.
  if (FoldingImm && UseMI->isCopy()) {
    unsigned DestReg = UseMI->getOperand(0).getReg();
    const TargetRegisterClass *DestRC
      = TargetRegisterInfo::isVirtualRegister(DestReg) ?
      MRI->getRegClass(DestReg) :
      TRI->getPhysRegClass(DestReg);

    unsigned MovOp = TII->getMovOpcode(DestRC);
    if (MovOp == AMDGPU::COPY)
      return;

    UseMI->setDesc(TII->get(MovOp));
    CopiesToReplace.push_back(UseMI);
  } else {
    const MCInstrDesc &UseDesc = UseMI->getDesc();

    // Don't fold into target independent nodes.  Target independent opcodes
    // don't have defined register classes.
    if (UseDesc.isVariadic() ||
        UseDesc.OpInfo[UseOpIdx].RegClass == -1)
      return;
  }

  if (!FoldingImm) {
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);

    // FIXME: We could try to change the instruction from 64-bit to 32-bit
    // to enable more folding opportunities. The shrink operands pass
    // already does this.
    return;
  }


  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
  const TargetRegisterClass *FoldRC =
    TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);


  // Split 64-bit constants into 32-bits for folding.
  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
    unsigned UseReg = UseOp.getReg();
    const TargetRegisterClass *UseRC
      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
      MRI->getRegClass(UseReg) :
      TRI->getPhysRegClass(UseReg);

    if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
      return;

    APInt Imm(64, OpToFold.getImm());
    if (UseOp.getSubReg() == AMDGPU::sub0) {
      Imm = Imm.getLoBits(32);
    } else {
      assert(UseOp.getSubReg() == AMDGPU::sub1);
      Imm = Imm.getHiBits(32);
    }

    MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
    return;
  }



  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
}
Example #23
bool MipsInstrInfo::isZeroImm(const MachineOperand &op) const {
  return op.isImm() && op.getImm() == 0;
}
Example #24
unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI,
                                             const MachineOperand &MO) {

  unsigned rv = 0; // Return value; defaults to 0 for unhandled cases
                   // or things that get fixed up later by the JIT.

  if (MO.isReg()) {
    rv = getAlphaRegNumber(MO.getReg());
  } else if (MO.isImm()) {
    rv = MO.getImm();
  } else if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) {
    DOUT << MO << " is a relocated op for " << MI << "\n";
    unsigned Reloc = 0;
    int Offset = 0;
    bool useGOT = false;
    switch (MI.getOpcode()) {
    case Alpha::BSR:
      Reloc = Alpha::reloc_bsr;
      break;
    case Alpha::LDLr:
    case Alpha::LDQr:
    case Alpha::LDBUr:
    case Alpha::LDWUr:
    case Alpha::LDSr:
    case Alpha::LDTr:
    case Alpha::LDAr:
    case Alpha::STQr:
    case Alpha::STLr:
    case Alpha::STWr:
    case Alpha::STBr:
    case Alpha::STSr:
    case Alpha::STTr:
      Reloc = Alpha::reloc_gprellow;
      break;
    case Alpha::LDAHr:
      Reloc = Alpha::reloc_gprelhigh;
      break;
    case Alpha::LDQl:
      Reloc = Alpha::reloc_literal;
      useGOT = true;
      break;
    case Alpha::LDAg:
    case Alpha::LDAHg:
      Reloc = Alpha::reloc_gpdist;
      Offset = MI.getOperand(3).getImm();
      break;
    default:
      assert(0 && "unknown relocatable instruction");
      abort();
    }
    if (MO.isGlobal())
      MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
                                                 Reloc, MO.getGlobal(), Offset,
                                                 isa<Function>(MO.getGlobal()),
                                                 useGOT));
    else if (MO.isSymbol())
      MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
                                                     Reloc, MO.getSymbolName(),
                                                     Offset, true));
    else
     MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
                                          Reloc, MO.getIndex(), Offset));
  } else if (MO.isMBB()) {
    MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
                                               Alpha::reloc_bsr, MO.getMBB()));
  } else {
    cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
    abort();
  }

  return rv;
}
static bool isValidDispOp(const MachineOperand &MO) {
  return MO.isImm() || MO.isCPI() || MO.isJTI() || MO.isSymbol() ||
         MO.isGlobal() || MO.isBlockAddress() || MO.isMCSymbol() || MO.isMBB();
}
Example #26
bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
  return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
}
Example #27
bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  // Ok, we can in fact do the transformation for this call.
  // Do not remove the FrameSetup instruction, but adjust the parameters.
  // PEI will end up finalizing the handling of this.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  FrameSetup->getOperand(1).setImm(Context.ExpectedDist);

  DebugLoc DL = FrameSetup->getDebugLoc();
  // Now, iterate through the vector in reverse order, and replace the movs
  // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
  // replace uses.
  for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator MOV = *Context.MovVector[Idx];
    MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    if (MOV->getOpcode() == X86::MOV32mi) {
      unsigned PushOpcode = X86::PUSHi32;
      // If the operand is a small (8-bit) immediate, we can use a
      // PUSH instruction with a shorter encoding.
      // Note that isImm() may fail even though this is a MOVmi, because
      // the operand can also be a symbol.
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = X86::PUSH32i8;
      }
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
          .addOperand(PushOp);
    } else {
      unsigned int Reg = PushOp.getReg();

      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction.
      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();

      // Check that this is legal to fold. Right now, we're extremely
      // conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));

        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));

        DefMov->eraseFromParent();
      } else {
        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
            .addReg(Reg)
            .getInstr();
      }
    }

    // For debugging, when using SP-based CFA, we need to adjust the CFA
    // offset after each push.
    // TODO: This is needed only if we require precise CFA.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(MBB, std::next(Push), DL, 
                    MCCFIInstruction::createAdjustCfaOffset(nullptr, 4));

    MBB.erase(MOV);
  }

  // The stack-pointer copy is no longer used in the call sequences.
  // There should not be any other users, but we can't commit to that, so:
  if (MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);

  return true;
}