bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  // Ok, we can in fact do the transformation for this call.
  // Do not remove the FrameSetup instruction, but adjust the parameters.
  // PEI will end up finalizing the handling of this.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  FrameSetup->getOperand(1).setImm(Context.ExpectedDist);

  DebugLoc DL = FrameSetup->getDebugLoc();
  // Now, iterate through the vector in reverse order, and replace the movs
  // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
  // replace uses.
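  // For illustration only (hypothetical operands), a call sequence such as
  //   MOV32mi (%esp), $42
  //   MOV32mr 4(%esp), %eax
  //   CALLpcrel32 foo
  // would be rewritten into
  //   PUSH32r %eax
  //   PUSH32i8 $42
  //   CALLpcrel32 foo
  // with the pushes emitted in reverse argument order.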
  for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator MOV = *Context.MovVector[Idx];
    MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    if (MOV->getOpcode() == X86::MOV32mi) {
      unsigned PushOpcode = X86::PUSHi32;
      // If the operand is a small (8-bit) immediate, we can use a
      // PUSH instruction with a shorter encoding.
      // Note that isImm() may fail even though this is a MOVmi, because
      // the operand can also be a symbol.
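      // (Size illustration: "push $3" is 2 bytes as PUSH32i8 versus 5 bytes
      // as PUSHi32, so small constants get the short form.)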
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = X86::PUSH32i8;
      }
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
          .addOperand(PushOp);
    } else {
      unsigned int Reg = PushOp.getReg();

      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction.
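      // For illustration only (hypothetical operands): a reload such as
      //   %ecx = MOV32rm <slot>
      //   MOV32mr 4(%esp), %ecx
      // can collapse into a single
      //   PUSH32rmm <slot>
      // when canFoldIntoRegPush below proves the fold is safe.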
      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();

      // Check that this is legal to fold. Right now, we're extremely
      // conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));

        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));

        DefMov->eraseFromParent();
      } else {
        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
            .addReg(Reg)
            .getInstr();
      }
    }

    // For debugging, when using SP-based CFA, we need to adjust the CFA
    // offset after each push.
    // TODO: This is needed only if we require precise CFA.
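    // (Each 4-byte push below then gets a CFI directive equivalent to
    // ".cfi_adjust_cfa_offset 4", keeping the SP-based CFA accurate.)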
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(MBB, std::next(Push), DL, 
                    MCCFIInstruction::createAdjustCfaOffset(nullptr, 4));

    MBB.erase(MOV);
  }

  // The stack-pointer copy is no longer used in the call sequences.
  // There should not be any other users, but we can't commit to that, so:
  if (MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);

  return true;
}
Example #2
/// findSurvivorReg - Return the candidate register that is unused for the
/// longest after StartMI. UseMI is set to the instruction where the search
/// stopped.
///
/// No more than InstrLimit instructions are inspected.
///
unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
                                       BitVector &Candidates,
                                       unsigned InstrLimit,
                                       MachineBasicBlock::iterator &UseMI) {
  int Survivor = Candidates.find_first();
  assert(Survivor > 0 && "No candidates for scavenging");

  MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
  assert(StartMI != ME && "MI already at terminator");
  MachineBasicBlock::iterator RestorePointMI = StartMI;
  MachineBasicBlock::iterator MI = StartMI;

  bool inVirtLiveRange = false;
  for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
    if (MI->isDebugValue()) {
      ++InstrLimit; // Don't count debug instructions
      continue;
    }
    bool isVirtKillInsn = false;
    bool isVirtDefInsn = false;
    // Remove any candidates touched by instruction.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      if (MO.isRegMask())
        Candidates.clearBitsNotInMask(MO.getRegMask());
      if (!MO.isReg() || MO.isUndef() || !MO.getReg())
        continue;
      if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        if (MO.isDef())
          isVirtDefInsn = true;
        else if (MO.isKill())
          isVirtKillInsn = true;
        continue;
      }
      for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
        Candidates.reset(*AI);
    }
    // If we're not in a virtual reg's live range, this is a valid
    // restore point.
    if (!inVirtLiveRange) RestorePointMI = MI;

    // Update whether we're in the live range of a virtual register
    if (isVirtKillInsn) inVirtLiveRange = false;
    if (isVirtDefInsn) inVirtLiveRange = true;

    // Was our survivor untouched by this instruction?
    if (Candidates.test(Survivor))
      continue;

    // All candidates gone?
    if (Candidates.none())
      break;

    Survivor = Candidates.find_first();
  }
  // If we ran off the end, that's where we want to restore.
  if (MI == ME) RestorePointMI = ME;
  assert (RestorePointMI != StartMI &&
          "No available scavenger restore location!");

  // We ran out of candidates, so stop the search.
  UseMI = RestorePointMI;
  return Survivor;
}
X86CallFrameOptimization::InstClassification
X86CallFrameOptimization::classifyInstruction(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const X86RegisterInfo &RegInfo, DenseSet<unsigned int> &UsedRegs) {
  if (MI == MBB.end())
    return Exit;

  // The instructions we actually care about are movs onto the stack or special
  // cases of constant-stores to stack
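  // (The AND/OR forms below are constant-store idioms: e.g. "andl $0, 4(%esp)"
  //  writes 0 and "orl $-1, 4(%esp)" writes -1.)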
  switch (MI->getOpcode()) {
    case X86::AND16mi8:
    case X86::AND32mi8:
    case X86::AND64mi8: {
      MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands);
      return ImmOp.getImm() == 0 ? Convert : Exit;
    }
    case X86::OR16mi8:
    case X86::OR32mi8:
    case X86::OR64mi8: {
      MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands);
      return ImmOp.getImm() == -1 ? Convert : Exit;
    }
    case X86::MOV32mi:
    case X86::MOV32mr:
    case X86::MOV64mi32:
    case X86::MOV64mr:
      return Convert;
  }

  // Not all calling conventions have only stack MOVs between the stack
  // adjust and the call.

  // We want to tolerate other instructions, to cover more cases.
  // In particular:
  // a) PCrel calls, where we expect an additional COPY of the basereg.
  // b) Passing frame-index addresses.
  // c) Calling conventions that have inreg parameters. These generate
  //    both copies and movs into registers.
  // To avoid creating lots of special cases, allow any instruction
  // that does not write into memory, does not def or use the stack
  // pointer, and does not def any register that was used by a preceding
  // push.
  // (Reading from memory is allowed, even if referenced through a
  // frame index, since these will get adjusted properly in PEI)

  // The reason for the last condition is that the pushes can't replace
  // the movs in place, because the order must be reversed.
  // So if we have a MOV32mr that uses EDX, then an instruction that defs
  // EDX, and then the call, after the transformation the push will use
  // the modified version of EDX, and not the original one.
  // Since we are still in SSA form at this point, we only need to
  // make sure we don't clobber any *physical* registers that were
  // used by an earlier mov that will become a push.

  if (MI->isCall() || MI->mayStore())
    return Exit;

  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    unsigned int Reg = MO.getReg();
    if (!RegInfo.isPhysicalRegister(Reg))
      continue;
    if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister()))
      return Exit;
    if (MO.isDef()) {
      for (unsigned int U : UsedRegs)
        if (RegInfo.regsOverlap(Reg, U))
          return Exit;
    }
  }

  return Skip;
}
Example #4
void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
  AArch64MachineFunctionInfo *FuncInfo =
    MF.getInfo<AArch64MachineFunctionInfo>();
  MachineBasicBlock &MBB = MF.front();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  MachineModuleInfo &MMI = MF.getMMI();
  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
  bool NeedsFrameMoves = MMI.hasDebugInfo()
    || MF.getFunction()->needsUnwindTableEntry();

  uint64_t NumInitialBytes, NumResidualBytes;

  // Currently we expect the stack to be laid out by
  //     sub sp, sp, #initial
  //     stp x29, x30, [sp, #offset]
  //     ...
  //     str xxx, [sp, #offset]
  //     sub sp, sp, #rest (possibly via extra instructions).
  if (MFI->getCalleeSavedInfo().size()) {
    // If there are callee-saved registers, we want to store them efficiently as
    // a block, and virtual base assignment happens too early to do it for us so
    // we adjust the stack in two phases: first just for callee-saved fiddling,
    // then to allocate the rest of the frame.
    splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes);
  } else {
    // If there aren't any callee-saved registers, two-phase adjustment is
    // inefficient. It's more efficient to adjust with NumInitialBytes too
    // because when we're in a "callee pops argument space" situation, that pop
    // must be tacked onto Initial for correctness.
    NumInitialBytes = MFI->getStackSize();
    NumResidualBytes = 0;
  }

  // Tell everyone else how much adjustment we're expecting them to use. In
  // particular if an adjustment is required for a tail call the epilogue could
  // have a different view of things.
  FuncInfo->setInitialStackAdjust(NumInitialBytes);

  emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes,
               MachineInstr::FrameSetup);

  if (NeedsFrameMoves && NumInitialBytes) {
    // We emit this update even if the CFA is set from a frame pointer later so
    // that the CFA is valid in the interim.
    MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
      .addSym(SPLabel);

    MachineLocation Dst(MachineLocation::VirtualFP);
    MachineLocation Src(AArch64::XSP, NumInitialBytes);
    Moves.push_back(MachineMove(SPLabel, Dst, Src));
  }

  // Otherwise we need to set the frame pointer and/or add a second stack
  // adjustment.

  bool FPNeedsSetting = hasFP(MF);
  for (; MBBI != MBB.end(); ++MBBI) {
    // Note that this search makes strong assumptions about the operation used
    // to store the frame-pointer: it must be "STP x29, x30, ...". This could
    // change in future, but until then there's no point in implementing
    // untestable more generic cases.
    if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR
                       && MBBI->getOperand(0).getReg() == AArch64::X29) {
      int64_t X29FrameIdx = MBBI->getOperand(2).getIndex();
      FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx));

      ++MBBI;
      emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP,
                    AArch64::X29,
                    NumInitialBytes + MFI->getObjectOffset(X29FrameIdx),
                    MachineInstr::FrameSetup);

      // The offset adjustment used when emitting debugging locations relative
      // to whatever frame base is set. AArch64 uses the default frame base (FP
      // or SP) and this adjusts the calculations to be correct.
      MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx)
                               - MFI->getStackSize());

      if (NeedsFrameMoves) {
        MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol();
        BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
          .addSym(FPLabel);
        MachineLocation Dst(MachineLocation::VirtualFP);
        MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx));
        Moves.push_back(MachineMove(FPLabel, Dst, Src));
      }

      FPNeedsSetting = false;
    }

    if (!MBBI->getFlag(MachineInstr::FrameSetup))
      break;
  }

  assert(!FPNeedsSetting && "Frame pointer couldn't be set");

  emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes,
               MachineInstr::FrameSetup);

  // Now we emit the rest of the frame setup information, if necessary: we've
  // already noted the FP and initial SP moves so we're left with the prologue's
  // final SP update and callee-saved register locations.
  if (!NeedsFrameMoves)
    return;

  // Reuse the label if appropriate, so create it in this outer scope.
  MCSymbol *CSLabel = 0;

  // The rest of the stack adjustment
  if (!hasFP(MF) && NumResidualBytes) {
    CSLabel = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
      .addSym(CSLabel);

    MachineLocation Dst(MachineLocation::VirtualFP);
    MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes);
    Moves.push_back(MachineMove(CSLabel, Dst, Src));
  }

  // And any callee-saved registers (it's fine to leave them to the end here,
  // because the old values are still valid at this point.)
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.size()) {
    if (!CSLabel) {
      CSLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
        .addSym(CSLabel);
    }

    for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
           E = CSI.end(); I != E; ++I) {
      MachineLocation Dst(MachineLocation::VirtualFP,
                          MFI->getObjectOffset(I->getFrameIdx()));
      MachineLocation Src(I->getReg());
      Moves.push_back(MachineMove(CSLabel, Dst, Src));
    }
  }
}
Example #5
void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const Counters &Increment) {

  // Get the hardware counter increments and sum them up
  Counters Limit = ZeroCounts;
  unsigned Sum = 0;

  if (TII->mayAccessFlatAddressSpace(*I))
    IsFlatOutstanding = true;

  for (unsigned i = 0; i < 3; ++i) {
    LastIssued.Array[i] += Increment.Array[i];
    if (Increment.Array[i])
      Limit.Array[i] = LastIssued.Array[i];
    Sum += Increment.Array[i];
  }

  // If we don't increase anything then that's it
  if (Sum == 0) {
    LastOpcodeType = OTHER;
    return;
  }

  if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
    // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
    // or SMEM clause, respectively.
    //
    // The temporary workaround is to break the clauses with S_NOP.
    //
    // The proper solution would be to allocate registers such that all source
    // and destination registers don't overlap, e.g. this is illegal:
    //   r0 = load r2
    //   r2 = load r0
    if (LastOpcodeType == VMEM && Increment.Named.VM) {
      // Insert a NOP to break the clause.
      BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
          .addImm(0);
      LastInstWritesM0 = false;
    }

    if (TII->isSMRD(*I))
      LastOpcodeType = SMEM;
    else if (Increment.Named.VM)
      LastOpcodeType = VMEM;
  }

  // Remember which export instructions we have seen
  if (Increment.Named.EXP) {
    ExpInstrTypesSeen |= TII->isEXP(*I) ? 1 : 2;
  }

  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
    MachineOperand &Op = I->getOperand(i);
    if (!isOpRelevant(Op))
      continue;

    const TargetRegisterClass *RC = TII->getOpRegClass(*I, i);
    RegInterval Interval = getRegInterval(RC, Op);
    for (unsigned j = Interval.first; j < Interval.second; ++j) {

      // Remember which registers we define
      if (Op.isDef())
        DefinedRegs[j] = Limit;

      // and which one we are using
      if (Op.isUse())
        UsedRegs[j] = Limit;
    }
  }
}
// Finds compare instruction that corresponds to supported types of branching.
// Returns the instruction or nullptr on failures or detecting unsupported
// instructions.
MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
    MachineBasicBlock *MBB) {
  MachineBasicBlock::iterator I = MBB->getFirstTerminator();
  if (I == MBB->end())
    return nullptr;

  if (I->getOpcode() != AArch64::Bcc)
    return nullptr;

  // Since we may modify cmp of this MBB, make sure NZCV does not live out.
  for (auto SuccBB : MBB->successors())
    if (SuccBB->isLiveIn(AArch64::NZCV))
      return nullptr;

  // Now find the instruction controlling the terminator.
  for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
    --I;
    assert(!I->isTerminator() && "Spurious terminator");
    // Check if there is any use of NZCV between CMP and Bcc.
    if (I->readsRegister(AArch64::NZCV))
      return nullptr;
    switch (I->getOpcode()) {
    // cmp is an alias for subs with a dead destination register.
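    // (e.g. "cmp w0, #42" is really "subs wzr, w0, #42" with the zero register
    //  as the discarded destination)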
    case AArch64::SUBSWri:
    case AArch64::SUBSXri:
    // cmn is an alias for adds with a dead destination register.
    case AArch64::ADDSWri:
    case AArch64::ADDSXri: {
      unsigned ShiftAmt = AArch64_AM::getShiftValue(I->getOperand(3).getImm());
      if (!I->getOperand(2).isImm()) {
        DEBUG(dbgs() << "Immediate of cmp is symbolic, " << *I << '\n');
        return nullptr;
      } else if (I->getOperand(2).getImm() << ShiftAmt >= 0xfff) {
        DEBUG(dbgs() << "Immediate of cmp may be out of range, " << *I << '\n');
        return nullptr;
      } else if (!MRI->use_empty(I->getOperand(0).getReg())) {
        DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n');
        return nullptr;
      }
      return I;
    }
    // Prevent false positive case like:
    // cmp      w19, #0
    // cinc     w0, w19, gt
    // ...
    // fcmp     d8, #0.0
    // b.gt     .LBB0_5
    case AArch64::FCMPDri:
    case AArch64::FCMPSri:
    case AArch64::FCMPESri:
    case AArch64::FCMPEDri:

    case AArch64::SUBSWrr:
    case AArch64::SUBSXrr:
    case AArch64::ADDSWrr:
    case AArch64::ADDSXrr:
    case AArch64::FCMPSrr:
    case AArch64::FCMPDrr:
    case AArch64::FCMPESrr:
    case AArch64::FCMPEDrr:
      // Skip comparison instructions without immediate operands.
      return nullptr;
    }
  }
  DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
  return nullptr;
}
static void analyzeFrameIndexes(MachineFunction &MF) {
  if (MBDisableStackAdjust) return;

  MachineFrameInfo *MFI = MF.getFrameInfo();
  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  MachineRegisterInfo::livein_iterator LII = MRI.livein_begin();
  MachineRegisterInfo::livein_iterator LIE = MRI.livein_end();
  const SmallVector<int, 16> &LiveInFI = MBlazeFI->getLiveIn();
  SmallVector<MachineInstr*, 16> EraseInstr;
  SmallVector<std::pair<int,int64_t>, 16> FrameRelocate;

  MachineBasicBlock *MBB = MF.getBlockNumbered(0);
  MachineBasicBlock::iterator MIB = MBB->begin();
  MachineBasicBlock::iterator MIE = MBB->end();

  int StackAdjust = 0;
  int StackOffset = -28;

  // In this loop we are searching for frame indexes that correspond to incoming
  // arguments that are already in the stack. We look for instruction sequences
  // like the following:
  //    
  //    LWI REG, FI1, 0
  //    ...
  //    SWI REG, FI2, 0
  //
  // As long as there are no defs of REG in the ... part, we can eliminate
  // the SWI instruction because the value has already been stored to the
  // stack by the caller. All we need to do is locate FI at the correct
  // stack location according to the calling conventions.
  //
  // Additionally, if the SWI operation kills the def of REG then we don't
  // need the LWI operation so we can erase it as well.
  for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i) {
    for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
      if (I->getOpcode() != MBlaze::LWI || I->getNumOperands() != 3 ||
          !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
          I->getOperand(1).getIndex() != LiveInFI[i]) continue;

      unsigned FIReg = I->getOperand(0).getReg();
      MachineBasicBlock::iterator SI = I;
      for (SI++; SI != MIE; ++SI) {
        if (!SI->getOperand(0).isReg() ||
            !SI->getOperand(1).isFI() ||
            SI->getOpcode() != MBlaze::SWI) continue;

        int FI = SI->getOperand(1).getIndex();
        if (SI->getOperand(0).getReg() != FIReg ||
            MFI->isFixedObjectIndex(FI) ||
            MFI->getObjectSize(FI) != 4) continue;

        if (SI->getOperand(0).isDef()) break;

        if (SI->getOperand(0).isKill()) {
          DEBUG(dbgs() << "LWI for FI#" << I->getOperand(1).getIndex() 
                       << " removed\n");
          EraseInstr.push_back(I);
        }

        EraseInstr.push_back(SI);
        DEBUG(dbgs() << "SWI for FI#" << FI << " removed\n");

        FrameRelocate.push_back(std::make_pair(FI,StackOffset));
        DEBUG(dbgs() << "FI#" << FI << " relocated to " << StackOffset << "\n");

        StackOffset -= 4;
        StackAdjust += 4;
        break;
      }
    }
  }

  // In this loop we are searching for frame indexes that correspond to
  // incoming arguments that are in registers. We look for instruction
  // sequences like the following:
  //    
  //    ...  SWI REG, FI, 0
  // 
  // As long as the ... part does not define REG and if REG is an incoming
  // parameter register then we know that, according to ABI conventions, the
  // caller has allocated stack space for it already.  Instead of allocating
  // stack space on our frame, we record the correct location in the callers
  // frame.
  for (MachineRegisterInfo::livein_iterator LI = LII; LI != LIE; ++LI) {
    for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
      if (I->definesRegister(LI->first))
        break;

      if (I->getOpcode() != MBlaze::SWI || I->getNumOperands() != 3 ||
          !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
          I->getOperand(1).getIndex() < 0) continue;

      if (I->getOperand(0).getReg() == LI->first) {
        int FI = I->getOperand(1).getIndex();
        MBlazeFI->recordLiveIn(FI);

        int FILoc = 0;
        switch (LI->first) {
        default: llvm_unreachable("invalid incoming parameter!");
        case MBlaze::R5:  FILoc = -4; break;
        case MBlaze::R6:  FILoc = -8; break;
        case MBlaze::R7:  FILoc = -12; break;
        case MBlaze::R8:  FILoc = -16; break;
        case MBlaze::R9:  FILoc = -20; break;
        case MBlaze::R10: FILoc = -24; break;
        }

        StackAdjust += 4;
        FrameRelocate.push_back(std::make_pair(FI,FILoc));
        DEBUG(dbgs() << "FI#" << FI << " relocated to " << FILoc << "\n");
        break;
      }
    }
  }

  // Go ahead and erase all of the instructions that we determined were
  // no longer needed.
  for (int i = 0, e = EraseInstr.size(); i < e; ++i)
    MBB->erase(EraseInstr[i]);

  // Replace all of the frame indexes that we have relocated with new
  // fixed object frame indexes.
  replaceFrameIndexes(MF, FrameRelocate);
}
MachineBasicBlock *
MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
  MachineFunction *MF = getParent();
  DebugLoc dl;  // FIXME: this is nowhere

  // We may need to update this's terminator, but we can't do that if
  // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
  MachineBasicBlock *TBB = 0, *FBB = 0;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
    return NULL;

  // Avoid bugpoint weirdness: A block may end with a conditional branch but
  // jumps to the same MBB in either case. We have duplicate CFG edges in that
  // case that we can't handle. Since this never happens in properly optimized
  // code, just skip those edges.
  if (TBB && TBB == FBB) {
    DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
                 << getNumber() << '\n');
    return NULL;
  }

  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
  MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
  DEBUG(dbgs() << "Splitting critical edge:"
        " BB#" << getNumber()
        << " -- BB#" << NMBB->getNumber()
        << " -- BB#" << Succ->getNumber() << '\n');

  // On some targets like Mips, branches may kill virtual registers. Make sure
  // that LiveVariables is properly updated after updateTerminator replaces the
  // terminators.
  LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();

  // Collect a list of virtual registers killed by the terminators.
  SmallVector<unsigned, 4> KilledRegs;
  if (LV)
    for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) {
      MachineInstr *MI = I;
      for (MachineInstr::mop_iterator OI = MI->operands_begin(),
           OE = MI->operands_end(); OI != OE; ++OI) {
        if (!OI->isReg() || !OI->isUse() || !OI->isKill() || OI->isUndef())
          continue;
        unsigned Reg = OI->getReg();
        if (TargetRegisterInfo::isVirtualRegister(Reg) &&
            LV->getVarInfo(Reg).removeKill(MI)) {
          KilledRegs.push_back(Reg);
          DEBUG(dbgs() << "Removing terminator kill: " << *MI);
          OI->setIsKill(false);
        }
      }
    }

  ReplaceUsesOfBlockWith(Succ, NMBB);
  updateTerminator();

  // Insert unconditional "jump Succ" instruction in NMBB if necessary.
  NMBB->addSuccessor(Succ);
  if (!NMBB->isLayoutSuccessor(Succ)) {
    Cond.clear();
    MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
  }

  // Fix PHI nodes in Succ so they refer to NMBB instead of this
  for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end();
       i != e && i->isPHI(); ++i)
    for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
      if (i->getOperand(ni+1).getMBB() == this)
        i->getOperand(ni+1).setMBB(NMBB);

  // Inherit live-ins from the successor
  for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(),
         E = Succ->livein_end(); I != E; ++I)
    NMBB->addLiveIn(*I);

  // Update LiveVariables.
  if (LV) {
    // Restore kills of virtual registers that were killed by the terminators.
    while (!KilledRegs.empty()) {
      unsigned Reg = KilledRegs.pop_back_val();
      for (iterator I = end(), E = begin(); I != E;) {
        if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false))
          continue;
        LV->getVarInfo(Reg).Kills.push_back(I);
        DEBUG(dbgs() << "Restored terminator kill: " << *I);
        break;
      }
    }
    // Update relevant live-through information.
    LV->addNewBlock(NMBB, this, Succ);
  }

  if (MachineDominatorTree *MDT =
      P->getAnalysisIfAvailable<MachineDominatorTree>()) {
    // Update dominator information.
    MachineDomTreeNode *SuccDTNode = MDT->getNode(Succ);

    bool IsNewIDom = true;
    for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
         PI != E; ++PI) {
      MachineBasicBlock *PredBB = *PI;
      if (PredBB == NMBB)
        continue;
      if (!MDT->dominates(SuccDTNode, MDT->getNode(PredBB))) {
        IsNewIDom = false;
        break;
      }
    }

    // We know "this" dominates the newly created basic block.
    MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);

    // If all the other predecessors of "Succ" are dominated by "Succ" itself
    // then the new block is the new immediate dominator of "Succ". Otherwise,
    // the new block doesn't dominate anything.
    if (IsNewIDom)
      MDT->changeImmediateDominator(SuccDTNode, NewDTNode);
  }

  if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
      // If one or the other blocks were not in a loop, the new block is not
      // either, and thus LI doesn't need to be updated.
      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
        if (TIL == DestLoop) {
          // Both in the same loop, the NMBB joins loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (TIL->contains(DestLoop)) {
          // Edge from an outer loop to an inner loop.  Add to the outer loop.
          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else if (DestLoop->contains(TIL)) {
          // Edge from an inner loop to an outer loop.  Add to the outer loop.
          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
        } else {
          // Edge from two loops with no containment relation.  Because these
          // are natural loops, we know that the destination block must be the
          // header of its loop (adding a branch into a loop elsewhere would
          // create an irreducible loop).
          assert(DestLoop->getHeader() == Succ &&
                 "Should not create irreducible loops!");
          if (MachineLoop *P = DestLoop->getParentLoop())
            P->addBasicBlockToLoop(NMBB, MLI->getBase());
        }
      }
    }

  return NMBB;
}
Example #9
void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    // Label used to tie together the PROLOG_LABEL and the MachineMoves.
    MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();

    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (NumBytes && !canUseRedZone(MF)) {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    } else if (NumBytes) {
      ++NumRedZoneFunctions;
    }

    return;
  }

  // Only set up FP if we actually need to.
  int FPOffset = 0;
  if (HasFP) {
    // First instruction must a) allocate the stack  and b) have an immediate
    // that is a multiple of -2.
    assert((MBBI->getOpcode() == AArch64::STPXpre ||
            MBBI->getOpcode() == AArch64::STPDpre) &&
           MBBI->getOperand(3).getReg() == AArch64::SP &&
           MBBI->getOperand(4).getImm() < 0 &&
           (MBBI->getOperand(4).getImm() & 1) == 0);

    // Frame pointer is fp = sp - 16. Since the  STPXpre subtracts the space
    // required for the callee saved register area we get the frame pointer
    // by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
    FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
    assert(FPOffset >= 0 && "Bad Framepointer Offset");
  }

  // Move past the saves of the callee-saved registers.
  while (MBBI->getOpcode() == AArch64::STPXi ||
         MBBI->getOpcode() == AArch64::STPDi ||
         MBBI->getOpcode() == AArch64::STPXpre ||
         MBBI->getOpcode() == AArch64::STPDpre) {
    ++MBBI;
    NumBytes -= 16;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  if (HasFP) {
    // Issue    sub fp, sp, FPOffset or
    //          mov fp,sp          when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes);

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  //
  if (RegInfo->hasBasePointer(MF))
    TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);

  if (needsFrameMoves) {
    const DataLayout *TD = MF.getSubtarget().getDataLayout();
    const int StackGrowth = -TD->getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);

    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Record the location of the stored LR
      unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Record the location of the stored FP
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Now emit the moves for whatever callee saved regs we have.
    emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
  }
}
Example #10
void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
                                             MachineBasicBlock::iterator mi,
                                             SlotIndex MIIdx,
                                             MachineOperand& MO,
                                             unsigned MOIdx,
                                             LiveInterval &interval) {
  DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));

  // Virtual registers may be defined multiple times (due to phi
  // elimination and 2-addr elimination).  Much of what we do only has to be
  // done once for the vreg.  We use an empty interval to detect the first
  // time we see a vreg.
  LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
  if (interval.empty()) {
    // Get the Idx of the defining instructions.
    SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber());

    // Make sure the first definition is not a partial redefinition. Add an
    // <imp-def> of the full register.
    // FIXME: LiveIntervals shouldn't modify the code like this.  Whoever
    // created the machine instruction should annotate it with <undef> flags
    // as needed.  Then we can simply assert here.  The REG_SEQUENCE lowering
    // is the main suspect.
    if (MO.getSubReg()) {
      mi->addRegisterDefined(interval.reg);
      // Mark all defs of interval.reg on this instruction as reading <undef>.
      for (unsigned i = MOIdx, e = mi->getNumOperands(); i != e; ++i) {
        MachineOperand &MO2 = mi->getOperand(i);
        if (MO2.isReg() && MO2.getReg() == interval.reg && MO2.getSubReg())
          MO2.setIsUndef();
      }
    }

    MachineInstr *CopyMI = NULL;
    if (mi->isCopyLike()) {
      CopyMI = mi;
    }

    VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
    assert(ValNo->id == 0 && "First value in interval is not 0?");

    // Loop over all of the blocks that the vreg is defined in.  There are
    // two cases we have to handle here.  The most common case is a vreg
    // whose lifetime is contained within a basic block.  In this case there
    // will be a single kill, in MBB, which comes after the definition.
    if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
      // FIXME: what about dead vars?
      SlotIndex killIdx;
      if (vi.Kills[0] != mi)
        killIdx = getInstructionIndex(vi.Kills[0]).getRegSlot();
      else
        killIdx = defIndex.getDeadSlot();

      // If the kill happens after the definition, we have an intra-block
      // live range.
      if (killIdx > defIndex) {
        assert(vi.AliveBlocks.empty() &&
               "Shouldn't be alive across any blocks!");
        LiveRange LR(defIndex, killIdx, ValNo);
        interval.addRange(LR);
        DEBUG(dbgs() << " +" << LR << "\n");
        return;
      }
    }

    // The other case we handle is when a virtual register lives to the end
    // of the defining block, potentially live across some blocks, then is
    // live into some number of blocks, but gets killed.  Start by adding a
    // range that goes from this definition to the end of the defining block.
    LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo);
    DEBUG(dbgs() << " +" << NewLR);
    interval.addRange(NewLR);

    bool PHIJoin = lv_->isPHIJoin(interval.reg);

    if (PHIJoin) {
      // A phi join register is killed at the end of the MBB and revived as a new
      // valno in the killing blocks.
      assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
      DEBUG(dbgs() << " phi-join");
      ValNo->setHasPHIKill(true);
    } else {
      // Iterate over all of the blocks that the variable is completely
      // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the
      // live interval.
      for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
               E = vi.AliveBlocks.end(); I != E; ++I) {
        MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
        LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo);
        interval.addRange(LR);
        DEBUG(dbgs() << " +" << LR);
      }
    }

    // Finally, this virtual register is live from the start of any killing
    // block to the 'use' slot of the killing instruction.
    for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
      MachineInstr *Kill = vi.Kills[i];
      SlotIndex Start = getMBBStartIdx(Kill->getParent());
      SlotIndex killIdx = getInstructionIndex(Kill).getRegSlot();

      // Create an interval with a NEW value number.  Note that this value
      // number isn't actually defined by an instruction, weird huh? :)
      if (PHIJoin) {
        assert(getInstructionFromIndex(Start) == 0 &&
               "PHI def index points at actual instruction.");
        ValNo = interval.getNextValue(Start, 0, VNInfoAllocator);
        ValNo->setIsPHIDef(true);
      }
      LiveRange LR(Start, killIdx, ValNo);
      interval.addRange(LR);
      DEBUG(dbgs() << " +" << LR);
    }

  } else {
    if (MultipleDefsBySameMI(*mi, MOIdx))
      // Multiple defs of the same virtual register by the same instruction.
      // e.g. %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
      // This is likely due to elimination of REG_SEQUENCE instructions. Return
      // here since there is nothing to do.
      return;

    // If this is the second time we see a virtual register definition, it
    // must be due to phi elimination or two addr elimination.  If this is
    // the result of two address elimination, then the vreg is one of the
    // def-and-use register operand.

    // It may also be partial redef like this:
    // 80  %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0
    // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0
    bool PartReDef = isPartialRedef(MIIdx, MO, interval);
    if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) {
      // If this is a two-address definition, then we have already processed
      // the live range.  The only problem is that we didn't realize there
      // are actually two values in the live interval.  Because of this we
      // need to take the LiveRegion that defines this register and split it
      // into two values.
      SlotIndex RedefIndex = MIIdx.getRegSlot(MO.isEarlyClobber());

      const LiveRange *OldLR =
        interval.getLiveRangeContaining(RedefIndex.getRegSlot(true));
      VNInfo *OldValNo = OldLR->valno;
      SlotIndex DefIndex = OldValNo->def.getRegSlot();

      // Delete the previous value, which should be short and continuous,
      // because the 2-addr copy must be in the same MBB as the redef.
      interval.removeRange(DefIndex, RedefIndex);

      // The new value number (#1) is defined by the instruction we claimed
      // defined value #0.
      VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator);

      // Value#0 is now defined by the 2-addr instruction.
      OldValNo->def  = RedefIndex;
      OldValNo->setCopy(0);

      // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ...
      if (PartReDef && mi->isCopyLike())
        OldValNo->setCopy(&*mi);

      // Add the new live interval which replaces the range for the input copy.
      LiveRange LR(DefIndex, RedefIndex, ValNo);
      DEBUG(dbgs() << " replace range with " << LR);
      interval.addRange(LR);

      // If this redefinition is dead, we need to add a dummy unit live
      // range covering the def slot.
      if (MO.isDead())
        interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(),
                                    OldValNo));

      DEBUG({
          dbgs() << " RESULT: ";
          interval.print(dbgs(), tri_);
        });
    } else if (lv_->isPHIJoin(interval.reg)) {
MachineInstrBuilder
MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
                                  MachineBasicBlock::iterator I) const {
  MachineInstrBuilder MIB;

  // Certain branches have two forms: e.g. beq $1, $zero, dest vs. beqz $1, dest
  // Pick the zero form of the branch for readable assembly and for greater
  // branch distance in non-microMIPS mode.
  // Additionally, MIPSR6 does not permit the use of register $zero for compact
  // branches.
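  // For example, "beqc $1, $zero, dest" is rewritten below to "beqzc $1, dest".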
  // FIXME: Certain atomic sequences on mips64 generate 32bit references to
  // Mips::ZERO, which is incorrect. This test should be updated to use
  // Subtarget.getABI().GetZeroReg() when those atomic sequences and others
  // are fixed.
  int ZeroOperandPosition = -1;
  bool BranchWithZeroOperand = false;
  if (I->isBranch() && !I->isPseudo()) {
    auto TRI = I->getParent()->getParent()->getSubtarget().getRegisterInfo();
    ZeroOperandPosition = I->findRegisterUseOperandIdx(Mips::ZERO, false, TRI);
    BranchWithZeroOperand = ZeroOperandPosition != -1;
  }

  if (BranchWithZeroOperand) {
    switch (NewOpc) {
    case Mips::BEQC:
      NewOpc = Mips::BEQZC;
      break;
    case Mips::BNEC:
      NewOpc = Mips::BNEZC;
      break;
    case Mips::BGEC:
      NewOpc = Mips::BGEZC;
      break;
    case Mips::BLTC:
      NewOpc = Mips::BLTZC;
      break;
    case Mips::BEQC64:
      NewOpc = Mips::BEQZC64;
      break;
    case Mips::BNEC64:
      NewOpc = Mips::BNEZC64;
      break;
    }
  }

  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc));

  // For MIPSR6, JI*C requires an immediate 0 as an operand. JIALC(64) also
  // requires an immediate 0 operand, plus the removal of its %RA<imp-def>
  // implicit operand, since copying the implicit operands of the instruction
  // we're looking at will give us the correct flags.
  if (NewOpc == Mips::JIC || NewOpc == Mips::JIALC || NewOpc == Mips::JIC64 ||
      NewOpc == Mips::JIALC64) {

    if (NewOpc == Mips::JIALC || NewOpc == Mips::JIALC64)
      MIB->RemoveOperand(0);

    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      MIB.add(I->getOperand(J));
    }

    MIB.addImm(0);

  } else {
    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J)
        continue;

      MIB.add(I->getOperand(J));
    }
  }

  MIB.copyImplicitOps(*I);

  MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end());
  return MIB;
}
Example #12
/// ComputeLocalLiveness - Computes liveness of registers within a basic
/// block, setting the killed/dead flags as appropriate.
void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  // Keep track of the most recently seen previous use or def of each reg, 
  // so that we can update them with dead/kill markers.
  DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
       I != E; ++I) {
    if (I->isDebugValue())
      continue;
    
    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = I->getOperand(i);
      // Uses don't trigger any flags, but we need to save
      // them for later.  Also, we have to process these
      // _before_ processing the defs, since an instr
      // uses regs before it defs them.
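      // (e.g. a two-address "%eax = ADD32rr %eax, %ebx" reads %eax before
      //  redefining it, so the use must be recorded first)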
      if (!MO.isReg() || !MO.getReg() || !MO.isUse())
        continue;
      
      LastUseDef[MO.getReg()] = std::make_pair(I, i);
      
      if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
      
      const unsigned *Aliases = TRI->getAliasSet(MO.getReg());
      if (Aliases == 0)
        continue;
      
      while (*Aliases) {
        DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
          alias = LastUseDef.find(*Aliases);
        
        if (alias != LastUseDef.end() && alias->second.first != I)
          LastUseDef[*Aliases] = std::make_pair(I, i);
        
        ++Aliases;
      }
    }
    
    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = I->getOperand(i);
      // Defs other than 2-addr redefs _do_ trigger flag changes:
      //   - A def followed by a def is dead
      //   - A use followed by a def is a kill
      if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue;
      
      DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
        last = LastUseDef.find(MO.getReg());
      if (last != LastUseDef.end()) {
        // Check if this is a two address instruction.  If so, then
        // the def does not kill the use.
        if (last->second.first == I &&
            I->isRegTiedToUseOperand(i))
          continue;
        
        MachineOperand &lastUD =
                    last->second.first->getOperand(last->second.second);
        if (lastUD.isDef())
          lastUD.setIsDead(true);
        else
          lastUD.setIsKill(true);
      }
      
      LastUseDef[MO.getReg()] = std::make_pair(I, i);
    }
  }
  
  // Live-out (of the function) registers contain return values of the function,
  // so we need to make sure they are alive at return time.
  MachineBasicBlock::iterator Ret = MBB.getFirstTerminator();
  bool BBEndsInReturn = (Ret != MBB.end() && Ret->getDesc().isReturn());

  if (BBEndsInReturn)
    for (MachineRegisterInfo::liveout_iterator
         I = MF->getRegInfo().liveout_begin(),
         E = MF->getRegInfo().liveout_end(); I != E; ++I)
      if (!Ret->readsRegister(*I)) {
        Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
        LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1);
      }
  
  // Finally, loop over the final use/def of each reg 
  // in the block and determine if it is dead.
  for (DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
       I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) {
    MachineInstr *MI = I->second.first;
    unsigned idx = I->second.second;
    MachineOperand &MO = MI->getOperand(idx);
    
    bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg());
    
    // A crude approximation of "live-out" calculation
    bool usedOutsideBlock = isPhysReg ? false :   
          UsedInMultipleBlocks.test(MO.getReg() -  
                                    TargetRegisterInfo::FirstVirtualRegister);

    // If the machine BB ends in a return instruction, then the value isn't used
    // outside of the BB.
    if (!isPhysReg && (!usedOutsideBlock || BBEndsInReturn)) {
      // DBG_VALUE complicates this:  if the only refs of a register outside
      // this block are DBG_VALUE, we can't keep the reg live just for that,
      // as it will cause the reg to be spilled at the end of this block when
      // it wouldn't have been otherwise.  Nullify the DBG_VALUEs when that
      // happens.
      bool UsedByDebugValueOnly = false;
      for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
             UE = MRI.reg_end(); UI != UE; ++UI) {
        // Two cases:
        // - used in another block
        // - used in the same block before it is defined (loop)
        if (UI->getParent() == &MBB &&
            !(MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI)))
          continue;
        
        if (UI->isDebugValue()) {
          UsedByDebugValueOnly = true;
          continue;
        }

        // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone.
        UsedInMultipleBlocks.set(MO.getReg() - 
                                 TargetRegisterInfo::FirstVirtualRegister);
        usedOutsideBlock = true;
        UsedByDebugValueOnly = false;
        break;
      }

      if (UsedByDebugValueOnly)
        for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
             UE = MRI.reg_end(); UI != UE; ++UI)
          if (UI->isDebugValue() &&
              (UI->getParent() != &MBB ||
               (MO.isDef() && precedes(&*UI, MI))))
            UI.getOperand().setReg(0U);
    }
  
    // Physical registers and those that are not live-out of the block are
    // killed/dead at their last use/def within this block.
    if (isPhysReg || !usedOutsideBlock || BBEndsInReturn) {
      if (MO.isUse()) {
        // Don't mark uses that are tied to defs as kills.
        if (!MI->isRegTiedToDefOperand(idx))
          MO.setIsKill(true);
      } else {
        MO.setIsDead(true);
      }
    }
  }
}
// Try to fuse comparison instruction Compare into a later branch.
// Return true on success and if Compare is therefore redundant.
bool SystemZElimCompare::fuseCompareOperations(
    MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) {
  // See whether we have a single branch with which to fuse.
  if (CCUsers.size() != 1)
    return false;
  MachineInstr *Branch = CCUsers[0];
  SystemZII::FusedCompareType Type;
  switch (Branch->getOpcode()) {
  case SystemZ::BRC:
    Type = SystemZII::CompareAndBranch;
    break;
  case SystemZ::CondReturn:
    Type = SystemZII::CompareAndReturn;
    break;
  case SystemZ::CallBCR:
    Type = SystemZII::CompareAndSibcall;
    break;
  case SystemZ::CondTrap:
    Type = SystemZII::CompareAndTrap;
    break;
  default:
    return false;
  }

  // See whether we have a comparison that can be fused.
  unsigned FusedOpcode =
      TII->getFusedCompare(Compare.getOpcode(), Type, &Compare);
  if (!FusedOpcode)
    return false;

  // Make sure that the operands are available at the branch.
  // SrcReg2 is the register if the source operand is a register,
  // 0 if the source operand is immediate, and the base register
  // if the source operand is memory (index is not supported).
  unsigned SrcReg = Compare.getOperand(0).getReg();
  unsigned SrcReg2 =
      Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : 0;
  MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
  for (++MBBI; MBBI != MBBE; ++MBBI)
    if (MBBI->modifiesRegister(SrcReg, TRI) ||
        (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
      return false;

  // Read the branch mask, target (if applicable), regmask (if applicable).
  MachineOperand CCMask(MBBI->getOperand(1));
  assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 &&
         "Invalid condition-code mask for integer comparison");
  // This is only valid for CompareAndBranch.
  MachineOperand Target(MBBI->getOperand(
    Type == SystemZII::CompareAndBranch ? 2 : 0));
  const uint32_t *RegMask;
  if (Type == SystemZII::CompareAndSibcall)
    RegMask = MBBI->getOperand(2).getRegMask();

  // Clear out all current operands.
  int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
  assert(CCUse >= 0 && "BRC/BCR must use CC");
  Branch->RemoveOperand(CCUse);
  // Remove target (branch) or regmask (sibcall).
  if (Type == SystemZII::CompareAndBranch ||
      Type == SystemZII::CompareAndSibcall)
    Branch->RemoveOperand(2);
  Branch->RemoveOperand(1);
  Branch->RemoveOperand(0);

  // Rebuild Branch as a fused compare and branch.
  // SrcNOps is the number of MI operands of the compare instruction
  // that we need to copy over.
  unsigned SrcNOps = 2;
  if (FusedOpcode == SystemZ::CLT || FusedOpcode == SystemZ::CLGT)
    SrcNOps = 3;
  Branch->setDesc(TII->get(FusedOpcode));
  MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
  for (unsigned I = 0; I < SrcNOps; I++)
    MIB.addOperand(Compare.getOperand(I));
  MIB.addOperand(CCMask);

  if (Type == SystemZII::CompareAndBranch) {
    // Only conditional branches define CC, as they may be converted back
    // to a non-fused branch because of a long displacement.  Conditional
    // returns don't have that problem.
    MIB.addOperand(Target)
       .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
  }

  if (Type == SystemZII::CompareAndSibcall)
    MIB.addRegMask(RegMask);

  // Clear any intervening kills of SrcReg and SrcReg2.
  MBBI = Compare;
  for (++MBBI; MBBI != MBBE; ++MBBI) {
    MBBI->clearRegisterKills(SrcReg, TRI);
    if (SrcReg2)
      MBBI->clearRegisterKills(SrcReg2, TRI);
  }
  FusedComparisons += 1;
  return true;
}
/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();
  switch (Opcode) {
  default:
    return false;
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64: {
    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();

    if (StackAdj) {
      // Check for possible merge with preceding ADD instruction.
      StackAdj += X86FL->mergeSPUpdates(MBB, MBBI, true);
      X86FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true);
    }

    // Jump to label or value in register.
    bool IsWin64 = STI->isTargetWin64();
    if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdi64) {
      unsigned Op = (Opcode == X86::TCRETURNdi)
                        ? X86::TAILJMPd
                        : (IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      if (JumpTarget.isGlobal())
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
      unsigned Op = (Opcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      for (unsigned i = 0; i != 5; ++i)
        MIB.addOperand(MBBI->getOperand(i));
    } else if (Opcode == X86::TCRETURNri64) {
      BuildMI(MBB, MBBI, DL,
              TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .addReg(JumpTarget.getReg(), RegState::Kill);
    } else {
      BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
          .addReg(JumpTarget.getReg(), RegState::Kill);
    }

    MachineInstr *NewMI = std::prev(MBBI);
    NewMI->copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);

    return true;
  }
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    const bool Uses64BitFramePtr =
        STI->isTarget64BitLP64() || STI->isTargetNaCl64();
    unsigned StackPtr = TRI->getStackRegister();
    BuildMI(MBB, MBBI, DL,
            TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), StackPtr)
        .addReg(DestAddr.getReg());
    // The EH_RETURN pseudo is really removed during the MC Lowering.
    return true;
  }
  case X86::IRET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    X86FL->emitSPUpdate(MBB, MBBI, StackAdj, true);
    // Replace pseudo with machine iret
    BuildMI(MBB, MBBI, DL,
            TII->get(STI->is64Bit() ? X86::IRET64 : X86::IRET32));
    MBB.erase(MBBI);
    return true;
  }
  case X86::RET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    MachineInstrBuilder MIB;
    if (StackAdj == 0) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETQ : X86::RETL));
    } else if (isUInt<16>(StackAdj)) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETIQ : X86::RETIL))
                .addImm(StackAdj);
    } else {
      assert(!STI->is64Bit() &&
             "shouldn't need to do this for x86_64 targets!");
      // A ret can only handle immediates as big as 2**16-1.  If we need to pop
      // off bytes before the return address, we must do it manually.
      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r))
          .addReg(X86::ECX, RegState::Define);
      X86FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true);
      BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
      MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RETL));
    }
    for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
      MIB.addOperand(MBBI->getOperand(I));
    MBB.erase(MBBI);
    return true;
  }
  case X86::EH_RESTORE: {
    // Restore ESP and EBP, and optionally ESI if required.
    bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(
        MBB.getParent()->getFunction()->getPersonalityFn()));
    X86FL->restoreWin32EHStackPointers(MBB, MBBI, DL, /*RestoreSP=*/IsSEH);
    MBBI->eraseFromParent();
    return true;
  }
  }
  llvm_unreachable("Previous switch has a fallthrough?");
}
bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
                                   MachineBasicBlock &MBB,
                                   MachineLoopInfo *MLI) {
    if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
        return false;   // Quick exit for basic blocks without PHIs.

    const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : 0;
    bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader();

    bool Changed = false;
    for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
            BBI != BBE && BBI->isPHI(); ++BBI) {
        for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
            unsigned Reg = BBI->getOperand(i).getReg();
            MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
            // Is there a critical edge from PreMBB to MBB?
            if (PreMBB->succ_size() == 1)
                continue;

            // Avoid splitting backedges of loops. It would introduce small
            // out-of-line blocks into the loop which is very bad for code placement.
            if (PreMBB == &MBB && !SplitAllCriticalEdges)
                continue;
            const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0;
            if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges)
                continue;

            // LV doesn't consider a phi use live-out, so isLiveOut only returns true
            // when the source register is live-out for some other reason than a phi
            // use. That means the copy we will insert in PreMBB won't be a kill, and
            // there is a risk it may not be coalesced away.
            //
            // If the copy would be a kill, there is no need to split the edge.
            if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges)
                continue;

            DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
                  << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
                  << ": " << *BBI);

            // If Reg is not live-in to MBB, it means it must be live-in to some
            // other PreMBB successor, and we can avoid the interference by splitting
            // the edge.
            //
            // If Reg *is* live-in to MBB, the interference is inevitable and a copy
            // is likely to be left after coalescing. If we are looking at a loop
            // exiting edge, split it so we won't insert code in the loop, otherwise
            // don't bother.
            bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges;

            // Check for a loop exiting edge.
            if (!ShouldSplit && CurLoop != PreLoop) {
                DEBUG({
                    dbgs() << "Split wouldn't help, maybe avoid loop copies?\n";
                    if (PreLoop) dbgs() << "PreLoop: " << *PreLoop;
                    if (CurLoop) dbgs() << "CurLoop: " << *CurLoop;
                });
                // This edge could be entering a loop, exiting a loop, or it could be
                // both: Jumping directly from one loop to the header of a sibling
                // loop.
                // Split unless this edge is entering CurLoop from an outer loop.
                ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
            }
            if (!ShouldSplit)
                continue;
            if (!PreMBB->SplitCriticalEdge(&MBB, this)) {
                DEBUG(dbgs() << "Failed to split ciritcal edge.\n");
                continue;
            }
            Changed = true;
            ++NumCriticalEdgesSplit;
        }
    }
    return Changed;
}
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
      RetOpcode == AArch64::TCRETURNri;
  }
  int NumBytes = MFI->getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (NumRestores * 8)  |         |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.
  NumBytes += ArgumentPopSize;

  unsigned NumRestores = 0;
  // Move past the restores of the callee-saved registers.
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    unsigned Restores = getNumCSRestores(*LastPopI, CSRegs);
    NumRestores += Restores;
    if (Restores == 0) {
      ++LastPopI;
      break;
    }
  }
  NumBytes -= NumRestores * 8;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes,
                      TII);
    return;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -(NumRestores - 2) * 8, TII, MachineInstr::NoFlags);
}
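
The frame diagram above reduces to a small piece of bookkeeping: the epilogue has to undo StackSize plus any argument space a tail call expects to be popped, minus the bytes already reclaimed by the callee-save restores. Below is a minimal, self-contained sketch of that arithmetic with made-up numbers; the variable names mirror the pass, but the values are hypothetical and nothing here is LLVM code.

#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical values; in the pass these come from MFI->getStackSize(),
  // the TCRETURN stack-adjust operand (or getArgumentStackToRestore()), and
  // the number of callee-save restores found above the terminator.
  int64_t StackSize = 48;
  int64_t ArgumentPopSize = 16;
  int64_t NumRestores = 4;

  // NumBytes = StackSize + ArgumentPopSize, as in the comment above.
  int64_t NumBytes = StackSize + ArgumentPopSize;
  // The callee-save restore instructions already moved SP up by 8 bytes each.
  NumBytes -= NumRestores * 8;
  // What is left is the single "add sp, sp, #NumBytes" the epilogue emits.
  assert(NumBytes == 32);
  return 0;
}
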
Example #17
/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
                                     MachineBasicBlock::iterator MI) {
  SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
  VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());

  if (!ParentVNI) {
    DEBUG(dbgs() << "\tadding <undef> flags: ");
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)
        MO.setIsUndef();
    }
    DEBUG(dbgs() << UseIdx << '\t' << *MI);
    return true;
  }

  if (SnippetCopies.count(MI))
    return false;

  // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy.
  LiveRangeEdit::Remat RM(ParentVNI);
  SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
  if (SibI != SibValues.end())
    RM.OrigMI = SibI->second.DefMI;
  if (!Edit->canRematerializeAt(RM, UseIdx, false)) {
    markValueUsed(&VirtReg, ParentVNI);
    DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
    return false;
  }

  // If the instruction also writes VirtReg.reg, it had better not require the
  // same register for uses and defs.
  SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
  MIBundleOperands::VirtRegInfo RI =
    MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
  if (RI.Tied) {
    markValueUsed(&VirtReg, ParentVNI);
    DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
    return false;
  }

  // Before rematerializing into a register for a single instruction, try to
  // fold a load into the instruction. That avoids allocating a new register.
  if (RM.OrigMI->canFoldAsLoad() &&
      foldMemoryOperand(Ops, RM.OrigMI)) {
    Edit->markRematerialized(RM.ParentVNI);
    ++NumFoldedLoads;
    return true;
  }

  // Allocate a new register for the remat.
  unsigned NewVReg = Edit->createFrom(Original);

  // Finally we can rematerialize OrigMI before MI.
  SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewVReg, RM,
                                           TRI);
  (void)DefIdx;
  DEBUG(dbgs() << "\tremat:  " << DefIdx << '\t'
               << *LIS.getInstructionFromIndex(DefIdx));

  // Replace operands
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(Ops[i].second);
    if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
      MO.setReg(NewVReg);
      MO.setIsKill();
    }
  }
  DEBUG(dbgs() << "\t        " << UseIdx << '\t' << *MI << '\n');

  ++NumRemats;
  return true;
}
Example #18
/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
/// blocks, the successors have gained new predecessors. Update the PHI
/// instructions in them accordingly.
void
TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
                                  SmallVectorImpl<MachineBasicBlock *> &TDBBs,
                                  SmallSetVector<MachineBasicBlock*,8> &Succs) {
  for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
         SE = Succs.end(); SI != SE; ++SI) {
    MachineBasicBlock *SuccBB = *SI;
    for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
         II != EE; ++II) {
      if (!II->isPHI())
        break;
      MachineInstrBuilder MIB(*FromBB->getParent(), II);
      unsigned Idx = 0;
      for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
        MachineOperand &MO = II->getOperand(i+1);
        if (MO.getMBB() == FromBB) {
          Idx = i;
          break;
        }
      }

      assert(Idx != 0);
      MachineOperand &MO0 = II->getOperand(Idx);
      unsigned Reg = MO0.getReg();
      if (isDead) {
        // Folded into the previous BB.
        // There could be duplicate phi source entries. FIXME: Should sdisel
        // or earlier pass fixed this?
        for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
          MachineOperand &MO = II->getOperand(i+1);
          if (MO.getMBB() == FromBB) {
            II->RemoveOperand(i+1);
            II->RemoveOperand(i);
          }
        }
      } else
        Idx = 0;

      // If Idx is set, the operands at Idx and Idx+1 must be removed.
      // We reuse the location to avoid expensive RemoveOperand calls.

      DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
      if (LI != SSAUpdateVals.end()) {
        // This register is defined in the tail block.
        for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = LI->second[j].first;
          // If we didn't duplicate a bb into a particular predecessor, we
          // might still have added an entry to SSAUpdateVals to correctly
          // recompute SSA. In that case, avoid adding a dummy extra argument to
          // this PHI.
          if (!SrcBB->isSuccessor(SuccBB))
            continue;

          unsigned SrcReg = LI->second[j].second;
          if (Idx != 0) {
            II->getOperand(Idx).setReg(SrcReg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            MIB.addReg(SrcReg).addMBB(SrcBB);
          }
        }
      } else {
        // Live in tail block, must also be live in predecessors.
        for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
          MachineBasicBlock *SrcBB = TDBBs[j];
          if (Idx != 0) {
            II->getOperand(Idx).setReg(Reg);
            II->getOperand(Idx+1).setMBB(SrcBB);
            Idx = 0;
          } else {
            MIB.addReg(Reg).addMBB(SrcBB);
          }
        }
      }
      if (Idx != 0) {
        II->RemoveOperand(Idx+1);
        II->RemoveOperand(Idx);
      }
    }
  }
}
Example #19
void MipsSEInstrInfo::expandPseudoMFHiLo(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator I,
                                         unsigned NewOpc) const {
  BuildMI(MBB, I, I->getDebugLoc(), get(NewOpc), I->getOperand(0).getReg());
}
bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
  df_iterator_default_set<MachineBasicBlock*> Reachable;
  bool ModifiedPHI = false;

  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
  MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
  MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();

  // Mark all reachable blocks.
  for (MachineBasicBlock *BB : depth_first_ext(&F, Reachable))
    (void)BB/* Mark all reachable blocks */;

  // Loop over all dead blocks, remembering them and deleting all instructions
  // in them.
  std::vector<MachineBasicBlock*> DeadBlocks;
  for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
    MachineBasicBlock *BB = &*I;

    // Test for deadness.
    if (!Reachable.count(BB)) {
      DeadBlocks.push_back(BB);

      // Update dominator and loop info.
      if (MLI) MLI->removeBlock(BB);
      if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB);

      while (BB->succ_begin() != BB->succ_end()) {
        MachineBasicBlock* succ = *BB->succ_begin();

        MachineBasicBlock::iterator start = succ->begin();
        while (start != succ->end() && start->isPHI()) {
          for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
            if (start->getOperand(i).isMBB() &&
                start->getOperand(i).getMBB() == BB) {
              start->RemoveOperand(i);
              start->RemoveOperand(i-1);
            }

          start++;
        }

        BB->removeSuccessor(BB->succ_begin());
      }
    }
  }

  // Actually remove the blocks now.
  for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
    DeadBlocks[i]->eraseFromParent();

  // Cleanup PHI nodes.
  for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
    MachineBasicBlock *BB = &*I;
    // Prune unneeded PHI entries.
    SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
                                             BB->pred_end());
    MachineBasicBlock::iterator phi = BB->begin();
    while (phi != BB->end() && phi->isPHI()) {
      for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
        if (!preds.count(phi->getOperand(i).getMBB())) {
          phi->RemoveOperand(i);
          phi->RemoveOperand(i-1);
          ModifiedPHI = true;
        }

      if (phi->getNumOperands() == 3) {
        const MachineOperand &Input = phi->getOperand(1);
        const MachineOperand &Output = phi->getOperand(0);
        unsigned InputReg = Input.getReg();
        unsigned OutputReg = Output.getReg();
        assert(Output.getSubReg() == 0 && "Cannot have output subregister");
        ModifiedPHI = true;

        if (InputReg != OutputReg) {
          MachineRegisterInfo &MRI = F.getRegInfo();
          unsigned InputSub = Input.getSubReg();
          if (InputSub == 0 &&
              MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg))) {
            MRI.replaceRegWith(OutputReg, InputReg);
          } else {
            // The input register to the PHI has a subregister or it can't be
            // constrained to the proper register class:
            // insert a COPY instead of simply replacing the output
            // with the input.
            const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();
            BuildMI(*BB, BB->getFirstNonPHI(), phi->getDebugLoc(),
                    TII->get(TargetOpcode::COPY), OutputReg)
                .addReg(InputReg, getRegState(Input), InputSub);
          }
          phi++->eraseFromParent();
        }
        continue;
      }

      ++phi;
    }
  }

  F.RenumberBlocks();

  return (!DeadBlocks.empty() || ModifiedPHI);
}
Example #21
void
AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  AArch64MachineFunctionInfo *FuncInfo =
    MF.getInfo<AArch64MachineFunctionInfo>();

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  DebugLoc DL = MBBI->getDebugLoc();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned RetOpcode = MBBI->getOpcode();

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
  uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
  uint64_t ArgumentPopSize = 0;
  if (RetOpcode == AArch64::TC_RETURNdi ||
      RetOpcode == AArch64::TC_RETURNxi) {
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    MachineInstrBuilder MIB;
    if (RetOpcode == AArch64::TC_RETURNdi) {
      MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol() && "unexpected tail call destination");
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else {
      assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
             && "Unexpected tail call");

      MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
      MIB.addReg(JumpTarget.getReg(), RegState::Kill);
    }

    // Add the extra operands onto the new tail call instruction even though
    // they're not used directly (so that liveness is tracked properly etc).
    for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
        MIB->addOperand(MBBI->getOperand(i));


    // Delete the pseudo instruction TC_RETURN.
    MachineInstr *NewMI = prior(MBBI);
    MBB.erase(MBBI);
    MBBI = NewMI;

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
  }

  assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0
         && "refusing to adjust stack by misaligned amt");

  // We may need to address callee-saved registers differently, so find out the
  // bound on the frame indices.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  int MinCSFI = 0;
  int MaxCSFI = -1;

  if (CSI.size()) {
    MinCSFI = CSI[0].getFrameIdx();
    MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
  }

  // The "residual" stack update comes first from this direction and guarantees
  // that SP is NumInitialBytes below its value on function entry, either by a
  // direct update or restoring it from the frame pointer.
  if (NumInitialBytes + ArgumentPopSize != 0) {
    emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
                 NumInitialBytes + ArgumentPopSize);
    --MBBI;
  }


  // MBBI now points to the instruction just past the last callee-saved
  // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
  // otherwise).

  // Now we need to find out where to put the bulk of the stack adjustment
  MachineBasicBlock::iterator FirstEpilogue = MBBI;
  while (MBBI != MBB.begin()) {
    --MBBI;

    unsigned FrameOp;
    for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
      if (MBBI->getOperand(FrameOp).isFI())
        break;
    }

    // If this instruction doesn't have a frame index we've reached the end of
    // the callee-save restoration.
    if (FrameOp == MBBI->getNumOperands())
      break;

    // Likewise if it *is* a local reference, but not to a callee-saved object.
    int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
    if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
      break;

    FirstEpilogue = MBBI;
  }

  if (MF.getFrameInfo()->hasVarSizedObjects()) {
    int64_t StaticFrameBase;
    StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
    emitRegUpdate(MBB, FirstEpilogue, DL, TII,
                  AArch64::XSP, AArch64::X29, AArch64::NoRegister,
                  StaticFrameBase);
  } else {
    emitSPUpdate(MBB, FirstEpilogue, DL,TII, AArch64::X16, NumResidualBytes);
  }
}
Example #22
/// Return the corresponding compact (no delay slot) form of a branch.
unsigned MipsInstrInfo::getEquivalentCompactForm(
    const MachineBasicBlock::iterator I) const {
  unsigned Opcode = I->getOpcode();
  bool canUseShortMicroMipsCTI = false;

  if (Subtarget.inMicroMipsMode()) {
    switch (Opcode) {
    case Mips::BNE:
    case Mips::BEQ:
    // microMIPS has NE,EQ branches that do not have delay slots provided one
    // of the operands is zero.
      if (I->getOperand(1).getReg() == Subtarget.getABI().GetZeroReg())
        canUseShortMicroMipsCTI = true;
      break;
    // For microMIPS the PseudoReturn and PseudoIndirectBranch are always
    // expanded to JR_MM, so they can be replaced with JRC16_MM.
    case Mips::JR:
    case Mips::PseudoReturn:
    case Mips::PseudoIndirectBranch:
      canUseShortMicroMipsCTI = true;
      break;
    }
  }

  // MIPSR6 forbids both operands being the zero register.
  if (Subtarget.hasMips32r6() && (I->getNumOperands() > 1) &&
      (I->getOperand(0).isReg() &&
       (I->getOperand(0).getReg() == Mips::ZERO ||
        I->getOperand(0).getReg() == Mips::ZERO_64)) &&
      (I->getOperand(1).isReg() &&
       (I->getOperand(1).getReg() == Mips::ZERO ||
        I->getOperand(1).getReg() == Mips::ZERO_64)))
    return 0;

  if (Subtarget.hasMips32r6() || canUseShortMicroMipsCTI) {
    switch (Opcode) {
    case Mips::B:
      return Mips::BC;
    case Mips::BAL:
      return Mips::BALC;
    case Mips::BEQ:
      if (canUseShortMicroMipsCTI)
        return Mips::BEQZC_MM;
      else if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
        return 0;
      return Mips::BEQC;
    case Mips::BNE:
      if (canUseShortMicroMipsCTI)
        return Mips::BNEZC_MM;
      else if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
        return 0;
      return Mips::BNEC;
    case Mips::BGE:
      if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
        return 0;
      return Mips::BGEC;
    case Mips::BGEU:
      if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
        return 0;
      return Mips::BGEUC;
    case Mips::BGEZ:
      return Mips::BGEZC;
    case Mips::BGTZ:
      return Mips::BGTZC;
    case Mips::BLEZ:
      return Mips::BLEZC;
    case Mips::BLT:
      if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
        return 0;
      return Mips::BLTC;
    case Mips::BLTU:
      if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
        return 0;
      return Mips::BLTUC;
    case Mips::BLTZ:
      return Mips::BLTZC;
    // For MIPSR6, the instruction 'jic' can be used for these cases. Some
    // tools will accept 'jrc reg' as an alias for 'jic 0, $reg'.
    case Mips::JR:
    case Mips::PseudoReturn:
    case Mips::PseudoIndirectBranch:
      if (canUseShortMicroMipsCTI)
        return Mips::JRC16_MM;
      return Mips::JIC;
    case Mips::JALRPseudo:
      return Mips::JIALC;
    case Mips::JR64:
    case Mips::PseudoReturn64:
    case Mips::PseudoIndirectBranch64:
      return Mips::JIC64;
    case Mips::JALR64Pseudo:
      return Mips::JIALC64;
    default:
      return 0;
    }
  }

  return 0;
}
MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
  MachineBasicBlock::iterator I = MBB->getFirstTerminator();
  if (I == MBB->end())
    return nullptr;
  // The terminator must be controlled by the flags.
  if (!I->readsRegister(AArch64::NZCV)) {
    switch (I->getOpcode()) {
    case AArch64::CBZW:
    case AArch64::CBZX:
    case AArch64::CBNZW:
    case AArch64::CBNZX:
      // These can be converted into a ccmp against #0.
      return I;
    }
    ++NumCmpTermRejs;
    DEBUG(dbgs() << "Flags not used by terminator: " << *I);
    return nullptr;
  }

  // Now find the instruction controlling the terminator.
  for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
    --I;
    assert(!I->isTerminator() && "Spurious terminator");
    switch (I->getOpcode()) {
    // cmp is an alias for subs with a dead destination register.
    case AArch64::SUBSWri:
    case AArch64::SUBSXri:
    // cmn is an alias for adds with a dead destination register.
    case AArch64::ADDSWri:
    case AArch64::ADDSXri:
      // Check that the immediate operand is within range, ccmp wants a uimm5.
      // Rd = SUBSri Rn, imm, shift
      if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
        DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
        ++NumImmRangeRejs;
        return nullptr;
      }
    // Fall through.
    case AArch64::SUBSWrr:
    case AArch64::SUBSXrr:
    case AArch64::ADDSWrr:
    case AArch64::ADDSXrr:
      if (isDeadDef(I->getOperand(0).getReg()))
        return I;
      DEBUG(dbgs() << "Can't convert compare with live destination: " << *I);
      ++NumLiveDstRejs;
      return nullptr;
    case AArch64::FCMPSrr:
    case AArch64::FCMPDrr:
    case AArch64::FCMPESrr:
    case AArch64::FCMPEDrr:
      return I;
    }

    // Check for flag reads and clobbers.
    MIOperands::PhysRegInfo PRI =
        MIOperands(I).analyzePhysReg(AArch64::NZCV, TRI);

    if (PRI.Reads) {
      // The ccmp doesn't produce exactly the same flags as the original
      // compare, so reject the transform if there are uses of the flags
      // besides the terminators.
      DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
      ++NumMultNZCVUses;
      return nullptr;
    }

    if (PRI.Clobbers) {
      DEBUG(dbgs() << "Not convertible compare: " << *I);
      ++NumUnknNZCVDefs;
      return nullptr;
    }
  }
  DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
  return nullptr;
}
Example #24
MachineInstrBuilder
MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
                                  MachineBasicBlock::iterator I) const {
  MachineInstrBuilder MIB;

  // Certain branches have two forms: e.g. beq $1, $zero, dest vs beqz $1, dest
  // Pick the zero form of the branch for readable assembly and for greater
  // branch distance in non-microMIPS mode.
  // FIXME: Certain atomic sequences on mips64 generate 32bit references to
  // Mips::ZERO, which is incorrect. This test should be updated to use
  // Subtarget.getABI().GetZeroReg() when those atomic sequences and others
  // are fixed.
  bool BranchWithZeroOperand =
      (I->isBranch() && !I->isPseudo() && I->getOperand(1).isReg() &&
       (I->getOperand(1).getReg() == Mips::ZERO ||
        I->getOperand(1).getReg() == Mips::ZERO_64));

  if (BranchWithZeroOperand) {
    switch (NewOpc) {
    case Mips::BEQC:
      NewOpc = Mips::BEQZC;
      break;
    case Mips::BNEC:
      NewOpc = Mips::BNEZC;
      break;
    case Mips::BGEC:
      NewOpc = Mips::BGEZC;
      break;
    case Mips::BLTC:
      NewOpc = Mips::BLTZC;
      break;
    }
  }

  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc));

  // For MIPSR6 JI*C requires an immediate 0 as an operand, JIALC(64) an
  // immediate 0 as an operand and requires the removal of its %RA<imp-def>
  // implicit operand, as copying the implicit operands of the instruction we're
  // looking at will give us the correct flags.
  if (NewOpc == Mips::JIC || NewOpc == Mips::JIALC || NewOpc == Mips::JIC64 ||
      NewOpc == Mips::JIALC64) {

    if (NewOpc == Mips::JIALC || NewOpc == Mips::JIALC64)
      MIB->RemoveOperand(0);

    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      MIB.addOperand(I->getOperand(J));
    }

    MIB.addImm(0);

  } else if (BranchWithZeroOperand) {
    // For MIPSR6 and microMIPS branches with an explicit zero operand, copy
    // everything after the zero.
    MIB.addOperand(I->getOperand(0));

    for (unsigned J = 2, E = I->getDesc().getNumOperands(); J < E; ++J) {
      MIB.addOperand(I->getOperand(J));
    }
  } else {
    // All other cases copy all other operands.
    for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
      MIB.addOperand(I->getOperand(J));
    }
  }

  MIB.copyImplicitOps(*I);

  MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end());
  return MIB;
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
///
void PEI::replaceFrameIndices(MachineFunction &Fn) {
  if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?

  const TargetMachine &TM = Fn.getTarget();
  assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
  const TargetFrameLowering *TFI = TM.getFrameLowering();
  bool StackGrowsDown =
    TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
  int FrameSetupOpcode   = TII.getCallFrameSetupOpcode();
  int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();

  for (MachineFunction::iterator BB = Fn.begin(),
         E = Fn.end(); BB != E; ++BB) {
#ifndef NDEBUG
    int SPAdjCount = 0; // frame setup / destroy count.
#endif
    int SPAdj = 0;  // SP offset due to call frame setup / destroy.
    if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);

    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {

      if (I->getOpcode() == FrameSetupOpcode ||
          I->getOpcode() == FrameDestroyOpcode) {
#ifndef NDEBUG
        // Track whether we see even pairs of them
        SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1;
#endif
        // Remember how much SP has been adjusted to create the call
        // frame.
        int Size = I->getOperand(0).getImm();

        if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
            (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
          Size = -Size;

        SPAdj += Size;

        MachineBasicBlock::iterator PrevI = BB->end();
        if (I != BB->begin()) PrevI = prior(I);
        TRI.eliminateCallFramePseudoInstr(Fn, *BB, I);

        // Visit the instructions created by eliminateCallFramePseudoInstr().
        if (PrevI == BB->end())
          I = BB->begin();     // The replaced instr was the first in the block.
        else
          I = llvm::next(PrevI);
        continue;
      }

      MachineInstr *MI = I;
      bool DoIncr = true;
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
        if (MI->getOperand(i).isFI()) {
          // Some instructions (e.g. inline asm instructions) can have
          // multiple frame indices and/or cause eliminateFrameIndex
          // to insert more than one instruction. We need the register
          // scavenger to go through all of these instructions so that
          // it can update its register information. We keep the
          // iterator at the point before insertion so that we can
          // revisit them in full.
          bool AtBeginning = (I == BB->begin());
          if (!AtBeginning) --I;

          // If this instruction has a FrameIndex operand, we need to
          // use that target machine register info object to eliminate
          // it.
          TRI.eliminateFrameIndex(MI, SPAdj,
                                  FrameIndexVirtualScavenging ?  NULL : RS);

          // Reset the iterator if we were at the beginning of the BB.
          if (AtBeginning) {
            I = BB->begin();
            DoIncr = false;
          }

          MI = 0;
          break;
        }

      if (DoIncr && I != BB->end()) ++I;

      // Update register states.
      if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
    }

    // If we have evenly matched pairs of frame setup / destroy instructions,
    // make sure the adjustments come out to zero. If we don't have matched
    // pairs, we can't be sure the missing bit isn't in another basic block
    // due to a custom inserter playing tricks, so just asserting SPAdj==0
    // isn't sufficient. See tMOVCC on Thumb1, for example.
    assert((SPAdjCount || SPAdj == 0) &&
           "Unbalanced call frame setup / destroy pairs?");
  }
}
bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *&TBB,
                                    MachineBasicBlock *&FBB,
                                    SmallVectorImpl<MachineOperand> &Cond,
                                    bool AllowModify) const {
  // Start from the bottom of the block and work up, examining the
  // terminator instructions.
  MachineBasicBlock::iterator I = MBB.end();
  while (I != MBB.begin()) {
    --I;
    if (I->isDebugValue())
      continue;

    // Working from the bottom, when we see a non-terminator
    // instruction, we're done.
    if (!isUnpredicatedTerminator(I))
      break;

    // A terminator that isn't a branch can't easily be handled
    // by this analysis.
    if (!I->isBranch())
      return true;

    // Cannot handle indirect branches.
    if (I->getOpcode() == MSP430::Br ||
        I->getOpcode() == MSP430::Bm)
      return true;

    // Handle unconditional branches.
    if (I->getOpcode() == MSP430::JMP) {
      if (!AllowModify) {
        TBB = I->getOperand(0).getMBB();
        continue;
      }

      // If the block has any instructions after a JMP, delete them.
      while (std::next(I) != MBB.end())
        std::next(I)->eraseFromParent();
      Cond.clear();
      FBB = nullptr;

      // Delete the JMP if it's equivalent to a fall-through.
      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
        TBB = nullptr;
        I->eraseFromParent();
        I = MBB.end();
        continue;
      }

      // TBB is used to indicate the unconditional destination.
      TBB = I->getOperand(0).getMBB();
      continue;
    }

    // Handle conditional branches.
    assert(I->getOpcode() == MSP430::JCC && "Invalid conditional branch");
    MSP430CC::CondCodes BranchCode =
      static_cast<MSP430CC::CondCodes>(I->getOperand(1).getImm());
    if (BranchCode == MSP430CC::COND_INVALID)
      return true;  // Can't handle weird stuff.

    // Working from the bottom, handle the first conditional branch.
    if (Cond.empty()) {
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(BranchCode));
      continue;
    }

    // Handle subsequent conditional branches. Only handle the case where all
    // conditional branches branch to the same destination.
    assert(Cond.size() == 1);
    assert(TBB);

    // Only handle the case where all conditional branches branch to
    // the same destination.
    if (TBB != I->getOperand(0).getMBB())
      return true;

    MSP430CC::CondCodes OldBranchCode = (MSP430CC::CondCodes)Cond[0].getImm();
    // If the conditions are the same, we can leave them alone.
    if (OldBranchCode == BranchCode)
      continue;

    return true;
  }

  return false;
}
Example #27
unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
                                        MachineBasicBlock::iterator I,
                                        int SPAdj) {
  // Consider all allocatable registers in the register class initially
  BitVector Candidates =
    TRI->getAllocatableSet(*I->getParent()->getParent(), RC);

  // Exclude all the registers being used by the instruction.
  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = I->getOperand(i);
    if (MO.isReg() && MO.getReg() != 0 &&
        !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
      Candidates.reset(MO.getReg());
  }

  // Try to find a register that's unused if there is one, as then we won't
  // have to spill. Search explicitly rather than masking out based on
  // RegsAvailable, as RegsAvailable does not take aliases into account.
  // That's what getRegsAvailable() is for.
  BitVector Available = getRegsAvailable(RC);
  Available &= Candidates;
  if (Available.any())
    Candidates = Available;

  // Find the register whose use is furthest away.
  MachineBasicBlock::iterator UseMI;
  unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);

  // If we found an unused register there is no reason to spill it.
  if (!isAliasUsed(SReg)) {
    DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
    return SReg;
  }

  assert(ScavengedReg == 0 &&
         "Scavenger slot is live, unable to scavenge another register!");

  // Avoid infinite regress
  ScavengedReg = SReg;

  // If the target knows how to save/restore the register, let it do so;
  // otherwise, use the emergency stack spill slot.
  if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
    // Spill the scavenged register before I.
    assert(ScavengingFrameIndex >= 0 &&
           "Cannot scavenge register without an emergency spill slot!");
    TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI);
    MachineBasicBlock::iterator II = prior(I);

    unsigned FIOperandNum = getFrameIndexOperandNum(II);
    TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);

    // Restore the scavenged register before its use (or first terminator).
    TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
    II = prior(UseMI);

    FIOperandNum = getFrameIndexOperandNum(II);
    TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
  }

  ScavengeRestore = prior(UseMI);

  // Doing this here leads to infinite regress.
  // ScavengedReg = SReg;

  DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
        "\n");

  return SReg;
}
/// This function generates the sequence of instructions needed to get the
/// result of adding register REG and immediate IMM.
unsigned Mips16InstrInfo::loadImmediate(unsigned FrameReg, int64_t Imm,
                                        MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator II,
                                        const DebugLoc &DL,
                                        unsigned &NewImm) const {
  //
  // given original instruction is:
  // Instr rx, T[offset] where offset is too big.
  //
  // lo = offset & 0xFFFF
  // hi = ((offset >> 16) + (lo >> 15)) & 0xFFFF;
  //
  // let T = temporary register
  // li T, hi
  // shl T, 16
  // add T, Rx, T
  //
  RegScavenger rs;
  int32_t lo = Imm & 0xFFFF;
  NewImm = lo;
  int Reg = 0;
  int SpReg = 0;

  rs.enterBasicBlock(MBB);
  rs.forward(II);
  //
  // We need to know which registers can be used, in the case where there
  // are not enough free registers. We exclude all registers that
  // are used in the instruction that we are helping.
  // Consider all allocatable registers in the register class initially
  BitVector Candidates =
      RI.getAllocatableSet
      (*II->getParent()->getParent(), &Mips::CPU16RegsRegClass);
  // Exclude all the registers being used by the instruction.
  for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = II->getOperand(i);
    if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() &&
        !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
      Candidates.reset(MO.getReg());
  }

  // If the same register was used and defined in an instruction, then
  // it will not be in the list of candidates.
  //
  // we need to analyze the instruction that we are helping.
  // we need to know if it defines register x but register x is not
  // present as an operand of the instruction. this tells
  // whether the register is live before the instruction. if it's not
  // then we don't need to save it in case there are no free registers.
  int DefReg = 0;
  for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = II->getOperand(i);
    if (MO.isReg() && MO.isDef()) {
      DefReg = MO.getReg();
      break;
    }
  }

  BitVector Available = rs.getRegsAvailable(&Mips::CPU16RegsRegClass);
  Available &= Candidates;
  //
  // we use T0 for the first register, if we need to save something away.
  // we use T1 for the second register, if we need to save something away.
  //
  unsigned FirstRegSaved = 0, SecondRegSaved = 0;
  unsigned FirstRegSavedTo = 0, SecondRegSavedTo = 0;

  Reg = Available.find_first();

  if (Reg == -1) {
    Reg = Candidates.find_first();
    Candidates.reset(Reg);
    if (DefReg != Reg) {
      FirstRegSaved = Reg;
      FirstRegSavedTo = Mips::T0;
      copyPhysReg(MBB, II, DL, FirstRegSavedTo, FirstRegSaved, true);
    }
  }
  else
    Available.reset(Reg);
  BuildMI(MBB, II, DL, get(Mips::LwConstant32), Reg).addImm(Imm).addImm(-1);
  NewImm = 0;
  if (FrameReg == Mips::SP) {
    SpReg = Available.find_first();
    if (SpReg == -1) {
      SpReg = Candidates.find_first();
      // Candidates.reset(SpReg); // not really needed
      if (DefReg != SpReg) {
        SecondRegSaved = SpReg;
        SecondRegSavedTo = Mips::T1;
      }
      if (SecondRegSaved)
        copyPhysReg(MBB, II, DL, SecondRegSavedTo, SecondRegSaved, true);
    } else
      Available.reset(SpReg);
    copyPhysReg(MBB, II, DL, SpReg, Mips::SP, false);
    BuildMI(MBB, II, DL, get(Mips::AdduRxRyRz16), Reg)
        .addReg(SpReg, RegState::Kill)
        .addReg(Reg);
  } else
    BuildMI(MBB, II, DL, get(Mips::AdduRxRyRz16), Reg)
        .addReg(FrameReg)
        .addReg(Reg, RegState::Kill);
  if (FirstRegSaved || SecondRegSaved) {
    II = std::next(II);
    if (FirstRegSaved)
      copyPhysReg(MBB, II, DL, FirstRegSaved, FirstRegSavedTo, true);
    if (SecondRegSaved)
      copyPhysReg(MBB, II, DL, SecondRegSaved, SecondRegSavedTo, true);
  }
  return Reg;
}
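
The comment at the top of loadImmediate describes splitting an over-large offset into a low and a high half, where the extra (lo >> 15) term accounts for the low half later being consumed as a signed 16-bit value. The following self-contained sketch (helper name and test values are made up, not part of the pass) simply checks that identity:

#include <cassert>
#include <cstdint>

// Hypothetical helper mirroring the formula in the comment above:
//   lo = offset & 0xFFFF
//   hi = ((offset >> 16) + (lo >> 15)) & 0xFFFF
// The (lo >> 15) term bumps the high half by one whenever bit 15 of the low
// half is set, because the low half is later added as a signed 16-bit value.
static void splitImm(uint32_t Offset, uint32_t &Hi, uint32_t &Lo) {
  Lo = Offset & 0xFFFF;
  Hi = ((Offset >> 16) + (Lo >> 15)) & 0xFFFF;
}

int main() {
  for (uint32_t Offset : {0x00018000u, 0x12347FFFu, 0x7FFF8000u, 0xFFFFFFFCu}) {
    uint32_t Hi, Lo;
    splitImm(Offset, Hi, Lo);
    // Reassemble the value the way the emitted code would: shift the (possibly
    // bumped) high half and add the sign-extended low half, with 32-bit wrap.
    uint32_t SignExtLo = (Lo ^ 0x8000u) - 0x8000u; // sign-extend 16 -> 32 bits
    assert((Hi << 16) + SignExtLo == Offset);
  }
  return 0;
}
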
void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator I,
                                               CallContext &Context) {
  // Check that this particular call sequence is amenable to the
  // transformation.
  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());

  // We expect to enter this at the beginning of a call sequence
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;
  Context.FrameSetup = FrameSetup;

  // How much do we adjust the stack? This puts an upper bound on
  // the number of parameters actually passed on it.
  unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;

  // A zero adjustment means no stack parameters
  if (!MaxAdjust) {
    Context.NoStackParams = true;
    return;
  }

  // Skip over DEBUG_VALUE.
  // For globals in PIC mode, we can have some LEAs here. Skip them as well.
  // TODO: Extend this to something that covers more cases.
  while (I->getOpcode() == X86::LEA32r || I->isDebugInstr())
    ++I;

  unsigned StackPtr = RegInfo.getStackRegister();
  auto StackPtrCopyInst = MBB.end();
  // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual
  // register.  If it's there, use that virtual register as stack pointer
  // instead. Also, we need to locate this instruction so that we can later
  // safely ignore it while doing the conservative processing of the call chain.
  // The COPY can be located anywhere between the call-frame setup
  // instruction and its first use. We use the call instruction as a boundary
  // because it is usually cheaper to check if an instruction is a call than
  // checking if an instruction uses a register.
  for (auto J = I; !J->isCall(); ++J)
    if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
        J->getOperand(1).getReg() == StackPtr) {
      StackPtrCopyInst = J;
      Context.SPCopy = &*J++;
      StackPtr = Context.SPCopy->getOperand(0).getReg();
      break;
    }

  // Scan the call setup sequence for the pattern we're looking for.
  // We only handle a simple case - a sequence of store instructions that
  // push a sequence of stack-slot-aligned values onto the stack, with
  // no gaps between them.
  if (MaxAdjust > 4)
    Context.ArgStoreVector.resize(MaxAdjust, nullptr);

  DenseSet<unsigned int> UsedRegs;

  for (InstClassification Classification = Skip; Classification != Exit; ++I) {
    // If this is the COPY of the stack pointer, it's ok to ignore.
    if (I == StackPtrCopyInst)
      continue;
    Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);
    if (Classification != Convert)
      continue;
    // We know the instruction has a supported store opcode.
    // We only want movs of the form:
    // mov imm/reg, k(%StackPtr)
    // If we run into something else, bail.
    // Note that AddrBaseReg may, counter to its name, not be a register,
    // but rather a frame index.
    // TODO: Support the fi case. This should probably work now that we
    // have the infrastructure to track the stack pointer within a call
    // sequence.
    if (!I->getOperand(X86::AddrBaseReg).isReg() ||
        (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
        !I->getOperand(X86::AddrScaleAmt).isImm() ||
        (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
        (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
        (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
        !I->getOperand(X86::AddrDisp).isImm())
      return;

    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
    assert(StackDisp >= 0 &&
           "Negative stack displacement when passing parameters");

    // We really don't want to consider the unaligned case.
    if (StackDisp & (SlotSize - 1))
      return;
    StackDisp >>= Log2SlotSize;

    assert((size_t)StackDisp < Context.ArgStoreVector.size() &&
           "Function call has more parameters than the stack is adjusted for.");

    // If the same stack slot is being filled twice, something's fishy.
    if (Context.ArgStoreVector[StackDisp] != nullptr)
      return;
    Context.ArgStoreVector[StackDisp] = &*I;

    for (const MachineOperand &MO : I->uses()) {
      if (!MO.isReg())
        continue;
      unsigned int Reg = MO.getReg();
      if (RegInfo.isPhysicalRegister(Reg))
        UsedRegs.insert(Reg);
    }
  }

  --I;

  // We now expect the end of the sequence. If we stopped early,
  // or reached the end of the block without finding a call, bail.
  if (I == MBB.end() || !I->isCall())
    return;

  Context.Call = &*I;
  if ((++I)->getOpcode() != TII->getCallFrameDestroyOpcode())
    return;

  // Now, go through the vector, and see that we don't have any gaps,
  // but only a series of storing instructions.
  auto MMI = Context.ArgStoreVector.begin(), MME = Context.ArgStoreVector.end();
  for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)
    if (*MMI == nullptr)
      break;

  // If the call had no parameters, do nothing
  if (MMI == Context.ArgStoreVector.begin())
    return;

  // We are either at the last parameter, or a gap.
  // Make sure it's not a gap
  for (; MMI != MME; ++MMI)
    if (*MMI != nullptr)
      return;

  Context.UsePush = true;
}
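
The final loop above enforces the "no gaps" condition: the collected stores must fill a contiguous run of slots starting at displacement zero, and ExpectedDist ends up covering exactly that run. A small standalone sketch of the check follows; the function name and the use of plain pointers in place of MachineInstr* entries are illustrative assumptions, not code from the pass.

#include <cassert>
#include <cstddef>
#include <vector>

// Walk the slot vector until the first empty entry, accumulating ExpectedDist,
// then require every remaining slot to be empty as well.
static bool argStoresAreContiguous(const std::vector<const void *> &Slots,
                                   unsigned SlotSize, unsigned &ExpectedDist) {
  ExpectedDist = 0;
  size_t I = 0;
  for (; I != Slots.size(); ++I, ExpectedDist += SlotSize)
    if (Slots[I] == nullptr)
      break;
  if (I == 0)
    return false; // The call had no stack parameters; nothing to transform.
  for (; I != Slots.size(); ++I)
    if (Slots[I] != nullptr)
      return false; // A gap followed by another store: bail out.
  return true;
}

int main() {
  int Dummy;
  unsigned Dist;
  // Three consecutive slots filled: usable, ExpectedDist covers three slots.
  assert(argStoresAreContiguous({&Dummy, &Dummy, &Dummy, nullptr}, 4, Dist) &&
         Dist == 12);
  // A hole in the middle: not a simple push sequence.
  assert(!argStoresAreContiguous({&Dummy, nullptr, &Dummy}, 4, Dist));
  return 0;
}
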
void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator I,
                                               CallContext &Context) {
  // Check that this particular call sequence is amenable to the
  // transformation.
  const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
                                       STI->getRegisterInfo());
  unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();

  // We expect to enter this at the beginning of a call sequence
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;
  Context.FrameSetup = FrameSetup;

  // How much do we adjust the stack? This puts an upper bound on
  // the number of parameters actually passed on it.
  unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;

  // A zero adjustment means no stack parameters
  if (!MaxAdjust) {
    Context.NoStackParams = true;
    return;
  }

  // For globals in PIC mode, we can have some LEAs here.
  // Ignore them, they don't bother us.
  // TODO: Extend this to something that covers more cases.
  while (I->getOpcode() == X86::LEA32r)
    ++I;

  // We expect a copy instruction here.
  // TODO: The copy instruction is a lowering artifact.
  //       We should also support a copy-less version, where the stack
  //       pointer is used directly.
  if (!I->isCopy() || !I->getOperand(0).isReg())
    return;
  Context.SPCopy = I++;

  unsigned StackPtr = Context.SPCopy->getOperand(0).getReg();

  // Scan the call setup sequence for the pattern we're looking for.
  // We only handle a simple case - a sequence of MOV32mi or MOV32mr
  // instructions, that push a sequence of 32-bit values onto the stack, with
  // no gaps between them.
  if (MaxAdjust > 4)
    Context.MovVector.resize(MaxAdjust, nullptr);

  InstClassification Classification;
  DenseSet<unsigned int> UsedRegs;

  while ((Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs)) !=
         Exit) {
    if (Classification == Skip) {
      ++I;
      continue;
    }

    // We know the instruction is a MOV32mi/MOV32mr.
    // We only want movs of the form:
    // movl imm/r32, k(%esp)
    // If we run into something else, bail.
    // Note that AddrBaseReg may, counter to its name, not be a register,
    // but rather a frame index.
    // TODO: Support the fi case. This should probably work now that we
    // have the infrastructure to track the stack pointer within a call
    // sequence.
    if (!I->getOperand(X86::AddrBaseReg).isReg() ||
        (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
        !I->getOperand(X86::AddrScaleAmt).isImm() ||
        (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
        (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
        (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
        !I->getOperand(X86::AddrDisp).isImm())
      return;

    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
    assert(StackDisp >= 0 &&
           "Negative stack displacement when passing parameters");

    // We really don't want to consider the unaligned case.
    if (StackDisp % 4)
      return;
    StackDisp /= 4;

    assert((size_t)StackDisp < Context.MovVector.size() &&
           "Function call has more parameters than the stack is adjusted for.");

    // If the same stack slot is being filled twice, something's fishy.
    if (Context.MovVector[StackDisp] != nullptr)
      return;
    Context.MovVector[StackDisp] = I;

    for (const MachineOperand &MO : I->uses()) {
      if (!MO.isReg())
        continue;
      unsigned int Reg = MO.getReg();
      if (RegInfo.isPhysicalRegister(Reg))
        UsedRegs.insert(Reg);
    }

    ++I;
  }

  // We now expect the end of the sequence. If we stopped early,
  // or reached the end of the block without finding a call, bail.
  if (I == MBB.end() || !I->isCall())
    return;

  Context.Call = I;
  if ((++I)->getOpcode() != FrameDestroyOpcode)
    return;

  // Now, go through the vector, and see that we don't have any gaps,
  // but only a series of 32-bit MOVs.
  auto MMI = Context.MovVector.begin(), MME = Context.MovVector.end();
  for (; MMI != MME; ++MMI, Context.ExpectedDist += 4)
    if (*MMI == nullptr)
      break;

  // If the call had no parameters, do nothing
  if (MMI == Context.MovVector.begin())
    return;

  // We are either at the last parameter, or a gap.
  // Make sure it's not a gap
  for (; MMI != MME; ++MMI)
    if (*MMI != nullptr)
      return;

  Context.UsePush = true;
}
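
// For reference, a plausible shape for the CallContext record that this
// version of collectCallInfo fills in, reconstructed from the fields used
// above. Field types are inferred from the assignments in this revision
// (everything recorded is a MachineBasicBlock::iterator); the exact upstream
// declaration may differ. As a worked example: a call passing three 32-bit
// arguments has a frame-setup immediate of 12, so MaxAdjust = 12 / 4 = 3,
// and if all three slots are filled with MOVs, ExpectedDist ends up as 12
// and UsePush is set.
struct CallContext {
  // The call-frame setup pseudo that opens the sequence.
  MachineBasicBlock::iterator FrameSetup;

  // The call instruction that closes it.
  MachineBasicBlock::iterator Call;

  // The COPY of the stack pointer that the argument stores index off.
  MachineBasicBlock::iterator SPCopy;

  // One recorded MOV per 32-bit stack slot, indexed by displacement / 4.
  // Pre-sized to four slots to match the "MaxAdjust > 4" resize guard above.
  SmallVector<MachineBasicBlock::iterator, 4> MovVector =
      SmallVector<MachineBasicBlock::iterator, 4>(4, nullptr);

  // Byte distance covered by the contiguous run of argument stores.
  unsigned ExpectedDist = 0;

  // The frame setup adjusts by zero, i.e. nothing is passed on the stack.
  bool NoStackParams = false;

  // Every check passed; the MOVs may later be rewritten as pushes.
  bool UsePush = false;
};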