Example #1
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
///
bool PEI::runOnMachineFunction(MachineFunction &Fn) {
  const Function* F = Fn.getFunction();
  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();

  assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");

  RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);

  // Calculate the MaxCallFrameSize and AdjustsStack variables for the
  // function's frame information. Also eliminates call frame pseudo
  // instructions.
  calculateCallsInformation(Fn);

  // Allow the target machine to make some adjustments to the function
  // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
  TFI->processFunctionBeforeCalleeSavedScan(Fn, RS);

  // Scan the function for modified callee saved registers and insert spill code
  // for any callee saved registers that are modified.
  calculateCalleeSavedRegisters(Fn);

  // Determine placement of CSR spill/restore code:
  // place all spills in the entry block, all restores in return blocks.
  calculateSets(Fn);

  // Add the code to save and restore the callee saved registers
  if (!F->hasFnAttribute(Attribute::Naked))
    insertCSRSpillsAndRestores(Fn);

  // Allow the target machine to make final modifications to the function
  // before the frame layout is finalized.
  TFI->processFunctionBeforeFrameFinalized(Fn, RS);

  // Calculate actual frame offsets for all abstract stack objects...
  calculateFrameObjectOffsets(Fn);

  // Add prolog and epilog code to the function.  This function is required
  // to align the stack frame as necessary for any stack variables or
  // called functions.  Because of this, calculateCalleeSavedRegisters()
  // must be called before this function in order to set the AdjustsStack
  // and MaxCallFrameSize variables.
  if (!F->hasFnAttribute(Attribute::Naked))
    insertPrologEpilogCode(Fn);

  // Replace all MO_FrameIndex operands with physical register references
  // and actual offsets.
  //
  replaceFrameIndices(Fn);

  // If register scavenging is needed, as we've enabled doing it as a
  // post-pass, scavenge the virtual registers that frame index elimination
  // inserted.
  if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
    scavengeFrameVirtualRegs(Fn);

  // Clear any vregs created by virtual scavenging.
  Fn.getRegInfo().clearVirtRegs();

  // Warn about the stack size when it exceeds the given limit.
  MachineFrameInfo *MFI = Fn.getFrameInfo();
  uint64_t StackSize = MFI->getStackSize();
  if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
    DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
    F->getContext().diagnose(DiagStackSize);
  }

  delete RS;
  RestoreBlocks.clear();
  return true;
}
bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {

  DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
               << "********** Function: "
               << fn.getFunction()->getName() << '\n');

  LiveIntervals *lis = &getAnalysis<LiveIntervals>();
  MachineLoopInfo *loopInfo = &getAnalysis<MachineLoopInfo>();
  const TargetInstrInfo *tii = fn.getTarget().getInstrInfo();
  MachineRegisterInfo *mri = &fn.getRegInfo();

  SmallSet<unsigned, 4> processed;
  for (MachineFunction::iterator mbbi = fn.begin(), mbbe = fn.end();
       mbbi != mbbe; ++mbbi) {
    MachineBasicBlock* mbb = mbbi;
    SlotIndex mbbEnd = lis->getMBBEndIdx(mbb);
    MachineLoop* loop = loopInfo->getLoopFor(mbb);
    unsigned loopDepth = loop ? loop->getLoopDepth() : 0;
    bool isExiting = loop ? loop->isLoopExiting(mbb) : false;

    for (MachineBasicBlock::const_iterator mii = mbb->begin(), mie = mbb->end();
         mii != mie; ++mii) {
      const MachineInstr *mi = mii;
      if (tii->isIdentityCopy(*mi) || mi->isImplicitDef() || mi->isDebugValue())
        continue;

      for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
        const MachineOperand &mopi = mi->getOperand(i);
        if (!mopi.isReg() || mopi.getReg() == 0)
          continue;
        unsigned reg = mopi.getReg();
        if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg()))
          continue;
        // Multiple uses of reg by the same instruction should not
        // contribute to the spill weight more than once.
        if (!processed.insert(reg))
          continue;

        bool hasDef = mopi.isDef();
        bool hasUse = !hasDef;
        for (unsigned j = i+1; j != e; ++j) {
          const MachineOperand &mopj = mi->getOperand(j);
          if (!mopj.isReg() || mopj.getReg() != reg)
            continue;
          hasDef |= mopj.isDef();
          hasUse |= mopj.isUse();
          if (hasDef && hasUse)
            break;
        }

        LiveInterval &regInt = lis->getInterval(reg);
        float weight = lis->getSpillWeight(hasDef, hasUse, loopDepth);
        if (hasDef && isExiting) {
          // Looks like this is a loop count variable update.
          SlotIndex defIdx = lis->getInstructionIndex(mi).getDefIndex();
          const LiveRange *dlr =
            lis->getInterval(reg).getLiveRangeContaining(defIdx);
          if (dlr->end >= mbbEnd)
            weight *= 3.0F;
        }
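        // Illustrative arithmetic (assumed heuristic, not from this source):
        // if getSpillWeight() computes (hasDef + hasUse) * 10^loopDepth, a
        // def+use operand at loop depth 1 contributes 20.0F, or 60.0F when
        // its value is live out of an exiting block as above.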
        regInt.weight += weight;
      }
      processed.clear();
    }
  }

  for (LiveIntervals::iterator I = lis->begin(), E = lis->end(); I != E; ++I) {
    LiveInterval &li = *I->second;
    if (TargetRegisterInfo::isVirtualRegister(li.reg)) {
      // If the live interval length is essentially zero, i.e. in every live
      // range the use follows the def immediately, it doesn't make sense to
      // spill it in the hope that allocation becomes easier for this li.
      if (isZeroLengthInterval(&li)) {
        li.weight = HUGE_VALF;
        continue;
      }

      bool isLoad = false;
      SmallVector<LiveInterval*, 4> spillIs;
      if (lis->isReMaterializable(li, spillIs, isLoad)) {
        // If all of the definitions of the interval are re-materializable,
        // it is a preferred candidate for spilling. If none of the defs are
        // loads, then it's potentially very cheap to re-materialize.
        // FIXME: this gets much more complicated once we support non-trivial
        // re-materialization.
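        // Illustrative numbers (not from the original source): a current
        // weight of 10.0F drops to 9.0F when the remat def is a load, and
        // to 5.0F when no load is involved.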
        if (isLoad)
          li.weight *= 0.9F;
        else
          li.weight *= 0.5F;
      }

      // Slightly prefer live intervals that have been assigned a preferred reg.
      std::pair<unsigned, unsigned> Hint = mri->getRegAllocationHint(li.reg);
      if (Hint.first || Hint.second)
        li.weight *= 1.01F;

      lis->normalizeSpillWeight(li);
    }
  }
  
  return false;
}
Example #3
void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
  AArch64MachineFunctionInfo *FuncInfo =
    MF.getInfo<AArch64MachineFunctionInfo>();
  MachineBasicBlock &MBB = MF.front();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  MachineModuleInfo &MMI = MF.getMMI();
  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
  bool NeedsFrameMoves = MMI.hasDebugInfo()
    || MF.getFunction()->needsUnwindTableEntry();

  uint64_t NumInitialBytes, NumResidualBytes;

  // Currently we expect the stack to be laid out by
  //     sub sp, sp, #initial
  //     stp x29, x30, [sp, #offset]
  //     ...
  //     str xxx, [sp, #offset]
  //     sub sp, sp, #rest (possibly via extra instructions).
  if (MFI->getCalleeSavedInfo().size()) {
    // If there are callee-saved registers, we want to store them efficiently as
    // a block, and virtual base assignment happens too early to do it for us,
    // so we adjust the stack in two phases: first just for callee-saved
    // fiddling, then to allocate the rest of the frame.
    splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes);
  } else {
    // If there aren't any callee-saved registers, two-phase adjustment is
    // inefficient. It's more efficient to adjust with NumInitialBytes too
    // because when we're in a "callee pops argument space" situation, that pop
    // must be tacked onto Initial for correctness.
    NumInitialBytes = MFI->getStackSize();
    NumResidualBytes = 0;
  }

  // Tell everyone else how much adjustment we're expecting them to use. In
  // particular, if an adjustment is required for a tail call, the epilogue
  // could have a different view of things.
  FuncInfo->setInitialStackAdjust(NumInitialBytes);

  emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes,
               MachineInstr::FrameSetup);

  if (NeedsFrameMoves && NumInitialBytes) {
    // We emit this update even if the CFA is set from a frame pointer later so
    // that the CFA is valid in the interim.
    MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
      .addSym(SPLabel);

    MachineLocation Dst(MachineLocation::VirtualFP);
    MachineLocation Src(AArch64::XSP, NumInitialBytes);
    Moves.push_back(MachineMove(SPLabel, Dst, Src));
  }

  // Otherwise we need to set the frame pointer and/or add a second stack
  // adjustment.

  bool FPNeedsSetting = hasFP(MF);
  for (; MBBI != MBB.end(); ++MBBI) {
    // Note that this search makes strong assumptions about the operation used
    // to store the frame pointer: it must be "STP x29, x30, ...". This could
    // change in future, but until then there's no point in implementing more
    // generic, untestable cases.
    if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR
                       && MBBI->getOperand(0).getReg() == AArch64::X29) {
      int64_t X29FrameIdx = MBBI->getOperand(2).getIndex();
      FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx));

      ++MBBI;
      emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP,
                    AArch64::X29,
                    NumInitialBytes + MFI->getObjectOffset(X29FrameIdx),
                    MachineInstr::FrameSetup);

      // The offset adjustment used when emitting debugging locations relative
      // to whatever frame base is set. AArch64 uses the default frame base (FP
      // or SP) and this adjusts the calculations to be correct.
      MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx)
                               - MFI->getStackSize());

      if (NeedsFrameMoves) {
        MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol();
        BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
          .addSym(FPLabel);
        MachineLocation Dst(MachineLocation::VirtualFP);
        MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx));
        Moves.push_back(MachineMove(FPLabel, Dst, Src));
      }

      FPNeedsSetting = false;
    }

    if (!MBBI->getFlag(MachineInstr::FrameSetup))
      break;
  }

  assert(!FPNeedsSetting && "Frame pointer couldn't be set");

  emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes,
               MachineInstr::FrameSetup);

  // Now we emit the rest of the frame setup information, if necessary: we've
  // already noted the FP and initial SP moves so we're left with the prologue's
  // final SP update and callee-saved register locations.
  if (!NeedsFrameMoves)
    return;

  // Reuse the label if appropriate, so create it in this outer scope.
  MCSymbol *CSLabel = 0;

  // The rest of the stack adjustment
  if (!hasFP(MF) && NumResidualBytes) {
    CSLabel = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
      .addSym(CSLabel);

    MachineLocation Dst(MachineLocation::VirtualFP);
    MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes);
    Moves.push_back(MachineMove(CSLabel, Dst, Src));
  }

  // And any callee-saved registers (it's fine to leave them to the end here,
  // because the old values are still valid at this point).
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.size()) {
    if (!CSLabel) {
      CSLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
        .addSym(CSLabel);
    }

    for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
           E = CSI.end(); I != E; ++I) {
      MachineLocation Dst(MachineLocation::VirtualFP,
                          MFI->getObjectOffset(I->getFrameIdx()));
      MachineLocation Src(I->getReg());
      Moves.push_back(MachineMove(CSLabel, Dst, Src));
    }
  }
}
bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
               << "********** Function: " << MF.getName() << '\n');
  if (skipFunction(*MF.getFunction()))
    return false;

  TII = MF.getSubtarget().getInstrInfo();
  DomTree = &getAnalysis<MachineDominatorTree>();
  MRI = &MF.getRegInfo();

  bool Changed = false;

  // Visit blocks in dominator tree pre-order. The pre-order enables multiple
  // cmp-conversions from the same head block.
  // Note that updateDomTree() modifies the children of the DomTree node
  // currently being visited. The df_iterator supports that; it doesn't look at
  // child_begin() / child_end() until after a node has been visited.
  for (MachineDomTreeNode *I : depth_first(DomTree)) {
    MachineBasicBlock *HBB = I->getBlock();

    SmallVector<MachineOperand, 4> HeadCond;
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    if (TII->analyzeBranch(*HBB, TBB, FBB, HeadCond)) {
      continue;
    }

    // The equality check with HBB is to skip self-loops.
    if (!TBB || TBB == HBB) {
      continue;
    }

    SmallVector<MachineOperand, 4> TrueCond;
    MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
    if (TII->analyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) {
      continue;
    }

    MachineInstr *HeadCmpMI = findSuitableCompare(HBB);
    if (!HeadCmpMI) {
      continue;
    }

    MachineInstr *TrueCmpMI = findSuitableCompare(TBB);
    if (!TrueCmpMI) {
      continue;
    }

    AArch64CC::CondCode HeadCmp;
    if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) {
      continue;
    }

    AArch64CC::CondCode TrueCmp;
    if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) {
      continue;
    }

    const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
    const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();

    DEBUG(dbgs() << "Head branch:\n");
    DEBUG(dbgs() << "\tcondition: "
          << AArch64CC::getCondCodeName(HeadCmp) << '\n');
    DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');

    DEBUG(dbgs() << "True branch:\n");
    DEBUG(dbgs() << "\tcondition: "
          << AArch64CC::getCondCodeName(TrueCmp) << '\n');
    DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');

    if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
         (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
        std::abs(TrueImm - HeadImm) == 2) {
      // This branch transforms machine instructions that correspond to
      //
      // 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
      // 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)
      //
      // into
      //
      // 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
      // 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)
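      //
      // For instance (illustrative values): TrueImm = 5 and HeadImm = 7 give
      //   (a > 5 && ...) || (a < 7 && ...)
      // which adjustCmp rewrites to
      //   (a >= 6 && ...) || (a <= 6 && ...)
      // so both compares share the immediate 6.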

      CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp);
      CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp);
      if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) &&
          std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) {
        modifyCmp(HeadCmpMI, HeadCmpInfo);
        modifyCmp(TrueCmpMI, TrueCmpInfo);
        Changed = true;
      }
    } else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||
                (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&
                std::abs(TrueImm - HeadImm) == 1) {
      // This branch transforms machine instructions that correspond to
      //
      // 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
      // 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)
      //
      // into
      //
      // 1) (a <= {NewImm} && ...) || (a >  {NewImm} && ...)
      // 2) (a <  {NewImm} && ...) || (a >= {NewImm} && ...)
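      //
      // For instance (illustrative values): HeadImm = 3 and TrueImm = 4 give
      //   (a > 4 && ...) || (a > 3 && ...)
      // and adjusting the head compare from GT 3 to GE 4 lets both
      // instructions use the immediate 4.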

      // The GT -> GE transformation increases the immediate value, so pick the
      // smaller one; LT -> LE decreases the immediate value, so invert the
      // choice.
      bool adjustHeadCond = (HeadImm < TrueImm);
      if (HeadCmp == AArch64CC::LT) {
          adjustHeadCond = !adjustHeadCond;
      }

      if (adjustHeadCond) {
        Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);
      } else {
        Changed |= adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm);
      }
    }
    // Other transformation cases almost never occur due to generation of < or >
    // comparisons instead of <= and >=.
  }

  return Changed;
}
Example #5
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(*MF.getFunction()))
    return false;

  MRI = &MF.getRegInfo();
  ST = &MF.getSubtarget<SISubtarget>();
  TII = ST->getInstrInfo();
  TRI = &TII->getRegisterInfo();

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // omod is ignored by hardware if IEEE bit is enabled. omod also does not
  // correctly handle signed zeros.
  //
  // TODO: Check nsz on instructions when fast math flags are preserved to MI
  // level.
  bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      tryFoldInst(TII, &MI);

      if (!TII->isFoldableCopy(MI)) {
        if (IsIEEEMode || !tryFoldOMod(MI))
          tryFoldClamp(MI);
        continue;
      }

      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();

      // FIXME: We could also be folding things like TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      if (OpToFold.isReg() &&
          !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
        continue;

      // Prevent folding operands backwards in the function. For example,
      // the COPY opcode must not be replaced by 1 in this example:
      //
      //    %vreg3<def> = COPY %VGPR0; VGPR_32:%vreg3
      //    ...
      //    %VGPR0<def> = V_MOV_B32_e32 1, %EXEC<imp-use>
      MachineOperand &Dst = MI.getOperand(0);
      if (Dst.isReg() &&
          !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
        continue;

      foldInstOperand(MI, OpToFold);
    }
  }
  return false;
}
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the floating point control register as reserved.
  Reserved.set(X86::FPCW);

  // Set the stack-pointer register and its aliases as reserved.
  for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
       ++I)
    Reserved.set(*I);

  // Set the Shadow Stack Pointer as reserved.
  Reserved.set(X86::SSP);

  // Set the instruction pointer register and its aliases as reserved.
  for (MCSubRegIterator I(X86::RIP, this, /*IncludeSelf=*/true); I.isValid();
       ++I)
    Reserved.set(*I);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF)) {
    for (MCSubRegIterator I(X86::RBP, this, /*IncludeSelf=*/true); I.isValid();
         ++I)
      Reserved.set(*I);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    CallingConv::ID CC = MF.getFunction().getCallingConv();
    const uint32_t *RegMask = getCallPreservedMask(MF, CC);
    if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
      report_fatal_error(
        "Stack realignment in presence of dynamic allocas is not supported "
        "with this calling convention.");

    unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
    for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true);
         I.isValid(); ++I)
      Reserved.set(*I);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are old 32-bit registers.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);
    Reserved.set(X86::SIH);
    Reserved.set(X86::DIH);
    Reserved.set(X86::BPH);
    Reserved.set(X86::SPH);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 16; n != 32; ++n) {
      for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
                                  X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
  return Reserved;
}
Example #7
void AVRFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
  bool isHandler = (CallConv == CallingConv::AVR_INTR ||
                    CallConv == CallingConv::AVR_SIGNAL);

  // Early exit if the frame pointer is not needed in this function, except in
  // signal/interrupt handlers, where special code generation is required.
  if (!hasFP(MF) && !isHandler) {
    return;
  }

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI->getDesc().isReturn() &&
         "Can only insert epilog into returning blocks");

  DebugLoc DL = MBBI->getDebugLoc();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
  unsigned FrameSize = MFI.getStackSize() - AFI->getCalleeSavedFrameSize();
  const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
  const AVRInstrInfo &TII = *STI.getInstrInfo();

  // Emit special epilogue code to restore R1, R0 and SREG in interrupt/signal
  // handlers at the very end of the function, just before reti.
  if (isHandler) {
    BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0);
    BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr))
        .addImm(0x3f)
        .addReg(AVR::R0, RegState::Kill);
    BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0);
  }

  if (hasFP(MF))
    BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R29R28);

  // Early exit if there is no need to restore the frame pointer.
  if (!FrameSize) {
    return;
  }

  // Skip the callee-saved pop instructions.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = std::prev(MBBI);
    int Opc = PI->getOpcode();

    if (Opc != AVR::POPRd && Opc != AVR::POPWRd && !PI->isTerminator()) {
      break;
    }

    --MBBI;
  }

  unsigned Opcode;

  // Select the optimal opcode depending on how big the frame size is.
  if (isUInt<6>(FrameSize)) {
    Opcode = AVR::ADIWRdK;
  } else {
    Opcode = AVR::SUBIWRdK;
    FrameSize = -FrameSize;
  }
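  // Illustrative values: FrameSize = 40 fits in 6 bits, so ADIW R29R28, 40 is
  // emitted; FrameSize = 100 does not, so the size is negated and
  // SUBIW R29R28, -100 performs the same upward adjustment.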

  // Restore the frame pointer by doing FP += <size>.
  MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opcode), AVR::R29R28)
                         .addReg(AVR::R29R28, RegState::Kill)
                         .addImm(FrameSize);
  // The SREG implicit def is dead.
  MI->getOperand(3).setIsDead();

  // Write back R29R28 to SP and temporarily disable interrupts.
  BuildMI(MBB, MBBI, DL, TII.get(AVR::SPWRITE), AVR::SP)
      .addReg(AVR::R29R28, RegState::Kill);
}
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    // Label used to tie together the PROLOG_LABEL and the MachineMoves.
    MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();

    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
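    // Illustrative case: a leaf function with a 96-byte frame can leave SP
    // untouched and address its locals within the 128-byte red zone, so no
    // sub/add pair is needed.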
    if (NumBytes && !canUseRedZone(MF)) {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else if (NumBytes) {
      ++NumRedZoneFunctions;
    }

    return;
  }

  // Only set up FP if we actually need to.
  int FPOffset = 0;
  if (HasFP)
    FPOffset = getFPOffsetInPrologue(MBBI);

  // Move past the saves of the callee-saved registers.
  while (isCSSave(MBBI)) {
    ++MBBI;
    NumBytes -= 16;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  if (HasFP) {
    // Issue    sub fp, sp, FPOffset or
    //          mov fp,sp          when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes);

  // Allocate space for the rest of the frame.

  const unsigned Alignment = MFI->getMaxAlignment();
  const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
  unsigned scratchSPReg = AArch64::SP;
  if (NumBytes && NeedsRealignment) {
    // Use the first callee-saved register as a scratch register.
    scratchSPReg = AArch64::X9;
  }

  // If we're a leaf function, try using the red zone.
  if (NumBytes && !canUseRedZone(MF))
    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
    // the correct value here, as NumBytes also includes padding bytes,
    // which shouldn't be counted here.
    emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                    MachineInstr::FrameSetup);

  if (NumBytes && NeedsRealignment) {
    const unsigned NrBitsToZero = countTrailingZeros(Alignment);
    assert(NrBitsToZero > 1);
    assert(scratchSPReg != AArch64::SP);

    // SUB X9, SP, NumBytes
    //   -- X9 is a temporary register, so it shouldn't contain any live data
    //   -- here and is free to use. This is already produced by emitFrameOffset
    //   -- above.
    // AND SP, X9, 0b11111...0000
    // The logical immediates have a non-trivial encoding. The following
    // formula computes the encoded immediate with all ones but
    // NrBitsToZero zero bits as least significant bits.
    uint32_t andMaskEncoded =
        (1                   <<12) // = N
      | ((64-NrBitsToZero)   << 6) // immr
      | ((64-NrBitsToZero-1) << 0) // imms
      ;
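    // Worked example (illustrative): for a 16-byte alignment, NrBitsToZero
    // is 4, so andMaskEncoded = (1 << 12) | (60 << 6) | 59 = 0x1F3B, which
    // encodes the 64-bit mask 0xFFFFFFFFFFFFFFF0.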
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
      .addReg(scratchSPReg, RegState::Kill)
      .addImm(andMaskEncoded);
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);

      // Record the location of the stored LR
      unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);

      // Record the location of the stored FP
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have.
    emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
  }
}
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
      RetOpcode == AArch64::TCRETURNri;
  }
  int NumBytes = MFI->getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments; this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should look like the diagram below:
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (NumRestores * 8)  |         |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.
  NumBytes += ArgumentPopSize;

  unsigned NumRestores = 0;
  // Move past the restores of the callee-saved registers.
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    unsigned Restores = getNumCSRestores(*LastPopI, CSRegs);
    NumRestores += Restores;
    if (Restores == 0) {
      ++LastPopI;
      break;
    }
  }
  NumBytes -= NumRestores * 8;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes,
                      TII);
    return;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -(NumRestores - 2) * 8, TII, MachineInstr::NoFlags);
}
Example #10
bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF,
                                        const yaml::MachineFunction &YamlMF,
                                        PerFunctionMIParsingState &PFS) {
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  const Function &F = *MF.getFunction();
  const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo;
  MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken);
  MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken);
  MFI.setHasStackMap(YamlMFI.HasStackMap);
  MFI.setHasPatchPoint(YamlMFI.HasPatchPoint);
  MFI.setStackSize(YamlMFI.StackSize);
  MFI.setOffsetAdjustment(YamlMFI.OffsetAdjustment);
  if (YamlMFI.MaxAlignment)
    MFI.ensureMaxAlignment(YamlMFI.MaxAlignment);
  MFI.setAdjustsStack(YamlMFI.AdjustsStack);
  MFI.setHasCalls(YamlMFI.HasCalls);
  MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize);
  MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment);
  MFI.setHasVAStart(YamlMFI.HasVAStart);
  MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);
  if (!YamlMFI.SavePoint.Value.empty()) {
    MachineBasicBlock *MBB = nullptr;
    if (parseMBBReference(MBB, YamlMFI.SavePoint, MF, PFS))
      return true;
    MFI.setSavePoint(MBB);
  }
  if (!YamlMFI.RestorePoint.Value.empty()) {
    MachineBasicBlock *MBB = nullptr;
    if (parseMBBReference(MBB, YamlMFI.RestorePoint, MF, PFS))
      return true;
    MFI.setRestorePoint(MBB);
  }

  std::vector<CalleeSavedInfo> CSIInfo;
  // Initialize the fixed frame objects.
  for (const auto &Object : YamlMF.FixedStackObjects) {
    int ObjectIdx;
    if (Object.Type != yaml::FixedMachineStackObject::SpillSlot)
      ObjectIdx = MFI.CreateFixedObject(Object.Size, Object.Offset,
                                        Object.IsImmutable, Object.IsAliased);
    else
      ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset);
    MFI.setObjectAlignment(ObjectIdx, Object.Alignment);
    // TODO: Report an error when objects are redefined.
    PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID, ObjectIdx));
    if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister,
                                 ObjectIdx))
      return true;
  }

  // Initialize the ordinary frame objects.
  for (const auto &Object : YamlMF.StackObjects) {
    int ObjectIdx;
    const AllocaInst *Alloca = nullptr;
    const yaml::StringValue &Name = Object.Name;
    if (!Name.Value.empty()) {
      Alloca = dyn_cast_or_null<AllocaInst>(
          F.getValueSymbolTable().lookup(Name.Value));
      if (!Alloca)
        return error(Name.SourceRange.Start,
                     "alloca instruction named '" + Name.Value +
                         "' isn't defined in the function '" + F.getName() +
                         "'");
    }
    if (Object.Type == yaml::MachineStackObject::VariableSized)
      ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca);
    else
      ObjectIdx = MFI.CreateStackObject(
          Object.Size, Object.Alignment,
          Object.Type == yaml::MachineStackObject::SpillSlot, Alloca);
    MFI.setObjectOffset(ObjectIdx, Object.Offset);
    // TODO: Report an error when objects are redefined.
    PFS.StackObjectSlots.insert(std::make_pair(Object.ID, ObjectIdx));
    if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister,
                                 ObjectIdx))
      return true;
  }
  MFI.setCalleeSavedInfo(CSIInfo);
  if (!CSIInfo.empty())
    MFI.setCalleeSavedInfoValid(true);
  return false;
}
/// Returns true if the local user-space stack pointer needs to be written back
/// to memory by this function (this is not meaningful if needsSP is false). If
/// false, the stack red zone can be used and only a local SP is needed.
bool WebAssemblyFrameLowering::needsSPWriteback(
    const MachineFunction &MF, const MachineFrameInfo &MFI) const {
  assert(needsSP(MF, MFI));
  return MFI.getStackSize() > RedZoneSize || MFI.hasCalls() ||
         MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
}
Example #12
bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
  auto It = Functions.find(MF.getName());
  if (It == Functions.end())
    return error(Twine("no machine function information for function '") +
                 MF.getName() + "' in the MIR file");
  // TODO: Recreate the machine function.
  const yaml::MachineFunction &YamlMF = *It->getValue();
  if (YamlMF.Alignment)
    MF.setAlignment(YamlMF.Alignment);
  MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
  MF.setHasInlineAsm(YamlMF.HasInlineAsm);
  PerFunctionMIParsingState PFS;
  if (initializeRegisterInfo(MF, YamlMF, PFS))
    return true;
  if (!YamlMF.Constants.empty()) {
    auto *ConstantPool = MF.getConstantPool();
    assert(ConstantPool && "Constant pool must be created");
    if (initializeConstantPool(*ConstantPool, YamlMF, MF,
                               PFS.ConstantPoolSlots))
      return true;
  }

  const auto &F = *MF.getFunction();
  for (const auto &YamlMBB : YamlMF.BasicBlocks) {
    const BasicBlock *BB = nullptr;
    const yaml::StringValue &Name = YamlMBB.Name;
    const yaml::StringValue &IRBlock = YamlMBB.IRBlock;
    if (!Name.Value.empty()) {
      BB = dyn_cast_or_null<BasicBlock>(
          F.getValueSymbolTable().lookup(Name.Value));
      if (!BB)
        return error(Name.SourceRange.Start,
                     Twine("basic block '") + Name.Value +
                         "' is not defined in the function '" + MF.getName() +
                         "'");
    }
    if (!IRBlock.Value.empty()) {
      // TODO: Report an error when both name and ir block are specified.
      SMDiagnostic Error;
      if (parseIRBlockReference(BB, SM, MF, IRBlock.Value, PFS, IRSlots, Error))
        return error(Error, IRBlock.SourceRange);
    }
    auto *MBB = MF.CreateMachineBasicBlock(BB);
    MF.insert(MF.end(), MBB);
    bool WasInserted =
        PFS.MBBSlots.insert(std::make_pair(YamlMBB.ID, MBB)).second;
    if (!WasInserted)
      return error(Twine("redefinition of machine basic block with id #") +
                   Twine(YamlMBB.ID));
  }

  if (YamlMF.BasicBlocks.empty())
    return error(Twine("machine function '") + Twine(MF.getName()) +
                 "' requires at least one machine basic block in its body");
  // Initialize the frame information after creating all the MBBs so that the
  // MBB references in the frame information can be resolved.
  if (initializeFrameInfo(MF, YamlMF, PFS))
    return true;
  // Initialize the jump table after creating all the MBBs so that the MBB
  // references can be resolved.
  if (!YamlMF.JumpTableInfo.Entries.empty() &&
      initializeJumpTableInfo(MF, YamlMF.JumpTableInfo, PFS))
    return true;
  // Initialize the machine basic blocks after creating them all so that the
  // machine instructions parser can resolve the MBB references.
  unsigned I = 0;
  for (const auto &YamlMBB : YamlMF.BasicBlocks) {
    if (initializeMachineBasicBlock(MF, *MF.getBlockNumbered(I++), YamlMBB,
                                    PFS))
      return true;
  }
  // FIXME: This is a temporary workaround until the reserved registers can be
  // serialized.
  MF.getRegInfo().freezeReservedRegs(MF);
  MF.verify();
  return false;
}
bool WebAssemblyRegisterInfo::canRealignStack(const MachineFunction &MF) const {
  return !MF.getFunction()->hasFnAttribute("no-realign-stack");
}
bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
  MachineBlockFrequencyInfo &MBFI =
    getAnalysis<MachineBlockFrequencyInfo>();

  calculateSpillWeightsAndHints(LIS, MF, getAnalysis<MachineLoopInfo>(), MBFI);

  VirtRegMap &VRM = getAnalysis<VirtRegMap>();

  std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM));

  MF.getRegInfo().freezeReservedRegs(MF);

  DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");

  // Allocator main loop:
  //
  // * Map current regalloc problem to a PBQP problem
  // * Solve the PBQP problem
  // * Map the solution back to a register allocation
  // * Spill if necessary
  //
  // This process is continued until no more spills are generated.

  // Find the vreg intervals in need of allocation.
  findVRegIntervalsToAlloc(MF, LIS);

#ifndef NDEBUG
  const Function &F = *MF.getFunction();
  std::string FullyQualifiedName =
    F.getParent()->getModuleIdentifier() + "." + F.getName().str();
#endif

  // If there are non-empty intervals, allocate them using PBQP.
  if (!VRegsToAlloc.empty()) {

    const TargetSubtargetInfo &Subtarget = *MF.getTarget().getSubtargetImpl();
    std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
      llvm::make_unique<PBQPRAConstraintList>();
    ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>());
    ConstraintsRoot->addConstraint(llvm::make_unique<Interference>());
    if (PBQPCoalescing)
      ConstraintsRoot->addConstraint(llvm::make_unique<Coalescing>());
    ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints());

    bool PBQPAllocComplete = false;
    unsigned Round = 0;

    while (!PBQPAllocComplete) {
      DEBUG(dbgs() << "  PBQP Regalloc round " << Round << ":\n");

      PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI));
      initializeGraph(G);
      ConstraintsRoot->apply(G);

#ifndef NDEBUG
      if (PBQPDumpGraphs) {
        std::ostringstream RS;
        RS << Round;
        std::string GraphFileName = FullyQualifiedName + "." + RS.str() +
                                    ".pbqpgraph";
        std::error_code EC;
        raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text);
        DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
              << GraphFileName << "\"\n");
        G.dumpToStream(OS);
      }
#endif

      PBQP::Solution Solution = PBQP::RegAlloc::solve(G);
      PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller);
      ++Round;
    }
  }

  // Finalise allocation, allocate empty ranges.
  finalizeAlloc(MF, LIS, VRM);
  VRegsToAlloc.clear();
  EmptyIntervalVRegs.clear();

  DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n");

  return true;
}
Example #15
/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
/// registers.
void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
  const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
  MachineFrameInfo *MFI = Fn.getFrameInfo();

  // Get the callee saved register list...
  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);

  // These are used to keep track of the callee-save area. Initialize them.
  MinCSFrameIndex = INT_MAX;
  MaxCSFrameIndex = 0;

  // Early exit for targets which have no callee saved registers.
  if (CSRegs == 0 || CSRegs[0] == 0)
    return;

  // In Naked functions we aren't going to save any registers.
  if (Fn.getFunction()->hasFnAttr(Attribute::Naked))
    return;

  std::vector<CalleeSavedInfo> CSI;
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
      // If the reg is modified, save it!
      CSI.push_back(CalleeSavedInfo(Reg));
    } else {
      for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
           *AliasSet; ++AliasSet) {  // Check alias registers too.
        if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
          CSI.push_back(CalleeSavedInfo(Reg));
          break;
        }
      }
    }
  }

  if (CSI.empty())
    return;   // Early exit if no callee saved registers are modified!

  unsigned NumFixedSpillSlots;
  const TargetFrameInfo::SpillSlot *FixedSpillSlots =
    TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);

  // Now that we know which registers need to be saved and restored, allocate
  // stack slots for them.
  for (std::vector<CalleeSavedInfo>::iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    unsigned Reg = I->getReg();
    const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);

    int FrameIdx;
    if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) {
      I->setFrameIdx(FrameIdx);
      continue;
    }

    // Check to see if this physreg must be spilled to a particular stack slot
    // on this target.
    const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots;
    while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
           FixedSlot->Reg != Reg)
      ++FixedSlot;

    if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
      // Nope, just spill it anywhere convenient.
      unsigned Align = RC->getAlignment();
      unsigned StackAlign = TFI->getStackAlignment();

      // We may not be able to satisfy the desired alignment specification of
      // the TargetRegisterClass if the stack alignment is smaller. Use the
      // min.
      Align = std::min(Align, StackAlign);
      FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
      if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
      if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
    } else {
      // Spill it to the stack where we must.
      FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
    }

    I->setFrameIdx(FrameIdx);
  }

  MFI->setCalleeSavedInfo(CSI);
}
void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                BitVector &SavedRegs,
                                                RegScavenger *RS) const {
  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  SmallVector<unsigned, 4> UnspilledCSGPRs;
  SmallVector<unsigned, 4> UnspilledCSFPRs;

  // The frame record needs to be created by saving the appropriate registers.
  if (hasFP(MF)) {
    SavedRegs.set(AArch64::FP);
    SavedRegs.set(AArch64::LR);
  }

  // Spill the BasePtr if it's used. Do this first thing so that the
  // getCalleeSavedRegs() below will get the right answer.
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.set(RegInfo->getBaseRegister());

  if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
    SavedRegs.set(AArch64::X9);

  // If any callee-saved registers are used, the frame cannot be eliminated.
  unsigned NumGPRSpilled = 0;
  unsigned NumFPRSpilled = 0;
  bool ExtraCSSpill = false;
  bool CanEliminateFrame = true;
  DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:");
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);

  // Check pairs of consecutive callee-saved registers.
  for (unsigned i = 0; CSRegs[i]; i += 2) {
    assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");

    const unsigned OddReg = CSRegs[i];
    const unsigned EvenReg = CSRegs[i + 1];
    assert((AArch64::GPR64RegClass.contains(OddReg) &&
            AArch64::GPR64RegClass.contains(EvenReg)) ^
               (AArch64::FPR64RegClass.contains(OddReg) &&
                AArch64::FPR64RegClass.contains(EvenReg)) &&
           "Register class mismatch!");

    const bool OddRegUsed = SavedRegs.test(OddReg);
    const bool EvenRegUsed = SavedRegs.test(EvenReg);

    // Early exit if none of the registers in the register pair is actually
    // used.
    if (!OddRegUsed && !EvenRegUsed) {
      if (AArch64::GPR64RegClass.contains(OddReg)) {
        UnspilledCSGPRs.push_back(OddReg);
        UnspilledCSGPRs.push_back(EvenReg);
      } else {
        UnspilledCSFPRs.push_back(OddReg);
        UnspilledCSFPRs.push_back(EvenReg);
      }
      continue;
    }

    unsigned Reg = AArch64::NoRegister;
    // If only one of the registers of the register pair is used, make sure to
    // mark the other one as used as well.
    if (OddRegUsed ^ EvenRegUsed) {
      // Find out which register is the additional spill.
      Reg = OddRegUsed ? EvenReg : OddReg;
      SavedRegs.set(Reg);
    }

    DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
    DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));

    assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
            (RegInfo->getEncodingValue(OddReg) + 1 ==
             RegInfo->getEncodingValue(EvenReg))) &&
           "Register pair of non-adjacent registers!");
    if (AArch64::GPR64RegClass.contains(OddReg)) {
      NumGPRSpilled += 2;
      // If it's not a reserved register, we can use it in lieu of an
      // emergency spill slot for the register scavenger.
      // FIXME: It would be better to instead keep looking and choose another
      // unspilled register that isn't reserved, if there is one.
      if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
        ExtraCSSpill = true;
    } else
      NumFPRSpilled += 2;

    CanEliminateFrame = false;
  }

  // FIXME: Set BigStack if any stack slot references may be out of range.
  // For now, just conservatively guesstimate based on unscaled indexing
  // range. We'll end up allocating an unnecessary spill slot a lot, but
  // realistically that's not a big deal at this stage of the game.
  // The CSR spill slots have not been allocated yet, so estimateStackSize
  // won't include them.
  MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned CFSize =
      MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
  DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
  bool BigStack = (CFSize >= 256);
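  // Illustrative arithmetic: an estimated 200-byte frame plus 8 spilled CSRs
  // (8 * 8 = 64 bytes) gives CFSize = 264 >= 256, so BigStack is set.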
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    AFI->setHasStackFrame(true);

  // Estimate if we might need to scavenge a register at some point in order
  // to materialize a stack offset. If so, either spill one additional
  // callee-saved register or reserve a special spill slot to facilitate
  // register scavenging. If we already spilled an extra callee-saved register
  // above to keep the number of spills even, we don't need to do anything else
  // here.
  if (BigStack && !ExtraCSSpill) {

    // If we're adding a register to spill here, we have to add two of them
    // to keep the number of regs to spill even.
    assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
    unsigned Count = 0;
    while (!UnspilledCSGPRs.empty() && Count < 2) {
      unsigned Reg = UnspilledCSGPRs.back();
      UnspilledCSGPRs.pop_back();
      DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
                   << " to get a scratch register.\n");
      SavedRegs.set(Reg);
      ExtraCSSpill = true;
      ++Count;
    }

    // If we didn't find an extra callee-saved register to spill, create
    // an emergency spill slot.
    if (!ExtraCSSpill) {
      const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
      int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
      RS->addScavengingFrameIndex(FI);
      DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                   << " as the emergency spill slot.\n");
    }
  }
}
Example #17
0
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
///
bool PEI::runOnMachineFunction(MachineFunction &Fn) {
  const Function* F = Fn.getFunction();
  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
  RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
  FrameConstantRegMap.clear();

  // Calculate the MaxCallFrameSize and AdjustsStack variables for the
  // function's frame information. Also eliminates call frame pseudo
  // instructions.
  calculateCallsInformation(Fn);

  // Allow the target machine to make some adjustments to the function
  // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
  TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);

  // Scan the function for modified callee saved registers and insert spill code
  // for any callee saved registers that are modified.
  calculateCalleeSavedRegisters(Fn);

  // Determine placement of CSR spill/restore code:
  //  - with shrink wrapping, place spills and restores to tightly
  //    enclose regions in the Machine CFG of the function where
  //    they are used. Without shrink wrapping
  //  - default (no shrink wrapping), place all spills in the
  //    entry block, all restores in return blocks.
  placeCSRSpillsAndRestores(Fn);

  // Add the code to save and restore the callee saved registers
  if (!F->hasFnAttr(Attribute::Naked))
    insertCSRSpillsAndRestores(Fn);

  // Allow the target machine to make final modifications to the function
  // before the frame layout is finalized.
  TRI->processFunctionBeforeFrameFinalized(Fn);

  // Calculate actual frame offsets for all abstract stack objects...
  calculateFrameObjectOffsets(Fn);

  // Add prolog and epilog code to the function.  This function is required
  // to align the stack frame as necessary for any stack variables or
  // called functions.  Because of this, calculateCalleeSavedRegisters()
  // must be called before this function in order to set the AdjustsStack
  // and MaxCallFrameSize variables.
  if (!F->hasFnAttr(Attribute::Naked))
    insertPrologEpilogCode(Fn);

  // Replace all MO_FrameIndex operands with physical register references
  // and actual offsets.
  //
  replaceFrameIndices(Fn);

  // If register scavenging is needed, as we've enabled doing it as a
  // post-pass, scavenge the virtual registers that frame index elimination
  // inserted.
  if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
    scavengeFrameVirtualRegs(Fn);

  delete RS;
  clearAllSets();
  return true;
}
Example #18
0
DenseMap<const MachineBasicBlock *, int>
llvm::getFuncletMembership(const MachineFunction &MF) {
    DenseMap<const MachineBasicBlock *, int> FuncletMembership;

    // We don't have anything to do if there aren't any EH pads.
    if (!MF.getMMI().hasEHFunclets())
        return FuncletMembership;

    int EntryBBNumber = MF.front().getNumber();
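    // Membership is keyed by basic-block number: blocks in the parent
    // function get the entry block's number, and blocks in a funclet get that
    // funclet's entry-block number (assigned in the loops below).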
    bool IsSEH = isAsynchronousEHPersonality(
                     classifyEHPersonality(MF.getFunction()->getPersonalityFn()));

    const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
    SmallVector<const MachineBasicBlock *, 16> FuncletBlocks;
    SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks;
    SmallVector<const MachineBasicBlock *, 16> SEHCatchPads;
    SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors;
    for (const MachineBasicBlock &MBB : MF) {
        if (MBB.isEHFuncletEntry()) {
            FuncletBlocks.push_back(&MBB);
        } else if (IsSEH && MBB.isEHPad()) {
            SEHCatchPads.push_back(&MBB);
        } else if (MBB.pred_empty()) {
            UnreachableBlocks.push_back(&MBB);
        }

        MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();
        // CatchPads are not funclets for SEH so do not consider CatchRet to
        // transfer control to another funclet.
        if (MBBI == MBB.end() ||
            MBBI->getOpcode() != TII->getCatchReturnOpcode())
            continue;

        // FIXME: SEH CatchPads are not necessarily in the parent function:
        // they could be inside a finally block.
        const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB();
        const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB();
        CatchRetSuccessors.push_back(
            {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()});
    }

    // We don't have anything to do if there aren't any EH pads.
    if (FuncletBlocks.empty())
        return FuncletMembership;

    // Identify all the basic blocks reachable from the function entry.
    collectFuncletMembers(FuncletMembership, EntryBBNumber, &MF.front());
    // All blocks not part of a funclet are in the parent function.
    for (const MachineBasicBlock *MBB : UnreachableBlocks)
        collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB);
    // Next, identify all the blocks inside the funclets.
    for (const MachineBasicBlock *MBB : FuncletBlocks)
        collectFuncletMembers(FuncletMembership, MBB->getNumber(), MBB);
    // SEH CatchPads aren't really funclets, handle them separately.
    for (const MachineBasicBlock *MBB : SEHCatchPads)
        collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB);
    // Finally, identify all the targets of a catchret.
    for (std::pair<const MachineBasicBlock *, int> CatchRetPair :
            CatchRetSuccessors)
        collectFuncletMembers(FuncletMembership, CatchRetPair.second,
                              CatchRetPair.first);
    return FuncletMembership;
}
Example #19
0
bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
  DEBUG(dbgs() << "********** Stack Coloring **********\n"
               << "********** Function: "
               << ((const Value*)Func.getFunction())->getName() << '\n');
  MF = &Func;
  MFI = MF->getFrameInfo();
  Indexes = &getAnalysis<SlotIndexes>();
  BlockLiveness.clear();
  BasicBlocks.clear();
  BasicBlockNumbering.clear();
  Markers.clear();
  Intervals.clear();
  VNInfoAllocator.Reset();

  unsigned NumSlots = MFI->getObjectIndexEnd();

  // If there are no stack slots then there are no markers to remove.
  if (!NumSlots)
    return false;

  SmallVector<int, 8> SortedSlots;

  SortedSlots.reserve(NumSlots);
  Intervals.reserve(NumSlots);

  unsigned NumMarkers = collectMarkers(NumSlots);

  unsigned TotalSize = 0;
  DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n");
  DEBUG(dbgs()<<"Slot structure:\n");

  for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
    DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n");
    TotalSize += MFI->getObjectSize(i);
  }

  DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");

  // Don't continue because there are not enough lifetime markers, or the
  // stack is too small, or we are told not to optimize the slots.
  if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) {
    DEBUG(dbgs()<<"Will not try to merge slots.\n");
    return removeAllMarkers();
  }

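  // Create one live interval per stack slot, each carrying a single value
  // number defined at the zero index; actual liveness is computed below.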
  for (unsigned i=0; i < NumSlots; ++i) {
    LiveInterval *LI = new LiveInterval(i, 0);
    Intervals.push_back(LI);
    LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
    SortedSlots.push_back(i);
  }

  // Calculate the liveness of each block.
  calculateLocalLiveness();

  // Propagate the liveness information.
  calculateLiveIntervals(NumSlots);

  // Search for allocas which are used outside of the declared lifetime
  // markers.
  if (ProtectFromEscapedAllocas)
    removeInvalidSlotRanges();

  // Maps old slots to new slots.
  DenseMap<int, int> SlotRemap;
  unsigned RemovedSlots = 0;
  unsigned ReducedSize = 0;

  // Do not bother looking at empty intervals.
  for (unsigned I = 0; I < NumSlots; ++I) {
    if (Intervals[SortedSlots[I]]->empty())
      SortedSlots[I] = -1;
  }

  // This is a simple greedy algorithm for merging allocas. First, sort the
  // slots, placing the largest slots first. Next, perform an n^2 scan and look
  // for disjoint slots. When you find disjoint slots, merge the smaller one
  // into the bigger one and update the live interval. Remove the small alloca
  // and continue.

  // Sort the slots according to their size. Place unused slots at the end.
  std::sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI));

  bool Changed = true;
  while (Changed) {
    Changed = false;
    for (unsigned I = 0; I < NumSlots; ++I) {
      if (SortedSlots[I] == -1)
        continue;

      for (unsigned J=I+1; J < NumSlots; ++J) {
        if (SortedSlots[J] == -1)
          continue;

        int FirstSlot = SortedSlots[I];
        int SecondSlot = SortedSlots[J];
        LiveInterval *First = Intervals[FirstSlot];
        LiveInterval *Second = Intervals[SecondSlot];
        assert (!First->empty() && !Second->empty() && "Found an empty range");

        // Merge disjoint slots.
        if (!First->overlaps(*Second)) {
          Changed = true;
          First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
          SlotRemap[SecondSlot] = FirstSlot;
          SortedSlots[J] = -1;
          DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slot #"<<
                SecondSlot<<" together.\n");
          unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
                                           MFI->getObjectAlignment(SecondSlot));

          assert(MFI->getObjectSize(FirstSlot) >=
                 MFI->getObjectSize(SecondSlot) &&
                 "Merging a small object into a larger one");

          RemovedSlots+=1;
          ReducedSize += MFI->getObjectSize(SecondSlot);
          MFI->setObjectAlignment(FirstSlot, MaxAlignment);
          MFI->RemoveStackObject(SecondSlot);
        }
      }
    }
  }// While changed.

  // Record statistics.
  StackSpaceSaved += ReducedSize;
  StackSlotMerged += RemovedSlots;
  DEBUG(dbgs()<<"Merged "<<RemovedSlots<<" slots. Saved "<<
        ReducedSize<<" bytes\n");

  // Scan the entire function and update all machine operands that use frame
  // indices to use the remapped frame index.
  expungeSlotMap(SlotRemap, NumSlots);
  remapInstructions(SlotRemap);

  // Release the intervals.
  for (unsigned I = 0; I < NumSlots; ++I) {
    delete Intervals[I];
  }

  return removeAllMarkers();
}
Example #20
0
/// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
bool
TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
                                       bool IsSimple,
                                       MachineBasicBlock &TailBB) {
  // Only duplicate blocks that end with unconditional branches.
  if (TailBB.canFallThrough())
    return false;

  // Don't try to tail-duplicate single-block loops.
  if (TailBB.isSuccessor(&TailBB))
    return false;

  // Set the limit on the cost to duplicate. When optimizing for size,
  // duplicate only one, because one branch instruction can be eliminated to
  // compensate for the duplication.
  unsigned MaxDuplicateCount;
  if (TailDuplicateSize.getNumOccurrences() == 0 &&
      MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
    MaxDuplicateCount = 1;
  else
    MaxDuplicateCount = TailDuplicateSize;

  // If the target has hardware branch prediction that can handle indirect
  // branches, duplicating them can often make them predictable when there
  // are common paths through the code.  The limit needs to be high enough
  // to allow undoing the effects of tail merging and other optimizations
  // that rearrange the predecessors of the indirect branch.

  bool HasIndirectbr = false;
  if (!TailBB.empty())
    HasIndirectbr = TailBB.back().isIndirectBranch();

  if (HasIndirectbr && PreRegAlloc)
    MaxDuplicateCount = 20;

  // Check the instructions in the block to determine whether tail-duplication
  // is invalid or unlikely to be profitable.
  unsigned InstrCount = 0;
  for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) {
    // Non-duplicable things shouldn't be tail-duplicated.
    if (I->isNotDuplicable())
      return false;

    // Do not duplicate 'return' instructions if this is a pre-regalloc run.
    // A return may expand into a lot more instructions (e.g. reload of callee
    // saved registers) after PEI.
    if (PreRegAlloc && I->isReturn())
      return false;

    // Avoid duplicating calls before register allocation. Calls presents a
    // barrier to register allocation so duplicating them may end up increasing
    // spills.
    if (PreRegAlloc && I->isCall())
      return false;

    if (!I->isPHI() && !I->isDebugValue())
      InstrCount += 1;

    if (InstrCount > MaxDuplicateCount)
      return false;
  }

  if (HasIndirectbr && PreRegAlloc)
    return true;

  if (IsSimple)
    return true;

  if (!PreRegAlloc)
    return true;

  return canCompletelyDuplicateBB(TailBB);
}
Example #21
0
void AVRFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
  DebugLoc DL = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc();
  const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
  const AVRInstrInfo &TII = *STI.getInstrInfo();
  bool HasFP = hasFP(MF);

  // Interrupt handlers re-enable interrupts in function entry.
  if (CallConv == CallingConv::AVR_INTR) {
    BuildMI(MBB, MBBI, DL, TII.get(AVR::BSETs))
        .addImm(0x07)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Save the frame pointer if we have one.
  if (HasFP) {
    BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr))
        .addReg(AVR::R29R28, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Emit special prologue code to save R1, R0 and SREG in interrupt/signal
  // handlers before saving any other registers.
  if (CallConv == CallingConv::AVR_INTR ||
      CallConv == CallingConv::AVR_SIGNAL) {
    BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr))
        .addReg(AVR::R1R0, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);

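    // 0x3f is the I/O-space address of SREG: read it into R0, push it, and
    // then clear R0 by EORing it with itself.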
    BuildMI(MBB, MBBI, DL, TII.get(AVR::INRdA), AVR::R0)
        .addImm(0x3f)
        .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHRr))
        .addReg(AVR::R0, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII.get(AVR::EORRdRr))
        .addReg(AVR::R0, RegState::Define)
        .addReg(AVR::R0, RegState::Kill)
        .addReg(AVR::R0, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Early exit if the frame pointer is not needed in this function.
  if (!HasFP) {
    return;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
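  // The callee-saved pushes have already been emitted at this point, so only
  // the remaining local area still has to be allocated.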
  unsigned FrameSize = MFI.getStackSize() - AFI->getCalleeSavedFrameSize();

  // Skip the callee-saved push instructions.
  while (
      (MBBI != MBB.end()) && MBBI->getFlag(MachineInstr::FrameSetup) &&
      (MBBI->getOpcode() == AVR::PUSHRr || MBBI->getOpcode() == AVR::PUSHWRr)) {
    ++MBBI;
  }

  // Update Y with the new base value.
  BuildMI(MBB, MBBI, DL, TII.get(AVR::SPREAD), AVR::R29R28)
      .addReg(AVR::SP)
      .setMIFlag(MachineInstr::FrameSetup);

  // Mark the FramePtr as live-in in every block except the entry.
  for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
       I != E; ++I) {
    I->addLiveIn(AVR::R29R28);
  }

  if (!FrameSize) {
    return;
  }

  // Reserve the necessary frame memory by doing FP -= <size>.
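  // SBIW only encodes a 6-bit immediate (0-63), so larger frames fall back to
  // SUBIWRdK.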
  unsigned Opcode = (isUInt<6>(FrameSize)) ? AVR::SBIWRdK : AVR::SUBIWRdK;

  MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opcode), AVR::R29R28)
                         .addReg(AVR::R29R28, RegState::Kill)
                         .addImm(FrameSize)
                         .setMIFlag(MachineInstr::FrameSetup);
  // The SREG implicit def is dead.
  MI->getOperand(3).setIsDead();

  // Write back R29R28 to SP and temporarily disable interrupts.
  BuildMI(MBB, MBBI, DL, TII.get(AVR::SPWRITE), AVR::SP)
      .addReg(AVR::R29R28)
      .setMIFlag(MachineInstr::FrameSetup);
}
Example #22
0
void XTCFrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB   = MF.front();
  MachineFrameInfo *MFI    = MF.getFrameInfo();
  const XTCInstrInfo &TII =
    *static_cast<const XTCInstrInfo*>(MF.getTarget().getInstrInfo());
  XTCFunctionInfo *XTCFI = MF.getInfo<XTCFunctionInfo>();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  CallingConv::ID CallConv = MF.getFunction()->getCallingConv();

  bool requiresRA = false; //CallConv == CallingConv::MBLAZE_INTR;

  // Determine the correct frame layout
  determineFrameLayout(MF);

  // Get the number of bytes to allocate from the FrameInfo.
  unsigned StackSize = MFI->getStackSize();

  // No need to allocate space on the stack.
  if (StackSize == 0 && !MFI->adjustsStack() && !requiresRA) return;

  int FPOffset = XTCFI->getFPStackOffset();
  int RAOffset = XTCFI->getRAStackOffset();

  if (hasFP(MF)) {
      // Save frame pointer
      /*
      BuildMI(MBB, MBBI, DL, TII.get(XTC::STWPREI))
      .addReg(XTC::r15).addReg(XTC::r14).addImm(-4);
      */

      BuildMI(MBB, MBBI, DL, TII.get(XTC::STW))
          .addReg(XTC::r15).addReg(XTC::r14).addImm(-4);


      // Set it to SP

      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY))
      .addReg(XTC::r15).addReg(XTC::r14);
  }

  // Adjust stack : addi R1, R1, -imm
  BuildMI(MBB, MBBI, DL, TII.get(XTC::ADDI), XTC::r15)
      .addReg(XTC::r15)
      // Negate through int64_t so the immediate is a proper negative value
      // rather than an unsigned wraparound.
      .addImm(-static_cast<int64_t>(StackSize));
/*
  // swi  R15, R1, stack_loc
  if (MFI->adjustsStack() || requiresRA) {
    BuildMI(MBB, MBBI, DL, TII.get(XTC::SWI))
        .addReg(XTC::R15).addReg(XTC::R1).addImm(RAOffset);
  }

  if (hasFP(MF)) {
    // swi  R19, R1, stack_loc
    BuildMI(MBB, MBBI, DL, TII.get(XTC::SWI))
      .addReg(XTC::R19).addReg(XTC::R1).addImm(FPOffset);

    // add R19, R1, R0
    BuildMI(MBB, MBBI, DL, TII.get(XTC::ADD), XTC::R19)
      .addReg(XTC::R1).addReg(XTC::R0);
  }
  */
}
Example #23
0
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(*MF.getFunction()))
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  std::vector<unsigned> I1Defs;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
        // If this has a literal constant source that is the same as the
        // reversed bits of an inline immediate, replace with a bitreverse of
        // that constant. This saves 4 bytes in the common case of materializing
        // sign bits.

        // Test if we are after regalloc. We only want to do this after any
        // optimizations happen because this will confuse them.
        // XXX - not exactly a check for post-regalloc run.
        MachineOperand &Src = MI.getOperand(1);
        if (Src.isImm() &&
            TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
          int32_t ReverseImm;
          if (isReverseInlineImm(TII, Src, ReverseImm)) {
            MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
            Src.setImm(ReverseImm);
            continue;
          }
        }
      }

      // Combine adjacent s_nops to use the immediate operand encoding how long
      // to wait.
      //
      // s_nop N
      // s_nop M
      //  =>
      // s_nop (N + M)
      if (MI.getOpcode() == AMDGPU::S_NOP &&
          Next != MBB.end() &&
          (*Next).getOpcode() == AMDGPU::S_NOP) {

        MachineInstr &NextMI = *Next;
        // The instruction encodes the amount to wait with an offset of 1,
        // i.e. 0 is wait 1 cycle. Convert both to cycles and then convert back
        // after adding.
        uint8_t Nop0 = MI.getOperand(0).getImm() + 1;
        uint8_t Nop1 = NextMI.getOperand(0).getImm() + 1;

        // Make sure we don't overflow the bounds.
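        // The combined wait presumably has to fit s_nop's maximum encodable
        // wait of 8 cycles (immediate 7).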
        if (Nop0 + Nop1 <= 8) {
          NextMI.getOperand(0).setImm(Nop0 + Nop1 - 1);
          MI.eraseFromParent();
        }

        continue;
      }

      // FIXME: We also need to consider movs of constant operands since
      // immediate operands are not folded if they have more than one use, and
      // the operand folding pass is unaware if the immediate will be free since
      // it won't know if the src == dest constraint will end up being
      // satisfied.
      if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
          MI.getOpcode() == AMDGPU::S_MUL_I32) {
        const MachineOperand *Dest = &MI.getOperand(0);
        MachineOperand *Src0 = &MI.getOperand(1);
        MachineOperand *Src1 = &MI.getOperand(2);

        if (!Src0->isReg() && Src1->isReg()) {
          if (TII->commuteInstruction(MI, false, 1, 2))
            std::swap(Src0, Src1);
        }

        // FIXME: This could work better if hints worked with subregisters. If
        // we have a vector add of a constant, we usually don't get the correct
        // allocation due to the subregister usage.
        if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
            Src0->isReg()) {
          MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
          MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
          continue;
        }

        if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
          if (Src1->isImm() && isKImmOperand(TII, *Src1)) {
            unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
              AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;

            MI.setDesc(TII->get(Opc));
            MI.tieOperands(0, 1);
          }
        }
      }

      // Try to use s_cmpk_*
      if (MI.isCompare() && TII->isSOPC(MI)) {
        shrinkScalarCompare(TII, MI);
        continue;
      }

      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
        const MachineOperand &Dst = MI.getOperand(0);
        MachineOperand &Src = MI.getOperand(1);

        if (Src.isImm() &&
            TargetRegisterInfo::isPhysicalRegister(Dst.getReg())) {
          int32_t ReverseImm;
          if (isKImmOperand(TII, Src))
            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
          else if (isReverseInlineImm(TII, Src, ReverseImm)) {
            MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
            Src.setImm(ReverseImm);
          }
        }

        continue;
      }

      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      if (!canShrink(MI, TII, TRI, MRI)) {
        // Try commuting the instruction and see if that enables us to shrink
        // it.
        if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
            !canShrink(MI, TII, TRI, MRI))
          continue;
      }

      // getVOPe32 could be -1 here if we started with an instruction that had
      // a 32-bit encoding and then commuted it to an instruction that did not.
      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

      if (TII->isVOPC(Op32)) {
        unsigned DstReg = MI.getOperand(0).getReg();
        if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
          // VOPC instructions can only write to the VCC register. We can't
          // force them to use VCC here, because this is only one register and
          // cannot deal with sequences which would require multiple copies of
          // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
          //
          // So, instead of forcing the instruction to write to VCC, we provide
          // a hint to the register allocator to use VCC and then we will run
          // this pass again after RA and shrink it if it outputs to VCC.
          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
          continue;
        }
        if (DstReg != AMDGPU::VCC)
          continue;
      }

      if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
        // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
        // instructions.
        const MachineOperand *Src2 =
            TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        if (!Src2->isReg())
          continue;
        unsigned SReg = Src2->getReg();
        if (TargetRegisterInfo::isVirtualRegister(SReg)) {
          MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
          continue;
        }
        if (SReg != AMDGPU::VCC)
          continue;
      }

      // We can shrink this instruction
      DEBUG(dbgs() << "Shrinking " << MI);

      MachineInstrBuilder Inst32 =
          BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));

      // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
      // For VOPC instructions, this is replaced by an implicit def of vcc.
      int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
      if (Op32DstIdx != -1) {
        // dst
        Inst32.addOperand(MI.getOperand(0));
      } else {
        assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
               "Unexpected case");
      }


      Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));

      const MachineOperand *Src1 =
          TII->getNamedOperand(MI, AMDGPU::OpName::src1);
      if (Src1)
        Inst32.addOperand(*Src1);

      const MachineOperand *Src2 =
        TII->getNamedOperand(MI, AMDGPU::OpName::src2);
      if (Src2) {
        int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
        if (Op32Src2Idx != -1) {
          Inst32.addOperand(*Src2);
        } else {
          // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
          // replaced with an implicit read of vcc. This was already added
          // during the initial BuildMI, so find it to preserve the flags.
          copyFlagsToImplicitVCC(*Inst32, *Src2);
        }
      }

      ++NumInstructionsShrunk;

      // Copy extra operands not present in the instruction definition.
      copyExtraImplicitOps(*Inst32, MF, MI);

      MI.eraseFromParent();
      foldImmediates(*Inst32, TII, MRI);

      DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');


    }
  }
  return false;
}
Example #24
0
/// Insert prolog code into the function.
/// For ARC, this inserts a call to a function that puts required callee saved
/// registers onto the stack, when enough callee saved registers are required.
void ARCFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  LLVM_DEBUG(dbgs() << "Emit Prologue: " << MF.getName() << "\n");
  auto *AFI = MF.getInfo<ARCFunctionInfo>();
  MachineModuleInfo &MMI = MF.getMMI();
  MCContext &Context = MMI.getContext();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARCInstrInfo *TII = MF.getSubtarget<ARCSubtarget>().getInstrInfo();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  unsigned Last = determineLastCalleeSave(CSI);
  unsigned StackSlotsUsedByFunclet = 0;
  bool SavedBlink = false;
  unsigned AlreadyAdjusted = 0;
  if (MF.getFunction().isVarArg()) {
    // Add in the varargs area here first.
    LLVM_DEBUG(dbgs() << "Varargs\n");
    unsigned VarArgsBytes = MFI.getObjectSize(AFI->getVarArgsFrameIndex());
    BuildMI(MBB, MBBI, dl, TII->get(ARC::SUB_rru6))
        .addReg(ARC::SP)
        .addReg(ARC::SP)
        .addImm(VarArgsBytes);
  }
  if (hasFP(MF)) {
    LLVM_DEBUG(dbgs() << "Saving FP\n");
    BuildMI(MBB, MBBI, dl, TII->get(ARC::ST_AW_rs9))
        .addReg(ARC::SP, RegState::Define)
        .addReg(ARC::FP)
        .addReg(ARC::SP)
        .addImm(-4);
    AlreadyAdjusted += 4;
  }
  if (UseSaveRestoreFunclet && Last > ARC::R14) {
    LLVM_DEBUG(dbgs() << "Creating store funclet.\n");
    // BL to __save_r13_to_<TRI->getRegAsmName()>
    StackSlotsUsedByFunclet = Last - ARC::R12;
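    // The funclet stores r13 through Last, i.e. (Last - R12) registers at
    // 4 bytes apiece.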
    BuildMI(MBB, MBBI, dl, TII->get(ARC::PUSH_S_BLINK));
    BuildMI(MBB, MBBI, dl, TII->get(ARC::SUB_rru6))
        .addReg(ARC::SP)
        .addReg(ARC::SP)
        .addImm(4 * StackSlotsUsedByFunclet);
    BuildMI(MBB, MBBI, dl, TII->get(ARC::BL))
        .addExternalSymbol(store_funclet_name[Last - ARC::R15])
        .addReg(ARC::BLINK, RegState::Implicit | RegState::Kill);
    AlreadyAdjusted += 4 * (StackSlotsUsedByFunclet + 1);
    SavedBlink = true;
  }
  // If we haven't saved BLINK, but we need to...do that now.
  if (MFI.hasCalls() && !SavedBlink) {
    LLVM_DEBUG(dbgs() << "Creating save blink.\n");
    BuildMI(MBB, MBBI, dl, TII->get(ARC::PUSH_S_BLINK));
    AlreadyAdjusted += 4;
  }
  if (AFI->MaxCallStackReq > 0)
    MFI.setStackSize(MFI.getStackSize() + AFI->MaxCallStackReq);
  // We have already saved some of the stack...
  LLVM_DEBUG(dbgs() << "Adjusting stack by: "
                    << (MFI.getStackSize() - AlreadyAdjusted) << "\n");
  generateStackAdjustment(MBB, MBBI, *ST.getInstrInfo(), dl,
                          -(MFI.getStackSize() - AlreadyAdjusted), ARC::SP);

  if (hasFP(MF)) {
    LLVM_DEBUG(dbgs() << "Setting FP from SP.\n");
    BuildMI(MBB, MBBI, dl,
            TII->get(isUInt<6>(MFI.getStackSize()) ? ARC::ADD_rru6
                                                   : ARC::ADD_rrlimm),
            ARC::FP)
        .addReg(ARC::SP)
        .addImm(MFI.getStackSize());
  }

  // Emit CFI records:
  // .cfi_def_cfa_offset StackSize
  // .cfi_offset fp, -StackSize
  // .cfi_offset blink, -StackSize+4
  unsigned CFIIndex = MF.addFrameInst(
      MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
  BuildMI(MBB, MBBI, dl, TII->get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlags(MachineInstr::FrameSetup);

  int CurOffset = -4;
  if (hasFP(MF)) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ARC::FP, true), CurOffset));
    BuildMI(MBB, MBBI, dl, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
    CurOffset -= 4;
  }

  if (MFI.hasCalls()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ARC::BLINK, true), CurOffset));
    BuildMI(MBB, MBBI, dl, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
  // CFI for the rest of the registers.
  for (const auto &Entry : CSI) {
    unsigned Reg = Entry.getReg();
    int FI = Entry.getFrameIdx();
    // Skip BLINK and FP.
    if ((hasFP(MF) && Reg == ARC::FP) || (MFI.hasCalls() && Reg == ARC::BLINK))
      continue;
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
    BuildMI(MBB, MBBI, dl, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}
Example #25
0
bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {

  DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
               << "********** Function: "
               << MF.getName() << "\n");

  if (skipFunction(*MF.getFunction()))
    return false;

  // If we move NewValueJump before register allocation we'll need live variable
  // analysis here too.

  QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
  QRI = static_cast<const HexagonRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();

  if (DisableNewValueJumps) {
    return false;
  }

  int nvjCount = DbgNVJCount;
  int nvjGenerated = 0;

  // Loop through all the bb's of the function
  for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
        MBBb != MBBe; ++MBBb) {
    MachineBasicBlock *MBB = &*MBBb;

    DEBUG(dbgs() << "** dumping bb ** "
                 << MBB->getNumber() << "\n");
    DEBUG(MBB->dump());
    DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n");
    bool foundJump    = false;
    bool foundCompare = false;
    bool invertPredicate = false;
    unsigned predReg = 0; // predicate reg of the jump.
    unsigned cmpReg1 = 0;
    int cmpOp2 = 0;
    bool MO1IsKill = false;
    bool MO2IsKill = false;
    MachineBasicBlock::iterator jmpPos;
    MachineBasicBlock::iterator cmpPos;
    MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr;
    MachineBasicBlock *jmpTarget = nullptr;
    bool afterRA = false;
    bool isSecondOpReg = false;
    bool isSecondOpNewified = false;
    // Traverse the basic block - bottom up
    for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin();
             MII != E;) {
      MachineInstr &MI = *--MII;
      if (MI.isDebugValue()) {
        continue;
      }

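      // DbgNVJCount appears to be a debugging cap (-1 meaning unlimited) on
      // how many new-value jumps this pass will generate.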
      if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated))
        break;

      DEBUG(dbgs() << "Instr: "; MI.dump(); dbgs() << "\n");

      if (!foundJump && (MI.getOpcode() == Hexagon::J2_jumpt ||
                         MI.getOpcode() == Hexagon::J2_jumpf ||
                         MI.getOpcode() == Hexagon::J2_jumptnewpt ||
                         MI.getOpcode() == Hexagon::J2_jumptnew ||
                         MI.getOpcode() == Hexagon::J2_jumpfnewpt ||
                         MI.getOpcode() == Hexagon::J2_jumpfnew)) {
        // This is where you would insert your compare and
        // instr that feeds compare
        jmpPos = MII;
        jmpInstr = &MI;
        predReg = MI.getOperand(0).getReg();
        afterRA = TargetRegisterInfo::isPhysicalRegister(predReg);
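        // A physical predicate register means register allocation has
        // already run.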

        // If ifconverter had not messed up with the kill flags of the
        // operands, the following check on the kill flag would suffice.
        // if(!jmpInstr->getOperand(0).isKill()) break;

        // This predicate register is live out of the BB;
        // this would only work if we can actually use Live
        // variable analysis on phys regs - but LLVM does not
        // provide LV analysis on phys regs.
        //if(LVs.isLiveOut(predReg, *MBB)) break;

        // Get all the successors of this block - which will always
        // be 2. Check if the predicate register is live-in to those
        // successors. If yes, we cannot delete the predicate -
        // I am doing this only because LLVM does not provide LiveOut
        // at the BB level.
        bool predLive = false;
        for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
                            SIE = MBB->succ_end(); SI != SIE; ++SI) {
          MachineBasicBlock* succMBB = *SI;
          if (succMBB->isLiveIn(predReg)) {
            predLive = true;
          }
        }
        if (predLive)
          break;

        if (!MI.getOperand(1).isMBB())
          continue;
        jmpTarget = MI.getOperand(1).getMBB();
        foundJump = true;
        if (MI.getOpcode() == Hexagon::J2_jumpf ||
            MI.getOpcode() == Hexagon::J2_jumpfnewpt ||
            MI.getOpcode() == Hexagon::J2_jumpfnew) {
          invertPredicate = true;
        }
        continue;
      }

      // No new value jump if there is a barrier. A barrier has to be in its
      // own packet. A barrier has zero operands. We conservatively bail out
      // here if we see any instruction with zero operands.
      if (foundJump && MI.getNumOperands() == 0)
        break;

      if (foundJump && !foundCompare && MI.getOperand(0).isReg() &&
          MI.getOperand(0).getReg() == predReg) {

        // Not all compares can be new value compare. Arch Spec: 7.6.1.1
        if (isNewValueJumpCandidate(MI)) {

          assert(
              (MI.getDesc().isCompare()) &&
              "Only compare instruction can be collapsed into New Value Jump");
          isSecondOpReg = MI.getOperand(2).isReg();

          if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg,
                                        afterRA, jmpPos, MF))
            break;

          cmpInstr = &MI;
          cmpPos = MII;
          foundCompare = true;

          // We need cmpReg1 and cmpOp2(imm or reg) while building
          // new value jump instruction.
          cmpReg1 = MI.getOperand(1).getReg();
          if (MI.getOperand(1).isKill())
            MO1IsKill = true;

          if (isSecondOpReg) {
            cmpOp2 = MI.getOperand(2).getReg();
            if (MI.getOperand(2).isKill())
              MO2IsKill = true;
          } else
            cmpOp2 = MI.getOperand(2).getImm();
          continue;
        }
      }

      if (foundCompare && foundJump) {

        // If "common" checks fail, bail out on this BB.
        if (!commonChecksToProhibitNewValueJump(afterRA, MII))
          break;

        bool foundFeeder = false;
        MachineBasicBlock::iterator feederPos = MII;
        if (MI.getOperand(0).isReg() && MI.getOperand(0).isDef() &&
            (MI.getOperand(0).getReg() == cmpReg1 ||
             (isSecondOpReg &&
              MI.getOperand(0).getReg() == (unsigned)cmpOp2))) {

          unsigned feederReg = MI.getOperand(0).getReg();

          // First try to see if we can get the feeder from the first operand
          // of the compare. If we can not, and if secondOpReg is true
          // (second operand of the compare is also register), try that one.
          // TODO: Try to come up with some heuristic to figure out which
          // feeder would benefit.

          if (feederReg == cmpReg1) {
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) {
              if (!isSecondOpReg)
                break;
              else
                continue;
            } else
              foundFeeder = true;
          }

          if (!foundFeeder &&
               isSecondOpReg &&
               feederReg == (unsigned) cmpOp2)
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF))
              break;

          if (isSecondOpReg) {
            // In case of CMPLT, or CMPLTU, or EQ with the second register
            // to newify, swap the operands.
            if (cmpInstr->getOpcode() == Hexagon::C2_cmpeq &&
                                     feederReg == (unsigned) cmpOp2) {
              unsigned tmp = cmpReg1;
              bool tmpIsKill = MO1IsKill;
              cmpReg1 = cmpOp2;
              MO1IsKill = MO2IsKill;
              cmpOp2 = tmp;
              MO2IsKill = tmpIsKill;
            }

            // Now we have swapped the operands, all we need to check is,
            // if the second operand (after swap) is the feeder.
            // And if it is, make a note.
            if (feederReg == (unsigned)cmpOp2)
              isSecondOpNewified = true;
          }

          // Now that we are moving the feeder closer to the jump,
          // make sure we are respecting the kill values of
          // the operands of the feeder.

          bool updatedIsKill = false;
          for (unsigned i = 0; i < MI.getNumOperands(); i++) {
            MachineOperand &MO = MI.getOperand(i);
            if (MO.isReg() && MO.isUse()) {
              unsigned feederReg = MO.getReg();
              for (MachineBasicBlock::iterator localII = feederPos,
                   end = jmpPos; localII != end; localII++) {
                MachineInstr &localMI = *localII;
                for (unsigned j = 0; j < localMI.getNumOperands(); j++) {
                  MachineOperand &localMO = localMI.getOperand(j);
                  if (localMO.isReg() && localMO.isUse() &&
                      localMO.isKill() && feederReg == localMO.getReg()) {
                    // A later instruction kills this use register; clear the
                    // kill flag there and set it on the feeder's operand.
                    localMO.setIsKill(false);
                    MO.setIsKill();
                    updatedIsKill = true;
                    break;
                  }
                }
                if (updatedIsKill) break;
              }
            }
            if (updatedIsKill) break;
          }

          MBB->splice(jmpPos, MI.getParent(), MI);
          MBB->splice(jmpPos, MI.getParent(), cmpInstr);
          DebugLoc dl = MI.getDebugLoc();
          MachineInstr *NewMI;

          assert((isNewValueJumpCandidate(*cmpInstr)) &&
                 "This compare is not a New Value Jump candidate.");
          unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
                                               isSecondOpNewified,
                                               jmpTarget, MBPI);
          if (invertPredicate)
            opc = QII->getInvertedPredicatedOpcode(opc);

          if (isSecondOpReg)
            NewMI = BuildMI(*MBB, jmpPos, dl,
                                  QII->get(opc))
                                    .addReg(cmpReg1, getKillRegState(MO1IsKill))
                                    .addReg(cmpOp2, getKillRegState(MO2IsKill))
                                    .addMBB(jmpTarget);

          else if ((cmpInstr->getOpcode() == Hexagon::C2_cmpeqi ||
                    cmpInstr->getOpcode() == Hexagon::C2_cmpgti) &&
                    cmpOp2 == -1 )
            // Corresponding new-value compare jump instructions don't have the
            // operand for -1 immediate value.
            NewMI = BuildMI(*MBB, jmpPos, dl,
                                  QII->get(opc))
                                    .addReg(cmpReg1, getKillRegState(MO1IsKill))
                                    .addMBB(jmpTarget);

          else
            NewMI = BuildMI(*MBB, jmpPos, dl,
                                  QII->get(opc))
                                    .addReg(cmpReg1, getKillRegState(MO1IsKill))
                                    .addImm(cmpOp2)
                                    .addMBB(jmpTarget);

          assert(NewMI && "New Value Jump Instruction Not created!");
          (void)NewMI;
          if (cmpInstr->getOperand(0).isReg() &&
              cmpInstr->getOperand(0).isKill())
            cmpInstr->getOperand(0).setIsKill(false);
          if (cmpInstr->getOperand(1).isReg() &&
              cmpInstr->getOperand(1).isKill())
            cmpInstr->getOperand(1).setIsKill(false);
          cmpInstr->eraseFromParent();
          jmpInstr->eraseFromParent();
          ++nvjGenerated;
          ++NumNVJGenerated;
          break;
        }
      }
    }
  }

  return true;
}
Example #26
0
/// Insert epilog code into the function.
/// For ARC, this inserts a call to a function that restores callee saved
/// registers onto the stack, when enough callee saved registers are required.
void ARCFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  LLVM_DEBUG(dbgs() << "Emit Epilogue: " << MF.getName() << "\n");
  auto *AFI = MF.getInfo<ARCFunctionInfo>();
  const ARCInstrInfo *TII = MF.getSubtarget<ARCSubtarget>().getInstrInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  uint64_t StackSize = MF.getFrameInfo().getStackSize();
  bool SavedBlink = false;
  unsigned AmountAboveFunclet = 0;
  // If we have variable sized frame objects, then we have to move
  // the stack pointer to a known spot (fp - StackSize).
  // Then, replace the frame pointer by (new) [sp,StackSize-4].
  // Then, move the stack pointer the rest of the way (sp = sp + StackSize).
  if (hasFP(MF)) {
    BuildMI(MBB, MBBI, DebugLoc(), TII->get(ARC::SUB_rru6), ARC::SP)
        .addReg(ARC::FP)
        .addImm(StackSize);
    AmountAboveFunclet += 4;
  }

  // Now, move the stack pointer to the bottom of the save area for the funclet.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  unsigned Last = determineLastCalleeSave(CSI);
  unsigned StackSlotsUsedByFunclet = 0;
  // Now, restore the callee save registers.
  if (UseSaveRestoreFunclet && Last > ARC::R14) {
    // BL to __ld_r13_to_<TRI->getRegAsmName()>
    StackSlotsUsedByFunclet = Last - ARC::R12;
    AmountAboveFunclet += 4 * (StackSlotsUsedByFunclet + 1);
    SavedBlink = true;
  }

  if (MFI.hasCalls() && !SavedBlink) {
    AmountAboveFunclet += 4;
    SavedBlink = true;
  }

  // Move the stack pointer up to the point of the funclet.
  if (StackSize - AmountAboveFunclet) {
    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::ADD_rru6))
        .addReg(ARC::SP)
        .addReg(ARC::SP)
        .addImm(StackSize - AmountAboveFunclet);
  }

  if (StackSlotsUsedByFunclet) {
    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::BL))
        .addExternalSymbol(load_funclet_name[Last - ARC::R15])
        .addReg(ARC::BLINK, RegState::Implicit | RegState::Kill);
    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::ADD_rru6))
        .addReg(ARC::SP)
        .addReg(ARC::SP)
        .addImm(4 * (StackSlotsUsedByFunclet));
  }
  // Now, pop blink if necessary.
  if (SavedBlink) {
    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::POP_S_BLINK));
  }
  // Now, pop fp if necessary.
  if (hasFP(MF)) {
    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::LD_AB_rs9))
        .addReg(ARC::SP, RegState::Define)
        .addReg(ARC::FP, RegState::Define)
        .addReg(ARC::SP)
        .addImm(4);
  }

  // Relieve the varargs area if necessary.
  if (MF.getFunction().isVarArg()) {
    // Add in the varargs area here first.
    LLVM_DEBUG(dbgs() << "Varargs\n");
    unsigned VarArgsBytes = MFI.getObjectSize(AFI->getVarArgsFrameIndex());
    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::ADD_rru6))
        .addReg(ARC::SP)
        .addReg(ARC::SP)
        .addImm(VarArgsBytes);
  }
}
Example #27
0
static void interruptFrameLayout(MachineFunction &MF) {
  const Function *F = MF.getFunction();
  llvm::CallingConv::ID CallConv = F->getCallingConv();

  // If this function is not using either the interrupt_handler
  // calling convention or the save_volatiles calling convention
  // then we don't need to do any additional frame layout.
  if (CallConv != llvm::CallingConv::MBLAZE_INTR &&
      CallConv != llvm::CallingConv::MBLAZE_SVOL)
      return;

  MachineFrameInfo *MFI = MF.getFrameInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MBlazeInstrInfo &TII =
    *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());

  // Determine if the calling convention is the interrupt_handler
  // calling convention. Some pieces of the prologue and epilogue
  // only need to be emitted if we are lowering an interrupt handler.
  bool isIntr = CallConv == llvm::CallingConv::MBLAZE_INTR;

  // Determine where to put prologue and epilogue additions
  MachineBasicBlock &MENT   = MF.front();
  MachineBasicBlock &MEXT   = MF.back();

  MachineBasicBlock::iterator MENTI = MENT.begin();
  MachineBasicBlock::iterator MEXTI = prior(MEXT.end());

  DebugLoc ENTDL = MENTI != MENT.end() ? MENTI->getDebugLoc() : DebugLoc();
  DebugLoc EXTDL = MEXTI != MEXT.end() ? MEXTI->getDebugLoc() : DebugLoc();

  // Store the frame indexes generated during prologue additions for use
  // when we are generating the epilogue additions.
  SmallVector<int, 10> VFI;

  // Build the prologue SWI for R3 - R12 if needed. Note that R11 must
  // always have a SWI because it is used when processing RMSR.
  for (unsigned r = MBlaze::R3; r <= MBlaze::R12; ++r) {
    if (!MRI.isPhysRegUsed(r) && !(isIntr && r == MBlaze::R11)) continue;
    
    int FI = MFI->CreateStackObject(4,4,false,false);
    VFI.push_back(FI);

    BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), r)
      .addFrameIndex(FI).addImm(0);
  }
    
  // Build the prologue SWI for R17, R18
  int R17FI = MFI->CreateStackObject(4,4,false,false);
  int R18FI = MFI->CreateStackObject(4,4,false,false);

  BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R17)
    .addFrameIndex(R17FI).addImm(0);
    
  BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R18)
    .addFrameIndex(R18FI).addImm(0);

  // Build the prologue SWI and the epilogue LWI for RMSR if needed
  if (isIntr) {
    int MSRFI = MFI->CreateStackObject(4,4,false,false);
    BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::MFS), MBlaze::R11)
      .addReg(MBlaze::RMSR);
    BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R11)
      .addFrameIndex(MSRFI).addImm(0);

    BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R11)
      .addFrameIndex(MSRFI).addImm(0);
    BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::MTS), MBlaze::RMSR)
      .addReg(MBlaze::R11);
  }

  // Build the epilogue LWI for R17, R18
  BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R18)
    .addFrameIndex(R18FI).addImm(0);

  BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R17)
    .addFrameIndex(R17FI).addImm(0);

  // Build the epilogue LWI for R3 - R12 if needed
  for (unsigned r = MBlaze::R12, i = VFI.size(); r >= MBlaze::R3; --r) {
    if (!MRI.isPhysRegUsed(r) && !(isIntr && r == MBlaze::R11)) continue;
    BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), r)
      .addFrameIndex(VFI[--i]).addImm(0);
  }
}
Example #28
0
bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(*mf.getFunction()))
    return false;
  MF = &mf;
  TII = MF->getSubtarget().getInstrInfo();
  TRI = MF->getSubtarget().getRegisterInfo();
  RegClassInfo.runOnMachineFunction(mf);
  LiveRegs = nullptr;
  assert(NumRegs == RC->getNumRegs() && "Bad regclass");

  DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
               << TRI->getRegClassName(RC) << " **********\n");

  // If no relevant registers are used in the function, we can skip it
  // completely.
  bool anyregs = false;
  const MachineRegisterInfo &MRI = mf.getRegInfo();
  for (unsigned Reg : *RC) {
    if (MRI.isPhysRegUsed(Reg)) {
      anyregs = true;
      break;
    }
  }
  if (!anyregs) return false;

  // Initialize the AliasMap on the first use.
  if (AliasMap.empty()) {
    // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
    // therefore the LiveRegs array.
    AliasMap.resize(TRI->getNumRegs());
    for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
      for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true);
           AI.isValid(); ++AI)
        AliasMap[*AI].push_back(i);
  }

  // Initialize the MBBInfos
  for (auto &MBB : mf) {
    MBBInfo InitialInfo;
    MBBInfos.insert(std::make_pair(&MBB, InitialInfo));
  }

  /*
   *  We want to visit every instruction in every basic block in order to update
   *  its execution domain or break any false dependencies. However, for the
   *  dependency breaking, we need to know clearances from all predecessors
   *  (including any backedges). One way to do so would be to do two complete
   *  passes over all basic blocks/instructions, the first for recording
   *  clearances, the second to break the dependencies. However, for functions
   *  without backedges, or functions with a lot of straight-line code, and
   *  a small loop, that would be a lot of unnecessary work (since only the
   *  BBs that are part of the loop require two passes). As an example,
   *  consider the following loop.
   *
   *
   *     PH -> A -> B (xmm<Undef> -> xmm<Def>) -> C -> D -> EXIT
   *           ^                                  |
   *           +----------------------------------+
   *
   *  The iteration order is as follows:
   *  Naive: PH A B C D A' B' C' D'
   *  Optimized: PH A B C A' B' C' D
   *
   *  Note that we avoid processing D twice, because we can entirely process
   *  the predecessors before getting to D. We call a block that is ready
   *  for its second round of processing `done` (isBlockDone). Once we finish
   *  processing some block, we update the counters in MBBInfos and re-process
   *  any successors that are now done.
   */

  MachineBasicBlock *Entry = &*MF->begin();
  ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
  for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
         MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
    MachineBasicBlock *MBB = *MBBI;
    // N.B: IncomingProcessed and IncomingCompleted were already updated while
    // processing this block's predecessors.
    MBBInfos[MBB].PrimaryCompleted = true;
    MBBInfos[MBB].PrimaryIncoming = MBBInfos[MBB].IncomingProcessed;
    processBasicBlock(MBB, true);
    updateSuccessors(MBB, true);
  }

  // We need to go through again and finalize any blocks that are not done yet.
  // This is possible if blocks have dead predecessors, so we didn't visit them
  // above.
  for (ReversePostOrderTraversal<MachineBasicBlock *>::rpo_iterator
           MBBI = RPOT.begin(),
           MBBE = RPOT.end();
       MBBI != MBBE; ++MBBI) {
    MachineBasicBlock *MBB = *MBBI;
    if (!isBlockDone(MBB)) {
      processBasicBlock(MBB, false);
      // Don't update successors here. We'll get to them anyway through this
      // loop.
    }
  }

  // Clear the LiveOuts vectors and collapse any remaining DomainValues.
  for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
         MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
    auto FI = MBBInfos.find(*MBBI);
    if (FI == MBBInfos.end() || !FI->second.OutRegs)
      continue;
    for (unsigned i = 0, e = NumRegs; i != e; ++i)
      if (FI->second.OutRegs[i].Value)
        release(FI->second.OutRegs[i].Value);
    delete[] FI->second.OutRegs;
  }
  MBBInfos.clear();
  UndefReads.clear();
  Avail.clear();
  Allocator.DestroyAll();

  return false;
}
Example #29
0
void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const HexagonRegisterInfo *QRI =
    static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
  determineFrameLayout(MF);

  // Check if frame moves are needed for EH.
  bool needsFrameMoves = MMI.hasDebugInfo() ||
    !MF.getFunction()->needsUnwindTableEntry();

  // Get the number of bytes to allocate from the FrameInfo.
  int NumBytes = (int) MFI->getStackSize();

  // LLVM expects allocframe not to be the first instruction in the
  // basic block.
  MachineBasicBlock::iterator InsertPt = MBB.begin();

  //
  // ALLOCA adjust regs.  Iterate over ADJDYNALLOC nodes and change the offset.
  //
  HexagonMachineFunctionInfo *FuncInfo =
    MF.getInfo<HexagonMachineFunctionInfo>();
  const std::vector<MachineInstr*>& AdjustRegs =
    FuncInfo->getAllocaAdjustInsts();
  for (std::vector<MachineInstr*>::const_iterator i = AdjustRegs.begin(),
         e = AdjustRegs.end();
       i != e; ++i) {
    MachineInstr* MI = *i;
    assert((MI->getOpcode() == Hexagon::ADJDYNALLOC) &&
           "Expected adjust alloca node");

    MachineOperand& MO = MI->getOperand(2);
    assert(MO.isImm() && "Expected immediate");
    MO.setImm(MFI->getMaxCallFrameSize());
  }

  std::vector<MachineMove> &Moves = MMI.getFrameMoves();

  if (needsFrameMoves) {
    // Advance CFA. DW_CFA_def_cfa
    unsigned FPReg = QRI->getFrameRegister();
    unsigned RAReg = QRI->getRARegister();

    MachineLocation Dst(MachineLocation::VirtualFP);
    MachineLocation Src(FPReg, -8);
    Moves.push_back(MachineMove(0, Dst, Src));

    // R31 = (R31 - #4)
    MachineLocation LRDst(RAReg, -4);
    MachineLocation LRSrc(RAReg);
    Moves.push_back(MachineMove(0, LRDst, LRSrc));

    // R30 = (R30 - #8)
    MachineLocation SPDst(FPReg, -8);
    MachineLocation SPSrc(FPReg);
    Moves.push_back(MachineMove(0, SPDst, SPSrc));
  }

  //
  // Only insert ALLOCFRAME if we need to.
  //
  if (hasFP(MF)) {
    // Check for overflow.
    // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
    const int ALLOCFRAME_MAX = 16384;
    const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();

    if (NumBytes >= ALLOCFRAME_MAX) {
      // Emit allocframe(#0).
      BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(0);

      // Subtract offset from frame pointer.
      BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real),
                                      HEXAGON_RESERVED_REG_1).addImm(NumBytes);
      BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::SUB_rr),
                                      QRI->getStackRegister()).
                                      addReg(QRI->getStackRegister()).
                                      addReg(HEXAGON_RESERVED_REG_1);
    } else {
      BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(NumBytes);
    }
  }
}
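The overflow split above can be modeled in isolation: allocframe's immediate cannot encode frames of ALLOCFRAME_MAX (16384) bytes or more, so large frames allocate zero bytes up front and subtract the real size from the stack register explicitly. A minimal standalone sketch, with the emitted instructions rendered as text and the scratch register name purely illustrative:

// A standalone sketch of the allocframe overflow split above. The emitted
// instructions are printed as text and the scratch register name is purely
// illustrative; only the 16384-byte limit comes from the code above.
#include <cstdio>

void emitFrameAlloc(int NumBytes) {
  const int ALLOCFRAME_MAX = 16384; // same hardcoded limit as above
  if (NumBytes >= ALLOCFRAME_MAX) {
    std::printf("allocframe(#0)\n");            // allocate nothing up front
    std::printf("scratch = #%d\n", NumBytes);   // materialize the frame size
    std::printf("sp = sub(sp, scratch)\n");     // drop SP by hand instead
  } else {
    std::printf("allocframe(#%d)\n", NumBytes); // size fits in the immediate
  }
}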
Example#30
0
void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    // Label used to tie together the PROLOG_LABEL and the MachineMoves.
    MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();

    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (NumBytes && !canUseRedZone(MF)) {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    } else if (NumBytes) {
      ++NumRedZoneFunctions;
    }

    return;
  }

  // Only set up FP if we actually need to.
  int FPOffset = 0;
  if (HasFP) {
    // First instruction must a) allocate the stack and b) have an immediate
    // that is negative and even (a multiple of -2).
    assert((MBBI->getOpcode() == AArch64::STPXpre ||
            MBBI->getOpcode() == AArch64::STPDpre) &&
           MBBI->getOperand(3).getReg() == AArch64::SP &&
           MBBI->getOperand(4).getImm() < 0 &&
           (MBBI->getOperand(4).getImm() & 1) == 0);

    // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
    // required for the callee saved register area we get the frame pointer
    // by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
    FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
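    // Worked example: an STPXpre with getImm() == -4 pre-decrements SP by
    // 4*8 = 32 bytes (four saved registers), giving
    // FPOffset = -(-4 + 2) * 8 = 16, i.e. FP sits 16 bytes above the new SP.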
    assert(FPOffset >= 0 && "Bad Framepointer Offset");
  }

  // Move past the saves of the callee-saved registers.
  while (MBBI->getOpcode() == AArch64::STPXi ||
         MBBI->getOpcode() == AArch64::STPDi ||
         MBBI->getOpcode() == AArch64::STPXpre ||
         MBBI->getOpcode() == AArch64::STPDpre) {
    ++MBBI;
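    // Each STP saves one pair of 8-byte registers, i.e. 16 bytes of the
    // callee-save area.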
    NumBytes -= 16;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  if (HasFP) {
    // Issue    sub fp, sp, FPOffset or
    //          mov fp,sp          when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes);

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  //
  if (RegInfo->hasBasePointer(MF))
    TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);

  if (needsFrameMoves) {
    const DataLayout *TD = MF.getSubtarget().getDataLayout();
    const int StackGrowth = -TD->getPointerSize(0);
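    // getPointerSize(0) is 8 on AArch64, so StackGrowth is -8 (the stack
    // grows downwards).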
    unsigned FramePtr = RegInfo->getFrameRegister(MF);

    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xA, xB, [sp, #-offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
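      // 2 * StackGrowth is -16 here, which corresponds to the
      // ".cfi_def_cfa w29, 16" line in the worked example above.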
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Record the location of the stored LR
      unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Record the location of the stored FP
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Now emit the moves for whatever callee saved regs we have.
    emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
  }
}
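The CFI offsets in the worked example above follow directly from the save order and the 8-byte slot size. A minimal standalone sketch that reproduces them (plain C++, register list taken from the stack diagram):

// A standalone sketch reproducing the .cfi_offset values from the diagram:
// slots are 8 bytes, saved in the order w30, w29, w27, w28 below the CFA.
#include <cstdio>

int main() {
  const int StackGrowth = -8; // stack grows down in 8-byte slots
  const char *Regs[] = {"w30", "w29", "w27", "w28"}; // order from the diagram
  for (int i = 0; i < 4; ++i)
    std::printf(".cfi_offset %s, %d\n", Regs[i], (i + 1) * StackGrowth);
  // Prints offsets -8, -16, -24, -32, matching Ltmp2 through Ltmp5 above.
  return 0;
}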