void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
                                            MachineBasicBlock &MBB) const {
  uint64_t StackSize = MF.getFrameInfo()->getStackSize();
  if (!StackSize)
    return;
  const auto *TII = MF.getSubtarget().getInstrInfo();
  auto &MRI = MF.getRegInfo();
  unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  auto InsertPt = MBB.getFirstTerminator();
  DebugLoc DL;

  if (InsertPt != MBB.end()) {
    DL = InsertPt->getDebugLoc();
  }

  // Restore the stack pointer. Without FP its value is just SP32 - stacksize
  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
      .addImm(StackSize);
  auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), WebAssembly::SP32)
      .addReg(WebAssembly::SP32)
      .addReg(OffsetReg);
  // Re-use OffsetReg to hold the address of the stacktop
  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
      .addExternalSymbol(SPSymbol);
  auto *MMO = new MachineMemOperand(MachinePointerInfo(),
                                    MachineMemOperand::MOStore, 4, 4);
  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32)
      .addImm(0)
      .addReg(OffsetReg)
      .addReg(WebAssembly::SP32)
      .addMemOperand(MMO);
}
void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
                                     MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = std::prev(MBB.end());
  DebugLoc dl = MBBI->getDebugLoc();
  //
  // Only insert deallocframe if we need to.  Also at -O0.  See comment
  // in emitPrologue above.
  //
  if (hasFP(MF) || MF.getTarget().getOptLevel() == CodeGenOpt::None) {
    MachineBasicBlock::iterator MBBI = std::prev(MBB.end());
    MachineBasicBlock::iterator MBBI_end = MBB.end();

    const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
    // Handle EH_RETURN.
    if (MBBI->getOpcode() == Hexagon::EH_RETURN_JMPR) {
      assert(MBBI->getOperand(0).isReg() && "Offset should be in register!");
      BuildMI(MBB, MBBI, dl, TII.get(Hexagon::L2_deallocframe));
      BuildMI(MBB, MBBI, dl, TII.get(Hexagon::A2_add),
              Hexagon::R29).addReg(Hexagon::R29).addReg(Hexagon::R28);
      return;
    }
    // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher
    // versions.
    if (MF.getSubtarget<HexagonSubtarget>().hasV4TOps() &&
        MBBI->getOpcode() == Hexagon::JMPret && !DisableDeallocRet) {
      // Check for RESTORE_DEALLOC_RET_JMP_V4 call. Don't emit an extra DEALLOC
      // instruction if we encounter it.
      MachineBasicBlock::iterator BeforeJMPR =
        MBB.begin() == MBBI ? MBBI : std::prev(MBBI);
      if (BeforeJMPR != MBBI &&
          BeforeJMPR->getOpcode() == Hexagon::RESTORE_DEALLOC_RET_JMP_V4) {
        // Remove the JMPR node.
        MBB.erase(MBBI);
        return;
      }

      // Add dealloc_return.
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::L4_return));
      // Transfer the function live-out registers.
      MIB->copyImplicitOps(*MBB.getParent(), &*MBBI);
      // Remove the JUMPR node.
      MBB.erase(MBBI);
    } else { // Add deallocframe for V2 and V3, and V4 tail calls.
      // Check for RESTORE_DEALLOC_BEFORE_TAILCALL_V4. We don't need an extra
      // DEALLOCFRAME instruction after it.
      MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
      MachineBasicBlock::iterator I =
        Term == MBB.begin() ?  MBB.end() : std::prev(Term);
      if (I != MBB.end() &&
          I->getOpcode() == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4)
        return;

      BuildMI(MBB, MBBI, dl, TII.get(Hexagon::L2_deallocframe));
    }
  }
}
Exemple #3
0
static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
                                          const TargetRegisterInfo &TRI) {
  for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
       E = MBB.end(); I != E; ++I) {
    if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
      return true;
  }
  return false;
}
Exemple #4
0
/// enterIntvAtEnd - Enter openli at the end of MBB.
/// PhiMBB is a successor inside openli where a PHI value is created.
/// Currently, all entries must share the same PhiMBB.
void SplitEditor::enterIntvAtEnd(MachineBasicBlock &A, MachineBasicBlock &B) {
  assert(openli_ && "openIntv not called before enterIntvAtEnd");

  SlotIndex EndA = lis_.getMBBEndIdx(&A);
  VNInfo *CurVNIA = curli_->getVNInfoAt(EndA.getPrevIndex());
  if (!CurVNIA) {
    DEBUG(dbgs() << "    enterIntvAtEnd, curli not live out of BB#"
                 << A.getNumber() << ".\n");
    return;
  }

  // Add a phi kill value and live range out of A.
  VNInfo *VNIA = insertCopy(*openli_, A, A.getFirstTerminator());
  openli_->addRange(LiveRange(VNIA->def, EndA, VNIA));

  // FIXME: If this is the only entry edge, we don't need the extra PHI value.
  // FIXME: If there are multiple entry blocks (so not a loop), we need proper
  // SSA update.

  // Now look at the start of B.
  SlotIndex StartB = lis_.getMBBStartIdx(&B);
  SlotIndex EndB = lis_.getMBBEndIdx(&B);
  const LiveRange *CurB = curli_->getLiveRangeContaining(StartB);
  if (!CurB) {
    DEBUG(dbgs() << "    enterIntvAtEnd: curli not live in to BB#"
                 << B.getNumber() << ".\n");
    return;
  }

  VNInfo *VNIB = openli_->getVNInfoAt(StartB);
  if (!VNIB) {
    // Create a phi value.
    VNIB = openli_->getNextValue(SlotIndex(StartB, true), 0, false,
                                 lis_.getVNInfoAllocator());
    VNIB->setIsPHIDef(true);
    VNInfo *&mapVNI = valueMap_[CurB->valno];
    if (mapVNI) {
      // Multiple copies - must create PHI value.
      abort();
    } else {
      // This is the first copy of dupLR. Mark the mapping.
      mapVNI = VNIB;
    }

  }

  DEBUG(dbgs() << "    enterIntvAtEnd: " << *openli_ << '\n');
}
Exemple #5
0
void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
  this->MBB = &MBB;
  LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);

  PhysRegState.assign(TRI->getNumRegs(), regDisabled);
  assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");

  MachineBasicBlock::iterator MII = MBB.begin();

  // Add live-in registers as live.
  for (const MachineBasicBlock::RegisterMaskPair LI : MBB.liveins())
    if (MRI->isAllocatable(LI.PhysReg))
      definePhysReg(MII, LI.PhysReg, regReserved);

  VirtDead.clear();
  Coalesced.clear();

  // Otherwise, sequentially allocate each instruction in the MBB.
  for (MachineInstr &MI : MBB) {
    LLVM_DEBUG(
      dbgs() << "\n>> " << MI << "Regs:";
      dumpState()
    );

    // Special handling for debug values. Note that they are not allowed to
    // affect codegen of the other instructions in any way.
    if (MI.isDebugValue()) {
      handleDebugValue(MI);
      continue;
    }

    allocateInstruction(MI);
  }

  // Spill all physical registers holding virtual registers now.
  LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n");
  spillAll(MBB.getFirstTerminator());

  // Erase all the coalesced copies. We are delaying it until now because
  // LiveVirtRegs might refer to the instrs.
  for (MachineInstr *MI : Coalesced)
    MBB.erase(MI);
  NumCoalesced += Coalesced.size();

  LLVM_DEBUG(MBB.dump());
}
Exemple #6
0
/// When an instruction is found to only use loop invariant operands that is
/// safe to hoist, this instruction is called to do the dirty work.
void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
  MachineBasicBlock *Preheader = getCurPreheader();

  // Now move the instructions to the predecessor, inserting it before any
  // terminator instructions.
  DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
               << MI->getParent()->getNumber() << ": " << *MI);

  // Splice the instruction to the preheader.
  MachineBasicBlock *MBB = MI->getParent();
  Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);

  // Add register to livein list to all the BBs in the current loop since a
  // loop invariant must be kept live throughout the whole loop. This is
  // important to ensure later passes do not scavenge the def register.
  AddToLiveIns(Def);

  ++NumPostRAHoisted;
  Changed = true;
}
Exemple #7
0
// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
// when following the CFG edge to SuccMBB. This needs to be after any def of
// SrcReg, but before any subsequent point where control flow might jump out of
// the basic block.
MachineBasicBlock::iterator
llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
                                          MachineBasicBlock &SuccMBB,
                                          unsigned SrcReg) {
  // Handle the trivial case trivially.
  if (MBB.empty())
    return MBB.begin();

  // Usually, we just want to insert the copy before the first terminator
  // instruction. However, for the edge going to a landing pad, we must insert
  // the copy before the call/invoke instruction.
  if (!SuccMBB.isLandingPad())
    return MBB.getFirstTerminator();

  // Discover any defs/uses in this basic block.
  SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
         RE = MRI->reg_end(); RI != RE; ++RI) {
    MachineInstr *DefUseMI = &*RI;
    if (DefUseMI->getParent() == &MBB)
      DefUsesInMBB.insert(DefUseMI);
  }

  MachineBasicBlock::iterator InsertPoint;
  if (DefUsesInMBB.empty()) {
    // No defs.  Insert the copy at the start of the basic block.
    InsertPoint = MBB.begin();
  } else if (DefUsesInMBB.size() == 1) {
    // Insert the copy immediately after the def/use.
    InsertPoint = *DefUsesInMBB.begin();
    ++InsertPoint;
  } else {
    // Insert the copy immediately after the last def/use.
    InsertPoint = MBB.end();
    while (!DefUsesInMBB.count(&*--InsertPoint)) {}
    ++InsertPoint;
  }

  // Make sure the copy goes after any phi nodes however.
  return SkipPHIsAndLabels(MBB, InsertPoint);
}
// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg.
// This needs to be after any def or uses of SrcReg, but before any subsequent
// point where control flow might jump out of the basic block.
MachineBasicBlock::iterator
llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
                                          unsigned SrcReg) {
  // Handle the trivial case trivially.
  if (MBB.empty())
    return MBB.begin();

  // If this basic block does not contain an invoke, then control flow always
  // reaches the end of it, so place the copy there.  The logic below works in
  // this case too, but is more expensive.
  if (!isa<InvokeInst>(MBB.getBasicBlock()->getTerminator()))
    return MBB.getFirstTerminator();

  // Discover any definition/uses in this basic block.
  SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
       RE = MRI->reg_end(); RI != RE; ++RI) {
    MachineInstr *DefUseMI = &*RI;
    if (DefUseMI->getParent() == &MBB)
      DefUsesInMBB.insert(DefUseMI);
  }

  MachineBasicBlock::iterator InsertPoint;
  if (DefUsesInMBB.empty()) {
    // No def/uses.  Insert the copy at the start of the basic block.
    InsertPoint = MBB.begin();
  } else if (DefUsesInMBB.size() == 1) {
    // Insert the copy immediately after the definition/use.
    InsertPoint = *DefUsesInMBB.begin();
    ++InsertPoint;
  } else {
    // Insert the copy immediately after the last definition/use.
    InsertPoint = MBB.end();
    while (!DefUsesInMBB.count(&*--InsertPoint)) {}
    ++InsertPoint;
  }

  // Make sure the copy goes after any phi nodes however.
  return SkipPHIsAndLabels(MBB, InsertPoint);
}
void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
                                            MachineBasicBlock &MBB) const {
  auto &MFI = MF.getFrameInfo();
  uint64_t StackSize = MFI.getStackSize();
  if (!needsSP(MF, MFI) || !needsSPWriteback(MF, MFI)) return;
  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  auto &MRI = MF.getRegInfo();
  auto InsertPt = MBB.getFirstTerminator();
  DebugLoc DL;

  if (InsertPt != MBB.end())
    DL = InsertPt->getDebugLoc();

  // Restore the stack pointer. If we had fixed-size locals, add the offset
  // subtracted in the prolog.
  unsigned SPReg = 0;
  MachineBasicBlock::iterator InsertAddr = InsertPt;
  if (hasBP(MF)) {
    auto FI = MF.getInfo<WebAssemblyFunctionInfo>();
    SPReg = FI->getBasePointerVreg();
  } else if (StackSize) {
    const TargetRegisterClass *PtrRC =
        MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
    unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
    InsertAddr =
        BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
            .addImm(StackSize);
    // In the epilog we don't need to write the result back to the SP32 physreg
    // because it won't be used again. We can use a stackified register instead.
    SPReg = MRI.createVirtualRegister(PtrRC);
    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg)
        .addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32)
        .addReg(OffsetReg);
  } else {
    SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32;
  }

  writeSPToMemory(SPReg, MF, MBB, InsertAddr, InsertPt, DL);
}
/// Insert restore code for the callee-saved registers used in the function.
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
                              std::vector<CalleeSavedInfo> &CSI) {
  MachineFunction &MF = *RestoreBlock.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();

  // Restore all registers immediately before the return and any
  // terminators that precede it.
  MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();

  if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
    for (const CalleeSavedInfo &CI : reverse(CSI)) {
      unsigned Reg = CI.getReg();
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
      TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
      assert(I != RestoreBlock.begin() &&
             "loadRegFromStackSlot didn't insert any code!");
      // Insert in reverse order.  loadRegFromStackSlot can insert
      // multiple instructions.
    }
  }
}
Exemple #11
0
bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
  MDT = &getAnalysis<MachineDominatorTree>();

  // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
  // any other instructions that might clobber the ctr register.
  for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
       I != IE; ++I) {
    MachineBasicBlock *MBB = I;
    if (!MDT->isReachableFromEntry(MBB))
      continue;

    for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
      MIIE = MBB->end(); MII != MIIE; ++MII) {
      unsigned Opc = MII->getOpcode();
      if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
          Opc == PPC::BDZ8  || Opc == PPC::BDZ)
        if (!verifyCTRBranch(MBB, MII))
          llvm_unreachable("Invalid PPC CTR loop!");
    }
  }

  return false;
}
Exemple #12
0
/// Splits a MachineBasicBlock to branch before \p SplitBefore. The original
/// branch is \p OrigBranch. The target of the new branch can either be the same
/// as the target of the original branch or the fallthrough successor of the
/// original block as determined by \p BranchToFallThrough. The branch
/// conditions will be inverted according to \p InvertNewBranch and
/// \p InvertOrigBranch. If an instruction that previously fed the branch is to
/// be deleted, it is provided in \p MIToDelete and \p NewCond will be used as
/// the branch condition. The branch probabilities will be set if the
/// MachineBranchProbabilityInfo isn't null.
static bool splitMBB(BlockSplitInfo &BSI) {
  assert(BSI.allInstrsInSameMBB() &&
         "All instructions must be in the same block.");

  MachineBasicBlock *ThisMBB = BSI.OrigBranch->getParent();
  MachineFunction *MF = ThisMBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  assert(MRI->isSSA() && "Can only do this while the function is in SSA form.");
  if (ThisMBB->succ_size() != 2) {
    LLVM_DEBUG(
        dbgs() << "Don't know how to handle blocks that don't have exactly"
               << " two successors.\n");
    return false;
  }

  const PPCInstrInfo *TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  unsigned OrigBROpcode = BSI.OrigBranch->getOpcode();
  unsigned InvertedOpcode =
      OrigBROpcode == PPC::BC
          ? PPC::BCn
          : OrigBROpcode == PPC::BCn
                ? PPC::BC
                : OrigBROpcode == PPC::BCLR ? PPC::BCLRn : PPC::BCLR;
  unsigned NewBROpcode = BSI.InvertNewBranch ? InvertedOpcode : OrigBROpcode;
  MachineBasicBlock *OrigTarget = BSI.OrigBranch->getOperand(1).getMBB();
  MachineBasicBlock *OrigFallThrough = OrigTarget == *ThisMBB->succ_begin()
                                           ? *ThisMBB->succ_rbegin()
                                           : *ThisMBB->succ_begin();
  MachineBasicBlock *NewBRTarget =
      BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget;
  BranchProbability ProbToNewTarget =
      !BSI.MBPI ? BranchProbability::getUnknown()
                : BSI.MBPI->getEdgeProbability(ThisMBB, NewBRTarget);

  // Create a new basic block.
  MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
  MachineFunction::iterator It = ThisMBB->getIterator();
  MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(++It, NewMBB);

  // Move everything after SplitBefore into the new block.
  NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
  NewMBB->transferSuccessors(ThisMBB);

  // Add the two successors to ThisMBB. The probabilities come from the
  // existing blocks if available.
  ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
  ThisMBB->addSuccessor(NewMBB, ProbToNewTarget.getCompl());

  // Add the branches to ThisMBB.
  BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
          TII->get(NewBROpcode))
      .addReg(BSI.SplitCond->getOperand(0).getReg())
      .addMBB(NewBRTarget);
  BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
          TII->get(PPC::B))
      .addMBB(NewMBB);
  if (BSI.MIToDelete)
    BSI.MIToDelete->eraseFromParent();

  // Change the condition on the original branch and invert it if requested.
  auto FirstTerminator = NewMBB->getFirstTerminator();
  if (BSI.NewCond) {
    assert(FirstTerminator->getOperand(0).isReg() &&
           "Can't update condition of unconditional branch.");
    FirstTerminator->getOperand(0).setReg(BSI.NewCond->getOperand(0).getReg());
  }
  if (BSI.InvertOrigBranch)
    FirstTerminator->setDesc(TII->get(InvertedOpcode));

  // If any of the PHIs in the successors of NewMBB reference values that
  // now come from NewMBB, they need to be updated.
  for (auto *Succ : NewMBB->successors()) {
    updatePHIs(Succ, ThisMBB, NewMBB, MRI);
  }
  addIncomingValuesToPHIs(NewBRTarget, ThisMBB, NewMBB, MRI);

  LLVM_DEBUG(dbgs() << "After splitting, ThisMBB:\n"; ThisMBB->dump());
  LLVM_DEBUG(dbgs() << "NewMBB:\n"; NewMBB->dump());
  LLVM_DEBUG(dbgs() << "New branch-to block:\n"; NewBRTarget->dump());
  return true;
}
Exemple #13
0
/// Insert epilog code into the function.
/// For ARC, this inserts a call to a function that restores callee saved
/// registers onto the stack, when enough callee saved registers are required.
void ARCFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  LLVM_DEBUG(dbgs() << "Emit Epilogue: " << MF.getName() << "\n");
  auto *AFI = MF.getInfo<ARCFunctionInfo>();
  const ARCInstrInfo *TII = MF.getSubtarget<ARCSubtarget>().getInstrInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  uint64_t StackSize = MF.getFrameInfo().getStackSize();
  bool SavedBlink = false;
  unsigned AmountAboveFunclet = 0;
  // If we have variable sized frame objects, then we have to move
  // the stack pointer to a known spot (fp - StackSize).
  // Then, replace the frame pointer by (new) [sp,StackSize-4].
  // Then, move the stack pointer the rest of the way (sp = sp + StackSize).
  if (hasFP(MF)) {
    BuildMI(MBB, MBBI, DebugLoc(), TII->get(ARC::SUB_rru6), ARC::SP)
        .addReg(ARC::FP)
        .addImm(StackSize);
    AmountAboveFunclet += 4;
  }

  // Now, move the stack pointer to the bottom of the save area for the funclet.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  unsigned Last = determineLastCalleeSave(CSI);
  unsigned StackSlotsUsedByFunclet = 0;
  // Now, restore the callee save registers.
  if (UseSaveRestoreFunclet && Last > ARC::R14) {
    // BL to __ld_r13_to_<TRI->getRegAsmName()>
    StackSlotsUsedByFunclet = Last - ARC::R12;
    AmountAboveFunclet += 4 * (StackSlotsUsedByFunclet + 1);
    SavedBlink = true;
  }

  if (MFI.hasCalls() && !SavedBlink) {
    AmountAboveFunclet += 4;
    SavedBlink = true;
  }

  // Move the stack pointer up to the point of the funclet.
  if (StackSize - AmountAboveFunclet) {
    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::ADD_rru6))
        .addReg(ARC::SP)
        .addReg(ARC::SP)
        .addImm(StackSize - AmountAboveFunclet);
  }

  if (StackSlotsUsedByFunclet) {
    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::BL))
        .addExternalSymbol(load_funclet_name[Last - ARC::R15])
        .addReg(ARC::BLINK, RegState::Implicit | RegState::Kill);
    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::ADD_rru6))
        .addReg(ARC::SP)
        .addReg(ARC::SP)
        .addImm(4 * (StackSlotsUsedByFunclet));
  }
  // Now, pop blink if necessary.
  if (SavedBlink) {
    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::POP_S_BLINK));
  }
  // Now, pop fp if necessary.
  if (hasFP(MF)) {
    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::LD_AB_rs9))
        .addReg(ARC::SP, RegState::Define)
        .addReg(ARC::FP, RegState::Define)
        .addReg(ARC::SP)
        .addImm(4);
  }

  // Relieve the varargs area if necessary.
  if (MF.getFunction().isVarArg()) {
    // Add in the varargs area here first.
    LLVM_DEBUG(dbgs() << "Varargs\n");
    unsigned VarArgsBytes = MFI.getObjectSize(AFI->getVarArgsFrameIndex());
    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::ADD_rru6))
        .addReg(ARC::SP)
        .addReg(ARC::SP)
        .addImm(VarArgsBytes);
  }
}
/// Insert a BLOCK marker for branches to MBB (if needed).
static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF,
                             SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
                             const WebAssemblyInstrInfo &TII,
                             const MachineLoopInfo &MLI,
                             MachineDominatorTree &MDT,
                             WebAssemblyFunctionInfo &MFI) {
  // First compute the nearest common dominator of all forward non-fallthrough
  // predecessors so that we minimize the time that the BLOCK is on the stack,
  // which reduces overall stack height.
  MachineBasicBlock *Header = nullptr;
  bool IsBranchedTo = false;
  int MBBNumber = MBB.getNumber();
  for (MachineBasicBlock *Pred : MBB.predecessors())
    if (Pred->getNumber() < MBBNumber) {
      Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
      if (ExplicitlyBranchesTo(Pred, &MBB))
        IsBranchedTo = true;
    }
  if (!Header)
    return;
  if (!IsBranchedTo)
    return;

  assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors");
  MachineBasicBlock *LayoutPred = &*prev(MachineFunction::iterator(&MBB));

  // If the nearest common dominator is inside a more deeply nested context,
  // walk out to the nearest scope which isn't more deeply nested.
  for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) {
    if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) {
      if (ScopeTop->getNumber() > Header->getNumber()) {
        // Skip over an intervening scope.
        I = next(MachineFunction::iterator(ScopeTop));
      } else {
        // We found a scope level at an appropriate depth.
        Header = ScopeTop;
        break;
      }
    }
  }

  // If there's a loop which ends just before MBB which contains Header, we can
  // reuse its label instead of inserting a new BLOCK.
  for (MachineLoop *Loop = MLI.getLoopFor(LayoutPred);
       Loop && Loop->contains(LayoutPred); Loop = Loop->getParentLoop())
    if (Loop && LoopBottom(Loop) == LayoutPred && Loop->contains(Header))
      return;

  // Decide where in Header to put the BLOCK.
  MachineBasicBlock::iterator InsertPos;
  MachineLoop *HeaderLoop = MLI.getLoopFor(Header);
  if (HeaderLoop && MBB.getNumber() > LoopBottom(HeaderLoop)->getNumber()) {
    // Header is the header of a loop that does not lexically contain MBB, so
    // the BLOCK needs to be above the LOOP, after any END constructs.
    InsertPos = Header->begin();
    while (InsertPos->getOpcode() != WebAssembly::LOOP)
      ++InsertPos;
  } else {
    // Otherwise, insert the BLOCK as late in Header as we can, but before the
    // beginning of the local expression tree and any nested BLOCKs.
    InsertPos = Header->getFirstTerminator();
    while (InsertPos != Header->begin() && IsChild(prev(InsertPos), MFI) &&
           prev(InsertPos)->getOpcode() != WebAssembly::LOOP &&
           prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK &&
           prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP)
      --InsertPos;
  }

  // Add the BLOCK.
  BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK));

  // Mark the end of the block.
  InsertPos = MBB.begin();
  while (InsertPos != MBB.end() &&
         InsertPos->getOpcode() == WebAssembly::END_LOOP)
    ++InsertPos;
  BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::END_BLOCK));

  // Track the farthest-spanning scope that ends at this point.
  int Number = MBB.getNumber();
  if (!ScopeTops[Number] ||
      ScopeTops[Number]->getNumber() > Header->getNumber())
    ScopeTops[Number] = Header;
}
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
      RetOpcode == AArch64::TCRETURNri;
  }
  int NumBytes = MFI->getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (NumRestores * 8)  |         |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.
  NumBytes += ArgumentPopSize;

  unsigned NumRestores = 0;
  // Move past the restores of the callee-saved registers.
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    unsigned Restores = getNumCSRestores(*LastPopI, CSRegs);
    NumRestores += Restores;
    if (Restores == 0) {
      ++LastPopI;
      break;
    }
  }
  NumBytes -= NumRestores * 8;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes,
                      TII);
    return;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -(NumRestores - 2) * 8, TII, MachineInstr::NoFlags);
}
Exemple #16
0
/// EmitSchedule - Emit the machine code in scheduled order. Return the new
/// InsertPos and MachineBasicBlock that contains this insertion
/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
/// not necessarily refer to returned BB. The emitter may split blocks.
MachineBasicBlock *ScheduleDAGSDNodes::
EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
  InstrEmitter Emitter(BB, InsertPos);
  DenseMap<SDValue, unsigned> VRBaseMap;
  DenseMap<SUnit*, unsigned> CopyVRBaseMap;
  SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
  SmallSet<unsigned, 8> Seen;
  bool HasDbg = DAG->hasDebugValues();

  // Emit a node, and determine where its first instruction is for debuginfo.
  // Zero, one, or multiple instructions can be created when emitting a node.
  auto EmitNode =
      [&](SDNode *Node, bool IsClone, bool IsCloned,
          DenseMap<SDValue, unsigned> &VRBaseMap) -> MachineInstr * {
    // Fetch instruction prior to this, or end() if nonexistant.
    auto GetPrevInsn = [&](MachineBasicBlock::iterator I) {
      if (I == BB->begin())
        return BB->end();
      else
        return std::prev(Emitter.getInsertPos());
    };

    MachineBasicBlock::iterator Before = GetPrevInsn(Emitter.getInsertPos());
    Emitter.EmitNode(Node, IsClone, IsCloned, VRBaseMap);
    MachineBasicBlock::iterator After = GetPrevInsn(Emitter.getInsertPos());

    // If the iterator did not change, no instructions were inserted.
    if (Before == After)
      return nullptr;

    if (Before == BB->end()) {
      // There were no prior instructions; the new ones must start at the
      // beginning of the block.
      return &Emitter.getBlock()->instr_front();
    } else {
      // Return first instruction after the pre-existing instructions.
      return &*std::next(Before);
    }
  };

  // If this is the first BB, emit byval parameter dbg_value's.
  if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
    SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
    SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
    for (; PDI != PDE; ++PDI) {
      MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
      if (DbgMI) {
        BB->insert(InsertPos, DbgMI);
        // We re-emit the dbg_value closer to its use, too, after instructions
        // are emitted to the BB.
        (*PDI)->clearIsEmitted();
      }
    }
  }

  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    SUnit *SU = Sequence[i];
    if (!SU) {
      // Null SUnit* is a noop.
      TII->insertNoop(*Emitter.getBlock(), InsertPos);
      continue;
    }

    // For pre-regalloc scheduling, create instructions corresponding to the
    // SDNode and any glued SDNodes and append them to the block.
    if (!SU->getNode()) {
      // Emit a copy.
      EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
      continue;
    }

    SmallVector<SDNode *, 4> GluedNodes;
    for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
      GluedNodes.push_back(N);
    while (!GluedNodes.empty()) {
      SDNode *N = GluedNodes.back();
      auto NewInsn = EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
      // Remember the source order of the inserted instruction.
      if (HasDbg)
        ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
      GluedNodes.pop_back();
    }
    auto NewInsn =
        EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap);
    // Remember the source order of the inserted instruction.
    if (HasDbg)
      ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
                        NewInsn);
  }

  // Insert all the dbg_values which have not already been inserted in source
  // order sequence.
  if (HasDbg) {
    MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();

    // Sort the source order instructions and use the order to insert debug
    // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
    // regardless of the host's implementation fo std::sort.
    std::stable_sort(Orders.begin(), Orders.end(), less_first());
    std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(),
                     [](const SDDbgValue *LHS, const SDDbgValue *RHS) {
                       return LHS->getOrder() < RHS->getOrder();
                     });

    SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
    SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
    // Now emit the rest according to source order.
    unsigned LastOrder = 0;
    for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
      unsigned Order = Orders[i].first;
      MachineInstr *MI = Orders[i].second;
      // Insert all SDDbgValue's whose order(s) are before "Order".
      assert(MI);
      for (; DI != DE; ++DI) {
        if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
          break;
        if ((*DI)->isEmitted())
          continue;

        MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
        if (DbgMI) {
          if (!LastOrder)
            // Insert to start of the BB (after PHIs).
            BB->insert(BBBegin, DbgMI);
          else {
            // Insert at the instruction, which may be in a different
            // block, if the block was split by a custom inserter.
            MachineBasicBlock::iterator Pos = MI;
            MI->getParent()->insert(Pos, DbgMI);
          }
        }
      }
      LastOrder = Order;
    }
    // Add trailing DbgValue's before the terminator. FIXME: May want to add
    // some of them before one or more conditional branches?
    SmallVector<MachineInstr*, 8> DbgMIs;
    for (; DI != DE; ++DI) {
      if ((*DI)->isEmitted())
        continue;
      assert((*DI)->getOrder() >= LastOrder &&
             "emitting DBG_VALUE out of order");
      if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
        DbgMIs.push_back(DbgMI);
    }

    MachineBasicBlock *InsertBB = Emitter.getBlock();
    MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
    InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());

    SDDbgInfo::DbgLabelIterator DLI = DAG->DbgLabelBegin();
    SDDbgInfo::DbgLabelIterator DLE = DAG->DbgLabelEnd();
    // Now emit the rest according to source order.
    LastOrder = 0;
    for (const auto &InstrOrder : Orders) {
      unsigned Order = InstrOrder.first;
      MachineInstr *MI = InstrOrder.second;
      if (!MI)
        continue;

      // Insert all SDDbgLabel's whose order(s) are before "Order".
      for (; DLI != DLE &&
             (*DLI)->getOrder() >= LastOrder && (*DLI)->getOrder() < Order;
             ++DLI) {
        MachineInstr *DbgMI = Emitter.EmitDbgLabel(*DLI);
        if (DbgMI) {
          if (!LastOrder)
            // Insert to start of the BB (after PHIs).
            BB->insert(BBBegin, DbgMI);
          else {
            // Insert at the instruction, which may be in a different
            // block, if the block was split by a custom inserter.
            MachineBasicBlock::iterator Pos = MI;
            MI->getParent()->insert(Pos, DbgMI);
          }
        }
      }
      if (DLI == DLE)
        break;

      LastOrder = Order;
    }
  }

  InsertPos = Emitter.getInsertPos();
  return Emitter.getBlock();
}
Exemple #17
0
/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
/// of its predecessors.
bool
TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
                                 SmallVector<MachineBasicBlock*, 8> &TDBBs,
                                 SmallVector<MachineInstr*, 16> &Copies) {
  if (!shouldTailDuplicate(MF, *TailBB))
    return false;

  DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');

  // Iterate through all the unique predecessors and tail-duplicate this
  // block into them, if possible. Copying the list ahead of time also
  // avoids trouble with the predecessor list reallocating.
  bool Changed = false;
  SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
                                              TailBB->pred_end());
  DenseSet<unsigned> UsedByPhi;
  getRegsUsedByPHIs(*TailBB, &UsedByPhi);
  for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
       PE = Preds.end(); PI != PE; ++PI) {
    MachineBasicBlock *PredBB = *PI;

    assert(TailBB != PredBB &&
           "Single-block loop should have been rejected earlier!");
    // EH edges are ignored by AnalyzeBranch.
    if (PredBB->succ_size() > 1)
      continue;

    MachineBasicBlock *PredTBB, *PredFBB;
    SmallVector<MachineOperand, 4> PredCond;
    if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
      continue;
    if (!PredCond.empty())
      continue;
    // Don't duplicate into a fall-through predecessor (at least for now).
    if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
      continue;

    DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
                 << "From Succ: " << *TailBB);

    TDBBs.push_back(PredBB);

    // Remove PredBB's unconditional branch.
    TII->RemoveBranch(*PredBB);

    // Clone the contents of TailBB into PredBB.
    DenseMap<unsigned, unsigned> LocalVRMap;
    SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
    MachineBasicBlock::iterator I = TailBB->begin();
    while (I != TailBB->end()) {
      MachineInstr *MI = &*I;
      ++I;
      if (MI->isPHI()) {
        // Replace the uses of the def of the PHI with the register coming
        // from PredBB.
        ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
      } else {
        // Replace def of virtual registers with new registers, and update
        // uses with PHI source register or the new registers.
        DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
      }
    }
    MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
    for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
      Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
                               TII->get(TargetOpcode::COPY),
                               CopyInfos[i].first).addReg(CopyInfos[i].second));
    }

    // Simplify
    TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);

    NumInstrDups += TailBB->size() - 1; // subtract one for removed branch

    // Update the CFG.
    PredBB->removeSuccessor(PredBB->succ_begin());
    assert(PredBB->succ_empty() &&
           "TailDuplicate called on block with multiple successors!");
    for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
           E = TailBB->succ_end(); I != E; ++I)
      PredBB->addSuccessor(*I);

    Changed = true;
    ++NumTailDups;
  }

  // If TailBB was duplicated into all its predecessors except for the prior
  // block, which falls through unconditionally, move the contents of this
  // block into the prior block.
  MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB));
  MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
  SmallVector<MachineOperand, 4> PriorCond;
  // This has to check PrevBB->succ_size() because EH edges are ignored by
  // AnalyzeBranch.
  if (PrevBB->succ_size() == 1 && 
      !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
      PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
      !TailBB->hasAddressTaken()) {
    DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
          << "From MBB: " << *TailBB);
    if (PreRegAlloc) {
      DenseMap<unsigned, unsigned> LocalVRMap;
      SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
      MachineBasicBlock::iterator I = TailBB->begin();
      // Process PHI instructions first.
      while (I != TailBB->end() && I->isPHI()) {
        // Replace the uses of the def of the PHI with the register coming
        // from PredBB.
        MachineInstr *MI = &*I++;
        ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
        if (MI->getParent())
          MI->eraseFromParent();
      }

      // Now copy the non-PHI instructions.
      while (I != TailBB->end()) {
        // Replace def of virtual registers with new registers, and update
        // uses with PHI source register or the new registers.
        MachineInstr *MI = &*I++;
        DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
        MI->eraseFromParent();
      }
      MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
      for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
        Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
                                 TII->get(TargetOpcode::COPY),
                                 CopyInfos[i].first)
                           .addReg(CopyInfos[i].second));
      }
    } else {
      // No PHIs to worry about, just splice the instructions over.
      PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
    }
    PrevBB->removeSuccessor(PrevBB->succ_begin());
    assert(PrevBB->succ_empty());
    PrevBB->transferSuccessors(TailBB);
    TDBBs.push_back(PrevBB);
    Changed = true;
  }

  // If this is after register allocation, there are no phis to fix.
  if (!PreRegAlloc)
    return Changed;

  // If we made no changes so far, we are safe.
  if (!Changed)
    return Changed;


  // Handle the nasty case in that we duplicated a block that is part of a loop
  // into some but not all of its predecessors. For example:
  //    1 -> 2 <-> 3                 |
  //          \                      |
  //           \---> rest            |
  // if we duplicate 2 into 1 but not into 3, we end up with
  // 12 -> 3 <-> 2 -> rest           |
  //   \             /               |
  //    \----->-----/                |
  // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
  // with a phi in 3 (which now dominates 2).
  // What we do here is introduce a copy in 3 of the register defined by the
  // phi, just like when we are duplicating 2 into 3, but we don't copy any
  // real instructions or remove the 3 -> 2 edge from the phi in 2.
  for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
       PE = Preds.end(); PI != PE; ++PI) {
    MachineBasicBlock *PredBB = *PI;
    if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
      continue;

    // EH edges
    if (PredBB->succ_size() != 1)
      continue;

    DenseMap<unsigned, unsigned> LocalVRMap;
    SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
    MachineBasicBlock::iterator I = TailBB->begin();
    // Process PHI instructions first.
    while (I != TailBB->end() && I->isPHI()) {
      // Replace the uses of the def of the PHI with the register coming
      // from PredBB.
      MachineInstr *MI = &*I++;
      ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
    }
    MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
    for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
      Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
                               TII->get(TargetOpcode::COPY),
                               CopyInfos[i].first).addReg(CopyInfos[i].second));
    }
  }

  return Changed;
}
bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
                                              bool DoIt) const {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  const ThumbRegisterInfo *RegInfo =
      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());

  // If MBBI is a return instruction, we may be able to directly restore
  // LR in the PC.
  // This is possible if we do not need to emit any SP update.
  // Otherwise, we need a temporary register to pop the value
  // and copy that value into LR.
  auto MBBI = MBB.getFirstTerminator();
  if (!ArgRegsSaveSize && MBBI != MBB.end() &&
      MBBI->getOpcode() == ARM::tBX_RET) {
    if (!DoIt)
      return true;
    MachineInstrBuilder MIB =
        AddDefaultPred(
            BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)))
            .addReg(ARM::PC, RegState::Define);
    MIB.copyImplicitOps(&*MBBI);
    // erase the old tBX_RET instruction
    MBB.erase(MBBI);
    return true;
  }

  // Look for a temporary register to use.
  // First, compute the liveness information.
  LivePhysRegs UsedRegs(STI.getRegisterInfo());
  UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true);
  // The semantic of pristines changed recently and now,
  // the callee-saved registers that are touched in the function
  // are not part of the pristines set anymore.
  // Add those callee-saved now.
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i)
    UsedRegs.addReg(CSRegs[i]);

  DebugLoc dl = DebugLoc();
  if (MBBI != MBB.end()) {
    dl = MBBI->getDebugLoc();
    auto InstUpToMBBI = MBB.end();
    // The post-decrement is on purpose here.
    // We want to have the liveness right before MBBI.
    while (InstUpToMBBI-- != MBBI)
      UsedRegs.stepBackward(*InstUpToMBBI);
  }

  // Look for a register that can be directly use in the POP.
  unsigned PopReg = 0;
  // And some temporary register, just in case.
  unsigned TemporaryReg = 0;
  BitVector PopFriendly =
      TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID));
  assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
  // Rebuild the GPRs from the high registers because they are removed
  // form the GPR reg class for thumb1.
  BitVector GPRsNoLRSP =
      TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID));
  GPRsNoLRSP |= PopFriendly;
  GPRsNoLRSP.reset(ARM::LR);
  GPRsNoLRSP.reset(ARM::SP);
  GPRsNoLRSP.reset(ARM::PC);
  for (int Register = GPRsNoLRSP.find_first(); Register != -1;
       Register = GPRsNoLRSP.find_next(Register)) {
    if (!UsedRegs.contains(Register)) {
      // Remember the first pop-friendly register and exit.
      if (PopFriendly.test(Register)) {
        PopReg = Register;
        TemporaryReg = 0;
        break;
      }
      // Otherwise, remember that the register will be available to
      // save a pop-friendly register.
      TemporaryReg = Register;
    }
  }

  if (!DoIt && !PopReg && !TemporaryReg)
    return false;

  assert((PopReg || TemporaryReg) && "Cannot get LR");

  if (TemporaryReg) {
    assert(!PopReg && "Unnecessary MOV is about to be inserted");
    PopReg = PopFriendly.find_first();
    AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
                       .addReg(TemporaryReg, RegState::Define)
                       .addReg(PopReg, RegState::Kill));
  }

  assert(PopReg && "Do not know how to get LR");
  AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
      .addReg(PopReg, RegState::Define);

  emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);

  if (!TemporaryReg && MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
                                  .addReg(PopReg, RegState::Kill);
    AddDefaultPred(MIB);
    MIB.copyImplicitOps(&*MBBI);
    // erase the old tBX_RET instruction
    MBB.erase(MBBI);
    return true;
  }

  AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
                     .addReg(ARM::LR, RegState::Define)
                     .addReg(PopReg, RegState::Kill));

  if (TemporaryReg) {
    AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
                       .addReg(PopReg, RegState::Define)
                       .addReg(TemporaryReg, RegState::Kill));
  }

  return true;
}
Exemple #19
0
/// getTripCount - Return a loop-invariant LLVM value indicating the
/// number of times the loop will be executed.  The trip count can
/// be either a register or a constant value.  If the trip-count
/// cannot be determined, this returns null.
///
/// We find the trip count from the phi instruction that defines the
/// induction variable.  We follow the links to the CMP instruction
/// to get the trip count.
///
/// Based upon getTripCount in LoopInfo.
///
CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
                           SmallVector<MachineInstr *, 2> &OldInsts) const {
  MachineBasicBlock *LastMBB = L->getExitingBlock();
  // Don't generate a CTR loop if the loop has more than one exit.
  if (LastMBB == 0)
    return 0;

  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
  if (LastI->getOpcode() != PPC::BCC)
    return 0;

  // We need to make sure that this compare is defining the condition
  // register actually used by the terminating branch.

  unsigned PredReg = LastI->getOperand(1).getReg();
  DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI);

  unsigned PredCond = LastI->getOperand(0).getImm();
  if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
    return 0;

  // Check that the loop has a induction variable.
  SmallVector<MachineInstr *, 4> IVars, IOps;
  getCanonicalInductionVariable(L, IVars, IOps);
  for (unsigned i = 0; i < IVars.size(); ++i) {
    MachineInstr *IOp = IOps[i];
    MachineInstr *IV_Inst = IVars[i];

    // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm',
    //  if Imm is 0, get the count from the PHI opnd
    //  if Imm is -M, than M is the count
    //  Otherwise, Imm is the count
    MachineOperand *IV_Opnd;
    const MachineOperand *InitialValue;
    if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
      InitialValue = &IV_Inst->getOperand(1);
      IV_Opnd = &IV_Inst->getOperand(3);
    } else {
      InitialValue = &IV_Inst->getOperand(3);
      IV_Opnd = &IV_Inst->getOperand(1);
    }

    DEBUG(dbgs() << "Considering:\n");
    DEBUG(dbgs() << "  induction operation: " << *IOp);
    DEBUG(dbgs() << "  induction variable: " << *IV_Inst);
    DEBUG(dbgs() << "  initial value: " << *InitialValue << "\n");
  
    // Look for the cmp instruction to determine if we
    // can get a useful trip count.  The trip count can
    // be either a register or an immediate.  The location
    // of the value depends upon the type (reg or imm).
    for (MachineRegisterInfo::reg_iterator
         RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
         RI != RE; ++RI) {
      IV_Opnd = &RI.getOperand();
      bool SignedCmp, Int64Cmp;
      MachineInstr *MI = IV_Opnd->getParent();
      if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) &&
          MI->getOperand(0).getReg() == PredReg) {

        OldInsts.push_back(MI);
        OldInsts.push_back(IOp);
 
        DEBUG(dbgs() << "  compare: " << *MI);
 
        const MachineOperand &MO = MI->getOperand(2);
        assert(MO.isImm() && "IV Cmp Operand should be an immediate");

        int64_t ImmVal;
        if (SignedCmp)
          ImmVal = (short) MO.getImm();
        else
          ImmVal = MO.getImm();
  
        const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
        assert(L->contains(IV_DefInstr->getParent()) &&
               "IV definition should occurs in loop");
        int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm();
  
        assert(InitialValue->isReg() && "Expecting register for init value");
        unsigned InitialValueReg = InitialValue->getReg();
  
        MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
  
        // Here we need to look for an immediate load (an li or lis/ori pair).
        if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
                         DefInstr->getOpcode() == PPC::ORI)) {
          int64_t start = DefInstr->getOperand(2).getImm();
          MachineInstr *DefInstr2 =
            MRI->getVRegDef(DefInstr->getOperand(1).getReg());
          if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
                            DefInstr2->getOpcode() == PPC::LIS)) {
            DEBUG(dbgs() << "  initial constant: " << *DefInstr);
            DEBUG(dbgs() << "  initial constant: " << *DefInstr2);

            start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16;
  
            int64_t count = ImmVal - start;
            if ((count % iv_value) != 0) {
              return 0;
            }

            OldInsts.push_back(DefInstr);
            OldInsts.push_back(DefInstr2);

            // count/iv_value, the trip count, should be positive here. If it
            // is negative, that indicates that the counter will wrap.
            if (Int64Cmp)
              return new CountValue(count/iv_value);
            else
              return new CountValue(uint32_t(count/iv_value));
          }
        } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
                                DefInstr->getOpcode() == PPC::LI)) {
          DEBUG(dbgs() << "  initial constant: " << *DefInstr);

          int64_t count = ImmVal -
            int64_t(short(DefInstr->getOperand(1).getImm()));
          if ((count % iv_value) != 0) {
            return 0;
          }

          OldInsts.push_back(DefInstr);

          if (Int64Cmp)
            return new CountValue(count/iv_value);
          else
            return new CountValue(uint32_t(count/iv_value));
        } else if (iv_value == 1 || iv_value == -1) {
          // We can't determine a constant starting value.
          if (ImmVal == 0) {
            return new CountValue(InitialValueReg, iv_value > 0);
          }
          // FIXME: handle non-zero end value.
        }
        // FIXME: handle non-unit increments (we might not want to introduce
        // division but we can handle some 2^n cases with shifts).
  
      }
    }
  }
  return 0;
}
Exemple #20
0
/// converToCTRLoop - check if the loop is a candidate for
/// converting to a CTR loop.  If so, then perform the
/// transformation.
///
/// This function works on innermost loops first.  A loop can
/// be converted if it is a counting loop; either a register
/// value or an immediate.
///
/// The code makes several assumptions about the representation
/// of the loop in llvm.
bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
  bool Changed = false;
  // Process nested loops first.
  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    Changed |= convertToCTRLoop(*I);
  }
  // If a nested loop has been converted, then we can't convert this loop.
  if (Changed) {
    return Changed;
  }

  SmallVector<MachineInstr *, 2> OldInsts;
  // Are we able to determine the trip count for the loop?
  CountValue *TripCount = getTripCount(L, OldInsts);
  if (TripCount == 0) {
    DEBUG(dbgs() << "failed to get trip count!\n");
    return false;
  }

  if (TripCount->isImm()) {
    DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n");

    // FIXME: We currently can't form 64-bit constants
    // (including 32-bit unsigned constants)
    if (!isInt<32>(TripCount->getImm()))
      return false;
  }

  // Does the loop contain any invalid instructions?
  if (containsInvalidInstruction(L)) {
    return false;
  }
  MachineBasicBlock *Preheader = L->getLoopPreheader();
  // No preheader means there's not place for the loop instr.
  if (Preheader == 0) {
    return false;
  }
  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();

  DebugLoc dl;
  if (InsertPos != Preheader->end())
    dl = InsertPos->getDebugLoc();

  MachineBasicBlock *LastMBB = L->getExitingBlock();
  // Don't generate CTR loop if the loop has more than one exit.
  if (LastMBB == 0) {
    return false;
  }
  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();

  // Determine the loop start.
  MachineBasicBlock *LoopStart = L->getTopBlock();
  if (L->getLoopLatch() != LastMBB) {
    // When the exit and latch are not the same, use the latch block as the
    // start.
    // The loop start address is used only after the 1st iteration, and the loop
    // latch may contains instrs. that need to be executed after the 1st iter.
    LoopStart = L->getLoopLatch();
    // Make sure the latch is a successor of the exit, otherwise it won't work.
    if (!LastMBB->isSuccessor(LoopStart)) {
      return false;
    }
  }

  // Convert the loop to a CTR loop
  DEBUG(dbgs() << "Change to CTR loop at "; L->dump());

  MachineFunction *MF = LastMBB->getParent();
  const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget<PPCSubtarget>();
  bool isPPC64 = Subtarget.isPPC64();

  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
  const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;

  unsigned CountReg;
  if (TripCount->isReg()) {
    // Create a copy of the loop count register.
    const TargetRegisterClass *SrcRC =
      MF->getRegInfo().getRegClass(TripCount->getReg());
    CountReg = MF->getRegInfo().createVirtualRegister(RC);
    unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ?
                        (unsigned) PPC::EXTSW_32_64 :
                        (unsigned) TargetOpcode::COPY;
    BuildMI(*Preheader, InsertPos, dl,
            TII->get(CopyOp), CountReg).addReg(TripCount->getReg());
    if (TripCount->isNeg()) {
      unsigned CountReg1 = CountReg;
      CountReg = MF->getRegInfo().createVirtualRegister(RC);
      BuildMI(*Preheader, InsertPos, dl,
              TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG),
                       CountReg).addReg(CountReg1);
    }
  } else {
    assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
    // Put the trip count in a register for transfer into the count register.

    int64_t CountImm = TripCount->getImm();
    if (TripCount->isNeg())
      CountImm = -CountImm;

    CountReg = MF->getRegInfo().createVirtualRegister(RC);
    if (abs64(CountImm) > 0x7FFF) {
      BuildMI(*Preheader, InsertPos, dl,
              TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
              CountReg).addImm((CountImm >> 16) & 0xFFFF);
      unsigned CountReg1 = CountReg;
      CountReg = MF->getRegInfo().createVirtualRegister(RC);
      BuildMI(*Preheader, InsertPos, dl,
              TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
              CountReg).addReg(CountReg1).addImm(CountImm & 0xFFFF);
    } else {
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ThumbRegisterInfo *RegInfo =
      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
  const Thumb1InstrInfo &TII =
      *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());

  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  int NumBytes = (int)MFI->getStackSize();
  assert((unsigned)NumBytes >= ArgRegsSaveSize &&
         "ArgRegsSaveSize is included in NumBytes");
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  unsigned FramePtr = RegInfo->getFrameRegister(MF);

  if (!AFI->hasStackFrame()) {
    if (NumBytes - ArgRegsSaveSize != 0)
      emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes - ArgRegsSaveSize);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    if (MBBI != MBB.begin()) {
      do
        --MBBI;
      while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
      if (!isCSRestore(MBBI, CSRegs))
        ++MBBI;
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
                 AFI->getGPRCalleeSavedArea2Size() +
                 AFI->getDPRCalleeSavedAreaSize() +
                 ArgRegsSaveSize);

    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      // Reset SP based on frame pointer only if the stack frame extends beyond
      // frame pointer stack slot, the target is ELF and the function has FP, or
      // the target uses var sized objects.
      if (NumBytes) {
        assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&
               "No scratch register to restore SP from FP!");
        emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                  TII, *RegInfo);
        AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
                               ARM::SP)
          .addReg(ARM::R4));
      } else
        AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
                               ARM::SP)
          .addReg(FramePtr));
    } else {
      if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
          &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
        MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
        if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes))
          emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
      } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes))
        emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
    }
  }

  if (needPopSpecialFixUp(MF)) {
    bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true);
    (void)Done;
    assert(Done && "Emission of the special fixup failed!?");
  }
}
Exemple #22
0
/// ComputeLocalLiveness - Computes liveness of registers within a basic
/// block, setting the killed/dead flags as appropriate.
void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  // Keep track of the most recently seen previous use or def of each reg, 
  // so that we can update them with dead/kill markers.
  DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
       I != E; ++I) {
    if (I->isDebugValue())
      continue;
    
    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = I->getOperand(i);
      // Uses don't trigger any flags, but we need to save
      // them for later.  Also, we have to process these
      // _before_ processing the defs, since an instr
      // uses regs before it defs them.
      if (!MO.isReg() || !MO.getReg() || !MO.isUse())
        continue;
      
      LastUseDef[MO.getReg()] = std::make_pair(I, i);
      
      if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
      
      const unsigned *Aliases = TRI->getAliasSet(MO.getReg());
      if (Aliases == 0)
        continue;
      
      while (*Aliases) {
        DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
          alias = LastUseDef.find(*Aliases);
        
        if (alias != LastUseDef.end() && alias->second.first != I)
          LastUseDef[*Aliases] = std::make_pair(I, i);
        
        ++Aliases;
      }
    }
    
    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = I->getOperand(i);
      // Defs others than 2-addr redefs _do_ trigger flag changes:
      //   - A def followed by a def is dead
      //   - A use followed by a def is a kill
      if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue;
      
      DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
        last = LastUseDef.find(MO.getReg());
      if (last != LastUseDef.end()) {
        // Check if this is a two address instruction.  If so, then
        // the def does not kill the use.
        if (last->second.first == I &&
            I->isRegTiedToUseOperand(i))
          continue;
        
        MachineOperand &lastUD =
                    last->second.first->getOperand(last->second.second);
        if (lastUD.isDef())
          lastUD.setIsDead(true);
        else
          lastUD.setIsKill(true);
      }
      
      LastUseDef[MO.getReg()] = std::make_pair(I, i);
    }
  }
  
  // Live-out (of the function) registers contain return values of the function,
  // so we need to make sure they are alive at return time.
  MachineBasicBlock::iterator Ret = MBB.getFirstTerminator();
  bool BBEndsInReturn = (Ret != MBB.end() && Ret->getDesc().isReturn());

  if (BBEndsInReturn)
    for (MachineRegisterInfo::liveout_iterator
         I = MF->getRegInfo().liveout_begin(),
         E = MF->getRegInfo().liveout_end(); I != E; ++I)
      if (!Ret->readsRegister(*I)) {
        Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
        LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1);
      }
  
  // Finally, loop over the final use/def of each reg 
  // in the block and determine if it is dead.
  for (DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
       I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) {
    MachineInstr *MI = I->second.first;
    unsigned idx = I->second.second;
    MachineOperand &MO = MI->getOperand(idx);
    
    bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg());
    
    // A crude approximation of "live-out" calculation
    bool usedOutsideBlock = isPhysReg ? false :   
          UsedInMultipleBlocks.test(MO.getReg() -  
                                    TargetRegisterInfo::FirstVirtualRegister);

    // If the machine BB ends in a return instruction, then the value isn't used
    // outside of the BB.
    if (!isPhysReg && (!usedOutsideBlock || BBEndsInReturn)) {
      // DBG_VALUE complicates this:  if the only refs of a register outside
      // this block are DBG_VALUE, we can't keep the reg live just for that,
      // as it will cause the reg to be spilled at the end of this block when
      // it wouldn't have been otherwise.  Nullify the DBG_VALUEs when that
      // happens.
      bool UsedByDebugValueOnly = false;
      for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
             UE = MRI.reg_end(); UI != UE; ++UI) {
        // Two cases:
        // - used in another block
        // - used in the same block before it is defined (loop)
        if (UI->getParent() == &MBB &&
            !(MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI)))
          continue;
        
        if (UI->isDebugValue()) {
          UsedByDebugValueOnly = true;
          continue;
        }

        // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone.
        UsedInMultipleBlocks.set(MO.getReg() - 
                                 TargetRegisterInfo::FirstVirtualRegister);
        usedOutsideBlock = true;
        UsedByDebugValueOnly = false;
        break;
      }

      if (UsedByDebugValueOnly)
        for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
             UE = MRI.reg_end(); UI != UE; ++UI)
          if (UI->isDebugValue() &&
              (UI->getParent() != &MBB ||
               (MO.isDef() && precedes(&*UI, MI))))
            UI.getOperand().setReg(0U);
    }
  
    // Physical registers and those that are not live-out of the block are
    // killed/dead at their last use/def within this block.
    if (isPhysReg || !usedOutsideBlock || BBEndsInReturn) {
      if (MO.isUse()) {
        // Don't mark uses that are tied to defs as kills.
        if (!MI->isRegTiedToDefOperand(idx))
          MO.setIsKill(true);
      } else {
        MO.setIsDead(true);
      }
    }
  }
}
bool Thumb1FrameLowering::
restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI,
                            std::vector<CalleeSavedInfo> &CSI,
                            const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());

  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();

  ARMRegSet LoRegsToRestore;
  ARMRegSet HiRegsToRestore;
  // Low registers (r0-r7) which can be used to restore the high registers.
  ARMRegSet CopyRegs;

  for (CalleeSavedInfo I : CSI) {
    unsigned Reg = I.getReg();

    if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
      LoRegsToRestore[Reg] = true;
    } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
      HiRegsToRestore[Reg] = true;
    } else {
      llvm_unreachable("callee-saved register of unexpected class");
    }

    // If this is a low register not used as the frame pointer, we may want to
    // use it for restoring the high registers.
    if ((ARM::tGPRRegClass.contains(Reg)) &&
        !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
      CopyRegs[Reg] = true;
  }

  // If this is a return block, we may be able to use some unused return value
  // registers for restoring the high regs.
  auto Terminator = MBB.getFirstTerminator();
  if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
    CopyRegs[ARM::R0] = true;
    CopyRegs[ARM::R1] = true;
    CopyRegs[ARM::R2] = true;
    CopyRegs[ARM::R3] = true;
    for (auto Op : Terminator->implicit_operands()) {
      if (Op.isReg())
        CopyRegs[Op.getReg()] = false;
    }
  }

  static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3,
                                         ARM::R4, ARM::R5, ARM::R6, ARM::R7};
  static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11};

  const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
  const unsigned *AllHighRegsEnd = std::end(AllHighRegs);

  // Find the first register to restore.
  auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs),
                                           HiRegsToRestore, AllHighRegsEnd);

  while (HiRegToRestore != AllHighRegsEnd) {
    assert(!CopyRegs.none());
    // Find the first low register to use.
    auto CopyReg =
        findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);

    // Create the POP instruction.
    MachineInstrBuilder PopMIB =
        BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));

    while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
      // Add the low register to the POP.
      PopMIB.addReg(*CopyReg, RegState::Define);

      // Create the MOV from low to high register.
      BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
          .addReg(*HiRegToRestore, RegState::Define)
          .addReg(*CopyReg, RegState::Kill)
          .add(predOps(ARMCC::AL));

      CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
      HiRegToRestore =
          findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd);
    }
  }

  MachineInstrBuilder MIB =
      BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));

  bool NeedsPop = false;
  for (unsigned i = CSI.size(); i != 0; --i) {
    CalleeSavedInfo &Info = CSI[i-1];
    unsigned Reg = Info.getReg();

    // High registers (excluding lr) have already been dealt with
    if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR))
      continue;

    if (Reg == ARM::LR) {
      Info.setRestored(false);
      if (!MBB.succ_empty() ||
          MI->getOpcode() == ARM::TCRETURNdi ||
          MI->getOpcode() == ARM::TCRETURNri)
        // LR may only be popped into PC, as part of return sequence.
        // If this isn't the return sequence, we'll need emitPopSpecialFixUp
        // to restore LR the hard way.
        // FIXME: if we don't pass any stack arguments it would be actually
        // advantageous *and* correct to do the conversion to an ordinary call
        // instruction here.
        continue;
      // Special epilogue for vararg functions. See emitEpilogue
      if (isVarArg)
        continue;
      // ARMv4T requires BX, see emitEpilogue
      if (!STI.hasV5TOps())
        continue;

      // Pop LR into PC.
      Reg = ARM::PC;
      (*MIB).setDesc(TII.get(ARM::tPOP_RET));
      if (MI != MBB.end())
        MIB.copyImplicitOps(*MI);
      MI = MBB.erase(MI);
    }
    MIB.addReg(Reg, getDefRegState(true));
    NeedsPop = true;
  }

  // It's illegal to emit pop instruction without operands.
  if (NeedsPop)
    MBB.insert(MI, &*MIB);
  else
    MF.DeleteMachineInstr(MIB);

  return true;
}
bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
                                              bool DoIt) const {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  const ThumbRegisterInfo *RegInfo =
      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());

  // If MBBI is a return instruction, or is a tPOP followed by a return
  // instruction in the successor BB, we may be able to directly restore
  // LR in the PC.
  // This is only possible with v5T ops (v4T can't change the Thumb bit via
  // a POP PC instruction), and only if we do not need to emit any SP update.
  // Otherwise, we need a temporary register to pop the value
  // and copy that value into LR.
  auto MBBI = MBB.getFirstTerminator();
  bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize;
  if (CanRestoreDirectly) {
    if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB)
      CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET ||
                            MBBI->getOpcode() == ARM::tPOP_RET);
    else {
      auto MBBI_prev = MBBI;
      MBBI_prev--;
      assert(MBBI_prev->getOpcode() == ARM::tPOP);
      assert(MBB.succ_size() == 1);
      if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET)
        MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET.
      else
        CanRestoreDirectly = false;
    }
  }

  if (CanRestoreDirectly) {
    if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET)
      return true;
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))
            .add(predOps(ARMCC::AL));
    // Copy implicit ops and popped registers, if any.
    for (auto MO: MBBI->operands())
      if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
        MIB.add(MO);
    MIB.addReg(ARM::PC, RegState::Define);
    // Erase the old instruction (tBX_RET or tPOP).
    MBB.erase(MBBI);
    return true;
  }

  // Look for a temporary register to use.
  // First, compute the liveness information.
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  LivePhysRegs UsedRegs(TRI);
  UsedRegs.addLiveOuts(MBB);
  // The semantic of pristines changed recently and now,
  // the callee-saved registers that are touched in the function
  // are not part of the pristines set anymore.
  // Add those callee-saved now.
  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i)
    UsedRegs.addReg(CSRegs[i]);

  DebugLoc dl = DebugLoc();
  if (MBBI != MBB.end()) {
    dl = MBBI->getDebugLoc();
    auto InstUpToMBBI = MBB.end();
    while (InstUpToMBBI != MBBI)
      // The pre-decrement is on purpose here.
      // We want to have the liveness right before MBBI.
      UsedRegs.stepBackward(*--InstUpToMBBI);
  }

  // Look for a register that can be directly use in the POP.
  unsigned PopReg = 0;
  // And some temporary register, just in case.
  unsigned TemporaryReg = 0;
  BitVector PopFriendly =
      TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID));
  assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
  // Rebuild the GPRs from the high registers because they are removed
  // form the GPR reg class for thumb1.
  BitVector GPRsNoLRSP =
      TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID));
  GPRsNoLRSP |= PopFriendly;
  GPRsNoLRSP.reset(ARM::LR);
  GPRsNoLRSP.reset(ARM::SP);
  GPRsNoLRSP.reset(ARM::PC);
  findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg);

  // If we couldn't find a pop-friendly register, restore LR before popping the
  // other callee-saved registers, so we can use one of them as a temporary.
  bool UseLDRSP = false;
  if (!PopReg && MBBI != MBB.begin()) {
    auto PrevMBBI = MBBI;
    PrevMBBI--;
    if (PrevMBBI->getOpcode() == ARM::tPOP) {
      MBBI = PrevMBBI;
      UsedRegs.stepBackward(*MBBI);
      findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg);
      UseLDRSP = true;
    }
  }

  if (!DoIt && !PopReg && !TemporaryReg)
    return false;

  assert((PopReg || TemporaryReg) && "Cannot get LR");

  if (UseLDRSP) {
    assert(PopReg && "Do not know how to get LR");
    // Load the LR via LDR tmp, [SP, #off]
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRspi))
      .addReg(PopReg, RegState::Define)
      .addReg(ARM::SP)
      .addImm(MBBI->getNumExplicitOperands() - 2)
      .add(predOps(ARMCC::AL));
    // Move from the temporary register to the LR.
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
      .addReg(ARM::LR, RegState::Define)
      .addReg(PopReg, RegState::Kill)
      .add(predOps(ARMCC::AL));
    // Advance past the pop instruction.
    MBBI++;
    // Increment the SP.
    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize + 4);
    return true;
  }

  if (TemporaryReg) {
    assert(!PopReg && "Unnecessary MOV is about to be inserted");
    PopReg = PopFriendly.find_first();
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
        .addReg(TemporaryReg, RegState::Define)
        .addReg(PopReg, RegState::Kill)
        .add(predOps(ARMCC::AL));
  }

  if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
    // We couldn't use the direct restoration above, so
    // perform the opposite conversion: tPOP_RET to tPOP.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))
            .add(predOps(ARMCC::AL));
    bool Popped = false;
    for (auto MO: MBBI->operands())
      if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
          MO.getReg() != ARM::PC) {
        MIB.add(MO);
        if (!MO.isImplicit())
          Popped = true;
      }
    // Is there anything left to pop?
    if (!Popped)
      MBB.erase(MIB.getInstr());
    // Erase the old instruction.
    MBB.erase(MBBI);
    MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))
               .add(predOps(ARMCC::AL));
  }

  assert(PopReg && "Do not know how to get LR");
  BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))
      .add(predOps(ARMCC::AL))
      .addReg(PopReg, RegState::Define);

  emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);

  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
      .addReg(ARM::LR, RegState::Define)
      .addReg(PopReg, RegState::Kill)
      .add(predOps(ARMCC::AL));

  if (TemporaryReg)
    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
        .addReg(PopReg, RegState::Define)
        .addReg(TemporaryReg, RegState::Kill)
        .add(predOps(ARMCC::AL));

  return true;
}
/// EmitSchedule - Emit the machine code in scheduled order. Return the new
/// InsertPos and MachineBasicBlock that contains this insertion
/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
/// not necessarily refer to returned BB. The emitter may split blocks.
MachineBasicBlock *ScheduleDAGSDNodes::
EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
  InstrEmitter Emitter(BB, InsertPos);
  DenseMap<SDValue, unsigned> VRBaseMap;
  DenseMap<SUnit*, unsigned> CopyVRBaseMap;
  SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
  SmallSet<unsigned, 8> Seen;
  bool HasDbg = DAG->hasDebugValues();

  // If this is the first BB, emit byval parameter dbg_value's.
  if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
    SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
    SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
    for (; PDI != PDE; ++PDI) {
      MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
      if (DbgMI)
        BB->insert(InsertPos, DbgMI);
    }
  }

  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    SUnit *SU = Sequence[i];
    if (!SU) {
      // Null SUnit* is a noop.
      TII->insertNoop(*Emitter.getBlock(), InsertPos);
      continue;
    }

    // For pre-regalloc scheduling, create instructions corresponding to the
    // SDNode and any glued SDNodes and append them to the block.
    if (!SU->getNode()) {
      // Emit a copy.
      EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
      continue;
    }

    SmallVector<SDNode *, 4> GluedNodes;
    for (SDNode *N = SU->getNode()->getGluedNode(); N;
         N = N->getGluedNode())
      GluedNodes.push_back(N);
    while (!GluedNodes.empty()) {
      SDNode *N = GluedNodes.back();
      Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
                       VRBaseMap);
      // Remember the source order of the inserted instruction.
      if (HasDbg)
        ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
      GluedNodes.pop_back();
    }
    Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
                     VRBaseMap);
    // Remember the source order of the inserted instruction.
    if (HasDbg)
      ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
                        Seen);
  }

  // Insert all the dbg_values which have not already been inserted in source
  // order sequence.
  if (HasDbg) {
    MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();

    // Sort the source order instructions and use the order to insert debug
    // values.
    std::sort(Orders.begin(), Orders.end(), OrderSorter());

    SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
    SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
    // Now emit the rest according to source order.
    unsigned LastOrder = 0;
    for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
      unsigned Order = Orders[i].first;
      MachineInstr *MI = Orders[i].second;
      // Insert all SDDbgValue's whose order(s) are before "Order".
      if (!MI)
        continue;
      for (; DI != DE &&
             (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
        if ((*DI)->isInvalidated())
          continue;
        MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
        if (DbgMI) {
          if (!LastOrder)
            // Insert to start of the BB (after PHIs).
            BB->insert(BBBegin, DbgMI);
          else {
            // Insert at the instruction, which may be in a different
            // block, if the block was split by a custom inserter.
            MachineBasicBlock::iterator Pos = MI;
            MI->getParent()->insert(llvm::next(Pos), DbgMI);
          }
        }
      }
      LastOrder = Order;
    }
    // Add trailing DbgValue's before the terminator. FIXME: May want to add
    // some of them before one or more conditional branches?
    SmallVector<MachineInstr*, 8> DbgMIs;
    while (DI != DE) {
      if (!(*DI)->isInvalidated())
        if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
          DbgMIs.push_back(DbgMI);
      ++DI;
    }

    MachineBasicBlock *InsertBB = Emitter.getBlock();
    MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
    InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
  }

  InsertPos = Emitter.getInsertPos();
  return Emitter.getBlock();
}
Exemple #26
0
/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
/// of its predecessors.
bool
TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
                                 SmallVector<MachineBasicBlock*, 8> &TDBBs,
                                 SmallVector<MachineInstr*, 16> &Copies) {
    // Set the limit on the number of instructions to duplicate, with a default
    // of one less than the tail-merge threshold. When optimizing for size,
    // duplicate only one, because one branch instruction can be eliminated to
    // compensate for the duplication.
    unsigned MaxDuplicateCount;
    if (TailDuplicateSize.getNumOccurrences() == 0 &&
            MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
        MaxDuplicateCount = 1;
    else
        MaxDuplicateCount = TailDuplicateSize;

    if (PreRegAlloc) {
        if (TailBB->empty())
            return false;
        const TargetInstrDesc &TID = TailBB->back().getDesc();
        // Pre-regalloc tail duplication hurts compile time and doesn't help
        // much except for indirect branches and returns.
        if (!TID.isIndirectBranch() && !TID.isReturn())
            return false;
        // If the target has hardware branch prediction that can handle indirect
        // branches, duplicating them can often make them predictable when there
        // are common paths through the code.  The limit needs to be high enough
        // to allow undoing the effects of tail merging and other optimizations
        // that rearrange the predecessors of the indirect branch.
        MaxDuplicateCount = 20;
    }

    // Don't try to tail-duplicate single-block loops.
    if (TailBB->isSuccessor(TailBB))
        return false;

    // Check the instructions in the block to determine whether tail-duplication
    // is invalid or unlikely to be profitable.
    unsigned InstrCount = 0;
    bool HasCall = false;
    for (MachineBasicBlock::iterator I = TailBB->begin();
            I != TailBB->end(); ++I) {
        // Non-duplicable things shouldn't be tail-duplicated.
        if (I->getDesc().isNotDuplicable()) return false;
        // Do not duplicate 'return' instructions if this is a pre-regalloc run.
        // A return may expand into a lot more instructions (e.g. reload of callee
        // saved registers) after PEI.
        if (PreRegAlloc && I->getDesc().isReturn()) return false;
        // Don't duplicate more than the threshold.
        if (InstrCount == MaxDuplicateCount) return false;
        // Remember if we saw a call.
        if (I->getDesc().isCall()) HasCall = true;
        if (!I->isPHI() && !I->isDebugValue())
            InstrCount += 1;
    }
    // Don't tail-duplicate calls before register allocation. Calls presents a
    // barrier to register allocation so duplicating them may end up increasing
    // spills.
    if (InstrCount > 1 && (PreRegAlloc && HasCall))
        return false;

    DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');

    // Iterate through all the unique predecessors and tail-duplicate this
    // block into them, if possible. Copying the list ahead of time also
    // avoids trouble with the predecessor list reallocating.
    bool Changed = false;
    SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
            TailBB->pred_end());
    for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
            PE = Preds.end(); PI != PE; ++PI) {
        MachineBasicBlock *PredBB = *PI;

        assert(TailBB != PredBB &&
               "Single-block loop should have been rejected earlier!");
        if (PredBB->succ_size() > 1) continue;

        MachineBasicBlock *PredTBB, *PredFBB;
        SmallVector<MachineOperand, 4> PredCond;
        if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
            continue;
        if (!PredCond.empty())
            continue;
        // EH edges are ignored by AnalyzeBranch.
        if (PredBB->succ_size() != 1)
            continue;
        // Don't duplicate into a fall-through predecessor (at least for now).
        if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
            continue;

        DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
              << "From Succ: " << *TailBB);

        TDBBs.push_back(PredBB);

        // Remove PredBB's unconditional branch.
        TII->RemoveBranch(*PredBB);

        // Clone the contents of TailBB into PredBB.
        DenseMap<unsigned, unsigned> LocalVRMap;
        SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
        MachineBasicBlock::iterator I = TailBB->begin();
        while (I != TailBB->end()) {
            MachineInstr *MI = &*I;
            ++I;
            if (MI->isPHI()) {
                // Replace the uses of the def of the PHI with the register coming
                // from PredBB.
                ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos);
            } else {
                // Replace def of virtual registers with new registers, and update
                // uses with PHI source register or the new registers.
                DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap);
            }
        }
        MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
        for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
            Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
                                     TII->get(TargetOpcode::COPY),
                                     CopyInfos[i].first).addReg(CopyInfos[i].second));
        }
        NumInstrDups += TailBB->size() - 1; // subtract one for removed branch

        // Update the CFG.
        PredBB->removeSuccessor(PredBB->succ_begin());
        assert(PredBB->succ_empty() &&
               "TailDuplicate called on block with multiple successors!");
        for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
                E = TailBB->succ_end(); I != E; ++I)
            PredBB->addSuccessor(*I);

        Changed = true;
        ++NumTailDups;
    }

    // If TailBB was duplicated into all its predecessors except for the prior
    // block, which falls through unconditionally, move the contents of this
    // block into the prior block.
    MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB));
    MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
    SmallVector<MachineOperand, 4> PriorCond;
    bool PriorUnAnalyzable =
        TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true);
    // This has to check PrevBB->succ_size() because EH edges are ignored by
    // AnalyzeBranch.
    if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB &&
            TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 &&
            !TailBB->hasAddressTaken()) {
        DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
              << "From MBB: " << *TailBB);
        if (PreRegAlloc) {
            DenseMap<unsigned, unsigned> LocalVRMap;
            SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
            MachineBasicBlock::iterator I = TailBB->begin();
            // Process PHI instructions first.
            while (I != TailBB->end() && I->isPHI()) {
                // Replace the uses of the def of the PHI with the register coming
                // from PredBB.
                MachineInstr *MI = &*I++;
                ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos);
                if (MI->getParent())
                    MI->eraseFromParent();
            }

            // Now copy the non-PHI instructions.
            while (I != TailBB->end()) {
                // Replace def of virtual registers with new registers, and update
                // uses with PHI source register or the new registers.
                MachineInstr *MI = &*I++;
                DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap);
                MI->eraseFromParent();
            }
            MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
            for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
                Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
                                         TII->get(TargetOpcode::COPY),
                                         CopyInfos[i].first)
                                 .addReg(CopyInfos[i].second));
            }
        } else {
            // No PHIs to worry about, just splice the instructions over.
            PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
        }
        PrevBB->removeSuccessor(PrevBB->succ_begin());
        assert(PrevBB->succ_empty());
        PrevBB->transferSuccessors(TailBB);
        TDBBs.push_back(PrevBB);
        Changed = true;
    }

    return Changed;
}
Exemple #27
0
bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
 MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
  std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;

  // First pass, collect all live intervals for SGPRs
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      for (const MachineOperand &MO : MI.defs()) {
        if (MO.isImplicit())
          continue;
        unsigned Def = MO.getReg();
        if (TargetRegisterInfo::isVirtualRegister(Def)) {
          if (TRI->isSGPRClass(MRI.getRegClass(Def)))
            SGPRLiveRanges.push_back(
                std::make_pair(Def, &LIS->getInterval(Def)));
        } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
            SGPRLiveRanges.push_back(
                std::make_pair(Def, &LIS->getRegUnit(Def)));
        }
      }
    }
  }

  // Second pass fix the intervals
  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {
    MachineBasicBlock &MBB = *BI;
    if (MBB.succ_size() < 2)
      continue;

    // We have structured control flow, so number of succesors should be two.
    assert(MBB.succ_size() == 2);
    MachineBasicBlock *SuccA = *MBB.succ_begin();
    MachineBasicBlock *SuccB = *(++MBB.succ_begin());
    MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);

    if (!NCD)
      continue;

    MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator();

    if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) {
      assert(NCD->succ_size() == 2);
      // We want to make sure we insert the Use after the ENDIF, not after
      // the ELSE.
      NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
                                            *(++NCD->succ_begin()));
    }
    assert(SuccA && SuccB);
    for (std::pair<unsigned, LiveRange*> RegLR : SGPRLiveRanges) {
      unsigned Reg = RegLR.first;
      LiveRange *LR = RegLR.second;

      // FIXME: We could be smarter here.  If the register is Live-In to
      // one block, but the other doesn't have any SGPR defs, then there
      // won't be a conflict.  Also, if the branch decision is based on
      // a value in an SGPR, then there will be no conflict.
      bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
      bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);

      if ((!LiveInToA && !LiveInToB) ||
          (LiveInToA && LiveInToB))
        continue;

      // This interval is live in to one successor, but not the other, so
      // we need to update its range so it is live in to both.
      DEBUG(dbgs() << "Possible SGPR conflict detected " <<  " in " << *LR <<
                      " BB#" << SuccA->getNumber() << ", BB#" <<
                      SuccB->getNumber() <<
                      " with NCD = " << NCD->getNumber() << '\n');

      // FIXME: Need to figure out how to update LiveRange here so this pass
      // will be able to preserve LiveInterval analysis.
      BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
              TII->get(AMDGPU::SGPR_USE))
              .addReg(Reg, RegState::Implicit);
      DEBUG(NCD->getFirstNonPHI()->dump());
    }
  }

  return false;
}
bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  bool MadeChange = false;

  MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
  std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;

  LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
  LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
  MachineBasicBlock *Entry = MF.begin();

  // Use a depth first order so that in SSA, we encounter all defs before
  // uses. Once the defs of the block have been found, attempt to insert
  // SGPR_USE instructions in successor blocks if required.
  for (MachineBasicBlock *MBB : depth_first(Entry)) {
    for (const MachineInstr &MI : *MBB) {
      for (const MachineOperand &MO : MI.defs()) {
        if (MO.isImplicit())
          continue;
        unsigned Def = MO.getReg();
        if (TargetRegisterInfo::isVirtualRegister(Def)) {
          if (TRI->isSGPRClass(MRI.getRegClass(Def))) {
            // Only consider defs that are live outs. We don't care about def /
            // use within the same block.
            LiveRange &LR = LIS->getInterval(Def);
            if (LIS->isLiveOutOfMBB(LR, MBB))
              SGPRLiveRanges.push_back(std::make_pair(Def, &LR));
          }
        } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
          SGPRLiveRanges.push_back(std::make_pair(Def, &LIS->getRegUnit(Def)));
        }
      }
    }

    if (MBB->succ_size() < 2)
      continue;

    // We have structured control flow, so the number of successors should be
    // two.
    assert(MBB->succ_size() == 2);
    MachineBasicBlock *SuccA = *MBB->succ_begin();
    MachineBasicBlock *SuccB = *(++MBB->succ_begin());
    MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);

    if (!NCD)
      continue;

    MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator();

    if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) {
      assert(NCD->succ_size() == 2);
      // We want to make sure we insert the Use after the ENDIF, not after
      // the ELSE.
      NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
                                            *(++NCD->succ_begin()));
    }

    for (std::pair<unsigned, LiveRange*> RegLR : SGPRLiveRanges) {
      unsigned Reg = RegLR.first;
      LiveRange *LR = RegLR.second;

      // FIXME: We could be smarter here. If the register is Live-In to one
      // block, but the other doesn't have any SGPR defs, then there won't be a
      // conflict. Also, if the branch condition is uniform then there will be
      // no conflict.
      bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
      bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);

      if (!LiveInToA && !LiveInToB) {
        DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
              << " is live into neither successor\n");
        continue;
      }

      if (LiveInToA && LiveInToB) {
        DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
              << " is live into both successors\n");
        continue;
      }

      // This interval is live in to one successor, but not the other, so
      // we need to update its range so it is live in to both.
      DEBUG(dbgs() << "Possible SGPR conflict detected for "
            << PrintReg(Reg, TRI, 0) <<  " in " << *LR
            << " BB#" << SuccA->getNumber() << ", BB#"
            << SuccB->getNumber()
            << " with NCD = BB#" << NCD->getNumber() << '\n');

      assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
             "Not expecting to extend live range of physreg");

      // FIXME: Need to figure out how to update LiveRange here so this pass
      // will be able to preserve LiveInterval analysis.
      MachineInstr *NCDSGPRUse =
        BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
                TII->get(AMDGPU::SGPR_USE))
        .addReg(Reg, RegState::Implicit);

      MadeChange = true;

      SlotIndex SI = LIS->InsertMachineInstrInMaps(NCDSGPRUse);
      LIS->extendToIndices(*LR, SI.getRegSlot());

      if (LV) {
        // TODO: This won't work post-SSA
        LV->HandleVirtRegUse(Reg, NCD, NCDSGPRUse);
      }

      DEBUG(NCDSGPRUse->dump());
    }
  }

  return MadeChange;
}
/// converToHardwareLoop - check if the loop is a candidate for
/// converting to a hardware loop.  If so, then perform the
/// transformation.
///
/// This function works on innermost loops first.  A loop can
/// be converted if it is a counting loop; either a register
/// value or an immediate.
///
/// The code makes several assumptions about the representation
/// of the loop in llvm.
bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
  bool Changed = false;
  // Process nested loops first.
  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    Changed |= convertToHardwareLoop(*I);
  }
  // If a nested loop has been converted, then we can't convert this loop.
  if (Changed) {
    return Changed;
  }
  // Are we able to determine the trip count for the loop?
  CountValue *TripCount = getTripCount(L);
  if (TripCount == 0) {
    return false;
  }
  // Does the loop contain any invalid instructions?
  if (containsInvalidInstruction(L)) {
    return false;
  }
  MachineBasicBlock *Preheader = L->getLoopPreheader();
  // No preheader means there's not place for the loop instr.
  if (Preheader == 0) {
    return false;
  }
  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();

  MachineBasicBlock *LastMBB = L->getExitingBlock();
  // Don't generate hw loop if the loop has more than one exit.
  if (LastMBB == 0) {
    return false;
  }
  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();

  // Determine the loop start.
  MachineBasicBlock *LoopStart = L->getTopBlock();
  if (L->getLoopLatch() != LastMBB) {
    // When the exit and latch are not the same, use the latch block as the
    // start.
    // The loop start address is used only after the 1st iteration, and the loop
    // latch may contains instrs. that need to be executed after the 1st iter.
    LoopStart = L->getLoopLatch();
    // Make sure the latch is a successor of the exit, otherwise it won't work.
    if (!LastMBB->isSuccessor(LoopStart)) {
      return false;
    }
  }

  // Convert the loop to a hardware loop
  DEBUG(dbgs() << "Change to hardware loop at "; L->dump());

  if (TripCount->isReg()) {
    // Create a copy of the loop count register.
    MachineFunction *MF = LastMBB->getParent();
    const TargetRegisterClass *RC =
      MF->getRegInfo().getRegClass(TripCount->getReg());
    unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC);
    BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
            TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg());
    if (TripCount->isNeg()) {
      unsigned CountReg1 = CountReg;
      CountReg = MF->getRegInfo().createVirtualRegister(RC);
      BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
              TII->get(Hexagon::NEG), CountReg).addReg(CountReg1);
    }

    // Add the Loop instruction to the begining of the loop.
    BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
            TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg);
  } else {
    assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
    // Add the Loop immediate instruction to the beginning of the loop.
    int64_t CountImm = TripCount->getImm();
    BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
            TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm);
  }

  // Make sure the loop start always has a reference in the CFG.  We need to
  // create a BlockAddress operand to get this mechanism to work both the
  // MachineBasicBlock and BasicBlock objects need the flag set.
  LoopStart->setHasAddressTaken();
  // This line is needed to set the hasAddressTaken flag on the BasicBlock
  // object
  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));

  // Replace the loop branch with an endloop instruction.
  DebugLoc dl = LastI->getDebugLoc();
  BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);

  // The loop ends with either:
  //  - a conditional branch followed by an unconditional branch, or
  //  - a conditional branch to the loop start.
  if (LastI->getOpcode() == Hexagon::JMP_c ||
      LastI->getOpcode() == Hexagon::JMP_cNot) {
    // delete one and change/add an uncond. branch to out of the loop
    MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
    LastI = LastMBB->erase(LastI);
    if (!L->contains(BranchTarget)) {
      if (LastI != LastMBB->end()) {
        TII->RemoveBranch(*LastMBB);
      }
      SmallVector<MachineOperand, 0> Cond;
      TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl);
    }
  } else {
    // Conditional branch to loop start; just delete it.
    LastMBB->erase(LastI);
  }
  delete TripCount;

  ++NumHWLoops;
  return true;
}
Exemple #30
0
/// Walk the specified region of the CFG and hoist loop invariants out to the
/// preheader.
void MachineLICM::HoistRegionPostRA() {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)
    return;

  unsigned NumRegs = TRI->getNumRegs();
  BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
  BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.

  SmallVector<CandidateInfo, 32> Candidates;
  SmallSet<int, 32> StoredFIs;

  // Walk the entire region, count number of defs for each register, and
  // collect potential LICM candidates.
  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
  for (MachineBasicBlock *BB : Blocks) {
    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad()) continue;

    // Conservatively treat live-in's as an external def.
    // FIXME: That means a reload that're reused in successor block(s) will not
    // be LICM'ed.
    for (const auto &LI : BB->liveins()) {
      for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
        PhysRegDefs.set(*AI);
    }

    SpeculationState = SpeculateUnknown;
    for (MachineInstr &MI : *BB)
      ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
  }

  // Gather the registers read / clobbered by the terminator.
  BitVector TermRegs(NumRegs);
  MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
  if (TI != Preheader->end()) {
    for (const MachineOperand &MO : TI->operands()) {
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        TermRegs.set(*AI);
    }
  }

  // Now evaluate whether the potential candidates qualify.
  // 1. Check if the candidate defined register is defined by another
  //    instruction in the loop.
  // 2. If the candidate is a load from stack slot (always true for now),
  //    check if the slot is stored anywhere in the loop.
  // 3. Make sure candidate def should not clobber
  //    registers read by the terminator. Similarly its def should not be
  //    clobbered by the terminator.
  for (CandidateInfo &Candidate : Candidates) {
    if (Candidate.FI != INT_MIN &&
        StoredFIs.count(Candidate.FI))
      continue;

    unsigned Def = Candidate.Def;
    if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
      bool Safe = true;
      MachineInstr *MI = Candidate.MI;
      for (const MachineOperand &MO : MI->operands()) {
        if (!MO.isReg() || MO.isDef() || !MO.getReg())
          continue;
        unsigned Reg = MO.getReg();
        if (PhysRegDefs.test(Reg) ||
            PhysRegClobbers.test(Reg)) {
          // If it's using a non-loop-invariant register, then it's obviously
          // not safe to hoist.
          Safe = false;
          break;
        }
      }
      if (Safe)
        HoistPostRA(MI, Candidate.Def);
    }
  }
}