bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
                                                MachineInstr &IntoMI) const {
  // Immediate neighbours are already folded.
  if (MI.getParent() == IntoMI.getParent() &&
      std::next(MI.getIterator()) == IntoMI.getIterator())
    return true;

  return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
         MI.implicit_operands().begin() == MI.implicit_operands().end();
}
void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) {
  Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI);
  if (mi2iItr == mi2iMap.end())
    return;

  SlotIndex MIIndex = mi2iItr->second;
  IndexListEntry &MIEntry = *MIIndex.listEntry();
  assert(MIEntry.getInstr() == &MI && "Instruction indexes broken.");
  mi2iMap.erase(mi2iItr);

  // When removing the first instruction of a bundle, update the mapping to
  // point at the next instruction.
  if (MI.isBundledWithSucc()) {
    // Only the first instruction of a bundle should have an index assigned.
    assert(!MI.isBundledWithPred() && "Should have first bundle instruction");

    MachineBasicBlock::instr_iterator Next = std::next(MI.getIterator());
    MachineInstr &NextMI = *Next;
    MIEntry.setInstr(&NextMI);
    mi2iMap.insert(std::make_pair(&NextMI, MIIndex));
    return;
  }

  // FIXME: Eventually we want to actually delete these indexes.
  MIEntry.setInstr(nullptr);
}
bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction *MF = MBB.getParent();

  if (MF->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
      !shouldSkip(MBB, MBB.getParent()->back()))
    return false;

  MachineBasicBlock *SkipBB = insertSkipBlock(MBB, MI.getIterator());

  const DebugLoc &DL = MI.getDebugLoc();

  // If the exec mask is non-zero, skip the next two instructions
  BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
    .addMBB(&NextBB);

  MachineBasicBlock::iterator Insert = SkipBB->begin();

  // Exec mask is zero: Export to NULL target...
  BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP))
    .addImm(0)
    .addImm(0x09) // V_008DFC_SQ_EXP_NULL
    .addImm(0)
    .addImm(1)
    .addImm(1)
    .addReg(AMDGPU::VGPR0, RegState::Undef)
    .addReg(AMDGPU::VGPR0, RegState::Undef)
    .addReg(AMDGPU::VGPR0, RegState::Undef)
    .addReg(AMDGPU::VGPR0, RegState::Undef);

  // ... and terminate wavefront.
  BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));

  return true;
}
// Compute base address using Addr and return the final register.
unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
                                           const MemAddress &Addr) {
  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::iterator MBBI = MI.getIterator();
  DebugLoc DL = MI.getDebugLoc();

  assert((TRI->getRegSizeInBits(Addr.Base.LoReg, *MRI) == 32 ||
          Addr.Base.LoSubReg) &&
         "Expected 32-bit Base-Register-Low!!");

  assert((TRI->getRegSizeInBits(Addr.Base.HiReg, *MRI) == 32 ||
          Addr.Base.HiSubReg) &&
         "Expected 32-bit Base-Register-Hi!!");

  LLVM_DEBUG(dbgs() << "  Re-Computed Anchor-Base:\n");
  MachineOperand OffsetLo = createRegOrImm(static_cast<int32_t>(Addr.Offset), MI);
  MachineOperand OffsetHi =
    createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);
  unsigned CarryReg = MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
  unsigned DeadCarryReg =
    MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

  unsigned DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *LoHalf =
    BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0)
      .addReg(CarryReg, RegState::Define)
      .addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
      .add(OffsetLo);
  (void)LoHalf;
  LLVM_DEBUG(dbgs() << "    "; LoHalf->dump(););
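  // NOTE: the snippet above is truncated; the closing half of computeBase()
  // is missing. The lines below are a hedged sketch of the remainder,
  // mirroring the low-half add: the high half is added with V_ADDC_U32_e64
  // consuming the carry, and the two 32-bit halves are combined into a
  // 64-bit base with REG_SEQUENCE. Exact operand order and register classes
  // are assumptions, not verbatim source.
  MachineInstr *HiHalf =
    BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADDC_U32_e64), DestSub1)
      .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
      .addReg(Addr.Base.HiReg, 0, Addr.Base.HiSubReg)
      .add(OffsetHi)
      .addReg(CarryReg, RegState::Kill);
  (void)HiHalf;
  LLVM_DEBUG(dbgs() << "    "; HiHalf->dump(););

  unsigned FullDestReg = MRI->createVirtualRegister(&AMDGPU::VReg_64RegClass);
  MachineInstr *FullBase =
    BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::REG_SEQUENCE), FullDestReg)
      .addReg(DestSub0)
      .addImm(AMDGPU::sub0)
      .addReg(DestSub1)
      .addImm(AMDGPU::sub1);
  (void)FullBase;
  LLVM_DEBUG(dbgs() << "    "; FullBase->dump(); dbgs() << '\n';);

  return FullDestReg;
}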
// endPacket - End the current packet, bundle packet instructions and reset
// DFA state.
void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
                                   MachineInstr *MI) {
  if (CurrentPacketMIs.size() > 1) {
    MachineInstr *MIFirst = CurrentPacketMIs.front();
    finalizeBundle(*MBB, MIFirst->getIterator(), MI->getIterator());
  }
  CurrentPacketMIs.clear();
  ResourceTracker->clearResources();
}
bool Mips16InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  MachineBasicBlock &MBB = *MI.getParent();
  switch (MI.getDesc().getOpcode()) {
  default:
    return false;
  case Mips::RetRA16:
    ExpandRetRA16(MBB, MI, Mips::JrcRa16);
    break;
  }

  MBB.erase(MI.getIterator());
  return true;
}
// Returns true if a branch over the block was inserted.
bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
                                   MachineBasicBlock &SrcMBB) {
  MachineBasicBlock *DestBB = MI.getOperand(0).getMBB();

  if (!shouldSkip(**SrcMBB.succ_begin(), *DestBB))
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock::iterator InsPt = std::next(MI.getIterator());

  BuildMI(SrcMBB, InsPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
    .addMBB(DestBB);

  return true;
}
MachineOperand
SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) {
  APInt V(32, Val, true);
  if (TII->isInlineConstant(V))
    return MachineOperand::CreateImm(Val);

  unsigned Reg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MachineInstr *Mov =
    BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
            TII->get(AMDGPU::S_MOV_B32), Reg)
      .addImm(Val);
  (void)Mov;
  LLVM_DEBUG(dbgs() << "    "; Mov->dump());
  return MachineOperand::CreateReg(Reg, false);
}
bool ARMInstructionSelector::select(MachineInstr &I,
                                    CodeGenCoverage &CoverageInfo) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  auto &MBB = *I.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  using namespace TargetOpcode;

  if (selectImpl(I, CoverageInfo))
    return true;

  MachineInstrBuilder MIB{MF, I};
  bool isSExt = false;

  switch (I.getOpcode()) {
  case G_SEXT:
    isSExt = true;
    LLVM_FALLTHROUGH;
  case G_ZEXT: {
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    // FIXME: Smaller destination sizes coming soon!
    if (DstTy.getSizeInBits() != 32) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size for extension\n");
      return false;
    }

    LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
    unsigned SrcSize = SrcTy.getSizeInBits();
    switch (SrcSize) {
    case 1: {
      // ZExt boils down to & 0x1; for SExt we also subtract that from 0
      I.setDesc(TII.get(Opcodes.AND));
      MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp());

      if (isSExt) {
        unsigned SExtResult = I.getOperand(0).getReg();

        // Use a new virtual register for the result of the AND
        unsigned AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass);
        I.getOperand(0).setReg(AndResult);

        auto InsertBefore = std::next(I.getIterator());
        auto SubI =
            BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(Opcodes.RSB))
                .addDef(SExtResult)
                .addUse(AndResult)
                .addImm(0)
                .add(predOps(ARMCC::AL))
                .add(condCodeOp());
        if (!constrainSelectedInstRegOperands(*SubI, TII, TRI, RBI))
          return false;
      }
      break;
    }
    case 8:
    case 16: {
      unsigned NewOpc = selectSimpleExtOpc(I.getOpcode(), SrcSize);
      if (NewOpc == I.getOpcode())
        return false;
      I.setDesc(TII.get(NewOpc));
      MIB.addImm(0).add(predOps(ARMCC::AL));
      break;
    }
    default:
      LLVM_DEBUG(dbgs() << "Unsupported source size for extension\n");
      return false;
    }
    break;
  }
  case G_ANYEXT:
  case G_TRUNC: {
    // The high bits are undefined, so there's nothing special to do, just
    // treat it as a copy.
    auto SrcReg = I.getOperand(1).getReg();
    auto DstReg = I.getOperand(0).getReg();

    const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
    const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

    if (SrcRegBank.getID() == ARM::FPRRegBankID) {
      // This should only happen in the obscure case where we have put a 64-bit
      // integer into a D register. Get it out of there and keep only the
      // interesting part.
      assert(I.getOpcode() == G_TRUNC && "Unsupported operand for G_ANYEXT");
      assert(DstRegBank.getID() == ARM::GPRRegBankID &&
             "Unsupported combination of register banks");
      assert(MRI.getType(SrcReg).getSizeInBits() == 64 && "Unsupported size");
      assert(MRI.getType(DstReg).getSizeInBits() <= 32 && "Unsupported size");

      unsigned IgnoredBits = MRI.createVirtualRegister(&ARM::GPRRegClass);
      auto InsertBefore = std::next(I.getIterator());
      auto MovI =
          BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::VMOVRRD))
              .addDef(DstReg)
              .addDef(IgnoredBits)
              .addUse(SrcReg)
              .add(predOps(ARMCC::AL));
      if (!constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI))
        return false;

      MIB->eraseFromParent();
      return true;
    }

    if (SrcRegBank.getID() != DstRegBank.getID()) {
      LLVM_DEBUG(
          dbgs() << "G_TRUNC/G_ANYEXT operands on different register banks\n");
      return false;
    }

    if (SrcRegBank.getID() != ARM::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_TRUNC/G_ANYEXT on non-GPR not supported yet\n");
      return false;
    }

    I.setDesc(TII.get(COPY));
    return selectCopy(I, TII, MRI, TRI, RBI);
  }
  case G_CONSTANT: {
    if (!MRI.getType(I.getOperand(0).getReg()).isPointer()) {
      // Non-pointer constants should be handled by TableGen.
      LLVM_DEBUG(dbgs() << "Unsupported constant type\n");
      return false;
    }

    auto &Val = I.getOperand(1);
    if (Val.isCImm()) {
      if (!Val.getCImm()->isZero()) {
        LLVM_DEBUG(dbgs() << "Unsupported pointer constant value\n");
        return false;
      }
      Val.ChangeToImmediate(0);
    } else {
      assert(Val.isImm() && "Unexpected operand for G_CONSTANT");
      if (Val.getImm() != 0) {
        LLVM_DEBUG(dbgs() << "Unsupported pointer constant value\n");
        return false;
      }
    }

    I.setDesc(TII.get(ARM::MOVi));
    MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
    break;
  }
  case G_INTTOPTR:
  case G_PTRTOINT: {
    auto SrcReg = I.getOperand(1).getReg();
    auto DstReg = I.getOperand(0).getReg();

    const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
    const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

    if (SrcRegBank.getID() != DstRegBank.getID()) {
      LLVM_DEBUG(
          dbgs()
          << "G_INTTOPTR/G_PTRTOINT operands on different register banks\n");
      return false;
    }

    if (SrcRegBank.getID() != ARM::GPRRegBankID) {
      LLVM_DEBUG(
          dbgs() << "G_INTTOPTR/G_PTRTOINT on non-GPR not supported yet\n");
      return false;
    }

    I.setDesc(TII.get(COPY));
    return selectCopy(I, TII, MRI, TRI, RBI);
  }
  case G_SELECT:
    return selectSelect(MIB, MRI);
  case G_ICMP: {
    CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END,
                        ARM::GPRRegBankID, 32);
    return selectCmp(Helper, MIB, MRI);
  }
  case G_FCMP: {
    assert(STI.hasVFP2() && "Can't select fcmp without VFP");

    unsigned OpReg = I.getOperand(2).getReg();
    unsigned Size = MRI.getType(OpReg).getSizeInBits();

    if (Size == 64 && STI.isFPOnlySP()) {
      LLVM_DEBUG(dbgs() << "Subtarget only supports single precision\n");
      return false;
    }
    if (Size != 32 && Size != 64) {
      LLVM_DEBUG(dbgs() << "Unsupported size for G_FCMP operand\n");
      return false;
    }

    CmpConstants Helper(Size == 32 ? ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT,
                        ARM::FPRRegBankID, Size);
    return selectCmp(Helper, MIB, MRI);
  }
  case G_LSHR:
    return selectShift(ARM_AM::ShiftOpc::lsr, MIB);
  case G_ASHR:
    return selectShift(ARM_AM::ShiftOpc::asr, MIB);
  case G_SHL: {
    return selectShift(ARM_AM::ShiftOpc::lsl, MIB);
  }
  case G_GEP:
    I.setDesc(TII.get(ARM::ADDrr));
    MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
    break;
  case G_FRAME_INDEX:
    // Add 0 to the given frame index and hope it will eventually be folded into
    // the user(s).
    I.setDesc(TII.get(ARM::ADDri));
    MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp());
    break;
  case G_GLOBAL_VALUE:
    return selectGlobal(MIB, MRI);
  case G_STORE:
  case G_LOAD: {
    const auto &MemOp = **I.memoperands_begin();
    if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
      LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
      return false;
    }

    unsigned Reg = I.getOperand(0).getReg();
    unsigned RegBank = RBI.getRegBank(Reg, MRI, TRI)->getID();

    LLT ValTy = MRI.getType(Reg);
    const auto ValSize = ValTy.getSizeInBits();

    assert((ValSize != 64 || STI.hasVFP2()) &&
           "Don't know how to load/store 64-bit value without VFP");

    const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize);
    if (NewOpc == G_LOAD || NewOpc == G_STORE)
      return false;

    I.setDesc(TII.get(NewOpc));

    if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH)
      // LDRH has a funny addressing mode (there's already a FIXME for it).
      MIB.addReg(0);
    MIB.addImm(0).add(predOps(ARMCC::AL));
    break;
  }
  case G_MERGE_VALUES: {
    if (!selectMergeValues(MIB, TII, MRI, TRI, RBI))
      return false;
    break;
  }
  case G_UNMERGE_VALUES: {
    if (!selectUnmergeValues(MIB, TII, MRI, TRI, RBI))
      return false;
    break;
  }
  case G_BRCOND: {
    if (!validReg(MRI, I.getOperand(0).getReg(), 1, ARM::GPRRegBankID)) {
      LLVM_DEBUG(dbgs() << "Unsupported condition register for G_BRCOND\n");
      return false;
    }

    // Set the flags.
    auto Test = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::TSTri))
                    .addReg(I.getOperand(0).getReg())
                    .addImm(1)
                    .add(predOps(ARMCC::AL));
    if (!constrainSelectedInstRegOperands(*Test, TII, TRI, RBI))
      return false;

    // Branch conditionally.
    auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::Bcc))
                      .add(I.getOperand(1))
                      .add(predOps(ARMCC::NE, ARM::CPSR));
    if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI))
      return false;
    I.eraseFromParent();
    return true;
  }
  case G_PHI: {
    I.setDesc(TII.get(PHI));

    unsigned DstReg = I.getOperand(0).getReg();
    const TargetRegisterClass *RC = guessRegClass(DstReg, MRI, TRI, RBI);
    if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
      break;
    }

    return true;
  }
  default:
    return false;
  }

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
void HexagonCopyToCombine::combine(MachineInstr &I1, MachineInstr &I2,
                                   MachineBasicBlock::iterator &MI,
                                   bool DoInsertAtI1, bool OptForSize) {
  // We are going to delete I2. If MI points to I2 advance it to the next
  // instruction.
  if (MI == I2.getIterator())
    ++MI;

  // Figure out whether I1 or I2 goes into the lowreg part.
  unsigned I1DestReg = I1.getOperand(0).getReg();
  unsigned I2DestReg = I2.getOperand(0).getReg();
  bool IsI1Loreg = (I2DestReg - I1DestReg) == 1;
  unsigned LoRegDef = IsI1Loreg ? I1DestReg : I2DestReg;

  // Get the double word register.
  unsigned DoubleRegDest =
    TRI->getMatchingSuperReg(LoRegDef, Hexagon::subreg_loreg,
                             &Hexagon::DoubleRegsRegClass);
  assert(DoubleRegDest != 0 && "Expect a valid register");


  // Setup source operands.
  MachineOperand &LoOperand = IsI1Loreg ? I1.getOperand(1) : I2.getOperand(1);
  MachineOperand &HiOperand = IsI1Loreg ? I2.getOperand(1) : I1.getOperand(1);

  // Figure out which source is a register and which a constant.
  bool IsHiReg = HiOperand.isReg();
  bool IsLoReg = LoOperand.isReg();

  // There is a combine of two constant extended values into CONST64.
  bool IsC64 = OptForSize && LoOperand.isImm() && HiOperand.isImm() &&
               isGreaterThanNBitTFRI<16>(I1) && isGreaterThanNBitTFRI<16>(I2);

  MachineBasicBlock::iterator InsertPt(DoInsertAtI1 ? I1 : I2);
  // Emit combine.
  if (IsHiReg && IsLoReg)
    emitCombineRR(InsertPt, DoubleRegDest, HiOperand, LoOperand);
  else if (IsHiReg)
    emitCombineRI(InsertPt, DoubleRegDest, HiOperand, LoOperand);
  else if (IsLoReg)
    emitCombineIR(InsertPt, DoubleRegDest, HiOperand, LoOperand);
  else if (IsC64 && !IsConst64Disabled)
    emitConst64(InsertPt, DoubleRegDest, HiOperand, LoOperand);
  else
    emitCombineII(InsertPt, DoubleRegDest, HiOperand, LoOperand);

  // Move debug instructions along with I1 if it's being
  // moved towards I2.
  if (!DoInsertAtI1 && DbgMItoMove.size() != 0) {
    // Insert debug instructions at the new location before I2.
    MachineBasicBlock *BB = InsertPt->getParent();
    for (auto NewMI : DbgMItoMove) {
      // If iterator MI is pointing to DEBUG_VAL, make sure
      // MI now points to next relevant instruction.
      if (NewMI == (MachineInstr*)MI)
        ++MI;
      BB->splice(InsertPt, BB, NewMI);
    }
  }

  I1.eraseFromParent();
  I2.eraseFromParent();
}
bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // Optimize sequences emitted for control flow lowering. They are originally
  // emitted as the separate operations because spill code may need to be
  // inserted for the saved copy of exec.
  //
  //     x = copy exec
  //     z = s_<op>_b64 x, y
  //     exec = copy z
  // =>
  //     x = s_<op>_saveexec_b64 y
  //

  for (MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::reverse_iterator I = fixTerminators(*TII, MBB);
    MachineBasicBlock::reverse_iterator E = MBB.rend();
    if (I == E)
      continue;

    unsigned CopyToExec = isCopyToExec(*I);
    if (CopyToExec == AMDGPU::NoRegister)
      continue;

    // Scan backwards to find the def.
    auto CopyToExecInst = &*I;
    auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
    if (CopyFromExecInst == E) {
      auto PrepareExecInst = std::next(I);
      if (PrepareExecInst == E)
        continue;
      // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec
      if (CopyToExecInst->getOperand(1).isKill() &&
          isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
        DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);

        PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
        PrepareExecInst->getOperand(0).setIsRenamable(false);

        DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');

        CopyToExecInst->eraseFromParent();
      }

      continue;
    }

    if (isLiveOut(MBB, CopyToExec)) {
      // The copied register is live out and has a second use in another block.
      DEBUG(dbgs() << "Exec copy source register is live out\n");
      continue;
    }

    unsigned CopyFromExec = CopyFromExecInst->getOperand(0).getReg();
    MachineInstr *SaveExecInst = nullptr;
    SmallVector<MachineInstr *, 4> OtherUseInsts;

    for (MachineBasicBlock::iterator J
           = std::next(CopyFromExecInst->getIterator()), JE = I->getIterator();
         J != JE; ++J) {
      if (SaveExecInst && J->readsRegister(AMDGPU::EXEC, TRI)) {
        DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n');
        // Make sure this is inserted after any VALU ops that may have been
        // scheduled in between.
        SaveExecInst = nullptr;
        break;
      }

      bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI);

      if (J->modifiesRegister(CopyToExec, TRI)) {
        if (SaveExecInst) {
          DEBUG(dbgs() << "Multiple instructions modify "
                << printReg(CopyToExec, TRI) << '\n');
          SaveExecInst = nullptr;
          break;
        }

        unsigned SaveExecOp = getSaveExecOp(J->getOpcode());
        if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END)
          break;

        if (ReadsCopyFromExec) {
          SaveExecInst = &*J;
          DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n');
          continue;
        } else {
          DEBUG(dbgs() << "Instruction does not read exec copy: " << *J << '\n');
          break;
        }
      } else if (ReadsCopyFromExec && !SaveExecInst) {
        // Make sure no other instruction is trying to use this copy, before it
        // will be rewritten by the saveexec, i.e. hasOneUse. There may have
        // been another use, such as an inserted spill. For example:
        //
        // %sgpr0_sgpr1 = COPY %exec
        // spill %sgpr0_sgpr1
        // %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1
        //
        DEBUG(dbgs() << "Found second use of save inst candidate: "
              << *J << '\n');
        break;
      }

      if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) {
        assert(SaveExecInst != &*J);
        OtherUseInsts.push_back(&*J);
      }
    }

    if (!SaveExecInst)
      continue;

    DEBUG(dbgs() << "Insert save exec op: " << *SaveExecInst << '\n');

    MachineOperand &Src0 = SaveExecInst->getOperand(1);
    MachineOperand &Src1 = SaveExecInst->getOperand(2);

    MachineOperand *OtherOp = nullptr;

    if (Src0.isReg() && Src0.getReg() == CopyFromExec) {
      OtherOp = &Src1;
    } else if (Src1.isReg() && Src1.getReg() == CopyFromExec) {
      if (!SaveExecInst->isCommutable())
        break;

      OtherOp = &Src0;
    } else
      llvm_unreachable("unexpected");

    CopyFromExecInst->eraseFromParent();

    auto InsPt = SaveExecInst->getIterator();
    const DebugLoc &DL = SaveExecInst->getDebugLoc();

    BuildMI(MBB, InsPt, DL, TII->get(getSaveExecOp(SaveExecInst->getOpcode())),
            CopyFromExec)
      .addReg(OtherOp->getReg());
    SaveExecInst->eraseFromParent();

    CopyToExecInst->eraseFromParent();

    for (MachineInstr *OtherInst : OtherUseInsts) {
      OtherInst->substituteRegister(CopyToExec, AMDGPU::EXEC,
                                    AMDGPU::NoSubRegister, *TRI,
                                    /*ClearIsRenamable=*/true);
    }
  }

  return true;
}
bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
  HST = &MF.getSubtarget<HexagonSubtarget>();
  HII = HST->getInstrInfo();
  const auto &HRI = *HST->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  std::map<unsigned, SmallVector<MachineInstr*,4>> VExtractMap;
  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      unsigned Opc = MI.getOpcode();
      if (Opc != Hexagon::V6_extractw)
        continue;
      unsigned VecR = MI.getOperand(1).getReg();
      VExtractMap[VecR].push_back(&MI);
    }
  }

  for (auto &P : VExtractMap) {
    unsigned VecR = P.first;
    if (P.second.size() <= VExtractThreshold)
      continue;

    const auto &VecRC = *MRI.getRegClass(VecR);
    int FI = MFI.CreateSpillStackObject(HRI.getSpillSize(VecRC),
                                        HRI.getSpillAlignment(VecRC));
    MachineInstr *DefI = MRI.getVRegDef(VecR);
    MachineBasicBlock::iterator At = std::next(DefI->getIterator());
    MachineBasicBlock &DefB = *DefI->getParent();
    unsigned StoreOpc = VecRC.getID() == Hexagon::HvxVRRegClassID
                          ? Hexagon::V6_vS32b_ai
                          : Hexagon::PS_vstorerw_ai;
    BuildMI(DefB, At, DefI->getDebugLoc(), HII->get(StoreOpc))
      .addFrameIndex(FI)
      .addImm(0)
      .addReg(VecR);

    unsigned VecSize = HRI.getRegSizeInBits(VecRC) / 8;

    for (MachineInstr *ExtI : P.second) {
      assert(ExtI->getOpcode() == Hexagon::V6_extractw);
      unsigned SR = ExtI->getOperand(1).getSubReg();
      assert(ExtI->getOperand(1).getReg() == VecR);

      MachineBasicBlock &ExtB = *ExtI->getParent();
      DebugLoc DL = ExtI->getDebugLoc();
      unsigned BaseR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::PS_fi), BaseR)
        .addFrameIndex(FI)
        .addImm(SR == 0 ? 0 : VecSize/2);

      unsigned ElemR = genElemLoad(ExtI, BaseR, MRI);
      unsigned ExtR = ExtI->getOperand(0).getReg();
      MRI.replaceRegWith(ExtR, ElemR);
      ExtB.erase(ExtI);
      Changed = true;
    }
  }

  return Changed;
}
/// fixupConditionalBranch - Fix up a conditional branch whose destination is
/// too far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
  MachineBasicBlock *DestBB = getDestBlock(MI);

  // Add an unconditional branch to the destination and invert the branch
  // condition to jump over it:
  // tbz L1
  // =>
  // tbnz L2
  // b   L1
  // L2:

  // If the branch is at the end of its MBB and that has a fall-through block,
  // direct the updated conditional branch to the fall-through block. Otherwise,
  // split the MBB before the next instruction.
  MachineBasicBlock *MBB = MI.getParent();
  MachineInstr *BMI = &MBB->back();
  bool NeedSplit = (BMI != &MI) || !hasFallthrough(*MBB);

  if (BMI != &MI) {
    if (std::next(MachineBasicBlock::iterator(MI)) ==
            std::prev(MBB->getLastNonDebugInstr()) &&
        BMI->isUnconditionalBranch()) {
      // Last MI in the BB is an unconditional branch. We can simply invert the
      // condition and swap destinations:
      // beq L1
      // b   L2
      // =>
      // bne L2
      // b   L1
      MachineBasicBlock *NewDest = getDestBlock(*BMI);
      if (isBlockInRange(MI, *NewDest)) {
        DEBUG(dbgs() << "  Invert condition and swap its destination with "
                     << *BMI);
        changeBranchDestBlock(*BMI, *DestBB);

        int NewSize =
          insertInvertedConditionalBranch(*MBB, MI.getIterator(),
                                          MI.getDebugLoc(), MI, *NewDest);
        int OldSize = TII->getInstSizeInBytes(MI);
        BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize);
        MI.eraseFromParent();
        return true;
      }
    }
  }

  if (NeedSplit) {
    // Analyze the branch so we know how to update the successor lists.
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    SmallVector<MachineOperand, 2> Cond;
    bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond, false);
    assert(!Fail && "branches to relax should be analyzable");
    (void)Fail;

    MachineBasicBlock *NewBB = splitBlockBeforeInstr(MI);
    // No need for the branch to the next block. We're adding an unconditional
    // branch to the destination.
    int delta = TII->getInstSizeInBytes(MBB->back());
    BlockInfo[MBB->getNumber()].Size -= delta;
    MBB->back().eraseFromParent();
    // BlockInfo[SplitBB].Offset is wrong temporarily, fixed below

    // Update the successor lists according to the transformation to follow.
    // Do it here since if there's no split, no update is needed.
    MBB->replaceSuccessor(FBB, NewBB);
    NewBB->addSuccessor(FBB);
  }

  MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB));

  DEBUG(dbgs() << "  Insert B to BB#" << DestBB->getNumber()
               << ", invert condition and change dest. to BB#"
               << NextBB.getNumber() << '\n');

  unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size;

  // Insert a new conditional branch and a new unconditional branch.
  MBBSize += insertInvertedConditionalBranch(*MBB, MBB->end(),
                                             MI.getDebugLoc(), MI, NextBB);

  MBBSize += insertUnconditionalBranch(*MBB, *DestBB, MI.getDebugLoc());

  // Remove the old conditional branch.  It may or may not still be in MBB.
  MBBSize -= TII->getInstSizeInBytes(MI);
  MI.eraseFromParent();

  // Finally, keep the block offsets up to date.
  adjustBlockOffsets(*MBB);
  return true;
}
bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
  bool Modified = false;

  SmallSet<unsigned, 4> Defs;
  SmallSet<unsigned, 4> Uses;
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineInstr *MI = &*MBBI;
    DebugLoc dl = MI->getDebugLoc();
    unsigned PredReg = 0;
    ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg);
    if (CC == ARMCC::AL) {
      ++MBBI;
      continue;
    }

    Defs.clear();
    Uses.clear();
    TrackDefUses(MI, Defs, Uses, TRI);

    // Insert an IT instruction.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
      .addImm(CC);

    // Add implicit use of ITSTATE to IT block instructions.
    MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
                                             true/*isImp*/, false/*isKill*/));

    MachineInstr *LastITMI = MI;
    MachineBasicBlock::iterator InsertPos = MIB.getInstr();
    ++MBBI;

    // Form IT block.
    ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
    unsigned Mask = 0, Pos = 3;

    // v8 IT blocks are limited to one conditional op unless -arm-no-restrict-it
    // is set: skip the loop
    if (!restrictIT) {
      // Branches, including tricky ones like LDM_RET, need to end an IT
      // block so check the instruction we just put in the block.
      for (; MBBI != E && Pos &&
             (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
        if (MBBI->isDebugValue())
          continue;

        MachineInstr *NMI = &*MBBI;
        MI = NMI;

        unsigned NPredReg = 0;
        ARMCC::CondCodes NCC = getITInstrPredicate(*NMI, NPredReg);
        if (NCC == CC || NCC == OCC) {
          Mask |= (NCC & 1) << Pos;
          // Add implicit use of ITSTATE.
          NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
                                                    true/*isImp*/, false/*isKill*/));
          LastITMI = NMI;
        } else {
          if (NCC == ARMCC::AL &&
              MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
            --MBBI;
            MBB.remove(NMI);
            MBB.insert(InsertPos, NMI);
            ClearKillFlags(MI, Uses);
            ++NumMovedInsts;
            continue;
          }
          break;
        }
        TrackDefUses(NMI, Defs, Uses, TRI);
        --Pos;
      }
    }

    // Finalize IT mask.
    Mask |= (1 << Pos);
    // Tag along (firstcond[0] << 4) with the mask.
    Mask |= (CC & 1) << 4;
    MIB.addImm(Mask);

    // Last instruction in IT block kills ITSTATE.
    LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();

    // Finalize the bundle.
    finalizeBundle(MBB, InsertPos.getInstrIterator(),
                   ++LastITMI->getIterator());

    Modified = true;
    ++NumITs;
  }

  return Modified;
}
bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();

  LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');

  InstOrderMap IOM;
  // Map from a register to the instruction order (the value in IOM) at which
  // the register is last used. When moving an instruction up, we need to make
  // sure that none of its defs (including dead defs) cross that last use.
  DenseMap<unsigned, std::pair<unsigned, MachineInstr *>> UseMap;

  for (MachineBasicBlock &MBB : MF) {
    if (MBB.empty())
      continue;
    bool SawStore = false;
    BuildInstOrderMap(MBB.begin(), IOM);
    UseMap.clear();

    for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) {
      MachineInstr &MI = *Next;
      ++Next;
      if (MI.isPHI() || MI.isDebugInstr())
        continue;
      if (MI.mayStore())
        SawStore = true;

      unsigned CurrentOrder = IOM[&MI];
      unsigned Barrier = 0;
      MachineInstr *BarrierMI = nullptr;
      for (const MachineOperand &MO : MI.operands()) {
        if (!MO.isReg() || MO.isDebug())
          continue;
        if (MO.isUse())
          UseMap[MO.getReg()] = std::make_pair(CurrentOrder, &MI);
        else if (MO.isDead() && UseMap.count(MO.getReg()))
          // Barrier is the last instruction where MO is used. MI should not
          // be moved above Barrier.
          if (Barrier < UseMap[MO.getReg()].first) {
            Barrier = UseMap[MO.getReg()].first;
            BarrierMI = UseMap[MO.getReg()].second;
          }
      }

      if (!MI.isSafeToMove(nullptr, SawStore)) {
        // If MI has side effects, it should become a barrier for code motion.
        // IOM is rebuilt from the next instruction to prevent later
        // instructions from being moved before this MI.
        if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
          BuildInstOrderMap(Next, IOM);
          SawStore = false;
        }
        continue;
      }

      const MachineOperand *DefMO = nullptr;
      MachineInstr *Insert = nullptr;

      // Number of live-ranges that will be shortened. We do not count
      // live-ranges that are defined by a COPY as it could be coalesced later.
      unsigned NumEligibleUse = 0;

      for (const MachineOperand &MO : MI.operands()) {
        if (!MO.isReg() || MO.isDead() || MO.isDebug())
          continue;
        unsigned Reg = MO.getReg();
        // Do not move the instruction if it defines or uses a physical
        // register, unless it is a constant physical register or NoRegister.
        if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
          if (!Reg || MRI.isConstantPhysReg(Reg))
            continue;
          Insert = nullptr;
          break;
        }
        if (MO.isDef()) {
          // Do not move if there is more than one def.
          if (DefMO) {
            Insert = nullptr;
            break;
          }
          DefMO = &MO;
        } else if (MRI.hasOneNonDBGUse(Reg) && MRI.hasOneDef(Reg) && DefMO &&
                   MRI.getRegClass(DefMO->getReg()) ==
                       MRI.getRegClass(MO.getReg())) {
          // The heuristic does not handle different register classes yet
          // (registers of different sizes, looser/tighter constraints). This
          // is because it needs a more accurate model to handle register
          // pressure correctly.
          MachineInstr &DefInstr = *MRI.def_instr_begin(Reg);
          if (!DefInstr.isCopy())
            NumEligibleUse++;
          Insert = FindDominatedInstruction(DefInstr, Insert, IOM);
        } else {
          Insert = nullptr;
          break;
        }
      }

      // If Barrier equals IOM[I], traverse forward to check whether BarrierMI
      // comes after Insert; if so, we should not hoist.
      for (MachineInstr *I = Insert; I && IOM[I] == Barrier;
           I = I->getNextNode())
        if (I == BarrierMI) {
          Insert = nullptr;
          break;
        }
      // Move the instruction when the number of shrunk live ranges is > 1.
      if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) {
        MachineBasicBlock::iterator I = std::next(Insert->getIterator());
        // Skip all the PHI and debug instructions.
        while (I != MBB.end() && (I->isPHI() || I->isDebugInstr()))
          I = std::next(I);
        if (I == MI.getIterator())
          continue;

        // Update the dominator order to be the same as the insertion point.
        // We do this to maintain a non-decreasing order without need to update
        // all instruction orders after the insertion point.
        unsigned NewOrder = IOM[&*I];
        IOM[&MI] = NewOrder;
        NumInstrsHoistedToShrinkLiveRange++;

        // Find MI's debug value following MI.
        MachineBasicBlock::iterator EndIter = std::next(MI.getIterator());
        if (MI.getOperand(0).isReg())
          for (; EndIter != MBB.end() && EndIter->isDebugValue() &&
                 EndIter->getOperand(0).isReg() &&
                 EndIter->getOperand(0).getReg() == MI.getOperand(0).getReg();
               ++EndIter, ++Next)
            IOM[&*EndIter] = NewOrder;
        MBB.splice(I, &MBB, MI.getIterator(), EndIter);
      }
    }
  }
  return false;
}