C++ (Cpp) iterator::modifiesRegister Examples

Programming Language: C++ (Cpp)

Namespace/Package Name: machinebasicblock

Class/Type: iterator

Method/Function: modifiesRegister

Examples at hotexamples.com: 8

C++ (Cpp) iterator::modifiesRegister - 8 examples found. These are the top rated real world C++ (Cpp) examples of machinebasicblock::iterator::modifiesRegister extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

getOpcode(30)

eraseFromParent(30)

getParent(30)

getDesc(30)

getDebugLoc(30)

getOperand(30)

getNumOperands(30)

isDebugValue(30)

isPHI(30)

memoperands_begin(14)

mayStore(14)

isBranch(13)

setDesc(13)

isTerminator(12)

isReturn(11)

readsRegister(11)

hasDelaySlot(8)

modifiesRegister(8)

memoperands_end(8)

addOperand(7)

isCall(7)

hasUnmodeledSideEffects(6)

isBundle(6)

isDebugInstr(6)

isInlineAsm(6)

clearRegisterKills(6)

getInstrIterator(5)

getFlag(5)

findRegisterUseOperandIdx(5)

dump(5)

operands(5)

isKill(4)

isLabel(4)

hasOrderedMemoryRef(4)

isImplicitDef(4)

isUnconditionalBranch(4)

RemoveOperand(3)

print(3)

isPseudo(3)

isConditionalBranch(3)

isRegTiedToDefOperand(3)

isCopy(3)

isRegTiedToUseOperand(3)

mayLoadOrStore(3)

operands_end(2)

isPosition(2)

operands_begin(2)

AddRegOperandsToUseLists(2)

uses(2)

isBundledWithSucc(2)

Example #1

Show file

File: SystemZElimCompare.cpp Project: 8l/SPIRV-LLVM

// Try to fuse comparison instruction Compare into a later branch.
// Return true on success and if Compare is therefore redundant.
bool SystemZElimCompare::
fuseCompareAndBranch(MachineInstr *Compare,
                     SmallVectorImpl<MachineInstr *> &CCUsers) {
  // See whether we have a comparison that can be fused.
  unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(),
                                                  Compare);
  if (!FusedOpcode)
    return false;

  // See whether we have a single branch with which to fuse.
  if (CCUsers.size() != 1)
    return false;
  MachineInstr *Branch = CCUsers[0];
  if (Branch->getOpcode() != SystemZ::BRC)
    return false;

  // Make sure that the operands are available at the branch.
  unsigned SrcReg = Compare->getOperand(0).getReg();
  unsigned SrcReg2 = (Compare->getOperand(1).isReg() ?
                      Compare->getOperand(1).getReg() : 0);
  MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
  for (++MBBI; MBBI != MBBE; ++MBBI)
    if (MBBI->modifiesRegister(SrcReg, TRI) ||
        (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
      return false;

  // Read the branch mask and target.
  MachineOperand CCMask(MBBI->getOperand(1));
  MachineOperand Target(MBBI->getOperand(2));
  assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 &&
         "Invalid condition-code mask for integer comparison");

  // Clear out all current operands.
  int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
  assert(CCUse >= 0 && "BRC must use CC");
  Branch->RemoveOperand(CCUse);
  Branch->RemoveOperand(2);
  Branch->RemoveOperand(1);
  Branch->RemoveOperand(0);

  // Rebuild Branch as a fused compare and branch.
  Branch->setDesc(TII->get(FusedOpcode));
  MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
    .addOperand(Compare->getOperand(0))
    .addOperand(Compare->getOperand(1))
    .addOperand(CCMask)
    .addOperand(Target)
    .addReg(SystemZ::CC, RegState::ImplicitDefine);

  // Clear any intervening kills of SrcReg and SrcReg2.
  MBBI = Compare;
  for (++MBBI; MBBI != MBBE; ++MBBI) {
    MBBI->clearRegisterKills(SrcReg, TRI);
    if (SrcReg2)
      MBBI->clearRegisterKills(SrcReg2, TRI);
  }
  FusedComparisons += 1;
  return true;
}

Example #2

Show file

File: HexagonNewValueJump.cpp Project: mikaoP/llvm

// We have identified this II could be feeder to NVJ,
// verify that it can be.
static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII,
                                      const TargetRegisterInfo *TRI,
                                      MachineBasicBlock::iterator II,
                                      MachineBasicBlock::iterator end,
                                      MachineBasicBlock::iterator skip,
                                      MachineFunction &MF) {

  // Predicated instruction can not be feeder to NVJ.
  if (QII->isPredicated(*II))
    return false;

  // Bail out if feederReg is a paired register (double regs in
  // our case). One would think that we can check to see if a given
  // register cmpReg1 or cmpReg2 is a sub register of feederReg
  // using -- if (QRI->isSubRegister(feederReg, cmpReg1) logic
  // before the callsite of this function
  // But we can not as it comes in the following fashion.
  //    %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill>
  //    %R0<def> = KILL %R0, %D0<imp-use,kill>
  //    %P0<def> = CMPEQri %R0<kill>, 0
  // Hence, we need to check if it's a KILL instruction.
  if (II->getOpcode() == TargetOpcode::KILL)
    return false;

  if (II->isImplicitDef())
    return false;

  // Make sure there there is no 'def' or 'use' of any of the uses of
  // feeder insn between it's definition, this MI and jump, jmpInst
  // skipping compare, cmpInst.
  // Here's the example.
  //    r21=memub(r22+r24<<#0)
  //    p0 = cmp.eq(r21, #0)
  //    r4=memub(r3+r21<<#0)
  //    if (p0.new) jump:t .LBB29_45
  // Without this check, it will be converted into
  //    r4=memub(r3+r21<<#0)
  //    r21=memub(r22+r24<<#0)
  //    p0 = cmp.eq(r21, #0)
  //    if (p0.new) jump:t .LBB29_45
  // and result WAR hazards if converted to New Value Jump.

  for (unsigned i = 0; i < II->getNumOperands(); ++i) {
    if (II->getOperand(i).isReg() &&
        (II->getOperand(i).isUse() || II->getOperand(i).isDef())) {
      MachineBasicBlock::iterator localII = II;
      ++localII;
      unsigned Reg = II->getOperand(i).getReg();
      for (MachineBasicBlock::iterator localBegin = localII;
                        localBegin != end; ++localBegin) {
        if (localBegin == skip ) continue;
        // Check for Subregisters too.
        if (localBegin->modifiesRegister(Reg, TRI) ||
            localBegin->readsRegister(Reg, TRI))
          return false;
      }
    }
  }
  return true;
}

Example #3

Show file

File: SIInsertWaits.cpp Project: anupam128/llvm

// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
// around other non-memory instructions.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
  bool Changes = false;

  ST = &MF.getSubtarget<SISubtarget>();
  TII = ST->getInstrInfo();
  TRI = &TII->getRegisterInfo();
  MRI = &MF.getRegInfo();
  IV = getIsaVersion(ST->getFeatureBits());
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  HardwareLimits.Named.VM = getVmcntBitMask(IV);
  HardwareLimits.Named.EXP = getExpcntBitMask(IV);
  HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV);

  WaitedOn = ZeroCounts;
  DelayedWaitOn = ZeroCounts;
  LastIssued = ZeroCounts;
  LastOpcodeType = OTHER;
  LastInstWritesM0 = false;
  IsFlatOutstanding = false;
  ReturnsVoid = MFI->returnsVoid();

  memset(&UsedRegs, 0, sizeof(UsedRegs));
  memset(&DefinedRegs, 0, sizeof(DefinedRegs));

  SmallVector<MachineInstr *, 4> RemoveMI;
  SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;

  bool HaveScalarStores = false;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;

    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {

      if (!HaveScalarStores && TII->isScalarStore(*I))
        HaveScalarStores = true;

      if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
        // There is a hardware bug on CI/SI where SMRD instruction may corrupt
        // vccz bit, so when we detect that an instruction may read from a
        // corrupt vccz bit, we need to:
        // 1. Insert s_waitcnt lgkm(0) to wait for all outstanding SMRD operations to
        //    complete.
        // 2. Restore the correct value of vccz by writing the current value
        //    of vcc back to vcc.

        if (TII->isSMRD(I->getOpcode())) {
          VCCZCorrupt = true;
        } else if (!hasOutstandingLGKM() && I->modifiesRegister(AMDGPU::VCC, TRI)) {
          // FIXME: We only care about SMRD instructions here, not LDS or GDS.
          // Whenever we store a value in vcc, the correct value of vccz is
          // restored.
          VCCZCorrupt = false;
        }

        // Check if we need to apply the bug work-around
        if (VCCZCorrupt && readsVCCZ(*I)) {
          DEBUG(dbgs() << "Inserting vccz bug work-around before: " << *I << '\n');

          // Wait on everything, not just LGKM.  vccz reads usually come from
          // terminators, and we always wait on everything at the end of the
          // block, so if we only wait on LGKM here, we might end up with
          // another s_waitcnt inserted right after this if there are non-LGKM
          // instructions still outstanding.
          insertWait(MBB, I, LastIssued);

          // Restore the vccz bit.  Any time a value is written to vcc, the vcc
          // bit is updated, so we can restore the bit by reading the value of
          // vcc and then writing it back to the register.
          BuildMI(MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
                  AMDGPU::VCC)
            .addReg(AMDGPU::VCC);
        }
      }

      // Record pre-existing, explicitly requested waits
      if (I->getOpcode() == AMDGPU::S_WAITCNT) {
        handleExistingWait(*I);
        RemoveMI.push_back(&*I);
        continue;
      }

      Counters Required;

      // Wait for everything before a barrier.
      //
      // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
      // but we also want to wait for any other outstanding transfers before
      // signalling other hardware blocks
      if ((I->getOpcode() == AMDGPU::S_BARRIER &&
               ST->needWaitcntBeforeBarrier()) ||
           I->getOpcode() == AMDGPU::S_SENDMSG)
        Required = LastIssued;
      else
        Required = handleOperands(*I);

      Counters Increment = getHwCounts(*I);

      if (countersNonZero(Required) || countersNonZero(Increment))
        increaseCounters(Required, DelayedWaitOn);

      Changes |= insertWait(MBB, I, Required);

      pushInstruction(MBB, I, Increment);
      handleSendMsg(MBB, I);

      if (I->getOpcode() == AMDGPU::S_ENDPGM ||
          I->getOpcode() == AMDGPU::SI_RETURN)
        EndPgmBlocks.push_back(&MBB);
    }

    // Wait for everything at the end of the MBB
    Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
  }

  if (HaveScalarStores) {
    // If scalar writes are used, the cache must be flushed or else the next
    // wave to reuse the same scratch memory can be clobbered.
    //
    // Insert s_dcache_wb at wave termination points if there were any scalar
    // stores, and only if the cache hasn't already been flushed. This could be
    // improved by looking across blocks for flushes in postdominating blocks
    // from the stores but an explicitly requested flush is probably very rare.
    for (MachineBasicBlock *MBB : EndPgmBlocks) {
      bool SeenDCacheWB = false;

      for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
           I != E; ++I) {

        if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
          SeenDCacheWB = true;
        else if (TII->isScalarStore(*I))
          SeenDCacheWB = false;

        // FIXME: It would be better to insert this before a waitcnt if any.
        if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
             I->getOpcode() == AMDGPU::SI_RETURN) && !SeenDCacheWB) {
          Changes = true;
          BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
        }
      }
    }
  }

  for (MachineInstr *I : RemoveMI)
    I->eraseFromParent();

  return Changes;
}

Example #4

Show file

File: HexagonNewValueJump.cpp Project: CSI-LLVM/llvm

static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
                                     const TargetRegisterInfo *TRI,
                                     MachineBasicBlock::iterator II,
                                     unsigned pReg,
                                     bool secondReg,
                                     bool optLocation,
                                     MachineBasicBlock::iterator end,
                                     MachineFunction &MF) {

  MachineInstr &MI = *II;

  // If the second operand of the compare is an imm, make sure it's in the
  // range specified by the arch.
  if (!secondReg) {
    int64_t v = MI.getOperand(2).getImm();

    if (!(isUInt<5>(v) || ((MI.getOpcode() == Hexagon::C2_cmpeqi ||
                            MI.getOpcode() == Hexagon::C2_cmpgti) &&
                           (v == -1))))
      return false;
  }

  unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning.
  cmpReg1 = MI.getOperand(1).getReg();

  if (secondReg) {
    cmpOp2 = MI.getOperand(2).getReg();

    // Make sure that that second register is not from COPY
    // At machine code level, we don't need this, but if we decide
    // to move new value jump prior to RA, we would be needing this.
    MachineRegisterInfo &MRI = MF.getRegInfo();
    if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) {
      MachineInstr *def = MRI.getVRegDef(cmpOp2);
      if (def->getOpcode() == TargetOpcode::COPY)
        return false;
    }
  }

  // Walk the instructions after the compare (predicate def) to the jump,
  // and satisfy the following conditions.
  ++II ;
  for (MachineBasicBlock::iterator localII = II; localII != end;
       ++localII) {

    // Check 1.
    // If "common" checks fail, bail out.
    if (!commonChecksToProhibitNewValueJump(optLocation, localII))
      return false;

    // Check 2.
    // If there is a def or use of predicate (result of compare), bail out.
    if (localII->modifiesRegister(pReg, TRI) ||
        localII->readsRegister(pReg, TRI))
      return false;

    // Check 3.
    // If there is a def of any of the use of the compare (operands of compare),
    // bail out.
    // Eg.
    //    p0 = cmp.eq(r2, r0)
    //    r2 = r4
    //    if (p0.new) jump:t .LBB28_3
    if (localII->modifiesRegister(cmpReg1, TRI) ||
        (secondReg && localII->modifiesRegister(cmpOp2, TRI)))
      return false;
  }
  return true;
}

Example #5

Show file

File: SystemZElimCompare.cpp Project: AstroVPK/LLVM-4.0.0

// Try to fuse comparison instruction Compare into a later branch.
// Return true on success and if Compare is therefore redundant.
bool SystemZElimCompare::fuseCompareOperations(
    MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) {
  // See whether we have a single branch with which to fuse.
  if (CCUsers.size() != 1)
    return false;
  MachineInstr *Branch = CCUsers[0];
  SystemZII::FusedCompareType Type;
  switch (Branch->getOpcode()) {
  case SystemZ::BRC:
    Type = SystemZII::CompareAndBranch;
    break;
  case SystemZ::CondReturn:
    Type = SystemZII::CompareAndReturn;
    break;
  case SystemZ::CallBCR:
    Type = SystemZII::CompareAndSibcall;
    break;
  case SystemZ::CondTrap:
    Type = SystemZII::CompareAndTrap;
    break;
  default:
    return false;
  }

  // See whether we have a comparison that can be fused.
  unsigned FusedOpcode =
      TII->getFusedCompare(Compare.getOpcode(), Type, &Compare);
  if (!FusedOpcode)
    return false;

  // Make sure that the operands are available at the branch.
  // SrcReg2 is the register if the source operand is a register,
  // 0 if the source operand is immediate, and the base register
  // if the source operand is memory (index is not supported).
  unsigned SrcReg = Compare.getOperand(0).getReg();
  unsigned SrcReg2 =
      Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : 0;
  MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
  for (++MBBI; MBBI != MBBE; ++MBBI)
    if (MBBI->modifiesRegister(SrcReg, TRI) ||
        (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
      return false;

  // Read the branch mask, target (if applicable), regmask (if applicable).
  MachineOperand CCMask(MBBI->getOperand(1));
  assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 &&
         "Invalid condition-code mask for integer comparison");
  // This is only valid for CompareAndBranch.
  MachineOperand Target(MBBI->getOperand(
    Type == SystemZII::CompareAndBranch ? 2 : 0));
  const uint32_t *RegMask;
  if (Type == SystemZII::CompareAndSibcall)
    RegMask = MBBI->getOperand(2).getRegMask();

  // Clear out all current operands.
  int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
  assert(CCUse >= 0 && "BRC/BCR must use CC");
  Branch->RemoveOperand(CCUse);
  // Remove target (branch) or regmask (sibcall).
  if (Type == SystemZII::CompareAndBranch ||
      Type == SystemZII::CompareAndSibcall)
    Branch->RemoveOperand(2);
  Branch->RemoveOperand(1);
  Branch->RemoveOperand(0);

  // Rebuild Branch as a fused compare and branch.
  // SrcNOps is the number of MI operands of the compare instruction
  // that we need to copy over.
  unsigned SrcNOps = 2;
  if (FusedOpcode == SystemZ::CLT || FusedOpcode == SystemZ::CLGT)
    SrcNOps = 3;
  Branch->setDesc(TII->get(FusedOpcode));
  MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
  for (unsigned I = 0; I < SrcNOps; I++)
    MIB.addOperand(Compare.getOperand(I));
  MIB.addOperand(CCMask);

  if (Type == SystemZII::CompareAndBranch) {
    // Only conditional branches define CC, as they may be converted back
    // to a non-fused branch because of a long displacement.  Conditional
    // returns don't have that problem.
    MIB.addOperand(Target)
       .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
  }

  if (Type == SystemZII::CompareAndSibcall)
    MIB.addRegMask(RegMask);

  // Clear any intervening kills of SrcReg and SrcReg2.
  MBBI = Compare;
  for (++MBBI; MBBI != MBBE; ++MBBI) {
    MBBI->clearRegisterKills(SrcReg, TRI);
    if (SrcReg2)
      MBBI->clearRegisterKills(SrcReg2, TRI);
  }
  FusedComparisons += 1;
  return true;
}

Example #6

Show file

File: SIOptimizeExecMasking.cpp Project: bgabor666/llvm

bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // Optimize sequences emitted for control flow lowering. They are originally
  // emitted as the separate operations because spill code may need to be
  // inserted for the saved copy of exec.
  //
  //     x = copy exec
  //     z = s_<op>_b64 x, y
  //     exec = copy z
  // =>
  //     x = s_<op>_saveexec_b64 y
  //

  for (MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::reverse_iterator I = fixTerminators(*TII, MBB);
    MachineBasicBlock::reverse_iterator E = MBB.rend();
    if (I == E)
      continue;

    unsigned CopyToExec = isCopyToExec(*I);
    if (CopyToExec == AMDGPU::NoRegister)
      continue;

    // Scan backwards to find the def.
    auto CopyToExecInst = &*I;
    auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
    if (CopyFromExecInst == E) {
      auto PrepareExecInst = std::next(I);
      if (PrepareExecInst == E)
        continue;
      // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec
      if (CopyToExecInst->getOperand(1).isKill() &&
          isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
        DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);

        PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
        PrepareExecInst->getOperand(0).setIsRenamable(false);

        DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');

        CopyToExecInst->eraseFromParent();
      }

      continue;
    }

    if (isLiveOut(MBB, CopyToExec)) {
      // The copied register is live out and has a second use in another block.
      DEBUG(dbgs() << "Exec copy source register is live out\n");
      continue;
    }

    unsigned CopyFromExec = CopyFromExecInst->getOperand(0).getReg();
    MachineInstr *SaveExecInst = nullptr;
    SmallVector<MachineInstr *, 4> OtherUseInsts;

    for (MachineBasicBlock::iterator J
           = std::next(CopyFromExecInst->getIterator()), JE = I->getIterator();
         J != JE; ++J) {
      if (SaveExecInst && J->readsRegister(AMDGPU::EXEC, TRI)) {
        DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n');
        // Make sure this is inserted after any VALU ops that may have been
        // scheduled in between.
        SaveExecInst = nullptr;
        break;
      }

      bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI);

      if (J->modifiesRegister(CopyToExec, TRI)) {
        if (SaveExecInst) {
          DEBUG(dbgs() << "Multiple instructions modify "
                << printReg(CopyToExec, TRI) << '\n');
          SaveExecInst = nullptr;
          break;
        }

        unsigned SaveExecOp = getSaveExecOp(J->getOpcode());
        if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END)
          break;

        if (ReadsCopyFromExec) {
          SaveExecInst = &*J;
          DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n');
          continue;
        } else {
          DEBUG(dbgs() << "Instruction does not read exec copy: " << *J << '\n');
          break;
        }
      } else if (ReadsCopyFromExec && !SaveExecInst) {
        // Make sure no other instruction is trying to use this copy, before it
        // will be rewritten by the saveexec, i.e. hasOneUse. There may have
        // been another use, such as an inserted spill. For example:
        //
        // %sgpr0_sgpr1 = COPY %exec
        // spill %sgpr0_sgpr1
        // %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1
        //
        DEBUG(dbgs() << "Found second use of save inst candidate: "
              << *J << '\n');
        break;
      }

      if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) {
        assert(SaveExecInst != &*J);
        OtherUseInsts.push_back(&*J);
      }
    }

    if (!SaveExecInst)
      continue;

    DEBUG(dbgs() << "Insert save exec op: " << *SaveExecInst << '\n');

    MachineOperand &Src0 = SaveExecInst->getOperand(1);
    MachineOperand &Src1 = SaveExecInst->getOperand(2);

    MachineOperand *OtherOp = nullptr;

    if (Src0.isReg() && Src0.getReg() == CopyFromExec) {
      OtherOp = &Src1;
    } else if (Src1.isReg() && Src1.getReg() == CopyFromExec) {
      if (!SaveExecInst->isCommutable())
        break;

      OtherOp = &Src0;
    } else
      llvm_unreachable("unexpected");

    CopyFromExecInst->eraseFromParent();

    auto InsPt = SaveExecInst->getIterator();
    const DebugLoc &DL = SaveExecInst->getDebugLoc();

    BuildMI(MBB, InsPt, DL, TII->get(getSaveExecOp(SaveExecInst->getOpcode())),
            CopyFromExec)
      .addReg(OtherOp->getReg());
    SaveExecInst->eraseFromParent();

    CopyToExecInst->eraseFromParent();

    for (MachineInstr *OtherInst : OtherUseInsts) {
      OtherInst->substituteRegister(CopyToExec, AMDGPU::EXEC,
                                    AMDGPU::NoSubRegister, *TRI,
                                    /*ClearIsRenamable=*/true);
    }
  }

  return true;

}

Example #7

Show file

File: HexagonExpandCondsets.cpp Project: AnachroNia/llvm

/// For a given conditional copy, predicate the definition of the source of
/// the copy under the given condition (using the same predicate register as
/// the copy).
bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) {
  // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi).
  unsigned Opc = TfrI->getOpcode();
  (void)Opc;
  assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf);
  DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false")
               << ": " << *TfrI);

  MachineOperand &MD = TfrI->getOperand(0);
  MachineOperand &MP = TfrI->getOperand(1);
  MachineOperand &MS = TfrI->getOperand(2);
  // The source operand should be a <kill>. This is not strictly necessary,
  // but it makes things a lot simpler. Otherwise, we would need to rename
  // some registers, which would complicate the transformation considerably.
  if (!MS.isKill())
    return false;

  RegisterRef RT(MS);
  unsigned PredR = MP.getReg();
  MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond);
  if (!DefI || !isPredicable(DefI))
    return false;

  DEBUG(dbgs() << "Source def: " << *DefI);

  // Collect the information about registers defined and used between the
  // DefI and the TfrI.
  // Map: reg -> bitmask of subregs
  ReferenceMap Uses, Defs;
  MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI;

  // Check if the predicate register is valid between DefI and TfrI.
  // If it is, we can then ignore instructions predicated on the negated
  // conditions when collecting def and use information.
  bool PredValid = true;
  for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) {
    if (!I->modifiesRegister(PredR, 0))
      continue;
    PredValid = false;
    break;
  }

  for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) {
    MachineInstr *MI = &*I;
    // If this instruction is predicated on the same register, it could
    // potentially be ignored.
    // By default assume that the instruction executes on the same condition
    // as TfrI (Exec_Then), and also on the opposite one (Exec_Else).
    unsigned Exec = Exec_Then | Exec_Else;
    if (PredValid && HII->isPredicated(*MI) && MI->readsRegister(PredR))
      Exec = (Cond == HII->isPredicatedTrue(*MI)) ? Exec_Then : Exec_Else;

    for (auto &Op : MI->operands()) {
      if (!Op.isReg())
        continue;
      // We don't want to deal with physical registers. The reason is that
      // they can be aliased with other physical registers. Aliased virtual
      // registers must share the same register number, and can only differ
      // in the subregisters, which we are keeping track of. Physical
      // registers ters no longer have subregisters---their super- and
      // subregisters are other physical registers, and we are not checking
      // that.
      RegisterRef RR = Op;
      if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
        return false;

      ReferenceMap &Map = Op.isDef() ? Defs : Uses;
      addRefToMap(RR, Map, Exec);
    }
  }

  // The situation:
  //   RT = DefI
  //   ...
  //   RD = TfrI ..., RT

  // If the register-in-the-middle (RT) is used or redefined between
  // DefI and TfrI, we may not be able proceed with this transformation.
  // We can ignore a def that will not execute together with TfrI, and a
  // use that will. If there is such a use (that does execute together with
  // TfrI), we will not be able to move DefI down. If there is a use that
  // executed if TfrI's condition is false, then RT must be available
  // unconditionally (cannot be predicated).
  // Essentially, we need to be able to rename RT to RD in this segment.
  if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else))
    return false;
  RegisterRef RD = MD;
  // If the predicate register is defined between DefI and TfrI, the only
  // potential thing to do would be to move the DefI down to TfrI, and then
  // predicate. The reaching def (DefI) must be movable down to the location
  // of the TfrI.
  // If the target register of the TfrI (RD) is not used or defined between
  // DefI and TfrI, consider moving TfrI up to DefI.
  bool CanUp =   canMoveOver(TfrI, Defs, Uses);
  bool CanDown = canMoveOver(DefI, Defs, Uses);
  // The TfrI does not access memory, but DefI could. Check if it's safe
  // to move DefI down to TfrI.
  if (DefI->mayLoad() || DefI->mayStore())
    if (!canMoveMemTo(DefI, TfrI, true))
      CanDown = false;

  DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no")
               << ", can move down: " << (CanDown ? "yes\n" : "no\n"));
  MachineBasicBlock::iterator PastDefIt = std::next(DefIt);
  if (CanUp)
    predicateAt(RD, DefI, PastDefIt, PredR, Cond, MP.isUndef());
  else if (CanDown)
    predicateAt(RD, DefI, TfrIt, PredR, Cond, MP.isUndef());
  else
    return false;

  if (RT != RD)
    renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt);

  // Delete the user of RT first (it should work either way, but this order
  // of deleting is more natural).
  removeInstrFromLiveness(TfrI);
  removeInstrFromLiveness(DefI);
  return true;
}

Example #8

Show file

File: SIInsertWaits.cpp Project: CSI-LLVM/llvm

// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
// around other non-memory instructions.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
  bool Changes = false;

  ST = &MF.getSubtarget<SISubtarget>();
  TII = ST->getInstrInfo();
  TRI = &TII->getRegisterInfo();
  MRI = &MF.getRegInfo();

  WaitedOn = ZeroCounts;
  DelayedWaitOn = ZeroCounts;
  LastIssued = ZeroCounts;
  LastOpcodeType = OTHER;
  LastInstWritesM0 = false;
  ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid();

  memset(&UsedRegs, 0, sizeof(UsedRegs));
  memset(&DefinedRegs, 0, sizeof(DefinedRegs));

  SmallVector<MachineInstr *, 4> RemoveMI;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {

      if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
        // There is a hardware bug on CI/SI where SMRD instruction may corrupt
        // vccz bit, so when we detect that an instruction may read from a
        // corrupt vccz bit, we need to:
        // 1. Insert s_waitcnt lgkm(0) to wait for all outstanding SMRD operations to
        //    complete.
        // 2. Restore the correct value of vccz by writing the current value
        //    of vcc back to vcc.

        if (TII->isSMRD(I->getOpcode())) {
          VCCZCorrupt = true;
        } else if (!hasOutstandingLGKM() && I->modifiesRegister(AMDGPU::VCC, TRI)) {
          // FIXME: We only care about SMRD instructions here, not LDS or GDS.
          // Whenever we store a value in vcc, the correct value of vccz is
          // restored.
          VCCZCorrupt = false;
        }

        // Check if we need to apply the bug work-around
        if (readsVCCZ(I->getOpcode()) && VCCZCorrupt) {
          DEBUG(dbgs() << "Inserting vccz bug work-around before: " << *I << '\n');

          // Wait on everything, not just LGKM.  vccz reads usually come from
          // terminators, and we always wait on everything at the end of the
          // block, so if we only wait on LGKM here, we might end up with
          // another s_waitcnt inserted right after this if there are non-LGKM
          // instructions still outstanding.
          insertWait(MBB, I, LastIssued);

          // Restore the vccz bit.  Any time a value is written to vcc, the vcc
          // bit is updated, so we can restore the bit by reading the value of
          // vcc and then writing it back to the register.
          BuildMI(MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
                  AMDGPU::VCC)
                  .addReg(AMDGPU::VCC);
        }
      }

      // Record pre-existing, explicitly requested waits
      if (I->getOpcode() == AMDGPU::S_WAITCNT) {
        handleExistingWait(*I);
        RemoveMI.push_back(&*I);
        continue;
      }

      Counters Required;

      // Wait for everything before a barrier.
      //
      // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
      // but we also want to wait for any other outstanding transfers before
      // signalling other hardware blocks
      if (I->getOpcode() == AMDGPU::S_BARRIER ||
          I->getOpcode() == AMDGPU::S_SENDMSG)
        Required = LastIssued;
      else
        Required = handleOperands(*I);

      Counters Increment = getHwCounts(*I);

      if (countersNonZero(Required) || countersNonZero(Increment))
        increaseCounters(Required, DelayedWaitOn);

      Changes |= insertWait(MBB, I, Required);

      pushInstruction(MBB, I, Increment);
      handleSendMsg(MBB, I);
    }

    // Wait for everything at the end of the MBB
    Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
  }

  for (MachineInstr *I : RemoveMI)
    I->eraseFromParent();

  return Changes;
}