Example no. 1
/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
/// under the assumption that it needs to be lowered in a way that supports
/// atomic execution of PHIs.  This lowering method is always correct.
///
void llvm::PHIElimination::LowerAtomicPHINode(
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator AfterPHIsIt) {
  ++NumAtomic;
  // Unlink the PHI node from the basic block, but don't delete the PHI yet.
  MachineInstr *MPhi = MBB.remove(MBB.begin());

  unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
  unsigned DestReg = MPhi->getOperand(0).getReg();
  assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
  bool isDead = MPhi->getOperand(0).isDead();

  // Create a new register for the incoming PHI arguments.
  MachineFunction &MF = *MBB.getParent();
  unsigned IncomingReg = 0;
  bool reusedIncoming = false;  // Is IncomingReg reused from an earlier PHI?

  // Insert a register to register copy at the top of the current block (but
  // after any remaining phi nodes) which copies the new incoming register
  // into the phi node destination.
  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
  if (isSourceDefinedByImplicitDef(MPhi, MRI))
    // If all sources of a PHI node are implicit_def, just emit an
    // implicit_def instead of a copy.
    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
            TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
  else {
    // Can we reuse an earlier PHI node? This only happens for critical edges,
    // typically those created by tail duplication.
    unsigned &entry = LoweredPHIs[MPhi];
    if (entry) {
      // An identical PHI node was already lowered. Reuse the incoming register.
      IncomingReg = entry;
      reusedIncoming = true;
      ++NumReused;
      DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi);
    } else {
      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
      entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
    }
    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
            TII->get(TargetOpcode::COPY), DestReg)
      .addReg(IncomingReg);
  }

  // Update live variable information if there is any.
  LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
  if (LV) {
    MachineInstr *PHICopy = prior(AfterPHIsIt);

    if (IncomingReg) {
      LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);

      // Increment use count of the newly created virtual register.
      VI.NumUses++;
      LV->setPHIJoin(IncomingReg);

      // When we are reusing the incoming register, it may already have been
      // killed in this block. The old kill will also have been inserted at
      // AfterPHIsIt, so it appears before the current PHICopy.
      if (reusedIncoming)
        if (MachineInstr *OldKill = VI.findKill(&MBB)) {
          DEBUG(dbgs() << "Remove old kill from " << *OldKill);
          LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
          DEBUG(MBB.dump());
        }

      // Add information to LiveVariables to know that the incoming value is
      // killed.  Note that because the value is defined in several places
      // (once for each incoming block), the "def" block and instruction
      // fields for the VarInfo are not filled in.
      LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
    }

    // Since we are going to be deleting the PHI node, if it is the last use of
    // any registers, or if the value itself is dead, we need to move this
    // information over to the new copy we just inserted.
    LV->removeVirtualRegistersKilled(MPhi);

    // If the result is dead, update LV.
    if (isDead) {
      LV->addVirtualRegisterDead(DestReg, PHICopy);
      LV->removeVirtualRegisterDead(DestReg, MPhi);
    }
  }

  // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
  for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
    --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
                                 MPhi->getOperand(i).getReg())];

  // Now loop over all of the incoming arguments, changing them to copy into the
  // IncomingReg register in the corresponding predecessor basic block.
  SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
  for (int i = NumSrcs - 1; i >= 0; --i) {
    unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
    unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();

    assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
           "Machine PHI Operands must all be virtual registers!");

    // Get the MachineBasicBlock equivalent of the BasicBlock that is the
    // source path into the PHI.
    MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();

    // If source is defined by an implicit def, there is no need to insert a
    // copy.
    MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
    if (DefMI->isImplicitDef()) {
      ImpDefs.insert(DefMI);
      continue;
    }

    // Check to make sure we haven't already emitted the copy for this block.
    // This can happen because PHI nodes may have multiple entries for the same
    // basic block.
    if (!MBBsInsertedInto.insert(&opBlock))
      continue;  // If the copy has already been emitted, we're done.

    // Find a safe location to insert the copy, this may be the first terminator
    // in the block (or end()).
    MachineBasicBlock::iterator InsertPos =
      FindCopyInsertPoint(opBlock, MBB, SrcReg);

    // Insert the copy.
    if (!reusedIncoming && IncomingReg)
      BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
              TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg);

    // Now update live variable information if we have it.  Otherwise we're done.
    if (!LV) continue;

    // We want to be able to insert a kill of the register if this PHI (aka, the
    // copy we just inserted) is the last use of the source value.  Live
    // variable analysis conservatively handles this by saying that the value is
    // live until the end of the block the PHI entry lives in.  If the value
    // really is dead at the PHI copy, there will be no successor blocks which
    // have the value live-in.

    // Also check to see if this register is in use by another PHI node which
    // has not yet been eliminated.  If so, it will be killed at an appropriate
    // point later.

    // Is it used by any PHI instructions in this block?
    bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)];

    // Okay, if we now know that the value is not live out of the block, we can
    // add a kill marker in this block saying that it kills the incoming value!
    if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
      // In our final twist, we have to decide which instruction kills the
      // register.  In most cases this is the copy, however, the first
      // terminator instruction at the end of the block may also use the value.
      // In this case, we should mark *it* as being the killing instruction,
      // not the copy.
      MachineBasicBlock::iterator KillInst;
      MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
      if (Term != opBlock.end() && Term->readsRegister(SrcReg)) {
        KillInst = Term;

        // Check that no other terminators use the value.
#ifndef NDEBUG
        for (MachineBasicBlock::iterator TI = llvm::next(Term);
             TI != opBlock.end(); ++TI) {
          assert(!TI->readsRegister(SrcReg) &&
                 "Terminator instructions cannot use virtual registers unless "
                 "they are the first terminator in a block!");
        }
#endif
      } else if (reusedIncoming || !IncomingReg) {
        // We may have to rewind a bit if we didn't insert a copy this time.
        KillInst = Term;
        while (KillInst != opBlock.begin())
          if ((--KillInst)->readsRegister(SrcReg))
            break;
      } else {
        // We just inserted this copy.
        KillInst = prior(InsertPos);
      }
      assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");

      // Finally, mark it killed.
      LV->addVirtualRegisterKilled(SrcReg, KillInst);

      // This vreg no longer lives all of the way through opBlock.
      unsigned opBlockNum = opBlock.getNumber();
      LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
    }
  }

  // Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
  if (reusedIncoming || !IncomingReg)
    MF.DeleteMachineInstr(MPhi);
}
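
The rewrite above is easier to see stripped of the LLVM API. Below is a minimal, self-contained sketch of the same shape of transformation, using hypothetical toy types (plain strings for vregs, no MachineInstr or LiveVariables bookkeeping): a fresh incoming register, one copy per predecessor, and one copy at the top of the merge block.

#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Toy IR for illustration only: a PHI is a destination register plus a list
// of (predecessor block, source register) pairs. Strings stand in for vregs.
struct Phi {
  std::string DestReg;
  std::vector<std::pair<std::string, std::string>> Incoming;
};

// Lower one PHI the way PHIElimination does: create a fresh IncomingReg,
// append "IncomingReg = COPY SrcReg" to each predecessor, and replace the
// PHI with "DestReg = COPY IncomingReg" at the top of the merge block.
static void lowerPhi(const Phi &P, const std::string &MergeBlock,
                     std::map<std::string, std::vector<std::string>> &Code) {
  std::string IncomingReg = P.DestReg + ".in"; // stand-in for a fresh vreg
  for (const auto &Pair : P.Incoming)
    Code[Pair.first].push_back(IncomingReg + " = COPY " + Pair.second);
  Code[MergeBlock].insert(Code[MergeBlock].begin(),
                          P.DestReg + " = COPY " + IncomingReg);
}

int main() {
  std::map<std::string, std::vector<std::string>> Code;
  Phi P{"%v", {{"bb.1", "%a"}, {"bb.2", "%b"}}};
  lowerPhi(P, "bb.3", Code);
  for (const auto &BB : Code) {
    std::printf("%s:\n", BB.first.c_str());
    for (const auto &Inst : BB.second)
      std::printf("  %s\n", Inst.c_str());
  }
  return 0;
}
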
bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
    bool Modified = false;

    SmallSet<unsigned, 4> Defs;
    SmallSet<unsigned, 4> Uses;
    MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
    while (MBBI != E) {
        MachineInstr *MI = &*MBBI;
        DebugLoc dl = MI->getDebugLoc();
        unsigned PredReg = 0;
        ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
        if (CC == ARMCC::AL) {
            ++MBBI;
            continue;
        }

        Defs.clear();
        Uses.clear();
        TrackDefUses(MI, Defs, Uses, TRI);

        // Insert an IT instruction.
        MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
                                  .addImm(CC);

        // Add implicit use of ITSTATE to IT block instructions.
        MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
                       true/*isImp*/, false/*isKill*/));

        MachineInstr *LastITMI = MI;
        MachineBasicBlock::iterator InsertPos = MIB.getInstr();
        ++MBBI;

        // Form IT block.
        ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
        unsigned Mask = 0, Pos = 3;

        // v8 IT blocks are limited to one conditional op unless -arm-no-restrict-it
        // is set: skip the loop
        if (!restrictIT) {
            // Branches, including tricky ones like LDM_RET, need to end an IT
            // block so check the instruction we just put in the block.
            for (; MBBI != E && Pos &&
                    (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
                if (MBBI->isDebugValue())
                    continue;

                MachineInstr *NMI = &*MBBI;
                MI = NMI;

                unsigned NPredReg = 0;
                ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
                if (NCC == CC || NCC == OCC) {
                    Mask |= (NCC & 1) << Pos;
                    // Add implicit use of ITSTATE.
                    NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
                                    true/*isImp*/, false/*isKill*/));
                    LastITMI = NMI;
                } else {
                    if (NCC == ARMCC::AL &&
                            MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
                        --MBBI;
                        MBB.remove(NMI);
                        MBB.insert(InsertPos, NMI);
                        ++NumMovedInsts;
                        continue;
                    }
                    break;
                }
                TrackDefUses(NMI, Defs, Uses, TRI);
                --Pos;
            }
        }

        // Finalize IT mask.
        Mask |= (1 << Pos);
        // Tag along (firstcond[0] << 4) with the mask.
        Mask |= (CC & 1) << 4;
        MIB.addImm(Mask);

        // Last instruction in IT block kills ITSTATE.
        LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();

        // Finalize the bundle.
        MachineBasicBlock::instr_iterator LI = LastITMI;
        finalizeBundle(MBB, InsertPos.getInstrIterator(), std::next(LI));

        Modified = true;
        ++NumITs;
    }

    return Modified;
}
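
The mask arithmetic in the loop above is compact enough to check in isolation. The following is a minimal sketch, assuming the same encoding the code uses (each extra instruction contributes its condition's low bit at positions 3..1, a terminating 1 bit marks the block length, and firstcond[0] is tagged into bit 4); it models only that arithmetic, not the full ARM encoding rules.

#include <cassert>
#include <cstdio>
#include <vector>

// Recompute an IT mask the way InsertITInstructions does: each additional
// conditional instruction contributes its condition's low bit at positions
// 3, 2, 1; a terminating 1 is left at the last used position; and the low
// bit of the first condition is appended at bit 4.
static unsigned computeITMask(unsigned FirstCC,
                              const std::vector<unsigned> &LaterCCs) {
  assert(LaterCCs.size() <= 3 && "an IT block covers at most 4 instructions");
  unsigned Mask = 0, Pos = 3;
  for (unsigned CC : LaterCCs) {
    Mask |= (CC & 1) << Pos;
    --Pos;
  }
  Mask |= 1u << Pos;          // terminating bit
  Mask |= (FirstCC & 1) << 4; // tag along firstcond[0]
  return Mask;
}

int main() {
  // Single-instruction block: only the terminating bit at position 3.
  std::printf("mask = 0x%x\n", computeITMask(0, {}));
  // Four-instruction block: later conditions then/then/else relative to an
  // even first condition (low bits 0, 0, 1).
  std::printf("mask = 0x%x\n", computeITMask(0, {0, 0, 1}));
  return 0;
}
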
/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of the
/// result.
///
/// Do not generate an EXTRACT that is used only in a debug use, as this changes
/// the code. Since this code does not currently share EXTRACTs, just ignore all
/// debug uses.
bool PeepholeOptimizer::
OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
  unsigned SrcReg, DstReg, SubIdx;
  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    return false;

  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
  if (++UI == MRI->use_nodbg_end())
    // No other uses.
    return false;

  // The source has other uses. See if we can replace the other uses with use of
  // the result of the extension.
  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
  UI = MRI->use_nodbg_begin(DstReg);
  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
       UI != UE; ++UI)
    ReachedBBs.insert(UI->getParent());

  // Uses that are in the same BB as uses of the result of the instruction.
  SmallVector<MachineOperand*, 8> Uses;

  // Uses that the result of the instruction can reach.
  SmallVector<MachineOperand*, 8> ExtendedUses;

  bool ExtendLife = true;
  UI = MRI->use_nodbg_begin(SrcReg);
  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
       UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    MachineInstr *UseMI = &*UI;
    if (UseMI == MI)
      continue;

    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // It's an error to translate this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    //
    // into this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1027 = COPY %reg1025:4
    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    //
    // The problem here is that SUBREG_TO_REG is there to assert that an
    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    // the COPY here, it will give us the value after the <sext>, not the
    // original value of %reg1024 before <sext>.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      continue;

    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these unless it's a PHI.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in order
      // to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend live range of
      // the extension result.
      ExtendLife = false;
      break;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
              std::back_inserter(Uses));

  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;

    // Look for PHI uses of the extended result; we don't want to extend the
    // liveness of a PHI input. It breaks all kinds of assumptions downstream.
    // A PHI use is expected to be the kill of its source values.
    UI = MRI->use_nodbg_begin(DstReg);
    for (MachineRegisterInfo::use_nodbg_iterator
           UE = MRI->use_nodbg_end(); UI != UE; ++UI)
      if (UI->isPHI())
        PHIBBs.insert(UI->getParent());

    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
      MachineOperand *UseMO = Uses[i];
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      if (PHIBBs.count(UseMBB))
        continue;

      // About to add uses of DstReg, clear DstReg's kill flags.
      if (!Changed)
        MRI->clearKillFlags(DstReg);

      unsigned NewVR = MRI->createVirtualRegister(RC);
      BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), NewVR)
        .addReg(DstReg, 0, SubIdx);

      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}
Example no. 4
bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
  const MSP430InstrInfo *TII =
             static_cast<const MSP430InstrInfo*>(Fn.getTarget().getInstrInfo());
  // Give the blocks of the function a dense, in-order, numbering.
  Fn.RenumberBlocks();
  BlockSizes.resize(Fn.getNumBlockIDs());

  // Measure each MBB and compute a size for the entire function.
  unsigned FuncSize = 0;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock *MBB = MFI;

    unsigned BlockSize = 0;
    for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
         MBBI != EE; ++MBBI)
      BlockSize += TII->GetInstSizeInBytes(MBBI);

    BlockSizes[MBB->getNumber()] = BlockSize;
    FuncSize += BlockSize;
  }

  // If the entire function is smaller than the displacement of a branch field,
  // we know we don't need to shrink any branches in this function.  This is a
  // common case.
  if (FuncSize < (1 << 9)) {
    BlockSizes.clear();
    return false;
  }

  // For each conditional branch, if the offset to its destination is larger
  // than the offset field allows, transform it into a long branch sequence
  // like this:
  //   short branch:
  //     bCC MBB
  //   long branch:
  //     b!CC $PC+6
  //     b MBB
  //
  bool MadeChange = true;
  bool EverMadeChange = false;
  while (MadeChange) {
    // Iteratively expand branches until we reach a fixed point.
    MadeChange = false;

    for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
         ++MFI) {
      MachineBasicBlock &MBB = *MFI;
      unsigned MBBStartOffset = 0;
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E; ++I) {
        if ((I->getOpcode() != MSP430::JCC || I->getOperand(0).isImm()) &&
            I->getOpcode() != MSP430::JMP) {
          MBBStartOffset += TII->GetInstSizeInBytes(I);
          continue;
        }

        // Determine the offset from the current branch to the destination
        // block.
        MachineBasicBlock *Dest = I->getOperand(0).getMBB();

        int BranchSize;
        if (Dest->getNumber() <= MBB.getNumber()) {
          // If this is a backwards branch, the delta is the offset from the
          // start of this block to this branch, plus the sizes of all blocks
          // from this block to the dest.
          BranchSize = MBBStartOffset;

          for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
            BranchSize += BlockSizes[i];
        } else {
          // Otherwise, add the size of the blocks between this block and the
          // dest to the number of bytes left in this block.
          BranchSize = -MBBStartOffset;

          for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
            BranchSize += BlockSizes[i];
        }

        // If this branch is in range, ignore it.
        if (isInt<10>(BranchSize)) {
          MBBStartOffset += 2;
          continue;
        }

        // Otherwise, we have to expand it to a long branch.
        unsigned NewSize;
        MachineInstr *OldBranch = I;
        DebugLoc dl = OldBranch->getDebugLoc();

        if (I->getOpcode() == MSP430::JMP) {
          NewSize = 4;
        } else {
          // The BCC operands are:
          // 0. MSP430 branch predicate
          // 1. Target MBB
          SmallVector<MachineOperand, 1> Cond;
          Cond.push_back(I->getOperand(1));

          // Jump over the uncond branch inst (i.e. $+6) on opposite condition.
          TII->ReverseBranchCondition(Cond);
          BuildMI(MBB, I, dl, TII->get(MSP430::JCC))
            .addImm(4).addOperand(Cond[0]);

          NewSize = 6;
        }
        // Uncond branch to the real destination.
        I = BuildMI(MBB, I, dl, TII->get(MSP430::Bi)).addMBB(Dest);

        // Remove the old branch from the function.
        OldBranch->eraseFromParent();

        // Remember that this instruction is NewSize bytes, increase the size
        // of the block by NewSize-2, and remember to iterate.
        BlockSizes[MBB.getNumber()] += NewSize-2;
        MBBStartOffset += NewSize;

        ++NumExpanded;
        MadeChange = true;
      }
    }
    EverMadeChange |= MadeChange;
  }

  BlockSizes.clear();
  return true;
}
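
This pass and the PPC version below share the same skeleton: measure each block, then expand out-of-range branches to a fixed point, because growing one block can push other branches out of range. Here is a standalone sketch of that fixed-point loop with made-up block sizes; fitsInt is a stand-in for LLVM's isInt<N>, and the 2/6-byte forms follow the JCC expansion above.

#include <algorithm>
#include <cstdio>
#include <vector>

// Signed N-bit range check, in the spirit of LLVM's isInt<N>().
template <unsigned N> static bool fitsInt(long long X) {
  return X >= -(1LL << (N - 1)) && X < (1LL << (N - 1));
}

// Toy layout: each block ends in one branch; the short form is 2 bytes and
// the long form 6 bytes. Offsets are measured block start to block start,
// which is cruder than the real pass but enough to show the cascade:
// branch 1->0 is in range until branch 0->2 grows its block.
struct Branch { int From, To; bool Long = false; };

int main() {
  std::vector<unsigned> BlockSizes = {508, 260, 280}; // non-branch bytes
  std::vector<Branch> Branches = {{0, 2}, {1, 0}, {2, 1}};
  for (const Branch &B : Branches)
    BlockSizes[B.From] += 2; // all branches start in the short form

  bool MadeChange = true;
  while (MadeChange) { // iterate until no more branches grow
    MadeChange = false;
    for (Branch &B : Branches) {
      if (B.Long)
        continue;
      long long Off = 0;
      for (int i = std::min(B.From, B.To); i < std::max(B.From, B.To); ++i)
        Off += BlockSizes[i];
      if (!fitsInt<10>(Off)) {
        B.Long = true;
        BlockSizes[B.From] += 4; // 6-byte long form replaces 2-byte short
        MadeChange = true;
      }
    }
  }

  for (const Branch &B : Branches)
    std::printf("branch %d->%d: %s\n", B.From, B.To,
                B.Long ? "long" : "short");
  return 0;
}
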
Example no. 5
// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
//
// SGPRx = ...
// SGPRy = REG_SEQUENCE SGPRx, sub0 ...
// VGPRz = COPY SGPRy
//
// ==>
//
// VGPRx = COPY SGPRx
// VGPRz = REG_SEQUENCE VGPRx, sub0
//
// This exposes immediate folding opportunities when materializing 64-bit
// immediates.
static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
                                        const SIRegisterInfo *TRI,
                                        const SIInstrInfo *TII,
                                        MachineRegisterInfo &MRI) {
  assert(MI.isRegSequence());

  unsigned DstReg = MI.getOperand(0).getReg();
  if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
    return false;

  if (!MRI.hasOneUse(DstReg))
    return false;

  MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
  if (!CopyUse.isCopy())
    return false;

  const TargetRegisterClass *SrcRC, *DstRC;
  std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);

  if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
    return false;

  // TODO: Could have multiple extracts?
  unsigned SubReg = CopyUse.getOperand(1).getSubReg();
  if (SubReg != AMDGPU::NoSubRegister)
    return false;

  MRI.setRegClass(DstReg, DstRC);

  // SGPRx = ...
  // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
  // VGPRz = COPY SGPRy

  // =>
  // VGPRx = COPY SGPRx
  // VGPRz = REG_SEQUENCE VGPRx, sub0

  MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());

  for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
    unsigned SrcReg = MI.getOperand(I).getReg();
    unsigned SrcSubReg = MI.getOperand(I).getSubReg();

    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    assert(TRI->isSGPRClass(SrcRC) &&
           "Expected SGPR REG_SEQUENCE to only have SGPR inputs");

    SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
    const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);

    unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);

    BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg)
      .addOperand(MI.getOperand(I));

    MI.getOperand(I).setReg(TmpReg);
  }

  CopyUse.eraseFromParent();
  return true;
}
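
Stripped of register classes and liveness, the rewrite is mechanical. The toy below (plain structs, strings for vregs; hypothetical, not the AMDGPU API) applies the same two steps: retarget the REG_SEQUENCE to the copy's destination and route each source through a fresh copy.

#include <cstdio>
#include <string>
#include <vector>

// Toy REG_SEQUENCE for illustration: a destination register plus a list of
// (source register, subregister index) pairs.
struct RegSequence {
  std::string Dst;
  std::vector<std::pair<std::string, unsigned>> Srcs;
};

// Mimic foldVGPRCopyIntoRegSequence: given "VDst = COPY Seq.Dst", make the
// REG_SEQUENCE define VDst directly and route every source through a fresh
// copy, so each input is available in the destination register class.
static std::vector<std::string> distributeCopy(RegSequence &Seq,
                                               const std::string &VDst) {
  std::vector<std::string> NewCopies;
  Seq.Dst = VDst; // VGPRz = REG_SEQUENCE ...
  for (auto &Src : Seq.Srcs) {
    std::string Tmp = Src.first + ".v"; // stand-in for a fresh VGPR vreg
    NewCopies.push_back(Tmp + " = COPY " + Src.first);
    Src.first = Tmp; // the REG_SEQUENCE now reads the copied value
  }
  return NewCopies;
}

int main() {
  RegSequence Seq = {"%s_y", {{"%s_x0", 0}, {"%s_x1", 1}}};
  for (const auto &Copy : distributeCopy(Seq, "%v_z"))
    std::printf("%s\n", Copy.c_str());
  std::printf("%s = REG_SEQUENCE", Seq.Dst.c_str());
  for (const auto &Src : Seq.Srcs)
    std::printf(" %s, sub%u", Src.first.c_str(), Src.second);
  std::printf("\n");
  return 0;
}
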
Example no. 6
/// LowerPHINode - Lower the PHI node at the top of the specified block.
///
void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator LastPHIIt) {
  ++NumLowered;

  MachineBasicBlock::iterator AfterPHIsIt = std::next(LastPHIIt);

  // Unlink the PHI node from the basic block, but don't delete the PHI yet.
  MachineInstr *MPhi = MBB.remove(MBB.begin());

  unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
  unsigned DestReg = MPhi->getOperand(0).getReg();
  assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
  bool isDead = MPhi->getOperand(0).isDead();

  // Create a new register for the incoming PHI arguments.
  MachineFunction &MF = *MBB.getParent();
  unsigned IncomingReg = 0;
  bool reusedIncoming = false;  // Is IncomingReg reused from an earlier PHI?

  // Insert a register to register copy at the top of the current block (but
  // after any remaining phi nodes) which copies the new incoming register
  // into the phi node destination.
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  if (isSourceDefinedByImplicitDef(MPhi, MRI))
    // If all sources of a PHI node are implicit_def, just emit an
    // implicit_def instead of a copy.
    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
            TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
  else {
    // Can we reuse an earlier PHI node? This only happens for critical edges,
    // typically those created by tail duplication.
    unsigned &entry = LoweredPHIs[MPhi];
    if (entry) {
      // An identical PHI node was already lowered. Reuse the incoming register.
      IncomingReg = entry;
      reusedIncoming = true;
      ++NumReused;
      DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
    } else {
      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
      entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
    }
    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
            TII->get(TargetOpcode::COPY), DestReg)
      .addReg(IncomingReg);
  }

  // Update live variable information if there is any.
  if (LV) {
    MachineInstr *PHICopy = std::prev(AfterPHIsIt);

    if (IncomingReg) {
      LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);

      // Mark the newly created virtual register as a PHI join register.
      LV->setPHIJoin(IncomingReg);

      // When we are reusing the incoming register, it may already have been
      // killed in this block. The old kill will also have been inserted at
      // AfterPHIsIt, so it appears before the current PHICopy.
      if (reusedIncoming)
        if (MachineInstr *OldKill = VI.findKill(&MBB)) {
          DEBUG(dbgs() << "Remove old kill from " << *OldKill);
          LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
          DEBUG(MBB.dump());
        }

      // Add information to LiveVariables to know that the incoming value is
      // killed.  Note that because the value is defined in several places
      // (once for each incoming block), the "def" block and instruction
      // fields for the VarInfo are not filled in.
      LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
    }

    // Since we are going to be deleting the PHI node, if it is the last use of
    // any registers, or if the value itself is dead, we need to move this
    // information over to the new copy we just inserted.
    LV->removeVirtualRegistersKilled(MPhi);

    // If the result is dead, update LV.
    if (isDead) {
      LV->addVirtualRegisterDead(DestReg, PHICopy);
      LV->removeVirtualRegisterDead(DestReg, MPhi);
    }
  }

  // Update LiveIntervals for the new copy or implicit def.
  if (LIS) {
    MachineInstr *NewInstr = std::prev(AfterPHIsIt);
    SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr);

    SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB);
    if (IncomingReg) {
      // Add the region from the beginning of MBB to the copy instruction to
      // IncomingReg's live interval.
      LiveInterval &IncomingLI = LIS->createEmptyInterval(IncomingReg);
      VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex);
      if (!IncomingVNI)
        IncomingVNI = IncomingLI.getNextValue(MBBStartIndex,
                                              LIS->getVNInfoAllocator());
      IncomingLI.addSegment(LiveInterval::Segment(MBBStartIndex,
                                                  DestCopyIndex.getRegSlot(),
                                                  IncomingVNI));
    }

    LiveInterval &DestLI = LIS->getInterval(DestReg);
    assert(DestLI.begin() != DestLI.end() &&
           "PHIs should have nonempty LiveIntervals.");
    if (DestLI.endIndex().isDead()) {
      // A dead PHI's live range begins and ends at the start of the MBB, but
      // the lowered copy, which will still be dead, needs to begin and end at
      // the copy instruction.
      VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex);
      assert(OrigDestVNI && "PHI destination should be live at block entry.");
      DestLI.removeSegment(MBBStartIndex, MBBStartIndex.getDeadSlot());
      DestLI.createDeadDef(DestCopyIndex.getRegSlot(),
                           LIS->getVNInfoAllocator());
      DestLI.removeValNo(OrigDestVNI);
    } else {
      // Otherwise, remove the region from the beginning of MBB to the copy
      // instruction from DestReg's live interval.
      DestLI.removeSegment(MBBStartIndex, DestCopyIndex.getRegSlot());
      VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
      assert(DestVNI && "PHI destination should be live at its definition.");
      DestVNI->def = DestCopyIndex.getRegSlot();
    }
  }

  // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
  for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
    --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
                                 MPhi->getOperand(i).getReg())];

  // Now loop over all of the incoming arguments, changing them to copy into the
  // IncomingReg register in the corresponding predecessor basic block.
  SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
  for (int i = NumSrcs - 1; i >= 0; --i) {
    unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
    unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
    bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
      isImplicitlyDefined(SrcReg, MRI);
    assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
           "Machine PHI Operands must all be virtual registers!");

    // Get the MachineBasicBlock equivalent of the BasicBlock that is the
    // source path into the PHI.
    MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();

    // Check to make sure we haven't already emitted the copy for this block.
    // This can happen because PHI nodes may have multiple entries for the same
    // basic block.
    if (!MBBsInsertedInto.insert(&opBlock))
      continue;  // If the copy has already been emitted, we're done.

    // Find a safe location to insert the copy, this may be the first terminator
    // in the block (or end()).
    MachineBasicBlock::iterator InsertPos =
      findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);

    // Insert the copy.
    MachineInstr *NewSrcInstr = nullptr;
    if (!reusedIncoming && IncomingReg) {
      if (SrcUndef) {
        // The source register is undefined, so there is no need for a real
        // COPY, but we still need to ensure joint dominance by defs.
        // Insert an IMPLICIT_DEF instruction.
        NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
                              TII->get(TargetOpcode::IMPLICIT_DEF),
                              IncomingReg);

        // Clean up the old implicit-def, if there even was one.
        if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg))
          if (DefMI->isImplicitDef())
            ImpDefs.insert(DefMI);
      } else {
        NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
                            TII->get(TargetOpcode::COPY), IncomingReg)
                        .addReg(SrcReg, 0, SrcSubReg);
      }
    }

    // We only need to update the LiveVariables kill of SrcReg if this was the
    // last PHI use of SrcReg to be lowered on this CFG edge and it is not live
    // out of the predecessor. We can also ignore undef sources.
    if (LV && !SrcUndef &&
        !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)] &&
        !LV->isLiveOut(SrcReg, opBlock)) {
      // We want to be able to insert a kill of the register if this PHI (aka,
      // the copy we just inserted) is the last use of the source value. Live
      // variable analysis conservatively handles this by saying that the value
      // is live until the end of the block the PHI entry lives in. If the value
      // really is dead at the PHI copy, there will be no successor blocks which
      // have the value live-in.

      // Okay, if we now know that the value is not live out of the block, we
      // can add a kill marker in this block saying that it kills the incoming
      // value!

      // In our final twist, we have to decide which instruction kills the
      // register.  In most cases this is the copy, however, terminator
      // instructions at the end of the block may also use the value. In this
      // case, we should mark the last such terminator as being the killing
      // instruction, not the copy.
      MachineBasicBlock::iterator KillInst = opBlock.end();
      MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
      for (MachineBasicBlock::iterator Term = FirstTerm;
          Term != opBlock.end(); ++Term) {
        if (Term->readsRegister(SrcReg))
          KillInst = Term;
      }

      if (KillInst == opBlock.end()) {
        // No terminator uses the register.

        if (reusedIncoming || !IncomingReg) {
          // We may have to rewind a bit if we didn't insert a copy this time.
          KillInst = FirstTerm;
          while (KillInst != opBlock.begin()) {
            --KillInst;
            if (KillInst->isDebugValue())
              continue;
            if (KillInst->readsRegister(SrcReg))
              break;
          }
        } else {
          // We just inserted this copy.
          KillInst = std::prev(InsertPos);
        }
      }
      assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");

      // Finally, mark it killed.
      LV->addVirtualRegisterKilled(SrcReg, KillInst);

      // This vreg no longer lives all of the way through opBlock.
      unsigned opBlockNum = opBlock.getNumber();
      LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
    }

    if (LIS) {
      if (NewSrcInstr) {
        LIS->InsertMachineInstrInMaps(NewSrcInstr);
        LIS->addSegmentToEndOfBlock(IncomingReg, NewSrcInstr);
      }

      if (!SrcUndef &&
          !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]) {
        LiveInterval &SrcLI = LIS->getInterval(SrcReg);

        bool isLiveOut = false;
        for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(),
             SE = opBlock.succ_end(); SI != SE; ++SI) {
          SlotIndex startIdx = LIS->getMBBStartIdx(*SI);
          VNInfo *VNI = SrcLI.getVNInfoAt(startIdx);

          // Definitions by other PHIs are not truly live-in for our purposes.
          if (VNI && VNI->def != startIdx) {
            isLiveOut = true;
            break;
          }
        }

        if (!isLiveOut) {
          MachineBasicBlock::iterator KillInst = opBlock.end();
          MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
          for (MachineBasicBlock::iterator Term = FirstTerm;
              Term != opBlock.end(); ++Term) {
            if (Term->readsRegister(SrcReg))
              KillInst = Term;
          }

          if (KillInst == opBlock.end()) {
            // No terminator uses the register.

            if (reusedIncoming || !IncomingReg) {
              // We may have to rewind a bit if we didn't just insert a copy.
              KillInst = FirstTerm;
              while (KillInst != opBlock.begin()) {
                --KillInst;
                if (KillInst->isDebugValue())
                  continue;
                if (KillInst->readsRegister(SrcReg))
                  break;
              }
            } else {
              // We just inserted this copy.
              KillInst = std::prev(InsertPos);
            }
          }
          assert(KillInst->readsRegister(SrcReg) &&
                 "Cannot find kill instruction");

          SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst);
          SrcLI.removeSegment(LastUseIndex.getRegSlot(),
                              LIS->getMBBEndIdx(&opBlock));
        }
      }
    }
  }

  // Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
  if (reusedIncoming || !IncomingReg) {
    if (LIS)
      LIS->RemoveMachineInstrFromMaps(MPhi);
    MF.DeleteMachineInstr(MPhi);
  }
}
Example no. 7
bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
  const PPCInstrInfo *TII =
                static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
  // Give the blocks of the function a dense, in-order, numbering.
  Fn.RenumberBlocks();
  BlockSizes.resize(Fn.getNumBlockIDs());

  // Measure each MBB and compute a size for the entire function.
  unsigned FuncSize = 0;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock *MBB = MFI;

    unsigned BlockSize = 0;
    for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
         MBBI != EE; ++MBBI)
      BlockSize += TII->GetInstSizeInBytes(MBBI);
    
    BlockSizes[MBB->getNumber()] = BlockSize;
    FuncSize += BlockSize;
  }
  
  // If the entire function is smaller than the displacement of a branch field,
  // we know we don't need to shrink any branches in this function.  This is a
  // common case.
  if (FuncSize < (1 << 15)) {
    BlockSizes.clear();
    return false;
  }
  
  // For each conditional branch, if the offset to its destination is larger
  // than the offset field allows, transform it into a long branch sequence
  // like this:
  //   short branch:
  //     bCC MBB
  //   long branch:
  //     b!CC $PC+8
  //     b MBB
  //
  bool MadeChange = true;
  bool EverMadeChange = false;
  while (MadeChange) {
    // Iteratively expand branches until we reach a fixed point.
    MadeChange = false;
  
    for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
         ++MFI) {
      MachineBasicBlock &MBB = *MFI;
      unsigned MBBStartOffset = 0;
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E; ++I) {
        MachineBasicBlock *Dest = 0;
        if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm())
          Dest = I->getOperand(2).getMBB();
        else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) &&
                 !I->getOperand(1).isImm())
          Dest = I->getOperand(1).getMBB();
        else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ ||
                  I->getOpcode() == PPC::BDZ8  || I->getOpcode() == PPC::BDZ) &&
                 !I->getOperand(0).isImm())
          Dest = I->getOperand(0).getMBB();

        if (!Dest) {
          MBBStartOffset += TII->GetInstSizeInBytes(I);
          continue;
        }
        
        // Determine the offset from the current branch to the destination
        // block.
        int BranchSize;
        if (Dest->getNumber() <= MBB.getNumber()) {
          // If this is a backwards branch, the delta is the offset from the
          // start of this block to this branch, plus the sizes of all blocks
          // from this block to the dest.
          BranchSize = MBBStartOffset;
          
          for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
            BranchSize += BlockSizes[i];
        } else {
          // Otherwise, add the size of the blocks between this block and the
          // dest to the number of bytes left in this block.
          BranchSize = -MBBStartOffset;

          for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
            BranchSize += BlockSizes[i];
        }

        // If this branch is in range, ignore it.
        if (isInt<16>(BranchSize)) {
          MBBStartOffset += 4;
          continue;
        }

        // Otherwise, we have to expand it to a long branch.
        MachineInstr *OldBranch = I;
        DebugLoc dl = OldBranch->getDebugLoc();
 
        if (I->getOpcode() == PPC::BCC) {
          // The BCC operands are:
          // 0. PPC branch predicate
          // 1. CR register
          // 2. Target MBB
          PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
          unsigned CRReg = I->getOperand(1).getReg();
       
          // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
          BuildMI(MBB, I, dl, TII->get(PPC::BCC))
            .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
        } else if (I->getOpcode() == PPC::BC) {
          unsigned CRBit = I->getOperand(0).getReg();
          BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2);
        } else if (I->getOpcode() == PPC::BCn) {
          unsigned CRBit = I->getOperand(0).getReg();
          BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2);
        } else if (I->getOpcode() == PPC::BDNZ) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
        } else if (I->getOpcode() == PPC::BDNZ8) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDZ8)).addImm(2);
        } else if (I->getOpcode() == PPC::BDZ) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDNZ)).addImm(2);
        } else if (I->getOpcode() == PPC::BDZ8) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDNZ8)).addImm(2);
        } else {
           llvm_unreachable("Unhandled branch type!");
        }
        
        // Uncond branch to the real destination.
        I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);

        // Remove the old branch from the function.
        OldBranch->eraseFromParent();
        
        // Remember that this instruction is 8 bytes, increase the size of the
        // block by 4, and remember to iterate.
        BlockSizes[MBB.getNumber()] += 4;
        MBBStartOffset += 8;
        ++NumExpanded;
        MadeChange = true;
      }
    }
    EverMadeChange |= MadeChange;
  }
  
  BlockSizes.clear();
  return true;
}
Example no. 8
bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
  bool Modified = false;

  SmallSet<unsigned, 4> Defs;
  SmallSet<unsigned, 4> Uses;
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineInstr *MI = &*MBBI;
    DebugLoc dl = MI->getDebugLoc();
    unsigned PredReg = 0;
    ARMCC::CondCodes CC = getPredicate(MI, PredReg);
    if (CC == ARMCC::AL) {
      ++MBBI;
      continue;
    }

    Defs.clear();
    Uses.clear();
    TrackDefUses(MI, Defs, Uses);

    // Insert an IT instruction.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
      .addImm(CC);
    MachineBasicBlock::iterator InsertPos = MIB;
    ++MBBI;

    // Finalize IT mask.
    ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
    unsigned Mask = 0, Pos = 3;
    // Branches, including tricky ones like LDM_RET, need to end an IT
    // block so check the instruction we just put in the block.
    for (; MBBI != E && Pos &&
           (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
      if (MBBI->isDebugValue())
        continue;

      MachineInstr *NMI = &*MBBI;
      MI = NMI;

      unsigned NPredReg = 0;
      ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg);
      if (NCC == CC || NCC == OCC)
        Mask |= (NCC & 1) << Pos;
      else {
        unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
        if (NCC == ARMCC::AL &&
            TII->isMoveInstr(*NMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
          assert(SrcSubIdx == 0 && DstSubIdx == 0 &&
                 "Sub-register indices still around?");
          // LLVM models selects as two-address instructions. That means a copy
          // is inserted before a t2MOVccr, etc. If the copy is scheduled in
          // between selects we would end up creating multiple IT blocks.
          if (!Uses.count(DstReg) && !Defs.count(SrcReg)) {
            --MBBI;
            MBB.remove(NMI);
            MBB.insert(InsertPos, NMI);
            ++NumMovedInsts;
            continue;
          }
        }
        break;
      }
      TrackDefUses(NMI, Defs, Uses);
      --Pos;
    }

    Mask |= (1 << Pos);
    // Tag along (firstcond[0] << 4) with the mask.
    Mask |= (CC & 1) << 4;
    MIB.addImm(Mask);
    Modified = true;
    ++NumITs;
  }

  return Modified;
}
Example no. 9
void SIInsertSkips::kill(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: {
    unsigned Opcode = 0;

    // The opcodes are inverted because the inline immediate has to be
    // the first operand, e.g. from "x < imm" to "imm > x"
    switch (MI.getOperand(2).getImm()) {
    case ISD::SETOEQ:
    case ISD::SETEQ:
      Opcode = AMDGPU::V_CMPX_EQ_F32_e64;
      break;
    case ISD::SETOGT:
    case ISD::SETGT:
      Opcode = AMDGPU::V_CMPX_LT_F32_e64;
      break;
    case ISD::SETOGE:
    case ISD::SETGE:
      Opcode = AMDGPU::V_CMPX_LE_F32_e64;
      break;
    case ISD::SETOLT:
    case ISD::SETLT:
      Opcode = AMDGPU::V_CMPX_GT_F32_e64;
      break;
    case ISD::SETOLE:
    case ISD::SETLE:
      Opcode = AMDGPU::V_CMPX_GE_F32_e64;
      break;
    case ISD::SETONE:
    case ISD::SETNE:
      Opcode = AMDGPU::V_CMPX_LG_F32_e64;
      break;
    case ISD::SETO:
      Opcode = AMDGPU::V_CMPX_O_F32_e64;
      break;
    case ISD::SETUO:
      Opcode = AMDGPU::V_CMPX_U_F32_e64;
      break;
    case ISD::SETUEQ:
      Opcode = AMDGPU::V_CMPX_NLG_F32_e64;
      break;
    case ISD::SETUGT:
      Opcode = AMDGPU::V_CMPX_NGE_F32_e64;
      break;
    case ISD::SETUGE:
      Opcode = AMDGPU::V_CMPX_NGT_F32_e64;
      break;
    case ISD::SETULT:
      Opcode = AMDGPU::V_CMPX_NLE_F32_e64;
      break;
    case ISD::SETULE:
      Opcode = AMDGPU::V_CMPX_NLT_F32_e64;
      break;
    case ISD::SETUNE:
      Opcode = AMDGPU::V_CMPX_NEQ_F32_e64;
      break;
    default:
      llvm_unreachable("invalid ISD:SET cond code");
    }

    assert(MI.getOperand(0).isReg());

    if (TRI->isVGPR(MBB.getParent()->getRegInfo(),
                    MI.getOperand(0).getReg())) {
      Opcode = AMDGPU::getVOPe32(Opcode);
      BuildMI(MBB, &MI, DL, TII->get(Opcode))
          .add(MI.getOperand(1))
          .add(MI.getOperand(0));
    } else {
      BuildMI(MBB, &MI, DL, TII->get(Opcode))
          .addReg(AMDGPU::VCC, RegState::Define)
          .addImm(0)  // src0 modifiers
          .add(MI.getOperand(1))
          .addImm(0)  // src1 modifiers
          .add(MI.getOperand(0))
          .addImm(0);  // omod
    }
    break;
  }
  case AMDGPU::SI_KILL_I1_TERMINATOR: {
    const MachineOperand &Op = MI.getOperand(0);
    int64_t KillVal = MI.getOperand(1).getImm();
    assert(KillVal == 0 || KillVal == -1);

    // Kill all threads if Op0 is an immediate and equal to the Kill value.
    if (Op.isImm()) {
      int64_t Imm = Op.getImm();
      assert(Imm == 0 || Imm == -1);

      if (Imm == KillVal)
        BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
          .addImm(0);
      break;
    }

    unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64;
    BuildMI(MBB, &MI, DL, TII->get(Opcode), AMDGPU::EXEC)
        .addReg(AMDGPU::EXEC)
        .add(Op);
    break;
  }
  default:
    llvm_unreachable("invalid opcode, expected SI_KILL_*_TERMINATOR");
  }
}
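
The first switch above is operand swapping for comparisons: because the inline immediate must be src0, "x < imm" is rewritten as "imm > x", so each predicate maps to its operand-swapped mirror. A small self-contained sketch of that mapping, using a local enum rather than the ISD condition codes:

#include <cstdio>

// Condition codes, modeled loosely after ISD::CondCode but local to this
// sketch.
enum Cond { LT, LE, GT, GE, EQ, NE };

// Swapping the operands of a comparison ("x < imm" -> "imm > x") mirrors
// the predicate: LT<->GT and LE<->GE, while EQ/NE are symmetric. This is
// the relationship the switch above bakes into the V_CMPX opcode choice.
static Cond swapOperands(Cond C) {
  switch (C) {
  case LT: return GT;
  case LE: return GE;
  case GT: return LT;
  case GE: return LE;
  case EQ: return EQ;
  case NE: return NE;
  }
  return C;
}

int main() {
  int x = 3, imm = 5;
  // "x < imm" and "imm > x" must always agree:
  std::printf("%d %d\n", x < imm, imm > x);
  std::printf("LT swaps to %s\n", swapOperands(LT) == GT ? "GT" : "?");
  return 0;
}
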
Example no. 10
void SILowerControlFlow::emitIf(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock::iterator I(&MI);

  MachineOperand &SaveExec = MI.getOperand(0);
  MachineOperand &Cond = MI.getOperand(1);
  assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister &&
         Cond.getSubReg() == AMDGPU::NoSubRegister);

  unsigned SaveExecReg = SaveExec.getReg();

  MachineOperand &ImpDefSCC = MI.getOperand(4);
  assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());

  // Add an implicit def of exec to discourage scheduling VALU after this,
  // which would interfere with trying to form s_and_saveexec_b64 later.
  MachineInstr *CopyExec =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SaveExecReg)
    .addReg(AMDGPU::EXEC)
    .addReg(AMDGPU::EXEC, RegState::ImplicitDefine);

  unsigned Tmp = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

  MachineInstr *And =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), Tmp)
    .addReg(SaveExecReg)
    //.addReg(AMDGPU::EXEC)
    .addReg(Cond.getReg());
  setImpSCCDefDead(*And, true);

  MachineInstr *Xor =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), SaveExecReg)
    .addReg(Tmp)
    .addReg(SaveExecReg);
  setImpSCCDefDead(*Xor, ImpDefSCC.isDead());

  // Use a copy that is a terminator to get correct spill code placement with
  // the fast register allocator.
  MachineInstr *SetExec =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64_term), AMDGPU::EXEC)
    .addReg(Tmp, RegState::Kill);

  // Insert a pseudo terminator to help keep the verifier happy. This will also
  // be used later when inserting skips.
  MachineInstr *NewBr =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
    .addOperand(MI.getOperand(2));

  if (!LIS) {
    MI.eraseFromParent();
    return;
  }

  LIS->InsertMachineInstrInMaps(*CopyExec);

  // Replace MI with the "and" so we don't need to fix the live interval for
  // the condition register.
  LIS->ReplaceMachineInstrInMaps(MI, *And);

  LIS->InsertMachineInstrInMaps(*Xor);
  LIS->InsertMachineInstrInMaps(*SetExec);
  LIS->InsertMachineInstrInMaps(*NewBr);

  LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
  MI.eraseFromParent();

  // FIXME: Is there a better way of adjusting the liveness? It shouldn't be
  // hard to add another def here but I'm not sure how to correctly update the
  // valno.
  LIS->removeInterval(SaveExecReg);
  LIS->createAndComputeVirtRegInterval(SaveExecReg);
  LIS->createAndComputeVirtRegInterval(Tmp);
}
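
The net effect of the copy/and/xor/mov sequence on the exec mask can be checked with plain integer bit math. Here is a minimal model assuming one bit per lane; it is a sketch of the mask arithmetic only, not of the pass, and it ignores SCC and scheduling concerns.

#include <cstdint>
#include <cstdio>

// Bit-level model of the emitIf sequence above, using 64-bit masks in
// place of EXEC and the condition register (one bit per lane).
struct IfLowering {
  uint64_t SaveExec; // lanes the matching else/endif will re-enable
  uint64_t NewExec;  // lanes that enter the "then" block
};

static IfLowering lowerIf(uint64_t Exec, uint64_t Cond) {
  uint64_t Save = Exec;       // COPY:       SaveExecReg = EXEC
  uint64_t Tmp = Save & Cond; // S_AND_B64:  Tmp = SaveExecReg & Cond
  Save = Tmp ^ Save;          // S_XOR_B64:  SaveExecReg = Tmp ^ SaveExecReg
  return {Save, Tmp};         // S_MOV_term: EXEC = Tmp
}

int main() {
  uint64_t Exec = 0xF; // four live lanes
  uint64_t Cond = 0x5; // lanes 0 and 2 take the branch
  IfLowering R = lowerIf(Exec, Cond);
  // NewExec holds the live lanes passing the condition; SaveExec ends up as
  // Exec & ~Cond, i.e. exactly the live lanes that failed it.
  std::printf("exec=0x%llx save=0x%llx\n",
              (unsigned long long)R.NewExec, (unsigned long long)R.SaveExec);
  return 0;
}
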
Example no. 11
void SILowerControlFlow::emitElse(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  unsigned DstReg = MI.getOperand(0).getReg();
  assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister);

  bool ExecModified = MI.getOperand(3).getImm() != 0;
  MachineBasicBlock::iterator Start = MBB.begin();

  // We are running before TwoAddressInstructions, and si_else's operands are
  // tied. In order to correctly tie the registers, split this into a copy of
  // the src, as TwoAddressInstructions would.
  BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), DstReg)
    .addOperand(MI.getOperand(1)); // Saved EXEC

  // This must be inserted before phis and any spill code inserted before the
  // else.
  MachineInstr *OrSaveExec =
    BuildMI(MBB, Start, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), DstReg)
    .addReg(DstReg);

  MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();

  MachineBasicBlock::iterator ElsePt(MI);

  if (ExecModified) {
    MachineInstr *And =
      BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_AND_B64), DstReg)
      .addReg(AMDGPU::EXEC)
      .addReg(DstReg);

    if (LIS)
      LIS->InsertMachineInstrInMaps(*And);
  }

  MachineInstr *Xor =
    BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
    .addReg(AMDGPU::EXEC)
    .addReg(DstReg);

  MachineInstr *Branch =
    BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
    .addMBB(DestBB);

  if (!LIS) {
    MI.eraseFromParent();
    return;
  }

  LIS->RemoveMachineInstrFromMaps(MI);
  MI.eraseFromParent();

  LIS->InsertMachineInstrInMaps(*OrSaveExec);

  LIS->InsertMachineInstrInMaps(*Xor);
  LIS->InsertMachineInstrInMaps(*Branch);

  // src reg is tied to dst reg.
  LIS->removeInterval(DstReg);
  LIS->createAndComputeVirtRegInterval(DstReg);

  // Let this be recomputed.
  LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
}
Example no. 12
// transformInstruction - Perform the transformation of an instruction
// to its equivalent AdvSIMD scalar instruction. Update inputs and outputs
// to be the correct register class, minimizing cross-class copies.
void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) {
  DEBUG(dbgs() << "Scalar transform: " << MI);

  MachineBasicBlock *MBB = MI.getParent();
  unsigned OldOpc = MI.getOpcode();
  unsigned NewOpc = getTransformOpcode(OldOpc);
  assert(OldOpc != NewOpc && "transform an instruction to itself?!");

  // Check if we need a copy for the source registers.
  unsigned OrigSrc0 = MI.getOperand(1).getReg();
  unsigned OrigSrc1 = MI.getOperand(2).getReg();
  unsigned Src0 = 0, SubReg0;
  unsigned Src1 = 0, SubReg1;
  bool KillSrc0 = false, KillSrc1 = false;
  if (!MRI->def_empty(OrigSrc0)) {
    MachineRegisterInfo::def_instr_iterator Def =
        MRI->def_instr_begin(OrigSrc0);
    assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
    MachineOperand *MOSrc0 = getSrcFromCopy(&*Def, MRI, SubReg0);
    // If there are no other users of the original source, we can delete
    // that instruction.
    if (MOSrc0) {
      Src0 = MOSrc0->getReg();
      KillSrc0 = MOSrc0->isKill();
      // Src0 is going to be reused, so it cannot be killed anymore.
      MOSrc0->setIsKill(false);
      if (MRI->hasOneNonDBGUse(OrigSrc0)) {
        assert(MOSrc0 && "Can't delete copy w/o a valid original source!");
        Def->eraseFromParent();
        ++NumCopiesDeleted;
      }
    }
  }
  if (!MRI->def_empty(OrigSrc1)) {
    MachineRegisterInfo::def_instr_iterator Def =
        MRI->def_instr_begin(OrigSrc1);
    assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
    MachineOperand *MOSrc1 = getSrcFromCopy(&*Def, MRI, SubReg1);
    // If there are no other users of the original source, we can delete
    // that instruction.
    if (MOSrc1) {
      Src1 = MOSrc1->getReg();
      KillSrc1 = MOSrc1->isKill();
      // Src1 is going to be reused, so it cannot be killed anymore.
      MOSrc1->setIsKill(false);
      if (MRI->hasOneNonDBGUse(OrigSrc1)) {
        assert(MOSrc1 && "Can't delete copy w/o a valid original source!");
        Def->eraseFromParent();
        ++NumCopiesDeleted;
      }
    }
  }
  // If we weren't able to reference the original source directly, create a
  // copy.
  if (!Src0) {
    SubReg0 = 0;
    Src0 = MRI->createVirtualRegister(&AArch64::FPR64RegClass);
    insertCopy(TII, MI, Src0, OrigSrc0, KillSrc0);
    KillSrc0 = true;
  }
  if (!Src1) {
    SubReg1 = 0;
    Src1 = MRI->createVirtualRegister(&AArch64::FPR64RegClass);
    insertCopy(TII, MI, Src1, OrigSrc1, KillSrc1);
    KillSrc1 = true;
  }

  // Create a vreg for the destination.
  // FIXME: No need to do this if the ultimate user expects an FPR64.
  // Check for that and avoid the copy if possible.
  unsigned Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass);

  // For now, all of the new instructions have the same simple three-register
  // form, so no need to special case based on what instruction we're
  // building.
  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), Dst)
      .addReg(Src0, getKillRegState(KillSrc0), SubReg0)
      .addReg(Src1, getKillRegState(KillSrc1), SubReg1);

  // Now copy the result back out to a GPR.
  // FIXME: Try to avoid this if all uses could actually just use the FPR64
  // directly.
  insertCopy(TII, MI, MI.getOperand(0).getReg(), Dst, true);

  // Erase the old instruction.
  MI.eraseFromParent();

  ++NumScalarInsnsUsed;
}
bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {

  DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
               << "********** Function: "
               << MF.getName() << "\n");

#if 0
  // For now disable this; if we move NewValueJump before register
  // allocation we will need this information.
  LiveVariables &LVs = getAnalysis<LiveVariables>();
#endif

  QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
  QRI = static_cast<const HexagonRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();

  if (!QRI->Subtarget.hasV4TOps() ||
      DisableNewValueJumps) {
    return false;
  }

  int nvjCount = DbgNVJCount;
  int nvjGenerated = 0;

  // Loop through all the bb's of the function
  for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
        MBBb != MBBe; ++MBBb) {
    MachineBasicBlock* MBB = MBBb;

    DEBUG(dbgs() << "** dumping bb ** "
                 << MBB->getNumber() << "\n");
    DEBUG(MBB->dump());
    DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n");
    bool foundJump    = false;
    bool foundCompare = false;
    bool invertPredicate = false;
    unsigned predReg = 0; // predicate reg of the jump.
    unsigned cmpReg1 = 0;
    int cmpOp2 = 0;
    bool MO1IsKill = false;
    bool MO2IsKill = false;
    MachineBasicBlock::iterator jmpPos;
    MachineBasicBlock::iterator cmpPos;
    MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr;
    MachineBasicBlock *jmpTarget = nullptr;
    bool afterRA = false;
    bool isSecondOpReg = false;
    bool isSecondOpNewified = false;
    // Traverse the basic block - bottom up
    for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin();
             MII != E;) {
      MachineInstr *MI = --MII;
      if (MI->isDebugValue()) {
        continue;
      }

      if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated))
        break;

      DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n");

      if (!foundJump &&
         (MI->getOpcode() == Hexagon::JMP_t ||
          MI->getOpcode() == Hexagon::JMP_f ||
          MI->getOpcode() == Hexagon::JMP_tnew_t ||
          MI->getOpcode() == Hexagon::JMP_tnew_nt ||
          MI->getOpcode() == Hexagon::JMP_fnew_t ||
          MI->getOpcode() == Hexagon::JMP_fnew_nt)) {
        // This is where we would insert the compare and the instruction
        // that feeds the compare.
        jmpPos = MII;
        jmpInstr = MI;
        predReg = MI->getOperand(0).getReg();
        afterRA = TargetRegisterInfo::isPhysicalRegister(predReg);

        // If the if-converter had not messed up the kill flags of the
        // operands, the following check on the kill flag would suffice.
        // if(!jmpInstr->getOperand(0).isKill()) break;

        // This predicate register may be live out of the BB. Checking that
        // directly would require live-variable analysis on physical
        // registers, which LLVM does not provide.
        //if(LVs.isLiveOut(predReg, *MBB)) break;

        // Get all the successors of this block - which will always
        // be 2. Check if the predicate register is live-in in those
        // successors. If yes, we cannot delete the predicate -
        // we do this only because LLVM does not provide live-out
        // information at the BB level.
        bool predLive = false;
        for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
                            SIE = MBB->succ_end(); SI != SIE; ++SI) {
          MachineBasicBlock* succMBB = *SI;
          if (succMBB->isLiveIn(predReg)) {
            predLive = true;
          }
        }
        if (predLive)
          break;

        jmpTarget = MI->getOperand(1).getMBB();
        foundJump = true;
        if (MI->getOpcode() == Hexagon::JMP_f ||
            MI->getOpcode() == Hexagon::JMP_fnew_t ||
            MI->getOpcode() == Hexagon::JMP_fnew_nt) {
          invertPredicate = true;
        }
        continue;
      }

      // No new value jump if there is a barrier. A barrier has to be in its
      // own packet. A barrier has zero operands. We conservatively bail out
      // here if we see any instruction with zero operands.
      if (foundJump && MI->getNumOperands() == 0)
        break;

      if (foundJump &&
         !foundCompare &&
          MI->getOperand(0).isReg() &&
          MI->getOperand(0).getReg() == predReg) {

        // Not all compares can become new-value compares. Arch Spec: 7.6.1.1
        if (QII->isNewValueJumpCandidate(MI)) {

          assert((MI->getDesc().isCompare()) &&
              "Only compare instruction can be collapsed into New Value Jump");
          isSecondOpReg = MI->getOperand(2).isReg();

          if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg,
                                        afterRA, jmpPos, MF))
            break;

          cmpInstr = MI;
          cmpPos = MII;
          foundCompare = true;

          // We need cmpReg1 and cmpOp2 (imm or reg) while building
          // the new-value jump instruction.
          cmpReg1 = MI->getOperand(1).getReg();
          if (MI->getOperand(1).isKill())
            MO1IsKill = true;

          if (isSecondOpReg) {
            cmpOp2 = MI->getOperand(2).getReg();
            if (MI->getOperand(2).isKill())
              MO2IsKill = true;
          } else
            cmpOp2 = MI->getOperand(2).getImm();
          continue;
        }
      }

      if (foundCompare && foundJump) {

        // If "common" checks fail, bail out on this BB.
        if (!commonChecksToProhibitNewValueJump(afterRA, MII))
          break;

        bool foundFeeder = false;
        MachineBasicBlock::iterator feederPos = MII;
        if (MI->getOperand(0).isReg() &&
            MI->getOperand(0).isDef() &&
           (MI->getOperand(0).getReg() == cmpReg1 ||
            (isSecondOpReg &&
             MI->getOperand(0).getReg() == (unsigned) cmpOp2))) {

          unsigned feederReg = MI->getOperand(0).getReg();

          // First try to see if we can get the feeder from the first operand
          // of the compare. If we cannot, and if isSecondOpReg is true
          // (the second operand of the compare is also a register), try that
          // one.
          // TODO: Try to come up with some heuristic to figure out which
          // feeder would benefit.

          if (feederReg == cmpReg1) {
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) {
              if (!isSecondOpReg)
                break;
              else
                continue;
            } else
              foundFeeder = true;
          }

          if (!foundFeeder &&
               isSecondOpReg &&
               feederReg == (unsigned) cmpOp2)
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF))
              break;

          if (isSecondOpReg) {
            // In the case of CMPLT, CMPLTU, or EQ with the second register
            // to newify, swap the operands.
            if (cmpInstr->getOpcode() == Hexagon::C2_cmpeq &&
                                     feederReg == (unsigned) cmpOp2) {
              unsigned tmp = cmpReg1;
              bool tmpIsKill = MO1IsKill;
              cmpReg1 = cmpOp2;
              MO1IsKill = MO2IsKill;
              cmpOp2 = tmp;
              MO2IsKill = tmpIsKill;
            }

            // Now that we may have swapped the operands, all we need to
            // check is whether the second operand (after the swap) is the
            // feeder; if it is, make a note.
            if (feederReg == (unsigned)cmpOp2)
              isSecondOpNewified = true;
          }

          // Now that we are moving the feeder close to the jump, make sure
          // we respect the kill flags of the feeder's operands.

          bool updatedIsKill = false;
          for (unsigned i = 0; i < MI->getNumOperands(); i++) {
            MachineOperand &MO = MI->getOperand(i);
            if (MO.isReg() && MO.isUse()) {
              unsigned feederReg = MO.getReg();
              for (MachineBasicBlock::iterator localII = feederPos,
                   end = jmpPos; localII != end; localII++) {
                MachineInstr *localMI = localII;
                for (unsigned j = 0; j < localMI->getNumOperands(); j++) {
                  MachineOperand &localMO = localMI->getOperand(j);
                  if (localMO.isReg() && localMO.isUse() &&
                      localMO.isKill() && feederReg == localMO.getReg()) {
                    // A later instruction in the move range kills this use
                    // register: clear its kill flag and make the feeder's
                    // use the kill instead.
                    localMO.setIsKill(false);
                    MO.setIsKill();
                    updatedIsKill = true;
                    break;
                  }
                }
                if (updatedIsKill) break;
              }
            }
            if (updatedIsKill) break;
          }

          MBB->splice(jmpPos, MI->getParent(), MI);
          MBB->splice(jmpPos, MI->getParent(), cmpInstr);
          DebugLoc dl = MI->getDebugLoc();
          MachineInstr *NewMI;

          assert((QII->isNewValueJumpCandidate(cmpInstr)) &&
                 "This compare is not a New Value Jump candidate.");
          unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
                                               isSecondOpNewified,
                                               jmpTarget, MBPI);
          if (invertPredicate)
            opc = QII->getInvertedPredicatedOpcode(opc);

          if (isSecondOpReg)
            NewMI = BuildMI(*MBB, jmpPos, dl,
                                  QII->get(opc))
                                    .addReg(cmpReg1, getKillRegState(MO1IsKill))
                                    .addReg(cmpOp2, getKillRegState(MO2IsKill))
                                    .addMBB(jmpTarget);

          else if ((cmpInstr->getOpcode() == Hexagon::C2_cmpeqi ||
                    cmpInstr->getOpcode() == Hexagon::C2_cmpgti) &&
                    cmpOp2 == -1 )
            // Corresponding new-value compare jump instructions don't have the
            // operand for -1 immediate value.
            NewMI = BuildMI(*MBB, jmpPos, dl,
                                  QII->get(opc))
                                    .addReg(cmpReg1, getKillRegState(MO1IsKill))
                                    .addMBB(jmpTarget);

          else
            NewMI = BuildMI(*MBB, jmpPos, dl,
                                  QII->get(opc))
                                    .addReg(cmpReg1, getKillRegState(MO1IsKill))
                                    .addImm(cmpOp2)
                                    .addMBB(jmpTarget);

          assert(NewMI && "New Value Jump Instruction Not created!");
          (void)NewMI;
          if (cmpInstr->getOperand(0).isReg() &&
              cmpInstr->getOperand(0).isKill())
            cmpInstr->getOperand(0).setIsKill(false);
          if (cmpInstr->getOperand(1).isReg() &&
              cmpInstr->getOperand(1).isKill())
            cmpInstr->getOperand(1).setIsKill(false);
          cmpInstr->eraseFromParent();
          jmpInstr->eraseFromParent();
          ++nvjGenerated;
          ++NumNVJGenerated;
          break;
        }
      }
    }
  }

  return true;
}
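
As a hedged illustration of what the pass builds (toy types below, not the Hexagon opcodes): the compare and the jump that tests its predicate are fused into a single compare-and-branch whose operand can be a value defined in the same packet (".new"), and a false-sense jump (JMP_f and friends) is handled by inverting the branch sense rather than the compare.

#include <cstdio>

struct NewValueJump {
  int Reg1;          // first compare operand
  int Op2;           // second operand (register or immediate)
  bool TakenIfTrue;  // branch sense after folding the predicate
};

// p = cmp(r1, r2); if (p) jump L   ==>   if (cmp(r1, r2.new)) jump L
NewValueJump fuse(int cmpReg1, int cmpOp2, bool invertPredicate) {
  return NewValueJump{cmpReg1, cmpOp2, !invertPredicate};
}

int main() {
  NewValueJump J = fuse(1, 2, /*invertPredicate=*/true); // from a JMP_f
  std::printf("if (%scmp(r%d, r%d)) jump\n",
              J.TakenIfTrue ? "" : "!", J.Reg1, J.Op2);
}
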
Example n. 14
0
DeadMemOpElimination::instr_iterator
DeadMemOpElimination::handleMemOp(instr_iterator I, DefMapTy &Defs,
                                  AliasSetTracker &AST) {
  MachineInstr *MI = I;

  MachineMemOperand *MO = *MI->memoperands_begin();
  // AliasAnalysis cannot handle offsets right now, so we pretend to write a
  // big enough size to the location pointed to by the base pointer.
  uint64_t Size = MO->getSize() + MO->getOffset();
  AliasSet *ASet = &AST.getAliasSetForPointer(const_cast<Value*>(MO->getValue()),
                                              Size, 0);

  MachineInstr *&LastMI = Defs[ASet];

  bool canHandleLastStore = LastMI && ASet->isMustAlias()
                            && LastMI->getOpcode() != VTM::VOpInternalCall
                            // FIXME: We may need to remember the last
                            // definition for all predicates.
                            && isPredIdentical(LastMI, MI);

  if (canHandleLastStore) {
    MachineMemOperand *LastMO = *LastMI->memoperands_begin();
    // We can handle the last store only if both memory operands refer to the
    // must-alias address and have the same size.
    canHandleLastStore = LastMO->getSize() == MO->getSize()
                         && !LastMO->isVolatile()
                         && MachineMemOperandAlias(MO, LastMO, AA, SE)
                            == AliasAnalysis::MustAlias;
  }

  // FIXME: This elimination is only valid if we are in single-thread mode!
  if (VInstrInfo::mayStore(MI)) {
    if (canHandleLastStore) {
      // Dead store found; remove it.
      LastMI->eraseFromParent();
      ++DeadStoreEliminated;
    }

    // Update the definition.
    LastMI = MI;
    return I;
  }

  // Now MI is a load.
  if (!canHandleLastStore) return I;

  // The load reads the value that was just stored, so the load is not
  // necessary.
  MachineOperand LoadedMO = MI->getOperand(0);
  MachineOperand StoredMO = LastMI->getOperand(2);

  // Simply replace the load by a copy.
  DebugLoc dl = MI->getDebugLoc();
  I = *BuildMI(*MI->getParent(), I, dl, VInstrInfo::getDesc(VTM::VOpMove))
        .addOperand(LoadedMO)
        .addOperand(StoredMO)
        .addOperand(*VInstrInfo::getPredOperand(MI))
        .addOperand(*VInstrInfo::getTraceOperand(MI));

  MI->eraseFromParent();
  ++DeadLoadEliminated;
  return I;
}
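
A standalone sketch of the store-to-load forwarding idea above (plain C++, a hypothetical model rather than the VTM types): remember the last store per must-alias location; a later store to the same location and size makes the old one dead, and a load from it can be rewritten as a register copy of the stored value.

#include <cstdio>
#include <map>

struct MemOp { bool IsStore; int Addr; int Size; int ValueReg; };

void handleMemOp(const MemOp &Op, std::map<int, MemOp> &LastStore) {
  auto It = LastStore.find(Op.Addr);
  bool Forwardable = It != LastStore.end() && It->second.Size == Op.Size;
  if (Op.IsStore) {
    if (Forwardable) // overwritten before any other reader: dead store
      std::printf("dead store to [%d] eliminated\n", Op.Addr);
    LastStore[Op.Addr] = Op; // update the definition
    return;
  }
  if (Forwardable) // load of the just-stored value becomes a register copy
    std::printf("load [%d] -> copy of r%d\n", Op.Addr, It->second.ValueReg);
}

int main() {
  std::map<int, MemOp> LastStore;
  handleMemOp({true, 100, 4, 7}, LastStore);   // store r7 -> [100]
  handleMemOp({false, 100, 4, 9}, LastStore);  // load  [100] => copy of r7
  handleMemOp({true, 100, 4, 8}, LastStore);   // kills the first store
}
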
Example n. 15
0
void MIPrinter::print(const MachineInstr &MI) {
  const auto *MF = MI.getParent()->getParent();
  const auto &MRI = MF->getRegInfo();
  const auto &SubTarget = MF->getSubtarget();
  const auto *TRI = SubTarget.getRegisterInfo();
  assert(TRI && "Expected target register info");
  const auto *TII = SubTarget.getInstrInfo();
  assert(TII && "Expected target instruction info");
  if (MI.isCFIInstruction())
    assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction");

  bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI);
  unsigned I = 0, E = MI.getNumOperands();
  for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
         !MI.getOperand(I).isImplicit();
       ++I) {
    if (I)
      OS << ", ";
    print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, &MRI,
          /*IsDef=*/true);
  }

  if (I)
    OS << " = ";
  if (MI.getFlag(MachineInstr::FrameSetup))
    OS << "frame-setup ";
  OS << TII->getName(MI.getOpcode());
  if (isPreISelGenericOpcode(MI.getOpcode())) {
    assert(MI.getType() && "Generic instructions must have a type");
    OS << ' ';
    MI.getType()->print(OS, /*IsForDebug*/ false, /*NoDetails*/ true);
  }
  if (I < E)
    OS << ' ';

  bool NeedComma = false;
  for (; I < E; ++I) {
    if (NeedComma)
      OS << ", ";
    print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies);
    NeedComma = true;
  }

  if (MI.getDebugLoc()) {
    if (NeedComma)
      OS << ',';
    OS << " debug-location ";
    MI.getDebugLoc()->printAsOperand(OS, MST);
  }

  if (!MI.memoperands_empty()) {
    OS << " :: ";
    bool NeedComma = false;
    for (const auto *Op : MI.memoperands()) {
      if (NeedComma)
        OS << ", ";
      print(*Op);
      NeedComma = true;
    }
  }
}
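
A small sketch of the operand layout this printer produces: explicit non-implicit defs first, then " = ", the opcode name, then the remaining operands separated by commas. Hypothetical data below, not the MIR printer itself.

#include <cstdio>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Defs = {"%2"};
  std::vector<std::string> Uses = {"%0", "%1"};
  std::string Out;
  for (size_t I = 0; I < Defs.size(); ++I)
    Out += (I ? ", " : "") + Defs[I];       // defs, comma-separated
  if (!Defs.empty())
    Out += " = ";                           // printed once after the defs
  Out += "ADDrr";                           // TII->getName(...)
  for (size_t I = 0; I < Uses.size(); ++I)
    Out += (I ? ", " : " ") + Uses[I];      // remaining operands
  std::printf("%s\n", Out.c_str());         // prints: %2 = ADDrr %0, %1
}
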
Example n. 16
0
MachineInstr *
ARMBSISimplifyIndexMemOpsPass::convertToSimpleInstrs(MachineFunction::iterator &MFI,
      MachineBasicBlock::iterator &MBBI,
      LiveVariables *LV) const {
   // FIXME: Thumb2 support.
   llvm::errs() << "about to convert to three address\n";

   llvm::errs() << "arg passed to convert to three address\n";

   MachineInstr *MI = MBBI;
   MachineFunction &MF = *MI->getParent()->getParent();
   uint64_t TSFlags = MI->getDesc().TSFlags;
   bool isPre = false;
   switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
      default: return NULL;
      case ARMII::IndexModePre:
               isPre = true;
               break;
      case ARMII::IndexModePost:
               break;
   }

   // Try splitting an indexed load/store to an un-indexed one plus an add/sub
   // operation.
   unsigned MemOpc = TII->getUnindexedOpcode(MI->getOpcode());
   if (MemOpc == 0)
      return NULL;

   MachineInstr *UpdateMI = NULL;
   MachineInstr *MemMI = NULL;
   MachineInstrBuilder MemMIB;
   unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
   const MCInstrDesc &MCID = MI->getDesc();
   unsigned NumOps = MCID.getNumOperands();
   bool isLoad = !MI->mayStore();
   const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
   const MachineOperand &Base = MI->getOperand(2);
   const MachineOperand &Offset = MI->getOperand(NumOps-3);
   unsigned WBReg = WB.getReg();
   unsigned BaseReg = Base.getReg();
   unsigned OffReg = -1;
   unsigned OffImm = -1;
   if (Offset.isReg())
      OffReg = Offset.getReg();
   else if (MI->getOperand(NumOps-3).isImm()) {
      OffImm = MI->getOperand(NumOps-3).getImm();
      OffReg = 0;
   }
   else {
      llvm::errs() << "FAIL\n";
      return NULL;//FAIL
   }
   ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-2).getImm();
   switch (AddrMode) {
      default: llvm_unreachable("Unknown indexed op!");
      case ARMII::AddrMode2: {
           bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
           uint64_t Amt = ARM_AM::getAM2Offset(OffImm);
           if (isSub) {
              // Two's-complement negate the amount and mask it to the
              // 12-bit offset field; the sub form is not handled here,
              // so bail out.
              Amt = ~Amt;
              Amt++;
              Amt &= 0x0fff;
              return NULL;
           }
           if (OffReg == 0) {
              if (ARM_AM::getSOImmVal(Amt) == -1) {
                 // Can't encode it in a so_imm operand. This transformation will
                 // add more than 1 instruction. Abandon!
                 llvm::errs() << "here amt =  " << Amt << " is sub: " << isSub  << "\n";
                 return NULL;
              }
              llvm::errs() << "NONFAIL  offImm amt " << OffImm << " pred = " << Pred << "here amt =  " << Amt << " is sub: " << isSub  << "\n";
              UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                    TII->get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
                 .addReg(BaseReg).addImm(Amt)
                 .addImm(Pred).addReg(0).addReg(0);
           } else if (Amt != 0) {
              ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
              unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
              UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                    TII->get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
                 .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
                 .addImm(Pred).addReg(0).addReg(0);
           } else
              UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                    TII->get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
                 .addReg(BaseReg).addReg(OffReg)
                 .addImm(Pred).addReg(0).addReg(0);
           break;
        }
      case ARMII::AddrMode3 : {
         bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
         unsigned Amt = ARM_AM::getAM3Offset(OffImm);
         llvm::errs() << " in addressing mode 3\n";
         if (OffReg == 0)
            // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
            UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                  TII->get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
               .addReg(BaseReg).addImm(Amt)
               .addImm(Pred).addReg(0).addReg(0);
         else
            UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                  TII->get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
               .addReg(BaseReg).addReg(OffReg)
               .addImm(Pred).addReg(0).addReg(0);
         break;
      }
   }

   std::vector<MachineInstr*> NewMIs;
   if (isPre) {
      if (isLoad)
         MemMI = BuildMI(MF, MI->getDebugLoc(),
               TII->get(MemOpc), MI->getOperand(0).getReg())
            .addReg(WBReg).addImm(0).addImm(Pred).addReg(0);
      else {
         MemMI = BuildMI(MF, MI->getDebugLoc(),
               TII->get(MemOpc)).addReg(MI->getOperand(1).getReg())
            .addReg(WBReg).addImm(0).addImm(Pred).addReg(0);
         //AddDefaultPred(MemMIB);
      }
      NewMIs.push_back(MemMI);
      NewMIs.push_back(UpdateMI);
   } else {
      if (isLoad)
         MemMI = BuildMI(MF, MI->getDebugLoc(),
               TII->get(MemOpc), MI->getOperand(0).getReg())
            .addReg(BaseReg).addImm(0).addImm(Pred).addReg(0);
      else {
         MemMI = BuildMI(MF, MI->getDebugLoc(),
               TII->get(MemOpc)).addReg(MI->getOperand(1).getReg())
            .addReg(BaseReg).addImm(0).addImm(Pred).addReg(0);
         //AddDefaultPred(MemMIB);
      }
      if (WB.isDead())
         UpdateMI->getOperand(0).setIsDead();
      NewMIs.push_back(UpdateMI);
      NewMIs.push_back(MemMI);
   }

   // Transfer LiveVariables states, kill / dead info.
   if (LV) {
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
         MachineOperand &MO = MI->getOperand(i);
         if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
            unsigned Reg = MO.getReg();

            LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
            if (MO.isDef()) {
               MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
               if (MO.isDead())
                  LV->addVirtualRegisterDead(Reg, NewMI);
            }
            if (MO.isUse() && MO.isKill()) {
               for (unsigned j = 0; j < 2; ++j) {
                  // Look at the two new MI's in reverse order.
                  MachineInstr *NewMI = NewMIs[j];
                  if (!NewMI->readsRegister(Reg))
                     continue;
                  LV->addVirtualRegisterKilled(Reg, NewMI);
                  if (VI.removeKill(MI))
                     VI.Kills.push_back(NewMI);
                  break;
               }
            }
         }
      }
   }

   MFI->insert(MBBI, NewMIs[1]);
   MFI->insert(MBBI, NewMIs[0]);
   return NewMIs[0];
}
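
A hedged sketch of the AddrMode2 offset arithmetic seen above, as a standalone helper: the subtract case negates the amount in two's complement and masks it to the 12-bit immediate field. In the pass above the sub form bails out after computing it; the code below just demonstrates the arithmetic, assuming the 12-bit field of ARM addressing mode 2.

#include <cassert>
#include <cstdint>

uint64_t negateAM2Amount(uint64_t Amt) {
  Amt = ~Amt;     // one's complement...
  Amt++;          // ...plus one: two's-complement negation
  Amt &= 0x0fff;  // keep only the 12-bit offset field
  return Amt;
}

int main() {
  assert(negateAM2Amount(1) == 0x0fff); // -1 in a 12-bit field
  assert(negateAM2Amount(8) == 0x0ff8); // -8
}
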
Example n. 17
0
/// fixupConditionalBranch - Fix up a conditional branch whose destination is
/// too far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
  DebugLoc DL = MI.getDebugLoc();
  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;

  bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond);
  assert(!Fail && "branches to be relaxed must be analyzable");
  (void)Fail;

  // Add an unconditional branch to the destination and invert the branch
  // condition to jump over it:
  // tbz L1
  // =>
  // tbnz L2
  // b   L1
  // L2:

  if (FBB && isBlockInRange(MI, *FBB)) {
    // Last MI in the BB is an unconditional branch. We can simply invert the
    // condition and swap destinations:
    // beq L1
    // b   L2
    // =>
    // bne L2
    // b   L1
    DEBUG(dbgs() << "  Invert condition and swap "
                    "its destination with " << MBB->back());

    TII->reverseBranchCondition(Cond);
    int OldSize = 0, NewSize = 0;
    TII->removeBranch(*MBB, &OldSize);
    TII->insertBranch(*MBB, FBB, TBB, Cond, DL, &NewSize);

    BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize);
    return true;
  } else if (FBB) {
    // We need to split the basic block here to obtain two long-range
    // unconditional branches.
    auto &NewBB = *MF->CreateMachineBasicBlock(MBB->getBasicBlock());
    MF->insert(++MBB->getIterator(), &NewBB);

    // Insert an entry into BlockInfo to align it properly with the block
    // numbers.
    BlockInfo.insert(BlockInfo.begin() + NewBB.getNumber(), BasicBlockInfo());

    unsigned &NewBBSize = BlockInfo[NewBB.getNumber()].Size;
    int NewBrSize;
    TII->insertUnconditionalBranch(NewBB, FBB, DL, &NewBrSize);
    NewBBSize += NewBrSize;

    // Update the successor lists according to the transformation to follow.
    // Do it here since if there's no split, no update is needed.
    MBB->replaceSuccessor(FBB, &NewBB);
    NewBB.addSuccessor(FBB);
  }

  // We now have an appropriate fall-through block in place (either naturally or
  // just created), so we can invert the condition.
  MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB));

  DEBUG(dbgs() << "  Insert B to BB#" << TBB->getNumber()
               << ", invert condition and change dest. to BB#"
               << NextBB.getNumber() << '\n');

  unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size;

  // Insert a new conditional branch and a new unconditional branch.
  int RemovedSize = 0;
  TII->reverseBranchCondition(Cond);
  TII->removeBranch(*MBB, &RemovedSize);
  MBBSize -= RemovedSize;

  int AddedSize = 0;
  TII->insertBranch(*MBB, &NextBB, TBB, Cond, DL, &AddedSize);
  MBBSize += AddedSize;

  // Finally, keep the block offsets up to date.
  adjustBlockOffsets(*MBB);
  return true;
}
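
A standalone sketch of the invert-and-swap rewrite in the in-range case above, on a toy branch representation (hypothetical types): "beq L1; b L2" becomes "bne L2; b L1" when L1 is out of conditional range but L2 is reachable.

#include <cstdio>
#include <utility>

struct CondBr { bool IfEqual; const char *Dest; };
struct UncondBr { const char *Dest; };

// reverseBranchCondition + swap destinations, as in the FBB case above.
void invertAndSwap(CondBr &C, UncondBr &U) {
  C.IfEqual = !C.IfEqual;
  std::swap(C.Dest, U.Dest);
}

int main() {
  CondBr C{true, "L1"}; // beq L1  (L1 too far for the conditional branch)
  UncondBr U{"L2"};     // b   L2
  invertAndSwap(C, U);  // => bne L2 ; b L1
  std::printf("b%s %s ; b %s\n", C.IfEqual ? "eq" : "ne", C.Dest, U.Dest);
}
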
bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** Lowering br_unless **********\n"
                  "********** Function: "
               << MF.getName() << '\n');

  auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  auto &MRI = MF.getRegInfo();

  for (auto &MBB : MF) {
    for (auto MII = MBB.begin(); MII != MBB.end();) {
      MachineInstr *MI = &*MII++;
      if (MI->getOpcode() != WebAssembly::BR_UNLESS)
        continue;

      unsigned Cond = MI->getOperand(1).getReg();
      bool Inverted = false;

      // Attempt to invert the condition in place.
      if (MFI.isVRegStackified(Cond)) {
        assert(MRI.hasOneDef(Cond));
        MachineInstr *Def = MRI.getVRegDef(Cond);
        switch (Def->getOpcode()) {
          using namespace WebAssembly;
        case EQ_I32: Def->setDesc(TII.get(NE_I32)); Inverted = true; break;
        case NE_I32: Def->setDesc(TII.get(EQ_I32)); Inverted = true; break;
        case GT_S_I32: Def->setDesc(TII.get(LE_S_I32)); Inverted = true; break;
        case GE_S_I32: Def->setDesc(TII.get(LT_S_I32)); Inverted = true; break;
        case LT_S_I32: Def->setDesc(TII.get(GE_S_I32)); Inverted = true; break;
        case LE_S_I32: Def->setDesc(TII.get(GT_S_I32)); Inverted = true; break;
        case GT_U_I32: Def->setDesc(TII.get(LE_U_I32)); Inverted = true; break;
        case GE_U_I32: Def->setDesc(TII.get(LT_U_I32)); Inverted = true; break;
        case LT_U_I32: Def->setDesc(TII.get(GE_U_I32)); Inverted = true; break;
        case LE_U_I32: Def->setDesc(TII.get(GT_U_I32)); Inverted = true; break;
        case EQ_I64: Def->setDesc(TII.get(NE_I64)); Inverted = true; break;
        case NE_I64: Def->setDesc(TII.get(EQ_I64)); Inverted = true; break;
        case GT_S_I64: Def->setDesc(TII.get(LE_S_I64)); Inverted = true; break;
        case GE_S_I64: Def->setDesc(TII.get(LT_S_I64)); Inverted = true; break;
        case LT_S_I64: Def->setDesc(TII.get(GE_S_I64)); Inverted = true; break;
        case LE_S_I64: Def->setDesc(TII.get(GT_S_I64)); Inverted = true; break;
        case GT_U_I64: Def->setDesc(TII.get(LE_U_I64)); Inverted = true; break;
        case GE_U_I64: Def->setDesc(TII.get(LT_U_I64)); Inverted = true; break;
        case LT_U_I64: Def->setDesc(TII.get(GE_U_I64)); Inverted = true; break;
        case LE_U_I64: Def->setDesc(TII.get(GT_U_I64)); Inverted = true; break;
        case EQ_F32: Def->setDesc(TII.get(NE_F32)); Inverted = true; break;
        case NE_F32: Def->setDesc(TII.get(EQ_F32)); Inverted = true; break;
        case EQ_F64: Def->setDesc(TII.get(NE_F64)); Inverted = true; break;
        case NE_F64: Def->setDesc(TII.get(EQ_F64)); Inverted = true; break;
        default: break;
        }
      }

      // If we weren't able to invert the condition in place, insert an
      // instruction to invert it.
      if (!Inverted) {
        unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
        BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp)
            .addReg(Cond);
        MFI.stackifyVReg(Tmp);
        Cond = Tmp;
        Inverted = true;
      }

      // The br_unless condition has now been inverted. Insert a br_if and
      // delete the br_unless.
      assert(Inverted);
      BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF))
          .addOperand(MI->getOperand(0))
          .addReg(Cond);
      MBB.erase(MI);
    }
  }

  return true;
}
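
A standalone sketch of the two inversion strategies above (a hypothetical enum, not the WebAssembly opcode list): flip the comparison opcode in place when an inverse exists, otherwise materialize an `eqz` of the condition.

#include <cstdio>

enum Cond { EQ, NE, LT_S, GE_S, OTHER };

bool invertInPlace(Cond &C) {
  switch (C) {
  case EQ:   C = NE;   return true;
  case NE:   C = EQ;   return true;
  case LT_S: C = GE_S; return true;
  case GE_S: C = LT_S; return true;
  default:   return false; // no known inverse; caller inserts EQZ
  }
}

int main() {
  Cond C = OTHER;
  if (!invertInPlace(C)) // fallback path, like the EQZ_I32 insertion above
    std::printf("emit: tmp = eqz(cond); br_if tmp\n");
}
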
Example n. 19
0
/// Attempt the reassociation transformation to reduce critical path length.
/// See the above comments before getMachineCombinerPatterns().
void TargetInstrInfo::reassociateOps(
    MachineInstr &Root, MachineInstr &Prev,
    MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
    MachineFunction *MF = Root.getParent()->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
    const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
    const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);

    // This array encodes the operand index for each parameter because the
    // operands may be commuted. Each row corresponds to a pattern value,
    // and each column specifies the index of A, B, X, Y.
    unsigned OpIdx[4][4] = {
        { 1, 1, 2, 2 },
        { 1, 2, 2, 1 },
        { 2, 1, 1, 2 },
        { 2, 2, 1, 1 }
    };

    int Row;
    switch (Pattern) {
    case MachineCombinerPattern::REASSOC_AX_BY:
        Row = 0;
        break;
    case MachineCombinerPattern::REASSOC_AX_YB:
        Row = 1;
        break;
    case MachineCombinerPattern::REASSOC_XA_BY:
        Row = 2;
        break;
    case MachineCombinerPattern::REASSOC_XA_YB:
        Row = 3;
        break;
    default:
        llvm_unreachable("unexpected MachineCombinerPattern");
    }

    MachineOperand &OpA = Prev.getOperand(OpIdx[Row][0]);
    MachineOperand &OpB = Root.getOperand(OpIdx[Row][1]);
    MachineOperand &OpX = Prev.getOperand(OpIdx[Row][2]);
    MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]);
    MachineOperand &OpC = Root.getOperand(0);

    unsigned RegA = OpA.getReg();
    unsigned RegB = OpB.getReg();
    unsigned RegX = OpX.getReg();
    unsigned RegY = OpY.getReg();
    unsigned RegC = OpC.getReg();

    if (TargetRegisterInfo::isVirtualRegister(RegA))
        MRI.constrainRegClass(RegA, RC);
    if (TargetRegisterInfo::isVirtualRegister(RegB))
        MRI.constrainRegClass(RegB, RC);
    if (TargetRegisterInfo::isVirtualRegister(RegX))
        MRI.constrainRegClass(RegX, RC);
    if (TargetRegisterInfo::isVirtualRegister(RegY))
        MRI.constrainRegClass(RegY, RC);
    if (TargetRegisterInfo::isVirtualRegister(RegC))
        MRI.constrainRegClass(RegC, RC);

    // Create a new virtual register for the result of (X op Y) instead of
    // recycling RegB because the MachineCombiner's computation of the critical
    // path requires a new register definition rather than an existing one.
    unsigned NewVR = MRI.createVirtualRegister(RC);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));

    unsigned Opcode = Root.getOpcode();
    bool KillA = OpA.isKill();
    bool KillX = OpX.isKill();
    bool KillY = OpY.isKill();

    // Create new instructions for insertion.
    MachineInstrBuilder MIB1 =
        BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
        .addReg(RegX, getKillRegState(KillX))
        .addReg(RegY, getKillRegState(KillY));
    MachineInstrBuilder MIB2 =
        BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
        .addReg(RegA, getKillRegState(KillA))
        .addReg(NewVR, getKillRegState(true));

    setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2);

    // Record new instructions for insertion and old instructions for deletion.
    InsInstrs.push_back(MIB1);
    InsInstrs.push_back(MIB2);
    DelInstrs.push_back(&Prev);
    DelInstrs.push_back(&Root);
}
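
A worked sketch of the reassociation above on plain integers: the root computes ((A op X) op Y); rewriting it as t = (X op Y) followed by (A op t) gives the same value for an associative op but a shorter dependence chain when A arrives late. NewVR in the pass corresponds to `t` below.

#include <cstdio>

int reassociated(int A, int X, int Y) {
  int t = X + Y; // MIB1: NewVR = RegX op RegY (independent of A)
  return A + t;  // MIB2: RegC  = RegA op NewVR
}

int main() {
  // Same value as (A + X) + Y; the difference is only in the schedule:
  // t no longer has to wait for A.
  std::printf("%d\n", reassociated(1, 2, 3));
}
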
Example n. 20
0
bool ARMInstructionSelector::select(MachineInstr &I) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  auto &MBB = *I.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  if (selectImpl(I))
    return true;

  MachineInstrBuilder MIB{MF, I};
  bool isSExt = false;

  using namespace TargetOpcode;
  switch (I.getOpcode()) {
  case G_SEXT:
    isSExt = true;
    LLVM_FALLTHROUGH;
  case G_ZEXT: {
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    // FIXME: Smaller destination sizes coming soon!
    if (DstTy.getSizeInBits() != 32) {
      DEBUG(dbgs() << "Unsupported destination size for extension");
      return false;
    }

    LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
    unsigned SrcSize = SrcTy.getSizeInBits();
    switch (SrcSize) {
    case 1: {
      // ZExt boils down to & 0x1; for SExt we also subtract that from 0
      I.setDesc(TII.get(ARM::ANDri));
      MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp());

      if (isSExt) {
        unsigned SExtResult = I.getOperand(0).getReg();

        // Use a new virtual register for the result of the AND
        unsigned AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass);
        I.getOperand(0).setReg(AndResult);

        auto InsertBefore = std::next(I.getIterator());
        auto SubI =
            BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::RSBri))
                .addDef(SExtResult)
                .addUse(AndResult)
                .addImm(0)
                .add(predOps(ARMCC::AL))
                .add(condCodeOp());
        if (!constrainSelectedInstRegOperands(*SubI, TII, TRI, RBI))
          return false;
      }
      break;
    }
    case 8:
    case 16: {
      unsigned NewOpc = selectSimpleExtOpc(I.getOpcode(), SrcSize);
      if (NewOpc == I.getOpcode())
        return false;
      I.setDesc(TII.get(NewOpc));
      MIB.addImm(0).add(predOps(ARMCC::AL));
      break;
    }
    default:
      DEBUG(dbgs() << "Unsupported source size for extension");
      return false;
    }
    break;
  }
  case G_ANYEXT:
  case G_TRUNC: {
    // The high bits are undefined, so there's nothing special to do, just
    // treat it as a copy.
    auto SrcReg = I.getOperand(1).getReg();
    auto DstReg = I.getOperand(0).getReg();

    const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
    const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

    if (SrcRegBank.getID() != DstRegBank.getID()) {
      DEBUG(dbgs() << "G_TRUNC/G_ANYEXT operands on different register banks\n");
      return false;
    }

    if (SrcRegBank.getID() != ARM::GPRRegBankID) {
      DEBUG(dbgs() << "G_TRUNC/G_ANYEXT on non-GPR not supported yet\n");
      return false;
    }

    I.setDesc(TII.get(COPY));
    return selectCopy(I, TII, MRI, TRI, RBI);
  }
  case G_SELECT:
    return selectSelect(MIB, MRI);
  case G_ICMP: {
    CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END,
                        ARM::GPRRegBankID, 32);
    return selectCmp(Helper, MIB, MRI);
  }
  case G_FCMP: {
    assert(TII.getSubtarget().hasVFP2() && "Can't select fcmp without VFP");

    unsigned OpReg = I.getOperand(2).getReg();
    unsigned Size = MRI.getType(OpReg).getSizeInBits();

    if (Size == 64 && TII.getSubtarget().isFPOnlySP()) {
      DEBUG(dbgs() << "Subtarget only supports single precision");
      return false;
    }
    if (Size != 32 && Size != 64) {
      DEBUG(dbgs() << "Unsupported size for G_FCMP operand");
      return false;
    }

    CmpConstants Helper(Size == 32 ? ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT,
                        ARM::FPRRegBankID, Size);
    return selectCmp(Helper, MIB, MRI);
  }
  case G_GEP:
    I.setDesc(TII.get(ARM::ADDrr));
    MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
    break;
  case G_FRAME_INDEX:
    // Add 0 to the given frame index and hope it will eventually be folded into
    // the user(s).
    I.setDesc(TII.get(ARM::ADDri));
    MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp());
    break;
  case G_CONSTANT: {
    unsigned Reg = I.getOperand(0).getReg();

    if (!validReg(MRI, Reg, 32, ARM::GPRRegBankID))
      return false;

    I.setDesc(TII.get(ARM::MOVi));
    MIB.add(predOps(ARMCC::AL)).add(condCodeOp());

    auto &Val = I.getOperand(1);
    if (Val.isCImm()) {
      if (Val.getCImm()->getBitWidth() > 32)
        return false;
      Val.ChangeToImmediate(Val.getCImm()->getZExtValue());
    }

    if (!Val.isImm()) {
      return false;
    }

    break;
  }
  case G_GLOBAL_VALUE:
    return selectGlobal(MIB, MRI);
  case G_STORE:
  case G_LOAD: {
    const auto &MemOp = **I.memoperands_begin();
    if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
      DEBUG(dbgs() << "Atomic load/store not supported yet\n");
      return false;
    }

    unsigned Reg = I.getOperand(0).getReg();
    unsigned RegBank = RBI.getRegBank(Reg, MRI, TRI)->getID();

    LLT ValTy = MRI.getType(Reg);
    const auto ValSize = ValTy.getSizeInBits();

    assert((ValSize != 64 || TII.getSubtarget().hasVFP2()) &&
           "Don't know how to load/store 64-bit value without VFP");

    const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize);
    if (NewOpc == G_LOAD || NewOpc == G_STORE)
      return false;

    I.setDesc(TII.get(NewOpc));

    if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH)
      // LDRH has a funny addressing mode (there's already a FIXME for it).
      MIB.addReg(0);
    MIB.addImm(0).add(predOps(ARMCC::AL));
    break;
  }
  case G_MERGE_VALUES: {
    if (!selectMergeValues(MIB, TII, MRI, TRI, RBI))
      return false;
    break;
  }
  case G_UNMERGE_VALUES: {
    if (!selectUnmergeValues(MIB, TII, MRI, TRI, RBI))
      return false;
    break;
  }
  case G_BRCOND: {
    if (!validReg(MRI, I.getOperand(0).getReg(), 1, ARM::GPRRegBankID)) {
      DEBUG(dbgs() << "Unsupported condition register for G_BRCOND");
      return false;
    }

    // Set the flags.
    auto Test = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::TSTri))
                    .addReg(I.getOperand(0).getReg())
                    .addImm(1)
                    .add(predOps(ARMCC::AL));
    if (!constrainSelectedInstRegOperands(*Test, TII, TRI, RBI))
      return false;

    // Branch conditionally.
    auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::Bcc))
                      .add(I.getOperand(1))
                      .add(predOps(ARMCC::EQ, ARM::CPSR));
    if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI))
      return false;
    I.eraseFromParent();
    return true;
  }
  default:
    return false;
  }

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
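
The arithmetic behind the G_SEXT/G_ZEXT-from-i1 lowering above, as a standalone sketch: zext is `x & 1` (the ANDri), and sext additionally computes `0 - (x & 1)` (the RSBri with immediate 0), yielding 0 or all-ones.

#include <cassert>
#include <cstdint>

uint32_t zext1(uint32_t X) { return X & 1; }                // ANDri #1
int32_t  sext1(uint32_t X) { return 0 - (int32_t)(X & 1); } // RSBri #0

int main() {
  assert(zext1(0xff) == 1);
  assert(sext1(0xff) == -1); // all bits set
  assert(sext1(0xfe) == 0);
}
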
Example n. 21
0
/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of the
/// result.
///
/// Do not generate an EXTRACT that is used only in a debug use, as this changes
/// the code. Since this code does not currently share EXTRACTs, just ignore all
/// debug uses.
bool PeepholeOptimizer::
optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
  unsigned SrcReg, DstReg, SubIdx;
  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    return false;

  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  if (MRI->hasOneNonDBGUse(SrcReg))
    // No other uses.
    return false;

  // Ensure DstReg can get a register class that actually supports
  // sub-registers. Don't change the class until we commit.
  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
  DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx);
  if (!DstRC)
    return false;

  // The ext instr may be operating on a sub-register of SrcReg as well.
  // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
  // register.
  // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
  // SrcReg:SubIdx should be replaced.
  bool UseSrcSubIdx = TM->getRegisterInfo()->
    getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0;

  // The source has other uses. See if we can replace the other uses with use of
  // the result of the extension.
  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
  for (MachineRegisterInfo::use_nodbg_iterator
       UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
       UI != UE; ++UI)
    ReachedBBs.insert(UI->getParent());

  // Uses that are in the same BB as uses of the result of the instruction.
  SmallVector<MachineOperand*, 8> Uses;

  // Uses that the result of the instruction can reach.
  SmallVector<MachineOperand*, 8> ExtendedUses;

  bool ExtendLife = true;
  for (MachineRegisterInfo::use_nodbg_iterator
       UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end();
       UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    MachineInstr *UseMI = &*UI;
    if (UseMI == MI)
      continue;

    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // Only accept uses of SrcReg:SubIdx.
    if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
      continue;

    // It's an error to translate this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    //
    // into this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1027 = COPY %reg1025:4
    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    //
    // The problem here is that SUBREG_TO_REG is there to assert that an
    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    // the COPY here, it will give us the value after the <sext>, not the
    // original value of %reg1024 before <sext>.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      continue;

    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these unless it's a PHI.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in order
      // to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend live range of
      // the extension result.
      ExtendLife = false;
      break;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
              std::back_inserter(Uses));

  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;

    // Look for PHI uses of the extended result; we don't want to extend the
    // liveness of a PHI input. It breaks all kinds of assumptions
    // downstream. A PHI use is expected to be the kill of its source values.
    for (MachineRegisterInfo::use_nodbg_iterator
         UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
         UI != UE; ++UI)
      if (UI->isPHI())
        PHIBBs.insert(UI->getParent());

    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
      MachineOperand *UseMO = Uses[i];
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      if (PHIBBs.count(UseMBB))
        continue;

      // About to add uses of DstReg, clear DstReg's kill flags.
      if (!Changed) {
        MRI->clearKillFlags(DstReg);
        MRI->constrainRegClass(DstReg, DstRC);
      }

      unsigned NewVR = MRI->createVirtualRegister(RC);
      MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
                                   TII->get(TargetOpcode::COPY), NewVR)
        .addReg(DstReg, 0, SubIdx);
      // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
      if (UseSrcSubIdx) {
        Copy->getOperand(0).setSubReg(SubIdx);
        Copy->getOperand(0).setIsUndef();
      }
      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}
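
A hedged sketch of the core idea above with plain integers: after `d = sign_extend(s)`, later readers of `s` can instead read the low sub-register of `d`, which is exactly the value of `s`; this extends d's live range but lets the uses of s be rewritten.

#include <cassert>
#include <cstdint>

int main() {
  int32_t s = -5;
  int64_t d = (int64_t)s;  // %reg1025 = <sext> %reg1024
  int32_t lo = (int32_t)d; // COPY of d's low sub-register
  assert(lo == s);         // a safe replacement for uses of s
}
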
Example n. 22
0
MachineBasicBlock::iterator MSP430FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const MSP430InstrInfo &TII =
      *static_cast<const MSP430InstrInfo *>(MF.getSubtarget().getInstrInfo());
  unsigned StackAlign = getStackAlignment();

  if (!hasReservedCallFrame(MF)) {
    // If the stack pointer can be changed after prologue, turn the
    // adjcallstackup instruction into a 'sub SP, <amt>' and the
    // adjcallstackdown instruction into 'add SP, <amt>'
    // TODO: consider using push / pop instead of sub + store / add
    MachineInstr *Old = I;
    uint64_t Amount = Old->getOperand(0).getImm();
    if (Amount != 0) {
      // We need to keep the stack aligned properly.  To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;

      MachineInstr *New = nullptr;
      if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
        New = BuildMI(MF, Old->getDebugLoc(),
                      TII.get(MSP430::SUB16ri), MSP430::SP)
          .addReg(MSP430::SP).addImm(Amount);
      } else {
        assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
        // factor out the amount the callee already popped.
        uint64_t CalleeAmt = Old->getOperand(1).getImm();
        Amount -= CalleeAmt;
        if (Amount)
          New = BuildMI(MF, Old->getDebugLoc(),
                        TII.get(MSP430::ADD16ri), MSP430::SP)
            .addReg(MSP430::SP).addImm(Amount);
      }

      if (New) {
        // The SRW implicit def is dead.
        New->getOperand(3).setIsDead();

        // Replace the pseudo instruction with a new instruction...
        MBB.insert(I, New);
      }
    }
  } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
    // If we are performing frame pointer elimination and if the callee pops
    // something off the stack pointer, add it back.
    if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
      MachineInstr *Old = I;
      MachineInstr *New =
        BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
                MSP430::SP).addReg(MSP430::SP).addImm(CalleeAmt);
      // The SRW implicit def is dead.
      New->getOperand(3).setIsDead();

      MBB.insert(I, New);
    }
  }

  return MBB.erase(I);
}
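
The rounding used above, as a standalone helper: round Amount up to the next multiple of StackAlign (assumed positive), equivalent to the `(Amount+StackAlign-1)/StackAlign*StackAlign` expression in the pass.

#include <cassert>
#include <cstdint>

uint64_t alignTo(uint64_t Amount, uint64_t StackAlign) {
  // Adding StackAlign-1 pushes any partial chunk over the boundary;
  // the division then truncates back down to a multiple.
  return (Amount + StackAlign - 1) / StackAlign * StackAlign;
}

int main() {
  assert(alignTo(0, 4) == 0);
  assert(alignTo(5, 4) == 8);
  assert(alignTo(8, 4) == 8);
}
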
Example n. 23
0
/// spillAroundUses - insert spill code around each use of Reg.
void InlineSpiller::spillAroundUses(unsigned Reg) {
  DEBUG(dbgs() << "spillAroundUses " << PrintReg(Reg) << '\n');
  LiveInterval &OldLI = LIS.getInterval(Reg);

  // Iterate over instructions using Reg.
  for (MachineRegisterInfo::reg_bundle_iterator
       RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
       RegI != E; ) {
    MachineInstr *MI = &*(RegI++);

    // Debug values are not allowed to affect codegen.
    if (MI->isDebugValue()) {
      // Modify DBG_VALUE now that the value is in a spill slot.
      bool IsIndirect = MI->isIndirectDebugValue();
      uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
      const MDNode *Var = MI->getDebugVariable();
      const MDNode *Expr = MI->getDebugExpression();
      DebugLoc DL = MI->getDebugLoc();
      DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
      MachineBasicBlock *MBB = MI->getParent();
      assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
             "Expected inlined-at fields to agree");
      BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE))
          .addFrameIndex(StackSlot)
          .addImm(Offset)
          .addMetadata(Var)
          .addMetadata(Expr);
      continue;
    }

    // Ignore copies to/from snippets. We'll delete them.
    if (SnippetCopies.count(MI))
      continue;

    // Stack slot accesses may coalesce away.
    if (coalesceStackAccess(MI, Reg))
      continue;

    // Analyze instruction.
    SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
    MIBundleOperands::VirtRegInfo RI =
      MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops);

    // Find the slot index where this instruction reads and writes OldLI.
    // This is usually the def slot, except for tied early clobbers.
    SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
    if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
      if (SlotIndex::isSameInstr(Idx, VNI->def))
        Idx = VNI->def;

    // Check for a sibling copy.
    unsigned SibReg = isFullCopyOf(MI, Reg);
    if (SibReg && isSibling(SibReg)) {
      // This may actually be a copy between snippets.
      if (isRegToSpill(SibReg)) {
        DEBUG(dbgs() << "Found new snippet copy: " << *MI);
        SnippetCopies.insert(MI);
        continue;
      }
      if (RI.Writes) {
        // Hoist the spill of a sib-reg copy.
        if (hoistSpill(OldLI, MI)) {
          // This COPY is now dead, the value is already in the stack slot.
          MI->getOperand(0).setIsDead();
          DeadDefs.push_back(MI);
          continue;
        }
      } else {
        // This is a reload for a sib-reg copy. Drop spills downstream.
        LiveInterval &SibLI = LIS.getInterval(SibReg);
        eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx));
        // The COPY will fold to a reload below.
      }
    }

    // Attempt to fold memory ops.
    if (foldMemoryOperand(Ops))
      continue;

    // Create a new virtual register for spill/fill.
    // FIXME: Infer regclass from instruction alone.
    unsigned NewVReg = Edit->createFrom(Reg);

    if (RI.Reads)
      insertReload(NewVReg, Idx, MI);

    // Rewrite instruction operands.
    bool hasLiveDef = false;
    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
      MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second);
      MO.setReg(NewVReg);
      if (MO.isUse()) {
        if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second))
          MO.setIsKill();
      } else {
        if (!MO.isDead())
          hasLiveDef = true;
      }
    }
    DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n');

    // FIXME: Use a second vreg if instruction has no tied ops.
    if (RI.Writes)
      if (hasLiveDef)
        insertSpill(NewVReg, true, MI);
  }
}
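
A hedged sketch of the rewrite step above (toy model, hypothetical names): each surviving use of the spilled register is rewritten to a fresh vreg, with a reload inserted before instructions that read it and a spill after instructions with a live def.

#include <cstdio>

struct UseInfo { bool Reads, Writes, DefIsDead; };

void rewriteUse(const UseInfo &U, int NewVReg) {
  if (U.Reads)
    std::printf("  insert reload of v%d from the stack slot\n", NewVReg);
  std::printf("  rewrite operand to v%d\n", NewVReg);
  if (U.Writes && !U.DefIsDead) // only live defs need storing back
    std::printf("  insert spill of v%d to the stack slot\n", NewVReg);
}

int main() { rewriteUse({true, true, false}, 42); }
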
Example n. 24
0
void X86ExpandPseudo::ExpandICallBranchFunnel(
    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) {
  MachineBasicBlock *JTMBB = MBB;
  MachineInstr *JTInst = &*MBBI;
  MachineFunction *MF = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();
  auto InsPt = MachineFunction::iterator(MBB);
  ++InsPt;

  std::vector<std::pair<MachineBasicBlock *, unsigned>> TargetMBBs;
  DebugLoc DL = JTInst->getDebugLoc();
  MachineOperand Selector = JTInst->getOperand(0);
  const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal();

  auto CmpTarget = [&](unsigned Target) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11)
        .addReg(X86::RIP)
        .addImm(1)
        .addReg(0)
        .addGlobalAddress(CombinedGlobal,
                          JTInst->getOperand(2 + 2 * Target).getImm())
        .addReg(0);
    BuildMI(*MBB, MBBI, DL, TII->get(X86::CMP64rr))
        .add(Selector)
        .addReg(X86::R11);
  };

  auto CreateMBB = [&]() {
    auto *NewMBB = MF->CreateMachineBasicBlock(BB);
    MBB->addSuccessor(NewMBB);
    return NewMBB;
  };

  auto EmitCondJump = [&](unsigned Opcode, MachineBasicBlock *ThenMBB) {
    BuildMI(*MBB, MBBI, DL, TII->get(Opcode)).addMBB(ThenMBB);

    auto *ElseMBB = CreateMBB();
    MF->insert(InsPt, ElseMBB);
    MBB = ElseMBB;
    MBBI = MBB->end();
  };

  auto EmitCondJumpTarget = [&](unsigned Opcode, unsigned Target) {
    auto *ThenMBB = CreateMBB();
    TargetMBBs.push_back({ThenMBB, Target});
    EmitCondJump(Opcode, ThenMBB);
  };

  auto EmitTailCall = [&](unsigned Target) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * Target));
  };

  std::function<void(unsigned, unsigned)> EmitBranchFunnel =
      [&](unsigned FirstTarget, unsigned NumTargets) {
    if (NumTargets == 1) {
      EmitTailCall(FirstTarget);
      return;
    }

    if (NumTargets == 2) {
      CmpTarget(FirstTarget + 1);
      EmitCondJumpTarget(X86::JB_1, FirstTarget);
      EmitTailCall(FirstTarget + 1);
      return;
    }

    if (NumTargets < 6) {
      CmpTarget(FirstTarget + 1);
      EmitCondJumpTarget(X86::JB_1, FirstTarget);
      EmitCondJumpTarget(X86::JE_1, FirstTarget + 1);
      EmitBranchFunnel(FirstTarget + 2, NumTargets - 2);
      return;
    }

    auto *ThenMBB = CreateMBB();
    CmpTarget(FirstTarget + (NumTargets / 2));
    EmitCondJump(X86::JB_1, ThenMBB);
    EmitCondJumpTarget(X86::JE_1, FirstTarget + (NumTargets / 2));
    EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1,
                     NumTargets - (NumTargets / 2) - 1);

    MF->insert(InsPt, ThenMBB);
    MBB = ThenMBB;
    MBBI = MBB->end();
    EmitBranchFunnel(FirstTarget, NumTargets / 2);
  };

  EmitBranchFunnel(0, (JTInst->getNumOperands() - 2) / 2);
  for (auto P : TargetMBBs) {
    MF->insert(InsPt, P.first);
    BuildMI(P.first, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * P.second));
  }
  JTMBB->erase(JTInst);
}
/// fixupConditionalBranch - Fix up a conditional branch whose destination is
/// too far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
  MachineBasicBlock *DestBB = getDestBlock(MI);

  // Add an unconditional branch to the destination and invert the branch
  // condition to jump over it:
  // tbz L1
  // =>
  // tbnz L2
  // b   L1
  // L2:

  // If the branch is at the end of its MBB and that has a fall-through block,
  // direct the updated conditional branch to the fall-through block. Otherwise,
  // split the MBB before the next instruction.
  MachineBasicBlock *MBB = MI.getParent();
  MachineInstr *BMI = &MBB->back();
  bool NeedSplit = (BMI != &MI) || !hasFallthrough(*MBB);

  if (BMI != &MI) {
    if (std::next(MachineBasicBlock::iterator(MI)) ==
            std::prev(MBB->getLastNonDebugInstr()) &&
        BMI->isUnconditionalBranch()) {
      // Last MI in the BB is an unconditional branch. We can simply invert the
      // condition and swap destinations:
      // beq L1
      // b   L2
      // =>
      // bne L2
      // b   L1
      MachineBasicBlock *NewDest = getDestBlock(*BMI);
      if (isBlockInRange(MI, *NewDest)) {
        DEBUG(dbgs() << "  Invert condition and swap its destination with "
                     << *BMI);
        changeBranchDestBlock(*BMI, *DestBB);

        int NewSize =
          insertInvertedConditionalBranch(*MBB, MI.getIterator(),
                                          MI.getDebugLoc(), MI, *NewDest);
        int OldSize = TII->getInstSizeInBytes(MI);
        BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize);
        MI.eraseFromParent();
        return true;
      }
    }
  }

  if (NeedSplit) {
    // Analyze the branch so we know how to update the successor lists.
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    SmallVector<MachineOperand, 2> Cond;
    bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond, false);
    assert(!Fail && "branches to relax should be analyzable");
    (void)Fail;

    MachineBasicBlock *NewBB = splitBlockBeforeInstr(MI);
    // No need for the branch to the next block. We're adding an unconditional
    // branch to the destination.
    int delta = TII->getInstSizeInBytes(MBB->back());
    BlockInfo[MBB->getNumber()].Size -= delta;
    MBB->back().eraseFromParent();
    // BlockInfo[SplitBB].Offset is wrong temporarily, fixed below

    // Update the successor lists according to the transformation to follow.
    // Do it here since if there's no split, no update is needed.
    MBB->replaceSuccessor(FBB, NewBB);
    NewBB->addSuccessor(FBB);
  }

  MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB));

  DEBUG(dbgs() << "  Insert B to BB#" << DestBB->getNumber()
               << ", invert condition and change dest. to BB#"
               << NextBB.getNumber() << '\n');

  unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size;

  // Insert a new conditional branch and a new unconditional branch.
  MBBSize += insertInvertedConditionalBranch(*MBB, MBB->end(),
                                             MI.getDebugLoc(), MI, NextBB);

  MBBSize += insertUnconditionalBranch(*MBB, *DestBB, MI.getDebugLoc());

  // Remove the old conditional branch.  It may or may not still be in MBB.
  MBBSize -= TII->getInstSizeInBytes(MI);
  MI.eraseFromParent();

  // Finally, keep the block offsets up to date.
  adjustBlockOffsets(*MBB);
  return true;
}
Example n. 26
0
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  MachineBasicBlock::iterator I = MI;

  unsigned Save = MI.getOperand(1).getReg();
  unsigned Idx = MI.getOperand(3).getReg();

  if (AMDGPU::SReg_32RegClass.contains(Idx)) {
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
            .addReg(Idx);
    MBB.insert(I, MovRel);
  } else {

    assert(AMDGPU::SReg_64RegClass.contains(Save));
    assert(AMDGPU::VReg_32RegClass.contains(Idx));

    // Save the EXEC mask
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
            .addReg(AMDGPU::EXEC);

    // Read the next variant into VCC (lower 32 bits) <- also loop target
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
            AMDGPU::VCC_LO)
            .addReg(Idx);

    // Move index from VCC into M0
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
            .addReg(AMDGPU::VCC_LO);

    // Compare the just read M0 value to all possible Idx values
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
            .addReg(AMDGPU::M0)
            .addReg(Idx);

    // Update EXEC, save the original EXEC value to VCC
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
            .addReg(AMDGPU::VCC);

    // Do the actual move
    MBB.insert(I, MovRel);

    // Update EXEC, switch all done bits to 0 and all todo bits to 1
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
            .addReg(AMDGPU::EXEC)
            .addReg(AMDGPU::VCC);

    // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
            .addImm(-7)
            .addReg(AMDGPU::EXEC);

    // Restore EXEC
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
            .addReg(Save);

  }
  // FIXME: Are there any values other than the LDS address clamp that need to
  // be stored in the m0 register and may be live for more than a few
  // instructions?  If so, we should save the m0 register at the beginning
  // of this function and restore it here.
  // FIXME: Add support for LDS direct loads.
  InitM0ForLDS(&MI);
  MI.eraseFromParent();
}
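For reference, the VGPR-index path above amounts to a "waterfall" loop: each iteration picks one lane's index with V_READFIRSTLANE_B32, runs the indirect move for every lane holding that same index, then masks those lanes out of EXEC until none remain. Reassembled as a commented sequence (label and register names are illustrative; the -7 immediate above is the backward offset to the V_READFIRSTLANE_B32):

// s_mov_b64             s_save, exec       ; save the EXEC mask
// loop:                                    ; target of the -7 offset
//   v_readfirstlane_b32 vcc_lo, v_idx      ; pick one lane's index
//   s_mov_b32           m0, vcc_lo         ; make it the active index
//   v_cmp_eq_u32        vcc, m0, v_idx     ; which lanes hold this index?
//   s_and_saveexec_b64  vcc, vcc           ; run only those lanes
//   <movrel>                               ; the actual indirect move
//   s_xor_b64           exec, exec, vcc    ; retire the lanes just served
//   s_cbranch_execnz    loop               ; loop while any lanes remain
// s_mov_b64             exec, s_save       ; restore the EXEC mask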
Example no. 27
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  MachineBasicBlock::iterator I = MI;

  unsigned Save = MI.getOperand(1).getReg();
  unsigned Idx = MI.getOperand(3).getReg();

  if (AMDGPU::SReg_32RegClass.contains(Idx)) {
    if (Offset) {
      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
              .addReg(Idx)
              .addImm(Offset);
    } else {
      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
              .addReg(Idx);
    }
    MBB.insert(I, MovRel);
  } else {

    assert(AMDGPU::SReg_64RegClass.contains(Save));
    assert(AMDGPU::VGPR_32RegClass.contains(Idx));

    // Save the EXEC mask
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
            .addReg(AMDGPU::EXEC);

    // Read the next variant into VCC (lower 32 bits) <- also loop target
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
            AMDGPU::VCC_LO)
            .addReg(Idx);

    // Move index from VCC into M0
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
            .addReg(AMDGPU::VCC_LO);

    // Compare the just read M0 value to all possible Idx values
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32))
      .addReg(AMDGPU::M0)
      .addReg(Idx);

    // Update EXEC, save the original EXEC value to VCC
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
            .addReg(AMDGPU::VCC);

    if (Offset) {
      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
              .addReg(AMDGPU::M0)
              .addImm(Offset);
    }
    // Do the actual move
    MBB.insert(I, MovRel);

    // Update EXEC, switch all done bits to 0 and all todo bits to 1
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
            .addReg(AMDGPU::EXEC)
            .addReg(AMDGPU::VCC);

    // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
      .addImm(-7);

    // Restore EXEC
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
            .addReg(Save);

  }
  MI.eraseFromParent();
}
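The only difference from the previous version is the Offset parameter: when it is non-zero, it is folded into M0 with S_ADD_I32, directly on the uniform SGPR path, and inside the loop (after S_AND_SAVEEXEC_B64) on the divergent path. A sketch of a plausible caller lowering an indirect-read pseudo follows; the pseudo's operand layout and the sub-register handling are assumptions inferred from the operand indices used above, not the pass's actual code:

// Hypothetical caller; assumed pseudo operand layout:
//   0: dst VGPR, 1: scratch SGPR pair (Save), 2: source vector,
//   3: index register, 4: constant offset.
void lowerIndirectSrcSketch(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();

  unsigned Dst = MI.getOperand(0).getReg();
  unsigned Vec = MI.getOperand(2).getReg();
  int Off = MI.getOperand(4).getImm();

  // First 32-bit element of the vector; movrel indexes from it via M0.
  unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
  if (!SubReg)
    SubReg = Vec; // single-register vector

  // Build the indirect move unattached; LoadM0 inserts it once M0 is set.
  MachineInstr *MovRel =
      BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
          .addReg(SubReg)
          .addReg(AMDGPU::M0, RegState::Implicit)
          .addReg(Vec, RegState::Implicit);

  LoadM0(MI, MovRel, Off);
}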
Example no. 28
bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN,
                                      MachineInstr *AddAslMI,
                                      const MachineOperand &ImmOp,
                                      unsigned ImmOpNum) {
  NodeAddr<StmtNode *> SA = AddAslUN.Addr->getOwner(*DFG);

  DEBUG(dbgs() << "Processing addasl :" << *AddAslMI << "\n");

  NodeList UNodeList;
  getAllRealUses(SA, UNodeList);

  for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) {
    NodeAddr<UseNode *> UseUN = *I;
    assert(!(UseUN.Addr->getFlags() & NodeAttrs::PhiRef) &&
           "Can't transform this 'AddAsl' instruction!");

    NodeAddr<StmtNode *> UseIA = UseUN.Addr->getOwner(*DFG);
    DEBUG(dbgs() << "[InstrNode]: " << Print<NodeAddr<InstrNode *>>(UseIA, *DFG)
                 << "\n");
    MachineInstr *UseMI = UseIA.Addr->getCode();
    DEBUG(dbgs() << "[MI <BB#" << UseMI->getParent()->getNumber()
                 << ">]: " << *UseMI << "\n");
    const MCInstrDesc &UseMID = UseMI->getDesc();
    assert(HII->getAddrMode(UseMI) == HexagonII::BaseImmOffset);

    auto UsePos = MachineBasicBlock::iterator(UseMI);
    MachineBasicBlock::instr_iterator InsertPt = UsePos.getInstrIterator();
    short NewOpCode = getBaseWithLongOffset(UseMI);
    assert(NewOpCode >= 0 && "Invalid New opcode\n");

    unsigned OpStart;
    unsigned OpEnd = UseMI->getNumOperands();

    MachineBasicBlock *BB = UseMI->getParent();
    MachineInstrBuilder MIB =
        BuildMI(*BB, InsertPt, UseMI->getDebugLoc(), HII->get(NewOpCode));
    // change mem(Rs + # ) -> mem(Rt << # + ##)
    if (UseMID.mayLoad()) {
      MIB.addOperand(UseMI->getOperand(0));
      MIB.addOperand(AddAslMI->getOperand(2));
      MIB.addOperand(AddAslMI->getOperand(3));
      const GlobalValue *GV = ImmOp.getGlobal();
      MIB.addGlobalAddress(GV, UseMI->getOperand(2).getImm(),
                           ImmOp.getTargetFlags());
      OpStart = 3;
    } else if (UseMID.mayStore()) {
      MIB.addOperand(AddAslMI->getOperand(2));
      MIB.addOperand(AddAslMI->getOperand(3));
      const GlobalValue *GV = ImmOp.getGlobal();
      MIB.addGlobalAddress(GV, UseMI->getOperand(1).getImm(),
                           ImmOp.getTargetFlags());
      MIB.addOperand(UseMI->getOperand(2));
      OpStart = 3;
    } else
      llvm_unreachable("Unhandled instruction");

    for (unsigned i = OpStart; i < OpEnd; ++i)
      MIB.addOperand(UseMI->getOperand(i));

    Deleted.insert(UseMI);
  }

  return true;
}
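Concretely, the loop above rewrites each use of the addasl-computed address into a single base-with-long-offset access, folding the global address and the old immediate offset into one extended operand. A hypothetical before/after (the register numbers, shift amount, and global @g are invented for illustration):

// Before:                            (r2 holds ##g, the addasl base input)
//   r1 = addasl(r2, r3, #2)          // r1 = r2 + (r3 << 2)
//   r4 = memw(r1 + #8)               // BaseImmOffset addressing
// After:
//   r4 = memw(r3 << #2 + ##g+8)      // one base-with-long-offset load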