Example #1
0
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (skipOptnoneFunction(*MF.getFunction()))
    return false;

  DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
  DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');

  if (DisablePeephole)
    return false;

  TM  = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;

  bool Changed = false;

  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;
    SmallPtrSet<MachineInstr*, 8> LocalMIs;
    SmallSet<unsigned, 4> ImmDefRegs;
    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    SmallSet<unsigned, 16> FoldAsLoadDefCandidates;

    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      // We may be erasing MI below, increment MII now.
      ++MII;
      LocalMIs.insert(MI);

      // Skip debug values. They should not affect this peephole optimization.
      if (MI->isDebugValue())
          continue;

      // If there exists an instruction which belongs to the following
      // categories, we will discard the load candidates.
      if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() ||
          MI->hasUnmodeledSideEffects()) {
        FoldAsLoadDefCandidates.clear();
        continue;
      }
      if (MI->mayStore() || MI->isCall())
        FoldAsLoadDefCandidates.clear();

      if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) ||
          (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
          (MI->isSelect() && optimizeSelect(MI))) {
        // MI is deleted.
        LocalMIs.erase(MI);
        Changed = true;
        continue;
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        // optimizeExtInstr might have created new instructions after MI
        // and before the already incremented MII. Adjust MII so that the
        // next iteration sees the new instructions.
        MII = MI;
        ++MII;
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      // Check whether MI is a load candidate for folding into a later
      // instruction. If MI is not a candidate, check whether we can fold an
      // earlier load into MI.
      if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
          !FoldAsLoadDefCandidates.empty()) {
        const MCInstrDesc &MIDesc = MI->getDesc();
        for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
             ++i) {
          const MachineOperand &MOp = MI->getOperand(i);
          if (!MOp.isReg())
            continue;
          unsigned FoldAsLoadDefReg = MOp.getReg();
          if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
            // We need to fold load after optimizeCmpInstr, since
            // optimizeCmpInstr can enable folding by converting SUB to CMP.
            // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
            // we need it for markUsesInDebugValueAsUndef().
            unsigned FoldedReg = FoldAsLoadDefReg;
            MachineInstr *DefMI = nullptr;
            MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
                                                          FoldAsLoadDefReg,
                                                          DefMI);
            if (FoldMI) {
              // Update LocalMIs since we replaced MI with FoldMI and deleted
              // DefMI.
              DEBUG(dbgs() << "Replacing: " << *MI);
              DEBUG(dbgs() << "     With: " << *FoldMI);
              LocalMIs.erase(MI);
              LocalMIs.erase(DefMI);
              LocalMIs.insert(FoldMI);
              MI->eraseFromParent();
              DefMI->eraseFromParent();
              MRI->markUsesInDebugValueAsUndef(FoldedReg);
              FoldAsLoadDefCandidates.erase(FoldedReg);
              ++NumLoadFold;
              // MI is replaced with FoldMI.
              Changed = true;
              break;
            }
          }
        }
      }
    }
  }

  return Changed;
}
bool ARMInstructionSelector::select(MachineInstr &I) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  auto &MBB = *I.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  if (selectImpl(I))
    return true;

  MachineInstrBuilder MIB{MF, I};
  bool isSExt = false;

  using namespace TargetOpcode;
  switch (I.getOpcode()) {
  case G_SEXT:
    isSExt = true;
    LLVM_FALLTHROUGH;
  case G_ZEXT: {
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    // FIXME: Smaller destination sizes coming soon!
    if (DstTy.getSizeInBits() != 32) {
      DEBUG(dbgs() << "Unsupported destination size for extension");
      return false;
    }

    LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
    unsigned SrcSize = SrcTy.getSizeInBits();
    switch (SrcSize) {
    case 1: {
      // ZExt boils down to & 0x1; for SExt we also subtract that from 0
      I.setDesc(TII.get(ARM::ANDri));
      MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp());

      if (isSExt) {
        unsigned SExtResult = I.getOperand(0).getReg();

        // Use a new virtual register for the result of the AND
        unsigned AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass);
        I.getOperand(0).setReg(AndResult);

        auto InsertBefore = std::next(I.getIterator());
        auto SubI =
            BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::RSBri))
                .addDef(SExtResult)
                .addUse(AndResult)
                .addImm(0)
                .add(predOps(ARMCC::AL))
                .add(condCodeOp());
        if (!constrainSelectedInstRegOperands(*SubI, TII, TRI, RBI))
          return false;
      }
      break;
    }
    case 8:
    case 16: {
      unsigned NewOpc = selectSimpleExtOpc(I.getOpcode(), SrcSize);
      if (NewOpc == I.getOpcode())
        return false;
      I.setDesc(TII.get(NewOpc));
      MIB.addImm(0).add(predOps(ARMCC::AL));
      break;
    }
    default:
      DEBUG(dbgs() << "Unsupported source size for extension");
      return false;
    }
    break;
  }
  case G_ANYEXT:
  case G_TRUNC: {
    // The high bits are undefined, so there's nothing special to do, just
    // treat it as a copy.
    auto SrcReg = I.getOperand(1).getReg();
    auto DstReg = I.getOperand(0).getReg();

    const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
    const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

    if (SrcRegBank.getID() != DstRegBank.getID()) {
      DEBUG(dbgs() << "G_TRUNC/G_ANYEXT operands on different register banks\n");
      return false;
    }

    if (SrcRegBank.getID() != ARM::GPRRegBankID) {
      DEBUG(dbgs() << "G_TRUNC/G_ANYEXT on non-GPR not supported yet\n");
      return false;
    }

    I.setDesc(TII.get(COPY));
    return selectCopy(I, TII, MRI, TRI, RBI);
  }
  case G_SELECT:
    return selectSelect(MIB, MRI);
  case G_ICMP: {
    CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END,
                        ARM::GPRRegBankID, 32);
    return selectCmp(Helper, MIB, MRI);
  }
  case G_FCMP: {
    assert(TII.getSubtarget().hasVFP2() && "Can't select fcmp without VFP");

    unsigned OpReg = I.getOperand(2).getReg();
    unsigned Size = MRI.getType(OpReg).getSizeInBits();

    if (Size == 64 && TII.getSubtarget().isFPOnlySP()) {
      DEBUG(dbgs() << "Subtarget only supports single precision");
      return false;
    }
    if (Size != 32 && Size != 64) {
      DEBUG(dbgs() << "Unsupported size for G_FCMP operand");
      return false;
    }

    CmpConstants Helper(Size == 32 ? ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT,
                        ARM::FPRRegBankID, Size);
    return selectCmp(Helper, MIB, MRI);
  }
  case G_GEP:
    I.setDesc(TII.get(ARM::ADDrr));
    MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
    break;
  case G_FRAME_INDEX:
    // Add 0 to the given frame index and hope it will eventually be folded into
    // the user(s).
    I.setDesc(TII.get(ARM::ADDri));
    MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp());
    break;
  case G_CONSTANT: {
    unsigned Reg = I.getOperand(0).getReg();

    if (!validReg(MRI, Reg, 32, ARM::GPRRegBankID))
      return false;

    I.setDesc(TII.get(ARM::MOVi));
    MIB.add(predOps(ARMCC::AL)).add(condCodeOp());

    auto &Val = I.getOperand(1);
    if (Val.isCImm()) {
      if (Val.getCImm()->getBitWidth() > 32)
        return false;
      Val.ChangeToImmediate(Val.getCImm()->getZExtValue());
    }

    if (!Val.isImm()) {
      return false;
    }

    break;
  }
  case G_GLOBAL_VALUE:
    return selectGlobal(MIB, MRI);
  case G_STORE:
  case G_LOAD: {
    const auto &MemOp = **I.memoperands_begin();
    if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
      DEBUG(dbgs() << "Atomic load/store not supported yet\n");
      return false;
    }

    unsigned Reg = I.getOperand(0).getReg();
    unsigned RegBank = RBI.getRegBank(Reg, MRI, TRI)->getID();

    LLT ValTy = MRI.getType(Reg);
    const auto ValSize = ValTy.getSizeInBits();

    assert((ValSize != 64 || TII.getSubtarget().hasVFP2()) &&
           "Don't know how to load/store 64-bit value without VFP");

    const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize);
    if (NewOpc == G_LOAD || NewOpc == G_STORE)
      return false;

    I.setDesc(TII.get(NewOpc));

    if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH)
      // LDRH has a funny addressing mode (there's already a FIXME for it).
      MIB.addReg(0);
    MIB.addImm(0).add(predOps(ARMCC::AL));
    break;
  }
  case G_MERGE_VALUES: {
    if (!selectMergeValues(MIB, TII, MRI, TRI, RBI))
      return false;
    break;
  }
  case G_UNMERGE_VALUES: {
    if (!selectUnmergeValues(MIB, TII, MRI, TRI, RBI))
      return false;
    break;
  }
  case G_BRCOND: {
    if (!validReg(MRI, I.getOperand(0).getReg(), 1, ARM::GPRRegBankID)) {
      DEBUG(dbgs() << "Unsupported condition register for G_BRCOND");
      return false;
    }

    // Set the flags.
    auto Test = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::TSTri))
                    .addReg(I.getOperand(0).getReg())
                    .addImm(1)
                    .add(predOps(ARMCC::AL));
    if (!constrainSelectedInstRegOperands(*Test, TII, TRI, RBI))
      return false;

    // Branch conditionally.
    auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::Bcc))
                      .add(I.getOperand(1))
                      .add(predOps(ARMCC::EQ, ARM::CPSR));
    if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI))
      return false;
    I.eraseFromParent();
    return true;
  }
  default:
    return false;
  }

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
  bool AnyChanges = false;
  MRI = &MF.getRegInfo();
  TRI = MF.getTarget().getRegisterInfo();
  TII = MF.getTarget().getInstrInfo();

  // Treat reserved registers as always live.
  BitVector ReservedRegs = TRI->getReservedRegs(MF);

  // Loop over all instructions in all blocks, from bottom to top, so that it's
  // more likely that chains of dependent but ultimately dead instructions will
  // be cleaned up.
  for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend();
       I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    // Start out assuming that reserved registers are live out of this block.
    LivePhysRegs = ReservedRegs;

    // Also add any explicit live-out physregs for this block.
    if (!MBB->empty() && MBB->back().getDesc().isReturn())
      for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
           LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
        unsigned Reg = *LOI;
        if (TargetRegisterInfo::isPhysicalRegister(Reg))
          LivePhysRegs.set(Reg);
      }

    // FIXME: Add live-ins from sucessors to LivePhysRegs. Normally, physregs
    // are not live across blocks, but some targets (x86) can have flags live
    // out of a block.

    // Now scan the instructions and delete dead ones, tracking physreg
    // liveness as we go.
    for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
         MIE = MBB->rend(); MII != MIE; ) {
      MachineInstr *MI = &*MII;

      // If the instruction is dead, delete it!
      if (isDead(MI)) {
        DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
        // It is possible that some DBG_VALUE instructions refer to this
        // instruction.  Examine each def operand for such references;
        // if found, mark the DBG_VALUE as undef (but don't delete it).
        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
          const MachineOperand &MO = MI->getOperand(i);
          if (!MO.isReg() || !MO.isDef())
            continue;
          unsigned Reg = MO.getReg();
          if (!TargetRegisterInfo::isVirtualRegister(Reg))
            continue;
          MachineRegisterInfo::use_iterator nextI;
          for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
               E = MRI->use_end(); I!=E; I=nextI) {
            nextI = llvm::next(I);  // I is invalidated by the setReg
            MachineOperand& Use = I.getOperand();
            MachineInstr *UseMI = Use.getParent();
            if (UseMI==MI)
              continue;
            assert(Use.isDebug());
            UseMI->getOperand(0).setReg(0U);
          }
        }
        AnyChanges = true;
        MI->eraseFromParent();
        ++NumDeletes;
        MIE = MBB->rend();
        // MII is now pointing to the next instruction to process,
        // so don't increment it.
        continue;
      }

      // Record the physreg defs.
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = MI->getOperand(i);
        if (MO.isReg() && MO.isDef()) {
          unsigned Reg = MO.getReg();
          if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
            LivePhysRegs.reset(Reg);
            // Check the subreg set, not the alias set, because a def
            // of a super-register may still be partially live after
            // this def.
            for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
                 *SubRegs; ++SubRegs)
              LivePhysRegs.reset(*SubRegs);
          }
        }
      }
      // Record the physreg uses, after the defs, in case a physreg is
      // both defined and used in the same instruction.
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = MI->getOperand(i);
        if (MO.isReg() && MO.isUse()) {
          unsigned Reg = MO.getReg();
          if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
            LivePhysRegs.set(Reg);
            for (const unsigned *AliasSet = TRI->getAliasSet(Reg);
                 *AliasSet; ++AliasSet)
              LivePhysRegs.set(*AliasSet);
          }
        }
      }

      // We didn't delete the current instruction, so increment MII to
      // the next one.
      ++MII;
    }
  }

  LivePhysRegs.clear();
  return AnyChanges;
}
/// fixupConditionalBranch - Fix up a conditional branch whose destination is
/// too far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
  MachineBasicBlock *DestBB = getDestBlock(MI);

  // Add an unconditional branch to the destination and invert the branch
  // condition to jump over it:
  // tbz L1
  // =>
  // tbnz L2
  // b   L1
  // L2:

  // If the branch is at the end of its MBB and that has a fall-through block,
  // direct the updated conditional branch to the fall-through block. Otherwise,
  // split the MBB before the next instruction.
  MachineBasicBlock *MBB = MI.getParent();
  MachineInstr *BMI = &MBB->back();
  bool NeedSplit = (BMI != &MI) || !hasFallthrough(*MBB);

  if (BMI != &MI) {
    if (std::next(MachineBasicBlock::iterator(MI)) ==
            std::prev(MBB->getLastNonDebugInstr()) &&
        BMI->isUnconditionalBranch()) {
      // Last MI in the BB is an unconditional branch. We can simply invert the
      // condition and swap destinations:
      // beq L1
      // b   L2
      // =>
      // bne L2
      // b   L1
      MachineBasicBlock *NewDest = getDestBlock(*BMI);
      if (isBlockInRange(MI, *NewDest)) {
        DEBUG(dbgs() << "  Invert condition and swap its destination with "
                     << *BMI);
        changeBranchDestBlock(*BMI, *DestBB);

        int NewSize =
          insertInvertedConditionalBranch(*MBB, MI.getIterator(),
                                          MI.getDebugLoc(), MI, *NewDest);
        int OldSize = TII->getInstSizeInBytes(MI);
        BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize);
        MI.eraseFromParent();
        return true;
      }
    }
  }

  if (NeedSplit) {
    // Analyze the branch so we know how to update the successor lists.
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    SmallVector<MachineOperand, 2> Cond;
    bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond, false);
    assert(!Fail && "branches to relax should be analyzable");
    (void)Fail;

    MachineBasicBlock *NewBB = splitBlockBeforeInstr(MI);
    // No need for the branch to the next block. We're adding an unconditional
    // branch to the destination.
    int delta = TII->getInstSizeInBytes(MBB->back());
    BlockInfo[MBB->getNumber()].Size -= delta;
    MBB->back().eraseFromParent();
    // BlockInfo[SplitBB].Offset is wrong temporarily, fixed below

    // Update the successor lists according to the transformation to follow.
    // Do it here since if there's no split, no update is needed.
    MBB->replaceSuccessor(FBB, NewBB);
    NewBB->addSuccessor(FBB);
  }

  MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB));

  DEBUG(dbgs() << "  Insert B to BB#" << DestBB->getNumber()
               << ", invert condition and change dest. to BB#"
               << NextBB.getNumber() << '\n');

  unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size;

  // Insert a new conditional branch and a new unconditional branch.
  MBBSize += insertInvertedConditionalBranch(*MBB, MBB->end(),
                                             MI.getDebugLoc(), MI, NextBB);

  MBBSize += insertUnconditionalBranch(*MBB, *DestBB, MI.getDebugLoc());

  // Remove the old conditional branch.  It may or may not still be in MBB.
  MBBSize -= TII->getInstSizeInBytes(MI);
  MI.eraseFromParent();

  // Finally, keep the block offsets up to date.
  adjustBlockOffsets(*MBB);
  return true;
}
Example #5
0
/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup.
bool
TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
        bool IsSimple,
        MachineFunction &MF) {
    // Save the successors list.
    SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
            MBB->succ_end());

    SmallVector<MachineBasicBlock*, 8> TDBBs;
    SmallVector<MachineInstr*, 16> Copies;
    if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies))
        return false;

    ++NumTails;

    SmallVector<MachineInstr*, 8> NewPHIs;
    MachineSSAUpdater SSAUpdate(MF, &NewPHIs);

    // TailBB's immediate successors are now successors of those predecessors
    // which duplicated TailBB. Add the predecessors as sources to the PHI
    // instructions.
    bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
    if (PreRegAlloc)
        UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);

    // If it is dead, remove it.
    if (isDead) {
        NumInstrDups -= MBB->size();
        RemoveDeadBlock(MBB);
        ++NumDeadBlocks;
    }

    // Update SSA form.
    if (!SSAUpdateVRs.empty()) {
        for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
            unsigned VReg = SSAUpdateVRs[i];
            SSAUpdate.Initialize(VReg);

            // If the original definition is still around, add it as an available
            // value.
            MachineInstr *DefMI = MRI->getVRegDef(VReg);
            MachineBasicBlock *DefBB = 0;
            if (DefMI) {
                DefBB = DefMI->getParent();
                SSAUpdate.AddAvailableValue(DefBB, VReg);
            }

            // Add the new vregs as available values.
            DenseMap<unsigned, AvailableValsTy>::iterator LI =
                SSAUpdateVals.find(VReg);
            for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
                MachineBasicBlock *SrcBB = LI->second[j].first;
                unsigned SrcReg = LI->second[j].second;
                SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
            }

            // Rewrite uses that are outside of the original def's block.
            MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
            while (UI != MRI->use_end()) {
                MachineOperand &UseMO = UI.getOperand();
                MachineInstr *UseMI = &*UI;
                ++UI;
                if (UseMI->isDebugValue()) {
                    // SSAUpdate can replace the use with an undef. That creates
                    // a debug instruction that is a kill.
                    // FIXME: Should it SSAUpdate job to delete debug instructions
                    // instead of replacing the use with undef?
                    UseMI->eraseFromParent();
                    continue;
                }
                if (UseMI->getParent() == DefBB && !UseMI->isPHI())
                    continue;
                SSAUpdate.RewriteUse(UseMO);
            }
        }

        SSAUpdateVRs.clear();
        SSAUpdateVals.clear();
    }

    // Eliminate some of the copies inserted by tail duplication to maintain
    // SSA form.
    for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
        MachineInstr *Copy = Copies[i];
        if (!Copy->isCopy())
            continue;
        unsigned Dst = Copy->getOperand(0).getReg();
        unsigned Src = Copy->getOperand(1).getReg();
        if (MRI->hasOneNonDBGUse(Src) &&
                MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
            // Copy is the only use. Do trivial copy propagation here.
            MRI->replaceRegWith(Dst, Src);
            Copy->eraseFromParent();
        }
    }

    if (NewPHIs.size())
        NumAddedPHIs += NewPHIs.size();

    return true;
}
bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TII = MF.getTarget().getInstrInfo();
  DT = &getAnalysis<MachineDominatorTree>();
  LI = &getAnalysis<LiveIntervals>();

  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      unsigned DestReg = BBI->getOperand(0).getReg();
      addReg(DestReg);
      PHISrcDefs[I].push_back(BBI);

      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
        MachineOperand &SrcMO = BBI->getOperand(i);
        unsigned SrcReg = SrcMO.getReg();
        addReg(SrcReg);
        unionRegs(DestReg, SrcReg);

        MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
        if (DefMI)
          PHISrcDefs[DefMI->getParent()].push_back(DefMI);
      }
    }
  }

  // Perform a depth-first traversal of the dominator tree, splitting
  // interferences amongst PHI-congruence classes.
  DenseMap<unsigned, unsigned> CurrentDominatingParent;
  DenseMap<unsigned, unsigned> ImmediateDominatingParent;
  for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
    SplitInterferencesForBasicBlock(*DI->getBlock(),
                                    CurrentDominatingParent,
                                    ImmediateDominatingParent);
  }

  // Insert copies for all PHI source and destination registers.
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      InsertCopiesForPHI(BBI, I);
    }
  }

  // FIXME: Preserve the equivalence classes during copy insertion and use
  // the preversed equivalence classes instead of recomputing them.
  RegNodeMap.clear();
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
         BBI != BBE && BBI->isPHI(); ++BBI) {
      unsigned DestReg = BBI->getOperand(0).getReg();
      addReg(DestReg);

      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
        unsigned SrcReg = BBI->getOperand(i).getReg();
        addReg(SrcReg);
        unionRegs(DestReg, SrcReg);
      }
    }
  }

  DenseMap<unsigned, unsigned> RegRenamingMap;
  bool Changed = false;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
       I != E; ++I) {
    MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
    while (BBI != BBE && BBI->isPHI()) {
      MachineInstr *PHI = BBI;

      assert(PHI->getNumOperands() > 0);

      unsigned SrcReg = PHI->getOperand(1).getReg();
      unsigned SrcColor = getRegColor(SrcReg);
      unsigned NewReg = RegRenamingMap[SrcColor];
      if (!NewReg) {
        NewReg = SrcReg;
        RegRenamingMap[SrcColor] = SrcReg;
      }
      MergeLIsAndRename(SrcReg, NewReg);

      unsigned DestReg = PHI->getOperand(0).getReg();
      if (!InsertedDestCopies.count(DestReg))
        MergeLIsAndRename(DestReg, NewReg);

      for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
        unsigned SrcReg = PHI->getOperand(i).getReg();
        MergeLIsAndRename(SrcReg, NewReg);
      }

      ++BBI;
      LI->RemoveMachineInstrFromMaps(PHI);
      PHI->eraseFromParent();
      Changed = true;
    }
  }

  // Due to the insertion of copies to split live ranges, the live intervals are
  // guaranteed to not overlap, except in one case: an original PHI source and a
  // PHI destination copy. In this case, they have the same value and thus don't
  // truly intersect, so we merge them into the value live at that point.
  // FIXME: Is there some better way we can handle this?
  for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
       E = InsertedDestCopies.end(); I != E; ++I) {
    unsigned DestReg = I->first;
    unsigned DestColor = getRegColor(DestReg);
    unsigned NewReg = RegRenamingMap[DestColor];

    LiveInterval &DestLI = LI->getInterval(DestReg);
    LiveInterval &NewLI = LI->getInterval(NewReg);

    assert(DestLI.ranges.size() == 1
           && "PHI destination copy's live interval should be a single live "
               "range from the beginning of the BB to the copy instruction.");
    LiveRange *DestLR = DestLI.begin();
    VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
    if (!NewVNI) {
      NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
      MachineInstr *CopyInstr = I->second;
      CopyInstr->getOperand(1).setIsKill(true);
    }

    LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
    NewLI.addRange(NewLR);

    LI->removeInterval(DestReg);
    MRI->replaceRegWith(DestReg, NewReg);
  }

  // Adjust the live intervals of all PHI source registers to handle the case
  // where the PHIs in successor blocks were the only later uses of the source
  // register.
  for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
       E = InsertedSrcCopySet.end(); I != E; ++I) {
    MachineBasicBlock *MBB = I->first;
    unsigned SrcReg = I->second;
    if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
      SrcReg = RenamedRegister;

    LiveInterval &SrcLI = LI->getInterval(SrcReg);

    bool isLiveOut = false;
    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
         SE = MBB->succ_end(); SI != SE; ++SI) {
      if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
        isLiveOut = true;
        break;
      }
    }

    if (isLiveOut)
      continue;

    MachineOperand *LastUse = findLastUse(MBB, SrcReg);
    assert(LastUse);
    SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
    SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
    LastUse->setIsKill(true);
  }

  LI->renumber();

  Allocator.Reset();
  RegNodeMap.clear();
  PHISrcDefs.clear();
  InsertedSrcCopySet.clear();
  InsertedSrcCopyMap.clear();
  InsertedDestCopies.clear();

  return Changed;
}
Example #7
0
void SILowerControlFlow::emitIf(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock::iterator I(&MI);

  MachineOperand &SaveExec = MI.getOperand(0);
  MachineOperand &Cond = MI.getOperand(1);
  assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister &&
         Cond.getSubReg() == AMDGPU::NoSubRegister);

  unsigned SaveExecReg = SaveExec.getReg();

  MachineOperand &ImpDefSCC = MI.getOperand(4);
  assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());

  // Add an implicit def of exec to discourage scheduling VALU after this which
  // will interfere with trying to form s_and_saveexec_b64 later.
  MachineInstr *CopyExec =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SaveExecReg)
    .addReg(AMDGPU::EXEC)
    .addReg(AMDGPU::EXEC, RegState::ImplicitDefine);

  unsigned Tmp = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

  MachineInstr *And =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), Tmp)
    .addReg(SaveExecReg)
    //.addReg(AMDGPU::EXEC)
    .addReg(Cond.getReg());
  setImpSCCDefDead(*And, true);

  MachineInstr *Xor =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), SaveExecReg)
    .addReg(Tmp)
    .addReg(SaveExecReg);
  setImpSCCDefDead(*Xor, ImpDefSCC.isDead());

  // Use a copy that is a terminator to get correct spill code placement it with
  // fast regalloc.
  MachineInstr *SetExec =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64_term), AMDGPU::EXEC)
    .addReg(Tmp, RegState::Kill);

  // Insert a pseudo terminator to help keep the verifier happy. This will also
  // be used later when inserting skips.
  MachineInstr *NewBr =
    BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
    .addOperand(MI.getOperand(2));

  if (!LIS) {
    MI.eraseFromParent();
    return;
  }

  LIS->InsertMachineInstrInMaps(*CopyExec);

  // Replace with and so we don't need to fix the live interval for condition
  // register.
  LIS->ReplaceMachineInstrInMaps(MI, *And);

  LIS->InsertMachineInstrInMaps(*Xor);
  LIS->InsertMachineInstrInMaps(*SetExec);
  LIS->InsertMachineInstrInMaps(*NewBr);

  LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
  MI.eraseFromParent();

  // FIXME: Is there a better way of adjusting the liveness? It shouldn't be
  // hard to add another def here but I'm not sure how to correctly update the
  // valno.
  LIS->removeInterval(SaveExecReg);
  LIS->createAndComputeVirtRegInterval(SaveExecReg);
  LIS->createAndComputeVirtRegInterval(Tmp);
}
void HexagonExpandCondsets::removeInstr(MachineInstr &MI) {
  LIS->RemoveMachineInstrFromMaps(MI);
  MI.eraseFromParent();
}
Example #9
0
bool LanaiInstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int /*CmpMask*/,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  // Get ready to iterate backward from CmpInstr.
  MachineBasicBlock::iterator I = CmpInstr, E = MI,
                              B = CmpInstr.getParent()->begin();

  // Early exit if CmpInstr is at the beginning of the BB.
  if (I == B)
    return false;

  // There are two possible candidates which can be changed to set SR:
  // One is MI, the other is a SUB instruction.
  // * For SFSUB_F_RR(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
  // * For SFSUB_F_RI(r1, CmpValue), we are looking for SUB(r1, CmpValue).
  MachineInstr *Sub = nullptr;
  if (SrcReg2 != 0)
    // MI is not a candidate to transform into a flag setting instruction.
    MI = nullptr;
  else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
    // Conservatively refuse to convert an instruction which isn't in the same
    // BB as the comparison. Don't return if SFSUB_F_RI and CmpValue != 0 as Sub
    // may still be a candidate.
    if (CmpInstr.getOpcode() == Lanai::SFSUB_F_RI_LO)
      MI = nullptr;
    else
      return false;
  }

  // Check that SR isn't set between the comparison instruction and the
  // instruction we want to change while searching for Sub.
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  for (--I; I != E; --I) {
    const MachineInstr &Instr = *I;

    if (Instr.modifiesRegister(Lanai::SR, TRI) ||
        Instr.readsRegister(Lanai::SR, TRI))
      // This instruction modifies or uses SR after the one we want to change.
      // We can't do this transformation.
      return false;

    // Check whether CmpInstr can be made redundant by the current instruction.
    if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
      Sub = &*I;
      break;
    }

    // Don't search outside the containing basic block.
    if (I == B)
      return false;
  }

  // Return false if no candidates exist.
  if (!MI && !Sub)
    return false;

  // The single candidate is called MI.
  if (!MI)
    MI = Sub;

  if (flagSettingOpcodeVariant(MI->getOpcode()) != Lanai::NOP) {
    bool isSafe = false;

    SmallVector<std::pair<MachineOperand *, LPCC::CondCode>, 4>
        OperandsToUpdate;
    I = CmpInstr;
    E = CmpInstr.getParent()->end();
    while (!isSafe && ++I != E) {
      const MachineInstr &Instr = *I;
      for (unsigned IO = 0, EO = Instr.getNumOperands(); !isSafe && IO != EO;
           ++IO) {
        const MachineOperand &MO = Instr.getOperand(IO);
        if (MO.isRegMask() && MO.clobbersPhysReg(Lanai::SR)) {
          isSafe = true;
          break;
        }
        if (!MO.isReg() || MO.getReg() != Lanai::SR)
          continue;
        if (MO.isDef()) {
          isSafe = true;
          break;
        }
        // Condition code is after the operand before SR.
        LPCC::CondCode CC;
        CC = (LPCC::CondCode)Instr.getOperand(IO - 1).getImm();

        if (Sub) {
          LPCC::CondCode NewCC = getOppositeCondition(CC);
          if (NewCC == LPCC::ICC_T)
            return false;
          // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on
          // CMP needs to be updated to be based on SUB.  Push the condition
          // code operands to OperandsToUpdate.  If it is safe to remove
          // CmpInstr, the condition code of these operands will be modified.
          if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
              Sub->getOperand(2).getReg() == SrcReg) {
            OperandsToUpdate.push_back(
                std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
          }
        } else {
          // No Sub, so this is x = <op> y, z; cmp x, 0.
          switch (CC) {
          case LPCC::ICC_EQ: // Z
          case LPCC::ICC_NE: // Z
          case LPCC::ICC_MI: // N
          case LPCC::ICC_PL: // N
          case LPCC::ICC_F:  // none
          case LPCC::ICC_T:  // none
            // SR can be used multiple times, we should continue.
            break;
          case LPCC::ICC_CS: // C
          case LPCC::ICC_CC: // C
          case LPCC::ICC_VS: // V
          case LPCC::ICC_VC: // V
          case LPCC::ICC_HI: // C Z
          case LPCC::ICC_LS: // C Z
          case LPCC::ICC_GE: // N V
          case LPCC::ICC_LT: // N V
          case LPCC::ICC_GT: // Z N V
          case LPCC::ICC_LE: // Z N V
            // The instruction uses the V bit or C bit which is not safe.
            return false;
          case LPCC::UNKNOWN:
            return false;
          }
        }
      }
    }

    // If SR is not killed nor re-defined, we should check whether it is
    // live-out. If it is live-out, do not optimize.
    if (!isSafe) {
      MachineBasicBlock *MBB = CmpInstr.getParent();
      for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
                                            SE = MBB->succ_end();
           SI != SE; ++SI)
        if ((*SI)->isLiveIn(Lanai::SR))
          return false;
    }

    // Toggle the optional operand to SR.
    MI->setDesc(get(flagSettingOpcodeVariant(MI->getOpcode())));
    MI->addRegisterDefined(Lanai::SR);
    CmpInstr.eraseFromParent();
    return true;
  }

  return false;
}
Example #10
0
/// foldMemoryOperand - Try folding stack slot references in Ops into their
/// instructions.
///
/// @param Ops    Operand indices from analyzeVirtReg().
/// @param LoadMI Load instruction to use instead of stack slot when non-null.
/// @return       True on success.
bool InlineSpiller::
foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
                  MachineInstr *LoadMI) {
  if (Ops.empty())
    return false;
  // Don't attempt folding in bundles.
  MachineInstr *MI = Ops.front().first;
  if (Ops.back().first != MI || MI->isBundled())
    return false;

  bool WasCopy = MI->isCopy();
  unsigned ImpReg = 0;

  bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::STATEPOINT ||
                       MI->getOpcode() == TargetOpcode::PATCHPOINT ||
                       MI->getOpcode() == TargetOpcode::STACKMAP);

  // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
  // operands.
  SmallVector<unsigned, 8> FoldOps;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    unsigned Idx = Ops[i].second;
    assert(MI == Ops[i].first && "Instruction conflict during operand folding");
    MachineOperand &MO = MI->getOperand(Idx);
    if (MO.isImplicit()) {
      ImpReg = MO.getReg();
      continue;
    }
    // FIXME: Teach targets to deal with subregs.
    if (!SpillSubRegs && MO.getSubReg())
      return false;
    // We cannot fold a load instruction into a def.
    if (LoadMI && MO.isDef())
      return false;
    // Tied use operands should not be passed to foldMemoryOperand.
    if (!MI->isRegTiedToDefOperand(Idx))
      FoldOps.push_back(Idx);
  }

  MachineInstrSpan MIS(MI);

  MachineInstr *FoldMI =
                LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI)
                       : TII.foldMemoryOperand(MI, FoldOps, StackSlot);
  if (!FoldMI)
    return false;

  // Remove LIS for any dead defs in the original MI not in FoldMI.
  for (MIBundleOperands MO(MI); MO.isValid(); ++MO) {
    if (!MO->isReg())
      continue;
    unsigned Reg = MO->getReg();
    if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
        MRI.isReserved(Reg)) {
      continue;
    }
    // Skip non-Defs, including undef uses and internal reads.
    if (MO->isUse())
      continue;
    MIBundleOperands::PhysRegInfo RI =
      MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI);
    if (RI.Defines)
      continue;
    // FoldMI does not define this physreg. Remove the LI segment.
    assert(MO->isDead() && "Cannot fold physreg def");
    SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
    LIS.removePhysRegDefAt(Reg, Idx);
  }

  LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
  MI->eraseFromParent();

  // Insert any new instructions other than FoldMI into the LIS maps.
  assert(!MIS.empty() && "Unexpected empty span of instructions!");
  for (MachineBasicBlock::iterator MII = MIS.begin(), End = MIS.end();
       MII != End; ++MII)
    if (&*MII != FoldMI)
      LIS.InsertMachineInstrInMaps(&*MII);

  // TII.foldMemoryOperand may have left some implicit operands on the
  // instruction.  Strip them.
  if (ImpReg)
    for (unsigned i = FoldMI->getNumOperands(); i; --i) {
      MachineOperand &MO = FoldMI->getOperand(i - 1);
      if (!MO.isReg() || !MO.isImplicit())
        break;
      if (MO.getReg() == ImpReg)
        FoldMI->RemoveOperand(i - 1);
    }

  DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS,
                                           "folded"));

  if (!WasCopy)
    ++NumFolded;
  else if (Ops.front().second == 0)
    ++NumSpills;
  else
    ++NumReloads;
  return true;
}
bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {

  DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
               << "********** Function: "
               << MF.getName() << "\n");

#if 0
  // for now disable this, if we move NewValueJump before register
  // allocation we need this information.
  LiveVariables &LVs = getAnalysis<LiveVariables>();
#endif

  QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
  QRI = static_cast<const HexagonRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();

  if (!QRI->Subtarget.hasV4TOps() ||
      DisableNewValueJumps) {
    return false;
  }

  int nvjCount = DbgNVJCount;
  int nvjGenerated = 0;

  // Loop through all the bb's of the function
  for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
        MBBb != MBBe; ++MBBb) {
    MachineBasicBlock* MBB = MBBb;

    DEBUG(dbgs() << "** dumping bb ** "
                 << MBB->getNumber() << "\n");
    DEBUG(MBB->dump());
    DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n");
    bool foundJump    = false;
    bool foundCompare = false;
    bool invertPredicate = false;
    unsigned predReg = 0; // predicate reg of the jump.
    unsigned cmpReg1 = 0;
    int cmpOp2 = 0;
    bool MO1IsKill = false;
    bool MO2IsKill = false;
    MachineBasicBlock::iterator jmpPos;
    MachineBasicBlock::iterator cmpPos;
    MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr;
    MachineBasicBlock *jmpTarget = nullptr;
    bool afterRA = false;
    bool isSecondOpReg = false;
    bool isSecondOpNewified = false;
    // Traverse the basic block - bottom up
    for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin();
             MII != E;) {
      MachineInstr *MI = --MII;
      if (MI->isDebugValue()) {
        continue;
      }

      if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated))
        break;

      DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n");

      if (!foundJump &&
         (MI->getOpcode() == Hexagon::JMP_t ||
          MI->getOpcode() == Hexagon::JMP_f ||
          MI->getOpcode() == Hexagon::JMP_tnew_t ||
          MI->getOpcode() == Hexagon::JMP_tnew_nt ||
          MI->getOpcode() == Hexagon::JMP_fnew_t ||
          MI->getOpcode() == Hexagon::JMP_fnew_nt)) {
        // This is where you would insert your compare and
        // instr that feeds compare
        jmpPos = MII;
        jmpInstr = MI;
        predReg = MI->getOperand(0).getReg();
        afterRA = TargetRegisterInfo::isPhysicalRegister(predReg);

        // If ifconverter had not messed up with the kill flags of the
        // operands, the following check on the kill flag would suffice.
        // if(!jmpInstr->getOperand(0).isKill()) break;

        // This predicate register is live out out of BB
        // this would only work if we can actually use Live
        // variable analysis on phy regs - but LLVM does not
        // provide LV analysis on phys regs.
        //if(LVs.isLiveOut(predReg, *MBB)) break;

        // Get all the successors of this block - which will always
        // be 2. Check if the predicate register is live in in those
        // successor. If yes, we can not delete the predicate -
        // I am doing this only because LLVM does not provide LiveOut
        // at the BB level.
        bool predLive = false;
        for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
                            SIE = MBB->succ_end(); SI != SIE; ++SI) {
          MachineBasicBlock* succMBB = *SI;
         if (succMBB->isLiveIn(predReg)) {
            predLive = true;
          }
        }
        if (predLive)
          break;

        jmpTarget = MI->getOperand(1).getMBB();
        foundJump = true;
        if (MI->getOpcode() == Hexagon::JMP_f ||
            MI->getOpcode() == Hexagon::JMP_fnew_t ||
            MI->getOpcode() == Hexagon::JMP_fnew_nt) {
          invertPredicate = true;
        }
        continue;
      }

      // No new value jump if there is a barrier. A barrier has to be in its
      // own packet. A barrier has zero operands. We conservatively bail out
      // here if we see any instruction with zero operands.
      if (foundJump && MI->getNumOperands() == 0)
        break;

      if (foundJump &&
         !foundCompare &&
          MI->getOperand(0).isReg() &&
          MI->getOperand(0).getReg() == predReg) {

        // Not all compares can be new value compare. Arch Spec: 7.6.1.1
        if (QII->isNewValueJumpCandidate(MI)) {

          assert((MI->getDesc().isCompare()) &&
              "Only compare instruction can be collapsed into New Value Jump");
          isSecondOpReg = MI->getOperand(2).isReg();

          if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg,
                                        afterRA, jmpPos, MF))
            break;

          cmpInstr = MI;
          cmpPos = MII;
          foundCompare = true;

          // We need cmpReg1 and cmpOp2(imm or reg) while building
          // new value jump instruction.
          cmpReg1 = MI->getOperand(1).getReg();
          if (MI->getOperand(1).isKill())
            MO1IsKill = true;

          if (isSecondOpReg) {
            cmpOp2 = MI->getOperand(2).getReg();
            if (MI->getOperand(2).isKill())
              MO2IsKill = true;
          } else
            cmpOp2 = MI->getOperand(2).getImm();
          continue;
        }
      }

      if (foundCompare && foundJump) {

        // If "common" checks fail, bail out on this BB.
        if (!commonChecksToProhibitNewValueJump(afterRA, MII))
          break;

        bool foundFeeder = false;
        MachineBasicBlock::iterator feederPos = MII;
        if (MI->getOperand(0).isReg() &&
            MI->getOperand(0).isDef() &&
           (MI->getOperand(0).getReg() == cmpReg1 ||
            (isSecondOpReg &&
             MI->getOperand(0).getReg() == (unsigned) cmpOp2))) {

          unsigned feederReg = MI->getOperand(0).getReg();

          // First try to see if we can get the feeder from the first operand
          // of the compare. If we can not, and if secondOpReg is true
          // (second operand of the compare is also register), try that one.
          // TODO: Try to come up with some heuristic to figure out which
          // feeder would benefit.

          if (feederReg == cmpReg1) {
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) {
              if (!isSecondOpReg)
                break;
              else
                continue;
            } else
              foundFeeder = true;
          }

          if (!foundFeeder &&
               isSecondOpReg &&
               feederReg == (unsigned) cmpOp2)
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF))
              break;

          if (isSecondOpReg) {
            // In case of CMPLT, or CMPLTU, or EQ with the second register
            // to newify, swap the operands.
            if (cmpInstr->getOpcode() == Hexagon::C2_cmpeq &&
                                     feederReg == (unsigned) cmpOp2) {
              unsigned tmp = cmpReg1;
              bool tmpIsKill = MO1IsKill;
              cmpReg1 = cmpOp2;
              MO1IsKill = MO2IsKill;
              cmpOp2 = tmp;
              MO2IsKill = tmpIsKill;
            }

            // Now we have swapped the operands, all we need to check is,
            // if the second operand (after swap) is the feeder.
            // And if it is, make a note.
            if (feederReg == (unsigned)cmpOp2)
              isSecondOpNewified = true;
          }

          // Now that we are moving feeder close the jump,
          // make sure we are respecting the kill values of
          // the operands of the feeder.

          bool updatedIsKill = false;
          for (unsigned i = 0; i < MI->getNumOperands(); i++) {
            MachineOperand &MO = MI->getOperand(i);
            if (MO.isReg() && MO.isUse()) {
              unsigned feederReg = MO.getReg();
              for (MachineBasicBlock::iterator localII = feederPos,
                   end = jmpPos; localII != end; localII++) {
                MachineInstr *localMI = localII;
                for (unsigned j = 0; j < localMI->getNumOperands(); j++) {
                  MachineOperand &localMO = localMI->getOperand(j);
                  if (localMO.isReg() && localMO.isUse() &&
                      localMO.isKill() && feederReg == localMO.getReg()) {
                    // We found that there is kill of a use register
                    // Set up a kill flag on the register
                    localMO.setIsKill(false);
                    MO.setIsKill();
                    updatedIsKill = true;
                    break;
                  }
                }
                if (updatedIsKill) break;
              }
            }
            if (updatedIsKill) break;
          }

          MBB->splice(jmpPos, MI->getParent(), MI);
          MBB->splice(jmpPos, MI->getParent(), cmpInstr);
          DebugLoc dl = MI->getDebugLoc();
          MachineInstr *NewMI;

           assert((QII->isNewValueJumpCandidate(cmpInstr)) &&
                      "This compare is not a New Value Jump candidate.");
          unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
                                               isSecondOpNewified,
                                               jmpTarget, MBPI);
          if (invertPredicate)
            opc = QII->getInvertedPredicatedOpcode(opc);

          if (isSecondOpReg)
            NewMI = BuildMI(*MBB, jmpPos, dl,
                                  QII->get(opc))
                                    .addReg(cmpReg1, getKillRegState(MO1IsKill))
                                    .addReg(cmpOp2, getKillRegState(MO2IsKill))
                                    .addMBB(jmpTarget);

          else if ((cmpInstr->getOpcode() == Hexagon::C2_cmpeqi ||
                    cmpInstr->getOpcode() == Hexagon::C2_cmpgti) &&
                    cmpOp2 == -1 )
            // Corresponding new-value compare jump instructions don't have the
            // operand for -1 immediate value.
            NewMI = BuildMI(*MBB, jmpPos, dl,
                                  QII->get(opc))
                                    .addReg(cmpReg1, getKillRegState(MO1IsKill))
                                    .addMBB(jmpTarget);

          else
            NewMI = BuildMI(*MBB, jmpPos, dl,
                                  QII->get(opc))
                                    .addReg(cmpReg1, getKillRegState(MO1IsKill))
                                    .addImm(cmpOp2)
                                    .addMBB(jmpTarget);

          assert(NewMI && "New Value Jump Instruction Not created!");
          (void)NewMI;
          if (cmpInstr->getOperand(0).isReg() &&
              cmpInstr->getOperand(0).isKill())
            cmpInstr->getOperand(0).setIsKill(false);
          if (cmpInstr->getOperand(1).isReg() &&
              cmpInstr->getOperand(1).isKill())
            cmpInstr->getOperand(1).setIsKill(false);
          cmpInstr->eraseFromParent();
          jmpInstr->eraseFromParent();
          ++nvjGenerated;
          ++NumNVJGenerated;
          break;
        }
      }
    }
  }

  return true;

}
Example #12
0
/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible,
/// and trim the live ranges after.
void InlineSpiller::reMaterializeAll() {
  // Do a quick scan of the interval values to find if any are remattable.
  reMattable_.clear();
  usedValues_.clear();
  for (LiveInterval::const_vni_iterator I = li_->vni_begin(),
       E = li_->vni_end(); I != E; ++I) {
    VNInfo *VNI = *I;
    if (VNI->isUnused() || !VNI->isDefAccurate())
      continue;
    MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
    if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI))
      continue;
    reMattable_.insert(VNI);
  }

  // Often, no defs are remattable.
  if (reMattable_.empty())
    return;

  // Try to remat before all uses of li_->reg.
  bool anyRemat = false;
  for (MachineRegisterInfo::use_nodbg_iterator
       RI = mri_.use_nodbg_begin(li_->reg);
       MachineInstr *MI = RI.skipInstruction();)
     anyRemat |= reMaterializeFor(MI);

  if (!anyRemat)
    return;

  // Remove any values that were completely rematted.
  bool anyRemoved = false;
  for (SmallPtrSet<VNInfo*, 8>::iterator I = reMattable_.begin(),
       E = reMattable_.end(); I != E; ++I) {
    VNInfo *VNI = *I;
    if (VNI->hasPHIKill() || usedValues_.count(VNI))
      continue;
    MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
    DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI);
    lis_.RemoveMachineInstrFromMaps(DefMI);
    vrm_.RemoveMachineInstrFromMaps(DefMI);
    DefMI->eraseFromParent();
    VNI->setIsDefAccurate(false);
    anyRemoved = true;
  }

  if (!anyRemoved)
    return;

  // Removing values may cause debug uses where li_ is not live.
  for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg);
       MachineInstr *MI = RI.skipInstruction();) {
    if (!MI->isDebugValue())
      continue;
    // Try to preserve the debug value if li_ is live immediately after it.
    MachineBasicBlock::iterator NextMI = MI;
    ++NextMI;
    if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) {
      VNInfo *VNI = li_->getVNInfoAt(lis_.getInstructionIndex(NextMI));
      if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI)))
        continue;
    }
    DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI);
    MI->eraseFromParent();
  }
}
Example #13
0
bool rvexInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                  MachineBasicBlock *&TBB,
                                  MachineBasicBlock *&FBB,
                                  SmallVectorImpl<MachineOperand> &Cond,
                                  bool AllowModify) const
{
    MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();

    // Skip all the debug instructions.
    while (I != REnd && I->isDebugValue())
        ++I;

    if (I == REnd || !isUnpredicatedTerminator(&*I)) {
        // If this block ends with no branches (it just falls through to its succ)
        // just return false, leaving TBB/FBB null.
        TBB = FBB = NULL;
        return false;
    }

    MachineInstr *LastInst = &*I;
    unsigned LastOpc = LastInst->getOpcode();

    // Not an analyzable branch (must be an indirect jump).
    if (!GetAnalyzableBrOpc(LastOpc)) {
        return true;
    }

    // Get the second to last instruction in the block.
    unsigned SecondLastOpc = 0;
    MachineInstr *SecondLastInst = NULL;

    if (++I != REnd) {
        SecondLastInst = &*I;
        SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());

        // Not an analyzable branch (must be an indirect jump).
        if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc) {
            return true;
        }
    }

    // If there is only one terminator instruction, process it.
    if (!SecondLastOpc) {
        // Unconditional branch
        if (LastOpc == rvex::JMP) {
            TBB = LastInst->getOperand(0).getMBB();
            // If the basic block is next, remove the GOTO inst
            if(MBB.isLayoutSuccessor(TBB)) {
                LastInst->eraseFromParent();
            }

            return false;
        }

        // Conditional branch
        AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
        return false;
    }

    // If we reached here, there are two branches.
    // If there are three terminators, we don't know what sort of block this is.
    if (++I != REnd && isUnpredicatedTerminator(&*I)) {
        return true;
    }

    // If second to last instruction is an unconditional branch,
    // analyze it and remove the last instruction.
    if (SecondLastOpc == rvex::JMP) {
        // Return if the last instruction cannot be removed.
        if (!AllowModify) {
            return true;
        }

        TBB = SecondLastInst->getOperand(0).getMBB();
        LastInst->eraseFromParent();
        return false;
    }

    // Conditional branch followed by an unconditional branch.
    // The last one must be unconditional.
    if (LastOpc != rvex::JMP) {
        return true;
    }

    AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();

    return false;
}
DeadMemOpElimination::instr_iterator
DeadMemOpElimination::handleMemOp(instr_iterator I, DefMapTy &Defs,
                                  AliasSetTracker &AST) {
  MachineInstr *MI = I;

  MachineMemOperand *MO = *MI->memoperands_begin();
  // AliasAnalysis cannot handle offset right now, so we pretend to write a
  // a big enough size to the location pointed by the base pointer.
  uint64_t Size = MO->getSize() + MO->getOffset();
  AliasSet *ASet = &AST.getAliasSetForPointer(const_cast<Value*>(MO->getValue()),
                                              Size, 0);

  MachineInstr *&LastMI = Defs[ASet];

  bool canHandleLastStore = LastMI && ASet->isMustAlias()
                            && LastMI->getOpcode() != VTM::VOpInternalCall
                            // FIXME: We may need to remember the last
                            // definition for all predicates.
                            && isPredIdentical(LastMI, MI);

  if (canHandleLastStore) {
    MachineMemOperand *LastMO = *LastMI->memoperands_begin();
    // We can only handle last store if and only if their memory operand have
    // the must-alias address and the same size.
    canHandleLastStore = LastMO->getSize() == MO->getSize()
                         && !LastMO->isVolatile()
                         && MachineMemOperandAlias(MO, LastMO, AA, SE)
                            == AliasAnalysis::MustAlias;
  }

  // FIXME: These elimination is only valid if we are in single-thread mode!
  if (VInstrInfo::mayStore(MI)) {
    if (canHandleLastStore) {
      // Dead store find, remove it.
      LastMI->eraseFromParent();
      ++DeadStoreEliminated;
    }

    // Update the definition.
    LastMI = MI;
    return I;
  }

  // Now MI is a load.
  if (!canHandleLastStore) return I;

  // Loading the value that just be stored, the load is not necessary.
  MachineOperand LoadedMO = MI->getOperand(0);
  MachineOperand StoredMO = LastMI->getOperand(2);

  // Simply replace the load by a copy.
  DebugLoc dl = MI->getDebugLoc();
  I = *BuildMI(*MI->getParent(), I, dl, VInstrInfo::getDesc(VTM::VOpMove))
        .addOperand(LoadedMO).addOperand(StoredMO).
        addOperand(*VInstrInfo::getPredOperand(MI)).
        addOperand(*VInstrInfo::getTraceOperand(MI));

  MI->eraseFromParent();
  ++DeadLoadEliminated;
  return I;
}
bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  // Ok, we can in fact do the transformation for this call.
  // Do not remove the FrameSetup instruction, but adjust the parameters.
  // PEI will end up finalizing the handling of this.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  FrameSetup->getOperand(1).setImm(Context.ExpectedDist);

  DebugLoc DL = FrameSetup->getDebugLoc();
  // Now, iterate through the vector in reverse order, and replace the movs
  // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
  // replace uses.
  for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator MOV = *Context.MovVector[Idx];
    MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    if (MOV->getOpcode() == X86::MOV32mi) {
      unsigned PushOpcode = X86::PUSHi32;
      // If the operand is a small (8-bit) immediate, we can use a
      // PUSH instruction with a shorter encoding.
      // Note that isImm() may fail even though this is a MOVmi, because
      // the operand can also be a symbol.
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = X86::PUSH32i8;
      }
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
          .addOperand(PushOp);
    } else {
      unsigned int Reg = PushOp.getReg();

      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction.
      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();

      // Check that this is legal to fold. Right now, we're extremely
      // conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));

        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));

        DefMov->eraseFromParent();
      } else {
        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
            .addReg(Reg)
            .getInstr();
      }
    }

    // For debugging, when using SP-based CFA, we need to adjust the CFA
    // offset after each push.
    // TODO: This is needed only if we require precise CFA.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(MBB, std::next(Push), DL, 
                    MCCFIInstruction::createAdjustCfaOffset(nullptr, 4));

    MBB.erase(MOV);
  }

  // The stack-pointer copy is no longer used in the call sequences.
  // There should not be any other users, but we can't commit to that, so:
  if (MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);

  return true;
}
Example #16
0
bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
    bool Changed = false;

    SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
    SmallVector<unsigned, 2> ImplicitDefsToUpdate;
    SmallVector<unsigned, 2> ImplicitDefs;
    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
        MachineInstr *MI = &*I;
        ++I;

        if (!isCSECandidate(MI))
            continue;

        bool FoundCSE = VNT.count(MI);
        if (!FoundCSE) {
            // Using trivial copy propagation to find more CSE opportunities.
            if (PerformTrivialCopyPropagation(MI, MBB)) {
                Changed = true;

                // After coalescing MI itself may become a copy.
                if (MI->isCopyLike())
                    continue;

                // Try again to see if CSE is possible.
                FoundCSE = VNT.count(MI);
            }
        }

        // Commute commutable instructions.
        bool Commuted = false;
        if (!FoundCSE && MI->isCommutable()) {
            MachineInstr *NewMI = TII->commuteInstruction(MI);
            if (NewMI) {
                Commuted = true;
                FoundCSE = VNT.count(NewMI);
                if (NewMI != MI) {
                    // New instruction. It doesn't need to be kept.
                    NewMI->eraseFromParent();
                    Changed = true;
                } else if (!FoundCSE)
                    // MI was changed but it didn't help, commute it back!
                    (void)TII->commuteInstruction(MI);
            }
        }

        // If the instruction defines physical registers and the values *may* be
        // used, then it's not safe to replace it with a common subexpression.
        // It's also not safe if the instruction uses physical registers.
        bool CrossMBBPhysDef = false;
        SmallSet<unsigned, 8> PhysRefs;
        SmallVector<unsigned, 2> PhysDefs;
        bool PhysUseDef = false;
        if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
                                              PhysDefs, PhysUseDef)) {
            FoundCSE = false;

            // ... Unless the CS is local or is in the sole predecessor block
            // and it also defines the physical register which is not clobbered
            // in between and the physical register uses were not clobbered.
            // This can never be the case if the instruction both uses and
            // defines the same physical register, which was detected above.
            if (!PhysUseDef) {
                unsigned CSVN = VNT.lookup(MI);
                MachineInstr *CSMI = Exps[CSVN];
                if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
                    FoundCSE = true;
            }
        }

        if (!FoundCSE) {
            VNT.insert(MI, CurrVN++);
            Exps.push_back(MI);
            continue;
        }

        // Found a common subexpression, eliminate it.
        unsigned CSVN = VNT.lookup(MI);
        MachineInstr *CSMI = Exps[CSVN];
        DEBUG(dbgs() << "Examining: " << *MI);
        DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);

        // Check if it's profitable to perform this CSE.
        bool DoCSE = true;
        unsigned NumDefs = MI->getDesc().getNumDefs() +
                           MI->getDesc().getNumImplicitDefs();

        for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
            MachineOperand &MO = MI->getOperand(i);
            if (!MO.isReg() || !MO.isDef())
                continue;
            unsigned OldReg = MO.getReg();
            unsigned NewReg = CSMI->getOperand(i).getReg();

            // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
            // we should make sure it is not dead at CSMI.
            if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
                ImplicitDefsToUpdate.push_back(i);

            // Keep track of implicit defs of CSMI and MI, to clear possibly
            // made-redundant kill flags.
            if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
                ImplicitDefs.push_back(OldReg);

            if (OldReg == NewReg) {
                --NumDefs;
                continue;
            }

            assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
                   TargetRegisterInfo::isVirtualRegister(NewReg) &&
                   "Do not CSE physical register defs!");

            if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
                DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
                DoCSE = false;
                break;
            }

            // Don't perform CSE if the result of the old instruction cannot exist
            // within the register class of the new instruction.
            const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
            if (!MRI->constrainRegClass(NewReg, OldRC)) {
                DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
                DoCSE = false;
                break;
            }

            CSEPairs.push_back(std::make_pair(OldReg, NewReg));
            --NumDefs;
        }

        // Actually perform the elimination.
        if (DoCSE) {
            for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
                MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
                MRI->clearKillFlags(CSEPairs[i].second);
            }

            // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
            // we should make sure it is not dead at CSMI.
            for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
                CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);

            // Go through implicit defs of CSMI and MI, and clear the kill flags on
            // their uses in all the instructions between CSMI and MI.
            // We might have made some of the kill flags redundant, consider:
            //   subs  ... %NZCV<imp-def>        <- CSMI
            //   csinc ... %NZCV<imp-use,kill>   <- this kill flag isn't valid anymore
            //   subs  ... %NZCV<imp-def>        <- MI, to be eliminated
            //   csinc ... %NZCV<imp-use,kill>
            // Since we eliminated MI, and reused a register imp-def'd by CSMI
            // (here %NZCV), that register, if it was killed before MI, should have
            // that kill flag removed, because it's lifetime was extended.
            if (CSMI->getParent() == MI->getParent()) {
                for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
                    for (auto ImplicitDef : ImplicitDefs)
                        if (MachineOperand *MO = II->findRegisterUseOperand(
                                                     ImplicitDef, /*isKill=*/true, TRI))
                            MO->setIsKill(false);
            } else {
                // If the instructions aren't in the same BB, bail out and clear the
                // kill flag on all uses of the imp-def'd register.
                for (auto ImplicitDef : ImplicitDefs)
                    MRI->clearKillFlags(ImplicitDef);
            }

            if (CrossMBBPhysDef) {
                // Add physical register defs now coming in from a predecessor to MBB
                // livein list.
                while (!PhysDefs.empty()) {
                    unsigned LiveIn = PhysDefs.pop_back_val();
                    if (!MBB->isLiveIn(LiveIn))
                        MBB->addLiveIn(LiveIn);
                }
                ++NumCrossBBCSEs;
            }

            MI->eraseFromParent();
            ++NumCSEs;
            if (!PhysRefs.empty())
                ++NumPhysCSEs;
            if (Commuted)
                ++NumCommutes;
            Changed = true;
        } else {
            VNT.insert(MI, CurrVN++);
            Exps.push_back(MI);
        }
        CSEPairs.clear();
        ImplicitDefsToUpdate.clear();
        ImplicitDefs.clear();
    }

    return Changed;
}
// transformInstruction - Perform the transformation of an instruction
// to its equivalant AdvSIMD scalar instruction. Update inputs and outputs
// to be the correct register class, minimizing cross-class copies.
void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) {
  DEBUG(dbgs() << "Scalar transform: " << MI);

  MachineBasicBlock *MBB = MI.getParent();
  unsigned OldOpc = MI.getOpcode();
  unsigned NewOpc = getTransformOpcode(OldOpc);
  assert(OldOpc != NewOpc && "transform an instruction to itself?!");

  // Check if we need a copy for the source registers.
  unsigned OrigSrc0 = MI.getOperand(1).getReg();
  unsigned OrigSrc1 = MI.getOperand(2).getReg();
  unsigned Src0 = 0, SubReg0;
  unsigned Src1 = 0, SubReg1;
  bool KillSrc0 = false, KillSrc1 = false;
  if (!MRI->def_empty(OrigSrc0)) {
    MachineRegisterInfo::def_instr_iterator Def =
        MRI->def_instr_begin(OrigSrc0);
    assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
    MachineOperand *MOSrc0 = getSrcFromCopy(&*Def, MRI, SubReg0);
    // If there are no other users of the original source, we can delete
    // that instruction.
    if (MOSrc0) {
      Src0 = MOSrc0->getReg();
      KillSrc0 = MOSrc0->isKill();
      // Src0 is going to be reused, thus, it cannot be killed anymore.
      MOSrc0->setIsKill(false);
      if (MRI->hasOneNonDBGUse(OrigSrc0)) {
        assert(MOSrc0 && "Can't delete copy w/o a valid original source!");
        Def->eraseFromParent();
        ++NumCopiesDeleted;
      }
    }
  }
  if (!MRI->def_empty(OrigSrc1)) {
    MachineRegisterInfo::def_instr_iterator Def =
        MRI->def_instr_begin(OrigSrc1);
    assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
    MachineOperand *MOSrc1 = getSrcFromCopy(&*Def, MRI, SubReg1);
    // If there are no other users of the original source, we can delete
    // that instruction.
    if (MOSrc1) {
      Src1 = MOSrc1->getReg();
      KillSrc1 = MOSrc1->isKill();
      // Src0 is going to be reused, thus, it cannot be killed anymore.
      MOSrc1->setIsKill(false);
      if (MRI->hasOneNonDBGUse(OrigSrc1)) {
        assert(MOSrc1 && "Can't delete copy w/o a valid original source!");
        Def->eraseFromParent();
        ++NumCopiesDeleted;
      }
    }
  }
  // If we weren't able to reference the original source directly, create a
  // copy.
  if (!Src0) {
    SubReg0 = 0;
    Src0 = MRI->createVirtualRegister(&AArch64::FPR64RegClass);
    insertCopy(TII, MI, Src0, OrigSrc0, KillSrc0);
    KillSrc0 = true;
  }
  if (!Src1) {
    SubReg1 = 0;
    Src1 = MRI->createVirtualRegister(&AArch64::FPR64RegClass);
    insertCopy(TII, MI, Src1, OrigSrc1, KillSrc1);
    KillSrc1 = true;
  }

  // Create a vreg for the destination.
  // FIXME: No need to do this if the ultimate user expects an FPR64.
  // Check for that and avoid the copy if possible.
  unsigned Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass);

  // For now, all of the new instructions have the same simple three-register
  // form, so no need to special case based on what instruction we're
  // building.
  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), Dst)
      .addReg(Src0, getKillRegState(KillSrc0), SubReg0)
      .addReg(Src1, getKillRegState(KillSrc1), SubReg1);

  // Now copy the result back out to a GPR.
  // FIXME: Try to avoid this if all uses could actually just use the FPR64
  // directly.
  insertCopy(TII, MI, MI.getOperand(0).getReg(), Dst, true);

  // Erase the old instruction.
  MI.eraseFromParent();

  ++NumScalarInsnsUsed;
}
Example #18
0
/// runOnMachineFunction - Reduce two-address instructions to two operands.
///
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(errs() << "Machine Function\n");
  const TargetMachine &TM = MF.getTarget();
  MRI = &MF.getRegInfo();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  LV = getAnalysisIfAvailable<LiveVariables>();
  AA = &getAnalysis<AliasAnalysis>();

  bool MadeChange = false;

  DEBUG(errs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
  DEBUG(errs() << "********** Function: " 
        << MF.getFunction()->getName() << '\n');

  // ReMatRegs - Keep track of the registers whose def's are remat'ed.
  BitVector ReMatRegs;
  ReMatRegs.resize(MRI->getLastVirtReg()+1);

  typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
    TiedOperandMap;
  TiedOperandMap TiedOperands(4);

  SmallPtrSet<MachineInstr*, 8> Processed;
  for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
       mbbi != mbbe; ++mbbi) {
    unsigned Dist = 0;
    DistanceMap.clear();
    SrcRegMap.clear();
    DstRegMap.clear();
    Processed.clear();
    for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
         mi != me; ) {
      MachineBasicBlock::iterator nmi = next(mi);
      const TargetInstrDesc &TID = mi->getDesc();
      bool FirstTied = true;

      DistanceMap.insert(std::make_pair(mi, ++Dist));

      ProcessCopy(&*mi, &*mbbi, Processed);

      // First scan through all the tied register uses in this instruction
      // and record a list of pairs of tied operands for each register.
      unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM)
        ? mi->getNumOperands() : TID.getNumOperands();
      for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
        unsigned DstIdx = 0;
        if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx))
          continue;

        if (FirstTied) {
          FirstTied = false;
          ++NumTwoAddressInstrs;
          DEBUG(errs() << '\t' << *mi);
        }

        assert(mi->getOperand(SrcIdx).isReg() &&
               mi->getOperand(SrcIdx).getReg() &&
               mi->getOperand(SrcIdx).isUse() &&
               "two address instruction invalid");

        unsigned regB = mi->getOperand(SrcIdx).getReg();
        TiedOperandMap::iterator OI = TiedOperands.find(regB);
        if (OI == TiedOperands.end()) {
          SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair;
          OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first;
        }
        OI->second.push_back(std::make_pair(SrcIdx, DstIdx));
      }

      // Now iterate over the information collected above.
      for (TiedOperandMap::iterator OI = TiedOperands.begin(),
             OE = TiedOperands.end(); OI != OE; ++OI) {
        SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second;

        // If the instruction has a single pair of tied operands, try some
        // transformations that may either eliminate the tied operands or
        // improve the opportunities for coalescing away the register copy.
        if (TiedOperands.size() == 1 && TiedPairs.size() == 1) {
          unsigned SrcIdx = TiedPairs[0].first;
          unsigned DstIdx = TiedPairs[0].second;

          // If the registers are already equal, nothing needs to be done.
          if (mi->getOperand(SrcIdx).getReg() ==
              mi->getOperand(DstIdx).getReg())
            break; // Done with this instruction.

          if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist))
            break; // The tied operands have been eliminated.
        }

        bool RemovedKillFlag = false;
        bool AllUsesCopied = true;
        unsigned LastCopiedReg = 0;
        unsigned regB = OI->first;
        for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
          unsigned SrcIdx = TiedPairs[tpi].first;
          unsigned DstIdx = TiedPairs[tpi].second;
          unsigned regA = mi->getOperand(DstIdx).getReg();
          // Grab regB from the instruction because it may have changed if the
          // instruction was commuted.
          regB = mi->getOperand(SrcIdx).getReg();

          if (regA == regB) {
            // The register is tied to multiple destinations (or else we would
            // not have continued this far), but this use of the register
            // already matches the tied destination.  Leave it.
            AllUsesCopied = false;
            continue;
          }
          LastCopiedReg = regA;

          assert(TargetRegisterInfo::isVirtualRegister(regB) &&
                 "cannot make instruction into two-address form");

#ifndef NDEBUG
          // First, verify that we don't have a use of "a" in the instruction
          // (a = b + a for example) because our transformation will not
          // work. This should never occur because we are in SSA form.
          for (unsigned i = 0; i != mi->getNumOperands(); ++i)
            assert(i == DstIdx ||
                   !mi->getOperand(i).isReg() ||
                   mi->getOperand(i).getReg() != regA);
#endif

          // Emit a copy or rematerialize the definition.
          const TargetRegisterClass *rc = MRI->getRegClass(regB);
          MachineInstr *DefMI = MRI->getVRegDef(regB);
          // If it's safe and profitable, remat the definition instead of
          // copying it.
          if (DefMI &&
              DefMI->getDesc().isAsCheapAsAMove() &&
              DefMI->isSafeToReMat(TII, regB, AA) &&
              isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
            DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n");
            unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
            TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI);
            ReMatRegs.set(regB);
            ++NumReMats;
          } else {
            bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc);
            (void)Emitted;
            assert(Emitted && "Unable to issue a copy instruction!\n");
          }

          MachineBasicBlock::iterator prevMI = prior(mi);
          // Update DistanceMap.
          DistanceMap.insert(std::make_pair(prevMI, Dist));
          DistanceMap[mi] = ++Dist;

          DEBUG(errs() << "\t\tprepend:\t" << *prevMI);

          MachineOperand &MO = mi->getOperand(SrcIdx);
          assert(MO.isReg() && MO.getReg() == regB && MO.isUse() &&
                 "inconsistent operand info for 2-reg pass");
          if (MO.isKill()) {
            MO.setIsKill(false);
            RemovedKillFlag = true;
          }
          MO.setReg(regA);
        }

        if (AllUsesCopied) {
          // Replace other (un-tied) uses of regB with LastCopiedReg.
          for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
            MachineOperand &MO = mi->getOperand(i);
            if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
              if (MO.isKill()) {
                MO.setIsKill(false);
                RemovedKillFlag = true;
              }
              MO.setReg(LastCopiedReg);
            }
          }

          // Update live variables for regB.
          if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi))
            LV->addVirtualRegisterKilled(regB, prior(mi));

        } else if (RemovedKillFlag) {
          // Some tied uses of regB matched their destination registers, so
          // regB is still used in this instruction, but a kill flag was
          // removed from a different tied use of regB, so now we need to add
          // a kill flag to one of the remaining uses of regB.
          for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
            MachineOperand &MO = mi->getOperand(i);
            if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
              MO.setIsKill(true);
              break;
            }
          }
        }
          
        MadeChange = true;

        DEBUG(errs() << "\t\trewrite to:\t" << *mi);
      }

      // Clear TiedOperands here instead of at the top of the loop
      // since most instructions do not have tied operands.
      TiedOperands.clear();
      mi = nmi;
    }
  }

  // Some remat'ed instructions are dead.
  int VReg = ReMatRegs.find_first();
  while (VReg != -1) {
    if (MRI->use_empty(VReg)) {
      MachineInstr *DefMI = MRI->getVRegDef(VReg);
      DefMI->eraseFromParent();
    }
    VReg = ReMatRegs.find_next(VReg);
  }

  return MadeChange;
}
Example #19
0
/// foldMemoryOperand - Try folding stack slot references in Ops into their
/// instructions.
///
/// @param Ops    Operand indices from analyzeVirtReg().
/// @param LoadMI Load instruction to use instead of stack slot when non-null.
/// @return       True on success.
bool InlineSpiller::
foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
                  MachineInstr *LoadMI) {
  if (Ops.empty())
    return false;
  // Don't attempt folding in bundles.
  MachineInstr *MI = Ops.front().first;
  if (Ops.back().first != MI || MI->isBundled())
    return false;

  bool WasCopy = MI->isCopy();
  unsigned ImpReg = 0;

  // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
  // operands.
  SmallVector<unsigned, 8> FoldOps;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    unsigned Idx = Ops[i].second;
    MachineOperand &MO = MI->getOperand(Idx);
    if (MO.isImplicit()) {
      ImpReg = MO.getReg();
      continue;
    }
    // FIXME: Teach targets to deal with subregs.
    if (MO.getSubReg())
      return false;
    // We cannot fold a load instruction into a def.
    if (LoadMI && MO.isDef())
      return false;
    // Tied use operands should not be passed to foldMemoryOperand.
    if (!MI->isRegTiedToDefOperand(Idx))
      FoldOps.push_back(Idx);
  }

  MachineInstr *FoldMI =
                LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI)
                       : TII.foldMemoryOperand(MI, FoldOps, StackSlot);
  if (!FoldMI)
    return false;

  // Remove LIS for any dead defs in the original MI not in FoldMI.
  for (MIBundleOperands MO(MI); MO.isValid(); ++MO) {
    if (!MO->isReg())
      continue;
    unsigned Reg = MO->getReg();
    if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
        MRI.isReserved(Reg)) {
      continue;
    }
    MIBundleOperands::PhysRegInfo RI =
      MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI);
    if (MO->readsReg()) {
      assert(RI.Reads && "Cannot fold physreg reader");
      continue;
    }
    if (RI.Defines)
      continue;
    // FoldMI does not define this physreg. Remove the LI segment.
    assert(MO->isDead() && "Cannot fold physreg def");
    for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) {
      if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) {
        SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
        if (VNInfo *VNI = LI->getVNInfoAt(Idx))
          LI->removeValNo(VNI);
      }
    }
  }
  LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
  MI->eraseFromParent();

  // TII.foldMemoryOperand may have left some implicit operands on the
  // instruction.  Strip them.
  if (ImpReg)
    for (unsigned i = FoldMI->getNumOperands(); i; --i) {
      MachineOperand &MO = FoldMI->getOperand(i - 1);
      if (!MO.isReg() || !MO.isImplicit())
        break;
      if (MO.getReg() == ImpReg)
        FoldMI->RemoveOperand(i - 1);
    }

  DEBUG(dbgs() << "\tfolded:  " << LIS.getInstructionIndex(FoldMI) << '\t'
               << *FoldMI);
  if (!WasCopy)
    ++NumFolded;
  else if (Ops.front().second == 0)
    ++NumSpills;
  else
    ++NumReloads;
  return true;
}
Example #20
0
void SILowerControlFlow::Branch(MachineInstr &MI) {
  if (MI.getOperand(0).getMBB() == MI.getParent()->getNextNode())
    MI.eraseFromParent();

  // If these aren't equal, this is probably an infinite loop.
}
Example #21
0
void SILowerControlFlow::emitElse(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  unsigned DstReg = MI.getOperand(0).getReg();
  assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister);

  bool ExecModified = MI.getOperand(3).getImm() != 0;
  MachineBasicBlock::iterator Start = MBB.begin();

  // We are running before TwoAddressInstructions, and si_else's operands are
  // tied. In order to correctly tie the registers, split this into a copy of
  // the src like it does.
  BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), DstReg)
    .addOperand(MI.getOperand(1)); // Saved EXEC

  // This must be inserted before phis and any spill code inserted before the
  // else.
  MachineInstr *OrSaveExec =
    BuildMI(MBB, Start, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), DstReg)
    .addReg(DstReg);

  MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();

  MachineBasicBlock::iterator ElsePt(MI);

  if (ExecModified) {
    MachineInstr *And =
      BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_AND_B64), DstReg)
      .addReg(AMDGPU::EXEC)
      .addReg(DstReg);

    if (LIS)
      LIS->InsertMachineInstrInMaps(*And);
  }

  MachineInstr *Xor =
    BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
    .addReg(AMDGPU::EXEC)
    .addReg(DstReg);

  MachineInstr *Branch =
    BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
    .addMBB(DestBB);

  if (!LIS) {
    MI.eraseFromParent();
    return;
  }

  LIS->RemoveMachineInstrFromMaps(MI);
  MI.eraseFromParent();

  LIS->InsertMachineInstrInMaps(*OrSaveExec);

  LIS->InsertMachineInstrInMaps(*Xor);
  LIS->InsertMachineInstrInMaps(*Branch);

  // src reg is tied to dst reg.
  LIS->removeInterval(DstReg);
  LIS->createAndComputeVirtRegInterval(DstReg);

  // Let this be recomputed.
  LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
}
Example #22
0
void SILowerControlFlow::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  MachineBasicBlock::iterator I = MI;

  unsigned Save = MI.getOperand(1).getReg();
  unsigned Idx = MI.getOperand(3).getReg();

  if (AMDGPU::SReg_32RegClass.contains(Idx)) {
    if (Offset) {
      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
              .addReg(Idx)
              .addImm(Offset);
    } else {
      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
              .addReg(Idx);
    }
    MBB.insert(I, MovRel);
  } else {

    assert(AMDGPU::SReg_64RegClass.contains(Save));
    assert(AMDGPU::VGPR_32RegClass.contains(Idx));

    // Save the EXEC mask
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
            .addReg(AMDGPU::EXEC);

    // Read the next variant into VCC (lower 32 bits) <- also loop target
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
            AMDGPU::VCC_LO)
            .addReg(Idx);

    // Move index from VCC into M0
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
            .addReg(AMDGPU::VCC_LO);

    // Compare the just read M0 value to all possible Idx values
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32))
      .addReg(AMDGPU::M0)
      .addReg(Idx);

    // Update EXEC, save the original EXEC value to VCC
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
            .addReg(AMDGPU::VCC);

    if (Offset) {
      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
              .addReg(AMDGPU::M0)
              .addImm(Offset);
    }
    // Do the actual move
    MBB.insert(I, MovRel);

    // Update EXEC, switch all done bits to 0 and all todo bits to 1
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
            .addReg(AMDGPU::EXEC)
            .addReg(AMDGPU::VCC);

    // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
      .addImm(-7);

    // Restore EXEC
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
            .addReg(Save);

  }
  MI.eraseFromParent();
}
Example #23
0
// Erase the nodes given in the Nodes set from DFG. In addition to removing
// them from the DFG, if a node corresponds to a statement, the corresponding
// machine instruction is erased from the function.
bool DeadCodeElimination::erase(const SetVector<NodeId> &Nodes) {
  if (Nodes.empty())
    return false;

  // Prepare the actual set of ref nodes to remove: ref nodes from Nodes
  // are included directly, for each InstrNode in Nodes, include the set
  // of all RefNodes from it.
  NodeList DRNs, DINs;
  for (auto I : Nodes) {
    auto BA = DFG.addr<NodeBase*>(I);
    uint16_t Type = BA.Addr->getType();
    if (Type == NodeAttrs::Ref) {
      DRNs.push_back(DFG.addr<RefNode*>(I));
      continue;
    }

    // If it's a code node, add all ref nodes from it.
    uint16_t Kind = BA.Addr->getKind();
    if (Kind == NodeAttrs::Stmt || Kind == NodeAttrs::Phi) {
      for (auto N : NodeAddr<CodeNode*>(BA).Addr->members(DFG))
        DRNs.push_back(N);
      DINs.push_back(DFG.addr<InstrNode*>(I));
    } else {
      llvm_unreachable("Unexpected code node");
      return false;
    }
  }

  // Sort the list so that use nodes are removed first. This makes the
  // "unlink" functions a bit faster.
  auto UsesFirst = [] (NodeAddr<RefNode*> A, NodeAddr<RefNode*> B) -> bool {
    uint16_t KindA = A.Addr->getKind(), KindB = B.Addr->getKind();
    if (KindA == NodeAttrs::Use && KindB == NodeAttrs::Def)
      return true;
    if (KindA == NodeAttrs::Def && KindB == NodeAttrs::Use)
      return false;
    return A.Id < B.Id;
  };
  std::sort(DRNs.begin(), DRNs.end(), UsesFirst);

  if (trace())
    dbgs() << "Removing dead ref nodes:\n";
  for (NodeAddr<RefNode*> RA : DRNs) {
    if (trace())
      dbgs() << "  " << PrintNode<RefNode*>(RA, DFG) << '\n';
    if (DFG.IsUse(RA))
      DFG.unlinkUse(RA);
    else if (DFG.IsDef(RA))
      DFG.unlinkDef(RA);
  }

  // Now, remove all dead instruction nodes.
  for (NodeAddr<InstrNode*> IA : DINs) {
    NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
    BA.Addr->removeMember(IA, DFG);
    if (!DFG.IsCode<NodeAttrs::Stmt>(IA))
      continue;

    MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
    if (trace())
      dbgs() << "erasing: " << *MI;
    MI->eraseFromParent();
  }
  return true;
}
Example #24
0
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
  SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
  DenseMap<unsigned, MachineInstr*> AvailCopyMap;   // Def -> available copies map
  DenseMap<unsigned, MachineInstr*> CopyMap;        // Def -> copies map
  DenseMap<unsigned, unsigned> SrcMap;              // Src -> Def map

  bool Changed = false;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (MI->isCopy()) {
      unsigned Def = MI->getOperand(0).getReg();
      unsigned Src = MI->getOperand(1).getReg();

      if (TargetRegisterInfo::isVirtualRegister(Def) ||
          TargetRegisterInfo::isVirtualRegister(Src))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
      if (CI != AvailCopyMap.end()) {
        MachineInstr *CopyMI = CI->second;
        unsigned SrcSrc = CopyMI->getOperand(1).getReg();
        if (!ReservedRegs.test(Def) &&
            (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
            (SrcSrc == Def || TRI->isSubRegister(SrcSrc, Def))) {
          // The two copies cancel out and the source of the first copy
          // hasn't been overridden, eliminate the second one. e.g.
          //  %ECX<def> = COPY %EAX<kill>
          //  ... nothing clobbered EAX.
          //  %EAX<def> = COPY %ECX
          // =>
          //  %ECX<def> = COPY %EAX
          //
          // Also avoid eliminating a copy from reserved registers unless the
          // definition is proven not clobbered. e.g.
          // %RSP<def> = COPY %RAX
          // CALL
          // %RAX<def> = COPY %RSP
          CopyMI->getOperand(1).setIsKill(false);
          MI->eraseFromParent();
          Changed = true;
          ++NumDeletes;
          continue;
        }
      }

      // If Src is defined by a previous copy, it cannot be eliminated.
      CI = CopyMap.find(Src);
      if (CI != CopyMap.end())
        MaybeDeadCopies.remove(CI->second);
      for (const unsigned *AS = TRI->getAliasSet(Src); *AS; ++AS) {
        CI = CopyMap.find(*AS);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }

      // Copy is now a candidate for deletion.
      MaybeDeadCopies.insert(MI);

      // If 'Src' is previously source of another copy, then this earlier copy's
      // source is no longer available. e.g.
      // %xmm9<def> = copy %xmm2
      // ...
      // %xmm2<def> = copy %xmm0
      // ...
      // %xmm2<def> = copy %xmm9
      SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);

      // Remember Def is defined by the copy.
      CopyMap[Def] = MI;
      AvailCopyMap[Def] = MI;
      for (const unsigned *SR = TRI->getSubRegisters(Def); *SR; ++SR) {
        CopyMap[*SR] = MI;
        AvailCopyMap[*SR] = MI;
      }

      // Remember source that's copied to Def. Once it's clobbered, then
      // it's no longer available for copy propagation.
      SrcMap[Src] = Def;

      continue;
    }

    // Not a copy.
    SmallVector<unsigned, 2> Defs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;

      if (TargetRegisterInfo::isVirtualRegister(Reg))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      if (MO.isDef()) {
        Defs.push_back(Reg);
        continue;
      }

      // If 'Reg' is defined by a copy, the copy is no longer a candidate
      // for elimination.
      DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg);
      if (CI != CopyMap.end())
        MaybeDeadCopies.remove(CI->second);
      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
        CI = CopyMap.find(*AS);
        if (CI != CopyMap.end())
          MaybeDeadCopies.remove(CI->second);
      }
    }

    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Reg = Defs[i];

      // No longer defined by a copy.
      CopyMap.erase(Reg);
      AvailCopyMap.erase(Reg);
      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
        CopyMap.erase(*AS);
        AvailCopyMap.erase(*AS);
      }

      // If 'Reg' is previously source of a copy, it is no longer available for
      // copy propagation.
      SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
    }
  }

  // If MBB doesn't have successors, delete the copies whose defs are not used.
  // If MBB does have successors, then conservative assume the defs are live-out
  // since we don't want to trust live-in lists.
  if (MBB.succ_empty()) {
    for (SmallSetVector<MachineInstr*, 8>::iterator
           DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
         DI != DE; ++DI) {
      if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) {
        (*DI)->eraseFromParent();
        Changed = true;
        ++NumDeletes;
      }
    }
  }

  return Changed;
}
Example #25
0
/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
/// of its predecessors.
bool
TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
                                 bool IsSimple,
                                 MachineFunction &MF,
                                 SmallVector<MachineBasicBlock*, 8> &TDBBs,
                                 SmallVector<MachineInstr*, 16> &Copies) {
    DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');

    DenseSet<unsigned> UsedByPhi;
    getRegsUsedByPHIs(*TailBB, &UsedByPhi);

    if (IsSimple)
        return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);

    // Iterate through all the unique predecessors and tail-duplicate this
    // block into them, if possible. Copying the list ahead of time also
    // avoids trouble with the predecessor list reallocating.
    bool Changed = false;
    SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
            TailBB->pred_end());
    for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
            PE = Preds.end(); PI != PE; ++PI) {
        MachineBasicBlock *PredBB = *PI;

        assert(TailBB != PredBB &&
               "Single-block loop should have been rejected earlier!");
        // EH edges are ignored by AnalyzeBranch.
        if (PredBB->succ_size() > 1)
            continue;

        MachineBasicBlock *PredTBB, *PredFBB;
        SmallVector<MachineOperand, 4> PredCond;
        if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
            continue;
        if (!PredCond.empty())
            continue;
        // Don't duplicate into a fall-through predecessor (at least for now).
        if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
            continue;

        DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
              << "From Succ: " << *TailBB);

        TDBBs.push_back(PredBB);

        // Remove PredBB's unconditional branch.
        TII->RemoveBranch(*PredBB);

        if (RS && !TailBB->livein_empty()) {
            // Update PredBB livein.
            RS->enterBasicBlock(PredBB);
            if (!PredBB->empty())
                RS->forward(prior(PredBB->end()));
            BitVector RegsLiveAtExit(TRI->getNumRegs());
            RS->getRegsUsed(RegsLiveAtExit, false);
            for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(),
                    E = TailBB->livein_end(); I != E; ++I) {
                if (!RegsLiveAtExit[*I])
                    // If a register is previously livein to the tail but it's not live
                    // at the end of predecessor BB, then it should be added to its
                    // livein list.
                    PredBB->addLiveIn(*I);
            }
        }

        // Clone the contents of TailBB into PredBB.
        DenseMap<unsigned, unsigned> LocalVRMap;
        SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
        // Use instr_iterator here to properly handle bundles, e.g.
        // ARM Thumb2 IT block.
        MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
        while (I != TailBB->instr_end()) {
            MachineInstr *MI = &*I;
            ++I;
            if (MI->isPHI()) {
                // Replace the uses of the def of the PHI with the register coming
                // from PredBB.
                ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
            } else {
                // Replace def of virtual registers with new registers, and update
                // uses with PHI source register or the new registers.
                DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
            }
        }
        MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
        for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
            Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
                                     TII->get(TargetOpcode::COPY),
                                     CopyInfos[i].first).addReg(CopyInfos[i].second));
        }

        // Simplify
        TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);

        NumInstrDups += TailBB->size() - 1; // subtract one for removed branch

        // Update the CFG.
        PredBB->removeSuccessor(PredBB->succ_begin());
        assert(PredBB->succ_empty() &&
               "TailDuplicate called on block with multiple successors!");
        for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
                E = TailBB->succ_end(); I != E; ++I)
            PredBB->addSuccessor(*I);

        Changed = true;
        ++NumTailDups;
    }

    // If TailBB was duplicated into all its predecessors except for the prior
    // block, which falls through unconditionally, move the contents of this
    // block into the prior block.
    MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB));
    MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
    SmallVector<MachineOperand, 4> PriorCond;
    // This has to check PrevBB->succ_size() because EH edges are ignored by
    // AnalyzeBranch.
    if (PrevBB->succ_size() == 1 &&
            !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
            PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
            !TailBB->hasAddressTaken()) {
        DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
              << "From MBB: " << *TailBB);
        if (PreRegAlloc) {
            DenseMap<unsigned, unsigned> LocalVRMap;
            SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
            MachineBasicBlock::iterator I = TailBB->begin();
            // Process PHI instructions first.
            while (I != TailBB->end() && I->isPHI()) {
                // Replace the uses of the def of the PHI with the register coming
                // from PredBB.
                MachineInstr *MI = &*I++;
                ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
                if (MI->getParent())
                    MI->eraseFromParent();
            }

            // Now copy the non-PHI instructions.
            while (I != TailBB->end()) {
                // Replace def of virtual registers with new registers, and update
                // uses with PHI source register or the new registers.
                MachineInstr *MI = &*I++;
                assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
                DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
                MI->eraseFromParent();
            }
            MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
            for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
                Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
                                         TII->get(TargetOpcode::COPY),
                                         CopyInfos[i].first)
                                 .addReg(CopyInfos[i].second));
            }
        } else {
            // No PHIs to worry about, just splice the instructions over.
            PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
        }
        PrevBB->removeSuccessor(PrevBB->succ_begin());
        assert(PrevBB->succ_empty());
        PrevBB->transferSuccessors(TailBB);
        TDBBs.push_back(PrevBB);
        Changed = true;
    }

    // If this is after register allocation, there are no phis to fix.
    if (!PreRegAlloc)
        return Changed;

    // If we made no changes so far, we are safe.
    if (!Changed)
        return Changed;


    // Handle the nasty case in that we duplicated a block that is part of a loop
    // into some but not all of its predecessors. For example:
    //    1 -> 2 <-> 3                 |
    //          \                      |
    //           \---> rest            |
    // if we duplicate 2 into 1 but not into 3, we end up with
    // 12 -> 3 <-> 2 -> rest           |
    //   \             /               |
    //    \----->-----/                |
    // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
    // with a phi in 3 (which now dominates 2).
    // What we do here is introduce a copy in 3 of the register defined by the
    // phi, just like when we are duplicating 2 into 3, but we don't copy any
    // real instructions or remove the 3 -> 2 edge from the phi in 2.
    for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
            PE = Preds.end(); PI != PE; ++PI) {
        MachineBasicBlock *PredBB = *PI;
        if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
            continue;

        // EH edges
        if (PredBB->succ_size() != 1)
            continue;

        DenseMap<unsigned, unsigned> LocalVRMap;
        SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
        MachineBasicBlock::iterator I = TailBB->begin();
        // Process PHI instructions first.
        while (I != TailBB->end() && I->isPHI()) {
            // Replace the uses of the def of the PHI with the register coming
            // from PredBB.
            MachineInstr *MI = &*I++;
            ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
        }
        MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
        for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
            Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
                                     TII->get(TargetOpcode::COPY),
                                     CopyInfos[i].first).addReg(CopyInfos[i].second));
        }
    }

    return Changed;
}
Example #26
0
bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
  const PPCInstrInfo *TII =
                static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
  // Give the blocks of the function a dense, in-order, numbering.
  Fn.RenumberBlocks();
  BlockSizes.resize(Fn.getNumBlockIDs());

  // Measure each MBB and compute a size for the entire function.
  unsigned FuncSize = 0;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock *MBB = MFI;

    unsigned BlockSize = 0;
    for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
         MBBI != EE; ++MBBI)
      BlockSize += TII->GetInstSizeInBytes(MBBI);
    
    BlockSizes[MBB->getNumber()] = BlockSize;
    FuncSize += BlockSize;
  }
  
  // If the entire function is smaller than the displacement of a branch field,
  // we know we don't need to shrink any branches in this function.  This is a
  // common case.
  if (FuncSize < (1 << 15)) {
    BlockSizes.clear();
    return false;
  }
  
  // For each conditional branch, if the offset to its destination is larger
  // than the offset field allows, transform it into a long branch sequence
  // like this:
  //   short branch:
  //     bCC MBB
  //   long branch:
  //     b!CC $PC+8
  //     b MBB
  //
  bool MadeChange = true;
  bool EverMadeChange = false;
  while (MadeChange) {
    // Iteratively expand branches until we reach a fixed point.
    MadeChange = false;
  
    for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
         ++MFI) {
      MachineBasicBlock &MBB = *MFI;
      unsigned MBBStartOffset = 0;
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E; ++I) {
        MachineBasicBlock *Dest = nullptr;
        if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm())
          Dest = I->getOperand(2).getMBB();
        else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) &&
                 !I->getOperand(1).isImm())
          Dest = I->getOperand(1).getMBB();
        else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ ||
                  I->getOpcode() == PPC::BDZ8  || I->getOpcode() == PPC::BDZ) &&
                 !I->getOperand(0).isImm())
          Dest = I->getOperand(0).getMBB();

        if (!Dest) {
          MBBStartOffset += TII->GetInstSizeInBytes(I);
          continue;
        }
        
        // Determine the offset from the current branch to the destination
        // block.
        int BranchSize;
        if (Dest->getNumber() <= MBB.getNumber()) {
          // If this is a backwards branch, the delta is the offset from the
          // start of this block to this branch, plus the sizes of all blocks
          // from this block to the dest.
          BranchSize = MBBStartOffset;
          
          for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
            BranchSize += BlockSizes[i];
        } else {
          // Otherwise, add the size of the blocks between this block and the
          // dest to the number of bytes left in this block.
          BranchSize = -MBBStartOffset;

          for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
            BranchSize += BlockSizes[i];
        }

        // If this branch is in range, ignore it.
        if (isInt<16>(BranchSize)) {
          MBBStartOffset += 4;
          continue;
        }

        // Otherwise, we have to expand it to a long branch.
        MachineInstr *OldBranch = I;
        DebugLoc dl = OldBranch->getDebugLoc();
 
        if (I->getOpcode() == PPC::BCC) {
          // The BCC operands are:
          // 0. PPC branch predicate
          // 1. CR register
          // 2. Target MBB
          PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
          unsigned CRReg = I->getOperand(1).getReg();
       
          // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
          BuildMI(MBB, I, dl, TII->get(PPC::BCC))
            .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
        } else if (I->getOpcode() == PPC::BC) {
          unsigned CRBit = I->getOperand(0).getReg();
          BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2);
        } else if (I->getOpcode() == PPC::BCn) {
          unsigned CRBit = I->getOperand(0).getReg();
          BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2);
        } else if (I->getOpcode() == PPC::BDNZ) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
        } else if (I->getOpcode() == PPC::BDNZ8) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDZ8)).addImm(2);
        } else if (I->getOpcode() == PPC::BDZ) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDNZ)).addImm(2);
        } else if (I->getOpcode() == PPC::BDZ8) {
          BuildMI(MBB, I, dl, TII->get(PPC::BDNZ8)).addImm(2);
        } else {
           llvm_unreachable("Unhandled branch type!");
        }
        
        // Uncond branch to the real destination.
        I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);

        // Remove the old branch from the function.
        OldBranch->eraseFromParent();
        
        // Remember that this instruction is 8-bytes, increase the size of the
        // block by 4, remember to iterate.
        BlockSizes[MBB.getNumber()] += 4;
        MBBStartOffset += 8;
        ++NumExpanded;
        MadeChange = true;
      }
    }
    EverMadeChange |= MadeChange;
  }
  
  BlockSizes.clear();
  return true;
}
Example #27
0
MipsInstrInfo::BranchType MipsInstrInfo::
AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
              MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond,
              bool AllowModify,
              SmallVectorImpl<MachineInstr*> &BranchInstrs) const {

  MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();

  // Skip all the debug instructions.
  while (I != REnd && I->isDebugValue())
    ++I;

  if (I == REnd || !isUnpredicatedTerminator(&*I)) {
    // This block ends with no branches (it just falls through to its succ).
    // Leave TBB/FBB null.
    TBB = FBB = NULL;
    return BT_NoBranch;
  }

  MachineInstr *LastInst = &*I;
  unsigned LastOpc = LastInst->getOpcode();
  BranchInstrs.push_back(LastInst);

  // Not an analyzable branch (e.g., indirect jump).
  if (!GetAnalyzableBrOpc(LastOpc))
    return LastInst->isIndirectBranch() ? BT_Indirect : BT_None;

  // Get the second to last instruction in the block.
  unsigned SecondLastOpc = 0;
  MachineInstr *SecondLastInst = NULL;

  if (++I != REnd) {
    SecondLastInst = &*I;
    SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());

    // Not an analyzable branch (must be an indirect jump).
    if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc)
      return BT_None;
  }

  // If there is only one terminator instruction, process it.
  if (!SecondLastOpc) {
    // Unconditional branch
    if (LastOpc == UncondBrOpc) {
      TBB = LastInst->getOperand(0).getMBB();
      return BT_Uncond;
    }

    // Conditional branch
    AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
    return BT_Cond;
  }

  // If we reached here, there are two branches.
  // If there are three terminators, we don't know what sort of block this is.
  if (++I != REnd && isUnpredicatedTerminator(&*I))
    return BT_None;

  BranchInstrs.insert(BranchInstrs.begin(), SecondLastInst);

  // If second to last instruction is an unconditional branch,
  // analyze it and remove the last instruction.
  if (SecondLastOpc == UncondBrOpc) {
    // Return if the last instruction cannot be removed.
    if (!AllowModify)
      return BT_None;

    TBB = SecondLastInst->getOperand(0).getMBB();
    LastInst->eraseFromParent();
    BranchInstrs.pop_back();
    return BT_Uncond;
  }

  // Conditional branch followed by an unconditional branch.
  // The last one must be unconditional.
  if (LastOpc != UncondBrOpc)
    return BT_None;

  AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
  FBB = LastInst->getOperand(0).getMBB();

  return BT_CondUncond;
}
bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** Register Stackifying **********\n"
                  "********** Function: "
               << MF.getName() << '\n');

  bool Changed = false;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  LiveIntervals &LIS = getAnalysis<LiveIntervals>();

  // Walk the instructions from the bottom up. Currently we don't look past
  // block boundaries, and the blocks aren't ordered so the block visitation
  // order isn't significant, but we may want to change this in the future.
  for (MachineBasicBlock &MBB : MF) {
    // Don't use a range-based for loop, because we modify the list as we're
    // iterating over it and the end iterator may change.
    for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) {
      MachineInstr *Insert = &*MII;
      // Don't nest anything inside a phi.
      if (Insert->getOpcode() == TargetOpcode::PHI)
        break;

      // Don't nest anything inside an inline asm, because we don't have
      // constraints for $push inputs.
      if (Insert->getOpcode() == TargetOpcode::INLINEASM)
        break;

      // Iterate through the inputs in reverse order, since we'll be pulling
      // operands off the stack in LIFO order.
      bool AnyStackified = false;
      for (MachineOperand &Op : reverse(Insert->uses())) {
        // We're only interested in explicit virtual register operands.
        if (!Op.isReg() || Op.isImplicit() || !Op.isUse())
          continue;

        unsigned Reg = Op.getReg();

        // Only consider registers with a single definition.
        // TODO: Eventually we may relax this, to stackify phi transfers.
        MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
        if (!Def)
          continue;

        // Don't nest an INLINE_ASM def into anything, because we don't have
        // constraints for $pop outputs.
        if (Def->getOpcode() == TargetOpcode::INLINEASM)
          continue;

        // Don't nest PHIs inside of anything.
        if (Def->getOpcode() == TargetOpcode::PHI)
          continue;

        // Argument instructions represent live-in registers and not real
        // instructions.
        if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
            Def->getOpcode() == WebAssembly::ARGUMENT_I64 ||
            Def->getOpcode() == WebAssembly::ARGUMENT_F32 ||
            Def->getOpcode() == WebAssembly::ARGUMENT_F64)
          continue;

        if (MRI.hasOneUse(Reg) && Def->getParent() == &MBB &&
            IsSafeToMove(Def, Insert, AA, LIS, MRI)) {
          // A single-use def in the same block with no intervening memory or
          // register dependencies; move the def down and nest it with the
          // current instruction.
          // TODO: Stackify multiple-use values, taking advantage of set_local
          // returning its result.
          Changed = true;
          AnyStackified = true;
          MBB.splice(Insert, &MBB, Def);
          LIS.handleMove(Def);
          MFI.stackifyVReg(Reg);
          ImposeStackOrdering(Def);
          Insert = Def;
        } else if (Def->isAsCheapAsAMove() &&
                   TII->isTriviallyReMaterializable(Def, &AA)) {
          // A trivially cloneable instruction; clone it and nest the new copy
          // with the current instruction.
          Changed = true;
          AnyStackified = true;
          unsigned OldReg = Def->getOperand(0).getReg();
          unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
          TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
          Op.setReg(NewReg);
          MachineInstr *Clone =
              &*std::prev(MachineBasicBlock::instr_iterator(Insert));
          LIS.InsertMachineInstrInMaps(Clone);
          LIS.createAndComputeVirtRegInterval(NewReg);
          MFI.stackifyVReg(NewReg);
          ImposeStackOrdering(Clone);
          Insert = Clone;

          // If that was the last use of the original, delete the original.
          // Otherwise shrink the LiveInterval.
          if (MRI.use_empty(OldReg)) {
            SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
            LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
            LIS.removeVRegDefAt(LIS.getInterval(OldReg), Idx);
            LIS.removeInterval(OldReg);
            LIS.RemoveMachineInstrFromMaps(Def);
            Def->eraseFromParent();
          } else {
            LIS.shrinkToUses(&LIS.getInterval(OldReg));
          }
        }
      }
      if (AnyStackified)
        ImposeStackOrdering(&*MII);
    }
  }

  // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere
  // so that it never looks like a use-before-def.
  if (Changed) {
    MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK);
    for (MachineBasicBlock &MBB : MF)
      MBB.addLiveIn(WebAssembly::EXPR_STACK);
  }

#ifndef NDEBUG
  // Verify that pushes and pops are performed in LIFO order.
  SmallVector<unsigned, 0> Stack;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      for (MachineOperand &MO : reverse(MI.explicit_operands())) {
        if (!MO.isReg())
          continue;
        unsigned VReg = MO.getReg();

        // Don't stackify physregs like SP or FP.
        if (!TargetRegisterInfo::isVirtualRegister(VReg))
          continue;

        if (MFI.isVRegStackified(VReg)) {
          if (MO.isDef())
            Stack.push_back(VReg);
          else
            assert(Stack.pop_back_val() == VReg);
        }
      }
    }
    // TODO: Generalize this code to support keeping values on the stack across
    // basic block boundaries.
    assert(Stack.empty());
  }
#endif

  return Changed;
}
Example #29
0
bool
AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (LastOpc == AArch64::Bimm) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranch(LastOpc)) {
      classifyCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && LastOpc == AArch64::Bimm) {
    while (SecondLastOpc == AArch64::Bimm) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (LastOpc == AArch64::Bimm) {
    if (SecondLastOpc == AArch64::Bcc) {
      TBB =  SecondLastInst->getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
      Cond.push_back(SecondLastInst->getOperand(0));
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (isCondBranch(SecondLastOpc)) {
      classifyCondBranch(SecondLastInst, TBB, Cond);
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    }
  }

  // If the block ends with two unconditional branches, handle it.  The second
  // one is not executed, so remove it.
  if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
Example #30
0
bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      MachineIRBuilder &MIRBuilder) const {
  using namespace TargetOpcode;

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return false;
  case G_SREM:
  case G_UREM: {
    unsigned OriginalResult = MI.getOperand(0).getReg();
    auto Size = MRI.getType(OriginalResult).getSizeInBits();
    if (Size != 32)
      return false;

    auto Libcall =
        MI.getOpcode() == G_SREM ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;

    // Our divmod libcalls return a struct containing the quotient and the
    // remainder. We need to create a virtual register for it.
    auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
    Type *ArgTy = Type::getInt32Ty(Ctx);
    StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true);
    auto RetVal = MRI.createGenericVirtualRegister(
        getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout()));

    auto Status = createLibcall(MIRBuilder, Libcall, {RetVal, RetTy},
                                {{MI.getOperand(1).getReg(), ArgTy},
                                 {MI.getOperand(2).getReg(), ArgTy}});
    if (Status != LegalizerHelper::Legalized)
      return false;

    // The remainder is the second result of divmod. Split the return value into
    // a new, unused register for the quotient and the destination of the
    // original instruction for the remainder.
    MIRBuilder.buildUnmerge(
        {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult},
        RetVal);
    break;
  }
  case G_FCMP: {
    assert(MRI.getType(MI.getOperand(2).getReg()) ==
               MRI.getType(MI.getOperand(3).getReg()) &&
           "Mismatched operands for G_FCMP");
    auto OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();

    auto OriginalResult = MI.getOperand(0).getReg();
    auto Predicate =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    auto Libcalls = getFCmpLibcalls(Predicate, OpSize);

    if (Libcalls.empty()) {
      assert((Predicate == CmpInst::FCMP_TRUE ||
              Predicate == CmpInst::FCMP_FALSE) &&
             "Predicate needs libcalls, but none specified");
      MIRBuilder.buildConstant(OriginalResult,
                               Predicate == CmpInst::FCMP_TRUE ? 1 : 0);
      MI.eraseFromParent();
      return true;
    }

    auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
    assert((OpSize == 32 || OpSize == 64) && "Unsupported operand size");
    auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx);
    auto *RetTy = Type::getInt32Ty(Ctx);

    SmallVector<unsigned, 2> Results;
    for (auto Libcall : Libcalls) {
      auto LibcallResult = MRI.createGenericVirtualRegister(LLT::scalar(32));
      auto Status =
          createLibcall(MIRBuilder, Libcall.LibcallID, {LibcallResult, RetTy},
                        {{MI.getOperand(2).getReg(), ArgTy},
                         {MI.getOperand(3).getReg(), ArgTy}});

      if (Status != LegalizerHelper::Legalized)
        return false;

      auto ProcessedResult =
          Libcalls.size() == 1
              ? OriginalResult
              : MRI.createGenericVirtualRegister(MRI.getType(OriginalResult));

      // We have a result, but we need to transform it into a proper 1-bit 0 or
      // 1, taking into account the different peculiarities of the values
      // returned by the comparison functions.
      CmpInst::Predicate ResultPred = Libcall.Predicate;
      if (ResultPred == CmpInst::BAD_ICMP_PREDICATE) {
        // We have a nice 0 or 1, and we just need to truncate it back to 1 bit
        // to keep the types consistent.
        MIRBuilder.buildTrunc(ProcessedResult, LibcallResult);
      } else {
        // We need to compare against 0.
        assert(CmpInst::isIntPredicate(ResultPred) && "Unsupported predicate");
        auto Zero = MRI.createGenericVirtualRegister(LLT::scalar(32));
        MIRBuilder.buildConstant(Zero, 0);
        MIRBuilder.buildICmp(ResultPred, ProcessedResult, LibcallResult, Zero);
      }
      Results.push_back(ProcessedResult);
    }

    if (Results.size() != 1) {
      assert(Results.size() == 2 && "Unexpected number of results");
      MIRBuilder.buildOr(OriginalResult, Results[0], Results[1]);
    }
    break;
  }
  }

  MI.eraseFromParent();
  return true;
}