Exemplo n.º 1
0
/// Erase \p MI (and any PHIs that isDead reports as dead alongside it) when
/// isDead says the instruction has no remaining real users.
void PPCCTRLoops::removeIfDead(MachineInstr *MI) {
  // The logic below follows DeadMachineInstructionElim.

  SmallVector<MachineInstr *, 1> DeadPhis;
  if (!isDead(MI, DeadPhis))
    return;

  DEBUG(dbgs() << "CTR looping will remove: " << *MI);

  // Registers defined by MI may still be referenced by DBG_VALUE
  // instructions.  For every register def, walk its uses and reset the
  // register of any debug user to 0 (undef); the DBG_VALUE itself is kept.
  const unsigned NumOps = MI->getNumOperands();
  for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx) {
    const MachineOperand &DefMO = MI->getOperand(OpIdx);
    if (!(DefMO.isReg() && DefMO.isDef()))
      continue;

    const unsigned DefReg = DefMO.getReg();
    MachineRegisterInfo::use_iterator UI = MRI->use_begin(DefReg);
    MachineRegisterInfo::use_iterator UE = MRI->use_end();
    while (UI != UE) {
      // Step the iterator first: the setReg below invalidates the current
      // position.
      MachineRegisterInfo::use_iterator Cur = UI;
      UI = llvm::next(UI);

      MachineOperand &UseMO = Cur.getOperand();
      MachineInstr *User = UseMO.getParent();

      // Uses inside MI itself are ignored.  A debug use might also belong to
      // an instr -> phi -> instr chain, which can be removed as well.
      if (User != MI && UseMO.isDebug())
        User->getOperand(0).setReg(0U);
    }
  }

  MI->eraseFromParent();
  for (SmallVectorImpl<MachineInstr *>::iterator PI = DeadPhis.begin(),
       PE = DeadPhis.end(); PI != PE; ++PI)
    (*PI)->eraseFromParent();
}
Exemplo n.º 2
0
/// isLiveInButUnusedBefore - Return true if register is livein the MBB not
/// not used before it reaches the MI that defines register.
static bool isLiveInButUnusedBefore(unsigned Reg, MachineInstr *MI,
                                    MachineBasicBlock *MBB,
                                    const TargetRegisterInfo *TRI,
                                    MachineRegisterInfo* MRI) {
  // First check if register is livein.
  bool isLiveIn = false;
  for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
         E = MBB->livein_end(); I != E; ++I)
    if (Reg == *I || TRI->isSuperRegister(Reg, *I)) {
      isLiveIn = true;
      break;
    }
  if (!isLiveIn)
    return false;

  // Is there any use of it before the specified MI?
  SmallPtrSet<MachineInstr*, 4> UsesInMBB;
  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
         UE = MRI->use_end(); UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    if (UseMO.isReg() && UseMO.isUndef())
      continue;
    MachineInstr *UseMI = &*UI;
    if (UseMI->getParent() == MBB)
      UsesInMBB.insert(UseMI);
  }
  if (UsesInMBB.empty())
    return true;

  for (MachineBasicBlock::iterator I = MBB->begin(), E = MI; I != E; ++I)
    if (UsesInMBB.count(&*I))
      return false;
  return true;
}
Exemplo n.º 3
0
/// isProfitableToReMat - Return true if the heuristics determines it is likely
/// to be profitable to re-materialize the definition of Reg rather than copy
/// the register.
bool
TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
                                         const TargetRegisterClass *RC,
                                         MachineInstr *MI, MachineInstr *DefMI,
                                         MachineBasicBlock *MBB, unsigned Loc) {
  bool OtherUse = false;
  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
         UE = MRI->use_end(); UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    MachineInstr *UseMI = UseMO.getParent();
    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
      if (DI != DistanceMap.end() && DI->second == Loc)
        continue;  // Current use.
      OtherUse = true;
      // There is at least one other use in the MBB that will clobber the
      // register. 
      if (isTwoAddrUse(UseMI, Reg))
        return true;
    }
  }

  // If other uses in MBB are not two-address uses, then don't remat.
  if (OtherUse)
    return false;

  // No other uses in the same block, remat if it's defined in the same
  // block so it does not unnecessarily extend the live range.
  return MBB == DefMI->getParent();
}
Exemplo n.º 4
0
/// If \p MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0", rewrite the
/// uses of $dst to read the matching zero register directly.
///
/// Uses that are PHI operands or that are tied to a def operand are left
/// untouched.
///
/// \param MRI register info used to walk the use list of $dst.
/// \param MI  the candidate instruction; it is only inspected, not modified.
/// \return true if \p MI matched one of the two patterns (even when no use
///         was actually rewritten), false otherwise.
bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
                                              const MachineInstr& MI) {
  unsigned DstReg = 0, ZeroReg = 0;

  // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
  if ((MI.getOpcode() == Mips::ADDiu) &&
      (MI.getOperand(1).getReg() == Mips::ZERO) &&
      (MI.getOperand(2).getImm() == 0)) {
    DstReg = MI.getOperand(0).getReg();
    ZeroReg = Mips::ZERO;
  } else if ((MI.getOpcode() == Mips::DADDiu) &&
             (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
             (MI.getOperand(2).getImm() == 0)) {
    DstReg = MI.getOperand(0).getReg();
    ZeroReg = Mips::ZERO_64;
  }

  if (!DstReg)
    return false;

  // Replace uses with ZeroReg.
  for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
       E = MRI->use_end(); U != E;) {
    MachineOperand &MO = U.getOperand();
    unsigned OpNo = U.getOperandNo();
    MachineInstr *UseMI = MO.getParent();

    // Advance before touching the operand: setReg() moves MO out of DstReg's
    // use list, so incrementing the iterator afterwards would continue from
    // the wrong list.
    ++U;

    // Do not replace if it is a phi's operand or is tied to def operand.
    if (UseMI->isPHI() || UseMI->isRegTiedToDefOperand(OpNo))
      continue;

    MO.setReg(ZeroReg);
  }

  return true;
}
Exemplo n.º 5
0
/// isDead returns true if the instruction is dead
/// (this was essentially copied from DeadMachineInstructionElim::isDead, but
/// with special cases for inline asm, physical registers and instructions with
/// side effects removed)
bool PPCCTRLoops::isDead(const MachineInstr *MI,
                         SmallVector<MachineInstr *, 1> &DeadPhis) const {
  // Examine each operand.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isDef()) {
      unsigned Reg = MO.getReg();
      if (!MRI->use_nodbg_empty(Reg)) {
        // This instruction has users, but if the only user is the phi node for
        // the parent block, and the only use of that phi node is this
        // instruction, then this instruction is dead: both it (and the phi
        // node) can be removed.
        MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
        if (llvm::next(I) == MRI->use_end() &&
            I.getOperand().getParent()->isPHI()) {
          MachineInstr *OnePhi = I.getOperand().getParent();

          for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
            const MachineOperand &OPO = OnePhi->getOperand(j);
            if (OPO.isReg() && OPO.isDef()) {
              unsigned OPReg = OPO.getReg();

              MachineRegisterInfo::use_iterator nextJ;
              for (MachineRegisterInfo::use_iterator J = MRI->use_begin(OPReg),
                   E = MRI->use_end(); J!=E; J=nextJ) {
                nextJ = llvm::next(J);
                MachineOperand& Use = J.getOperand();
                MachineInstr *UseMI = Use.getParent();

                if (MI != UseMI) {
                  // The phi node has a user that is not MI, bail...
                  return false;
                }
              }
            }
          }

          DeadPhis.push_back(OnePhi);
        } else {
          // This def has a non-debug use. Don't delete the instruction!
          return false;
        }
      }
    }
  }

  // If there are no defs with uses, the instruction is dead.
  return true;
}
Exemplo n.º 6
0
/// Push the bit width attached to register operand \p MO onto every use of
/// the same register.  When updateBitWidth reports true for a use that sits
/// in a COPY or PHI, computeBitWidth is invoked on that instruction.
void BitLevelInfo::propagateBitWidth(MachineOperand &MO) {
  assert(MO.isReg() && "Wrong operand type!");

  unsigned RegNo = MO.getReg();
  unsigned char BitWidth = VInstrInfo::getBitWidth(MO);
  assert(BitWidth && "Bit width not available!");

  MachineRegisterInfo::use_iterator UI = MRI->use_begin(RegNo);
  MachineRegisterInfo::use_iterator UE = MRI->use_end();
  for (; UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();

    // Follow the def-use chain only where the width update took effect.
    if (!updateBitWidth(UseMO, BitWidth))
      continue;

    if (UI->isCopy() || UI->isPHI())
      computeBitWidth(&*UI);
  }
}
Exemplo n.º 7
0
/// When \p MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0", redirect
/// eligible uses of $dst to the corresponding zero register.
///
/// Returns false if MI matches neither pattern, true otherwise (regardless
/// of how many uses were rewritten).
bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
                                                const MachineInstr& MI) {
  unsigned DstReg = 0, ZeroReg = 0;

  // Recognise the 32-bit form first, then the 64-bit form.
  if (MI.getOpcode() == Mips::ADDiu) {
    if (MI.getOperand(1).getReg() == Mips::ZERO &&
        MI.getOperand(2).getImm() == 0) {
      DstReg = MI.getOperand(0).getReg();
      ZeroReg = Mips::ZERO;
    }
  } else if (MI.getOpcode() == Mips::DADDiu) {
    if (MI.getOperand(1).getReg() == Mips::ZERO_64 &&
        MI.getOperand(2).getImm() == 0) {
      DstReg = MI.getOperand(0).getReg();
      ZeroReg = Mips::ZERO_64;
    }
  }

  if (!DstReg)
    return false;

  // Walk the uses of $dst.  The iterator is stepped before the operand is
  // rewritten because setReg changes which use list the operand is on.
  MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg);
  MachineRegisterInfo::use_iterator E = MRI->use_end();
  while (U != E) {
    MachineOperand &UseMO = *U;
    unsigned OpNo = U.getOperandNo();
    MachineInstr *UseMI = UseMO.getParent();
    ++U;

    // Leave PHI operands, operands tied to a def, and pseudo instructions
    // alone.
    const bool Skip = UseMI->isPHI() || UseMI->isRegTiedToDefOperand(OpNo) ||
                      UseMI->isPseudo();
    if (Skip)
      continue;

    // Only rewrite when the operand's register class actually contains the
    // zero register.
    if (MRI->getRegClass(UseMO.getReg())->contains(ZeroReg))
      UseMO.setReg(ZeroReg);
  }

  return true;
}
Exemplo n.º 8
0
/// Query whether virtual register \p vreg is live into \p MBB.
///
/// From the way they are used here: `preorder` maps a block to an unsigned
/// number, `rev_preorder` maps such a number back to a block, `tv[MBB]` is a
/// sparse bit set of those numbers, and `rv[Block]` is a bit set that is
/// tested with the number of a use's block.  Their construction is not
/// visible in this function.
///
/// Returns false immediately when MBB's number is not strictly greater than
/// the defining block's number, or is greater than the largest number found
/// in the dominator subtree rooted at the defining block.
///
/// NOTE(review): the candidate-skipping logic at the bottom of the loop looks
/// unfinished (see the inline notes) -- confirm the intended algorithm before
/// relying on this function.
bool LazyLiveness::vregLiveIntoMBB(unsigned vreg, MachineBasicBlock* MBB) {
  MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
  
  // Block of the first def returned for vreg, and its number.
  MachineBasicBlock* DefMBB = MRI->def_begin(vreg)->getParent();
  unsigned def = preorder[DefMBB];
  // max_dom: largest number among all dominator-tree descendants of DefMBB
  // (the depth-first walk includes DefMBB's own node).
  unsigned max_dom = 0;
  for (df_iterator<MachineDomTreeNode*> DI = df_begin(MDT[DefMBB]),
       DE = df_end(MDT[DefMBB]); DI != DE; ++DI) {
    if (preorder[DI->getBlock()] > max_dom) {
      max_dom = preorder[(*DI)->getBlock()];
    }
  }
  
  // MBB must be numbered after the def block and within the range covered by
  // the def block's dominator subtree.
  if (preorder[MBB] <= def || max_dom < preorder[MBB])
    return false;
  
  // Skip the entries of tv[MBB] that are not greater than the def's number.
  SparseBitVector<128>::iterator I = tv[MBB].begin();
  while (I != tv[MBB].end() && *I <= def) ++I;
  // Examine candidates numbered below max_dom.
  while (I != tv[MBB].end() && *I < max_dom) {
    for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(vreg),
         UE = MachineRegisterInfo::use_end(); UI != UE; ++UI) {
      MachineBasicBlock* UseMBB = UI->getParent();
      // Live-in as soon as the candidate's rv set contains a use block.
      if (rv[rev_preorder[*I]].test(preorder[UseMBB]))
        return true;
      
      // NOTE(review): t_dom is never assigned below; the body of the `if`
      // overwrites max_dom instead, which also changes the outer loop bound.
      // This looks like it was meant to be `t_dom = ...` -- confirm.
      unsigned t_dom = 0;
      for (df_iterator<MachineDomTreeNode*> DI =
           df_begin(MDT[rev_preorder[*I]]), DE = df_end(MDT[rev_preorder[*I]]); 
           DI != DE; ++DI)
        if (preorder[DI->getBlock()] > t_dom) {
          max_dom = preorder[(*DI)->getBlock()];
        }
      // NOTE(review): I is reset for every use, inside the use loop.  Because
      // t_dom stays 0, the skip loop below never advances, so I always ends
      // up back at tv[MBB].begin() -- verify termination of the outer loop.
      I = tv[MBB].begin();
      while (I != tv[MBB].end() && *I < t_dom) ++I;
    }
  }
  
  return false;
}
Exemplo n.º 9
0
/// Try to fold \p OpToFold into operand \p UseOpIdx of \p UseMI.
///
/// Successful candidates are queued on \p FoldList through tryAddToFoldList;
/// nothing is rewritten here except that a COPY receiving an immediate is
/// switched to the MOV opcode chosen by TII->getMovOpcode and recorded in
/// \p CopiesToReplace.  For a REG_SEQUENCE user the fold is attempted on the
/// users of the sequence's result instead.
static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
                        unsigned UseOpIdx,
                        std::vector<FoldCandidate> &FoldList,
                        SmallVectorImpl<MachineInstr *> &CopiesToReplace,
                        const SIInstrInfo *TII, const SIRegisterInfo &TRI,
                        MachineRegisterInfo &MRI) {
  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);

  // FIXME: Fold operands with subregs.
  if (UseOp.isReg()) {
    const bool SubRegOfRegFold = UseOp.getSubReg() && OpToFold.isReg();
    if (SubRegOfRegFold || UseOp.isImplicit())
      return;
  }

  const bool IsImmFold = OpToFold.isImm();
  APInt FoldedImm;

  if (IsImmFold) {
    const unsigned SrcReg = UseOp.getReg();
    const TargetRegisterClass *SrcRC;
    if (TargetRegisterInfo::isVirtualRegister(SrcReg))
      SrcRC = MRI.getRegClass(SrcReg);
    else
      SrcRC = TRI.getPhysRegClass(SrcReg);

    FoldedImm = APInt(64, OpToFold.getImm());

    // Register class of the result of the instruction providing OpToFold.
    const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
    const TargetRegisterClass *FoldRC =
        TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);

    // A 64-bit constant read through a subregister is narrowed to the
    // matching 32-bit half.
    if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
      if (SrcRC->getSize() != 8)
        return;

      if (UseOp.getSubReg() != AMDGPU::sub0) {
        assert(UseOp.getSubReg() == AMDGPU::sub1);
        FoldedImm = FoldedImm.getHiBits(32);
      } else {
        FoldedImm = FoldedImm.getLoBits(32);
      }
    }

    // An immediate cannot live in a COPY; turn the COPY into a MOV first.
    if (UseMI->getOpcode() == AMDGPU::COPY) {
      const unsigned DstReg = UseMI->getOperand(0).getReg();
      const TargetRegisterClass *DstRC;
      if (TargetRegisterInfo::isVirtualRegister(DstReg))
        DstRC = MRI.getRegClass(DstReg);
      else
        DstRC = TRI.getPhysRegClass(DstReg);

      const unsigned MovOpc = TII->getMovOpcode(DstRC);
      if (MovOpc == AMDGPU::COPY)
        return;

      UseMI->setDesc(TII->get(MovOpc));
      CopiesToReplace.push_back(UseMI);
    }
  }

  // Special case for REG_SEQUENCE: literals can't be folded into
  // REG_SEQUENCE itself, so fold into the instructions that read the
  // matching subregister of its result.
  if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
    const unsigned SeqDstReg = UseMI->getOperand(0).getReg();
    const unsigned SeqSubIdx = UseMI->getOperand(UseOpIdx + 1).getImm();

    MachineRegisterInfo::use_iterator SeqUse = MRI.use_begin(SeqDstReg);
    MachineRegisterInfo::use_iterator SeqEnd = MRI.use_end();
    for (; SeqUse != SeqEnd; ++SeqUse) {
      MachineInstr *SeqUser = SeqUse->getParent();
      if (SeqUse->getSubReg() == SeqSubIdx)
        foldOperand(OpToFold, SeqUser, SeqUse.getOperandNo(), FoldList,
                    CopiesToReplace, TII, TRI, MRI);
    }
    return;
  }

  // Don't fold into target independent nodes.  Target independent opcodes
  // don't have defined register classes.
  const MCInstrDesc &UseDesc = UseMI->getDesc();
  if (UseDesc.isVariadic() || UseDesc.OpInfo[UseOpIdx].RegClass == -1)
    return;

  if (!IsImmFold) {
    // FIXME: We could try to change the instruction from 64-bit to 32-bit
    // to enable more folding opportunites.  The shrink operands pass
    // already does this.
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
    return;
  }

  MachineOperand ImmOp = MachineOperand::CreateImm(FoldedImm.getSExtValue());
  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
}
Exemplo n.º 10
0
/// Walk every instruction in \p MF and, for those accepted by isSafeToFold,
/// try to fold source operand 1 (an immediate or a virtual register without
/// subregister) into the users of the instruction's result register.
///
/// \return true if any instruction was modified (a COPY was turned into a
///         MOV, or updateOperand applied a fold); false if the function was
///         left untouched.
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  // Reported to the pass manager so it knows whether the function changed.
  bool Changed = false;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases.  A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;


      // We need mutate the operands of new mov instructions to add implicit
      // uses of EXEC, but adding them invalidates the use_iterator, so defer
      // this.
      SmallVector<MachineInstr *, 4> CopiesToReplace;

      // Collect fold candidates first; operands are only rewritten once the
      // use list walk is finished.
      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();

        foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
                    CopiesToReplace, TII, TRI, MRI);
      }

      // Every entry in CopiesToReplace already had its opcode changed by
      // foldOperand, so the function has been modified.
      if (!CopiesToReplace.empty())
        Changed = true;

      // Make sure we add EXEC uses to any new v_mov instructions created.
      for (MachineInstr *Copy : CopiesToReplace)
        Copy->addImplicitDefUseOperands(MF);

      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          Changed = true;
          // Clear kill flags.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return Changed;
}
Exemplo n.º 11
0
/// Attempt to fold \p OpToFold into operand \p UseOpIdx of \p UseMI.
///
/// Folds are not applied here; accepted candidates are queued on \p FoldList
/// through tryAddToFoldList.  The one in-place change is that a COPY which
/// is to receive an immediate is switched to the MOV opcode returned by
/// TII->getMovOpcode and remembered in \p CopiesToReplace.  When \p UseMI is
/// a REG_SEQUENCE the fold is retried on the users of its result.
void SIFoldOperands::foldOperand(
  MachineOperand &OpToFold,
  MachineInstr *UseMI,
  unsigned UseOpIdx,
  SmallVectorImpl<FoldCandidate> &FoldList,
  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);

  if (!isUseSafeToFold(TII, *UseMI, UseOp))
    return;

  // FIXME: Fold operands with subregs.
  const bool RegIntoReg = UseOp.isReg() && OpToFold.isReg();
  if (RegIntoReg) {
    if (UseOp.isImplicit())
      return;
    if (UseOp.getSubReg() != AMDGPU::NoSubRegister)
      return;

    // A subregister extract must not be folded into a tied operand; only a
    // full copy may be, because a subregister use tied to a full register
    // def doesn't really make sense.  E.g. do not turn
    //
    //   %vreg1 = COPY %vreg0:sub1
    //   %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg1<tied0>
    //
    // into
    //
    //   %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg0:sub1<tied0>
    if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
      return;
  }

  // Special case for REG_SEQUENCE: literals can't be folded into
  // REG_SEQUENCE itself, so fold into the instructions that read the
  // matching subregister of its result.
  if (UseMI->isRegSequence()) {
    const unsigned SeqDstReg = UseMI->getOperand(0).getReg();
    const unsigned SeqSubIdx = UseMI->getOperand(UseOpIdx + 1).getImm();

    MachineRegisterInfo::use_iterator SeqUse = MRI->use_begin(SeqDstReg);
    MachineRegisterInfo::use_iterator SeqEnd = MRI->use_end();
    for (; SeqUse != SeqEnd; ++SeqUse) {
      MachineInstr *SeqUser = SeqUse->getParent();
      if (SeqUse->getSubReg() == SeqSubIdx)
        foldOperand(OpToFold, SeqUser, SeqUse.getOperandNo(), FoldList,
                    CopiesToReplace);
    }

    return;
  }

  const bool IsImmFold = OpToFold.isImm();

  if (IsImmFold && UseMI->isCopy()) {
    // An immediate cannot live in a COPY; turn the COPY into a MOV first.
    const unsigned DstReg = UseMI->getOperand(0).getReg();
    const TargetRegisterClass *DstRC;
    if (TargetRegisterInfo::isVirtualRegister(DstReg))
      DstRC = MRI->getRegClass(DstReg);
    else
      DstRC = TRI->getPhysRegClass(DstReg);

    const unsigned MovOpc = TII->getMovOpcode(DstRC);
    if (MovOpc == AMDGPU::COPY)
      return;

    UseMI->setDesc(TII->get(MovOpc));
    CopiesToReplace.push_back(UseMI);
  } else {
    // Don't fold into target independent nodes.  Target independent opcodes
    // don't have defined register classes.
    const MCInstrDesc &UseDesc = UseMI->getDesc();
    if (UseDesc.isVariadic() || UseDesc.OpInfo[UseOpIdx].RegClass == -1)
      return;
  }

  if (!IsImmFold) {
    // FIXME: We could try to change the instruction from 64-bit to 32-bit
    // to enable more folding opportunites.  The shrink operands pass
    // already does this.
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
    return;
  }

  // Register class of the result of the instruction providing OpToFold.
  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
  const TargetRegisterClass *FoldRC =
    TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);

  // Anything that is not a subregister read of a 64-bit constant is queued
  // unchanged.
  const bool SplitImm =
    UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64;
  if (!SplitImm) {
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
    return;
  }

  // Split 64-bit constants into 32-bits for folding.
  const unsigned SrcReg = UseOp.getReg();
  const TargetRegisterClass *SrcRC;
  if (TargetRegisterInfo::isVirtualRegister(SrcReg))
    SrcRC = MRI->getRegClass(SrcReg);
  else
    SrcRC = TRI->getPhysRegClass(SrcReg);

  if (AMDGPU::getRegBitWidth(SrcRC->getID()) != 64)
    return;

  APInt Half(64, OpToFold.getImm());
  if (UseOp.getSubReg() != AMDGPU::sub0) {
    assert(UseOp.getSubReg() == AMDGPU::sub1);
    Half = Half.getHiBits(32);
  } else {
    Half = Half.getLoBits(32);
  }

  MachineOperand ImmOp = MachineOperand::CreateImm(Half.getSExtValue());
  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
}
Exemplo n.º 12
0
/// Fold \p OpToFold into the users of the register defined by operand 0 of
/// \p MI.
///
/// Candidates are first collected through foldOperand and only afterwards
/// applied with updateOperand.  For immediates and frame indexes, uses that
/// isInlineConstantIfFolded accepts are all attempted, while the remaining
/// (literal) uses are attempted only if there is exactly one of them.  For
/// registers every use is attempted.
///
/// \param MI        instruction whose operand 0 register has its uses visited.
/// \param OpToFold  the operand to propagate into those uses.
void SIFoldOperands::foldInstOperand(MachineInstr &MI,
                                     MachineOperand &OpToFold) const {
  // We need mutate the operands of new mov instructions to add implicit
  // uses of EXEC, but adding them invalidates the use_iterator, so defer
  // this.
  SmallVector<MachineInstr *, 4> CopiesToReplace;
  SmallVector<FoldCandidate, 4> FoldList;
  MachineOperand &Dst = MI.getOperand(0);

  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
  if (FoldingImm) {
    // Number of uses that were not accepted as inline constants, and the
    // first such use (operand plus operand index).
    unsigned NumLiteralUses = 0;
    MachineOperand *NonInlineUse = nullptr;
    int NonInlineUseOpNo = -1;

    // NOTE(review): NextInstUse is declared but never used in this function.
    MachineRegisterInfo::use_iterator NextUse, NextInstUse;
    for (MachineRegisterInfo::use_iterator
           Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
         Use != E; Use = NextUse) {
      // The successor is fetched up front; the constant-folding path below
      // replaces it to restart the walk.
      NextUse = std::next(Use);
      MachineInstr *UseMI = Use->getParent();
      unsigned OpNo = Use.getOperandNo();

      // Folding the immediate may reveal operations that can be constant
      // folded or replaced with a copy. This can happen for example after
      // frame indices are lowered to constants or from splitting 64-bit
      // constants.
      //
      // We may also encounter cases where one or both operands are
      // immediates materialized into a register, which would ordinarily not
      // be folded due to multiple uses or operand constraints.

      if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
        DEBUG(dbgs() << "Constant folded " << *UseMI <<'\n');

        // Some constant folding cases change the same immediate's use to a new
        // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
        // again. The same constant folded instruction could also have a second
        // use operand.
        NextUse = MRI->use_begin(Dst.getReg());
        continue;
      }

      // Try to fold any inline immediate uses, and then only fold other
      // constants if they have one use.
      //
      // The legality of the inline immediate must be checked based on the use
      // operand, not the defining instruction, because 32-bit instructions
      // with 32-bit inline immediate sources may be used to materialize
      // constants used in 16-bit operands.
      //
      // e.g. it is unsafe to fold:
      //  s_mov_b32 s0, 1.0    // materializes 0x3f800000
      //  v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases. A better heuristic is needed.
      if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
        foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
      } else {
        // Remember only the first literal use; the count decides below
        // whether it gets folded at all.
        if (++NumLiteralUses == 1) {
          NonInlineUse = &*Use;
          NonInlineUseOpNo = OpNo;
        }
      }
    }

    // A literal is folded only when exactly one use needs it.
    if (NumLiteralUses == 1) {
      MachineInstr *UseMI = NonInlineUse->getParent();
      foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
    }
  } else {
    // Folding register.
    for (MachineRegisterInfo::use_iterator
           Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
         Use != E; ++Use) {
      MachineInstr *UseMI = Use->getParent();

      foldOperand(OpToFold, UseMI, Use.getOperandNo(),
                  FoldList, CopiesToReplace);
    }
  }

  MachineFunction *MF = MI.getParent()->getParent();
  // Make sure we add EXEC uses to any new v_mov instructions created.
  for (MachineInstr *Copy : CopiesToReplace)
    Copy->addImplicitDefUseOperands(*MF);

  // Apply the collected candidates.
  for (FoldCandidate &Fold : FoldList) {
    if (updateOperand(Fold, *TRI)) {
      // Clear kill flags.
      if (Fold.isReg()) {
        assert(Fold.OpToFold && Fold.OpToFold->isReg());
        // FIXME: Probably shouldn't bother trying to fold if not an
        // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
        // copies.
        MRI->clearKillFlags(Fold.OpToFold->getReg());
      }
      DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
            static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
      tryFoldInst(TII, Fold.UseMI);
    } else if (Fold.isCommuted()) {
      // Restoring instruction's original operand order if fold has failed.
      TII->commuteInstruction(*Fold.UseMI, false);
    }
  }
}
Exemplo n.º 13
0
/// Walk every instruction in \p MF and, for those accepted by isSafeToFold,
/// try to fold source operand 1 (an immediate or a virtual register without
/// subregister) into the users of the instruction's result register.
///
/// Candidates are collected per instruction with tryAddToFoldList and then
/// applied with updateOperand.  A COPY that is to receive an immediate is
/// first switched to the MOV opcode returned by TII->getMovOpcode.
///
/// \return true if any instruction was modified (a COPY was turned into a
///         MOV, or updateOperand applied a fold); false if the function was
///         left untouched.
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  // Reported to the pass manager so it knows whether the function changed.
  bool Changed = false;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases.  A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();
        const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());

        // FIXME: Fold operands with subregs.
        if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
            UseOp.isImplicit())) {
          continue;
        }

        APInt Imm;

        if (FoldingImm) {
          // TRI.getRegClass() takes a register-class ID, not a register, so
          // physical registers must go through getPhysRegClass().
          unsigned UseReg = UseOp.getReg();
          const TargetRegisterClass *UseRC
            = TargetRegisterInfo::isVirtualRegister(UseReg) ?
            MRI.getRegClass(UseReg) :
            TRI.getPhysRegClass(UseReg);

          Imm = APInt(64, OpToFold.getImm());

          // Split 64-bit constants into 32-bits for folding.
          if (UseOp.getSubReg()) {
            if (UseRC->getSize() != 8)
              continue;

            if (UseOp.getSubReg() == AMDGPU::sub0) {
              Imm = Imm.getLoBits(32);
            } else {
              assert(UseOp.getSubReg() == AMDGPU::sub1);
              Imm = Imm.getHiBits(32);
            }
          }

          // In order to fold immediates into copies, we need to change the
          // copy to a MOV.
          if (UseMI->getOpcode() == AMDGPU::COPY) {
            unsigned DestReg = UseMI->getOperand(0).getReg();
            const TargetRegisterClass *DestRC
              = TargetRegisterInfo::isVirtualRegister(DestReg) ?
              MRI.getRegClass(DestReg) :
              TRI.getPhysRegClass(DestReg);

            unsigned MovOp = TII->getMovOpcode(DestRC);
            if (MovOp == AMDGPU::COPY)
              continue;

            // The instruction is rewritten right here, independently of
            // whether the fold below ends up being applied.
            UseMI->setDesc(TII->get(MovOp));
            Changed = true;
          }
        }

        const MCInstrDesc &UseDesc = UseMI->getDesc();

        // Don't fold into target independent nodes.  Target independent opcodes
        // don't have defined register classes.
        if (UseDesc.isVariadic() ||
            UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
          continue;

        if (FoldingImm) {
          MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
          tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
          continue;
        }

        tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);

        // FIXME: We could try to change the instruction from 64-bit to 32-bit
        // to enable more folding opportunites.  The shrink operands pass
        // already does this.
      }

      // Apply the collected candidates now that the use-list walk is over.
      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          Changed = true;
          // Clear kill flags.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return Changed;
}
Exemplo n.º 14
0
bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
  // The pattern of interest is an S-register write whose value is later read
  // through a D/Q register. Only the COPY, INSERT_SUBREG and REG_SEQUENCE
  // pseudos can produce it: they either place an SPR value into a DPR
  // register or combine two SPR values into a DPR register. To avoid false
  // positives an actual SPR producer must exist, so the search looks through
  // COPY and PHI nodes until it finds one.
  //
  // Which replacement sequence is best depends on whether the remaining lanes
  // of the DPR/QPR being consumed are defined, and on the pseudo-instruction
  // that created the pattern:
  //
  //   * COPY:          * VDUP all lanes and merge the results together
  //                      using VEXTs.
  //
  //   * INSERT_SUBREG: * If the SPR value originally lived in another DPR/QPR
  //                      lane and the other lane(s) of the destination
  //                      DPR/QPR are undefined, use the original DPR/QPR
  //                      value.
  //                    * Otherwise, use the same strategy as COPY.
  //
  //   * REG_SEQUENCE:  * If all but one of the inputs are IMPLICIT_DEFs,
  //                      emit the VDUP pattern for the one defined input.
  //                    * Otherwise, use the same strategy as COPY.
  //

  // Start from every D-register this instruction reads.
  SmallVector<unsigned, 8> ReadDPRs = getReadDPRs(MI);
  bool Modified = false;

  for (unsigned RegIdx = 0, NumRegs = ReadDPRs.size(); RegIdx != NumRegs;
       ++RegIdx) {
    const unsigned DPR = ReadDPRs[RegIdx];

    // Only virtual registers that have a defining instruction are candidates.
    if (!TRI->isVirtualRegister(DPR))
      continue;
    MachineInstr *Def = MRI->getVRegDef(DPR);
    if (!Def)
      continue;

    // Walk the def chain through COPYs and PHIs (PHIs being multi-way
    // COPYs). PHIs are the reason one DPR can map to several sources.
    SmallVector<MachineInstr *, 8> DefSrcs;
    elideCopiesAndPHIs(Def, DefSrcs);

    for (unsigned SrcIdx = 0, NumSrcs = DefSrcs.size(); SrcIdx != NumSrcs;
         ++SrcIdx) {
      MachineInstr *SrcMI = DefSrcs[SrcIdx];

      // Skip sources that were already analyzed (and possibly replaced).
      if (Replacements.count(SrcMI))
        continue;

      // Only instructions that introduce an SPR->DPR dependency matter.
      if (!hasPartialWrite(SrcMI))
        continue;

      // Snapshot the uses of the DPR defined by SrcMI before optimizing, so
      // they can be redirected afterwards.
      SmallVector<MachineOperand*, 8> Uses;
      unsigned DPRDefReg = SrcMI->getOperand(0).getReg();
      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(DPRDefReg),
             UE = MRI->use_end(); UI != UE; ++UI)
        Uses.push_back(&UI.getOperand());

      // Attempt the rewrite; a result of 0 means nothing was produced.
      unsigned NewReg = optimizeSDPattern(SrcMI);

      if (NewReg != 0) {
        Modified = true;
        for (unsigned UseIdx = 0, NumUses = Uses.size(); UseIdx != NumUses;
             ++UseIdx) {
          MachineOperand *UseOp = Uses[UseIdx];

          // Constrain the new register's class to the class of the register
          // being replaced. Without this a DPR_VFP2 reference could be turned
          // into a plain DPR. NewReg is always virtual here, so a matching
          // subclass always exists.
          MRI->constrainRegClass(NewReg, MRI->getRegClass(UseOp->getReg()));

          DEBUG(dbgs() << "Replacing operand "
                       << *UseOp << " with "
                       << PrintReg(NewReg) << "\n");
          UseOp->substVirtReg(NewReg, 0, *TRI);
        }
      }

      // Remember the outcome (including 0) so SrcMI is not revisited.
      Replacements[SrcMI] = NewReg;
    }
  }
  return Modified;
}
Exemplo n.º 15
0
/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
/// there is one implicit_def for each use. Add isUndef marker to
/// implicit_def defs and their uses.
///
/// Blocks are visited depth-first starting at the entry block. Within each
/// block the instructions are scanned once to propagate "implicitly defined"
/// state, then the IMPLICIT_DEFs collected from that block are cleaned up.
/// Returns true if any instruction or operand was modified or erased.
bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {

  DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
               << "********** Function: "
               << ((Value*)fn.getFunction())->getName() << '\n');

  bool Changed = false;

  // Cache the target hooks and analyses used below in the pass members.
  TII = fn.getTarget().getInstrInfo();
  TRI = fn.getTarget().getRegisterInfo();
  MRI = &fn.getRegInfo();
  LV = &getAnalysis<LiveVariables>();

  // ImpDefRegs: registers currently considered implicitly defined while
  //             scanning the current block (cleared after each block).
  // ImpDefMIs:  IMPLICIT_DEF instructions seen in the current block (cleared
  //             after each block).
  // RUses/ModInsts: scratch list/set of distinct instructions using a
  //             register, reset after each IMPLICIT_DEF is processed.
  // Visited:    block set shared with the depth-first iterator.
  SmallSet<unsigned, 8> ImpDefRegs;
  SmallVector<MachineInstr*, 8> ImpDefMIs;
  SmallVector<MachineInstr*, 4> RUses;
  SmallPtrSet<MachineBasicBlock*,16> Visited;
  SmallPtrSet<MachineInstr*, 8> ModInsts;

  MachineBasicBlock *Entry = fn.begin();
  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
       DFI != E; ++DFI) {
    MachineBasicBlock *MBB = *DFI;
    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
         I != E; ) {
      MachineInstr *MI = &*I;
      // Advance first: MI may be erased or rewritten below.
      ++I;
      if (MI->isImplicitDef()) {
        ImpDefMIs.push_back(MI);
        // Is this a sub-register read-modify-write?
        if (MI->getOperand(0).readsReg())
          continue;
        unsigned Reg = MI->getOperand(0).getReg();
        ImpDefRegs.insert(Reg);
        // For a physical register, its sub-registers are implicitly defined
        // as well.
        if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
          for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
            ImpDefRegs.insert(*SS);
        }
        continue;
      }

      // Eliminate %reg1032:sub<def> = COPY undef.
      if (MI->isCopy() && MI->getOperand(0).readsReg()) {
        MachineOperand &MO = MI->getOperand(1);
        if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
          // The copy is going away, so it can no longer be recorded as a
          // kill of its source.
          if (MO.isKill()) {
            LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
            vi.removeKill(MI);
          }
          // Read the destination register before MI is destroyed.
          unsigned Reg = MI->getOperand(0).getReg();
          MI->eraseFromParent();
          Changed = true;

          // A REG_SEQUENCE may have been expanded into partial definitions.
          // If this was the last one, mark Reg as implicitly defined.
          if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->def_empty(Reg))
            ImpDefRegs.insert(Reg);
          continue;
        }
      }

      // Examine every register operand of MI that reads an implicitly
      // defined register.
      bool ChangedToImpDef = false;
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        MachineOperand& MO = MI->getOperand(i);
        if (!MO.isReg() || !MO.readsReg())
          continue;
        unsigned Reg = MO.getReg();
        if (!Reg)
          continue;
        if (!ImpDefRegs.count(Reg))
          continue;
        // Use is a copy, just turn it into an implicit_def.
        if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) {
          // Capture the kill flag before the operand is removed.
          bool isKill = MO.isKill();
          MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
          // Strip every operand except operand 0, highest index first.
          for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
            MI->RemoveOperand(j);
          if (isKill) {
            ImpDefRegs.erase(Reg);
            LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
            vi.removeKill(MI);
          }
          ChangedToImpDef = true;
          Changed = true;
          // MI's operand list changed; stop scanning it.
          break;
        }

        Changed = true;
        MO.setIsUndef();
        // This is a partial register redef of an implicit def.
        // Make sure the whole register is defined by the instruction.
        if (MO.isDef()) {
          MI->addRegisterDefined(Reg);
          continue;
        }
        if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
          // Make sure other reads of Reg are also marked <undef>.
          for (unsigned j = i+1; j != e; ++j) {
            MachineOperand &MOJ = MI->getOperand(j);
            if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg())
              MOJ.setIsUndef();
          }
          // Reg is dropped from the implicitly-defined set after this
          // instruction.
          ImpDefRegs.erase(Reg);
        }
      }

      if (ChangedToImpDef) {
        // Backtrack to process this new implicit_def.
        --I;
      } else {
        // Any register defined by MI is no longer treated as implicitly
        // defined. getNumOperands() is re-read each iteration here.
        for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
          MachineOperand& MO = MI->getOperand(i);
          if (!MO.isReg() || !MO.isDef())
            continue;
          ImpDefRegs.erase(MO.getReg());
        }
      }
    }

    // Any outstanding liveout implicit_def's?
    for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) {
      MachineInstr *MI = ImpDefMIs[i];
      unsigned Reg = MI->getOperand(0).getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
          !ImpDefRegs.count(Reg)) {
        // Delete all "local" implicit_def's. That include those which define
        // physical registers since they cannot be liveout.
        MI->eraseFromParent();
        Changed = true;
        continue;
      }

      // If there are multiple defs of the same register and at least one
      // is not an implicit_def, do not insert implicit_def's before the
      // uses.
      bool Skip = false;
      SmallVector<MachineInstr*, 4> DeadImpDefs;
      for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
             DE = MRI->def_end(); DI != DE; ++DI) {
        MachineInstr *DeadImpDef = &*DI;
        if (!DeadImpDef->isImplicitDef()) {
          Skip = true;
          break;
        }
        DeadImpDefs.push_back(DeadImpDef);
      }
      if (Skip)
        continue;

      // The only implicit_def which we want to keep are those that are live
      // out of its block.
      // NOTE(review): DeadImpDefs holds every def of Reg, so if Reg had more
      // than one IMPLICIT_DEF in this block, a later ImpDefMIs entry would be
      // erased here and then dereferenced above - confirm this cannot occur.
      for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j)
        DeadImpDefs[j]->eraseFromParent();
      Changed = true;

      // Process each use instruction once.
      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
             UE = MRI->use_end(); UI != UE; ++UI) {
        // Operands already marked <undef> need no further rewriting.
        if (UI.getOperand().isUndef())
          continue;
        MachineInstr *RMI = &*UI;
        // ModInsts de-duplicates instructions that use Reg more than once.
        if (ModInsts.insert(RMI))
          RUses.push_back(RMI);
      }

      for (unsigned i = 0, e = RUses.size(); i != e; ++i) {
        MachineInstr *RMI = RUses[i];

        // Turn a copy use into an implicit_def.
        if (isUndefCopy(RMI, Reg, ImpDefRegs)) {
          RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));

          // Collect the indices of all operands that refer to Reg and note
          // whether any of them carried a kill flag.
          bool isKill = false;
          SmallVector<unsigned, 4> Ops;
          for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
            MachineOperand &RRMO = RMI->getOperand(j);
            if (RRMO.isReg() && RRMO.getReg() == Reg) {
              Ops.push_back(j);
              if (RRMO.isKill())
                isKill = true;
            }
          }
          // Leave the other operands alone.
          // Indices were collected in ascending order; each earlier removal
          // shifts the remaining ones down by one, hence OpIdx-j.
          for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) {
            unsigned OpIdx = Ops[j];
            RMI->RemoveOperand(OpIdx-j);
          }

          // Update LiveVariables varinfo if the instruction is a kill.
          if (isKill) {
            LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
            vi.removeKill(RMI);
          }
          continue;
        }

        // Replace Reg with a new vreg that's marked implicit.
        const TargetRegisterClass* RC = MRI->getRegClass(Reg);
        unsigned NewVReg = MRI->createVirtualRegister(RC);
        bool isKill = true;
        for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
          MachineOperand &RRMO = RMI->getOperand(j);
          if (RRMO.isReg() && RRMO.getReg() == Reg) {
            RRMO.setReg(NewVReg);
            RRMO.setIsUndef();
            if (isKill) {
              // Only the first operand of NewVReg is marked kill.
              RRMO.setIsKill();
              isKill = false;
            }
          }
        }
      }
      // Reset the per-register scratch state for the next IMPLICIT_DEF.
      RUses.clear();
      ModInsts.clear();
    }
    // Implicit-def tracking is per block; start fresh for the next one.
    ImpDefRegs.clear();
    ImpDefMIs.clear();
  }

  return Changed;
}
Exemplo n.º 16
0
/// Sink3AddrInstruction - A two-address instruction has been converted to a
/// three-address instruction to avoid clobbering a register. Try to sink it
/// past the instruction that would kill the above mentioned register to reduce
/// register pressure.
///
/// \param MBB      block that contains MI.
/// \param MI       the converted three-address instruction to sink.
/// \param SavedReg the register that the conversion avoided clobbering.
/// \param OldPos   position from which the scan towards the kill starts (the
///                 scan begins at the instruction following OldPos).
/// \returns true if MI was moved to just after the instruction that kills
///          SavedReg, false if sinking was rejected. Nothing is modified when
///          false is returned.
bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
                                           MachineInstr *MI, unsigned SavedReg,
                                           MachineBasicBlock::iterator OldPos) {
  // Check if it's safe to move this instruction.
  bool SeenStore = true; // Be conservative.
  if (!MI->isSafeToMove(TII, SeenStore, AA))
    return false;

  // Collect the single explicit def of MI and the registers it uses (other
  // than SavedReg).
  unsigned DefReg = 0;
  SmallSet<unsigned, 4> UseRegs;

  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    unsigned MOReg = MO.getReg();
    if (!MOReg)
      continue;
    if (MO.isUse() && MOReg != SavedReg)
      UseRegs.insert(MOReg);
    if (!MO.isDef())
      continue;
    if (MO.isImplicit())
      // Don't try to move it if it implicitly defines a register.
      return false;
    if (DefReg)
      // For now, don't move any instructions that define multiple registers.
      return false;
    DefReg = MOReg;
  }

  // Find the instruction that kills SavedReg: the first use operand in the
  // use list that carries a kill flag.
  MachineInstr *KillMI = NULL;
  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SavedReg),
         UE = MRI->use_end(); UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    if (!UseMO.isKill())
      continue;
    KillMI = UseMO.getParent();
    break;
  }

  if (!KillMI || KillMI->getParent() != MBB || KillMI == MI)
    return false;

  // If any of the definitions are used by another instruction between the
  // position and the kill use, then it's not safe to sink it.
  //
  // FIXME: This can be sped up if there is an easy way to query whether an
  // instruction is before or after another instruction. Then we can use
  // MachineRegisterInfo def / use instead.
  MachineOperand *KillMO = NULL;
  MachineBasicBlock::iterator KillPos = KillMI;
  ++KillPos;

  unsigned NumVisited = 0;
  MachineBasicBlock::iterator BlockEnd = MBB->end();
  for (MachineBasicBlock::iterator I = next(OldPos); I != KillPos; ++I) {
    // KillMI is in this block but was not reached before the end of the
    // block, so it does not follow OldPos. Bail out instead of walking past
    // the end of the instruction list.
    if (I == BlockEnd)
      return false;
    MachineInstr *OtherMI = I;
    if (NumVisited > 30)  // FIXME: Arbitrary limit to reduce compile time cost.
      return false;
    ++NumVisited;
    for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = OtherMI->getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned MOReg = MO.getReg();
      if (!MOReg)
        continue;
      if (DefReg == MOReg)
        return false;

      if (MO.isKill()) {
        if (OtherMI == KillMI && MOReg == SavedReg)
          // Save the operand that kills the register. We want to unset the kill
          // marker if we can sink MI past it.
          KillMO = &MO;
        else if (UseRegs.count(MOReg))
          // One of the uses is killed before the destination.
          return false;
      }
    }
  }

  // The scan may finish without having seen the killing operand (for example
  // when KillPos immediately follows OldPos). Refuse to sink rather than
  // dereference a null pointer.
  if (!KillMO)
    return false;

  // MI is expected to read SavedReg; if no such use operand exists there is
  // nothing to transfer the kill flag to, so leave everything untouched.
  MachineOperand *NewKillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
  if (!NewKillMO)
    return false;

  // Update kill and LV information: MI becomes the new killer of SavedReg.
  KillMO->setIsKill(false);
  NewKillMO->setIsKill(true);

  if (LV)
    LV->replaceKillInstruction(SavedReg, KillMI, MI);

  // Move instruction to its destination.
  MBB->remove(MI);
  MBB->insert(KillPos, MI);

  ++Num3AddrSunk;
  return true;
}
Exemplo n.º 17
0
/// TailDuplicateBlocks - Visit every block after the entry block and try to
/// tail-duplicate it into its predecessors (blocks that are small, reached by
/// unconditional branches and do not fall through are the intended
/// candidates), removing dynamic branches. After each successful duplication
/// the successors' PHIs, SSA form and the inserted copies are cleaned up.
/// Returns true if any block was duplicated.
bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
  bool MadeChange = false;

  if (PreRegAlloc && TailDupVerify) {
    DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
    VerifyPHIs(MF, true);
  }

  SmallVector<MachineInstr*, 8> NewPHIs;
  MachineSSAUpdater SSAUpdate(MF, &NewPHIs);

  // The entry block is skipped. The iterator is advanced before the block is
  // processed because the block may be removed below.
  for (MachineFunction::iterator BlockIt = ++MF.begin(), BlockEnd = MF.end();
       BlockIt != BlockEnd; ) {
    MachineBasicBlock *MBB = BlockIt++;

    // Stop once the configured number of duplications has been reached.
    if (NumTails == TailDupLimit)
      break;

    // Snapshot the successors before the block is modified.
    SmallSetVector<MachineBasicBlock*, 8> SavedSuccs(MBB->succ_begin(),
                                                     MBB->succ_end());

    SmallVector<MachineBasicBlock*, 8> DupPreds;
    SmallVector<MachineInstr*, 16> NewCopies;
    if (!TailDuplicate(MBB, MF, DupPreds, NewCopies))
      continue;

    ++NumTails;

    // The predecessors that received a copy of this block now branch to its
    // immediate successors, so they must be added as PHI sources there.
    const bool BlockIsDead = MBB->pred_empty();
    if (PreRegAlloc)
      UpdateSuccessorsPHIs(MBB, BlockIsDead, DupPreds, SavedSuccs);

    // A block left without predecessors is deleted.
    if (BlockIsDead) {
      NumInstrDups -= MBB->size();
      RemoveDeadBlock(MBB);
      ++NumDeadBlocks;
    }

    // Restore SSA form for every virtual register that was duplicated.
    if (!SSAUpdateVRs.empty()) {
      for (unsigned VRIdx = 0, NumVRs = SSAUpdateVRs.size(); VRIdx != NumVRs;
           ++VRIdx) {
        const unsigned VReg = SSAUpdateVRs[VRIdx];
        SSAUpdate.Initialize(VReg);

        // The original definition, if it still exists, is one available
        // value.
        MachineBasicBlock *DefBB = 0;
        if (MachineInstr *DefMI = MRI->getVRegDef(VReg)) {
          DefBB = DefMI->getParent();
          SSAUpdate.AddAvailableValue(DefBB, VReg);
        }

        // The registers created by duplication are the other available
        // values.
        DenseMap<unsigned, AvailableValsTy>::iterator ValsIt =
          SSAUpdateVals.find(VReg);
        for (unsigned ValIdx = 0, NumVals = ValsIt->second.size();
             ValIdx != NumVals; ++ValIdx) {
          MachineBasicBlock *SrcBB = ValsIt->second[ValIdx].first;
          unsigned SrcReg = ValsIt->second[ValIdx].second;
          SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
        }

        // Rewrite the uses that live outside the original def's block. The
        // iterator is stepped before the use is touched because rewriting
        // or erasing alters the use list.
        for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
             UI != MRI->use_end(); ) {
          MachineOperand &UseMO = UI.getOperand();
          MachineInstr *UseMI = &*UI;
          ++UI;

          if (UseMI->isDebugValue()) {
            // SSAUpdate can replace the use with an undef, which would leave
            // a debug instruction that is a kill, so drop the DBG_VALUE.
            // FIXME: Should it be SSAUpdate's job to delete debug
            // instructions instead of replacing the use with undef?
            UseMI->eraseFromParent();
            continue;
          }

          // Non-PHI uses in the defining block keep the original value.
          const bool StaysInDefBlock =
            UseMI->getParent() == DefBB && !UseMI->isPHI();
          if (!StaysInDefBlock)
            SSAUpdate.RewriteUse(UseMO);
        }
      }

      SSAUpdateVRs.clear();
      SSAUpdateVals.clear();
    }

    // Remove the copies inserted to maintain SSA form where they turned out
    // to be trivial.
    for (unsigned CopyIdx = 0, NumCopies = NewCopies.size();
         CopyIdx != NumCopies; ++CopyIdx) {
      MachineInstr *Copy = NewCopies[CopyIdx];
      if (!Copy->isCopy())
        continue;
      unsigned Dst = Copy->getOperand(0).getReg();
      unsigned Src = Copy->getOperand(1).getReg();
      MachineRegisterInfo::use_iterator SecondUse = MRI->use_begin(Src);
      ++SecondUse;
      if (SecondUse == MRI->use_end()) {
        // The copy is the sole use of Src: propagate it and delete the copy.
        MRI->replaceRegWith(Dst, Src);
        Copy->eraseFromParent();
      }
    }

    if (PreRegAlloc && TailDupVerify)
      VerifyPHIs(MF, false);
    MadeChange = true;
  }
  NumAddedPHIs += NewPHIs.size();

  return MadeChange;
}
/// Delete dead machine instructions from MF. Blocks and the instructions in
/// them are visited bottom-up while the set of live physical registers is
/// tracked. Returns true if at least one instruction was deleted.
bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
  bool AnyChanges = false;
  MRI = &MF.getRegInfo();
  TRI = MF.getTarget().getRegisterInfo();
  TII = MF.getTarget().getInstrInfo();

  // Reserved registers are considered live everywhere.
  BitVector ReservedRegs = TRI->getReservedRegs(MF);

  // Walking bottom-to-top makes it more likely that whole chains of
  // dependent, ultimately dead instructions get removed.
  for (MachineFunction::reverse_iterator BI = MF.rbegin(), BE = MF.rend();
       BI != BE; ++BI) {
    MachineBasicBlock *MBB = &*BI;

    // At the bottom of each block only the reserved registers are assumed
    // live...
    LivePhysRegs = ReservedRegs;

    // ...plus, for a block ending in a return, the function's explicit
    // live-out physregs.
    if (!MBB->empty() && MBB->back().getDesc().isReturn()) {
      for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
           LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
        unsigned LiveOutReg = *LOI;
        if (TargetRegisterInfo::isPhysicalRegister(LiveOutReg))
          LivePhysRegs.set(LiveOutReg);
      }
    }

    // FIXME: Add live-ins from successors to LivePhysRegs. Normally, physregs
    // are not live across blocks, but some targets (x86) can have flags live
    // out of a block.

    // Scan the block backwards, deleting dead instructions and updating
    // physreg liveness for the ones that are kept.
    for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
         MIE = MBB->rend(); MII != MIE; ) {
      MachineInstr *MI = &*MII;

      if (isDead(MI)) {
        DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
        // DBG_VALUE instructions may still refer to the registers MI
        // defines. Such a DBG_VALUE is kept but marked undef by zeroing its
        // register operand.
        for (unsigned OpIdx = 0, NumOps = MI->getNumOperands();
             OpIdx != NumOps; ++OpIdx) {
          const MachineOperand &MO = MI->getOperand(OpIdx);
          if (!MO.isReg() || !MO.isDef())
            continue;
          unsigned DefReg = MO.getReg();
          if (!TargetRegisterInfo::isVirtualRegister(DefReg))
            continue;

          MachineRegisterInfo::use_iterator UI = MRI->use_begin(DefReg);
          MachineRegisterInfo::use_iterator UE = MRI->use_end();
          while (UI != UE) {
            MachineOperand &Use = UI.getOperand();
            // Step first: setReg below invalidates the current position.
            ++UI;
            MachineInstr *UseMI = Use.getParent();
            if (UseMI == MI)
              continue;
            assert(Use.isDebug());
            UseMI->getOperand(0).setReg(0U);
          }
        }
        AnyChanges = true;
        MI->eraseFromParent();
        ++NumDeletes;
        MIE = MBB->rend();
        // MII already refers to the next instruction to process, so it is
        // not incremented here.
        continue;
      }

      // Physregs defined here are dead above this instruction.
      for (unsigned OpIdx = 0, NumOps = MI->getNumOperands();
           OpIdx != NumOps; ++OpIdx) {
        const MachineOperand &MO = MI->getOperand(OpIdx);
        if (!MO.isReg() || !MO.isDef())
          continue;
        unsigned DefReg = MO.getReg();
        if (DefReg == 0 || !TargetRegisterInfo::isPhysicalRegister(DefReg))
          continue;
        LivePhysRegs.reset(DefReg);
        // Use the sub-register set rather than the alias set: a
        // super-register of the def may still be partially live after it.
        for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg);
             *SubRegs; ++SubRegs)
          LivePhysRegs.reset(*SubRegs);
      }

      // Physregs used here are live above this instruction. Uses are handled
      // after defs so that a register both defined and used by MI ends up
      // live.
      for (unsigned OpIdx = 0, NumOps = MI->getNumOperands();
           OpIdx != NumOps; ++OpIdx) {
        const MachineOperand &MO = MI->getOperand(OpIdx);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned UseReg = MO.getReg();
        if (UseReg == 0 || !TargetRegisterInfo::isPhysicalRegister(UseReg))
          continue;
        LivePhysRegs.set(UseReg);
        for (const unsigned *AliasSet = TRI->getAliasSet(UseReg);
             *AliasSet; ++AliasSet)
          LivePhysRegs.set(*AliasSet);
      }

      // The instruction was kept; move on to the one above it.
      ++MII;
    }
  }

  LivePhysRegs.clear();
  return AnyChanges;
}
Exemplo n.º 19
0
/// Fold the source operand of foldable instructions (as decided by
/// isSafeToFold) into the instructions that use their result.
///
/// For every candidate instruction, each use of its destination register is
/// offered to foldOperand, which collects fold candidates and copies that
/// must become moves. The collected folds are then applied with
/// updateOperand.
///
/// \returns true if the function was modified, i.e. at least one copy was
/// rewritten or at least one operand was folded; false otherwise (including
/// when the function is skipped).
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(*MF.getFunction()))
    return false;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();

  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  // Tracks whether anything in MF was rewritten so the pass manager is told
  // the truth about modification.
  bool Changed = false;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      // Compute the successor up front so the walk does not depend on what
      // happens to the current instruction.
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases.  A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // Only virtual source registers are folded.
      if (OpToFold.isReg() &&
          !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
        continue;

      // Prevent folding operands backwards in the function. For example,
      // the COPY opcode must not be replaced by 1 in this example:
      //
      //    %vreg3<def> = COPY %VGPR0; VGPR_32:%vreg3
      //    ...
      //    %VGPR0<def> = V_MOV_B32_e32 1, %EXEC<imp-use>
      MachineOperand &Dst = MI.getOperand(0);
      if (Dst.isReg() &&
          !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
        continue;

      // We need mutate the operands of new mov instructions to add implicit
      // uses of EXEC, but adding them invalidates the use_iterator, so defer
      // this.
      SmallVector<MachineInstr *, 4> CopiesToReplace;

      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();

        foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
                    CopiesToReplace, TII, TRI, MRI);
      }

      // Make sure we add EXEC uses to any new v_mov instructions created.
      for (MachineInstr *Copy : CopiesToReplace)
        Copy->addImplicitDefUseOperands(MF);

      // Every entry in CopiesToReplace gained operands above, which is a
      // modification of the function.
      // NOTE(review): foldOperand is not visible here; if it can modify
      // instructions without recording them in either list, it needs to
      // report that as well.
      if (!CopiesToReplace.empty())
        Changed = true;

      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          Changed = true;

          // Clear kill flags.
          if (Fold.isReg()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            // FIXME: Probably shouldn't bother trying to fold if not an
            // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
            // copies.
            MRI.clearKillFlags(Fold.OpToFold->getReg());
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');

          // Folding the immediate may reveal operations that can be constant
          // folded or replaced with a copy. This can happen for example after
          // frame indices are lowered to constants or from splitting 64-bit
          // constants.
          tryConstantFoldOp(MRI, TII, Fold.UseMI);
        }
      }
    }
  }
  return Changed;
}
Exemplo n.º 20
0
/// Rewrite TopInst to its VALU opcode (as given by getVALUOp) and then do the
/// same, via a worklist, for every user of a rewritten result that cannot
/// read a VGPR. Instructions for which getVALUOp reports
/// INSTRUCTION_LIST_END are left untouched.
void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
  SmallVector<MachineInstr *, 128> Pending;
  Pending.push_back(&TopInst);

  while (!Pending.empty()) {
    MachineInstr *Cur = Pending.pop_back_val();

    const unsigned VALUOpc = getVALUOp(*Cur);
    if (VALUOpc == AMDGPU::INSTRUCTION_LIST_END)
      continue;

    MachineRegisterInfo &MRI = Cur->getParent()->getParent()->getRegInfo();

    // Switch the instruction over to the VALU opcode.
    const MCInstrDesc &VALUDesc = get(VALUOpc);
    Cur->setDesc(VALUDesc);

    // Strip SCC operands (operand 0 is not examined). Vector instructions
    // cannot read SCC, and the implicit VCC uses / defs are appended just
    // below; both must not be present.
    for (unsigned OpIdx = Cur->getNumOperands() - 1; OpIdx > 0; --OpIdx) {
      MachineOperand &Op = Cur->getOperand(OpIdx);
      if (!Op.isReg() || Op.getReg() != AMDGPU::SCC)
        continue;
      Cur->RemoveOperand(OpIdx);
    }

    // Append the implicit uses, then the implicit defs, that the new
    // descriptor lists (each list is zero-terminated).
    if (VALUDesc.ImplicitUses) {
      for (unsigned Idx = 0; VALUDesc.ImplicitUses[Idx]; ++Idx)
        Cur->addOperand(
            MachineOperand::CreateReg(VALUDesc.ImplicitUses[Idx], false, true));
    }

    if (VALUDesc.ImplicitDefs) {
      for (unsigned Idx = 0; VALUDesc.ImplicitDefs[Idx]; ++Idx)
        Cur->addOperand(
            MachineOperand::CreateReg(VALUDesc.ImplicitDefs[Idx], true, true));
    }

    legalizeOperands(Cur);

    // Work out the register class the destination should have.
    const TargetRegisterClass *NewDstRC = getOpRegClass(*Cur, 0);

    // For COPY, PHI and REG_SEQUENCE, getOpRegClass just returns the virtual
    // register class associated with the operand, so an equivalent VGPR
    // class has to be looked up to move the instruction to the VALU. If the
    // class already has VGPRs, or no equivalent exists, nothing more is done
    // for this instruction.
    const unsigned CurOpc = Cur->getOpcode();
    if (CurOpc == AMDGPU::COPY || CurOpc == AMDGPU::PHI ||
        CurOpc == AMDGPU::REG_SEQUENCE) {
      if (RI.hasVGPRs(NewDstRC))
        continue;
      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
      if (!NewDstRC)
        continue;
    }

    // Give the result a fresh virtual register of the new class and redirect
    // all references to the old destination.
    const unsigned OldDstReg = Cur->getOperand(0).getReg();
    const unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(OldDstReg, NewDstReg);

    // Users that cannot read a VGPR in that operand slot are queued for the
    // same treatment.
    for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(NewDstReg),
           UE = MRI.use_end(); UI != UE; ++UI) {
      MachineInstr &UseMI = *UI;
      if (canReadVGPR(UseMI, UI.getOperandNo()))
        continue;
      Pending.push_back(&UseMI);
    }
  }
}
Exemplo n.º 21
0
/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible,
/// and trim the live ranges after.
void InlineSpiller::reMaterializeAll() {
  // Do a quick scan of the interval values to find if any are remattable.
  reMattable_.clear();
  usedValues_.clear();
  for (LiveInterval::const_vni_iterator I = li_->vni_begin(),
       E = li_->vni_end(); I != E; ++I) {
    VNInfo *VNI = *I;
    if (VNI->isUnused() || !VNI->isDefAccurate())
      continue;
    MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
    if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI))
      continue;
    reMattable_.insert(VNI);
  }

  // Often, no defs are remattable.
  if (reMattable_.empty())
    return;

  // Try to remat before all uses of li_->reg.
  bool anyRemat = false;
  for (MachineRegisterInfo::use_nodbg_iterator
       RI = mri_.use_nodbg_begin(li_->reg);
       MachineInstr *MI = RI.skipInstruction();)
     anyRemat |= reMaterializeFor(MI);

  if (!anyRemat)
    return;

  // Remove any values that were completely rematted.
  bool anyRemoved = false;
  for (SmallPtrSet<VNInfo*, 8>::iterator I = reMattable_.begin(),
       E = reMattable_.end(); I != E; ++I) {
    VNInfo *VNI = *I;
    if (VNI->hasPHIKill() || usedValues_.count(VNI))
      continue;
    MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
    DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI);
    lis_.RemoveMachineInstrFromMaps(DefMI);
    vrm_.RemoveMachineInstrFromMaps(DefMI);
    DefMI->eraseFromParent();
    VNI->setIsDefAccurate(false);
    anyRemoved = true;
  }

  if (!anyRemoved)
    return;

  // Removing values may cause debug uses where li_ is not live.
  for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg);
       MachineInstr *MI = RI.skipInstruction();) {
    if (!MI->isDebugValue())
      continue;
    // Try to preserve the debug value if li_ is live immediately after it.
    MachineBasicBlock::iterator NextMI = MI;
    ++NextMI;
    if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) {
      VNInfo *VNI = li_->getVNInfoAt(lis_.getInstructionIndex(NextMI));
      if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI)))
        continue;
    }
    DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI);
    MI->eraseFromParent();
  }
}