Example #1
void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB,
      MachineBasicBlock *NewB) {
  for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) {
    MachineBasicBlock *SB = *I;
    MachineBasicBlock::iterator P, N = SB->getFirstNonPHI();
    for (P = SB->begin(); P != N; ++P) {
      MachineInstr *PN = &*P;
      for (MIOperands MO(PN); MO.isValid(); ++MO)
        if (MO->isMBB() && MO->getMBB() == OldB)
          MO->setMBB(NewB);
    }
  }
}
Example #2
void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB,
      MachineBasicBlock *NewB) {
  for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) {
    MachineBasicBlock *SB = *I;
    MachineBasicBlock::iterator P, N = SB->getFirstNonPHI();
    for (P = SB->begin(); P != N; ++P) {
      MachineInstr &PN = *P;
      for (MachineOperand &MO : PN.operands())
        if (MO.isMBB() && MO.getMBB() == OldB)
          MO.setMBB(NewB);
    }
  }
}
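Examples #1 and #2 show the same Hexagon routine before and after switching from the MIOperands helper to range-based operand iteration. As a further illustration only (not taken from either example), the successor and PHI walks can be written the same way, assuming an LLVM version whose MachineBasicBlock provides successors() and phis():

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"

using namespace llvm;

// Sketch: same PHI-edge rewrite as above, written entirely with range helpers.
static void replacePhiEdgesSketch(MachineBasicBlock *OldB,
                                  MachineBasicBlock *NewB) {
  for (MachineBasicBlock *SB : OldB->successors())
    for (MachineInstr &PN : SB->phis())
      for (MachineOperand &MO : PN.operands())
        if (MO.isMBB() && MO.getMBB() == OldB)
          MO.setMBB(NewB);
}
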
Example #3
/// Sink instructions into loops if profitable. This especially tries to prevent
/// register spills caused by register pressure if there is little to no
/// overhead moving instructions into loops.
void MachineLICM::SinkIntoLoop() {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)
    return;

  SmallVector<MachineInstr *, 8> Candidates;
  for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
       I != Preheader->instr_end(); ++I) {
    // We need to ensure that we can safely move this instruction into the loop.
    // As such, it must not have side effects, e.g. as a call does.
    if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I))
      Candidates.push_back(&*I);
  }

  for (MachineInstr *I : Candidates) {
    const MachineOperand &MO = I->getOperand(0);
    if (!MO.isDef() || !MO.isReg() || !MO.getReg())
      continue;
    if (!MRI->hasOneDef(MO.getReg()))
      continue;
    bool CanSink = true;
    MachineBasicBlock *B = nullptr;
    for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
      // FIXME: Come up with a proper cost model that estimates whether sinking
      // the instruction (and thus possibly executing it on every loop
      // iteration) is more expensive than a register.
      // For now, assume that copies are cheap and thus almost always worth it.
      if (!MI.isCopy()) {
        CanSink = false;
        break;
      }
      if (!B) {
        B = MI.getParent();
        continue;
      }
      B = DT->findNearestCommonDominator(B, MI.getParent());
      if (!B) {
        CanSink = false;
        break;
      }
    }
    if (!CanSink || !B || B == Preheader)
      continue;
    B->splice(B->getFirstNonPHI(), Preheader, I);
  }
}
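The sinking decision above folds DT->findNearestCommonDominator over every block that uses the candidate's single def, and gives up when no common dominator remains. A minimal sketch of just that step as a standalone helper (the name findCommonUseBlock is made up for this sketch; it assumes the same MachineRegisterInfo and MachineDominatorTree APIs the example already uses):

#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Sketch: nearest block that dominates every use of Reg, or null if none.
static MachineBasicBlock *findCommonUseBlock(unsigned Reg,
                                             MachineRegisterInfo &MRI,
                                             MachineDominatorTree &DT) {
  MachineBasicBlock *B = nullptr;
  for (MachineInstr &UseMI : MRI.use_instructions(Reg)) {
    // Start at the first use block, then keep narrowing to the nearest
    // common dominator of everything seen so far.
    B = B ? DT.findNearestCommonDominator(B, UseMI.getParent())
          : UseMI.getParent();
    if (!B)
      break;
  }
  return B;
}
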
Example #4
bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
  df_iterator_default_set<MachineBasicBlock*> Reachable;
  bool ModifiedPHI = false;

  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
  MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
  MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();

  // Mark all reachable blocks.
  for (MachineBasicBlock *BB : depth_first_ext(&F, Reachable))
    (void)BB/* Mark all reachable blocks */;

  // Loop over all dead blocks, remembering them and deleting all instructions
  // in them.
  std::vector<MachineBasicBlock*> DeadBlocks;
  for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
    MachineBasicBlock *BB = &*I;

    // Test for deadness.
    if (!Reachable.count(BB)) {
      DeadBlocks.push_back(BB);

      // Update dominator and loop info.
      if (MLI) MLI->removeBlock(BB);
      if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB);

      while (BB->succ_begin() != BB->succ_end()) {
        MachineBasicBlock* succ = *BB->succ_begin();

        MachineBasicBlock::iterator start = succ->begin();
        while (start != succ->end() && start->isPHI()) {
          for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
            if (start->getOperand(i).isMBB() &&
                start->getOperand(i).getMBB() == BB) {
              start->RemoveOperand(i);
              start->RemoveOperand(i-1);
            }

          start++;
        }

        BB->removeSuccessor(BB->succ_begin());
      }
    }
  }

  // Actually remove the blocks now.
  for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
    DeadBlocks[i]->eraseFromParent();

  // Cleanup PHI nodes.
  for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
    MachineBasicBlock *BB = &*I;
    // Prune unneeded PHI entries.
    SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
                                             BB->pred_end());
    MachineBasicBlock::iterator phi = BB->begin();
    while (phi != BB->end() && phi->isPHI()) {
      for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
        if (!preds.count(phi->getOperand(i).getMBB())) {
          phi->RemoveOperand(i);
          phi->RemoveOperand(i-1);
          ModifiedPHI = true;
        }

      if (phi->getNumOperands() == 3) {
        const MachineOperand &Input = phi->getOperand(1);
        const MachineOperand &Output = phi->getOperand(0);
        unsigned InputReg = Input.getReg();
        unsigned OutputReg = Output.getReg();
        assert(Output.getSubReg() == 0 && "Cannot have output subregister");
        ModifiedPHI = true;

        if (InputReg != OutputReg) {
          MachineRegisterInfo &MRI = F.getRegInfo();
          unsigned InputSub = Input.getSubReg();
          if (InputSub == 0 &&
              MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg))) {
            MRI.replaceRegWith(OutputReg, InputReg);
          } else {
            // The input register to the PHI has a subregister or it can't be
            // constrained to the proper register class:
            // insert a COPY instead of simply replacing the output
            // with the input.
            const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();
            BuildMI(*BB, BB->getFirstNonPHI(), phi->getDebugLoc(),
                    TII->get(TargetOpcode::COPY), OutputReg)
                .addReg(InputReg, getRegState(Input), InputSub);
          }
          phi++->eraseFromParent();
        }
        continue;
      }

      ++phi;
    }
  }

  F.RenumberBlocks();

  return (!DeadBlocks.empty() || ModifiedPHI);
}
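Both PHI-pruning loops above rely on the operand layout of machine-level PHIs: operand 0 is the result, and each incoming value is a (register, predecessor block) pair at indices (1, 2), (3, 4), and so on, which is why the loops step i by two and remove operands i and i-1 together. A minimal sketch of that layout in use (the helper name getPHIIncomingValueFor is hypothetical):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include <cassert>

using namespace llvm;

// Sketch: return the register PHI receives from predecessor Pred, or 0.
static unsigned getPHIIncomingValueFor(const MachineInstr &PHI,
                                       const MachineBasicBlock *Pred) {
  assert(PHI.isPHI() && "expected a PHI");
  for (unsigned i = 1, e = PHI.getNumOperands(); i + 1 < e; i += 2)
    if (PHI.getOperand(i + 1).getMBB() == Pred)
      return PHI.getOperand(i).getReg();
  return 0;
}
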
Example #5
bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  bool MadeChange = false;

  MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
  std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;

  LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
  LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
  MachineBasicBlock *Entry = MF.begin();

  // Use a depth first order so that in SSA, we encounter all defs before
  // uses. Once the defs of the block have been found, attempt to insert
  // SGPR_USE instructions in successor blocks if required.
  for (MachineBasicBlock *MBB : depth_first(Entry)) {
    for (const MachineInstr &MI : *MBB) {
      for (const MachineOperand &MO : MI.defs()) {
        if (MO.isImplicit())
          continue;
        unsigned Def = MO.getReg();
        if (TargetRegisterInfo::isVirtualRegister(Def)) {
          if (TRI->isSGPRClass(MRI.getRegClass(Def))) {
            // Only consider defs that are live outs. We don't care about def /
            // use within the same block.
            LiveRange &LR = LIS->getInterval(Def);
            if (LIS->isLiveOutOfMBB(LR, MBB))
              SGPRLiveRanges.push_back(std::make_pair(Def, &LR));
          }
        } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
          SGPRLiveRanges.push_back(std::make_pair(Def, &LIS->getRegUnit(Def)));
        }
      }
    }

    if (MBB->succ_size() < 2)
      continue;

    // We have structured control flow, so the number of successors should be
    // two.
    assert(MBB->succ_size() == 2);
    MachineBasicBlock *SuccA = *MBB->succ_begin();
    MachineBasicBlock *SuccB = *(++MBB->succ_begin());
    MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);

    if (!NCD)
      continue;

    MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator();

    if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) {
      assert(NCD->succ_size() == 2);
      // We want to make sure we insert the Use after the ENDIF, not after
      // the ELSE.
      NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
                                            *(++NCD->succ_begin()));
    }

    for (std::pair<unsigned, LiveRange*> RegLR : SGPRLiveRanges) {
      unsigned Reg = RegLR.first;
      LiveRange *LR = RegLR.second;

      // FIXME: We could be smarter here. If the register is Live-In to one
      // block, but the other doesn't have any SGPR defs, then there won't be a
      // conflict. Also, if the branch condition is uniform then there will be
      // no conflict.
      bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
      bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);

      if (!LiveInToA && !LiveInToB) {
        DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
              << " is live into neither successor\n");
        continue;
      }

      if (LiveInToA && LiveInToB) {
        DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
              << " is live into both successors\n");
        continue;
      }

      // This interval is live in to one successor, but not the other, so
      // we need to update its range so it is live in to both.
      DEBUG(dbgs() << "Possible SGPR conflict detected for "
            << PrintReg(Reg, TRI, 0) <<  " in " << *LR
            << " BB#" << SuccA->getNumber() << ", BB#"
            << SuccB->getNumber()
            << " with NCD = BB#" << NCD->getNumber() << '\n');

      assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
             "Not expecting to extend live range of physreg");

      // FIXME: Need to figure out how to update LiveRange here so this pass
      // will be able to preserve LiveInterval analysis.
      MachineInstr *NCDSGPRUse =
        BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
                TII->get(AMDGPU::SGPR_USE))
        .addReg(Reg, RegState::Implicit);

      MadeChange = true;

      SlotIndex SI = LIS->InsertMachineInstrInMaps(NCDSGPRUse);
      LIS->extendToIndices(*LR, SI.getRegSlot());

      if (LV) {
        // TODO: This won't work post-SSA
        LV->HandleVirtRegUse(Reg, NCD, NCDSGPRUse);
      }

      DEBUG(NCDSGPRUse->dump());
    }
  }

  return MadeChange;
}
Example #6
bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
                                         MachineFunction &MF,
                                         const TargetRegisterInfo *TRI,
                                         const TargetInstrInfo *TII) {
  SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs;
  // FIXME: For now, we sink only to a successor which has a single predecessor
  // so that we can directly sink COPY instructions to the successor without
  // adding any new block or branch instruction.
  for (MachineBasicBlock *SI : CurBB.successors())
    if (!SI->livein_empty() && SI->pred_size() == 1)
      SinkableBBs.insert(SI);

  if (SinkableBBs.empty())
    return false;

  bool Changed = false;

  // Track which registers have been modified and used between the end of the
  // block and the current instruction.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) {
    MachineInstr *MI = &*I;
    ++I;

    if (MI->isDebugInstr())
      continue;

    // Do not move any instruction across function call.
    if (MI->isCall())
      return false;

    if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) {
      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }

    // Track the operand index for use in Copy.
    SmallVector<unsigned, 2> UsedOpsInCopy;
    // Track the register number defed in Copy.
    SmallVector<unsigned, 2> DefedRegsInCopy;

    // Don't sink the COPY if it would violate a register dependency.
    if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
                              ModifiedRegUnits, UsedRegUnits)) {
      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
           "Unexpected SrcReg or DefReg");
    MachineBasicBlock *SuccBB =
        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);
    // Don't sink if we cannot find a single sinkable successor in which Reg
    // is live-in.
    if (!SuccBB) {
      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
           "Unexpected predecessor");

    // Clear the kill flag if SrcReg is killed between MI and the end of the
    // block.
    clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
    MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
    performSink(*MI, *SuccBB, InsertPos);
    updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);

    Changed = true;
    ++NumPostRACopySink;
  }
  return Changed;
}
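This copy-sinking loop walks the block bottom-up, calling LiveRegUnits::accumulateUsedDefed on every instruction it decides not to sink, so that ModifiedRegUnits and UsedRegUnits always describe what happens between the current instruction and the block end; hasRegisterDependency then consults those sets. A minimal sketch of the same bookkeeping in isolation, assuming the LiveRegUnits API the example already uses (the helper name isClobberedOrUsedBelow is hypothetical, and a real pass accumulates incrementally rather than rescanning the block for every query):

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

// Sketch: is Reg defined or read anywhere strictly below MI in its block?
static bool isClobberedOrUsedBelow(MCPhysReg Reg, const MachineInstr &MI,
                                   const TargetRegisterInfo *TRI) {
  LiveRegUnits Modified(*TRI), Used(*TRI);
  const MachineBasicBlock &MBB = *MI.getParent();
  for (const MachineInstr &I : llvm::reverse(MBB)) {
    if (&I == &MI)
      break;
    // Record the register units I defines and uses.
    LiveRegUnits::accumulateUsedDefed(I, Modified, Used, TRI);
  }
  return !Modified.available(Reg) || !Used.available(Reg);
}
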
Example #7
void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
                                   bool isEntry) {
  auto BII = Blocks.find(&MBB);
  if (BII == Blocks.end())
    return;

  const BlockInfo &BI = BII->second;

  // This is a non-entry block that is WQM throughout, so no need to do
  // anything.
  if (!isEntry && BI.Needs == StateWQM && BI.OutNeeds != StateExact)
    return;

  LLVM_DEBUG(dbgs() << "\nProcessing block " << printMBBReference(MBB)
                    << ":\n");

  unsigned SavedWQMReg = 0;
  unsigned SavedNonWWMReg = 0;
  bool WQMFromExec = isEntry;
  char State = (isEntry || !(BI.InNeeds & StateWQM)) ? StateExact : StateWQM;
  char NonWWMState = 0;

  auto II = MBB.getFirstNonPHI(), IE = MBB.end();
  if (isEntry)
    ++II; // Skip the instruction that saves LiveMask

  // This stores the first instruction where it's safe to switch from WQM to
  // Exact or vice versa.
  MachineBasicBlock::iterator FirstWQM = IE;

  // This stores the first instruction where it's safe to switch from WWM to
  // Exact/WQM or to switch to WWM. It must always be the same as, or after,
  // FirstWQM since if it's safe to switch to/from WWM, it must be safe to
  // switch to/from WQM as well.
  MachineBasicBlock::iterator FirstWWM = IE;
  for (;;) {
    MachineBasicBlock::iterator Next = II;
    char Needs = StateExact | StateWQM; // WWM is disabled by default
    char OutNeeds = 0;

    if (FirstWQM == IE)
      FirstWQM = II;

    if (FirstWWM == IE)
      FirstWWM = II;

    // First, figure out the allowed states (Needs) based on the propagated
    // flags.
    if (II != IE) {
      MachineInstr &MI = *II;

      if (requiresCorrectState(MI)) {
        auto III = Instructions.find(&MI);
        if (III != Instructions.end()) {
          if (III->second.Needs & StateWWM)
            Needs = StateWWM;
          else if (III->second.Needs & StateWQM)
            Needs = StateWQM;
          else
            Needs &= ~III->second.Disabled;
          OutNeeds = III->second.OutNeeds;
        }
      } else {
        // If the instruction doesn't actually need a correct EXEC, then we can
        // safely leave WWM enabled.
        Needs = StateExact | StateWQM | StateWWM;
      }

      if (MI.isTerminator() && OutNeeds == StateExact)
        Needs = StateExact;

      if (MI.getOpcode() == AMDGPU::SI_ELSE && BI.OutNeeds == StateExact)
        MI.getOperand(3).setImm(1);

      ++Next;
    } else {
      // End of basic block
      if (BI.OutNeeds & StateWQM)
        Needs = StateWQM;
      else if (BI.OutNeeds == StateExact)
        Needs = StateExact;
      else
        Needs = StateWQM | StateExact;
    }

    // Now, transition if necessary.
    if (!(Needs & State)) {
      MachineBasicBlock::iterator First;
      if (State == StateWWM || Needs == StateWWM) {
        // We must switch to or from WWM
        First = FirstWWM;
      } else {
        // We only need to switch to/from WQM, so we can use FirstWQM
        First = FirstWQM;
      }

      MachineBasicBlock::iterator Before =
          prepareInsertion(MBB, First, II, Needs == StateWQM,
                           Needs == StateExact || WQMFromExec);

      if (State == StateWWM) {
        assert(SavedNonWWMReg);
        fromWWM(MBB, Before, SavedNonWWMReg);
        State = NonWWMState;
      }

      if (Needs == StateWWM) {
        NonWWMState = State;
        SavedNonWWMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
        toWWM(MBB, Before, SavedNonWWMReg);
        State = StateWWM;
      } else {
        if (State == StateWQM && (Needs & StateExact) && !(Needs & StateWQM)) {
          if (!WQMFromExec && (OutNeeds & StateWQM))
            SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

          toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
          State = StateExact;
        } else if (State == StateExact && (Needs & StateWQM) &&
                   !(Needs & StateExact)) {
          assert(WQMFromExec == (SavedWQMReg == 0));

          toWQM(MBB, Before, SavedWQMReg);

          if (SavedWQMReg) {
            LIS->createAndComputeVirtRegInterval(SavedWQMReg);
            SavedWQMReg = 0;
          }
          State = StateWQM;
        } else {
          // We can get here if we transitioned from WWM to a non-WWM state that
          // already matches our needs, but we shouldn't need to do anything.
          assert(Needs & State);
        }
      }
    }

    if (Needs != (StateExact | StateWQM | StateWWM)) {
      if (Needs != (StateExact | StateWQM))
        FirstWQM = IE;
      FirstWWM = IE;
    }

    if (II == IE)
      break;
    II = Next;
  }
}
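Needs and State throughout this pass are small bitmasks, so 'Needs & State' asks whether the current state is still acceptable and '!(Needs & State)' is the signal to insert a transition. A minimal sketch of that convention (the enumerator values below are assumptions chosen for the sketch, not copied from the pass):

// Sketch: one bit per execution state.
enum : char {
  StateWQM   = 0x1,
  StateWWM   = 0x2,
  StateExact = 0x4,
};

// A transition is needed exactly when the current state's bit is not among
// the states the next instruction tolerates.
static bool needsTransition(char State, char Needs) {
  // e.g. State == StateExact, Needs == StateWQM              -> true
  //      State == StateWQM,   Needs == StateExact | StateWQM -> false
  return (Needs & State) == 0;
}
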
Example #8
void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
                                   bool isEntry) {
  auto BII = Blocks.find(&MBB);
  if (BII == Blocks.end())
    return;

  const BlockInfo &BI = BII->second;

  if (!(BI.InNeeds & StateWQM))
    return;

  // This is a non-entry block that is WQM throughout, so no need to do
  // anything.
  if (!isEntry && !(BI.Needs & StateExact) && BI.OutNeeds != StateExact)
    return;

  DEBUG(dbgs() << "\nProcessing block BB#" << MBB.getNumber() << ":\n");

  unsigned SavedWQMReg = 0;
  bool WQMFromExec = isEntry;
  char State = isEntry ? StateExact : StateWQM;

  auto II = MBB.getFirstNonPHI(), IE = MBB.end();
  if (isEntry)
    ++II; // Skip the instruction that saves LiveMask

  MachineBasicBlock::iterator First = IE;
  for (;;) {
    MachineBasicBlock::iterator Next = II;
    char Needs = 0;
    char OutNeeds = 0;

    if (First == IE)
      First = II;

    if (II != IE) {
      MachineInstr &MI = *II;

      if (requiresCorrectState(MI)) {
        auto III = Instructions.find(&MI);
        if (III != Instructions.end()) {
          Needs = III->second.Needs;
          OutNeeds = III->second.OutNeeds;
        }
      }

      if (MI.isTerminator() && !Needs && OutNeeds == StateExact)
        Needs = StateExact;

      if (MI.getOpcode() == AMDGPU::SI_ELSE && BI.OutNeeds == StateExact)
        MI.getOperand(3).setImm(1);

      ++Next;
    } else {
      // End of basic block
      if (BI.OutNeeds & StateWQM)
        Needs = StateWQM;
      else if (BI.OutNeeds == StateExact)
        Needs = StateExact;
    }

    if (Needs) {
      if (Needs != State) {
        MachineBasicBlock::iterator Before =
            prepareInsertion(MBB, First, II, Needs == StateWQM,
                             Needs == StateExact || WQMFromExec);

        if (Needs == StateExact) {
          if (!WQMFromExec && (OutNeeds & StateWQM))
            SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

          toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
        } else {
          assert(WQMFromExec == (SavedWQMReg == 0));

          toWQM(MBB, Before, SavedWQMReg);

          if (SavedWQMReg) {
            LIS->createAndComputeVirtRegInterval(SavedWQMReg);
            SavedWQMReg = 0;
          }
        }

        State = Needs;
      }

      First = IE;
    }

    if (II == IE)
      break;
    II = Next;
  }
}
Example #9
void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
                                   bool isEntry) {
  auto BII = Blocks.find(&MBB);
  if (BII == Blocks.end())
    return;

  const BlockInfo &BI = BII->second;

  if (!(BI.InNeeds & StateWQM))
    return;

  // This is a non-entry block that is WQM throughout, so no need to do
  // anything.
  if (!isEntry && !(BI.Needs & StateExact) && BI.OutNeeds != StateExact)
    return;

  unsigned SavedWQMReg = 0;
  bool WQMFromExec = isEntry;
  char State = isEntry ? StateExact : StateWQM;

  auto II = MBB.getFirstNonPHI(), IE = MBB.end();
  while (II != IE) {
    MachineInstr &MI = *II;
    ++II;

    // Skip instructions that are not affected by EXEC
    if (TII->isScalarUnit(MI) && !MI.isTerminator())
      continue;

    // Generic instructions such as COPY will either disappear by register
    // coalescing or be lowered to SALU or VALU instructions.
    if (TargetInstrInfo::isGenericOpcode(MI.getOpcode())) {
      if (MI.getNumExplicitOperands() >= 1) {
        const MachineOperand &Op = MI.getOperand(0);
        if (Op.isReg()) {
          if (TRI->isSGPRReg(*MRI, Op.getReg())) {
            // SGPR instructions are not affected by EXEC
            continue;
          }
        }
      }
    }

    char Needs = 0;
    char OutNeeds = 0;
    auto InstrInfoIt = Instructions.find(&MI);
    if (InstrInfoIt != Instructions.end()) {
      Needs = InstrInfoIt->second.Needs;
      OutNeeds = InstrInfoIt->second.OutNeeds;

      // Make sure to switch to Exact mode before the end of the block when
      // Exact and only Exact is needed further downstream.
      if (OutNeeds == StateExact && MI.isTerminator()) {
        assert(Needs == 0);
        Needs = StateExact;
      }
    }

    // State switching
    if (Needs && State != Needs) {
      if (Needs == StateExact) {
        assert(!SavedWQMReg);

        if (!WQMFromExec && (OutNeeds & StateWQM))
          SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

        toExact(MBB, &MI, SavedWQMReg, LiveMaskReg);
      } else {
        assert(WQMFromExec == (SavedWQMReg == 0));
        toWQM(MBB, &MI, SavedWQMReg);
        SavedWQMReg = 0;
      }

      State = Needs;
    }
  }

  if ((BI.OutNeeds & StateWQM) && State != StateWQM) {
    assert(WQMFromExec == (SavedWQMReg == 0));
    toWQM(MBB, MBB.end(), SavedWQMReg);
  } else if (BI.OutNeeds == StateExact && State != StateExact) {
    toExact(MBB, MBB.end(), 0, LiveMaskReg);
  }
}
Example #10
bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
  MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
  std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;

  // First pass, collect all live intervals for SGPRs
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      for (const MachineOperand &MO : MI.defs()) {
        if (MO.isImplicit())
          continue;
        unsigned Def = MO.getReg();
        if (TargetRegisterInfo::isVirtualRegister(Def)) {
          if (TRI->isSGPRClass(MRI.getRegClass(Def)))
            SGPRLiveRanges.push_back(
                std::make_pair(Def, &LIS->getInterval(Def)));
        } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
          SGPRLiveRanges.push_back(
              std::make_pair(Def, &LIS->getRegUnit(Def)));
        }
      }
    }
  }

  // Second pass fix the intervals
  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {
    MachineBasicBlock &MBB = *BI;
    if (MBB.succ_size() < 2)
      continue;

    // We have structured control flow, so the number of successors should be
    // two.
    assert(MBB.succ_size() == 2);
    MachineBasicBlock *SuccA = *MBB.succ_begin();
    MachineBasicBlock *SuccB = *(++MBB.succ_begin());
    MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);

    if (!NCD)
      continue;

    MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator();

    if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) {
      assert(NCD->succ_size() == 2);
      // We want to make sure we insert the Use after the ENDIF, not after
      // the ELSE.
      NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
                                            *(++NCD->succ_begin()));
    }
    assert(SuccA && SuccB);
    for (std::pair<unsigned, LiveRange*> RegLR : SGPRLiveRanges) {
      unsigned Reg = RegLR.first;
      LiveRange *LR = RegLR.second;

      // FIXME: We could be smarter here.  If the register is Live-In to
      // one block, but the other doesn't have any SGPR defs, then there
      // won't be a conflict.  Also, if the branch decision is based on
      // a value in an SGPR, then there will be no conflict.
      bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
      bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);

      if ((!LiveInToA && !LiveInToB) ||
          (LiveInToA && LiveInToB))
        continue;

      // This interval is live in to one successor, but not the other, so
      // we need to update its range so it is live in to both.
      DEBUG(dbgs() << "Possible SGPR conflict detected in " << *LR <<
                      " BB#" << SuccA->getNumber() << ", BB#" <<
                      SuccB->getNumber() <<
                      " with NCD = " << NCD->getNumber() << '\n');

      // FIXME: Need to figure out how to update LiveRange here so this pass
      // will be able to preserve LiveInterval analysis.
      BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
              TII->get(AMDGPU::SGPR_USE))
              .addReg(Reg, RegState::Implicit);
      DEBUG(NCD->getFirstNonPHI()->dump());
    }
  }

  return false;
}