Exemplo n.º 1
0
/// Implements shrink-wrapping of the stack frame. By default, stack frame
/// is created in the function entry block, and is cleaned up in every block
/// that returns. This function finds alternate blocks: one for the frame
/// setup (prolog) and one for the cleanup (epilog).
///
/// \param MF       Function being analyzed.
/// \param PrologB  Reference to the caller's prolog block pointer; it is left
///                 untouched on the early-return paths in this function.
/// \param EpilogB  Reference to the caller's epilog block pointer; it is left
///                 untouched on the early-return paths in this function.
void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
      MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
  // Function-local static: the count persists across calls, so the limit
  // checked below applies to the total number of invocations rather than to
  // a single function.
  static unsigned ShrinkCounter = 0;

  // NOTE(review): ShrinkLimit is declared outside this excerpt; presumably it
  // is a command-line option whose getPosition() is non-zero only when the
  // option was given explicitly -- confirm. When it is set, bail out once the
  // counter has reached the limit.
  if (ShrinkLimit.getPosition()) {
    if (ShrinkCounter >= ShrinkLimit)
      return;
    ShrinkCounter++;
  }

  auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
  auto &HRI = *HST.getRegisterInfo();

  // Compute dominator and post-dominator information for MF locally.
  MachineDominatorTree MDT;
  MDT.runOnMachineFunction(MF);
  MachinePostDominatorTree MPT;
  MPT.runOnMachineFunction(MF);

  // Map each block number to its position in a reverse post-order traversal
  // of the function's CFG.
  typedef DenseMap<unsigned,unsigned> UnsignedMap;
  UnsignedMap RPO;
  typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType;
  RPOTType RPOT(&MF);
  unsigned RPON = 0;
  for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
    RPO[(*I)->getNumber()] = RPON++;

  // Don't process functions that have loops, at least for now. Placement
  // of prolog and epilog must take loop structure into account. For simpli-
  // city don't do it right now.
  // NOTE(review): RPO[] default-inserts 0 for block numbers the traversal
  // never visited (presumably unreachable blocks) -- confirm this is intended.
  for (auto &I : MF) {
    unsigned BN = RPO[I.getNumber()];
    for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; ++SI) {
      // If found a back-edge, return. A successor whose RPO index is not
      // greater than the current block's counts as one (self-loops included).
      if (RPO[(*SI)->getNumber()] <= BN)
        return;
    }
  }

  // Collect the set of blocks that need a stack frame to execute. Scan
  // each block for uses/defs of callee-saved registers, calls, etc.
  SmallVector<MachineBasicBlock*,16> SFBlocks;
  // Bit vector indexed by physical register number; a set bit marks a
  // callee-saved register. The register list is walked up to its zero
  // terminator.
  BitVector CSR(Hexagon::NUM_TARGET_REGS);
  for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P)
    CSR[*P] = true;

  for (auto &I : MF)
    if (needsStackFrame(I, CSR))
      SFBlocks.push_back(&I);

  // Debug-only dump of the numbers of the blocks collected above.
  DEBUG({
    dbgs() << "Blocks needing SF: {";
    for (auto &B : SFBlocks)
      dbgs() << " BB#" << B->getNumber();
    dbgs() << " }\n";
  });
Exemplo n.º 2
0
/// Inserts SGPR_USE instructions so that an SGPR value which is live into
/// only one successor of a two-way branch is kept live into both.
///
/// Blocks are visited depth-first from the entry block. While visiting a
/// block, every explicit SGPR def is recorded (virtual registers only when
/// they are live out of the block). For each block with two successors, every
/// recorded range that is live into exactly one successor gets an implicit
/// use in the successors' nearest common post-dominator, and the live range
/// (and LiveVariables, when available) is extended to that use.
///
/// \param MF Function to process.
/// \returns true if at least one SGPR_USE instruction was inserted.
bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  bool MadeChange = false;

  MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
  // (register, live range) pairs accumulated over all blocks visited so far.
  std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;

  LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
  LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
  MachineBasicBlock *Entry = MF.begin();

  // Use a depth first order so that in SSA, we encounter all defs before
  // uses. Once the defs of the block have been found, attempt to insert
  // SGPR_USE instructions in successor blocks if required.
  for (MachineBasicBlock *MBB : depth_first(Entry)) {
    for (const MachineInstr &MI : *MBB) {
      for (const MachineOperand &MO : MI.defs()) {
        if (MO.isImplicit())
          continue;
        unsigned Def = MO.getReg();
        if (TargetRegisterInfo::isVirtualRegister(Def)) {
          if (TRI->isSGPRClass(MRI.getRegClass(Def))) {
            // Only consider defs that are live outs. We don't care about def /
            // use within the same block.
            LiveRange &LR = LIS->getInterval(Def);
            if (LIS->isLiveOutOfMBB(LR, MBB))
              SGPRLiveRanges.push_back(std::make_pair(Def, &LR));
          }
        } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
          SGPRLiveRanges.push_back(std::make_pair(Def, &LIS->getRegUnit(Def)));
        }
      }
    }

    if (MBB->succ_size() < 2)
      continue;

    // We have structured control flow, so the number of successors should be
    // two.
    assert(MBB->succ_size() == 2);
    MachineBasicBlock *SuccA = *MBB->succ_begin();
    MachineBasicBlock *SuccB = *(++MBB->succ_begin());
    MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);

    if (!NCD)
      continue;

    MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator();

    if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) {
      assert(NCD->succ_size() == 2);
      // We want to make sure we insert the Use after the ENDIF, not after
      // the ELSE.
      NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
                                            *(++NCD->succ_begin()));
      // The second lookup can fail just like the first one; NCD is
      // dereferenced below, so skip this branch when there is no block to
      // insert into.
      if (!NCD)
        continue;
    }

    for (const std::pair<unsigned, LiveRange *> &RegLR : SGPRLiveRanges) {
      unsigned Reg = RegLR.first;
      LiveRange *LR = RegLR.second;

      // FIXME: We could be smarter here. If the register is Live-In to one
      // block, but the other doesn't have any SGPR defs, then there won't be a
      // conflict. Also, if the branch condition is uniform then there will be
      // no conflict.
      bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
      bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);

      if (!LiveInToA && !LiveInToB) {
        DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
              << " is live into neither successor\n");
        continue;
      }

      if (LiveInToA && LiveInToB) {
        DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
              << " is live into both successors\n");
        continue;
      }

      // This interval is live in to one successor, but not the other, so
      // we need to update its range so it is live in to both.
      DEBUG(dbgs() << "Possible SGPR conflict detected for "
            << PrintReg(Reg, TRI, 0) <<  " in " << *LR
            << " BB#" << SuccA->getNumber() << ", BB#"
            << SuccB->getNumber()
            << " with NCD = BB#" << NCD->getNumber() << '\n');

      assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
             "Not expecting to extend live range of physreg");

      // FIXME: Need to figure out how to update LiveRange here so this pass
      // will be able to preserve LiveInterval analysis.
      MachineInstr *NCDSGPRUse =
        BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
                TII->get(AMDGPU::SGPR_USE))
        .addReg(Reg, RegState::Implicit);

      MadeChange = true;

      // Register the new instruction with the slot index maps and stretch the
      // live range up to it.
      SlotIndex SI = LIS->InsertMachineInstrInMaps(NCDSGPRUse);
      LIS->extendToIndices(*LR, SI.getRegSlot());

      if (LV) {
        // TODO: This won't work post-SSA
        LV->HandleVirtRegUse(Reg, NCD, NCDSGPRUse);
      }

      DEBUG(NCDSGPRUse->dump());
    }
  }

  return MadeChange;
}
Exemplo n.º 3
0
/// Inserts SGPR_USE instructions so that an SGPR value which is live into
/// only one successor of a two-way branch is kept live into both.
///
/// A first pass records the live range of every register that has an
/// explicit SGPR def anywhere in the function. A second pass looks at each
/// block with two successors and, for every recorded range that is live into
/// exactly one of them, adds an implicit use of the register at the top of
/// the successors' nearest common post-dominator.
///
/// \param MF Function to process.
/// \returns true if at least one SGPR_USE instruction was inserted.
bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
  MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
  std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;
  // Inserting an instruction modifies the function, and that has to be
  // reported to the caller through the return value.
  bool MadeChange = false;

  // First pass, collect all live intervals for SGPRs
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      for (const MachineOperand &MO : MI.defs()) {
        if (MO.isImplicit())
          continue;
        unsigned Def = MO.getReg();
        if (TargetRegisterInfo::isVirtualRegister(Def)) {
          if (TRI->isSGPRClass(MRI.getRegClass(Def)))
            SGPRLiveRanges.push_back(
                std::make_pair(Def, &LIS->getInterval(Def)));
        } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
            SGPRLiveRanges.push_back(
                std::make_pair(Def, &LIS->getRegUnit(Def)));
        }
      }
    }
  }

  // Second pass fix the intervals
  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {
    MachineBasicBlock &MBB = *BI;
    if (MBB.succ_size() < 2)
      continue;

    // We have structured control flow, so number of successors should be two.
    assert(MBB.succ_size() == 2);
    MachineBasicBlock *SuccA = *MBB.succ_begin();
    MachineBasicBlock *SuccB = *(++MBB.succ_begin());
    // Check the successors before they are handed to any other call.
    assert(SuccA && SuccB);
    MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);

    if (!NCD)
      continue;

    MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator();

    if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) {
      assert(NCD->succ_size() == 2);
      // We want to make sure we insert the Use after the ENDIF, not after
      // the ELSE.
      NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
                                            *(++NCD->succ_begin()));
      // The second lookup can fail just like the first one; NCD is
      // dereferenced below, so skip this branch when there is no block to
      // insert into.
      if (!NCD)
        continue;
    }

    for (const std::pair<unsigned, LiveRange *> &RegLR : SGPRLiveRanges) {
      unsigned Reg = RegLR.first;
      LiveRange *LR = RegLR.second;

      // FIXME: We could be smarter here.  If the register is Live-In to
      // one block, but the other doesn't have any SGPR defs, then there
      // won't be a conflict.  Also, if the branch decision is based on
      // a value in an SGPR, then there will be no conflict.
      bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
      bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);

      // Nothing to do when the range is live into both successors or into
      // neither of them.
      if (LiveInToA == LiveInToB)
        continue;

      // This interval is live in to one successor, but not the other, so
      // we need to update its range so it is live in to both.
      DEBUG(dbgs() << "Possible SGPR conflict detected " <<  " in " << *LR <<
                      " BB#" << SuccA->getNumber() << ", BB#" <<
                      SuccB->getNumber() <<
                      " with NCD = " << NCD->getNumber() << '\n');

      // FIXME: Need to figure out how to update LiveRange here so this pass
      // will be able to preserve LiveInterval analysis.
      BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
              TII->get(AMDGPU::SGPR_USE))
              .addReg(Reg, RegState::Implicit);
      MadeChange = true;
      // The new instruction was inserted in front of the previous first
      // non-PHI instruction, so it is now the first non-PHI itself.
      DEBUG(NCD->getFirstNonPHI()->dump());
    }
  }

  return MadeChange;
}