Example #1
bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
  RegMap Defs;
  bool Modified = false;

  // Walk over MBB tracking the def points of the registers.
  MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
  MachineBasicBlock::iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = llvm::next(MII);
    MachineInstr *MI = &*MII;

    if (MI->getOpcode() == ARM::VMOVD &&
        !TII->isPredicated(MI)) {
      unsigned SrcReg = MI->getOperand(1).getReg();
      // If we do not find an instruction defining the reg, the register
      // must be live-in to this BB. It's always better to use NEON
      // reg-reg moves.
      unsigned Domain = ARMII::DomainNEON;
      RegMap::iterator DefMI = Defs.find(SrcReg);
      if (DefMI != Defs.end()) {
        Domain = DefMI->second->getDesc().TSFlags & ARMII::DomainMask;
        // Instructions in general domain are subreg accesses.
        // Map them to NEON reg-reg moves.
        if (Domain == ARMII::DomainGeneral)
          Domain = ARMII::DomainNEON;
      }

      if (inNEONDomain(Domain, isA8)) {
        // Convert VMOVD to VORRd
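        // A VORRd whose two source operands are the same register is the
        // NEON-domain equivalent of VMOVD (dD = dS | dS), so the value
        // stays in the NEON pipeline and avoids a cross-domain stall.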
        unsigned DestReg = MI->getOperand(0).getReg();

        DEBUG({errs() << "vmov convert: "; MI->dump();});

        // It's safe to ignore imp-defs / imp-uses here, since:
        //  - We're running late, no intelligent codegen passes should be run
        //    afterwards
        //  - The imp-defs / imp-uses are superregs only, we don't care about
        //    them.
        AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(),
                               TII->get(ARM::VORRd), DestReg)
                         .addReg(SrcReg).addReg(SrcReg));
        MBB.erase(MI);
        MachineBasicBlock::iterator I = prior(NextMII);
        MI = &*I;

        DEBUG({errs() << "        into: "; MI->dump();});

        Modified = true;
        ++NumVMovs;
      } else {
// Compute base address using Addr and return the final register.
unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
                                           const MemAddress &Addr) {
  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::iterator MBBI = MI.getIterator();
  DebugLoc DL = MI.getDebugLoc();

  assert((TRI->getRegSizeInBits(Addr.Base.LoReg, *MRI) == 32 ||
          Addr.Base.LoSubReg) &&
         "Expected 32-bit Base-Register-Low!!");

  assert((TRI->getRegSizeInBits(Addr.Base.HiReg, *MRI) == 32 ||
          Addr.Base.HiSubReg) &&
         "Expected 32-bit Base-Register-Hi!!");

  LLVM_DEBUG(dbgs() << "  Re-Computed Anchor-Base:\n");
  MachineOperand OffsetLo = createRegOrImm(static_cast<int32_t>(Addr.Offset), MI);
  MachineOperand OffsetHi =
    createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);
  unsigned CarryReg = MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
  unsigned DeadCarryReg =
    MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

  unsigned DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *LoHalf =
    BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0)
      .addReg(CarryReg, RegState::Define)
      .addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
    .add(OffsetLo);
  (void)LoHalf;
  LLVM_DEBUG(dbgs() << "    "; LoHalf->dump(););
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
  SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;  // Candidates for deletion
  DenseMap<unsigned, MachineInstr*> AvailCopyMap;    // Def -> available copies map
  DenseMap<unsigned, MachineInstr*> CopyMap;         // Def -> copies map
  SourceMap SrcMap; // Src -> Def map

  DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");

  bool Changed = false;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
    MachineInstr *MI = &*I;
    ++I;

    if (MI->isCopy()) {
      unsigned Def = MI->getOperand(0).getReg();
      unsigned Src = MI->getOperand(1).getReg();

      if (TargetRegisterInfo::isVirtualRegister(Def) ||
          TargetRegisterInfo::isVirtualRegister(Src))
        report_fatal_error("MachineCopyPropagation should be run after"
                           " register allocation!");

      DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
      if (CI != AvailCopyMap.end()) {
        MachineInstr *CopyMI = CI->second;
        if (!MRI->isReserved(Def) &&
            (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
            isNopCopy(CopyMI, Def, Src, TRI)) {
          // The two copies cancel out and the source of the first copy
          // hasn't been overridden, eliminate the second one. e.g.
          //  %ECX<def> = COPY %EAX<kill>
          //  ... nothing clobbered EAX.
          //  %EAX<def> = COPY %ECX
          // =>
          //  %ECX<def> = COPY %EAX
          //
          // Also avoid eliminating a copy from reserved registers unless the
          // definition is proven not clobbered. e.g.
          // %RSP<def> = COPY %RAX
          // CALL
          // %RAX<def> = COPY %RSP

          DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; MI->dump());

          // Clear any kills of Def between CopyMI and MI. This extends the
          // live range.
          for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
            I->clearRegisterKills(Def, TRI);

          removeCopy(MI);
          Changed = true;
          ++NumDeletes;
          continue;
        }
      }
MachineOperand
SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) {
  APInt V(32, Val, true);
  if (TII->isInlineConstant(V))
    return MachineOperand::CreateImm(Val);

  unsigned Reg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MachineInstr *Mov =
    BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
            TII->get(AMDGPU::S_MOV_B32), Reg)
      .addImm(Val);
  (void)Mov;
  LLVM_DEBUG(dbgs() << "    "; Mov->dump());
  return MachineOperand::CreateReg(Reg, false);
}
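
For intuition, here is a minimal standalone sketch (a hypothetical helper, not the LLVM API) of the integer half of the inline-constant test that createRegOrImm relies on: values the hardware can encode directly in the operand field need no S_MOV_B32.

#include <cstdint>

// Stand-in for the integer path of TII->isInlineConstant: AMDGPU encodes
// integers in [-16, 64] as inline constants. The floating-point inline
// constants handled by the real hook are omitted from this sketch.
static bool isIntegerInlineConstant(int64_t V) {
  return V >= -16 && V <= 64;
}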
Example #5
  bool isBundlableWithCurrentPMI(MachineInstr &MI,
                                 const DenseMap<unsigned, unsigned> &PV,
                                 std::vector<R600InstrInfo::BankSwizzle> &BS,
                                 bool &isTransSlot) {
    isTransSlot = TII->isTransOnly(MI);
    assert(!isTransSlot || VLIW5);

    // Is the dst reg sequence legal?
    if (!isTransSlot && !CurrentPacketMIs.empty()) {
      if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) {
        if (ConsideredInstUsesAlreadyWrittenVectorElement &&
            !TII->isVectorOnly(MI) && VLIW5) {
          isTransSlot = true;
          LLVM_DEBUG({
            dbgs() << "Considering as Trans Inst :";
            MI.dump();
          });
        }
        else
          return false;
  bool runOnMachineFunction(MachineFunction &MF) override {
    ST = &MF.getSubtarget<R600Subtarget>();
    MaxFetchInst = ST->getTexVTXClauseSize();
    TII = ST->getInstrInfo();
    TRI = ST->getRegisterInfo();

    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

    CFStack CFStack(ST, MF.getFunction().getCallingConv());
    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
        ++MB) {
      MachineBasicBlock &MBB = *MB;
      unsigned CfCount = 0;
      std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
      std::vector<MachineInstr * > IfThenElseStack;
      if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
            getHWInstrDesc(CF_CALL_FS));
        CfCount++;
      }
      std::vector<ClauseFile> FetchClauses, AluClauses;
      std::vector<MachineInstr *> LastAlu(1);
      std::vector<MachineInstr *> ToPopAfter;

      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E;) {
        if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
          LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
          FetchClauses.push_back(MakeFetchClause(MBB, I));
          CfCount++;
          LastAlu.back() = nullptr;
          continue;
        }

        MachineBasicBlock::iterator MI = I;
        if (MI->getOpcode() != R600::ENDIF)
          LastAlu.back() = nullptr;
        if (MI->getOpcode() == R600::CF_ALU)
          LastAlu.back() = &*MI;
        I++;
        bool RequiresWorkAround =
            CFStack.requiresWorkAroundForInst(MI->getOpcode());
        switch (MI->getOpcode()) {
        case R600::CF_ALU_PUSH_BEFORE:
          if (RequiresWorkAround) {
            LLVM_DEBUG(dbgs()
                       << "Applying bug work-around for ALU_PUSH_BEFORE\n");
            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
                .addImm(CfCount + 1)
                .addImm(1);
            MI->setDesc(TII->get(R600::CF_ALU));
            CfCount++;
            CFStack.pushBranch(R600::CF_PUSH_EG);
          } else
            CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
          LLVM_FALLTHROUGH;
        case R600::CF_ALU:
          I = MI;
          AluClauses.push_back(MakeALUClause(MBB, I));
          LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
          CfCount++;
          break;
        case R600::WHILELOOP: {
          CFStack.pushLoop();
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_WHILE_LOOP))
              .addImm(1);
          std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
              std::set<MachineInstr *>());
          Pair.second.insert(MIb);
          LoopStack.push_back(std::move(Pair));
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDLOOP: {
          CFStack.popLoop();
          std::pair<unsigned, std::set<MachineInstr *>> Pair =
              std::move(LoopStack.back());
          LoopStack.pop_back();
          CounterPropagateAddr(Pair.second, CfCount);
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
              .addImm(Pair.first + 1);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::IF_PREDICATE_SET: {
          LastAlu.push_back(nullptr);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_JUMP))
              .addImm(0)
              .addImm(0);
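          // Both operands are placeholders; the jump address is patched in
          // later, once the matching ELSE/ENDIF position is known (the
          // instruction is remembered on IfThenElseStack for that purpose).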
          IfThenElseStack.push_back(MIb);
          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          MI->eraseFromParent();
          CfCount++;
          break;
        }
Example #7
bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  bool MadeChange = false;

  MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
  std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;

  LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
  LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
  MachineBasicBlock *Entry = MF.begin();

  // Use a depth first order so that in SSA, we encounter all defs before
  // uses. Once the defs of the block have been found, attempt to insert
  // SGPR_USE instructions in successor blocks if required.
  for (MachineBasicBlock *MBB : depth_first(Entry)) {
    for (const MachineInstr &MI : *MBB) {
      for (const MachineOperand &MO : MI.defs()) {
        if (MO.isImplicit())
          continue;
        unsigned Def = MO.getReg();
        if (TargetRegisterInfo::isVirtualRegister(Def)) {
          if (TRI->isSGPRClass(MRI.getRegClass(Def))) {
            // Only consider defs that are live outs. We don't care about def /
            // use within the same block.
            LiveRange &LR = LIS->getInterval(Def);
            if (LIS->isLiveOutOfMBB(LR, MBB))
              SGPRLiveRanges.push_back(std::make_pair(Def, &LR));
          }
        } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
          SGPRLiveRanges.push_back(std::make_pair(Def, &LIS->getRegUnit(Def)));
        }
      }
    }

    if (MBB->succ_size() < 2)
      continue;

    // We have structured control flow, so the number of successors should be
    // two.
    assert(MBB->succ_size() == 2);
    MachineBasicBlock *SuccA = *MBB->succ_begin();
    MachineBasicBlock *SuccB = *(++MBB->succ_begin());
    MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);

    if (!NCD)
      continue;

    MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator();

    if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) {
      assert(NCD->succ_size() == 2);
      // We want to make sure we insert the Use after the ENDIF, not after
      // the ELSE.
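      // Taking the common post-dominator of the SI_ELSE block's successors
      // steps past the ELSE region to the join point after the ENDIF.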
      NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
                                            *(++NCD->succ_begin()));
    }

    for (std::pair<unsigned, LiveRange*> RegLR : SGPRLiveRanges) {
      unsigned Reg = RegLR.first;
      LiveRange *LR = RegLR.second;

      // FIXME: We could be smarter here. If the register is Live-In to one
      // block, but the other doesn't have any SGPR defs, then there won't be a
      // conflict. Also, if the branch condition is uniform then there will be
      // no conflict.
      bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
      bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);

      if (!LiveInToA && !LiveInToB) {
        DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
              << " is live into neither successor\n");
        continue;
      }

      if (LiveInToA && LiveInToB) {
        DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
              << " is live into both successors\n");
        continue;
      }

      // This interval is live in to one successor, but not the other, so
      // we need to update its range so it is live in to both.
      DEBUG(dbgs() << "Possible SGPR conflict detected for "
            << PrintReg(Reg, TRI, 0) <<  " in " << *LR
            << " BB#" << SuccA->getNumber() << ", BB#"
            << SuccB->getNumber()
            << " with NCD = BB#" << NCD->getNumber() << '\n');

      assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
             "Not expecting to extend live range of physreg");

      // FIXME: Need to figure out how to update LiveRange here so this pass
      // will be able to preserve LiveInterval analysis.
      MachineInstr *NCDSGPRUse =
        BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
                TII->get(AMDGPU::SGPR_USE))
        .addReg(Reg, RegState::Implicit);

      MadeChange = true;

      SlotIndex SI = LIS->InsertMachineInstrInMaps(NCDSGPRUse);
      LIS->extendToIndices(*LR, SI.getRegSlot());

      if (LV) {
        // TODO: This won't work post-SSA
        LV->HandleVirtRegUse(Reg, NCD, NCDSGPRUse);
      }

      DEBUG(NCDSGPRUse->dump());
    }
  }

  return MadeChange;
}
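
The situation the loop above repairs, sketched for orientation (an illustration, not part of the source):

//        MBB   (divergent branch, exactly two successors)
//       /   \
//   SuccA   SuccB      SGPR live into SuccA but not SuccB -> conflict
//       \   /
//        NCD           artificial SGPR_USE inserted here extends the
//                      live range so it covers both paths to the join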
Example #8
/*!
  \note This code was copied from PPC. There may be more branch analysis for
  CellSPU than what's currently done here.
 */
bool
SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                            MachineBasicBlock *&FBB,
                            SmallVectorImpl<MachineOperand> &Cond,
                            bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (isUncondBranch(LastInst)) {
      // Check for jump tables
      if (!LastInst->getOperand(0).isMBB())
        return true;
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (isCondBranch(LastInst)) {
      // Block ends with fall-through condbranch.
      TBB = LastInst->getOperand(1).getMBB();
      DEBUG(errs() << "Pushing LastInst:               ");
      DEBUG(LastInst->dump());
      Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
      Cond.push_back(LastInst->getOperand(0));
      return false;
    }
    // Otherwise, don't know what this is.
    return true;
  }

  // Get the instruction before it if it's a terminator.
  MachineInstr *SecondLastInst = I;

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() &&
      isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a conditional and unconditional branch, handle it.
  if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
    TBB =  SecondLastInst->getOperand(1).getMBB();
    DEBUG(errs() << "Pushing SecondLastInst:         ");
    DEBUG(SecondLastInst->dump());
    Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
    Cond.push_back(SecondLastInst->getOperand(0));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it.  The second
  // one is not executed, so remove it.
  if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
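
For reference, the (TBB, FBB, Cond) contract the function above fills in follows the generic TargetInstrInfo convention; a compact summary (a reading aid, not CellSPU-specific code):

// Return false = block analyzed. Recognized shapes:
//   fall-through only:           TBB and FBB null, Cond empty
//   unconditional branch:        TBB = target, Cond empty
//   conditional fall-through:    TBB = taken target,
//                                Cond = {branch opcode imm, predicate operand}
//   conditional + unconditional: TBB = taken target, FBB = not-taken target
// Return true = cannot analyze; callers must leave the block untouched.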
bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {

  DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
               << "********** Function: "
               << MF.getName() << "\n");

#if 0
  // for now disable this, if we move NewValueJump before register
  // allocation we need this information.
  LiveVariables &LVs = getAnalysis<LiveVariables>();
#endif

  QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo());
  QRI =
    static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());

  if (!QRI->Subtarget.hasV4TOps() ||
      DisableNewValueJumps) {
    return false;
  }

  int nvjCount = DbgNVJCount;
  int nvjGenerated = 0;

  // Loop through all the bb's of the function
  for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
        MBBb != MBBe; ++MBBb) {
    MachineBasicBlock* MBB = MBBb;

    DEBUG(dbgs() << "** dumping bb ** "
                 << MBB->getNumber() << "\n");
    DEBUG(MBB->dump());
    DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n");
    bool foundJump    = false;
    bool foundCompare = false;
    bool invertPredicate = false;
    unsigned predReg = 0; // predicate reg of the jump.
    unsigned cmpReg1 = 0;
    int cmpOp2 = 0;
    bool MO1IsKill = false;
    bool MO2IsKill = false;
    MachineBasicBlock::iterator jmpPos;
    MachineBasicBlock::iterator cmpPos;
    MachineInstr *cmpInstr = NULL, *jmpInstr = NULL;
    MachineBasicBlock *jmpTarget = NULL;
    bool afterRA = false;
    bool isSecondOpReg = false;
    bool isSecondOpNewified = false;
    // Traverse the basic block - bottom up
    for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin();
             MII != E;) {
      MachineInstr *MI = --MII;
      if (MI->isDebugValue()) {
        continue;
      }

      if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated))
        break;

      DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n");

      if (!foundJump &&
         (MI->getOpcode() == Hexagon::JMP_c ||
          MI->getOpcode() == Hexagon::JMP_cNot ||
          MI->getOpcode() == Hexagon::JMP_cdnPt ||
          MI->getOpcode() == Hexagon::JMP_cdnPnt ||
          MI->getOpcode() == Hexagon::JMP_cdnNotPt ||
          MI->getOpcode() == Hexagon::JMP_cdnNotPnt)) {
        // This is where you would insert your compare and
        // instr that feeds compare
        jmpPos = MII;
        jmpInstr = MI;
        predReg = MI->getOperand(0).getReg();
        afterRA = TargetRegisterInfo::isPhysicalRegister(predReg);

        // If ifconverter had not messed up with the kill flags of the
        // operands, the following check on the kill flag would suffice.
        // if(!jmpInstr->getOperand(0).isKill()) break;

        // This predicate register is live out of the BB; checking that
        // directly would only work if we could use live variable analysis
        // on phys regs - but LLVM does not provide LV analysis on phys regs.
        //if(LVs.isLiveOut(predReg, *MBB)) break;

        // Get all the successors of this block - there will always be
        // two. Check if the predicate register is live in to those
        // successors. If yes, we cannot delete the predicate -
        // we do this only because LLVM does not provide LiveOut
        // at the BB level.
        bool predLive = false;
        for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
                            SIE = MBB->succ_end(); SI != SIE; ++SI) {
          MachineBasicBlock* succMBB = *SI;
          if (succMBB->isLiveIn(predReg)) {
            predLive = true;
          }
        }
        if (predLive)
          break;

        jmpTarget = MI->getOperand(1).getMBB();
        foundJump = true;
        if (MI->getOpcode() == Hexagon::JMP_cNot ||
            MI->getOpcode() == Hexagon::JMP_cdnNotPt ||
            MI->getOpcode() == Hexagon::JMP_cdnNotPnt) {
          invertPredicate = true;
        }
        continue;
      }

      // No new value jump if there is a barrier. A barrier has to be in its
      // own packet. A barrier has zero operands. We conservatively bail out
      // here if we see any instruction with zero operands.
      if (foundJump && MI->getNumOperands() == 0)
        break;

      if (foundJump &&
         !foundCompare &&
          MI->getOperand(0).isReg() &&
          MI->getOperand(0).getReg() == predReg) {

        // Not all compares can be new value compare. Arch Spec: 7.6.1.1
        if (QII->isNewValueJumpCandidate(MI)) {

          assert((MI->getDesc().isCompare()) &&
                 "Only compare instruction can be collapsed into New Value Jump");
          isSecondOpReg = MI->getOperand(2).isReg();

          if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg,
                                        afterRA, jmpPos, MF))
            break;

          cmpInstr = MI;
          cmpPos = MII;
          foundCompare = true;

          // We need cmpReg1 and cmpOp2 (imm or reg) while building
          // the new value jump instruction.
          cmpReg1 = MI->getOperand(1).getReg();
          if (MI->getOperand(1).isKill())
            MO1IsKill = true;

          if (isSecondOpReg) {
            cmpOp2 = MI->getOperand(2).getReg();
            if (MI->getOperand(2).isKill())
              MO2IsKill = true;
          } else
            cmpOp2 = MI->getOperand(2).getImm();
          continue;
        }
      }

      if (foundCompare && foundJump) {

        // If "common" checks fail, bail out on this BB.
        if (!commonChecksToProhibitNewValueJump(afterRA, MII))
          break;

        bool foundFeeder = false;
        MachineBasicBlock::iterator feederPos = MII;
        if (MI->getOperand(0).isReg() &&
            MI->getOperand(0).isDef() &&
           (MI->getOperand(0).getReg() == cmpReg1 ||
            (isSecondOpReg &&
             MI->getOperand(0).getReg() == (unsigned) cmpOp2))) {

          unsigned feederReg = MI->getOperand(0).getReg();

          // First try to see if we can get the feeder from the first operand
          // of the compare. If we cannot, and if isSecondOpReg is true (the
          // second operand of the compare is also a register), try that one.
          // TODO: Try to come up with some heuristic to figure out which
          // feeder would benefit.

          if (feederReg == cmpReg1) {
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) {
              if (!isSecondOpReg)
                break;
              else
                continue;
            } else
              foundFeeder = true;
          }

          if (!foundFeeder &&
               isSecondOpReg &&
               feederReg == (unsigned) cmpOp2)
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF))
              break;

          if (isSecondOpReg) {
            // In case of CMPLT, or CMPLTU, or EQ with the second register
            // to newify, swap the operands.
            if (cmpInstr->getOpcode() == Hexagon::CMPLTrr  ||
                cmpInstr->getOpcode() == Hexagon::CMPLTUrr ||
                (cmpInstr->getOpcode() == Hexagon::CMPEQrr &&
                                     feederReg == (unsigned) cmpOp2)) {
              unsigned tmp = cmpReg1;
              bool tmpIsKill = MO1IsKill;
              cmpReg1 = cmpOp2;
              MO1IsKill = MO2IsKill;
              cmpOp2 = tmp;
              MO2IsKill = tmpIsKill;
            }

            // Now we have swapped the operands, all we need to check is,
            // if the second operand (after swap) is the feeder.
            // And if it is, make a note.
            if (feederReg == (unsigned)cmpOp2)
              isSecondOpNewified = true;
          }

          // Now that we are moving the feeder closer to the jump,
          // make sure we respect the kill flags of the
          // feeder's operands.

          bool updatedIsKill = false;
          for (unsigned i = 0; i < MI->getNumOperands(); i++) {
            MachineOperand &MO = MI->getOperand(i);
            if (MO.isReg() && MO.isUse()) {
              unsigned feederReg = MO.getReg();
              for (MachineBasicBlock::iterator localII = feederPos,
                   end = jmpPos; localII != end; localII++) {
                MachineInstr *localMI = localII;
                for (unsigned j = 0; j < localMI->getNumOperands(); j++) {
                  MachineOperand &localMO = localMI->getOperand(j);
                  if (localMO.isReg() && localMO.isUse() &&
                      localMO.isKill() && feederReg == localMO.getReg()) {
                    // An intervening instruction kills a register the feeder
                    // uses; transfer the kill flag to the feeder's operand.
                    localMO.setIsKill(false);
                    MO.setIsKill();
                    updatedIsKill = true;
                    break;
                  }
                }
                if (updatedIsKill) break;
              }
            }
            if (updatedIsKill) break;
          }

          MBB->splice(jmpPos, MI->getParent(), MI);
          MBB->splice(jmpPos, MI->getParent(), cmpInstr);
          DebugLoc dl = MI->getDebugLoc();
          MachineInstr *NewMI;

          assert((QII->isNewValueJumpCandidate(cmpInstr)) &&
                 "This compare is not a New Value Jump candidate.");
          unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
                                               isSecondOpNewified);
          if (invertPredicate)
            opc = QII->getInvertedPredicatedOpcode(opc);

          // Manage the conversions from CMPGEUri to either CMPEQrr
          // or CMPGTUri properly. See Arch spec for CMPGEUri instructions.
          // This has to be after the getNewValueJumpOpcode function call as
          // second operand of the compare could be modified in this logic.
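          // (For unsigned u, u >= 0 is always true, hence the r == r form
          // below; for imm > 0, u >= imm is equivalent to u > imm - 1.)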
          if (cmpInstr->getOpcode() == Hexagon::CMPGEUri) {
            if (cmpOp2 == 0) {
              cmpOp2 = cmpReg1;
              MO2IsKill = MO1IsKill;
              isSecondOpReg = true;
            } else
              --cmpOp2;
          }

          // Manage the conversions from CMPGEri to CMPGTri properly.
          // See Arch spec for CMPGEri instructions.
          if (cmpInstr->getOpcode() == Hexagon::CMPGEri)
            --cmpOp2;

          if (isSecondOpReg) {
            NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc))
                        .addReg(cmpReg1, getKillRegState(MO1IsKill))
                        .addReg(cmpOp2, getKillRegState(MO2IsKill))
                        .addMBB(jmpTarget);
          } else {
            NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc))
                        .addReg(cmpReg1, getKillRegState(MO1IsKill))
                        .addImm(cmpOp2)
                        .addMBB(jmpTarget);
          }

          assert(NewMI && "New Value Jump Instruction Not created!");
          if (cmpInstr->getOperand(0).isReg() &&
              cmpInstr->getOperand(0).isKill())
            cmpInstr->getOperand(0).setIsKill(false);
          if (cmpInstr->getOperand(1).isReg() &&
              cmpInstr->getOperand(1).isKill())
            cmpInstr->getOperand(1).setIsKill(false);
          cmpInstr->eraseFromParent();
          jmpInstr->eraseFromParent();
          ++nvjGenerated;
          ++NumNVJGenerated;
          break;
        }
      }
    }
  }

  return true;
}
  virtual bool runOnMachineFunction(MachineFunction &MF) {
    TII=static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
    TRI=static_cast<const R600RegisterInfo *>(MF.getTarget().getRegisterInfo());

    unsigned MaxStack = 0;
    unsigned CurrentStack = 0;
    unsigned CurrentLoopDepth = 0;
    bool HasPush = false;
    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
        ++MB) {
      MachineBasicBlock &MBB = *MB;
      unsigned CfCount = 0;
      std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
      std::vector<MachineInstr * > IfThenElseStack;
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      if (MFI->ShaderType == 1) {
        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
            getHWInstrDesc(CF_CALL_FS));
        CfCount++;
        MaxStack = 1;
      }
      std::vector<ClauseFile> FetchClauses, AluClauses;
      std::vector<MachineInstr *> LastAlu(1);
      std::vector<MachineInstr *> ToPopAfter;
      
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E;) {
        if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
          DEBUG(dbgs() << CfCount << ":"; I->dump(););
          FetchClauses.push_back(MakeFetchClause(MBB, I));
          CfCount++;
          continue;
        }

        MachineBasicBlock::iterator MI = I;
        if (MI->getOpcode() != AMDGPU::ENDIF)
          LastAlu.back() = 0;
        if (MI->getOpcode() == AMDGPU::CF_ALU)
          LastAlu.back() = MI;
        I++;
        switch (MI->getOpcode()) {
        case AMDGPU::CF_ALU_PUSH_BEFORE:
          CurrentStack++;
          MaxStack = std::max(MaxStack, CurrentStack);
          HasPush = true;
          if (ST.hasCaymanISA() && CurrentLoopDepth > 1) {
            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_CM))
                .addImm(CfCount + 1)
                .addImm(1);
            MI->setDesc(TII->get(AMDGPU::CF_ALU));
            CfCount++;
          }
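          // Intentional fall-through into the CF_ALU handling below.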
        case AMDGPU::CF_ALU:
          I = MI;
          AluClauses.push_back(MakeALUClause(MBB, I));
          DEBUG(dbgs() << CfCount << ":"; MI->dump(););
          CfCount++;
          break;
        case AMDGPU::WHILELOOP: {
          CurrentStack+=4;
          CurrentLoopDepth++;
          MaxStack = std::max(MaxStack, CurrentStack);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_WHILE_LOOP))
              .addImm(1);
          std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
              std::set<MachineInstr *>());
          Pair.second.insert(MIb);
          LoopStack.push_back(Pair);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::ENDLOOP: {
          CurrentStack-=4;
          CurrentLoopDepth--;
          std::pair<unsigned, std::set<MachineInstr *> > Pair =
              LoopStack.back();
          LoopStack.pop_back();
          CounterPropagateAddr(Pair.second, CfCount);
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
              .addImm(Pair.first + 1);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::IF_PREDICATE_SET: {
          LastAlu.push_back(0);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_JUMP))
              .addImm(0)
              .addImm(0);
          IfThenElseStack.push_back(MIb);
          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          MI->eraseFromParent();
          CfCount++;
          break;
        }
Example #11
void VPreRegAllocSched::buildMemDepEdges(VSchedGraph &G, ArrayRef<VSUnit*> SUs){
  // The schedule unit and the corresponding memory operand.
  typedef std::vector<std::pair<MachineMemOperand*, VSUnit*> > MemOpMapTy;
  MemOpMapTy VisitedOps;
  Loop *IRL = LI->getLoopFor(G.getEntryBB()->getBasicBlock());

  typedef ArrayRef<VSUnit*>::iterator it;
  for (it I = SUs.begin(), E = SUs.end(); I != E; ++I) {
    VSUnit *DstU = *I;
    MachineInstr *DstMI = DstU->getRepresentativePtr();
    // Skip operations that neither access memory nor are calls.
    if (!mayAccessMemory(DstMI->getDesc())) continue;

    bool isDstWrite = VInstrInfo::mayStore(DstMI);

    // Dirty Hack: Is the const_cast safe?
    MachineMemOperand *DstMO = 0;
    // TODO: Also try to get the address information for call instruction.
    if (!DstMI->memoperands_empty() && !DstMI->hasVolatileMemoryRef()) {
      assert(DstMI->hasOneMemOperand() && "Can not handle multiple mem ops!");
      assert(!DstMI->hasVolatileMemoryRef() && "Can not handle volatile op!");

      // FIXME: DstMO may be null in a VOpCmdSeq
      if ((DstMO = /*ASSIGNMENT*/ *DstMI->memoperands_begin())) {
        assert(!isa<PseudoSourceValue>(DstMO->getValue())
               && "Unexpected frame stuffs!");
      }
    }

    typedef MemOpMapTy::iterator visited_it;
    for (visited_it I = VisitedOps.begin(), E = VisitedOps.end(); I != E; ++I) {
      MachineMemOperand *SrcMO = I->first;
      VSUnit *SrcU = I->second;

      MachineInstr *SrcMI = SrcU->getRepresentativePtr();

      bool MayBothActive = !VInstrInfo::isPredicateMutex(SrcMI, DstMI);
      if (!MayBothActive) ++MutexPredNoAlias;

      // Handle unanalyzable memory access.
      if (DstMO == 0 || SrcMO == 0) {
        // Build the Src -> Dst dependence.
        unsigned Latency = G.getStepsToFinish(SrcMI);
        //if (MayBothActive || SrcMO != DstMO)
        DstU->addDep<true>(SrcU, VDEdge::CreateMemDep(Latency, 0));

        // Build the Dst -> Src (in next iteration) dependence; the dependence
        // occurs even if SrcMI and DstMI are mutually exclusive.
        if (G.enablePipeLine()) {
          Latency = G.getStepsToFinish(SrcMI);
          SrcU->addDep<true>(DstU, VDEdge::CreateMemDep(Latency, 1));
        }
        // Go on to handle the next visited SUnit.
        continue;
      }

      bool isSrcWrite = VInstrInfo::mayStore(SrcMI);

      // Ignore RAR dependence.
      if (!isDstWrite && !isSrcWrite) continue;

      if (!isMachineMemOperandAlias(SrcMO, DstMO, AA, SE))
        continue;

      if (G.enablePipeLine()) {
        assert(IRL && "Can not handle machine loop without IR loop!");
        DEBUG(SrcMI->dump();  dbgs() << "vs\n"; DstMI->dump(); dbgs() << '\n');

        // Dst does not depend on Src if they are mutually exclusive.
        if (MayBothActive) {
          // Compute the iteration distance.
          int DepDst = analyzeLoopDep(SrcMO, DstMO, *IRL, true);

          if (DepDst >= 0) {
            unsigned Latency = G.getStepsToFinish(SrcMI);
            DstU->addDep<true>(SrcU, VDEdge::CreateMemDep(Latency, DepDst));
          }
        }

        // We need to compute whether Src depends on Dst even when Dst does
        // not depend on Src, because the dependence depends on execution
        // order when SrcMI and DstMI are mutually exclusive.
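        // Worked example (illustrative only): in a pipelined loop such as
        //   for (i = 2; i < N; ++i) A[i] = A[i-2] + 1;
        // the store to A[i] feeds the load of A[i-2] two iterations later,
        // so the analysis would report an iteration distance of 2.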
        int DepDst = analyzeLoopDep(DstMO, SrcMO, *IRL, false);

        if (DepDst >= 0) {
          unsigned Latency = G.getStepsToFinish(SrcMI);
          SrcU->addDep<true>(DstU, VDEdge::CreateMemDep(Latency, DepDst));
        }
      } else if (MayBothActive) {
        unsigned Latency = G.getStepsToFinish(SrcMI);
        DstU->addDep<true>(SrcU, VDEdge::CreateMemDep(Latency, 0));
      }
    }