Example No. 1
/// getTripCount - Return a loop-invariant LLVM value indicating the
/// number of times the loop will be executed.  The trip count can
/// be either a register or a constant value.  If the trip-count
/// cannot be determined, this returns null.
///
/// We find the trip count from the phi instruction that defines the
/// induction variable.  We follow the links to the CMP instruction
/// to get the trip count.
///
/// Based upon getTripCount in LoopInfo.
///
CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
                           SmallVector<MachineInstr *, 2> &OldInsts) const {
  MachineBasicBlock *LastMBB = L->getExitingBlock();
  // Don't generate a CTR loop if the loop has more than one exit.
  if (LastMBB == 0)
    return 0;

  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
  if (LastI->getOpcode() != PPC::BCC)
    return 0;

  // We need to make sure that this compare is defining the condition
  // register actually used by the terminating branch.

  unsigned PredReg = LastI->getOperand(1).getReg();
  DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI);

  unsigned PredCond = LastI->getOperand(0).getImm();
  if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
    return 0;

  // Check that the loop has an induction variable.
  SmallVector<MachineInstr *, 4> IVars, IOps;
  getCanonicalInductionVariable(L, IVars, IOps);
  for (unsigned i = 0; i < IVars.size(); ++i) {
    MachineInstr *IOp = IOps[i];
    MachineInstr *IV_Inst = IVars[i];

    // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm',
    //  if Imm is 0, get the count from the PHI opnd
    //  if Imm is -M, then M is the count
    //  Otherwise, Imm is the count
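    // Illustrative example (not from any particular test case): if the loop
    // counter starts at 16 (materialized by li or lis/ori), the compare is
    // 'cmpwi cr0, IV, 64', and the induction step is 4, then
    // count = 64 - 16 = 48 and the trip count is count / iv_value = 12.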
    MachineOperand *IV_Opnd;
    const MachineOperand *InitialValue;
    if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
      InitialValue = &IV_Inst->getOperand(1);
      IV_Opnd = &IV_Inst->getOperand(3);
    } else {
      InitialValue = &IV_Inst->getOperand(3);
      IV_Opnd = &IV_Inst->getOperand(1);
    }

    DEBUG(dbgs() << "Considering:\n");
    DEBUG(dbgs() << "  induction operation: " << *IOp);
    DEBUG(dbgs() << "  induction variable: " << *IV_Inst);
    DEBUG(dbgs() << "  initial value: " << *InitialValue << "\n");
  
    // Look for the cmp instruction to determine if we
    // can get a useful trip count.  The trip count can
    // be either a register or an immediate.  The location
    // of the value depends upon the type (reg or imm).
    for (MachineRegisterInfo::reg_iterator
         RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
         RI != RE; ++RI) {
      IV_Opnd = &RI.getOperand();
      bool SignedCmp, Int64Cmp;
      MachineInstr *MI = IV_Opnd->getParent();
      if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) &&
          MI->getOperand(0).getReg() == PredReg) {

        OldInsts.push_back(MI);
        OldInsts.push_back(IOp);
 
        DEBUG(dbgs() << "  compare: " << *MI);
 
        const MachineOperand &MO = MI->getOperand(2);
        assert(MO.isImm() && "IV Cmp Operand should be an immediate");

        int64_t ImmVal;
        if (SignedCmp)
          ImmVal = (short) MO.getImm();
        else
          ImmVal = MO.getImm();
  
        const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
        assert(L->contains(IV_DefInstr->getParent()) &&
               "IV definition should occurs in loop");
        int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm();
  
        assert(InitialValue->isReg() && "Expecting register for init value");
        unsigned InitialValueReg = InitialValue->getReg();
  
        MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
  
        // Here we need to look for an immediate load (an li or lis/ori pair).
        if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
                         DefInstr->getOpcode() == PPC::ORI)) {
          int64_t start = DefInstr->getOperand(2).getImm();
          MachineInstr *DefInstr2 =
            MRI->getVRegDef(DefInstr->getOperand(1).getReg());
          if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
                            DefInstr2->getOpcode() == PPC::LIS)) {
            DEBUG(dbgs() << "  initial constant: " << *DefInstr);
            DEBUG(dbgs() << "  initial constant: " << *DefInstr2);

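            // Combine the lis/ori pair into the full start value: the LIS
            // immediate supplies the high 16 bits and the ORI immediate the
            // low 16 bits. Illustrative example: 'lis r3, 1' followed by
            // 'ori r3, r3, 0x2345' gives start = (1 << 16) | 0x2345 = 0x12345.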
            start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16;
  
            int64_t count = ImmVal - start;
            if ((count % iv_value) != 0) {
              return 0;
            }

            OldInsts.push_back(DefInstr);
            OldInsts.push_back(DefInstr2);

            // count/iv_value, the trip count, should be positive here. If it
            // is negative, that indicates that the counter will wrap.
            if (Int64Cmp)
              return new CountValue(count/iv_value);
            else
              return new CountValue(uint32_t(count/iv_value));
          }
        } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
                                DefInstr->getOpcode() == PPC::LI)) {
          DEBUG(dbgs() << "  initial constant: " << *DefInstr);

          int64_t count = ImmVal -
            int64_t(short(DefInstr->getOperand(1).getImm()));
          if ((count % iv_value) != 0) {
            return 0;
          }

          OldInsts.push_back(DefInstr);

          if (Int64Cmp)
            return new CountValue(count/iv_value);
          else
            return new CountValue(uint32_t(count/iv_value));
        } else if (iv_value == 1 || iv_value == -1) {
          // We can't determine a constant starting value.
          if (ImmVal == 0) {
            return new CountValue(InitialValueReg, iv_value > 0);
          }
          // FIXME: handle non-zero end value.
        }
        // FIXME: handle non-unit increments (we might not want to introduce
        // division but we can handle some 2^n cases with shifts).
  
      }
    }
  }
  return 0;
}
Example No. 2
std::pair<MachineLegalizer::LegalizeAction, LLT>
MachineLegalizer::getAction(MachineInstr &MI) const {
  return getAction(MI.getOpcode(), MI.getType());
}
Example No. 3
bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // Optimize sequences emitted for control flow lowering. They are originally
  // emitted as separate operations because spill code may need to be
  // inserted for the saved copy of exec.
  //
  //     x = copy exec
  //     z = s_<op>_b64 x, y
  //     exec = copy z
  // =>
  //     x = s_<op>_saveexec_b64 y
  //
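  // Illustrative example (assuming the usual AND form), where a lane mask is
  // held in %vcc:
  //
  //     %sgpr0_sgpr1 = COPY %exec
  //     %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, %vcc
  //     %exec = COPY %sgpr2_sgpr3
  // =>
  //     %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %vcc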

  for (MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::reverse_iterator I = fixTerminators(*TII, MBB);
    MachineBasicBlock::reverse_iterator E = MBB.rend();
    if (I == E)
      continue;

    unsigned CopyToExec = isCopyToExec(*I);
    if (CopyToExec == AMDGPU::NoRegister)
      continue;

    // Scan backwards to find the def.
    auto CopyToExecInst = &*I;
    auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
    if (CopyFromExecInst == E) {
      auto PrepareExecInst = std::next(I);
      if (PrepareExecInst == E)
        continue;
      // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec
      if (CopyToExecInst->getOperand(1).isKill() &&
          isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
        DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);

        PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
        PrepareExecInst->getOperand(0).setIsRenamable(false);

        DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');

        CopyToExecInst->eraseFromParent();
      }

      continue;
    }

    if (isLiveOut(MBB, CopyToExec)) {
      // The copied register is live out and has a second use in another block.
      DEBUG(dbgs() << "Exec copy source register is live out\n");
      continue;
    }

    unsigned CopyFromExec = CopyFromExecInst->getOperand(0).getReg();
    MachineInstr *SaveExecInst = nullptr;
    SmallVector<MachineInstr *, 4> OtherUseInsts;

    for (MachineBasicBlock::iterator J
           = std::next(CopyFromExecInst->getIterator()), JE = I->getIterator();
         J != JE; ++J) {
      if (SaveExecInst && J->readsRegister(AMDGPU::EXEC, TRI)) {
        DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n');
        // Make sure this is inserted after any VALU ops that may have been
        // scheduled in between.
        SaveExecInst = nullptr;
        break;
      }

      bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI);

      if (J->modifiesRegister(CopyToExec, TRI)) {
        if (SaveExecInst) {
          DEBUG(dbgs() << "Multiple instructions modify "
                << printReg(CopyToExec, TRI) << '\n');
          SaveExecInst = nullptr;
          break;
        }

        unsigned SaveExecOp = getSaveExecOp(J->getOpcode());
        if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END)
          break;

        if (ReadsCopyFromExec) {
          SaveExecInst = &*J;
          DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n');
          continue;
        } else {
          DEBUG(dbgs() << "Instruction does not read exec copy: " << *J << '\n');
          break;
        }
      } else if (ReadsCopyFromExec && !SaveExecInst) {
        // Make sure no other instruction is trying to use this copy, before it
        // will be rewritten by the saveexec, i.e. hasOneUse. There may have
        // been another use, such as an inserted spill. For example:
        //
        // %sgpr0_sgpr1 = COPY %exec
        // spill %sgpr0_sgpr1
        // %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1
        //
        DEBUG(dbgs() << "Found second use of save inst candidate: "
              << *J << '\n');
        break;
      }

      if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) {
        assert(SaveExecInst != &*J);
        OtherUseInsts.push_back(&*J);
      }
    }

    if (!SaveExecInst)
      continue;

    DEBUG(dbgs() << "Insert save exec op: " << *SaveExecInst << '\n');

    MachineOperand &Src0 = SaveExecInst->getOperand(1);
    MachineOperand &Src1 = SaveExecInst->getOperand(2);

    MachineOperand *OtherOp = nullptr;

    if (Src0.isReg() && Src0.getReg() == CopyFromExec) {
      OtherOp = &Src1;
    } else if (Src1.isReg() && Src1.getReg() == CopyFromExec) {
      if (!SaveExecInst->isCommutable())
        break;

      OtherOp = &Src0;
    } else
      llvm_unreachable("unexpected");

    CopyFromExecInst->eraseFromParent();

    auto InsPt = SaveExecInst->getIterator();
    const DebugLoc &DL = SaveExecInst->getDebugLoc();

    BuildMI(MBB, InsPt, DL, TII->get(getSaveExecOp(SaveExecInst->getOpcode())),
            CopyFromExec)
      .addReg(OtherOp->getReg());
    SaveExecInst->eraseFromParent();

    CopyToExecInst->eraseFromParent();

    for (MachineInstr *OtherInst : OtherUseInsts) {
      OtherInst->substituteRegister(CopyToExec, AMDGPU::EXEC,
                                    AMDGPU::NoSubRegister, *TRI,
                                    /*ClearIsRenamable=*/true);
    }
  }

  return true;

}
Example No. 4
/// Attempt the reassociation transformation to reduce critical path length.
/// See the above comments before getMachineCombinerPatterns().
void TargetInstrInfo::reassociateOps(
    MachineInstr &Root, MachineInstr &Prev,
    MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineFunction *MF = Root.getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);

  // This array encodes the operand index for each parameter because the
  // operands may be commuted. Each row corresponds to a pattern value,
  // and each column specifies the index of A, B, X, Y.
  unsigned OpIdx[4][4] = {
    { 1, 1, 2, 2 },
    { 1, 2, 2, 1 },
    { 2, 1, 1, 2 },
    { 2, 2, 1, 1 }
  };
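  // In every pattern, Prev computes (A op X) in some operand order and Root
  // combines that result with Y. The rewrite below builds
  //   NewVR = X op Y          (MIB1)
  //   RegC  = A op NewVR      (MIB2)
  // so Root's value becomes A op (X op Y) and the dependence chain through A
  // is one operation shorter.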

  int Row;
  switch (Pattern) {
  case MachineCombinerPattern::REASSOC_AX_BY: Row = 0; break;
  case MachineCombinerPattern::REASSOC_AX_YB: Row = 1; break;
  case MachineCombinerPattern::REASSOC_XA_BY: Row = 2; break;
  case MachineCombinerPattern::REASSOC_XA_YB: Row = 3; break;
  default: llvm_unreachable("unexpected MachineCombinerPattern");
  }

  MachineOperand &OpA = Prev.getOperand(OpIdx[Row][0]);
  MachineOperand &OpB = Root.getOperand(OpIdx[Row][1]);
  MachineOperand &OpX = Prev.getOperand(OpIdx[Row][2]);
  MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]);
  MachineOperand &OpC = Root.getOperand(0);

  unsigned RegA = OpA.getReg();
  unsigned RegB = OpB.getReg();
  unsigned RegX = OpX.getReg();
  unsigned RegY = OpY.getReg();
  unsigned RegC = OpC.getReg();

  if (TargetRegisterInfo::isVirtualRegister(RegA))
    MRI.constrainRegClass(RegA, RC);
  if (TargetRegisterInfo::isVirtualRegister(RegB))
    MRI.constrainRegClass(RegB, RC);
  if (TargetRegisterInfo::isVirtualRegister(RegX))
    MRI.constrainRegClass(RegX, RC);
  if (TargetRegisterInfo::isVirtualRegister(RegY))
    MRI.constrainRegClass(RegY, RC);
  if (TargetRegisterInfo::isVirtualRegister(RegC))
    MRI.constrainRegClass(RegC, RC);

  // Create a new virtual register for the result of (X op Y) instead of
  // recycling RegB because the MachineCombiner's computation of the critical
  // path requires a new register definition rather than an existing one.
  unsigned NewVR = MRI.createVirtualRegister(RC);
  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));

  unsigned Opcode = Root.getOpcode();
  bool KillA = OpA.isKill();
  bool KillX = OpX.isKill();
  bool KillY = OpY.isKill();

  // Create new instructions for insertion.
  MachineInstrBuilder MIB1 =
      BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
          .addReg(RegX, getKillRegState(KillX))
          .addReg(RegY, getKillRegState(KillY));
  MachineInstrBuilder MIB2 =
      BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
          .addReg(RegA, getKillRegState(KillA))
          .addReg(NewVR, getKillRegState(true));

  setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2);

  // Record new instructions for insertion and old instructions for deletion.
  InsInstrs.push_back(MIB1);
  InsInstrs.push_back(MIB2);
  DelInstrs.push_back(&Prev);
  DelInstrs.push_back(&Root);
}
Example No. 5
bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {

  DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
               << "********** Function: "
               << MF.getName() << "\n");

#if 0
  // for now disable this, if we move NewValueJump before register
  // allocation we need this information.
  LiveVariables &LVs = getAnalysis<LiveVariables>();
#endif

  QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo());
  QRI =
    static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();

  if (!QRI->Subtarget.hasV4TOps() ||
      DisableNewValueJumps) {
    return false;
  }

  int nvjCount = DbgNVJCount;
  int nvjGenerated = 0;

  // Loop through all the bb's of the function
  for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
        MBBb != MBBe; ++MBBb) {
    MachineBasicBlock* MBB = MBBb;

    DEBUG(dbgs() << "** dumping bb ** "
                 << MBB->getNumber() << "\n");
    DEBUG(MBB->dump());
    DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n");
    bool foundJump    = false;
    bool foundCompare = false;
    bool invertPredicate = false;
    unsigned predReg = 0; // predicate reg of the jump.
    unsigned cmpReg1 = 0;
    int cmpOp2 = 0;
    bool MO1IsKill = false;
    bool MO2IsKill = false;
    MachineBasicBlock::iterator jmpPos;
    MachineBasicBlock::iterator cmpPos;
    MachineInstr *cmpInstr = NULL, *jmpInstr = NULL;
    MachineBasicBlock *jmpTarget = NULL;
    bool afterRA = false;
    bool isSecondOpReg = false;
    bool isSecondOpNewified = false;
    // Traverse the basic block - bottom up
    for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin();
             MII != E;) {
      MachineInstr *MI = --MII;
      if (MI->isDebugValue()) {
        continue;
      }

      if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated))
        break;

      DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n");

      if (!foundJump &&
         (MI->getOpcode() == Hexagon::JMP_t ||
          MI->getOpcode() == Hexagon::JMP_f ||
          MI->getOpcode() == Hexagon::JMP_tnew_t ||
          MI->getOpcode() == Hexagon::JMP_tnew_nt ||
          MI->getOpcode() == Hexagon::JMP_fnew_t ||
          MI->getOpcode() == Hexagon::JMP_fnew_nt)) {
        // This is where we will insert the compare and the
        // instruction that feeds the compare.
        jmpPos = MII;
        jmpInstr = MI;
        predReg = MI->getOperand(0).getReg();
        afterRA = TargetRegisterInfo::isPhysicalRegister(predReg);

        // If the if-converter had not messed up the kill flags of the
        // operands, the following check on the kill flag would suffice.
        // if(!jmpInstr->getOperand(0).isKill()) break;

        // This predicate register is live out of the BB. The check below
        // would only work if we could actually run live-variable analysis
        // on physical registers, but LLVM does not provide LV analysis on
        // phys regs.
        //if(LVs.isLiveOut(predReg, *MBB)) break;

        // Get all the successors of this block (there will always be two).
        // Check if the predicate register is live-in in those successors.
        // If yes, we cannot delete the predicate; we do this only because
        // LLVM does not provide live-out information at the BB level.
        bool predLive = false;
        for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
                            SIE = MBB->succ_end(); SI != SIE; ++SI) {
          MachineBasicBlock* succMBB = *SI;
          if (succMBB->isLiveIn(predReg)) {
            predLive = true;
          }
        }
        if (predLive)
          break;

        jmpTarget = MI->getOperand(1).getMBB();
        foundJump = true;
        if (MI->getOpcode() == Hexagon::JMP_f ||
            MI->getOpcode() == Hexagon::JMP_fnew_t ||
            MI->getOpcode() == Hexagon::JMP_fnew_nt) {
          invertPredicate = true;
        }
        continue;
      }

      // No new value jump if there is a barrier. A barrier has to be in its
      // own packet. A barrier has zero operands. We conservatively bail out
      // here if we see any instruction with zero operands.
      if (foundJump && MI->getNumOperands() == 0)
        break;

      if (foundJump &&
         !foundCompare &&
          MI->getOperand(0).isReg() &&
          MI->getOperand(0).getReg() == predReg) {

        // Not all compares can be new-value compares. Arch Spec: 7.6.1.1
        if (QII->isNewValueJumpCandidate(MI)) {

          assert((MI->getDesc().isCompare()) &&
              "Only compare instruction can be collapsed into New Value Jump");
          isSecondOpReg = MI->getOperand(2).isReg();

          if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg,
                                        afterRA, jmpPos, MF))
            break;

          cmpInstr = MI;
          cmpPos = MII;
          foundCompare = true;

          // We need cmpReg1 and cmpOp2 (imm or reg) while building the
          // new-value jump instruction.
          cmpReg1 = MI->getOperand(1).getReg();
          if (MI->getOperand(1).isKill())
            MO1IsKill = true;

          if (isSecondOpReg) {
            cmpOp2 = MI->getOperand(2).getReg();
            if (MI->getOperand(2).isKill())
              MO2IsKill = true;
          } else
            cmpOp2 = MI->getOperand(2).getImm();
          continue;
        }
      }

      if (foundCompare && foundJump) {

        // If "common" checks fail, bail out on this BB.
        if (!commonChecksToProhibitNewValueJump(afterRA, MII))
          break;

        bool foundFeeder = false;
        MachineBasicBlock::iterator feederPos = MII;
        if (MI->getOperand(0).isReg() &&
            MI->getOperand(0).isDef() &&
           (MI->getOperand(0).getReg() == cmpReg1 ||
            (isSecondOpReg &&
             MI->getOperand(0).getReg() == (unsigned) cmpOp2))) {

          unsigned feederReg = MI->getOperand(0).getReg();

          // First try to see if we can get the feeder from the first operand
          // of the compare. If we cannot, and if isSecondOpReg is true
          // (the second operand of the compare is also a register), try that
          // one.
          // TODO: Try to come up with some heuristic to figure out which
          // feeder would benefit.

          if (feederReg == cmpReg1) {
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) {
              if (!isSecondOpReg)
                break;
              else
                continue;
            } else
              foundFeeder = true;
          }

          if (!foundFeeder &&
               isSecondOpReg &&
               feederReg == (unsigned) cmpOp2)
            if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF))
              break;

          if (isSecondOpReg) {
            // For CMPLT, CMPLTU, or EQ where the second register is the one
            // to newify, swap the operands.
            if (cmpInstr->getOpcode() == Hexagon::CMPEQrr &&
                                     feederReg == (unsigned) cmpOp2) {
              unsigned tmp = cmpReg1;
              bool tmpIsKill = MO1IsKill;
              cmpReg1 = cmpOp2;
              MO1IsKill = MO2IsKill;
              cmpOp2 = tmp;
              MO2IsKill = tmpIsKill;
            }

            // Now that we have swapped the operands, all we need to check is
            // whether the second operand (after the swap) is the feeder;
            // if it is, make a note.
            if (feederReg == (unsigned)cmpOp2)
              isSecondOpNewified = true;
          }

          // Now that we are moving the feeder closer to the jump,
          // make sure we respect the kill flags on the
          // operands of the feeder.

          bool updatedIsKill = false;
          for (unsigned i = 0; i < MI->getNumOperands(); i++) {
            MachineOperand &MO = MI->getOperand(i);
            if (MO.isReg() && MO.isUse()) {
              unsigned feederReg = MO.getReg();
              for (MachineBasicBlock::iterator localII = feederPos,
                   end = jmpPos; localII != end; localII++) {
                MachineInstr *localMI = localII;
                for (unsigned j = 0; j < localMI->getNumOperands(); j++) {
                  MachineOperand &localMO = localMI->getOperand(j);
                  if (localMO.isReg() && localMO.isUse() &&
                      localMO.isKill() && feederReg == localMO.getReg()) {
                    // We found a kill of one of the feeder's use registers
                    // between the feeder and the jump; move the kill flag
                    // onto the feeder's operand.
                    localMO.setIsKill(false);
                    MO.setIsKill();
                    updatedIsKill = true;
                    break;
                  }
                }
                if (updatedIsKill) break;
              }
            }
            if (updatedIsKill) break;
          }

          MBB->splice(jmpPos, MI->getParent(), MI);
          MBB->splice(jmpPos, MI->getParent(), cmpInstr);
          DebugLoc dl = MI->getDebugLoc();
          MachineInstr *NewMI;

           assert((QII->isNewValueJumpCandidate(cmpInstr)) &&
                      "This compare is not a New Value Jump candidate.");
          unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
                                               isSecondOpNewified,
                                               jmpTarget, MBPI);
          if (invertPredicate)
            opc = QII->getInvertedPredicatedOpcode(opc);

          if (isSecondOpReg)
            NewMI = BuildMI(*MBB, jmpPos, dl,
                                  QII->get(opc))
                                    .addReg(cmpReg1, getKillRegState(MO1IsKill))
                                    .addReg(cmpOp2, getKillRegState(MO2IsKill))
                                    .addMBB(jmpTarget);

          else if ((cmpInstr->getOpcode() == Hexagon::CMPEQri ||
                    cmpInstr->getOpcode() == Hexagon::CMPGTri) &&
                    cmpOp2 == -1 )
            // Corresponding new-value compare jump instructions don't have the
            // operand for -1 immediate value.
            NewMI = BuildMI(*MBB, jmpPos, dl,
                                  QII->get(opc))
                                    .addReg(cmpReg1, getKillRegState(MO1IsKill))
                                    .addMBB(jmpTarget);

          else
            NewMI = BuildMI(*MBB, jmpPos, dl,
                                  QII->get(opc))
                                    .addReg(cmpReg1, getKillRegState(MO1IsKill))
                                    .addImm(cmpOp2)
                                    .addMBB(jmpTarget);

          assert(NewMI && "New Value Jump Instruction Not created!");
          (void)NewMI;
          if (cmpInstr->getOperand(0).isReg() &&
              cmpInstr->getOperand(0).isKill())
            cmpInstr->getOperand(0).setIsKill(false);
          if (cmpInstr->getOperand(1).isReg() &&
              cmpInstr->getOperand(1).isKill())
            cmpInstr->getOperand(1).setIsKill(false);
          cmpInstr->eraseFromParent();
          jmpInstr->eraseFromParent();
          ++nvjGenerated;
          ++NumNVJGenerated;
          break;
        }
      }
    }
  }

  return true;

}
Example No. 6
bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
  MF = &mf;
  MRI = &mf.getRegInfo();
  TRI = MF->getTarget().getRegisterInfo();

  ReservedRegisters = TRI->getReservedRegs(mf);

  unsigned NumRegs = TRI->getNumRegs();
  PhysRegDef  = new MachineInstr*[NumRegs];
  PhysRegUse  = new MachineInstr*[NumRegs];
  PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
  std::fill(PhysRegDef,  PhysRegDef  + NumRegs, (MachineInstr*)0);
  std::fill(PhysRegUse,  PhysRegUse  + NumRegs, (MachineInstr*)0);

  /// Get some space for a respectable number of registers.
  VirtRegInfo.resize(64);

  analyzePHINodes(mf);

  // Calculate live variable information in depth first order on the CFG of the
  // function.  This guarantees that we will see the definition of a virtual
  // register before its uses due to dominance properties of SSA (except for PHI
  // nodes, which are treated as a special case).
  MachineBasicBlock *Entry = MF->begin();
  SmallPtrSet<MachineBasicBlock*,16> Visited;

  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
       DFI != E; ++DFI) {
    MachineBasicBlock *MBB = *DFI;

    // Mark live-in registers as live-in.
    for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(),
           EE = MBB->livein_end(); II != EE; ++II) {
      assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
             "Cannot have a live-in virtual register!");
      HandlePhysRegDef(*II, 0);
    }

    // Loop over all of the instructions, processing them.
    DistanceMap.clear();
    unsigned Dist = 0;
    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
         I != E; ++I) {
      MachineInstr *MI = I;
      DistanceMap.insert(std::make_pair(MI, Dist++));

      // Process all of the operands of the instruction...
      unsigned NumOperandsToProcess = MI->getNumOperands();

      // Unless it is a PHI node.  In this case, ONLY process the DEF, not any
      // of the uses.  They will be handled in other basic blocks.
      if (MI->getOpcode() == TargetInstrInfo::PHI)
        NumOperandsToProcess = 1;

      SmallVector<unsigned, 4> UseRegs;
      SmallVector<unsigned, 4> DefRegs;
      for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
        const MachineOperand &MO = MI->getOperand(i);
        if (!MO.isReg() || MO.getReg() == 0)
          continue;
        unsigned MOReg = MO.getReg();
        if (MO.isUse())
          UseRegs.push_back(MOReg);
        if (MO.isDef())
          DefRegs.push_back(MOReg);
      }

      // Process all uses.
      for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
        unsigned MOReg = UseRegs[i];
        if (TargetRegisterInfo::isVirtualRegister(MOReg))
          HandleVirtRegUse(MOReg, MBB, MI);
        else if (!ReservedRegisters[MOReg])
          HandlePhysRegUse(MOReg, MI);
      }

      // Process all defs.
      for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
        unsigned MOReg = DefRegs[i];
        if (TargetRegisterInfo::isVirtualRegister(MOReg))
          HandleVirtRegDef(MOReg, MI);
        else if (!ReservedRegisters[MOReg])
          HandlePhysRegDef(MOReg, MI);
      }
    }

    // Handle any virtual assignments from PHI nodes which might be at the
    // bottom of this basic block.  We check all of our successor blocks to see
    // if they have PHI nodes, and if so, we simulate an assignment at the end
    // of the current block.
    if (!PHIVarInfo[MBB->getNumber()].empty()) {
      SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];

      for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
             E = VarInfoVec.end(); I != E; ++I)
        // Mark it alive only in the block we are representing.
        MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
                                MBB);
    }

    // Finally, if the last instruction in the block is a return, make sure to
    // mark it as using all of the live-out values in the function.
    if (!MBB->empty() && MBB->back().getDesc().isReturn()) {
      MachineInstr *Ret = &MBB->back();

      for (MachineRegisterInfo::liveout_iterator
           I = MF->getRegInfo().liveout_begin(),
           E = MF->getRegInfo().liveout_end(); I != E; ++I) {
        assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
               "Cannot have a live-out virtual register!");
        HandlePhysRegUse(*I, Ret);

        // Add live-out registers as implicit uses.
        if (!Ret->readsRegister(*I))
          Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
      }
    }

    // Loop over PhysRegDef / PhysRegUse, killing any registers that are
    // available at the end of the basic block.
    for (unsigned i = 0; i != NumRegs; ++i)
      if (PhysRegDef[i] || PhysRegUse[i])
        HandlePhysRegDef(i, 0);

    std::fill(PhysRegDef,  PhysRegDef  + NumRegs, (MachineInstr*)0);
    std::fill(PhysRegUse,  PhysRegUse  + NumRegs, (MachineInstr*)0);
  }

  // Convert and transfer the dead / killed information we have gathered into
  // VirtRegInfo onto MI's.
  for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i)
    for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j)
      if (VirtRegInfo[i].Kills[j] ==
          MRI->getVRegDef(i + TargetRegisterInfo::FirstVirtualRegister))
        VirtRegInfo[i]
          .Kills[j]->addRegisterDead(i +
                                     TargetRegisterInfo::FirstVirtualRegister,
                                     TRI);
      else
        VirtRegInfo[i]
          .Kills[j]->addRegisterKilled(i +
                                       TargetRegisterInfo::FirstVirtualRegister,
                                       TRI);

  // Check to make sure there are no unreachable blocks in the MC CFG for the
  // function.  If there are, it is due to a bug in the instruction selector
  // or some other part of the code generator.
#ifndef NDEBUG
  for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
    assert(Visited.count(&*i) != 0 && "unreachable basic block found");
#endif

  delete[] PhysRegDef;
  delete[] PhysRegUse;
  delete[] PHIVarInfo;

  return false;
}
Example No. 7
/// findMatchingInsn - Scan the instructions looking for a load/store that can
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      LdStPairFlags &Flags,
                                      unsigned Limit) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr *FirstMI = I;
  ++MBBI;

  unsigned Opc = FirstMI->getOpcode();
  bool MayLoad = FirstMI->mayLoad();
  bool IsUnscaled = isUnscaledLdst(Opc);
  unsigned Reg = FirstMI->getOperand(0).getReg();
  unsigned BaseReg = FirstMI->getOperand(1).getReg();
  int Offset = FirstMI->getOperand(2).getImm();

  // Early exit if the first instruction modifies the base register.
  // e.g., ldr x0, [x0]
  // Early exit if the offset is not possible to match (6 bits of positive
  // range, plus allow an extra one in case we find a later insn that matches
  // with Offset-1).
  if (FirstMI->modifiesRegister(BaseReg, TRI))
    return E;
  int OffsetStride =
      IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1;
  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
    return E;

  // Track which registers have been modified and used between the first insn
  // (inclusive) and the second insn.
  BitVector ModifiedRegs, UsedRegs;
  ModifiedRegs.resize(TRI->getNumRegs());
  UsedRegs.resize(TRI->getNumRegs());

  // Remember any instructions that read/write memory between FirstMI and MI.
  SmallVector<MachineInstr *, 4> MemInsns;

  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
    MachineInstr *MI = MBBI;
    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
    // optimization by changing how far we scan.
    if (MI->isDebugValue())
      continue;

    // Now that we know this is a real instruction, count it.
    ++Count;

    bool CanMergeOpc = Opc == MI->getOpcode();
    Flags.setSExtIdx(-1);
    if (!CanMergeOpc) {
      bool IsValidLdStrOpc;
      unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc);
      if (!IsValidLdStrOpc)
        continue;
      // Opc will be the first instruction in the pair.
      Flags.setSExtIdx(NonSExtOpc == (unsigned)Opc ? 1 : 0);
      CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode());
    }

    if (CanMergeOpc && MI->getOperand(2).isImm()) {
      // If we've found another instruction with the same opcode, check to see
      // if the base and offset are compatible with our starting instruction.
      // These instructions all have scaled immediate operands, so we just
      // check for +1/-1. Make sure to check the new instruction offset is
      // actually an immediate and not a symbolic reference destined for
      // a relocation.
      //
      // Pairwise instructions have a 7-bit signed offset field. Single insns
      // have a 12-bit unsigned offset field. To be a valid combine, the
      // final offset must be in range.
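      // Illustrative example: with OffsetStride == 1, a candidate whose
      // scaled offset is Offset + 1 or Offset - 1 is adjacent to FirstMI;
      // MinOffset is then the smaller of the two and must still fit the
      // 7-bit signed pairwise range checked by inBoundsForPair below.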
      unsigned MIBaseReg = MI->getOperand(1).getReg();
      int MIOffset = MI->getOperand(2).getImm();
      if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
                                   (Offset + OffsetStride == MIOffset))) {
        int MinOffset = Offset < MIOffset ? Offset : MIOffset;
        // If this is a volatile load/store that otherwise matched, stop looking
        // as something is going on that we don't have enough information to
        // safely transform. Similarly, stop if we see a hint to avoid pairs.
        if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
          return E;
        // If the resultant immediate offset of merging these instructions
        // is out of range for a pairwise instruction, bail and keep looking.
        bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode());
        if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
          if (MI->mayLoadOrStore())
            MemInsns.push_back(MI);
          continue;
        }
        // If the alignment requirements of the paired (scaled) instruction
        // can't express the offset of the unscaled input, bail and keep
        // looking.
        if (IsUnscaled && EnableAArch64UnscaledMemOp &&
            (alignTo(MinOffset, OffsetStride) != MinOffset)) {
          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
          if (MI->mayLoadOrStore())
            MemInsns.push_back(MI);
          continue;
        }
        // If the destination register of the loads is the same register, bail
        // and keep looking. A load-pair instruction with both destination
        // registers the same is UNPREDICTABLE and will result in an exception.
        if (MayLoad && Reg == MI->getOperand(0).getReg()) {
          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
          if (MI->mayLoadOrStore())
            MemInsns.push_back(MI);
          continue;
        }

        // If the Rt of the second instruction was not modified or used between
        // the two instructions and none of the instructions between the second
        // and first alias with the second, we can combine the second into the
        // first.
        if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
            !(MI->mayLoad() && UsedRegs[MI->getOperand(0).getReg()]) &&
            !mayAlias(MI, MemInsns, TII)) {
          Flags.setMergeForward(false);
          return MBBI;
        }

        // Likewise, if the Rt of the first instruction is not modified or used
        // between the two instructions and none of the instructions between the
        // first and the second alias with the first, we can combine the first
        // into the second.
        if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
            !(FirstMI->mayLoad() &&
              UsedRegs[FirstMI->getOperand(0).getReg()]) &&
            !mayAlias(FirstMI, MemInsns, TII)) {
          Flags.setMergeForward(true);
          return MBBI;
        }
        // Unable to combine these instructions due to interference in between.
        // Keep looking.
      }
    }

    // If the instruction wasn't a matching load or store, stop searching if we
    // encounter a call instruction that might modify memory.
    if (MI->isCall())
      return E;

    // Update modified / uses register lists.
    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (ModifiedRegs[BaseReg])
      return E;

    // Update list of instructions that read/write memory.
    if (MI->mayLoadOrStore())
      MemInsns.push_back(MI);
  }
  return E;
}
Example No. 8
MipsInstrInfo::BranchType MipsInstrInfo::AnalyzeBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
    SmallVectorImpl<MachineOperand> &Cond, bool AllowModify,
    SmallVectorImpl<MachineInstr *> &BranchInstrs) const {

  MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();

  // Skip all the debug instructions.
  while (I != REnd && I->isDebugValue())
    ++I;

  if (I == REnd || !isUnpredicatedTerminator(*I)) {
    // This block ends with no branches (it just falls through to its succ).
    // Leave TBB/FBB null.
    TBB = FBB = nullptr;
    return BT_NoBranch;
  }

  MachineInstr *LastInst = &*I;
  unsigned LastOpc = LastInst->getOpcode();
  BranchInstrs.push_back(LastInst);

  // Not an analyzable branch (e.g., indirect jump).
  if (!getAnalyzableBrOpc(LastOpc))
    return LastInst->isIndirectBranch() ? BT_Indirect : BT_None;

  // Get the second to last instruction in the block.
  unsigned SecondLastOpc = 0;
  MachineInstr *SecondLastInst = nullptr;

  if (++I != REnd) {
    SecondLastInst = &*I;
    SecondLastOpc = getAnalyzableBrOpc(SecondLastInst->getOpcode());

    // Not an analyzable branch (must be an indirect jump).
    if (isUnpredicatedTerminator(*SecondLastInst) && !SecondLastOpc)
      return BT_None;
  }

  // If there is only one terminator instruction, process it.
  if (!SecondLastOpc) {
    // Unconditional branch.
    if (LastInst->isUnconditionalBranch()) {
      TBB = LastInst->getOperand(0).getMBB();
      return BT_Uncond;
    }

    // Conditional branch
    AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
    return BT_Cond;
  }

  // If we reached here, there are two branches.
  // If there are three terminators, we don't know what sort of block this is.
  if (++I != REnd && isUnpredicatedTerminator(*I))
    return BT_None;

  BranchInstrs.insert(BranchInstrs.begin(), SecondLastInst);

  // If second to last instruction is an unconditional branch,
  // analyze it and remove the last instruction.
  if (SecondLastInst->isUnconditionalBranch()) {
    // Return if the last instruction cannot be removed.
    if (!AllowModify)
      return BT_None;

    TBB = SecondLastInst->getOperand(0).getMBB();
    LastInst->eraseFromParent();
    BranchInstrs.pop_back();
    return BT_Uncond;
  }

  // Conditional branch followed by an unconditional branch.
  // The last one must be unconditional.
  if (!LastInst->isUnconditionalBranch())
    return BT_None;

  AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
  FBB = LastInst->getOperand(0).getMBB();

  return BT_CondUncond;
}
Example No. 9
bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 
                                  MachineBasicBlock *&TBB,
                                  MachineBasicBlock *&FBB,
                                  SmallVectorImpl<MachineOperand> &Cond,
                                  bool AllowModify) const 
{
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
    return false;
  
  // Get the last instruction in the block.
  MachineInstr *LastInst = I;
  
  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (!LastInst->getDesc().isBranch())
      return true;

    // Unconditional branch
    if (LastOpc == Mips::J) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }

    Mips::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
    if (BranchCode == Mips::COND_INVALID)
      return true;  // Can't handle indirect branch.

    // Conditional branch
    // Block ends with fall-through condbranch.
    if (LastOpc != Mips::COND_INVALID) {
      int LastNumOp = LastInst->getNumOperands();

      TBB = LastInst->getOperand(LastNumOp-1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(BranchCode));

      for (int i=0; i<LastNumOp-1; i++) {
        Cond.push_back(LastInst->getOperand(i));
      }

      return false;
    }
  }
  
  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  
  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with Mips::J and a Mips::BNE/Mips::BEQ, handle it.
  unsigned SecondLastOpc    = SecondLastInst->getOpcode();
  Mips::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);

  if (BranchCode != Mips::COND_INVALID && LastOpc == Mips::J) {
    int SecondNumOp = SecondLastInst->getNumOperands();

    TBB = SecondLastInst->getOperand(SecondNumOp-1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(BranchCode));

    for (int i=0; i<SecondNumOp-1; i++) {
      Cond.push_back(SecondLastInst->getOperand(i));
    }

    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }
  
  // If the block ends with two unconditional branches, handle it. The last 
  // one is not executed, so remove it.
  if ((SecondLastOpc == Mips::J) && (LastOpc == Mips::J)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
Example No. 10
bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI)
{
  const R600RegisterInfo &TRI = TII->getRegisterInfo();
  if (MI.getOpcode() != AMDGPU::input_perspective)
    return false;
  
  MachineBasicBlock::iterator I = &MI;
  unsigned DstReg = MI.getOperand(0).getReg();
  R600MachineFunctionInfo *MFI = MI.getParent()->getParent()
      ->getInfo<R600MachineFunctionInfo>();
  unsigned IJIndexBase;
  
  // Per the Evergreen ISA doc, section 8.3.2:
  // We need to interpolate XY and ZW in two different instruction groups.
  // An INTERP_* must occupy all 4 slots of an instruction group.
  // Output of INTERP_XY is written in X,Y slots
  // Output of INTERP_ZW is written in Z,W slots
  //
  // Thus interpolation requires the following sequences :
  //
  // AnyGPR.x = INTERP_ZW; (Write Masked Out)
  // AnyGPR.y = INTERP_ZW; (Write Masked Out)
  // DstGPR.z = INTERP_ZW;
  // DstGPR.w = INTERP_ZW; (End of first IG)
  // DstGPR.x = INTERP_XY;
  // DstGPR.y = INTERP_XY;
  // AnyGPR.z = INTERP_XY; (Write Masked Out)
  // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
  //
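  // The loop below emits these eight slots in order: iterations 0..3 use
  // INTERP_ZW and iterations 4..7 use INTERP_XY. Iterations 0, 1, 6 and 7
  // are write-masked (MO_FLAG_MASK), and every slot except the last of each
  // group of four is tagged MO_FLAG_NOT_LAST.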
  switch (MI.getOperand(1).getImm()) {
  case 0:
    IJIndexBase = MFI->GetIJPerspectiveIndex();
    break;
  case 1:
    IJIndexBase = MFI->GetIJLinearIndex();
    break;
  default:
    assert(0 && "Unknow ij index");
  }

  for (unsigned i = 0; i < 8; i++) {
    unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
        2 * IJIndexBase + ((i + 1) % 2));
    unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
        4 * MI.getOperand(2).getImm());
    
    unsigned Sel;
    switch (i % 4) {
    case 0:Sel = AMDGPU::sel_x;break;
    case 1:Sel = AMDGPU::sel_y;break;
    case 2:Sel = AMDGPU::sel_z;break;
    case 3:Sel = AMDGPU::sel_w;break;
    default:break;
    }
	  
    unsigned Res = TRI.getSubReg(DstReg, Sel);
	  
    const MCInstrDesc &Opcode = (i < 4)?
        TII->get(AMDGPU::INTERP_ZW):
        TII->get(AMDGPU::INTERP_XY);
  
    MachineInstr *NewMI = BuildMI(*(MI.getParent()),
        I, MI.getParent()->findDebugLoc(I),
        Opcode, Res)
        .addReg(IJIndex)
        .addReg(ReadReg)
        .addImm(0);
    
    if (!(i > 1 && i < 6)) {
      TII->addFlag(NewMI, 0, MO_FLAG_MASK);
    }
	  
    if (i % 4 !=  3)
      TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
  }
  
  MI.eraseFromParent();

  return true;
}
Example No. 11
/// foldMemoryOperand - Try folding stack slot references in Ops into their
/// instructions.
///
/// @param Ops    Operand indices from analyzeVirtReg().
/// @param LoadMI Load instruction to use instead of stack slot when non-null.
/// @return       True on success.
bool InlineSpiller::
foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
                  MachineInstr *LoadMI) {
  if (Ops.empty())
    return false;
  // Don't attempt folding in bundles.
  MachineInstr *MI = Ops.front().first;
  if (Ops.back().first != MI || MI->isBundled())
    return false;

  bool WasCopy = MI->isCopy();
  unsigned ImpReg = 0;

  bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::PATCHPOINT ||
                       MI->getOpcode() == TargetOpcode::STACKMAP);

  // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
  // operands.
  SmallVector<unsigned, 8> FoldOps;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    unsigned Idx = Ops[i].second;
    MachineOperand &MO = MI->getOperand(Idx);
    if (MO.isImplicit()) {
      ImpReg = MO.getReg();
      continue;
    }
    // FIXME: Teach targets to deal with subregs.
    if (!SpillSubRegs && MO.getSubReg())
      return false;
    // We cannot fold a load instruction into a def.
    if (LoadMI && MO.isDef())
      return false;
    // Tied use operands should not be passed to foldMemoryOperand.
    if (!MI->isRegTiedToDefOperand(Idx))
      FoldOps.push_back(Idx);
  }

  MachineInstrSpan MIS(MI);

  MachineInstr *FoldMI =
                LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI)
                       : TII.foldMemoryOperand(MI, FoldOps, StackSlot);
  if (!FoldMI)
    return false;

  // Remove LIS for any dead defs in the original MI not in FoldMI.
  for (MIBundleOperands MO(MI); MO.isValid(); ++MO) {
    if (!MO->isReg())
      continue;
    unsigned Reg = MO->getReg();
    if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
        MRI.isReserved(Reg)) {
      continue;
    }
    // Skip non-Defs, including undef uses and internal reads.
    if (MO->isUse())
      continue;
    MIBundleOperands::PhysRegInfo RI =
      MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI);
    if (RI.Defines)
      continue;
    // FoldMI does not define this physreg. Remove the LI segment.
    assert(MO->isDead() && "Cannot fold physreg def");
    for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) {
      if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) {
        SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
        if (VNInfo *VNI = LR->getVNInfoAt(Idx))
          LR->removeValNo(VNI);
      }
    }
  }

  LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
  MI->eraseFromParent();

  // Insert any new instructions other than FoldMI into the LIS maps.
  assert(!MIS.empty() && "Unexpected empty span of instructions!");
  for (MachineBasicBlock::iterator MII = MIS.begin(), End = MIS.end();
       MII != End; ++MII)
    if (&*MII != FoldMI)
      LIS.InsertMachineInstrInMaps(&*MII);

  // TII.foldMemoryOperand may have left some implicit operands on the
  // instruction.  Strip them.
  if (ImpReg)
    for (unsigned i = FoldMI->getNumOperands(); i; --i) {
      MachineOperand &MO = FoldMI->getOperand(i - 1);
      if (!MO.isReg() || !MO.isImplicit())
        break;
      if (MO.getReg() == ImpReg)
        FoldMI->RemoveOperand(i - 1);
    }

  DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS,
                                           "folded"));

  if (!WasCopy)
    ++NumFolded;
  else if (Ops.front().second == 0)
    ++NumSpills;
  else
    ++NumReloads;
  return true;
}
Example No. 12
//===----------------------------------------------------------------------===//
// Branch Analysis
//===----------------------------------------------------------------------===//
bool MBlazeInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *&TBB,
                                    MachineBasicBlock *&FBB,
                                    SmallVectorImpl<MachineOperand> &Cond,
                                    bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (MBlaze::isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (MBlaze::isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      TBB = LastInst->getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
      Cond.push_back(LastInst->getOperand(0));
      return false;
    }
    // Otherwise, don't know what this is.
    return true;
  }

  // Get the instruction before it if it's a terminator.
  MachineInstr *SecondLastInst = I;

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with something like BEQID then BRID, handle it.
  if (MBlaze::isCondBranchOpcode(SecondLastInst->getOpcode()) &&
      MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) {
    TBB = SecondLastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
    Cond.push_back(SecondLastInst->getOperand(0));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it.
  // The second one is not executed, so remove it.
  if (MBlaze::isUncondBranchOpcode(SecondLastInst->getOpcode()) &&
      MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
Example No. 13
bool OptimizeLEAPass::isLEA(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  return Opcode == X86::LEA16r || Opcode == X86::LEA32r ||
         Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
}
Example No. 14
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
                                                      I != E; ++I) {
      MachineInstr &MI = *I;

      switch (MI.getOpcode()) {
      default:
        continue;
      case AMDGPU::COPY: {
        if (isVGPRToSGPRCopy(MI, TRI, MRI)) {
          DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
          TII->moveToVALU(MI);
        }

        break;
      }
      case AMDGPU::PHI: {
        DEBUG(dbgs() << "Fixing PHI: " << MI);

        for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
          const MachineOperand &Op = MI.getOperand(i);
          unsigned Reg = Op.getReg();
          const TargetRegisterClass *RC
            = inferRegClassFromDef(TRI, MRI, Reg, Op.getSubReg());

          MRI.constrainRegClass(Op.getReg(), RC);
        }
        unsigned Reg = MI.getOperand(0).getReg();
        const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg,
                                                  MI.getOperand(0).getSubReg());
        if (TRI->getCommonSubClass(RC, &AMDGPU::VGPR_32RegClass)) {
          MRI.constrainRegClass(Reg, &AMDGPU::VGPR_32RegClass);
        }

        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
          break;

        // If a PHI node defines an SGPR and any of its operands are VGPRs,
        // then we need to move it to the VALU.
        //
        // Also, if a PHI node defines an SGPR and has all SGPR operands
        // we must move it to the VALU, because the SGPR operands will
        // all end up being assigned the same register, which means
        // there is a potential for a conflict if different threads take
        // different control flow paths.
        //
        // For Example:
        //
        // sgpr0 = def;
        // ...
        // sgpr1 = def;
        // ...
        // sgpr2 = PHI sgpr0, sgpr1
        // use sgpr2;
        //
        // Will Become:
        //
        // sgpr2 = def;
        // ...
        // sgpr2 = def;
        // ...
        // use sgpr2
        //
        // FIXME: This is OK if the branching decision is made based on an
        // SGPR value.
        bool SGPRBranch = false;

        // The one exception to this rule is when one of the operands
        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
        // instruction.  In this case, we know the program will
        // never enter the second block (the loop) without entering
        // the first block (where the condition is computed), so there
        // is no chance for values to be over-written.

        bool HasBreakDef = false;
        for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
          unsigned Reg = MI.getOperand(i).getReg();
          if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
            TII->moveToVALU(MI);
            break;
          }
          MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
          assert(DefInstr);
          switch(DefInstr->getOpcode()) {

          case AMDGPU::SI_BREAK:
          case AMDGPU::SI_IF_BREAK:
          case AMDGPU::SI_ELSE_BREAK:
          // If we see a PHI instruction that defines an SGPR, then that PHI
          // instruction has already been considered and should have
          // a *_BREAK as an operand.
          case AMDGPU::PHI:
            HasBreakDef = true;
            break;
          }
        }

        if (!SGPRBranch && !HasBreakDef)
          TII->moveToVALU(MI);
        break;
      }
      case AMDGPU::REG_SEQUENCE: {
        if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
            !hasVGPROperands(MI, TRI))
          continue;

        DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);

        TII->moveToVALU(MI);
        break;
      }
      case AMDGPU::INSERT_SUBREG: {
        const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
        DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
        Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
        Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
        if (TRI->isSGPRClass(DstRC) &&
            (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
          DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
          TII->moveToVALU(MI);
        }
        break;
      }
      }
    }
  }

  return true;
}
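The PHI case above boils down to a small decision rule: move the PHI to the VALU when any incoming value is a VGPR, or when every incoming value is an SGPR that is not produced by a *_BREAK (or by another already-handled PHI). Below is a minimal standalone sketch of that rule; the enums and struct are illustrative stand-ins for the MRI/TRI queries, not AMDGPU's real types.

#include <vector>

// Illustrative stand-ins for the register-bank and defining-instruction
// queries made through MRI/TRI in the pass above.
enum class RegBank { SGPR, VGPR };
enum class DefKind { Normal, Break, Phi };   // *_BREAK and PHI defs are exempt

struct PhiInput {
  RegBank Bank;
  DefKind Def;
};

// Returns true when a PHI that defines an SGPR has to be recomputed in the
// VALU, mirroring the two cases described in the pass's comment.
bool phiMustMoveToVALU(const std::vector<PhiInput> &Inputs) {
  bool HasBreakDef = false;
  for (const PhiInput &In : Inputs) {
    if (In.Bank == RegBank::VGPR)
      return true;                         // any VGPR input forces a move
    if (In.Def == DefKind::Break || In.Def == DefKind::Phi)
      HasBreakDef = true;                  // structured control-flow exemption
  }
  return !HasBreakDef;                     // all-SGPR inputs, no break def
}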
Example #15
0
bool
AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (LastOpc == AArch64::Bimm) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranch(LastOpc)) {
      classifyCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && LastOpc == AArch64::Bimm) {
    while (SecondLastOpc == AArch64::Bimm) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (LastOpc == AArch64::Bimm) {
    if (SecondLastOpc == AArch64::Bcc) {
      TBB =  SecondLastInst->getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
      Cond.push_back(SecondLastInst->getOperand(0));
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (isCondBranch(SecondLastOpc)) {
      classifyCondBranch(SecondLastInst, TBB, Cond);
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    }
  }

  // If the block ends with two unconditional branches, handle it.  The second
  // one is not executed, so remove it.
  if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
Example #16
0
/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
/// implemented for a target).  Upon success, this returns false and returns
/// with the following information in various cases:
///
/// 1. If this block ends with no branches (it just falls through to its succ)
///    just return false, leaving TBB/FBB null.
/// 2. If this block ends with only an unconditional branch, it sets TBB to be
///    the destination block.
/// 3. If this block ends with a conditional branch and it falls through to
///    a successor block, it sets TBB to be the branch destination block and a
///    list of operands that evaluate the condition. These
///    operands can be passed to other TargetInstrInfo methods to create new
///    branches.
/// 4. If this block ends with a conditional branch followed by an unconditional
///    branch, it returns the 'true' destination in TBB, the 'false' destination
///    in FBB, and a list of operands that evaluate the condition. These
///    operands can be passed to other TargetInstrInfo methods to create new
///    branches.
///
/// Note that RemoveBranch and InsertBranch must be implemented to support
/// cases where this method returns success.
///
bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;
  
  // If there is only one terminator instruction, process it.
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (IsBRU(LastInst->getOpcode())) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    
    XCore::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
    if (BranchCode == XCore::COND_INVALID)
      return true;  // Can't handle indirect branch.
    
    // Conditional branch
    // Block ends with fall-through condbranch.

    TBB = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(BranchCode));
    Cond.push_back(LastInst->getOperand(0));
    return false;
  }
  
  // Get the instruction before it if it's a terminator.
  MachineInstr *SecondLastInst = I;

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;
  
  unsigned SecondLastOpc    = SecondLastInst->getOpcode();
  XCore::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);
  
  // If the block ends with conditional branch followed by unconditional,
  // handle it.
  if (BranchCode != XCore::COND_INVALID
    && IsBRU(LastInst->getOpcode())) {

    TBB = SecondLastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(BranchCode));
    Cond.push_back(SecondLastInst->getOperand(0));

    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }
  
  // If the block ends with two unconditional branches, handle it.  The second
  // one is not executed, so remove it.
  if (IsBRU(SecondLastInst->getOpcode()) && 
      IsBRU(LastInst->getOpcode())) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Likewise if it ends with a branch table followed by an unconditional branch.
  if (IsBR_JT(SecondLastInst->getOpcode()) && IsBRU(LastInst->getOpcode())) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}
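For context, generic passes such as the branch folder consume the TBB/FBB/Cond contract documented above. The following is a hedged, self-contained sketch of how a caller might classify a block once analyzeBranch has returned false; the types are plain stand-ins, not LLVM's MachineBasicBlock/MachineOperand.

#include <vector>

struct MBB;                       // opaque stand-in for MachineBasicBlock
struct Operand { int Imm; };      // stand-in for a condition operand

// Assumes analyzeBranch already succeeded (returned false) and filled in
// TBB/FBB/Cond exactly as the four documented cases describe.
const char *classifyTerminators(const MBB *TBB, const MBB *FBB,
                                const std::vector<Operand> &Cond) {
  if (!TBB && Cond.empty())
    return "fall-through only (case 1)";
  if (TBB && Cond.empty())
    return "single unconditional branch (case 2)";
  if (TBB && !FBB)
    return "conditional branch that falls through (case 3)";
  return "conditional branch plus unconditional branch (case 4)";
}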
Example #17
0
void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
                                             int &AccessScale, int &MinOffset,
                                             int &MaxOffset) const {
  switch (MI.getOpcode()) {
  default: llvm_unreachable("Unknown load/store kind");
  case TargetOpcode::DBG_VALUE:
    AccessScale = 1;
    MinOffset = INT_MIN;
    MaxOffset = INT_MAX;
    return;
  case AArch64::LS8_LDR: case AArch64::LS8_STR:
  case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
  case AArch64::LDRSBw:
  case AArch64::LDRSBx:
    AccessScale = 1;
    MinOffset = 0;
    MaxOffset = 0xfff;
    return;
  case AArch64::LS16_LDR: case AArch64::LS16_STR:
  case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
  case AArch64::LDRSHw:
  case AArch64::LDRSHx:
    AccessScale = 2;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LS32_LDR:  case AArch64::LS32_STR:
  case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
  case AArch64::LDRSWx:
  case AArch64::LDPSWx:
    AccessScale = 4;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LS64_LDR: case AArch64::LS64_STR:
  case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
  case AArch64::PRFM:
    AccessScale = 8;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
    AccessScale = 16;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
  case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
    AccessScale = 4;
    MinOffset = -0x40 * AccessScale;
    MaxOffset = 0x3f * AccessScale;
    return;
  case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
  case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
    AccessScale = 8;
    MinOffset = -0x40 * AccessScale;
    MaxOffset = 0x3f * AccessScale;
    return;
  case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
    AccessScale = 16;
    MinOffset = -0x40 * AccessScale;
    MaxOffset = 0x3f * AccessScale;
    return;
  case AArch64::LD1x2_8B: case AArch64::ST1x2_8B:
    AccessScale = 16;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LD1x3_8B: case AArch64::ST1x3_8B:
    AccessScale = 24;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LD1x4_8B: case AArch64::ST1x4_8B:
  case AArch64::LD1x2_16B: case AArch64::ST1x2_16B:
    AccessScale = 32;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LD1x3_16B: case AArch64::ST1x3_16B:
    AccessScale = 48;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LD1x4_16B: case AArch64::ST1x4_16B:
    AccessScale = 64;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  }
}
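A caller typically uses the (AccessScale, MinOffset, MaxOffset) triple to test whether a byte offset is encodable: it must be a multiple of the access scale and lie inside the signed range. A small sketch of that check follows; the helper name and the worked example are assumptions for illustration, not code from the backend.

#include <cassert>

// True when Offset can be encoded under the constraints produced by
// getAddressConstraints(): aligned to AccessScale and inside the range.
static bool offsetFitsConstraints(long Offset, int AccessScale,
                                  int MinOffset, int MaxOffset) {
  assert(AccessScale > 0 && "access scale must be positive");
  if (Offset % AccessScale != 0)
    return false;                          // misaligned for this access size
  return Offset >= MinOffset && Offset <= MaxOffset;
}

// Example: for LS32_LDR (scale 4, range [0, 0xfff * 4]) an offset of 8 fits,
// while 6 is misaligned and -4 is below MinOffset.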
Example #18
0
bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
}
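isRegisterStore (and isRegisterLoad further down) is just the usual TSFlags idiom: target-specific bits packed into the instruction description and tested with a mask. A tiny standalone illustration with a made-up flag layout; the enum values are assumptions, not AMDGPU's actual encoding.

#include <cstdint>

// Hypothetical TSFlags layout, for illustration only.
enum : uint64_t {
  FLAG_REGISTER_LOAD  = 1ull << 0,
  FLAG_REGISTER_STORE = 1ull << 1,
};

// Same bit-test pattern as the member function above.
static bool hasFlag(uint64_t TSFlags, uint64_t Flag) {
  return (TSFlags & Flag) != 0;
}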
Example #19
0
bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
  QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
  QRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
  MRI = &MF.getRegInfo();

  DenseMap<unsigned, unsigned> PeepholeMap;
  DenseMap<unsigned, std::pair<unsigned, unsigned> > PeepholeDoubleRegsMap;

  if (DisableHexagonPeephole) return false;

  // Loop over all of the basic blocks.
  for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
       MBBb != MBBe; ++MBBb) {
    MachineBasicBlock *MBB = &*MBBb;
    PeepholeMap.clear();
    PeepholeDoubleRegsMap.clear();

    // Traverse the basic block.
    for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
                                     ++MII) {
      MachineInstr *MI = MII;
      // Look for sign extends:
      // %vreg170<def> = SXTW %vreg166
      if (!DisableOptSZExt && MI->getOpcode() == Hexagon::A2_sxtw) {
        assert (MI->getNumOperands() == 2);
        MachineOperand &Dst = MI->getOperand(0);
        MachineOperand &Src  = MI->getOperand(1);
        unsigned DstReg = Dst.getReg();
        unsigned SrcReg = Src.getReg();
        // Just handle virtual registers.
        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
            TargetRegisterInfo::isVirtualRegister(SrcReg)) {
          // Map the following:
          // %vreg170<def> = SXTW %vreg166
          // PeepholeMap[170] = vreg166
          PeepholeMap[DstReg] = SrcReg;
        }
      }

      // Look for %vreg170<def> = COMBINE_ir_V4 (0, %vreg169)
      // %vreg170:DoubleRegs, %vreg169:IntRegs
      if (!DisableOptExtTo64 &&
          MI->getOpcode () == Hexagon::A4_combineir) {
        assert (MI->getNumOperands() == 3);
        MachineOperand &Dst = MI->getOperand(0);
        MachineOperand &Src1 = MI->getOperand(1);
        MachineOperand &Src2 = MI->getOperand(2);
        if (Src1.getImm() != 0)
          continue;
        unsigned DstReg = Dst.getReg();
        unsigned SrcReg = Src2.getReg();
        PeepholeMap[DstReg] = SrcReg;
      }

      // Look for this sequence below
      // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32
      // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg.
      // and convert into
      // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg.
      if (MI->getOpcode() == Hexagon::S2_lsr_i_p) {
        assert(MI->getNumOperands() == 3);
        MachineOperand &Dst = MI->getOperand(0);
        MachineOperand &Src1 = MI->getOperand(1);
        MachineOperand &Src2 = MI->getOperand(2);
        if (Src2.getImm() != 32)
          continue;
        unsigned DstReg = Dst.getReg();
        unsigned SrcReg = Src1.getReg();
        PeepholeDoubleRegsMap[DstReg] =
          std::make_pair(SrcReg, 1 /*Hexagon::subreg_hireg*/);
      }

      // Look for P=NOT(P).
      if (!DisablePNotP &&
          (MI->getOpcode() == Hexagon::C2_not)) {
        assert (MI->getNumOperands() == 2);
        MachineOperand &Dst = MI->getOperand(0);
        MachineOperand &Src  = MI->getOperand(1);
        unsigned DstReg = Dst.getReg();
        unsigned SrcReg = Src.getReg();
        // Just handle virtual registers.
        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
            TargetRegisterInfo::isVirtualRegister(SrcReg)) {
          // Map the following:
          // %vreg170<def> = NOT_xx %vreg166
          // PeepholeMap[170] = vreg166
          PeepholeMap[DstReg] = SrcReg;
        }
      }

      // Look for copy:
      // %vreg176<def> = COPY %vreg170:subreg_loreg
      if (!DisableOptSZExt && MI->isCopy()) {
        assert (MI->getNumOperands() == 2);
        MachineOperand &Dst = MI->getOperand(0);
        MachineOperand &Src  = MI->getOperand(1);

        // Make sure we are copying the lower 32 bits.
        if (Src.getSubReg() != Hexagon::subreg_loreg)
          continue;

        unsigned DstReg = Dst.getReg();
        unsigned SrcReg = Src.getReg();
        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
            TargetRegisterInfo::isVirtualRegister(SrcReg)) {
          // Try to find in the map.
          if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) {
            // Change the 1st operand.
            MI->RemoveOperand(1);
            MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false));
          } else  {
            DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI =
              PeepholeDoubleRegsMap.find(SrcReg);
            if (DI != PeepholeDoubleRegsMap.end()) {
              std::pair<unsigned,unsigned> PeepholeSrc = DI->second;
              MI->RemoveOperand(1);
              MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first,
                                                       false /*isDef*/,
                                                       false /*isImp*/,
                                                       false /*isKill*/,
                                                       false /*isDead*/,
                                                       false /*isUndef*/,
                                                       false /*isEarlyClobber*/,
                                                       PeepholeSrc.second));
            }
          }
        }
      }

      // Look for Predicated instructions.
      if (!DisablePNotP) {
        bool Done = false;
        if (QII->isPredicated(MI)) {
          MachineOperand &Op0 = MI->getOperand(0);
          unsigned Reg0 = Op0.getReg();
          const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0);
          if (RC0->getID() == Hexagon::PredRegsRegClassID) {
            // Handle instructions that have a predicate register in op0
            // (most cases of predicable instructions).
            if (TargetRegisterInfo::isVirtualRegister(Reg0)) {
              // Try to find in the map.
              if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) {
                // Change the 1st operand and flip the opcode.
                MI->getOperand(0).setReg(PeepholeSrc);
                int NewOp = QII->getInvertedPredicatedOpcode(MI->getOpcode());
                MI->setDesc(QII->get(NewOp));
                Done = true;
              }
            }
          }
        }

        if (!Done) {
          // Handle special instructions.
          unsigned Op = MI->getOpcode();
          unsigned NewOp = 0;
          unsigned PR = 1, S1 = 2, S2 = 3;   // Operand indices.

          switch (Op) {
            case Hexagon::C2_mux:
            case Hexagon::C2_muxii:
              NewOp = Op;
              break;
            case Hexagon::C2_muxri:
              NewOp = Hexagon::C2_muxir;
              break;
            case Hexagon::C2_muxir:
              NewOp = Hexagon::C2_muxri;
              break;
          }
          if (NewOp) {
            unsigned PSrc = MI->getOperand(PR).getReg();
            if (unsigned POrig = PeepholeMap.lookup(PSrc)) {
              MI->getOperand(PR).setReg(POrig);
              MI->setDesc(QII->get(NewOp));
              // Swap operands S1 and S2.
              MachineOperand Op1 = MI->getOperand(S1);
              MachineOperand Op2 = MI->getOperand(S2);
              ChangeOpInto(MI->getOperand(S1), Op2);
              ChangeOpInto(MI->getOperand(S2), Op1);
            }
          } // if (NewOp)
        } // if (!Done)

      } // if (!DisablePNotP)

    } // Instruction
  } // Basic Block
  return true;
}
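Most of the rewrites above share one shape: record dst -> src for value-preserving definitions (sxtw, combine-with-0, not), then substitute src whenever a later copy or predicated instruction reads dst. Below is a standalone sketch of that map-and-substitute idea, with plain integers standing in for virtual registers (illustrative only).

#include <unordered_map>

// Maps a defined vreg to the vreg whose value it merely wraps or extends,
// as the pass records for A2_sxtw and A4_combineir.
using PeepholeMap = std::unordered_map<unsigned, unsigned>;

static void recordWrapper(PeepholeMap &Map, unsigned Dst, unsigned Src) {
  Map[Dst] = Src;                // e.g. %170 = SXTW %166  ->  Map[170] = 166
}

// A later use that only needs the wrapped value can read the original
// register instead; returns Reg unchanged if nothing was recorded.
static unsigned lookThroughWrapper(const PeepholeMap &Map, unsigned Reg) {
  auto It = Map.find(Reg);
  return It == Map.end() ? Reg : It->second;
}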
Example #20
0
bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
}
Example #21
0
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
  bool Modified = false;
  // Two transformations to do here:
  // 1) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  //      e.g.,
  //        ldr x0, [x2]
  //        ldr x1, [x2, #8]
  //        ; becomes
  //        ldp x0, x1, [x2]
  // 2) Find base register updates that can be merged into the load or store
  //    as a base-reg writeback.
  //      e.g.,
  //        ldr x0, [x2]
  //        add x2, x2, #4
  //        ; becomes
  //        ldr x0, [x2], #4

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    MachineInstr *MI = MBBI;
    switch (MI->getOpcode()) {
    default:
      // Just move on to the next instruction.
      ++MBBI;
      break;
    case AArch64::STRSui:
    case AArch64::STRDui:
    case AArch64::STRQui:
    case AArch64::STRXui:
    case AArch64::STRWui:
    case AArch64::LDRSui:
    case AArch64::LDRDui:
    case AArch64::LDRQui:
    case AArch64::LDRXui:
    case AArch64::LDRWui:
    case AArch64::LDRSWui:
    // do the unscaled versions as well
    case AArch64::STURSi:
    case AArch64::STURDi:
    case AArch64::STURQi:
    case AArch64::STURWi:
    case AArch64::STURXi:
    case AArch64::LDURSi:
    case AArch64::LDURDi:
    case AArch64::LDURQi:
    case AArch64::LDURWi:
    case AArch64::LDURXi:
    case AArch64::LDURSWi: {
      // If this is a volatile load/store, don't mess with it.
      if (MI->hasOrderedMemoryRef()) {
        ++MBBI;
        break;
      }
      // Make sure this is a reg+imm (as opposed to an address reloc).
      if (!MI->getOperand(2).isImm()) {
        ++MBBI;
        break;
      }
      // Check if this load/store has a hint to avoid pair formation.
      // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
      if (TII->isLdStPairSuppressed(MI)) {
        ++MBBI;
        break;
      }
      // Look ahead up to ScanLimit instructions for a pairable instruction.
      LdStPairFlags Flags;
      MachineBasicBlock::iterator Paired =
          findMatchingInsn(MBBI, Flags, ScanLimit);
      if (Paired != E) {
        // Merge the loads into a pair. Keeping the iterator straight is a
        // pain, so we let the merge routine tell us what the next instruction
        // is after it's done mucking about.
        MBBI = mergePairedInsns(MBBI, Paired, Flags);

        Modified = true;
        ++NumPairCreated;
        if (isUnscaledLdst(MI->getOpcode()))
          ++NumUnscaledPairCreated;
        break;
      }
      ++MBBI;
      break;
    }
      // FIXME: Do the other instructions.
    }
  }

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    MachineInstr *MI = MBBI;
    // Do update merging. It's simpler to keep this separate from the above
    // switch, though not strictly necessary.
    unsigned Opc = MI->getOpcode();
    switch (Opc) {
    default:
      // Just move on to the next instruction.
      ++MBBI;
      break;
    case AArch64::STRSui:
    case AArch64::STRDui:
    case AArch64::STRQui:
    case AArch64::STRXui:
    case AArch64::STRWui:
    case AArch64::LDRSui:
    case AArch64::LDRDui:
    case AArch64::LDRQui:
    case AArch64::LDRXui:
    case AArch64::LDRWui:
    // do the unscaled versions as well
    case AArch64::STURSi:
    case AArch64::STURDi:
    case AArch64::STURQi:
    case AArch64::STURWi:
    case AArch64::STURXi:
    case AArch64::LDURSi:
    case AArch64::LDURDi:
    case AArch64::LDURQi:
    case AArch64::LDURWi:
    case AArch64::LDURXi: {
      // Make sure this is a reg+imm (as opposed to an address reloc).
      if (!MI->getOperand(2).isImm()) {
        ++MBBI;
        break;
      }
      // Look ahead up to ScanLimit instructions for a mergable instruction.
      MachineBasicBlock::iterator Update =
          findMatchingUpdateInsnForward(MBBI, ScanLimit, 0);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergePostIdxUpdateInsn(MBBI, Update);
        Modified = true;
        ++NumPostFolded;
        break;
      }
      // Don't know how to handle pre/post-index versions, so move to the next
      // instruction.
      if (isUnscaledLdst(Opc)) {
        ++MBBI;
        break;
      }

      // Look back to try to find a pre-index instruction. For example,
      // add x0, x0, #8
      // ldr x1, [x0]
      //   merged into:
      // ldr x1, [x0, #8]!
      Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergePreIdxUpdateInsn(MBBI, Update);
        Modified = true;
        ++NumPreFolded;
        break;
      }

      // Look forward to try to find a post-index instruction. For example,
      // ldr x1, [x0, #64]
      // add x0, x0, #64
      //   merged into:
      // ldr x1, [x0, #64]!

      // The immediate in the load/store is scaled by the size of the register
      // being loaded. The immediate in the add we're looking for,
      // however, is not, so adjust here.
      int Value = MI->getOperand(2).getImm() *
                  TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent()))
                      ->getSize();
      Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergePreIdxUpdateInsn(MBBI, Update);
        Modified = true;
        ++NumPreFolded;
        break;
      }

      // Nothing found. Just move to the next instruction.
      ++MBBI;
      break;
    }
      // FIXME: Do the other instructions.
    }
  }

  return Modified;
}
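The last case above depends on the scaling mismatch the comment points out: a scaled load/store immediate counts accesses of the register size, while the add being folded counts bytes. A one-line sketch of the conversion the pass performs before searching for a matching update (the function name is an illustrative assumption):

// The ldr/str immediate is in units of RegSizeInBytes; the add immediate is
// in bytes, so multiply before looking for a matching base-register update.
static int scaledImmToByteOffset(int ScaledImm, int RegSizeInBytes) {
  return ScaledImm * RegSizeInBytes;
}

// Example: "ldr x1, [x0, #8]" encodes immediate 1 for an 8-byte register and
// only matches "add x0, x0, #8" after converting 1 * 8 = 8 bytes.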
Example #22
0
static void LowerTlsAddr(MCStreamer &OutStreamer,
                         X86MCInstLower &MCInstLowering,
                         const MachineInstr &MI) {

  bool is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
                  MI.getOpcode() == X86::TLS_base_addr64;

  bool needsPadding = MI.getOpcode() == X86::TLS_addr64;

  MCContext &context = OutStreamer.getContext();

  if (needsPadding) {
    MCInst prefix;
    prefix.setOpcode(X86::DATA16_PREFIX);
    OutStreamer.EmitInstruction(prefix);
  }

  MCSymbolRefExpr::VariantKind SRVK;
  switch (MI.getOpcode()) {
    case X86::TLS_addr32:
    case X86::TLS_addr64:
      SRVK = MCSymbolRefExpr::VK_TLSGD;
      break;
    case X86::TLS_base_addr32:
      SRVK = MCSymbolRefExpr::VK_TLSLDM;
      break;
    case X86::TLS_base_addr64:
      SRVK = MCSymbolRefExpr::VK_TLSLD;
      break;
    default:
      llvm_unreachable("unexpected opcode");
  }

  MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
  const MCSymbolRefExpr *symRef = MCSymbolRefExpr::Create(sym, SRVK, context);

  MCInst LEA;
  if (is64Bits) {
    LEA.setOpcode(X86::LEA64r);
    LEA.addOperand(MCOperand::CreateReg(X86::RDI)); // dest
    LEA.addOperand(MCOperand::CreateReg(X86::RIP)); // base
    LEA.addOperand(MCOperand::CreateImm(1));        // scale
    LEA.addOperand(MCOperand::CreateReg(0));        // index
    LEA.addOperand(MCOperand::CreateExpr(symRef));  // disp
    LEA.addOperand(MCOperand::CreateReg(0));        // seg
  } else if (SRVK == MCSymbolRefExpr::VK_TLSLDM) {
    LEA.setOpcode(X86::LEA32r);
    LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest
    LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // base
    LEA.addOperand(MCOperand::CreateImm(1));        // scale
    LEA.addOperand(MCOperand::CreateReg(0));        // index
    LEA.addOperand(MCOperand::CreateExpr(symRef));  // disp
    LEA.addOperand(MCOperand::CreateReg(0));        // seg
  } else {
    LEA.setOpcode(X86::LEA32r);
    LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest
    LEA.addOperand(MCOperand::CreateReg(0));        // base
    LEA.addOperand(MCOperand::CreateImm(1));        // scale
    LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // index
    LEA.addOperand(MCOperand::CreateExpr(symRef));  // disp
    LEA.addOperand(MCOperand::CreateReg(0));        // seg
  }
  OutStreamer.EmitInstruction(LEA);

  if (needsPadding) {
    MCInst prefix;
    prefix.setOpcode(X86::DATA16_PREFIX);
    OutStreamer.EmitInstruction(prefix);
    prefix.setOpcode(X86::DATA16_PREFIX);
    OutStreamer.EmitInstruction(prefix);
    prefix.setOpcode(X86::REX64_PREFIX);
    OutStreamer.EmitInstruction(prefix);
  }

  MCInst call;
  if (is64Bits)
    call.setOpcode(X86::CALL64pcrel32);
  else
    call.setOpcode(X86::CALLpcrel32);
  StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
  MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name);
  const MCSymbolRefExpr *tlsRef =
    MCSymbolRefExpr::Create(tlsGetAddr,
                            MCSymbolRefExpr::VK_PLT,
                            context);

  call.addOperand(MCOperand::CreateExpr(tlsRef));
  OutStreamer.EmitInstruction(call);
}
Example #23
0
static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
                                     const TargetRegisterInfo *TRI,
                                     MachineBasicBlock::iterator II,
                                     unsigned pReg,
                                     bool secondReg,
                                     bool optLocation,
                                     MachineBasicBlock::iterator end,
                                     MachineFunction &MF) {

  MachineInstr *MI = II;

  // If the second operand of the compare is an imm, make sure it's in the
  // range specified by the arch.
  if (!secondReg) {
    int64_t v = MI->getOperand(2).getImm();

    if (!(isUInt<5>(v) ||
         ((MI->getOpcode() == Hexagon::CMPEQri ||
           MI->getOpcode() == Hexagon::CMPGTri) &&
          (v == -1))))
      return false;
  }

  unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning.
  cmpReg1 = MI->getOperand(1).getReg();

  if (secondReg) {
    cmpOp2 = MI->getOperand(2).getReg();

    // Make sure that the second register is not defined by a COPY.
    // At the machine code level we don't need this, but if we decide
    // to move new-value jump formation prior to RA, we will need it.
    MachineRegisterInfo &MRI = MF.getRegInfo();
    if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) {
      MachineInstr *def = MRI.getVRegDef(cmpOp2);
      if (def->getOpcode() == TargetOpcode::COPY)
        return false;
    }
  }

  // Walk the instructions after the compare (predicate def) to the jump,
  // and satisfy the following conditions.
  ++II;
  for (MachineBasicBlock::iterator localII = II; localII != end;
       ++localII) {

    // Check 1.
    // If "common" checks fail, bail out.
    if (!commonChecksToProhibitNewValueJump(optLocation, localII))
      return false;

    // Check 2.
    // If there is a def or use of predicate (result of compare), bail out.
    if (localII->modifiesRegister(pReg, TRI) ||
        localII->readsRegister(pReg, TRI))
      return false;

    // Check 3.
    // If there is a def of any of the use of the compare (operands of compare),
    // bail out.
    // Eg.
    //    p0 = cmp.eq(r2, r0)
    //    r2 = r4
    //    if (p0.new) jump:t .LBB28_3
    if (localII->modifiesRegister(cmpReg1, TRI) ||
        (secondReg && localII->modifiesRegister(cmpOp2, TRI)))
      return false;
  }
  return true;
}
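The immediate check at the top of this helper encodes the architectural limit for new-value compares: a 5-bit unsigned immediate, with -1 additionally allowed for the eq/gt forms. A minimal standalone restatement of that rule (isUInt5 re-implements LLVM's isUInt<5> so the sketch is self-contained):

#include <cstdint>

static bool isUInt5(int64_t V) { return V >= 0 && V <= 31; }

// Mirrors the check above: CMPEQri/CMPGTri may also use -1; everything else
// must fit an unsigned 5-bit field.
static bool isValidNewValueCmpImm(int64_t V, bool IsEqOrGtCompare) {
  return isUInt5(V) || (IsEqOrGtCompare && V == -1);
}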
Example #24
0
void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
                              int &SPAdj) {
  assert(Fn.getSubtarget().getRegisterInfo() &&
         "getRegisterInfo() must be implemented!");
  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
  unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
  unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();

  if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);

  bool InsideCallSequence = false;

  for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {

    if (I->getOpcode() == FrameSetupOpcode ||
        I->getOpcode() == FrameDestroyOpcode) {
      InsideCallSequence = (I->getOpcode() == FrameSetupOpcode);
      SPAdj += TII.getSPAdjust(I);

      MachineBasicBlock::iterator PrevI = BB->end();
      if (I != BB->begin()) PrevI = std::prev(I);
      TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);

      // Visit the instructions created by eliminateCallFramePseudoInstr().
      if (PrevI == BB->end())
        I = BB->begin();     // The replaced instr was the first in the block.
      else
        I = std::next(PrevI);
      continue;
    }

    MachineInstr *MI = I;
    bool DoIncr = true;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      if (!MI->getOperand(i).isFI())
        continue;

      // Frame indices in debug values are encoded in a target independent
      // way with simply the frame index and offset rather than any
      // target-specific addressing mode.
      if (MI->isDebugValue()) {
        assert(i == 0 && "Frame indices can only appear as the first "
                         "operand of a DBG_VALUE machine instruction");
        unsigned Reg;
        MachineOperand &Offset = MI->getOperand(1);
        Offset.setImm(Offset.getImm() +
                      TFI->getFrameIndexReference(
                          Fn, MI->getOperand(0).getIndex(), Reg));
        MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
        continue;
      }

      // TODO: This code should be commoned with the code for
      // PATCHPOINT. There's no good reason for the difference in
      // implementation other than historical accident.  The only
      // remaining difference is the unconditional use of the stack
      // pointer as the base register.
      if (MI->getOpcode() == TargetOpcode::STATEPOINT) {
        assert((!MI->isDebugValue() || i == 0) &&
               "Frame indicies can only appear as the first operand of a "
               "DBG_VALUE machine instruction");
        unsigned Reg;
        MachineOperand &Offset = MI->getOperand(i + 1);
        const unsigned refOffset =
          TFI->getFrameIndexReferenceFromSP(Fn, MI->getOperand(i).getIndex(),
                                            Reg);

        Offset.setImm(Offset.getImm() + refOffset);
        MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
        continue;
      }

      // Some instructions (e.g. inline asm instructions) can have
      // multiple frame indices and/or cause eliminateFrameIndex
      // to insert more than one instruction. We need the register
      // scavenger to go through all of these instructions so that
      // it can update its register information. We keep the
      // iterator at the point before insertion so that we can
      // revisit them in full.
      bool AtBeginning = (I == BB->begin());
      if (!AtBeginning) --I;

      // If this instruction has a FrameIndex operand, we need to
      // use that target machine register info object to eliminate
      // it.
      TRI.eliminateFrameIndex(MI, SPAdj, i,
                              FrameIndexVirtualScavenging ?  nullptr : RS);

      // Reset the iterator if we were at the beginning of the BB.
      if (AtBeginning) {
        I = BB->begin();
        DoIncr = false;
      }

      MI = nullptr;
      break;
    }

    // If we are looking at a call sequence, we need to keep track of
    // the SP adjustment made by each instruction in the sequence.
    // This includes both the frame setup/destroy pseudos (handled above),
    // as well as other instructions that have side effects w.r.t the SP.
    // Note that this must come after eliminateFrameIndex, because 
    // if I itself referred to a frame index, we shouldn't count its own
    // adjustment.
    if (MI && InsideCallSequence)
      SPAdj += TII.getSPAdjust(MI);

    if (DoIncr && I != BB->end()) ++I;

    // Update register states.
    if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
  }
}
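For DBG_VALUE the rewrite is deliberately target-independent: the frame-index operand becomes a base register, and the constant returned by getFrameIndexReference is simply added to the offset the debug value already carried. Here is a standalone model of that arithmetic; the struct and function names are illustrative assumptions.

// Operand 0 holds the frame index (it becomes a register), operand 1 holds
// an offset that is shifted by the frame lowering's answer for that index.
struct DebugValueOperands {
  int FrameIndexOrReg;
  long Offset;
};

static void rewriteDebugValue(DebugValueOperands &DV, unsigned BaseReg,
                              long FrameIndexOffset) {
  DV.Offset += FrameIndexOffset;                  // fold in the FI displacement
  DV.FrameIndexOrReg = static_cast<int>(BaseReg); // FI -> base register
}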
Example #25
0
int AlphaCodeEmitter::getMachineOpValue(MachineInstr &MI, MachineOperand &MO) {

  int rv = 0; // Return value; defaults to 0 for unhandled cases
              // or things that get fixed up later by the JIT.

  if (MO.isRegister()) {
    rv = getAlphaRegNumber(MO.getReg());
  } else if (MO.isImmediate()) {
    rv = MO.getImm();
  } else if (MO.isGlobalAddress() || MO.isExternalSymbol()
             || MO.isConstantPoolIndex()) {
    DOUT << MO << " is a relocated op for " << MI << "\n";
    unsigned Reloc = 0;
    int Offset = 0;
    bool useGOT = false;
    switch (MI.getOpcode()) {
    case Alpha::BSR:
      Reloc = Alpha::reloc_bsr;
      break;
    case Alpha::LDLr:
    case Alpha::LDQr:
    case Alpha::LDBUr:
    case Alpha::LDWUr:
    case Alpha::LDSr:
    case Alpha::LDTr:
    case Alpha::LDAr:
    case Alpha::STQr:
    case Alpha::STLr:
    case Alpha::STWr:
    case Alpha::STBr:
    case Alpha::STSr:
    case Alpha::STTr:
      Reloc = Alpha::reloc_gprellow;
      break;
    case Alpha::LDAHr:
      Reloc = Alpha::reloc_gprelhigh;
      break;
    case Alpha::LDQl:
      Reloc = Alpha::reloc_literal;
      useGOT = true;
      break;
    case Alpha::LDAg:
    case Alpha::LDAHg:
      Reloc = Alpha::reloc_gpdist;
      Offset = MI.getOperand(3).getImm();
      break;
    default:
      assert(0 && "unknown relocatable instruction");
      abort();
    }
    if (MO.isGlobalAddress())
      MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
                                                 Reloc, MO.getGlobal(), Offset,
                                                 isa<Function>(MO.getGlobal()),
                                                 useGOT));
    else if (MO.isExternalSymbol())
      MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
                                                     Reloc, MO.getSymbolName(),
                                                     Offset, true));
    else
     MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
                                          Reloc, MO.getIndex(), Offset));
  } else if (MO.isMachineBasicBlock()) {
    MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
                                               Alpha::reloc_bsr, MO.getMBB()));
  } else {
    cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
    abort();
  }

  return rv;
}
Example #26
0
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *CPSRDef = 0;
  MachineInstr *BundleMI = 0;

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = llvm::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    unsigned Opcode = MI->getOpcode();
    DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
    if (OPI != ReduceOpcodeMap.end()) {
      const ReduceEntry &Entry = ReduceTable[OPI->second];
      // Ignore "special" cases for now.
      if (Entry.Special) {
        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
          Modified = true;
          MachineBasicBlock::instr_iterator I = prior(NextMII);
          MI = &*I;
        }
        goto ProcessNext;
      }

      // Try to transform to a 16-bit two-address instruction.
      if (Entry.NarrowOpc2 &&
          ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
        Modified = true;
        MachineBasicBlock::instr_iterator I = prior(NextMII);
        MI = &*I;
        goto ProcessNext;
      }

      // Try to transform to a 16-bit non-two-address instruction.
      if (Entry.NarrowOpc1 &&
          ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
        Modified = true;
        MachineBasicBlock::instr_iterator I = prior(NextMII);
        MI = &*I;
      }
    }

  ProcessNext:
    if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR))
        LiveCPSR = false;
      MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
      if (MO && !MO->isDead())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = 0;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      IsSelfLoop = false;
    }
  }

  return Modified;
}
Example #27
/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of the
/// result.
///
/// Do not generate an EXTRACT that is used only in a debug use, as this changes
/// the code. Since this code does not currently share EXTRACTs, just ignore all
/// debug uses.
bool PeepholeOptimizer::
optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
  unsigned SrcReg, DstReg, SubIdx;
  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    return false;

  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  if (MRI->hasOneNonDBGUse(SrcReg))
    // No other uses.
    return false;

  // Ensure DstReg can get a register class that actually supports
  // sub-registers. Don't change the class until we commit.
  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
  DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx);
  if (!DstRC)
    return false;

  // The ext instr may be operating on a sub-register of SrcReg as well.
  // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
  // register.
  // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
  // SrcReg:SubIdx should be replaced.
  bool UseSrcSubIdx = TM->getRegisterInfo()->
    getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;

  // The source has other uses. See if we can replace the other uses with use of
  // the result of the extension.
  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
  for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
    ReachedBBs.insert(UI.getParent());

  // Uses that are in the same BB as uses of the result of the instruction.
  SmallVector<MachineOperand*, 8> Uses;

  // Uses that the result of the instruction can reach.
  SmallVector<MachineOperand*, 8> ExtendedUses;

  bool ExtendLife = true;
  for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
    MachineInstr *UseMI = UseMO.getParent();
    if (UseMI == MI)
      continue;

    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // Only accept uses of SrcReg:SubIdx.
    if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
      continue;

    // It's an error to translate this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    //
    // into this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1027 = COPY %reg1025:4
    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    //
    // The problem here is that SUBREG_TO_REG is there to assert that an
    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    // the COPY here, it will give us the value after the <sext>, not the
    // original value of %reg1024 before <sext>.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      continue;

    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these unless it's a PHI.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in order
      // to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend live range of
      // the extension result.
      ExtendLife = false;
      break;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
              std::back_inserter(Uses));

  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;

    // Look for PHI uses of the extended result, we don't want to extend the
    // liveness of a PHI input. It breaks all kinds of assumptions downstream.
    // A PHI use is expected to be the kill of its source values.
    for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
      if (UI.isPHI())
        PHIBBs.insert(UI.getParent());

    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
      MachineOperand *UseMO = Uses[i];
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      if (PHIBBs.count(UseMBB))
        continue;

      // About to add uses of DstReg, clear DstReg's kill flags.
      if (!Changed) {
        MRI->clearKillFlags(DstReg);
        MRI->constrainRegClass(DstReg, DstRC);
      }

      unsigned NewVR = MRI->createVirtualRegister(RC);
      MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
                                   TII->get(TargetOpcode::COPY), NewVR)
        .addReg(DstReg, 0, SubIdx);
      // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
      if (UseSrcSubIdx) {
        Copy->getOperand(0).setSubReg(SubIdx);
        Copy->getOperand(0).setIsUndef();
      }
      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}
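Setting the PHI and SUBREG_TO_REG exclusions aside, the loop above classifies every remaining use of the source register into one of four buckets. A compact standalone model of that classification, with integers standing in for basic blocks (purely illustrative; the real pass works on MachineOperands):

#include <set>

enum class UseAction { Replace, ReplaceAndExtend, Skip, StopExtending };

// Classifies one use of SrcReg, following the loop above. PHI and
// SUBREG_TO_REG users are assumed to have been filtered out already.
UseAction classifyUse(int UseBB, int DefBB, const std::set<int> &ReachedBBs,
                      bool UseComesAfterExt, bool Aggressive,
                      bool DefDominatesUse) {
  if (UseBB == DefBB)                       // local use
    return UseComesAfterExt ? UseAction::Replace : UseAction::Skip;
  if (ReachedBBs.count(UseBB))              // extension result already reaches
    return UseAction::Replace;
  if (Aggressive && DefDominatesUse)        // worth extending the live range
    return UseAction::ReplaceAndExtend;
  return UseAction::StopExtending;          // give up on extending liveness
}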
Example #28
0
static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
                                     const TargetRegisterInfo *TRI,
                                     MachineBasicBlock::iterator II,
                                     unsigned pReg,
                                     bool secondReg,
                                     bool optLocation,
                                     MachineBasicBlock::iterator end,
                                     MachineFunction &MF) {

  MachineInstr &MI = *II;

  // If the second operand of the compare is an imm, make sure it's in the
  // range specified by the arch.
  if (!secondReg) {
    int64_t v = MI.getOperand(2).getImm();
    bool Valid = false;

    switch (MI.getOpcode()) {
      case Hexagon::C2_cmpeqi:
      case Hexagon::C4_cmpneqi:
      case Hexagon::C2_cmpgti:
      case Hexagon::C4_cmpltei:
        Valid = (isUInt<5>(v) || v == -1);
        break;
      case Hexagon::C2_cmpgtui:
      case Hexagon::C4_cmplteui:
        Valid = isUInt<5>(v);
        break;
      case Hexagon::S2_tstbit_i:
      case Hexagon::S4_ntstbit_i:
        Valid = (v == 0);
        break;
    }

    if (!Valid)
      return false;
  }

  unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning.
  cmpReg1 = MI.getOperand(1).getReg();

  if (secondReg) {
    cmpOp2 = MI.getOperand(2).getReg();

    // If the same register appears as both operands, we cannot generate a new
    // value compare. Only one operand may use the .new suffix.
    if (cmpReg1 == cmpOp2)
      return false;

    // Make sure that the second register is not defined by a COPY.
    // At the machine code level we don't need this, but if we decide
    // to move new-value jump formation prior to RA, we will need it.
    MachineRegisterInfo &MRI = MF.getRegInfo();
    if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) {
      MachineInstr *def = MRI.getVRegDef(cmpOp2);
      if (def->getOpcode() == TargetOpcode::COPY)
        return false;
    }
  }

  // Walk the instructions after the compare (predicate def) to the jump,
  // and satisfy the following conditions.
  ++II;
  for (MachineBasicBlock::iterator localII = II; localII != end;
       ++localII) {
    if (localII->isDebugValue())
      continue;

    // Check 1.
    // If "common" checks fail, bail out.
    if (!commonChecksToProhibitNewValueJump(optLocation, localII))
      return false;

    // Check 2.
    // If there is a def or use of predicate (result of compare), bail out.
    if (localII->modifiesRegister(pReg, TRI) ||
        localII->readsRegister(pReg, TRI))
      return false;

    // Check 3.
    // If there is a def of any of the use of the compare (operands of compare),
    // bail out.
    // Eg.
    //    p0 = cmp.eq(r2, r0)
    //    r2 = r4
    //    if (p0.new) jump:t .LBB28_3
    if (localII->modifiesRegister(cmpReg1, TRI) ||
        (secondReg && localII->modifiesRegister(cmpOp2, TRI)))
      return false;
  }
  return true;
}
Example #29
void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
                              int &SPAdj) {
  const TargetMachine &TM = Fn.getTarget();
  assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
  const TargetFrameLowering *TFI = TM.getFrameLowering();
  bool StackGrowsDown =
    TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
  int FrameSetupOpcode   = TII.getCallFrameSetupOpcode();
  int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();

  if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);

  for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {

    if (I->getOpcode() == FrameSetupOpcode ||
        I->getOpcode() == FrameDestroyOpcode) {
      // Remember how much SP has been adjusted to create the call
      // frame.
      int Size = I->getOperand(0).getImm();

      if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
          (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
        Size = -Size;

      SPAdj += Size;

      MachineBasicBlock::iterator PrevI = BB->end();
      if (I != BB->begin()) PrevI = prior(I);
      TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);

      // Visit the instructions created by eliminateCallFramePseudoInstr().
      if (PrevI == BB->end())
        I = BB->begin();     // The replaced instr was the first in the block.
      else
        I = llvm::next(PrevI);
      continue;
    }

    MachineInstr *MI = I;
    bool DoIncr = true;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      if (!MI->getOperand(i).isFI())
        continue;

      // Frame indices in debug values are encoded in a target independent
      // way with simply the frame index and offset rather than any
      // target-specific addressing mode.
      if (MI->isDebugValue() ||
          MI->getOpcode() == TargetOpcode::STATEPOINT ||
          MI->getOpcode() == TargetOpcode::STACKMAP ||
          MI->getOpcode() == TargetOpcode::PATCHPOINT) {
        assert((!MI->isDebugValue() || i == 0) &&
               "Frame indicies can only appear as the first operand of a "
               "DBG_VALUE machine instruction");
        unsigned Reg;
        MachineOperand &Offset = MI->getOperand(i + 1);
        //errs() << "offset: " << Offset.getImm() << "\n";
        const unsigned refOffset = (MI->getOpcode() == TargetOpcode::STATEPOINT) ?
          // GC/STATEPOINT specific
          TFI->getFrameIndexReferenceForGC(Fn, MI->getOperand(i).getIndex(), Reg) :
          // General case
          TFI->getFrameIndexReference(Fn, MI->getOperand(i).getIndex(), Reg);

        Offset.setImm(Offset.getImm() + refOffset);
        MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
        continue;
      }

      // Some instructions (e.g. inline asm instructions) can have
      // multiple frame indices and/or cause eliminateFrameIndex
      // to insert more than one instruction. We need the register
      // scavenger to go through all of these instructions so that
      // it can update its register information. We keep the
      // iterator at the point before insertion so that we can
      // revisit them in full.
      bool AtBeginning = (I == BB->begin());
      if (!AtBeginning) --I;

      // If this instruction has a FrameIndex operand, we need to
      // use that target machine register info object to eliminate
      // it.
      TRI.eliminateFrameIndex(MI, SPAdj, i,
                              FrameIndexVirtualScavenging ?  NULL : RS);

      // Reset the iterator if we were at the beginning of the BB.
      if (AtBeginning) {
        I = BB->begin();
        DoIncr = false;
      }

      MI = 0;
      break;
    }

    if (DoIncr && I != BB->end()) ++I;

    // Update register states.
    if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
  }
}
Example #30
0
void PatmosPostRASchedStrategy::postprocessDAG(ScheduleDAGPostRA *dag)
{
  DAG = dag;

  SUnit *CFL = NULL;
  // Find the inline asm statement, if any. Note that asm is a barrier,
  // therefore there is at most one CFL or inline asm.
  SUnit *Asm = NULL;

  // Push up loads to ensure load delay slot across BBs
  // TODO For some reason, loads do not always have exit edges, and a latency
  //      of 1; find out why. Happens e.g. in coremark with 16k methods setup.
  for (std::vector<SUnit>::reverse_iterator it = DAG->SUnits.rbegin(),
         ie = DAG->SUnits.rend(); it != ie; it++) {
    MachineInstr *MI = it->getInstr();
    if (!MI) continue;

    if (MI->mayLoad()) {
      SDep Dep(&*it, SDep::Artificial);
      Dep.setLatency(computeExitLatency(*it));
      DAG->ExitSU.addPred(Dep);
    }
  }

  // Find the branch/call/ret instruction if available
  for (std::vector<SUnit>::reverse_iterator it = DAG->SUnits.rbegin(),
       ie = DAG->SUnits.rend(); it != ie; it++)
  {
    MachineInstr *MI = it->getInstr();
    if (!MI) continue;
    if (isPatmosCFL(MI->getOpcode(), MI->getDesc().TSFlags)) {
      CFL = &*it;
      break;
    }
    if (MI->isInlineAsm()) {
      Asm = &*it;
      break;
    }
  }

  const PatmosSubtarget *PST = PTM.getSubtargetImpl();

  unsigned DelaySlot = CFL ? PST->getDelaySlotCycles(CFL->getInstr()) : 0;

  if (CFL) {
    // RET and CALL have implicit deps on the return values and call
    // arguments. Remove all those edges to schedule them into the delay slot
    // if the registers are not actually used by CALL and RET
    if (CFL->getInstr()->isReturn() || CFL->getInstr()->isCall())
      removeImplicitCFLDeps(*CFL);

    // Add an artificial dep from CFL to exit for the delay slot
    SDep DelayDep(CFL, SDep::Artificial);
    DelayDep.setLatency(DelaySlot + 1);
    DAG->ExitSU.addPred(DelayDep);

    CFL->isScheduleLow = true;

    if (PTM.getSubtargetImpl()->getCFLType() != PatmosSubtarget::CFL_DELAYED) {
      // Push up single instructions that can be scheduled in the same
      // cycle as the branch
      unsigned LowCount = 0;
      SUnit *LowSU = 0;
      for (std::vector<SUnit>::reverse_iterator it = DAG->SUnits.rbegin(),
             ie = DAG->SUnits.rend(); it != ie; it++) {
        if (&*it == CFL) continue;
        
        MachineInstr *MI = it->getInstr();
        if (!MI) continue;
        
        if (it->getHeight() <= DelaySlot) {
          LowCount++;
          if (PII.canIssueInSlot(MI, LowCount)) {
            LowSU = &*it;
          }
        }
      }

      if (LowSU && LowCount == 1) {
        SDep Dep(LowSU, SDep::Artificial);
        Dep.setLatency(DelaySlot + 1);
        DAG->ExitSU.addPred(Dep);
      }
    }

    if (PTM.getSubtargetImpl()->getCFLType() == PatmosSubtarget::CFL_NON_DELAYED) {
      // Add dependencies from all other instructions to exit
      for (std::vector<SUnit>::reverse_iterator it = DAG->SUnits.rbegin(),
             ie = DAG->SUnits.rend(); it != ie; it++) {
        if (&*it == CFL) continue;

        MachineInstr *MI = it->getInstr();
        if (!MI) continue;

        SDep Dep(&*it, SDep::Artificial);
        Dep.setLatency(DelaySlot + 1);
        DAG->ExitSU.addPred(Dep);
      }
    }
  }

  // Add an exit delay between loads and inline asm, in case asm is empty
  if (Asm) {
    std::vector<SUnit*> PredLoads;
    for (SUnit::pred_iterator it = Asm->Preds.begin(), ie = Asm->Preds.end();
         it != ie; it++)
    {
      if (!it->getSUnit()) continue;
      MachineInstr *MI = it->getSUnit()->getInstr();
      // Check for loads
      if (!MI || !MI->mayLoad()) continue;
      PredLoads.push_back(it->getSUnit());
    }
    for (std::vector<SUnit*>::iterator it = PredLoads.begin(),
         ie = PredLoads.end(); it != ie; it++)
    {
      // Add a delay between loads and inline-asm, even if the operand is not
      // used.
      SDep Dep(*it, SDep::Artificial);
      Dep.setLatency( computeExitLatency(**it) );

      Asm->addPred(Dep);
    }
  }

  // remove barriers between loads/stores with different memory type
  removeTypedMemBarriers();

  // remove any dependency between instructions with mutually exclusive
  // predicates
  removeExclusivePredDeps();

  // TODO SWS and LWS do not have ST as implicit def edges
  // TODO CALL has chain edges to all SWS/.. instructions, remove

  // TODO remove edges from MUL to other MULs to overlap MUL and MFS for
  //      pipelined muls.
}
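The "push up single instructions" block above is only profitable in a narrow case: exactly one unit sits at or below the delay-slot height, and that unit can actually be issued in the slot. A small standalone sketch of that decision (the struct fields model the getHeight()/canIssueInSlot() queries; illustrative only):

#include <vector>

struct Candidate {
  unsigned Height;        // longest path to the DAG exit, in cycles
  bool CanIssueInSlot;    // result of the canIssueInSlot() query
};

// Mirrors the heuristic above: pair an instruction with the branch only when
// it is the single unit that fits under the delay-slot height and it can be
// issued in that slot.
static bool shouldPairWithBranch(const std::vector<Candidate> &Units,
                                 unsigned DelaySlot) {
  unsigned LowCount = 0;
  bool FitsSlot = false;
  for (const Candidate &C : Units)
    if (C.Height <= DelaySlot) {
      ++LowCount;
      FitsSlot = C.CanIssueInSlot;
    }
  return LowCount == 1 && FitsSlot;
}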