Пример #1
0
unsigned
SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  removeHBR(MBB);
  if (I == MBB.begin())
    return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (!isCondBranch(I) && !isUncondBranch(I))
    return 0;

  // Remove the first branch.
  DEBUG(errs() << "Removing branch:                ");
  DEBUG(I->dump());
  I->eraseFromParent();
  I = MBB.end();
  if (I == MBB.begin())
    return 1;

  --I;
  if (!(isCondBranch(I) || isUncondBranch(I)))
    return 1;

  // Remove the second branch.
  DEBUG(errs() << "Removing second branch:         ");
  DEBUG(I->dump());
  I->eraseFromParent();
  return 2;
}
  bool runOnMachineFunction(MachineFunction &MF) override {
    ST = &MF.getSubtarget<R600Subtarget>();
    MaxFetchInst = ST->getTexVTXClauseSize();
    TII = ST->getInstrInfo();
    TRI = ST->getRegisterInfo();

    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

    CFStack CFStack(ST, MF.getFunction().getCallingConv());
    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
        ++MB) {
      MachineBasicBlock &MBB = *MB;
      unsigned CfCount = 0;
      std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
      std::vector<MachineInstr * > IfThenElseStack;
      if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
            getHWInstrDesc(CF_CALL_FS));
        CfCount++;
      }
      std::vector<ClauseFile> FetchClauses, AluClauses;
      std::vector<MachineInstr *> LastAlu(1);
      std::vector<MachineInstr *> ToPopAfter;

      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E;) {
        if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
          LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
          FetchClauses.push_back(MakeFetchClause(MBB, I));
          CfCount++;
          LastAlu.back() = nullptr;
          continue;
        }

        MachineBasicBlock::iterator MI = I;
        if (MI->getOpcode() != R600::ENDIF)
          LastAlu.back() = nullptr;
        if (MI->getOpcode() == R600::CF_ALU)
          LastAlu.back() = &*MI;
        I++;
        bool RequiresWorkAround =
            CFStack.requiresWorkAroundForInst(MI->getOpcode());
        switch (MI->getOpcode()) {
        case R600::CF_ALU_PUSH_BEFORE:
          if (RequiresWorkAround) {
            LLVM_DEBUG(dbgs()
                       << "Applying bug work-around for ALU_PUSH_BEFORE\n");
            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
                .addImm(CfCount + 1)
                .addImm(1);
            MI->setDesc(TII->get(R600::CF_ALU));
            CfCount++;
            CFStack.pushBranch(R600::CF_PUSH_EG);
          } else
            CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
          LLVM_FALLTHROUGH;
        case R600::CF_ALU:
          I = MI;
          AluClauses.push_back(MakeALUClause(MBB, I));
          LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
          CfCount++;
          break;
        case R600::WHILELOOP: {
          CFStack.pushLoop();
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_WHILE_LOOP))
              .addImm(1);
          std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
              std::set<MachineInstr *>());
          Pair.second.insert(MIb);
          LoopStack.push_back(std::move(Pair));
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDLOOP: {
          CFStack.popLoop();
          std::pair<unsigned, std::set<MachineInstr *>> Pair =
              std::move(LoopStack.back());
          LoopStack.pop_back();
          CounterPropagateAddr(Pair.second, CfCount);
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
              .addImm(Pair.first + 1);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::IF_PREDICATE_SET: {
          LastAlu.push_back(nullptr);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_JUMP))
              .addImm(0)
              .addImm(0);
          IfThenElseStack.push_back(MIb);
          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          MI->eraseFromParent();
          CfCount++;
          break;
        }
Пример #3
0
void X86CmovConverterPass::convertCmovInstsToBranches(
    SmallVectorImpl<MachineInstr *> &Group) const {
  assert(!Group.empty() && "No CMOV instructions to convert");
  ++NumOfOptimizedCmovGroups;

  // If the CMOV group is not packed, e.g., there are debug instructions between
  // first CMOV and last CMOV, then pack the group and make the CMOV instruction
  // consecutive by moving the debug instructions to after the last CMOV. 
  packCmovGroup(Group.front(), Group.back());

  // To convert a CMOVcc instruction, we actually have to insert the diamond
  // control-flow pattern.  The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.

  // Before
  // -----
  // MBB:
  //   cond = cmp ...
  //   v1 = CMOVge t1, f1, cond
  //   v2 = CMOVlt t2, f2, cond
  //   v3 = CMOVge v1, f3, cond
  //
  // After
  // -----
  // MBB:
  //   cond = cmp ...
  //   jge %SinkMBB
  //
  // FalseMBB:
  //   jmp %SinkMBB
  //
  // SinkMBB:
  //   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
  //   %v2 = phi[%t2, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
  //                                          ; true-value with false-value
  //   %v3 = phi[%f3, %FalseMBB], [%t1, %MBB] ; Phi instruction cannot use
  //                                          ; previous Phi instruction result

  MachineInstr &MI = *Group.front();
  MachineInstr *LastCMOV = Group.back();
  DebugLoc DL = MI.getDebugLoc();

  X86::CondCode CC = X86::CondCode(X86::getCondFromCMovOpc(MI.getOpcode()));
  X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
  // Potentially swap the condition codes so that any memory operand to a CMOV
  // is in the *false* position instead of the *true* position. We can invert
  // any non-memory operand CMOV instructions to cope with this and we ensure
  // memory operand CMOVs are only included with a single condition code.
  if (llvm::any_of(Group, [&](MachineInstr *I) {
        return I->mayLoad() && X86::getCondFromCMovOpc(I->getOpcode()) == CC;
      }))
    std::swap(CC, OppCC);

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction::iterator It = ++MBB->getIterator();
  MachineFunction *F = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();

  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB);
  F->insert(It, FalseMBB);
  F->insert(It, SinkMBB);

  // If the EFLAGS register isn't dead in the terminator, then claim that it's
  // live into the sink and copy blocks.
  if (checkEFLAGSLive(LastCMOV)) {
    FalseMBB->addLiveIn(X86::EFLAGS);
    SinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Transfer the remainder of BB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(LastCMOV)), MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Add the false and sink blocks as its successors.
  MBB->addSuccessor(FalseMBB);
  MBB->addSuccessor(SinkMBB);

  // Create the conditional branch instruction.
  BuildMI(MBB, DL, TII->get(X86::GetCondBranchFromCond(CC))).addMBB(SinkMBB);

  // Add the sink block to the false block successors.
  FalseMBB->addSuccessor(SinkMBB);

  MachineInstrBuilder MIB;
  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
  MachineBasicBlock::iterator MIItEnd =
      std::next(MachineBasicBlock::iterator(LastCMOV));
  MachineBasicBlock::iterator FalseInsertionPoint = FalseMBB->begin();
  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();

  // First we need to insert an explicit load on the false path for any memory
  // operand. We also need to potentially do register rewriting here, but it is
  // simpler as the memory operands are always on the false path so we can
  // simply take that input, whatever it is.
  DenseMap<unsigned, unsigned> FalseBBRegRewriteTable;
  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;) {
    auto &MI = *MIIt++;
    // Skip any CMOVs in this group which don't load from memory.
    if (!MI.mayLoad()) {
      // Remember the false-side register input.
      unsigned FalseReg =
          MI.getOperand(X86::getCondFromCMovOpc(MI.getOpcode()) == CC ? 1 : 2)
              .getReg();
      // Walk back through any intermediate cmovs referenced.
      while (true) {
        auto FRIt = FalseBBRegRewriteTable.find(FalseReg);
        if (FRIt == FalseBBRegRewriteTable.end())
          break;
        FalseReg = FRIt->second;
      }
      FalseBBRegRewriteTable[MI.getOperand(0).getReg()] = FalseReg;
      continue;
    }

    // The condition must be the *opposite* of the one we've decided to branch
    // on as the branch will go *around* the load and the load should happen
    // when the CMOV condition is false.
    assert(X86::getCondFromCMovOpc(MI.getOpcode()) == OppCC &&
           "Can only handle memory-operand cmov instructions with a condition "
           "opposite to the selected branch direction.");

    // The goal is to rewrite the cmov from:
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), (mem)
    //
    // to
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), %C
    //   FalseMBB:
    //     %C = MOV (mem)
    //
    // Which will allow the next loop to rewrite the CMOV in terms of a PHI:
    //
    //   MBB:
    //     JMP!cc SinkMBB
    //   FalseMBB:
    //     %C = MOV (mem)
    //   SinkMBB:
    //     %A = PHI [ %C, FalseMBB ], [ %B, MBB]

    // Get a fresh register to use as the destination of the MOV.
    const TargetRegisterClass *RC = MRI->getRegClass(MI.getOperand(0).getReg());
    unsigned TmpReg = MRI->createVirtualRegister(RC);

    SmallVector<MachineInstr *, 4> NewMIs;
    bool Unfolded = TII->unfoldMemoryOperand(*MBB->getParent(), MI, TmpReg,
                                             /*UnfoldLoad*/ true,
                                             /*UnfoldStore*/ false, NewMIs);
    (void)Unfolded;
    assert(Unfolded && "Should never fail to unfold a loading cmov!");

    // Move the new CMOV to just before the old one and reset any impacted
    // iterator.
    auto *NewCMOV = NewMIs.pop_back_val();
    assert(X86::getCondFromCMovOpc(NewCMOV->getOpcode()) == OppCC &&
           "Last new instruction isn't the expected CMOV!");
    LLVM_DEBUG(dbgs() << "\tRewritten cmov: "; NewCMOV->dump());
    MBB->insert(MachineBasicBlock::iterator(MI), NewCMOV);
    if (&*MIItBegin == &MI)
      MIItBegin = MachineBasicBlock::iterator(NewCMOV);

    // Sink whatever instructions were needed to produce the unfolded operand
    // into the false block.
    for (auto *NewMI : NewMIs) {
      LLVM_DEBUG(dbgs() << "\tRewritten load instr: "; NewMI->dump());
      FalseMBB->insert(FalseInsertionPoint, NewMI);
      // Re-map any operands that are from other cmovs to the inputs for this block.
      for (auto &MOp : NewMI->uses()) {
        if (!MOp.isReg())
          continue;
        auto It = FalseBBRegRewriteTable.find(MOp.getReg());
        if (It == FalseBBRegRewriteTable.end())
          continue;

        MOp.setReg(It->second);
        // This might have been a kill when it referenced the cmov result, but
        // it won't necessarily be once rewritten.
        // FIXME: We could potentially improve this by tracking whether the
        // operand to the cmov was also a kill, and then skipping the PHI node
        // construction below.
        MOp.setIsKill(false);
      }
    }
    MBB->erase(MachineBasicBlock::iterator(MI),
               std::next(MachineBasicBlock::iterator(MI)));

    // Add this PHI to the rewrite table.
    FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg;
  }

  // As we are creating the PHIs, we have to be careful if there is more than
  // one.  Later CMOVs may reference the results of earlier CMOVs, but later
  // PHIs have to reference the individual true/false inputs from earlier PHIs.
  // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from earlier PHI's
  // destination registers, and the registers that went into the PHI.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;

  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
    unsigned DestReg = MIIt->getOperand(0).getReg();
    unsigned Op1Reg = MIIt->getOperand(1).getReg();
    unsigned Op2Reg = MIIt->getOperand(2).getReg();

    // If this CMOV we are processing is the opposite condition from the jump we
    // generated, then we have to swap the operands for the PHI that is going to
    // be generated.
    if (X86::getCondFromCMovOpc(MIIt->getOpcode()) == OppCC)
      std::swap(Op1Reg, Op2Reg);

    auto Op1Itr = RegRewriteTable.find(Op1Reg);
    if (Op1Itr != RegRewriteTable.end())
      Op1Reg = Op1Itr->second.first;

    auto Op2Itr = RegRewriteTable.find(Op2Reg);
    if (Op2Itr != RegRewriteTable.end())
      Op2Reg = Op2Itr->second.second;

    //  SinkMBB:
    //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ]
    //  ...
    MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
              .addReg(Op1Reg)
              .addMBB(FalseMBB)
              .addReg(Op2Reg)
              .addMBB(MBB);
    (void)MIB;
    LLVM_DEBUG(dbgs() << "\tFrom: "; MIIt->dump());
    LLVM_DEBUG(dbgs() << "\tTo: "; MIB->dump());

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
  }

  // Now remove the CMOV(s).
  MBB->erase(MIItBegin, MIItEnd);
}
  virtual bool runOnMachineFunction(MachineFunction &MF) {
    TII=static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
    TRI=static_cast<const R600RegisterInfo *>(MF.getTarget().getRegisterInfo());

    unsigned MaxStack = 0;
    unsigned CurrentStack = 0;
    unsigned CurrentLoopDepth = 0;
    bool HasPush = false;
    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
        ++MB) {
      MachineBasicBlock &MBB = *MB;
      unsigned CfCount = 0;
      std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
      std::vector<MachineInstr * > IfThenElseStack;
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      if (MFI->ShaderType == 1) {
        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
            getHWInstrDesc(CF_CALL_FS));
        CfCount++;
        MaxStack = 1;
      }
      std::vector<ClauseFile> FetchClauses, AluClauses;
      std::vector<MachineInstr *> LastAlu(1);
      std::vector<MachineInstr *> ToPopAfter;
      
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E;) {
        if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
          DEBUG(dbgs() << CfCount << ":"; I->dump(););
          FetchClauses.push_back(MakeFetchClause(MBB, I));
          CfCount++;
          continue;
        }

        MachineBasicBlock::iterator MI = I;
        if (MI->getOpcode() != AMDGPU::ENDIF)
          LastAlu.back() = 0;
        if (MI->getOpcode() == AMDGPU::CF_ALU)
          LastAlu.back() = MI;
        I++;
        switch (MI->getOpcode()) {
        case AMDGPU::CF_ALU_PUSH_BEFORE:
          CurrentStack++;
          MaxStack = std::max(MaxStack, CurrentStack);
          HasPush = true;
          if (ST.hasCaymanISA() && CurrentLoopDepth > 1) {
            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_CM))
                .addImm(CfCount + 1)
                .addImm(1);
            MI->setDesc(TII->get(AMDGPU::CF_ALU));
            CfCount++;
          }
        case AMDGPU::CF_ALU:
          I = MI;
          AluClauses.push_back(MakeALUClause(MBB, I));
          DEBUG(dbgs() << CfCount << ":"; MI->dump(););
          CfCount++;
          break;
        case AMDGPU::WHILELOOP: {
          CurrentStack+=4;
          CurrentLoopDepth++;
          MaxStack = std::max(MaxStack, CurrentStack);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_WHILE_LOOP))
              .addImm(1);
          std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
              std::set<MachineInstr *>());
          Pair.second.insert(MIb);
          LoopStack.push_back(Pair);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::ENDLOOP: {
          CurrentStack-=4;
          CurrentLoopDepth--;
          std::pair<unsigned, std::set<MachineInstr *> > Pair =
              LoopStack.back();
          LoopStack.pop_back();
          CounterPropagateAddr(Pair.second, CfCount);
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
              .addImm(Pair.first + 1);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case AMDGPU::IF_PREDICATE_SET: {
          LastAlu.push_back(0);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_JUMP))
              .addImm(0)
              .addImm(0);
          IfThenElseStack.push_back(MIb);
          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          MI->eraseFromParent();
          CfCount++;
          break;
        }
Пример #5
0
bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) {

    bool Changed = false;
    MachineBasicBlock::iterator I0, I1;
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
        const MCInstrDesc& MCid = I->getDesc();

        switch(MCid.getOpcode()) {
        default:
            ++I;
            continue;
        case Mips::SETGP2:
            // Convert "setgp2 $globalreg, $t9" to "addu $globalreg, $v0, $t9"
            BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::ADDu),
                    I->getOperand(0).getReg())
            .addReg(Mips::V0).addReg(I->getOperand(1).getReg());
            break;

        /*
        	// trick for SH
        	case Mips::ULW:
        	  I0 = I++;
        	  I1 = I++;
        	  BuildMI(MBB, I0, I0->getDebugLoc(), TII->get(Mips::SB)).addOperand(I1->getOperand(1))
        			  .addOperand(I0->getOperand(1)).addOperand(I0->getOperand(2));
        	  BuildMI(MBB, I0, I0->getDebugLoc(), TII->get(Mips::SRL)).addOperand(I0->getOperand(0))
        			  .addOperand(I1->getOperand(1)).addImm(8);
        	  BuildMI(MBB, I0, I0->getDebugLoc(), TII->get(Mips::SB)).addOperand(I1->getOperand(1))
        			  .addOperand(I0->getOperand(1)).addImm(I0->getOperand(2).getImm() + 1);

        	  MBB.erase(I0);
        	  MBB.erase(I1);
        	  MBB.erase(I++);

        	  continue;

        	// trick for LH
        	case Mips::ULH:
        	  I->dump();
        	  BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::LB)).addOperand(I->getOperand(0)).
        			  addOperand(I->getOperand(1)).addImm(I->getOperand(2).getImm()+1);
        	  break;

        	// trick for LHu
        	case Mips::ULHu:
        	  I->dump();
        	  BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::LBu)).addOperand(I->getOperand(0)).
        			  addOperand(I->getOperand(1)).addImm(I->getOperand(2).getImm()+1);
        	  break;
        */

        case Mips::SYNC:
            I->dump();
            BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::NOP));
            break;

        case Mips::ADD:
            I->dump();
            BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::ADDu)).addOperand(I->getOperand(0)).
            addOperand(I->getOperand(1)).addOperand(I->getOperand(2));
            break;

        case Mips::ADDi:
            I->dump();
            BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::ADDiu)).addOperand(I->getOperand(0)).
            addOperand(I->getOperand(1)).addOperand(I->getOperand(2));
            break;

        case Mips::SUB:
            I->dump();
            BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::SUBu)).addOperand(I->getOperand(0)).
            addOperand(I->getOperand(1)).addOperand(I->getOperand(2));
            break;

        case Mips::B:
            I->dump();
            BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::BGEZ)).addReg(Mips::ZERO).
            addOperand(I->getOperand(0));
            break;


        case Mips::LWL:
        case Mips::LWR:
        case Mips::SWL:
        case Mips::SWR:

        case Mips::LL:
        case Mips::LL_P8:
        case Mips::SC:
        case Mips::SC_P8:

        case Mips::MOVZ_I_I:
        case Mips::MOVN_I_I:

        case Mips::ROTR:
        case Mips::ROTRV:

        case Mips::EXT:
        case Mips::INS:
        case Mips::RDHWR:

        case Mips::MUL:
        case Mips::MULTu:
        case Mips::MULT:
        case Mips::SDIV:
        case Mips::UDIV:

        case Mips::ULW:
        case Mips::USW:

            /*
            case Mips::MTHI:
            case Mips::MTLO:
            case Mips::MFHI:
            case Mips::MFLO:
            */


            /*
            case Mips::BGEZAL:
            case Mips::BLTZAL:
            */


            I->dump();
            ++I;
            continue;


        case Mips::BuildPairF64:
            ExpandBuildPairF64(MBB, I);
            break;
        case Mips::ExtractElementF64:
            ExpandExtractElementF64(MBB, I);
            break;
        }

        // delete original instr
        MBB.erase(I++);
        Changed = true;
    }

    return Changed;
}