Example #1
// Traverse the DFG and collect the set of dead RefNodes and the set of
// dead instructions. Return "true" if any of these sets is non-empty,
// "false" otherwise.
bool DeadCodeElimination::collect() {
  // This function works by first finding all live nodes. The dead nodes
  // are then the complement of the set of live nodes.
  //
  // Assume that all nodes are dead. Identify instructions which must be
  // considered live, i.e. instructions with observable side-effects, such
  // as calls and stores. All arguments of such instructions are considered
  // live. For each live def, all operands used in the corresponding
  // instruction are considered live. For each live use, all its reaching
  // defs are considered live.
  LiveNodes.clear();
  SetVector<NodeId> WorkQ;
  for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG))
    for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG))
      scanInstr(IA, WorkQ);

  while (!WorkQ.empty()) {
    NodeId N = *WorkQ.begin();
    WorkQ.remove(N);
    LiveNodes.insert(N);
    auto RA = DFG.addr<RefNode*>(N);
    if (DFG.IsDef(RA))
      processDef(RA, WorkQ);
    else
      processUse(RA, WorkQ);
  }

  if (trace()) {
    dbgs() << "Live nodes:\n";
    for (NodeId N : LiveNodes) {
      auto RA = DFG.addr<RefNode*>(N);
      dbgs() << PrintNode<RefNode*>(RA, DFG) << "\n";
    }
  }

  auto IsDead = [this] (NodeAddr<InstrNode*> IA) -> bool {
    for (NodeAddr<DefNode*> DA : IA.Addr->members_if(DFG.IsDef, DFG))
      if (LiveNodes.count(DA.Id))
        return false;
    return true;
  };

  for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) {
    for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) {
      for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG))
        if (!LiveNodes.count(RA.Id))
          DeadNodes.insert(RA.Id);
      if (DFG.IsCode<NodeAttrs::Stmt>(IA))
        if (isLiveInstr(NodeAddr<StmtNode*>(IA).Addr->getCode()))
          continue;
      if (IsDead(IA)) {
        DeadInstrs.insert(IA.Id);
        if (trace())
          dbgs() << "Dead instr: " << PrintNode<InstrNode*>(IA, DFG) << "\n";
      }
    }
  }

  return !DeadNodes.empty();
}
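
The collect() routine above is a mark-then-complement pass: seed a worklist with the roots (instructions with observable side effects), propagate liveness backwards, and declare everything unmarked dead. Below is a minimal standalone sketch of that worklist shape over a toy dependency graph; the graph and all names are illustrative, not part of the RDF API.

#include <cstdio>
#include <set>
#include <vector>

int main() {
  // Toy graph: Deps[N] lists the nodes that node N depends on (think
  // "reaching defs of a live use").
  std::vector<std::vector<int>> Deps = {{}, {0}, {1}, {}, {3}};

  std::vector<int> WorkQ = {2}; // roots: side-effecting instructions
  std::set<int> Live;
  while (!WorkQ.empty()) {
    int N = WorkQ.back();
    WorkQ.pop_back();
    if (!Live.insert(N).second)
      continue; // already marked live
    for (int D : Deps[N]) // everything a live node depends on is live
      WorkQ.push_back(D);
  }

  // The dead set is the complement of the live set.
  for (int N = 0; N != (int)Deps.size(); ++N)
    if (!Live.count(N))
      std::printf("node %d is dead\n", N); // prints nodes 3 and 4
}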
Example #2
void GCPtrTracker::recalculateBBsStates() {
  SetVector<const BasicBlock *> Worklist;
  // TODO: This order is suboptimal; it would be better to use a priority
  // queue where the priority is the RPO number of the BB.
  for (auto &BBI : BlockMap)
    Worklist.insert(BBI.first);

  // This loop iterates the AvailableIn/Out sets until it converges.
  // The AvailableIn and AvailableOut sets decrease as we iterate.
  while (!Worklist.empty()) {
    const BasicBlock *BB = Worklist.pop_back_val();
    BasicBlockState *BBS = BlockMap[BB];

    size_t OldInCount = BBS->AvailableIn.size();
    for (const BasicBlock *PBB : predecessors(BB))
      set_intersect(BBS->AvailableIn, BlockMap[PBB]->AvailableOut);

    assert(OldInCount >= BBS->AvailableIn.size() && "invariant!");

    bool InputsChanged = OldInCount != BBS->AvailableIn.size();
    bool ContributionChanged =
        removeValidUnrelocatedDefs(BB, BBS, BBS->Contribution);
    if (!InputsChanged && !ContributionChanged)
      continue;

    size_t OldOutCount = BBS->AvailableOut.size();
    transferBlock(BB, *BBS, ContributionChanged);
    if (OldOutCount != BBS->AvailableOut.size()) {
      assert(OldOutCount > BBS->AvailableOut.size() && "invariant!");
      Worklist.insert(succ_begin(BB), succ_end(BB));
    }
  }
}
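
The TODO above suggests driving the worklist in reverse post-order, so that a block is usually processed only after its predecessors have settled. Here is a standalone sketch of that idea on a toy acyclic CFG; the RPO computation and the queue are illustrative, not the GCPtrTracker code.

#include <cstdio>
#include <functional>
#include <queue>
#include <set>
#include <vector>

int main() {
  // Toy CFG: Succ[B] are the successors of block B; block 0 is the entry.
  std::vector<std::vector<int>> Succ = {{1, 2}, {3}, {3}, {}};

  // Post-order DFS from the entry, then invert it to get RPO numbers.
  std::vector<int> RPO(Succ.size());
  std::vector<int> Post;
  std::set<int> Seen = {0};
  std::function<void(int)> DFS = [&](int B) {
    for (int S : Succ[B])
      if (Seen.insert(S).second)
        DFS(S);
    Post.push_back(B);
  };
  DFS(0);
  for (int I = 0, N = (int)Post.size(); I != N; ++I)
    RPO[Post[N - 1 - I]] = I;

  // Min-heap keyed on RPO number: predecessors tend to come out first.
  auto Cmp = [&](int A, int B) { return RPO[A] > RPO[B]; };
  std::priority_queue<int, std::vector<int>, decltype(Cmp)> Worklist(Cmp);
  for (int B = 0; B != (int)Succ.size(); ++B)
    Worklist.push(B);
  while (!Worklist.empty()) {
    int B = Worklist.top();
    Worklist.pop();
    // ...recompute AvailableIn/Out for B; re-push successors that changed...
    std::printf("visit block %d (RPO %d)\n", B, RPO[B]);
  }
}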
Example #3
// If a linkonce global is present in the MustPreserveSymbols, we need to make
// sure we honor this. To force the compiler to not drop it, we add it to the
// "llvm.compiler.used" global.
void LTOCodeGenerator::preserveDiscardableGVs(
    Module &TheModule,
    llvm::function_ref<bool(const GlobalValue &)> mustPreserveGV) {
  SetVector<Constant *> UsedValuesSet;
  if (GlobalVariable *LLVMUsed =
          TheModule.getGlobalVariable("llvm.compiler.used")) {
    ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
    for (auto &V : Inits->operands())
      UsedValuesSet.insert(cast<Constant>(&V));
    LLVMUsed->eraseFromParent();
  }
  llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(TheModule.getContext());
  auto mayPreserveGlobal = [&](GlobalValue &GV) {
    if (!GV.isDiscardableIfUnused() || GV.isDeclaration())
      return;
    if (!mustPreserveGV(GV))
      return;
    if (GV.hasAvailableExternallyLinkage()) {
      emitWarning(
          (Twine("Linker asked to preserve available_externally global: '") +
           GV.getName() + "'").str());
      return;
    }
    if (GV.hasInternalLinkage()) {
      emitWarning((Twine("Linker asked to preserve internal global: '") +
                   GV.getName() + "'").str());
      return;
    }
    UsedValuesSet.insert(ConstantExpr::getBitCast(&GV, i8PTy));
  };
  for (auto &GV : TheModule)
    mayPreserveGlobal(GV);
  for (auto &GV : TheModule.globals())
    mayPreserveGlobal(GV);
  for (auto &GV : TheModule.aliases())
    mayPreserveGlobal(GV);

  if (UsedValuesSet.empty())
    return;

  llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, UsedValuesSet.size());
  auto *LLVMUsed = new llvm::GlobalVariable(
      TheModule, ATy, false, llvm::GlobalValue::AppendingLinkage,
      llvm::ConstantArray::get(ATy, UsedValuesSet.getArrayRef()),
      "llvm.compiler.used");
  LLVMUsed->setSection("llvm.metadata");
}
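
A hypothetical call site for the routine above; MustPreserveSymbols and the surrounding setup are assumptions for illustration, not the actual LTOCodeGenerator driver.

// Illustrative only: keep every discardable global whose name the linker
// asked us to preserve (MustPreserveSymbols is a hypothetical StringSet).
llvm::StringSet<> MustPreserveSymbols;
// ... filled in from the linker's preserve list ...
preserveDiscardableGVs(TheModule, [&](const GlobalValue &GV) {
  return MustPreserveSymbols.count(GV.getName()) > 0;
});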
Example #4
bool PlaceSafepoints::runOnFunction(Function &F) {
  if (F.isDeclaration() || F.empty()) {
    // This is a declaration, nothing to do.  Must exit early to avoid crash in
    // dom tree calculation
    return false;
  }

  if (isGCSafepointPoll(F)) {
    // Given that this function is inlined during safepoint poll insertion,
    // rewriting it doesn't make any sense.  Note that we do make any
    // contained calls parseable after we inline a poll.
    return false;
  }

  if (!shouldRewriteFunction(F))
    return false;

  bool modified = false;

  // In various bits below, we rely on the fact that uses are reachable from
  // defs.  When there are basic blocks unreachable from the entry, dominance
  // and reachability queries return non-sensical results.  Thus, we preprocess
  // the function to ensure these properties hold.
  modified |= removeUnreachableBlocks(F);

  // STEP 1 - Insert the safepoint polling locations.  We do not need to
  // actually insert parse points yet.  That will be done for all polls and
  // calls in a single pass.

  DominatorTree DT;
  DT.recalculate(F);

  SmallVector<Instruction *, 16> PollsNeeded;
  std::vector<CallSite> ParsePointNeeded;

  if (enableBackedgeSafepoints(F)) {
    // Construct a pass manager to run the LoopPass backedge logic.  We
    // need the pass manager to handle scheduling all the loop passes
    // appropriately.  Doing this by hand is painful and just not worth messing
    // with for the moment.
    legacy::FunctionPassManager FPM(F.getParent());
    bool CanAssumeCallSafepoints = enableCallSafepoints(F);
    PlaceBackedgeSafepointsImpl *PBS =
      new PlaceBackedgeSafepointsImpl(CanAssumeCallSafepoints);
    FPM.add(PBS);
    FPM.run(F);

    // We preserve dominance information when inserting the poll, otherwise
    // we'd have to recalculate this on every insert
    DT.recalculate(F);

    auto &PollLocations = PBS->PollLocations;

    auto OrderByBBName = [](Instruction *a, Instruction *b) {
      return a->getParent()->getName() < b->getParent()->getName();
    };
    // We need the order of the list to be stable so that naming ends up stable
    // when we split edges.  This makes test cases much easier to write.
    std::sort(PollLocations.begin(), PollLocations.end(), OrderByBBName);

    // We can sometimes end up with duplicate poll locations.  This happens if
    // a single loop is visited more than once.  The fact that this happens seems
    // wrong, but it does happen for the split-backedge.ll test case.
    PollLocations.erase(std::unique(PollLocations.begin(),
                                    PollLocations.end()),
                        PollLocations.end());

    // Insert a poll at each point the analysis pass identified
    // The poll location must be the terminator of a loop latch block.
    for (TerminatorInst *Term : PollLocations) {
      // We are inserting a poll, the function is modified
      modified = true;

      if (SplitBackedge) {
        // Split the backedge of the loop and insert the poll within that new
        // basic block.  This creates a loop with two latches per original
        // latch (which is non-ideal), but this appears to be easier to
        // optimize in practice than inserting the poll immediately before the
        // latch test.

        // Since this is a latch, at least one of the successors must dominate
        // it. It's possible that we have a) duplicate edges to the same header
        // and b) edges to distinct loop headers.  We need to insert polls on
        // each.
        SetVector<BasicBlock *> Headers;
        for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
          BasicBlock *Succ = Term->getSuccessor(i);
          if (DT.dominates(Succ, Term->getParent())) {
            Headers.insert(Succ);
          }
        }
        assert(!Headers.empty() && "poll location is not a loop latch?");

        // The split loop structure here is so that we only need to recalculate
        // the dominator tree once.  Alternatively, we could just keep it up to
        // date and use a more natural merged loop.
        SetVector<BasicBlock *> SplitBackedges;
        for (BasicBlock *Header : Headers) {
          BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, &DT);
          PollsNeeded.push_back(NewBB->getTerminator());
          NumBackedgeSafepoints++;
        }
      } else {
        // Split the latch block itself, right before the terminator.
        PollsNeeded.push_back(Term);
        NumBackedgeSafepoints++;
      }
    }
  }

  if (enableEntrySafepoints(F)) {
    Instruction *Location = findLocationForEntrySafepoint(F, DT);
    if (!Location) {
      // policy choice not to insert?
    } else {
      PollsNeeded.push_back(Location);
      modified = true;
      NumEntrySafepoints++;
    }
  }

  // Now that we've identified all the needed safepoint poll locations, insert
  // safepoint polls themselves.
  for (Instruction *PollLocation : PollsNeeded) {
    std::vector<CallSite> RuntimeCalls;
    InsertSafepointPoll(PollLocation, RuntimeCalls);
    ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
                            RuntimeCalls.end());
  }

  // If we've been asked to not wrap the calls with gc.statepoint, then we're
  // done.  In the near future, this option will be "constant folded" to true,
  // and the code below that deals with inserting gc.statepoint calls will be
  // removed.  Wrapping potentially safepointing calls in gc.statepoint will
  // then become the responsibility of the RewriteStatepointsForGC pass.
  if (NoStatepoints)
    return modified;

  PollsNeeded.clear(); // make sure we don't accidentally use these later
  // The dominator tree has been invalidated by the inlining performed in the
  // above loop.  TODO: Teach the inliner how to update the dom tree?
  DT.recalculate(F);

  if (enableCallSafepoints(F)) {
    std::vector<CallSite> Calls;
    findCallSafepoints(F, Calls);
    NumCallSafepoints += Calls.size();
    ParsePointNeeded.insert(ParsePointNeeded.end(), Calls.begin(), Calls.end());
  }

  // Unique the vectors since we can end up with duplicates if we scan the call
  // site for call safepoints after we add it for entry or backedge.  The
  // only reason we need tracking at all is that some functions might have
  // polls but not call safepoints and thus we might miss marking the runtime
  // calls for the polls. (This is useful in test cases!)
  unique_unsorted(ParsePointNeeded);

  // Any parse point (no matter what source) will be handled here

  // We're about to start modifying the function
  if (!ParsePointNeeded.empty())
    modified = true;

  // Now run through and insert the safepoints, but do _NOT_ update or remove
  // any existing uses.  We have references to live variables that need to
  // survive to the last iteration of this loop.
  std::vector<Value *> Results;
  Results.reserve(ParsePointNeeded.size());
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];

    // For invoke statepoints we need to remove all phi nodes at the normal
    // destination block.
    // The reason is that a gc_result can only be placed after the last phi
    // node in the basic block, and we would get malformed code after RAUW of
    // the gc_result if one of these phi nodes used the result of the invoke.
    if (InvokeInst *Invoke = dyn_cast<InvokeInst>(CS.getInstruction())) {
      normalizeForInvokeSafepoint(Invoke->getNormalDest(),
                                  Invoke->getParent());
    }

    Value *GCResult = ReplaceWithStatepoint(CS);
    Results.push_back(GCResult);
  }
  assert(Results.size() == ParsePointNeeded.size());

  // Adjust all users of the old call sites to use the new ones instead
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];
    Value *GCResult = Results[i];
    if (GCResult) {
      // Cannot RAUW for the invoke gc result if phi nodes are present.
      assert(CS.isCall() ||
             !isa<PHINode>(cast<Instruction>(GCResult)->getParent()->begin()));

      // Replace all uses with the new call
      CS.getInstruction()->replaceAllUsesWith(GCResult);
    }

    // Now that we've handled all uses, remove the original call itself
    // Note: The insert point can't be the deleted instruction!
    CS.getInstruction()->eraseFromParent();
  }
  return modified;
}
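
unique_unsorted() above removes duplicates without sorting, so the original discovery order of the parse points survives. A minimal sketch of what such a helper can look like (the real helper lives in PlaceSafepoints.cpp; this version is a standalone illustration):

#include <cstdio>
#include <set>
#include <vector>

// Order-preserving dedupe: keep the first occurrence of each element, drop
// later repeats, never reorder.
template <typename T> static void unique_unsorted(std::vector<T> &Vec) {
  std::set<T> Seen;
  std::vector<T> Out;
  Out.reserve(Vec.size());
  for (const T &V : Vec)
    if (Seen.insert(V).second)
      Out.push_back(V);
  Vec = std::move(Out);
}

int main() {
  std::vector<int> V = {3, 1, 3, 2, 1};
  unique_unsorted(V);
  for (int X : V)
    std::printf("%d ", X); // prints: 3 1 2
}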
Example #5
bool PlaceSafepoints::runOnFunction(Function &F) {
  if (F.isDeclaration() || F.empty()) {
    // This is a declaration, nothing to do.  Must exit early to avoid crash in
    // dom tree calculation
    return false;
  }

  if (isGCSafepointPoll(F)) {
    // Given that this function is inlined during safepoint poll insertion,
    // rewriting it doesn't make any sense.  Note that we do make any
    // contained calls parseable after we inline a poll.
    return false;
  }

  if (!shouldRewriteFunction(F))
    return false;

  const TargetLibraryInfo &TLI =
      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

  bool Modified = false;

  // In various bits below, we rely on the fact that uses are reachable from
  // defs.  When there are basic blocks unreachable from the entry, dominance
  // and reachability queries return non-sensical results.  Thus, we preprocess
  // the function to ensure these properties hold.
  Modified |= removeUnreachableBlocks(F);

  // STEP 1 - Insert the safepoint polling locations.  We do not need to
  // actually insert parse points yet.  That will be done for all polls and
  // calls in a single pass.

  DominatorTree DT;
  DT.recalculate(F);

  SmallVector<Instruction *, 16> PollsNeeded;
  std::vector<CallSite> ParsePointNeeded;

  if (enableBackedgeSafepoints(F)) {
    // Construct a pass manager to run the LoopPass backedge logic.  We
    // need the pass manager to handle scheduling all the loop passes
    // appropriately.  Doing this by hand is painful and just not worth messing
    // with for the moment.
    legacy::FunctionPassManager FPM(F.getParent());
    bool CanAssumeCallSafepoints = enableCallSafepoints(F);
    auto *PBS = new PlaceBackedgeSafepointsImpl(CanAssumeCallSafepoints);
    FPM.add(PBS);
    FPM.run(F);

    // We preserve dominance information when inserting the poll, otherwise
    // we'd have to recalculate this on every insert
    DT.recalculate(F);

    auto &PollLocations = PBS->PollLocations;

    auto OrderByBBName = [](Instruction *a, Instruction *b) {
      return a->getParent()->getName() < b->getParent()->getName();
    };
    // We need the order of the list to be stable so that naming ends up stable
    // when we split edges.  This makes test cases much easier to write.
    llvm::sort(PollLocations.begin(), PollLocations.end(), OrderByBBName);

    // We can sometimes end up with duplicate poll locations.  This happens if
    // a single loop is visited more than once.  The fact that this happens seems
    // wrong, but it does happen for the split-backedge.ll test case.
    PollLocations.erase(std::unique(PollLocations.begin(),
                                    PollLocations.end()),
                        PollLocations.end());

    // Insert a poll at each point the analysis pass identified
    // The poll location must be the terminator of a loop latch block.
    for (TerminatorInst *Term : PollLocations) {
      // We are inserting a poll, the function is modified
      Modified = true;

      if (SplitBackedge) {
        // Split the backedge of the loop and insert the poll within that new
        // basic block.  This creates a loop with two latches per original
        // latch (which is non-ideal), but this appears to be easier to
        // optimize in practice than inserting the poll immediately before the
        // latch test.

        // Since this is a latch, at least one of the successors must dominate
        // it. It's possible that we have a) duplicate edges to the same header
        // and b) edges to distinct loop headers.  We need to insert polls on
        // each.
        SetVector<BasicBlock *> Headers;
        for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
          BasicBlock *Succ = Term->getSuccessor(i);
          if (DT.dominates(Succ, Term->getParent())) {
            Headers.insert(Succ);
          }
        }
        assert(!Headers.empty() && "poll location is not a loop latch?");

        // The split loop structure here is so that we only need to recalculate
        // the dominator tree once.  Alternatively, we could just keep it up to
        // date and use a more natural merged loop.
        SetVector<BasicBlock *> SplitBackedges;
        for (BasicBlock *Header : Headers) {
          BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, &DT);
          PollsNeeded.push_back(NewBB->getTerminator());
          NumBackedgeSafepoints++;
        }
      } else {
        // Split the latch block itself, right before the terminator.
        PollsNeeded.push_back(Term);
        NumBackedgeSafepoints++;
      }
    }
  }

  if (enableEntrySafepoints(F)) {
    if (Instruction *Location = findLocationForEntrySafepoint(F, DT)) {
      PollsNeeded.push_back(Location);
      Modified = true;
      NumEntrySafepoints++;
    }
    // TODO: else we should assert that there was, in fact, a policy choice to
    // not insert an entry safepoint poll.
  }

  // Now that we've identified all the needed safepoint poll locations, insert
  // safepoint polls themselves.
  for (Instruction *PollLocation : PollsNeeded) {
    std::vector<CallSite> RuntimeCalls;
    InsertSafepointPoll(PollLocation, RuntimeCalls, TLI);
    ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
                            RuntimeCalls.end());
  }

  return Modified;
}
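
The PollLocations handling above relies on the classic sort/unique/erase idiom; std::unique only collapses adjacent duplicates, which is why the sort has to come first. A standalone illustration:

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> PollLocations = {4, 2, 4, 1, 2};
  // Sort into a deterministic order (the pass sorts by block name).
  std::sort(PollLocations.begin(), PollLocations.end());
  // std::unique shifts the unique elements to the front and returns the new
  // logical end; erase() then drops the leftover tail.
  PollLocations.erase(std::unique(PollLocations.begin(), PollLocations.end()),
                      PollLocations.end());
  for (int P : PollLocations)
    std::printf("%d ", P); // prints: 1 2 4
}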
Example #6
// Erase the nodes given in the Nodes set from DFG. In addition to removing
// them from the DFG, if a node corresponds to a statement, the corresponding
// machine instruction is erased from the function.
bool DeadCodeElimination::erase(const SetVector<NodeId> &Nodes) {
  if (Nodes.empty())
    return false;

  // Prepare the actual set of ref nodes to remove: ref nodes from Nodes
  // are included directly; for each InstrNode in Nodes, include the set
  // of all RefNodes from it.
  NodeList DRNs, DINs;
  for (auto I : Nodes) {
    auto BA = DFG.addr<NodeBase*>(I);
    uint16_t Type = BA.Addr->getType();
    if (Type == NodeAttrs::Ref) {
      DRNs.push_back(DFG.addr<RefNode*>(I));
      continue;
    }

    // If it's a code node, add all ref nodes from it.
    uint16_t Kind = BA.Addr->getKind();
    if (Kind == NodeAttrs::Stmt || Kind == NodeAttrs::Phi) {
      for (auto N : NodeAddr<CodeNode*>(BA).Addr->members(DFG))
        DRNs.push_back(N);
      DINs.push_back(DFG.addr<InstrNode*>(I));
    } else {
      llvm_unreachable("Unexpected code node");
      return false;
    }
  }

  // Sort the list so that use nodes are removed first. This makes the
  // "unlink" functions a bit faster.
  auto UsesFirst = [] (NodeAddr<RefNode*> A, NodeAddr<RefNode*> B) -> bool {
    uint16_t KindA = A.Addr->getKind(), KindB = B.Addr->getKind();
    if (KindA == NodeAttrs::Use && KindB == NodeAttrs::Def)
      return true;
    if (KindA == NodeAttrs::Def && KindB == NodeAttrs::Use)
      return false;
    return A.Id < B.Id;
  };
  llvm::sort(DRNs, UsesFirst);

  if (trace())
    dbgs() << "Removing dead ref nodes:\n";
  for (NodeAddr<RefNode*> RA : DRNs) {
    if (trace())
      dbgs() << "  " << PrintNode<RefNode*>(RA, DFG) << '\n';
    if (DFG.IsUse(RA))
      DFG.unlinkUse(RA, true);
    else if (DFG.IsDef(RA))
      DFG.unlinkDef(RA, true);
  }

  // Now, remove all dead instruction nodes.
  for (NodeAddr<InstrNode*> IA : DINs) {
    NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
    BA.Addr->removeMember(IA, DFG);
    if (!DFG.IsCode<NodeAttrs::Stmt>(IA))
      continue;

    MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
    if (trace())
      dbgs() << "erasing: " << *MI;
    MI->eraseFromParent();
  }
  return true;
}
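
The UsesFirst comparator above is a two-level sort key: ref kind first (uses before defs), node id second so the order stays deterministic. The same shape on a toy record type (all names illustrative):

#include <algorithm>
#include <cstdio>
#include <vector>

enum Kind { Use, Def };
struct Ref { Kind K; unsigned Id; };

int main() {
  std::vector<Ref> Refs = {{Def, 1}, {Use, 4}, {Def, 2}, {Use, 3}};
  // Uses sort before defs; ties within a kind break on the node id.
  std::sort(Refs.begin(), Refs.end(), [](const Ref &A, const Ref &B) {
    if (A.K != B.K)
      return A.K == Use;
    return A.Id < B.Id;
  });
  for (const Ref &R : Refs)
    std::printf("%s %u\n", R.K == Use ? "use" : "def", R.Id);
  // prints: use 3, use 4, def 1, def 2
}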
Example #7
bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
                                                     MachineLoopInfo &MLI,
                                                     MachineLoop *Loop) {
  MachineBasicBlock *Header = Loop ? Loop->getHeader() : &*MF.begin();
  SetVector<MachineBasicBlock *> RewriteSuccs;

  // DFS through Loop's body, looking for irreducible control flow. Loop is
  // natural, and we stay in its body, and we treat any nested loops
  // monolithically, so any cycles we encounter indicate irreducibility.
  SmallPtrSet<MachineBasicBlock *, 8> OnStack;
  SmallPtrSet<MachineBasicBlock *, 8> Visited;
  SmallVector<SuccessorList, 4> LoopWorklist;
  LoopWorklist.push_back(SuccessorList(Header));
  OnStack.insert(Header);
  Visited.insert(Header);
  while (!LoopWorklist.empty()) {
    SuccessorList &Top = LoopWorklist.back();
    if (Top.HasNext()) {
      MachineBasicBlock *Next = Top.Next();
      if (Next == Header || (Loop && !Loop->contains(Next)))
        continue;
      if (LLVM_LIKELY(OnStack.insert(Next).second)) {
        if (!Visited.insert(Next).second) {
          OnStack.erase(Next);
          continue;
        }
        MachineLoop *InnerLoop = MLI.getLoopFor(Next);
        if (InnerLoop != Loop)
          LoopWorklist.push_back(SuccessorList(InnerLoop));
        else
          LoopWorklist.push_back(SuccessorList(Next));
      } else {
        RewriteSuccs.insert(Top.getBlock());
      }
      continue;
    }
    OnStack.erase(Top.getBlock());
    LoopWorklist.pop_back();
  }

  // Most likely, we didn't find any irreducible control flow.
  if (LLVM_LIKELY(RewriteSuccs.empty()))
    return false;

  DEBUG(dbgs() << "Irreducible control flow detected!\n");

  // Ok. We have irreducible control flow! Create a dispatch block which will
  // contain a jump table to any block in the problematic set of blocks.
  MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
  MF.insert(MF.end(), Dispatch);
  MLI.changeLoopFor(Dispatch, Loop);

  // Add the jump table.
  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  MachineInstrBuilder MIB = BuildMI(*Dispatch, Dispatch->end(), DebugLoc(),
                                    TII.get(WebAssembly::BR_TABLE_I32));

  // Add the register which will be used to tell the jump table which block to
  // jump to.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  MIB.addReg(Reg);

  // Collect all the blocks which need to have their successors rewritten,
  // add the successors to the jump table, and remember their index.
  DenseMap<MachineBasicBlock *, unsigned> Indices;
  SmallVector<MachineBasicBlock *, 4> SuccWorklist(RewriteSuccs.begin(),
                                                   RewriteSuccs.end());
  while (!SuccWorklist.empty()) {
    MachineBasicBlock *MBB = SuccWorklist.pop_back_val();
    auto Pair = Indices.insert(std::make_pair(MBB, 0));
    if (!Pair.second)
      continue;

    unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
    DEBUG(dbgs() << printMBBReference(*MBB) << " has index " << Index << "\n");

    Pair.first->second = Index;
    for (auto Pred : MBB->predecessors())
      RewriteSuccs.insert(Pred);

    MIB.addMBB(MBB);
    Dispatch->addSuccessor(MBB);

    MetaBlock Meta(MBB);
    for (auto *Succ : Meta.successors())
      if (Succ != Header && (!Loop || Loop->contains(Succ)))
        SuccWorklist.push_back(Succ);
  }

  // Rewrite the problematic successors for every block in RewriteSuccs.
  // For simplicity, we just introduce a new block for every edge we need to
  // rewrite. Fancier things are possible.
  for (MachineBasicBlock *MBB : RewriteSuccs) {
    DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
    for (auto *Succ : MBB->successors()) {
      if (!Indices.count(Succ))
        continue;

      MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
      MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ)
                                             : MF.end(),
                Split);
      MLI.changeLoopFor(Split, Loop);

      // Set the register read by the jump table to the index of the block we
      // wish to jump to, then branch to the jump table.
      BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::CONST_I32),
              Reg)
          .addImm(Indices[Succ]);
      BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::BR))
          .addMBB(Dispatch);
      Split->addSuccessor(Dispatch);
      Map[Succ] = Split;
    }
    // Remap the terminator operands and the successor list.
    for (MachineInstr &Term : MBB->terminators())
      for (auto &Op : Term.explicit_uses())
        if (Op.isMBB() && Indices.count(Op.getMBB()))
          Op.setMBB(Map[Op.getMBB()]);
    for (auto Rewrite : Map)
      MBB->replaceSuccessor(Rewrite.first, Rewrite.second);
  }

  // Create a fake default label, because br_table requires one.
  MIB.addMBB(MIB.getInstr()
                 ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
                 .getMBB());

  return true;
}
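
The detection loop at the top of VisitLoop is an iterative DFS with an on-stack set: hitting a block that is already on the DFS stack, other than the loop header, means there is a cycle the header does not dominate, i.e. irreducible control flow. A standalone sketch of the same test on a toy CFG (illustrative, not the MachineFunction code):

#include <cstdio>
#include <set>
#include <utility>
#include <vector>

int main() {
  // Toy CFG with two entries into the cycle 1 <-> 2 (classic irreducibility):
  // 0 -> 1, 0 -> 2, 1 -> 2, 2 -> 1.
  std::vector<std::vector<int>> Succ = {{1, 2}, {2}, {1}};
  const int Header = 0;

  std::set<int> OnStack = {Header}, Visited = {Header};
  // Each stack entry is (block, index of the next successor to try).
  std::vector<std::pair<int, unsigned>> Stack = {{Header, 0}};
  bool Irreducible = false;
  while (!Stack.empty()) {
    auto &[B, NextIdx] = Stack.back();
    if (NextIdx < Succ[B].size()) {
      int Next = Succ[B][NextIdx++];
      if (Next == Header)
        continue; // back edge to the header: still reducible
      if (OnStack.count(Next)) {
        Irreducible = true; // cycle that does not go through the header
        continue;
      }
      if (Visited.insert(Next).second) {
        OnStack.insert(Next);
        Stack.push_back({Next, 0});
      }
      continue;
    }
    OnStack.erase(B); // all successors done; leave the DFS stack
    Stack.pop_back();
  }
  std::printf("irreducible: %s\n", Irreducible ? "yes" : "no"); // yes
}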